juknife 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b457a3c9dece497c1266292d703d066695faf656
4
- data.tar.gz: c144c4631890a7c2702263090465ce4c900e5f84
3
+ metadata.gz: ad09b3333ab9843a6d35109af520c0ae6d15bd45
4
+ data.tar.gz: 54a7bdd6f233f1832d9749cccad7067bfa1189a8
5
5
  SHA512:
6
- metadata.gz: 35111faac73c8ec38c8fdb11cb657f120d19ce3f3f05846e27f281f106343897000c563acf6f3f55acad0de109a31ddf7e7e319e831ecf6140647593cbdc60fa
7
- data.tar.gz: 5a967a81ae83cff36c5f45dcb2a064b49b4fe73d6b8a1a401f2bb79638c07fe8c98c67f904fe49ee41c8df54732f38fb1d1f28f9c69bb00a3d695bf8bb8adc15
6
+ metadata.gz: 857e0b2836c4ae7299ddd8c7b1f86d430bf4c3df34dae6d92a32927cdc315e35d877e86e6349979beb36a2d1b116d0b447639fe29d08360e28ca687bcf9fc3c6
7
+ data.tar.gz: f6a53f1e907d93b53cd074a6d46855b4aede515385fa96ec5588bab82153373cf6969fce8850cbb0d6147d420c71d58e0eb972684d73c93f234cd50e12ee77eb
data/.codeclimate.yml ADDED
@@ -0,0 +1,20 @@
1
+ engines:
2
+ rubocop:
3
+ enabled: true
4
+ channel: rubocop-0-48
5
+ duplication:
6
+ enabled: true
7
+ config:
8
+ languages:
9
+ - ruby
10
+ fixme:
11
+ enabled: true
12
+
13
+ ratings:
14
+ paths:
15
+ - Gemfile.lock
16
+ - "**.rb"
17
+
18
+ exclude_paths:
19
+ - spec/
20
+ - vendor/
data/.gitignore CHANGED
@@ -10,3 +10,6 @@
10
10
 
11
11
  # rspec failure tracking
12
12
  .rspec_status
13
+
14
+ # simplecov
15
+ build
data/README.md CHANGED
@@ -7,7 +7,7 @@ A Ruby library for Web scraping. It allows to describe scraping nodes and text s
7
7
  For example, scraping on Google search can be written as follows:
8
8
 
9
9
  ```ruby
10
- class GoogleSearchScraper < Juknife::Scraper
10
+ class GoogleSearchAgent < Juknife::Agent
11
11
  request do
12
12
  get 'https://www.google.co.jp/search'
13
13
  user_agent 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
@@ -27,8 +27,8 @@ class GoogleSearchScraper < Juknife::Scraper
27
27
  end
28
28
  end
29
29
 
30
- scraper = GoogleSearchScraper.new
31
- scraper.scrape(query: 'test')
30
+ agent = GoogleSearchAgent.new
31
+ agent.scrape(query: 'test')
32
32
  # =>
33
33
  # {:results=>
34
34
  # [
data/circle.yml ADDED
@@ -0,0 +1,3 @@
1
+ machine:
2
+ ruby:
3
+ version: 2.4.1
data/juknife.gemspec CHANGED
@@ -29,10 +29,13 @@ Gem::Specification.new do |spec|
29
29
  spec.add_runtime_dependency 'activesupport', '~> 5.0.2'
30
30
  spec.add_runtime_dependency 'nokogiri', '~> 1.7'
31
31
  spec.add_runtime_dependency 'faraday', '~> 0.1'
32
+ spec.add_runtime_dependency 'faraday_middleware', '~> 0.11'
32
33
  spec.add_development_dependency 'bundler', '~> 1.14'
34
+ spec.add_development_dependency 'codeclimate-test-reporter'
35
+ spec.add_development_dependency 'pry'
33
36
  spec.add_development_dependency 'rake', '~> 10.0'
34
37
  spec.add_development_dependency 'rspec', '~> 3.0'
35
38
  spec.add_development_dependency 'rubocop'
39
+ spec.add_development_dependency 'simplecov'
36
40
  spec.add_development_dependency 'yard'
37
- spec.add_development_dependency 'pry'
38
41
  end
data/lib/juknife.rb CHANGED
@@ -1,10 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'juknife/version'
4
+ require 'active_support/dependencies/autoload'
4
5
 
5
6
  # The module that contains everything Juknife-related.
6
7
  module Juknife
7
- autoload :Request, 'juknife/request'
8
- autoload :Scraper, 'juknife/scraper'
9
- autoload :Scraping, 'juknife/scraping'
8
+ extend ActiveSupport::Autoload
9
+
10
+ autoload :Agent
11
+ autoload :Error
12
+ autoload :Middleware
13
+ autoload :Request
14
+ autoload :Scraping
10
15
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday_middleware'
4
+
5
+ module Juknife
6
+ # The agent that sends a request to a web page and scrapes the response.
7
+ class Agent
8
+ def scrape(params = {})
9
+ response(params).body
10
+ end
11
+
12
+ def response(params)
13
+ connection(params).get
14
+ end
15
+
16
+ def connection(params)
17
+ @connection ||= Faraday.new do |b|
18
+ # request middlewares
19
+ b.use Middleware::RequestDSL, params, &self.class.request_dsl
20
+ b.use Faraday::Request::UrlEncoded
21
+
22
+ # response middlewares
23
+ b.use Middleware::ScrapingDSL, params, &self.class.scraping_dsl
24
+ b.use Middleware::HTMLParser
25
+ b.use Middleware::HTTPErrorHandler
26
+ b.use FaradayMiddleware::FollowRedirects
27
+
28
+ b.adapter :net_http
29
+ end
30
+ end
31
+
32
+ class << self
33
+ attr_reader :request_dsl, :scraping_dsl
34
+
35
+ def request(&block)
36
+ @request_dsl = block
37
+ end
38
+
39
+ def scraping(&block)
40
+ @scraping_dsl = block
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Juknife
4
+ class Error < StandardError
5
+ end
6
+
7
+ class HTTPClientError < Error
8
+ end
9
+
10
+ class HTTPServerError < Error
11
+ end
12
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Juknife
4
+ # :nodoc:
5
+ module Middleware
6
+ autoload :HTMLParser, 'juknife/middleware/html_parser'
7
+ autoload :HTTPErrorHandler, 'juknife/middleware/http_error_handler'
8
+ autoload :RequestDSL, 'juknife/middleware/request_dsl'
9
+ autoload :ScrapingDSL, 'juknife/middleware/scraping_dsl'
10
+ end
11
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+ require 'nokogiri'
5
+
6
+ module Juknife
7
+ # :nodoc:
8
+ module Middleware
9
+ # A Faraday middleware to parse body string to Nokogiri document.
10
+ class HTMLParser < Faraday::Response::Middleware
11
+ def parse(body)
12
+ Nokogiri.parse(body)
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+
5
+ module Juknife
6
+ # :nodoc:
7
+ module Middleware
8
+ # A middleware to handle HTTP errors
9
+ class HTTPErrorHandler < Faraday::Response::Middleware
10
+ def on_complete(env)
11
+ case env[:status]
12
+ when 400..499
13
+ raise HTTPClientError, env[:status]
14
+ when 500..599
15
+ raise HTTPServerError, env[:status]
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+
5
+ module Juknife
6
+ # :nodoc:
7
+ module Middleware
8
+ # A Faraday middleware to interpret Juknife request DSL
9
+ class RequestDSL < Faraday::Middleware
10
+ include Juknife::Request::DSL
11
+
12
+ attr_reader :params
13
+
14
+ def initialize(app, params = {}, &block)
15
+ super(app)
16
+ @params = params
17
+ instance_eval(&block)
18
+ end
19
+
20
+ def call(env) # rubocop: disable Metrics/AbcSize
21
+ uri = URI.parse(url_builder.call)
22
+ uri.query = query_builder&.call&.to_query
23
+
24
+ env[:url] = uri
25
+ env[:method] = http_method
26
+ env[:body] = body_builder&.call
27
+ env[:request_headers] ||= {}
28
+
29
+ if user_agent_builder
30
+ env[:request_headers]['User-Agent'] = user_agent_builder&.call
31
+ end
32
+
33
+ @app.call(env)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday'
4
+
5
+ module Juknife
6
+ # :nodoc:
7
+ module Middleware
8
+ # A Faraday middleware to interpret Juknife scraping DSL
9
+ class ScrapingDSL < Faraday::Response::Middleware
10
+ include Juknife::Scraping::DSL
11
+
12
+ def initialize(app, params = {}, &block)
13
+ super(app)
14
+ @params = params
15
+ instance_eval(&block)
16
+ end
17
+
18
+ def parse(doc)
19
+ context = Juknife::Scraping::Context.new(doc)
20
+
21
+ children.each do |child|
22
+ child.visit(context)
23
+ end
24
+
25
+ context.result
26
+ end
27
+ end
28
+ end
29
+ end
@@ -3,7 +3,9 @@
3
3
  module Juknife
4
4
  # :nodoc:
5
5
  module Request
6
- autoload :DSL, 'juknife/request/dsl'
7
- autoload :Runner, 'juknife/request/runner'
6
+ extend ActiveSupport::Autoload
7
+
8
+ autoload :DSL
9
+ autoload :Runner
8
10
  end
9
11
  end
@@ -3,8 +3,10 @@
3
3
  module Juknife
4
4
  # :nodoc:
5
5
  module Scraping
6
- autoload :DSL, 'juknife/scraping/dsl'
7
- autoload :Context, 'juknife/scraping/context'
8
- autoload :Runner, 'juknife/scraping/runner'
6
+ extend ActiveSupport::Autoload
7
+
8
+ autoload :DSL
9
+ autoload :Context
10
+ autoload :Runner
9
11
  end
10
12
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Juknife
4
- VERSION = '0.1.3'
4
+ VERSION = '0.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: juknife
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - nyamadori
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-05-11 00:00:00.000000000 Z
11
+ date: 2017-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: faraday_middleware
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.11'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.11'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: bundler
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +80,34 @@ dependencies:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
82
  version: '1.14'
83
+ - !ruby/object:Gem::Dependency
84
+ name: codeclimate-test-reporter
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
69
111
  - !ruby/object:Gem::Dependency
70
112
  name: rake
71
113
  requirement: !ruby/object:Gem::Requirement
@@ -109,7 +151,7 @@ dependencies:
109
151
  - !ruby/object:Gem::Version
110
152
  version: '0'
111
153
  - !ruby/object:Gem::Dependency
112
- name: yard
154
+ name: simplecov
113
155
  requirement: !ruby/object:Gem::Requirement
114
156
  requirements:
115
157
  - - ">="
@@ -123,7 +165,7 @@ dependencies:
123
165
  - !ruby/object:Gem::Version
124
166
  version: '0'
125
167
  - !ruby/object:Gem::Dependency
126
- name: pry
168
+ name: yard
127
169
  requirement: !ruby/object:Gem::Requirement
128
170
  requirements:
129
171
  - - ">="
@@ -145,10 +187,10 @@ executables: []
145
187
  extensions: []
146
188
  extra_rdoc_files: []
147
189
  files:
190
+ - ".codeclimate.yml"
148
191
  - ".gitignore"
149
192
  - ".rspec"
150
193
  - ".rubocop.yml"
151
- - ".travis.yml"
152
194
  - ".yardopts"
153
195
  - CODE_OF_CONDUCT.md
154
196
  - Gemfile
@@ -157,12 +199,19 @@ files:
157
199
  - Rakefile
158
200
  - bin/console
159
201
  - bin/setup
202
+ - circle.yml
160
203
  - juknife.gemspec
161
204
  - lib/juknife.rb
205
+ - lib/juknife/agent.rb
206
+ - lib/juknife/error.rb
207
+ - lib/juknife/middleware.rb
208
+ - lib/juknife/middleware/html_parser.rb
209
+ - lib/juknife/middleware/http_error_handler.rb
210
+ - lib/juknife/middleware/request_dsl.rb
211
+ - lib/juknife/middleware/scraping_dsl.rb
162
212
  - lib/juknife/request.rb
163
213
  - lib/juknife/request/dsl.rb
164
214
  - lib/juknife/request/runner.rb
165
- - lib/juknife/scraper.rb
166
215
  - lib/juknife/scraping.rb
167
216
  - lib/juknife/scraping/context.rb
168
217
  - lib/juknife/scraping/dsl.rb
data/.travis.yml DELETED
@@ -1,8 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.4.1
5
- before_install: gem install bundler -v 1.14.6
6
- script:
7
- - bundle exec rspec
8
- - bundle exec rubocop
@@ -1,28 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Juknife
4
- # Juknife::Scraper is a scraper to request a web page and to extract data.
5
- class Scraper
6
- def initialize
7
- @scraping = Scraping::Runner.new(&self.class.scraping_block)
8
- @request = Request::Runner.new(&self.class.request_block)
9
- end
10
-
11
- def scrape(params = {})
12
- source = @request.run(params)
13
- @scraping.run(source, params)
14
- end
15
-
16
- class << self
17
- attr_reader :scraping_block, :request_block
18
-
19
- def scraping(&block)
20
- @scraping_block = block
21
- end
22
-
23
- def request(&block)
24
- @request_block = block
25
- end
26
- end
27
- end
28
- end