spidy 0.3.9 → 0.3.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 75215453c834a8e481b27cf4377235cc97ce6a6e4eff142a11743e68ee4982b4
4
- data.tar.gz: 25f3b14ad6f31b580396458c8075167f24fd4a2f6e7ff98947338ecd0588eb9d
3
+ metadata.gz: 76cb60ea985d1a663f24b7b024198d222756376bd9dd979a032c46ba39b16548
4
+ data.tar.gz: ff2e7f056f7ad5afe06df90adf0bb2e438c696472cde50c8d5758b2f9801684e
5
5
  SHA512:
6
- metadata.gz: 447b7152b807c7985e16b7b403d27f9f7b949264577e8e4dc11a52358cb9af49510696d29166adb60bb5b87158aa8d2c10faf7c810f8eced4c29f9eed8bb493a
7
- data.tar.gz: 55d82e5c495a7e5a0fd57b466e08ea072e3a712829b2d83d550b107147f30e3eaff54ed8dcec69163bf6f74e0c32990051af9de0d044cc19eadf39ecc749d003
6
+ metadata.gz: a721848978135752ddcfe3da30a293317a4852b41dc99209019ae71960538fe448ec4ad54da661e0b99edef3fcb85a84b095b99ddbbba9b628fdd4ac1be2f23c
7
+ data.tar.gz: a156f47f317cd4f1f0a66a13ac5102073723f139c5b797c8dc56d7dbdd41e342cb1ad1a6814812563033c68f448c4d57775c0edf300456dd164b90211632737e
data/.rubocop.yml CHANGED
@@ -1,7 +1,8 @@
1
1
  inherit_from: .rubocop_todo.yml
2
2
  AllCops:
3
+ TargetRubyVersion: 3.0.2
4
+ NewCops: enable
3
5
  DisplayCopNames: true
4
- TargetRubyVersion: 2.6
5
6
 
6
7
  Style/ClassAndModuleChildren:
7
8
  Enabled: false
@@ -9,7 +10,7 @@ Style/ClassAndModuleChildren:
9
10
  Style/SignalException:
10
11
  EnforcedStyle: semantic
11
12
 
12
- Naming/UncommunicativeMethodParamName:
13
+ Naming/MethodParameterName:
13
14
  AllowedNames:
14
15
  - as
15
16
 
@@ -17,8 +18,11 @@ Metrics/AbcSize:
17
18
  Max: 21
18
19
  Exclude:
19
20
 
21
+ Metrics/MethodLength:
22
+ Max: 15
23
+
20
24
  Metrics/LineLength:
21
- Max: 120
25
+ Max: 130
22
26
 
23
27
  Metrics/BlockLength:
24
28
  Max: 120
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- spidy (0.3.9)
4
+ spidy (0.3.10)
5
5
  activesupport
6
6
  mechanize
7
7
  pry
@@ -32,7 +32,7 @@ GEM
32
32
  coderay (1.1.3)
33
33
  concurrent-ruby (1.1.9)
34
34
  connection_pool (2.2.5)
35
- diff-lcs (1.4.4)
35
+ diff-lcs (1.5.0)
36
36
  domain_name (0.5.20190701)
37
37
  unf (>= 0.0.5, < 1.0.0)
38
38
  ffaker (2.20.0)
@@ -57,7 +57,6 @@ GEM
57
57
  mime-types-data (~> 3.2015)
58
58
  mime-types-data (3.2021.1115)
59
59
  mini_mime (1.1.2)
60
- mini_portile2 (2.6.1)
61
60
  minitest (5.15.0)
62
61
  mixlib-shellout (2.4.4)
63
62
  mustermann (1.1.1)
@@ -65,8 +64,7 @@ GEM
65
64
  net-http-digest_auth (1.4.1)
66
65
  net-http-persistent (4.0.1)
67
66
  connection_pool (~> 2.2)
68
- nokogiri (1.12.5)
69
- mini_portile2 (~> 2.6.1)
67
+ nokogiri (1.12.5-arm64-darwin)
70
68
  racc (~> 1.4)
71
69
  pry (0.14.1)
72
70
  coderay (~> 1.1)
@@ -121,7 +119,7 @@ GEM
121
119
  nokogiri (~> 1.8)
122
120
 
123
121
  PLATFORMS
124
- ruby
122
+ arm64-darwin-20
125
123
 
126
124
  DEPENDENCIES
127
125
  bundler (~> 2.0)
@@ -1,7 +1,7 @@
1
-
1
+ # frozen_string_literal: true
2
2
 
3
3
  Spidy.define do
4
- url_to_params = ->(url) {
4
+ url_to_params = lambda { |url|
5
5
  uri = URI.parse(url)
6
6
  params = URI.decode_www_form(uri.query).to_h if uri.query.present?
7
7
  params if params.present?
@@ -13,41 +13,41 @@ Spidy.define do
13
13
 
14
14
  limit_page = 3
15
15
  per_page = 25
16
- yielder.call(Nokogiri::HTML::Builder.new { |doc|
17
- doc.html {
18
- doc.body {
19
- doc.span.bold {
20
- doc.text "Hello world"
21
- }
22
- doc.main {
23
- (page * per_page + 1).upto((page + 1) * per_page).each do |i|
16
+ yielder.call(Nokogiri::HTML::Builder.new do |doc|
17
+ doc.html do
18
+ doc.body do
19
+ doc.span.bold do
20
+ doc.text 'Hello world'
21
+ end
22
+ doc.main do
23
+ ((page * per_page) + 1).upto((page + 1) * per_page).each do |i|
24
24
  doc.a("page #{i}", href: "http://localhost/?id=#{i}")
25
25
  end
26
- }
26
+ end
27
27
  doc.a('NEXT', href: "http://localhost/?page=#{page + 1}", class: 'next') if page < limit_page
28
- }
29
- }
30
- }.doc)
28
+ end
29
+ end
30
+ end.doc)
31
31
  }
32
32
 
33
33
  detail_page = proc { |url, &yielder|
34
34
  params = url_to_params.call(url)
35
35
  id = params['id']
36
36
 
37
- yielder.call(Nokogiri::HTML::Builder.new { |doc|
38
- doc.html {
39
- doc.body {
40
- doc.span.bold {
41
- doc.text "Hello world"
42
- }
37
+ yielder.call(Nokogiri::HTML::Builder.new do |doc|
38
+ doc.html do
39
+ doc.body do
40
+ doc.span.bold do
41
+ doc.text 'Hello world'
42
+ end
43
43
  doc.h1("title_#{id}", id: 'title')
44
44
  doc.main("body_#{id}", id: 'body')
45
45
  doc.div.sub do
46
46
  doc.span.name('testtest')
47
47
  end
48
- }
49
- }
50
- }.doc)
48
+ end
49
+ end
50
+ end.doc)
51
51
  }
52
52
 
53
53
  define(as: :html, connector: detail_page) do
data/example/proxy.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Spidy.define do
2
4
  user_agent 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0'
3
5
  socks_proxy '127.0.0.1', 9050
data/example/retry.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Spidy.define do
2
4
  spider(as: :json) do |yielder, connector|
3
5
  connector.call('https://httpbin.org/status/500') do |json|
data/example/wikip.rb CHANGED
@@ -11,11 +11,8 @@ Spidy.define do
11
11
 
12
12
  define(:infobox, as: :html, connector: :direct) do
13
13
  let(:columns) do
14
- html.search('tr').each do |tr|
15
- {
16
- name: tr.at('th')&.text,
17
- value: tr.at('td')&.text
18
- }
14
+ html.search('tr').map do |tr|
15
+ { name: tr.at('th')&.text, value: tr.at('td')&.text }
19
16
  end
20
17
  end
21
18
  end
data/exe/spidy CHANGED
@@ -6,10 +6,10 @@ require 'pry'
6
6
 
7
7
  if ARGV[1].blank?
8
8
  case ARGV[0]
9
- when 'version' then STDOUT.puts(Spidy::VERSION)
9
+ when 'version' then $stdout.puts(Spidy::VERSION)
10
10
  when 'console' then Spidy.shell.interactive
11
11
  else
12
- STDOUT.puts 'usage: spidy [version console]'
12
+ $stdout.puts 'usage: spidy [version console]'
13
13
  end
14
14
  else
15
15
  case ARGV[0]
@@ -19,6 +19,6 @@ else
19
19
  when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
20
20
  when 'eval' then Spidy.shell(ARGV[1]).eval_call(ARGV[2])
21
21
  else
22
- STDOUT.puts 'usage: spidy [console function call each run] [file]'
22
+ $stdout.puts 'usage: spidy [console function call each run] [file]'
23
23
  end
24
24
  end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Spidy::Binder::Error < StandardError
4
+ end
@@ -17,9 +17,10 @@ module Spidy::Binder::Html
17
17
  instance_exec(&block)
18
18
  end
19
19
  rescue StandardError => e
20
- fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
20
+ raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
+
23
24
  def self.extended(obj)
24
25
  obj.alias_method :html, :resource
25
26
  end
@@ -17,9 +17,10 @@ module Spidy::Binder::Json
17
17
  instance_exec(&block)
18
18
  end
19
19
  rescue StandardError => e
20
- fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
20
+ raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
+
23
24
  def self.extended(obj)
24
25
  obj.alias_method :json, :resource
25
26
  end
@@ -17,9 +17,10 @@ module Spidy::Binder::Xml
17
17
  instance_exec(&block)
18
18
  end
19
19
  rescue StandardError => e
20
- fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
20
+ raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
+
23
24
  def self.extended(obj)
24
25
  obj.alias_method :xml, :resource
25
26
  end
data/lib/spidy/binder.rb CHANGED
@@ -5,6 +5,7 @@
5
5
  #
6
6
  module Spidy::Binder
7
7
  extend ActiveSupport::Autoload
8
+ autoload :Error
8
9
  autoload :Json
9
10
  autoload :Html
10
11
  autoload :Xml
@@ -5,8 +5,10 @@
5
5
  #
6
6
  class Spidy::CommandLine
7
7
  delegate :spidy, to: :@definition_file
8
- class_attribute :output, default: (proc { |result| STDOUT.puts(result.to_s) })
9
- class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
8
+ class_attribute :output, default: (proc { |result| $stdout.puts(result.to_s) })
9
+ class_attribute :error_handler, default: (proc { |e, url|
10
+ warn({ url: url, message: e.message, backtrace: e.backtrace }.to_json)
11
+ })
10
12
 
11
13
  def eval_call(script)
12
14
  @definition_file.spidy.instance_eval(script)
@@ -14,40 +16,36 @@ class Spidy::CommandLine
14
16
 
15
17
  def initialize(definition_file)
16
18
  @definition_file = definition_file
17
- raise 'unloaded spidy' if definition_file.spidy.nil?
19
+ fail 'unloaded spidy' if definition_file.spidy.nil?
18
20
  end
19
21
 
20
22
  def each_stdin_lines(name)
21
- STDIN.each_line do |url|
22
- begin
23
- spidy.each(url.strip, name: name, &output)
24
- rescue => e
25
- error_handler.call(e, url)
26
- end
23
+ $stdin.each_line do |url|
24
+ spidy.each(url.strip, name: name, &output)
25
+ rescue StandardError => e
26
+ error_handler.call(e, url)
27
27
  end
28
28
  end
29
29
 
30
30
  def call_stdin_lines(name)
31
- STDIN.each_line do |url|
32
- begin
33
- spidy.call(url.strip, name: name, &output)
34
- rescue => e
35
- error_handler.call(e, url)
36
- end
31
+ $stdin.each_line do |url|
32
+ spidy.call(url.strip, name: name, &output)
33
+ rescue StandardError => e
34
+ error_handler.call(e, url)
37
35
  end
38
36
  end
39
37
 
40
38
  def call(name)
41
- return call_stdin_lines(name) if FileTest.pipe?(STDIN)
42
- spidy.call(name: name, &output) unless FileTest.pipe?(STDIN)
43
- rescue => e
39
+ return call_stdin_lines(name) if FileTest.pipe?($stdin)
40
+ spidy.call(name: name, &output) unless FileTest.pipe?($stdin)
41
+ rescue StandardError => e
44
42
  error_handler.call(e, nil)
45
43
  end
46
44
 
47
45
  def each(name)
48
- return each_stdin_lines(name) if FileTest.pipe?(STDIN)
46
+ return each_stdin_lines(name) if FileTest.pipe?($stdin)
49
47
  spidy.each(name: name, &output)
50
- rescue => e
48
+ rescue StandardError => e
51
49
  error_handler.call(e, nil)
52
50
  end
53
51
 
@@ -63,36 +61,32 @@ class Spidy::CommandLine
63
61
  end
64
62
 
65
63
  def build(name)
66
- build_shell(name)
67
- build_ruby(name)
64
+ File.write("#{name}.sh", build_shell_script(name))
65
+ File.write("#{name}.rb", build_ruby_script)
68
66
  end
69
67
 
70
68
  def build_shell(name)
71
- File.open("#{name}.sh", 'w') do |f|
72
- f.write <<~SHELL
73
- #!/bin/bash
74
- eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
75
- spider example
76
- SHELL
77
- end
69
+ <<~SHELL
70
+ #!/bin/bash
71
+ eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
72
+ spider
73
+ SHELL
78
74
  end
79
75
 
80
- def build_ruby(name)
81
- File.open("#{name}.rb", 'w') do |f|
82
- f.write <<~RUBY
83
- # frozen_string_literal: true
76
+ def build_ruby
77
+ <<~RUBY
78
+ # frozen_string_literal: true
84
79
 
85
- Spidy.define do
86
- spider(:example) do |yielder, connector|
87
- # connector.call(url) do |resource|
88
- # yielder.call(url or resource)
89
- # end
90
- end
80
+ Spidy.define do
81
+ spider(as: :html) do |yielder, connector|
82
+ # connector.call(url) do |resource|
83
+ # yielder.call(url or resource)
84
+ # end
85
+ end
91
86
 
92
- define(:example) do
93
- end
87
+ define(as: :html) do
94
88
  end
95
- RUBY
96
- end
89
+ end
90
+ RUBY
97
91
  end
98
92
  end
@@ -4,7 +4,7 @@
4
4
  # Direct resource ( not network resource )
5
5
  #
6
6
  class Spidy::Connector::Direct
7
- def call(resource, &yielder)
7
+ def call(resource)
8
8
  if block_given?
9
9
  yield resource
10
10
  else
@@ -12,6 +12,5 @@ class Spidy::Connector::Direct
12
12
  end
13
13
  end
14
14
 
15
- def initialize(user_agent:)
16
- end
15
+ def initialize(user_agent:); end
17
16
  end
@@ -14,13 +14,13 @@ class Spidy::Connector::Html
14
14
 
15
15
  attr_reader :agent
16
16
 
17
- def call(url, encoding: nil, retry_count: 5, &yielder)
17
+ def call(url, encoding: nil, &yielder)
18
18
  fail 'url is not specified' if url.blank?
19
19
  if encoding
20
20
  agent.default_encoding = encoding
21
21
  agent.force_default_encoding = true
22
22
  end
23
- connect(url, retry_count, yielder)
23
+ connect(url, yielder)
24
24
  end
25
25
 
26
26
  def refresh!
@@ -30,17 +30,19 @@ class Spidy::Connector::Html
30
30
 
31
31
  private
32
32
 
33
- def connect(url, retry_count, yielder)
33
+ def connect(url, yielder)
34
34
  result = nil
35
35
  agent.get(url) do |page|
36
- fail Spidy::Connector::Retry, object: page, response_code: page.try(:response_code) if page.title == 'Sorry, unable to access page...'
36
+ if page.title == 'Sorry, unable to access page...'
37
+ fail Spidy::Connector::Retry.new(object: page, response_code: page.try(:response_code))
38
+ end
37
39
 
38
40
  result = yielder.call(page)
39
41
  end
40
42
  result
41
43
  rescue Mechanize::ResponseCodeError => e
42
- raise Spidy::Connector::Retry, error: e, response_code: e.try(:response_code) if e.response_code == '429'
43
- raise Spidy::Connector::Retry, error: e, response_code: e.try(:response_code) if e.response_code == '502'
44
- raise Spidy::Connector::Retry, error: e, response_code: e.try(:response_code)
44
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '429'
45
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '502'
46
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code))
45
47
  end
46
48
  end
@@ -17,9 +17,9 @@ class Spidy::Connector::Json
17
17
  connect(url, &block)
18
18
  end
19
19
 
20
- def connect(url, retry_count: 5)
21
- OpenURI.open_uri(url, "User-Agent" => @user_agent) { |body| yield JSON.parse(body.read, symbolize_names: true) }
20
+ def connect(url)
21
+ OpenURI.open_uri(url, 'User-Agent' => @user_agent) { |body| yield JSON.parse(body.read, symbolize_names: true) }
22
22
  rescue OpenURI::HTTPError => e
23
- raise Spidy::Connector::Retry, error: e, response_code: e.io.status[0]
23
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
24
24
  end
25
25
  end
@@ -13,11 +13,11 @@ class Spidy::Connector::Xml
13
13
  end
14
14
 
15
15
  def connect(url, &block)
16
- OpenURI.open_uri(url, "User-Agent" => @user_agent) do |body|
16
+ OpenURI.open_uri(url, 'User-Agent' => @user_agent) do |body|
17
17
  block.call Nokogiri::XML(body.read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''), url)
18
18
  end
19
19
  rescue OpenURI::HTTPError => e
20
- raise Spidy::Connector::Retry, error: e, response_code: e.io.status[0]
20
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
21
21
  end
22
22
 
23
23
  def initialize(user_agent:)
@@ -27,7 +27,7 @@ module Spidy::Connector
27
27
  #
28
28
  # error output logger
29
29
  #
30
- DEFAULT_LOGGER = proc { |values| STDERR.puts(values.to_json) }
30
+ DEFAULT_LOGGER = proc { |values| warn(values.to_json) }
31
31
 
32
32
  #
33
33
  # static method
@@ -36,7 +36,9 @@ module Spidy::Connector
36
36
  extend ActiveSupport::Concern
37
37
  class_methods do
38
38
  def call(url, wait_time: 5, logger: Spidy::Connector::DEFAULT_LOGGER, user_agent: Spidy::Connector::USER_AGENT, &block)
39
- ::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(url, &block)
39
+ ::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(
40
+ url, &block
41
+ )
40
42
  end
41
43
  end
42
44
  end
@@ -51,6 +53,7 @@ module Spidy::Connector
51
53
  @object = object
52
54
  @response_code = response_code
53
55
  @error = error
56
+ super(error)
54
57
  end
55
58
  end
56
59
 
@@ -58,13 +61,13 @@ module Spidy::Connector
58
61
  # retry
59
62
  #
60
63
  class RetryableCaller
61
- attr_reader :origin_connector
64
+ attr_reader :origin_connector, :logger, :wait_time
62
65
 
63
- def initialize(connector, logger:, wait_time:)
66
+ def initialize(connector, logger:, wait_time:, retry_attempt_count: 5)
64
67
  @origin_connector = connector
65
68
  @logger = logger
66
69
  @wait_time = wait_time
67
- @retry_attempt_count = 5
70
+ @retry_attempt_count = retry_attempt_count
68
71
  end
69
72
 
70
73
  def call(url, &block)
@@ -73,18 +76,18 @@ module Spidy::Connector
73
76
  end
74
77
 
75
78
  def connect(url, retry_attempt_count: @retry_attempt_count, &block)
76
- @logger.call('connnector.get': url, 'connnector.accessed': Time.current)
77
- @origin_connector.call(url, &block)
79
+ logger.call('connnector.get': url, 'connnector.accessed': Time.current)
80
+ origin_connector.call(url, &block)
78
81
  rescue Spidy::Connector::Retry => e
79
- @logger.call('retry.accessed': Time.current,
80
- 'retry.uri': url,
81
- 'retry.response_code': e.response_code,
82
- 'retry.attempt_count': retry_attempt_count)
82
+ logger.call('retry.accessed': Time.current,
83
+ 'retry.uri': url,
84
+ 'retry.response_code': e.response_code,
85
+ 'retry.attempt_count': retry_attempt_count)
83
86
 
84
87
  retry_attempt_count -= 1
85
88
  if retry_attempt_count.positive?
86
- sleep @wait_time
87
- @origin_connector.refresh! if @origin_connector.respond_to?(:refresh!)
89
+ sleep wait_time
90
+ origin_connector.refresh! if origin_connector.respond_to?(:refresh!)
88
91
  retry
89
92
  end
90
93
  raise e.error
@@ -103,7 +106,7 @@ module Spidy::Connector
103
106
  end
104
107
 
105
108
  def call(url, &block)
106
- Socksify::proxy(socks_proxy[:host], socks_proxy[:port]) do
109
+ Socksify.proxy(socks_proxy[:host], socks_proxy[:port]) do
107
110
  connector.call(url, &block)
108
111
  end
109
112
  end
@@ -141,7 +144,6 @@ module Spidy::Connector
141
144
  fail "Not defined connnector[#{value}]" if connector.nil?
142
145
  return connector if socks_proxy.nil?
143
146
 
144
- tor = TorConnector.new(connector, socks_proxy)
145
- tor
147
+ TorConnector.new(connector, socks_proxy)
146
148
  end
147
149
  end
@@ -33,26 +33,34 @@ module Spidy::Definition
33
33
  spidy = @namespace[:"#{name}_spider"]
34
34
  fail "undefined spidy [#{name}]" if spidy.nil?
35
35
 
36
- spidy.call(source, &yielder)
36
+ if yielder
37
+ spidy.call(source, &yielder)
38
+ else
39
+ Enumerator.new do |enumerate_yielder|
40
+ spidy.call(source, &enumerate_yielder)
41
+ end
42
+ end
37
43
  end
38
44
 
39
45
  def spider(name = :default, connector: nil, as: nil, &define_block)
40
46
  @namespace ||= {}
41
- connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent, socks_proxy: @socks_proxy)
47
+ connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
48
+ socks_proxy: @socks_proxy)
42
49
  @namespace[:"#{name}_spider"] = proc do |source, &yielder|
43
50
  define_block.call(yielder, connector, source)
44
51
  end
45
52
  end
46
53
 
47
54
  def define(name = :default, connector: nil, as: nil, &define_block)
48
- connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent, socks_proxy: @socks_proxy)
55
+ connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
56
+ socks_proxy: @socks_proxy)
49
57
  binder_base = Spidy::Binder.const_get(as.to_s.classify)
50
58
  @namespace ||= {}
51
- @namespace[:"#{name}_scraper"] = Class.new(Spidy::DefineObject) do
59
+ @namespace[:"#{name}_scraper"] = Class.new(Spidy::DefinitionObject) do
52
60
  extend binder_base
53
61
  class_eval(&define_block)
54
62
  define_singleton_method(:call) do |source, &yielder|
55
- yielder = lambda { |result| break result } if yielder.nil?
63
+ yielder = ->(result) { break result } if yielder.nil?
56
64
  connection_yielder = lambda do |page|
57
65
  yielder.call(new(page, source))
58
66
  end
@@ -4,8 +4,7 @@
4
4
  # spidy interface binding
5
5
  #
6
6
  class Spidy::DefinitionFile
7
- attr_reader :path
8
- attr_reader :spidy
7
+ attr_reader :path, :spidy
9
8
 
10
9
  def self.open(filepath)
11
10
  object = new(filepath)
@@ -15,7 +14,7 @@ class Spidy::DefinitionFile
15
14
 
16
15
  # rubocop:disable Security/Eval
17
16
  def eval_definition
18
- @spidy = eval(File.open(path).read) if path
17
+ @spidy = eval(File.read(path)) if path
19
18
  end
20
19
  # rubocop:enable Security/Eval
21
20
 
@@ -1,4 +1,9 @@
1
- class Spidy::DefineObject
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # An object that represents the scraper defined by define block.
5
+ #
6
+ class Spidy::DefinitionObject
2
7
  class << self
3
8
  attr_reader :attribute_names
4
9
  end
@@ -14,6 +19,6 @@ class Spidy::DefineObject
14
19
  end
15
20
 
16
21
  def to_h
17
- self.class.attribute_names.map { |name| [name, send(name)] }.to_h
22
+ self.class.attribute_names.to_h { |name| [name, send(name)] }
18
23
  end
19
24
  end
data/lib/spidy/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spidy
4
- VERSION = '0.3.9'
4
+ VERSION = '0.3.12'
5
5
  end
data/lib/spidy.rb CHANGED
@@ -17,9 +17,9 @@ module Spidy
17
17
  autoload :Console
18
18
  autoload :Definition
19
19
  autoload :DefinitionFile
20
+ autoload :DefinitionObject
20
21
  autoload :Binder
21
22
  autoload :Connector
22
- autoload :DefineObject
23
23
 
24
24
  def self.shell(filepath = nil)
25
25
  Spidy::Shell.new(filepath)
data/spidy.gemspec CHANGED
@@ -25,17 +25,20 @@ Gem::Specification.new do |spec|
25
25
  spec.require_paths = ['lib']
26
26
 
27
27
  spec.add_development_dependency 'bundler', '~> 2.0'
28
+ spec.add_development_dependency 'capybara_discoball'
29
+ spec.add_development_dependency 'ffaker'
28
30
  spec.add_development_dependency 'pry'
29
31
  spec.add_development_dependency 'rake', '~> 13.0'
30
32
  spec.add_development_dependency 'rspec', '~> 3.0'
31
- spec.add_development_dependency 'ffaker'
32
33
  spec.add_development_dependency 'rspec-command'
33
- spec.add_development_dependency 'capybara_discoball'
34
34
  spec.add_development_dependency 'sinatra'
35
35
 
36
- spec.add_runtime_dependency 'tor'
37
36
  spec.add_runtime_dependency 'activesupport'
38
37
  spec.add_runtime_dependency 'mechanize'
39
- spec.add_runtime_dependency 'socksify'
40
38
  spec.add_runtime_dependency 'pry'
39
+ spec.add_runtime_dependency 'socksify'
40
+ spec.add_runtime_dependency 'tor'
41
+ spec.metadata = {
42
+ 'rubygems_mfa_required' => 'true'
43
+ }
41
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-12-23 00:00:00.000000000 Z
11
+ date: 2022-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: pry
28
+ name: capybara_discoball
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,63 +39,63 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rake
42
+ name: ffaker
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '13.0'
47
+ version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '13.0'
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: rspec
56
+ name: pry
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
- version: '3.0'
61
+ version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '3.0'
68
+ version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ffaker
70
+ name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: '13.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: '13.0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: rspec-command
84
+ name: rspec
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: '3.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: '3.0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: capybara_discoball
98
+ name: rspec-command
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -123,7 +123,7 @@ dependencies:
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
- name: tor
126
+ name: activesupport
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - ">="
@@ -137,7 +137,7 @@ dependencies:
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
- name: activesupport
140
+ name: mechanize
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
143
  - - ">="
@@ -151,7 +151,7 @@ dependencies:
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  - !ruby/object:Gem::Dependency
154
- name: mechanize
154
+ name: pry
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - ">="
@@ -179,7 +179,7 @@ dependencies:
179
179
  - !ruby/object:Gem::Version
180
180
  version: '0'
181
181
  - !ruby/object:Gem::Dependency
182
- name: pry
182
+ name: tor
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
185
  - - ">="
@@ -222,6 +222,7 @@ files:
222
222
  - exe/spidy
223
223
  - lib/spidy.rb
224
224
  - lib/spidy/binder.rb
225
+ - lib/spidy/binder/error.rb
225
226
  - lib/spidy/binder/html.rb
226
227
  - lib/spidy/binder/json.rb
227
228
  - lib/spidy/binder/xml.rb
@@ -232,9 +233,9 @@ files:
232
233
  - lib/spidy/connector/json.rb
233
234
  - lib/spidy/connector/xml.rb
234
235
  - lib/spidy/console.rb
235
- - lib/spidy/define_object.rb
236
236
  - lib/spidy/definition.rb
237
237
  - lib/spidy/definition_file.rb
238
+ - lib/spidy/definition_object.rb
238
239
  - lib/spidy/shell.rb
239
240
  - lib/spidy/spider.rb
240
241
  - lib/spidy/version.rb
@@ -243,7 +244,8 @@ files:
243
244
  homepage: https://github.com/aileron-inc/spidy
244
245
  licenses:
245
246
  - MIT
246
- metadata: {}
247
+ metadata:
248
+ rubygems_mfa_required: 'true'
247
249
  post_install_message:
248
250
  rdoc_options: []
249
251
  require_paths: