spidy 0.3.9 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 75215453c834a8e481b27cf4377235cc97ce6a6e4eff142a11743e68ee4982b4
4
- data.tar.gz: 25f3b14ad6f31b580396458c8075167f24fd4a2f6e7ff98947338ecd0588eb9d
3
+ metadata.gz: 76cb60ea985d1a663f24b7b024198d222756376bd9dd979a032c46ba39b16548
4
+ data.tar.gz: ff2e7f056f7ad5afe06df90adf0bb2e438c696472cde50c8d5758b2f9801684e
5
5
  SHA512:
6
- metadata.gz: 447b7152b807c7985e16b7b403d27f9f7b949264577e8e4dc11a52358cb9af49510696d29166adb60bb5b87158aa8d2c10faf7c810f8eced4c29f9eed8bb493a
7
- data.tar.gz: 55d82e5c495a7e5a0fd57b466e08ea072e3a712829b2d83d550b107147f30e3eaff54ed8dcec69163bf6f74e0c32990051af9de0d044cc19eadf39ecc749d003
6
+ metadata.gz: a721848978135752ddcfe3da30a293317a4852b41dc99209019ae71960538fe448ec4ad54da661e0b99edef3fcb85a84b095b99ddbbba9b628fdd4ac1be2f23c
7
+ data.tar.gz: a156f47f317cd4f1f0a66a13ac5102073723f139c5b797c8dc56d7dbdd41e342cb1ad1a6814812563033c68f448c4d57775c0edf300456dd164b90211632737e
data/.rubocop.yml CHANGED
@@ -1,7 +1,8 @@
1
1
  inherit_from: .rubocop_todo.yml
2
2
  AllCops:
3
+ TargetRubyVersion: 3.0.2
4
+ NewCops: enable
3
5
  DisplayCopNames: true
4
- TargetRubyVersion: 2.6
5
6
 
6
7
  Style/ClassAndModuleChildren:
7
8
  Enabled: false
@@ -9,7 +10,7 @@ Style/ClassAndModuleChildren:
9
10
  Style/SignalException:
10
11
  EnforcedStyle: semantic
11
12
 
12
- Naming/UncommunicativeMethodParamName:
13
+ Naming/MethodParameterName:
13
14
  AllowedNames:
14
15
  - as
15
16
 
@@ -17,8 +18,11 @@ Metrics/AbcSize:
17
18
  Max: 21
18
19
  Exclude:
19
20
 
21
+ Metrics/MethodLength:
22
+ Max: 15
23
+
20
24
  Metrics/LineLength:
21
- Max: 120
25
+ Max: 130
22
26
 
23
27
  Metrics/BlockLength:
24
28
  Max: 120
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- spidy (0.3.9)
4
+ spidy (0.3.10)
5
5
  activesupport
6
6
  mechanize
7
7
  pry
@@ -32,7 +32,7 @@ GEM
32
32
  coderay (1.1.3)
33
33
  concurrent-ruby (1.1.9)
34
34
  connection_pool (2.2.5)
35
- diff-lcs (1.4.4)
35
+ diff-lcs (1.5.0)
36
36
  domain_name (0.5.20190701)
37
37
  unf (>= 0.0.5, < 1.0.0)
38
38
  ffaker (2.20.0)
@@ -57,7 +57,6 @@ GEM
57
57
  mime-types-data (~> 3.2015)
58
58
  mime-types-data (3.2021.1115)
59
59
  mini_mime (1.1.2)
60
- mini_portile2 (2.6.1)
61
60
  minitest (5.15.0)
62
61
  mixlib-shellout (2.4.4)
63
62
  mustermann (1.1.1)
@@ -65,8 +64,7 @@ GEM
65
64
  net-http-digest_auth (1.4.1)
66
65
  net-http-persistent (4.0.1)
67
66
  connection_pool (~> 2.2)
68
- nokogiri (1.12.5)
69
- mini_portile2 (~> 2.6.1)
67
+ nokogiri (1.12.5-arm64-darwin)
70
68
  racc (~> 1.4)
71
69
  pry (0.14.1)
72
70
  coderay (~> 1.1)
@@ -121,7 +119,7 @@ GEM
121
119
  nokogiri (~> 1.8)
122
120
 
123
121
  PLATFORMS
124
- ruby
122
+ arm64-darwin-20
125
123
 
126
124
  DEPENDENCIES
127
125
  bundler (~> 2.0)
@@ -1,7 +1,7 @@
1
-
1
+ # frozen_string_literal: true
2
2
 
3
3
  Spidy.define do
4
- url_to_params = ->(url) {
4
+ url_to_params = lambda { |url|
5
5
  uri = URI.parse(url)
6
6
  params = URI.decode_www_form(uri.query).to_h if uri.query.present?
7
7
  params if params.present?
@@ -13,41 +13,41 @@ Spidy.define do
13
13
 
14
14
  limit_page = 3
15
15
  per_page = 25
16
- yielder.call(Nokogiri::HTML::Builder.new { |doc|
17
- doc.html {
18
- doc.body {
19
- doc.span.bold {
20
- doc.text "Hello world"
21
- }
22
- doc.main {
23
- (page * per_page + 1).upto((page + 1) * per_page).each do |i|
16
+ yielder.call(Nokogiri::HTML::Builder.new do |doc|
17
+ doc.html do
18
+ doc.body do
19
+ doc.span.bold do
20
+ doc.text 'Hello world'
21
+ end
22
+ doc.main do
23
+ ((page * per_page) + 1).upto((page + 1) * per_page).each do |i|
24
24
  doc.a("page #{i}", href: "http://localhost/?id=#{i}")
25
25
  end
26
- }
26
+ end
27
27
  doc.a('NEXT', href: "http://localhost/?page=#{page + 1}", class: 'next') if page < limit_page
28
- }
29
- }
30
- }.doc)
28
+ end
29
+ end
30
+ end.doc)
31
31
  }
32
32
 
33
33
  detail_page = proc { |url, &yielder|
34
34
  params = url_to_params.call(url)
35
35
  id = params['id']
36
36
 
37
- yielder.call(Nokogiri::HTML::Builder.new { |doc|
38
- doc.html {
39
- doc.body {
40
- doc.span.bold {
41
- doc.text "Hello world"
42
- }
37
+ yielder.call(Nokogiri::HTML::Builder.new do |doc|
38
+ doc.html do
39
+ doc.body do
40
+ doc.span.bold do
41
+ doc.text 'Hello world'
42
+ end
43
43
  doc.h1("title_#{id}", id: 'title')
44
44
  doc.main("body_#{id}", id: 'body')
45
45
  doc.div.sub do
46
46
  doc.span.name('testtest')
47
47
  end
48
- }
49
- }
50
- }.doc)
48
+ end
49
+ end
50
+ end.doc)
51
51
  }
52
52
 
53
53
  define(as: :html, connector: detail_page) do
data/example/proxy.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Spidy.define do
2
4
  user_agent 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0'
3
5
  socks_proxy '127.0.0.1', 9050
data/example/retry.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  Spidy.define do
2
4
  spider(as: :json) do |yielder, connector|
3
5
  connector.call('https://httpbin.org/status/500') do |json|
data/example/wikip.rb CHANGED
@@ -11,11 +11,8 @@ Spidy.define do
11
11
 
12
12
  define(:infobox, as: :html, connector: :direct) do
13
13
  let(:columns) do
14
- html.search('tr').each do |tr|
15
- {
16
- name: tr.at('th')&.text,
17
- value: tr.at('td')&.text
18
- }
14
+ html.search('tr').map do |tr|
15
+ { name: tr.at('th')&.text, value: tr.at('td')&.text }
19
16
  end
20
17
  end
21
18
  end
data/exe/spidy CHANGED
@@ -6,10 +6,10 @@ require 'pry'
6
6
 
7
7
  if ARGV[1].blank?
8
8
  case ARGV[0]
9
- when 'version' then STDOUT.puts(Spidy::VERSION)
9
+ when 'version' then $stdout.puts(Spidy::VERSION)
10
10
  when 'console' then Spidy.shell.interactive
11
11
  else
12
- STDOUT.puts 'usage: spidy [version console]'
12
+ $stdout.puts 'usage: spidy [version console]'
13
13
  end
14
14
  else
15
15
  case ARGV[0]
@@ -19,6 +19,6 @@ else
19
19
  when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
20
20
  when 'eval' then Spidy.shell(ARGV[1]).eval_call(ARGV[2])
21
21
  else
22
- STDOUT.puts 'usage: spidy [console function call each run] [file]'
22
+ $stdout.puts 'usage: spidy [console function call each run] [file]'
23
23
  end
24
24
  end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Spidy::Binder::Error < StandardError
4
+ end
@@ -17,9 +17,10 @@ module Spidy::Binder::Html
17
17
  instance_exec(&block)
18
18
  end
19
19
  rescue StandardError => e
20
- fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
20
+ raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
+
23
24
  def self.extended(obj)
24
25
  obj.alias_method :html, :resource
25
26
  end
@@ -17,9 +17,10 @@ module Spidy::Binder::Json
17
17
  instance_exec(&block)
18
18
  end
19
19
  rescue StandardError => e
20
- fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
20
+ raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
+
23
24
  def self.extended(obj)
24
25
  obj.alias_method :json, :resource
25
26
  end
@@ -17,9 +17,10 @@ module Spidy::Binder::Xml
17
17
  instance_exec(&block)
18
18
  end
19
19
  rescue StandardError => e
20
- fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
20
+ raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
+
23
24
  def self.extended(obj)
24
25
  obj.alias_method :xml, :resource
25
26
  end
data/lib/spidy/binder.rb CHANGED
@@ -5,6 +5,7 @@
5
5
  #
6
6
  module Spidy::Binder
7
7
  extend ActiveSupport::Autoload
8
+ autoload :Error
8
9
  autoload :Json
9
10
  autoload :Html
10
11
  autoload :Xml
@@ -5,8 +5,10 @@
5
5
  #
6
6
  class Spidy::CommandLine
7
7
  delegate :spidy, to: :@definition_file
8
- class_attribute :output, default: (proc { |result| STDOUT.puts(result.to_s) })
9
- class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
8
+ class_attribute :output, default: (proc { |result| $stdout.puts(result.to_s) })
9
+ class_attribute :error_handler, default: (proc { |e, url|
10
+ warn({ url: url, message: e.message, backtrace: e.backtrace }.to_json)
11
+ })
10
12
 
11
13
  def eval_call(script)
12
14
  @definition_file.spidy.instance_eval(script)
@@ -14,40 +16,36 @@ class Spidy::CommandLine
14
16
 
15
17
  def initialize(definition_file)
16
18
  @definition_file = definition_file
17
- raise 'unloaded spidy' if definition_file.spidy.nil?
19
+ fail 'unloaded spidy' if definition_file.spidy.nil?
18
20
  end
19
21
 
20
22
  def each_stdin_lines(name)
21
- STDIN.each_line do |url|
22
- begin
23
- spidy.each(url.strip, name: name, &output)
24
- rescue => e
25
- error_handler.call(e, url)
26
- end
23
+ $stdin.each_line do |url|
24
+ spidy.each(url.strip, name: name, &output)
25
+ rescue StandardError => e
26
+ error_handler.call(e, url)
27
27
  end
28
28
  end
29
29
 
30
30
  def call_stdin_lines(name)
31
- STDIN.each_line do |url|
32
- begin
33
- spidy.call(url.strip, name: name, &output)
34
- rescue => e
35
- error_handler.call(e, url)
36
- end
31
+ $stdin.each_line do |url|
32
+ spidy.call(url.strip, name: name, &output)
33
+ rescue StandardError => e
34
+ error_handler.call(e, url)
37
35
  end
38
36
  end
39
37
 
40
38
  def call(name)
41
- return call_stdin_lines(name) if FileTest.pipe?(STDIN)
42
- spidy.call(name: name, &output) unless FileTest.pipe?(STDIN)
43
- rescue => e
39
+ return call_stdin_lines(name) if FileTest.pipe?($stdin)
40
+ spidy.call(name: name, &output) unless FileTest.pipe?($stdin)
41
+ rescue StandardError => e
44
42
  error_handler.call(e, nil)
45
43
  end
46
44
 
47
45
  def each(name)
48
- return each_stdin_lines(name) if FileTest.pipe?(STDIN)
46
+ return each_stdin_lines(name) if FileTest.pipe?($stdin)
49
47
  spidy.each(name: name, &output)
50
- rescue => e
48
+ rescue StandardError => e
51
49
  error_handler.call(e, nil)
52
50
  end
53
51
 
@@ -63,36 +61,32 @@ class Spidy::CommandLine
63
61
  end
64
62
 
65
63
  def build(name)
66
- build_shell(name)
67
- build_ruby(name)
64
+ File.write("#{name}.sh", build_shell_script(name))
65
+ File.write("#{name}.rb", build_ruby_script)
68
66
  end
69
67
 
70
68
  def build_shell(name)
71
- File.open("#{name}.sh", 'w') do |f|
72
- f.write <<~SHELL
73
- #!/bin/bash
74
- eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
75
- spider example
76
- SHELL
77
- end
69
+ <<~SHELL
70
+ #!/bin/bash
71
+ eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
72
+ spider
73
+ SHELL
78
74
  end
79
75
 
80
- def build_ruby(name)
81
- File.open("#{name}.rb", 'w') do |f|
82
- f.write <<~RUBY
83
- # frozen_string_literal: true
76
+ def build_ruby
77
+ <<~RUBY
78
+ # frozen_string_literal: true
84
79
 
85
- Spidy.define do
86
- spider(:example) do |yielder, connector|
87
- # connector.call(url) do |resource|
88
- # yielder.call(url or resource)
89
- # end
90
- end
80
+ Spidy.define do
81
+ spider(as: :html) do |yielder, connector|
82
+ # connector.call(url) do |resource|
83
+ # yielder.call(url or resource)
84
+ # end
85
+ end
91
86
 
92
- define(:example) do
93
- end
87
+ define(as: :html) do
94
88
  end
95
- RUBY
96
- end
89
+ end
90
+ RUBY
97
91
  end
98
92
  end
@@ -4,7 +4,7 @@
4
4
  # Direct resource ( not network resource )
5
5
  #
6
6
  class Spidy::Connector::Direct
7
- def call(resource, &yielder)
7
+ def call(resource)
8
8
  if block_given?
9
9
  yield resource
10
10
  else
@@ -12,6 +12,5 @@ class Spidy::Connector::Direct
12
12
  end
13
13
  end
14
14
 
15
- def initialize(user_agent:)
16
- end
15
+ def initialize(user_agent:); end
17
16
  end
@@ -14,13 +14,13 @@ class Spidy::Connector::Html
14
14
 
15
15
  attr_reader :agent
16
16
 
17
- def call(url, encoding: nil, retry_count: 5, &yielder)
17
+ def call(url, encoding: nil, &yielder)
18
18
  fail 'url is not specified' if url.blank?
19
19
  if encoding
20
20
  agent.default_encoding = encoding
21
21
  agent.force_default_encoding = true
22
22
  end
23
- connect(url, retry_count, yielder)
23
+ connect(url, yielder)
24
24
  end
25
25
 
26
26
  def refresh!
@@ -30,17 +30,19 @@ class Spidy::Connector::Html
30
30
 
31
31
  private
32
32
 
33
- def connect(url, retry_count, yielder)
33
+ def connect(url, yielder)
34
34
  result = nil
35
35
  agent.get(url) do |page|
36
- fail Spidy::Connector::Retry, object: page, response_code: page.try(:response_code) if page.title == 'Sorry, unable to access page...'
36
+ if page.title == 'Sorry, unable to access page...'
37
+ fail Spidy::Connector::Retry.new(object: page, response_code: page.try(:response_code))
38
+ end
37
39
 
38
40
  result = yielder.call(page)
39
41
  end
40
42
  result
41
43
  rescue Mechanize::ResponseCodeError => e
42
- raise Spidy::Connector::Retry, error: e, response_code: e.try(:response_code) if e.response_code == '429'
43
- raise Spidy::Connector::Retry, error: e, response_code: e.try(:response_code) if e.response_code == '502'
44
- raise Spidy::Connector::Retry, error: e, response_code: e.try(:response_code)
44
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '429'
45
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '502'
46
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code))
45
47
  end
46
48
  end
@@ -17,9 +17,9 @@ class Spidy::Connector::Json
17
17
  connect(url, &block)
18
18
  end
19
19
 
20
- def connect(url, retry_count: 5)
21
- OpenURI.open_uri(url, "User-Agent" => @user_agent) { |body| yield JSON.parse(body.read, symbolize_names: true) }
20
+ def connect(url)
21
+ OpenURI.open_uri(url, 'User-Agent' => @user_agent) { |body| yield JSON.parse(body.read, symbolize_names: true) }
22
22
  rescue OpenURI::HTTPError => e
23
- raise Spidy::Connector::Retry, error: e, response_code: e.io.status[0]
23
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
24
24
  end
25
25
  end
@@ -13,11 +13,11 @@ class Spidy::Connector::Xml
13
13
  end
14
14
 
15
15
  def connect(url, &block)
16
- OpenURI.open_uri(url, "User-Agent" => @user_agent) do |body|
16
+ OpenURI.open_uri(url, 'User-Agent' => @user_agent) do |body|
17
17
  block.call Nokogiri::XML(body.read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''), url)
18
18
  end
19
19
  rescue OpenURI::HTTPError => e
20
- raise Spidy::Connector::Retry, error: e, response_code: e.io.status[0]
20
+ raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
21
21
  end
22
22
 
23
23
  def initialize(user_agent:)
@@ -27,7 +27,7 @@ module Spidy::Connector
27
27
  #
28
28
  # error output logger
29
29
  #
30
- DEFAULT_LOGGER = proc { |values| STDERR.puts(values.to_json) }
30
+ DEFAULT_LOGGER = proc { |values| warn(values.to_json) }
31
31
 
32
32
  #
33
33
  # static method
@@ -36,7 +36,9 @@ module Spidy::Connector
36
36
  extend ActiveSupport::Concern
37
37
  class_methods do
38
38
  def call(url, wait_time: 5, logger: Spidy::Connector::DEFAULT_LOGGER, user_agent: Spidy::Connector::USER_AGENT, &block)
39
- ::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(url, &block)
39
+ ::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(
40
+ url, &block
41
+ )
40
42
  end
41
43
  end
42
44
  end
@@ -51,6 +53,7 @@ module Spidy::Connector
51
53
  @object = object
52
54
  @response_code = response_code
53
55
  @error = error
56
+ super(error)
54
57
  end
55
58
  end
56
59
 
@@ -58,13 +61,13 @@ module Spidy::Connector
58
61
  # retry
59
62
  #
60
63
  class RetryableCaller
61
- attr_reader :origin_connector
64
+ attr_reader :origin_connector, :logger, :wait_time
62
65
 
63
- def initialize(connector, logger:, wait_time:)
66
+ def initialize(connector, logger:, wait_time:, retry_attempt_count: 5)
64
67
  @origin_connector = connector
65
68
  @logger = logger
66
69
  @wait_time = wait_time
67
- @retry_attempt_count = 5
70
+ @retry_attempt_count = retry_attempt_count
68
71
  end
69
72
 
70
73
  def call(url, &block)
@@ -73,18 +76,18 @@ module Spidy::Connector
73
76
  end
74
77
 
75
78
  def connect(url, retry_attempt_count: @retry_attempt_count, &block)
76
- @logger.call('connnector.get': url, 'connnector.accessed': Time.current)
77
- @origin_connector.call(url, &block)
79
+ logger.call('connnector.get': url, 'connnector.accessed': Time.current)
80
+ origin_connector.call(url, &block)
78
81
  rescue Spidy::Connector::Retry => e
79
- @logger.call('retry.accessed': Time.current,
80
- 'retry.uri': url,
81
- 'retry.response_code': e.response_code,
82
- 'retry.attempt_count': retry_attempt_count)
82
+ logger.call('retry.accessed': Time.current,
83
+ 'retry.uri': url,
84
+ 'retry.response_code': e.response_code,
85
+ 'retry.attempt_count': retry_attempt_count)
83
86
 
84
87
  retry_attempt_count -= 1
85
88
  if retry_attempt_count.positive?
86
- sleep @wait_time
87
- @origin_connector.refresh! if @origin_connector.respond_to?(:refresh!)
89
+ sleep wait_time
90
+ origin_connector.refresh! if origin_connector.respond_to?(:refresh!)
88
91
  retry
89
92
  end
90
93
  raise e.error
@@ -103,7 +106,7 @@ module Spidy::Connector
103
106
  end
104
107
 
105
108
  def call(url, &block)
106
- Socksify::proxy(socks_proxy[:host], socks_proxy[:port]) do
109
+ Socksify.proxy(socks_proxy[:host], socks_proxy[:port]) do
107
110
  connector.call(url, &block)
108
111
  end
109
112
  end
@@ -141,7 +144,6 @@ module Spidy::Connector
141
144
  fail "Not defined connnector[#{value}]" if connector.nil?
142
145
  return connector if socks_proxy.nil?
143
146
 
144
- tor = TorConnector.new(connector, socks_proxy)
145
- tor
147
+ TorConnector.new(connector, socks_proxy)
146
148
  end
147
149
  end
@@ -33,26 +33,34 @@ module Spidy::Definition
33
33
  spidy = @namespace[:"#{name}_spider"]
34
34
  fail "undefined spidy [#{name}]" if spidy.nil?
35
35
 
36
- spidy.call(source, &yielder)
36
+ if yielder
37
+ spidy.call(source, &yielder)
38
+ else
39
+ Enumerator.new do |enumerate_yielder|
40
+ spidy.call(source, &enumerate_yielder)
41
+ end
42
+ end
37
43
  end
38
44
 
39
45
  def spider(name = :default, connector: nil, as: nil, &define_block)
40
46
  @namespace ||= {}
41
- connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent, socks_proxy: @socks_proxy)
47
+ connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
48
+ socks_proxy: @socks_proxy)
42
49
  @namespace[:"#{name}_spider"] = proc do |source, &yielder|
43
50
  define_block.call(yielder, connector, source)
44
51
  end
45
52
  end
46
53
 
47
54
  def define(name = :default, connector: nil, as: nil, &define_block)
48
- connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent, socks_proxy: @socks_proxy)
55
+ connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
56
+ socks_proxy: @socks_proxy)
49
57
  binder_base = Spidy::Binder.const_get(as.to_s.classify)
50
58
  @namespace ||= {}
51
- @namespace[:"#{name}_scraper"] = Class.new(Spidy::DefineObject) do
59
+ @namespace[:"#{name}_scraper"] = Class.new(Spidy::DefinitionObject) do
52
60
  extend binder_base
53
61
  class_eval(&define_block)
54
62
  define_singleton_method(:call) do |source, &yielder|
55
- yielder = lambda { |result| break result } if yielder.nil?
63
+ yielder = ->(result) { break result } if yielder.nil?
56
64
  connection_yielder = lambda do |page|
57
65
  yielder.call(new(page, source))
58
66
  end
@@ -4,8 +4,7 @@
4
4
  # spidy interface binding
5
5
  #
6
6
  class Spidy::DefinitionFile
7
- attr_reader :path
8
- attr_reader :spidy
7
+ attr_reader :path, :spidy
9
8
 
10
9
  def self.open(filepath)
11
10
  object = new(filepath)
@@ -15,7 +14,7 @@ class Spidy::DefinitionFile
15
14
 
16
15
  # rubocop:disable Security/Eval
17
16
  def eval_definition
18
- @spidy = eval(File.open(path).read) if path
17
+ @spidy = eval(File.read(path)) if path
19
18
  end
20
19
  # rubocop:enable Security/Eval
21
20
 
@@ -1,4 +1,9 @@
1
- class Spidy::DefineObject
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # An object that represents the scraper defined by define block.
5
+ #
6
+ class Spidy::DefinitionObject
2
7
  class << self
3
8
  attr_reader :attribute_names
4
9
  end
@@ -14,6 +19,6 @@ class Spidy::DefineObject
14
19
  end
15
20
 
16
21
  def to_h
17
- self.class.attribute_names.map { |name| [name, send(name)] }.to_h
22
+ self.class.attribute_names.to_h { |name| [name, send(name)] }
18
23
  end
19
24
  end
data/lib/spidy/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spidy
4
- VERSION = '0.3.9'
4
+ VERSION = '0.3.12'
5
5
  end
data/lib/spidy.rb CHANGED
@@ -17,9 +17,9 @@ module Spidy
17
17
  autoload :Console
18
18
  autoload :Definition
19
19
  autoload :DefinitionFile
20
+ autoload :DefinitionObject
20
21
  autoload :Binder
21
22
  autoload :Connector
22
- autoload :DefineObject
23
23
 
24
24
  def self.shell(filepath = nil)
25
25
  Spidy::Shell.new(filepath)
data/spidy.gemspec CHANGED
@@ -25,17 +25,20 @@ Gem::Specification.new do |spec|
25
25
  spec.require_paths = ['lib']
26
26
 
27
27
  spec.add_development_dependency 'bundler', '~> 2.0'
28
+ spec.add_development_dependency 'capybara_discoball'
29
+ spec.add_development_dependency 'ffaker'
28
30
  spec.add_development_dependency 'pry'
29
31
  spec.add_development_dependency 'rake', '~> 13.0'
30
32
  spec.add_development_dependency 'rspec', '~> 3.0'
31
- spec.add_development_dependency 'ffaker'
32
33
  spec.add_development_dependency 'rspec-command'
33
- spec.add_development_dependency 'capybara_discoball'
34
34
  spec.add_development_dependency 'sinatra'
35
35
 
36
- spec.add_runtime_dependency 'tor'
37
36
  spec.add_runtime_dependency 'activesupport'
38
37
  spec.add_runtime_dependency 'mechanize'
39
- spec.add_runtime_dependency 'socksify'
40
38
  spec.add_runtime_dependency 'pry'
39
+ spec.add_runtime_dependency 'socksify'
40
+ spec.add_runtime_dependency 'tor'
41
+ spec.metadata = {
42
+ 'rubygems_mfa_required' => 'true'
43
+ }
41
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-12-23 00:00:00.000000000 Z
11
+ date: 2022-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: pry
28
+ name: capybara_discoball
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,63 +39,63 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rake
42
+ name: ffaker
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '13.0'
47
+ version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '13.0'
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: rspec
56
+ name: pry
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
- version: '3.0'
61
+ version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '3.0'
68
+ version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ffaker
70
+ name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: '13.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: '13.0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: rspec-command
84
+ name: rspec
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: '3.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: '3.0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: capybara_discoball
98
+ name: rspec-command
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -123,7 +123,7 @@ dependencies:
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
- name: tor
126
+ name: activesupport
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - ">="
@@ -137,7 +137,7 @@ dependencies:
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
- name: activesupport
140
+ name: mechanize
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
143
  - - ">="
@@ -151,7 +151,7 @@ dependencies:
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  - !ruby/object:Gem::Dependency
154
- name: mechanize
154
+ name: pry
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - ">="
@@ -179,7 +179,7 @@ dependencies:
179
179
  - !ruby/object:Gem::Version
180
180
  version: '0'
181
181
  - !ruby/object:Gem::Dependency
182
- name: pry
182
+ name: tor
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
185
  - - ">="
@@ -222,6 +222,7 @@ files:
222
222
  - exe/spidy
223
223
  - lib/spidy.rb
224
224
  - lib/spidy/binder.rb
225
+ - lib/spidy/binder/error.rb
225
226
  - lib/spidy/binder/html.rb
226
227
  - lib/spidy/binder/json.rb
227
228
  - lib/spidy/binder/xml.rb
@@ -232,9 +233,9 @@ files:
232
233
  - lib/spidy/connector/json.rb
233
234
  - lib/spidy/connector/xml.rb
234
235
  - lib/spidy/console.rb
235
- - lib/spidy/define_object.rb
236
236
  - lib/spidy/definition.rb
237
237
  - lib/spidy/definition_file.rb
238
+ - lib/spidy/definition_object.rb
238
239
  - lib/spidy/shell.rb
239
240
  - lib/spidy/spider.rb
240
241
  - lib/spidy/version.rb
@@ -243,7 +244,8 @@ files:
243
244
  homepage: https://github.com/aileron-inc/spidy
244
245
  licenses:
245
246
  - MIT
246
- metadata: {}
247
+ metadata:
248
+ rubygems_mfa_required: 'true'
247
249
  post_install_message:
248
250
  rdoc_options: []
249
251
  require_paths: