spidy 0.3.6 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -3
- data/.ruby-version +1 -1
- data/Gemfile.lock +67 -65
- data/example/master_detail.rb +23 -23
- data/example/proxy.rb +2 -0
- data/example/retry.rb +2 -0
- data/example/wikip.rb +2 -5
- data/exe/spidy +4 -3
- data/lib/spidy/binder/error.rb +4 -0
- data/lib/spidy/binder/html.rb +7 -5
- data/lib/spidy/binder/json.rb +6 -4
- data/lib/spidy/binder/xml.rb +7 -5
- data/lib/spidy/binder.rb +1 -44
- data/lib/spidy/command_line.rb +41 -43
- data/lib/spidy/connector/direct.rb +2 -3
- data/lib/spidy/connector/html.rb +9 -7
- data/lib/spidy/connector/json.rb +3 -3
- data/lib/spidy/connector/xml.rb +2 -2
- data/lib/spidy/connector.rb +18 -16
- data/lib/spidy/definition.rb +22 -16
- data/lib/spidy/definition_file.rb +2 -3
- data/lib/spidy/definition_object.rb +24 -0
- data/lib/spidy/shell.rb +1 -1
- data/lib/spidy/version.rb +1 -1
- data/lib/spidy.rb +1 -0
- data/spidy.gemspec +8 -5
- metadata +33 -30
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 76cb60ea985d1a663f24b7b024198d222756376bd9dd979a032c46ba39b16548
|
|
4
|
+
data.tar.gz: ff2e7f056f7ad5afe06df90adf0bb2e438c696472cde50c8d5758b2f9801684e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a721848978135752ddcfe3da30a293317a4852b41dc99209019ae71960538fe448ec4ad54da661e0b99edef3fcb85a84b095b99ddbbba9b628fdd4ac1be2f23c
|
|
7
|
+
data.tar.gz: a156f47f317cd4f1f0a66a13ac5102073723f139c5b797c8dc56d7dbdd41e342cb1ad1a6814812563033c68f448c4d57775c0edf300456dd164b90211632737e
|
data/.rubocop.yml
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
inherit_from: .rubocop_todo.yml
|
|
2
2
|
AllCops:
|
|
3
|
+
TargetRubyVersion: 3.0.2
|
|
4
|
+
NewCops: enable
|
|
3
5
|
DisplayCopNames: true
|
|
4
|
-
TargetRubyVersion: 2.6
|
|
5
6
|
|
|
6
7
|
Style/ClassAndModuleChildren:
|
|
7
8
|
Enabled: false
|
|
@@ -9,7 +10,7 @@ Style/ClassAndModuleChildren:
|
|
|
9
10
|
Style/SignalException:
|
|
10
11
|
EnforcedStyle: semantic
|
|
11
12
|
|
|
12
|
-
Naming/
|
|
13
|
+
Naming/MethodParameterName:
|
|
13
14
|
AllowedNames:
|
|
14
15
|
- as
|
|
15
16
|
|
|
@@ -17,8 +18,11 @@ Metrics/AbcSize:
|
|
|
17
18
|
Max: 21
|
|
18
19
|
Exclude:
|
|
19
20
|
|
|
21
|
+
Metrics/MethodLength:
|
|
22
|
+
Max: 15
|
|
23
|
+
|
|
20
24
|
Metrics/LineLength:
|
|
21
|
-
Max:
|
|
25
|
+
Max: 130
|
|
22
26
|
|
|
23
27
|
Metrics/BlockLength:
|
|
24
28
|
Max: 120
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
3.0.2
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
spidy (0.3.
|
|
4
|
+
spidy (0.3.10)
|
|
5
5
|
activesupport
|
|
6
6
|
mechanize
|
|
7
7
|
pry
|
|
@@ -11,124 +11,126 @@ PATH
|
|
|
11
11
|
GEM
|
|
12
12
|
remote: https://rubygems.org/
|
|
13
13
|
specs:
|
|
14
|
-
activesupport (
|
|
14
|
+
activesupport (7.0.0)
|
|
15
15
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
16
|
-
i18n (>=
|
|
17
|
-
minitest (
|
|
18
|
-
tzinfo (~>
|
|
19
|
-
|
|
20
|
-
addressable (2.7.0)
|
|
16
|
+
i18n (>= 1.6, < 2)
|
|
17
|
+
minitest (>= 5.1)
|
|
18
|
+
tzinfo (~> 2.0)
|
|
19
|
+
addressable (2.8.0)
|
|
21
20
|
public_suffix (>= 2.0.2, < 5.0)
|
|
22
|
-
capybara (3.
|
|
21
|
+
capybara (3.36.0)
|
|
23
22
|
addressable
|
|
23
|
+
matrix
|
|
24
24
|
mini_mime (>= 0.1.3)
|
|
25
25
|
nokogiri (~> 1.8)
|
|
26
26
|
rack (>= 1.6.0)
|
|
27
27
|
rack-test (>= 0.6.3)
|
|
28
|
-
regexp_parser (
|
|
28
|
+
regexp_parser (>= 1.5, < 3.0)
|
|
29
29
|
xpath (~> 3.2)
|
|
30
30
|
capybara_discoball (0.1.0)
|
|
31
31
|
capybara (>= 2.7, < 4)
|
|
32
|
-
coderay (1.1.
|
|
33
|
-
concurrent-ruby (1.1.
|
|
34
|
-
connection_pool (2.2.
|
|
35
|
-
diff-lcs (1.
|
|
32
|
+
coderay (1.1.3)
|
|
33
|
+
concurrent-ruby (1.1.9)
|
|
34
|
+
connection_pool (2.2.5)
|
|
35
|
+
diff-lcs (1.5.0)
|
|
36
36
|
domain_name (0.5.20190701)
|
|
37
37
|
unf (>= 0.0.5, < 1.0.0)
|
|
38
|
-
ffaker (2.
|
|
39
|
-
http-cookie (1.0.
|
|
38
|
+
ffaker (2.20.0)
|
|
39
|
+
http-cookie (1.0.4)
|
|
40
40
|
domain_name (~> 0.5)
|
|
41
|
-
i18n (1.8.
|
|
41
|
+
i18n (1.8.11)
|
|
42
42
|
concurrent-ruby (~> 1.0)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
43
|
+
matrix (0.4.2)
|
|
44
|
+
mechanize (2.8.3)
|
|
45
|
+
addressable (~> 2.8)
|
|
46
|
+
domain_name (~> 0.5, >= 0.5.20190701)
|
|
47
|
+
http-cookie (~> 1.0, >= 1.0.3)
|
|
48
|
+
mime-types (~> 3.0)
|
|
49
|
+
net-http-digest_auth (~> 1.4, >= 1.4.1)
|
|
50
|
+
net-http-persistent (>= 2.5.2, < 5.0.dev)
|
|
51
|
+
nokogiri (~> 1.11, >= 1.11.2)
|
|
52
|
+
rubyntlm (~> 0.6, >= 0.6.3)
|
|
53
|
+
webrick (~> 1.7)
|
|
54
|
+
webrobots (~> 0.1.2)
|
|
55
|
+
method_source (1.0.0)
|
|
56
|
+
mime-types (3.4.1)
|
|
54
57
|
mime-types-data (~> 3.2015)
|
|
55
|
-
mime-types-data (3.
|
|
56
|
-
mini_mime (1.
|
|
57
|
-
|
|
58
|
-
minitest (5.14.2)
|
|
58
|
+
mime-types-data (3.2021.1115)
|
|
59
|
+
mini_mime (1.1.2)
|
|
60
|
+
minitest (5.15.0)
|
|
59
61
|
mixlib-shellout (2.4.4)
|
|
60
62
|
mustermann (1.1.1)
|
|
61
63
|
ruby2_keywords (~> 0.0.1)
|
|
62
64
|
net-http-digest_auth (1.4.1)
|
|
63
|
-
net-http-persistent (4.0.
|
|
65
|
+
net-http-persistent (4.0.1)
|
|
64
66
|
connection_pool (~> 2.2)
|
|
65
|
-
nokogiri (1.
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
method_source (~> 0.9.0)
|
|
67
|
+
nokogiri (1.12.5-arm64-darwin)
|
|
68
|
+
racc (~> 1.4)
|
|
69
|
+
pry (0.14.1)
|
|
70
|
+
coderay (~> 1.1)
|
|
71
|
+
method_source (~> 1.0)
|
|
71
72
|
public_suffix (4.0.6)
|
|
73
|
+
racc (1.6.0)
|
|
72
74
|
rack (2.2.3)
|
|
73
|
-
rack-protection (2.0
|
|
75
|
+
rack-protection (2.1.0)
|
|
74
76
|
rack
|
|
75
77
|
rack-test (1.1.0)
|
|
76
78
|
rack (>= 1.0, < 3)
|
|
77
|
-
rake (
|
|
78
|
-
regexp_parser (
|
|
79
|
-
rspec (3.
|
|
80
|
-
rspec-core (~> 3.
|
|
81
|
-
rspec-expectations (~> 3.
|
|
82
|
-
rspec-mocks (~> 3.
|
|
79
|
+
rake (13.0.6)
|
|
80
|
+
regexp_parser (2.2.0)
|
|
81
|
+
rspec (3.10.0)
|
|
82
|
+
rspec-core (~> 3.10.0)
|
|
83
|
+
rspec-expectations (~> 3.10.0)
|
|
84
|
+
rspec-mocks (~> 3.10.0)
|
|
83
85
|
rspec-command (1.0.3)
|
|
84
86
|
mixlib-shellout (~> 2.0)
|
|
85
87
|
rspec (~> 3.2)
|
|
86
88
|
rspec-its (~> 1.2)
|
|
87
|
-
rspec-core (3.
|
|
88
|
-
rspec-support (~> 3.
|
|
89
|
-
rspec-expectations (3.
|
|
89
|
+
rspec-core (3.10.1)
|
|
90
|
+
rspec-support (~> 3.10.0)
|
|
91
|
+
rspec-expectations (3.10.1)
|
|
90
92
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
91
|
-
rspec-support (~> 3.
|
|
93
|
+
rspec-support (~> 3.10.0)
|
|
92
94
|
rspec-its (1.3.0)
|
|
93
95
|
rspec-core (>= 3.0.0)
|
|
94
96
|
rspec-expectations (>= 3.0.0)
|
|
95
|
-
rspec-mocks (3.
|
|
97
|
+
rspec-mocks (3.10.2)
|
|
96
98
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
97
|
-
rspec-support (~> 3.
|
|
98
|
-
rspec-support (3.
|
|
99
|
-
ruby2_keywords (0.0.
|
|
100
|
-
|
|
99
|
+
rspec-support (~> 3.10.0)
|
|
100
|
+
rspec-support (3.10.3)
|
|
101
|
+
ruby2_keywords (0.0.5)
|
|
102
|
+
rubyntlm (0.6.3)
|
|
103
|
+
sinatra (2.1.0)
|
|
101
104
|
mustermann (~> 1.0)
|
|
102
|
-
rack (~> 2.
|
|
103
|
-
rack-protection (= 2.0
|
|
105
|
+
rack (~> 2.2)
|
|
106
|
+
rack-protection (= 2.1.0)
|
|
104
107
|
tilt (~> 2.0)
|
|
105
108
|
socksify (1.7.1)
|
|
106
|
-
thread_safe (0.3.6)
|
|
107
109
|
tilt (2.0.10)
|
|
108
|
-
tor (0.1.
|
|
109
|
-
tzinfo (
|
|
110
|
-
|
|
110
|
+
tor (0.1.5)
|
|
111
|
+
tzinfo (2.0.4)
|
|
112
|
+
concurrent-ruby (~> 1.0)
|
|
111
113
|
unf (0.1.4)
|
|
112
114
|
unf_ext
|
|
113
|
-
unf_ext (0.0.
|
|
115
|
+
unf_ext (0.0.8)
|
|
116
|
+
webrick (1.7.0)
|
|
114
117
|
webrobots (0.1.2)
|
|
115
118
|
xpath (3.2.0)
|
|
116
119
|
nokogiri (~> 1.8)
|
|
117
|
-
zeitwerk (2.4.0)
|
|
118
120
|
|
|
119
121
|
PLATFORMS
|
|
120
|
-
|
|
122
|
+
arm64-darwin-20
|
|
121
123
|
|
|
122
124
|
DEPENDENCIES
|
|
123
125
|
bundler (~> 2.0)
|
|
124
126
|
capybara_discoball
|
|
125
127
|
ffaker
|
|
126
128
|
pry
|
|
127
|
-
rake (~>
|
|
129
|
+
rake (~> 13.0)
|
|
128
130
|
rspec (~> 3.0)
|
|
129
131
|
rspec-command
|
|
130
132
|
sinatra
|
|
131
133
|
spidy!
|
|
132
134
|
|
|
133
135
|
BUNDLED WITH
|
|
134
|
-
2.
|
|
136
|
+
2.2.22
|
data/example/master_detail.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
Spidy.define do
|
|
4
|
-
url_to_params =
|
|
4
|
+
url_to_params = lambda { |url|
|
|
5
5
|
uri = URI.parse(url)
|
|
6
6
|
params = URI.decode_www_form(uri.query).to_h if uri.query.present?
|
|
7
7
|
params if params.present?
|
|
@@ -13,41 +13,41 @@ Spidy.define do
|
|
|
13
13
|
|
|
14
14
|
limit_page = 3
|
|
15
15
|
per_page = 25
|
|
16
|
-
yielder.call(Nokogiri::HTML::Builder.new
|
|
17
|
-
doc.html
|
|
18
|
-
doc.body
|
|
19
|
-
doc.span.bold
|
|
20
|
-
doc.text
|
|
21
|
-
|
|
22
|
-
doc.main
|
|
23
|
-
(page * per_page + 1).upto((page + 1) * per_page).each do |i|
|
|
16
|
+
yielder.call(Nokogiri::HTML::Builder.new do |doc|
|
|
17
|
+
doc.html do
|
|
18
|
+
doc.body do
|
|
19
|
+
doc.span.bold do
|
|
20
|
+
doc.text 'Hello world'
|
|
21
|
+
end
|
|
22
|
+
doc.main do
|
|
23
|
+
((page * per_page) + 1).upto((page + 1) * per_page).each do |i|
|
|
24
24
|
doc.a("page #{i}", href: "http://localhost/?id=#{i}")
|
|
25
25
|
end
|
|
26
|
-
|
|
26
|
+
end
|
|
27
27
|
doc.a('NEXT', href: "http://localhost/?page=#{page + 1}", class: 'next') if page < limit_page
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end.doc)
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
detail_page = proc { |url, &yielder|
|
|
34
34
|
params = url_to_params.call(url)
|
|
35
35
|
id = params['id']
|
|
36
36
|
|
|
37
|
-
yielder.call(Nokogiri::HTML::Builder.new
|
|
38
|
-
doc.html
|
|
39
|
-
doc.body
|
|
40
|
-
doc.span.bold
|
|
41
|
-
doc.text
|
|
42
|
-
|
|
37
|
+
yielder.call(Nokogiri::HTML::Builder.new do |doc|
|
|
38
|
+
doc.html do
|
|
39
|
+
doc.body do
|
|
40
|
+
doc.span.bold do
|
|
41
|
+
doc.text 'Hello world'
|
|
42
|
+
end
|
|
43
43
|
doc.h1("title_#{id}", id: 'title')
|
|
44
44
|
doc.main("body_#{id}", id: 'body')
|
|
45
45
|
doc.div.sub do
|
|
46
46
|
doc.span.name('testtest')
|
|
47
47
|
end
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end.doc)
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
define(as: :html, connector: detail_page) do
|
data/example/proxy.rb
CHANGED
data/example/retry.rb
CHANGED
data/example/wikip.rb
CHANGED
|
@@ -11,11 +11,8 @@ Spidy.define do
|
|
|
11
11
|
|
|
12
12
|
define(:infobox, as: :html, connector: :direct) do
|
|
13
13
|
let(:columns) do
|
|
14
|
-
html.search('tr').
|
|
15
|
-
{
|
|
16
|
-
name: tr.at('th')&.text,
|
|
17
|
-
value: tr.at('td')&.text
|
|
18
|
-
}
|
|
14
|
+
html.search('tr').map do |tr|
|
|
15
|
+
{ name: tr.at('th')&.text, value: tr.at('td')&.text }
|
|
19
16
|
end
|
|
20
17
|
end
|
|
21
18
|
end
|
data/exe/spidy
CHANGED
|
@@ -6,10 +6,10 @@ require 'pry'
|
|
|
6
6
|
|
|
7
7
|
if ARGV[1].blank?
|
|
8
8
|
case ARGV[0]
|
|
9
|
-
when 'version' then
|
|
9
|
+
when 'version' then $stdout.puts(Spidy::VERSION)
|
|
10
10
|
when 'console' then Spidy.shell.interactive
|
|
11
11
|
else
|
|
12
|
-
|
|
12
|
+
$stdout.puts 'usage: spidy [version console]'
|
|
13
13
|
end
|
|
14
14
|
else
|
|
15
15
|
case ARGV[0]
|
|
@@ -17,7 +17,8 @@ else
|
|
|
17
17
|
when 'function' then Spidy.shell(ARGV[1]).function
|
|
18
18
|
when 'call' then Spidy.shell(ARGV[1]).call(ARGV[2])
|
|
19
19
|
when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
|
|
20
|
+
when 'eval' then Spidy.shell(ARGV[1]).eval_call(ARGV[2])
|
|
20
21
|
else
|
|
21
|
-
|
|
22
|
+
$stdout.puts 'usage: spidy [console function call each run] [file]'
|
|
22
23
|
end
|
|
23
24
|
end
|
data/lib/spidy/binder/html.rb
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
#
|
|
4
4
|
# Bind html and convert to object
|
|
5
5
|
#
|
|
6
|
-
|
|
7
|
-
def
|
|
6
|
+
module Spidy::Binder::Html
|
|
7
|
+
def let(name, query = nil, &block)
|
|
8
8
|
@attribute_names ||= []
|
|
9
9
|
@attribute_names << name
|
|
10
10
|
|
|
11
|
-
return define_method(name) { html.at(query)&.text } if block.nil?
|
|
11
|
+
return define_method(name) { html.at(query)&.text&.strip } if block.nil?
|
|
12
12
|
|
|
13
13
|
define_method(name) do
|
|
14
14
|
if query.present?
|
|
@@ -17,9 +17,11 @@ class Spidy::Binder::Html < Spidy::Binder::Base
|
|
|
17
17
|
instance_exec(&block)
|
|
18
18
|
end
|
|
19
19
|
rescue StandardError => e
|
|
20
|
-
|
|
20
|
+
raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
def self.extended(obj)
|
|
25
|
+
obj.alias_method :html, :resource
|
|
26
|
+
end
|
|
25
27
|
end
|
data/lib/spidy/binder/json.rb
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
#
|
|
4
4
|
# Bind json and convert to object
|
|
5
5
|
#
|
|
6
|
-
|
|
7
|
-
def
|
|
6
|
+
module Spidy::Binder::Json
|
|
7
|
+
def let(name, *query, &block)
|
|
8
8
|
@attribute_names ||= []
|
|
9
9
|
@attribute_names << name
|
|
10
10
|
|
|
@@ -17,9 +17,11 @@ class Spidy::Binder::Json < Spidy::Binder::Base
|
|
|
17
17
|
instance_exec(&block)
|
|
18
18
|
end
|
|
19
19
|
rescue StandardError => e
|
|
20
|
-
|
|
20
|
+
raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
def self.extended(obj)
|
|
25
|
+
obj.alias_method :json, :resource
|
|
26
|
+
end
|
|
25
27
|
end
|
data/lib/spidy/binder/xml.rb
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
#
|
|
4
4
|
# Bind xml and convert to object
|
|
5
5
|
#
|
|
6
|
-
|
|
7
|
-
def
|
|
6
|
+
module Spidy::Binder::Xml
|
|
7
|
+
def let(name, query = nil, &block)
|
|
8
8
|
@attribute_names ||= []
|
|
9
9
|
@attribute_names << name
|
|
10
10
|
|
|
11
|
-
return define_method(name) { xml.at(query)&.text } if block.nil?
|
|
11
|
+
return define_method(name) { xml.at(query)&.text&.strip } if block.nil?
|
|
12
12
|
|
|
13
13
|
define_method(name) do
|
|
14
14
|
if query.present?
|
|
@@ -17,9 +17,11 @@ class Spidy::Binder::Xml < Spidy::Binder::Base
|
|
|
17
17
|
instance_exec(&block)
|
|
18
18
|
end
|
|
19
19
|
rescue StandardError => e
|
|
20
|
-
|
|
20
|
+
raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
def self.extended(obj)
|
|
25
|
+
obj.alias_method :xml, :resource
|
|
26
|
+
end
|
|
25
27
|
end
|
data/lib/spidy/binder.rb
CHANGED
|
@@ -5,51 +5,8 @@
|
|
|
5
5
|
#
|
|
6
6
|
module Spidy::Binder
|
|
7
7
|
extend ActiveSupport::Autoload
|
|
8
|
+
autoload :Error
|
|
8
9
|
autoload :Json
|
|
9
10
|
autoload :Html
|
|
10
11
|
autoload :Xml
|
|
11
|
-
|
|
12
|
-
class Error < StandardError
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
class Caller
|
|
16
|
-
def initialize(spidy, binder)
|
|
17
|
-
@spidy = spidy
|
|
18
|
-
@binder = binder
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def call(source, url: nil, define: nil, define_name: nil)
|
|
22
|
-
yield Class.new(@binder, &define).new(define_name, @spidy, source, url)
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
class Base
|
|
27
|
-
class << self
|
|
28
|
-
attr_reader :attribute_names
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
attr_reader :resource, :url
|
|
32
|
-
|
|
33
|
-
def initialize(define_name, spidy, resource, url)
|
|
34
|
-
@define_name = define_name
|
|
35
|
-
@spidy = spidy
|
|
36
|
-
@resource = resource
|
|
37
|
-
@url = url
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def to_s
|
|
41
|
-
to_h.to_json
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def to_h
|
|
45
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def self.get(spidy, value)
|
|
51
|
-
return Caller.new(spidy, const_get(value.to_s.classify)) if name.is_a?(String) || name.is_a?(Symbol)
|
|
52
|
-
|
|
53
|
-
value
|
|
54
|
-
end
|
|
55
12
|
end
|
data/lib/spidy/command_line.rb
CHANGED
|
@@ -5,45 +5,47 @@
|
|
|
5
5
|
#
|
|
6
6
|
class Spidy::CommandLine
|
|
7
7
|
delegate :spidy, to: :@definition_file
|
|
8
|
-
class_attribute :output, default: (proc { |result|
|
|
9
|
-
class_attribute :error_handler, default: (proc { |e, url|
|
|
8
|
+
class_attribute :output, default: (proc { |result| $stdout.puts(result.to_s) })
|
|
9
|
+
class_attribute :error_handler, default: (proc { |e, url|
|
|
10
|
+
warn({ url: url, message: e.message, backtrace: e.backtrace }.to_json)
|
|
11
|
+
})
|
|
12
|
+
|
|
13
|
+
def eval_call(script)
|
|
14
|
+
@definition_file.spidy.instance_eval(script)
|
|
15
|
+
end
|
|
10
16
|
|
|
11
17
|
def initialize(definition_file)
|
|
12
18
|
@definition_file = definition_file
|
|
13
|
-
|
|
19
|
+
fail 'unloaded spidy' if definition_file.spidy.nil?
|
|
14
20
|
end
|
|
15
21
|
|
|
16
22
|
def each_stdin_lines(name)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
error_handler.call(e, url)
|
|
22
|
-
end
|
|
23
|
+
$stdin.each_line do |url|
|
|
24
|
+
spidy.each(url.strip, name: name, &output)
|
|
25
|
+
rescue StandardError => e
|
|
26
|
+
error_handler.call(e, url)
|
|
23
27
|
end
|
|
24
28
|
end
|
|
25
29
|
|
|
26
30
|
def call_stdin_lines(name)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
error_handler.call(e, url)
|
|
32
|
-
end
|
|
31
|
+
$stdin.each_line do |url|
|
|
32
|
+
spidy.call(url.strip, name: name, &output)
|
|
33
|
+
rescue StandardError => e
|
|
34
|
+
error_handler.call(e, url)
|
|
33
35
|
end
|
|
34
36
|
end
|
|
35
37
|
|
|
36
38
|
def call(name)
|
|
37
|
-
return call_stdin_lines(name) if FileTest.pipe?(
|
|
38
|
-
spidy.call(name: name, &output) unless FileTest.pipe?(
|
|
39
|
-
rescue => e
|
|
39
|
+
return call_stdin_lines(name) if FileTest.pipe?($stdin)
|
|
40
|
+
spidy.call(name: name, &output) unless FileTest.pipe?($stdin)
|
|
41
|
+
rescue StandardError => e
|
|
40
42
|
error_handler.call(e, nil)
|
|
41
43
|
end
|
|
42
44
|
|
|
43
45
|
def each(name)
|
|
44
|
-
return each_stdin_lines(name) if FileTest.pipe?(
|
|
46
|
+
return each_stdin_lines(name) if FileTest.pipe?($stdin)
|
|
45
47
|
spidy.each(name: name, &output)
|
|
46
|
-
rescue => e
|
|
48
|
+
rescue StandardError => e
|
|
47
49
|
error_handler.call(e, nil)
|
|
48
50
|
end
|
|
49
51
|
|
|
@@ -59,36 +61,32 @@ class Spidy::CommandLine
|
|
|
59
61
|
end
|
|
60
62
|
|
|
61
63
|
def build(name)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
+
File.write("#{name}.sh", build_shell_script(name))
|
|
65
|
+
File.write("#{name}.rb", build_ruby_script)
|
|
64
66
|
end
|
|
65
67
|
|
|
66
68
|
def build_shell(name)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
SHELL
|
|
73
|
-
end
|
|
69
|
+
<<~SHELL
|
|
70
|
+
#!/bin/bash
|
|
71
|
+
eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
|
|
72
|
+
spider
|
|
73
|
+
SHELL
|
|
74
74
|
end
|
|
75
75
|
|
|
76
|
-
def build_ruby
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
# frozen_string_literal: true
|
|
76
|
+
def build_ruby
|
|
77
|
+
<<~RUBY
|
|
78
|
+
# frozen_string_literal: true
|
|
80
79
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
80
|
+
Spidy.define do
|
|
81
|
+
spider(as: :html) do |yielder, connector|
|
|
82
|
+
# connector.call(url) do |resource|
|
|
83
|
+
# yielder.call(url or resource)
|
|
84
|
+
# end
|
|
85
|
+
end
|
|
87
86
|
|
|
88
|
-
|
|
89
|
-
end
|
|
87
|
+
define(as: :html) do
|
|
90
88
|
end
|
|
91
|
-
|
|
92
|
-
|
|
89
|
+
end
|
|
90
|
+
RUBY
|
|
93
91
|
end
|
|
94
92
|
end
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Direct resource ( not network resource )
|
|
5
5
|
#
|
|
6
6
|
class Spidy::Connector::Direct
|
|
7
|
-
def call(resource
|
|
7
|
+
def call(resource)
|
|
8
8
|
if block_given?
|
|
9
9
|
yield resource
|
|
10
10
|
else
|
|
@@ -12,6 +12,5 @@ class Spidy::Connector::Direct
|
|
|
12
12
|
end
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def initialize(user_agent:)
|
|
16
|
-
end
|
|
15
|
+
def initialize(user_agent:); end
|
|
17
16
|
end
|
data/lib/spidy/connector/html.rb
CHANGED
|
@@ -14,13 +14,13 @@ class Spidy::Connector::Html
|
|
|
14
14
|
|
|
15
15
|
attr_reader :agent
|
|
16
16
|
|
|
17
|
-
def call(url, encoding: nil,
|
|
17
|
+
def call(url, encoding: nil, &yielder)
|
|
18
18
|
fail 'url is not specified' if url.blank?
|
|
19
19
|
if encoding
|
|
20
20
|
agent.default_encoding = encoding
|
|
21
21
|
agent.force_default_encoding = true
|
|
22
22
|
end
|
|
23
|
-
connect(url,
|
|
23
|
+
connect(url, yielder)
|
|
24
24
|
end
|
|
25
25
|
|
|
26
26
|
def refresh!
|
|
@@ -30,17 +30,19 @@ class Spidy::Connector::Html
|
|
|
30
30
|
|
|
31
31
|
private
|
|
32
32
|
|
|
33
|
-
def connect(url,
|
|
33
|
+
def connect(url, yielder)
|
|
34
34
|
result = nil
|
|
35
35
|
agent.get(url) do |page|
|
|
36
|
-
|
|
36
|
+
if page.title == 'Sorry, unable to access page...'
|
|
37
|
+
fail Spidy::Connector::Retry.new(object: page, response_code: page.try(:response_code))
|
|
38
|
+
end
|
|
37
39
|
|
|
38
40
|
result = yielder.call(page)
|
|
39
41
|
end
|
|
40
42
|
result
|
|
41
43
|
rescue Mechanize::ResponseCodeError => e
|
|
42
|
-
raise Spidy::Connector::Retry
|
|
43
|
-
raise Spidy::Connector::Retry
|
|
44
|
-
raise Spidy::Connector::Retry
|
|
44
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '429'
|
|
45
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '502'
|
|
46
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code))
|
|
45
47
|
end
|
|
46
48
|
end
|
data/lib/spidy/connector/json.rb
CHANGED
|
@@ -17,9 +17,9 @@ class Spidy::Connector::Json
|
|
|
17
17
|
connect(url, &block)
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
def connect(url
|
|
21
|
-
OpenURI.open_uri(url,
|
|
20
|
+
def connect(url)
|
|
21
|
+
OpenURI.open_uri(url, 'User-Agent' => @user_agent) { |body| yield JSON.parse(body.read, symbolize_names: true) }
|
|
22
22
|
rescue OpenURI::HTTPError => e
|
|
23
|
-
raise Spidy::Connector::Retry
|
|
23
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
|
|
24
24
|
end
|
|
25
25
|
end
|
data/lib/spidy/connector/xml.rb
CHANGED
|
@@ -13,11 +13,11 @@ class Spidy::Connector::Xml
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
def connect(url, &block)
|
|
16
|
-
OpenURI.open_uri(url,
|
|
16
|
+
OpenURI.open_uri(url, 'User-Agent' => @user_agent) do |body|
|
|
17
17
|
block.call Nokogiri::XML(body.read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''), url)
|
|
18
18
|
end
|
|
19
19
|
rescue OpenURI::HTTPError => e
|
|
20
|
-
raise Spidy::Connector::Retry
|
|
20
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
def initialize(user_agent:)
|
data/lib/spidy/connector.rb
CHANGED
|
@@ -27,7 +27,7 @@ module Spidy::Connector
|
|
|
27
27
|
#
|
|
28
28
|
# error output logger
|
|
29
29
|
#
|
|
30
|
-
DEFAULT_LOGGER = proc { |values|
|
|
30
|
+
DEFAULT_LOGGER = proc { |values| warn(values.to_json) }
|
|
31
31
|
|
|
32
32
|
#
|
|
33
33
|
# static method
|
|
@@ -36,7 +36,9 @@ module Spidy::Connector
|
|
|
36
36
|
extend ActiveSupport::Concern
|
|
37
37
|
class_methods do
|
|
38
38
|
def call(url, wait_time: 5, logger: Spidy::Connector::DEFAULT_LOGGER, user_agent: Spidy::Connector::USER_AGENT, &block)
|
|
39
|
-
::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(
|
|
39
|
+
::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(
|
|
40
|
+
url, &block
|
|
41
|
+
)
|
|
40
42
|
end
|
|
41
43
|
end
|
|
42
44
|
end
|
|
@@ -51,6 +53,7 @@ module Spidy::Connector
|
|
|
51
53
|
@object = object
|
|
52
54
|
@response_code = response_code
|
|
53
55
|
@error = error
|
|
56
|
+
super(error)
|
|
54
57
|
end
|
|
55
58
|
end
|
|
56
59
|
|
|
@@ -58,13 +61,13 @@ module Spidy::Connector
|
|
|
58
61
|
# retry
|
|
59
62
|
#
|
|
60
63
|
class RetryableCaller
|
|
61
|
-
attr_reader :origin_connector
|
|
64
|
+
attr_reader :origin_connector, :logger, :wait_time
|
|
62
65
|
|
|
63
|
-
def initialize(connector, logger:, wait_time:)
|
|
66
|
+
def initialize(connector, logger:, wait_time:, retry_attempt_count: 5)
|
|
64
67
|
@origin_connector = connector
|
|
65
68
|
@logger = logger
|
|
66
69
|
@wait_time = wait_time
|
|
67
|
-
@retry_attempt_count =
|
|
70
|
+
@retry_attempt_count = retry_attempt_count
|
|
68
71
|
end
|
|
69
72
|
|
|
70
73
|
def call(url, &block)
|
|
@@ -73,18 +76,18 @@ module Spidy::Connector
|
|
|
73
76
|
end
|
|
74
77
|
|
|
75
78
|
def connect(url, retry_attempt_count: @retry_attempt_count, &block)
|
|
76
|
-
|
|
77
|
-
|
|
79
|
+
logger.call('connnector.get': url, 'connnector.accessed': Time.current)
|
|
80
|
+
origin_connector.call(url, &block)
|
|
78
81
|
rescue Spidy::Connector::Retry => e
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
82
|
+
logger.call('retry.accessed': Time.current,
|
|
83
|
+
'retry.uri': url,
|
|
84
|
+
'retry.response_code': e.response_code,
|
|
85
|
+
'retry.attempt_count': retry_attempt_count)
|
|
83
86
|
|
|
84
87
|
retry_attempt_count -= 1
|
|
85
88
|
if retry_attempt_count.positive?
|
|
86
|
-
sleep
|
|
87
|
-
|
|
89
|
+
sleep wait_time
|
|
90
|
+
origin_connector.refresh! if origin_connector.respond_to?(:refresh!)
|
|
88
91
|
retry
|
|
89
92
|
end
|
|
90
93
|
raise e.error
|
|
@@ -103,7 +106,7 @@ module Spidy::Connector
|
|
|
103
106
|
end
|
|
104
107
|
|
|
105
108
|
def call(url, &block)
|
|
106
|
-
Socksify
|
|
109
|
+
Socksify.proxy(socks_proxy[:host], socks_proxy[:port]) do
|
|
107
110
|
connector.call(url, &block)
|
|
108
111
|
end
|
|
109
112
|
end
|
|
@@ -141,7 +144,6 @@ module Spidy::Connector
|
|
|
141
144
|
fail "Not defined connnector[#{value}]" if connector.nil?
|
|
142
145
|
return connector if socks_proxy.nil?
|
|
143
146
|
|
|
144
|
-
|
|
145
|
-
tor
|
|
147
|
+
TorConnector.new(connector, socks_proxy)
|
|
146
148
|
end
|
|
147
149
|
end
|
data/lib/spidy/definition.rb
CHANGED
|
@@ -33,33 +33,39 @@ module Spidy::Definition
|
|
|
33
33
|
spidy = @namespace[:"#{name}_spider"]
|
|
34
34
|
fail "undefined spidy [#{name}]" if spidy.nil?
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
if yielder
|
|
37
|
+
spidy.call(source, &yielder)
|
|
38
|
+
else
|
|
39
|
+
Enumerator.new do |enumerate_yielder|
|
|
40
|
+
spidy.call(source, &enumerate_yielder)
|
|
41
|
+
end
|
|
42
|
+
end
|
|
37
43
|
end
|
|
38
44
|
|
|
39
45
|
def spider(name = :default, connector: nil, as: nil, &define_block)
|
|
40
46
|
@namespace ||= {}
|
|
41
|
-
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
|
47
|
+
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
|
48
|
+
socks_proxy: @socks_proxy)
|
|
42
49
|
@namespace[:"#{name}_spider"] = proc do |source, &yielder|
|
|
43
50
|
define_block.call(yielder, connector, source)
|
|
44
51
|
end
|
|
45
52
|
end
|
|
46
53
|
|
|
47
|
-
def define(name = :default, connector: nil,
|
|
54
|
+
def define(name = :default, connector: nil, as: nil, &define_block)
|
|
55
|
+
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
|
56
|
+
socks_proxy: @socks_proxy)
|
|
57
|
+
binder_base = Spidy::Binder.const_get(as.to_s.classify)
|
|
48
58
|
@namespace ||= {}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
yielder = lambda { |result| break result } if yielder.nil?
|
|
59
|
-
connection_yielder = lambda do |page|
|
|
60
|
-
binder.call(page, url: source, define: define_block, define_name: name) { |object| yielder.call(object) }
|
|
59
|
+
@namespace[:"#{name}_scraper"] = Class.new(Spidy::DefinitionObject) do
|
|
60
|
+
extend binder_base
|
|
61
|
+
class_eval(&define_block)
|
|
62
|
+
define_singleton_method(:call) do |source, &yielder|
|
|
63
|
+
yielder = ->(result) { break result } if yielder.nil?
|
|
64
|
+
connection_yielder = lambda do |page|
|
|
65
|
+
yielder.call(new(page, source))
|
|
66
|
+
end
|
|
67
|
+
connector.call(source, &connection_yielder)
|
|
61
68
|
end
|
|
62
|
-
connector.call(source, &connection_yielder)
|
|
63
69
|
end
|
|
64
70
|
end
|
|
65
71
|
end
|
|
@@ -4,8 +4,7 @@
|
|
|
4
4
|
# spidy interface binding
|
|
5
5
|
#
|
|
6
6
|
class Spidy::DefinitionFile
|
|
7
|
-
attr_reader :path
|
|
8
|
-
attr_reader :spidy
|
|
7
|
+
attr_reader :path, :spidy
|
|
9
8
|
|
|
10
9
|
def self.open(filepath)
|
|
11
10
|
object = new(filepath)
|
|
@@ -15,7 +14,7 @@ class Spidy::DefinitionFile
|
|
|
15
14
|
|
|
16
15
|
# rubocop:disable Security/Eval
|
|
17
16
|
def eval_definition
|
|
18
|
-
@spidy = eval(File.
|
|
17
|
+
@spidy = eval(File.read(path)) if path
|
|
19
18
|
end
|
|
20
19
|
# rubocop:enable Security/Eval
|
|
21
20
|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# An object that represents the scraper defined by define block.
|
|
5
|
+
#
|
|
6
|
+
class Spidy::DefinitionObject
|
|
7
|
+
class << self
|
|
8
|
+
attr_reader :attribute_names
|
|
9
|
+
end
|
|
10
|
+
attr_reader :resource, :url
|
|
11
|
+
|
|
12
|
+
def initialize(resource, url)
|
|
13
|
+
@resource = resource
|
|
14
|
+
@url = url
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def to_s
|
|
18
|
+
to_h.to_json
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def to_h
|
|
22
|
+
self.class.attribute_names.to_h { |name| [name, send(name)] }
|
|
23
|
+
end
|
|
24
|
+
end
|
data/lib/spidy/shell.rb
CHANGED
data/lib/spidy/version.rb
CHANGED
data/lib/spidy.rb
CHANGED
data/spidy.gemspec
CHANGED
|
@@ -25,17 +25,20 @@ Gem::Specification.new do |spec|
|
|
|
25
25
|
spec.require_paths = ['lib']
|
|
26
26
|
|
|
27
27
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
|
28
|
+
spec.add_development_dependency 'capybara_discoball'
|
|
29
|
+
spec.add_development_dependency 'ffaker'
|
|
28
30
|
spec.add_development_dependency 'pry'
|
|
29
|
-
spec.add_development_dependency 'rake', '~>
|
|
31
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
|
30
32
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
|
31
|
-
spec.add_development_dependency 'ffaker'
|
|
32
33
|
spec.add_development_dependency 'rspec-command'
|
|
33
|
-
spec.add_development_dependency 'capybara_discoball'
|
|
34
34
|
spec.add_development_dependency 'sinatra'
|
|
35
35
|
|
|
36
|
-
spec.add_runtime_dependency 'tor'
|
|
37
36
|
spec.add_runtime_dependency 'activesupport'
|
|
38
37
|
spec.add_runtime_dependency 'mechanize'
|
|
39
|
-
spec.add_runtime_dependency 'socksify'
|
|
40
38
|
spec.add_runtime_dependency 'pry'
|
|
39
|
+
spec.add_runtime_dependency 'socksify'
|
|
40
|
+
spec.add_runtime_dependency 'tor'
|
|
41
|
+
spec.metadata = {
|
|
42
|
+
'rubygems_mfa_required' => 'true'
|
|
43
|
+
}
|
|
41
44
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: spidy
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.12
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- aileron
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-02-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -25,7 +25,7 @@ dependencies:
|
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '2.0'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name:
|
|
28
|
+
name: capybara_discoball
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - ">="
|
|
@@ -39,63 +39,63 @@ dependencies:
|
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '0'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
|
-
name:
|
|
42
|
+
name: ffaker
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
|
-
- - "
|
|
45
|
+
- - ">="
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '
|
|
47
|
+
version: '0'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
|
-
- - "
|
|
52
|
+
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '
|
|
54
|
+
version: '0'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
|
-
name:
|
|
56
|
+
name: pry
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
58
58
|
requirements:
|
|
59
|
-
- - "
|
|
59
|
+
- - ">="
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '
|
|
61
|
+
version: '0'
|
|
62
62
|
type: :development
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
|
-
- - "
|
|
66
|
+
- - ">="
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '
|
|
68
|
+
version: '0'
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
|
-
name:
|
|
70
|
+
name: rake
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
72
72
|
requirements:
|
|
73
|
-
- - "
|
|
73
|
+
- - "~>"
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: '0'
|
|
75
|
+
version: '13.0'
|
|
76
76
|
type: :development
|
|
77
77
|
prerelease: false
|
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
79
|
requirements:
|
|
80
|
-
- - "
|
|
80
|
+
- - "~>"
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
|
-
version: '0'
|
|
82
|
+
version: '13.0'
|
|
83
83
|
- !ruby/object:Gem::Dependency
|
|
84
|
-
name: rspec
|
|
84
|
+
name: rspec
|
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
|
86
86
|
requirements:
|
|
87
|
-
- - "
|
|
87
|
+
- - "~>"
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
|
-
version: '0'
|
|
89
|
+
version: '3.0'
|
|
90
90
|
type: :development
|
|
91
91
|
prerelease: false
|
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
|
94
|
-
- - "
|
|
94
|
+
- - "~>"
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
|
-
version: '0'
|
|
96
|
+
version: '3.0'
|
|
97
97
|
- !ruby/object:Gem::Dependency
|
|
98
|
-
name:
|
|
98
|
+
name: rspec-command
|
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
|
100
100
|
requirements:
|
|
101
101
|
- - ">="
|
|
@@ -123,7 +123,7 @@ dependencies:
|
|
|
123
123
|
- !ruby/object:Gem::Version
|
|
124
124
|
version: '0'
|
|
125
125
|
- !ruby/object:Gem::Dependency
|
|
126
|
-
name:
|
|
126
|
+
name: activesupport
|
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
|
128
128
|
requirements:
|
|
129
129
|
- - ">="
|
|
@@ -137,7 +137,7 @@ dependencies:
|
|
|
137
137
|
- !ruby/object:Gem::Version
|
|
138
138
|
version: '0'
|
|
139
139
|
- !ruby/object:Gem::Dependency
|
|
140
|
-
name:
|
|
140
|
+
name: mechanize
|
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
|
142
142
|
requirements:
|
|
143
143
|
- - ">="
|
|
@@ -151,7 +151,7 @@ dependencies:
|
|
|
151
151
|
- !ruby/object:Gem::Version
|
|
152
152
|
version: '0'
|
|
153
153
|
- !ruby/object:Gem::Dependency
|
|
154
|
-
name:
|
|
154
|
+
name: pry
|
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
|
156
156
|
requirements:
|
|
157
157
|
- - ">="
|
|
@@ -179,7 +179,7 @@ dependencies:
|
|
|
179
179
|
- !ruby/object:Gem::Version
|
|
180
180
|
version: '0'
|
|
181
181
|
- !ruby/object:Gem::Dependency
|
|
182
|
-
name:
|
|
182
|
+
name: tor
|
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
|
184
184
|
requirements:
|
|
185
185
|
- - ">="
|
|
@@ -222,6 +222,7 @@ files:
|
|
|
222
222
|
- exe/spidy
|
|
223
223
|
- lib/spidy.rb
|
|
224
224
|
- lib/spidy/binder.rb
|
|
225
|
+
- lib/spidy/binder/error.rb
|
|
225
226
|
- lib/spidy/binder/html.rb
|
|
226
227
|
- lib/spidy/binder/json.rb
|
|
227
228
|
- lib/spidy/binder/xml.rb
|
|
@@ -234,6 +235,7 @@ files:
|
|
|
234
235
|
- lib/spidy/console.rb
|
|
235
236
|
- lib/spidy/definition.rb
|
|
236
237
|
- lib/spidy/definition_file.rb
|
|
238
|
+
- lib/spidy/definition_object.rb
|
|
237
239
|
- lib/spidy/shell.rb
|
|
238
240
|
- lib/spidy/spider.rb
|
|
239
241
|
- lib/spidy/version.rb
|
|
@@ -242,7 +244,8 @@ files:
|
|
|
242
244
|
homepage: https://github.com/aileron-inc/spidy
|
|
243
245
|
licenses:
|
|
244
246
|
- MIT
|
|
245
|
-
metadata:
|
|
247
|
+
metadata:
|
|
248
|
+
rubygems_mfa_required: 'true'
|
|
246
249
|
post_install_message:
|
|
247
250
|
rdoc_options: []
|
|
248
251
|
require_paths:
|
|
@@ -258,7 +261,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
258
261
|
- !ruby/object:Gem::Version
|
|
259
262
|
version: '0'
|
|
260
263
|
requirements: []
|
|
261
|
-
rubygems_version: 3.
|
|
264
|
+
rubygems_version: 3.2.22
|
|
262
265
|
signing_key:
|
|
263
266
|
specification_version: 4
|
|
264
267
|
summary: web spider dsl
|