spidy 0.3.9 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -3
- data/.ruby-version +1 -1
- data/CLAUDE.md +28 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +125 -70
- data/README.md +1 -1
- data/bin/console +2 -3
- data/example/master_detail.rb +23 -23
- data/example/proxy.rb +2 -0
- data/example/retry.rb +2 -0
- data/example/wikip.rb +2 -5
- data/exe/spidy +3 -4
- data/lib/spidy/binder/error.rb +4 -0
- data/lib/spidy/binder/html.rb +2 -1
- data/lib/spidy/binder/json.rb +2 -1
- data/lib/spidy/binder/xml.rb +2 -1
- data/lib/spidy/binder.rb +1 -0
- data/lib/spidy/command_line.rb +37 -43
- data/lib/spidy/connector/direct.rb +2 -3
- data/lib/spidy/connector/html.rb +9 -7
- data/lib/spidy/connector/json.rb +3 -3
- data/lib/spidy/connector/xml.rb +2 -2
- data/lib/spidy/connector.rb +18 -16
- data/lib/spidy/definition.rb +13 -5
- data/lib/spidy/definition_file.rb +2 -3
- data/lib/spidy/{define_object.rb → definition_object.rb} +7 -2
- data/lib/spidy/shell.rb +6 -1
- data/lib/spidy/version.rb +1 -1
- data/lib/spidy.rb +1 -1
- data/spidy.gemspec +7 -6
- metadata +27 -56
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b6a7d70df09642e17d34cc85e1973914b8b7e151c34670526cb4d6b2d3589227
|
4
|
+
data.tar.gz: 012b7def5510c16d68676bada533d452315244fc5d47d7f26a9bd71068a3f9a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b6682cd3d1499b115cdfba3964cab62ae65f3a7943fb87f53d8613c2a6553b4f2a4a0728f2af07990dc90ae03c07df4d33914739ab4df462a37c2b04f5efdc5
|
7
|
+
data.tar.gz: a2680dd41fb1a6dead95ecd20742560d749f5c3f27367baac7d3f1294d4c6ee7de946cb173e1f2750169e6a341915197e375d1e56ca0faf7c5be4491f5a55ae9
|
data/.rubocop.yml
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
inherit_from: .rubocop_todo.yml
|
2
2
|
AllCops:
|
3
|
+
TargetRubyVersion: 3.0.2
|
4
|
+
NewCops: enable
|
3
5
|
DisplayCopNames: true
|
4
|
-
TargetRubyVersion: 2.6
|
5
6
|
|
6
7
|
Style/ClassAndModuleChildren:
|
7
8
|
Enabled: false
|
@@ -9,7 +10,7 @@ Style/ClassAndModuleChildren:
|
|
9
10
|
Style/SignalException:
|
10
11
|
EnforcedStyle: semantic
|
11
12
|
|
12
|
-
Naming/
|
13
|
+
Naming/MethodParameterName:
|
13
14
|
AllowedNames:
|
14
15
|
- as
|
15
16
|
|
@@ -17,8 +18,11 @@ Metrics/AbcSize:
|
|
17
18
|
Max: 21
|
18
19
|
Exclude:
|
19
20
|
|
21
|
+
Metrics/MethodLength:
|
22
|
+
Max: 15
|
23
|
+
|
20
24
|
Metrics/LineLength:
|
21
|
-
Max:
|
25
|
+
Max: 130
|
22
26
|
|
23
27
|
Metrics/BlockLength:
|
24
28
|
Max: 120
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.4.2
|
data/CLAUDE.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Claude Helper for Spidy
|
2
|
+
|
3
|
+
## Build/Test/Lint Commands
|
4
|
+
- Install dependencies: `bundle install`
|
5
|
+
- Run all tests: `bundle exec rake spec`
|
6
|
+
- Run single test: `bundle exec rspec spec/path/to_spec.rb:LINE_NUMBER`
|
7
|
+
- Install gem locally: `bundle exec rake install`
|
8
|
+
- Release gem: `bundle exec rake release`
|
9
|
+
|
10
|
+
## Code Style Guidelines
|
11
|
+
- **Naming Conventions**:
|
12
|
+
- snake_case for methods/variables/files
|
13
|
+
- CamelCase for classes/modules
|
14
|
+
- SCREAMING_SNAKE_CASE for constants
|
15
|
+
- **File Organization**: Match file paths to module/class hierarchy
|
16
|
+
- **Imports**:
|
17
|
+
- Add `# frozen_string_literal: true` at file start
|
18
|
+
- Use `extend ActiveSupport::Autoload` for modules with sub-modules
|
19
|
+
- **Error Handling**: Create custom error classes inheriting from StandardError
|
20
|
+
- **Documentation**: Add brief comments before classes and methods
|
21
|
+
- **Testing**:
|
22
|
+
- Use RSpec with `expect` syntax
|
23
|
+
- Organize with `describe` and `specify` blocks
|
24
|
+
- Name test files with `_spec.rb` suffix
|
25
|
+
|
26
|
+
## Dependencies
|
27
|
+
- Runtime: activesupport, mechanize, socksify, tor
|
28
|
+
- Development: bundler, capybara_discoball, ffaker, rake, rspec, sinatra
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,138 +1,193 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
spidy (0.3.
|
5
|
-
activesupport
|
4
|
+
spidy (0.3.12)
|
5
|
+
activesupport (~> 7.1)
|
6
6
|
mechanize
|
7
|
-
pry
|
8
7
|
socksify
|
9
8
|
tor
|
10
9
|
|
11
10
|
GEM
|
12
11
|
remote: https://rubygems.org/
|
13
12
|
specs:
|
14
|
-
activesupport (7.
|
15
|
-
|
13
|
+
activesupport (7.2.2.1)
|
14
|
+
base64
|
15
|
+
benchmark (>= 0.3)
|
16
|
+
bigdecimal
|
17
|
+
concurrent-ruby (~> 1.0, >= 1.3.1)
|
18
|
+
connection_pool (>= 2.2.5)
|
19
|
+
drb
|
16
20
|
i18n (>= 1.6, < 2)
|
21
|
+
logger (>= 1.4.2)
|
17
22
|
minitest (>= 5.1)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
23
|
+
securerandom (>= 0.3)
|
24
|
+
tzinfo (~> 2.0, >= 2.0.5)
|
25
|
+
addressable (2.8.7)
|
26
|
+
public_suffix (>= 2.0.2, < 7.0)
|
27
|
+
base64 (0.2.0)
|
28
|
+
benchmark (0.4.0)
|
29
|
+
bigdecimal (3.1.9)
|
30
|
+
capybara (3.40.0)
|
22
31
|
addressable
|
23
32
|
matrix
|
24
33
|
mini_mime (>= 0.1.3)
|
25
|
-
nokogiri (~> 1.
|
34
|
+
nokogiri (~> 1.11)
|
26
35
|
rack (>= 1.6.0)
|
27
36
|
rack-test (>= 0.6.3)
|
28
37
|
regexp_parser (>= 1.5, < 3.0)
|
29
38
|
xpath (~> 3.2)
|
30
39
|
capybara_discoball (0.1.0)
|
31
40
|
capybara (>= 2.7, < 4)
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
diff-lcs (1.
|
36
|
-
domain_name (0.
|
37
|
-
|
38
|
-
ffaker (2.
|
39
|
-
http-cookie (1.0.
|
41
|
+
concurrent-ruby (1.3.5)
|
42
|
+
connection_pool (2.5.0)
|
43
|
+
date (3.4.1)
|
44
|
+
diff-lcs (1.6.0)
|
45
|
+
domain_name (0.6.20240107)
|
46
|
+
drb (2.2.1)
|
47
|
+
ffaker (2.24.0)
|
48
|
+
http-cookie (1.0.8)
|
40
49
|
domain_name (~> 0.5)
|
41
|
-
i18n (1.
|
50
|
+
i18n (1.14.7)
|
42
51
|
concurrent-ruby (~> 1.0)
|
52
|
+
io-console (0.8.0)
|
53
|
+
irb (1.15.1)
|
54
|
+
pp (>= 0.6.0)
|
55
|
+
rdoc (>= 4.0.0)
|
56
|
+
reline (>= 0.4.2)
|
57
|
+
logger (1.6.6)
|
43
58
|
matrix (0.4.2)
|
44
|
-
mechanize (2.
|
59
|
+
mechanize (2.14.0)
|
45
60
|
addressable (~> 2.8)
|
61
|
+
base64
|
46
62
|
domain_name (~> 0.5, >= 0.5.20190701)
|
47
63
|
http-cookie (~> 1.0, >= 1.0.3)
|
48
|
-
mime-types (~> 3.
|
64
|
+
mime-types (~> 3.3)
|
49
65
|
net-http-digest_auth (~> 1.4, >= 1.4.1)
|
50
66
|
net-http-persistent (>= 2.5.2, < 5.0.dev)
|
67
|
+
nkf
|
51
68
|
nokogiri (~> 1.11, >= 1.11.2)
|
52
69
|
rubyntlm (~> 0.6, >= 0.6.3)
|
53
70
|
webrick (~> 1.7)
|
54
71
|
webrobots (~> 0.1.2)
|
55
|
-
|
56
|
-
|
72
|
+
mime-types (3.6.1)
|
73
|
+
logger
|
57
74
|
mime-types-data (~> 3.2015)
|
58
|
-
mime-types-data (3.
|
59
|
-
mini_mime (1.1.
|
60
|
-
|
61
|
-
minitest (5.15.0)
|
75
|
+
mime-types-data (3.2025.0318)
|
76
|
+
mini_mime (1.1.5)
|
77
|
+
minitest (5.25.5)
|
62
78
|
mixlib-shellout (2.4.4)
|
63
|
-
mustermann (
|
79
|
+
mustermann (3.0.3)
|
64
80
|
ruby2_keywords (~> 0.0.1)
|
65
81
|
net-http-digest_auth (1.4.1)
|
66
|
-
net-http-persistent (4.0.
|
82
|
+
net-http-persistent (4.0.5)
|
67
83
|
connection_pool (~> 2.2)
|
68
|
-
|
69
|
-
|
84
|
+
nkf (0.2.0)
|
85
|
+
nokogiri (1.18.5-aarch64-linux-gnu)
|
86
|
+
racc (~> 1.4)
|
87
|
+
nokogiri (1.18.5-aarch64-linux-musl)
|
88
|
+
racc (~> 1.4)
|
89
|
+
nokogiri (1.18.5-arm-linux-gnu)
|
90
|
+
racc (~> 1.4)
|
91
|
+
nokogiri (1.18.5-arm-linux-musl)
|
92
|
+
racc (~> 1.4)
|
93
|
+
nokogiri (1.18.5-arm64-darwin)
|
94
|
+
racc (~> 1.4)
|
95
|
+
nokogiri (1.18.5-x86_64-darwin)
|
96
|
+
racc (~> 1.4)
|
97
|
+
nokogiri (1.18.5-x86_64-linux-gnu)
|
98
|
+
racc (~> 1.4)
|
99
|
+
nokogiri (1.18.5-x86_64-linux-musl)
|
70
100
|
racc (~> 1.4)
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
rack
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
101
|
+
pp (0.6.2)
|
102
|
+
prettyprint
|
103
|
+
prettyprint (0.2.0)
|
104
|
+
psych (5.2.3)
|
105
|
+
date
|
106
|
+
stringio
|
107
|
+
public_suffix (6.0.1)
|
108
|
+
racc (1.8.1)
|
109
|
+
rack (3.1.12)
|
110
|
+
rack-protection (4.1.1)
|
111
|
+
base64 (>= 0.1.0)
|
112
|
+
logger (>= 1.6.0)
|
113
|
+
rack (>= 3.0.0, < 4)
|
114
|
+
rack-session (2.1.0)
|
115
|
+
base64 (>= 0.1.0)
|
116
|
+
rack (>= 3.0.0)
|
117
|
+
rack-test (2.2.0)
|
118
|
+
rack (>= 1.3)
|
119
|
+
rackup (2.2.1)
|
120
|
+
rack (>= 3)
|
121
|
+
rake (13.2.1)
|
122
|
+
rdoc (6.12.0)
|
123
|
+
psych (>= 4.0.0)
|
124
|
+
regexp_parser (2.10.0)
|
125
|
+
reline (0.6.0)
|
126
|
+
io-console (~> 0.5)
|
127
|
+
rspec (3.13.0)
|
128
|
+
rspec-core (~> 3.13.0)
|
129
|
+
rspec-expectations (~> 3.13.0)
|
130
|
+
rspec-mocks (~> 3.13.0)
|
87
131
|
rspec-command (1.0.3)
|
88
132
|
mixlib-shellout (~> 2.0)
|
89
133
|
rspec (~> 3.2)
|
90
134
|
rspec-its (~> 1.2)
|
91
|
-
rspec-core (3.
|
92
|
-
rspec-support (~> 3.
|
93
|
-
rspec-expectations (3.
|
135
|
+
rspec-core (3.13.3)
|
136
|
+
rspec-support (~> 3.13.0)
|
137
|
+
rspec-expectations (3.13.3)
|
94
138
|
diff-lcs (>= 1.2.0, < 2.0)
|
95
|
-
rspec-support (~> 3.
|
96
|
-
rspec-its (1.3.
|
139
|
+
rspec-support (~> 3.13.0)
|
140
|
+
rspec-its (1.3.1)
|
97
141
|
rspec-core (>= 3.0.0)
|
98
142
|
rspec-expectations (>= 3.0.0)
|
99
|
-
rspec-mocks (3.
|
143
|
+
rspec-mocks (3.13.2)
|
100
144
|
diff-lcs (>= 1.2.0, < 2.0)
|
101
|
-
rspec-support (~> 3.
|
102
|
-
rspec-support (3.
|
145
|
+
rspec-support (~> 3.13.0)
|
146
|
+
rspec-support (3.13.2)
|
103
147
|
ruby2_keywords (0.0.5)
|
104
|
-
rubyntlm (0.6.
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
148
|
+
rubyntlm (0.6.5)
|
149
|
+
base64
|
150
|
+
securerandom (0.4.1)
|
151
|
+
sinatra (4.1.1)
|
152
|
+
logger (>= 1.6.0)
|
153
|
+
mustermann (~> 3.0)
|
154
|
+
rack (>= 3.0.0, < 4)
|
155
|
+
rack-protection (= 4.1.1)
|
156
|
+
rack-session (>= 2.0.0, < 3)
|
109
157
|
tilt (~> 2.0)
|
110
158
|
socksify (1.7.1)
|
111
|
-
|
112
|
-
|
113
|
-
|
159
|
+
stringio (3.1.5)
|
160
|
+
tilt (2.6.0)
|
161
|
+
tor (0.1.7)
|
162
|
+
tzinfo (2.0.6)
|
114
163
|
concurrent-ruby (~> 1.0)
|
115
|
-
|
116
|
-
unf_ext
|
117
|
-
unf_ext (0.0.8)
|
118
|
-
webrick (1.7.0)
|
164
|
+
webrick (1.9.1)
|
119
165
|
webrobots (0.1.2)
|
120
166
|
xpath (3.2.0)
|
121
167
|
nokogiri (~> 1.8)
|
122
168
|
|
123
169
|
PLATFORMS
|
124
|
-
|
170
|
+
aarch64-linux-gnu
|
171
|
+
aarch64-linux-musl
|
172
|
+
arm-linux-gnu
|
173
|
+
arm-linux-musl
|
174
|
+
arm64-darwin
|
175
|
+
x86_64-darwin
|
176
|
+
x86_64-linux-gnu
|
177
|
+
x86_64-linux-musl
|
125
178
|
|
126
179
|
DEPENDENCIES
|
127
180
|
bundler (~> 2.0)
|
128
181
|
capybara_discoball
|
129
182
|
ffaker
|
130
|
-
|
183
|
+
irb
|
184
|
+
rackup
|
131
185
|
rake (~> 13.0)
|
132
186
|
rspec (~> 3.0)
|
133
187
|
rspec-command
|
134
188
|
sinatra
|
135
189
|
spidy!
|
190
|
+
webrick
|
136
191
|
|
137
192
|
BUNDLED WITH
|
138
|
-
2.
|
193
|
+
2.6.5
|
data/README.md
CHANGED
data/bin/console
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
|
4
4
|
require 'bundler/setup'
|
5
5
|
require 'spidy'
|
6
|
+
require 'irb'
|
6
7
|
|
7
8
|
# You can add fixtures and/or initialization code here to make experimenting
|
8
9
|
# with your gem easier. You can also use a different console, if you like.
|
9
10
|
|
10
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
-
require 'pry'
|
12
11
|
def reload!
|
13
12
|
ActiveSupport::Dependencies.clear
|
14
13
|
ActiveSupport::DescendantsTracker.clear
|
@@ -18,5 +17,5 @@ end
|
|
18
17
|
if ARGV[0]
|
19
18
|
Spidy.open(ARGV[0]).console
|
20
19
|
else
|
21
|
-
|
20
|
+
IRB.start
|
22
21
|
end
|
data/example/master_detail.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
Spidy.define do
|
4
|
-
url_to_params =
|
4
|
+
url_to_params = lambda { |url|
|
5
5
|
uri = URI.parse(url)
|
6
6
|
params = URI.decode_www_form(uri.query).to_h if uri.query.present?
|
7
7
|
params if params.present?
|
@@ -13,41 +13,41 @@ Spidy.define do
|
|
13
13
|
|
14
14
|
limit_page = 3
|
15
15
|
per_page = 25
|
16
|
-
yielder.call(Nokogiri::HTML::Builder.new
|
17
|
-
doc.html
|
18
|
-
doc.body
|
19
|
-
doc.span.bold
|
20
|
-
doc.text
|
21
|
-
|
22
|
-
doc.main
|
23
|
-
(page * per_page + 1).upto((page + 1) * per_page).each do |i|
|
16
|
+
yielder.call(Nokogiri::HTML::Builder.new do |doc|
|
17
|
+
doc.html do
|
18
|
+
doc.body do
|
19
|
+
doc.span.bold do
|
20
|
+
doc.text 'Hello world'
|
21
|
+
end
|
22
|
+
doc.main do
|
23
|
+
((page * per_page) + 1).upto((page + 1) * per_page).each do |i|
|
24
24
|
doc.a("page #{i}", href: "http://localhost/?id=#{i}")
|
25
25
|
end
|
26
|
-
|
26
|
+
end
|
27
27
|
doc.a('NEXT', href: "http://localhost/?page=#{page + 1}", class: 'next') if page < limit_page
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end.doc)
|
31
31
|
}
|
32
32
|
|
33
33
|
detail_page = proc { |url, &yielder|
|
34
34
|
params = url_to_params.call(url)
|
35
35
|
id = params['id']
|
36
36
|
|
37
|
-
yielder.call(Nokogiri::HTML::Builder.new
|
38
|
-
doc.html
|
39
|
-
doc.body
|
40
|
-
doc.span.bold
|
41
|
-
doc.text
|
42
|
-
|
37
|
+
yielder.call(Nokogiri::HTML::Builder.new do |doc|
|
38
|
+
doc.html do
|
39
|
+
doc.body do
|
40
|
+
doc.span.bold do
|
41
|
+
doc.text 'Hello world'
|
42
|
+
end
|
43
43
|
doc.h1("title_#{id}", id: 'title')
|
44
44
|
doc.main("body_#{id}", id: 'body')
|
45
45
|
doc.div.sub do
|
46
46
|
doc.span.name('testtest')
|
47
47
|
end
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end.doc)
|
51
51
|
}
|
52
52
|
|
53
53
|
define(as: :html, connector: detail_page) do
|
data/example/proxy.rb
CHANGED
data/example/retry.rb
CHANGED
data/example/wikip.rb
CHANGED
@@ -11,11 +11,8 @@ Spidy.define do
|
|
11
11
|
|
12
12
|
define(:infobox, as: :html, connector: :direct) do
|
13
13
|
let(:columns) do
|
14
|
-
html.search('tr').
|
15
|
-
{
|
16
|
-
name: tr.at('th')&.text,
|
17
|
-
value: tr.at('td')&.text
|
18
|
-
}
|
14
|
+
html.search('tr').map do |tr|
|
15
|
+
{ name: tr.at('th')&.text, value: tr.at('td')&.text }
|
19
16
|
end
|
20
17
|
end
|
21
18
|
end
|
data/exe/spidy
CHANGED
@@ -2,14 +2,13 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'spidy'
|
5
|
-
require 'pry'
|
6
5
|
|
7
6
|
if ARGV[1].blank?
|
8
7
|
case ARGV[0]
|
9
|
-
when 'version' then
|
8
|
+
when 'version' then $stdout.puts(Spidy::VERSION)
|
10
9
|
when 'console' then Spidy.shell.interactive
|
11
10
|
else
|
12
|
-
|
11
|
+
$stdout.puts 'usage: spidy [version console]'
|
13
12
|
end
|
14
13
|
else
|
15
14
|
case ARGV[0]
|
@@ -19,6 +18,6 @@ else
|
|
19
18
|
when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
|
20
19
|
when 'eval' then Spidy.shell(ARGV[1]).eval_call(ARGV[2])
|
21
20
|
else
|
22
|
-
|
21
|
+
$stdout.puts 'usage: spidy [console function call each run] [file]'
|
23
22
|
end
|
24
23
|
end
|
data/lib/spidy/binder/html.rb
CHANGED
@@ -17,9 +17,10 @@ module Spidy::Binder::Html
|
|
17
17
|
instance_exec(&block)
|
18
18
|
end
|
19
19
|
rescue StandardError => e
|
20
|
-
|
20
|
+
raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
|
+
|
23
24
|
def self.extended(obj)
|
24
25
|
obj.alias_method :html, :resource
|
25
26
|
end
|
data/lib/spidy/binder/json.rb
CHANGED
@@ -17,9 +17,10 @@ module Spidy::Binder::Json
|
|
17
17
|
instance_exec(&block)
|
18
18
|
end
|
19
19
|
rescue StandardError => e
|
20
|
-
|
20
|
+
raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
|
+
|
23
24
|
def self.extended(obj)
|
24
25
|
obj.alias_method :json, :resource
|
25
26
|
end
|
data/lib/spidy/binder/xml.rb
CHANGED
@@ -17,9 +17,10 @@ module Spidy::Binder::Xml
|
|
17
17
|
instance_exec(&block)
|
18
18
|
end
|
19
19
|
rescue StandardError => e
|
20
|
-
|
20
|
+
raise Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
|
+
|
23
24
|
def self.extended(obj)
|
24
25
|
obj.alias_method :xml, :resource
|
25
26
|
end
|
data/lib/spidy/binder.rb
CHANGED
data/lib/spidy/command_line.rb
CHANGED
@@ -5,8 +5,10 @@
|
|
5
5
|
#
|
6
6
|
class Spidy::CommandLine
|
7
7
|
delegate :spidy, to: :@definition_file
|
8
|
-
class_attribute :output, default: (proc { |result|
|
9
|
-
class_attribute :error_handler, default: (proc { |e, url|
|
8
|
+
class_attribute :output, default: (proc { |result| $stdout.puts(result.to_s) })
|
9
|
+
class_attribute :error_handler, default: (proc { |e, url|
|
10
|
+
warn({ url: url, message: e.message, backtrace: e.backtrace }.to_json)
|
11
|
+
})
|
10
12
|
|
11
13
|
def eval_call(script)
|
12
14
|
@definition_file.spidy.instance_eval(script)
|
@@ -14,40 +16,36 @@ class Spidy::CommandLine
|
|
14
16
|
|
15
17
|
def initialize(definition_file)
|
16
18
|
@definition_file = definition_file
|
17
|
-
|
19
|
+
fail 'unloaded spidy' if definition_file.spidy.nil?
|
18
20
|
end
|
19
21
|
|
20
22
|
def each_stdin_lines(name)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
error_handler.call(e, url)
|
26
|
-
end
|
23
|
+
$stdin.each_line do |url|
|
24
|
+
spidy.each(url.strip, name: name, &output)
|
25
|
+
rescue StandardError => e
|
26
|
+
error_handler.call(e, url)
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
30
|
def call_stdin_lines(name)
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
error_handler.call(e, url)
|
36
|
-
end
|
31
|
+
$stdin.each_line do |url|
|
32
|
+
spidy.call(url.strip, name: name, &output)
|
33
|
+
rescue StandardError => e
|
34
|
+
error_handler.call(e, url)
|
37
35
|
end
|
38
36
|
end
|
39
37
|
|
40
38
|
def call(name)
|
41
|
-
return call_stdin_lines(name) if FileTest.pipe?(
|
42
|
-
spidy.call(name: name, &output) unless FileTest.pipe?(
|
43
|
-
rescue => e
|
39
|
+
return call_stdin_lines(name) if FileTest.pipe?($stdin)
|
40
|
+
spidy.call(name: name, &output) unless FileTest.pipe?($stdin)
|
41
|
+
rescue StandardError => e
|
44
42
|
error_handler.call(e, nil)
|
45
43
|
end
|
46
44
|
|
47
45
|
def each(name)
|
48
|
-
return each_stdin_lines(name) if FileTest.pipe?(
|
46
|
+
return each_stdin_lines(name) if FileTest.pipe?($stdin)
|
49
47
|
spidy.each(name: name, &output)
|
50
|
-
rescue => e
|
48
|
+
rescue StandardError => e
|
51
49
|
error_handler.call(e, nil)
|
52
50
|
end
|
53
51
|
|
@@ -63,36 +61,32 @@ class Spidy::CommandLine
|
|
63
61
|
end
|
64
62
|
|
65
63
|
def build(name)
|
66
|
-
|
67
|
-
|
64
|
+
File.write("#{name}.sh", build_shell_script(name))
|
65
|
+
File.write("#{name}.rb", build_ruby_script)
|
68
66
|
end
|
69
67
|
|
70
68
|
def build_shell(name)
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
SHELL
|
77
|
-
end
|
69
|
+
<<~SHELL
|
70
|
+
#!/bin/bash
|
71
|
+
eval "$(spidy $(dirname "${0}")/#{name}.rb shell)"
|
72
|
+
spider
|
73
|
+
SHELL
|
78
74
|
end
|
79
75
|
|
80
|
-
def build_ruby
|
81
|
-
|
82
|
-
|
83
|
-
# frozen_string_literal: true
|
76
|
+
def build_ruby
|
77
|
+
<<~RUBY
|
78
|
+
# frozen_string_literal: true
|
84
79
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
80
|
+
Spidy.define do
|
81
|
+
spider(as: :html) do |yielder, connector|
|
82
|
+
# connector.call(url) do |resource|
|
83
|
+
# yielder.call(url or resource)
|
84
|
+
# end
|
85
|
+
end
|
91
86
|
|
92
|
-
|
93
|
-
end
|
87
|
+
define(as: :html) do
|
94
88
|
end
|
95
|
-
|
96
|
-
|
89
|
+
end
|
90
|
+
RUBY
|
97
91
|
end
|
98
92
|
end
|
@@ -4,7 +4,7 @@
|
|
4
4
|
# Direct resource ( not network resource )
|
5
5
|
#
|
6
6
|
class Spidy::Connector::Direct
|
7
|
-
def call(resource
|
7
|
+
def call(resource)
|
8
8
|
if block_given?
|
9
9
|
yield resource
|
10
10
|
else
|
@@ -12,6 +12,5 @@ class Spidy::Connector::Direct
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize(user_agent:)
|
16
|
-
end
|
15
|
+
def initialize(user_agent:); end
|
17
16
|
end
|
data/lib/spidy/connector/html.rb
CHANGED
@@ -14,13 +14,13 @@ class Spidy::Connector::Html
|
|
14
14
|
|
15
15
|
attr_reader :agent
|
16
16
|
|
17
|
-
def call(url, encoding: nil,
|
17
|
+
def call(url, encoding: nil, &yielder)
|
18
18
|
fail 'url is not specified' if url.blank?
|
19
19
|
if encoding
|
20
20
|
agent.default_encoding = encoding
|
21
21
|
agent.force_default_encoding = true
|
22
22
|
end
|
23
|
-
connect(url,
|
23
|
+
connect(url, yielder)
|
24
24
|
end
|
25
25
|
|
26
26
|
def refresh!
|
@@ -30,17 +30,19 @@ class Spidy::Connector::Html
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
def connect(url,
|
33
|
+
def connect(url, yielder)
|
34
34
|
result = nil
|
35
35
|
agent.get(url) do |page|
|
36
|
-
|
36
|
+
if page.title == 'Sorry, unable to access page...'
|
37
|
+
fail Spidy::Connector::Retry.new(object: page, response_code: page.try(:response_code))
|
38
|
+
end
|
37
39
|
|
38
40
|
result = yielder.call(page)
|
39
41
|
end
|
40
42
|
result
|
41
43
|
rescue Mechanize::ResponseCodeError => e
|
42
|
-
raise Spidy::Connector::Retry
|
43
|
-
raise Spidy::Connector::Retry
|
44
|
-
raise Spidy::Connector::Retry
|
44
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '429'
|
45
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code)) if e.response_code == '502'
|
46
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.try(:response_code))
|
45
47
|
end
|
46
48
|
end
|
data/lib/spidy/connector/json.rb
CHANGED
@@ -17,9 +17,9 @@ class Spidy::Connector::Json
|
|
17
17
|
connect(url, &block)
|
18
18
|
end
|
19
19
|
|
20
|
-
def connect(url
|
21
|
-
OpenURI.open_uri(url,
|
20
|
+
def connect(url)
|
21
|
+
OpenURI.open_uri(url, 'User-Agent' => @user_agent) { |body| yield JSON.parse(body.read, symbolize_names: true) }
|
22
22
|
rescue OpenURI::HTTPError => e
|
23
|
-
raise Spidy::Connector::Retry
|
23
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
|
24
24
|
end
|
25
25
|
end
|
data/lib/spidy/connector/xml.rb
CHANGED
@@ -13,11 +13,11 @@ class Spidy::Connector::Xml
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def connect(url, &block)
|
16
|
-
OpenURI.open_uri(url,
|
16
|
+
OpenURI.open_uri(url, 'User-Agent' => @user_agent) do |body|
|
17
17
|
block.call Nokogiri::XML(body.read.gsub(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F]/, ''), url)
|
18
18
|
end
|
19
19
|
rescue OpenURI::HTTPError => e
|
20
|
-
raise Spidy::Connector::Retry
|
20
|
+
raise Spidy::Connector::Retry.new(error: e, response_code: e.io.status[0])
|
21
21
|
end
|
22
22
|
|
23
23
|
def initialize(user_agent:)
|
data/lib/spidy/connector.rb
CHANGED
@@ -27,7 +27,7 @@ module Spidy::Connector
|
|
27
27
|
#
|
28
28
|
# error output logger
|
29
29
|
#
|
30
|
-
DEFAULT_LOGGER = proc { |values|
|
30
|
+
DEFAULT_LOGGER = proc { |values| warn(values.to_json) }
|
31
31
|
|
32
32
|
#
|
33
33
|
# static method
|
@@ -36,7 +36,9 @@ module Spidy::Connector
|
|
36
36
|
extend ActiveSupport::Concern
|
37
37
|
class_methods do
|
38
38
|
def call(url, wait_time: 5, logger: Spidy::Connector::DEFAULT_LOGGER, user_agent: Spidy::Connector::USER_AGENT, &block)
|
39
|
-
::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(
|
39
|
+
::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(
|
40
|
+
url, &block
|
41
|
+
)
|
40
42
|
end
|
41
43
|
end
|
42
44
|
end
|
@@ -51,6 +53,7 @@ module Spidy::Connector
|
|
51
53
|
@object = object
|
52
54
|
@response_code = response_code
|
53
55
|
@error = error
|
56
|
+
super(error)
|
54
57
|
end
|
55
58
|
end
|
56
59
|
|
@@ -58,13 +61,13 @@ module Spidy::Connector
|
|
58
61
|
# retry
|
59
62
|
#
|
60
63
|
class RetryableCaller
|
61
|
-
attr_reader :origin_connector
|
64
|
+
attr_reader :origin_connector, :logger, :wait_time
|
62
65
|
|
63
|
-
def initialize(connector, logger:, wait_time:)
|
66
|
+
def initialize(connector, logger:, wait_time:, retry_attempt_count: 5)
|
64
67
|
@origin_connector = connector
|
65
68
|
@logger = logger
|
66
69
|
@wait_time = wait_time
|
67
|
-
@retry_attempt_count =
|
70
|
+
@retry_attempt_count = retry_attempt_count
|
68
71
|
end
|
69
72
|
|
70
73
|
def call(url, &block)
|
@@ -73,18 +76,18 @@ module Spidy::Connector
|
|
73
76
|
end
|
74
77
|
|
75
78
|
def connect(url, retry_attempt_count: @retry_attempt_count, &block)
|
76
|
-
|
77
|
-
|
79
|
+
logger.call('connnector.get': url, 'connnector.accessed': Time.current)
|
80
|
+
origin_connector.call(url, &block)
|
78
81
|
rescue Spidy::Connector::Retry => e
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
82
|
+
logger.call('retry.accessed': Time.current,
|
83
|
+
'retry.uri': url,
|
84
|
+
'retry.response_code': e.response_code,
|
85
|
+
'retry.attempt_count': retry_attempt_count)
|
83
86
|
|
84
87
|
retry_attempt_count -= 1
|
85
88
|
if retry_attempt_count.positive?
|
86
|
-
sleep
|
87
|
-
|
89
|
+
sleep wait_time
|
90
|
+
origin_connector.refresh! if origin_connector.respond_to?(:refresh!)
|
88
91
|
retry
|
89
92
|
end
|
90
93
|
raise e.error
|
@@ -103,7 +106,7 @@ module Spidy::Connector
|
|
103
106
|
end
|
104
107
|
|
105
108
|
def call(url, &block)
|
106
|
-
Socksify
|
109
|
+
Socksify.proxy(socks_proxy[:host], socks_proxy[:port]) do
|
107
110
|
connector.call(url, &block)
|
108
111
|
end
|
109
112
|
end
|
@@ -141,7 +144,6 @@ module Spidy::Connector
|
|
141
144
|
fail "Not defined connnector[#{value}]" if connector.nil?
|
142
145
|
return connector if socks_proxy.nil?
|
143
146
|
|
144
|
-
|
145
|
-
tor
|
147
|
+
TorConnector.new(connector, socks_proxy)
|
146
148
|
end
|
147
149
|
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -33,26 +33,34 @@ module Spidy::Definition
|
|
33
33
|
spidy = @namespace[:"#{name}_spider"]
|
34
34
|
fail "undefined spidy [#{name}]" if spidy.nil?
|
35
35
|
|
36
|
-
|
36
|
+
if yielder
|
37
|
+
spidy.call(source, &yielder)
|
38
|
+
else
|
39
|
+
Enumerator.new do |enumerate_yielder|
|
40
|
+
spidy.call(source, &enumerate_yielder)
|
41
|
+
end
|
42
|
+
end
|
37
43
|
end
|
38
44
|
|
39
45
|
def spider(name = :default, connector: nil, as: nil, &define_block)
|
40
46
|
@namespace ||= {}
|
41
|
-
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
47
|
+
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
48
|
+
socks_proxy: @socks_proxy)
|
42
49
|
@namespace[:"#{name}_spider"] = proc do |source, &yielder|
|
43
50
|
define_block.call(yielder, connector, source)
|
44
51
|
end
|
45
52
|
end
|
46
53
|
|
47
54
|
def define(name = :default, connector: nil, as: nil, &define_block)
|
48
|
-
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
55
|
+
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent,
|
56
|
+
socks_proxy: @socks_proxy)
|
49
57
|
binder_base = Spidy::Binder.const_get(as.to_s.classify)
|
50
58
|
@namespace ||= {}
|
51
|
-
@namespace[:"#{name}_scraper"] = Class.new(Spidy::
|
59
|
+
@namespace[:"#{name}_scraper"] = Class.new(Spidy::DefinitionObject) do
|
52
60
|
extend binder_base
|
53
61
|
class_eval(&define_block)
|
54
62
|
define_singleton_method(:call) do |source, &yielder|
|
55
|
-
yielder =
|
63
|
+
yielder = ->(result) { break result } if yielder.nil?
|
56
64
|
connection_yielder = lambda do |page|
|
57
65
|
yielder.call(new(page, source))
|
58
66
|
end
|
@@ -4,8 +4,7 @@
|
|
4
4
|
# spidy interface binding
|
5
5
|
#
|
6
6
|
class Spidy::DefinitionFile
|
7
|
-
attr_reader :path
|
8
|
-
attr_reader :spidy
|
7
|
+
attr_reader :path, :spidy
|
9
8
|
|
10
9
|
def self.open(filepath)
|
11
10
|
object = new(filepath)
|
@@ -15,7 +14,7 @@ class Spidy::DefinitionFile
|
|
15
14
|
|
16
15
|
# rubocop:disable Security/Eval
|
17
16
|
def eval_definition
|
18
|
-
@spidy = eval(File.
|
17
|
+
@spidy = eval(File.read(path)) if path
|
19
18
|
end
|
20
19
|
# rubocop:enable Security/Eval
|
21
20
|
|
@@ -1,4 +1,9 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# An object that represents the scraper defined by define block.
|
5
|
+
#
|
6
|
+
class Spidy::DefinitionObject
|
2
7
|
class << self
|
3
8
|
attr_reader :attribute_names
|
4
9
|
end
|
@@ -14,6 +19,6 @@ class Spidy::DefineObject
|
|
14
19
|
end
|
15
20
|
|
16
21
|
def to_h
|
17
|
-
self.class.attribute_names.
|
22
|
+
self.class.attribute_names.to_h { |name| [name, send(name)] }
|
18
23
|
end
|
19
24
|
end
|
data/lib/spidy/shell.rb
CHANGED
@@ -9,7 +9,12 @@ class Spidy::Shell
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def interactive
|
12
|
-
|
12
|
+
console = Spidy::Console.new(@definition_file)
|
13
|
+
require 'irb'
|
14
|
+
IRB.setup(nil)
|
15
|
+
irb = IRB::Irb.new(IRB::WorkSpace.new(console))
|
16
|
+
IRB.conf[:MAIN_CONTEXT] = irb.context
|
17
|
+
irb.eval_input
|
13
18
|
end
|
14
19
|
|
15
20
|
def command_line
|
data/lib/spidy/version.rb
CHANGED
data/lib/spidy.rb
CHANGED
data/spidy.gemspec
CHANGED
@@ -25,17 +25,18 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.require_paths = ['lib']
|
26
26
|
|
27
27
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
28
|
-
spec.add_development_dependency '
|
28
|
+
spec.add_development_dependency 'capybara_discoball'
|
29
|
+
spec.add_development_dependency 'ffaker'
|
29
30
|
spec.add_development_dependency 'rake', '~> 13.0'
|
30
31
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
31
|
-
spec.add_development_dependency 'ffaker'
|
32
32
|
spec.add_development_dependency 'rspec-command'
|
33
|
-
spec.add_development_dependency 'capybara_discoball'
|
34
33
|
spec.add_development_dependency 'sinatra'
|
35
34
|
|
36
|
-
spec.add_runtime_dependency '
|
37
|
-
spec.add_runtime_dependency 'activesupport'
|
35
|
+
spec.add_runtime_dependency 'activesupport', '~> 7.1'
|
38
36
|
spec.add_runtime_dependency 'mechanize'
|
39
37
|
spec.add_runtime_dependency 'socksify'
|
40
|
-
spec.add_runtime_dependency '
|
38
|
+
spec.add_runtime_dependency 'tor'
|
39
|
+
spec.metadata = {
|
40
|
+
'rubygems_mfa_required' => 'true'
|
41
|
+
}
|
41
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
|
-
autorequire:
|
9
8
|
bindir: exe
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-03-19 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: bundler
|
@@ -25,7 +24,7 @@ dependencies:
|
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '2.0'
|
27
26
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
27
|
+
name: capybara_discoball
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
30
|
- - ">="
|
@@ -38,34 +37,6 @@ dependencies:
|
|
38
37
|
- - ">="
|
39
38
|
- !ruby/object:Gem::Version
|
40
39
|
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '13.0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '13.0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '3.0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '3.0'
|
69
40
|
- !ruby/object:Gem::Dependency
|
70
41
|
name: ffaker
|
71
42
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,35 +52,35 @@ dependencies:
|
|
81
52
|
- !ruby/object:Gem::Version
|
82
53
|
version: '0'
|
83
54
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
55
|
+
name: rake
|
85
56
|
requirement: !ruby/object:Gem::Requirement
|
86
57
|
requirements:
|
87
|
-
- - "
|
58
|
+
- - "~>"
|
88
59
|
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
60
|
+
version: '13.0'
|
90
61
|
type: :development
|
91
62
|
prerelease: false
|
92
63
|
version_requirements: !ruby/object:Gem::Requirement
|
93
64
|
requirements:
|
94
|
-
- - "
|
65
|
+
- - "~>"
|
95
66
|
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
67
|
+
version: '13.0'
|
97
68
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
69
|
+
name: rspec
|
99
70
|
requirement: !ruby/object:Gem::Requirement
|
100
71
|
requirements:
|
101
|
-
- - "
|
72
|
+
- - "~>"
|
102
73
|
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
74
|
+
version: '3.0'
|
104
75
|
type: :development
|
105
76
|
prerelease: false
|
106
77
|
version_requirements: !ruby/object:Gem::Requirement
|
107
78
|
requirements:
|
108
|
-
- - "
|
79
|
+
- - "~>"
|
109
80
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
81
|
+
version: '3.0'
|
111
82
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
83
|
+
name: rspec-command
|
113
84
|
requirement: !ruby/object:Gem::Requirement
|
114
85
|
requirements:
|
115
86
|
- - ">="
|
@@ -123,13 +94,13 @@ dependencies:
|
|
123
94
|
- !ruby/object:Gem::Version
|
124
95
|
version: '0'
|
125
96
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
97
|
+
name: sinatra
|
127
98
|
requirement: !ruby/object:Gem::Requirement
|
128
99
|
requirements:
|
129
100
|
- - ">="
|
130
101
|
- !ruby/object:Gem::Version
|
131
102
|
version: '0'
|
132
|
-
type: :
|
103
|
+
type: :development
|
133
104
|
prerelease: false
|
134
105
|
version_requirements: !ruby/object:Gem::Requirement
|
135
106
|
requirements:
|
@@ -140,16 +111,16 @@ dependencies:
|
|
140
111
|
name: activesupport
|
141
112
|
requirement: !ruby/object:Gem::Requirement
|
142
113
|
requirements:
|
143
|
-
- - "
|
114
|
+
- - "~>"
|
144
115
|
- !ruby/object:Gem::Version
|
145
|
-
version: '
|
116
|
+
version: '7.1'
|
146
117
|
type: :runtime
|
147
118
|
prerelease: false
|
148
119
|
version_requirements: !ruby/object:Gem::Requirement
|
149
120
|
requirements:
|
150
|
-
- - "
|
121
|
+
- - "~>"
|
151
122
|
- !ruby/object:Gem::Version
|
152
|
-
version: '
|
123
|
+
version: '7.1'
|
153
124
|
- !ruby/object:Gem::Dependency
|
154
125
|
name: mechanize
|
155
126
|
requirement: !ruby/object:Gem::Requirement
|
@@ -179,7 +150,7 @@ dependencies:
|
|
179
150
|
- !ruby/object:Gem::Version
|
180
151
|
version: '0'
|
181
152
|
- !ruby/object:Gem::Dependency
|
182
|
-
name:
|
153
|
+
name: tor
|
183
154
|
requirement: !ruby/object:Gem::Requirement
|
184
155
|
requirements:
|
185
156
|
- - ">="
|
@@ -192,7 +163,6 @@ dependencies:
|
|
192
163
|
- - ">="
|
193
164
|
- !ruby/object:Gem::Version
|
194
165
|
version: '0'
|
195
|
-
description:
|
196
166
|
email:
|
197
167
|
- aileron.cc@gmail.com
|
198
168
|
executables:
|
@@ -207,6 +177,7 @@ files:
|
|
207
177
|
- ".ruby-version"
|
208
178
|
- ".travis.yml"
|
209
179
|
- CHANGELOG.md
|
180
|
+
- CLAUDE.md
|
210
181
|
- CODE_OF_CONDUCT.md
|
211
182
|
- Gemfile
|
212
183
|
- Gemfile.lock
|
@@ -222,6 +193,7 @@ files:
|
|
222
193
|
- exe/spidy
|
223
194
|
- lib/spidy.rb
|
224
195
|
- lib/spidy/binder.rb
|
196
|
+
- lib/spidy/binder/error.rb
|
225
197
|
- lib/spidy/binder/html.rb
|
226
198
|
- lib/spidy/binder/json.rb
|
227
199
|
- lib/spidy/binder/xml.rb
|
@@ -232,9 +204,9 @@ files:
|
|
232
204
|
- lib/spidy/connector/json.rb
|
233
205
|
- lib/spidy/connector/xml.rb
|
234
206
|
- lib/spidy/console.rb
|
235
|
-
- lib/spidy/define_object.rb
|
236
207
|
- lib/spidy/definition.rb
|
237
208
|
- lib/spidy/definition_file.rb
|
209
|
+
- lib/spidy/definition_object.rb
|
238
210
|
- lib/spidy/shell.rb
|
239
211
|
- lib/spidy/spider.rb
|
240
212
|
- lib/spidy/version.rb
|
@@ -243,8 +215,8 @@ files:
|
|
243
215
|
homepage: https://github.com/aileron-inc/spidy
|
244
216
|
licenses:
|
245
217
|
- MIT
|
246
|
-
metadata:
|
247
|
-
|
218
|
+
metadata:
|
219
|
+
rubygems_mfa_required: 'true'
|
248
220
|
rdoc_options: []
|
249
221
|
require_paths:
|
250
222
|
- lib
|
@@ -259,8 +231,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
259
231
|
- !ruby/object:Gem::Version
|
260
232
|
version: '0'
|
261
233
|
requirements: []
|
262
|
-
rubygems_version: 3.
|
263
|
-
signing_key:
|
234
|
+
rubygems_version: 3.6.5
|
264
235
|
specification_version: 4
|
265
236
|
summary: web spider dsl
|
266
237
|
test_files: []
|