spidy 0.3.8 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +57 -56
- data/exe/spidy +2 -1
- data/lib/spidy/binder/html.rb +5 -4
- data/lib/spidy/binder/json.rb +5 -4
- data/lib/spidy/binder/xml.rb +5 -4
- data/lib/spidy/binder.rb +0 -44
- data/lib/spidy/command_line.rb +4 -0
- data/lib/spidy/define_object.rb +19 -0
- data/lib/spidy/definition.rb +12 -14
- data/lib/spidy/shell.rb +1 -1
- data/lib/spidy/version.rb +1 -1
- data/lib/spidy.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75215453c834a8e481b27cf4377235cc97ce6a6e4eff142a11743e68ee4982b4
|
4
|
+
data.tar.gz: 25f3b14ad6f31b580396458c8075167f24fd4a2f6e7ff98947338ecd0588eb9d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 447b7152b807c7985e16b7b403d27f9f7b949264577e8e4dc11a52358cb9af49510696d29166adb60bb5b87158aa8d2c10faf7c810f8eced4c29f9eed8bb493a
|
7
|
+
data.tar.gz: 55d82e5c495a7e5a0fd57b466e08ea072e3a712829b2d83d550b107147f30e3eaff54ed8dcec69163bf6f74e0c32990051af9de0d044cc19eadf39ecc749d003
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.2
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
spidy (0.3.8)
|
4
|
+
spidy (0.3.9)
|
5
5
|
activesupport
|
6
6
|
mechanize
|
7
7
|
pry
|
@@ -11,99 +11,101 @@ PATH
|
|
11
11
|
GEM
|
12
12
|
remote: https://rubygems.org/
|
13
13
|
specs:
|
14
|
-
activesupport (
|
14
|
+
activesupport (7.0.0)
|
15
15
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
16
16
|
i18n (>= 1.6, < 2)
|
17
17
|
minitest (>= 5.1)
|
18
18
|
tzinfo (~> 2.0)
|
19
|
-
|
20
|
-
addressable (2.7.0)
|
19
|
+
addressable (2.8.0)
|
21
20
|
public_suffix (>= 2.0.2, < 5.0)
|
22
|
-
capybara (3.
|
21
|
+
capybara (3.36.0)
|
23
22
|
addressable
|
23
|
+
matrix
|
24
24
|
mini_mime (>= 0.1.3)
|
25
25
|
nokogiri (~> 1.8)
|
26
26
|
rack (>= 1.6.0)
|
27
27
|
rack-test (>= 0.6.3)
|
28
|
-
regexp_parser (
|
28
|
+
regexp_parser (>= 1.5, < 3.0)
|
29
29
|
xpath (~> 3.2)
|
30
30
|
capybara_discoball (0.1.0)
|
31
31
|
capybara (>= 2.7, < 4)
|
32
|
-
coderay (1.1.
|
33
|
-
concurrent-ruby (1.1.
|
34
|
-
connection_pool (2.2.
|
35
|
-
diff-lcs (1.
|
32
|
+
coderay (1.1.3)
|
33
|
+
concurrent-ruby (1.1.9)
|
34
|
+
connection_pool (2.2.5)
|
35
|
+
diff-lcs (1.4.4)
|
36
36
|
domain_name (0.5.20190701)
|
37
37
|
unf (>= 0.0.5, < 1.0.0)
|
38
|
-
ffaker (2.
|
39
|
-
http-cookie (1.0.
|
38
|
+
ffaker (2.20.0)
|
39
|
+
http-cookie (1.0.4)
|
40
40
|
domain_name (~> 0.5)
|
41
|
-
i18n (1.8.
|
41
|
+
i18n (1.8.11)
|
42
42
|
concurrent-ruby (~> 1.0)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
43
|
+
matrix (0.4.2)
|
44
|
+
mechanize (2.8.3)
|
45
|
+
addressable (~> 2.8)
|
46
|
+
domain_name (~> 0.5, >= 0.5.20190701)
|
47
|
+
http-cookie (~> 1.0, >= 1.0.3)
|
48
|
+
mime-types (~> 3.0)
|
49
|
+
net-http-digest_auth (~> 1.4, >= 1.4.1)
|
50
|
+
net-http-persistent (>= 2.5.2, < 5.0.dev)
|
51
|
+
nokogiri (~> 1.11, >= 1.11.2)
|
52
|
+
rubyntlm (~> 0.6, >= 0.6.3)
|
51
53
|
webrick (~> 1.7)
|
52
|
-
webrobots (
|
53
|
-
method_source (0.
|
54
|
-
mime-types (3.
|
54
|
+
webrobots (~> 0.1.2)
|
55
|
+
method_source (1.0.0)
|
56
|
+
mime-types (3.4.1)
|
55
57
|
mime-types-data (~> 3.2015)
|
56
|
-
mime-types-data (3.2021.
|
57
|
-
mini_mime (1.
|
58
|
-
mini_portile2 (2.
|
59
|
-
minitest (5.
|
58
|
+
mime-types-data (3.2021.1115)
|
59
|
+
mini_mime (1.1.2)
|
60
|
+
mini_portile2 (2.6.1)
|
61
|
+
minitest (5.15.0)
|
60
62
|
mixlib-shellout (2.4.4)
|
61
63
|
mustermann (1.1.1)
|
62
64
|
ruby2_keywords (~> 0.0.1)
|
63
65
|
net-http-digest_auth (1.4.1)
|
64
66
|
net-http-persistent (4.0.1)
|
65
67
|
connection_pool (~> 2.2)
|
66
|
-
nokogiri (1.
|
67
|
-
mini_portile2 (~> 2.
|
68
|
+
nokogiri (1.12.5)
|
69
|
+
mini_portile2 (~> 2.6.1)
|
68
70
|
racc (~> 1.4)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
method_source (~> 0.9.0)
|
71
|
+
pry (0.14.1)
|
72
|
+
coderay (~> 1.1)
|
73
|
+
method_source (~> 1.0)
|
73
74
|
public_suffix (4.0.6)
|
74
|
-
racc (1.
|
75
|
+
racc (1.6.0)
|
75
76
|
rack (2.2.3)
|
76
|
-
rack-protection (2.0
|
77
|
+
rack-protection (2.1.0)
|
77
78
|
rack
|
78
79
|
rack-test (1.1.0)
|
79
80
|
rack (>= 1.0, < 3)
|
80
|
-
rake (13.0.
|
81
|
-
regexp_parser (
|
82
|
-
rspec (3.
|
83
|
-
rspec-core (~> 3.
|
84
|
-
rspec-expectations (~> 3.
|
85
|
-
rspec-mocks (~> 3.
|
81
|
+
rake (13.0.6)
|
82
|
+
regexp_parser (2.2.0)
|
83
|
+
rspec (3.10.0)
|
84
|
+
rspec-core (~> 3.10.0)
|
85
|
+
rspec-expectations (~> 3.10.0)
|
86
|
+
rspec-mocks (~> 3.10.0)
|
86
87
|
rspec-command (1.0.3)
|
87
88
|
mixlib-shellout (~> 2.0)
|
88
89
|
rspec (~> 3.2)
|
89
90
|
rspec-its (~> 1.2)
|
90
|
-
rspec-core (3.
|
91
|
-
rspec-support (~> 3.
|
92
|
-
rspec-expectations (3.
|
91
|
+
rspec-core (3.10.1)
|
92
|
+
rspec-support (~> 3.10.0)
|
93
|
+
rspec-expectations (3.10.1)
|
93
94
|
diff-lcs (>= 1.2.0, < 2.0)
|
94
|
-
rspec-support (~> 3.
|
95
|
+
rspec-support (~> 3.10.0)
|
95
96
|
rspec-its (1.3.0)
|
96
97
|
rspec-core (>= 3.0.0)
|
97
98
|
rspec-expectations (>= 3.0.0)
|
98
|
-
rspec-mocks (3.
|
99
|
+
rspec-mocks (3.10.2)
|
99
100
|
diff-lcs (>= 1.2.0, < 2.0)
|
100
|
-
rspec-support (~> 3.
|
101
|
-
rspec-support (3.
|
102
|
-
ruby2_keywords (0.0.
|
103
|
-
|
101
|
+
rspec-support (~> 3.10.0)
|
102
|
+
rspec-support (3.10.3)
|
103
|
+
ruby2_keywords (0.0.5)
|
104
|
+
rubyntlm (0.6.3)
|
105
|
+
sinatra (2.1.0)
|
104
106
|
mustermann (~> 1.0)
|
105
|
-
rack (~> 2.
|
106
|
-
rack-protection (= 2.0
|
107
|
+
rack (~> 2.2)
|
108
|
+
rack-protection (= 2.1.0)
|
107
109
|
tilt (~> 2.0)
|
108
110
|
socksify (1.7.1)
|
109
111
|
tilt (2.0.10)
|
@@ -112,12 +114,11 @@ GEM
|
|
112
114
|
concurrent-ruby (~> 1.0)
|
113
115
|
unf (0.1.4)
|
114
116
|
unf_ext
|
115
|
-
unf_ext (0.0.
|
117
|
+
unf_ext (0.0.8)
|
116
118
|
webrick (1.7.0)
|
117
119
|
webrobots (0.1.2)
|
118
120
|
xpath (3.2.0)
|
119
121
|
nokogiri (~> 1.8)
|
120
|
-
zeitwerk (2.4.2)
|
121
122
|
|
122
123
|
PLATFORMS
|
123
124
|
ruby
|
@@ -134,4 +135,4 @@ DEPENDENCIES
|
|
134
135
|
spidy!
|
135
136
|
|
136
137
|
BUNDLED WITH
|
137
|
-
2.2.
|
138
|
+
2.2.22
|
data/exe/spidy
CHANGED
@@ -17,7 +17,8 @@ else
|
|
17
17
|
when 'function' then Spidy.shell(ARGV[1]).function
|
18
18
|
when 'call' then Spidy.shell(ARGV[1]).call(ARGV[2])
|
19
19
|
when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
|
20
|
+
when 'eval' then Spidy.shell(ARGV[1]).eval_call(ARGV[2])
|
20
21
|
else
|
21
|
-
STDOUT.puts 'usage: spidy [console function call each] [file]'
|
22
|
+
STDOUT.puts 'usage: spidy [console function call each run] [file]'
|
22
23
|
end
|
23
24
|
end
|
data/lib/spidy/binder/html.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
#
|
4
4
|
# Bind html and convert to object
|
5
5
|
#
|
6
|
-
|
7
|
-
def
|
6
|
+
module Spidy::Binder::Html
|
7
|
+
def let(name, query = nil, &block)
|
8
8
|
@attribute_names ||= []
|
9
9
|
@attribute_names << name
|
10
10
|
|
@@ -20,6 +20,7 @@ class Spidy::Binder::Html < Spidy::Binder::Base
|
|
20
20
|
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
|
-
|
24
|
-
|
23
|
+
def self.extended(obj)
|
24
|
+
obj.alias_method :html, :resource
|
25
|
+
end
|
25
26
|
end
|
data/lib/spidy/binder/json.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
#
|
4
4
|
# Bind json and convert to object
|
5
5
|
#
|
6
|
-
|
7
|
-
def
|
6
|
+
module Spidy::Binder::Json
|
7
|
+
def let(name, *query, &block)
|
8
8
|
@attribute_names ||= []
|
9
9
|
@attribute_names << name
|
10
10
|
|
@@ -20,6 +20,7 @@ class Spidy::Binder::Json < Spidy::Binder::Base
|
|
20
20
|
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
|
-
|
24
|
-
|
23
|
+
def self.extended(obj)
|
24
|
+
obj.alias_method :json, :resource
|
25
|
+
end
|
25
26
|
end
|
data/lib/spidy/binder/xml.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
#
|
4
4
|
# Bind xml and convert to object
|
5
5
|
#
|
6
|
-
|
7
|
-
def
|
6
|
+
module Spidy::Binder::Xml
|
7
|
+
def let(name, query = nil, &block)
|
8
8
|
@attribute_names ||= []
|
9
9
|
@attribute_names << name
|
10
10
|
|
@@ -20,6 +20,7 @@ class Spidy::Binder::Xml < Spidy::Binder::Base
|
|
20
20
|
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
21
21
|
end
|
22
22
|
end
|
23
|
-
|
24
|
-
|
23
|
+
def self.extended(obj)
|
24
|
+
obj.alias_method :xml, :resource
|
25
|
+
end
|
25
26
|
end
|
data/lib/spidy/binder.rb
CHANGED
@@ -8,48 +8,4 @@ module Spidy::Binder
|
|
8
8
|
autoload :Json
|
9
9
|
autoload :Html
|
10
10
|
autoload :Xml
|
11
|
-
|
12
|
-
class Error < StandardError
|
13
|
-
end
|
14
|
-
|
15
|
-
class Caller
|
16
|
-
def initialize(spidy, binder)
|
17
|
-
@spidy = spidy
|
18
|
-
@binder = binder
|
19
|
-
end
|
20
|
-
|
21
|
-
def call(source, url: nil, define: nil, define_name: nil)
|
22
|
-
yield Class.new(@binder, &define).new(define_name, @spidy, source, url)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
class Base
|
27
|
-
class << self
|
28
|
-
attr_reader :attribute_names
|
29
|
-
end
|
30
|
-
|
31
|
-
attr_reader :resource, :url
|
32
|
-
|
33
|
-
def initialize(define_name, spidy, resource, url)
|
34
|
-
@define_name = define_name
|
35
|
-
@spidy = spidy
|
36
|
-
@resource = resource
|
37
|
-
@url = url
|
38
|
-
end
|
39
|
-
|
40
|
-
def to_s
|
41
|
-
to_h.to_json
|
42
|
-
end
|
43
|
-
|
44
|
-
def to_h
|
45
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
|
50
|
-
def self.get(spidy, value)
|
51
|
-
return Caller.new(spidy, const_get(value.to_s.classify)) if name.is_a?(String) || name.is_a?(Symbol)
|
52
|
-
|
53
|
-
value
|
54
|
-
end
|
55
11
|
end
|
data/lib/spidy/command_line.rb
CHANGED
@@ -8,6 +8,10 @@ class Spidy::CommandLine
|
|
8
8
|
class_attribute :output, default: (proc { |result| STDOUT.puts(result.to_s) })
|
9
9
|
class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
|
10
10
|
|
11
|
+
def eval_call(script)
|
12
|
+
@definition_file.spidy.instance_eval(script)
|
13
|
+
end
|
14
|
+
|
11
15
|
def initialize(definition_file)
|
12
16
|
@definition_file = definition_file
|
13
17
|
raise 'unloaded spidy' if definition_file.spidy.nil?
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class Spidy::DefineObject
|
2
|
+
class << self
|
3
|
+
attr_reader :attribute_names
|
4
|
+
end
|
5
|
+
attr_reader :resource, :url
|
6
|
+
|
7
|
+
def initialize(resource, url)
|
8
|
+
@resource = resource
|
9
|
+
@url = url
|
10
|
+
end
|
11
|
+
|
12
|
+
def to_s
|
13
|
+
to_h.to_json
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_h
|
17
|
+
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
18
|
+
end
|
19
|
+
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -44,22 +44,20 @@ module Spidy::Definition
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
-
def define(name = :default, connector: nil,
|
48
|
-
@namespace ||= {}
|
47
|
+
def define(name = :default, connector: nil, as: nil, &define_block)
|
49
48
|
connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent, socks_proxy: @socks_proxy)
|
50
|
-
|
51
|
-
@namespace
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
49
|
+
binder_base = Spidy::Binder.const_get(as.to_s.classify)
|
50
|
+
@namespace ||= {}
|
51
|
+
@namespace[:"#{name}_scraper"] = Class.new(Spidy::DefineObject) do
|
52
|
+
extend binder_base
|
53
|
+
class_eval(&define_block)
|
54
|
+
define_singleton_method(:call) do |source, &yielder|
|
55
|
+
yielder = lambda { |result| break result } if yielder.nil?
|
56
|
+
connection_yielder = lambda do |page|
|
57
|
+
yielder.call(new(page, source))
|
58
|
+
end
|
59
|
+
connector.call(source, &connection_yielder)
|
61
60
|
end
|
62
|
-
connector.call(source, &connection_yielder)
|
63
61
|
end
|
64
62
|
end
|
65
63
|
end
|
data/lib/spidy/shell.rb
CHANGED
data/lib/spidy/version.rb
CHANGED
data/lib/spidy.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.8
|
4
|
+
version: 0.3.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -232,6 +232,7 @@ files:
|
|
232
232
|
- lib/spidy/connector/json.rb
|
233
233
|
- lib/spidy/connector/xml.rb
|
234
234
|
- lib/spidy/console.rb
|
235
|
+
- lib/spidy/define_object.rb
|
235
236
|
- lib/spidy/definition.rb
|
236
237
|
- lib/spidy/definition_file.rb
|
237
238
|
- lib/spidy/shell.rb
|
@@ -258,7 +259,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
258
259
|
- !ruby/object:Gem::Version
|
259
260
|
version: '0'
|
260
261
|
requirements: []
|
261
|
-
rubygems_version: 3.2.
|
262
|
+
rubygems_version: 3.2.22
|
262
263
|
signing_key:
|
263
264
|
specification_version: 4
|
264
265
|
summary: web spider dsl
|