spidy 0.3.8 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 666e9b2dcad792e940aa9e6b6cb99eaf759b9e6af1b187519cbdb8bd573a2861
4
- data.tar.gz: 8477c2617d04470030ff95673033382996f2988d5f95ebacb5f94272309ad851
3
+ metadata.gz: 75215453c834a8e481b27cf4377235cc97ce6a6e4eff142a11743e68ee4982b4
4
+ data.tar.gz: 25f3b14ad6f31b580396458c8075167f24fd4a2f6e7ff98947338ecd0588eb9d
5
5
  SHA512:
6
- metadata.gz: 422486fe8f5e0207e3f55832051e04e22dd07cbbc6e1efa12a6db57bbb0103244f2e41516018da0597bdd785a7d3a8171d4c9bb32d3a503c358502a6f3133d44
7
- data.tar.gz: a841cc76004f5a020c4c3c343504be1448fbbcebd90fc1a06c3d34a9657acbe1d39a5c5c5be9b08433a34be4875675e62250578d2bb8d2b7e68a20150c2e3506
6
+ metadata.gz: 447b7152b807c7985e16b7b403d27f9f7b949264577e8e4dc11a52358cb9af49510696d29166adb60bb5b87158aa8d2c10faf7c810f8eced4c29f9eed8bb493a
7
+ data.tar.gz: 55d82e5c495a7e5a0fd57b466e08ea072e3a712829b2d83d550b107147f30e3eaff54ed8dcec69163bf6f74e0c32990051af9de0d044cc19eadf39ecc749d003
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.6
1
+ 3.0.2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- spidy (0.3.7)
4
+ spidy (0.3.9)
5
5
  activesupport
6
6
  mechanize
7
7
  pry
@@ -11,99 +11,101 @@ PATH
11
11
  GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
- activesupport (6.1.3)
14
+ activesupport (7.0.0)
15
15
  concurrent-ruby (~> 1.0, >= 1.0.2)
16
16
  i18n (>= 1.6, < 2)
17
17
  minitest (>= 5.1)
18
18
  tzinfo (~> 2.0)
19
- zeitwerk (~> 2.3)
20
- addressable (2.7.0)
19
+ addressable (2.8.0)
21
20
  public_suffix (>= 2.0.2, < 5.0)
22
- capybara (3.33.0)
21
+ capybara (3.36.0)
23
22
  addressable
23
+ matrix
24
24
  mini_mime (>= 0.1.3)
25
25
  nokogiri (~> 1.8)
26
26
  rack (>= 1.6.0)
27
27
  rack-test (>= 0.6.3)
28
- regexp_parser (~> 1.5)
28
+ regexp_parser (>= 1.5, < 3.0)
29
29
  xpath (~> 3.2)
30
30
  capybara_discoball (0.1.0)
31
31
  capybara (>= 2.7, < 4)
32
- coderay (1.1.2)
33
- concurrent-ruby (1.1.8)
34
- connection_pool (2.2.3)
35
- diff-lcs (1.3)
32
+ coderay (1.1.3)
33
+ concurrent-ruby (1.1.9)
34
+ connection_pool (2.2.5)
35
+ diff-lcs (1.4.4)
36
36
  domain_name (0.5.20190701)
37
37
  unf (>= 0.0.5, < 1.0.0)
38
- ffaker (2.10.0)
39
- http-cookie (1.0.3)
38
+ ffaker (2.20.0)
39
+ http-cookie (1.0.4)
40
40
  domain_name (~> 0.5)
41
- i18n (1.8.9)
41
+ i18n (1.8.11)
42
42
  concurrent-ruby (~> 1.0)
43
- mechanize (2.7.7)
44
- domain_name (~> 0.5, >= 0.5.1)
45
- http-cookie (~> 1.0)
46
- mime-types (>= 1.17.2)
47
- net-http-digest_auth (~> 1.1, >= 1.1.1)
48
- net-http-persistent (>= 2.5.2)
49
- nokogiri (~> 1.6)
50
- ntlm-http (~> 0.1, >= 0.1.1)
43
+ matrix (0.4.2)
44
+ mechanize (2.8.3)
45
+ addressable (~> 2.8)
46
+ domain_name (~> 0.5, >= 0.5.20190701)
47
+ http-cookie (~> 1.0, >= 1.0.3)
48
+ mime-types (~> 3.0)
49
+ net-http-digest_auth (~> 1.4, >= 1.4.1)
50
+ net-http-persistent (>= 2.5.2, < 5.0.dev)
51
+ nokogiri (~> 1.11, >= 1.11.2)
52
+ rubyntlm (~> 0.6, >= 0.6.3)
51
53
  webrick (~> 1.7)
52
- webrobots (>= 0.0.9, < 0.2)
53
- method_source (0.9.2)
54
- mime-types (3.3.1)
54
+ webrobots (~> 0.1.2)
55
+ method_source (1.0.0)
56
+ mime-types (3.4.1)
55
57
  mime-types-data (~> 3.2015)
56
- mime-types-data (3.2021.0225)
57
- mini_mime (1.0.2)
58
- mini_portile2 (2.5.0)
59
- minitest (5.14.4)
58
+ mime-types-data (3.2021.1115)
59
+ mini_mime (1.1.2)
60
+ mini_portile2 (2.6.1)
61
+ minitest (5.15.0)
60
62
  mixlib-shellout (2.4.4)
61
63
  mustermann (1.1.1)
62
64
  ruby2_keywords (~> 0.0.1)
63
65
  net-http-digest_auth (1.4.1)
64
66
  net-http-persistent (4.0.1)
65
67
  connection_pool (~> 2.2)
66
- nokogiri (1.11.2)
67
- mini_portile2 (~> 2.5.0)
68
+ nokogiri (1.12.5)
69
+ mini_portile2 (~> 2.6.1)
68
70
  racc (~> 1.4)
69
- ntlm-http (0.1.1)
70
- pry (0.12.2)
71
- coderay (~> 1.1.0)
72
- method_source (~> 0.9.0)
71
+ pry (0.14.1)
72
+ coderay (~> 1.1)
73
+ method_source (~> 1.0)
73
74
  public_suffix (4.0.6)
74
- racc (1.5.2)
75
+ racc (1.6.0)
75
76
  rack (2.2.3)
76
- rack-protection (2.0.8.1)
77
+ rack-protection (2.1.0)
77
78
  rack
78
79
  rack-test (1.1.0)
79
80
  rack (>= 1.0, < 3)
80
- rake (13.0.3)
81
- regexp_parser (1.8.1)
82
- rspec (3.8.0)
83
- rspec-core (~> 3.8.0)
84
- rspec-expectations (~> 3.8.0)
85
- rspec-mocks (~> 3.8.0)
81
+ rake (13.0.6)
82
+ regexp_parser (2.2.0)
83
+ rspec (3.10.0)
84
+ rspec-core (~> 3.10.0)
85
+ rspec-expectations (~> 3.10.0)
86
+ rspec-mocks (~> 3.10.0)
86
87
  rspec-command (1.0.3)
87
88
  mixlib-shellout (~> 2.0)
88
89
  rspec (~> 3.2)
89
90
  rspec-its (~> 1.2)
90
- rspec-core (3.8.2)
91
- rspec-support (~> 3.8.0)
92
- rspec-expectations (3.8.4)
91
+ rspec-core (3.10.1)
92
+ rspec-support (~> 3.10.0)
93
+ rspec-expectations (3.10.1)
93
94
  diff-lcs (>= 1.2.0, < 2.0)
94
- rspec-support (~> 3.8.0)
95
+ rspec-support (~> 3.10.0)
95
96
  rspec-its (1.3.0)
96
97
  rspec-core (>= 3.0.0)
97
98
  rspec-expectations (>= 3.0.0)
98
- rspec-mocks (3.8.1)
99
+ rspec-mocks (3.10.2)
99
100
  diff-lcs (>= 1.2.0, < 2.0)
100
- rspec-support (~> 3.8.0)
101
- rspec-support (3.8.2)
102
- ruby2_keywords (0.0.2)
103
- sinatra (2.0.8.1)
101
+ rspec-support (~> 3.10.0)
102
+ rspec-support (3.10.3)
103
+ ruby2_keywords (0.0.5)
104
+ rubyntlm (0.6.3)
105
+ sinatra (2.1.0)
104
106
  mustermann (~> 1.0)
105
- rack (~> 2.0)
106
- rack-protection (= 2.0.8.1)
107
+ rack (~> 2.2)
108
+ rack-protection (= 2.1.0)
107
109
  tilt (~> 2.0)
108
110
  socksify (1.7.1)
109
111
  tilt (2.0.10)
@@ -112,12 +114,11 @@ GEM
112
114
  concurrent-ruby (~> 1.0)
113
115
  unf (0.1.4)
114
116
  unf_ext
115
- unf_ext (0.0.7.7)
117
+ unf_ext (0.0.8)
116
118
  webrick (1.7.0)
117
119
  webrobots (0.1.2)
118
120
  xpath (3.2.0)
119
121
  nokogiri (~> 1.8)
120
- zeitwerk (2.4.2)
121
122
 
122
123
  PLATFORMS
123
124
  ruby
@@ -134,4 +135,4 @@ DEPENDENCIES
134
135
  spidy!
135
136
 
136
137
  BUNDLED WITH
137
- 2.2.14
138
+ 2.2.22
data/exe/spidy CHANGED
@@ -17,7 +17,8 @@ else
17
17
  when 'function' then Spidy.shell(ARGV[1]).function
18
18
  when 'call' then Spidy.shell(ARGV[1]).call(ARGV[2])
19
19
  when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
20
+ when 'eval' then Spidy.shell(ARGV[1]).eval_call(ARGV[2])
20
21
  else
21
- STDOUT.puts 'usage: spidy [console function call each] [file]'
22
+ STDOUT.puts 'usage: spidy [console function call each eval] [file]'
22
23
  end
23
24
  end
@@ -3,8 +3,8 @@
3
3
  #
4
4
  # Bind html and convert to object
5
5
  #
6
- class Spidy::Binder::Html < Spidy::Binder::Base
7
- def self.let(name, query = nil, &block)
6
+ module Spidy::Binder::Html
7
+ def let(name, query = nil, &block)
8
8
  @attribute_names ||= []
9
9
  @attribute_names << name
10
10
 
@@ -20,6 +20,7 @@ class Spidy::Binder::Html < Spidy::Binder::Base
20
20
  fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
-
24
- alias_method :html, :resource
23
+ def self.extended(obj)
24
+ obj.alias_method :html, :resource
25
+ end
25
26
  end
@@ -3,8 +3,8 @@
3
3
  #
4
4
  # Bind json and convert to object
5
5
  #
6
- class Spidy::Binder::Json < Spidy::Binder::Base
7
- def self.let(name, *query, &block)
6
+ module Spidy::Binder::Json
7
+ def let(name, *query, &block)
8
8
  @attribute_names ||= []
9
9
  @attribute_names << name
10
10
 
@@ -20,6 +20,7 @@ class Spidy::Binder::Json < Spidy::Binder::Base
20
20
  fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
-
24
- alias_method :json, :resource
23
+ def self.extended(obj)
24
+ obj.alias_method :json, :resource
25
+ end
25
26
  end
@@ -3,8 +3,8 @@
3
3
  #
4
4
  # Bind xml and convert to object
5
5
  #
6
- class Spidy::Binder::Xml < Spidy::Binder::Base
7
- def self.let(name, query = nil, &block)
6
+ module Spidy::Binder::Xml
7
+ def let(name, query = nil, &block)
8
8
  @attribute_names ||= []
9
9
  @attribute_names << name
10
10
 
@@ -20,6 +20,7 @@ class Spidy::Binder::Xml < Spidy::Binder::Base
20
20
  fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
-
24
- alias_method :xml, :resource
23
+ def self.extended(obj)
24
+ obj.alias_method :xml, :resource
25
+ end
25
26
  end
data/lib/spidy/binder.rb CHANGED
@@ -8,48 +8,4 @@ module Spidy::Binder
8
8
  autoload :Json
9
9
  autoload :Html
10
10
  autoload :Xml
11
-
12
- class Error < StandardError
13
- end
14
-
15
- class Caller
16
- def initialize(spidy, binder)
17
- @spidy = spidy
18
- @binder = binder
19
- end
20
-
21
- def call(source, url: nil, define: nil, define_name: nil)
22
- yield Class.new(@binder, &define).new(define_name, @spidy, source, url)
23
- end
24
- end
25
-
26
- class Base
27
- class << self
28
- attr_reader :attribute_names
29
- end
30
-
31
- attr_reader :resource, :url
32
-
33
- def initialize(define_name, spidy, resource, url)
34
- @define_name = define_name
35
- @spidy = spidy
36
- @resource = resource
37
- @url = url
38
- end
39
-
40
- def to_s
41
- to_h.to_json
42
- end
43
-
44
- def to_h
45
- self.class.attribute_names.map { |name| [name, send(name)] }.to_h
46
- end
47
- end
48
-
49
-
50
- def self.get(spidy, value)
51
- return Caller.new(spidy, const_get(value.to_s.classify)) if name.is_a?(String) || name.is_a?(Symbol)
52
-
53
- value
54
- end
55
11
  end
@@ -8,6 +8,10 @@ class Spidy::CommandLine
8
8
  class_attribute :output, default: (proc { |result| STDOUT.puts(result.to_s) })
9
9
  class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
10
10
 
11
+ def eval_call(script)
12
+ @definition_file.spidy.instance_eval(script)
13
+ end
14
+
11
15
  def initialize(definition_file)
12
16
  @definition_file = definition_file
13
17
  raise 'unloaded spidy' if definition_file.spidy.nil?
@@ -0,0 +1,19 @@
1
+ class Spidy::DefineObject
2
+ class << self
3
+ attr_reader :attribute_names
4
+ end
5
+ attr_reader :resource, :url
6
+
7
+ def initialize(resource, url)
8
+ @resource = resource
9
+ @url = url
10
+ end
11
+
12
+ def to_s
13
+ to_h.to_json
14
+ end
15
+
16
+ def to_h
17
+ self.class.attribute_names.map { |name| [name, send(name)] }.to_h
18
+ end
19
+ end
@@ -44,22 +44,20 @@ module Spidy::Definition
44
44
  end
45
45
  end
46
46
 
47
- def define(name = :default, connector: nil, binder: nil, as: nil, &define_block)
48
- @namespace ||= {}
47
+ def define(name = :default, connector: nil, as: nil, &define_block)
49
48
  connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent, socks_proxy: @socks_proxy)
50
- binder = Spidy::Binder.get(self, binder || as)
51
- @namespace[:"#{name}_scraper"] = define_proc(name, connector, binder, define_block)
52
- end
53
-
54
- private
55
-
56
- def define_proc(name, connector, binder, define_block)
57
- proc do |source, &yielder|
58
- yielder = lambda { |result| break result } if yielder.nil?
59
- connection_yielder = lambda do |page|
60
- binder.call(page, url: source, define: define_block, define_name: name) { |object| yielder.call(object) }
49
+ binder_base = Spidy::Binder.const_get(as.to_s.classify)
50
+ @namespace ||= {}
51
+ @namespace[:"#{name}_scraper"] = Class.new(Spidy::DefineObject) do
52
+ extend binder_base
53
+ class_eval(&define_block)
54
+ define_singleton_method(:call) do |source, &yielder|
55
+ yielder = lambda { |result| break result } if yielder.nil?
56
+ connection_yielder = lambda do |page|
57
+ yielder.call(new(page, source))
58
+ end
59
+ connector.call(source, &connection_yielder)
61
60
  end
62
- connector.call(source, &connection_yielder)
63
61
  end
64
62
  end
65
63
  end
data/lib/spidy/shell.rb CHANGED
@@ -16,5 +16,5 @@ class Spidy::Shell
16
16
  Spidy::CommandLine.new(@definition_file)
17
17
  end
18
18
 
19
- delegate :function, :each, :call, to: :command_line
19
+ delegate :function, :each, :call, :eval_call, to: :command_line
20
20
  end
data/lib/spidy/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spidy
4
- VERSION = '0.3.8'
4
+ VERSION = '0.3.9'
5
5
  end
data/lib/spidy.rb CHANGED
@@ -19,6 +19,7 @@ module Spidy
19
19
  autoload :DefinitionFile
20
20
  autoload :Binder
21
21
  autoload :Connector
22
+ autoload :DefineObject
22
23
 
23
24
  def self.shell(filepath = nil)
24
25
  Spidy::Shell.new(filepath)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-24 00:00:00.000000000 Z
11
+ date: 2021-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -232,6 +232,7 @@ files:
232
232
  - lib/spidy/connector/json.rb
233
233
  - lib/spidy/connector/xml.rb
234
234
  - lib/spidy/console.rb
235
+ - lib/spidy/define_object.rb
235
236
  - lib/spidy/definition.rb
236
237
  - lib/spidy/definition_file.rb
237
238
  - lib/spidy/shell.rb
@@ -258,7 +259,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
258
259
  - !ruby/object:Gem::Version
259
260
  version: '0'
260
261
  requirements: []
261
- rubygems_version: 3.2.13
262
+ rubygems_version: 3.2.22
262
263
  signing_key:
263
264
  specification_version: 4
264
265
  summary: web spider dsl