spidy 0.3.8 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 666e9b2dcad792e940aa9e6b6cb99eaf759b9e6af1b187519cbdb8bd573a2861
4
- data.tar.gz: 8477c2617d04470030ff95673033382996f2988d5f95ebacb5f94272309ad851
3
+ metadata.gz: 75215453c834a8e481b27cf4377235cc97ce6a6e4eff142a11743e68ee4982b4
4
+ data.tar.gz: 25f3b14ad6f31b580396458c8075167f24fd4a2f6e7ff98947338ecd0588eb9d
5
5
  SHA512:
6
- metadata.gz: 422486fe8f5e0207e3f55832051e04e22dd07cbbc6e1efa12a6db57bbb0103244f2e41516018da0597bdd785a7d3a8171d4c9bb32d3a503c358502a6f3133d44
7
- data.tar.gz: a841cc76004f5a020c4c3c343504be1448fbbcebd90fc1a06c3d34a9657acbe1d39a5c5c5be9b08433a34be4875675e62250578d2bb8d2b7e68a20150c2e3506
6
+ metadata.gz: 447b7152b807c7985e16b7b403d27f9f7b949264577e8e4dc11a52358cb9af49510696d29166adb60bb5b87158aa8d2c10faf7c810f8eced4c29f9eed8bb493a
7
+ data.tar.gz: 55d82e5c495a7e5a0fd57b466e08ea072e3a712829b2d83d550b107147f30e3eaff54ed8dcec69163bf6f74e0c32990051af9de0d044cc19eadf39ecc749d003
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.6
1
+ 3.0.2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- spidy (0.3.7)
4
+ spidy (0.3.9)
5
5
  activesupport
6
6
  mechanize
7
7
  pry
@@ -11,99 +11,101 @@ PATH
11
11
  GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
- activesupport (6.1.3)
14
+ activesupport (7.0.0)
15
15
  concurrent-ruby (~> 1.0, >= 1.0.2)
16
16
  i18n (>= 1.6, < 2)
17
17
  minitest (>= 5.1)
18
18
  tzinfo (~> 2.0)
19
- zeitwerk (~> 2.3)
20
- addressable (2.7.0)
19
+ addressable (2.8.0)
21
20
  public_suffix (>= 2.0.2, < 5.0)
22
- capybara (3.33.0)
21
+ capybara (3.36.0)
23
22
  addressable
23
+ matrix
24
24
  mini_mime (>= 0.1.3)
25
25
  nokogiri (~> 1.8)
26
26
  rack (>= 1.6.0)
27
27
  rack-test (>= 0.6.3)
28
- regexp_parser (~> 1.5)
28
+ regexp_parser (>= 1.5, < 3.0)
29
29
  xpath (~> 3.2)
30
30
  capybara_discoball (0.1.0)
31
31
  capybara (>= 2.7, < 4)
32
- coderay (1.1.2)
33
- concurrent-ruby (1.1.8)
34
- connection_pool (2.2.3)
35
- diff-lcs (1.3)
32
+ coderay (1.1.3)
33
+ concurrent-ruby (1.1.9)
34
+ connection_pool (2.2.5)
35
+ diff-lcs (1.4.4)
36
36
  domain_name (0.5.20190701)
37
37
  unf (>= 0.0.5, < 1.0.0)
38
- ffaker (2.10.0)
39
- http-cookie (1.0.3)
38
+ ffaker (2.20.0)
39
+ http-cookie (1.0.4)
40
40
  domain_name (~> 0.5)
41
- i18n (1.8.9)
41
+ i18n (1.8.11)
42
42
  concurrent-ruby (~> 1.0)
43
- mechanize (2.7.7)
44
- domain_name (~> 0.5, >= 0.5.1)
45
- http-cookie (~> 1.0)
46
- mime-types (>= 1.17.2)
47
- net-http-digest_auth (~> 1.1, >= 1.1.1)
48
- net-http-persistent (>= 2.5.2)
49
- nokogiri (~> 1.6)
50
- ntlm-http (~> 0.1, >= 0.1.1)
43
+ matrix (0.4.2)
44
+ mechanize (2.8.3)
45
+ addressable (~> 2.8)
46
+ domain_name (~> 0.5, >= 0.5.20190701)
47
+ http-cookie (~> 1.0, >= 1.0.3)
48
+ mime-types (~> 3.0)
49
+ net-http-digest_auth (~> 1.4, >= 1.4.1)
50
+ net-http-persistent (>= 2.5.2, < 5.0.dev)
51
+ nokogiri (~> 1.11, >= 1.11.2)
52
+ rubyntlm (~> 0.6, >= 0.6.3)
51
53
  webrick (~> 1.7)
52
- webrobots (>= 0.0.9, < 0.2)
53
- method_source (0.9.2)
54
- mime-types (3.3.1)
54
+ webrobots (~> 0.1.2)
55
+ method_source (1.0.0)
56
+ mime-types (3.4.1)
55
57
  mime-types-data (~> 3.2015)
56
- mime-types-data (3.2021.0225)
57
- mini_mime (1.0.2)
58
- mini_portile2 (2.5.0)
59
- minitest (5.14.4)
58
+ mime-types-data (3.2021.1115)
59
+ mini_mime (1.1.2)
60
+ mini_portile2 (2.6.1)
61
+ minitest (5.15.0)
60
62
  mixlib-shellout (2.4.4)
61
63
  mustermann (1.1.1)
62
64
  ruby2_keywords (~> 0.0.1)
63
65
  net-http-digest_auth (1.4.1)
64
66
  net-http-persistent (4.0.1)
65
67
  connection_pool (~> 2.2)
66
- nokogiri (1.11.2)
67
- mini_portile2 (~> 2.5.0)
68
+ nokogiri (1.12.5)
69
+ mini_portile2 (~> 2.6.1)
68
70
  racc (~> 1.4)
69
- ntlm-http (0.1.1)
70
- pry (0.12.2)
71
- coderay (~> 1.1.0)
72
- method_source (~> 0.9.0)
71
+ pry (0.14.1)
72
+ coderay (~> 1.1)
73
+ method_source (~> 1.0)
73
74
  public_suffix (4.0.6)
74
- racc (1.5.2)
75
+ racc (1.6.0)
75
76
  rack (2.2.3)
76
- rack-protection (2.0.8.1)
77
+ rack-protection (2.1.0)
77
78
  rack
78
79
  rack-test (1.1.0)
79
80
  rack (>= 1.0, < 3)
80
- rake (13.0.3)
81
- regexp_parser (1.8.1)
82
- rspec (3.8.0)
83
- rspec-core (~> 3.8.0)
84
- rspec-expectations (~> 3.8.0)
85
- rspec-mocks (~> 3.8.0)
81
+ rake (13.0.6)
82
+ regexp_parser (2.2.0)
83
+ rspec (3.10.0)
84
+ rspec-core (~> 3.10.0)
85
+ rspec-expectations (~> 3.10.0)
86
+ rspec-mocks (~> 3.10.0)
86
87
  rspec-command (1.0.3)
87
88
  mixlib-shellout (~> 2.0)
88
89
  rspec (~> 3.2)
89
90
  rspec-its (~> 1.2)
90
- rspec-core (3.8.2)
91
- rspec-support (~> 3.8.0)
92
- rspec-expectations (3.8.4)
91
+ rspec-core (3.10.1)
92
+ rspec-support (~> 3.10.0)
93
+ rspec-expectations (3.10.1)
93
94
  diff-lcs (>= 1.2.0, < 2.0)
94
- rspec-support (~> 3.8.0)
95
+ rspec-support (~> 3.10.0)
95
96
  rspec-its (1.3.0)
96
97
  rspec-core (>= 3.0.0)
97
98
  rspec-expectations (>= 3.0.0)
98
- rspec-mocks (3.8.1)
99
+ rspec-mocks (3.10.2)
99
100
  diff-lcs (>= 1.2.0, < 2.0)
100
- rspec-support (~> 3.8.0)
101
- rspec-support (3.8.2)
102
- ruby2_keywords (0.0.2)
103
- sinatra (2.0.8.1)
101
+ rspec-support (~> 3.10.0)
102
+ rspec-support (3.10.3)
103
+ ruby2_keywords (0.0.5)
104
+ rubyntlm (0.6.3)
105
+ sinatra (2.1.0)
104
106
  mustermann (~> 1.0)
105
- rack (~> 2.0)
106
- rack-protection (= 2.0.8.1)
107
+ rack (~> 2.2)
108
+ rack-protection (= 2.1.0)
107
109
  tilt (~> 2.0)
108
110
  socksify (1.7.1)
109
111
  tilt (2.0.10)
@@ -112,12 +114,11 @@ GEM
112
114
  concurrent-ruby (~> 1.0)
113
115
  unf (0.1.4)
114
116
  unf_ext
115
- unf_ext (0.0.7.7)
117
+ unf_ext (0.0.8)
116
118
  webrick (1.7.0)
117
119
  webrobots (0.1.2)
118
120
  xpath (3.2.0)
119
121
  nokogiri (~> 1.8)
120
- zeitwerk (2.4.2)
121
122
 
122
123
  PLATFORMS
123
124
  ruby
@@ -134,4 +135,4 @@ DEPENDENCIES
134
135
  spidy!
135
136
 
136
137
  BUNDLED WITH
137
- 2.2.14
138
+ 2.2.22
data/exe/spidy CHANGED
@@ -17,7 +17,8 @@ else
17
17
  when 'function' then Spidy.shell(ARGV[1]).function
18
18
  when 'call' then Spidy.shell(ARGV[1]).call(ARGV[2])
19
19
  when 'each' then Spidy.shell(ARGV[1]).each(ARGV[2])
20
+ when 'eval' then Spidy.shell(ARGV[1]).eval_call(ARGV[2])
20
21
  else
21
- STDOUT.puts 'usage: spidy [console function call each] [file]'
22
+ STDOUT.puts 'usage: spidy [console function call each run] [file]'
22
23
  end
23
24
  end
@@ -3,8 +3,8 @@
3
3
  #
4
4
  # Bind html and convert to object
5
5
  #
6
- class Spidy::Binder::Html < Spidy::Binder::Base
7
- def self.let(name, query = nil, &block)
6
+ module Spidy::Binder::Html
7
+ def let(name, query = nil, &block)
8
8
  @attribute_names ||= []
9
9
  @attribute_names << name
10
10
 
@@ -20,6 +20,7 @@ class Spidy::Binder::Html < Spidy::Binder::Base
20
20
  fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
-
24
- alias_method :html, :resource
23
+ def self.extended(obj)
24
+ obj.alias_method :html, :resource
25
+ end
25
26
  end
@@ -3,8 +3,8 @@
3
3
  #
4
4
  # Bind json and convert to object
5
5
  #
6
- class Spidy::Binder::Json < Spidy::Binder::Base
7
- def self.let(name, *query, &block)
6
+ module Spidy::Binder::Json
7
+ def let(name, *query, &block)
8
8
  @attribute_names ||= []
9
9
  @attribute_names << name
10
10
 
@@ -20,6 +20,7 @@ class Spidy::Binder::Json < Spidy::Binder::Base
20
20
  fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
-
24
- alias_method :json, :resource
23
+ def self.extended(obj)
24
+ obj.alias_method :json, :resource
25
+ end
25
26
  end
@@ -3,8 +3,8 @@
3
3
  #
4
4
  # Bind xml and convert to object
5
5
  #
6
- class Spidy::Binder::Xml < Spidy::Binder::Base
7
- def self.let(name, query = nil, &block)
6
+ module Spidy::Binder::Xml
7
+ def let(name, query = nil, &block)
8
8
  @attribute_names ||= []
9
9
  @attribute_names << name
10
10
 
@@ -20,6 +20,7 @@ class Spidy::Binder::Xml < Spidy::Binder::Base
20
20
  fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
21
21
  end
22
22
  end
23
-
24
- alias_method :xml, :resource
23
+ def self.extended(obj)
24
+ obj.alias_method :xml, :resource
25
+ end
25
26
  end
data/lib/spidy/binder.rb CHANGED
@@ -8,48 +8,4 @@ module Spidy::Binder
8
8
  autoload :Json
9
9
  autoload :Html
10
10
  autoload :Xml
11
-
12
- class Error < StandardError
13
- end
14
-
15
- class Caller
16
- def initialize(spidy, binder)
17
- @spidy = spidy
18
- @binder = binder
19
- end
20
-
21
- def call(source, url: nil, define: nil, define_name: nil)
22
- yield Class.new(@binder, &define).new(define_name, @spidy, source, url)
23
- end
24
- end
25
-
26
- class Base
27
- class << self
28
- attr_reader :attribute_names
29
- end
30
-
31
- attr_reader :resource, :url
32
-
33
- def initialize(define_name, spidy, resource, url)
34
- @define_name = define_name
35
- @spidy = spidy
36
- @resource = resource
37
- @url = url
38
- end
39
-
40
- def to_s
41
- to_h.to_json
42
- end
43
-
44
- def to_h
45
- self.class.attribute_names.map { |name| [name, send(name)] }.to_h
46
- end
47
- end
48
-
49
-
50
- def self.get(spidy, value)
51
- return Caller.new(spidy, const_get(value.to_s.classify)) if name.is_a?(String) || name.is_a?(Symbol)
52
-
53
- value
54
- end
55
11
  end
@@ -8,6 +8,10 @@ class Spidy::CommandLine
8
8
  class_attribute :output, default: (proc { |result| STDOUT.puts(result.to_s) })
9
9
  class_attribute :error_handler, default: (proc { |e, url| STDERR.puts({ url: url, message: e.message, backtrace: e.backtrace }.to_json) })
10
10
 
11
+ def eval_call(script)
12
+ @definition_file.spidy.instance_eval(script)
13
+ end
14
+
11
15
  def initialize(definition_file)
12
16
  @definition_file = definition_file
13
17
  raise 'unloaded spidy' if definition_file.spidy.nil?
@@ -0,0 +1,19 @@
1
+ class Spidy::DefineObject
2
+ class << self
3
+ attr_reader :attribute_names
4
+ end
5
+ attr_reader :resource, :url
6
+
7
+ def initialize(resource, url)
8
+ @resource = resource
9
+ @url = url
10
+ end
11
+
12
+ def to_s
13
+ to_h.to_json
14
+ end
15
+
16
+ def to_h
17
+ self.class.attribute_names.map { |name| [name, send(name)] }.to_h
18
+ end
19
+ end
@@ -44,22 +44,20 @@ module Spidy::Definition
44
44
  end
45
45
  end
46
46
 
47
- def define(name = :default, connector: nil, binder: nil, as: nil, &define_block)
48
- @namespace ||= {}
47
+ def define(name = :default, connector: nil, as: nil, &define_block)
49
48
  connector = Spidy::Connector.get(connector || as, wait_time: @wait_time, user_agent: @user_agent, socks_proxy: @socks_proxy)
50
- binder = Spidy::Binder.get(self, binder || as)
51
- @namespace[:"#{name}_scraper"] = define_proc(name, connector, binder, define_block)
52
- end
53
-
54
- private
55
-
56
- def define_proc(name, connector, binder, define_block)
57
- proc do |source, &yielder|
58
- yielder = lambda { |result| break result } if yielder.nil?
59
- connection_yielder = lambda do |page|
60
- binder.call(page, url: source, define: define_block, define_name: name) { |object| yielder.call(object) }
49
+ binder_base = Spidy::Binder.const_get(as.to_s.classify)
50
+ @namespace ||= {}
51
+ @namespace[:"#{name}_scraper"] = Class.new(Spidy::DefineObject) do
52
+ extend binder_base
53
+ class_eval(&define_block)
54
+ define_singleton_method(:call) do |source, &yielder|
55
+ yielder = lambda { |result| break result } if yielder.nil?
56
+ connection_yielder = lambda do |page|
57
+ yielder.call(new(page, source))
58
+ end
59
+ connector.call(source, &connection_yielder)
61
60
  end
62
- connector.call(source, &connection_yielder)
63
61
  end
64
62
  end
65
63
  end
data/lib/spidy/shell.rb CHANGED
@@ -16,5 +16,5 @@ class Spidy::Shell
16
16
  Spidy::CommandLine.new(@definition_file)
17
17
  end
18
18
 
19
- delegate :function, :each, :call, to: :command_line
19
+ delegate :function, :each, :call, :eval_call, to: :command_line
20
20
  end
data/lib/spidy/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spidy
4
- VERSION = '0.3.8'
4
+ VERSION = '0.3.9'
5
5
  end
data/lib/spidy.rb CHANGED
@@ -19,6 +19,7 @@ module Spidy
19
19
  autoload :DefinitionFile
20
20
  autoload :Binder
21
21
  autoload :Connector
22
+ autoload :DefineObject
22
23
 
23
24
  def self.shell(filepath = nil)
24
25
  Spidy::Shell.new(filepath)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - aileron
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-24 00:00:00.000000000 Z
11
+ date: 2021-12-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -232,6 +232,7 @@ files:
232
232
  - lib/spidy/connector/json.rb
233
233
  - lib/spidy/connector/xml.rb
234
234
  - lib/spidy/console.rb
235
+ - lib/spidy/define_object.rb
235
236
  - lib/spidy/definition.rb
236
237
  - lib/spidy/definition_file.rb
237
238
  - lib/spidy/shell.rb
@@ -258,7 +259,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
258
259
  - !ruby/object:Gem::Version
259
260
  version: '0'
260
261
  requirements: []
261
- rubygems_version: 3.2.13
262
+ rubygems_version: 3.2.22
262
263
  signing_key:
263
264
  specification_version: 4
264
265
  summary: web spider dsl