spidy 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/spidy/binder.rb +33 -2
- data/lib/spidy/binder/html.rb +12 -29
- data/lib/spidy/binder/json.rb +15 -32
- data/lib/spidy/binder/xml.rb +12 -29
- data/lib/spidy/definition.rb +3 -3
- data/lib/spidy/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c032c78b7580bada781db301697b264b337d7c829bc5df4c3810a95770f56ee
|
4
|
+
data.tar.gz: 4b282b06cc91b3fcdef8187381ded42d01a689720ebfede4fb135b1639d47f11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de20181e766b5b8d9189e657bfa20f4a4720e2c0a0adb7ece522309433ef5d93325d98d63a1b31304bd9a2575391318c3330c01f66356ae4110b7005c6560253
|
7
|
+
data.tar.gz: 5cf49000bb2f234c111216f9a554267df18a360c4b0511685c000b8e22312feaf3ba5b0b53b56612f0653157cd51e1da46e6140d7e4902b8a4a062894256b1d1
|
data/lib/spidy/binder.rb
CHANGED
@@ -9,17 +9,48 @@ module Spidy::Binder
|
|
9
9
|
autoload :Html
|
10
10
|
autoload :Xml
|
11
11
|
|
12
|
+
class Error < StandardError
|
13
|
+
end
|
14
|
+
|
12
15
|
class Caller
|
13
16
|
def initialize(spidy, binder)
|
14
17
|
@spidy = spidy
|
15
18
|
@binder = binder
|
16
19
|
end
|
17
20
|
|
18
|
-
def call(source, url: nil, define: nil)
|
19
|
-
yield Class.new(@binder, &define).new(@spidy, source, url)
|
21
|
+
def call(source, url: nil, define: nil, define_name: nil)
|
22
|
+
yield Class.new(@binder, &define).new(define_name, @spidy, source, url)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Base
|
27
|
+
class << self
|
28
|
+
attr_reader :attribute_names
|
29
|
+
end
|
30
|
+
|
31
|
+
attr_reader :resource, :url
|
32
|
+
|
33
|
+
def initialize(define_name, spidy, resource, url)
|
34
|
+
@define_name = define_name
|
35
|
+
@spidy = spidy
|
36
|
+
@resource = resource
|
37
|
+
@url = url
|
38
|
+
end
|
39
|
+
|
40
|
+
def scraper(name, source)
|
41
|
+
lambda { |&block| @spidy.call(source, name: name, &block) }
|
42
|
+
end
|
43
|
+
|
44
|
+
def to_s
|
45
|
+
to_h.to_json
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_h
|
49
|
+
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
20
50
|
end
|
21
51
|
end
|
22
52
|
|
53
|
+
|
23
54
|
def self.get(spidy, value)
|
24
55
|
return Caller.new(spidy, const_get(value.to_s.classify)) if name.is_a?(String) || name.is_a?(Symbol)
|
25
56
|
|
data/lib/spidy/binder/html.rb
CHANGED
@@ -3,40 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind html and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Html
|
7
|
-
|
8
|
-
|
6
|
+
class Spidy::Binder::Html < Spidy::Binder::Base
|
7
|
+
def self.let(name, query = nil, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
9
10
|
|
10
|
-
|
11
|
-
@attribute_names ||= []
|
12
|
-
@attribute_names << name
|
13
|
-
define_method(name) do
|
14
|
-
return html.at(query)&.text if block.nil?
|
15
|
-
return instance_exec(&block) if query.blank?
|
11
|
+
return define_method(name) { html.at(query)&.text } if block.nil?
|
16
12
|
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
17
15
|
instance_exec(html.at(query), &block)
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
18
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
alias_method :resource, :html
|
24
|
-
|
25
|
-
def initialize(spidy, html, url)
|
26
|
-
@spidy = spidy
|
27
|
-
@url = url
|
28
|
-
@html = html
|
29
|
-
end
|
30
|
-
|
31
|
-
def scraper(name, source)
|
32
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
to_h.to_json
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_h
|
40
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
41
|
-
end
|
24
|
+
alias_method :html, :resource
|
42
25
|
end
|
data/lib/spidy/binder/json.rb
CHANGED
@@ -3,40 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind json and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Json
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
instance_exec(
|
6
|
+
class Spidy::Binder::Json < Spidy::Binder::Base
|
7
|
+
def self.let(name, *query, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
10
|
+
|
11
|
+
return define_method(name) { json.dig(*query) } if block.nil?
|
12
|
+
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
15
|
+
instance_exec(json.dig(*query), &block)
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
18
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
alias_method :resource, :json
|
24
|
-
|
25
|
-
def initialize(spidy, json, url)
|
26
|
-
@spidy = spidy
|
27
|
-
@json = json
|
28
|
-
@url = url
|
29
|
-
end
|
30
|
-
|
31
|
-
def scraper(name, source)
|
32
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
to_h.to_json
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_h
|
40
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
41
|
-
end
|
24
|
+
alias_method :json, :resource
|
42
25
|
end
|
data/lib/spidy/binder/xml.rb
CHANGED
@@ -3,40 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind xml and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Xml
|
7
|
-
|
8
|
-
|
6
|
+
class Spidy::Binder::Xml < Spidy::Binder::Base
|
7
|
+
def self.let(name, query = nil, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
9
10
|
|
10
|
-
|
11
|
-
@attribute_names ||= []
|
12
|
-
@attribute_names << name
|
13
|
-
define_method(name) do
|
14
|
-
return xml.at(query)&.text if block.nil?
|
15
|
-
return instance_exec(&block) if query.blank?
|
11
|
+
return define_method(name) { xml.at(query)&.text } if block.nil?
|
16
12
|
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
17
15
|
instance_exec(xml.at(query), &block)
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
18
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
alias_method :resource, :xml
|
24
|
-
|
25
|
-
def initialize(spidy, xml, url)
|
26
|
-
@spidy = spidy
|
27
|
-
@xml = xml
|
28
|
-
@url = url
|
29
|
-
end
|
30
|
-
|
31
|
-
def scraper(name, source)
|
32
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
to_h.to_json
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_h
|
40
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
41
|
-
end
|
24
|
+
alias_method :xml, :resource
|
42
25
|
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -36,16 +36,16 @@ module Spidy::Definition
|
|
36
36
|
@namespace ||= {}
|
37
37
|
connector = Spidy::Connector.get(connector || as)
|
38
38
|
binder = Spidy::Binder.get(self, binder || as)
|
39
|
-
@namespace[:"#{name}_scraper"] = define_proc(connector, binder, define_block)
|
39
|
+
@namespace[:"#{name}_scraper"] = define_proc(name, connector, binder, define_block)
|
40
40
|
end
|
41
41
|
|
42
42
|
private
|
43
43
|
|
44
|
-
def define_proc(connector, binder, define_block)
|
44
|
+
def define_proc(name, connector, binder, define_block)
|
45
45
|
proc do |source, &yielder|
|
46
46
|
yielder = lambda { |result| break result } if yielder.nil?
|
47
47
|
connection_yielder = lambda do |page|
|
48
|
-
binder.call(page, url: source, define: define_block) { |object| yielder.call(object) }
|
48
|
+
binder.call(page, url: source, define: define_block, define_name: name) { |object| yielder.call(object) }
|
49
49
|
end
|
50
50
|
connector.call(source, &connection_yielder)
|
51
51
|
end
|
data/lib/spidy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|