spidy 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/spidy/binder.rb +33 -2
- data/lib/spidy/binder/html.rb +12 -29
- data/lib/spidy/binder/json.rb +15 -32
- data/lib/spidy/binder/xml.rb +12 -29
- data/lib/spidy/definition.rb +3 -3
- data/lib/spidy/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c032c78b7580bada781db301697b264b337d7c829bc5df4c3810a95770f56ee
|
4
|
+
data.tar.gz: 4b282b06cc91b3fcdef8187381ded42d01a689720ebfede4fb135b1639d47f11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de20181e766b5b8d9189e657bfa20f4a4720e2c0a0adb7ece522309433ef5d93325d98d63a1b31304bd9a2575391318c3330c01f66356ae4110b7005c6560253
|
7
|
+
data.tar.gz: 5cf49000bb2f234c111216f9a554267df18a360c4b0511685c000b8e22312feaf3ba5b0b53b56612f0653157cd51e1da46e6140d7e4902b8a4a062894256b1d1
|
data/lib/spidy/binder.rb
CHANGED
@@ -9,17 +9,48 @@ module Spidy::Binder
|
|
9
9
|
autoload :Html
|
10
10
|
autoload :Xml
|
11
11
|
|
12
|
+
class Error < StandardError
|
13
|
+
end
|
14
|
+
|
12
15
|
class Caller
|
13
16
|
def initialize(spidy, binder)
|
14
17
|
@spidy = spidy
|
15
18
|
@binder = binder
|
16
19
|
end
|
17
20
|
|
18
|
-
def call(source, url: nil, define: nil)
|
19
|
-
yield Class.new(@binder, &define).new(@spidy, source, url)
|
21
|
+
def call(source, url: nil, define: nil, define_name: nil)
|
22
|
+
yield Class.new(@binder, &define).new(define_name, @spidy, source, url)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Base
|
27
|
+
class << self
|
28
|
+
attr_reader :attribute_names
|
29
|
+
end
|
30
|
+
|
31
|
+
attr_reader :resource, :url
|
32
|
+
|
33
|
+
def initialize(define_name, spidy, resource, url)
|
34
|
+
@define_name = define_name
|
35
|
+
@spidy = spidy
|
36
|
+
@resource = resource
|
37
|
+
@url = url
|
38
|
+
end
|
39
|
+
|
40
|
+
def scraper(name, source)
|
41
|
+
lambda { |&block| @spidy.call(source, name: name, &block) }
|
42
|
+
end
|
43
|
+
|
44
|
+
def to_s
|
45
|
+
to_h.to_json
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_h
|
49
|
+
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
20
50
|
end
|
21
51
|
end
|
22
52
|
|
53
|
+
|
23
54
|
def self.get(spidy, value)
|
24
55
|
return Caller.new(spidy, const_get(value.to_s.classify)) if name.is_a?(String) || name.is_a?(Symbol)
|
25
56
|
|
data/lib/spidy/binder/html.rb
CHANGED
@@ -3,40 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind html and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Html
|
7
|
-
|
8
|
-
|
6
|
+
class Spidy::Binder::Html < Spidy::Binder::Base
|
7
|
+
def self.let(name, query = nil, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
9
10
|
|
10
|
-
|
11
|
-
@attribute_names ||= []
|
12
|
-
@attribute_names << name
|
13
|
-
define_method(name) do
|
14
|
-
return html.at(query)&.text if block.nil?
|
15
|
-
return instance_exec(&block) if query.blank?
|
11
|
+
return define_method(name) { html.at(query)&.text } if block.nil?
|
16
12
|
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
17
15
|
instance_exec(html.at(query), &block)
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
18
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
alias_method :resource, :html
|
24
|
-
|
25
|
-
def initialize(spidy, html, url)
|
26
|
-
@spidy = spidy
|
27
|
-
@url = url
|
28
|
-
@html = html
|
29
|
-
end
|
30
|
-
|
31
|
-
def scraper(name, source)
|
32
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
to_h.to_json
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_h
|
40
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
41
|
-
end
|
24
|
+
alias_method :html, :resource
|
42
25
|
end
|
data/lib/spidy/binder/json.rb
CHANGED
@@ -3,40 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind json and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Json
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
instance_exec(
|
6
|
+
class Spidy::Binder::Json < Spidy::Binder::Base
|
7
|
+
def self.let(name, *query, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
10
|
+
|
11
|
+
return define_method(name) { json.dig(*query) } if block.nil?
|
12
|
+
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
15
|
+
instance_exec(json.dig(*query), &block)
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
18
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
alias_method :resource, :json
|
24
|
-
|
25
|
-
def initialize(spidy, json, url)
|
26
|
-
@spidy = spidy
|
27
|
-
@json = json
|
28
|
-
@url = url
|
29
|
-
end
|
30
|
-
|
31
|
-
def scraper(name, source)
|
32
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
to_h.to_json
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_h
|
40
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
41
|
-
end
|
24
|
+
alias_method :json, :resource
|
42
25
|
end
|
data/lib/spidy/binder/xml.rb
CHANGED
@@ -3,40 +3,23 @@
|
|
3
3
|
#
|
4
4
|
# Bind xml and convert to object
|
5
5
|
#
|
6
|
-
class Spidy::Binder::Xml
|
7
|
-
|
8
|
-
|
6
|
+
class Spidy::Binder::Xml < Spidy::Binder::Base
|
7
|
+
def self.let(name, query = nil, &block)
|
8
|
+
@attribute_names ||= []
|
9
|
+
@attribute_names << name
|
9
10
|
|
10
|
-
|
11
|
-
@attribute_names ||= []
|
12
|
-
@attribute_names << name
|
13
|
-
define_method(name) do
|
14
|
-
return xml.at(query)&.text if block.nil?
|
15
|
-
return instance_exec(&block) if query.blank?
|
11
|
+
return define_method(name) { xml.at(query)&.text } if block.nil?
|
16
12
|
|
13
|
+
define_method(name) do
|
14
|
+
if query.present?
|
17
15
|
instance_exec(xml.at(query), &block)
|
16
|
+
else
|
17
|
+
instance_exec(&block)
|
18
18
|
end
|
19
|
+
rescue StandardError => e
|
20
|
+
fail Spidy::Binder::Error, "spidy(#{@define_name})##{name} => #{e.message}"
|
19
21
|
end
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
alias_method :resource, :xml
|
24
|
-
|
25
|
-
def initialize(spidy, xml, url)
|
26
|
-
@spidy = spidy
|
27
|
-
@xml = xml
|
28
|
-
@url = url
|
29
|
-
end
|
30
|
-
|
31
|
-
def scraper(name, source)
|
32
|
-
lambda { |&block| @spidy.call(source, name: name, &block) }
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_s
|
36
|
-
to_h.to_json
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_h
|
40
|
-
self.class.attribute_names.map { |name| [name, send(name)] }.to_h
|
41
|
-
end
|
24
|
+
alias_method :xml, :resource
|
42
25
|
end
|
data/lib/spidy/definition.rb
CHANGED
@@ -36,16 +36,16 @@ module Spidy::Definition
|
|
36
36
|
@namespace ||= {}
|
37
37
|
connector = Spidy::Connector.get(connector || as)
|
38
38
|
binder = Spidy::Binder.get(self, binder || as)
|
39
|
-
@namespace[:"#{name}_scraper"] = define_proc(connector, binder, define_block)
|
39
|
+
@namespace[:"#{name}_scraper"] = define_proc(name, connector, binder, define_block)
|
40
40
|
end
|
41
41
|
|
42
42
|
private
|
43
43
|
|
44
|
-
def define_proc(connector, binder, define_block)
|
44
|
+
def define_proc(name, connector, binder, define_block)
|
45
45
|
proc do |source, &yielder|
|
46
46
|
yielder = lambda { |result| break result } if yielder.nil?
|
47
47
|
connection_yielder = lambda do |page|
|
48
|
-
binder.call(page, url: source, define: define_block) { |object| yielder.call(object) }
|
48
|
+
binder.call(page, url: source, define: define_block, define_name: name) { |object| yielder.call(object) }
|
49
49
|
end
|
50
50
|
connector.call(source, &connection_yielder)
|
51
51
|
end
|
data/lib/spidy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-03-
|
11
|
+
date: 2020-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|