yasuri 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +17 -8
- data/lib/yasuri/yasuri_links_node.rb +5 -2
- data/lib/yasuri/yasuri_node.rb +1 -1
- data/lib/yasuri/yasuri_paginate_node.rb +6 -4
- data/lib/yasuri/yasuri_struct_node.rb +3 -2
- data/lib/yasuri/yasuri_text_node.rb +6 -7
- data/spec/yasuri_links_node_spec.rb +14 -0
- data/spec/yasuri_paginate_node_spec.rb +14 -0
- data/spec/yasuri_spec.rb +3 -3
- data/spec/yasuri_struct_node_spec.rb +11 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f667fce1e95bc5a16e350a1227abdde1a6db8514
|
4
|
+
data.tar.gz: 68cb3c5754f9636f9ae2c677feccfd62183c0af5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddb2760495645509953e05748ac617148f1a69916f955f4781fef4565dcb030d44a204d2999e5510c3dc43af6500c654886fe8eeb5694d339c76e8eb7fa195ba
|
7
|
+
data.tar.gz: 2ae5d067c108a7137739c3df51001abfbc6002bda5eeeec1e3632e3e57154e8f856effd0c0017b587768feb84c76b73889582ec526e07697c95d8ba140bb8e9a
|
data/lib/yasuri/version.rb
CHANGED
data/lib/yasuri/yasuri.rb
CHANGED
@@ -15,7 +15,7 @@ require_relative 'yasuri_node_generator'
|
|
15
15
|
module Yasuri
|
16
16
|
|
17
17
|
def self.json2tree(json_string)
|
18
|
-
json = JSON.parse(json_string)
|
18
|
+
json = JSON.parse(json_string, {symbolize_names: true})
|
19
19
|
Yasuri.hash2node(json)
|
20
20
|
end
|
21
21
|
|
@@ -30,26 +30,31 @@ module Yasuri
|
|
30
30
|
|
31
31
|
private
|
32
32
|
Text2Node = {
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
text: Yasuri::TextNode,
|
34
|
+
struct: Yasuri::StructNode,
|
35
|
+
links: Yasuri::LinksNode,
|
36
|
+
pages: Yasuri::PaginateNode
|
37
37
|
}
|
38
38
|
Node2Text = Text2Node.invert
|
39
39
|
|
40
|
-
ReservedKeys = %
|
40
|
+
ReservedKeys = %i|node name path children|
|
41
41
|
def self.hash2node(node_h)
|
42
42
|
node, name, path, children = ReservedKeys.map do |key|
|
43
43
|
node_h[key]
|
44
44
|
end
|
45
45
|
children ||= []
|
46
46
|
|
47
|
+
fail "Not found 'node' value in json" if node.nil?
|
48
|
+
fail "Not found 'name' value in json" if name.nil?
|
49
|
+
fail "Not found 'path' value in json" if path.nil?
|
50
|
+
|
47
51
|
childnodes = children.map{|c| Yasuri.hash2node(c) }
|
48
52
|
ReservedKeys.each{|key| node_h.delete(key)}
|
49
53
|
opt = node_h
|
50
54
|
|
51
|
-
klass = Text2Node[node]
|
52
|
-
|
55
|
+
klass = Text2Node[node.to_sym]
|
56
|
+
fail "Undefined node type #{node}" if klass.nil?
|
57
|
+
klass.new(path, name, childnodes, opt)
|
53
58
|
end
|
54
59
|
|
55
60
|
def self.node2hash(node)
|
@@ -73,6 +78,10 @@ module Yasuri
|
|
73
78
|
json
|
74
79
|
end
|
75
80
|
|
81
|
+
def self.NodeName(name, symbolize_names:false)
|
82
|
+
symbolize_names ? name.to_sym : name
|
83
|
+
end
|
84
|
+
|
76
85
|
def self.with_retry(retry_count = 5)
|
77
86
|
begin
|
78
87
|
return yield() if block_given?
|
@@ -6,14 +6,17 @@ require_relative 'yasuri_node'
|
|
6
6
|
module Yasuri
|
7
7
|
class LinksNode
|
8
8
|
include Node
|
9
|
-
def inject(agent, page,
|
9
|
+
def inject(agent, page, opt = {})
|
10
|
+
retry_count = opt[:retry_count] || 5
|
11
|
+
|
10
12
|
links = page.search(@xpath) || [] # links expected
|
11
13
|
links.map do |link|
|
12
14
|
link_button = Mechanize::Page::Link.new(link, agent, page)
|
13
15
|
child_page = Yasuri.with_retry(retry_count) { link_button.click }
|
14
16
|
|
15
17
|
child_results_kv = @children.map do |child_node|
|
16
|
-
|
18
|
+
child_name = Yasuri.NodeName(child_node.name, opt)
|
19
|
+
[child_name, child_node.inject(agent, child_page, opt)]
|
17
20
|
end
|
18
21
|
|
19
22
|
Hash[child_results_kv]
|
data/lib/yasuri/yasuri_node.rb
CHANGED
@@ -7,18 +7,20 @@ module Yasuri
|
|
7
7
|
class PaginateNode
|
8
8
|
include Node
|
9
9
|
|
10
|
-
def initialize(xpath, name, children = [], limit: nil
|
10
|
+
def initialize(xpath, name, children = [], limit: nil)
|
11
11
|
super(xpath, name, children)
|
12
|
-
@limit = limit
|
12
|
+
@limit = limit
|
13
13
|
end
|
14
14
|
|
15
|
-
def inject(agent, page,
|
15
|
+
def inject(agent, page, opt = {})
|
16
|
+
retry_count = opt[:retry_count] || 5
|
16
17
|
|
17
18
|
child_results = []
|
18
19
|
limit = @limit.nil? ? Float::MAX : @limit
|
19
20
|
while page
|
20
21
|
child_results_kv = @children.map do |child_node|
|
21
|
-
|
22
|
+
child_name = Yasuri.NodeName(child_node.name, opt)
|
23
|
+
[child_name, child_node.inject(agent, page, opt)]
|
22
24
|
end
|
23
25
|
child_results << Hash[child_results_kv]
|
24
26
|
|
@@ -6,11 +6,12 @@ require_relative 'yasuri_node'
|
|
6
6
|
module Yasuri
|
7
7
|
class StructNode
|
8
8
|
include Node
|
9
|
-
def inject(agent, page,
|
9
|
+
def inject(agent, page, opt = {})
|
10
10
|
sub_tags = page.search(@xpath)
|
11
11
|
sub_tags.map do |sub_tag|
|
12
12
|
child_results_kv = @children.map do |child_node|
|
13
|
-
|
13
|
+
child_name = Yasuri.NodeName(child_node.name, opt)
|
14
|
+
[child_name, child_node.inject(agent, sub_tag, opt)]
|
14
15
|
end
|
15
16
|
Hash[child_results_kv]
|
16
17
|
end
|
@@ -6,18 +6,17 @@ require_relative 'yasuri_node'
|
|
6
6
|
module Yasuri
|
7
7
|
class TextNode
|
8
8
|
include Node
|
9
|
-
def initialize(xpath, name, children = [], truncate: nil, opt: {})
|
10
|
-
super(xpath, name, children)
|
11
9
|
|
12
|
-
|
13
|
-
|
10
|
+
def initialize(xpath, name, children = [], truncate: nil)
|
11
|
+
super(xpath, name, children)
|
14
12
|
|
15
|
-
|
13
|
+
truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
|
16
14
|
|
15
|
+
@truncate = truncate
|
17
16
|
@truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
|
18
|
-
|
19
17
|
end
|
20
|
-
|
18
|
+
|
19
|
+
def inject(agent, page, opt = {})
|
21
20
|
node = page.search(@xpath)
|
22
21
|
text = node.text.to_s
|
23
22
|
|
@@ -90,5 +90,19 @@ describe 'Yasuri' do
|
|
90
90
|
])
|
91
91
|
compare_generated_vs_original(generated, original, @index_page)
|
92
92
|
end
|
93
|
+
|
94
|
+
it 'return child node as symbol' do
|
95
|
+
root_node = Yasuri::LinksNode.new('/html/body/a', "root", [
|
96
|
+
Yasuri::TextNode.new('/html/body/p', "content"),
|
97
|
+
])
|
98
|
+
|
99
|
+
actual = root_node.inject(@agent, @index_page, symbolize_names: true )
|
100
|
+
expected = [
|
101
|
+
{:content => "Child 01 page."},
|
102
|
+
{:content => "Child 02 page."},
|
103
|
+
{:content => "Child 03 page."},
|
104
|
+
]
|
105
|
+
expect(actual).to match expected
|
106
|
+
end
|
93
107
|
end
|
94
108
|
end
|
@@ -81,5 +81,19 @@ describe 'Yasuri' do
|
|
81
81
|
], limit: 2)
|
82
82
|
compare_generated_vs_original(generated, original, @page)
|
83
83
|
end
|
84
|
+
|
85
|
+
it "return child node as symbol" do
|
86
|
+
root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
87
|
+
Yasuri::TextNode.new('/html/body/p', "content"),
|
88
|
+
])
|
89
|
+
actual = root_node.inject(@agent, @page, symbolize_names:true)
|
90
|
+
expected = [
|
91
|
+
{:content => "PaginationTest01"},
|
92
|
+
{:content => "PaginationTest02"},
|
93
|
+
{:content => "PaginationTest03"},
|
94
|
+
{:content => "PaginationTest04"},
|
95
|
+
]
|
96
|
+
expect(actual).to match expected
|
97
|
+
end
|
84
98
|
end
|
85
99
|
end
|
data/spec/yasuri_spec.rb
CHANGED
@@ -20,9 +20,8 @@ describe 'Yasuri' do
|
|
20
20
|
# json2tree #
|
21
21
|
#############
|
22
22
|
describe '.json2tree' do
|
23
|
-
it "
|
24
|
-
|
25
|
-
expect(tree).to be_nil
|
23
|
+
it "fail if empty json" do
|
24
|
+
expect { Yasuri.json2tree("{}") }.to raise_error
|
26
25
|
end
|
27
26
|
|
28
27
|
it "return TextNode" do
|
@@ -32,6 +31,7 @@ describe 'Yasuri' do
|
|
32
31
|
}|
|
33
32
|
generated = Yasuri.json2tree(src)
|
34
33
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
34
|
+
|
35
35
|
compare_generated_vs_original(generated, original, @index_page)
|
36
36
|
end
|
37
37
|
|
@@ -110,5 +110,16 @@ describe 'Yasuri' do
|
|
110
110
|
])
|
111
111
|
compare_generated_vs_original(generated, original, @page)
|
112
112
|
end
|
113
|
+
|
114
|
+
it 'return child node as symbol' do
|
115
|
+
node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
|
116
|
+
Yasuri::TextNode.new('./td[1]', "title"),
|
117
|
+
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
118
|
+
])
|
119
|
+
expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
|
120
|
+
actual = node.inject(@agent, @page, symbolize_names:true)
|
121
|
+
expect(actual).to match expected
|
122
|
+
end
|
123
|
+
|
113
124
|
end
|
114
125
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yasuri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAC
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|