yasuri 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +17 -8
- data/lib/yasuri/yasuri_links_node.rb +5 -2
- data/lib/yasuri/yasuri_node.rb +1 -1
- data/lib/yasuri/yasuri_paginate_node.rb +6 -4
- data/lib/yasuri/yasuri_struct_node.rb +3 -2
- data/lib/yasuri/yasuri_text_node.rb +6 -7
- data/spec/yasuri_links_node_spec.rb +14 -0
- data/spec/yasuri_paginate_node_spec.rb +14 -0
- data/spec/yasuri_spec.rb +3 -3
- data/spec/yasuri_struct_node_spec.rb +11 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f667fce1e95bc5a16e350a1227abdde1a6db8514
|
4
|
+
data.tar.gz: 68cb3c5754f9636f9ae2c677feccfd62183c0af5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddb2760495645509953e05748ac617148f1a69916f955f4781fef4565dcb030d44a204d2999e5510c3dc43af6500c654886fe8eeb5694d339c76e8eb7fa195ba
|
7
|
+
data.tar.gz: 2ae5d067c108a7137739c3df51001abfbc6002bda5eeeec1e3632e3e57154e8f856effd0c0017b587768feb84c76b73889582ec526e07697c95d8ba140bb8e9a
|
data/lib/yasuri/version.rb
CHANGED
data/lib/yasuri/yasuri.rb
CHANGED
@@ -15,7 +15,7 @@ require_relative 'yasuri_node_generator'
|
|
15
15
|
module Yasuri
|
16
16
|
|
17
17
|
def self.json2tree(json_string)
|
18
|
-
json = JSON.parse(json_string)
|
18
|
+
json = JSON.parse(json_string, {symbolize_names: true})
|
19
19
|
Yasuri.hash2node(json)
|
20
20
|
end
|
21
21
|
|
@@ -30,26 +30,31 @@ module Yasuri
|
|
30
30
|
|
31
31
|
private
|
32
32
|
Text2Node = {
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
text: Yasuri::TextNode,
|
34
|
+
struct: Yasuri::StructNode,
|
35
|
+
links: Yasuri::LinksNode,
|
36
|
+
pages: Yasuri::PaginateNode
|
37
37
|
}
|
38
38
|
Node2Text = Text2Node.invert
|
39
39
|
|
40
|
-
ReservedKeys = %
|
40
|
+
ReservedKeys = %i|node name path children|
|
41
41
|
def self.hash2node(node_h)
|
42
42
|
node, name, path, children = ReservedKeys.map do |key|
|
43
43
|
node_h[key]
|
44
44
|
end
|
45
45
|
children ||= []
|
46
46
|
|
47
|
+
fail "Not found 'node' value in json" if node.nil?
|
48
|
+
fail "Not found 'name' value in json" if name.nil?
|
49
|
+
fail "Not found 'path' value in json" if path.nil?
|
50
|
+
|
47
51
|
childnodes = children.map{|c| Yasuri.hash2node(c) }
|
48
52
|
ReservedKeys.each{|key| node_h.delete(key)}
|
49
53
|
opt = node_h
|
50
54
|
|
51
|
-
klass = Text2Node[node]
|
52
|
-
|
55
|
+
klass = Text2Node[node.to_sym]
|
56
|
+
fail "Undefined node type #{node}" if klass.nil?
|
57
|
+
klass.new(path, name, childnodes, opt)
|
53
58
|
end
|
54
59
|
|
55
60
|
def self.node2hash(node)
|
@@ -73,6 +78,10 @@ module Yasuri
|
|
73
78
|
json
|
74
79
|
end
|
75
80
|
|
81
|
+
def self.NodeName(name, symbolize_names:false)
|
82
|
+
symbolize_names ? name.to_sym : name
|
83
|
+
end
|
84
|
+
|
76
85
|
def self.with_retry(retry_count = 5)
|
77
86
|
begin
|
78
87
|
return yield() if block_given?
|
@@ -6,14 +6,17 @@ require_relative 'yasuri_node'
|
|
6
6
|
module Yasuri
|
7
7
|
class LinksNode
|
8
8
|
include Node
|
9
|
-
def inject(agent, page,
|
9
|
+
def inject(agent, page, opt = {})
|
10
|
+
retry_count = opt[:retry_count] || 5
|
11
|
+
|
10
12
|
links = page.search(@xpath) || [] # links expected
|
11
13
|
links.map do |link|
|
12
14
|
link_button = Mechanize::Page::Link.new(link, agent, page)
|
13
15
|
child_page = Yasuri.with_retry(retry_count) { link_button.click }
|
14
16
|
|
15
17
|
child_results_kv = @children.map do |child_node|
|
16
|
-
|
18
|
+
child_name = Yasuri.NodeName(child_node.name, opt)
|
19
|
+
[child_name, child_node.inject(agent, child_page, opt)]
|
17
20
|
end
|
18
21
|
|
19
22
|
Hash[child_results_kv]
|
data/lib/yasuri/yasuri_node.rb
CHANGED
@@ -7,18 +7,20 @@ module Yasuri
|
|
7
7
|
class PaginateNode
|
8
8
|
include Node
|
9
9
|
|
10
|
-
def initialize(xpath, name, children = [], limit: nil
|
10
|
+
def initialize(xpath, name, children = [], limit: nil)
|
11
11
|
super(xpath, name, children)
|
12
|
-
@limit = limit
|
12
|
+
@limit = limit
|
13
13
|
end
|
14
14
|
|
15
|
-
def inject(agent, page,
|
15
|
+
def inject(agent, page, opt = {})
|
16
|
+
retry_count = opt[:retry_count] || 5
|
16
17
|
|
17
18
|
child_results = []
|
18
19
|
limit = @limit.nil? ? Float::MAX : @limit
|
19
20
|
while page
|
20
21
|
child_results_kv = @children.map do |child_node|
|
21
|
-
|
22
|
+
child_name = Yasuri.NodeName(child_node.name, opt)
|
23
|
+
[child_name, child_node.inject(agent, page, opt)]
|
22
24
|
end
|
23
25
|
child_results << Hash[child_results_kv]
|
24
26
|
|
@@ -6,11 +6,12 @@ require_relative 'yasuri_node'
|
|
6
6
|
module Yasuri
|
7
7
|
class StructNode
|
8
8
|
include Node
|
9
|
-
def inject(agent, page,
|
9
|
+
def inject(agent, page, opt = {})
|
10
10
|
sub_tags = page.search(@xpath)
|
11
11
|
sub_tags.map do |sub_tag|
|
12
12
|
child_results_kv = @children.map do |child_node|
|
13
|
-
|
13
|
+
child_name = Yasuri.NodeName(child_node.name, opt)
|
14
|
+
[child_name, child_node.inject(agent, sub_tag, opt)]
|
14
15
|
end
|
15
16
|
Hash[child_results_kv]
|
16
17
|
end
|
@@ -6,18 +6,17 @@ require_relative 'yasuri_node'
|
|
6
6
|
module Yasuri
|
7
7
|
class TextNode
|
8
8
|
include Node
|
9
|
-
def initialize(xpath, name, children = [], truncate: nil, opt: {})
|
10
|
-
super(xpath, name, children)
|
11
9
|
|
12
|
-
|
13
|
-
|
10
|
+
def initialize(xpath, name, children = [], truncate: nil)
|
11
|
+
super(xpath, name, children)
|
14
12
|
|
15
|
-
|
13
|
+
truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
|
16
14
|
|
15
|
+
@truncate = truncate
|
17
16
|
@truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
|
18
|
-
|
19
17
|
end
|
20
|
-
|
18
|
+
|
19
|
+
def inject(agent, page, opt = {})
|
21
20
|
node = page.search(@xpath)
|
22
21
|
text = node.text.to_s
|
23
22
|
|
@@ -90,5 +90,19 @@ describe 'Yasuri' do
|
|
90
90
|
])
|
91
91
|
compare_generated_vs_original(generated, original, @index_page)
|
92
92
|
end
|
93
|
+
|
94
|
+
it 'return child node as symbol' do
|
95
|
+
root_node = Yasuri::LinksNode.new('/html/body/a', "root", [
|
96
|
+
Yasuri::TextNode.new('/html/body/p', "content"),
|
97
|
+
])
|
98
|
+
|
99
|
+
actual = root_node.inject(@agent, @index_page, symbolize_names: true )
|
100
|
+
expected = [
|
101
|
+
{:content => "Child 01 page."},
|
102
|
+
{:content => "Child 02 page."},
|
103
|
+
{:content => "Child 03 page."},
|
104
|
+
]
|
105
|
+
expect(actual).to match expected
|
106
|
+
end
|
93
107
|
end
|
94
108
|
end
|
@@ -81,5 +81,19 @@ describe 'Yasuri' do
|
|
81
81
|
], limit: 2)
|
82
82
|
compare_generated_vs_original(generated, original, @page)
|
83
83
|
end
|
84
|
+
|
85
|
+
it "return child node as symbol" do
|
86
|
+
root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
87
|
+
Yasuri::TextNode.new('/html/body/p', "content"),
|
88
|
+
])
|
89
|
+
actual = root_node.inject(@agent, @page, symbolize_names:true)
|
90
|
+
expected = [
|
91
|
+
{:content => "PaginationTest01"},
|
92
|
+
{:content => "PaginationTest02"},
|
93
|
+
{:content => "PaginationTest03"},
|
94
|
+
{:content => "PaginationTest04"},
|
95
|
+
]
|
96
|
+
expect(actual).to match expected
|
97
|
+
end
|
84
98
|
end
|
85
99
|
end
|
data/spec/yasuri_spec.rb
CHANGED
@@ -20,9 +20,8 @@ describe 'Yasuri' do
|
|
20
20
|
# json2tree #
|
21
21
|
#############
|
22
22
|
describe '.json2tree' do
|
23
|
-
it "
|
24
|
-
|
25
|
-
expect(tree).to be_nil
|
23
|
+
it "fail if empty json" do
|
24
|
+
expect { Yasuri.json2tree("{}") }.to raise_error
|
26
25
|
end
|
27
26
|
|
28
27
|
it "return TextNode" do
|
@@ -32,6 +31,7 @@ describe 'Yasuri' do
|
|
32
31
|
}|
|
33
32
|
generated = Yasuri.json2tree(src)
|
34
33
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
34
|
+
|
35
35
|
compare_generated_vs_original(generated, original, @index_page)
|
36
36
|
end
|
37
37
|
|
@@ -110,5 +110,16 @@ describe 'Yasuri' do
|
|
110
110
|
])
|
111
111
|
compare_generated_vs_original(generated, original, @page)
|
112
112
|
end
|
113
|
+
|
114
|
+
it 'return child node as symbol' do
|
115
|
+
node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
|
116
|
+
Yasuri::TextNode.new('./td[1]', "title"),
|
117
|
+
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
118
|
+
])
|
119
|
+
expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
|
120
|
+
actual = node.inject(@agent, @page, symbolize_names:true)
|
121
|
+
expect(actual).to match expected
|
122
|
+
end
|
123
|
+
|
113
124
|
end
|
114
125
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yasuri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAC
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|