yasuri 2.0.12 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +35 -0
- data/.gitignore +1 -2
- data/.ruby-version +1 -0
- data/.travis.yml +1 -3
- data/README.md +87 -21
- data/USAGE.ja.md +368 -120
- data/USAGE.md +375 -125
- data/examples/example.rb +79 -0
- data/examples/github.yml +15 -0
- data/examples/sample.json +4 -0
- data/examples/sample.yml +11 -0
- data/exe/yasuri +5 -0
- data/lib/yasuri.rb +1 -0
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +86 -41
- data/lib/yasuri/yasuri_cli.rb +64 -0
- data/lib/yasuri/yasuri_links_node.rb +11 -5
- data/lib/yasuri/yasuri_map_node.rb +40 -0
- data/lib/yasuri/yasuri_node.rb +37 -2
- data/lib/yasuri/yasuri_node_generator.rb +16 -11
- data/lib/yasuri/yasuri_paginate_node.rb +10 -4
- data/lib/yasuri/yasuri_struct_node.rb +5 -1
- data/lib/yasuri/yasuri_text_node.rb +9 -2
- data/spec/cli_resources/tree.json +8 -0
- data/spec/cli_resources/tree.yml +5 -0
- data/spec/cli_resources/tree_wrong.json +9 -0
- data/spec/cli_resources/tree_wrong.yml +6 -0
- data/spec/spec_helper.rb +4 -9
- data/spec/yasuri_cli_spec.rb +96 -0
- data/spec/yasuri_links_node_spec.rb +34 -12
- data/spec/yasuri_map_spec.rb +75 -0
- data/spec/yasuri_paginate_node_spec.rb +22 -10
- data/spec/yasuri_spec.rb +244 -94
- data/spec/yasuri_struct_node_spec.rb +13 -17
- data/spec/yasuri_text_node_spec.rb +11 -12
- data/yasuri.gemspec +5 -3
- metadata +52 -18
- data/app.rb +0 -52
@@ -0,0 +1,75 @@
|
|
1
|
+
require_relative 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Yasuri' do
|
4
|
+
include_context 'httpserver'
|
5
|
+
|
6
|
+
before do
|
7
|
+
@uri = uri
|
8
|
+
end
|
9
|
+
|
10
|
+
describe '::MapNode' do
|
11
|
+
it "multi scrape in singe page" do
|
12
|
+
map = Yasuri.map_sample do
|
13
|
+
text_title '/html/head/title'
|
14
|
+
text_body_p '/html/body/p[1]'
|
15
|
+
end
|
16
|
+
actual = map.scrape(@uri)
|
17
|
+
|
18
|
+
expected = {
|
19
|
+
"title" => "Yasuri Test",
|
20
|
+
"body_p" => "Hello,Yasuri"
|
21
|
+
}
|
22
|
+
expect(actual).to include expected
|
23
|
+
end
|
24
|
+
|
25
|
+
it "nested multi scrape in singe page" do
|
26
|
+
map = Yasuri.map_sample do
|
27
|
+
map_group1 { text_child01 '/html/body/a[1]' }
|
28
|
+
map_group2 do
|
29
|
+
text_child01 '/html/body/a[1]'
|
30
|
+
text_child03 '/html/body/a[3]'
|
31
|
+
end
|
32
|
+
end
|
33
|
+
actual = map.scrape(@uri)
|
34
|
+
|
35
|
+
expected = {
|
36
|
+
"group1" => {
|
37
|
+
"child01" => "child01"
|
38
|
+
},
|
39
|
+
"group2" => {
|
40
|
+
"child01" => "child01",
|
41
|
+
"child03" => "child03"
|
42
|
+
}
|
43
|
+
}
|
44
|
+
expect(actual).to include expected
|
45
|
+
end
|
46
|
+
|
47
|
+
it "scrape with links node" do
|
48
|
+
map = Yasuri.map_sample do
|
49
|
+
map_group1 do
|
50
|
+
links_a '/html/body/a' do
|
51
|
+
text_content '/html/body/p'
|
52
|
+
end
|
53
|
+
text_child01 '/html/body/a[1]'
|
54
|
+
end
|
55
|
+
map_group2 do
|
56
|
+
text_child03 '/html/body/a[3]'
|
57
|
+
end
|
58
|
+
end
|
59
|
+
actual = map.scrape(@uri)
|
60
|
+
|
61
|
+
expected = {
|
62
|
+
"group1" => {
|
63
|
+
"a" => [
|
64
|
+
{"content" => "Child 01 page."},
|
65
|
+
{"content" => "Child 02 page."},
|
66
|
+
{"content" => "Child 03 page."},
|
67
|
+
],
|
68
|
+
"child01" => "child01"
|
69
|
+
},
|
70
|
+
"group2" => { "child03" => "child03" }
|
71
|
+
}
|
72
|
+
expect(actual).to include expected
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -11,16 +11,14 @@ describe 'Yasuri' do
|
|
11
11
|
|
12
12
|
describe '::PaginateNode' do
|
13
13
|
before do
|
14
|
-
@agent = Mechanize.new
|
15
14
|
@uri = uri + "/pagination/page01.html"
|
16
|
-
@page = @agent.get(@uri)
|
17
15
|
end
|
18
16
|
|
19
17
|
it "scrape each paginated pages" do
|
20
18
|
root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
21
19
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
22
20
|
])
|
23
|
-
actual = root_node.
|
21
|
+
actual = root_node.scrape(@uri)
|
24
22
|
expected = [
|
25
23
|
{"content" => "PaginationTest01"},
|
26
24
|
{"content" => "PaginationTest02"},
|
@@ -37,7 +35,7 @@ describe 'Yasuri' do
|
|
37
35
|
Yasuri::TextNode.new('./a', "text"),
|
38
36
|
]),
|
39
37
|
], flatten: true)
|
40
|
-
actual = root_node.
|
38
|
+
actual = root_node.scrape(@uri)
|
41
39
|
expected = [
|
42
40
|
"PaginationTest01",
|
43
41
|
{"text"=>""},
|
@@ -77,7 +75,7 @@ describe 'Yasuri' do
|
|
77
75
|
root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
78
76
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
79
77
|
], limit:3)
|
80
|
-
actual = root_node.
|
78
|
+
actual = root_node.scrape(@uri)
|
81
79
|
expected = [
|
82
80
|
{"content" => "PaginationTest01"},
|
83
81
|
{"content" => "PaginationTest02"},
|
@@ -91,7 +89,7 @@ describe 'Yasuri' do
|
|
91
89
|
root_node = Yasuri::PaginateNode.new(missing_xpath, "root", [
|
92
90
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
93
91
|
])
|
94
|
-
actual = root_node.
|
92
|
+
actual = root_node.scrape(@uri)
|
95
93
|
expected = [ {"content" => "PaginationTest01"}, ]
|
96
94
|
expect(actual).to match_array expected
|
97
95
|
end
|
@@ -100,7 +98,7 @@ describe 'Yasuri' do
|
|
100
98
|
root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
101
99
|
Yasuri::TextNode.new('/html/body/hoge', "content"),
|
102
100
|
])
|
103
|
-
actual = root_node.
|
101
|
+
actual = root_node.scrape(@uri)
|
104
102
|
expected = [ {"content" => ""}, {"content" => ""}, {"content" => ""}, {"content" => ""},]
|
105
103
|
expect(actual).to match_array expected
|
106
104
|
end
|
@@ -112,7 +110,7 @@ describe 'Yasuri' do
|
|
112
110
|
original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
113
111
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
114
112
|
])
|
115
|
-
compare_generated_vs_original(generated, original, @
|
113
|
+
compare_generated_vs_original(generated, original, @uri)
|
116
114
|
end
|
117
115
|
|
118
116
|
it 'can be defined by DSL, return single PaginateNode content limited' do
|
@@ -122,14 +120,14 @@ describe 'Yasuri' do
|
|
122
120
|
original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
123
121
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
124
122
|
], limit: 2)
|
125
|
-
compare_generated_vs_original(generated, original, @
|
123
|
+
compare_generated_vs_original(generated, original, @uri)
|
126
124
|
end
|
127
125
|
|
128
126
|
it "return child node as symbol" do
|
129
127
|
root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
130
128
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
131
129
|
])
|
132
|
-
actual = root_node.
|
130
|
+
actual = root_node.scrape(@uri, symbolize_names:true)
|
133
131
|
expected = [
|
134
132
|
{:content => "PaginationTest01"},
|
135
133
|
{:content => "PaginationTest02"},
|
@@ -138,5 +136,19 @@ describe 'Yasuri' do
|
|
138
136
|
]
|
139
137
|
expect(actual).to match expected
|
140
138
|
end
|
139
|
+
|
140
|
+
it "scrape with interval for each request" do
|
141
|
+
allow(Kernel).to receive(:sleep)
|
142
|
+
|
143
|
+
root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
144
|
+
Yasuri::TextNode.new('/html/body/p', "content"),
|
145
|
+
])
|
146
|
+
actual = root_node.scrape(@uri, interval_ms: 1000)
|
147
|
+
expect(actual.size).to match 4
|
148
|
+
|
149
|
+
expect(Kernel).to have_received(:sleep).exactly(4).times do |interval_sec|
|
150
|
+
expect(interval_sec).to match 1.0
|
151
|
+
end
|
152
|
+
end
|
141
153
|
end
|
142
154
|
end
|
data/spec/yasuri_spec.rb
CHANGED
@@ -8,11 +8,78 @@ describe 'Yasuri' do
|
|
8
8
|
include_context 'httpserver'
|
9
9
|
|
10
10
|
before do
|
11
|
-
@agent = Mechanize.new
|
12
11
|
@uri = uri
|
13
|
-
@index_page = @agent.get(@uri)
|
14
12
|
end
|
15
13
|
|
14
|
+
|
15
|
+
#############
|
16
|
+
# yaml2tree #
|
17
|
+
#############
|
18
|
+
describe '.yaml2tree' do
|
19
|
+
it "fail if empty yaml" do
|
20
|
+
expect { Yasuri.yaml2tree(nil) }.to raise_error(RuntimeError)
|
21
|
+
end
|
22
|
+
|
23
|
+
it "return text node" do
|
24
|
+
src = <<-EOB
|
25
|
+
text_content: "/html/body/p[1]"
|
26
|
+
EOB
|
27
|
+
generated = Yasuri.yaml2tree(src)
|
28
|
+
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
29
|
+
|
30
|
+
compare_generated_vs_original(generated, original, @uri)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "return text node as symbol" do
|
34
|
+
src = <<-EOB
|
35
|
+
:text_content:
|
36
|
+
:path: "/html/body/p[1]"
|
37
|
+
EOB
|
38
|
+
generated = Yasuri.yaml2tree(src)
|
39
|
+
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
40
|
+
|
41
|
+
compare_generated_vs_original(generated, original, @uri)
|
42
|
+
end
|
43
|
+
|
44
|
+
it "return LinksNode/TextNode" do
|
45
|
+
|
46
|
+
src = <<-EOB
|
47
|
+
links_root:
|
48
|
+
path: "/html/body/a"
|
49
|
+
text_content: "/html/body/p"
|
50
|
+
EOB
|
51
|
+
generated = Yasuri.yaml2tree(src)
|
52
|
+
original = Yasuri::LinksNode.new('/html/body/a', "root", [
|
53
|
+
Yasuri::TextNode.new('/html/body/p', "content"),
|
54
|
+
])
|
55
|
+
|
56
|
+
compare_generated_vs_original(generated, original, @uri)
|
57
|
+
end
|
58
|
+
|
59
|
+
it "return StructNode/StructNode/[TextNode,TextNode]" do
|
60
|
+
src = <<-EOB
|
61
|
+
struct_tables:
|
62
|
+
path: "/html/body/table"
|
63
|
+
struct_table:
|
64
|
+
path: "./tr"
|
65
|
+
text_title: "./td[1]"
|
66
|
+
text_pub_date: "./td[2]"
|
67
|
+
EOB
|
68
|
+
|
69
|
+
generated = Yasuri.yaml2tree(src)
|
70
|
+
original = Yasuri::StructNode.new('/html/body/table', "tables", [
|
71
|
+
Yasuri::StructNode.new('./tr', "table", [
|
72
|
+
Yasuri::TextNode.new('./td[1]', "title"),
|
73
|
+
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
74
|
+
])
|
75
|
+
])
|
76
|
+
uri = @uri + "/struct/structual_text.html"
|
77
|
+
compare_generated_vs_original(generated, original, uri)
|
78
|
+
end
|
79
|
+
|
80
|
+
end # end of describe '.yaml2tree'
|
81
|
+
|
82
|
+
|
16
83
|
#############
|
17
84
|
# json2tree #
|
18
85
|
#############
|
@@ -22,103 +89,107 @@ describe 'Yasuri' do
|
|
22
89
|
end
|
23
90
|
|
24
91
|
it "return TextNode" do
|
25
|
-
src = %q|
|
26
|
-
|
27
|
-
|
28
|
-
|
92
|
+
src = %q|
|
93
|
+
{
|
94
|
+
"text_content": "/html/body/p[1]"
|
95
|
+
}|
|
29
96
|
generated = Yasuri.json2tree(src)
|
30
97
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
31
98
|
|
32
|
-
compare_generated_vs_original(generated, original, @
|
99
|
+
compare_generated_vs_original(generated, original, @uri)
|
33
100
|
end
|
34
101
|
|
35
102
|
it "return TextNode with truncate_regexp" do
|
36
|
-
src = %q|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
103
|
+
src = %q|
|
104
|
+
{
|
105
|
+
"text_content": {
|
106
|
+
"path": "/html/body/p[1]",
|
107
|
+
"truncate" : "^[^,]+"
|
108
|
+
}
|
109
|
+
}|
|
41
110
|
generated = Yasuri.json2tree(src)
|
42
111
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
|
43
|
-
compare_generated_vs_original(generated, original, @
|
112
|
+
compare_generated_vs_original(generated, original, @uri)
|
44
113
|
end
|
45
114
|
|
115
|
+
it "return MapNode with TextNodes" do
|
116
|
+
src = %q|
|
117
|
+
{
|
118
|
+
"text_content01": "/html/body/p[1]",
|
119
|
+
"text_content02": "/html/body/p[2]"
|
120
|
+
}|
|
121
|
+
generated = Yasuri.json2tree(src)
|
122
|
+
original = Yasuri::MapNode.new('parent', [
|
123
|
+
Yasuri::TextNode.new('/html/body/p[1]', "content01"),
|
124
|
+
Yasuri::TextNode.new('/html/body/p[2]', "content02"),
|
125
|
+
])
|
126
|
+
compare_generated_vs_original(generated, original, @uri)
|
127
|
+
end
|
46
128
|
|
47
129
|
it "return LinksNode/TextNode" do
|
48
|
-
src = %q|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
130
|
+
src = %q|
|
131
|
+
{
|
132
|
+
"links_root": {
|
133
|
+
"path": "/html/body/a",
|
134
|
+
"text_content": "/html/body/p"
|
135
|
+
}
|
136
|
+
}|
|
137
|
+
|
56
138
|
generated = Yasuri.json2tree(src)
|
57
139
|
original = Yasuri::LinksNode.new('/html/body/a', "root", [
|
58
140
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
59
141
|
])
|
60
142
|
|
61
|
-
compare_generated_vs_original(generated, original, @
|
143
|
+
compare_generated_vs_original(generated, original, @uri)
|
62
144
|
end
|
63
145
|
|
64
146
|
it "return PaginateNode/TextNode" do
|
65
|
-
src = %q|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
}|
|
147
|
+
src = %q|
|
148
|
+
{
|
149
|
+
"pages_root": {
|
150
|
+
"path": "/html/body/nav/span/a[@class=\'next\']",
|
151
|
+
"text_content": "/html/body/p"
|
152
|
+
}
|
153
|
+
}|
|
73
154
|
generated = Yasuri.json2tree(src)
|
74
155
|
original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
75
156
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
76
157
|
])
|
77
158
|
|
78
|
-
|
79
|
-
|
80
|
-
compare_generated_vs_original(generated, original, paginate_test_page)
|
159
|
+
uri = @uri + "/pagination/page01.html"
|
160
|
+
compare_generated_vs_original(generated, original, uri)
|
81
161
|
end
|
82
162
|
|
83
163
|
it "return PaginateNode/TextNode with limit" do
|
84
|
-
src = %q|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
}|
|
164
|
+
src = %q|
|
165
|
+
{
|
166
|
+
"pages_root": {
|
167
|
+
"path": "/html/body/nav/span/a[@class=\'next\']",
|
168
|
+
"limit": 2,
|
169
|
+
"text_content": "/html/body/p"
|
170
|
+
}
|
171
|
+
}|
|
93
172
|
generated = Yasuri.json2tree(src)
|
94
173
|
original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
95
174
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
96
175
|
], limit:2)
|
97
176
|
|
98
|
-
|
99
|
-
|
100
|
-
compare_generated_vs_original(generated, original, paginate_test_page)
|
177
|
+
uri = @uri + "/pagination/page01.html"
|
178
|
+
compare_generated_vs_original(generated, original, uri)
|
101
179
|
end
|
102
180
|
|
103
181
|
it "return StructNode/StructNode/[TextNode,TextNode]" do
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
},
|
116
|
-
{ "node" : "text",
|
117
|
-
"name" : "pub_date",
|
118
|
-
"path" : "./td[2]"
|
119
|
-
}]
|
120
|
-
}]
|
121
|
-
}|
|
182
|
+
src = %q|
|
183
|
+
{
|
184
|
+
"struct_tables": {
|
185
|
+
"path": "/html/body/table",
|
186
|
+
"struct_table": {
|
187
|
+
"path": "./tr",
|
188
|
+
"text_title": "./td[1]",
|
189
|
+
"text_pub_date": "./td[2]"
|
190
|
+
}
|
191
|
+
}
|
192
|
+
}|
|
122
193
|
generated = Yasuri.json2tree(src)
|
123
194
|
original = Yasuri::StructNode.new('/html/body/table', "tables", [
|
124
195
|
Yasuri::StructNode.new('./tr', "table", [
|
@@ -126,27 +197,27 @@ describe 'Yasuri' do
|
|
126
197
|
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
127
198
|
])
|
128
199
|
])
|
129
|
-
|
130
|
-
compare_generated_vs_original(generated, original,
|
200
|
+
uri = @uri + "/struct/structual_text.html"
|
201
|
+
compare_generated_vs_original(generated, original, uri)
|
131
202
|
end
|
132
203
|
end
|
133
204
|
|
205
|
+
|
134
206
|
#############
|
135
207
|
# tree2json #
|
136
208
|
#############
|
137
209
|
describe '.tree2json' do
|
138
210
|
it "return empty json" do
|
139
|
-
|
140
|
-
expect(json).to match "{}"
|
211
|
+
expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
|
141
212
|
end
|
142
213
|
|
143
214
|
it "return text node" do
|
144
215
|
node = Yasuri::TextNode.new("/html/head/title", "title")
|
145
216
|
json = Yasuri.tree2json(node)
|
146
|
-
expected_str = %q|
|
147
|
-
|
148
|
-
|
149
|
-
|
217
|
+
expected_str = %q|
|
218
|
+
{
|
219
|
+
"text_title": "/html/head/title"
|
220
|
+
}|
|
150
221
|
expected = JSON.parse(expected_str)
|
151
222
|
actual = JSON.parse(json)
|
152
223
|
expect(actual).to match expected
|
@@ -155,29 +226,49 @@ describe 'Yasuri' do
|
|
155
226
|
it "return text node with truncate_regexp" do
|
156
227
|
node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
|
157
228
|
json = Yasuri.tree2json(node)
|
158
|
-
expected_str = %q|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
229
|
+
expected_str = %q|
|
230
|
+
{
|
231
|
+
"text_title": {
|
232
|
+
"path": "/html/head/title",
|
233
|
+
"truncate": "^[^,]+"
|
234
|
+
}
|
235
|
+
}|
|
163
236
|
expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
|
164
237
|
actual = Yasuri.tree2json(Yasuri.json2tree(json))
|
165
238
|
expect(actual).to match expected
|
166
239
|
end
|
167
240
|
|
241
|
+
it "return map node with text nodes" do
|
242
|
+
tree = Yasuri::MapNode.new('parent', [
|
243
|
+
Yasuri::TextNode.new('/html/body/p[1]', "content01"),
|
244
|
+
Yasuri::TextNode.new('/html/body/p[2]', "content02"),
|
245
|
+
])
|
246
|
+
actual_json = Yasuri.tree2json(tree)
|
247
|
+
|
248
|
+
expected_json = %q|
|
249
|
+
{
|
250
|
+
"text_content01": "/html/body/p[1]",
|
251
|
+
"text_content02": "/html/body/p[2]"
|
252
|
+
}|
|
253
|
+
|
254
|
+
expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
|
255
|
+
actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
|
256
|
+
expect(actual).to match expected
|
257
|
+
end
|
258
|
+
|
168
259
|
it "return LinksNode/TextNode" do
|
169
260
|
tree = Yasuri::LinksNode.new('/html/body/a', "root", [
|
170
261
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
171
262
|
])
|
172
263
|
json = Yasuri.tree2json(tree)
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
264
|
+
|
265
|
+
expected_src = %q|
|
266
|
+
{
|
267
|
+
"links_root": {
|
268
|
+
"path": "/html/body/a",
|
269
|
+
"text_content":"/html/body/p"
|
270
|
+
}
|
271
|
+
}|
|
181
272
|
expected = JSON.parse(expected_src)
|
182
273
|
actual = JSON.parse(json)
|
183
274
|
expect(actual).to match expected
|
@@ -189,25 +280,84 @@ describe 'Yasuri' do
|
|
189
280
|
], limit:10)
|
190
281
|
|
191
282
|
json = Yasuri.tree2json(tree)
|
192
|
-
expected_src = %q|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
}|
|
283
|
+
expected_src = %q|
|
284
|
+
{
|
285
|
+
"pages_root": {
|
286
|
+
"path": "/html/body/nav/span/a[@class='next']",
|
287
|
+
"limit": 10,
|
288
|
+
"flatten": false,
|
289
|
+
"text_content": "/html/body/p"
|
290
|
+
}
|
291
|
+
}|
|
202
292
|
expected = JSON.parse(expected_src)
|
203
293
|
actual = JSON.parse(json)
|
204
294
|
expect(actual).to match expected
|
205
295
|
end
|
296
|
+
end
|
297
|
+
|
298
|
+
describe '.with_retry' do
|
299
|
+
it "call once if success" do
|
300
|
+
actual = Yasuri.with_retry(0){ 42 }
|
301
|
+
expect(actual).to match 42
|
302
|
+
end
|
303
|
+
|
304
|
+
it "call untile success" do
|
305
|
+
i = [1,1,0,0]
|
306
|
+
actual = Yasuri.with_retry(2){42 / i.pop } # 3 times in max
|
307
|
+
expect(actual).to match 42/1
|
308
|
+
end
|
309
|
+
|
310
|
+
it "raise error when exceed retry count" do
|
311
|
+
i = [1,0,0,0]
|
312
|
+
expect {
|
313
|
+
Yasuri.with_retry(2){42 / i.pop } # do this 3 times
|
314
|
+
}.to raise_error(Exception)
|
315
|
+
end
|
316
|
+
|
317
|
+
it "wait interval before run" do
|
318
|
+
allow(Kernel).to receive(:sleep)
|
319
|
+
Yasuri.with_retry(0){ 42 }
|
320
|
+
expect(Kernel).to have_received(:sleep).once
|
321
|
+
end
|
206
322
|
|
323
|
+
it "wait interval before run" do
|
324
|
+
allow(Kernel).to receive(:sleep)
|
325
|
+
Yasuri.with_retry(0){ 42 }
|
326
|
+
expect(Kernel).to have_received(:sleep).once
|
327
|
+
end
|
207
328
|
|
329
|
+
it "wait interval for each runs" do
|
330
|
+
allow(Kernel).to receive(:sleep)
|
208
331
|
|
332
|
+
i = [1,1,0,0]
|
333
|
+
Yasuri.with_retry(2){42 / i.pop } # 3 times in max
|
334
|
+
expect(Kernel).to have_received(:sleep).exactly(3).times
|
335
|
+
end
|
209
336
|
end
|
210
337
|
|
338
|
+
it "return StructNode/StructNode/[TextNode,TextNode]" do
|
339
|
+
tree = Yasuri::StructNode.new('/html/body/table', "tables", [
|
340
|
+
Yasuri::StructNode.new('./tr', "table", [
|
341
|
+
Yasuri::TextNode.new('./td[1]', "title"),
|
342
|
+
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
343
|
+
])
|
344
|
+
])
|
345
|
+
json = Yasuri.tree2json(tree)
|
346
|
+
expected_src = %q|
|
347
|
+
{
|
348
|
+
"struct_tables": {
|
349
|
+
"path": "/html/body/table",
|
350
|
+
"struct_table": {
|
351
|
+
"path": "./tr",
|
352
|
+
"text_title": "./td[1]",
|
353
|
+
"text_pub_date": "./td[2]"
|
354
|
+
}
|
355
|
+
}
|
356
|
+
}|
|
357
|
+
expected = JSON.parse(expected_src)
|
358
|
+
actual = JSON.parse(json)
|
359
|
+
expect(actual).to match expected
|
360
|
+
end
|
211
361
|
|
212
362
|
it 'has a version number' do
|
213
363
|
expect(Yasuri::VERSION).not_to be nil
|