yasuri 3.0.0 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/.rubocop.yml +49 -0
- data/.rubocop_todo.yml +0 -0
- data/README.md +70 -27
- data/Rakefile +1 -1
- data/USAGE.ja.md +366 -131
- data/USAGE.md +371 -136
- data/examples/example.rb +78 -0
- data/examples/github.yml +15 -0
- data/examples/sample.json +4 -0
- data/examples/sample.yml +11 -0
- data/exe/yasuri +5 -0
- data/lib/yasuri.rb +1 -0
- data/lib/yasuri/version.rb +1 -1
- data/lib/yasuri/yasuri.rb +96 -76
- data/lib/yasuri/yasuri_cli.rb +78 -0
- data/lib/yasuri/yasuri_links_node.rb +10 -6
- data/lib/yasuri/yasuri_map_node.rb +40 -0
- data/lib/yasuri/yasuri_node.rb +36 -4
- data/lib/yasuri/yasuri_node_generator.rb +14 -9
- data/lib/yasuri/yasuri_paginate_node.rb +26 -16
- data/lib/yasuri/yasuri_struct_node.rb +6 -4
- data/lib/yasuri/yasuri_text_node.rb +9 -7
- data/spec/cli_resources/tree.json +8 -0
- data/spec/cli_resources/tree.yml +5 -0
- data/spec/cli_resources/tree_wrong.json +9 -0
- data/spec/cli_resources/tree_wrong.yml +6 -0
- data/spec/servers/httpserver.rb +0 -2
- data/spec/spec_helper.rb +4 -6
- data/spec/yasuri_cli_spec.rb +114 -0
- data/spec/yasuri_links_node_spec.rb +82 -58
- data/spec/yasuri_map_spec.rb +71 -0
- data/spec/yasuri_paginate_node_spec.rb +99 -88
- data/spec/yasuri_spec.rb +196 -138
- data/spec/yasuri_struct_node_spec.rb +120 -100
- data/spec/yasuri_text_node_spec.rb +22 -32
- data/yasuri.gemspec +29 -22
- metadata +105 -15
- data/app.rb +0 -52
- data/spec/yasuri_node_spec.rb +0 -11
data/spec/yasuri_spec.rb
CHANGED
@@ -1,18 +1,8 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
# Author:: TAC (tac@tac42.net)
|
4
|
-
|
5
1
|
require_relative 'spec_helper'
|
6
2
|
|
7
3
|
describe 'Yasuri' do
|
8
4
|
include_context 'httpserver'
|
9
5
|
|
10
|
-
before do
|
11
|
-
@agent = Mechanize.new
|
12
|
-
@uri = uri
|
13
|
-
@index_page = @agent.get(@uri)
|
14
|
-
end
|
15
|
-
|
16
6
|
############
|
17
7
|
# yam2tree #
|
18
8
|
############
|
@@ -23,64 +13,49 @@ describe 'Yasuri' do
|
|
23
13
|
|
24
14
|
it "return text node" do
|
25
15
|
src = <<-EOB
|
26
|
-
|
27
|
-
|
28
|
-
path: "/html/body/p[1]"
|
29
|
-
EOB
|
16
|
+
text_content: "/html/body/p[1]"
|
17
|
+
EOB
|
30
18
|
generated = Yasuri.yaml2tree(src)
|
31
19
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
32
20
|
|
33
|
-
compare_generated_vs_original(generated, original,
|
21
|
+
compare_generated_vs_original(generated, original, uri)
|
34
22
|
end
|
35
23
|
|
36
24
|
it "return text node as symbol" do
|
37
25
|
src = <<-EOB
|
38
|
-
:
|
39
|
-
|
40
|
-
|
41
|
-
EOB
|
26
|
+
:text_content:
|
27
|
+
:path: "/html/body/p[1]"
|
28
|
+
EOB
|
42
29
|
generated = Yasuri.yaml2tree(src)
|
43
30
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
44
31
|
|
45
|
-
compare_generated_vs_original(generated, original,
|
32
|
+
compare_generated_vs_original(generated, original, uri)
|
46
33
|
end
|
47
34
|
|
48
35
|
it "return LinksNode/TextNode" do
|
49
36
|
|
50
37
|
src = <<-EOB
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
- content:
|
56
|
-
node: text
|
57
|
-
path: "/html/body/p"
|
58
|
-
EOB
|
38
|
+
links_root:
|
39
|
+
path: "/html/body/a"
|
40
|
+
text_content: "/html/body/p"
|
41
|
+
EOB
|
59
42
|
generated = Yasuri.yaml2tree(src)
|
60
43
|
original = Yasuri::LinksNode.new('/html/body/a', "root", [
|
61
44
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
62
45
|
])
|
63
46
|
|
64
|
-
compare_generated_vs_original(generated, original,
|
47
|
+
compare_generated_vs_original(generated, original, uri)
|
65
48
|
end
|
66
49
|
|
67
50
|
it "return StructNode/StructNode/[TextNode,TextNode]" do
|
68
51
|
src = <<-EOB
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
children:
|
77
|
-
- title:
|
78
|
-
node: text
|
79
|
-
path: "./td[1]"
|
80
|
-
- pub_date:
|
81
|
-
node: text
|
82
|
-
path: "./td[2]"
|
83
|
-
EOB
|
52
|
+
struct_tables:
|
53
|
+
path: "/html/body/table"
|
54
|
+
struct_table:
|
55
|
+
path: "./tr"
|
56
|
+
text_title: "./td[1]"
|
57
|
+
text_pub_date: "./td[2]"
|
58
|
+
EOB
|
84
59
|
|
85
60
|
generated = Yasuri.yaml2tree(src)
|
86
61
|
original = Yasuri::StructNode.new('/html/body/table', "tables", [
|
@@ -89,8 +64,8 @@ EOB
|
|
89
64
|
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
90
65
|
])
|
91
66
|
])
|
92
|
-
|
93
|
-
compare_generated_vs_original(generated, original,
|
67
|
+
test_uri = uri + "/struct/structual_text.html"
|
68
|
+
compare_generated_vs_original(generated, original, test_uri)
|
94
69
|
end
|
95
70
|
|
96
71
|
end # end of describe '.yaml2tree'
|
@@ -105,103 +80,107 @@ EOB
|
|
105
80
|
end
|
106
81
|
|
107
82
|
it "return TextNode" do
|
108
|
-
src = %q|
|
109
|
-
|
110
|
-
|
111
|
-
|
83
|
+
src = %q|
|
84
|
+
{
|
85
|
+
"text_content": "/html/body/p[1]"
|
86
|
+
}|
|
112
87
|
generated = Yasuri.json2tree(src)
|
113
88
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content")
|
114
89
|
|
115
|
-
compare_generated_vs_original(generated, original,
|
90
|
+
compare_generated_vs_original(generated, original, uri)
|
116
91
|
end
|
117
92
|
|
118
93
|
it "return TextNode with truncate_regexp" do
|
119
|
-
src = %q|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
94
|
+
src = %q|
|
95
|
+
{
|
96
|
+
"text_content": {
|
97
|
+
"path": "/html/body/p[1]",
|
98
|
+
"truncate" : "^[^,]+"
|
99
|
+
}
|
100
|
+
}|
|
124
101
|
generated = Yasuri.json2tree(src)
|
125
102
|
original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
|
126
|
-
compare_generated_vs_original(generated, original,
|
103
|
+
compare_generated_vs_original(generated, original, uri)
|
127
104
|
end
|
128
105
|
|
106
|
+
it "return MapNode with TextNodes" do
|
107
|
+
src = %q|
|
108
|
+
{
|
109
|
+
"text_content01": "/html/body/p[1]",
|
110
|
+
"text_content02": "/html/body/p[2]"
|
111
|
+
}|
|
112
|
+
generated = Yasuri.json2tree(src)
|
113
|
+
original = Yasuri::MapNode.new('parent', [
|
114
|
+
Yasuri::TextNode.new('/html/body/p[1]', "content01"),
|
115
|
+
Yasuri::TextNode.new('/html/body/p[2]', "content02"),
|
116
|
+
])
|
117
|
+
compare_generated_vs_original(generated, original, uri)
|
118
|
+
end
|
129
119
|
|
130
120
|
it "return LinksNode/TextNode" do
|
131
|
-
src = %q|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
121
|
+
src = %q|
|
122
|
+
{
|
123
|
+
"links_root": {
|
124
|
+
"path": "/html/body/a",
|
125
|
+
"text_content": "/html/body/p"
|
126
|
+
}
|
127
|
+
}|
|
128
|
+
|
139
129
|
generated = Yasuri.json2tree(src)
|
140
130
|
original = Yasuri::LinksNode.new('/html/body/a', "root", [
|
141
131
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
142
132
|
])
|
143
133
|
|
144
|
-
compare_generated_vs_original(generated, original,
|
134
|
+
compare_generated_vs_original(generated, original, uri)
|
145
135
|
end
|
146
136
|
|
147
137
|
it "return PaginateNode/TextNode" do
|
148
|
-
src = %q|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
}|
|
138
|
+
src = %q|
|
139
|
+
{
|
140
|
+
"pages_root": {
|
141
|
+
"path": "/html/body/nav/span/a[@class=\'next\']",
|
142
|
+
"text_content": "/html/body/p"
|
143
|
+
}
|
144
|
+
}|
|
156
145
|
generated = Yasuri.json2tree(src)
|
157
146
|
original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
158
147
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
159
148
|
])
|
160
149
|
|
161
|
-
|
162
|
-
|
163
|
-
compare_generated_vs_original(generated, original, paginate_test_page)
|
150
|
+
test_uri = uri + "/pagination/page01.html"
|
151
|
+
compare_generated_vs_original(generated, original, test_uri)
|
164
152
|
end
|
165
153
|
|
166
154
|
it "return PaginateNode/TextNode with limit" do
|
167
|
-
src = %q|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
}|
|
155
|
+
src = %q|
|
156
|
+
{
|
157
|
+
"pages_root": {
|
158
|
+
"path": "/html/body/nav/span/a[@class=\'next\']",
|
159
|
+
"limit": 2,
|
160
|
+
"text_content": "/html/body/p"
|
161
|
+
}
|
162
|
+
}|
|
176
163
|
generated = Yasuri.json2tree(src)
|
177
164
|
original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
|
178
165
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
179
166
|
], limit:2)
|
180
167
|
|
181
|
-
|
182
|
-
|
183
|
-
compare_generated_vs_original(generated, original, paginate_test_page)
|
168
|
+
test_uri = uri + "/pagination/page01.html"
|
169
|
+
compare_generated_vs_original(generated, original, test_uri)
|
184
170
|
end
|
185
171
|
|
186
172
|
it "return StructNode/StructNode/[TextNode,TextNode]" do
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
},
|
199
|
-
{ "node" : "text",
|
200
|
-
"name" : "pub_date",
|
201
|
-
"path" : "./td[2]"
|
202
|
-
}]
|
203
|
-
}]
|
204
|
-
}|
|
173
|
+
src = %q|
|
174
|
+
{
|
175
|
+
"struct_tables": {
|
176
|
+
"path": "/html/body/table",
|
177
|
+
"struct_table": {
|
178
|
+
"path": "./tr",
|
179
|
+
"text_title": "./td[1]",
|
180
|
+
"text_pub_date": "./td[2]"
|
181
|
+
}
|
182
|
+
}
|
183
|
+
}|
|
205
184
|
generated = Yasuri.json2tree(src)
|
206
185
|
original = Yasuri::StructNode.new('/html/body/table', "tables", [
|
207
186
|
Yasuri::StructNode.new('./tr', "table", [
|
@@ -209,27 +188,27 @@ EOB
|
|
209
188
|
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
210
189
|
])
|
211
190
|
])
|
212
|
-
|
213
|
-
compare_generated_vs_original(generated, original,
|
191
|
+
test_uri = uri + "/struct/structual_text.html"
|
192
|
+
compare_generated_vs_original(generated, original, test_uri)
|
214
193
|
end
|
215
194
|
end
|
216
195
|
|
196
|
+
|
217
197
|
#############
|
218
198
|
# tree2json #
|
219
199
|
#############
|
220
200
|
describe '.tree2json' do
|
221
201
|
it "return empty json" do
|
222
|
-
|
223
|
-
expect(json).to match "{}"
|
202
|
+
expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
|
224
203
|
end
|
225
204
|
|
226
205
|
it "return text node" do
|
227
206
|
node = Yasuri::TextNode.new("/html/head/title", "title")
|
228
207
|
json = Yasuri.tree2json(node)
|
229
|
-
expected_str = %q|
|
230
|
-
|
231
|
-
|
232
|
-
|
208
|
+
expected_str = %q|
|
209
|
+
{
|
210
|
+
"text_title": "/html/head/title"
|
211
|
+
}|
|
233
212
|
expected = JSON.parse(expected_str)
|
234
213
|
actual = JSON.parse(json)
|
235
214
|
expect(actual).to match expected
|
@@ -238,29 +217,49 @@ EOB
|
|
238
217
|
it "return text node with truncate_regexp" do
|
239
218
|
node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
|
240
219
|
json = Yasuri.tree2json(node)
|
241
|
-
expected_str = %q|
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
220
|
+
expected_str = %q|
|
221
|
+
{
|
222
|
+
"text_title": {
|
223
|
+
"path": "/html/head/title",
|
224
|
+
"truncate": "^[^,]+"
|
225
|
+
}
|
226
|
+
}|
|
246
227
|
expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
|
247
228
|
actual = Yasuri.tree2json(Yasuri.json2tree(json))
|
248
229
|
expect(actual).to match expected
|
249
230
|
end
|
250
231
|
|
232
|
+
it "return map node with text nodes" do
|
233
|
+
tree = Yasuri::MapNode.new('parent', [
|
234
|
+
Yasuri::TextNode.new('/html/body/p[1]', "content01"),
|
235
|
+
Yasuri::TextNode.new('/html/body/p[2]', "content02"),
|
236
|
+
])
|
237
|
+
actual_json = Yasuri.tree2json(tree)
|
238
|
+
|
239
|
+
expected_json = %q|
|
240
|
+
{
|
241
|
+
"text_content01": "/html/body/p[1]",
|
242
|
+
"text_content02": "/html/body/p[2]"
|
243
|
+
}|
|
244
|
+
|
245
|
+
expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
|
246
|
+
actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
|
247
|
+
expect(actual).to match expected
|
248
|
+
end
|
249
|
+
|
251
250
|
it "return LinksNode/TextNode" do
|
252
251
|
tree = Yasuri::LinksNode.new('/html/body/a', "root", [
|
253
252
|
Yasuri::TextNode.new('/html/body/p', "content"),
|
254
253
|
])
|
255
254
|
json = Yasuri.tree2json(tree)
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
255
|
+
|
256
|
+
expected_src = %q|
|
257
|
+
{
|
258
|
+
"links_root": {
|
259
|
+
"path": "/html/body/a",
|
260
|
+
"text_content":"/html/body/p"
|
261
|
+
}
|
262
|
+
}|
|
264
263
|
expected = JSON.parse(expected_src)
|
265
264
|
actual = JSON.parse(json)
|
266
265
|
expect(actual).to match expected
|
@@ -272,25 +271,84 @@ EOB
|
|
272
271
|
], limit:10)
|
273
272
|
|
274
273
|
json = Yasuri.tree2json(tree)
|
275
|
-
expected_src = %q|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
}|
|
274
|
+
expected_src = %q|
|
275
|
+
{
|
276
|
+
"pages_root": {
|
277
|
+
"path": "/html/body/nav/span/a[@class='next']",
|
278
|
+
"limit": 10,
|
279
|
+
"flatten": false,
|
280
|
+
"text_content": "/html/body/p"
|
281
|
+
}
|
282
|
+
}|
|
285
283
|
expected = JSON.parse(expected_src)
|
286
284
|
actual = JSON.parse(json)
|
287
285
|
expect(actual).to match expected
|
288
286
|
end
|
287
|
+
end
|
288
|
+
|
289
|
+
describe '.with_retry' do
|
290
|
+
it "call once if success" do
|
291
|
+
actual = Yasuri.with_retry(0){ 42 }
|
292
|
+
expect(actual).to match 42
|
293
|
+
end
|
294
|
+
|
295
|
+
it "call untile success" do
|
296
|
+
i = [1,1,0,0]
|
297
|
+
actual = Yasuri.with_retry(2){42 / i.pop } # 3 times in max
|
298
|
+
expect(actual).to match 42/1
|
299
|
+
end
|
300
|
+
|
301
|
+
it "raise error when exceed retry count" do
|
302
|
+
i = [1,0,0,0]
|
303
|
+
expect {
|
304
|
+
Yasuri.with_retry(2){42 / i.pop } # do this 3 times
|
305
|
+
}.to raise_error(Exception)
|
306
|
+
end
|
307
|
+
|
308
|
+
it "wait interval before run" do
|
309
|
+
allow(Kernel).to receive(:sleep)
|
310
|
+
Yasuri.with_retry(0){ 42 }
|
311
|
+
expect(Kernel).to have_received(:sleep).once
|
312
|
+
end
|
289
313
|
|
314
|
+
it "wait interval before run" do
|
315
|
+
allow(Kernel).to receive(:sleep)
|
316
|
+
Yasuri.with_retry(0){ 42 }
|
317
|
+
expect(Kernel).to have_received(:sleep).once
|
318
|
+
end
|
290
319
|
|
320
|
+
it "wait interval for each runs" do
|
321
|
+
allow(Kernel).to receive(:sleep)
|
291
322
|
|
323
|
+
i = [1,1,0,0]
|
324
|
+
Yasuri.with_retry(2){42 / i.pop } # 3 times in max
|
325
|
+
expect(Kernel).to have_received(:sleep).exactly(3).times
|
326
|
+
end
|
292
327
|
end
|
293
328
|
|
329
|
+
it "return StructNode/StructNode/[TextNode,TextNode]" do
|
330
|
+
tree = Yasuri::StructNode.new('/html/body/table', "tables", [
|
331
|
+
Yasuri::StructNode.new('./tr', "table", [
|
332
|
+
Yasuri::TextNode.new('./td[1]', "title"),
|
333
|
+
Yasuri::TextNode.new('./td[2]', "pub_date"),
|
334
|
+
])
|
335
|
+
])
|
336
|
+
json = Yasuri.tree2json(tree)
|
337
|
+
expected_src = %q|
|
338
|
+
{
|
339
|
+
"struct_tables": {
|
340
|
+
"path": "/html/body/table",
|
341
|
+
"struct_table": {
|
342
|
+
"path": "./tr",
|
343
|
+
"text_title": "./td[1]",
|
344
|
+
"text_pub_date": "./td[2]"
|
345
|
+
}
|
346
|
+
}
|
347
|
+
}|
|
348
|
+
expected = JSON.parse(expected_src)
|
349
|
+
actual = JSON.parse(json)
|
350
|
+
expect(actual).to match expected
|
351
|
+
end
|
294
352
|
|
295
353
|
it 'has a version number' do
|
296
354
|
expect(Yasuri::VERSION).not_to be nil
|