yasuri 2.0.13 → 3.3.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/spec/yasuri_spec.rb CHANGED
@@ -1,18 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Author:: TAC (tac@tac42.net)
4
-
5
1
  require_relative 'spec_helper'
6
2
 
7
3
  describe 'Yasuri' do
8
4
  include_context 'httpserver'
9
5
 
10
- before do
11
- @agent = Mechanize.new
12
- @uri = uri
13
- @index_page = @agent.get(@uri)
14
- end
15
-
16
6
  ############
17
7
  # yam2tree #
18
8
  ############
@@ -23,64 +13,49 @@ describe 'Yasuri' do
23
13
 
24
14
  it "return text node" do
25
15
  src = <<-EOB
26
- content:
27
- node: text
28
- path: "/html/body/p[1]"
29
- EOB
16
+ text_content: "/html/body/p[1]"
17
+ EOB
30
18
  generated = Yasuri.yaml2tree(src)
31
19
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
32
20
 
33
- compare_generated_vs_original(generated, original, @index_page)
21
+ compare_generated_vs_original(generated, original, uri)
34
22
  end
35
23
 
36
24
  it "return text node as symbol" do
37
25
  src = <<-EOB
38
- :content:
39
- :node: text
40
- :path: "/html/body/p[1]"
41
- EOB
26
+ :text_content:
27
+ :path: "/html/body/p[1]"
28
+ EOB
42
29
  generated = Yasuri.yaml2tree(src)
43
30
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
44
31
 
45
- compare_generated_vs_original(generated, original, @index_page)
32
+ compare_generated_vs_original(generated, original, uri)
46
33
  end
47
34
 
48
35
  it "return LinksNode/TextNode" do
49
36
 
50
37
  src = <<-EOB
51
- root:
52
- node: links
53
- path: "/html/body/a"
54
- children:
55
- - content:
56
- node: text
57
- path: "/html/body/p"
58
- EOB
38
+ links_root:
39
+ path: "/html/body/a"
40
+ text_content: "/html/body/p"
41
+ EOB
59
42
  generated = Yasuri.yaml2tree(src)
60
43
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
61
44
  Yasuri::TextNode.new('/html/body/p', "content"),
62
45
  ])
63
46
 
64
- compare_generated_vs_original(generated, original, @index_page)
47
+ compare_generated_vs_original(generated, original, uri)
65
48
  end
66
49
 
67
50
  it "return StructNode/StructNode/[TextNode,TextNode]" do
68
51
  src = <<-EOB
69
- tables:
70
- node: struct
71
- path: "/html/body/table"
72
- children:
73
- - table:
74
- node: struct
75
- path: "./tr"
76
- children:
77
- - title:
78
- node: text
79
- path: "./td[1]"
80
- - pub_date:
81
- node: text
82
- path: "./td[2]"
83
- EOB
52
+ struct_tables:
53
+ path: "/html/body/table"
54
+ struct_table:
55
+ path: "./tr"
56
+ text_title: "./td[1]"
57
+ text_pub_date: "./td[2]"
58
+ EOB
84
59
 
85
60
  generated = Yasuri.yaml2tree(src)
86
61
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
@@ -89,8 +64,8 @@ EOB
89
64
  Yasuri::TextNode.new('./td[2]', "pub_date"),
90
65
  ])
91
66
  ])
92
- page = @agent.get(@uri + "/struct/structual_text.html")
93
- compare_generated_vs_original(generated, original, page)
67
+ test_uri = uri + "/struct/structual_text.html"
68
+ compare_generated_vs_original(generated, original, test_uri)
94
69
  end
95
70
 
96
71
  end # end of describe '.yaml2tree'
@@ -105,103 +80,107 @@ EOB
105
80
  end
106
81
 
107
82
  it "return TextNode" do
108
- src = %q| { "node" : "text",
109
- "name" : "content",
110
- "path" : "/html/body/p[1]"
111
- }|
83
+ src = %q|
84
+ {
85
+ "text_content": "/html/body/p[1]"
86
+ }|
112
87
  generated = Yasuri.json2tree(src)
113
88
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
114
89
 
115
- compare_generated_vs_original(generated, original, @index_page)
90
+ compare_generated_vs_original(generated, original, uri)
116
91
  end
117
92
 
118
93
  it "return TextNode with truncate_regexp" do
119
- src = %q| { "node" : "text",
120
- "name" : "content",
121
- "path" : "/html/body/p[1]",
122
- "truncate" : "^[^,]+"
123
- }|
94
+ src = %q|
95
+ {
96
+ "text_content": {
97
+ "path": "/html/body/p[1]",
98
+ "truncate" : "^[^,]+"
99
+ }
100
+ }|
124
101
  generated = Yasuri.json2tree(src)
125
102
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
126
- compare_generated_vs_original(generated, original, @index_page)
103
+ compare_generated_vs_original(generated, original, uri)
127
104
  end
128
105
 
106
+ it "return MapNode with TextNodes" do
107
+ src = %q|
108
+ {
109
+ "text_content01": "/html/body/p[1]",
110
+ "text_content02": "/html/body/p[2]"
111
+ }|
112
+ generated = Yasuri.json2tree(src)
113
+ original = Yasuri::MapNode.new('parent', [
114
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
115
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
116
+ ])
117
+ compare_generated_vs_original(generated, original, uri)
118
+ end
129
119
 
130
120
  it "return LinksNode/TextNode" do
131
- src = %q| { "node" : "links",
132
- "name" : "root",
133
- "path" : "/html/body/a",
134
- "children" : [ { "node" : "text",
135
- "name" : "content",
136
- "path" : "/html/body/p"
137
- } ]
138
- }|
121
+ src = %q|
122
+ {
123
+ "links_root": {
124
+ "path": "/html/body/a",
125
+ "text_content": "/html/body/p"
126
+ }
127
+ }|
128
+
139
129
  generated = Yasuri.json2tree(src)
140
130
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
141
131
  Yasuri::TextNode.new('/html/body/p', "content"),
142
132
  ])
143
133
 
144
- compare_generated_vs_original(generated, original, @index_page)
134
+ compare_generated_vs_original(generated, original, uri)
145
135
  end
146
136
 
147
137
  it "return PaginateNode/TextNode" do
148
- src = %q|{ "node" : "pages",
149
- "name" : "root",
150
- "path" : "/html/body/nav/span/a[@class=\'next\']",
151
- "children" : [ { "node" : "text",
152
- "name" : "content",
153
- "path" : "/html/body/p"
154
- } ]
155
- }|
138
+ src = %q|
139
+ {
140
+ "pages_root": {
141
+ "path": "/html/body/nav/span/a[@class=\'next\']",
142
+ "text_content": "/html/body/p"
143
+ }
144
+ }|
156
145
  generated = Yasuri.json2tree(src)
157
146
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
158
147
  Yasuri::TextNode.new('/html/body/p', "content"),
159
148
  ])
160
149
 
161
- paginate_test_uri = @uri + "/pagination/page01.html"
162
- paginate_test_page = @agent.get(paginate_test_uri)
163
- compare_generated_vs_original(generated, original, paginate_test_page)
150
+ test_uri = uri + "/pagination/page01.html"
151
+ compare_generated_vs_original(generated, original, test_uri)
164
152
  end
165
153
 
166
154
  it "return PaginateNode/TextNode with limit" do
167
- src = %q|{ "node" : "pages",
168
- "name" : "root",
169
- "path" : "/html/body/nav/span/a[@class=\'next\']",
170
- "limit" : 2,
171
- "children" : [ { "node" : "text",
172
- "name" : "content",
173
- "path" : "/html/body/p"
174
- } ]
175
- }|
155
+ src = %q|
156
+ {
157
+ "pages_root": {
158
+ "path": "/html/body/nav/span/a[@class=\'next\']",
159
+ "limit": 2,
160
+ "text_content": "/html/body/p"
161
+ }
162
+ }|
176
163
  generated = Yasuri.json2tree(src)
177
164
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
178
165
  Yasuri::TextNode.new('/html/body/p', "content"),
179
166
  ], limit:2)
180
167
 
181
- paginate_test_uri = @uri + "/pagination/page01.html"
182
- paginate_test_page = @agent.get(paginate_test_uri)
183
- compare_generated_vs_original(generated, original, paginate_test_page)
168
+ test_uri = uri + "/pagination/page01.html"
169
+ compare_generated_vs_original(generated, original, test_uri)
184
170
  end
185
171
 
186
172
  it "return StructNode/StructNode/[TextNode,TextNode]" do
187
- src = %q| { "node" : "struct",
188
- "name" : "tables",
189
- "path" : "/html/body/table",
190
- "children" : [
191
- { "node" : "struct",
192
- "name" : "table",
193
- "path" : "./tr",
194
- "children" : [
195
- { "node" : "text",
196
- "name" : "title",
197
- "path" : "./td[1]"
198
- },
199
- { "node" : "text",
200
- "name" : "pub_date",
201
- "path" : "./td[2]"
202
- }]
203
- }]
204
- }|
173
+ src = %q|
174
+ {
175
+ "struct_tables": {
176
+ "path": "/html/body/table",
177
+ "struct_table": {
178
+ "path": "./tr",
179
+ "text_title": "./td[1]",
180
+ "text_pub_date": "./td[2]"
181
+ }
182
+ }
183
+ }|
205
184
  generated = Yasuri.json2tree(src)
206
185
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
207
186
  Yasuri::StructNode.new('./tr', "table", [
@@ -209,27 +188,27 @@ EOB
209
188
  Yasuri::TextNode.new('./td[2]', "pub_date"),
210
189
  ])
211
190
  ])
212
- page = @agent.get(@uri + "/struct/structual_text.html")
213
- compare_generated_vs_original(generated, original, page)
191
+ test_uri = uri + "/struct/structual_text.html"
192
+ compare_generated_vs_original(generated, original, test_uri)
214
193
  end
215
194
  end
216
195
 
196
+
217
197
  #############
218
198
  # tree2json #
219
199
  #############
220
200
  describe '.tree2json' do
221
201
  it "return empty json" do
222
- json = Yasuri.tree2json(nil)
223
- expect(json).to match "{}"
202
+ expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
224
203
  end
225
204
 
226
205
  it "return text node" do
227
206
  node = Yasuri::TextNode.new("/html/head/title", "title")
228
207
  json = Yasuri.tree2json(node)
229
- expected_str = %q| { "node": "text",
230
- "name": "title",
231
- "path": "/html/head/title"
232
- } |
208
+ expected_str = %q|
209
+ {
210
+ "text_title": "/html/head/title"
211
+ }|
233
212
  expected = JSON.parse(expected_str)
234
213
  actual = JSON.parse(json)
235
214
  expect(actual).to match expected
@@ -238,29 +217,49 @@ EOB
238
217
  it "return text node with truncate_regexp" do
239
218
  node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
240
219
  json = Yasuri.tree2json(node)
241
- expected_str = %q| { "node": "text",
242
- "name": "title",
243
- "path": "/html/head/title",
244
- "truncate": "^[^,]+"
245
- } |
220
+ expected_str = %q|
221
+ {
222
+ "text_title": {
223
+ "path": "/html/head/title",
224
+ "truncate": "^[^,]+"
225
+ }
226
+ }|
246
227
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
247
228
  actual = Yasuri.tree2json(Yasuri.json2tree(json))
248
229
  expect(actual).to match expected
249
230
  end
250
231
 
232
+ it "return map node with text nodes" do
233
+ tree = Yasuri::MapNode.new('parent', [
234
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
235
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
236
+ ])
237
+ actual_json = Yasuri.tree2json(tree)
238
+
239
+ expected_json = %q|
240
+ {
241
+ "text_content01": "/html/body/p[1]",
242
+ "text_content02": "/html/body/p[2]"
243
+ }|
244
+
245
+ expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
246
+ actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
247
+ expect(actual).to match expected
248
+ end
249
+
251
250
  it "return LinksNode/TextNode" do
252
251
  tree = Yasuri::LinksNode.new('/html/body/a', "root", [
253
252
  Yasuri::TextNode.new('/html/body/p', "content"),
254
253
  ])
255
254
  json = Yasuri.tree2json(tree)
256
- expected_src = %q| { "node" : "links",
257
- "name" : "root",
258
- "path" : "/html/body/a",
259
- "children" : [ { "node" : "text",
260
- "name" : "content",
261
- "path" : "/html/body/p"
262
- } ]
263
- }|
255
+
256
+ expected_src = %q|
257
+ {
258
+ "links_root": {
259
+ "path": "/html/body/a",
260
+ "text_content":"/html/body/p"
261
+ }
262
+ }|
264
263
  expected = JSON.parse(expected_src)
265
264
  actual = JSON.parse(json)
266
265
  expect(actual).to match expected
@@ -272,25 +271,84 @@ EOB
272
271
  ], limit:10)
273
272
 
274
273
  json = Yasuri.tree2json(tree)
275
- expected_src = %q| { "node" : "pages",
276
- "name" : "root",
277
- "path" : "/html/body/nav/span/a[@class='next']",
278
- "limit" : 10,
279
- "flatten" : false,
280
- "children" : [ { "node" : "text",
281
- "name" : "content",
282
- "path" : "/html/body/p"
283
- } ]
284
- }|
274
+ expected_src = %q|
275
+ {
276
+ "pages_root": {
277
+ "path": "/html/body/nav/span/a[@class='next']",
278
+ "limit": 10,
279
+ "flatten": false,
280
+ "text_content": "/html/body/p"
281
+ }
282
+ }|
285
283
  expected = JSON.parse(expected_src)
286
284
  actual = JSON.parse(json)
287
285
  expect(actual).to match expected
288
286
  end
287
+ end
288
+
289
+ describe '.with_retry' do
290
+ it "call once if success" do
291
+ actual = Yasuri.with_retry(0){ 42 }
292
+ expect(actual).to match 42
293
+ end
294
+
295
+ it "call untile success" do
296
+ i = [1,1,0,0]
297
+ actual = Yasuri.with_retry(2){42 / i.pop } # 3 times in max
298
+ expect(actual).to match 42/1
299
+ end
300
+
301
+ it "raise error when exceed retry count" do
302
+ i = [1,0,0,0]
303
+ expect {
304
+ Yasuri.with_retry(2){42 / i.pop } # do this 3 times
305
+ }.to raise_error(Exception)
306
+ end
307
+
308
+ it "wait interval before run" do
309
+ allow(Kernel).to receive(:sleep)
310
+ Yasuri.with_retry(0){ 42 }
311
+ expect(Kernel).to have_received(:sleep).once
312
+ end
289
313
 
314
+ it "wait interval before run" do
315
+ allow(Kernel).to receive(:sleep)
316
+ Yasuri.with_retry(0){ 42 }
317
+ expect(Kernel).to have_received(:sleep).once
318
+ end
290
319
 
320
+ it "wait interval for each runs" do
321
+ allow(Kernel).to receive(:sleep)
291
322
 
323
+ i = [1,1,0,0]
324
+ Yasuri.with_retry(2){42 / i.pop } # 3 times in max
325
+ expect(Kernel).to have_received(:sleep).exactly(3).times
326
+ end
292
327
  end
293
328
 
329
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
330
+ tree = Yasuri::StructNode.new('/html/body/table', "tables", [
331
+ Yasuri::StructNode.new('./tr', "table", [
332
+ Yasuri::TextNode.new('./td[1]', "title"),
333
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
334
+ ])
335
+ ])
336
+ json = Yasuri.tree2json(tree)
337
+ expected_src = %q|
338
+ {
339
+ "struct_tables": {
340
+ "path": "/html/body/table",
341
+ "struct_table": {
342
+ "path": "./tr",
343
+ "text_title": "./td[1]",
344
+ "text_pub_date": "./td[2]"
345
+ }
346
+ }
347
+ }|
348
+ expected = JSON.parse(expected_src)
349
+ actual = JSON.parse(json)
350
+ expect(actual).to match expected
351
+ end
294
352
 
295
353
  it 'has a version number' do
296
354
  expect(Yasuri::VERSION).not_to be nil