yasuri 2.0.13 → 3.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/spec/yasuri_spec.rb CHANGED
@@ -1,18 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Author:: TAC (tac@tac42.net)
4
-
5
1
  require_relative 'spec_helper'
6
2
 
7
3
  describe 'Yasuri' do
8
4
  include_context 'httpserver'
9
5
 
10
- before do
11
- @agent = Mechanize.new
12
- @uri = uri
13
- @index_page = @agent.get(@uri)
14
- end
15
-
16
6
  ############
17
7
  # yam2tree #
18
8
  ############
@@ -23,64 +13,49 @@ describe 'Yasuri' do
23
13
 
24
14
  it "return text node" do
25
15
  src = <<-EOB
26
- content:
27
- node: text
28
- path: "/html/body/p[1]"
29
- EOB
16
+ text_content: "/html/body/p[1]"
17
+ EOB
30
18
  generated = Yasuri.yaml2tree(src)
31
19
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
32
20
 
33
- compare_generated_vs_original(generated, original, @index_page)
21
+ compare_generated_vs_original(generated, original, uri)
34
22
  end
35
23
 
36
24
  it "return text node as symbol" do
37
25
  src = <<-EOB
38
- :content:
39
- :node: text
40
- :path: "/html/body/p[1]"
41
- EOB
26
+ :text_content:
27
+ :path: "/html/body/p[1]"
28
+ EOB
42
29
  generated = Yasuri.yaml2tree(src)
43
30
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
44
31
 
45
- compare_generated_vs_original(generated, original, @index_page)
32
+ compare_generated_vs_original(generated, original, uri)
46
33
  end
47
34
 
48
35
  it "return LinksNode/TextNode" do
49
36
 
50
37
  src = <<-EOB
51
- root:
52
- node: links
53
- path: "/html/body/a"
54
- children:
55
- - content:
56
- node: text
57
- path: "/html/body/p"
58
- EOB
38
+ links_root:
39
+ path: "/html/body/a"
40
+ text_content: "/html/body/p"
41
+ EOB
59
42
  generated = Yasuri.yaml2tree(src)
60
43
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
61
44
  Yasuri::TextNode.new('/html/body/p', "content"),
62
45
  ])
63
46
 
64
- compare_generated_vs_original(generated, original, @index_page)
47
+ compare_generated_vs_original(generated, original, uri)
65
48
  end
66
49
 
67
50
  it "return StructNode/StructNode/[TextNode,TextNode]" do
68
51
  src = <<-EOB
69
- tables:
70
- node: struct
71
- path: "/html/body/table"
72
- children:
73
- - table:
74
- node: struct
75
- path: "./tr"
76
- children:
77
- - title:
78
- node: text
79
- path: "./td[1]"
80
- - pub_date:
81
- node: text
82
- path: "./td[2]"
83
- EOB
52
+ struct_tables:
53
+ path: "/html/body/table"
54
+ struct_table:
55
+ path: "./tr"
56
+ text_title: "./td[1]"
57
+ text_pub_date: "./td[2]"
58
+ EOB
84
59
 
85
60
  generated = Yasuri.yaml2tree(src)
86
61
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
@@ -89,8 +64,8 @@ EOB
89
64
  Yasuri::TextNode.new('./td[2]', "pub_date"),
90
65
  ])
91
66
  ])
92
- page = @agent.get(@uri + "/struct/structual_text.html")
93
- compare_generated_vs_original(generated, original, page)
67
+ test_uri = uri + "/struct/structual_text.html"
68
+ compare_generated_vs_original(generated, original, test_uri)
94
69
  end
95
70
 
96
71
  end # end of describe '.yaml2tree'
@@ -105,103 +80,107 @@ EOB
105
80
  end
106
81
 
107
82
  it "return TextNode" do
108
- src = %q| { "node" : "text",
109
- "name" : "content",
110
- "path" : "/html/body/p[1]"
111
- }|
83
+ src = %q|
84
+ {
85
+ "text_content": "/html/body/p[1]"
86
+ }|
112
87
  generated = Yasuri.json2tree(src)
113
88
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
114
89
 
115
- compare_generated_vs_original(generated, original, @index_page)
90
+ compare_generated_vs_original(generated, original, uri)
116
91
  end
117
92
 
118
93
  it "return TextNode with truncate_regexp" do
119
- src = %q| { "node" : "text",
120
- "name" : "content",
121
- "path" : "/html/body/p[1]",
122
- "truncate" : "^[^,]+"
123
- }|
94
+ src = %q|
95
+ {
96
+ "text_content": {
97
+ "path": "/html/body/p[1]",
98
+ "truncate" : "^[^,]+"
99
+ }
100
+ }|
124
101
  generated = Yasuri.json2tree(src)
125
102
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
126
- compare_generated_vs_original(generated, original, @index_page)
103
+ compare_generated_vs_original(generated, original, uri)
127
104
  end
128
105
 
106
+ it "return MapNode with TextNodes" do
107
+ src = %q|
108
+ {
109
+ "text_content01": "/html/body/p[1]",
110
+ "text_content02": "/html/body/p[2]"
111
+ }|
112
+ generated = Yasuri.json2tree(src)
113
+ original = Yasuri::MapNode.new('parent', [
114
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
115
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
116
+ ])
117
+ compare_generated_vs_original(generated, original, uri)
118
+ end
129
119
 
130
120
  it "return LinksNode/TextNode" do
131
- src = %q| { "node" : "links",
132
- "name" : "root",
133
- "path" : "/html/body/a",
134
- "children" : [ { "node" : "text",
135
- "name" : "content",
136
- "path" : "/html/body/p"
137
- } ]
138
- }|
121
+ src = %q|
122
+ {
123
+ "links_root": {
124
+ "path": "/html/body/a",
125
+ "text_content": "/html/body/p"
126
+ }
127
+ }|
128
+
139
129
  generated = Yasuri.json2tree(src)
140
130
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
141
131
  Yasuri::TextNode.new('/html/body/p', "content"),
142
132
  ])
143
133
 
144
- compare_generated_vs_original(generated, original, @index_page)
134
+ compare_generated_vs_original(generated, original, uri)
145
135
  end
146
136
 
147
137
  it "return PaginateNode/TextNode" do
148
- src = %q|{ "node" : "pages",
149
- "name" : "root",
150
- "path" : "/html/body/nav/span/a[@class=\'next\']",
151
- "children" : [ { "node" : "text",
152
- "name" : "content",
153
- "path" : "/html/body/p"
154
- } ]
155
- }|
138
+ src = %q|
139
+ {
140
+ "pages_root": {
141
+ "path": "/html/body/nav/span/a[@class=\'next\']",
142
+ "text_content": "/html/body/p"
143
+ }
144
+ }|
156
145
  generated = Yasuri.json2tree(src)
157
146
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
158
147
  Yasuri::TextNode.new('/html/body/p', "content"),
159
148
  ])
160
149
 
161
- paginate_test_uri = @uri + "/pagination/page01.html"
162
- paginate_test_page = @agent.get(paginate_test_uri)
163
- compare_generated_vs_original(generated, original, paginate_test_page)
150
+ test_uri = uri + "/pagination/page01.html"
151
+ compare_generated_vs_original(generated, original, test_uri)
164
152
  end
165
153
 
166
154
  it "return PaginateNode/TextNode with limit" do
167
- src = %q|{ "node" : "pages",
168
- "name" : "root",
169
- "path" : "/html/body/nav/span/a[@class=\'next\']",
170
- "limit" : 2,
171
- "children" : [ { "node" : "text",
172
- "name" : "content",
173
- "path" : "/html/body/p"
174
- } ]
175
- }|
155
+ src = %q|
156
+ {
157
+ "pages_root": {
158
+ "path": "/html/body/nav/span/a[@class=\'next\']",
159
+ "limit": 2,
160
+ "text_content": "/html/body/p"
161
+ }
162
+ }|
176
163
  generated = Yasuri.json2tree(src)
177
164
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
178
165
  Yasuri::TextNode.new('/html/body/p', "content"),
179
166
  ], limit:2)
180
167
 
181
- paginate_test_uri = @uri + "/pagination/page01.html"
182
- paginate_test_page = @agent.get(paginate_test_uri)
183
- compare_generated_vs_original(generated, original, paginate_test_page)
168
+ test_uri = uri + "/pagination/page01.html"
169
+ compare_generated_vs_original(generated, original, test_uri)
184
170
  end
185
171
 
186
172
  it "return StructNode/StructNode/[TextNode,TextNode]" do
187
- src = %q| { "node" : "struct",
188
- "name" : "tables",
189
- "path" : "/html/body/table",
190
- "children" : [
191
- { "node" : "struct",
192
- "name" : "table",
193
- "path" : "./tr",
194
- "children" : [
195
- { "node" : "text",
196
- "name" : "title",
197
- "path" : "./td[1]"
198
- },
199
- { "node" : "text",
200
- "name" : "pub_date",
201
- "path" : "./td[2]"
202
- }]
203
- }]
204
- }|
173
+ src = %q|
174
+ {
175
+ "struct_tables": {
176
+ "path": "/html/body/table",
177
+ "struct_table": {
178
+ "path": "./tr",
179
+ "text_title": "./td[1]",
180
+ "text_pub_date": "./td[2]"
181
+ }
182
+ }
183
+ }|
205
184
  generated = Yasuri.json2tree(src)
206
185
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
207
186
  Yasuri::StructNode.new('./tr', "table", [
@@ -209,27 +188,27 @@ EOB
209
188
  Yasuri::TextNode.new('./td[2]', "pub_date"),
210
189
  ])
211
190
  ])
212
- page = @agent.get(@uri + "/struct/structual_text.html")
213
- compare_generated_vs_original(generated, original, page)
191
+ test_uri = uri + "/struct/structual_text.html"
192
+ compare_generated_vs_original(generated, original, test_uri)
214
193
  end
215
194
  end
216
195
 
196
+
217
197
  #############
218
198
  # tree2json #
219
199
  #############
220
200
  describe '.tree2json' do
221
201
  it "return empty json" do
222
- json = Yasuri.tree2json(nil)
223
- expect(json).to match "{}"
202
+ expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
224
203
  end
225
204
 
226
205
  it "return text node" do
227
206
  node = Yasuri::TextNode.new("/html/head/title", "title")
228
207
  json = Yasuri.tree2json(node)
229
- expected_str = %q| { "node": "text",
230
- "name": "title",
231
- "path": "/html/head/title"
232
- } |
208
+ expected_str = %q|
209
+ {
210
+ "text_title": "/html/head/title"
211
+ }|
233
212
  expected = JSON.parse(expected_str)
234
213
  actual = JSON.parse(json)
235
214
  expect(actual).to match expected
@@ -238,29 +217,49 @@ EOB
238
217
  it "return text node with truncate_regexp" do
239
218
  node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
240
219
  json = Yasuri.tree2json(node)
241
- expected_str = %q| { "node": "text",
242
- "name": "title",
243
- "path": "/html/head/title",
244
- "truncate": "^[^,]+"
245
- } |
220
+ expected_str = %q|
221
+ {
222
+ "text_title": {
223
+ "path": "/html/head/title",
224
+ "truncate": "^[^,]+"
225
+ }
226
+ }|
246
227
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
247
228
  actual = Yasuri.tree2json(Yasuri.json2tree(json))
248
229
  expect(actual).to match expected
249
230
  end
250
231
 
232
+ it "return map node with text nodes" do
233
+ tree = Yasuri::MapNode.new('parent', [
234
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
235
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
236
+ ])
237
+ actual_json = Yasuri.tree2json(tree)
238
+
239
+ expected_json = %q|
240
+ {
241
+ "text_content01": "/html/body/p[1]",
242
+ "text_content02": "/html/body/p[2]"
243
+ }|
244
+
245
+ expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
246
+ actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
247
+ expect(actual).to match expected
248
+ end
249
+
251
250
  it "return LinksNode/TextNode" do
252
251
  tree = Yasuri::LinksNode.new('/html/body/a', "root", [
253
252
  Yasuri::TextNode.new('/html/body/p', "content"),
254
253
  ])
255
254
  json = Yasuri.tree2json(tree)
256
- expected_src = %q| { "node" : "links",
257
- "name" : "root",
258
- "path" : "/html/body/a",
259
- "children" : [ { "node" : "text",
260
- "name" : "content",
261
- "path" : "/html/body/p"
262
- } ]
263
- }|
255
+
256
+ expected_src = %q|
257
+ {
258
+ "links_root": {
259
+ "path": "/html/body/a",
260
+ "text_content":"/html/body/p"
261
+ }
262
+ }|
264
263
  expected = JSON.parse(expected_src)
265
264
  actual = JSON.parse(json)
266
265
  expect(actual).to match expected
@@ -272,25 +271,84 @@ EOB
272
271
  ], limit:10)
273
272
 
274
273
  json = Yasuri.tree2json(tree)
275
- expected_src = %q| { "node" : "pages",
276
- "name" : "root",
277
- "path" : "/html/body/nav/span/a[@class='next']",
278
- "limit" : 10,
279
- "flatten" : false,
280
- "children" : [ { "node" : "text",
281
- "name" : "content",
282
- "path" : "/html/body/p"
283
- } ]
284
- }|
274
+ expected_src = %q|
275
+ {
276
+ "pages_root": {
277
+ "path": "/html/body/nav/span/a[@class='next']",
278
+ "limit": 10,
279
+ "flatten": false,
280
+ "text_content": "/html/body/p"
281
+ }
282
+ }|
285
283
  expected = JSON.parse(expected_src)
286
284
  actual = JSON.parse(json)
287
285
  expect(actual).to match expected
288
286
  end
287
+ end
288
+
289
+ describe '.with_retry' do
290
+ it "call once if success" do
291
+ actual = Yasuri.with_retry(0){ 42 }
292
+ expect(actual).to match 42
293
+ end
294
+
295
+ it "call untile success" do
296
+ i = [1,1,0,0]
297
+ actual = Yasuri.with_retry(2){42 / i.pop } # 3 times in max
298
+ expect(actual).to match 42/1
299
+ end
300
+
301
+ it "raise error when exceed retry count" do
302
+ i = [1,0,0,0]
303
+ expect {
304
+ Yasuri.with_retry(2){42 / i.pop } # do this 3 times
305
+ }.to raise_error(Exception)
306
+ end
307
+
308
+ it "wait interval before run" do
309
+ allow(Kernel).to receive(:sleep)
310
+ Yasuri.with_retry(0){ 42 }
311
+ expect(Kernel).to have_received(:sleep).once
312
+ end
289
313
 
314
+ it "wait interval before run" do
315
+ allow(Kernel).to receive(:sleep)
316
+ Yasuri.with_retry(0){ 42 }
317
+ expect(Kernel).to have_received(:sleep).once
318
+ end
290
319
 
320
+ it "wait interval for each runs" do
321
+ allow(Kernel).to receive(:sleep)
291
322
 
323
+ i = [1,1,0,0]
324
+ Yasuri.with_retry(2){42 / i.pop } # 3 times in max
325
+ expect(Kernel).to have_received(:sleep).exactly(3).times
326
+ end
292
327
  end
293
328
 
329
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
330
+ tree = Yasuri::StructNode.new('/html/body/table', "tables", [
331
+ Yasuri::StructNode.new('./tr', "table", [
332
+ Yasuri::TextNode.new('./td[1]', "title"),
333
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
334
+ ])
335
+ ])
336
+ json = Yasuri.tree2json(tree)
337
+ expected_src = %q|
338
+ {
339
+ "struct_tables": {
340
+ "path": "/html/body/table",
341
+ "struct_table": {
342
+ "path": "./tr",
343
+ "text_title": "./td[1]",
344
+ "text_pub_date": "./td[2]"
345
+ }
346
+ }
347
+ }|
348
+ expected = JSON.parse(expected_src)
349
+ actual = JSON.parse(json)
350
+ expect(actual).to match expected
351
+ end
294
352
 
295
353
  it 'has a version number' do
296
354
  expect(Yasuri::VERSION).not_to be nil