yasuri 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/spec/yasuri_spec.rb CHANGED
@@ -8,9 +8,7 @@ describe 'Yasuri' do
8
8
  include_context 'httpserver'
9
9
 
10
10
  before do
11
- @agent = Mechanize.new
12
11
  @uri = uri
13
- @index_page = @agent.get(@uri)
14
12
  end
15
13
 
16
14
 
@@ -29,7 +27,7 @@ describe 'Yasuri' do
29
27
  generated = Yasuri.yaml2tree(src)
30
28
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
31
29
 
32
- compare_generated_vs_original(generated, original, @index_page)
30
+ compare_generated_vs_original(generated, original, @uri)
33
31
  end
34
32
 
35
33
  it "return text node as symbol" do
@@ -40,7 +38,7 @@ describe 'Yasuri' do
40
38
  generated = Yasuri.yaml2tree(src)
41
39
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
42
40
 
43
- compare_generated_vs_original(generated, original, @index_page)
41
+ compare_generated_vs_original(generated, original, @uri)
44
42
  end
45
43
 
46
44
  it "return LinksNode/TextNode" do
@@ -55,7 +53,7 @@ describe 'Yasuri' do
55
53
  Yasuri::TextNode.new('/html/body/p', "content"),
56
54
  ])
57
55
 
58
- compare_generated_vs_original(generated, original, @index_page)
56
+ compare_generated_vs_original(generated, original, @uri)
59
57
  end
60
58
 
61
59
  it "return StructNode/StructNode/[TextNode,TextNode]" do
@@ -75,8 +73,8 @@ describe 'Yasuri' do
75
73
  Yasuri::TextNode.new('./td[2]', "pub_date"),
76
74
  ])
77
75
  ])
78
- page = @agent.get(@uri + "/struct/structual_text.html")
79
- compare_generated_vs_original(generated, original, page)
76
+ uri = @uri + "/struct/structual_text.html"
77
+ compare_generated_vs_original(generated, original, uri)
80
78
  end
81
79
 
82
80
  end # end of describe '.yaml2tree'
@@ -98,7 +96,7 @@ describe 'Yasuri' do
98
96
  generated = Yasuri.json2tree(src)
99
97
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
100
98
 
101
- compare_generated_vs_original(generated, original, @index_page)
99
+ compare_generated_vs_original(generated, original, @uri)
102
100
  end
103
101
 
104
102
  it "return TextNode with truncate_regexp" do
@@ -111,7 +109,7 @@ describe 'Yasuri' do
111
109
  }|
112
110
  generated = Yasuri.json2tree(src)
113
111
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
114
- compare_generated_vs_original(generated, original, @index_page)
112
+ compare_generated_vs_original(generated, original, @uri)
115
113
  end
116
114
 
117
115
  it "return MapNode with TextNodes" do
@@ -125,7 +123,7 @@ describe 'Yasuri' do
125
123
  Yasuri::TextNode.new('/html/body/p[1]', "content01"),
126
124
  Yasuri::TextNode.new('/html/body/p[2]', "content02"),
127
125
  ])
128
- compare_generated_vs_original(generated, original, @index_page)
126
+ compare_generated_vs_original(generated, original, @uri)
129
127
  end
130
128
 
131
129
  it "return LinksNode/TextNode" do
@@ -142,7 +140,7 @@ describe 'Yasuri' do
142
140
  Yasuri::TextNode.new('/html/body/p', "content"),
143
141
  ])
144
142
 
145
- compare_generated_vs_original(generated, original, @index_page)
143
+ compare_generated_vs_original(generated, original, @uri)
146
144
  end
147
145
 
148
146
  it "return PaginateNode/TextNode" do
@@ -158,9 +156,8 @@ describe 'Yasuri' do
158
156
  Yasuri::TextNode.new('/html/body/p', "content"),
159
157
  ])
160
158
 
161
- paginate_test_uri = @uri + "/pagination/page01.html"
162
- paginate_test_page = @agent.get(paginate_test_uri)
163
- compare_generated_vs_original(generated, original, paginate_test_page)
159
+ uri = @uri + "/pagination/page01.html"
160
+ compare_generated_vs_original(generated, original, uri)
164
161
  end
165
162
 
166
163
  it "return PaginateNode/TextNode with limit" do
@@ -177,9 +174,8 @@ describe 'Yasuri' do
177
174
  Yasuri::TextNode.new('/html/body/p', "content"),
178
175
  ], limit:2)
179
176
 
180
- paginate_test_uri = @uri + "/pagination/page01.html"
181
- paginate_test_page = @agent.get(paginate_test_uri)
182
- compare_generated_vs_original(generated, original, paginate_test_page)
177
+ uri = @uri + "/pagination/page01.html"
178
+ compare_generated_vs_original(generated, original, uri)
183
179
  end
184
180
 
185
181
  it "return StructNode/StructNode/[TextNode,TextNode]" do
@@ -201,8 +197,8 @@ describe 'Yasuri' do
201
197
  Yasuri::TextNode.new('./td[2]', "pub_date"),
202
198
  ])
203
199
  ])
204
- page = @agent.get(@uri + "/struct/structual_text.html")
205
- compare_generated_vs_original(generated, original, page)
200
+ uri = @uri + "/struct/structual_text.html"
201
+ compare_generated_vs_original(generated, original, uri)
206
202
  end
207
203
  end
208
204
 
@@ -299,6 +295,46 @@ describe 'Yasuri' do
299
295
  end
300
296
  end
301
297
 
298
+ describe '.with_retry' do
299
+ it "call once if success" do
300
+ actual = Yasuri.with_retry(0){ 42 }
301
+ expect(actual).to match 42
302
+ end
303
+
304
+ it "call untile success" do
305
+ i = [1,1,0,0]
306
+ actual = Yasuri.with_retry(2){42 / i.pop } # at most 3 attempts (1 initial + 2 retries)
307
+ expect(actual).to match 42/1
308
+ end
309
+
310
+ it "raise error when exceed retry count" do
311
+ i = [1,0,0,0]
312
+ expect {
313
+ Yasuri.with_retry(2){42 / i.pop } # do this 3 times
314
+ }.to raise_error(Exception)
315
+ end
316
+
317
+ it "wait interval before run" do
318
+ allow(Kernel).to receive(:sleep)
319
+ Yasuri.with_retry(0){ 42 }
320
+ expect(Kernel).to have_received(:sleep).once
321
+ end
322
+
323
+ it "wait interval before run" do
324
+ allow(Kernel).to receive(:sleep)
325
+ Yasuri.with_retry(0){ 42 }
326
+ expect(Kernel).to have_received(:sleep).once
327
+ end
328
+
329
+ it "wait interval for each runs" do
330
+ allow(Kernel).to receive(:sleep)
331
+
332
+ i = [1,1,0,0]
333
+ Yasuri.with_retry(2){42 / i.pop } # at most 3 attempts (1 initial + 2 retries)
334
+ expect(Kernel).to have_received(:sleep).exactly(3).times
335
+ end
336
+ end
337
+
302
338
  it "return StructNode/StructNode/[TextNode,TextNode]" do
303
339
  tree = Yasuri::StructNode.new('/html/body/table', "tables", [
304
340
  Yasuri::StructNode.new('./tr', "table", [
@@ -11,8 +11,7 @@ describe 'Yasuri' do
11
11
 
12
12
  describe '::StructNode' do
13
13
  before do
14
- @agent = Mechanize.new
15
- @page = @agent.get(uri + "/struct/structual_text.html")
14
+ @uri = uri + "/struct/structual_text.html"
16
15
 
17
16
  @table_1996 = [
18
17
  { "title" => "The Perfect Insider",
@@ -53,7 +52,7 @@ describe 'Yasuri' do
53
52
  Yasuri::TextNode.new('./td[2]', "pub_date"),
54
53
  ])
55
54
  expected = @table_1996
56
- actual = node.inject(@agent, @page)
55
+ actual = node.scrape(@uri)
57
56
  expect(actual).to match expected
58
57
  end
59
58
 
@@ -63,7 +62,7 @@ describe 'Yasuri' do
63
62
  Yasuri::TextNode.new('./td[2]', "pub_date"),
64
63
  ])
65
64
  expected = @table_1996.first
66
- actual = node.inject(@agent, @page)
65
+ actual = node.scrape(@uri)
67
66
  expect(actual).to match expected
68
67
  end
69
68
 
@@ -72,7 +71,7 @@ describe 'Yasuri' do
72
71
  node = Yasuri::StructNode.new(no_match_xpath, "table", [
73
72
  Yasuri::TextNode.new('./td[1]', "title")
74
73
  ])
75
- actual = node.inject(@agent, @page)
74
+ actual = node.scrape(@uri)
76
75
  expect(actual).to be_empty
77
76
  end
78
77
 
@@ -81,7 +80,7 @@ describe 'Yasuri' do
81
80
  node = Yasuri::StructNode.new(invalid_xpath, "table", [
82
81
  Yasuri::TextNode.new('./td[1]', "title")
83
82
  ])
84
- expect { node.inject(@agent, @page) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
83
+ expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
85
84
  end
86
85
 
87
86
  it 'fail with invalid xpath in children' do
@@ -90,7 +89,7 @@ describe 'Yasuri' do
90
89
  Yasuri::TextNode.new(invalid_xpath, "title"),
91
90
  Yasuri::TextNode.new('./td[2]', "pub_date"),
92
91
  ])
93
- expect { node.inject(@agent, @page) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
92
+ expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
94
93
  end
95
94
 
96
95
  it 'scrape all tables' do
@@ -101,7 +100,7 @@ describe 'Yasuri' do
101
100
  ])
102
101
  ])
103
102
  expected = @all_tables
104
- actual = node.inject(@agent, @page)
103
+ actual = node.scrape(@uri)
105
104
  expect(actual).to match expected
106
105
  end
107
106
 
@@ -118,7 +117,7 @@ describe 'Yasuri' do
118
117
  Yasuri::TextNode.new('./td[2]', "pub_date"),
119
118
  ])
120
119
  ])
121
- compare_generated_vs_original(generated, original, @page)
120
+ compare_generated_vs_original(generated, original, @uri)
122
121
  end
123
122
 
124
123
  it 'return child node as symbol' do
@@ -127,7 +126,7 @@ describe 'Yasuri' do
127
126
  Yasuri::TextNode.new('./td[2]', "pub_date"),
128
127
  ])
129
128
  expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
130
- actual = node.inject(@agent, @page, symbolize_names:true)
129
+ actual = node.scrape(@uri, symbolize_names:true)
131
130
  expect(actual).to match expected
132
131
  end
133
132
 
@@ -135,9 +134,7 @@ describe 'Yasuri' do
135
134
 
136
135
  describe '::StructNode::Links' do
137
136
  before do
138
- @agent = Mechanize.new
139
- @page = @agent.get(uri + "/struct/structual_links.html")
140
-
137
+ @uri = uri + "/struct/structual_links.html"
141
138
  @table = [
142
139
  { "title" => "Child01,02",
143
140
  "child" => [{"p" => "Child 01 page."}, {"p" => "Child 02 page."}] },
@@ -155,22 +152,21 @@ describe 'Yasuri' do
155
152
  ])
156
153
  ])
157
154
  expected = @table
158
- actual = node.inject(@agent, @page)
155
+ actual = node.scrape(@uri)
159
156
  expect(actual).to match expected
160
157
  end
161
158
  end # describe
162
159
 
163
160
  describe '::StructNode::Pages' do
164
161
  before do
165
- @agent = Mechanize.new
166
- @page = @agent.get(uri + "/struct/structual_text.html") #dummy
162
+ @uri = uri + "/struct/structual_text.html"
167
163
  end
168
164
 
169
165
  it 'not supported' do
170
166
  node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
171
167
  Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "pages", [])
172
168
  ])
173
- expect{ node.inject(@agent, @page) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
169
+ expect{ node.scrape(@uri) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
174
170
  end
175
171
  end
176
172
  end
@@ -10,69 +10,68 @@ describe 'Yasuri' do
10
10
  include_context 'httpserver'
11
11
 
12
12
  before do
13
- @agent = Mechanize.new
14
- @index_page = @agent.get(uri)
13
+ @uri = uri
15
14
  end
16
15
 
17
16
  describe '::TextNode' do
18
17
  before { @node = Yasuri::TextNode.new('/html/body/p[1]', "title") }
19
18
 
20
19
  it 'scrape text text <p>Hello,Yasuri</p>' do
21
- actual = @node.inject(@agent, @index_page)
20
+ actual = @node.scrape(@uri)
22
21
  expect(actual).to eq "Hello,Yasuri"
23
22
  end
24
23
 
25
24
  it 'return empty text if no match node' do
26
25
  no_match_node = Yasuri::TextNode.new('/html/body/no_match_node', "title")
27
- actual = no_match_node.inject(@agent, @index_page)
26
+ actual = no_match_node.scrape(@uri)
28
27
  expect(actual).to be_empty
29
28
  end
30
29
 
31
30
  it 'fail with invalid xpath' do
32
31
  invalid_xpath = '/html/body/no_match_node['
33
32
  node = Yasuri::TextNode.new(invalid_xpath, "title")
34
- expect { node.inject(@agent, @index_page) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
33
+ expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
35
34
  end
36
35
 
37
36
  it "can be defined by DSL, return single TextNode title" do
38
37
  generated = Yasuri.text_title '/html/body/p[1]'
39
38
  original = Yasuri::TextNode.new('/html/body/p[1]', "title")
40
- compare_generated_vs_original(generated, original, @index_page)
39
+ compare_generated_vs_original(generated, original, @uri)
41
40
  end
42
41
 
43
42
  it "can be truncated with regexp" do
44
43
  node = Yasuri.text_title '/html/body/p[1]', truncate:/^[^,]+/
45
- actual = node.inject(@agent, @index_page)
44
+ actual = node.scrape(@uri)
46
45
  expect(actual).to eq "Hello"
47
46
  end
48
47
 
49
48
  it "return first captured if matched given capture pattern" do
50
49
  node = Yasuri.text_title '/html/body/p[1]', truncate:/H(.+)i/
51
- actual = node.inject(@agent, @index_page)
50
+ actual = node.scrape(@uri)
52
51
  expect(actual).to eq "ello,Yasur"
53
52
  end
54
53
 
55
54
  it "can be truncated with regexp" do
56
55
  node = Yasuri.text_title '/html/body/p[1]', truncate:/[^,]+$/
57
- actual = node.inject(@agent, @index_page)
56
+ actual = node.scrape(@uri)
58
57
  expect(actual).to eq "Yasuri"
59
58
  end
60
59
 
61
60
  it "return empty string if truncated with no match to regexp" do
62
61
  node = Yasuri.text_title '/html/body/p[1]', truncate:/^hoge/
63
- actual = node.inject(@agent, @index_page)
62
+ actual = node.scrape(@uri)
64
63
  expect(actual).to be_empty
65
64
  end
66
65
 
67
66
  it "return symbol method applied string" do
68
67
  node = Yasuri.text_title '/html/body/p[1]', proc: :upcase
69
- actual = node.inject(@agent, @index_page)
68
+ actual = node.scrape(@uri)
70
69
  expect(actual).to eq "HELLO,YASURI"
71
70
  end
72
71
 
73
72
  it "return apply multi arguments" do
74
73
  node = Yasuri.text_title '/html/body/p[1]', proc: :upcase, truncate:/H(.+)i/
75
- actual = node.inject(@agent, @index_page)
74
+ actual = node.scrape(@uri)
76
75
  expect(actual).to eq "ELLO,YASUR"
77
76
  end
78
77
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-25 00:00:00.000000000 Z
11
+ date: 2021-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -170,7 +170,10 @@ files:
170
170
  - Rakefile
171
171
  - USAGE.ja.md
172
172
  - USAGE.md
173
- - app.rb
173
+ - examples/example.rb
174
+ - examples/github.yml
175
+ - examples/sample.json
176
+ - examples/sample.yml
174
177
  - exe/yasuri
175
178
  - lib/yasuri.rb
176
179
  - lib/yasuri/version.rb
data/app.rb DELETED
@@ -1,52 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # -*- coding: utf-8 -*-
3
-
4
- # Author:: TAC (tac@tac42.net)
5
-
6
- require 'pp'
7
- require 'time'
8
- require 'mechanize'
9
-
10
- require_relative 'lib/yasuri/yasuri'
11
-
12
- agent = Mechanize.new
13
-
14
- uri = "http://www.asahi.com/"
15
-
16
- # Node tree constructing by DSL
17
- root = Yasuri.links_top '//*[@id="MainInner"]/div[1]/ul/li/a' do
18
- text_title '//*[@id="MainInner"]/div[1]/div/h1'
19
- text_article '//*[@id="MainInner"]/div/div[@class="ArticleText"]'
20
- end
21
-
22
- # Node tree constructing by JSON
23
- src = <<-EOJSON
24
- { "node" : "links",
25
- "name" : "root",
26
- "path" : "//*[@id='MainInner']/div[1]/ul/li/a",
27
- "children" : [
28
- { "node" : "text",
29
- "name" : "title",
30
- "path" : "//*[@id='MainInner']/div[1]/div/h1"
31
- },
32
- { "node" : "text",
33
- "name" : "article",
34
- "path" : "//*[@id='MainInner']/div/div[@class='ArticleText']"
35
- }
36
- ]
37
- }
38
- EOJSON
39
- root = Yasuri.json2tree(src)
40
-
41
- # Access to parsed resources
42
- page = agent.get(uri)
43
- contents = root.inject(agent, page)
44
-
45
- contents.each do |h|
46
- t = h['title']
47
- a = h['article']
48
-
49
- puts t
50
- puts a
51
- puts "=" * 100
52
- end