yasuri 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 765b5141cd81658858ac62ea8306c399f4edda16
4
- data.tar.gz: ecde98805bf7d3536e83496e6ac85ec5767f0321
3
+ metadata.gz: 39e4792feb25676d0f6e6f524d93ece20fb83530
4
+ data.tar.gz: fefe743c8e4807b69535f998e53d71784685aff6
5
5
  SHA512:
6
- metadata.gz: 6b0f3466e7017608ec24b75c15ccc8c2f64fff3b173aa2e818f02c1b115c1796d0e8f901879d5dae7c4de312bec3c309a6f530b3f3f7b65e19b5ee58ccf4db4d
7
- data.tar.gz: d6551a758c588a3a0c08865551ebef957863643125607cdb1b6f143f56de62691a76444fc3c85dc455793c61903794b61c0eaeb6515dda6536e130760942d2d0
6
+ metadata.gz: e532b3ad29681aebaec403bec9c34bcba96f14e6f5b8b6909d89d00938ec1988a4a26396a2fc058abe30a8f04e1c2fdfb8f755daee868c3b9eff6d4daf6c79a2
7
+ data.tar.gz: 469ac8b9a30715322e2efab60b2665b3da6eab66666bfbfc7af3cfd05d75750aa47e26b8763a373c5691395c93af5a7b35a71704358cf35f96e8662880ada5de
@@ -1,3 +1,3 @@
1
1
  module Yasuri
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/lib/yasuri/yasuri.rb CHANGED
@@ -67,6 +67,12 @@ module Yasuri
67
67
 
68
68
  class PaginateNode
69
69
  include Node
70
+
71
+ def initialize(xpath, name, children = [], limit = Float::INFINITY)
72
+ super(xpath, name, children)
73
+ @limit = limit
74
+ end
75
+
70
76
  def inject(agent, page, retry_count = 5)
71
77
 
72
78
  child_results = []
@@ -81,6 +87,7 @@ module Yasuri
81
87
 
82
88
  link_button = Mechanize::Page::Link.new(link, agent, page)
83
89
  page = Yasuri.with_retry(retry_count) { link_button.click }
90
+ break if (@limit -= 1) <= 0
84
91
  end
85
92
 
86
93
  child_results
@@ -101,19 +108,20 @@ module Yasuri
101
108
  end
102
109
 
103
110
  def self.gen(name, *args, &block)
104
- xpath, children = *args
111
+ xpath, opt = *args
105
112
  children = Yasuri::NodeGenerator.new.gen_recursive(&block) if block_given?
106
113
 
107
114
  case name
108
115
  when /^text_(.+)$/
109
- truncate_regexp, dummy = children
110
- Yasuri::TextNode.new(xpath, $1, children || [])
116
+ truncate_regexp = opt
117
+ Yasuri::TextNode.new(xpath, $1, truncate_regexp)
111
118
  when /^struct_(.+)$/
112
119
  Yasuri::StructNode.new(xpath, $1, children || [])
113
120
  when /^links_(.+)$/
114
121
  Yasuri::LinksNode.new(xpath, $1, children || [])
115
122
  when /^pages_(.+)$/
116
- Yasuri::PaginateNode.new(xpath, $1, children || [])
123
+ limit = opt || Float::INFINITY
124
+ Yasuri::PaginateNode.new(xpath, $1, children || [], limit)
117
125
  else
118
126
  nil
119
127
  end
data/spec/yasuri_spec.rb CHANGED
@@ -278,6 +278,20 @@ describe 'Yasuri' do
278
278
  expect(actual).to match expected
279
279
  end
280
280
 
281
+ it "scrape each paginated pages limited" do
282
+ root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
283
+ Yasuri::TextNode.new('/html/body/p', "content"),
284
+ ], 3)
285
+ actual = root_node.inject(@agent, @page)
286
+ expected = [
287
+ {"content" => "PaginationTest01"},
288
+ {"content" => "PaginationTest02"},
289
+ {"content" => "PaginationTest03"},
290
+ ]
291
+ expect(actual).to match expected
292
+ end
293
+
294
+
281
295
  it 'return first content if paginate link node is not found' do
282
296
  missing_xpath = "/html/body/nav/span/b[@class='next']"
283
297
  root_node = Yasuri::PaginateNode.new(missing_xpath, "root", [
@@ -304,7 +318,17 @@ describe 'Yasuri' do
304
318
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
305
319
  Yasuri::TextNode.new('/html/body/p', "content"),
306
320
  ])
307
- compare_generated_vs_original(generated, original)
321
+ compare_generated_vs_original(generated, original, @page)
322
+ end
323
+
324
+ it 'can be defined by DSL, return single PaginateNode content limited' do
325
+ generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']", 2 do
326
+ text_content '/html/body/p'
327
+ end
328
+ original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
329
+ Yasuri::TextNode.new('/html/body/p', "content"),
330
+ ], 2)
331
+ compare_generated_vs_original(generated, original, @page)
308
332
  end
309
333
  end
310
334
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC