yasuri 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 765b5141cd81658858ac62ea8306c399f4edda16
4
- data.tar.gz: ecde98805bf7d3536e83496e6ac85ec5767f0321
3
+ metadata.gz: 39e4792feb25676d0f6e6f524d93ece20fb83530
4
+ data.tar.gz: fefe743c8e4807b69535f998e53d71784685aff6
5
5
  SHA512:
6
- metadata.gz: 6b0f3466e7017608ec24b75c15ccc8c2f64fff3b173aa2e818f02c1b115c1796d0e8f901879d5dae7c4de312bec3c309a6f530b3f3f7b65e19b5ee58ccf4db4d
7
- data.tar.gz: d6551a758c588a3a0c08865551ebef957863643125607cdb1b6f143f56de62691a76444fc3c85dc455793c61903794b61c0eaeb6515dda6536e130760942d2d0
6
+ metadata.gz: e532b3ad29681aebaec403bec9c34bcba96f14e6f5b8b6909d89d00938ec1988a4a26396a2fc058abe30a8f04e1c2fdfb8f755daee868c3b9eff6d4daf6c79a2
7
+ data.tar.gz: 469ac8b9a30715322e2efab60b2665b3da6eab66666bfbfc7af3cfd05d75750aa47e26b8763a373c5691395c93af5a7b35a71704358cf35f96e8662880ada5de
@@ -1,3 +1,3 @@
1
1
  module Yasuri
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/lib/yasuri/yasuri.rb CHANGED
@@ -67,6 +67,12 @@ module Yasuri
67
67
 
68
68
  class PaginateNode
69
69
  include Node
70
+
71
+ def initialize(xpath, name, children = [], limit = Float::INFINITY)
72
+ super(xpath, name, children)
73
+ @limit = limit
74
+ end
75
+
70
76
  def inject(agent, page, retry_count = 5)
71
77
 
72
78
  child_results = []
@@ -81,6 +87,7 @@ module Yasuri
81
87
 
82
88
  link_button = Mechanize::Page::Link.new(link, agent, page)
83
89
  page = Yasuri.with_retry(retry_count) { link_button.click }
90
+ break if (@limit -= 1) <= 0
84
91
  end
85
92
 
86
93
  child_results
@@ -101,19 +108,20 @@ module Yasuri
101
108
  end
102
109
 
103
110
  def self.gen(name, *args, &block)
104
- xpath, children = *args
111
+ xpath, opt = *args
105
112
  children = Yasuri::NodeGenerator.new.gen_recursive(&block) if block_given?
106
113
 
107
114
  case name
108
115
  when /^text_(.+)$/
109
- truncate_regexp, dummy = children
110
- Yasuri::TextNode.new(xpath, $1, children || [])
116
+ truncate_regexp = opt
117
+ Yasuri::TextNode.new(xpath, $1, truncate_regexp)
111
118
  when /^struct_(.+)$/
112
119
  Yasuri::StructNode.new(xpath, $1, children || [])
113
120
  when /^links_(.+)$/
114
121
  Yasuri::LinksNode.new(xpath, $1, children || [])
115
122
  when /^pages_(.+)$/
116
- Yasuri::PaginateNode.new(xpath, $1, children || [])
123
+ limit = opt || Float::INFINITY
124
+ Yasuri::PaginateNode.new(xpath, $1, children || [], limit)
117
125
  else
118
126
  nil
119
127
  end
data/spec/yasuri_spec.rb CHANGED
@@ -278,6 +278,20 @@ describe 'Yasuri' do
278
278
  expect(actual).to match expected
279
279
  end
280
280
 
281
+ it "scrape each paginated pages limited" do
282
+ root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
283
+ Yasuri::TextNode.new('/html/body/p', "content"),
284
+ ], 3)
285
+ actual = root_node.inject(@agent, @page)
286
+ expected = [
287
+ {"content" => "PaginationTest01"},
288
+ {"content" => "PaginationTest02"},
289
+ {"content" => "PaginationTest03"},
290
+ ]
291
+ expect(actual).to match expected
292
+ end
293
+
294
+
281
295
  it 'return first content if paginate link node is not found' do
282
296
  missing_xpath = "/html/body/nav/span/b[@class='next']"
283
297
  root_node = Yasuri::PaginateNode.new(missing_xpath, "root", [
@@ -304,7 +318,17 @@ describe 'Yasuri' do
304
318
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
305
319
  Yasuri::TextNode.new('/html/body/p', "content"),
306
320
  ])
307
- compare_generated_vs_original(generated, original)
321
+ compare_generated_vs_original(generated, original, @page)
322
+ end
323
+
324
+ it 'can be defined by DSL, return single PaginateNode content limited' do
325
+ generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']", 2 do
326
+ text_content '/html/body/p'
327
+ end
328
+ original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
329
+ Yasuri::TextNode.new('/html/body/p', "content"),
330
+ ], 2)
331
+ compare_generated_vs_original(generated, original, @page)
308
332
  end
309
333
  end
310
334
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC