yasuri 0.0.5 → 0.0.6

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 39e4792feb25676d0f6e6f524d93ece20fb83530
4
- data.tar.gz: fefe743c8e4807b69535f998e53d71784685aff6
3
+ metadata.gz: 61a2aa3974c697ecc14b991521961ec54f3ff5c0
4
+ data.tar.gz: 03cf5b02e7a646175183725d38d50a9538df7eed
5
5
  SHA512:
6
- metadata.gz: e532b3ad29681aebaec403bec9c34bcba96f14e6f5b8b6909d89d00938ec1988a4a26396a2fc058abe30a8f04e1c2fdfb8f755daee868c3b9eff6d4daf6c79a2
7
- data.tar.gz: 469ac8b9a30715322e2efab60b2665b3da6eab66666bfbfc7af3cfd05d75750aa47e26b8763a373c5691395c93af5a7b35a71704358cf35f96e8662880ada5de
6
+ metadata.gz: 2108d6b78c8704fa4d99491c1684dfd686c656d7bd039b8c80e52fa2150837958c25f37afabdcf198b10153b2f32216408735182fb0d58b3007f4808ac2226c9
7
+ data.tar.gz: c956e589ab7676e844110870e18c8f842afb6fd566a5ddd4758ec9e328961ff1b485bac9eebecdac6456f7ed28a9793d9dd4dd9bc2f89dd8f9131003c6db9a5d
data/lib/yasuri/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Yasuri
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
data/lib/yasuri/yasuri.rb CHANGED
@@ -10,7 +10,7 @@ module Yasuri
10
10
  module Node
11
11
  attr_reader :url, :xpath, :name
12
12
 
13
- def initialize(xpath, name, children = [])
13
+ def initialize(xpath, name, children = [], opt: {})
14
14
  @xpath, @name, @children = xpath, name, children
15
15
  end
16
16
 
@@ -21,9 +21,9 @@ module Yasuri
21
21
 
22
22
  class TextNode
23
23
  include Node
24
- def initialize(xpath, name, children = [])
24
+ def initialize(xpath, name, children = [], truncate_regexp: nil, opt: {})
25
25
  super(xpath, name, children)
26
- @truncate_regexp, dummy = *children
26
+ @truncate_regexp = truncate_regexp
27
27
  end
28
28
  def inject(agent, page, retry_count = 5)
29
29
  node = page.search(@xpath)
@@ -68,9 +68,9 @@ module Yasuri
68
68
  class PaginateNode
69
69
  include Node
70
70
 
71
- def initialize(xpath, name, children = [], limit = Float::INFINITY)
71
+ def initialize(xpath, name, children = [], limit: nil, opt: {})
72
72
  super(xpath, name, children)
73
- @limit = limit
73
+ @limit = limit || opt["limit"] || Float::MAX
74
74
  end
75
75
 
76
76
  def inject(agent, page, retry_count = 5)
@@ -120,8 +120,9 @@ module Yasuri
120
120
  when /^links_(.+)$/
121
121
  Yasuri::LinksNode.new(xpath, $1, children || [])
122
122
  when /^pages_(.+)$/
123
- limit = opt || Float::INFINITY
124
- Yasuri::PaginateNode.new(xpath, $1, children || [], limit)
123
+ xpath, limit = *args
124
+ limit = limit || Float::MAX
125
+ Yasuri::PaginateNode.new(xpath, $1, children || [], limit: limit)
125
126
  else
126
127
  nil
127
128
  end
@@ -146,16 +147,19 @@ module Yasuri
146
147
  "links" => LinksNode,
147
148
  "pages" => PaginateNode
148
149
  }
150
+ ReservedKeys = %w|node name path children|
149
151
  def self.hash2node(node_h)
150
- node, name, path, children = %w|node name path children|.map do |key|
152
+ node, name, path, children = ReservedKeys.map do |key|
151
153
  node_h[key]
152
154
  end
153
155
  children ||= []
154
156
 
155
157
  childnodes = children.map{|c| Yasuri.hash2node(c) }
158
+ ReservedKeys.each{|key| node_h.delete(key)}
159
+ opt = node_h
156
160
 
157
161
  klass = Text2Node[node]
158
- klass ? klass.new(path, name, childnodes) : nil
162
+ klass ? klass.new(path, name, childnodes, opt: opt) : nil
159
163
  end
160
164
 
161
165
  def self.with_retry(retry_count = 5)
data/spec/yasuri_spec.rb CHANGED
@@ -54,19 +54,19 @@ describe 'Yasuri' do
54
54
  end
55
55
 
56
56
  it "can be truncated with regexp" do
57
- node = Yasuri.text_title '/html/body/p[1]', /^[^,]+/
57
+ node = Yasuri.text_title '/html/body/p[1]', truncate_regexp:/^[^,]+/
58
58
  actual = node.inject(@agent, @index_page)
59
59
  expect(actual).to eq "Hello"
60
60
  end
61
61
 
62
62
  it "can be truncated with regexp" do
63
- node = Yasuri.text_title '/html/body/p[1]', /[^,]+$/
63
+ node = Yasuri.text_title '/html/body/p[1]', truncate_regexp:/[^,]+$/
64
64
  actual = node.inject(@agent, @index_page)
65
65
  expect(actual).to eq "Yasuri"
66
66
  end
67
67
 
68
68
  it "return empty string if truncated with no match to regexp" do
69
- node = Yasuri.text_title '/html/body/p[1]', /^hoge/
69
+ node = Yasuri.text_title '/html/body/p[1]', truncate_regexp:/^hoge/
70
70
  actual = node.inject(@agent, @index_page)
71
71
  expect(actual).to be_empty
72
72
  end
@@ -281,7 +281,7 @@ describe 'Yasuri' do
281
281
  it "scrape each paginated pages limited" do
282
282
  root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
283
283
  Yasuri::TextNode.new('/html/body/p', "content"),
284
- ], 3)
284
+ ], limit:3)
285
285
  actual = root_node.inject(@agent, @page)
286
286
  expected = [
287
287
  {"content" => "PaginationTest01"},
@@ -327,7 +327,7 @@ describe 'Yasuri' do
327
327
  end
328
328
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
329
329
  Yasuri::TextNode.new('/html/body/p', "content"),
330
- ], 2)
330
+ ], limit: 2)
331
331
  compare_generated_vs_original(generated, original, @page)
332
332
  end
333
333
  end
@@ -386,6 +386,26 @@ describe 'Yasuri' do
386
386
  compare_generated_vs_original(generated, original, paginate_test_page)
387
387
  end
388
388
 
389
+ it "return PaginateNode/TextNode with limit" do
390
+ src = %q|{ "node" : "pages",
391
+ "name" : "root",
392
+ "path" : "/html/body/nav/span/a[@class=\'next\']",
393
+ "limit" : 2,
394
+ "children" : [ { "node" : "text",
395
+ "name" : "content",
396
+ "path" : "/html/body/p"
397
+ } ]
398
+ }|
399
+ generated = Yasuri.json2tree(src)
400
+ original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
401
+ Yasuri::TextNode.new('/html/body/p', "content"),
402
+ ], limit:2)
403
+
404
+ paginate_test_uri = @uri + "/pagination/page01.html"
405
+ paginate_test_page = @agent.get(paginate_test_uri)
406
+ compare_generated_vs_original(generated, original, paginate_test_page)
407
+ end
408
+
389
409
  it "return StructNode/StructNode/[TextNode,TextNode]" do
390
410
  src = %q| { "node" : "struct",
391
411
  "name" : "tables",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-23 00:00:00.000000000 Z
11
+ date: 2015-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler