yasuri 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 39e4792feb25676d0f6e6f524d93ece20fb83530
4
- data.tar.gz: fefe743c8e4807b69535f998e53d71784685aff6
3
+ metadata.gz: 61a2aa3974c697ecc14b991521961ec54f3ff5c0
4
+ data.tar.gz: 03cf5b02e7a646175183725d38d50a9538df7eed
5
5
  SHA512:
6
- metadata.gz: e532b3ad29681aebaec403bec9c34bcba96f14e6f5b8b6909d89d00938ec1988a4a26396a2fc058abe30a8f04e1c2fdfb8f755daee868c3b9eff6d4daf6c79a2
7
- data.tar.gz: 469ac8b9a30715322e2efab60b2665b3da6eab66666bfbfc7af3cfd05d75750aa47e26b8763a373c5691395c93af5a7b35a71704358cf35f96e8662880ada5de
6
+ metadata.gz: 2108d6b78c8704fa4d99491c1684dfd686c656d7bd039b8c80e52fa2150837958c25f37afabdcf198b10153b2f32216408735182fb0d58b3007f4808ac2226c9
7
+ data.tar.gz: c956e589ab7676e844110870e18c8f842afb6fd566a5ddd4758ec9e328961ff1b485bac9eebecdac6456f7ed28a9793d9dd4dd9bc2f89dd8f9131003c6db9a5d
@@ -1,3 +1,3 @@
1
1
  module Yasuri
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
data/lib/yasuri/yasuri.rb CHANGED
@@ -10,7 +10,7 @@ module Yasuri
10
10
  module Node
11
11
  attr_reader :url, :xpath, :name
12
12
 
13
- def initialize(xpath, name, children = [])
13
+ def initialize(xpath, name, children = [], opt: {})
14
14
  @xpath, @name, @children = xpath, name, children
15
15
  end
16
16
 
@@ -21,9 +21,9 @@ module Yasuri
21
21
 
22
22
  class TextNode
23
23
  include Node
24
- def initialize(xpath, name, children = [])
24
+ def initialize(xpath, name, children = [], truncate_regexp: nil, opt: {})
25
25
  super(xpath, name, children)
26
- @truncate_regexp, dummy = *children
26
+ @truncate_regexp = truncate_regexp
27
27
  end
28
28
  def inject(agent, page, retry_count = 5)
29
29
  node = page.search(@xpath)
@@ -68,9 +68,9 @@ module Yasuri
68
68
  class PaginateNode
69
69
  include Node
70
70
 
71
- def initialize(xpath, name, children = [], limit = Float::INFINITY)
71
+ def initialize(xpath, name, children = [], limit: nil, opt: {})
72
72
  super(xpath, name, children)
73
- @limit = limit
73
+ @limit = limit || opt["limit"] || Float::MAX
74
74
  end
75
75
 
76
76
  def inject(agent, page, retry_count = 5)
@@ -120,8 +120,9 @@ module Yasuri
120
120
  when /^links_(.+)$/
121
121
  Yasuri::LinksNode.new(xpath, $1, children || [])
122
122
  when /^pages_(.+)$/
123
- limit = opt || Float::INFINITY
124
- Yasuri::PaginateNode.new(xpath, $1, children || [], limit)
123
+ xpath, limit = *args
124
+ limit = limit || Float::MAX
125
+ Yasuri::PaginateNode.new(xpath, $1, children || [], limit: limit)
125
126
  else
126
127
  nil
127
128
  end
@@ -146,16 +147,19 @@ module Yasuri
146
147
  "links" => LinksNode,
147
148
  "pages" => PaginateNode
148
149
  }
150
+ ReservedKeys = %w|node name path children|
149
151
  def self.hash2node(node_h)
150
- node, name, path, children = %w|node name path children|.map do |key|
152
+ node, name, path, children = ReservedKeys.map do |key|
151
153
  node_h[key]
152
154
  end
153
155
  children ||= []
154
156
 
155
157
  childnodes = children.map{|c| Yasuri.hash2node(c) }
158
+ ReservedKeys.each{|key| node_h.delete(key)}
159
+ opt = node_h
156
160
 
157
161
  klass = Text2Node[node]
158
- klass ? klass.new(path, name, childnodes) : nil
162
+ klass ? klass.new(path, name, childnodes, opt: opt) : nil
159
163
  end
160
164
 
161
165
  def self.with_retry(retry_count = 5)
data/spec/yasuri_spec.rb CHANGED
@@ -54,19 +54,19 @@ describe 'Yasuri' do
54
54
  end
55
55
 
56
56
  it "can be truncated with regexp" do
57
- node = Yasuri.text_title '/html/body/p[1]', /^[^,]+/
57
+ node = Yasuri.text_title '/html/body/p[1]', truncate_regexp:/^[^,]+/
58
58
  actual = node.inject(@agent, @index_page)
59
59
  expect(actual).to eq "Hello"
60
60
  end
61
61
 
62
62
  it "can be truncated with regexp" do
63
- node = Yasuri.text_title '/html/body/p[1]', /[^,]+$/
63
+ node = Yasuri.text_title '/html/body/p[1]', truncate_regexp:/[^,]+$/
64
64
  actual = node.inject(@agent, @index_page)
65
65
  expect(actual).to eq "Yasuri"
66
66
  end
67
67
 
68
68
  it "return empty string if truncated with no match to regexp" do
69
- node = Yasuri.text_title '/html/body/p[1]', /^hoge/
69
+ node = Yasuri.text_title '/html/body/p[1]', truncate_regexp:/^hoge/
70
70
  actual = node.inject(@agent, @index_page)
71
71
  expect(actual).to be_empty
72
72
  end
@@ -281,7 +281,7 @@ describe 'Yasuri' do
281
281
  it "scrape each paginated pages limited" do
282
282
  root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
283
283
  Yasuri::TextNode.new('/html/body/p', "content"),
284
- ], 3)
284
+ ], limit:3)
285
285
  actual = root_node.inject(@agent, @page)
286
286
  expected = [
287
287
  {"content" => "PaginationTest01"},
@@ -327,7 +327,7 @@ describe 'Yasuri' do
327
327
  end
328
328
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
329
329
  Yasuri::TextNode.new('/html/body/p', "content"),
330
- ], 2)
330
+ ], limit: 2)
331
331
  compare_generated_vs_original(generated, original, @page)
332
332
  end
333
333
  end
@@ -386,6 +386,26 @@ describe 'Yasuri' do
386
386
  compare_generated_vs_original(generated, original, paginate_test_page)
387
387
  end
388
388
 
389
+ it "return PaginateNode/TextNode with limit" do
390
+ src = %q|{ "node" : "pages",
391
+ "name" : "root",
392
+ "path" : "/html/body/nav/span/a[@class=\'next\']",
393
+ "limit" : 2,
394
+ "children" : [ { "node" : "text",
395
+ "name" : "content",
396
+ "path" : "/html/body/p"
397
+ } ]
398
+ }|
399
+ generated = Yasuri.json2tree(src)
400
+ original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
401
+ Yasuri::TextNode.new('/html/body/p', "content"),
402
+ ], limit:2)
403
+
404
+ paginate_test_uri = @uri + "/pagination/page01.html"
405
+ paginate_test_page = @agent.get(paginate_test_uri)
406
+ compare_generated_vs_original(generated, original, paginate_test_page)
407
+ end
408
+
389
409
  it "return StructNode/StructNode/[TextNode,TextNode]" do
390
410
  src = %q| { "node" : "struct",
391
411
  "name" : "tables",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-23 00:00:00.000000000 Z
11
+ date: 2015-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler