yasuri 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18c8b6da6ca1f9d5433adc128b83ed5d5a8e353e
4
- data.tar.gz: 07ba467f8d62982e4a8e969da42b839d8ee07664
3
+ metadata.gz: f667fce1e95bc5a16e350a1227abdde1a6db8514
4
+ data.tar.gz: 68cb3c5754f9636f9ae2c677feccfd62183c0af5
5
5
  SHA512:
6
- metadata.gz: 232a5893c4511b0ef80b34a95d58af4d1bb8683512a1cb41b7f9a6d19def75d3c825b12c383bf51bab09e12fe8bd54cd91b79b32640319b60d6185a46ed7f086
7
- data.tar.gz: 4cb31e01b60861d13770b8b9d033cef35fe6bbdd226d11703826040840ac979fcfd98c162b18322baa8f9102fd2ed059ac0c15c277c49217feaf39173800dea6
6
+ metadata.gz: ddb2760495645509953e05748ac617148f1a69916f955f4781fef4565dcb030d44a204d2999e5510c3dc43af6500c654886fe8eeb5694d339c76e8eb7fa195ba
7
+ data.tar.gz: 2ae5d067c108a7137739c3df51001abfbc6002bda5eeeec1e3632e3e57154e8f856effd0c0017b587768feb84c76b73889582ec526e07697c95d8ba140bb8e9a
@@ -1,3 +1,3 @@
1
1
  module Yasuri
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
data/lib/yasuri/yasuri.rb CHANGED
@@ -15,7 +15,7 @@ require_relative 'yasuri_node_generator'
15
15
  module Yasuri
16
16
 
17
17
  def self.json2tree(json_string)
18
- json = JSON.parse(json_string)
18
+ json = JSON.parse(json_string, {symbolize_names: true})
19
19
  Yasuri.hash2node(json)
20
20
  end
21
21
 
@@ -30,26 +30,31 @@ module Yasuri
30
30
 
31
31
  private
32
32
  Text2Node = {
33
- "text" => Yasuri::TextNode,
34
- "struct" => Yasuri::StructNode,
35
- "links" => Yasuri::LinksNode,
36
- "pages" => Yasuri::PaginateNode
33
+ text: Yasuri::TextNode,
34
+ struct: Yasuri::StructNode,
35
+ links: Yasuri::LinksNode,
36
+ pages: Yasuri::PaginateNode
37
37
  }
38
38
  Node2Text = Text2Node.invert
39
39
 
40
- ReservedKeys = %w|node name path children|
40
+ ReservedKeys = %i|node name path children|
41
41
  def self.hash2node(node_h)
42
42
  node, name, path, children = ReservedKeys.map do |key|
43
43
  node_h[key]
44
44
  end
45
45
  children ||= []
46
46
 
47
+ fail "Not found 'node' value in json" if node.nil?
48
+ fail "Not found 'name' value in json" if name.nil?
49
+ fail "Not found 'path' value in json" if path.nil?
50
+
47
51
  childnodes = children.map{|c| Yasuri.hash2node(c) }
48
52
  ReservedKeys.each{|key| node_h.delete(key)}
49
53
  opt = node_h
50
54
 
51
- klass = Text2Node[node]
52
- klass ? klass.new(path, name, childnodes, opt: opt) : nil
55
+ klass = Text2Node[node.to_sym]
56
+ fail "Undefined node type #{node}" if klass.nil?
57
+ klass.new(path, name, childnodes, opt)
53
58
  end
54
59
 
55
60
  def self.node2hash(node)
@@ -73,6 +78,10 @@ module Yasuri
73
78
  json
74
79
  end
75
80
 
81
+ def self.NodeName(name, symbolize_names:false)
82
+ symbolize_names ? name.to_sym : name
83
+ end
84
+
76
85
  def self.with_retry(retry_count = 5)
77
86
  begin
78
87
  return yield() if block_given?
@@ -6,14 +6,17 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class LinksNode
8
8
  include Node
9
- def inject(agent, page, retry_count = 5)
9
+ def inject(agent, page, opt = {})
10
+ retry_count = opt[:retry_count] || 5
11
+
10
12
  links = page.search(@xpath) || [] # links expected
11
13
  links.map do |link|
12
14
  link_button = Mechanize::Page::Link.new(link, agent, page)
13
15
  child_page = Yasuri.with_retry(retry_count) { link_button.click }
14
16
 
15
17
  child_results_kv = @children.map do |child_node|
16
- [child_node.name, child_node.inject(agent, child_page, retry_count)]
18
+ child_name = Yasuri.NodeName(child_node.name, opt)
19
+ [child_name, child_node.inject(agent, child_page, opt)]
17
20
  end
18
21
 
19
22
  Hash[child_results_kv]
@@ -11,7 +11,7 @@ module Yasuri
11
11
  @xpath, @name, @children = xpath, name, children
12
12
  end
13
13
 
14
- def inject(agent, page)
14
+ def inject(agent, page, opt = {})
15
15
  fail "#{Kernel.__method__} is not implemented."
16
16
  end
17
17
  def opts
@@ -7,18 +7,20 @@ module Yasuri
7
7
  class PaginateNode
8
8
  include Node
9
9
 
10
- def initialize(xpath, name, children = [], limit: nil, opt: {})
10
+ def initialize(xpath, name, children = [], limit: nil)
11
11
  super(xpath, name, children)
12
- @limit = limit || opt["limit"]
12
+ @limit = limit
13
13
  end
14
14
 
15
- def inject(agent, page, retry_count = 5)
15
+ def inject(agent, page, opt = {})
16
+ retry_count = opt[:retry_count] || 5
16
17
 
17
18
  child_results = []
18
19
  limit = @limit.nil? ? Float::MAX : @limit
19
20
  while page
20
21
  child_results_kv = @children.map do |child_node|
21
- [child_node.name, child_node.inject(agent, page, retry_count)]
22
+ child_name = Yasuri.NodeName(child_node.name, opt)
23
+ [child_name, child_node.inject(agent, page, opt)]
22
24
  end
23
25
  child_results << Hash[child_results_kv]
24
26
 
@@ -6,11 +6,12 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class StructNode
8
8
  include Node
9
- def inject(agent, page, retry_count = 5)
9
+ def inject(agent, page, opt = {})
10
10
  sub_tags = page.search(@xpath)
11
11
  sub_tags.map do |sub_tag|
12
12
  child_results_kv = @children.map do |child_node|
13
- [child_node.name, child_node.inject(agent, sub_tag, retry_count)]
13
+ child_name = Yasuri.NodeName(child_node.name, opt)
14
+ [child_name, child_node.inject(agent, sub_tag, opt)]
14
15
  end
15
16
  Hash[child_results_kv]
16
17
  end
@@ -6,18 +6,17 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class TextNode
8
8
  include Node
9
- def initialize(xpath, name, children = [], truncate: nil, opt: {})
10
- super(xpath, name, children)
11
9
 
12
- truncate_opt = opt["truncate"] #str
13
- truncate_opt = Regexp.new(truncate_opt) if not truncate_opt.nil? # regexp or nil
10
+ def initialize(xpath, name, children = [], truncate: nil)
11
+ super(xpath, name, children)
14
12
 
15
- @truncate = truncate || truncate_opt || nil # regexp or nil
13
+ truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
16
14
 
15
+ @truncate = truncate
17
16
  @truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
18
-
19
17
  end
20
- def inject(agent, page, retry_count = 5)
18
+
19
+ def inject(agent, page, opt = {})
21
20
  node = page.search(@xpath)
22
21
  text = node.text.to_s
23
22
 
@@ -90,5 +90,19 @@ describe 'Yasuri' do
90
90
  ])
91
91
  compare_generated_vs_original(generated, original, @index_page)
92
92
  end
93
+
94
+ it 'return child node as symbol' do
95
+ root_node = Yasuri::LinksNode.new('/html/body/a', "root", [
96
+ Yasuri::TextNode.new('/html/body/p', "content"),
97
+ ])
98
+
99
+ actual = root_node.inject(@agent, @index_page, symbolize_names: true )
100
+ expected = [
101
+ {:content => "Child 01 page."},
102
+ {:content => "Child 02 page."},
103
+ {:content => "Child 03 page."},
104
+ ]
105
+ expect(actual).to match expected
106
+ end
93
107
  end
94
108
  end
@@ -81,5 +81,19 @@ describe 'Yasuri' do
81
81
  ], limit: 2)
82
82
  compare_generated_vs_original(generated, original, @page)
83
83
  end
84
+
85
+ it "return child node as symbol" do
86
+ root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
87
+ Yasuri::TextNode.new('/html/body/p', "content"),
88
+ ])
89
+ actual = root_node.inject(@agent, @page, symbolize_names:true)
90
+ expected = [
91
+ {:content => "PaginationTest01"},
92
+ {:content => "PaginationTest02"},
93
+ {:content => "PaginationTest03"},
94
+ {:content => "PaginationTest04"},
95
+ ]
96
+ expect(actual).to match expected
97
+ end
84
98
  end
85
99
  end
data/spec/yasuri_spec.rb CHANGED
@@ -20,9 +20,8 @@ describe 'Yasuri' do
20
20
  # json2tree #
21
21
  #############
22
22
  describe '.json2tree' do
23
- it "return empty tree" do
24
- tree = Yasuri.json2tree("{}")
25
- expect(tree).to be_nil
23
+ it "fail if empty json" do
24
+ expect { Yasuri.json2tree("{}") }.to raise_error
26
25
  end
27
26
 
28
27
  it "return TextNode" do
@@ -32,6 +31,7 @@ describe 'Yasuri' do
32
31
  }|
33
32
  generated = Yasuri.json2tree(src)
34
33
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
34
+
35
35
  compare_generated_vs_original(generated, original, @index_page)
36
36
  end
37
37
 
@@ -110,5 +110,16 @@ describe 'Yasuri' do
110
110
  ])
111
111
  compare_generated_vs_original(generated, original, @page)
112
112
  end
113
+
114
+ it 'return child node as symbol' do
115
+ node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
116
+ Yasuri::TextNode.new('./td[1]', "title"),
117
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
118
+ ])
119
+ expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
120
+ actual = node.inject(@agent, @page, symbolize_names:true)
121
+ expect(actual).to match expected
122
+ end
123
+
113
124
  end
114
125
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-26 00:00:00.000000000 Z
11
+ date: 2015-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler