yasuri 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18c8b6da6ca1f9d5433adc128b83ed5d5a8e353e
4
- data.tar.gz: 07ba467f8d62982e4a8e969da42b839d8ee07664
3
+ metadata.gz: f667fce1e95bc5a16e350a1227abdde1a6db8514
4
+ data.tar.gz: 68cb3c5754f9636f9ae2c677feccfd62183c0af5
5
5
  SHA512:
6
- metadata.gz: 232a5893c4511b0ef80b34a95d58af4d1bb8683512a1cb41b7f9a6d19def75d3c825b12c383bf51bab09e12fe8bd54cd91b79b32640319b60d6185a46ed7f086
7
- data.tar.gz: 4cb31e01b60861d13770b8b9d033cef35fe6bbdd226d11703826040840ac979fcfd98c162b18322baa8f9102fd2ed059ac0c15c277c49217feaf39173800dea6
6
+ metadata.gz: ddb2760495645509953e05748ac617148f1a69916f955f4781fef4565dcb030d44a204d2999e5510c3dc43af6500c654886fe8eeb5694d339c76e8eb7fa195ba
7
+ data.tar.gz: 2ae5d067c108a7137739c3df51001abfbc6002bda5eeeec1e3632e3e57154e8f856effd0c0017b587768feb84c76b73889582ec526e07697c95d8ba140bb8e9a
@@ -1,3 +1,3 @@
1
1
  module Yasuri
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
data/lib/yasuri/yasuri.rb CHANGED
@@ -15,7 +15,7 @@ require_relative 'yasuri_node_generator'
15
15
  module Yasuri
16
16
 
17
17
  def self.json2tree(json_string)
18
- json = JSON.parse(json_string)
18
+ json = JSON.parse(json_string, {symbolize_names: true})
19
19
  Yasuri.hash2node(json)
20
20
  end
21
21
 
@@ -30,26 +30,31 @@ module Yasuri
30
30
 
31
31
  private
32
32
  Text2Node = {
33
- "text" => Yasuri::TextNode,
34
- "struct" => Yasuri::StructNode,
35
- "links" => Yasuri::LinksNode,
36
- "pages" => Yasuri::PaginateNode
33
+ text: Yasuri::TextNode,
34
+ struct: Yasuri::StructNode,
35
+ links: Yasuri::LinksNode,
36
+ pages: Yasuri::PaginateNode
37
37
  }
38
38
  Node2Text = Text2Node.invert
39
39
 
40
- ReservedKeys = %w|node name path children|
40
+ ReservedKeys = %i|node name path children|
41
41
  def self.hash2node(node_h)
42
42
  node, name, path, children = ReservedKeys.map do |key|
43
43
  node_h[key]
44
44
  end
45
45
  children ||= []
46
46
 
47
+ fail "Not found 'node' value in json" if node.nil?
48
+ fail "Not found 'name' value in json" if name.nil?
49
+ fail "Not found 'path' value in json" if path.nil?
50
+
47
51
  childnodes = children.map{|c| Yasuri.hash2node(c) }
48
52
  ReservedKeys.each{|key| node_h.delete(key)}
49
53
  opt = node_h
50
54
 
51
- klass = Text2Node[node]
52
- klass ? klass.new(path, name, childnodes, opt: opt) : nil
55
+ klass = Text2Node[node.to_sym]
56
+ fail "Undefined node type #{node}" if klass.nil?
57
+ klass.new(path, name, childnodes, opt)
53
58
  end
54
59
 
55
60
  def self.node2hash(node)
@@ -73,6 +78,10 @@ module Yasuri
73
78
  json
74
79
  end
75
80
 
81
+ def self.NodeName(name, symbolize_names:false)
82
+ symbolize_names ? name.to_sym : name
83
+ end
84
+
76
85
  def self.with_retry(retry_count = 5)
77
86
  begin
78
87
  return yield() if block_given?
@@ -6,14 +6,17 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class LinksNode
8
8
  include Node
9
- def inject(agent, page, retry_count = 5)
9
+ def inject(agent, page, opt = {})
10
+ retry_count = opt[:retry_count] || 5
11
+
10
12
  links = page.search(@xpath) || [] # links expected
11
13
  links.map do |link|
12
14
  link_button = Mechanize::Page::Link.new(link, agent, page)
13
15
  child_page = Yasuri.with_retry(retry_count) { link_button.click }
14
16
 
15
17
  child_results_kv = @children.map do |child_node|
16
- [child_node.name, child_node.inject(agent, child_page, retry_count)]
18
+ child_name = Yasuri.NodeName(child_node.name, opt)
19
+ [child_name, child_node.inject(agent, child_page, opt)]
17
20
  end
18
21
 
19
22
  Hash[child_results_kv]
@@ -11,7 +11,7 @@ module Yasuri
11
11
  @xpath, @name, @children = xpath, name, children
12
12
  end
13
13
 
14
- def inject(agent, page)
14
+ def inject(agent, page, opt = {})
15
15
  fail "#{Kernel.__method__} is not implemented."
16
16
  end
17
17
  def opts
@@ -7,18 +7,20 @@ module Yasuri
7
7
  class PaginateNode
8
8
  include Node
9
9
 
10
- def initialize(xpath, name, children = [], limit: nil, opt: {})
10
+ def initialize(xpath, name, children = [], limit: nil)
11
11
  super(xpath, name, children)
12
- @limit = limit || opt["limit"]
12
+ @limit = limit
13
13
  end
14
14
 
15
- def inject(agent, page, retry_count = 5)
15
+ def inject(agent, page, opt = {})
16
+ retry_count = opt[:retry_count] || 5
16
17
 
17
18
  child_results = []
18
19
  limit = @limit.nil? ? Float::MAX : @limit
19
20
  while page
20
21
  child_results_kv = @children.map do |child_node|
21
- [child_node.name, child_node.inject(agent, page, retry_count)]
22
+ child_name = Yasuri.NodeName(child_node.name, opt)
23
+ [child_name, child_node.inject(agent, page, opt)]
22
24
  end
23
25
  child_results << Hash[child_results_kv]
24
26
 
@@ -6,11 +6,12 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class StructNode
8
8
  include Node
9
- def inject(agent, page, retry_count = 5)
9
+ def inject(agent, page, opt = {})
10
10
  sub_tags = page.search(@xpath)
11
11
  sub_tags.map do |sub_tag|
12
12
  child_results_kv = @children.map do |child_node|
13
- [child_node.name, child_node.inject(agent, sub_tag, retry_count)]
13
+ child_name = Yasuri.NodeName(child_node.name, opt)
14
+ [child_name, child_node.inject(agent, sub_tag, opt)]
14
15
  end
15
16
  Hash[child_results_kv]
16
17
  end
@@ -6,18 +6,17 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class TextNode
8
8
  include Node
9
- def initialize(xpath, name, children = [], truncate: nil, opt: {})
10
- super(xpath, name, children)
11
9
 
12
- truncate_opt = opt["truncate"] #str
13
- truncate_opt = Regexp.new(truncate_opt) if not truncate_opt.nil? # regexp or nil
10
+ def initialize(xpath, name, children = [], truncate: nil)
11
+ super(xpath, name, children)
14
12
 
15
- @truncate = truncate || truncate_opt || nil # regexp or nil
13
+ truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
16
14
 
15
+ @truncate = truncate
17
16
  @truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
18
-
19
17
  end
20
- def inject(agent, page, retry_count = 5)
18
+
19
+ def inject(agent, page, opt = {})
21
20
  node = page.search(@xpath)
22
21
  text = node.text.to_s
23
22
 
@@ -90,5 +90,19 @@ describe 'Yasuri' do
90
90
  ])
91
91
  compare_generated_vs_original(generated, original, @index_page)
92
92
  end
93
+
94
+ it 'return child node as symbol' do
95
+ root_node = Yasuri::LinksNode.new('/html/body/a', "root", [
96
+ Yasuri::TextNode.new('/html/body/p', "content"),
97
+ ])
98
+
99
+ actual = root_node.inject(@agent, @index_page, symbolize_names: true )
100
+ expected = [
101
+ {:content => "Child 01 page."},
102
+ {:content => "Child 02 page."},
103
+ {:content => "Child 03 page."},
104
+ ]
105
+ expect(actual).to match expected
106
+ end
93
107
  end
94
108
  end
@@ -81,5 +81,19 @@ describe 'Yasuri' do
81
81
  ], limit: 2)
82
82
  compare_generated_vs_original(generated, original, @page)
83
83
  end
84
+
85
+ it "return child node as symbol" do
86
+ root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
87
+ Yasuri::TextNode.new('/html/body/p', "content"),
88
+ ])
89
+ actual = root_node.inject(@agent, @page, symbolize_names:true)
90
+ expected = [
91
+ {:content => "PaginationTest01"},
92
+ {:content => "PaginationTest02"},
93
+ {:content => "PaginationTest03"},
94
+ {:content => "PaginationTest04"},
95
+ ]
96
+ expect(actual).to match expected
97
+ end
84
98
  end
85
99
  end
data/spec/yasuri_spec.rb CHANGED
@@ -20,9 +20,8 @@ describe 'Yasuri' do
20
20
  # json2tree #
21
21
  #############
22
22
  describe '.json2tree' do
23
- it "return empty tree" do
24
- tree = Yasuri.json2tree("{}")
25
- expect(tree).to be_nil
23
+ it "fail if empty json" do
24
+ expect { Yasuri.json2tree("{}") }.to raise_error
26
25
  end
27
26
 
28
27
  it "return TextNode" do
@@ -32,6 +31,7 @@ describe 'Yasuri' do
32
31
  }|
33
32
  generated = Yasuri.json2tree(src)
34
33
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
34
+
35
35
  compare_generated_vs_original(generated, original, @index_page)
36
36
  end
37
37
 
@@ -110,5 +110,16 @@ describe 'Yasuri' do
110
110
  ])
111
111
  compare_generated_vs_original(generated, original, @page)
112
112
  end
113
+
114
+ it 'return child node as symbol' do
115
+ node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
116
+ Yasuri::TextNode.new('./td[1]', "title"),
117
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
118
+ ])
119
+ expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
120
+ actual = node.inject(@agent, @page, symbolize_names:true)
121
+ expect(actual).to match expected
122
+ end
123
+
113
124
  end
114
125
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yasuri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAC
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-26 00:00:00.000000000 Z
11
+ date: 2015-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler