yasuri 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,64 @@
1
+ require 'thor'
2
+ require 'json'
3
+ require 'yasuri'
4
+ require 'mechanize'
5
+
6
+ module Yasuri
7
+ class CLI < Thor
8
+ package_name "yasuri"
9
+
10
+ default_command :scrape
11
+ desc "scrape <URI> [[--file <TREE_FILE>] or [--json <JSON>]]", "Getting from <URI> and scrape it. with <JSON> or json/yml from <TREE_FILE>. They should be Yasuri's format json or yaml string."
12
+ option :file, {aliases: 'f', desc: "path to file that written yasuri tree as json or yaml", type: :string}
13
+ option :json, {aliases: 'j', desc: "yasuri tree format json string", type: :string}
14
+ def scrape(uri)
15
+ # argument validations
16
+ if [options[:file], options[:json]].compact.count != 1
17
+ $stderr.puts "ERROR: Only one of `--file` or `--json` option should be specified."
18
+ return -1
19
+ end
20
+ if options[:file]&.empty? or options[:file] == "file" or options[:json]&.empty?
21
+ $stderr.puts "ERROR: --file option require not empty argument."
22
+ return -1
23
+ end
24
+ if options[:json]&.empty? or options[:json] == "json"
25
+ $stderr.puts "ERROR: --json option require not empty argument."
26
+ return -1
27
+ end
28
+
29
+ tree = if options[:file]
30
+ src = File.read(options[:file])
31
+
32
+ begin
33
+ Yasuri.json2tree(src)
34
+ rescue
35
+ begin
36
+ Yasuri.yaml2tree(src)
37
+ rescue => e
38
+ $stderr.puts "ERROR: Failed to convert to yasuri tree `#{options[:file]}`. #{e.message}"
39
+ return -1
40
+ end
41
+ end
42
+ else
43
+ begin
44
+ Yasuri.json2tree(options[:json])
45
+ rescue => e
46
+ $stderr.puts "ERROR: Failed to convert json to yasuri tree. #{e.message}"
47
+ return -1
48
+ end
49
+ end
50
+
51
+ agent = Mechanize.new
52
+ root_page = agent.get(uri)
53
+ result = tree.inject(agent, root_page)
54
+
55
+ if result.instance_of?(String)
56
+ puts result
57
+ else
58
+ j result
59
+ end
60
+
61
+ return 0
62
+ end
63
+ end
64
+ end
@@ -7,7 +7,7 @@ module Yasuri
7
7
  class LinksNode
8
8
  include Node
9
9
  def inject(agent, page, opt = {}, element = page)
10
- retry_count = opt[:retry_count] || 5
10
+ retry_count = opt[:retry_count] || Yasuri::DefaultRetryCount
11
11
 
12
12
  links = element.search(@xpath) || [] # links expected
13
13
  links.map do |link|
@@ -15,7 +15,7 @@ module Yasuri
15
15
  child_page = Yasuri.with_retry(retry_count) { link_button.click }
16
16
 
17
17
  child_results_kv = @children.map do |child_node|
18
- child_name = Yasuri.NodeName(child_node.name, opt)
18
+ child_name = Yasuri.node_name(child_node.name, opt)
19
19
  [child_name, child_node.inject(agent, child_page, opt)]
20
20
  end
21
21
 
@@ -24,7 +24,7 @@ module Yasuri
24
24
  end
25
25
 
26
26
  def node_type_str
27
- "links"
27
+ "links".freeze
28
28
  end
29
29
  end # class
30
30
  end # module
@@ -3,7 +3,7 @@ module Yasuri
3
3
  class MapNode
4
4
  attr_reader :name, :children
5
5
 
6
- def initialize(name, children, opt: {})
6
+ def initialize(name, children, **opt)
7
7
  @name = name
8
8
  @children = children
9
9
  @opt = opt
@@ -16,39 +16,24 @@ module Yasuri
16
16
  Hash[child_results_kv]
17
17
  end
18
18
 
19
- def opts
20
- {}
21
- end
22
-
23
19
  def to_h
24
- h = {}
25
- h["node"] = "map"
26
- h["name"] = self.name
27
- h["children"] = self.children.map{|c| c.to_h} if not children.empty?
20
+ node_hash = {}
21
+ self.opts.each{|k, v| node_hash[k] = v if not v.nil?}
28
22
 
29
- self.opts.each do |key,value|
30
- h[key] = value if not value.nil?
23
+ children.each do |child|
24
+ child_node_name = "#{child.node_type_str}_#{child.name}"
25
+ node_hash[child_node_name] = child.to_h
31
26
  end
32
27
 
33
- h
28
+ node_hash
34
29
  end
35
30
 
36
- def self.hash2node(node_h)
37
- reservedKeys = %i|node name children|
38
-
39
- node, name, children = reservedKeys.map do |key|
40
- node_h[key]
41
- end
42
-
43
- fail "Not found 'name' value in map" if name.nil?
44
- fail "Not found 'children' value in map" if children.nil?
45
- children ||= []
46
-
47
- childnodes = children.map{|c| Yasuri.hash2node(c) }
48
- reservedKeys.each{|key| node_h.delete(key)}
49
- opt = node_h
31
+ def opts
32
+ {}
33
+ end
50
34
 
51
- self.new(name, childnodes, **opt)
35
+ def node_type_str
36
+ "map".freeze
52
37
  end
53
38
  end
54
39
  end
@@ -7,7 +7,7 @@ module Yasuri
7
7
  module Node
8
8
  attr_reader :url, :xpath, :name, :children
9
9
 
10
- def initialize(xpath, name, children = [], opt: {})
10
+ def initialize(xpath, name, children = [], **opt)
11
11
  @xpath, @name, @children = xpath, name, children
12
12
  end
13
13
 
@@ -15,50 +15,28 @@ module Yasuri
15
15
  fail "#{Kernel.__method__} is not implemented in included class."
16
16
  end
17
17
 
18
- def opts
19
- {}
20
- end
21
-
22
18
  def to_h
23
- h = {}
24
- h["node"] = self.node_type_str
25
- h["name"] = self.name
26
- h["path"] = self.xpath
27
- h["children"] = self.children.map{|c| c.to_h} if not children.empty?
28
-
29
- self.opts.each do |key,value|
30
- h[key] = value if not value.nil?
31
- end
32
-
33
- h
34
- end
19
+ return @xpath if @xpath and @children.empty? and self.opts.values.compact.empty?
35
20
 
36
- module ClassMethods
37
- def hash2node(node_h)
38
- reservedKeys = %i|node name path children|
21
+ node_hash = {}
22
+ self.opts.each{|k, v| node_hash[k] = v if not v.nil?}
39
23
 
40
- node, name, path, children = ReservedKeys.map do |key|
41
- node_h[key]
42
- end
24
+ node_hash[:path] = @xpath if @xpath
43
25
 
44
- fail "Not found 'name' value in map" if name.nil?
45
- fail "Not found 'path' value in map" if path.nil?
46
- children ||= []
47
-
48
- childnodes = children.map{|c| Yasuri.hash2node(c) }
49
- reservedKeys.each{|key| node_h.delete(key)}
50
- opt = node_h
51
-
52
- self.new(path, name, childnodes, **opt)
26
+ children.each do |child|
27
+ child_node_name = "#{child.node_type_str}_#{child.name}"
28
+ node_hash[child_node_name] = child.to_h
53
29
  end
54
30
 
55
- def node_type_str
56
- fail "#{Kernel.__method__} is not implemented in included class."
57
- end
31
+ node_hash
32
+ end
33
+
34
+ def opts
35
+ {}
58
36
  end
59
37
 
60
- def self.included(base)
61
- base.extend(ClassMethods)
38
+ def node_type_str
39
+ fail "#{Kernel.__method__} is not implemented in included class."
62
40
  end
63
41
  end
64
42
  end
@@ -14,7 +14,7 @@ module Yasuri
14
14
  end
15
15
 
16
16
  def inject(agent, page, opt = {}, element = page)
17
- retry_count = opt[:retry_count] || 5
17
+ retry_count = opt[:retry_count] || Yasuri::DefaultRetryCount
18
18
 
19
19
  raise NotImplementedError.new("PagenateNode inside StructNode, Not Supported") if page != element
20
20
 
@@ -22,12 +22,12 @@ module Yasuri
22
22
  limit = @limit.nil? ? Float::MAX : @limit
23
23
  while page
24
24
  child_results_kv = @children.map do |child_node|
25
- child_name = Yasuri.NodeName(child_node.name, opt)
25
+ child_name = Yasuri.node_name(child_node.name, opt)
26
26
  [child_name, child_node.inject(agent, page, opt)]
27
27
  end
28
28
  child_results << Hash[child_results_kv]
29
29
 
30
- link = page.search(@xpath).first
30
+ link = page.search(@xpath).first # Todo raise: link is not found
31
31
  break if link == nil
32
32
 
33
33
  link_button = Mechanize::Page::Link.new(link, agent, page)
@@ -41,12 +41,13 @@ module Yasuri
41
41
 
42
42
  child_results
43
43
  end
44
+
44
45
  def opts
45
46
  {limit:@limit, flatten:@flatten}
46
47
  end
47
48
 
48
49
  def node_type_str
49
- "pages"
50
+ "pages".freeze
50
51
  end
51
52
  end
52
53
  end
@@ -10,12 +10,16 @@ module Yasuri
10
10
  sub_tags = element.search(@xpath)
11
11
  tree = sub_tags.map do |sub_tag|
12
12
  child_results_kv = @children.map do |child_node|
13
- child_name = Yasuri.NodeName(child_node.name, opt)
13
+ child_name = Yasuri.node_name(child_node.name, opt)
14
14
  [child_name, child_node.inject(agent, page, opt, sub_tag)]
15
15
  end
16
16
  Hash[child_results_kv]
17
17
  end
18
18
  tree.size == 1 ? tree.first : tree
19
19
  end # inject
20
+
21
+ def node_type_str
22
+ "struct".freeze
23
+ end
20
24
  end
21
25
  end
@@ -18,7 +18,6 @@ module Yasuri
18
18
  @truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
19
19
 
20
20
  @proc = proc.nil? ? nil : proc.to_sym
21
-
22
21
  end
23
22
 
24
23
  def inject(agent, page, opt = {}, element = page)
@@ -31,15 +30,16 @@ module Yasuri
31
30
  end
32
31
 
33
32
  text = text.__send__(@proc) if @proc && text.respond_to?(@proc)
34
- text
35
- end
36
33
 
37
- def node_type_str
38
- "text"
34
+ text
39
35
  end
40
36
 
41
37
  def opts
42
38
  {truncate:@truncate, proc:@proc}
43
39
  end
40
+
41
+ def node_type_str
42
+ "text".freeze
43
+ end
44
44
  end
45
45
  end
@@ -0,0 +1,8 @@
1
+ {
2
+ "pages_root": {
3
+ "path": "/html/body/nav/span/a[@class='next']",
4
+ "limit": 10,
5
+ "flatten": false,
6
+ "text_content": "/html/body/p"
7
+ }
8
+ }
@@ -0,0 +1,5 @@
1
+ pages_root:
2
+ path: "/html/body/nav/span/a[@class='next']"
3
+ limit: 10
4
+ flatten: false
5
+ text_content: "/html/body/p"
@@ -0,0 +1,9 @@
1
+ {
2
+ ,,,
3
+ "pages_root": {
4
+ "path": "/html/body/nav/span/a[@class='next']",
5
+ "limit": 10,
6
+ "flatten": false,
7
+ "text_content": "/html/body/p"
8
+ }
9
+ }
@@ -0,0 +1,6 @@
1
+ ,,,
2
+ pages_root:
3
+ path: "/html/body/nav/span/a[@class='next']"
4
+ limit: 10
5
+ flatten: false
6
+ text_content: "/html/body/p"
data/spec/spec_helper.rb CHANGED
@@ -16,7 +16,7 @@ require 'simplecov'
16
16
  require 'coveralls'
17
17
  Coveralls.wear!
18
18
 
19
- SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
19
+ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new [
20
20
  SimpleCov::Formatter::HTMLFormatter,
21
21
  Coveralls::SimpleCov::Formatter
22
22
  ]
@@ -0,0 +1,83 @@
1
+ require_relative 'spec_helper'
2
+
3
+ describe 'Yasuri' do
4
+ include_context 'httpserver'
5
+
6
+ before do
7
+ @agent = Mechanize.new
8
+ @index_page = @agent.get(uri)
9
+
10
+ @res_dir = File.expand_path('../cli_resources', __FILE__)
11
+ end
12
+
13
+ describe 'cli scrape' do
14
+ it "require --file or --json option" do
15
+ expect {
16
+ Yasuri::CLI.new.invoke(:scrape, [uri], {})
17
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
18
+ end
19
+
20
+ it "only one of --file or --json option" do
21
+ expect {
22
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "path.json", json: '{"text_title": "/html/head/title"}'})
23
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
24
+ end
25
+
26
+ it "require --file option is not empty string" do
27
+ expect {
28
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "file"})
29
+ }.to output("ERROR: --file option require not empty argument.\n").to_stderr
30
+ end
31
+
32
+ it "require --json option is not empty string" do
33
+ expect {
34
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: "json"})
35
+ }.to output("ERROR: --json option require not empty argument.\n").to_stderr
36
+ end
37
+
38
+
39
+ it "display text node as simple string" do
40
+ expect {
41
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_title": "/html/head/title"}'})
42
+ }.to output("Yasuri Test\n").to_stdout
43
+ end
44
+
45
+ it "display texts in single json" do
46
+ expect {
47
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_c1":"/html/body/p[1]", "text_c2":"/html/body/p[2]"}'})
48
+ }.to output('{"c1":"Hello,Yasuri","c2":"Last Modify - 2015/02/14"}'+"\n").to_stdout
49
+ end
50
+
51
+
52
+ it "display text node as simple string via json file" do
53
+ expect {
54
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.json"})
55
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
56
+ end
57
+ it "display text node as simple string via yaml file" do
58
+ expect {
59
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.yml"})
60
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
61
+ end
62
+
63
+
64
+ it "display ERROR when json string is wrong" do
65
+ wrong_json = '{,,}'
66
+ expect {
67
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: wrong_json})
68
+ }.to output("ERROR: Failed to convert json to yasuri tree. 809: unexpected token at '#{wrong_json}'\n").to_stderr
69
+ end
70
+ it "display ERROR when json file contains is wrong" do
71
+ file_path = "#{@res_dir}/tree_wrong.json"
72
+ expect {
73
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
74
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a flow node at line 2 column 3\n").to_stderr
75
+ end
76
+ it "display ERROR when yaml file contains is wrong" do
77
+ file_path = "#{@res_dir}/tree_wrong.yml"
78
+ expect {
79
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
80
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a block node at line 1 column 1\n").to_stderr
81
+ end
82
+ end
83
+ end
data/spec/yasuri_spec.rb CHANGED
@@ -13,6 +13,7 @@ describe 'Yasuri' do
13
13
  @index_page = @agent.get(@uri)
14
14
  end
15
15
 
16
+
16
17
  ############
17
18
  # yam2tree #
18
19
  ############
@@ -23,10 +24,8 @@ describe 'Yasuri' do
23
24
 
24
25
  it "return text node" do
25
26
  src = <<-EOB
26
- content:
27
- node: text
28
- path: "/html/body/p[1]"
29
- EOB
27
+ text_content: "/html/body/p[1]"
28
+ EOB
30
29
  generated = Yasuri.yaml2tree(src)
31
30
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
32
31
 
@@ -35,10 +34,9 @@ EOB
35
34
 
36
35
  it "return text node as symbol" do
37
36
  src = <<-EOB
38
- :content:
39
- :node: text
40
- :path: "/html/body/p[1]"
41
- EOB
37
+ :text_content:
38
+ :path: "/html/body/p[1]"
39
+ EOB
42
40
  generated = Yasuri.yaml2tree(src)
43
41
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
44
42
 
@@ -48,14 +46,10 @@ EOB
48
46
  it "return LinksNode/TextNode" do
49
47
 
50
48
  src = <<-EOB
51
- root:
52
- node: links
53
- path: "/html/body/a"
54
- children:
55
- - content:
56
- node: text
57
- path: "/html/body/p"
58
- EOB
49
+ links_root:
50
+ path: "/html/body/a"
51
+ text_content: "/html/body/p"
52
+ EOB
59
53
  generated = Yasuri.yaml2tree(src)
60
54
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
61
55
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -66,21 +60,13 @@ EOB
66
60
 
67
61
  it "return StructNode/StructNode/[TextNode,TextNode]" do
68
62
  src = <<-EOB
69
- tables:
70
- node: struct
71
- path: "/html/body/table"
72
- children:
73
- - table:
74
- node: struct
75
- path: "./tr"
76
- children:
77
- - title:
78
- node: text
79
- path: "./td[1]"
80
- - pub_date:
81
- node: text
82
- path: "./td[2]"
83
- EOB
63
+ struct_tables:
64
+ path: "/html/body/table"
65
+ struct_table:
66
+ path: "./tr"
67
+ text_title: "./td[1]"
68
+ text_pub_date: "./td[2]"
69
+ EOB
84
70
 
85
71
  generated = Yasuri.yaml2tree(src)
86
72
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
@@ -105,10 +91,10 @@ EOB
105
91
  end
106
92
 
107
93
  it "return TextNode" do
108
- src = %q| { "node" : "text",
109
- "name" : "content",
110
- "path" : "/html/body/p[1]"
111
- }|
94
+ src = %q|
95
+ {
96
+ "text_content": "/html/body/p[1]"
97
+ }|
112
98
  generated = Yasuri.json2tree(src)
113
99
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
114
100
 
@@ -116,30 +102,24 @@ EOB
116
102
  end
117
103
 
118
104
  it "return TextNode with truncate_regexp" do
119
- src = %q| { "node" : "text",
120
- "name" : "content",
121
- "path" : "/html/body/p[1]",
122
- "truncate" : "^[^,]+"
123
- }|
105
+ src = %q|
106
+ {
107
+ "text_content": {
108
+ "path": "/html/body/p[1]",
109
+ "truncate" : "^[^,]+"
110
+ }
111
+ }|
124
112
  generated = Yasuri.json2tree(src)
125
113
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
126
114
  compare_generated_vs_original(generated, original, @index_page)
127
115
  end
128
116
 
129
117
  it "return MapNode with TextNodes" do
130
- src = %q| { "node" : "map",
131
- "name" : "parent",
132
- "children" : [
133
- { "node" : "text",
134
- "name" : "content01",
135
- "path" : "/html/body/p[1]"
136
- },
137
- { "node" : "text",
138
- "name" : "content02",
139
- "path" : "/html/body/p[2]"
140
- }
141
- ]
142
- }|
118
+ src = %q|
119
+ {
120
+ "text_content01": "/html/body/p[1]",
121
+ "text_content02": "/html/body/p[2]"
122
+ }|
143
123
  generated = Yasuri.json2tree(src)
144
124
  original = Yasuri::MapNode.new('parent', [
145
125
  Yasuri::TextNode.new('/html/body/p[1]', "content01"),
@@ -149,14 +129,14 @@ EOB
149
129
  end
150
130
 
151
131
  it "return LinksNode/TextNode" do
152
- src = %q| { "node" : "links",
153
- "name" : "root",
154
- "path" : "/html/body/a",
155
- "children" : [ { "node" : "text",
156
- "name" : "content",
157
- "path" : "/html/body/p"
158
- } ]
159
- }|
132
+ src = %q|
133
+ {
134
+ "links_root": {
135
+ "path": "/html/body/a",
136
+ "text_content": "/html/body/p"
137
+ }
138
+ }|
139
+
160
140
  generated = Yasuri.json2tree(src)
161
141
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
162
142
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -166,14 +146,13 @@ EOB
166
146
  end
167
147
 
168
148
  it "return PaginateNode/TextNode" do
169
- src = %q|{ "node" : "pages",
170
- "name" : "root",
171
- "path" : "/html/body/nav/span/a[@class=\'next\']",
172
- "children" : [ { "node" : "text",
173
- "name" : "content",
174
- "path" : "/html/body/p"
175
- } ]
176
- }|
149
+ src = %q|
150
+ {
151
+ "pages_root": {
152
+ "path": "/html/body/nav/span/a[@class=\'next\']",
153
+ "text_content": "/html/body/p"
154
+ }
155
+ }|
177
156
  generated = Yasuri.json2tree(src)
178
157
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
179
158
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -185,15 +164,14 @@ EOB
185
164
  end
186
165
 
187
166
  it "return PaginateNode/TextNode with limit" do
188
- src = %q|{ "node" : "pages",
189
- "name" : "root",
190
- "path" : "/html/body/nav/span/a[@class=\'next\']",
191
- "limit" : 2,
192
- "children" : [ { "node" : "text",
193
- "name" : "content",
194
- "path" : "/html/body/p"
195
- } ]
196
- }|
167
+ src = %q|
168
+ {
169
+ "pages_root": {
170
+ "path": "/html/body/nav/span/a[@class=\'next\']",
171
+ "limit": 2,
172
+ "text_content": "/html/body/p"
173
+ }
174
+ }|
197
175
  generated = Yasuri.json2tree(src)
198
176
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
199
177
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -205,24 +183,17 @@ EOB
205
183
  end
206
184
 
207
185
  it "return StructNode/StructNode/[TextNode,TextNode]" do
208
- src = %q| { "node" : "struct",
209
- "name" : "tables",
210
- "path" : "/html/body/table",
211
- "children" : [
212
- { "node" : "struct",
213
- "name" : "table",
214
- "path" : "./tr",
215
- "children" : [
216
- { "node" : "text",
217
- "name" : "title",
218
- "path" : "./td[1]"
219
- },
220
- { "node" : "text",
221
- "name" : "pub_date",
222
- "path" : "./td[2]"
223
- }]
224
- }]
225
- }|
186
+ src = %q|
187
+ {
188
+ "struct_tables": {
189
+ "path": "/html/body/table",
190
+ "struct_table": {
191
+ "path": "./tr",
192
+ "text_title": "./td[1]",
193
+ "text_pub_date": "./td[2]"
194
+ }
195
+ }
196
+ }|
226
197
  generated = Yasuri.json2tree(src)
227
198
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
228
199
  Yasuri::StructNode.new('./tr', "table", [
@@ -235,22 +206,22 @@ EOB
235
206
  end
236
207
  end
237
208
 
209
+
238
210
  #############
239
211
  # tree2json #
240
212
  #############
241
213
  describe '.tree2json' do
242
214
  it "return empty json" do
243
- json = Yasuri.tree2json(nil)
244
- expect(json).to match "{}"
215
+ expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
245
216
  end
246
217
 
247
218
  it "return text node" do
248
219
  node = Yasuri::TextNode.new("/html/head/title", "title")
249
220
  json = Yasuri.tree2json(node)
250
- expected_str = %q| { "node": "text",
251
- "name": "title",
252
- "path": "/html/head/title"
253
- } |
221
+ expected_str = %q|
222
+ {
223
+ "text_title": "/html/head/title"
224
+ }|
254
225
  expected = JSON.parse(expected_str)
255
226
  actual = JSON.parse(json)
256
227
  expect(actual).to match expected
@@ -259,11 +230,13 @@ EOB
259
230
  it "return text node with truncate_regexp" do
260
231
  node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
261
232
  json = Yasuri.tree2json(node)
262
- expected_str = %q| { "node": "text",
263
- "name": "title",
264
- "path": "/html/head/title",
265
- "truncate": "^[^,]+"
266
- } |
233
+ expected_str = %q|
234
+ {
235
+ "text_title": {
236
+ "path": "/html/head/title",
237
+ "truncate": "^[^,]+"
238
+ }
239
+ }|
267
240
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
268
241
  actual = Yasuri.tree2json(Yasuri.json2tree(json))
269
242
  expect(actual).to match expected
@@ -276,19 +249,12 @@ EOB
276
249
  ])
277
250
  actual_json = Yasuri.tree2json(tree)
278
251
 
279
- expected_json = %q| { "node" : "map",
280
- "name" : "parent",
281
- "children" : [
282
- { "node" : "text",
283
- "name" : "content01",
284
- "path" : "/html/body/p[1]"
285
- },
286
- { "node" : "text",
287
- "name" : "content02",
288
- "path" : "/html/body/p[2]"
289
- }
290
- ]
252
+ expected_json = %q|
253
+ {
254
+ "text_content01": "/html/body/p[1]",
255
+ "text_content02": "/html/body/p[2]"
291
256
  }|
257
+
292
258
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
293
259
  actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
294
260
  expect(actual).to match expected
@@ -299,14 +265,14 @@ EOB
299
265
  Yasuri::TextNode.new('/html/body/p', "content"),
300
266
  ])
301
267
  json = Yasuri.tree2json(tree)
302
- expected_src = %q| { "node" : "links",
303
- "name" : "root",
304
- "path" : "/html/body/a",
305
- "children" : [ { "node" : "text",
306
- "name" : "content",
307
- "path" : "/html/body/p"
308
- } ]
309
- }|
268
+
269
+ expected_src = %q|
270
+ {
271
+ "links_root": {
272
+ "path": "/html/body/a",
273
+ "text_content":"/html/body/p"
274
+ }
275
+ }|
310
276
  expected = JSON.parse(expected_src)
311
277
  actual = JSON.parse(json)
312
278
  expect(actual).to match expected
@@ -318,25 +284,44 @@ EOB
318
284
  ], limit:10)
319
285
 
320
286
  json = Yasuri.tree2json(tree)
321
- expected_src = %q| { "node" : "pages",
322
- "name" : "root",
323
- "path" : "/html/body/nav/span/a[@class='next']",
324
- "limit" : 10,
325
- "flatten" : false,
326
- "children" : [ { "node" : "text",
327
- "name" : "content",
328
- "path" : "/html/body/p"
329
- } ]
330
- }|
287
+ expected_src = %q|
288
+ {
289
+ "pages_root": {
290
+ "path": "/html/body/nav/span/a[@class='next']",
291
+ "limit": 10,
292
+ "flatten": false,
293
+ "text_content": "/html/body/p"
294
+ }
295
+ }|
331
296
  expected = JSON.parse(expected_src)
332
297
  actual = JSON.parse(json)
333
298
  expect(actual).to match expected
334
299
  end
335
-
336
-
337
-
338
300
  end
339
301
 
302
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
303
+ tree = Yasuri::StructNode.new('/html/body/table', "tables", [
304
+ Yasuri::StructNode.new('./tr', "table", [
305
+ Yasuri::TextNode.new('./td[1]', "title"),
306
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
307
+ ])
308
+ ])
309
+ json = Yasuri.tree2json(tree)
310
+ expected_src = %q|
311
+ {
312
+ "struct_tables": {
313
+ "path": "/html/body/table",
314
+ "struct_table": {
315
+ "path": "./tr",
316
+ "text_title": "./td[1]",
317
+ "text_pub_date": "./td[2]"
318
+ }
319
+ }
320
+ }|
321
+ expected = JSON.parse(expected_src)
322
+ actual = JSON.parse(json)
323
+ expect(actual).to match expected
324
+ end
340
325
 
341
326
  it 'has a version number' do
342
327
  expect(Yasuri::VERSION).not_to be nil