yasuri 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ require 'thor'
2
+ require 'json'
3
+ require 'yasuri'
4
+ require 'mechanize'
5
+
6
+ module Yasuri
7
+ class CLI < Thor
8
+ package_name "yasuri"
9
+
10
+ default_command :scrape
11
+ desc "scrape <URI> [[--file <TREE_FILE>] or [--json <JSON>]]", "Getting from <URI> and scrape it. with <JSON> or json/yml from <TREE_FILE>. They should be Yasuri's format json or yaml string."
12
+ option :file, {aliases: 'f', desc: "path to file that written yasuri tree as json or yaml", type: :string}
13
+ option :json, {aliases: 'j', desc: "yasuri tree format json string", type: :string}
14
+ def scrape(uri)
15
+ # argument validations
16
+ if [options[:file], options[:json]].compact.count != 1
17
+ $stderr.puts "ERROR: Only one of `--file` or `--json` option should be specified."
18
+ return -1
19
+ end
20
+ if options[:file]&.empty? or options[:file] == "file" or options[:json]&.empty?
21
+ $stderr.puts "ERROR: --file option require not empty argument."
22
+ return -1
23
+ end
24
+ if options[:json]&.empty? or options[:json] == "json"
25
+ $stderr.puts "ERROR: --json option require not empty argument."
26
+ return -1
27
+ end
28
+
29
+ tree = if options[:file]
30
+ src = File.read(options[:file])
31
+
32
+ begin
33
+ Yasuri.json2tree(src)
34
+ rescue
35
+ begin
36
+ Yasuri.yaml2tree(src)
37
+ rescue => e
38
+ $stderr.puts "ERROR: Failed to convert to yasuri tree `#{options[:file]}`. #{e.message}"
39
+ return -1
40
+ end
41
+ end
42
+ else
43
+ begin
44
+ Yasuri.json2tree(options[:json])
45
+ rescue => e
46
+ $stderr.puts "ERROR: Failed to convert json to yasuri tree. #{e.message}"
47
+ return -1
48
+ end
49
+ end
50
+
51
+ agent = Mechanize.new
52
+ root_page = agent.get(uri)
53
+ result = tree.inject(agent, root_page)
54
+
55
+ if result.instance_of?(String)
56
+ puts result
57
+ else
58
+ j result
59
+ end
60
+
61
+ return 0
62
+ end
63
+ end
64
+ end
@@ -7,7 +7,7 @@ module Yasuri
7
7
  class LinksNode
8
8
  include Node
9
9
  def inject(agent, page, opt = {}, element = page)
10
- retry_count = opt[:retry_count] || 5
10
+ retry_count = opt[:retry_count] || Yasuri::DefaultRetryCount
11
11
 
12
12
  links = element.search(@xpath) || [] # links expected
13
13
  links.map do |link|
@@ -15,7 +15,7 @@ module Yasuri
15
15
  child_page = Yasuri.with_retry(retry_count) { link_button.click }
16
16
 
17
17
  child_results_kv = @children.map do |child_node|
18
- child_name = Yasuri.NodeName(child_node.name, opt)
18
+ child_name = Yasuri.node_name(child_node.name, opt)
19
19
  [child_name, child_node.inject(agent, child_page, opt)]
20
20
  end
21
21
 
@@ -24,7 +24,7 @@ module Yasuri
24
24
  end
25
25
 
26
26
  def node_type_str
27
- "links"
27
+ "links".freeze
28
28
  end
29
29
  end # class
30
30
  end # module
@@ -3,7 +3,7 @@ module Yasuri
3
3
  class MapNode
4
4
  attr_reader :name, :children
5
5
 
6
- def initialize(name, children, opt: {})
6
+ def initialize(name, children, **opt)
7
7
  @name = name
8
8
  @children = children
9
9
  @opt = opt
@@ -16,39 +16,24 @@ module Yasuri
16
16
  Hash[child_results_kv]
17
17
  end
18
18
 
19
- def opts
20
- {}
21
- end
22
-
23
19
  def to_h
24
- h = {}
25
- h["node"] = "map"
26
- h["name"] = self.name
27
- h["children"] = self.children.map{|c| c.to_h} if not children.empty?
20
+ node_hash = {}
21
+ self.opts.each{|k, v| node_hash[k] = v if not v.nil?}
28
22
 
29
- self.opts.each do |key,value|
30
- h[key] = value if not value.nil?
23
+ children.each do |child|
24
+ child_node_name = "#{child.node_type_str}_#{child.name}"
25
+ node_hash[child_node_name] = child.to_h
31
26
  end
32
27
 
33
- h
28
+ node_hash
34
29
  end
35
30
 
36
- def self.hash2node(node_h)
37
- reservedKeys = %i|node name children|
38
-
39
- node, name, children = reservedKeys.map do |key|
40
- node_h[key]
41
- end
42
-
43
- fail "Not found 'name' value in map" if name.nil?
44
- fail "Not found 'children' value in map" if children.nil?
45
- children ||= []
46
-
47
- childnodes = children.map{|c| Yasuri.hash2node(c) }
48
- reservedKeys.each{|key| node_h.delete(key)}
49
- opt = node_h
31
+ def opts
32
+ {}
33
+ end
50
34
 
51
- self.new(name, childnodes, **opt)
35
+ def node_type_str
36
+ "map".freeze
52
37
  end
53
38
  end
54
39
  end
@@ -7,7 +7,7 @@ module Yasuri
7
7
  module Node
8
8
  attr_reader :url, :xpath, :name, :children
9
9
 
10
- def initialize(xpath, name, children = [], opt: {})
10
+ def initialize(xpath, name, children = [], **opt)
11
11
  @xpath, @name, @children = xpath, name, children
12
12
  end
13
13
 
@@ -15,50 +15,28 @@ module Yasuri
15
15
  fail "#{Kernel.__method__} is not implemented in included class."
16
16
  end
17
17
 
18
- def opts
19
- {}
20
- end
21
-
22
18
  def to_h
23
- h = {}
24
- h["node"] = self.node_type_str
25
- h["name"] = self.name
26
- h["path"] = self.xpath
27
- h["children"] = self.children.map{|c| c.to_h} if not children.empty?
28
-
29
- self.opts.each do |key,value|
30
- h[key] = value if not value.nil?
31
- end
32
-
33
- h
34
- end
19
+ return @xpath if @xpath and @children.empty? and self.opts.values.compact.empty?
35
20
 
36
- module ClassMethods
37
- def hash2node(node_h)
38
- reservedKeys = %i|node name path children|
21
+ node_hash = {}
22
+ self.opts.each{|k, v| node_hash[k] = v if not v.nil?}
39
23
 
40
- node, name, path, children = ReservedKeys.map do |key|
41
- node_h[key]
42
- end
24
+ node_hash[:path] = @xpath if @xpath
43
25
 
44
- fail "Not found 'name' value in map" if name.nil?
45
- fail "Not found 'path' value in map" if path.nil?
46
- children ||= []
47
-
48
- childnodes = children.map{|c| Yasuri.hash2node(c) }
49
- reservedKeys.each{|key| node_h.delete(key)}
50
- opt = node_h
51
-
52
- self.new(path, name, childnodes, **opt)
26
+ children.each do |child|
27
+ child_node_name = "#{child.node_type_str}_#{child.name}"
28
+ node_hash[child_node_name] = child.to_h
53
29
  end
54
30
 
55
- def node_type_str
56
- fail "#{Kernel.__method__} is not implemented in included class."
57
- end
31
+ node_hash
32
+ end
33
+
34
+ def opts
35
+ {}
58
36
  end
59
37
 
60
- def self.included(base)
61
- base.extend(ClassMethods)
38
+ def node_type_str
39
+ fail "#{Kernel.__method__} is not implemented in included class."
62
40
  end
63
41
  end
64
42
  end
@@ -14,7 +14,7 @@ module Yasuri
14
14
  end
15
15
 
16
16
  def inject(agent, page, opt = {}, element = page)
17
- retry_count = opt[:retry_count] || 5
17
+ retry_count = opt[:retry_count] || Yasuri::DefaultRetryCount
18
18
 
19
19
  raise NotImplementedError.new("PagenateNode inside StructNode, Not Supported") if page != element
20
20
 
@@ -22,12 +22,12 @@ module Yasuri
22
22
  limit = @limit.nil? ? Float::MAX : @limit
23
23
  while page
24
24
  child_results_kv = @children.map do |child_node|
25
- child_name = Yasuri.NodeName(child_node.name, opt)
25
+ child_name = Yasuri.node_name(child_node.name, opt)
26
26
  [child_name, child_node.inject(agent, page, opt)]
27
27
  end
28
28
  child_results << Hash[child_results_kv]
29
29
 
30
- link = page.search(@xpath).first
30
+ link = page.search(@xpath).first # Todo raise: link is not found
31
31
  break if link == nil
32
32
 
33
33
  link_button = Mechanize::Page::Link.new(link, agent, page)
@@ -41,12 +41,13 @@ module Yasuri
41
41
 
42
42
  child_results
43
43
  end
44
+
44
45
  def opts
45
46
  {limit:@limit, flatten:@flatten}
46
47
  end
47
48
 
48
49
  def node_type_str
49
- "pages"
50
+ "pages".freeze
50
51
  end
51
52
  end
52
53
  end
@@ -10,12 +10,16 @@ module Yasuri
10
10
  sub_tags = element.search(@xpath)
11
11
  tree = sub_tags.map do |sub_tag|
12
12
  child_results_kv = @children.map do |child_node|
13
- child_name = Yasuri.NodeName(child_node.name, opt)
13
+ child_name = Yasuri.node_name(child_node.name, opt)
14
14
  [child_name, child_node.inject(agent, page, opt, sub_tag)]
15
15
  end
16
16
  Hash[child_results_kv]
17
17
  end
18
18
  tree.size == 1 ? tree.first : tree
19
19
  end # inject
20
+
21
+ def node_type_str
22
+ "struct".freeze
23
+ end
20
24
  end
21
25
  end
@@ -18,7 +18,6 @@ module Yasuri
18
18
  @truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
19
19
 
20
20
  @proc = proc.nil? ? nil : proc.to_sym
21
-
22
21
  end
23
22
 
24
23
  def inject(agent, page, opt = {}, element = page)
@@ -31,15 +30,16 @@ module Yasuri
31
30
  end
32
31
 
33
32
  text = text.__send__(@proc) if @proc && text.respond_to?(@proc)
34
- text
35
- end
36
33
 
37
- def node_type_str
38
- "text"
34
+ text
39
35
  end
40
36
 
41
37
  def opts
42
38
  {truncate:@truncate, proc:@proc}
43
39
  end
40
+
41
+ def node_type_str
42
+ "text".freeze
43
+ end
44
44
  end
45
45
  end
@@ -0,0 +1,8 @@
1
+ {
2
+ "pages_root": {
3
+ "path": "/html/body/nav/span/a[@class='next']",
4
+ "limit": 10,
5
+ "flatten": false,
6
+ "text_content": "/html/body/p"
7
+ }
8
+ }
@@ -0,0 +1,5 @@
1
+ pages_root:
2
+ path: "/html/body/nav/span/a[@class='next']"
3
+ limit: 10
4
+ flatten: false
5
+ text_content: "/html/body/p"
@@ -0,0 +1,9 @@
1
+ {
2
+ ,,,
3
+ "pages_root": {
4
+ "path": "/html/body/nav/span/a[@class='next']",
5
+ "limit": 10,
6
+ "flatten": false,
7
+ "text_content": "/html/body/p"
8
+ }
9
+ }
@@ -0,0 +1,6 @@
1
+ ,,,
2
+ pages_root:
3
+ path: "/html/body/nav/span/a[@class='next']"
4
+ limit: 10
5
+ flatten: false
6
+ text_content: "/html/body/p"
data/spec/spec_helper.rb CHANGED
@@ -16,7 +16,7 @@ require 'simplecov'
16
16
  require 'coveralls'
17
17
  Coveralls.wear!
18
18
 
19
- SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
19
+ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new [
20
20
  SimpleCov::Formatter::HTMLFormatter,
21
21
  Coveralls::SimpleCov::Formatter
22
22
  ]
@@ -0,0 +1,83 @@
1
+ require_relative 'spec_helper'
2
+
3
+ describe 'Yasuri' do
4
+ include_context 'httpserver'
5
+
6
+ before do
7
+ @agent = Mechanize.new
8
+ @index_page = @agent.get(uri)
9
+
10
+ @res_dir = File.expand_path('../cli_resources', __FILE__)
11
+ end
12
+
13
+ describe 'cli scrape' do
14
+ it "require --file or --json option" do
15
+ expect {
16
+ Yasuri::CLI.new.invoke(:scrape, [uri], {})
17
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
18
+ end
19
+
20
+ it "only one of --file or --json option" do
21
+ expect {
22
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "path.json", json: '{"text_title": "/html/head/title"}'})
23
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
24
+ end
25
+
26
+ it "require --file option is not empty string" do
27
+ expect {
28
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "file"})
29
+ }.to output("ERROR: --file option require not empty argument.\n").to_stderr
30
+ end
31
+
32
+ it "require --json option is not empty string" do
33
+ expect {
34
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: "json"})
35
+ }.to output("ERROR: --json option require not empty argument.\n").to_stderr
36
+ end
37
+
38
+
39
+ it "display text node as simple string" do
40
+ expect {
41
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_title": "/html/head/title"}'})
42
+ }.to output("Yasuri Test\n").to_stdout
43
+ end
44
+
45
+ it "display texts in single json" do
46
+ expect {
47
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_c1":"/html/body/p[1]", "text_c2":"/html/body/p[2]"}'})
48
+ }.to output('{"c1":"Hello,Yasuri","c2":"Last Modify - 2015/02/14"}'+"\n").to_stdout
49
+ end
50
+
51
+
52
+ it "display text node as simple string via json file" do
53
+ expect {
54
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.json"})
55
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
56
+ end
57
+ it "display text node as simple string via yaml file" do
58
+ expect {
59
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.yml"})
60
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
61
+ end
62
+
63
+
64
+ it "display ERROR when json string is wrong" do
65
+ wrong_json = '{,,}'
66
+ expect {
67
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: wrong_json})
68
+ }.to output("ERROR: Failed to convert json to yasuri tree. 809: unexpected token at '#{wrong_json}'\n").to_stderr
69
+ end
70
+ it "display ERROR when json file contains is wrong" do
71
+ file_path = "#{@res_dir}/tree_wrong.json"
72
+ expect {
73
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
74
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a flow node at line 2 column 3\n").to_stderr
75
+ end
76
+ it "display ERROR when yaml file contains is wrong" do
77
+ file_path = "#{@res_dir}/tree_wrong.yml"
78
+ expect {
79
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
80
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a block node at line 1 column 1\n").to_stderr
81
+ end
82
+ end
83
+ end
data/spec/yasuri_spec.rb CHANGED
@@ -13,6 +13,7 @@ describe 'Yasuri' do
13
13
  @index_page = @agent.get(@uri)
14
14
  end
15
15
 
16
+
16
17
  ############
17
18
  # yam2tree #
18
19
  ############
@@ -23,10 +24,8 @@ describe 'Yasuri' do
23
24
 
24
25
  it "return text node" do
25
26
  src = <<-EOB
26
- content:
27
- node: text
28
- path: "/html/body/p[1]"
29
- EOB
27
+ text_content: "/html/body/p[1]"
28
+ EOB
30
29
  generated = Yasuri.yaml2tree(src)
31
30
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
32
31
 
@@ -35,10 +34,9 @@ EOB
35
34
 
36
35
  it "return text node as symbol" do
37
36
  src = <<-EOB
38
- :content:
39
- :node: text
40
- :path: "/html/body/p[1]"
41
- EOB
37
+ :text_content:
38
+ :path: "/html/body/p[1]"
39
+ EOB
42
40
  generated = Yasuri.yaml2tree(src)
43
41
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
44
42
 
@@ -48,14 +46,10 @@ EOB
48
46
  it "return LinksNode/TextNode" do
49
47
 
50
48
  src = <<-EOB
51
- root:
52
- node: links
53
- path: "/html/body/a"
54
- children:
55
- - content:
56
- node: text
57
- path: "/html/body/p"
58
- EOB
49
+ links_root:
50
+ path: "/html/body/a"
51
+ text_content: "/html/body/p"
52
+ EOB
59
53
  generated = Yasuri.yaml2tree(src)
60
54
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
61
55
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -66,21 +60,13 @@ EOB
66
60
 
67
61
  it "return StructNode/StructNode/[TextNode,TextNode]" do
68
62
  src = <<-EOB
69
- tables:
70
- node: struct
71
- path: "/html/body/table"
72
- children:
73
- - table:
74
- node: struct
75
- path: "./tr"
76
- children:
77
- - title:
78
- node: text
79
- path: "./td[1]"
80
- - pub_date:
81
- node: text
82
- path: "./td[2]"
83
- EOB
63
+ struct_tables:
64
+ path: "/html/body/table"
65
+ struct_table:
66
+ path: "./tr"
67
+ text_title: "./td[1]"
68
+ text_pub_date: "./td[2]"
69
+ EOB
84
70
 
85
71
  generated = Yasuri.yaml2tree(src)
86
72
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
@@ -105,10 +91,10 @@ EOB
105
91
  end
106
92
 
107
93
  it "return TextNode" do
108
- src = %q| { "node" : "text",
109
- "name" : "content",
110
- "path" : "/html/body/p[1]"
111
- }|
94
+ src = %q|
95
+ {
96
+ "text_content": "/html/body/p[1]"
97
+ }|
112
98
  generated = Yasuri.json2tree(src)
113
99
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
114
100
 
@@ -116,30 +102,24 @@ EOB
116
102
  end
117
103
 
118
104
  it "return TextNode with truncate_regexp" do
119
- src = %q| { "node" : "text",
120
- "name" : "content",
121
- "path" : "/html/body/p[1]",
122
- "truncate" : "^[^,]+"
123
- }|
105
+ src = %q|
106
+ {
107
+ "text_content": {
108
+ "path": "/html/body/p[1]",
109
+ "truncate" : "^[^,]+"
110
+ }
111
+ }|
124
112
  generated = Yasuri.json2tree(src)
125
113
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
126
114
  compare_generated_vs_original(generated, original, @index_page)
127
115
  end
128
116
 
129
117
  it "return MapNode with TextNodes" do
130
- src = %q| { "node" : "map",
131
- "name" : "parent",
132
- "children" : [
133
- { "node" : "text",
134
- "name" : "content01",
135
- "path" : "/html/body/p[1]"
136
- },
137
- { "node" : "text",
138
- "name" : "content02",
139
- "path" : "/html/body/p[2]"
140
- }
141
- ]
142
- }|
118
+ src = %q|
119
+ {
120
+ "text_content01": "/html/body/p[1]",
121
+ "text_content02": "/html/body/p[2]"
122
+ }|
143
123
  generated = Yasuri.json2tree(src)
144
124
  original = Yasuri::MapNode.new('parent', [
145
125
  Yasuri::TextNode.new('/html/body/p[1]', "content01"),
@@ -149,14 +129,14 @@ EOB
149
129
  end
150
130
 
151
131
  it "return LinksNode/TextNode" do
152
- src = %q| { "node" : "links",
153
- "name" : "root",
154
- "path" : "/html/body/a",
155
- "children" : [ { "node" : "text",
156
- "name" : "content",
157
- "path" : "/html/body/p"
158
- } ]
159
- }|
132
+ src = %q|
133
+ {
134
+ "links_root": {
135
+ "path": "/html/body/a",
136
+ "text_content": "/html/body/p"
137
+ }
138
+ }|
139
+
160
140
  generated = Yasuri.json2tree(src)
161
141
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
162
142
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -166,14 +146,13 @@ EOB
166
146
  end
167
147
 
168
148
  it "return PaginateNode/TextNode" do
169
- src = %q|{ "node" : "pages",
170
- "name" : "root",
171
- "path" : "/html/body/nav/span/a[@class=\'next\']",
172
- "children" : [ { "node" : "text",
173
- "name" : "content",
174
- "path" : "/html/body/p"
175
- } ]
176
- }|
149
+ src = %q|
150
+ {
151
+ "pages_root": {
152
+ "path": "/html/body/nav/span/a[@class=\'next\']",
153
+ "text_content": "/html/body/p"
154
+ }
155
+ }|
177
156
  generated = Yasuri.json2tree(src)
178
157
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
179
158
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -185,15 +164,14 @@ EOB
185
164
  end
186
165
 
187
166
  it "return PaginateNode/TextNode with limit" do
188
- src = %q|{ "node" : "pages",
189
- "name" : "root",
190
- "path" : "/html/body/nav/span/a[@class=\'next\']",
191
- "limit" : 2,
192
- "children" : [ { "node" : "text",
193
- "name" : "content",
194
- "path" : "/html/body/p"
195
- } ]
196
- }|
167
+ src = %q|
168
+ {
169
+ "pages_root": {
170
+ "path": "/html/body/nav/span/a[@class=\'next\']",
171
+ "limit": 2,
172
+ "text_content": "/html/body/p"
173
+ }
174
+ }|
197
175
  generated = Yasuri.json2tree(src)
198
176
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
199
177
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -205,24 +183,17 @@ EOB
205
183
  end
206
184
 
207
185
  it "return StructNode/StructNode/[TextNode,TextNode]" do
208
- src = %q| { "node" : "struct",
209
- "name" : "tables",
210
- "path" : "/html/body/table",
211
- "children" : [
212
- { "node" : "struct",
213
- "name" : "table",
214
- "path" : "./tr",
215
- "children" : [
216
- { "node" : "text",
217
- "name" : "title",
218
- "path" : "./td[1]"
219
- },
220
- { "node" : "text",
221
- "name" : "pub_date",
222
- "path" : "./td[2]"
223
- }]
224
- }]
225
- }|
186
+ src = %q|
187
+ {
188
+ "struct_tables": {
189
+ "path": "/html/body/table",
190
+ "struct_table": {
191
+ "path": "./tr",
192
+ "text_title": "./td[1]",
193
+ "text_pub_date": "./td[2]"
194
+ }
195
+ }
196
+ }|
226
197
  generated = Yasuri.json2tree(src)
227
198
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
228
199
  Yasuri::StructNode.new('./tr', "table", [
@@ -235,22 +206,22 @@ EOB
235
206
  end
236
207
  end
237
208
 
209
+
238
210
  #############
239
211
  # tree2json #
240
212
  #############
241
213
  describe '.tree2json' do
242
214
  it "return empty json" do
243
- json = Yasuri.tree2json(nil)
244
- expect(json).to match "{}"
215
+ expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
245
216
  end
246
217
 
247
218
  it "return text node" do
248
219
  node = Yasuri::TextNode.new("/html/head/title", "title")
249
220
  json = Yasuri.tree2json(node)
250
- expected_str = %q| { "node": "text",
251
- "name": "title",
252
- "path": "/html/head/title"
253
- } |
221
+ expected_str = %q|
222
+ {
223
+ "text_title": "/html/head/title"
224
+ }|
254
225
  expected = JSON.parse(expected_str)
255
226
  actual = JSON.parse(json)
256
227
  expect(actual).to match expected
@@ -259,11 +230,13 @@ EOB
259
230
  it "return text node with truncate_regexp" do
260
231
  node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
261
232
  json = Yasuri.tree2json(node)
262
- expected_str = %q| { "node": "text",
263
- "name": "title",
264
- "path": "/html/head/title",
265
- "truncate": "^[^,]+"
266
- } |
233
+ expected_str = %q|
234
+ {
235
+ "text_title": {
236
+ "path": "/html/head/title",
237
+ "truncate": "^[^,]+"
238
+ }
239
+ }|
267
240
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
268
241
  actual = Yasuri.tree2json(Yasuri.json2tree(json))
269
242
  expect(actual).to match expected
@@ -276,19 +249,12 @@ EOB
276
249
  ])
277
250
  actual_json = Yasuri.tree2json(tree)
278
251
 
279
- expected_json = %q| { "node" : "map",
280
- "name" : "parent",
281
- "children" : [
282
- { "node" : "text",
283
- "name" : "content01",
284
- "path" : "/html/body/p[1]"
285
- },
286
- { "node" : "text",
287
- "name" : "content02",
288
- "path" : "/html/body/p[2]"
289
- }
290
- ]
252
+ expected_json = %q|
253
+ {
254
+ "text_content01": "/html/body/p[1]",
255
+ "text_content02": "/html/body/p[2]"
291
256
  }|
257
+
292
258
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
293
259
  actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
294
260
  expect(actual).to match expected
@@ -299,14 +265,14 @@ EOB
299
265
  Yasuri::TextNode.new('/html/body/p', "content"),
300
266
  ])
301
267
  json = Yasuri.tree2json(tree)
302
- expected_src = %q| { "node" : "links",
303
- "name" : "root",
304
- "path" : "/html/body/a",
305
- "children" : [ { "node" : "text",
306
- "name" : "content",
307
- "path" : "/html/body/p"
308
- } ]
309
- }|
268
+
269
+ expected_src = %q|
270
+ {
271
+ "links_root": {
272
+ "path": "/html/body/a",
273
+ "text_content":"/html/body/p"
274
+ }
275
+ }|
310
276
  expected = JSON.parse(expected_src)
311
277
  actual = JSON.parse(json)
312
278
  expect(actual).to match expected
@@ -318,25 +284,44 @@ EOB
318
284
  ], limit:10)
319
285
 
320
286
  json = Yasuri.tree2json(tree)
321
- expected_src = %q| { "node" : "pages",
322
- "name" : "root",
323
- "path" : "/html/body/nav/span/a[@class='next']",
324
- "limit" : 10,
325
- "flatten" : false,
326
- "children" : [ { "node" : "text",
327
- "name" : "content",
328
- "path" : "/html/body/p"
329
- } ]
330
- }|
287
+ expected_src = %q|
288
+ {
289
+ "pages_root": {
290
+ "path": "/html/body/nav/span/a[@class='next']",
291
+ "limit": 10,
292
+ "flatten": false,
293
+ "text_content": "/html/body/p"
294
+ }
295
+ }|
331
296
  expected = JSON.parse(expected_src)
332
297
  actual = JSON.parse(json)
333
298
  expect(actual).to match expected
334
299
  end
335
-
336
-
337
-
338
300
  end
339
301
 
302
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
303
+ tree = Yasuri::StructNode.new('/html/body/table', "tables", [
304
+ Yasuri::StructNode.new('./tr', "table", [
305
+ Yasuri::TextNode.new('./td[1]', "title"),
306
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
307
+ ])
308
+ ])
309
+ json = Yasuri.tree2json(tree)
310
+ expected_src = %q|
311
+ {
312
+ "struct_tables": {
313
+ "path": "/html/body/table",
314
+ "struct_table": {
315
+ "path": "./tr",
316
+ "text_title": "./td[1]",
317
+ "text_pub_date": "./td[2]"
318
+ }
319
+ }
320
+ }|
321
+ expected = JSON.parse(expected_src)
322
+ actual = JSON.parse(json)
323
+ expect(actual).to match expected
324
+ end
340
325
 
341
326
  it 'has a version number' do
342
327
  expect(Yasuri::VERSION).not_to be nil