yasuri 2.0.11 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,16 +6,20 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class StructNode
8
8
  include Node
9
- def inject(agent, page, opt = {})
10
- sub_tags = page.search(@xpath)
9
+ def inject(agent, page, opt = {}, element = page)
10
+ sub_tags = element.search(@xpath)
11
11
  tree = sub_tags.map do |sub_tag|
12
12
  child_results_kv = @children.map do |child_node|
13
- child_name = Yasuri.NodeName(child_node.name, opt)
14
- [child_name, child_node.inject(agent, sub_tag, opt)]
13
+ child_name = Yasuri.node_name(child_node.name, opt)
14
+ [child_name, child_node.inject(agent, page, opt, sub_tag)]
15
15
  end
16
16
  Hash[child_results_kv]
17
17
  end
18
18
  tree.size == 1 ? tree.first : tree
19
19
  end # inject
20
+
21
+ def node_type_str
22
+ "struct".freeze
23
+ end
20
24
  end
21
25
  end
@@ -7,19 +7,21 @@ module Yasuri
7
7
  class TextNode
8
8
  include Node
9
9
 
10
- def initialize(xpath, name, children = [], truncate: nil, proc:nil)
10
+ def initialize(xpath, name, children = [], **opt)
11
11
  super(xpath, name, children)
12
12
 
13
+ truncate = opt[:truncate]
14
+ proc = opt[:proc]
15
+
13
16
  truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
14
17
  @truncate = truncate
15
18
  @truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
16
19
 
17
20
  @proc = proc.nil? ? nil : proc.to_sym
18
-
19
21
  end
20
22
 
21
- def inject(agent, page, opt = {})
22
- node = page.search(@xpath)
23
+ def inject(agent, page, opt = {}, element = page)
24
+ node = element.search(@xpath)
23
25
  text = node.text.to_s
24
26
 
25
27
  if @truncate
@@ -28,11 +30,16 @@ module Yasuri
28
30
  end
29
31
 
30
32
  text = text.__send__(@proc) if @proc && text.respond_to?(@proc)
33
+
31
34
  text
32
35
  end
33
36
 
34
37
  def opts
35
38
  {truncate:@truncate, proc:@proc}
36
39
  end
40
+
41
+ def node_type_str
42
+ "text".freeze
43
+ end
37
44
  end
38
45
  end
@@ -0,0 +1,8 @@
1
+ {
2
+ "pages_root": {
3
+ "path": "/html/body/nav/span/a[@class='next']",
4
+ "limit": 10,
5
+ "flatten": false,
6
+ "text_content": "/html/body/p"
7
+ }
8
+ }
@@ -0,0 +1,5 @@
1
+ pages_root:
2
+ path: "/html/body/nav/span/a[@class='next']"
3
+ limit: 10
4
+ flatten: false
5
+ text_content: "/html/body/p"
@@ -0,0 +1,9 @@
1
+ {
2
+ ,,,
3
+ "pages_root": {
4
+ "path": "/html/body/nav/span/a[@class='next']",
5
+ "limit": 10,
6
+ "flatten": false,
7
+ "text_content": "/html/body/p"
8
+ }
9
+ }
@@ -0,0 +1,6 @@
1
+ ,,,
2
+ pages_root:
3
+ path: "/html/body/nav/span/a[@class='next']"
4
+ limit: 10
5
+ flatten: false
6
+ text_content: "/html/body/p"
@@ -0,0 +1,30 @@
1
+ <html>
2
+ <head>
3
+ <title>StructualLinksTest</title>
4
+ </head>
5
+ <body>
6
+
7
+ <table>
8
+ <thead>
9
+ <tr>
10
+ <th>Title</th>
11
+ <th>Links</th>
12
+ </tr>
13
+ </thead>
14
+ <tr>
15
+ <td>Child01,02</td>
16
+ <td><a href="../child01.html">Child01</a></td>
17
+ <td><a href="../child02.html">Child02</a></td>
18
+ <td>../child02.html</td>
19
+ </tr>
20
+
21
+ <tr>
22
+ <td>Child01,02,03</td>
23
+ <td><a href="../child01.html">Child01</a></td>
24
+ <td><a href="../child02.html">Child02</a></td>
25
+ <td><a href="../child03.html">Child03</a></td>
26
+ </tr>
27
+ </table>
28
+
29
+ </body>
30
+ </html>
data/spec/spec_helper.rb CHANGED
@@ -12,16 +12,11 @@ shared_context 'httpserver' do
12
12
  }
13
13
  end
14
14
 
15
-
16
- # ENV['CODECLIMATE_REPO_TOKEN'] = "0dc78d33107a7f11f257c0218ac1a37e0073005bb9734f2fd61d0f7e803fc151"
17
- # require "codeclimate-test-reporter"
18
- # CodeClimate::TestReporter.start
19
-
20
15
  require 'simplecov'
21
16
  require 'coveralls'
22
17
  Coveralls.wear!
23
18
 
24
- SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
19
+ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new [
25
20
  SimpleCov::Formatter::HTMLFormatter,
26
21
  Coveralls::SimpleCov::Formatter
27
22
  ]
@@ -0,0 +1,83 @@
1
+ require_relative 'spec_helper'
2
+
3
+ describe 'Yasuri' do
4
+ include_context 'httpserver'
5
+
6
+ before do
7
+ @agent = Mechanize.new
8
+ @index_page = @agent.get(uri)
9
+
10
+ @res_dir = File.expand_path('../cli_resources', __FILE__)
11
+ end
12
+
13
+ describe 'cli scrape' do
14
+ it "require --file or --json option" do
15
+ expect {
16
+ Yasuri::CLI.new.invoke(:scrape, [uri], {})
17
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
18
+ end
19
+
20
+ it "only one of --file or --json option" do
21
+ expect {
22
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "path.json", json: '{"text_title": "/html/head/title"}'})
23
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
24
+ end
25
+
26
+ it "require --file option is not empty string" do
27
+ expect {
28
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "file"})
29
+ }.to output("ERROR: --file option require not empty argument.\n").to_stderr
30
+ end
31
+
32
+ it "require --json option is not empty string" do
33
+ expect {
34
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: "json"})
35
+ }.to output("ERROR: --json option require not empty argument.\n").to_stderr
36
+ end
37
+
38
+
39
+ it "display text node as simple string" do
40
+ expect {
41
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_title": "/html/head/title"}'})
42
+ }.to output("Yasuri Test\n").to_stdout
43
+ end
44
+
45
+ it "display texts in single json" do
46
+ expect {
47
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_c1":"/html/body/p[1]", "text_c2":"/html/body/p[2]"}'})
48
+ }.to output('{"c1":"Hello,Yasuri","c2":"Last Modify - 2015/02/14"}'+"\n").to_stdout
49
+ end
50
+
51
+
52
+ it "display text node as simple string via json file" do
53
+ expect {
54
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.json"})
55
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
56
+ end
57
+ it "display text node as simple string via yaml file" do
58
+ expect {
59
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.yml"})
60
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
61
+ end
62
+
63
+
64
+ it "display ERROR when json string is wrong" do
65
+ wrong_json = '{,,}'
66
+ expect {
67
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: wrong_json})
68
+ }.to output("ERROR: Failed to convert json to yasuri tree. 809: unexpected token at '#{wrong_json}'\n").to_stderr
69
+ end
70
+ it "display ERROR when json file contains is wrong" do
71
+ file_path = "#{@res_dir}/tree_wrong.json"
72
+ expect {
73
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
74
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a flow node at line 2 column 3\n").to_stderr
75
+ end
76
+ it "display ERROR when yaml file contains is wrong" do
77
+ file_path = "#{@res_dir}/tree_wrong.yml"
78
+ expect {
79
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
80
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a block node at line 1 column 1\n").to_stderr
81
+ end
82
+ end
83
+ end
@@ -59,10 +59,18 @@ describe 'Yasuri' do
59
59
  ]
60
60
  expect(actual).to match expected
61
61
  end
62
- it 'can be defined by DSL, return single LinkNode title' do
63
- generated = Yasuri.links_title '/html/body/a'
64
- original = Yasuri::LinksNode.new('/html/body/a', "title")
65
- compare_generated_vs_original(generated, original, @index_page)
62
+ it 'can be defined by DSL, return no contains if no child node' do
63
+ root_node = Yasuri.links_title '/html/body/a'
64
+ actual = root_node.inject(@agent, @index_page)
65
+ expected = [{}, {}, {}] # Empty if no child node under links node.
66
+ expect(actual).to match expected
67
+ end
68
+
69
+ it 'can be defined return no contains if no child node' do
70
+ root_node = Yasuri::LinksNode.new('/html/body/a', "title")
71
+ actual = root_node.inject(@agent, @index_page)
72
+ expected = [{}, {}, {}] # Empty if no child node under links node.
73
+ expect(actual).to match expected
66
74
  end
67
75
  it 'can be defined by DSL, return nested contents under link' do
68
76
  generated = Yasuri.links_title '/html/body/a' do
@@ -0,0 +1,76 @@
1
+ require_relative 'spec_helper'
2
+
3
+ describe 'Yasuri' do
4
+ include_context 'httpserver'
5
+
6
+ before do
7
+ @agent = Mechanize.new
8
+ @index_page = @agent.get(uri)
9
+ end
10
+
11
+ describe '::MapNode' do
12
+ it "multi scrape in singe page" do
13
+ map = Yasuri.map_sample do
14
+ text_title '/html/head/title'
15
+ text_body_p '/html/body/p[1]'
16
+ end
17
+ actual = map.inject(@agent, @index_page)
18
+
19
+ expected = {
20
+ "title" => "Yasuri Test",
21
+ "body_p" => "Hello,Yasuri"
22
+ }
23
+ expect(actual).to include expected
24
+ end
25
+
26
+ it "nested multi scrape in singe page" do
27
+ map = Yasuri.map_sample do
28
+ map_group1 { text_child01 '/html/body/a[1]' }
29
+ map_group2 do
30
+ text_child01 '/html/body/a[1]'
31
+ text_child03 '/html/body/a[3]'
32
+ end
33
+ end
34
+ actual = map.inject(@agent, @index_page)
35
+
36
+ expected = {
37
+ "group1" => {
38
+ "child01" => "child01"
39
+ },
40
+ "group2" => {
41
+ "child01" => "child01",
42
+ "child03" => "child03"
43
+ }
44
+ }
45
+ expect(actual).to include expected
46
+ end
47
+
48
+ it "scrape with links node" do
49
+ map = Yasuri.map_sample do
50
+ map_group1 do
51
+ links_a '/html/body/a' do
52
+ text_content '/html/body/p'
53
+ end
54
+ text_child01 '/html/body/a[1]'
55
+ end
56
+ map_group2 do
57
+ text_child03 '/html/body/a[3]'
58
+ end
59
+ end
60
+ actual = map.inject(@agent, @index_page)
61
+
62
+ expected = {
63
+ "group1" => {
64
+ "a" => [
65
+ {"content" => "Child 01 page."},
66
+ {"content" => "Child 02 page."},
67
+ {"content" => "Child 03 page."},
68
+ ],
69
+ "child01" => "child01"
70
+ },
71
+ "group2" => { "child03" => "child03" }
72
+ }
73
+ expect(actual).to include expected
74
+ end
75
+ end
76
+ end
@@ -30,6 +30,49 @@ describe 'Yasuri' do
30
30
  expect(actual).to match expected
31
31
  end
32
32
 
33
+ it "scrape each paginated pages with flatten" do
34
+ root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
35
+ Yasuri::TextNode.new('/html/body/p', "content"),
36
+ Yasuri::StructNode.new('/html/body/nav/span', "span", [
37
+ Yasuri::TextNode.new('./a', "text"),
38
+ ]),
39
+ ], flatten: true)
40
+ actual = root_node.inject(@agent, @page)
41
+ expected = [
42
+ "PaginationTest01",
43
+ {"text"=>""},
44
+ {"text"=>""},
45
+ {"text" => "2"},
46
+ {"text" => "3"},
47
+ {"text" => "4"},
48
+ {"text"=>"NextPage »"},
49
+ "PaginationTest02",
50
+ {"text"=>"« PreviousPage"},
51
+ {"text" => "1"},
52
+ {"text"=>""},
53
+ {"text" => "3"},
54
+ {"text" => "4"},
55
+ {"text"=>"NextPage »"},
56
+ "PaginationTest03",
57
+ {"text"=>"« PreviousPage"},
58
+ {"text" => "1"},
59
+ {"text" => "2"},
60
+ {"text"=>""},
61
+ {"text" => "4"},
62
+ {"text"=>"NextPage »"},
63
+ "PaginationTest04",
64
+ {"text"=>"« PreviousPage"},
65
+ {"text" => "1"},
66
+ {"text" => "2"},
67
+ {"text" => "3"},
68
+ {"text"=>""},
69
+ {"text"=>""},
70
+ ]
71
+
72
+ expect(actual).to match expected
73
+ end
74
+
75
+
33
76
  it "scrape each paginated pages limited" do
34
77
  root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
35
78
  Yasuri::TextNode.new('/html/body/p', "content"),
data/spec/yasuri_spec.rb CHANGED
@@ -13,6 +13,75 @@ describe 'Yasuri' do
13
13
  @index_page = @agent.get(@uri)
14
14
  end
15
15
 
16
+
17
+ #############
18
+ # yaml2tree #
19
+ #############
20
+ describe '.yaml2tree' do
21
+ it "fail if empty yaml" do
22
+ expect { Yasuri.yaml2tree(nil) }.to raise_error(RuntimeError)
23
+ end
24
+
25
+ it "return text node" do
26
+ src = <<-EOB
27
+ text_content: "/html/body/p[1]"
28
+ EOB
29
+ generated = Yasuri.yaml2tree(src)
30
+ original = Yasuri::TextNode.new('/html/body/p[1]', "content")
31
+
32
+ compare_generated_vs_original(generated, original, @index_page)
33
+ end
34
+
35
+ it "return text node as symbol" do
36
+ src = <<-EOB
37
+ :text_content:
38
+ :path: "/html/body/p[1]"
39
+ EOB
40
+ generated = Yasuri.yaml2tree(src)
41
+ original = Yasuri::TextNode.new('/html/body/p[1]', "content")
42
+
43
+ compare_generated_vs_original(generated, original, @index_page)
44
+ end
45
+
46
+ it "return LinksNode/TextNode" do
47
+
48
+ src = <<-EOB
49
+ links_root:
50
+ path: "/html/body/a"
51
+ text_content: "/html/body/p"
52
+ EOB
53
+ generated = Yasuri.yaml2tree(src)
54
+ original = Yasuri::LinksNode.new('/html/body/a', "root", [
55
+ Yasuri::TextNode.new('/html/body/p', "content"),
56
+ ])
57
+
58
+ compare_generated_vs_original(generated, original, @index_page)
59
+ end
60
+
61
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
62
+ src = <<-EOB
63
+ struct_tables:
64
+ path: "/html/body/table"
65
+ struct_table:
66
+ path: "./tr"
67
+ text_title: "./td[1]"
68
+ text_pub_date: "./td[2]"
69
+ EOB
70
+
71
+ generated = Yasuri.yaml2tree(src)
72
+ original = Yasuri::StructNode.new('/html/body/table', "tables", [
73
+ Yasuri::StructNode.new('./tr', "table", [
74
+ Yasuri::TextNode.new('./td[1]', "title"),
75
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
76
+ ])
77
+ ])
78
+ page = @agent.get(@uri + "/struct/structual_text.html")
79
+ compare_generated_vs_original(generated, original, page)
80
+ end
81
+
82
+ end # end of describe '.yaml2tree'
83
+
84
+
16
85
  #############
17
86
  # json2tree #
18
87
  #############
@@ -22,10 +91,10 @@ describe 'Yasuri' do
22
91
  end
23
92
 
24
93
  it "return TextNode" do
25
- src = %q| { "node" : "text",
26
- "name" : "content",
27
- "path" : "/html/body/p[1]"
28
- }|
94
+ src = %q|
95
+ {
96
+ "text_content": "/html/body/p[1]"
97
+ }|
29
98
  generated = Yasuri.json2tree(src)
30
99
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
31
100
 
@@ -33,26 +102,41 @@ describe 'Yasuri' do
33
102
  end
34
103
 
35
104
  it "return TextNode with truncate_regexp" do
36
- src = %q| { "node" : "text",
37
- "name" : "content",
38
- "path" : "/html/body/p[1]",
39
- "truncate" : "^[^,]+"
40
- }|
105
+ src = %q|
106
+ {
107
+ "text_content": {
108
+ "path": "/html/body/p[1]",
109
+ "truncate" : "^[^,]+"
110
+ }
111
+ }|
41
112
  generated = Yasuri.json2tree(src)
42
113
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
43
114
  compare_generated_vs_original(generated, original, @index_page)
44
115
  end
45
116
 
117
+ it "return MapNode with TextNodes" do
118
+ src = %q|
119
+ {
120
+ "text_content01": "/html/body/p[1]",
121
+ "text_content02": "/html/body/p[2]"
122
+ }|
123
+ generated = Yasuri.json2tree(src)
124
+ original = Yasuri::MapNode.new('parent', [
125
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
126
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
127
+ ])
128
+ compare_generated_vs_original(generated, original, @index_page)
129
+ end
46
130
 
47
131
  it "return LinksNode/TextNode" do
48
- src = %q| { "node" : "links",
49
- "name" : "root",
50
- "path" : "/html/body/a",
51
- "children" : [ { "node" : "text",
52
- "name" : "content",
53
- "path" : "/html/body/p"
54
- } ]
55
- }|
132
+ src = %q|
133
+ {
134
+ "links_root": {
135
+ "path": "/html/body/a",
136
+ "text_content": "/html/body/p"
137
+ }
138
+ }|
139
+
56
140
  generated = Yasuri.json2tree(src)
57
141
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
58
142
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -62,14 +146,13 @@ describe 'Yasuri' do
62
146
  end
63
147
 
64
148
  it "return PaginateNode/TextNode" do
65
- src = %q|{ "node" : "pages",
66
- "name" : "root",
67
- "path" : "/html/body/nav/span/a[@class=\'next\']",
68
- "children" : [ { "node" : "text",
69
- "name" : "content",
70
- "path" : "/html/body/p"
71
- } ]
72
- }|
149
+ src = %q|
150
+ {
151
+ "pages_root": {
152
+ "path": "/html/body/nav/span/a[@class=\'next\']",
153
+ "text_content": "/html/body/p"
154
+ }
155
+ }|
73
156
  generated = Yasuri.json2tree(src)
74
157
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
75
158
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -81,15 +164,14 @@ describe 'Yasuri' do
81
164
  end
82
165
 
83
166
  it "return PaginateNode/TextNode with limit" do
84
- src = %q|{ "node" : "pages",
85
- "name" : "root",
86
- "path" : "/html/body/nav/span/a[@class=\'next\']",
87
- "limit" : 2,
88
- "children" : [ { "node" : "text",
89
- "name" : "content",
90
- "path" : "/html/body/p"
91
- } ]
92
- }|
167
+ src = %q|
168
+ {
169
+ "pages_root": {
170
+ "path": "/html/body/nav/span/a[@class=\'next\']",
171
+ "limit": 2,
172
+ "text_content": "/html/body/p"
173
+ }
174
+ }|
93
175
  generated = Yasuri.json2tree(src)
94
176
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
95
177
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -101,24 +183,17 @@ describe 'Yasuri' do
101
183
  end
102
184
 
103
185
  it "return StructNode/StructNode/[TextNode,TextNode]" do
104
- src = %q| { "node" : "struct",
105
- "name" : "tables",
106
- "path" : "/html/body/table",
107
- "children" : [
108
- { "node" : "struct",
109
- "name" : "table",
110
- "path" : "./tr",
111
- "children" : [
112
- { "node" : "text",
113
- "name" : "title",
114
- "path" : "./td[1]"
115
- },
116
- { "node" : "text",
117
- "name" : "pub_date",
118
- "path" : "./td[2]"
119
- }]
120
- }]
121
- }|
186
+ src = %q|
187
+ {
188
+ "struct_tables": {
189
+ "path": "/html/body/table",
190
+ "struct_table": {
191
+ "path": "./tr",
192
+ "text_title": "./td[1]",
193
+ "text_pub_date": "./td[2]"
194
+ }
195
+ }
196
+ }|
122
197
  generated = Yasuri.json2tree(src)
123
198
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
124
199
  Yasuri::StructNode.new('./tr', "table", [
@@ -126,27 +201,27 @@ describe 'Yasuri' do
126
201
  Yasuri::TextNode.new('./td[2]', "pub_date"),
127
202
  ])
128
203
  ])
129
- page = @agent.get(@uri + "/structual_text.html")
204
+ page = @agent.get(@uri + "/struct/structual_text.html")
130
205
  compare_generated_vs_original(generated, original, page)
131
206
  end
132
207
  end
133
208
 
209
+
134
210
  #############
135
211
  # tree2json #
136
212
  #############
137
213
  describe '.tree2json' do
138
214
  it "return empty json" do
139
- json = Yasuri.tree2json(nil)
140
- expect(json).to match "{}"
215
+ expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
141
216
  end
142
217
 
143
218
  it "return text node" do
144
219
  node = Yasuri::TextNode.new("/html/head/title", "title")
145
220
  json = Yasuri.tree2json(node)
146
- expected_str = %q| { "node": "text",
147
- "name": "title",
148
- "path": "/html/head/title"
149
- } |
221
+ expected_str = %q|
222
+ {
223
+ "text_title": "/html/head/title"
224
+ }|
150
225
  expected = JSON.parse(expected_str)
151
226
  actual = JSON.parse(json)
152
227
  expect(actual).to match expected
@@ -155,29 +230,49 @@ describe 'Yasuri' do
155
230
  it "return text node with truncate_regexp" do
156
231
  node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
157
232
  json = Yasuri.tree2json(node)
158
- expected_str = %q| { "node": "text",
159
- "name": "title",
160
- "path": "/html/head/title",
161
- "truncate": "^[^,]+"
162
- } |
233
+ expected_str = %q|
234
+ {
235
+ "text_title": {
236
+ "path": "/html/head/title",
237
+ "truncate": "^[^,]+"
238
+ }
239
+ }|
163
240
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
164
241
  actual = Yasuri.tree2json(Yasuri.json2tree(json))
165
242
  expect(actual).to match expected
166
243
  end
167
244
 
245
+ it "return map node with text nodes" do
246
+ tree = Yasuri::MapNode.new('parent', [
247
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
248
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
249
+ ])
250
+ actual_json = Yasuri.tree2json(tree)
251
+
252
+ expected_json = %q|
253
+ {
254
+ "text_content01": "/html/body/p[1]",
255
+ "text_content02": "/html/body/p[2]"
256
+ }|
257
+
258
+ expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
259
+ actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
260
+ expect(actual).to match expected
261
+ end
262
+
168
263
  it "return LinksNode/TextNode" do
169
264
  tree = Yasuri::LinksNode.new('/html/body/a', "root", [
170
265
  Yasuri::TextNode.new('/html/body/p', "content"),
171
266
  ])
172
267
  json = Yasuri.tree2json(tree)
173
- expected_src = %q| { "node" : "links",
174
- "name" : "root",
175
- "path" : "/html/body/a",
176
- "children" : [ { "node" : "text",
177
- "name" : "content",
178
- "path" : "/html/body/p"
179
- } ]
180
- }|
268
+
269
+ expected_src = %q|
270
+ {
271
+ "links_root": {
272
+ "path": "/html/body/a",
273
+ "text_content":"/html/body/p"
274
+ }
275
+ }|
181
276
  expected = JSON.parse(expected_src)
182
277
  actual = JSON.parse(json)
183
278
  expect(actual).to match expected
@@ -189,24 +284,44 @@ describe 'Yasuri' do
189
284
  ], limit:10)
190
285
 
191
286
  json = Yasuri.tree2json(tree)
192
- expected_src = %q| { "node" : "pages",
193
- "name" : "root",
194
- "path" : "/html/body/nav/span/a[@class='next']",
195
- "limit" : 10,
196
- "children" : [ { "node" : "text",
197
- "name" : "content",
198
- "path" : "/html/body/p"
199
- } ]
200
- }|
287
+ expected_src = %q|
288
+ {
289
+ "pages_root": {
290
+ "path": "/html/body/nav/span/a[@class='next']",
291
+ "limit": 10,
292
+ "flatten": false,
293
+ "text_content": "/html/body/p"
294
+ }
295
+ }|
201
296
  expected = JSON.parse(expected_src)
202
297
  actual = JSON.parse(json)
203
298
  expect(actual).to match expected
204
299
  end
205
-
206
-
207
-
208
300
  end
209
301
 
302
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
303
+ tree = Yasuri::StructNode.new('/html/body/table', "tables", [
304
+ Yasuri::StructNode.new('./tr', "table", [
305
+ Yasuri::TextNode.new('./td[1]', "title"),
306
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
307
+ ])
308
+ ])
309
+ json = Yasuri.tree2json(tree)
310
+ expected_src = %q|
311
+ {
312
+ "struct_tables": {
313
+ "path": "/html/body/table",
314
+ "struct_table": {
315
+ "path": "./tr",
316
+ "text_title": "./td[1]",
317
+ "text_pub_date": "./td[2]"
318
+ }
319
+ }
320
+ }|
321
+ expected = JSON.parse(expected_src)
322
+ actual = JSON.parse(json)
323
+ expect(actual).to match expected
324
+ end
210
325
 
211
326
  it 'has a version number' do
212
327
  expect(Yasuri::VERSION).not_to be nil