yasuri 2.0.11 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,16 +6,20 @@ require_relative 'yasuri_node'
6
6
  module Yasuri
7
7
  class StructNode
8
8
  include Node
9
- def inject(agent, page, opt = {})
10
- sub_tags = page.search(@xpath)
9
+ def inject(agent, page, opt = {}, element = page)
10
+ sub_tags = element.search(@xpath)
11
11
  tree = sub_tags.map do |sub_tag|
12
12
  child_results_kv = @children.map do |child_node|
13
- child_name = Yasuri.NodeName(child_node.name, opt)
14
- [child_name, child_node.inject(agent, sub_tag, opt)]
13
+ child_name = Yasuri.node_name(child_node.name, opt)
14
+ [child_name, child_node.inject(agent, page, opt, sub_tag)]
15
15
  end
16
16
  Hash[child_results_kv]
17
17
  end
18
18
  tree.size == 1 ? tree.first : tree
19
19
  end # inject
20
+
21
+ def node_type_str
22
+ "struct".freeze
23
+ end
20
24
  end
21
25
  end
@@ -7,19 +7,21 @@ module Yasuri
7
7
  class TextNode
8
8
  include Node
9
9
 
10
- def initialize(xpath, name, children = [], truncate: nil, proc:nil)
10
+ def initialize(xpath, name, children = [], **opt)
11
11
  super(xpath, name, children)
12
12
 
13
+ truncate = opt[:truncate]
14
+ proc = opt[:proc]
15
+
13
16
  truncate = Regexp.new(truncate) if not truncate.nil? # regexp or nil
14
17
  @truncate = truncate
15
18
  @truncate = Regexp.new(@truncate.to_s) if not @truncate.nil?
16
19
 
17
20
  @proc = proc.nil? ? nil : proc.to_sym
18
-
19
21
  end
20
22
 
21
- def inject(agent, page, opt = {})
22
- node = page.search(@xpath)
23
+ def inject(agent, page, opt = {}, element = page)
24
+ node = element.search(@xpath)
23
25
  text = node.text.to_s
24
26
 
25
27
  if @truncate
@@ -28,11 +30,16 @@ module Yasuri
28
30
  end
29
31
 
30
32
  text = text.__send__(@proc) if @proc && text.respond_to?(@proc)
33
+
31
34
  text
32
35
  end
33
36
 
34
37
  def opts
35
38
  {truncate:@truncate, proc:@proc}
36
39
  end
40
+
41
+ def node_type_str
42
+ "text".freeze
43
+ end
37
44
  end
38
45
  end
@@ -0,0 +1,8 @@
1
+ {
2
+ "pages_root": {
3
+ "path": "/html/body/nav/span/a[@class='next']",
4
+ "limit": 10,
5
+ "flatten": false,
6
+ "text_content": "/html/body/p"
7
+ }
8
+ }
@@ -0,0 +1,5 @@
1
+ pages_root:
2
+ path: "/html/body/nav/span/a[@class='next']"
3
+ limit: 10
4
+ flatten: false
5
+ text_content: "/html/body/p"
@@ -0,0 +1,9 @@
1
+ {
2
+ ,,,
3
+ "pages_root": {
4
+ "path": "/html/body/nav/span/a[@class='next']",
5
+ "limit": 10,
6
+ "flatten": false,
7
+ "text_content": "/html/body/p"
8
+ }
9
+ }
@@ -0,0 +1,6 @@
1
+ ,,,
2
+ pages_root:
3
+ path: "/html/body/nav/span/a[@class='next']"
4
+ limit: 10
5
+ flatten: false
6
+ text_content: "/html/body/p"
@@ -0,0 +1,30 @@
1
+ <html>
2
+ <head>
3
+ <title>StructualLinksTest</title>
4
+ </head>
5
+ <body>
6
+
7
+ <table>
8
+ <thead>
9
+ <tr>
10
+ <th>Title</th>
11
+ <th>Links</th>
12
+ </tr>
13
+ </thead>
14
+ <tr>
15
+ <td>Child01,02</td>
16
+ <td><a href="../child01.html">Child01</a></td>
17
+ <td><a href="../child02.html">Child02</a></td>
18
+ <td>../child02.html</td>
19
+ </tr>
20
+
21
+ <tr>
22
+ <td>Child01,02,03</td>
23
+ <td><a href="../child01.html">Child01</a></td>
24
+ <td><a href="../child02.html">Child02</a></td>
25
+ <td><a href="../child03.html">Child03</a></td>
26
+ </tr>
27
+ </table>
28
+
29
+ </body>
30
+ </html>
data/spec/spec_helper.rb CHANGED
@@ -12,16 +12,11 @@ shared_context 'httpserver' do
12
12
  }
13
13
  end
14
14
 
15
-
16
- # ENV['CODECLIMATE_REPO_TOKEN'] = "0dc78d33107a7f11f257c0218ac1a37e0073005bb9734f2fd61d0f7e803fc151"
17
- # require "codeclimate-test-reporter"
18
- # CodeClimate::TestReporter.start
19
-
20
15
  require 'simplecov'
21
16
  require 'coveralls'
22
17
  Coveralls.wear!
23
18
 
24
- SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
19
+ SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new [
25
20
  SimpleCov::Formatter::HTMLFormatter,
26
21
  Coveralls::SimpleCov::Formatter
27
22
  ]
@@ -0,0 +1,83 @@
1
+ require_relative 'spec_helper'
2
+
3
+ describe 'Yasuri' do
4
+ include_context 'httpserver'
5
+
6
+ before do
7
+ @agent = Mechanize.new
8
+ @index_page = @agent.get(uri)
9
+
10
+ @res_dir = File.expand_path('../cli_resources', __FILE__)
11
+ end
12
+
13
+ describe 'cli scrape' do
14
+ it "require --file or --json option" do
15
+ expect {
16
+ Yasuri::CLI.new.invoke(:scrape, [uri], {})
17
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
18
+ end
19
+
20
+ it "only one of --file or --json option" do
21
+ expect {
22
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "path.json", json: '{"text_title": "/html/head/title"}'})
23
+ }.to output("ERROR: Only one of `--file` or `--json` option should be specified.\n").to_stderr
24
+ end
25
+
26
+ it "require --file option is not empty string" do
27
+ expect {
28
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "file"})
29
+ }.to output("ERROR: --file option require not empty argument.\n").to_stderr
30
+ end
31
+
32
+ it "require --json option is not empty string" do
33
+ expect {
34
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: "json"})
35
+ }.to output("ERROR: --json option require not empty argument.\n").to_stderr
36
+ end
37
+
38
+
39
+ it "display text node as simple string" do
40
+ expect {
41
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_title": "/html/head/title"}'})
42
+ }.to output("Yasuri Test\n").to_stdout
43
+ end
44
+
45
+ it "display texts in single json" do
46
+ expect {
47
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: '{"text_c1":"/html/body/p[1]", "text_c2":"/html/body/p[2]"}'})
48
+ }.to output('{"c1":"Hello,Yasuri","c2":"Last Modify - 2015/02/14"}'+"\n").to_stdout
49
+ end
50
+
51
+
52
+ it "display text node as simple string via json file" do
53
+ expect {
54
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.json"})
55
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
56
+ end
57
+ it "display text node as simple string via yaml file" do
58
+ expect {
59
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: "#{@res_dir}/tree.yml"})
60
+ }.to output('[{"content":"Hello,YasuriLast Modify - 2015/02/14"}]' + "\n").to_stdout
61
+ end
62
+
63
+
64
+ it "display ERROR when json string is wrong" do
65
+ wrong_json = '{,,}'
66
+ expect {
67
+ Yasuri::CLI.new.invoke(:scrape, [uri], {json: wrong_json})
68
+ }.to output("ERROR: Failed to convert json to yasuri tree. 809: unexpected token at '#{wrong_json}'\n").to_stderr
69
+ end
70
+ it "display ERROR when json file contains is wrong" do
71
+ file_path = "#{@res_dir}/tree_wrong.json"
72
+ expect {
73
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
74
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a flow node at line 2 column 3\n").to_stderr
75
+ end
76
+ it "display ERROR when yaml file contains is wrong" do
77
+ file_path = "#{@res_dir}/tree_wrong.yml"
78
+ expect {
79
+ Yasuri::CLI.new.invoke(:scrape, [uri], {file: file_path})
80
+ }.to output("ERROR: Failed to convert to yasuri tree `#{file_path}`. (<unknown>): did not find expected node content while parsing a block node at line 1 column 1\n").to_stderr
81
+ end
82
+ end
83
+ end
@@ -59,10 +59,18 @@ describe 'Yasuri' do
59
59
  ]
60
60
  expect(actual).to match expected
61
61
  end
62
- it 'can be defined by DSL, return single LinkNode title' do
63
- generated = Yasuri.links_title '/html/body/a'
64
- original = Yasuri::LinksNode.new('/html/body/a', "title")
65
- compare_generated_vs_original(generated, original, @index_page)
62
+ it 'can be defined by DSL, return no contains if no child node' do
63
+ root_node = Yasuri.links_title '/html/body/a'
64
+ actual = root_node.inject(@agent, @index_page)
65
+ expected = [{}, {}, {}] # Empty if no child node under links node.
66
+ expect(actual).to match expected
67
+ end
68
+
69
+ it 'can be defined return no contains if no child node' do
70
+ root_node = Yasuri::LinksNode.new('/html/body/a', "title")
71
+ actual = root_node.inject(@agent, @index_page)
72
+ expected = [{}, {}, {}] # Empty if no child node under links node.
73
+ expect(actual).to match expected
66
74
  end
67
75
  it 'can be defined by DSL, return nested contents under link' do
68
76
  generated = Yasuri.links_title '/html/body/a' do
@@ -0,0 +1,76 @@
1
+ require_relative 'spec_helper'
2
+
3
+ describe 'Yasuri' do
4
+ include_context 'httpserver'
5
+
6
+ before do
7
+ @agent = Mechanize.new
8
+ @index_page = @agent.get(uri)
9
+ end
10
+
11
+ describe '::MapNode' do
12
+ it "multi scrape in singe page" do
13
+ map = Yasuri.map_sample do
14
+ text_title '/html/head/title'
15
+ text_body_p '/html/body/p[1]'
16
+ end
17
+ actual = map.inject(@agent, @index_page)
18
+
19
+ expected = {
20
+ "title" => "Yasuri Test",
21
+ "body_p" => "Hello,Yasuri"
22
+ }
23
+ expect(actual).to include expected
24
+ end
25
+
26
+ it "nested multi scrape in singe page" do
27
+ map = Yasuri.map_sample do
28
+ map_group1 { text_child01 '/html/body/a[1]' }
29
+ map_group2 do
30
+ text_child01 '/html/body/a[1]'
31
+ text_child03 '/html/body/a[3]'
32
+ end
33
+ end
34
+ actual = map.inject(@agent, @index_page)
35
+
36
+ expected = {
37
+ "group1" => {
38
+ "child01" => "child01"
39
+ },
40
+ "group2" => {
41
+ "child01" => "child01",
42
+ "child03" => "child03"
43
+ }
44
+ }
45
+ expect(actual).to include expected
46
+ end
47
+
48
+ it "scrape with links node" do
49
+ map = Yasuri.map_sample do
50
+ map_group1 do
51
+ links_a '/html/body/a' do
52
+ text_content '/html/body/p'
53
+ end
54
+ text_child01 '/html/body/a[1]'
55
+ end
56
+ map_group2 do
57
+ text_child03 '/html/body/a[3]'
58
+ end
59
+ end
60
+ actual = map.inject(@agent, @index_page)
61
+
62
+ expected = {
63
+ "group1" => {
64
+ "a" => [
65
+ {"content" => "Child 01 page."},
66
+ {"content" => "Child 02 page."},
67
+ {"content" => "Child 03 page."},
68
+ ],
69
+ "child01" => "child01"
70
+ },
71
+ "group2" => { "child03" => "child03" }
72
+ }
73
+ expect(actual).to include expected
74
+ end
75
+ end
76
+ end
@@ -30,6 +30,49 @@ describe 'Yasuri' do
30
30
  expect(actual).to match expected
31
31
  end
32
32
 
33
+ it "scrape each paginated pages with flatten" do
34
+ root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
35
+ Yasuri::TextNode.new('/html/body/p', "content"),
36
+ Yasuri::StructNode.new('/html/body/nav/span', "span", [
37
+ Yasuri::TextNode.new('./a', "text"),
38
+ ]),
39
+ ], flatten: true)
40
+ actual = root_node.inject(@agent, @page)
41
+ expected = [
42
+ "PaginationTest01",
43
+ {"text"=>""},
44
+ {"text"=>""},
45
+ {"text" => "2"},
46
+ {"text" => "3"},
47
+ {"text" => "4"},
48
+ {"text"=>"NextPage »"},
49
+ "PaginationTest02",
50
+ {"text"=>"« PreviousPage"},
51
+ {"text" => "1"},
52
+ {"text"=>""},
53
+ {"text" => "3"},
54
+ {"text" => "4"},
55
+ {"text"=>"NextPage »"},
56
+ "PaginationTest03",
57
+ {"text"=>"« PreviousPage"},
58
+ {"text" => "1"},
59
+ {"text" => "2"},
60
+ {"text"=>""},
61
+ {"text" => "4"},
62
+ {"text"=>"NextPage »"},
63
+ "PaginationTest04",
64
+ {"text"=>"« PreviousPage"},
65
+ {"text" => "1"},
66
+ {"text" => "2"},
67
+ {"text" => "3"},
68
+ {"text"=>""},
69
+ {"text"=>""},
70
+ ]
71
+
72
+ expect(actual).to match expected
73
+ end
74
+
75
+
33
76
  it "scrape each paginated pages limited" do
34
77
  root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
35
78
  Yasuri::TextNode.new('/html/body/p', "content"),
data/spec/yasuri_spec.rb CHANGED
@@ -13,6 +13,75 @@ describe 'Yasuri' do
13
13
  @index_page = @agent.get(@uri)
14
14
  end
15
15
 
16
+
17
+ ############
18
+ # yam2tree #
19
+ ############
20
+ describe '.yaml2tree' do
21
+ it "fail if empty yaml" do
22
+ expect { Yasuri.yaml2tree(nil) }.to raise_error(RuntimeError)
23
+ end
24
+
25
+ it "return text node" do
26
+ src = <<-EOB
27
+ text_content: "/html/body/p[1]"
28
+ EOB
29
+ generated = Yasuri.yaml2tree(src)
30
+ original = Yasuri::TextNode.new('/html/body/p[1]', "content")
31
+
32
+ compare_generated_vs_original(generated, original, @index_page)
33
+ end
34
+
35
+ it "return text node as symbol" do
36
+ src = <<-EOB
37
+ :text_content:
38
+ :path: "/html/body/p[1]"
39
+ EOB
40
+ generated = Yasuri.yaml2tree(src)
41
+ original = Yasuri::TextNode.new('/html/body/p[1]', "content")
42
+
43
+ compare_generated_vs_original(generated, original, @index_page)
44
+ end
45
+
46
+ it "return LinksNode/TextNode" do
47
+
48
+ src = <<-EOB
49
+ links_root:
50
+ path: "/html/body/a"
51
+ text_content: "/html/body/p"
52
+ EOB
53
+ generated = Yasuri.yaml2tree(src)
54
+ original = Yasuri::LinksNode.new('/html/body/a', "root", [
55
+ Yasuri::TextNode.new('/html/body/p', "content"),
56
+ ])
57
+
58
+ compare_generated_vs_original(generated, original, @index_page)
59
+ end
60
+
61
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
62
+ src = <<-EOB
63
+ struct_tables:
64
+ path: "/html/body/table"
65
+ struct_table:
66
+ path: "./tr"
67
+ text_title: "./td[1]"
68
+ text_pub_date: "./td[2]"
69
+ EOB
70
+
71
+ generated = Yasuri.yaml2tree(src)
72
+ original = Yasuri::StructNode.new('/html/body/table', "tables", [
73
+ Yasuri::StructNode.new('./tr', "table", [
74
+ Yasuri::TextNode.new('./td[1]', "title"),
75
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
76
+ ])
77
+ ])
78
+ page = @agent.get(@uri + "/struct/structual_text.html")
79
+ compare_generated_vs_original(generated, original, page)
80
+ end
81
+
82
+ end # end of describe '.yaml2tree'
83
+
84
+
16
85
  #############
17
86
  # json2tree #
18
87
  #############
@@ -22,10 +91,10 @@ describe 'Yasuri' do
22
91
  end
23
92
 
24
93
  it "return TextNode" do
25
- src = %q| { "node" : "text",
26
- "name" : "content",
27
- "path" : "/html/body/p[1]"
28
- }|
94
+ src = %q|
95
+ {
96
+ "text_content": "/html/body/p[1]"
97
+ }|
29
98
  generated = Yasuri.json2tree(src)
30
99
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
31
100
 
@@ -33,26 +102,41 @@ describe 'Yasuri' do
33
102
  end
34
103
 
35
104
  it "return TextNode with truncate_regexp" do
36
- src = %q| { "node" : "text",
37
- "name" : "content",
38
- "path" : "/html/body/p[1]",
39
- "truncate" : "^[^,]+"
40
- }|
105
+ src = %q|
106
+ {
107
+ "text_content": {
108
+ "path": "/html/body/p[1]",
109
+ "truncate" : "^[^,]+"
110
+ }
111
+ }|
41
112
  generated = Yasuri.json2tree(src)
42
113
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
43
114
  compare_generated_vs_original(generated, original, @index_page)
44
115
  end
45
116
 
117
+ it "return MapNode with TextNodes" do
118
+ src = %q|
119
+ {
120
+ "text_content01": "/html/body/p[1]",
121
+ "text_content02": "/html/body/p[2]"
122
+ }|
123
+ generated = Yasuri.json2tree(src)
124
+ original = Yasuri::MapNode.new('parent', [
125
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
126
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
127
+ ])
128
+ compare_generated_vs_original(generated, original, @index_page)
129
+ end
46
130
 
47
131
  it "return LinksNode/TextNode" do
48
- src = %q| { "node" : "links",
49
- "name" : "root",
50
- "path" : "/html/body/a",
51
- "children" : [ { "node" : "text",
52
- "name" : "content",
53
- "path" : "/html/body/p"
54
- } ]
55
- }|
132
+ src = %q|
133
+ {
134
+ "links_root": {
135
+ "path": "/html/body/a",
136
+ "text_content": "/html/body/p"
137
+ }
138
+ }|
139
+
56
140
  generated = Yasuri.json2tree(src)
57
141
  original = Yasuri::LinksNode.new('/html/body/a', "root", [
58
142
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -62,14 +146,13 @@ describe 'Yasuri' do
62
146
  end
63
147
 
64
148
  it "return PaginateNode/TextNode" do
65
- src = %q|{ "node" : "pages",
66
- "name" : "root",
67
- "path" : "/html/body/nav/span/a[@class=\'next\']",
68
- "children" : [ { "node" : "text",
69
- "name" : "content",
70
- "path" : "/html/body/p"
71
- } ]
72
- }|
149
+ src = %q|
150
+ {
151
+ "pages_root": {
152
+ "path": "/html/body/nav/span/a[@class=\'next\']",
153
+ "text_content": "/html/body/p"
154
+ }
155
+ }|
73
156
  generated = Yasuri.json2tree(src)
74
157
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
75
158
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -81,15 +164,14 @@ describe 'Yasuri' do
81
164
  end
82
165
 
83
166
  it "return PaginateNode/TextNode with limit" do
84
- src = %q|{ "node" : "pages",
85
- "name" : "root",
86
- "path" : "/html/body/nav/span/a[@class=\'next\']",
87
- "limit" : 2,
88
- "children" : [ { "node" : "text",
89
- "name" : "content",
90
- "path" : "/html/body/p"
91
- } ]
92
- }|
167
+ src = %q|
168
+ {
169
+ "pages_root": {
170
+ "path": "/html/body/nav/span/a[@class=\'next\']",
171
+ "limit": 2,
172
+ "text_content": "/html/body/p"
173
+ }
174
+ }|
93
175
  generated = Yasuri.json2tree(src)
94
176
  original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
95
177
  Yasuri::TextNode.new('/html/body/p', "content"),
@@ -101,24 +183,17 @@ describe 'Yasuri' do
101
183
  end
102
184
 
103
185
  it "return StructNode/StructNode/[TextNode,TextNode]" do
104
- src = %q| { "node" : "struct",
105
- "name" : "tables",
106
- "path" : "/html/body/table",
107
- "children" : [
108
- { "node" : "struct",
109
- "name" : "table",
110
- "path" : "./tr",
111
- "children" : [
112
- { "node" : "text",
113
- "name" : "title",
114
- "path" : "./td[1]"
115
- },
116
- { "node" : "text",
117
- "name" : "pub_date",
118
- "path" : "./td[2]"
119
- }]
120
- }]
121
- }|
186
+ src = %q|
187
+ {
188
+ "struct_tables": {
189
+ "path": "/html/body/table",
190
+ "struct_table": {
191
+ "path": "./tr",
192
+ "text_title": "./td[1]",
193
+ "text_pub_date": "./td[2]"
194
+ }
195
+ }
196
+ }|
122
197
  generated = Yasuri.json2tree(src)
123
198
  original = Yasuri::StructNode.new('/html/body/table', "tables", [
124
199
  Yasuri::StructNode.new('./tr', "table", [
@@ -126,27 +201,27 @@ describe 'Yasuri' do
126
201
  Yasuri::TextNode.new('./td[2]', "pub_date"),
127
202
  ])
128
203
  ])
129
- page = @agent.get(@uri + "/structual_text.html")
204
+ page = @agent.get(@uri + "/struct/structual_text.html")
130
205
  compare_generated_vs_original(generated, original, page)
131
206
  end
132
207
  end
133
208
 
209
+
134
210
  #############
135
211
  # tree2json #
136
212
  #############
137
213
  describe '.tree2json' do
138
214
  it "return empty json" do
139
- json = Yasuri.tree2json(nil)
140
- expect(json).to match "{}"
215
+ expect { Yasuri.tree2json(nil) }.to raise_error(RuntimeError)
141
216
  end
142
217
 
143
218
  it "return text node" do
144
219
  node = Yasuri::TextNode.new("/html/head/title", "title")
145
220
  json = Yasuri.tree2json(node)
146
- expected_str = %q| { "node": "text",
147
- "name": "title",
148
- "path": "/html/head/title"
149
- } |
221
+ expected_str = %q|
222
+ {
223
+ "text_title": "/html/head/title"
224
+ }|
150
225
  expected = JSON.parse(expected_str)
151
226
  actual = JSON.parse(json)
152
227
  expect(actual).to match expected
@@ -155,29 +230,49 @@ describe 'Yasuri' do
155
230
  it "return text node with truncate_regexp" do
156
231
  node = Yasuri::TextNode.new("/html/head/title", "title", truncate:/^[^,]+/)
157
232
  json = Yasuri.tree2json(node)
158
- expected_str = %q| { "node": "text",
159
- "name": "title",
160
- "path": "/html/head/title",
161
- "truncate": "^[^,]+"
162
- } |
233
+ expected_str = %q|
234
+ {
235
+ "text_title": {
236
+ "path": "/html/head/title",
237
+ "truncate": "^[^,]+"
238
+ }
239
+ }|
163
240
  expected = Yasuri.tree2json(Yasuri.json2tree(expected_str))
164
241
  actual = Yasuri.tree2json(Yasuri.json2tree(json))
165
242
  expect(actual).to match expected
166
243
  end
167
244
 
245
+ it "return map node with text nodes" do
246
+ tree = Yasuri::MapNode.new('parent', [
247
+ Yasuri::TextNode.new('/html/body/p[1]', "content01"),
248
+ Yasuri::TextNode.new('/html/body/p[2]', "content02"),
249
+ ])
250
+ actual_json = Yasuri.tree2json(tree)
251
+
252
+ expected_json = %q|
253
+ {
254
+ "text_content01": "/html/body/p[1]",
255
+ "text_content02": "/html/body/p[2]"
256
+ }|
257
+
258
+ expected = Yasuri.tree2json(Yasuri.json2tree(expected_json))
259
+ actual = Yasuri.tree2json(Yasuri.json2tree(actual_json))
260
+ expect(actual).to match expected
261
+ end
262
+
168
263
  it "return LinksNode/TextNode" do
169
264
  tree = Yasuri::LinksNode.new('/html/body/a', "root", [
170
265
  Yasuri::TextNode.new('/html/body/p', "content"),
171
266
  ])
172
267
  json = Yasuri.tree2json(tree)
173
- expected_src = %q| { "node" : "links",
174
- "name" : "root",
175
- "path" : "/html/body/a",
176
- "children" : [ { "node" : "text",
177
- "name" : "content",
178
- "path" : "/html/body/p"
179
- } ]
180
- }|
268
+
269
+ expected_src = %q|
270
+ {
271
+ "links_root": {
272
+ "path": "/html/body/a",
273
+ "text_content":"/html/body/p"
274
+ }
275
+ }|
181
276
  expected = JSON.parse(expected_src)
182
277
  actual = JSON.parse(json)
183
278
  expect(actual).to match expected
@@ -189,24 +284,44 @@ describe 'Yasuri' do
189
284
  ], limit:10)
190
285
 
191
286
  json = Yasuri.tree2json(tree)
192
- expected_src = %q| { "node" : "pages",
193
- "name" : "root",
194
- "path" : "/html/body/nav/span/a[@class='next']",
195
- "limit" : 10,
196
- "children" : [ { "node" : "text",
197
- "name" : "content",
198
- "path" : "/html/body/p"
199
- } ]
200
- }|
287
+ expected_src = %q|
288
+ {
289
+ "pages_root": {
290
+ "path": "/html/body/nav/span/a[@class='next']",
291
+ "limit": 10,
292
+ "flatten": false,
293
+ "text_content": "/html/body/p"
294
+ }
295
+ }|
201
296
  expected = JSON.parse(expected_src)
202
297
  actual = JSON.parse(json)
203
298
  expect(actual).to match expected
204
299
  end
205
-
206
-
207
-
208
300
  end
209
301
 
302
+ it "return StructNode/StructNode/[TextNode,TextNode]" do
303
+ tree = Yasuri::StructNode.new('/html/body/table', "tables", [
304
+ Yasuri::StructNode.new('./tr', "table", [
305
+ Yasuri::TextNode.new('./td[1]', "title"),
306
+ Yasuri::TextNode.new('./td[2]', "pub_date"),
307
+ ])
308
+ ])
309
+ json = Yasuri.tree2json(tree)
310
+ expected_src = %q|
311
+ {
312
+ "struct_tables": {
313
+ "path": "/html/body/table",
314
+ "struct_table": {
315
+ "path": "./tr",
316
+ "text_title": "./td[1]",
317
+ "text_pub_date": "./td[2]"
318
+ }
319
+ }
320
+ }|
321
+ expected = JSON.parse(expected_src)
322
+ actual = JSON.parse(json)
323
+ expect(actual).to match expected
324
+ end
210
325
 
211
326
  it 'has a version number' do
212
327
  expect(Yasuri::VERSION).not_to be nil