yasuri 3.3.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,20 +3,16 @@ require_relative 'spec_helper'
3
3
  describe 'Yasuri' do
4
4
  include_context 'httpserver'
5
5
 
6
- before do
7
- @uri = uri
8
- end
9
-
10
6
  describe '::MapNode' do
11
7
  it "multi scrape in singe page" do
12
8
  map = Yasuri.map_sample do
13
9
  text_title '/html/head/title'
14
10
  text_body_p '/html/body/p[1]'
15
11
  end
16
- actual = map.scrape(@uri)
12
+ actual = map.scrape(uri)
17
13
 
18
14
  expected = {
19
- "title" => "Yasuri Test",
15
+ "title" => "Yasuri Test",
20
16
  "body_p" => "Hello,Yasuri"
21
17
  }
22
18
  expect(actual).to include expected
@@ -24,13 +20,13 @@ describe 'Yasuri' do
24
20
 
25
21
  it "nested multi scrape in singe page" do
26
22
  map = Yasuri.map_sample do
27
- map_group1 { text_child01 '/html/body/a[1]' }
23
+ map_group1 { text_child01 '/html/body/a[1]' }
28
24
  map_group2 do
29
25
  text_child01 '/html/body/a[1]'
30
26
  text_child03 '/html/body/a[3]'
31
27
  end
32
28
  end
33
- actual = map.scrape(@uri)
29
+ actual = map.scrape(uri)
34
30
 
35
31
  expected = {
36
32
  "group1" => {
@@ -50,20 +46,20 @@ describe 'Yasuri' do
50
46
  links_a '/html/body/a' do
51
47
  text_content '/html/body/p'
52
48
  end
53
- text_child01 '/html/body/a[1]'
49
+ text_child01 '/html/body/a[1]'
54
50
  end
55
51
  map_group2 do
56
52
  text_child03 '/html/body/a[3]'
57
53
  end
58
54
  end
59
- actual = map.scrape(@uri)
55
+ actual = map.scrape(uri)
60
56
 
61
57
  expected = {
62
58
  "group1" => {
63
59
  "a" => [
64
- {"content" => "Child 01 page."},
65
- {"content" => "Child 02 page."},
66
- {"content" => "Child 03 page."},
60
+ { "content" => "Child 01 page." },
61
+ { "content" => "Child 02 page." },
62
+ { "content" => "Child 03 page." }
67
63
  ],
68
64
  "child01" => "child01"
69
65
  },
@@ -72,4 +68,4 @@ describe 'Yasuri' do
72
68
  expect(actual).to include expected
73
69
  end
74
70
  end
75
- end
71
+ end
@@ -1,105 +1,96 @@
1
1
 
2
- # Author:: TAC (tac@tac42.net)
3
-
4
2
  require_relative 'spec_helper'
5
3
 
6
- ############
7
- # Paginate #
8
- ############
9
4
  describe 'Yasuri' do
10
5
  include_context 'httpserver'
11
6
 
12
7
  describe '::PaginateNode' do
13
- before do
14
- @uri = uri + "/pagination/page01.html"
15
- end
8
+ let(:uri_paginate) { "#{uri}/pagination/page01.html" }
16
9
 
17
10
  it "scrape each paginated pages" do
18
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
19
- Yasuri::TextNode.new('/html/body/p', "content"),
20
- ])
21
- actual = root_node.scrape(@uri)
11
+ root_node = Yasuri::PaginateNode.new(
12
+ "/html/body/nav/span/a[@class='next']", "root", [
13
+ Yasuri::TextNode.new('/html/body/p', "content")
14
+ ]
15
+ )
16
+ actual = root_node.scrape(uri_paginate)
22
17
  expected = [
23
- {"content" => "PaginationTest01"},
24
- {"content" => "PaginationTest02"},
25
- {"content" => "PaginationTest03"},
26
- {"content" => "PaginationTest04"},
18
+ { "content" => "PaginationTest01" },
19
+ { "content" => "PaginationTest02" },
20
+ { "content" => "PaginationTest03" },
21
+ { "content" => "PaginationTest04" }
27
22
  ]
28
23
  expect(actual).to match expected
29
24
  end
30
25
 
31
26
  it "scrape each paginated pages with flatten" do
32
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
33
- Yasuri::TextNode.new('/html/body/p', "content"),
34
- Yasuri::StructNode.new('/html/body/nav/span', "span", [
35
- Yasuri::TextNode.new('./a', "text"),
36
- ]),
37
- ], flatten: true)
38
- actual = root_node.scrape(@uri)
27
+ root_node = Yasuri::PaginateNode.new(
28
+ "/html/body/nav/span/a[@class='next']", "root", [
29
+ Yasuri::TextNode.new('/html/body/p', "content"),
30
+ Yasuri::StructNode.new(
31
+ '/html/body/nav/span', "span", [
32
+ Yasuri::TextNode.new('./a', "text")
33
+ ]
34
+ )
35
+ ], flatten: true
36
+ )
37
+ actual = root_node.scrape(uri_paginate)
39
38
  expected = [
40
- "PaginationTest01",
41
- {"text"=>""},
42
- {"text"=>""},
43
- {"text" => "2"},
44
- {"text" => "3"},
45
- {"text" => "4"},
46
- {"text"=>"NextPage »"},
47
- "PaginationTest02",
48
- {"text"=>"« PreviousPage"},
49
- {"text" => "1"},
50
- {"text"=>""},
51
- {"text" => "3"},
52
- {"text" => "4"},
53
- {"text"=>"NextPage »"},
54
- "PaginationTest03",
55
- {"text"=>"« PreviousPage"},
56
- {"text" => "1"},
57
- {"text" => "2"},
58
- {"text"=>""},
59
- {"text" => "4"},
60
- {"text"=>"NextPage »"},
61
- "PaginationTest04",
62
- {"text"=>"« PreviousPage"},
63
- {"text" => "1"},
64
- {"text" => "2"},
65
- {"text" => "3"},
66
- {"text"=>""},
67
- {"text"=>""},
39
+ "PaginationTest01", { "text" => "" },
40
+ { "text" => "" }, { "text" => "2" }, { "text" => "3" }, { "text" => "4" },
41
+ { "text" => "NextPage »" },
42
+
43
+ "PaginationTest02", { "text" => "« PreviousPage" },
44
+ { "text" => "1" }, { "text" => "" }, { "text" => "3" }, { "text" => "4" },
45
+ { "text" => "NextPage »" },
46
+
47
+ "PaginationTest03", { "text" => "« PreviousPage" },
48
+ { "text" => "1" }, { "text" => "2" }, { "text" => "" }, { "text" => "4" },
49
+ { "text" => "NextPage »" },
50
+
51
+ "PaginationTest04", { "text" => "« PreviousPage" },
52
+ { "text" => "1" }, { "text" => "2" }, { "text" => "3" }, { "text" => "" },
53
+ { "text" => "" }
68
54
  ]
69
55
 
70
56
  expect(actual).to match expected
71
57
  end
72
58
 
73
-
74
59
  it "scrape each paginated pages limited" do
75
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
76
- Yasuri::TextNode.new('/html/body/p', "content"),
77
- ], limit:3)
78
- actual = root_node.scrape(@uri)
60
+ root_node = Yasuri::PaginateNode.new(
61
+ "/html/body/nav/span/a[@class='next']", "root", [
62
+ Yasuri::TextNode.new('/html/body/p', "content")
63
+ ], limit: 3
64
+ )
65
+ actual = root_node.scrape(uri_paginate)
79
66
  expected = [
80
- {"content" => "PaginationTest01"},
81
- {"content" => "PaginationTest02"},
82
- {"content" => "PaginationTest03"},
67
+ { "content" => "PaginationTest01" },
68
+ { "content" => "PaginationTest02" },
69
+ { "content" => "PaginationTest03" }
83
70
  ]
84
71
  expect(actual).to match expected
85
72
  end
86
73
 
87
74
  it 'return first content if paginate link node is not found' do
88
75
  missing_xpath = "/html/body/nav/span/b[@class='next']"
89
- root_node = Yasuri::PaginateNode.new(missing_xpath, "root", [
90
- Yasuri::TextNode.new('/html/body/p', "content"),
91
- ])
92
- actual = root_node.scrape(@uri)
93
- expected = [ {"content" => "PaginationTest01"}, ]
76
+ root_node = Yasuri::PaginateNode.new(
77
+ missing_xpath, "root", [
78
+ Yasuri::TextNode.new('/html/body/p', "content")
79
+ ]
80
+ )
81
+ actual = root_node.scrape(uri_paginate)
82
+ expected = [{ "content" => "PaginationTest01" }]
94
83
  expect(actual).to match_array expected
95
84
  end
96
85
 
97
86
  it 'return empty hashes if content node is not found' do
98
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
99
- Yasuri::TextNode.new('/html/body/hoge', "content"),
100
- ])
101
- actual = root_node.scrape(@uri)
102
- expected = [ {"content" => ""}, {"content" => ""}, {"content" => ""}, {"content" => ""},]
87
+ root_node = Yasuri::PaginateNode.new(
88
+ "/html/body/nav/span/a[@class='next']", "root", [
89
+ Yasuri::TextNode.new('/html/body/hoge', "content")
90
+ ]
91
+ )
92
+ actual = root_node.scrape(uri_paginate)
93
+ expected = [{ "content" => "" }, { "content" => "" }, { "content" => "" }, { "content" => "" }]
103
94
  expect(actual).to match_array expected
104
95
  end
105
96
 
@@ -107,32 +98,38 @@ describe 'Yasuri' do
107
98
  generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']" do
108
99
  text_content '/html/body/p'
109
100
  end
110
- original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
111
- Yasuri::TextNode.new('/html/body/p', "content"),
112
- ])
113
- compare_generated_vs_original(generated, original, @uri)
101
+ original = Yasuri::PaginateNode.new(
102
+ "/html/body/nav/span/a[@class='next']", "root", [
103
+ Yasuri::TextNode.new('/html/body/p', "content")
104
+ ]
105
+ )
106
+ compare_generated_vs_original(generated, original, uri_paginate)
114
107
  end
115
108
 
116
109
  it 'can be defined by DSL, return single PaginateNode content limited' do
117
- generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']", limit:2 do
110
+ generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']", limit: 2 do
118
111
  text_content '/html/body/p'
119
112
  end
120
- original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
121
- Yasuri::TextNode.new('/html/body/p', "content"),
122
- ], limit: 2)
123
- compare_generated_vs_original(generated, original, @uri)
113
+ original = Yasuri::PaginateNode.new(
114
+ "/html/body/nav/span/a[@class='next']", "root", [
115
+ Yasuri::TextNode.new('/html/body/p', "content")
116
+ ], limit: 2
117
+ )
118
+ compare_generated_vs_original(generated, original, uri_paginate)
124
119
  end
125
120
 
126
121
  it "return child node as symbol" do
127
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
128
- Yasuri::TextNode.new('/html/body/p', "content"),
129
- ])
130
- actual = root_node.scrape(@uri, symbolize_names:true)
122
+ root_node = Yasuri::PaginateNode.new(
123
+ "/html/body/nav/span/a[@class='next']", "root", [
124
+ Yasuri::TextNode.new('/html/body/p', "content")
125
+ ]
126
+ )
127
+ actual = root_node.scrape(uri_paginate, symbolize_names: true)
131
128
  expected = [
132
- {:content => "PaginationTest01"},
133
- {:content => "PaginationTest02"},
134
- {:content => "PaginationTest03"},
135
- {:content => "PaginationTest04"},
129
+ { content: "PaginationTest01" },
130
+ { content: "PaginationTest02" },
131
+ { content: "PaginationTest03" },
132
+ { content: "PaginationTest04" }
136
133
  ]
137
134
  expect(actual).to match expected
138
135
  end
@@ -140,10 +137,12 @@ describe 'Yasuri' do
140
137
  it "scrape with interval for each request" do
141
138
  allow(Kernel).to receive(:sleep)
142
139
 
143
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
144
- Yasuri::TextNode.new('/html/body/p', "content"),
145
- ])
146
- actual = root_node.scrape(@uri, interval_ms: 1000)
140
+ root_node = Yasuri::PaginateNode.new(
141
+ "/html/body/nav/span/a[@class='next']", "root", [
142
+ Yasuri::TextNode.new('/html/body/p', "content")
143
+ ]
144
+ )
145
+ actual = root_node.scrape(uri_paginate, interval_ms: 1000)
147
146
  expect(actual.size).to match 4
148
147
 
149
148
  expect(Kernel).to have_received(:sleep).exactly(4).times do |interval_sec|
data/spec/yasuri_spec.rb CHANGED
@@ -1,17 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Author:: TAC (tac@tac42.net)
4
-
5
1
  require_relative 'spec_helper'
6
2
 
7
3
  describe 'Yasuri' do
8
4
  include_context 'httpserver'
9
5
 
10
- before do
11
- @uri = uri
12
- end
13
-
14
-
15
6
  ############
16
7
  # yam2tree #
17
8
  ############
@@ -27,7 +18,7 @@ describe 'Yasuri' do
27
18
  generated = Yasuri.yaml2tree(src)
28
19
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
29
20
 
30
- compare_generated_vs_original(generated, original, @uri)
21
+ compare_generated_vs_original(generated, original, uri)
31
22
  end
32
23
 
33
24
  it "return text node as symbol" do
@@ -38,7 +29,7 @@ describe 'Yasuri' do
38
29
  generated = Yasuri.yaml2tree(src)
39
30
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
40
31
 
41
- compare_generated_vs_original(generated, original, @uri)
32
+ compare_generated_vs_original(generated, original, uri)
42
33
  end
43
34
 
44
35
  it "return LinksNode/TextNode" do
@@ -53,7 +44,7 @@ describe 'Yasuri' do
53
44
  Yasuri::TextNode.new('/html/body/p', "content"),
54
45
  ])
55
46
 
56
- compare_generated_vs_original(generated, original, @uri)
47
+ compare_generated_vs_original(generated, original, uri)
57
48
  end
58
49
 
59
50
  it "return StructNode/StructNode/[TextNode,TextNode]" do
@@ -73,8 +64,8 @@ describe 'Yasuri' do
73
64
  Yasuri::TextNode.new('./td[2]', "pub_date"),
74
65
  ])
75
66
  ])
76
- uri = @uri + "/struct/structual_text.html"
77
- compare_generated_vs_original(generated, original, uri)
67
+ test_uri = uri + "/struct/structual_text.html"
68
+ compare_generated_vs_original(generated, original, test_uri)
78
69
  end
79
70
 
80
71
  end # end of describe '.yaml2tree'
@@ -96,7 +87,7 @@ describe 'Yasuri' do
96
87
  generated = Yasuri.json2tree(src)
97
88
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
98
89
 
99
- compare_generated_vs_original(generated, original, @uri)
90
+ compare_generated_vs_original(generated, original, uri)
100
91
  end
101
92
 
102
93
  it "return TextNode with truncate_regexp" do
@@ -109,7 +100,7 @@ describe 'Yasuri' do
109
100
  }|
110
101
  generated = Yasuri.json2tree(src)
111
102
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
112
- compare_generated_vs_original(generated, original, @uri)
103
+ compare_generated_vs_original(generated, original, uri)
113
104
  end
114
105
 
115
106
  it "return MapNode with TextNodes" do
@@ -123,7 +114,7 @@ describe 'Yasuri' do
123
114
  Yasuri::TextNode.new('/html/body/p[1]', "content01"),
124
115
  Yasuri::TextNode.new('/html/body/p[2]', "content02"),
125
116
  ])
126
- compare_generated_vs_original(generated, original, @uri)
117
+ compare_generated_vs_original(generated, original, uri)
127
118
  end
128
119
 
129
120
  it "return LinksNode/TextNode" do
@@ -140,7 +131,7 @@ describe 'Yasuri' do
140
131
  Yasuri::TextNode.new('/html/body/p', "content"),
141
132
  ])
142
133
 
143
- compare_generated_vs_original(generated, original, @uri)
134
+ compare_generated_vs_original(generated, original, uri)
144
135
  end
145
136
 
146
137
  it "return PaginateNode/TextNode" do
@@ -156,8 +147,8 @@ describe 'Yasuri' do
156
147
  Yasuri::TextNode.new('/html/body/p', "content"),
157
148
  ])
158
149
 
159
- uri = @uri + "/pagination/page01.html"
160
- compare_generated_vs_original(generated, original, uri)
150
+ test_uri = uri + "/pagination/page01.html"
151
+ compare_generated_vs_original(generated, original, test_uri)
161
152
  end
162
153
 
163
154
  it "return PaginateNode/TextNode with limit" do
@@ -174,8 +165,8 @@ describe 'Yasuri' do
174
165
  Yasuri::TextNode.new('/html/body/p', "content"),
175
166
  ], limit:2)
176
167
 
177
- uri = @uri + "/pagination/page01.html"
178
- compare_generated_vs_original(generated, original, uri)
168
+ test_uri = uri + "/pagination/page01.html"
169
+ compare_generated_vs_original(generated, original, test_uri)
179
170
  end
180
171
 
181
172
  it "return StructNode/StructNode/[TextNode,TextNode]" do
@@ -197,8 +188,8 @@ describe 'Yasuri' do
197
188
  Yasuri::TextNode.new('./td[2]', "pub_date"),
198
189
  ])
199
190
  ])
200
- uri = @uri + "/struct/structual_text.html"
201
- compare_generated_vs_original(generated, original, uri)
191
+ test_uri = uri + "/struct/structual_text.html"
192
+ compare_generated_vs_original(generated, original, test_uri)
202
193
  end
203
194
  end
204
195
 
@@ -1,106 +1,121 @@
1
1
 
2
- # Author:: TAC (tac@tac42.net)
3
-
4
2
  require_relative 'spec_helper'
5
3
 
6
- ##########
7
- # Struct #
8
- ##########
9
4
  describe 'Yasuri' do
10
5
  include_context 'httpserver'
11
6
 
12
7
  describe '::StructNode' do
13
- before do
14
- @uri = uri + "/struct/structual_text.html"
15
-
16
- @table_1996 = [
17
- { "title" => "The Perfect Insider",
8
+ let(:uri_struct) { "#{uri}/struct/structual_text.html" }
9
+ let(:table1996) do
10
+ [
11
+ { "title" => "The Perfect Insider",
18
12
  "pub_date" => "1996/4/5" },
19
- { "title" => "Doctors in Isolated Room",
13
+ { "title" => "Doctors in Isolated Room",
20
14
  "pub_date" => "1996/7/5" },
21
- { "title" => "Mathematical Goodbye",
22
- "pub_date" => "1996/9/5" },
15
+ { "title" => "Mathematical Goodbye",
16
+ "pub_date" => "1996/9/5" }
23
17
  ]
24
- @table_1997 = [
25
- { "title" => "Jack the Poetical Private",
18
+ end
19
+ let(:table1997) do
20
+ [
21
+ { "title" => "Jack the Poetical Private",
26
22
  "pub_date" => "1997/1/5" },
27
- { "title" => "Who Inside",
23
+ { "title" => "Who Inside",
28
24
  "pub_date" => "1997/4/5" },
29
- { "title" => "Illusion Acts Like Magic",
30
- "pub_date" => "1997/10/5" },
25
+ { "title" => "Illusion Acts Like Magic",
26
+ "pub_date" => "1997/10/5" }
31
27
  ]
32
- @table_1998 = [
33
- { "title" => "Replaceable Summer",
28
+ end
29
+ let(:table1998) do
30
+ [
31
+ { "title" => "Replaceable Summer",
34
32
  "pub_date" => "1998/1/7" },
35
- { "title" => "Switch Back",
33
+ { "title" => "Switch Back",
36
34
  "pub_date" => "1998/4/5" },
37
- { "title" => "Numerical Models",
35
+ { "title" => "Numerical Models",
38
36
  "pub_date" => "1998/7/5" },
39
- { "title" => "The Perfect Outsider",
40
- "pub_date" => "1998/10/5" },
37
+ { "title" => "The Perfect Outsider",
38
+ "pub_date" => "1998/10/5" }
41
39
  ]
42
- @all_tables = [
43
- {"table" => @table_1996},
44
- {"table" => @table_1997},
45
- {"table" => @table_1998},
40
+ end
41
+
42
+ let(:all_tables) do
43
+ [
44
+ { "table" => table1996 },
45
+ { "table" => table1997 },
46
+ { "table" => table1998 }
46
47
  ]
47
48
  end
48
49
 
49
50
  it 'scrape single table contents' do
50
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
51
- Yasuri::TextNode.new('./td[1]', "title"),
52
- Yasuri::TextNode.new('./td[2]', "pub_date"),
53
- ])
54
- expected = @table_1996
55
- actual = node.scrape(@uri)
51
+ node = Yasuri::StructNode.new(
52
+ '/html/body/table[1]/tr', "table", [
53
+ Yasuri::TextNode.new('./td[1]', "title"),
54
+ Yasuri::TextNode.new('./td[2]', "pub_date")
55
+ ]
56
+ )
57
+ expected = table1996
58
+ actual = node.scrape(uri_struct)
56
59
  expect(actual).to match expected
57
60
  end
58
61
 
59
62
  it 'return single result without array' do
60
- node = Yasuri::StructNode.new('/html/body/table[1]/tr[1]', "table_first_tr", [
61
- Yasuri::TextNode.new('./td[1]', "title"),
62
- Yasuri::TextNode.new('./td[2]', "pub_date"),
63
- ])
64
- expected = @table_1996.first
65
- actual = node.scrape(@uri)
63
+ node = Yasuri::StructNode.new(
64
+ '/html/body/table[1]/tr[1]', "table_first_tr", [
65
+ Yasuri::TextNode.new('./td[1]', "title"),
66
+ Yasuri::TextNode.new('./td[2]', "pub_date")
67
+ ]
68
+ )
69
+ expected = table1996.first
70
+ actual = node.scrape(uri_struct)
66
71
  expect(actual).to match expected
67
72
  end
68
73
 
69
74
  it 'return empty text if no match node' do
70
75
  no_match_xpath = '/html/body/table[1]/t'
71
- node = Yasuri::StructNode.new(no_match_xpath, "table", [
72
- Yasuri::TextNode.new('./td[1]', "title")
73
- ])
74
- actual = node.scrape(@uri)
76
+ node = Yasuri::StructNode.new(
77
+ no_match_xpath, "table", [
78
+ Yasuri::TextNode.new('./td[1]', "title")
79
+ ]
80
+ )
81
+ actual = node.scrape(uri_struct)
75
82
  expect(actual).to be_empty
76
83
  end
77
84
 
78
85
  it 'fail with invalid xpath' do
79
86
  invalid_xpath = '/html/body/table[1]/table[1]/tr['
80
- node = Yasuri::StructNode.new(invalid_xpath, "table", [
81
- Yasuri::TextNode.new('./td[1]', "title")
82
- ])
83
- expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
87
+ node = Yasuri::StructNode.new(
88
+ invalid_xpath, "table", [
89
+ Yasuri::TextNode.new('./td[1]', "title")
90
+ ]
91
+ )
92
+ expect { node.scrape(uri_struct) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
84
93
  end
85
94
 
86
95
  it 'fail with invalid xpath in children' do
87
96
  invalid_xpath = './td[1]['
88
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
89
- Yasuri::TextNode.new(invalid_xpath, "title"),
90
- Yasuri::TextNode.new('./td[2]', "pub_date"),
91
- ])
92
- expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
97
+ node = Yasuri::StructNode.new(
98
+ '/html/body/table[1]/tr', "table", [
99
+ Yasuri::TextNode.new(invalid_xpath, "title"),
100
+ Yasuri::TextNode.new('./td[2]', "pub_date")
101
+ ]
102
+ )
103
+ expect { node.scrape(uri_struct) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
93
104
  end
94
105
 
95
106
  it 'scrape all tables' do
96
- node = Yasuri::StructNode.new('/html/body/table', "tables", [
97
- Yasuri::StructNode.new('./tr', "table", [
98
- Yasuri::TextNode.new('./td[1]', "title"),
99
- Yasuri::TextNode.new('./td[2]', "pub_date"),
100
- ])
101
- ])
102
- expected = @all_tables
103
- actual = node.scrape(@uri)
107
+ node = Yasuri::StructNode.new(
108
+ '/html/body/table', "tables", [
109
+ Yasuri::StructNode.new(
110
+ './tr', "table", [
111
+ Yasuri::TextNode.new('./td[1]', "title"),
112
+ Yasuri::TextNode.new('./td[2]', "pub_date")
113
+ ]
114
+ )
115
+ ]
116
+ )
117
+ expected = all_tables
118
+ actual = node.scrape(uri_struct)
104
119
  expect(actual).to match expected
105
120
  end
106
121
 
@@ -111,62 +126,71 @@ describe 'Yasuri' do
111
126
  text_pub_date './td[2]'
112
127
  end
113
128
  end
114
- original = Yasuri::StructNode.new('/html/body/table', "tables", [
115
- Yasuri::StructNode.new('./tr', "table", [
116
- Yasuri::TextNode.new('./td[1]', "title"),
117
- Yasuri::TextNode.new('./td[2]', "pub_date"),
118
- ])
119
- ])
120
- compare_generated_vs_original(generated, original, @uri)
129
+ original = Yasuri::StructNode.new(
130
+ '/html/body/table', "tables", [
131
+ Yasuri::StructNode.new(
132
+ './tr', "table", [
133
+ Yasuri::TextNode.new('./td[1]', "title"),
134
+ Yasuri::TextNode.new('./td[2]', "pub_date")
135
+ ]
136
+ )
137
+ ]
138
+ )
139
+ compare_generated_vs_original(generated, original, uri_struct)
121
140
  end
122
141
 
123
142
  it 'return child node as symbol' do
124
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
125
- Yasuri::TextNode.new('./td[1]', "title"),
126
- Yasuri::TextNode.new('./td[2]', "pub_date"),
127
- ])
128
- expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
129
- actual = node.scrape(@uri, symbolize_names:true)
143
+ node = Yasuri::StructNode.new(
144
+ '/html/body/table[1]/tr', "table", [
145
+ Yasuri::TextNode.new('./td[1]', "title"),
146
+ Yasuri::TextNode.new('./td[2]', "pub_date")
147
+ ]
148
+ )
149
+ expected = table1996.map { |h| h.transform_keys(&:to_sym) }
150
+ actual = node.scrape(uri_struct, symbolize_names: true)
130
151
  expect(actual).to match expected
131
152
  end
132
-
133
153
  end
134
154
 
135
155
  describe '::StructNode::Links' do
136
- before do
137
- @uri = uri + "/struct/structual_links.html"
138
- @table = [
156
+ let(:uri_struct) { "#{uri}/struct/structual_links.html" }
157
+ let(:table) do
158
+ [
139
159
  { "title" => "Child01,02",
140
- "child" => [{"p" => "Child 01 page."}, {"p" => "Child 02 page."}] },
160
+ "child" => [{ "p" => "Child 01 page." }, { "p" => "Child 02 page." }] },
141
161
 
142
162
  { "title" => "Child01,02,03",
143
- "child" => [{"p" => "Child 01 page."}, {"p" => "Child 02 page."}, {"p" => "Child 03 page."}]}
163
+ "child" => [{ "p" => "Child 01 page." }, { "p" => "Child 02 page." }, { "p" => "Child 03 page." }] }
144
164
  ]
145
165
  end
146
166
 
147
167
  it 'return child node in links inside struct' do
148
- node = Yasuri::StructNode.new('/html/body/table/tr', "table", [
149
- Yasuri::TextNode.new('./td[1]', "title"),
150
- Yasuri::LinksNode.new('./td/a', "child", [
151
- Yasuri::TextNode.new('/html/body/p', "p"),
152
- ])
153
- ])
154
- expected = @table
155
- actual = node.scrape(@uri)
168
+ node = Yasuri::StructNode.new(
169
+ '/html/body/table/tr', "table", [
170
+ Yasuri::TextNode.new('./td[1]', "title"),
171
+ Yasuri::LinksNode.new(
172
+ './td/a', "child", [
173
+ Yasuri::TextNode.new('/html/body/p', "p")
174
+ ]
175
+ )
176
+ ]
177
+ )
178
+ expected = table
179
+ actual = node.scrape(uri_struct)
156
180
  expect(actual).to match expected
157
181
  end
158
- end # descrive
182
+ end
159
183
 
160
184
  describe '::StructNode::Pages' do
161
- before do
162
- @uri = uri + "/struct/structual_text.html"
163
- end
185
+ let(:uri_struct) { "#{uri}/struct/structual_text.html" }
164
186
 
165
187
  it 'not supported' do
166
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
167
- Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "pages", [])
168
- ])
169
- expect{ node.scrape(@uri) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
188
+ node = Yasuri::StructNode.new(
189
+ '/html/body/table[1]/tr', "table", [
190
+ Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "pages", [])
191
+ ]
192
+ )
193
+ expect { node.scrape(uri_struct) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
170
194
  end
171
195
  end
172
196
  end