yasuri 3.3.0 → 3.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,20 +3,16 @@ require_relative 'spec_helper'
3
3
  describe 'Yasuri' do
4
4
  include_context 'httpserver'
5
5
 
6
- before do
7
- @uri = uri
8
- end
9
-
10
6
  describe '::MapNode' do
11
7
  it "multi scrape in singe page" do
12
8
  map = Yasuri.map_sample do
13
9
  text_title '/html/head/title'
14
10
  text_body_p '/html/body/p[1]'
15
11
  end
16
- actual = map.scrape(@uri)
12
+ actual = map.scrape(uri)
17
13
 
18
14
  expected = {
19
- "title" => "Yasuri Test",
15
+ "title" => "Yasuri Test",
20
16
  "body_p" => "Hello,Yasuri"
21
17
  }
22
18
  expect(actual).to include expected
@@ -24,13 +20,13 @@ describe 'Yasuri' do
24
20
 
25
21
  it "nested multi scrape in singe page" do
26
22
  map = Yasuri.map_sample do
27
- map_group1 { text_child01 '/html/body/a[1]' }
23
+ map_group1 { text_child01 '/html/body/a[1]' }
28
24
  map_group2 do
29
25
  text_child01 '/html/body/a[1]'
30
26
  text_child03 '/html/body/a[3]'
31
27
  end
32
28
  end
33
- actual = map.scrape(@uri)
29
+ actual = map.scrape(uri)
34
30
 
35
31
  expected = {
36
32
  "group1" => {
@@ -50,20 +46,20 @@ describe 'Yasuri' do
50
46
  links_a '/html/body/a' do
51
47
  text_content '/html/body/p'
52
48
  end
53
- text_child01 '/html/body/a[1]'
49
+ text_child01 '/html/body/a[1]'
54
50
  end
55
51
  map_group2 do
56
52
  text_child03 '/html/body/a[3]'
57
53
  end
58
54
  end
59
- actual = map.scrape(@uri)
55
+ actual = map.scrape(uri)
60
56
 
61
57
  expected = {
62
58
  "group1" => {
63
59
  "a" => [
64
- {"content" => "Child 01 page."},
65
- {"content" => "Child 02 page."},
66
- {"content" => "Child 03 page."},
60
+ { "content" => "Child 01 page." },
61
+ { "content" => "Child 02 page." },
62
+ { "content" => "Child 03 page." }
67
63
  ],
68
64
  "child01" => "child01"
69
65
  },
@@ -72,4 +68,4 @@ describe 'Yasuri' do
72
68
  expect(actual).to include expected
73
69
  end
74
70
  end
75
- end
71
+ end
@@ -1,105 +1,96 @@
1
1
 
2
- # Author:: TAC (tac@tac42.net)
3
-
4
2
  require_relative 'spec_helper'
5
3
 
6
- ############
7
- # Paginate #
8
- ############
9
4
  describe 'Yasuri' do
10
5
  include_context 'httpserver'
11
6
 
12
7
  describe '::PaginateNode' do
13
- before do
14
- @uri = uri + "/pagination/page01.html"
15
- end
8
+ let(:uri_paginate) { "#{uri}/pagination/page01.html" }
16
9
 
17
10
  it "scrape each paginated pages" do
18
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
19
- Yasuri::TextNode.new('/html/body/p', "content"),
20
- ])
21
- actual = root_node.scrape(@uri)
11
+ root_node = Yasuri::PaginateNode.new(
12
+ "/html/body/nav/span/a[@class='next']", "root", [
13
+ Yasuri::TextNode.new('/html/body/p', "content")
14
+ ]
15
+ )
16
+ actual = root_node.scrape(uri_paginate)
22
17
  expected = [
23
- {"content" => "PaginationTest01"},
24
- {"content" => "PaginationTest02"},
25
- {"content" => "PaginationTest03"},
26
- {"content" => "PaginationTest04"},
18
+ { "content" => "PaginationTest01" },
19
+ { "content" => "PaginationTest02" },
20
+ { "content" => "PaginationTest03" },
21
+ { "content" => "PaginationTest04" }
27
22
  ]
28
23
  expect(actual).to match expected
29
24
  end
30
25
 
31
26
  it "scrape each paginated pages with flatten" do
32
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
33
- Yasuri::TextNode.new('/html/body/p', "content"),
34
- Yasuri::StructNode.new('/html/body/nav/span', "span", [
35
- Yasuri::TextNode.new('./a', "text"),
36
- ]),
37
- ], flatten: true)
38
- actual = root_node.scrape(@uri)
27
+ root_node = Yasuri::PaginateNode.new(
28
+ "/html/body/nav/span/a[@class='next']", "root", [
29
+ Yasuri::TextNode.new('/html/body/p', "content"),
30
+ Yasuri::StructNode.new(
31
+ '/html/body/nav/span', "span", [
32
+ Yasuri::TextNode.new('./a', "text")
33
+ ]
34
+ )
35
+ ], flatten: true
36
+ )
37
+ actual = root_node.scrape(uri_paginate)
39
38
  expected = [
40
- "PaginationTest01",
41
- {"text"=>""},
42
- {"text"=>""},
43
- {"text" => "2"},
44
- {"text" => "3"},
45
- {"text" => "4"},
46
- {"text"=>"NextPage »"},
47
- "PaginationTest02",
48
- {"text"=>"« PreviousPage"},
49
- {"text" => "1"},
50
- {"text"=>""},
51
- {"text" => "3"},
52
- {"text" => "4"},
53
- {"text"=>"NextPage »"},
54
- "PaginationTest03",
55
- {"text"=>"« PreviousPage"},
56
- {"text" => "1"},
57
- {"text" => "2"},
58
- {"text"=>""},
59
- {"text" => "4"},
60
- {"text"=>"NextPage »"},
61
- "PaginationTest04",
62
- {"text"=>"« PreviousPage"},
63
- {"text" => "1"},
64
- {"text" => "2"},
65
- {"text" => "3"},
66
- {"text"=>""},
67
- {"text"=>""},
39
+ "PaginationTest01", { "text" => "" },
40
+ { "text" => "" }, { "text" => "2" }, { "text" => "3" }, { "text" => "4" },
41
+ { "text" => "NextPage »" },
42
+
43
+ "PaginationTest02", { "text" => "« PreviousPage" },
44
+ { "text" => "1" }, { "text" => "" }, { "text" => "3" }, { "text" => "4" },
45
+ { "text" => "NextPage »" },
46
+
47
+ "PaginationTest03", { "text" => "« PreviousPage" },
48
+ { "text" => "1" }, { "text" => "2" }, { "text" => "" }, { "text" => "4" },
49
+ { "text" => "NextPage »" },
50
+
51
+ "PaginationTest04", { "text" => "« PreviousPage" },
52
+ { "text" => "1" }, { "text" => "2" }, { "text" => "3" }, { "text" => "" },
53
+ { "text" => "" }
68
54
  ]
69
55
 
70
56
  expect(actual).to match expected
71
57
  end
72
58
 
73
-
74
59
  it "scrape each paginated pages limited" do
75
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
76
- Yasuri::TextNode.new('/html/body/p', "content"),
77
- ], limit:3)
78
- actual = root_node.scrape(@uri)
60
+ root_node = Yasuri::PaginateNode.new(
61
+ "/html/body/nav/span/a[@class='next']", "root", [
62
+ Yasuri::TextNode.new('/html/body/p', "content")
63
+ ], limit: 3
64
+ )
65
+ actual = root_node.scrape(uri_paginate)
79
66
  expected = [
80
- {"content" => "PaginationTest01"},
81
- {"content" => "PaginationTest02"},
82
- {"content" => "PaginationTest03"},
67
+ { "content" => "PaginationTest01" },
68
+ { "content" => "PaginationTest02" },
69
+ { "content" => "PaginationTest03" }
83
70
  ]
84
71
  expect(actual).to match expected
85
72
  end
86
73
 
87
74
  it 'return first content if paginate link node is not found' do
88
75
  missing_xpath = "/html/body/nav/span/b[@class='next']"
89
- root_node = Yasuri::PaginateNode.new(missing_xpath, "root", [
90
- Yasuri::TextNode.new('/html/body/p', "content"),
91
- ])
92
- actual = root_node.scrape(@uri)
93
- expected = [ {"content" => "PaginationTest01"}, ]
76
+ root_node = Yasuri::PaginateNode.new(
77
+ missing_xpath, "root", [
78
+ Yasuri::TextNode.new('/html/body/p', "content")
79
+ ]
80
+ )
81
+ actual = root_node.scrape(uri_paginate)
82
+ expected = [{ "content" => "PaginationTest01" }]
94
83
  expect(actual).to match_array expected
95
84
  end
96
85
 
97
86
  it 'return empty hashes if content node is not found' do
98
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
99
- Yasuri::TextNode.new('/html/body/hoge', "content"),
100
- ])
101
- actual = root_node.scrape(@uri)
102
- expected = [ {"content" => ""}, {"content" => ""}, {"content" => ""}, {"content" => ""},]
87
+ root_node = Yasuri::PaginateNode.new(
88
+ "/html/body/nav/span/a[@class='next']", "root", [
89
+ Yasuri::TextNode.new('/html/body/hoge', "content")
90
+ ]
91
+ )
92
+ actual = root_node.scrape(uri_paginate)
93
+ expected = [{ "content" => "" }, { "content" => "" }, { "content" => "" }, { "content" => "" }]
103
94
  expect(actual).to match_array expected
104
95
  end
105
96
 
@@ -107,32 +98,38 @@ describe 'Yasuri' do
107
98
  generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']" do
108
99
  text_content '/html/body/p'
109
100
  end
110
- original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
111
- Yasuri::TextNode.new('/html/body/p', "content"),
112
- ])
113
- compare_generated_vs_original(generated, original, @uri)
101
+ original = Yasuri::PaginateNode.new(
102
+ "/html/body/nav/span/a[@class='next']", "root", [
103
+ Yasuri::TextNode.new('/html/body/p', "content")
104
+ ]
105
+ )
106
+ compare_generated_vs_original(generated, original, uri_paginate)
114
107
  end
115
108
 
116
109
  it 'can be defined by DSL, return single PaginateNode content limited' do
117
- generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']", limit:2 do
110
+ generated = Yasuri.pages_next "/html/body/nav/span/a[@class='next']", limit: 2 do
118
111
  text_content '/html/body/p'
119
112
  end
120
- original = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
121
- Yasuri::TextNode.new('/html/body/p', "content"),
122
- ], limit: 2)
123
- compare_generated_vs_original(generated, original, @uri)
113
+ original = Yasuri::PaginateNode.new(
114
+ "/html/body/nav/span/a[@class='next']", "root", [
115
+ Yasuri::TextNode.new('/html/body/p', "content")
116
+ ], limit: 2
117
+ )
118
+ compare_generated_vs_original(generated, original, uri_paginate)
124
119
  end
125
120
 
126
121
  it "return child node as symbol" do
127
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
128
- Yasuri::TextNode.new('/html/body/p', "content"),
129
- ])
130
- actual = root_node.scrape(@uri, symbolize_names:true)
122
+ root_node = Yasuri::PaginateNode.new(
123
+ "/html/body/nav/span/a[@class='next']", "root", [
124
+ Yasuri::TextNode.new('/html/body/p', "content")
125
+ ]
126
+ )
127
+ actual = root_node.scrape(uri_paginate, symbolize_names: true)
131
128
  expected = [
132
- {:content => "PaginationTest01"},
133
- {:content => "PaginationTest02"},
134
- {:content => "PaginationTest03"},
135
- {:content => "PaginationTest04"},
129
+ { content: "PaginationTest01" },
130
+ { content: "PaginationTest02" },
131
+ { content: "PaginationTest03" },
132
+ { content: "PaginationTest04" }
136
133
  ]
137
134
  expect(actual).to match expected
138
135
  end
@@ -140,10 +137,12 @@ describe 'Yasuri' do
140
137
  it "scrape with interval for each request" do
141
138
  allow(Kernel).to receive(:sleep)
142
139
 
143
- root_node = Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "root", [
144
- Yasuri::TextNode.new('/html/body/p', "content"),
145
- ])
146
- actual = root_node.scrape(@uri, interval_ms: 1000)
140
+ root_node = Yasuri::PaginateNode.new(
141
+ "/html/body/nav/span/a[@class='next']", "root", [
142
+ Yasuri::TextNode.new('/html/body/p', "content")
143
+ ]
144
+ )
145
+ actual = root_node.scrape(uri_paginate, interval_ms: 1000)
147
146
  expect(actual.size).to match 4
148
147
 
149
148
  expect(Kernel).to have_received(:sleep).exactly(4).times do |interval_sec|
data/spec/yasuri_spec.rb CHANGED
@@ -1,17 +1,8 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Author:: TAC (tac@tac42.net)
4
-
5
1
  require_relative 'spec_helper'
6
2
 
7
3
  describe 'Yasuri' do
8
4
  include_context 'httpserver'
9
5
 
10
- before do
11
- @uri = uri
12
- end
13
-
14
-
15
6
  ############
16
7
  # yam2tree #
17
8
  ############
@@ -27,7 +18,7 @@ describe 'Yasuri' do
27
18
  generated = Yasuri.yaml2tree(src)
28
19
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
29
20
 
30
- compare_generated_vs_original(generated, original, @uri)
21
+ compare_generated_vs_original(generated, original, uri)
31
22
  end
32
23
 
33
24
  it "return text node as symbol" do
@@ -38,7 +29,7 @@ describe 'Yasuri' do
38
29
  generated = Yasuri.yaml2tree(src)
39
30
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
40
31
 
41
- compare_generated_vs_original(generated, original, @uri)
32
+ compare_generated_vs_original(generated, original, uri)
42
33
  end
43
34
 
44
35
  it "return LinksNode/TextNode" do
@@ -53,7 +44,7 @@ describe 'Yasuri' do
53
44
  Yasuri::TextNode.new('/html/body/p', "content"),
54
45
  ])
55
46
 
56
- compare_generated_vs_original(generated, original, @uri)
47
+ compare_generated_vs_original(generated, original, uri)
57
48
  end
58
49
 
59
50
  it "return StructNode/StructNode/[TextNode,TextNode]" do
@@ -73,8 +64,8 @@ describe 'Yasuri' do
73
64
  Yasuri::TextNode.new('./td[2]', "pub_date"),
74
65
  ])
75
66
  ])
76
- uri = @uri + "/struct/structual_text.html"
77
- compare_generated_vs_original(generated, original, uri)
67
+ test_uri = uri + "/struct/structual_text.html"
68
+ compare_generated_vs_original(generated, original, test_uri)
78
69
  end
79
70
 
80
71
  end # end of describe '.yaml2tree'
@@ -96,7 +87,7 @@ describe 'Yasuri' do
96
87
  generated = Yasuri.json2tree(src)
97
88
  original = Yasuri::TextNode.new('/html/body/p[1]', "content")
98
89
 
99
- compare_generated_vs_original(generated, original, @uri)
90
+ compare_generated_vs_original(generated, original, uri)
100
91
  end
101
92
 
102
93
  it "return TextNode with truncate_regexp" do
@@ -109,7 +100,7 @@ describe 'Yasuri' do
109
100
  }|
110
101
  generated = Yasuri.json2tree(src)
111
102
  original = Yasuri::TextNode.new('/html/body/p[1]', "content", truncate:/^[^,]+/)
112
- compare_generated_vs_original(generated, original, @uri)
103
+ compare_generated_vs_original(generated, original, uri)
113
104
  end
114
105
 
115
106
  it "return MapNode with TextNodes" do
@@ -123,7 +114,7 @@ describe 'Yasuri' do
123
114
  Yasuri::TextNode.new('/html/body/p[1]', "content01"),
124
115
  Yasuri::TextNode.new('/html/body/p[2]', "content02"),
125
116
  ])
126
- compare_generated_vs_original(generated, original, @uri)
117
+ compare_generated_vs_original(generated, original, uri)
127
118
  end
128
119
 
129
120
  it "return LinksNode/TextNode" do
@@ -140,7 +131,7 @@ describe 'Yasuri' do
140
131
  Yasuri::TextNode.new('/html/body/p', "content"),
141
132
  ])
142
133
 
143
- compare_generated_vs_original(generated, original, @uri)
134
+ compare_generated_vs_original(generated, original, uri)
144
135
  end
145
136
 
146
137
  it "return PaginateNode/TextNode" do
@@ -156,8 +147,8 @@ describe 'Yasuri' do
156
147
  Yasuri::TextNode.new('/html/body/p', "content"),
157
148
  ])
158
149
 
159
- uri = @uri + "/pagination/page01.html"
160
- compare_generated_vs_original(generated, original, uri)
150
+ test_uri = uri + "/pagination/page01.html"
151
+ compare_generated_vs_original(generated, original, test_uri)
161
152
  end
162
153
 
163
154
  it "return PaginateNode/TextNode with limit" do
@@ -174,8 +165,8 @@ describe 'Yasuri' do
174
165
  Yasuri::TextNode.new('/html/body/p', "content"),
175
166
  ], limit:2)
176
167
 
177
- uri = @uri + "/pagination/page01.html"
178
- compare_generated_vs_original(generated, original, uri)
168
+ test_uri = uri + "/pagination/page01.html"
169
+ compare_generated_vs_original(generated, original, test_uri)
179
170
  end
180
171
 
181
172
  it "return StructNode/StructNode/[TextNode,TextNode]" do
@@ -197,8 +188,8 @@ describe 'Yasuri' do
197
188
  Yasuri::TextNode.new('./td[2]', "pub_date"),
198
189
  ])
199
190
  ])
200
- uri = @uri + "/struct/structual_text.html"
201
- compare_generated_vs_original(generated, original, uri)
191
+ test_uri = uri + "/struct/structual_text.html"
192
+ compare_generated_vs_original(generated, original, test_uri)
202
193
  end
203
194
  end
204
195
 
@@ -1,106 +1,121 @@
1
1
 
2
- # Author:: TAC (tac@tac42.net)
3
-
4
2
  require_relative 'spec_helper'
5
3
 
6
- ##########
7
- # Struct #
8
- ##########
9
4
  describe 'Yasuri' do
10
5
  include_context 'httpserver'
11
6
 
12
7
  describe '::StructNode' do
13
- before do
14
- @uri = uri + "/struct/structual_text.html"
15
-
16
- @table_1996 = [
17
- { "title" => "The Perfect Insider",
8
+ let(:uri_struct) { "#{uri}/struct/structual_text.html" }
9
+ let(:table1996) do
10
+ [
11
+ { "title" => "The Perfect Insider",
18
12
  "pub_date" => "1996/4/5" },
19
- { "title" => "Doctors in Isolated Room",
13
+ { "title" => "Doctors in Isolated Room",
20
14
  "pub_date" => "1996/7/5" },
21
- { "title" => "Mathematical Goodbye",
22
- "pub_date" => "1996/9/5" },
15
+ { "title" => "Mathematical Goodbye",
16
+ "pub_date" => "1996/9/5" }
23
17
  ]
24
- @table_1997 = [
25
- { "title" => "Jack the Poetical Private",
18
+ end
19
+ let(:table1997) do
20
+ [
21
+ { "title" => "Jack the Poetical Private",
26
22
  "pub_date" => "1997/1/5" },
27
- { "title" => "Who Inside",
23
+ { "title" => "Who Inside",
28
24
  "pub_date" => "1997/4/5" },
29
- { "title" => "Illusion Acts Like Magic",
30
- "pub_date" => "1997/10/5" },
25
+ { "title" => "Illusion Acts Like Magic",
26
+ "pub_date" => "1997/10/5" }
31
27
  ]
32
- @table_1998 = [
33
- { "title" => "Replaceable Summer",
28
+ end
29
+ let(:table1998) do
30
+ [
31
+ { "title" => "Replaceable Summer",
34
32
  "pub_date" => "1998/1/7" },
35
- { "title" => "Switch Back",
33
+ { "title" => "Switch Back",
36
34
  "pub_date" => "1998/4/5" },
37
- { "title" => "Numerical Models",
35
+ { "title" => "Numerical Models",
38
36
  "pub_date" => "1998/7/5" },
39
- { "title" => "The Perfect Outsider",
40
- "pub_date" => "1998/10/5" },
37
+ { "title" => "The Perfect Outsider",
38
+ "pub_date" => "1998/10/5" }
41
39
  ]
42
- @all_tables = [
43
- {"table" => @table_1996},
44
- {"table" => @table_1997},
45
- {"table" => @table_1998},
40
+ end
41
+
42
+ let(:all_tables) do
43
+ [
44
+ { "table" => table1996 },
45
+ { "table" => table1997 },
46
+ { "table" => table1998 }
46
47
  ]
47
48
  end
48
49
 
49
50
  it 'scrape single table contents' do
50
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
51
- Yasuri::TextNode.new('./td[1]', "title"),
52
- Yasuri::TextNode.new('./td[2]', "pub_date"),
53
- ])
54
- expected = @table_1996
55
- actual = node.scrape(@uri)
51
+ node = Yasuri::StructNode.new(
52
+ '/html/body/table[1]/tr', "table", [
53
+ Yasuri::TextNode.new('./td[1]', "title"),
54
+ Yasuri::TextNode.new('./td[2]', "pub_date")
55
+ ]
56
+ )
57
+ expected = table1996
58
+ actual = node.scrape(uri_struct)
56
59
  expect(actual).to match expected
57
60
  end
58
61
 
59
62
  it 'return single result without array' do
60
- node = Yasuri::StructNode.new('/html/body/table[1]/tr[1]', "table_first_tr", [
61
- Yasuri::TextNode.new('./td[1]', "title"),
62
- Yasuri::TextNode.new('./td[2]', "pub_date"),
63
- ])
64
- expected = @table_1996.first
65
- actual = node.scrape(@uri)
63
+ node = Yasuri::StructNode.new(
64
+ '/html/body/table[1]/tr[1]', "table_first_tr", [
65
+ Yasuri::TextNode.new('./td[1]', "title"),
66
+ Yasuri::TextNode.new('./td[2]', "pub_date")
67
+ ]
68
+ )
69
+ expected = table1996.first
70
+ actual = node.scrape(uri_struct)
66
71
  expect(actual).to match expected
67
72
  end
68
73
 
69
74
  it 'return empty text if no match node' do
70
75
  no_match_xpath = '/html/body/table[1]/t'
71
- node = Yasuri::StructNode.new(no_match_xpath, "table", [
72
- Yasuri::TextNode.new('./td[1]', "title")
73
- ])
74
- actual = node.scrape(@uri)
76
+ node = Yasuri::StructNode.new(
77
+ no_match_xpath, "table", [
78
+ Yasuri::TextNode.new('./td[1]', "title")
79
+ ]
80
+ )
81
+ actual = node.scrape(uri_struct)
75
82
  expect(actual).to be_empty
76
83
  end
77
84
 
78
85
  it 'fail with invalid xpath' do
79
86
  invalid_xpath = '/html/body/table[1]/table[1]/tr['
80
- node = Yasuri::StructNode.new(invalid_xpath, "table", [
81
- Yasuri::TextNode.new('./td[1]', "title")
82
- ])
83
- expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
87
+ node = Yasuri::StructNode.new(
88
+ invalid_xpath, "table", [
89
+ Yasuri::TextNode.new('./td[1]', "title")
90
+ ]
91
+ )
92
+ expect { node.scrape(uri_struct) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
84
93
  end
85
94
 
86
95
  it 'fail with invalid xpath in children' do
87
96
  invalid_xpath = './td[1]['
88
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
89
- Yasuri::TextNode.new(invalid_xpath, "title"),
90
- Yasuri::TextNode.new('./td[2]', "pub_date"),
91
- ])
92
- expect { node.scrape(@uri) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
97
+ node = Yasuri::StructNode.new(
98
+ '/html/body/table[1]/tr', "table", [
99
+ Yasuri::TextNode.new(invalid_xpath, "title"),
100
+ Yasuri::TextNode.new('./td[2]', "pub_date")
101
+ ]
102
+ )
103
+ expect { node.scrape(uri_struct) }.to raise_error(Nokogiri::XML::XPath::SyntaxError)
93
104
  end
94
105
 
95
106
  it 'scrape all tables' do
96
- node = Yasuri::StructNode.new('/html/body/table', "tables", [
97
- Yasuri::StructNode.new('./tr', "table", [
98
- Yasuri::TextNode.new('./td[1]', "title"),
99
- Yasuri::TextNode.new('./td[2]', "pub_date"),
100
- ])
101
- ])
102
- expected = @all_tables
103
- actual = node.scrape(@uri)
107
+ node = Yasuri::StructNode.new(
108
+ '/html/body/table', "tables", [
109
+ Yasuri::StructNode.new(
110
+ './tr', "table", [
111
+ Yasuri::TextNode.new('./td[1]', "title"),
112
+ Yasuri::TextNode.new('./td[2]', "pub_date")
113
+ ]
114
+ )
115
+ ]
116
+ )
117
+ expected = all_tables
118
+ actual = node.scrape(uri_struct)
104
119
  expect(actual).to match expected
105
120
  end
106
121
 
@@ -111,62 +126,71 @@ describe 'Yasuri' do
111
126
  text_pub_date './td[2]'
112
127
  end
113
128
  end
114
- original = Yasuri::StructNode.new('/html/body/table', "tables", [
115
- Yasuri::StructNode.new('./tr', "table", [
116
- Yasuri::TextNode.new('./td[1]', "title"),
117
- Yasuri::TextNode.new('./td[2]', "pub_date"),
118
- ])
119
- ])
120
- compare_generated_vs_original(generated, original, @uri)
129
+ original = Yasuri::StructNode.new(
130
+ '/html/body/table', "tables", [
131
+ Yasuri::StructNode.new(
132
+ './tr', "table", [
133
+ Yasuri::TextNode.new('./td[1]', "title"),
134
+ Yasuri::TextNode.new('./td[2]', "pub_date")
135
+ ]
136
+ )
137
+ ]
138
+ )
139
+ compare_generated_vs_original(generated, original, uri_struct)
121
140
  end
122
141
 
123
142
  it 'return child node as symbol' do
124
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
125
- Yasuri::TextNode.new('./td[1]', "title"),
126
- Yasuri::TextNode.new('./td[2]', "pub_date"),
127
- ])
128
- expected = @table_1996.map{|h| h.map{|k,v| [k.to_sym, v] }.to_h }
129
- actual = node.scrape(@uri, symbolize_names:true)
143
+ node = Yasuri::StructNode.new(
144
+ '/html/body/table[1]/tr', "table", [
145
+ Yasuri::TextNode.new('./td[1]', "title"),
146
+ Yasuri::TextNode.new('./td[2]', "pub_date")
147
+ ]
148
+ )
149
+ expected = table1996.map { |h| h.transform_keys(&:to_sym) }
150
+ actual = node.scrape(uri_struct, symbolize_names: true)
130
151
  expect(actual).to match expected
131
152
  end
132
-
133
153
  end
134
154
 
135
155
  describe '::StructNode::Links' do
136
- before do
137
- @uri = uri + "/struct/structual_links.html"
138
- @table = [
156
+ let(:uri_struct) { "#{uri}/struct/structual_links.html" }
157
+ let(:table) do
158
+ [
139
159
  { "title" => "Child01,02",
140
- "child" => [{"p" => "Child 01 page."}, {"p" => "Child 02 page."}] },
160
+ "child" => [{ "p" => "Child 01 page." }, { "p" => "Child 02 page." }] },
141
161
 
142
162
  { "title" => "Child01,02,03",
143
- "child" => [{"p" => "Child 01 page."}, {"p" => "Child 02 page."}, {"p" => "Child 03 page."}]}
163
+ "child" => [{ "p" => "Child 01 page." }, { "p" => "Child 02 page." }, { "p" => "Child 03 page." }] }
144
164
  ]
145
165
  end
146
166
 
147
167
  it 'return child node in links inside struct' do
148
- node = Yasuri::StructNode.new('/html/body/table/tr', "table", [
149
- Yasuri::TextNode.new('./td[1]', "title"),
150
- Yasuri::LinksNode.new('./td/a', "child", [
151
- Yasuri::TextNode.new('/html/body/p', "p"),
152
- ])
153
- ])
154
- expected = @table
155
- actual = node.scrape(@uri)
168
+ node = Yasuri::StructNode.new(
169
+ '/html/body/table/tr', "table", [
170
+ Yasuri::TextNode.new('./td[1]', "title"),
171
+ Yasuri::LinksNode.new(
172
+ './td/a', "child", [
173
+ Yasuri::TextNode.new('/html/body/p', "p")
174
+ ]
175
+ )
176
+ ]
177
+ )
178
+ expected = table
179
+ actual = node.scrape(uri_struct)
156
180
  expect(actual).to match expected
157
181
  end
158
- end # descrive
182
+ end
159
183
 
160
184
  describe '::StructNode::Pages' do
161
- before do
162
- @uri = uri + "/struct/structual_text.html"
163
- end
185
+ let(:uri_struct) { "#{uri}/struct/structual_text.html" }
164
186
 
165
187
  it 'not supported' do
166
- node = Yasuri::StructNode.new('/html/body/table[1]/tr', "table", [
167
- Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "pages", [])
168
- ])
169
- expect{ node.scrape(@uri) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
188
+ node = Yasuri::StructNode.new(
189
+ '/html/body/table[1]/tr', "table", [
190
+ Yasuri::PaginateNode.new("/html/body/nav/span/a[@class='next']", "pages", [])
191
+ ]
192
+ )
193
+ expect { node.scrape(uri_struct) }.to raise_error(NotImplementedError, "PagenateNode inside StructNode, Not Supported")
170
194
  end
171
195
  end
172
196
  end