table_parser 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/lib/table_parser/parser.rb +1 -1
- data/lib/table_parser.rb +1 -1
- data/test/test_table_parser.rb +85 -9
- metadata +29 -10
data/.gemtest
ADDED
File without changes
|
data/lib/table_parser/parser.rb
CHANGED
data/lib/table_parser.rb
CHANGED
data/test/test_table_parser.rb
CHANGED
@@ -5,7 +5,16 @@ require 'open-uri'
|
|
5
5
|
|
6
6
|
class TestTableParser < Test::Unit::TestCase
|
7
7
|
def test_parse_rowspan
|
8
|
-
html =
|
8
|
+
html = <<EOF
|
9
|
+
<html><body><table>
|
10
|
+
|
11
|
+
<tr><td>A</td><td>B</td></tr>
|
12
|
+
<tr><td rowspan="2">1</td><td>2</td></tr>
|
13
|
+
<tr><td rowspan="2">3</td></tr>
|
14
|
+
<tr><td>4</td></tr>
|
15
|
+
|
16
|
+
</table></body></html>
|
17
|
+
EOF
|
9
18
|
doc = Nokogiri::HTML(html)
|
10
19
|
table = TableParser::Table.new doc, "/html/body/table", {:dup_rows => false, :dup_cols => false}
|
11
20
|
assert_equal(2, table.columns.size, 'header_count should = 2 ')
|
@@ -42,7 +51,21 @@ class TestTableParser < Test::Unit::TestCase
|
|
42
51
|
end
|
43
52
|
|
44
53
|
def test_parse_colspan
|
45
|
-
html =
|
54
|
+
html = <<EOF
|
55
|
+
<html><body><table>
|
56
|
+
<tr><td>A</td><td colspan="2">B</td></tr>
|
57
|
+
<tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
|
58
|
+
<tr><td>B2</td><td>C2</td></tr>
|
59
|
+
<tr><td>A3</td><td>B3</td><td>C3</td></tr>
|
60
|
+
<tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
|
61
|
+
<tr><td>A5</td></tr>
|
62
|
+
<tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
|
63
|
+
<tr><td>B2</td><td>C2</td></tr>
|
64
|
+
<tr><td>A3</td><td>B3</td><td>C3</td></tr>
|
65
|
+
<tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
|
66
|
+
<tr><td>A5</td></tr>
|
67
|
+
</table></body></html>
|
68
|
+
EOF
|
46
69
|
doc = Nokogiri::HTML(html)
|
47
70
|
table = TableParser::Table.new doc, "/html/body/table"
|
48
71
|
assert_equal(3, table.columns.size, 'header_count should = 3 ')
|
@@ -81,7 +104,19 @@ class TestTableParser < Test::Unit::TestCase
|
|
81
104
|
end
|
82
105
|
|
83
106
|
def test_parse_complex2
|
84
|
-
html =
|
107
|
+
html = <<EOF
|
108
|
+
<html><body><table><tr><td>Header1</td><td>Header2</td><td>Header3</td><td>Header4</td></tr>
|
109
|
+
<tr><td rowspan="3">A1</td><td>A2</td><td rowspan="2">A3</td><td>A4</td></tr>
|
110
|
+
<tr><td>B2</td><td>B4</td></tr>
|
111
|
+
<tr><td>C2</td><td rowspan="2">C3</td><td>C4</td></tr>
|
112
|
+
<tr><td rowspan="3">D1</td><td>D2</td><td>D4</td></tr>
|
113
|
+
<tr><td>E2</td><td rowspan="2">E3</td><td>E4</td></tr>
|
114
|
+
<tr><td>F2</td><td>F4</td></tr>
|
115
|
+
<tr><td rowspan="3">G1</td><td>G2</td><td rowspan="2">G3</td><td>G4</td></tr>
|
116
|
+
<tr><td>H2</td><td>H4</td></tr>
|
117
|
+
<tr><td>I2</td><td>I3</td><td>I4</td></tr>
|
118
|
+
</table></body></html>
|
119
|
+
EOF
|
85
120
|
doc = Nokogiri::HTML(html)
|
86
121
|
table = TableParser::Table.new doc, "/html/body/table"
|
87
122
|
|
@@ -114,7 +149,15 @@ class TestTableParser < Test::Unit::TestCase
|
|
114
149
|
end
|
115
150
|
|
116
151
|
def test_parse_complex_colrowspan
|
117
|
-
html =
|
152
|
+
html = <<EOF
|
153
|
+
<html><body><table><tr><td>A</td><td>B</td><td>C</td><td>D</td><td>E</td></tr>
|
154
|
+
<tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
|
155
|
+
<tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
|
156
|
+
<tr><td rowspan="2">4c</td><td>5c</td></tr>
|
157
|
+
<tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
|
158
|
+
<tr><td>3e</td><td>4e</td><td>5e</td></tr>
|
159
|
+
</table></body></html>
|
160
|
+
EOF
|
118
161
|
|
119
162
|
doc = Nokogiri::HTML(html)
|
120
163
|
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
|
@@ -145,11 +188,44 @@ class TestTableParser < Test::Unit::TestCase
|
|
145
188
|
assert_equal(5, table[4].size)
|
146
189
|
end
|
147
190
|
|
148
|
-
def
|
149
|
-
html =
|
150
|
-
|
151
|
-
|
152
|
-
|
191
|
+
def test_parse_th
|
192
|
+
html = <<EOF
|
193
|
+
<html><body><table><tr><th>A</th><th>B</th><th>C</th><th>D</th><th>E</th></tr>
|
194
|
+
<tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
|
195
|
+
<tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
|
196
|
+
<tr><td rowspan="2">4c</td><td>5c</td></tr>
|
197
|
+
<tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
|
198
|
+
<tr><td>3e</td><td>4e</td><td>5e</td></tr>
|
199
|
+
</table></body></html>
|
200
|
+
EOF
|
201
|
+
|
202
|
+
doc = Nokogiri::HTML(html)
|
203
|
+
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
|
204
|
+
puts table
|
205
|
+
assert_equal(5, table.columns.size, 'header_count should = 5 ')
|
206
|
+
assert_equal(1, table[0].size)
|
207
|
+
assert_equal(3, table[1].size)
|
208
|
+
assert_equal(3, table[2].size)
|
209
|
+
assert_equal(4, table[3].size)
|
210
|
+
assert_equal(5, table[4].size)
|
211
|
+
|
212
|
+
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => true}
|
213
|
+
puts table
|
214
|
+
assert_equal(5, table.columns.size, 'header_count should = 5 ')
|
215
|
+
assert_equal(5, table[0].size)
|
216
|
+
assert_equal(5, table[1].size)
|
217
|
+
assert_equal(3, table[2].size)
|
218
|
+
assert_equal(5, table[3].size)
|
219
|
+
assert_equal(5, table[4].size)
|
220
|
+
|
221
|
+
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => true, :dup_rows => true}
|
222
|
+
puts table
|
223
|
+
assert_equal(5, table.columns.size, 'header_count should = 5 ')
|
224
|
+
assert_equal(5, table[0].size)
|
225
|
+
assert_equal(5, table[1].size)
|
226
|
+
assert_equal(5, table[2].size)
|
227
|
+
assert_equal(5, table[3].size)
|
228
|
+
assert_equal(5, table[4].size)
|
153
229
|
end
|
154
230
|
|
155
231
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 3
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 7
|
9
|
+
- 0
|
10
|
+
version: 0.7.0
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Francis Chong
|
@@ -9,19 +15,25 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date:
|
18
|
+
date: 2011-05-05 00:00:00 +08:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: hoe
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
23
|
-
|
24
|
-
|
29
|
+
hash: 35
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 9
|
33
|
+
- 4
|
34
|
+
version: 2.9.4
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
25
37
|
description: Parsing a table can be difficult when its structure contains colspan or rowspan. TableParser parses HTML tables and groups them by columns, with colspan and rowspan respected.
|
26
38
|
email:
|
27
39
|
- francis@ignition.hk
|
@@ -44,6 +56,7 @@ files:
|
|
44
56
|
- lib/table_parser/table_column.rb
|
45
57
|
- lib/table_parser/table_node.rb
|
46
58
|
- test/test_table_parser.rb
|
59
|
+
- .gemtest
|
47
60
|
has_rdoc: true
|
48
61
|
homepage:
|
49
62
|
licenses: []
|
@@ -55,21 +68,27 @@ rdoc_options:
|
|
55
68
|
require_paths:
|
56
69
|
- lib
|
57
70
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
58
72
|
requirements:
|
59
73
|
- - ">="
|
60
74
|
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
61
78
|
version: "0"
|
62
|
-
version:
|
63
79
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
64
81
|
requirements:
|
65
82
|
- - ">="
|
66
83
|
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
67
87
|
version: "0"
|
68
|
-
version:
|
69
88
|
requirements: []
|
70
89
|
|
71
90
|
rubyforge_project: table_parser
|
72
|
-
rubygems_version: 1.
|
91
|
+
rubygems_version: 1.6.2
|
73
92
|
signing_key:
|
74
93
|
specification_version: 3
|
75
94
|
summary: Parsing a table can be difficult when its structure contains colspan or rowspan
|