table_parser 0.6.1 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/lib/table_parser/parser.rb +1 -1
- data/lib/table_parser.rb +1 -1
- data/test/test_table_parser.rb +85 -9
- metadata +29 -10
data/.gemtest
ADDED
File without changes
|
data/lib/table_parser/parser.rb
CHANGED
data/lib/table_parser.rb
CHANGED
data/test/test_table_parser.rb
CHANGED
@@ -5,7 +5,16 @@ require 'open-uri'
|
|
5
5
|
|
6
6
|
class TestTableParser < Test::Unit::TestCase
|
7
7
|
def test_parse_rowspan
|
8
|
-
html =
|
8
|
+
html = <<EOF
|
9
|
+
<html><body><table>
|
10
|
+
|
11
|
+
<tr><td>A</td><td>B</td></tr>
|
12
|
+
<tr><td rowspan="2">1</td><td>2</td></tr>
|
13
|
+
<tr><td rowspan="2">3</td></tr>
|
14
|
+
<tr><td>4</td></tr>
|
15
|
+
|
16
|
+
</table></body></html>
|
17
|
+
EOF
|
9
18
|
doc = Nokogiri::HTML(html)
|
10
19
|
table = TableParser::Table.new doc, "/html/body/table", {:dup_rows => false, :dup_cols => false}
|
11
20
|
assert_equal(2, table.columns.size, 'header_count should = 2 ')
|
@@ -42,7 +51,21 @@ class TestTableParser < Test::Unit::TestCase
|
|
42
51
|
end
|
43
52
|
|
44
53
|
def test_parse_colspan
|
45
|
-
html =
|
54
|
+
html = <<EOF
|
55
|
+
<html><body><table>
|
56
|
+
<tr><td>A</td><td colspan="2">B</td></tr>
|
57
|
+
<tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
|
58
|
+
<tr><td>B2</td><td>C2</td></tr>
|
59
|
+
<tr><td>A3</td><td>B3</td><td>C3</td></tr>
|
60
|
+
<tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
|
61
|
+
<tr><td>A5</td></tr>
|
62
|
+
<tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
|
63
|
+
<tr><td>B2</td><td>C2</td></tr>
|
64
|
+
<tr><td>A3</td><td>B3</td><td>C3</td></tr>
|
65
|
+
<tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
|
66
|
+
<tr><td>A5</td></tr>
|
67
|
+
</table></body></html>
|
68
|
+
EOF
|
46
69
|
doc = Nokogiri::HTML(html)
|
47
70
|
table = TableParser::Table.new doc, "/html/body/table"
|
48
71
|
assert_equal(3, table.columns.size, 'header_count should = 3 ')
|
@@ -81,7 +104,19 @@ class TestTableParser < Test::Unit::TestCase
|
|
81
104
|
end
|
82
105
|
|
83
106
|
def test_parse_complex2
|
84
|
-
html =
|
107
|
+
html = <<EOF
|
108
|
+
<html><body><table><tr><td>Header1</td><td>Header2</td><td>Header3</td><td>Header4</td></tr>
|
109
|
+
<tr><td rowspan="3">A1</td><td>A2</td><td rowspan="2">A3</td><td>A4</td></tr>
|
110
|
+
<tr><td>B2</td><td>B4</td></tr>
|
111
|
+
<tr><td>C2</td><td rowspan="2">C3</td><td>C4</td></tr>
|
112
|
+
<tr><td rowspan="3">D1</td><td>D2</td><td>D4</td></tr>
|
113
|
+
<tr><td>E2</td><td rowspan="2">E3</td><td>E4</td></tr>
|
114
|
+
<tr><td>F2</td><td>F4</td></tr>
|
115
|
+
<tr><td rowspan="3">G1</td><td>G2</td><td rowspan="2">G3</td><td>G4</td></tr>
|
116
|
+
<tr><td>H2</td><td>H4</td></tr>
|
117
|
+
<tr><td>I2</td><td>I3</td><td>I4</td></tr>
|
118
|
+
</table></body></html>
|
119
|
+
EOF
|
85
120
|
doc = Nokogiri::HTML(html)
|
86
121
|
table = TableParser::Table.new doc, "/html/body/table"
|
87
122
|
|
@@ -114,7 +149,15 @@ class TestTableParser < Test::Unit::TestCase
|
|
114
149
|
end
|
115
150
|
|
116
151
|
def test_parse_complex_colrowspan
|
117
|
-
html =
|
152
|
+
html = <<EOF
|
153
|
+
<html><body><table><tr><td>A</td><td>B</td><td>C</td><td>D</td><td>E</td></tr>
|
154
|
+
<tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
|
155
|
+
<tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
|
156
|
+
<tr><td rowspan="2">4c</td><td>5c</td></tr>
|
157
|
+
<tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
|
158
|
+
<tr><td>3e</td><td>4e</td><td>5e</td></tr>
|
159
|
+
</table></body></html>
|
160
|
+
EOF
|
118
161
|
|
119
162
|
doc = Nokogiri::HTML(html)
|
120
163
|
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
|
@@ -145,11 +188,44 @@ class TestTableParser < Test::Unit::TestCase
|
|
145
188
|
assert_equal(5, table[4].size)
|
146
189
|
end
|
147
190
|
|
148
|
-
def
|
149
|
-
html =
|
150
|
-
|
151
|
-
|
152
|
-
|
191
|
+
def test_parse_th
|
192
|
+
html = <<EOF
|
193
|
+
<html><body><table><tr><th>A</th><th>B</th><th>C</th><th>D</th><th>E</th></tr>
|
194
|
+
<tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
|
195
|
+
<tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
|
196
|
+
<tr><td rowspan="2">4c</td><td>5c</td></tr>
|
197
|
+
<tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
|
198
|
+
<tr><td>3e</td><td>4e</td><td>5e</td></tr>
|
199
|
+
</table></body></html>
|
200
|
+
EOF
|
201
|
+
|
202
|
+
doc = Nokogiri::HTML(html)
|
203
|
+
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
|
204
|
+
puts table
|
205
|
+
assert_equal(5, table.columns.size, 'header_count should = 5 ')
|
206
|
+
assert_equal(1, table[0].size)
|
207
|
+
assert_equal(3, table[1].size)
|
208
|
+
assert_equal(3, table[2].size)
|
209
|
+
assert_equal(4, table[3].size)
|
210
|
+
assert_equal(5, table[4].size)
|
211
|
+
|
212
|
+
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => true}
|
213
|
+
puts table
|
214
|
+
assert_equal(5, table.columns.size, 'header_count should = 5 ')
|
215
|
+
assert_equal(5, table[0].size)
|
216
|
+
assert_equal(5, table[1].size)
|
217
|
+
assert_equal(3, table[2].size)
|
218
|
+
assert_equal(5, table[3].size)
|
219
|
+
assert_equal(5, table[4].size)
|
220
|
+
|
221
|
+
table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => true, :dup_rows => true}
|
222
|
+
puts table
|
223
|
+
assert_equal(5, table.columns.size, 'header_count should = 5 ')
|
224
|
+
assert_equal(5, table[0].size)
|
225
|
+
assert_equal(5, table[1].size)
|
226
|
+
assert_equal(5, table[2].size)
|
227
|
+
assert_equal(5, table[3].size)
|
228
|
+
assert_equal(5, table[4].size)
|
153
229
|
end
|
154
230
|
|
155
231
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 3
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 7
|
9
|
+
- 0
|
10
|
+
version: 0.7.0
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Francis Chong
|
@@ -9,19 +15,25 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date:
|
18
|
+
date: 2011-05-05 00:00:00 +08:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: hoe
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
23
|
-
|
24
|
-
|
29
|
+
hash: 35
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 9
|
33
|
+
- 4
|
34
|
+
version: 2.9.4
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
25
37
|
description: Parsing a table can be difficult when its structure contains colspan or rowspan. TableParser parses HTML tables and groups them by columns, with colspan and rowspan respected.
|
26
38
|
email:
|
27
39
|
- francis@ignition.hk
|
@@ -44,6 +56,7 @@ files:
|
|
44
56
|
- lib/table_parser/table_column.rb
|
45
57
|
- lib/table_parser/table_node.rb
|
46
58
|
- test/test_table_parser.rb
|
59
|
+
- .gemtest
|
47
60
|
has_rdoc: true
|
48
61
|
homepage:
|
49
62
|
licenses: []
|
@@ -55,21 +68,27 @@ rdoc_options:
|
|
55
68
|
require_paths:
|
56
69
|
- lib
|
57
70
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
58
72
|
requirements:
|
59
73
|
- - ">="
|
60
74
|
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
61
78
|
version: "0"
|
62
|
-
version:
|
63
79
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
64
81
|
requirements:
|
65
82
|
- - ">="
|
66
83
|
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
67
87
|
version: "0"
|
68
|
-
version:
|
69
88
|
requirements: []
|
70
89
|
|
71
90
|
rubyforge_project: table_parser
|
72
|
-
rubygems_version: 1.
|
91
|
+
rubygems_version: 1.6.2
|
73
92
|
signing_key:
|
74
93
|
specification_version: 3
|
75
94
|
summary: Parsing a table can be difficult when its structure contains colspan or rowspan
|