table_parser 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
@@ -9,7 +9,7 @@ module TableParser
9
9
  rows = []
10
10
  table = doc.xpath(xpath)
11
11
  rows = table.xpath("./tr").collect do |row|
12
- row.xpath("./td").collect do |col|
12
+ row.xpath("./td|./th").collect do |col|
13
13
  col
14
14
  end
15
15
  end
data/lib/table_parser.rb CHANGED
@@ -4,5 +4,5 @@ require 'table_parser/table'
4
4
  require 'table_parser/parser'
5
5
 
6
6
  module TableParser
7
- VERSION = '0.6.1'
7
+ VERSION = '0.7.0'
8
8
  end
@@ -5,7 +5,16 @@ require 'open-uri'
5
5
 
6
6
  class TestTableParser < Test::Unit::TestCase
7
7
  def test_parse_rowspan
8
- html = open("rowspan.html").read
8
+ html = <<EOF
9
+ <html><body><table>
10
+
11
+ <tr><td>A</td><td>B</td></tr>
12
+ <tr><td rowspan="2">1</td><td>2</td></tr>
13
+ <tr><td rowspan="2">3</td></tr>
14
+ <tr><td>4</td></tr>
15
+
16
+ </table></body></html>
17
+ EOF
9
18
  doc = Nokogiri::HTML(html)
10
19
  table = TableParser::Table.new doc, "/html/body/table", {:dup_rows => false, :dup_cols => false}
11
20
  assert_equal(2, table.columns.size, 'header_count should = 2 ')
@@ -42,7 +51,21 @@ class TestTableParser < Test::Unit::TestCase
42
51
  end
43
52
 
44
53
  def test_parse_colspan
45
- html = open("colspan.html").read
54
+ html = <<EOF
55
+ <html><body><table>
56
+ <tr><td>A</td><td colspan="2">B</td></tr>
57
+ <tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
58
+ <tr><td>B2</td><td>C2</td></tr>
59
+ <tr><td>A3</td><td>B3</td><td>C3</td></tr>
60
+ <tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
61
+ <tr><td>A5</td></tr>
62
+ <tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
63
+ <tr><td>B2</td><td>C2</td></tr>
64
+ <tr><td>A3</td><td>B3</td><td>C3</td></tr>
65
+ <tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
66
+ <tr><td>A5</td></tr>
67
+ </table></body></html>
68
+ EOF
46
69
  doc = Nokogiri::HTML(html)
47
70
  table = TableParser::Table.new doc, "/html/body/table"
48
71
  assert_equal(3, table.columns.size, 'header_count should = 3 ')
@@ -81,7 +104,19 @@ class TestTableParser < Test::Unit::TestCase
81
104
  end
82
105
 
83
106
  def test_parse_complex2
84
- html = open("complex2.html").read
107
+ html = <<EOF
108
+ <html><body><table><tr><td>Header1</td><td>Header2</td><td>Header3</td><td>Header4</td></tr>
109
+ <tr><td rowspan="3">A1</td><td>A2</td><td rowspan="2">A3</td><td>A4</td></tr>
110
+ <tr><td>B2</td><td>B4</td></tr>
111
+ <tr><td>C2</td><td rowspan="2">C3</td><td>C4</td></tr>
112
+ <tr><td rowspan="3">D1</td><td>D2</td><td>D4</td></tr>
113
+ <tr><td>E2</td><td rowspan="2">E3</td><td>E4</td></tr>
114
+ <tr><td>F2</td><td>F4</td></tr>
115
+ <tr><td rowspan="3">G1</td><td>G2</td><td rowspan="2">G3</td><td>G4</td></tr>
116
+ <tr><td>H2</td><td>H4</td></tr>
117
+ <tr><td>I2</td><td>I3</td><td>I4</td></tr>
118
+ </table></body></html>
119
+ EOF
85
120
  doc = Nokogiri::HTML(html)
86
121
  table = TableParser::Table.new doc, "/html/body/table"
87
122
 
@@ -114,7 +149,15 @@ class TestTableParser < Test::Unit::TestCase
114
149
  end
115
150
 
116
151
  def test_parse_complex_colrowspan
117
- html = open("table_rowcol.html").read
152
+ html = <<EOF
153
+ <html><body><table><tr><td>A</td><td>B</td><td>C</td><td>D</td><td>E</td></tr>
154
+ <tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
155
+ <tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
156
+ <tr><td rowspan="2">4c</td><td>5c</td></tr>
157
+ <tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
158
+ <tr><td>3e</td><td>4e</td><td>5e</td></tr>
159
+ </table></body></html>
160
+ EOF
118
161
 
119
162
  doc = Nokogiri::HTML(html)
120
163
  table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
@@ -145,11 +188,44 @@ class TestTableParser < Test::Unit::TestCase
145
188
  assert_equal(5, table[4].size)
146
189
  end
147
190
 
148
- def test_web
149
- html = open("test4.html").read
150
- doc = Nokogiri::HTML::Document.parse(html, nil, "Shift_JIS")
151
- table = TableParser::Table.new doc, "/html/body/div/div[3]/div/div[2]/table", {:header => false, :dup_rows => false}
152
- puts table.columns[0].size
191
+ def test_parse_th
192
+ html = <<EOF
193
+ <html><body><table><tr><th>A</th><th>B</th><th>C</th><th>D</th><th>E</th></tr>
194
+ <tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
195
+ <tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
196
+ <tr><td rowspan="2">4c</td><td>5c</td></tr>
197
+ <tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
198
+ <tr><td>3e</td><td>4e</td><td>5e</td></tr>
199
+ </table></body></html>
200
+ EOF
201
+
202
+ doc = Nokogiri::HTML(html)
203
+ table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
204
+ puts table
205
+ assert_equal(5, table.columns.size, 'header_count should = 5 ')
206
+ assert_equal(1, table[0].size)
207
+ assert_equal(3, table[1].size)
208
+ assert_equal(3, table[2].size)
209
+ assert_equal(4, table[3].size)
210
+ assert_equal(5, table[4].size)
211
+
212
+ table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => true}
213
+ puts table
214
+ assert_equal(5, table.columns.size, 'header_count should = 5 ')
215
+ assert_equal(5, table[0].size)
216
+ assert_equal(5, table[1].size)
217
+ assert_equal(3, table[2].size)
218
+ assert_equal(5, table[3].size)
219
+ assert_equal(5, table[4].size)
220
+
221
+ table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => true, :dup_rows => true}
222
+ puts table
223
+ assert_equal(5, table.columns.size, 'header_count should = 5 ')
224
+ assert_equal(5, table[0].size)
225
+ assert_equal(5, table[1].size)
226
+ assert_equal(5, table[2].size)
227
+ assert_equal(5, table[3].size)
228
+ assert_equal(5, table[4].size)
153
229
  end
154
230
 
155
231
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ hash: 3
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 7
9
+ - 0
10
+ version: 0.7.0
5
11
  platform: ruby
6
12
  authors:
7
13
  - Francis Chong
@@ -9,19 +15,25 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-01-27 00:00:00 +08:00
18
+ date: 2011-05-05 00:00:00 +08:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: hoe
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
23
- version: 2.4.0
24
- version:
29
+ hash: 35
30
+ segments:
31
+ - 2
32
+ - 9
33
+ - 4
34
+ version: 2.9.4
35
+ type: :development
36
+ version_requirements: *id001
25
37
  description: Parsing table could be difficult when its structure contains colspan or rowspan. TableParser parses HTML tables and groups them by columns, with colspan and rowspan respected.
26
38
  email:
27
39
  - francis@ignition.hk
@@ -44,6 +56,7 @@ files:
44
56
  - lib/table_parser/table_column.rb
45
57
  - lib/table_parser/table_node.rb
46
58
  - test/test_table_parser.rb
59
+ - .gemtest
47
60
  has_rdoc: true
48
61
  homepage:
49
62
  licenses: []
@@ -55,21 +68,27 @@ rdoc_options:
55
68
  require_paths:
56
69
  - lib
57
70
  required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
58
72
  requirements:
59
73
  - - ">="
60
74
  - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
61
78
  version: "0"
62
- version:
63
79
  required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
64
81
  requirements:
65
82
  - - ">="
66
83
  - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
67
87
  version: "0"
68
- version:
69
88
  requirements: []
70
89
 
71
90
  rubyforge_project: table_parser
72
- rubygems_version: 1.3.5
91
+ rubygems_version: 1.6.2
73
92
  signing_key:
74
93
  specification_version: 3
75
94
  summary: Parsing table could be difficult when its structure contains colspan or rowspan