table_parser 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
@@ -9,7 +9,7 @@ module TableParser
9
9
  rows = []
10
10
  table = doc.xpath(xpath)
11
11
  rows = table.xpath("./tr").collect do |row|
12
- row.xpath("./td").collect do |col|
12
+ row.xpath("./td|./th").collect do |col|
13
13
  col
14
14
  end
15
15
  end
data/lib/table_parser.rb CHANGED
@@ -4,5 +4,5 @@ require 'table_parser/table'
4
4
  require 'table_parser/parser'
5
5
 
6
6
  module TableParser
7
- VERSION = '0.6.1'
7
+ VERSION = '0.7.0'
8
8
  end
@@ -5,7 +5,16 @@ require 'open-uri'
5
5
 
6
6
  class TestTableParser < Test::Unit::TestCase
7
7
  def test_parse_rowspan
8
- html = open("rowspan.html").read
8
+ html = <<EOF
9
+ <html><body><table>
10
+
11
+ <tr><td>A</td><td>B</td></tr>
12
+ <tr><td rowspan="2">1</td><td>2</td></tr>
13
+ <tr><td rowspan="2">3</td></tr>
14
+ <tr><td>4</td></tr>
15
+
16
+ </table></body></html>
17
+ EOF
9
18
  doc = Nokogiri::HTML(html)
10
19
  table = TableParser::Table.new doc, "/html/body/table", {:dup_rows => false, :dup_cols => false}
11
20
  assert_equal(2, table.columns.size, 'header_count should = 2 ')
@@ -42,7 +51,21 @@ class TestTableParser < Test::Unit::TestCase
42
51
  end
43
52
 
44
53
  def test_parse_colspan
45
- html = open("colspan.html").read
54
+ html = <<EOF
55
+ <html><body><table>
56
+ <tr><td>A</td><td colspan="2">B</td></tr>
57
+ <tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
58
+ <tr><td>B2</td><td>C2</td></tr>
59
+ <tr><td>A3</td><td>B3</td><td>C3</td></tr>
60
+ <tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
61
+ <tr><td>A5</td></tr>
62
+ <tr><td rowspan="2">A1</td><td>B1</td><td>C1</td></tr>
63
+ <tr><td>B2</td><td>C2</td></tr>
64
+ <tr><td>A3</td><td>B3</td><td>C3</td></tr>
65
+ <tr><td>A4</td><td colspan="2" rowspan="2">B4</td></tr>
66
+ <tr><td>A5</td></tr>
67
+ </table></body></html>
68
+ EOF
46
69
  doc = Nokogiri::HTML(html)
47
70
  table = TableParser::Table.new doc, "/html/body/table"
48
71
  assert_equal(3, table.columns.size, 'header_count should = 3 ')
@@ -81,7 +104,19 @@ class TestTableParser < Test::Unit::TestCase
81
104
  end
82
105
 
83
106
  def test_parse_complex2
84
- html = open("complex2.html").read
107
+ html = <<EOF
108
+ <html><body><table><tr><td>Header1</td><td>Header2</td><td>Header3</td><td>Header4</td></tr>
109
+ <tr><td rowspan="3">A1</td><td>A2</td><td rowspan="2">A3</td><td>A4</td></tr>
110
+ <tr><td>B2</td><td>B4</td></tr>
111
+ <tr><td>C2</td><td rowspan="2">C3</td><td>C4</td></tr>
112
+ <tr><td rowspan="3">D1</td><td>D2</td><td>D4</td></tr>
113
+ <tr><td>E2</td><td rowspan="2">E3</td><td>E4</td></tr>
114
+ <tr><td>F2</td><td>F4</td></tr>
115
+ <tr><td rowspan="3">G1</td><td>G2</td><td rowspan="2">G3</td><td>G4</td></tr>
116
+ <tr><td>H2</td><td>H4</td></tr>
117
+ <tr><td>I2</td><td>I3</td><td>I4</td></tr>
118
+ </table></body></html>
119
+ EOF
85
120
  doc = Nokogiri::HTML(html)
86
121
  table = TableParser::Table.new doc, "/html/body/table"
87
122
 
@@ -114,7 +149,15 @@ class TestTableParser < Test::Unit::TestCase
114
149
  end
115
150
 
116
151
  def test_parse_complex_colrowspan
117
- html = open("table_rowcol.html").read
152
+ html = <<EOF
153
+ <html><body><table><tr><td>A</td><td>B</td><td>C</td><td>D</td><td>E</td></tr>
154
+ <tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
155
+ <tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
156
+ <tr><td rowspan="2">4c</td><td>5c</td></tr>
157
+ <tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
158
+ <tr><td>3e</td><td>4e</td><td>5e</td></tr>
159
+ </table></body></html>
160
+ EOF
118
161
 
119
162
  doc = Nokogiri::HTML(html)
120
163
  table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
@@ -145,11 +188,44 @@ class TestTableParser < Test::Unit::TestCase
145
188
  assert_equal(5, table[4].size)
146
189
  end
147
190
 
148
- def test_web
149
- html = open("test4.html").read
150
- doc = Nokogiri::HTML::Document.parse(html, nil, "Shift_JIS")
151
- table = TableParser::Table.new doc, "/html/body/div/div[3]/div/div[2]/table", {:header => false, :dup_rows => false}
152
- puts table.columns[0].size
191
+ def test_parse_th
192
+ html = <<EOF
193
+ <html><body><table><tr><th>A</th><th>B</th><th>C</th><th>D</th><th>E</th></tr>
194
+ <tr><td rowspan="5">1</td><td>2</td><td>3</td><td>4</td><td>5</td></tr>
195
+ <tr><td rowspan="2" colspan="2">2b</td><td>4b</td><td>5b</td></tr>
196
+ <tr><td rowspan="2">4c</td><td>5c</td></tr>
197
+ <tr><td rowspan="2">2d</td><td>3d</td><td>5d</td></tr>
198
+ <tr><td>3e</td><td>4e</td><td>5e</td></tr>
199
+ </table></body></html>
200
+ EOF
201
+
202
+ doc = Nokogiri::HTML(html)
203
+ table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => false}
204
+ puts table
205
+ assert_equal(5, table.columns.size, 'header_count should = 5 ')
206
+ assert_equal(1, table[0].size)
207
+ assert_equal(3, table[1].size)
208
+ assert_equal(3, table[2].size)
209
+ assert_equal(4, table[3].size)
210
+ assert_equal(5, table[4].size)
211
+
212
+ table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => false, :dup_rows => true}
213
+ puts table
214
+ assert_equal(5, table.columns.size, 'header_count should = 5 ')
215
+ assert_equal(5, table[0].size)
216
+ assert_equal(5, table[1].size)
217
+ assert_equal(3, table[2].size)
218
+ assert_equal(5, table[3].size)
219
+ assert_equal(5, table[4].size)
220
+
221
+ table = TableParser::Table.new doc, "/html/body/table", {:dup_cols => true, :dup_rows => true}
222
+ puts table
223
+ assert_equal(5, table.columns.size, 'header_count should = 5 ')
224
+ assert_equal(5, table[0].size)
225
+ assert_equal(5, table[1].size)
226
+ assert_equal(5, table[2].size)
227
+ assert_equal(5, table[3].size)
228
+ assert_equal(5, table[4].size)
153
229
  end
154
230
 
155
231
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ hash: 3
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 7
9
+ - 0
10
+ version: 0.7.0
5
11
  platform: ruby
6
12
  authors:
7
13
  - Francis Chong
@@ -9,19 +15,25 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-01-27 00:00:00 +08:00
18
+ date: 2011-05-05 00:00:00 +08:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: hoe
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
23
- version: 2.4.0
24
- version:
29
+ hash: 35
30
+ segments:
31
+ - 2
32
+ - 9
33
+ - 4
34
+ version: 2.9.4
35
+ type: :development
36
+ version_requirements: *id001
25
37
  description: Parsing table could be difficult when its structure contains colspan or rowspan. TableParser parses HTML tables and groups them by columns, with colspan and rowspan respected.
26
38
  email:
27
39
  - francis@ignition.hk
@@ -44,6 +56,7 @@ files:
44
56
  - lib/table_parser/table_column.rb
45
57
  - lib/table_parser/table_node.rb
46
58
  - test/test_table_parser.rb
59
+ - .gemtest
47
60
  has_rdoc: true
48
61
  homepage:
49
62
  licenses: []
@@ -55,21 +68,27 @@ rdoc_options:
55
68
  require_paths:
56
69
  - lib
57
70
  required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
58
72
  requirements:
59
73
  - - ">="
60
74
  - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
61
78
  version: "0"
62
- version:
63
79
  required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
64
81
  requirements:
65
82
  - - ">="
66
83
  - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
67
87
  version: "0"
68
- version:
69
88
  requirements: []
70
89
 
71
90
  rubyforge_project: table_parser
72
- rubygems_version: 1.3.5
91
+ rubygems_version: 1.6.2
73
92
  signing_key:
74
93
  specification_version: 3
75
94
  summary: Parsing table could be difficult when its structure contains colspan or rowspan