table_parser 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/table_parser/parser.rb +4 -2
- data/lib/table_parser.rb +1 -1
- data/test/test_table_parser.rb +18 -11
- metadata +2 -2
data/lib/table_parser/parser.rb
CHANGED
@@ -23,7 +23,8 @@ module TableParser
|
|
23
23
|
rows.first.collect do |col|
|
24
24
|
header = TableColumn.new(col)
|
25
25
|
headers << header
|
26
|
-
|
26
|
+
colspan = col["colspan"].to_i rescue 1
|
27
|
+
(colspan-1).times do
|
27
28
|
headers << TableColumn.new(col)
|
28
29
|
end
|
29
30
|
end
|
@@ -32,7 +33,8 @@ module TableParser
|
|
32
33
|
rows.first.collect do |col|
|
33
34
|
header = TableColumn.new(nil)
|
34
35
|
headers << header
|
35
|
-
|
36
|
+
colspan = col["colspan"].to_i rescue 1
|
37
|
+
(colspan-1).times do
|
36
38
|
headers << TableColumn.new(nil)
|
37
39
|
end
|
38
40
|
end
|
data/lib/table_parser.rb
CHANGED
data/test/test_table_parser.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "test/unit"
|
2
2
|
require "table_parser"
|
3
3
|
require 'iconv'
|
4
|
+
require 'open-uri'
|
4
5
|
|
5
6
|
class TestTableParser < Test::Unit::TestCase
|
6
7
|
def test_parse_rowspan
|
@@ -28,18 +29,27 @@ class TestTableParser < Test::Unit::TestCase
|
|
28
29
|
end
|
29
30
|
|
30
31
|
def test_parse_colspan
|
31
|
-
html = "<html><body><table
|
32
|
+
html = "<html><body><table>\
|
33
|
+
<tr><td>A</td><td colspan=\"2\">B</td></tr>\
|
32
34
|
<tr><td rowspan=\"2\">A1</td><td>B1</td><td>C1</td></tr> \
|
33
35
|
<tr><td>B2</td><td>C2</td></tr>\
|
34
|
-
<tr><td>A3</td><td>B3</td><td>C3</td></tr
|
36
|
+
<tr><td>A3</td><td>B3</td><td>C3</td></tr>\
|
37
|
+
<tr><td>A4</td><td colspan=\"2\" rowspan=\"2\">B4</td></tr>\
|
38
|
+
<tr><td>A5</td></tr>\
|
39
|
+
<tr><td rowspan=\"2\">A1</td><td>B1</td><td>C1</td></tr> \
|
40
|
+
<tr><td>B2</td><td>C2</td></tr>\
|
41
|
+
<tr><td>A3</td><td>B3</td><td>C3</td></tr>\
|
42
|
+
<tr><td>A4</td><td colspan=\"2\" rowspan=\"2\">B4</td></tr>\
|
43
|
+
<tr><td>A5</td></tr>\
|
44
|
+
</table></body></html>"
|
35
45
|
doc = Nokogiri::HTML(html)
|
36
46
|
table = TableParser::Table.new doc, "/html/body/table"
|
47
|
+
puts table
|
37
48
|
|
38
49
|
assert_equal(3, table.columns.size, 'header_count should = 3 ')
|
39
|
-
assert_equal(
|
40
|
-
assert_equal(
|
41
|
-
assert_equal(
|
42
|
-
|
50
|
+
assert_equal(10, table[0].size)
|
51
|
+
assert_equal(10, table[1].size)
|
52
|
+
assert_equal(10, table[2].size)
|
43
53
|
end
|
44
54
|
|
45
55
|
def test_parse_complex
|
@@ -94,11 +104,8 @@ class TestTableParser < Test::Unit::TestCase
|
|
94
104
|
|
95
105
|
def test_web
|
96
106
|
html = open("test4.html").read
|
97
|
-
|
98
|
-
|
99
|
-
doc = Nokogiri::HTML::Document.parse(html)
|
100
|
-
doc.xpath("//img").remove
|
107
|
+
doc = Nokogiri::HTML::Document.parse(html, nil, "Shift_JIS")
|
101
108
|
table = TableParser::Table.new doc, "/html/body/div/div[3]/div/div[2]/table", {:header => false, :dup_rows => false}
|
102
|
-
puts table.columns[
|
109
|
+
puts table.columns[0].size
|
103
110
|
end
|
104
111
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.5
|
4
|
+
version: 0.5.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Chong
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-16 00:00:00 +08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|