table_parser 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/table_parser.rb +1 -1
- data/lib/table_parser/parser.rb +7 -6
- data/test/test_table_parser.rb +12 -21
- metadata +2 -2
data/lib/table_parser.rb
CHANGED
data/lib/table_parser/parser.rb
CHANGED
@@ -47,6 +47,8 @@ module TableParser
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
+
puts " data/length = #{data.size}"
|
51
|
+
|
50
52
|
# handle rowspan
|
51
53
|
data.each_index do |row_index|
|
52
54
|
row = data[row_index]
|
@@ -54,6 +56,7 @@ module TableParser
|
|
54
56
|
col = row[col_index]
|
55
57
|
if headers[col_index]
|
56
58
|
headers[col_index].children << col if col.class != EmptyTableNode
|
59
|
+
|
57
60
|
if col.colspan > 1
|
58
61
|
if dup_cols
|
59
62
|
row.insert(col_index, TableNode.new(col.element, col.rowspan, col.colspan - 1))
|
@@ -61,17 +64,15 @@ module TableParser
|
|
61
64
|
row.insert(col_index, EmptyTableNode.new(col.rowspan, col.colspan - 1))
|
62
65
|
end
|
63
66
|
end
|
64
|
-
|
65
|
-
if col.rowspan > 1 && data[row_index
|
67
|
+
|
68
|
+
if col.rowspan > 1 && data[row_index].size > 0
|
66
69
|
if dup_rows
|
67
|
-
data[row_index
|
70
|
+
data[row_index].insert(col_index, TableNode.new(col.element, col.rowspan - 1))
|
68
71
|
else
|
69
|
-
data[row_index
|
72
|
+
data[row_index].insert(col_index, EmptyTableNode.new(col.rowspan - 1))
|
70
73
|
end
|
71
74
|
end
|
72
75
|
end
|
73
|
-
|
74
|
-
|
75
76
|
end
|
76
77
|
end
|
77
78
|
data
|
data/test/test_table_parser.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require "test/unit"
|
2
2
|
require "table_parser"
|
3
|
+
require 'iconv'
|
3
4
|
|
4
5
|
class TestTableParser < Test::Unit::TestCase
|
5
6
|
def test_parse_rowspan
|
@@ -77,28 +78,7 @@ class TestTableParser < Test::Unit::TestCase
|
|
77
78
|
assert_equal 9, table[2].size
|
78
79
|
assert_equal 9, table[3].size
|
79
80
|
end
|
80
|
-
|
81
|
-
def test_parse_web
|
82
|
-
doc = Nokogiri::HTML(open("test.html").read)
|
83
|
-
table = TableParser::Table.new doc, "/html/body/table"
|
84
|
-
|
85
|
-
assert_equal 11, table.columns.size
|
86
|
-
assert_equal 9, table[0].size
|
87
|
-
assert_equal 9, table[1].size
|
88
|
-
assert_equal 9, table[2].size
|
89
|
-
assert_equal 9, table[3].size
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_parse_web2
|
93
|
-
doc = Nokogiri::HTML(open("test2.html").read)
|
94
|
-
|
95
|
-
table = doc.xpath("//div[@id='timetable_box-week']/table")
|
96
|
-
table.xpath("./tr[1]").remove
|
97
|
-
|
98
|
-
table = TableParser::Table.new doc, "//div[@id='timetable_box-week']/table", {:dup_cols => false, :dup_rows => false}
|
99
81
|
|
100
|
-
end
|
101
|
-
|
102
82
|
def test_parse_noheader
|
103
83
|
html = "<html><body><table><tr><td>A</td><td>B</td></tr>\
|
104
84
|
<tr><td rowspan=\"2\">1</td><td>2</td></tr> \
|
@@ -110,4 +90,15 @@ class TestTableParser < Test::Unit::TestCase
|
|
110
90
|
assert_equal(3, table[0].size)
|
111
91
|
assert_equal(3, table[1].size)
|
112
92
|
end
|
93
|
+
|
94
|
+
|
95
|
+
def test_web
|
96
|
+
html = open("test4.html").read
|
97
|
+
html.gsub!(/<!--.*-->/, "");
|
98
|
+
|
99
|
+
doc = Nokogiri::HTML::Document.parse(html)
|
100
|
+
doc.xpath("//img").remove
|
101
|
+
table = TableParser::Table.new doc, "/html/body/div/div[3]/div/div[2]/table", {:header => false, :dup_rows => false}
|
102
|
+
puts table.columns[2].size
|
103
|
+
end
|
113
104
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Chong
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-11 00:00:00 +08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|