table_parser 0.5.3 → 0.5.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/table_parser.rb +1 -1
- data/lib/table_parser/parser.rb +7 -6
- data/test/test_table_parser.rb +12 -21
- metadata +2 -2
data/lib/table_parser.rb
CHANGED
data/lib/table_parser/parser.rb
CHANGED
@@ -47,6 +47,8 @@ module TableParser
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
+
puts " data/length = #{data.size}"
|
51
|
+
|
50
52
|
# handle rowspan
|
51
53
|
data.each_index do |row_index|
|
52
54
|
row = data[row_index]
|
@@ -54,6 +56,7 @@ module TableParser
|
|
54
56
|
col = row[col_index]
|
55
57
|
if headers[col_index]
|
56
58
|
headers[col_index].children << col if col.class != EmptyTableNode
|
59
|
+
|
57
60
|
if col.colspan > 1
|
58
61
|
if dup_cols
|
59
62
|
row.insert(col_index, TableNode.new(col.element, col.rowspan, col.colspan - 1))
|
@@ -61,17 +64,15 @@ module TableParser
|
|
61
64
|
row.insert(col_index, EmptyTableNode.new(col.rowspan, col.colspan - 1))
|
62
65
|
end
|
63
66
|
end
|
64
|
-
|
65
|
-
if col.rowspan > 1 && data[row_index
|
67
|
+
|
68
|
+
if col.rowspan > 1 && data[row_index].size > 0
|
66
69
|
if dup_rows
|
67
|
-
data[row_index
|
70
|
+
data[row_index].insert(col_index, TableNode.new(col.element, col.rowspan - 1))
|
68
71
|
else
|
69
|
-
data[row_index
|
72
|
+
data[row_index].insert(col_index, EmptyTableNode.new(col.rowspan - 1))
|
70
73
|
end
|
71
74
|
end
|
72
75
|
end
|
73
|
-
|
74
|
-
|
75
76
|
end
|
76
77
|
end
|
77
78
|
data
|
data/test/test_table_parser.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require "test/unit"
|
2
2
|
require "table_parser"
|
3
|
+
require 'iconv'
|
3
4
|
|
4
5
|
class TestTableParser < Test::Unit::TestCase
|
5
6
|
def test_parse_rowspan
|
@@ -77,28 +78,7 @@ class TestTableParser < Test::Unit::TestCase
|
|
77
78
|
assert_equal 9, table[2].size
|
78
79
|
assert_equal 9, table[3].size
|
79
80
|
end
|
80
|
-
|
81
|
-
def test_parse_web
|
82
|
-
doc = Nokogiri::HTML(open("test.html").read)
|
83
|
-
table = TableParser::Table.new doc, "/html/body/table"
|
84
|
-
|
85
|
-
assert_equal 11, table.columns.size
|
86
|
-
assert_equal 9, table[0].size
|
87
|
-
assert_equal 9, table[1].size
|
88
|
-
assert_equal 9, table[2].size
|
89
|
-
assert_equal 9, table[3].size
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_parse_web2
|
93
|
-
doc = Nokogiri::HTML(open("test2.html").read)
|
94
|
-
|
95
|
-
table = doc.xpath("//div[@id='timetable_box-week']/table")
|
96
|
-
table.xpath("./tr[1]").remove
|
97
|
-
|
98
|
-
table = TableParser::Table.new doc, "//div[@id='timetable_box-week']/table", {:dup_cols => false, :dup_rows => false}
|
99
81
|
|
100
|
-
end
|
101
|
-
|
102
82
|
def test_parse_noheader
|
103
83
|
html = "<html><body><table><tr><td>A</td><td>B</td></tr>\
|
104
84
|
<tr><td rowspan=\"2\">1</td><td>2</td></tr> \
|
@@ -110,4 +90,15 @@ class TestTableParser < Test::Unit::TestCase
|
|
110
90
|
assert_equal(3, table[0].size)
|
111
91
|
assert_equal(3, table[1].size)
|
112
92
|
end
|
93
|
+
|
94
|
+
|
95
|
+
def test_web
|
96
|
+
html = open("test4.html").read
|
97
|
+
html.gsub!(/<!--.*-->/, "");
|
98
|
+
|
99
|
+
doc = Nokogiri::HTML::Document.parse(html)
|
100
|
+
doc.xpath("//img").remove
|
101
|
+
table = TableParser::Table.new doc, "/html/body/div/div[3]/div/div[2]/table", {:header => false, :dup_rows => false}
|
102
|
+
puts table.columns[2].size
|
103
|
+
end
|
113
104
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Chong
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-11 00:00:00 +08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|