table_parser 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/table_parser/parser.rb +17 -7
- data/lib/table_parser/table.rb +6 -1
- data/lib/table_parser/table_node.rb +1 -1
- data/lib/table_parser.rb +1 -1
- data/test/test_table_parser.rb +13 -1
- metadata +2 -2
data/lib/table_parser/parser.rb
CHANGED
@@ -16,17 +16,27 @@ module TableParser
|
|
16
16
|
rows
|
17
17
|
end
|
18
18
|
|
19
|
-
def self.extract_column_headers(rows, dup_rows, dup_cols)
|
19
|
+
def self.extract_column_headers(rows, dup_rows, dup_cols, has_header)
|
20
20
|
headers = []
|
21
|
-
rows.first.collect do |col|
|
22
|
-
header = TableColumn.new(col)
|
23
|
-
headers << header
|
24
21
|
|
25
|
-
|
26
|
-
|
22
|
+
if has_header
|
23
|
+
rows.first.collect do |col|
|
24
|
+
header = TableColumn.new(col)
|
25
|
+
headers << header
|
26
|
+
(header.colspan-1).times do
|
27
|
+
headers << TableColumn.new(col)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
rows.delete_at(0)
|
31
|
+
else
|
32
|
+
rows.first.collect do |col|
|
33
|
+
header = TableColumn.new(nil)
|
34
|
+
headers << header
|
35
|
+
(header.colspan-1).times do
|
36
|
+
headers << TableColumn.new(nil)
|
37
|
+
end
|
27
38
|
end
|
28
39
|
end
|
29
|
-
rows.delete_at(0)
|
30
40
|
headers
|
31
41
|
end
|
32
42
|
|
data/lib/table_parser/table.rb
CHANGED
@@ -2,6 +2,11 @@ module TableParser
|
|
2
2
|
class Table
|
3
3
|
attr_reader :nodes, :columns
|
4
4
|
def initialize(doc, xpath_to_table="//table[0]", options={})
|
5
|
+
if options.has_key?(:header)
|
6
|
+
header = options[:header]
|
7
|
+
else
|
8
|
+
header = true
|
9
|
+
end
|
5
10
|
|
6
11
|
if options.has_key?(:dup_rows)
|
7
12
|
dup_rows = options[:dup_rows]
|
@@ -16,7 +21,7 @@ module TableParser
|
|
16
21
|
end
|
17
22
|
|
18
23
|
table = Parser.extract_table(doc, xpath_to_table)
|
19
|
-
@columns = Parser.extract_column_headers(table, dup_rows, dup_cols)
|
24
|
+
@columns = Parser.extract_column_headers(table, dup_rows, dup_cols, header)
|
20
25
|
@nodes = Parser.extract_nodes(table, @columns, dup_rows, dup_cols)
|
21
26
|
end
|
22
27
|
|
data/lib/table_parser/table_node.rb
CHANGED
@@ -3,7 +3,7 @@ module TableParser
|
|
3
3
|
attr_reader :element, :text, :rowspan, :colspan
|
4
4
|
def initialize(element, rowspan=nil, colspan=nil)
|
5
5
|
@element = element
|
6
|
-
@text = element.text.strip
|
6
|
+
@text = element.text.strip rescue ""
|
7
7
|
@colspan = colspan || element["colspan"].to_i rescue 1
|
8
8
|
@rowspan = rowspan || element["rowspan"].to_i rescue 1
|
9
9
|
end
|
data/lib/table_parser.rb
CHANGED
data/test/test_table_parser.rb
CHANGED
@@ -96,6 +96,18 @@ class TestTableParser < Test::Unit::TestCase
|
|
96
96
|
table.xpath("./tr[1]").remove
|
97
97
|
|
98
98
|
table = TableParser::Table.new doc, "//div[@id='timetable_box-week']/table", {:dup_cols => false, :dup_rows => false}
|
99
|
-
|
99
|
+
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_parse_noheader
|
103
|
+
html = "<html><body><table><tr><td>A</td><td>B</td></tr>\
|
104
|
+
<tr><td rowspan=\"2\">1</td><td>2</td></tr> \
|
105
|
+
<tr><td>3</td></tr></table></body></html>"
|
106
|
+
doc = Nokogiri::HTML(html)
|
107
|
+
table = TableParser::Table.new doc, "/html/body/table", {:header => false}
|
108
|
+
|
109
|
+
assert_equal(2, table.columns.size, 'header_count should = 2 ')
|
110
|
+
assert_equal(3, table[0].size)
|
111
|
+
assert_equal(3, table[1].size)
|
100
112
|
end
|
101
113
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Chong
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-06 00:00:00 +08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|