table_parser 0.2.0 → 0.3.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt
CHANGED
data/lib/table_parser.rb
CHANGED
data/lib/table_parser/parser.rb
CHANGED
@@ -18,14 +18,14 @@ module TableParser
|
|
18
18
|
rows
|
19
19
|
end
|
20
20
|
|
21
|
-
def self.
|
21
|
+
def self.extract_column_headers(rows)
|
22
22
|
headers = []
|
23
23
|
rows.first.collect do |col|
|
24
|
-
header =
|
24
|
+
header = TableColumn.new(col)
|
25
25
|
headers << header
|
26
26
|
|
27
27
|
(header.colspan-1).times do
|
28
|
-
headers <<
|
28
|
+
headers << TableColumn.new(col)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
rows.delete_at(0)
|
data/lib/table_parser/table.rb
CHANGED
@@ -1,23 +1,19 @@
|
|
1
1
|
module TableParser
|
2
2
|
class Table
|
3
|
-
attr_reader :nodes, :
|
3
|
+
attr_reader :nodes, :columns
|
4
4
|
def initialize(input, xpath_to_table="//table[0]")
|
5
5
|
table = Parser.extract_table(input, xpath_to_table)
|
6
|
-
@
|
7
|
-
@nodes = Parser.extract_nodes(table, @
|
6
|
+
@columns = Parser.extract_column_headers(table)
|
7
|
+
@nodes = Parser.extract_nodes(table, @columns)
|
8
8
|
end
|
9
9
|
|
10
10
|
def to_s
|
11
11
|
"Table<#{@headers.collect{|h| h.to_s }.join("\n")}>"
|
12
12
|
end
|
13
|
-
|
14
|
-
def header_count
|
15
|
-
@headers.size
|
16
|
-
end
|
17
|
-
|
13
|
+
|
18
14
|
# get column by index
|
19
15
|
def [](index)
|
20
|
-
@
|
16
|
+
@columns[index]
|
21
17
|
end
|
22
18
|
end
|
23
19
|
end
|
data/test/test_table_parser.rb
CHANGED
@@ -8,7 +8,7 @@ class TestTableParser < Test::Unit::TestCase
|
|
8
8
|
<tr><td>3</td></tr></table></body></html>",
|
9
9
|
"/html/body/table"
|
10
10
|
|
11
|
-
assert_equal(2, table.
|
11
|
+
assert_equal(2, table.columns.size, 'header_count should = 2 ')
|
12
12
|
assert_equal(2, table[0].size)
|
13
13
|
assert_equal(2, table[1].size)
|
14
14
|
end
|
@@ -20,7 +20,7 @@ class TestTableParser < Test::Unit::TestCase
|
|
20
20
|
<tr><td>A3</td><td>B3</td><td>C3</td></tr><tr><td>A4</td><td>B4</td><td>C4</td></tr></table></body></html>",
|
21
21
|
"/html/body/table"
|
22
22
|
|
23
|
-
assert_equal(3, table.
|
23
|
+
assert_equal(3, table.columns.size, 'header_count should = 3 ')
|
24
24
|
assert_equal(4, table[0].size)
|
25
25
|
assert_equal(4, table[1].size)
|
26
26
|
assert_equal(4, table[2].size)
|
@@ -35,7 +35,7 @@ class TestTableParser < Test::Unit::TestCase
|
|
35
35
|
</table></body></html>",
|
36
36
|
"/html/body/table"
|
37
37
|
|
38
|
-
assert_equal 4, table.
|
38
|
+
assert_equal 4, table.columns.size
|
39
39
|
assert_equal 3, table[0].size
|
40
40
|
assert_equal 3, table[1].size
|
41
41
|
assert_equal 3, table[2].size
|
@@ -55,7 +55,7 @@ class TestTableParser < Test::Unit::TestCase
|
|
55
55
|
</table></body></html>",
|
56
56
|
"/html/body/table"
|
57
57
|
|
58
|
-
assert_equal 4, table.
|
58
|
+
assert_equal 4, table.columns.size
|
59
59
|
assert_equal 9, table[0].size
|
60
60
|
assert_equal 9, table[1].size
|
61
61
|
assert_equal 9, table[2].size
|
@@ -66,7 +66,7 @@ class TestTableParser < Test::Unit::TestCase
|
|
66
66
|
table = TableParser::Table.new open("test.html").read,
|
67
67
|
"/html/body/table"
|
68
68
|
|
69
|
-
assert_equal 11, table.
|
69
|
+
assert_equal 11, table.columns.size
|
70
70
|
assert_equal 9, table[0].size
|
71
71
|
assert_equal 9, table[1].size
|
72
72
|
assert_equal 9, table[2].size
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Chong
|
@@ -42,7 +42,7 @@ files:
|
|
42
42
|
- lib/table_parser.rb
|
43
43
|
- lib/table_parser/parser.rb
|
44
44
|
- lib/table_parser/table.rb
|
45
|
-
- lib/table_parser/
|
45
|
+
- lib/table_parser/table_column.rb
|
46
46
|
- lib/table_parser/table_node.rb
|
47
47
|
- test/test_table_parser.rb
|
48
48
|
has_rdoc: true
|