table_parser 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.txt CHANGED
@@ -12,20 +12,46 @@ Parsing table could be difficult when its structure contains colspan or rowspan.
12
12
 
13
13
  * sudo gem install table_parser
14
14
 
15
- == DEVELOPERS:
16
15
 
17
- * Francis Chong francis at ignition dot hk
16
+ == USAGE:
17
+
18
+ Use TableParser::Table to parse an HTML table.
19
+
20
+ For example, the following code:
21
+ <pre>
22
+ html = "&lt;html&gt;&lt;body&gt;&lt;table&gt;&lt;tr&gt;&lt;td&gt;A&lt;/td&gt;&lt;td&gt;B&lt;/td&gt;&lt;/tr&gt;\
23
+ &lt;tr&gt;&lt;td rowspan=\"2\"&gt;1&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;/tr&gt; \
24
+ &lt;tr&gt;&lt;td&gt;3&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;"
25
+ table = TableParser::Table.new(html, "/html/body/table")
26
+ </pre>
27
+
28
+ results in the following parsed table:
18
29
 
19
- After checking out the source, run:
30
+ <pre>
31
+ Table&lt;TableColumn&lt;name=A, children=[1],[1]&gt;,TableColumn&lt;name=B, children=[2],[3]&gt;&gt;
32
+ </pre>
33
+ Note that the first column contains a duplicated item, because the cell in the first data row has a "rowspan" attribute and is repeated for every row it spans. If this is not desired, pass false as the third argument to skip the duplication:
34
+ <pre>
35
+ html = "&lt;html&gt;&lt;body&gt;&lt;table&gt;&lt;tr&gt;&lt;td&gt;A&lt;/td&gt;&lt;td&gt;B&lt;/td&gt;&lt;/tr&gt;\
36
+ &lt;tr&gt;&lt;td rowspan=\"2\"&gt;1&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;/tr&gt; \
37
+ &lt;tr&gt;&lt;td&gt;3&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;"
38
+ table = TableParser::Table.new(html, "/html/body/table", false)
39
+ </pre>
20
40
 
21
- $ rake newb
41
+ which results in the following parsed table:
22
42
 
23
- This task will install any missing dependencies, run the tests/specs,
24
- and generate the RDoc.
43
+ <pre>
44
+ Table&lt;TableColumn&lt;name=A, children=[1]&gt;,TableColumn&lt;name=B, children=[2],[3]&gt;&gt;
45
+ </pre>
46
+
47
+
48
+ == DEVELOPERS:
49
+
50
+ * Francis Chong francis at ignition dot hk
25
51
 
26
52
  == LICENSE:
27
53
 
28
- (The MIT License)
54
+ The MIT License
29
55
 
30
56
  Copyright (c) 2010 Ignition Soft
31
57
 
@@ -32,7 +32,7 @@ module TableParser
32
32
  headers
33
33
  end
34
34
 
35
- def self.extract_nodes(rows, headers)
35
+ def self.extract_nodes(rows, headers, duplicate_colspan)
36
36
  data = rows.collect do |row|
37
37
  row.collect do |ele|
38
38
  node = TableNode.new(ele)
@@ -44,18 +44,21 @@ module TableParser
44
44
  row = data[row_index]
45
45
  row.each_index do |col_index|
46
46
  col = row[col_index]
47
- headers[col_index].children << col
47
+ headers[col_index].children << col if col.class != EmptyTableNode
48
48
 
49
49
  if col.colspan > 1
50
50
  col.insert(col_index, TableNode.new(col.element, col.rowspan, col.colspan - 1))
51
51
  end
52
52
 
53
53
  if col.rowspan > 1 && data[row_index+1]
54
- data[row_index+1].insert(col_index, TableNode.new(col.element, col.rowspan - 1))
54
+ if duplicate_colspan
55
+ data[row_index+1].insert(col_index, TableNode.new(col.element, col.rowspan - 1))
56
+ else
57
+ data[row_index+1].insert(col_index, EmptyTableNode.new(col.rowspan - 1))
58
+ end
55
59
  end
56
60
  end
57
- end
58
-
61
+ end
59
62
  data
60
63
  end
61
64
 
@@ -1,14 +1,14 @@
1
1
  module TableParser
2
2
  class Table
3
3
  attr_reader :nodes, :columns
4
- def initialize(input, xpath_to_table="//table[0]")
4
+ def initialize(input, xpath_to_table="//table[0]", duplicate_colspan=true)
5
5
  table = Parser.extract_table(input, xpath_to_table)
6
6
  @columns = Parser.extract_column_headers(table)
7
- @nodes = Parser.extract_nodes(table, @columns)
7
+ @nodes = Parser.extract_nodes(table, @columns, duplicate_colspan)
8
8
  end
9
9
 
10
10
  def to_s
11
- "Table<#{@headers.collect{|h| h.to_s }.join("\n")}>"
11
+ "Table<#{@columns.collect{|h| h.to_s }.join(",")}>"
12
12
  end
13
13
 
14
14
  # get column by index
@@ -15,7 +15,7 @@ module TableParser
15
15
  end
16
16
 
17
17
  def to_s
18
- "[name=#{text}, children=#{@children.collect{|c| c.to_s }.join(",")}]"
18
+ "TableColumn<name=#{text}, children=#{@children.collect{|c| c.to_s }.join(",")}>"
19
19
  end
20
20
 
21
21
  end
@@ -3,8 +3,7 @@ module TableParser
3
3
  attr_reader :element, :text, :rowspan, :colspan
4
4
  def initialize(element, rowspan=nil, colspan=nil)
5
5
  @element = element
6
- @text = element.text
7
-
6
+ @text = element.text
8
7
  @colspan = colspan || element["colspan"].to_i rescue 1
9
8
  @rowspan = rowspan || element["rowspan"].to_i rescue 1
10
9
  end
@@ -13,4 +12,13 @@ module TableParser
13
12
  "[#{@text}]"
14
13
  end
15
14
  end
15
+
16
+ class EmptyTableNode < TableNode
17
+ def initialize(rowspan=nil, colspan=nil)
18
+ @element = nil
19
+ @text = ""
20
+ @colspan = colspan || element["colspan"].to_i rescue 1
21
+ @rowspan = rowspan || element["rowspan"].to_i rescue 1
22
+ end
23
+ end
16
24
  end
data/lib/table_parser.rb CHANGED
@@ -4,5 +4,5 @@ require 'table_parser/table'
4
4
  require 'table_parser/parser'
5
5
 
6
6
  module TableParser
7
- VERSION = '0.3.0'
7
+ VERSION = '0.4.0'
8
8
  end
@@ -8,11 +8,24 @@ class TestTableParser < Test::Unit::TestCase
8
8
  <tr><td>3</td></tr></table></body></html>",
9
9
  "/html/body/table"
10
10
 
11
+ puts table
11
12
  assert_equal(2, table.columns.size, 'header_count should = 2 ')
12
13
  assert_equal(2, table[0].size)
13
14
  assert_equal(2, table[1].size)
14
15
  end
15
16
 
17
+ def test_parse_rowspan_disable_dup
18
+ table = TableParser::Table.new "<html><body><table><tr><td>A</td><td>B</td></tr>\
19
+ <tr><td rowspan=\"2\">1</td><td>2</td></tr> \
20
+ <tr><td>3</td></tr></table></body></html>",
21
+ "/html/body/table", false
22
+
23
+ puts table
24
+ assert_equal(2, table.columns.size, 'header_count should = 2 ')
25
+ assert_equal(1, table[0].size)
26
+ assert_equal(2, table[1].size)
27
+ end
28
+
16
29
  def test_parse_colspan
17
30
  table = TableParser::Table.new "<html><body><table><tr><td>A</td><td colspan=\"2\">B</td></tr>\
18
31
  <tr><td rowspan=\"2\">A1</td><td>B1</td><td>C1</td></tr> \
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Chong