table_parser 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.txt CHANGED
@@ -12,20 +12,46 @@ Parsing table could be difficult when its structure contains colspan or rowspan.
12
12
 
13
13
  * sudo gem install table_parser
14
14
 
15
- == DEVELOPERS:
16
15
 
17
- * Francis Chong francis at ignition dot hk
16
+ == USAGE:
17
+
18
+ Use TableParser::Table to create a parsed HTML table.
19
+
20
+ For example, the following code:
21
+ <pre>
22
+ html = "&lt;html&gt;&lt;body&gt;&lt;table&gt;&lt;tr&gt;&lt;td&gt;A&lt;/td&gt;&lt;td&gt;B&lt;/td&gt;&lt;/tr&gt;\
23
+ &lt;tr&gt;&lt;td rowspan=\"2\"&gt;1&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;/tr&gt; \
24
+ &lt;tr&gt;&lt;td&gt;3&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;"
25
+ table = TableParser::Table.new(html, "/html/body/table")
26
+ </pre>
27
+
28
+ Results in the following parsed table:
18
29
 
19
- After checking out the source, run:
30
+ <pre>
31
+ Table&lt;TableColumn&lt;name=A, children=[1],[1]&gt;,TableColumn&lt;name=B, children=[2],[3]&gt;&gt;
32
+ </pre>
33
+ Note that the first column contains a duplicated item, because the first data row contains a cell with a "rowspan" attribute. If this is not desired, pass false as the third argument to skip the duplication:
34
+ <pre>
35
+ html = "&lt;html&gt;&lt;body&gt;&lt;table&gt;&lt;tr&gt;&lt;td&gt;A&lt;/td&gt;&lt;td&gt;B&lt;/td&gt;&lt;/tr&gt;\
36
+ &lt;tr&gt;&lt;td rowspan=\"2\"&gt;1&lt;/td&gt;&lt;td&gt;2&lt;/td&gt;&lt;/tr&gt; \
37
+ &lt;tr&gt;&lt;td&gt;3&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;"
38
+ table = TableParser::Table.new(html, "/html/body/table", false)
39
+ </pre>
20
40
 
21
- $ rake newb
41
+ This results in the following parsed table:
22
42
 
23
- This task will install any missing dependencies, run the tests/specs,
24
- and generate the RDoc.
43
+ <pre>
44
+ Table&lt;TableColumn&lt;name=A, children=[1]&gt;,TableColumn&lt;name=B, children=[2],[3]&gt;&gt;
45
+ </pre>
46
+
47
+
48
+ == DEVELOPERS:
49
+
50
+ * Francis Chong francis at ignition dot hk
25
51
 
26
52
  == LICENSE:
27
53
 
28
- (The MIT License)
54
+ The MIT License
29
55
 
30
56
  Copyright (c) 2010 Ignition Soft
31
57
 
@@ -32,7 +32,7 @@ module TableParser
32
32
  headers
33
33
  end
34
34
 
35
- def self.extract_nodes(rows, headers)
35
+ def self.extract_nodes(rows, headers, duplicate_colspan)
36
36
  data = rows.collect do |row|
37
37
  row.collect do |ele|
38
38
  node = TableNode.new(ele)
@@ -44,18 +44,21 @@ module TableParser
44
44
  row = data[row_index]
45
45
  row.each_index do |col_index|
46
46
  col = row[col_index]
47
- headers[col_index].children << col
47
+ headers[col_index].children << col if col.class != EmptyTableNode
48
48
 
49
49
  if col.colspan > 1
50
50
  col.insert(col_index, TableNode.new(col.element, col.rowspan, col.colspan - 1))
51
51
  end
52
52
 
53
53
  if col.rowspan > 1 && data[row_index+1]
54
- data[row_index+1].insert(col_index, TableNode.new(col.element, col.rowspan - 1))
54
+ if duplicate_colspan
55
+ data[row_index+1].insert(col_index, TableNode.new(col.element, col.rowspan - 1))
56
+ else
57
+ data[row_index+1].insert(col_index, EmptyTableNode.new(col.rowspan - 1))
58
+ end
55
59
  end
56
60
  end
57
- end
58
-
61
+ end
59
62
  data
60
63
  end
61
64
 
@@ -1,14 +1,14 @@
1
1
  module TableParser
2
2
  class Table
3
3
  attr_reader :nodes, :columns
4
- def initialize(input, xpath_to_table="//table[0]")
4
+ def initialize(input, xpath_to_table="//table[0]", duplicate_colspan=true)
5
5
  table = Parser.extract_table(input, xpath_to_table)
6
6
  @columns = Parser.extract_column_headers(table)
7
- @nodes = Parser.extract_nodes(table, @columns)
7
+ @nodes = Parser.extract_nodes(table, @columns, duplicate_colspan)
8
8
  end
9
9
 
10
10
  def to_s
11
- "Table<#{@headers.collect{|h| h.to_s }.join("\n")}>"
11
+ "Table<#{@columns.collect{|h| h.to_s }.join(",")}>"
12
12
  end
13
13
 
14
14
  # get column by index
@@ -15,7 +15,7 @@ module TableParser
15
15
  end
16
16
 
17
17
  def to_s
18
- "[name=#{text}, children=#{@children.collect{|c| c.to_s }.join(",")}]"
18
+ "TableColumn<name=#{text}, children=#{@children.collect{|c| c.to_s }.join(",")}>"
19
19
  end
20
20
 
21
21
  end
@@ -3,8 +3,7 @@ module TableParser
3
3
  attr_reader :element, :text, :rowspan, :colspan
4
4
  def initialize(element, rowspan=nil, colspan=nil)
5
5
  @element = element
6
- @text = element.text
7
-
6
+ @text = element.text
8
7
  @colspan = colspan || element["colspan"].to_i rescue 1
9
8
  @rowspan = rowspan || element["rowspan"].to_i rescue 1
10
9
  end
@@ -13,4 +12,13 @@ module TableParser
13
12
  "[#{@text}]"
14
13
  end
15
14
  end
15
+
16
+ class EmptyTableNode < TableNode
17
+ def initialize(rowspan=nil, colspan=nil)
18
+ @element = nil
19
+ @text = ""
20
+ @colspan = colspan || element["colspan"].to_i rescue 1
21
+ @rowspan = rowspan || element["rowspan"].to_i rescue 1
22
+ end
23
+ end
16
24
  end
data/lib/table_parser.rb CHANGED
@@ -4,5 +4,5 @@ require 'table_parser/table'
4
4
  require 'table_parser/parser'
5
5
 
6
6
  module TableParser
7
- VERSION = '0.3.0'
7
+ VERSION = '0.4.0'
8
8
  end
@@ -8,11 +8,24 @@ class TestTableParser < Test::Unit::TestCase
8
8
  <tr><td>3</td></tr></table></body></html>",
9
9
  "/html/body/table"
10
10
 
11
+ puts table
11
12
  assert_equal(2, table.columns.size, 'header_count should = 2 ')
12
13
  assert_equal(2, table[0].size)
13
14
  assert_equal(2, table[1].size)
14
15
  end
15
16
 
17
+ def test_parse_rowspan_disable_dup
18
+ table = TableParser::Table.new "<html><body><table><tr><td>A</td><td>B</td></tr>\
19
+ <tr><td rowspan=\"2\">1</td><td>2</td></tr> \
20
+ <tr><td>3</td></tr></table></body></html>",
21
+ "/html/body/table", false
22
+
23
+ puts table
24
+ assert_equal(2, table.columns.size, 'header_count should = 2 ')
25
+ assert_equal(1, table[0].size)
26
+ assert_equal(2, table[1].size)
27
+ end
28
+
16
29
  def test_parse_colspan
17
30
  table = TableParser::Table.new "<html><body><table><tr><td>A</td><td colspan=\"2\">B</td></tr>\
18
31
  <tr><td rowspan=\"2\">A1</td><td>B1</td><td>C1</td></tr> \
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Chong