table_parser 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,17 +16,27 @@ module TableParser
16
16
  rows
17
17
  end
18
18
 
19
- def self.extract_column_headers(rows, dup_rows, dup_cols)
19
+ def self.extract_column_headers(rows, dup_rows, dup_cols, has_header)
20
20
  headers = []
21
- rows.first.collect do |col|
22
- header = TableColumn.new(col)
23
- headers << header
24
21
 
25
- (header.colspan-1).times do
26
- headers << TableColumn.new(col)
22
+ if has_header
23
+ rows.first.collect do |col|
24
+ header = TableColumn.new(col)
25
+ headers << header
26
+ (header.colspan-1).times do
27
+ headers << TableColumn.new(col)
28
+ end
29
+ end
30
+ rows.delete_at(0)
31
+ else
32
+ rows.first.collect do |col|
33
+ header = TableColumn.new(nil)
34
+ headers << header
35
+ (header.colspan-1).times do
36
+ headers << TableColumn.new(nil)
37
+ end
27
38
  end
28
39
  end
29
- rows.delete_at(0)
30
40
  headers
31
41
  end
32
42
 
@@ -2,6 +2,11 @@ module TableParser
2
2
  class Table
3
3
  attr_reader :nodes, :columns
4
4
  def initialize(doc, xpath_to_table="//table[0]", options={})
5
+ if options.has_key?(:header)
6
+ header = options[:header]
7
+ else
8
+ header = true
9
+ end
5
10
 
6
11
  if options.has_key?(:dup_rows)
7
12
  dup_rows = options[:dup_rows]
@@ -16,7 +21,7 @@ module TableParser
16
21
  end
17
22
 
18
23
  table = Parser.extract_table(doc, xpath_to_table)
19
- @columns = Parser.extract_column_headers(table, dup_rows, dup_cols)
24
+ @columns = Parser.extract_column_headers(table, dup_rows, dup_cols, header)
20
25
  @nodes = Parser.extract_nodes(table, @columns, dup_rows, dup_cols)
21
26
  end
22
27
 
@@ -3,7 +3,7 @@ module TableParser
3
3
  attr_reader :element, :text, :rowspan, :colspan
4
4
  def initialize(element, rowspan=nil, colspan=nil)
5
5
  @element = element
6
- @text = element.text.strip
6
+ @text = element.text.strip rescue ""
7
7
  @colspan = colspan || element["colspan"].to_i rescue 1
8
8
  @rowspan = rowspan || element["rowspan"].to_i rescue 1
9
9
  end
data/lib/table_parser.rb CHANGED
@@ -4,5 +4,5 @@ require 'table_parser/table'
4
4
  require 'table_parser/parser'
5
5
 
6
6
  module TableParser
7
- VERSION = '0.5.2'
7
+ VERSION = '0.5.3'
8
8
  end
@@ -96,6 +96,18 @@ class TestTableParser < Test::Unit::TestCase
96
96
  table.xpath("./tr[1]").remove
97
97
 
98
98
  table = TableParser::Table.new doc, "//div[@id='timetable_box-week']/table", {:dup_cols => false, :dup_rows => false}
99
- puts table.columns.select(){|c| c.text =~ /[0-9]+月[0-9]+日/ }
99
+
100
+ end
101
+
102
+ def test_parse_noheader
103
+ html = "<html><body><table><tr><td>A</td><td>B</td></tr>\
104
+ <tr><td rowspan=\"2\">1</td><td>2</td></tr> \
105
+ <tr><td>3</td></tr></table></body></html>"
106
+ doc = Nokogiri::HTML(html)
107
+ table = TableParser::Table.new doc, "/html/body/table", {:header => false}
108
+
109
+ assert_equal(2, table.columns.size, 'header_count should = 2 ')
110
+ assert_equal(3, table[0].size)
111
+ assert_equal(3, table[1].size)
100
112
  end
101
113
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Chong
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-05 00:00:00 +08:00
12
+ date: 2010-01-06 00:00:00 +08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency