table_parser 0.5.2 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,17 +16,27 @@ module TableParser
16
16
  rows
17
17
  end
18
18
 
19
- def self.extract_column_headers(rows, dup_rows, dup_cols)
19
+ def self.extract_column_headers(rows, dup_rows, dup_cols, has_header)
20
20
  headers = []
21
- rows.first.collect do |col|
22
- header = TableColumn.new(col)
23
- headers << header
24
21
 
25
- (header.colspan-1).times do
26
- headers << TableColumn.new(col)
22
+ if has_header
23
+ rows.first.collect do |col|
24
+ header = TableColumn.new(col)
25
+ headers << header
26
+ (header.colspan-1).times do
27
+ headers << TableColumn.new(col)
28
+ end
29
+ end
30
+ rows.delete_at(0)
31
+ else
32
+ rows.first.collect do |col|
33
+ header = TableColumn.new(nil)
34
+ headers << header
35
+ (header.colspan-1).times do
36
+ headers << TableColumn.new(nil)
37
+ end
27
38
  end
28
39
  end
29
- rows.delete_at(0)
30
40
  headers
31
41
  end
32
42
 
@@ -2,6 +2,11 @@ module TableParser
2
2
  class Table
3
3
  attr_reader :nodes, :columns
4
4
  def initialize(doc, xpath_to_table="//table[0]", options={})
5
+ if options.has_key?(:header)
6
+ header = options[:header]
7
+ else
8
+ header = true
9
+ end
5
10
 
6
11
  if options.has_key?(:dup_rows)
7
12
  dup_rows = options[:dup_rows]
@@ -16,7 +21,7 @@ module TableParser
16
21
  end
17
22
 
18
23
  table = Parser.extract_table(doc, xpath_to_table)
19
- @columns = Parser.extract_column_headers(table, dup_rows, dup_cols)
24
+ @columns = Parser.extract_column_headers(table, dup_rows, dup_cols, header)
20
25
  @nodes = Parser.extract_nodes(table, @columns, dup_rows, dup_cols)
21
26
  end
22
27
 
@@ -3,7 +3,7 @@ module TableParser
3
3
  attr_reader :element, :text, :rowspan, :colspan
4
4
  def initialize(element, rowspan=nil, colspan=nil)
5
5
  @element = element
6
- @text = element.text.strip
6
+ @text = element.text.strip rescue ""
7
7
  @colspan = colspan || element["colspan"].to_i rescue 1
8
8
  @rowspan = rowspan || element["rowspan"].to_i rescue 1
9
9
  end
data/lib/table_parser.rb CHANGED
@@ -4,5 +4,5 @@ require 'table_parser/table'
4
4
  require 'table_parser/parser'
5
5
 
6
6
  module TableParser
7
- VERSION = '0.5.2'
7
+ VERSION = '0.5.3'
8
8
  end
@@ -96,6 +96,18 @@ class TestTableParser < Test::Unit::TestCase
96
96
  table.xpath("./tr[1]").remove
97
97
 
98
98
  table = TableParser::Table.new doc, "//div[@id='timetable_box-week']/table", {:dup_cols => false, :dup_rows => false}
99
- puts table.columns.select(){|c| c.text =~ /[0-9]+月[0-9]+日/ }
99
+
100
+ end
101
+
102
+ def test_parse_noheader
103
+ html = "<html><body><table><tr><td>A</td><td>B</td></tr>\
104
+ <tr><td rowspan=\"2\">1</td><td>2</td></tr> \
105
+ <tr><td>3</td></tr></table></body></html>"
106
+ doc = Nokogiri::HTML(html)
107
+ table = TableParser::Table.new doc, "/html/body/table", {:header => false}
108
+
109
+ assert_equal(2, table.columns.size, 'header_count should = 2 ')
110
+ assert_equal(3, table[0].size)
111
+ assert_equal(3, table[1].size)
100
112
  end
101
113
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Chong
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-05 00:00:00 +08:00
12
+ date: 2010-01-06 00:00:00 +08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency