remote_table 1.1.10 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/remote_table.rb CHANGED
@@ -4,11 +4,19 @@ require 'active_support/version'
4
4
  active_support/core_ext/hash
5
5
  active_support/core_ext/string
6
6
  active_support/core_ext/module
7
- active_support/core_ext/array/wrap
7
+ active_support/core_ext/array
8
8
  }.each do |active_support_3_requirement|
9
9
  require active_support_3_requirement
10
10
  end if ::ActiveSupport::VERSION::MAJOR == 3
11
11
 
12
+ class Hash
13
+ attr_accessor :row_hash
14
+ end
15
+
16
+ class Array
17
+ attr_accessor :row_hash
18
+ end
19
+
12
20
  class RemoteTable
13
21
  autoload :Format, 'remote_table/format'
14
22
  autoload :Properties, 'remote_table/properties'
@@ -53,40 +61,52 @@ class RemoteTable
53
61
  @options.freeze
54
62
  end
55
63
 
64
+ # not thread safe
56
65
  def each(&blk)
57
- to_a.each { |row| yield row }
66
+ if fully_cached?
67
+ cache.each(&blk)
68
+ else
69
+ mark_download!
70
+ retval = format.each do |row|
71
+ row.row_hash = ::RemoteTable.hasher.hash row
72
+ transformer.transform(row).each do |virtual_row|
73
+ if properties.errata
74
+ next if properties.errata.rejects? virtual_row
75
+ properties.errata.correct! virtual_row
76
+ end
77
+ next if properties.select and !properties.select.call(virtual_row)
78
+ next if properties.reject and properties.reject.call(virtual_row)
79
+ cache.push virtual_row unless properties.streaming
80
+ yield virtual_row
81
+ end
82
+ end
83
+ fully_cached! unless properties.streaming
84
+ retval
85
+ end
58
86
  end
59
87
  alias :each_row :each
60
88
 
61
89
  def to_a
62
- return @to_a if @to_a.is_a? ::Array
63
- @to_a = []
64
- format.each do |row|
65
- row['row_hash'] = ::RemoteTable.hasher.hash row
66
- # allow the transformer to return multiple "virtual rows" for every real row
67
- ::Array.wrap(transformer.transform(row)).each do |virtual_row|
68
- if properties.errata
69
- next if properties.errata.rejects? virtual_row
70
- properties.errata.correct! virtual_row
71
- end
72
- next if properties.select and !properties.select.call(virtual_row)
73
- next if properties.reject and properties.reject.call(virtual_row)
74
- @to_a.push virtual_row
75
- end
90
+ if fully_cached?
91
+ cache.dup
92
+ else
93
+ map { |row| row }
76
94
  end
77
- @to_a
78
95
  end
79
96
  alias :rows :to_a
80
97
 
81
98
  # Get a row by row number
82
99
  def [](row_number)
83
- to_a[row_number]
100
+ if fully_cached?
101
+ cache[row_number]
102
+ else
103
+ to_a[row_number]
104
+ end
84
105
  end
85
106
 
86
107
  # clear the row cache to save memory
87
108
  def free
88
- @to_a.clear if @to_a.is_a?(::Array)
89
- @to_a = nil
109
+ cache.clear
90
110
  ::GC.start
91
111
  nil
92
112
  end
@@ -120,4 +140,28 @@ class RemoteTable
120
140
  def transformer
121
141
  @transformer ||= Transformer.new self
122
142
  end
143
+
144
+ attr_reader :download_count
145
+
146
+ private
147
+
148
+ def mark_download!
149
+ @download_count ||= 0
150
+ @download_count += 1
151
+ if properties.warn_on_multiple_downloads and download_count > 1
152
+ $stderr.puts "[remote_table] Warning: #{url} has been downloaded #{download_count} times."
153
+ end
154
+ end
155
+
156
+ def fully_cached!
157
+ @fully_cached = true
158
+ end
159
+
160
+ def fully_cached?
161
+ !!@fully_cached
162
+ end
163
+
164
+ def cache
165
+ @cache ||= []
166
+ end
123
167
  end
@@ -1,6 +1,7 @@
1
1
  if ::RUBY_VERSION >= '1.9'
2
2
  require 'ensure/encoding'
3
3
  else
4
+ $KCODE = 'U'
4
5
  require 'iconv'
5
6
  end
6
7
 
@@ -12,9 +13,11 @@ class RemoteTable
12
13
  autoload :OpenOffice, 'remote_table/format/open_office'
13
14
  autoload :FixedWidth, 'remote_table/format/fixed_width'
14
15
  autoload :HTML, 'remote_table/format/html'
16
+ autoload :XML, 'remote_table/format/xml'
15
17
 
16
18
  autoload :Textual, 'remote_table/format/mixins/textual'
17
- autoload :Rooable, 'remote_table/format/mixins/rooable'
19
+ autoload :ProcessedByRoo, 'remote_table/format/mixins/processed_by_roo'
20
+ autoload :ProcessedByNokogiri, 'remote_table/format/mixins/processed_by_nokogiri'
18
21
 
19
22
  attr_reader :t
20
23
 
@@ -26,7 +29,13 @@ class RemoteTable
26
29
  if ::RUBY_VERSION >= '1.9'
27
30
  str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
28
31
  else
29
- ::Iconv.conv('UTF-8//TRANSLIT', t.properties.encoding[0], str + ' ')[0..-2]
32
+ return str if t.properties.encoding[0] =~ /utf.?8/i
33
+ begin
34
+ ::Iconv.conv('UTF-8//TRANSLIT', t.properties.encoding[0], str + ' ')[0..-2]
35
+ rescue ::Iconv::IllegalSequence
36
+ $stderr.puts "[remote_table] Unable to transliterate #{str} into UTF-8 given #{t.properties.encoding[0]}"
37
+ str
38
+ end
30
39
  end
31
40
  end
32
41
 
@@ -6,7 +6,7 @@ else
6
6
  require 'fastercsv'
7
7
  ::RemoteTable::CSV = ::FasterCSV
8
8
  rescue ::LoadError
9
- $stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
9
+ $stderr.puts "[remote_table] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
10
10
  raise $!
11
11
  end
12
12
  end
@@ -19,26 +19,18 @@ class RemoteTable
19
19
  remove_useless_characters!
20
20
  skip_rows!
21
21
  CSV.foreach(t.local_file.path, fastercsv_options) do |row|
22
- ordered_hash = ::ActiveSupport::OrderedHash.new
23
- filled_values = 0
24
- case row
25
- when CSV::Row
26
- row.each do |header, value|
27
- next if header.blank?
28
- value = '' if value.nil?
29
- ordered_hash[header] = utf8 value
30
- filled_values += 1 if value.present?
31
- end
32
- when ::Array
33
- index = 0
34
- row.each do |value|
35
- value = '' if value.nil?
36
- ordered_hash[index] = utf8 value
37
- filled_values += 1 if value.present?
38
- index += 1
22
+ if row.is_a?(CSV::Row)
23
+ output = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (key, value)|
24
+ if key.present?
25
+ value = '' if value.nil?
26
+ memo[key] = utf8 value
27
+ end
28
+ memo
39
29
  end
30
+ yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
31
+ else
32
+ yield row if t.properties.keep_blank_rows or row.any? { |v| v.present? }
40
33
  end
41
- yield ordered_hash if t.properties.keep_blank_rows or filled_values > 0
42
34
  end
43
35
  ensure
44
36
  t.local_file.delete
@@ -62,7 +54,7 @@ class RemoteTable
62
54
  def fastercsv_options
63
55
  hsh = t.options.slice *FASTERCSV_OPTIONS
64
56
  hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
65
- hsh.reverse_merge! 'headers' => :first_row
57
+ hsh.reverse_merge! 'headers' => t.properties.headers
66
58
  hsh.reverse_merge! 'col_sep' => t.properties.delimiter
67
59
  hsh.symbolize_keys
68
60
  end
@@ -1,7 +1,7 @@
1
1
  class RemoteTable
2
2
  class Format
3
3
  class Excel < Format
4
- include Rooable
4
+ include ProcessedByRoo
5
5
  def roo_class
6
6
  ::Excel
7
7
  end
@@ -1,7 +1,7 @@
1
1
  class RemoteTable
2
2
  class Format
3
3
  class Excelx < Format
4
- include Rooable
4
+ include ProcessedByRoo
5
5
  def roo_class
6
6
  ::Excelx
7
7
  end
@@ -8,20 +8,23 @@ class RemoteTable
8
8
  crop_rows!
9
9
  skip_rows!
10
10
  cut_columns!
11
- parser.parse[:rows].each do |hash|
12
- hash.reject! { |k, v| k.blank? }
13
- hash.each do |k, v|
14
- hash[k] = utf8 v
11
+ parser.parse[:rows].each do |row|
12
+ row.reject! { |k, v| k.blank? }
13
+ row.each do |k, v|
14
+ row[k] = utf8 v
15
15
  end
16
- yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
16
+ yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
17
17
  end
18
18
  ensure
19
19
  t.local_file.delete
20
20
  end
21
+
21
22
  private
23
+
22
24
  def parser
23
25
  @parser ||= ::Slither::Parser.new definition, t.local_file.path
24
26
  end
27
+
25
28
  def definition
26
29
  @definition ||= if t.properties.schema_name.is_a?(::String) or t.properties.schema_name.is_a?(::Symbol)
27
30
  ::Slither.send :definition, t.properties.schema_name
@@ -4,37 +4,10 @@ class RemoteTable
4
4
  class Format
5
5
  class HTML < Format
6
6
  include Textual
7
- def each(&blk)
8
- remove_useless_characters!
9
- html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
10
- ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
11
- values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
12
- if html_headers.nil?
13
- html_headers = values
14
- next
15
- end
16
- hash = zip html_headers, values
17
- yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
18
- end
19
- ensure
20
- t.local_file.delete
21
- end
22
-
23
- private
24
-
25
- # http://snippets.dzone.com/posts/show/406
26
- def zip(keys, values)
27
- hash = ::Hash.new
28
- keys.zip(values) { |k,v| hash[k]=v }
29
- hash
30
- end
31
-
32
- # should we be doing this in ruby?
33
- def unescaped_html_without_soft_hyphens
34
- str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
35
- # get rid of MS Office baddies
36
- str.gsub! '&shy;', ''
37
- str
7
+ include ProcessedByNokogiri
8
+
9
+ def nokogiri_class
10
+ ::Nokogiri::HTML::Document
38
11
  end
39
12
  end
40
13
  end
@@ -0,0 +1,67 @@
1
+ require 'nokogiri'
2
+ require 'cgi'
3
+ class RemoteTable
4
+ class Format
5
+ module ProcessedByNokogiri
6
+ def each
7
+ remove_useless_characters!
8
+ first_row = true
9
+ keys = t.properties.headers if t.properties.headers.is_a?(::Array)
10
+ xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
11
+ (row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)).each do |row|
12
+ values = if column_css?
13
+ row.css(t.properties.column_css)
14
+ elsif column_xpath?
15
+ row.xpath(t.properties.column_xpath)
16
+ else
17
+ [row]
18
+ end.map { |cell| cell.content.gsub(/\s+/, ' ').strip }
19
+ if first_row and t.properties.use_first_row_as_header?
20
+ keys = values
21
+ first_row = false
22
+ next
23
+ end
24
+ output = if t.properties.output_class == ::Array
25
+ values
26
+ else
27
+ zip keys, values
28
+ end
29
+ if t.properties.keep_blank_rows or values.any?
30
+ yield output
31
+ end
32
+ end
33
+ ensure
34
+ t.local_file.delete
35
+ end
36
+
37
+ private
38
+
39
+ def row_css?
40
+ !!t.properties.row_css
41
+ end
42
+
43
+ def column_css?
44
+ !!t.properties.column_css
45
+ end
46
+
47
+ def column_xpath?
48
+ !!t.properties.column_xpath
49
+ end
50
+
51
+ # http://snippets.dzone.com/posts/show/406
52
+ def zip(keys, values)
53
+ hash = ::ActiveSupport::OrderedHash.new
54
+ keys.zip(values) { |k,v| hash[k]=v }
55
+ hash
56
+ end
57
+
58
+ # should we be doing this in ruby?
59
+ def unescaped_xml_without_soft_hyphens
60
+ str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
61
+ # get rid of MS Office baddies
62
+ str.gsub! '&shy;', ''
63
+ str
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,52 @@
1
+ require 'roo'
2
+ class RemoteTable
3
+ class Format
4
+ module ProcessedByRoo
5
+ def each(&blk)
6
+ spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
+ spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
8
+ if t.properties.output_class == ::Array
9
+ (first_data_row..spreadsheet.last_row).each do |y|
10
+ output = (1..spreadsheet.last_column).map do |x|
11
+ spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
12
+ end
13
+ yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
14
+ end
15
+ else
16
+ keys = {}
17
+ if t.properties.use_first_row_as_header?
18
+ (1..spreadsheet.last_column).each do |x|
19
+ keys[x] = spreadsheet.cell(header_row, x)
20
+ keys[x] = spreadsheet.cell(header_row - 1, x) if keys[x].blank? # look up
21
+ end
22
+ else
23
+ (1..spreadsheet.last_column).each do |x|
24
+ keys[x] = t.properties.headers[x - 1]
25
+ end
26
+ end
27
+ (first_data_row..spreadsheet.last_row).each do |y|
28
+ output = (1..spreadsheet.last_column).inject(::ActiveSupport::OrderedHash.new) do |memo, x|
29
+ if keys[x].present?
30
+ memo[keys[x]] = spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
31
+ end
32
+ memo
33
+ end
34
+ yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
35
+ end
36
+ end
37
+ ensure
38
+ t.local_file.delete
39
+ end
40
+
41
+ private
42
+
43
+ def header_row
44
+ 1 + t.properties.skip
45
+ end
46
+
47
+ def first_data_row
48
+ 1 + header_row
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1,7 +1,7 @@
1
1
  class RemoteTable
2
2
  class Format
3
3
  class OpenOffice < Format
4
- include Rooable
4
+ include ProcessedByRoo
5
5
  def roo_class
6
6
  ::Openoffice
7
7
  end
@@ -0,0 +1,14 @@
1
+ require 'nokogiri'
2
+ require 'cgi'
3
+ class RemoteTable
4
+ class Format
5
+ class XML < Format
6
+ include Textual
7
+ include ProcessedByNokogiri
8
+
9
+ def nokogiri_class
10
+ ::Nokogiri::XML::Document
11
+ end
12
+ end
13
+ end
14
+ end
@@ -12,10 +12,16 @@ class RemoteTable
12
12
  class Hasher
13
13
  include ::Singleton
14
14
  def hash(row)
15
- row = row.dup
16
- row.stringify_keys!
17
- str = row.keys.sort.map do |k|
18
- row[k].to_query k
15
+ str = if row.is_a?(::Array)
16
+ tmp_ary = []
17
+ row.each_with_index do |v, i|
18
+ tmp_ary.push v.to_query(i.to_s)
19
+ end
20
+ tmp_ary
21
+ else
22
+ row.stringify_keys.keys.sort.map do |k|
23
+ row[k].to_query k
24
+ end
19
25
  end.join('&')
20
26
  ::Digest::MD5.hexdigest str
21
27
  end
@@ -17,9 +17,32 @@ class RemoteTable
17
17
  @uri
18
18
  end
19
19
 
20
+ # Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you...
21
+ # * call []
22
+ # * call each
23
+ # Defaults to false.
24
+ def streaming
25
+ t.options['streaming'] || false
26
+ end
27
+
28
+ # Defaults to true.
29
+ def warn_on_multiple_downloads
30
+ t.options['warn_on_multiple_downloads'] != false
31
+ end
32
+
20
33
  # The headers specified by the user
34
+ #
35
+ # Default: :first_row
21
36
  def headers
22
- t.options['headers']
37
+ t.options['headers'].nil? ? :first_row : t.options['headers']
38
+ end
39
+
40
+ def use_first_row_as_header?
41
+ headers == :first_row
42
+ end
43
+
44
+ def output_class
45
+ headers == false ? ::Array : ::ActiveSupport::OrderedHash
23
46
  end
24
47
 
25
48
  # The sheet specified by the user as a number or a string
@@ -52,7 +75,7 @@ class RemoteTable
52
75
  #
53
76
  # Default: "UTF-8"
54
77
  def encoding
55
- @encoding ||= ::Array.wrap(t.options['encoding'] || [ 'UTF-8', 'US-ASCII', 'ASCII-8BIT' ])
78
+ @encoding ||= ::Array.wrap(t.options['encoding'] || [ 'ISO-8859-1', 'US-ASCII', 'WINDOWS-1252', 'ASCII-8BIT', 'UTF-8' ])
56
79
  end
57
80
 
58
81
  # The delimiter
@@ -71,6 +94,16 @@ class RemoteTable
71
94
  def column_xpath
72
95
  t.options['column_xpath']
73
96
  end
97
+
98
+ # The CSS selector used to find rows
99
+ def row_css
100
+ t.options['row_css']
101
+ end
102
+
103
+ # The CSS selector used to find columns
104
+ def column_css
105
+ t.options['column_css']
106
+ end
74
107
 
75
108
  # The compression type.
76
109
  #
@@ -205,6 +238,8 @@ class RemoteTable
205
238
  Format::FixedWidth
206
239
  when /htm/
207
240
  Format::HTML
241
+ when /xml/
242
+ Format::XML
208
243
  else
209
244
  Format::Delimited
210
245
  end
@@ -9,7 +9,7 @@ class RemoteTable
9
9
  if legacy_transformer
10
10
  legacy_transformer.apply row
11
11
  else
12
- row
12
+ [row]
13
13
  end
14
14
  end
15
15
  def legacy_transformer
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "1.1.10"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -106,16 +106,10 @@ class TestOldSyntax < Test::Unit::TestCase
106
106
  assert_equal '2', t.rows.first['dup_header']
107
107
  end
108
108
 
109
- should "respect field order in CSVs without headers" do
109
+ should "return an Array when instructed not to use headers" do
110
110
  t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
111
111
  t.rows.each do |row|
112
- last_column_number = -1
113
- row.each do |column_number, v|
114
- next if column_number == 'row_hash'
115
- assert column_number.is_a?(Numeric)
116
- assert(column_number > last_column_number)
117
- last_column_number = column_number
118
- end
112
+ assert row.is_a?(::Array)
119
113
  end
120
114
  end
121
115
 
@@ -9,7 +9,7 @@ class TestRemoteTable < Test::Unit::TestCase
9
9
 
10
10
  should "add a row hash to every row" do
11
11
  t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
12
- assert_equal "06d8a738551c17735e2731e25c8d0461", t[5]['row_hash']
12
+ assert_equal "06d8a738551c17735e2731e25c8d0461", t[5].row_hash
13
13
  end
14
14
 
15
15
  should "open a google doc" do
@@ -56,19 +56,40 @@ class TestRemoteTable < Test::Unit::TestCase
56
56
  should 'allow itself to be cleared for save memory' do
57
57
  t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
58
58
  t.to_a
59
- assert_equal Array, t.instance_variable_get(:@to_a).class
59
+ assert t.send(:cache).length > 0
60
60
  t.free
61
- assert_equal NilClass, t.instance_variable_get(:@to_a).class
61
+ assert t.send(:cache).length == 0
62
62
  end
63
-
63
+
64
64
  # fixes ArgumentError: invalid byte sequence in UTF-8
65
- should %{safely strip soft hyphens and read non-utf8 html} do
66
- t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :encoding => 'windows-1252', :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
65
+ should %{safely strip soft hyphens and read windows-1252 html} do
66
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
67
67
  assert t.rows.detect { |row| row['Model'] == 'A300B4600' }
68
68
  end
69
69
 
70
- should %{transliterate characters into UTF-8 as long as the user provides the right encoding} do
71
- t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
70
+ should %{transliterate characters from ISO-8859-1} do
71
+ t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv'
72
72
  assert t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }
73
73
  end
74
+
75
+ should %{read xml with css selectors} do
76
+ t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
77
+ assert /(AM|PM)/.match(t[0][0])
78
+ end
79
+
80
+ should %{optionally stream rows instead of caching them} do
81
+ t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
82
+ time1 = t[0][0]
83
+ assert /\d\d\d\d-\d\d-\d\d/.match(time1)
84
+ sleep 1
85
+ time2 = t[0][0]
86
+ assert(time1 != time2)
87
+ end
88
+
89
+ should %{not die when it reads Åland Islands} do
90
+ t = RemoteTable.new :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';'
91
+ assert_nothing_raised do
92
+ t[1][0]
93
+ end
94
+ end
74
95
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 1
9
- - 10
10
- version: 1.1.10
8
+ - 2
9
+ - 0
10
+ version: 1.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-05-03 00:00:00 Z
19
+ date: 2011-05-05 00:00:00 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: activesupport
@@ -293,9 +293,11 @@ files:
293
293
  - lib/remote_table/format/excelx.rb
294
294
  - lib/remote_table/format/fixed_width.rb
295
295
  - lib/remote_table/format/html.rb
296
- - lib/remote_table/format/mixins/rooable.rb
296
+ - lib/remote_table/format/mixins/processed_by_nokogiri.rb
297
+ - lib/remote_table/format/mixins/processed_by_roo.rb
297
298
  - lib/remote_table/format/mixins/textual.rb
298
299
  - lib/remote_table/format/open_office.rb
300
+ - lib/remote_table/format/xml.rb
299
301
  - lib/remote_table/hasher.rb
300
302
  - lib/remote_table/local_file.rb
301
303
  - lib/remote_table/properties.rb
@@ -1,49 +0,0 @@
1
- require 'roo'
2
- class RemoteTable
3
- class Format
4
- module Rooable
5
- def each(&blk)
6
- spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
- spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
8
- column_references = ::Hash.new
9
- if t.properties.headers == false
10
- # zero-based numeric keys
11
- for col in (1..spreadsheet.last_column)
12
- column_references[col] = col - 1
13
- end
14
- elsif t.properties.headers.is_a? ::Array
15
- # names
16
- for col in (1..spreadsheet.last_column)
17
- column_references[col] = t.properties.headers[col - 1]
18
- end
19
- else
20
- # read t.properties.headers from the file itself
21
- for col in (1..spreadsheet.last_column)
22
- column_references[col] = spreadsheet.cell(header_row, col)
23
- column_references[col] = spreadsheet.cell(header_row - 1, col) if column_references[col].blank? # look up
24
- end
25
- end
26
- first_data_row.upto(spreadsheet.last_row) do |raw_row|
27
- ordered_hash = ::ActiveSupport::OrderedHash.new
28
- for col in (1..spreadsheet.last_column)
29
- next if column_references[col].blank?
30
- ordered_hash[column_references[col]] = spreadsheet.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
31
- end
32
- yield ordered_hash if t.properties.keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
33
- end
34
- ensure
35
- t.local_file.delete
36
- end
37
-
38
- private
39
-
40
- def header_row
41
- 1 + t.properties.skip
42
- end
43
-
44
- def first_data_row
45
- 1 + header_row
46
- end
47
- end
48
- end
49
- end