remote_table 1.1.10 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/remote_table.rb CHANGED
@@ -4,11 +4,19 @@ require 'active_support/version'
4
4
  active_support/core_ext/hash
5
5
  active_support/core_ext/string
6
6
  active_support/core_ext/module
7
- active_support/core_ext/array/wrap
7
+ active_support/core_ext/array
8
8
  }.each do |active_support_3_requirement|
9
9
  require active_support_3_requirement
10
10
  end if ::ActiveSupport::VERSION::MAJOR == 3
11
11
 
12
+ class Hash
13
+ attr_accessor :row_hash
14
+ end
15
+
16
+ class Array
17
+ attr_accessor :row_hash
18
+ end
19
+
12
20
  class RemoteTable
13
21
  autoload :Format, 'remote_table/format'
14
22
  autoload :Properties, 'remote_table/properties'
@@ -53,40 +61,52 @@ class RemoteTable
53
61
  @options.freeze
54
62
  end
55
63
 
64
+ # not thread safe
56
65
  def each(&blk)
57
- to_a.each { |row| yield row }
66
+ if fully_cached?
67
+ cache.each(&blk)
68
+ else
69
+ mark_download!
70
+ retval = format.each do |row|
71
+ row.row_hash = ::RemoteTable.hasher.hash row
72
+ transformer.transform(row).each do |virtual_row|
73
+ if properties.errata
74
+ next if properties.errata.rejects? virtual_row
75
+ properties.errata.correct! virtual_row
76
+ end
77
+ next if properties.select and !properties.select.call(virtual_row)
78
+ next if properties.reject and properties.reject.call(virtual_row)
79
+ cache.push virtual_row unless properties.streaming
80
+ yield virtual_row
81
+ end
82
+ end
83
+ fully_cached! unless properties.streaming
84
+ retval
85
+ end
58
86
  end
59
87
  alias :each_row :each
60
88
 
61
89
  def to_a
62
- return @to_a if @to_a.is_a? ::Array
63
- @to_a = []
64
- format.each do |row|
65
- row['row_hash'] = ::RemoteTable.hasher.hash row
66
- # allow the transformer to return multiple "virtual rows" for every real row
67
- ::Array.wrap(transformer.transform(row)).each do |virtual_row|
68
- if properties.errata
69
- next if properties.errata.rejects? virtual_row
70
- properties.errata.correct! virtual_row
71
- end
72
- next if properties.select and !properties.select.call(virtual_row)
73
- next if properties.reject and properties.reject.call(virtual_row)
74
- @to_a.push virtual_row
75
- end
90
+ if fully_cached?
91
+ cache.dup
92
+ else
93
+ map { |row| row }
76
94
  end
77
- @to_a
78
95
  end
79
96
  alias :rows :to_a
80
97
 
81
98
  # Get a row by row number
82
99
  def [](row_number)
83
- to_a[row_number]
100
+ if fully_cached?
101
+ cache[row_number]
102
+ else
103
+ to_a[row_number]
104
+ end
84
105
  end
85
106
 
86
107
  # clear the row cache to save memory
87
108
  def free
88
- @to_a.clear if @to_a.is_a?(::Array)
89
- @to_a = nil
109
+ cache.clear
90
110
  ::GC.start
91
111
  nil
92
112
  end
@@ -120,4 +140,28 @@ class RemoteTable
120
140
  def transformer
121
141
  @transformer ||= Transformer.new self
122
142
  end
143
+
144
+ attr_reader :download_count
145
+
146
+ private
147
+
148
+ def mark_download!
149
+ @download_count ||= 0
150
+ @download_count += 1
151
+ if properties.warn_on_multiple_downloads and download_count > 1
152
+ $stderr.puts "[remote_table] Warning: #{url} has been downloaded #{download_count} times."
153
+ end
154
+ end
155
+
156
+ def fully_cached!
157
+ @fully_cached = true
158
+ end
159
+
160
+ def fully_cached?
161
+ !!@fully_cached
162
+ end
163
+
164
+ def cache
165
+ @cache ||= []
166
+ end
123
167
  end
@@ -1,6 +1,7 @@
1
1
  if ::RUBY_VERSION >= '1.9'
2
2
  require 'ensure/encoding'
3
3
  else
4
+ $KCODE = 'U'
4
5
  require 'iconv'
5
6
  end
6
7
 
@@ -12,9 +13,11 @@ class RemoteTable
12
13
  autoload :OpenOffice, 'remote_table/format/open_office'
13
14
  autoload :FixedWidth, 'remote_table/format/fixed_width'
14
15
  autoload :HTML, 'remote_table/format/html'
16
+ autoload :XML, 'remote_table/format/xml'
15
17
 
16
18
  autoload :Textual, 'remote_table/format/mixins/textual'
17
- autoload :Rooable, 'remote_table/format/mixins/rooable'
19
+ autoload :ProcessedByRoo, 'remote_table/format/mixins/processed_by_roo'
20
+ autoload :ProcessedByNokogiri, 'remote_table/format/mixins/processed_by_nokogiri'
18
21
 
19
22
  attr_reader :t
20
23
 
@@ -26,7 +29,13 @@ class RemoteTable
26
29
  if ::RUBY_VERSION >= '1.9'
27
30
  str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
28
31
  else
29
- ::Iconv.conv('UTF-8//TRANSLIT', t.properties.encoding[0], str + ' ')[0..-2]
32
+ return str if t.properties.encoding[0] =~ /utf.?8/i
33
+ begin
34
+ ::Iconv.conv('UTF-8//TRANSLIT', t.properties.encoding[0], str + ' ')[0..-2]
35
+ rescue ::Iconv::IllegalSequence
36
+ $stderr.puts "[remote_table] Unable to transliterate #{str} into UTF-8 given #{t.properties.encoding[0]}"
37
+ str
38
+ end
30
39
  end
31
40
  end
32
41
 
@@ -6,7 +6,7 @@ else
6
6
  require 'fastercsv'
7
7
  ::RemoteTable::CSV = ::FasterCSV
8
8
  rescue ::LoadError
9
- $stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
9
+ $stderr.puts "[remote_table] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
10
10
  raise $!
11
11
  end
12
12
  end
@@ -19,26 +19,18 @@ class RemoteTable
19
19
  remove_useless_characters!
20
20
  skip_rows!
21
21
  CSV.foreach(t.local_file.path, fastercsv_options) do |row|
22
- ordered_hash = ::ActiveSupport::OrderedHash.new
23
- filled_values = 0
24
- case row
25
- when CSV::Row
26
- row.each do |header, value|
27
- next if header.blank?
28
- value = '' if value.nil?
29
- ordered_hash[header] = utf8 value
30
- filled_values += 1 if value.present?
31
- end
32
- when ::Array
33
- index = 0
34
- row.each do |value|
35
- value = '' if value.nil?
36
- ordered_hash[index] = utf8 value
37
- filled_values += 1 if value.present?
38
- index += 1
22
+ if row.is_a?(CSV::Row)
23
+ output = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (key, value)|
24
+ if key.present?
25
+ value = '' if value.nil?
26
+ memo[key] = utf8 value
27
+ end
28
+ memo
39
29
  end
30
+ yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
31
+ else
32
+ yield row if t.properties.keep_blank_rows or row.any? { |v| v.present? }
40
33
  end
41
- yield ordered_hash if t.properties.keep_blank_rows or filled_values > 0
42
34
  end
43
35
  ensure
44
36
  t.local_file.delete
@@ -62,7 +54,7 @@ class RemoteTable
62
54
  def fastercsv_options
63
55
  hsh = t.options.slice *FASTERCSV_OPTIONS
64
56
  hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
65
- hsh.reverse_merge! 'headers' => :first_row
57
+ hsh.reverse_merge! 'headers' => t.properties.headers
66
58
  hsh.reverse_merge! 'col_sep' => t.properties.delimiter
67
59
  hsh.symbolize_keys
68
60
  end
@@ -1,7 +1,7 @@
1
1
  class RemoteTable
2
2
  class Format
3
3
  class Excel < Format
4
- include Rooable
4
+ include ProcessedByRoo
5
5
  def roo_class
6
6
  ::Excel
7
7
  end
@@ -1,7 +1,7 @@
1
1
  class RemoteTable
2
2
  class Format
3
3
  class Excelx < Format
4
- include Rooable
4
+ include ProcessedByRoo
5
5
  def roo_class
6
6
  ::Excelx
7
7
  end
@@ -8,20 +8,23 @@ class RemoteTable
8
8
  crop_rows!
9
9
  skip_rows!
10
10
  cut_columns!
11
- parser.parse[:rows].each do |hash|
12
- hash.reject! { |k, v| k.blank? }
13
- hash.each do |k, v|
14
- hash[k] = utf8 v
11
+ parser.parse[:rows].each do |row|
12
+ row.reject! { |k, v| k.blank? }
13
+ row.each do |k, v|
14
+ row[k] = utf8 v
15
15
  end
16
- yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
16
+ yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
17
17
  end
18
18
  ensure
19
19
  t.local_file.delete
20
20
  end
21
+
21
22
  private
23
+
22
24
  def parser
23
25
  @parser ||= ::Slither::Parser.new definition, t.local_file.path
24
26
  end
27
+
25
28
  def definition
26
29
  @definition ||= if t.properties.schema_name.is_a?(::String) or t.properties.schema_name.is_a?(::Symbol)
27
30
  ::Slither.send :definition, t.properties.schema_name
@@ -4,37 +4,10 @@ class RemoteTable
4
4
  class Format
5
5
  class HTML < Format
6
6
  include Textual
7
- def each(&blk)
8
- remove_useless_characters!
9
- html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
10
- ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
11
- values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
12
- if html_headers.nil?
13
- html_headers = values
14
- next
15
- end
16
- hash = zip html_headers, values
17
- yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
18
- end
19
- ensure
20
- t.local_file.delete
21
- end
22
-
23
- private
24
-
25
- # http://snippets.dzone.com/posts/show/406
26
- def zip(keys, values)
27
- hash = ::Hash.new
28
- keys.zip(values) { |k,v| hash[k]=v }
29
- hash
30
- end
31
-
32
- # should we be doing this in ruby?
33
- def unescaped_html_without_soft_hyphens
34
- str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
35
- # get rid of MS Office baddies
36
- str.gsub! '&shy;', ''
37
- str
7
+ include ProcessedByNokogiri
8
+
9
+ def nokogiri_class
10
+ ::Nokogiri::HTML::Document
38
11
  end
39
12
  end
40
13
  end
@@ -0,0 +1,67 @@
1
+ require 'nokogiri'
2
+ require 'cgi'
3
+ class RemoteTable
4
+ class Format
5
+ module ProcessedByNokogiri
6
+ def each
7
+ remove_useless_characters!
8
+ first_row = true
9
+ keys = t.properties.headers if t.properties.headers.is_a?(::Array)
10
+ xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
11
+ (row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)).each do |row|
12
+ values = if column_css?
13
+ row.css(t.properties.column_css)
14
+ elsif column_xpath?
15
+ row.xpath(t.properties.column_xpath)
16
+ else
17
+ [row]
18
+ end.map { |cell| cell.content.gsub(/\s+/, ' ').strip }
19
+ if first_row and t.properties.use_first_row_as_header?
20
+ keys = values
21
+ first_row = false
22
+ next
23
+ end
24
+ output = if t.properties.output_class == ::Array
25
+ values
26
+ else
27
+ zip keys, values
28
+ end
29
+ if t.properties.keep_blank_rows or values.any?
30
+ yield output
31
+ end
32
+ end
33
+ ensure
34
+ t.local_file.delete
35
+ end
36
+
37
+ private
38
+
39
+ def row_css?
40
+ !!t.properties.row_css
41
+ end
42
+
43
+ def column_css?
44
+ !!t.properties.column_css
45
+ end
46
+
47
+ def column_xpath?
48
+ !!t.properties.column_xpath
49
+ end
50
+
51
+ # http://snippets.dzone.com/posts/show/406
52
+ def zip(keys, values)
53
+ hash = ::ActiveSupport::OrderedHash.new
54
+ keys.zip(values) { |k,v| hash[k]=v }
55
+ hash
56
+ end
57
+
58
+ # should we be doing this in ruby?
59
+ def unescaped_xml_without_soft_hyphens
60
+ str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
61
+ # get rid of MS Office baddies
62
+ str.gsub! '&shy;', ''
63
+ str
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,52 @@
1
+ require 'roo'
2
+ class RemoteTable
3
+ class Format
4
+ module ProcessedByRoo
5
+ def each(&blk)
6
+ spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
+ spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
8
+ if t.properties.output_class == ::Array
9
+ (first_data_row..spreadsheet.last_row).each do |y|
10
+ output = (1..spreadsheet.last_column).map do |x|
11
+ spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
12
+ end
13
+ yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
14
+ end
15
+ else
16
+ keys = {}
17
+ if t.properties.use_first_row_as_header?
18
+ (1..spreadsheet.last_column).each do |x|
19
+ keys[x] = spreadsheet.cell(header_row, x)
20
+ keys[x] = spreadsheet.cell(header_row - 1, x) if keys[x].blank? # look up
21
+ end
22
+ else
23
+ (1..spreadsheet.last_column).each do |x|
24
+ keys[x] = t.properties.headers[x - 1]
25
+ end
26
+ end
27
+ (first_data_row..spreadsheet.last_row).each do |y|
28
+ output = (1..spreadsheet.last_column).inject(::ActiveSupport::OrderedHash.new) do |memo, x|
29
+ if keys[x].present?
30
+ memo[keys[x]] = spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
31
+ end
32
+ memo
33
+ end
34
+ yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
35
+ end
36
+ end
37
+ ensure
38
+ t.local_file.delete
39
+ end
40
+
41
+ private
42
+
43
+ def header_row
44
+ 1 + t.properties.skip
45
+ end
46
+
47
+ def first_data_row
48
+ 1 + header_row
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1,7 +1,7 @@
1
1
  class RemoteTable
2
2
  class Format
3
3
  class OpenOffice < Format
4
- include Rooable
4
+ include ProcessedByRoo
5
5
  def roo_class
6
6
  ::Openoffice
7
7
  end
@@ -0,0 +1,14 @@
1
+ require 'nokogiri'
2
+ require 'cgi'
3
+ class RemoteTable
4
+ class Format
5
+ class XML < Format
6
+ include Textual
7
+ include ProcessedByNokogiri
8
+
9
+ def nokogiri_class
10
+ ::Nokogiri::XML::Document
11
+ end
12
+ end
13
+ end
14
+ end
@@ -12,10 +12,16 @@ class RemoteTable
12
12
  class Hasher
13
13
  include ::Singleton
14
14
  def hash(row)
15
- row = row.dup
16
- row.stringify_keys!
17
- str = row.keys.sort.map do |k|
18
- row[k].to_query k
15
+ str = if row.is_a?(::Array)
16
+ tmp_ary = []
17
+ row.each_with_index do |v, i|
18
+ tmp_ary.push v.to_query(i.to_s)
19
+ end
20
+ tmp_ary
21
+ else
22
+ row.stringify_keys.keys.sort.map do |k|
23
+ row[k].to_query k
24
+ end
19
25
  end.join('&')
20
26
  ::Digest::MD5.hexdigest str
21
27
  end
@@ -17,9 +17,32 @@ class RemoteTable
17
17
  @uri
18
18
  end
19
19
 
20
+ # Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you...
21
+ # * call []
22
+ # * call each
23
+ # Defaults to false.
24
+ def streaming
25
+ t.options['streaming'] || false
26
+ end
27
+
28
+ # Defaults to true.
29
+ def warn_on_multiple_downloads
30
+ t.options['warn_on_multiple_downloads'] != false
31
+ end
32
+
20
33
  # The headers specified by the user
34
+ #
35
+ # Default: :first_row
21
36
  def headers
22
- t.options['headers']
37
+ t.options['headers'].nil? ? :first_row : t.options['headers']
38
+ end
39
+
40
+ def use_first_row_as_header?
41
+ headers == :first_row
42
+ end
43
+
44
+ def output_class
45
+ headers == false ? ::Array : ::ActiveSupport::OrderedHash
23
46
  end
24
47
 
25
48
  # The sheet specified by the user as a number or a string
@@ -52,7 +75,7 @@ class RemoteTable
52
75
  #
53
76
  # Default: "UTF-8"
54
77
  def encoding
55
- @encoding ||= ::Array.wrap(t.options['encoding'] || [ 'UTF-8', 'US-ASCII', 'ASCII-8BIT' ])
78
+ @encoding ||= ::Array.wrap(t.options['encoding'] || [ 'ISO-8859-1', 'US-ASCII', 'WINDOWS-1252', 'ASCII-8BIT', 'UTF-8' ])
56
79
  end
57
80
 
58
81
  # The delimiter
@@ -71,6 +94,16 @@ class RemoteTable
71
94
  def column_xpath
72
95
  t.options['column_xpath']
73
96
  end
97
+
98
+ # The CSS selector used to find rows
99
+ def row_css
100
+ t.options['row_css']
101
+ end
102
+
103
+ # The CSS selector used to find columns
104
+ def column_css
105
+ t.options['column_css']
106
+ end
74
107
 
75
108
  # The compression type.
76
109
  #
@@ -205,6 +238,8 @@ class RemoteTable
205
238
  Format::FixedWidth
206
239
  when /htm/
207
240
  Format::HTML
241
+ when /xml/
242
+ Format::XML
208
243
  else
209
244
  Format::Delimited
210
245
  end
@@ -9,7 +9,7 @@ class RemoteTable
9
9
  if legacy_transformer
10
10
  legacy_transformer.apply row
11
11
  else
12
- row
12
+ [row]
13
13
  end
14
14
  end
15
15
  def legacy_transformer
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "1.1.10"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -106,16 +106,10 @@ class TestOldSyntax < Test::Unit::TestCase
106
106
  assert_equal '2', t.rows.first['dup_header']
107
107
  end
108
108
 
109
- should "respect field order in CSVs without headers" do
109
+ should "return an Array when instructed not to use headers" do
110
110
  t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
111
111
  t.rows.each do |row|
112
- last_column_number = -1
113
- row.each do |column_number, v|
114
- next if column_number == 'row_hash'
115
- assert column_number.is_a?(Numeric)
116
- assert(column_number > last_column_number)
117
- last_column_number = column_number
118
- end
112
+ assert row.is_a?(::Array)
119
113
  end
120
114
  end
121
115
 
@@ -9,7 +9,7 @@ class TestRemoteTable < Test::Unit::TestCase
9
9
 
10
10
  should "add a row hash to every row" do
11
11
  t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
12
- assert_equal "06d8a738551c17735e2731e25c8d0461", t[5]['row_hash']
12
+ assert_equal "06d8a738551c17735e2731e25c8d0461", t[5].row_hash
13
13
  end
14
14
 
15
15
  should "open a google doc" do
@@ -56,19 +56,40 @@ class TestRemoteTable < Test::Unit::TestCase
56
56
  should 'allow itself to be cleared for save memory' do
57
57
  t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
58
58
  t.to_a
59
- assert_equal Array, t.instance_variable_get(:@to_a).class
59
+ assert t.send(:cache).length > 0
60
60
  t.free
61
- assert_equal NilClass, t.instance_variable_get(:@to_a).class
61
+ assert t.send(:cache).length == 0
62
62
  end
63
-
63
+
64
64
  # fixes ArgumentError: invalid byte sequence in UTF-8
65
- should %{safely strip soft hyphens and read non-utf8 html} do
66
- t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :encoding => 'windows-1252', :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
65
+ should %{safely strip soft hyphens and read windows-1252 html} do
66
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
67
67
  assert t.rows.detect { |row| row['Model'] == 'A300B4600' }
68
68
  end
69
69
 
70
- should %{transliterate characters into UTF-8 as long as the user provides the right encoding} do
71
- t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
70
+ should %{transliterate characters from ISO-8859-1} do
71
+ t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv'
72
72
  assert t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }
73
73
  end
74
+
75
+ should %{read xml with css selectors} do
76
+ t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
77
+ assert /(AM|PM)/.match(t[0][0])
78
+ end
79
+
80
+ should %{optionally stream rows instead of caching them} do
81
+ t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
82
+ time1 = t[0][0]
83
+ assert /\d\d\d\d-\d\d-\d\d/.match(time1)
84
+ sleep 1
85
+ time2 = t[0][0]
86
+ assert(time1 != time2)
87
+ end
88
+
89
+ should %{not die when it reads Åland Islands} do
90
+ t = RemoteTable.new :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';'
91
+ assert_nothing_raised do
92
+ t[1][0]
93
+ end
94
+ end
74
95
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 1
9
- - 10
10
- version: 1.1.10
8
+ - 2
9
+ - 0
10
+ version: 1.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-05-03 00:00:00 Z
19
+ date: 2011-05-05 00:00:00 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: activesupport
@@ -293,9 +293,11 @@ files:
293
293
  - lib/remote_table/format/excelx.rb
294
294
  - lib/remote_table/format/fixed_width.rb
295
295
  - lib/remote_table/format/html.rb
296
- - lib/remote_table/format/mixins/rooable.rb
296
+ - lib/remote_table/format/mixins/processed_by_nokogiri.rb
297
+ - lib/remote_table/format/mixins/processed_by_roo.rb
297
298
  - lib/remote_table/format/mixins/textual.rb
298
299
  - lib/remote_table/format/open_office.rb
300
+ - lib/remote_table/format/xml.rb
299
301
  - lib/remote_table/hasher.rb
300
302
  - lib/remote_table/local_file.rb
301
303
  - lib/remote_table/properties.rb
@@ -1,49 +0,0 @@
1
- require 'roo'
2
- class RemoteTable
3
- class Format
4
- module Rooable
5
- def each(&blk)
6
- spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
- spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
8
- column_references = ::Hash.new
9
- if t.properties.headers == false
10
- # zero-based numeric keys
11
- for col in (1..spreadsheet.last_column)
12
- column_references[col] = col - 1
13
- end
14
- elsif t.properties.headers.is_a? ::Array
15
- # names
16
- for col in (1..spreadsheet.last_column)
17
- column_references[col] = t.properties.headers[col - 1]
18
- end
19
- else
20
- # read t.properties.headers from the file itself
21
- for col in (1..spreadsheet.last_column)
22
- column_references[col] = spreadsheet.cell(header_row, col)
23
- column_references[col] = spreadsheet.cell(header_row - 1, col) if column_references[col].blank? # lspreadsheetk up
24
- end
25
- end
26
- first_data_row.upto(spreadsheet.last_row) do |raw_row|
27
- ordered_hash = ::ActiveSupport::OrderedHash.new
28
- for col in (1..spreadsheet.last_column)
29
- next if column_references[col].blank?
30
- ordered_hash[column_references[col]] = spreadsheet.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
31
- end
32
- yield ordered_hash if t.properties.keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
33
- end
34
- ensure
35
- t.local_file.delete
36
- end
37
-
38
- private
39
-
40
- def header_row
41
- 1 + t.properties.skip
42
- end
43
-
44
- def first_data_row
45
- 1 + header_row
46
- end
47
- end
48
- end
49
- end