remote_table 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -24,4 +24,4 @@ See the test file and also data_miner examples of custom parsers.
24
24
 
25
25
  == Copyright
26
26
 
27
- Copyright (c) 2009 Brighter Planet. See LICENSE for details.
27
+ Copyright (c) 2010 Brighter Planet. See LICENSE for details.
data/Rakefile CHANGED
@@ -10,8 +10,10 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/remote_table"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- %w{ activesupport fastercsv ryanwood-slither }.each { |name| gem.add_dependency name }
14
- gem.add_dependency 'roo', '1.3.11'
13
+ gem.add_dependency 'roo', '~>1.3.11'
14
+ gem.add_dependency 'fastercsv', '~>1.5.0'
15
+ gem.add_dependency 'activesupport', '~>2.3.4'
16
+ gem.add_dependency 'ryanwood-slither', '~>0.99.3'
15
17
  gem.require_path = "lib"
16
18
  gem.files.include %w(lib/remote_table) unless gem.files.empty? # seems to fail once it's in the wild
17
19
  gem.rdoc_options << '--line-numbers' << '--inline-source'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 0.2.0
data/lib/remote_table.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'rubygems'
2
- require 'activesupport'
2
+ require 'active_support'
3
3
  require 'tempfile'
4
4
  require 'fastercsv'
5
5
  require 'slither'
@@ -2,6 +2,7 @@ class RemoteTable
2
2
  class File
3
3
  attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
4
4
  attr_accessor :path
5
+ attr_accessor :keep_blank_rows
5
6
 
6
7
  def initialize(bus)
7
8
  @filename = bus[:filename]
@@ -9,6 +10,7 @@ class RemoteTable
9
10
  @delimiter = bus[:delimiter]
10
11
  @sheet = bus[:sheet] || 0
11
12
  @skip = bus[:skip] # rows
13
+ @keep_blank_rows = bus[:keep_blank_rows] || false
12
14
  @crop = bus[:crop] # rows
13
15
  @cut = bus[:cut] # columns
14
16
  @headers = bus[:headers]
@@ -26,7 +28,7 @@ class RemoteTable
26
28
 
27
29
  private
28
30
 
29
- # doesn't support trap or spacer
31
+ # doesn't support trap
30
32
  def define_fixed_width_schema!
31
33
  raise "can't define both schema_name and schema" if !schema_name.blank?
32
34
  self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
@@ -35,7 +37,7 @@ class RemoteTable
35
37
  d.rows do |row|
36
38
  row.trap(&trap)
37
39
  schema.each do |name, width, options|
38
- if name == :spacer
40
+ if name == 'spacer'
39
41
  row.spacer width
40
42
  else
41
43
  row.column name, width, options
@@ -3,15 +3,28 @@ class RemoteTable
3
3
  def each_row(&block)
4
4
  skip_rows!
5
5
  FasterCSV.foreach(path, fastercsv_options) do |row|
6
- if row.respond_to?(:fields) # it's a traditional fastercsv row hash
7
- next if row.fields.compact.blank?
8
- hash = HashWithIndifferentAccess.new(row.to_hash)
9
- else # it's an array, which i think happens if you're using :headers => nil or :col_sep
10
- next if row.compact.blank?
6
+ ordered_hash = ActiveSupport::OrderedHash.new
7
+ filled_values = 0
8
+ case row
9
+ when FasterCSV::Row
10
+ row.each do |header, value|
11
+ next if header.blank?
12
+ value = '' if value.nil?
13
+ ordered_hash[header] = value
14
+ filled_values += 1 if value.present?
15
+ end
16
+ when Array
11
17
  index = 0
12
- hash = row.inject(ActiveSupport::OrderedHash.new) { |memo, element| memo[index] = element; index += 1; memo }
18
+ row.each do |value|
19
+ value = '' if value.nil?
20
+ ordered_hash[index] = value
21
+ filled_values += 1 if value.present?
22
+ index += 1
23
+ end
24
+ else
25
+ raise "Unexpected #{row.inspect}"
13
26
  end
14
- yield hash
27
+ yield ordered_hash if keep_blank_rows or filled_values.nonzero?
15
28
  end
16
29
  ensure
17
30
  restore_rows!
@@ -20,7 +33,7 @@ class RemoteTable
20
33
  private
21
34
 
22
35
  def fastercsv_options
23
- fastercsv_options = { :skip_blanks => true, :header_converters => lambda { |k| k.toutf8 } }
36
+ fastercsv_options = { :skip_blanks => !keep_blank_rows, :header_converters => lambda { |k| k.to_s.toutf8 } }
24
37
  if headers == false
25
38
  fastercsv_options.merge!(:headers => nil)
26
39
  else
@@ -5,7 +5,10 @@ class RemoteTable
5
5
  skip_rows!
6
6
  cut_columns!
7
7
  a = Slither.parse(path, schema_name)
8
- a[:rows].each { |row| yield HashWithIndifferentAccess.new(row) }
8
+ a[:rows].each do |hash|
9
+ hash.reject! { |k, v| k.blank? }
10
+ yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
11
+ end
9
12
  ensure
10
13
  uncut_columns!
11
14
  unskip_rows!
@@ -1,19 +1,20 @@
1
1
  class RemoteTable
2
2
  module RooSpreadsheet
3
3
  def each_row(&block)
4
- headers = {}
4
+ headers = Hash.new
5
5
  oo = roo_klass.new(path, nil, :ignore)
6
6
  oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
7
7
  for col in (1..oo.last_column)
8
8
  headers[col] = oo.cell(header_row, col)
9
9
  headers[col] = oo.cell(header_row - 1, col) if headers[col].blank? # look up
10
10
  end
11
- first_data_row.upto(oo.last_row) do |row|
12
- values = {}
11
+ first_data_row.upto(oo.last_row) do |raw_row|
12
+ ordered_hash = ActiveSupport::OrderedHash.new
13
13
  for col in (1..oo.last_column)
14
- values[headers[col]] = oo.cell(row, col).to_s.gsub(/<[^>]+>/, '').strip
14
+ next if headers[col].blank?
15
+ ordered_hash[headers[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
15
16
  end
16
- yield HashWithIndifferentAccess.new(values)
17
+ yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
17
18
  end
18
19
  end
19
20
 
@@ -55,7 +55,7 @@ class RemoteTable
55
55
  # in C but not in the others, we can default to the basename of the package
56
56
  # in order to do this we'll need to mv the uncompressed file on top of the original file
57
57
  def identify(path)
58
- ::File.mv(path, file_path(path)) if !packing and [ nil, :bz2, :gz ].include?(compression)
58
+ FileUtils.mv(path, file_path(path)) if !packing and [ nil, :bz2, :gz ].include?(compression)
59
59
  end
60
60
 
61
61
  def file_path(path)
@@ -18,8 +18,16 @@ class RemoteTable
18
18
  self
19
19
  end
20
20
 
21
+ # - convert OrderedHash to a Hash (otherwise field ordering will be saved)
22
+ # - dump it
23
+ # - digest it
24
+ def self.row_hash(row)
25
+ Digest::MD5.hexdigest Marshal.dump(Hash.new.replace(row))
26
+ end
27
+
21
28
  def each_row(&block)
22
29
  raw_table.each_row do |row|
30
+ row['row_hash'] = self.class.row_hash(row)
23
31
  virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
24
32
  Array.wrap(virtual_rows).each do |virtual_row|
25
33
  next if select and !select.call(virtual_row)
data/remote_table.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{remote_table}
8
- s.version = "0.1.6"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2009-11-05}
12
+ s.date = %q{2010-02-24}
13
13
  s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -55,21 +55,21 @@ Gem::Specification.new do |s|
55
55
  s.specification_version = 3
56
56
 
57
57
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
58
- s.add_runtime_dependency(%q<activesupport>, [">= 0"])
59
- s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
60
- s.add_runtime_dependency(%q<ryanwood-slither>, [">= 0"])
61
- s.add_runtime_dependency(%q<roo>, ["= 1.3.11"])
58
+ s.add_runtime_dependency(%q<roo>, ["~> 1.3.11"])
59
+ s.add_runtime_dependency(%q<fastercsv>, ["~> 1.5.0"])
60
+ s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
61
+ s.add_runtime_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
62
62
  else
63
- s.add_dependency(%q<activesupport>, [">= 0"])
64
- s.add_dependency(%q<fastercsv>, [">= 0"])
65
- s.add_dependency(%q<ryanwood-slither>, [">= 0"])
66
- s.add_dependency(%q<roo>, ["= 1.3.11"])
63
+ s.add_dependency(%q<roo>, ["~> 1.3.11"])
64
+ s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
65
+ s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
66
+ s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
67
67
  end
68
68
  else
69
- s.add_dependency(%q<activesupport>, [">= 0"])
70
- s.add_dependency(%q<fastercsv>, [">= 0"])
71
- s.add_dependency(%q<ryanwood-slither>, [">= 0"])
72
- s.add_dependency(%q<roo>, ["= 1.3.11"])
69
+ s.add_dependency(%q<roo>, ["~> 1.3.11"])
70
+ s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
71
+ s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
72
+ s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
73
73
  end
74
74
  end
75
75
 
@@ -7,7 +7,7 @@ class FuelOilParser
7
7
  def add_hints!(bus)
8
8
  bus[:sheet] = 'Data 1'
9
9
  bus[:skip] = 2
10
- bus[:select] = lambda { |row| row[:year] > 1989 }
10
+ bus[:select] = lambda { |row| row['year'] > 1989 }
11
11
  end
12
12
  def apply(row)
13
13
  virtual_rows = []
@@ -23,18 +23,31 @@ class FuelOilParser
23
23
  locatable = "#{$1} (State)"
24
24
  end
25
25
  date = Time.parse(date)
26
- virtual_rows << HashWithIndifferentAccess.new(
27
- :locatable => locatable,
28
- :cost => cost,
29
- :year => date.year,
30
- :month => date.month
31
- )
26
+ virtual_rows << {
27
+ 'locatable' => locatable,
28
+ 'cost' => cost,
29
+ 'year' => date.year,
30
+ 'month' => date.month
31
+ }
32
32
  end
33
33
  virtual_rows
34
34
  end
35
35
  end
36
36
 
37
37
  class RemoteTableTest < Test::Unit::TestCase
38
+ def setup
39
+ @test2_rows_with_blanks = [
40
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
41
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
42
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
43
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
44
+ ]
45
+ @test2_rows = [
46
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
47
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
48
+ ]
49
+ end
50
+
38
51
  should "open an XLS inside a zip file" do
39
52
  t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
40
53
  assert_equal 'ACURA', t.rows.first['Manufacturer']
@@ -43,15 +56,26 @@ class RemoteTableTest < Test::Unit::TestCase
43
56
  assert_equal 'V70 XC AWD', t.rows.last['carline name']
44
57
  end
45
58
 
46
- should "have indifferent hash access" do
59
+ should "not have indifferent string/symbol hash access" do
47
60
  t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
48
- assert_equal 'ACURA', t.rows.first['Manufacturer'.to_sym]
49
- assert_equal 'NSX', t.rows.first['carline name'.to_sym]
50
- assert_equal 'VOLVO', t.rows.last['Manufacturer'.to_sym]
51
- assert_equal 'V70 XC AWD', t.rows.last['carline name'.to_sym]
61
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
62
+ assert_equal nil, t.rows.first[:Manufacturer]
63
+ end
64
+
65
+ should "hash rows without paying attention to order" do
66
+ x = ActiveSupport::OrderedHash.new
67
+ x[:a] = 1
68
+ x[:b] = 2
69
+
70
+ y = ActiveSupport::OrderedHash.new
71
+ y[:b] = 2
72
+ y[:a] = 1
73
+
74
+ assert Marshal.dump(x) != Marshal.dump(y)
75
+ assert RemoteTable::Transform.row_hash(x) == RemoteTable::Transform.row_hash(y)
52
76
  end
53
77
 
54
- should "open a Google Docs url" do
78
+ should "open a Google Docs url (as a CSV)" do
55
79
  t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
56
80
  assert_equal 'Gulf Coast', t.rows.first['PAD district name']
57
81
  assert_equal 'AL', t.rows.first['State']
@@ -59,10 +83,141 @@ class RemoteTableTest < Test::Unit::TestCase
59
83
  assert_equal 'WY', t.rows.last['State']
60
84
  end
61
85
 
86
+ should "open a Google Docs url as a CSV without headers" do
87
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
88
+ assert_equal 'AL', t.rows.first[0]
89
+ assert_equal 'Gulf Coast', t.rows.first[4]
90
+ assert_equal 'WY', t.rows.last[0]
91
+ assert_equal 'Rocky Mountain', t.rows.last[4]
92
+ end
93
+
94
+ should "take the last of values if the header is duplicated" do
95
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
96
+ assert_equal '2', t.rows.first['dup_header']
97
+ end
98
+
99
+ should "respect field order in CSVs without headers" do
100
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
101
+ last_k = -1
102
+ saw_string = false
103
+ t.rows.each do |row|
104
+ row.each do |k, v|
105
+ if k.is_a?(Fixnum) and last_k.is_a?(Fixnum)
106
+ assert !saw_string
107
+ assert k > last_k
108
+ end
109
+ last_k = k
110
+ saw_string = k.is_a?(String)
111
+ end
112
+ end
113
+ end
114
+
115
+ %w{ csv ods xls }.each do |format|
116
+ eval %{
117
+ should "read #{format}" do
118
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}')
119
+ # no blank headers
120
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
121
+ # correct values
122
+ t.rows.each_with_index do |row, index|
123
+ assert_equal row.except('row_hash'), @test2_rows[index]
124
+ end
125
+ end
126
+
127
+ should "read #{format}, keeping blank rows" do
128
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}', :keep_blank_rows => true)
129
+ # no blank headers
130
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
131
+ # correct values
132
+ t.rows.each_with_index do |row, index|
133
+ assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
134
+ end
135
+ end
136
+ }
137
+ end
138
+
139
+ should "read fixed width correctly" do
140
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
141
+ :format => :fixed_width,
142
+ :skip => 1,
143
+ :schema => [[ 'header4', 10, { :type => :string } ],
144
+ [ 'spacer', 1 ],
145
+ [ 'header5', 10, { :type => :string } ],
146
+ [ 'spacer', 12 ],
147
+ [ 'header6', 10, { :type => :string } ]])
148
+
149
+ # no blank headers
150
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
151
+ # correct values
152
+ t.rows.each_with_index do |row, index|
153
+ assert_equal row.except('row_hash'), @test2_rows[index]
154
+ end
155
+ end
156
+
157
+ should "read fixed width correctly, keeping blank rows" do
158
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
159
+ :format => :fixed_width,
160
+ :keep_blank_rows => true,
161
+ :skip => 1,
162
+ :schema => [[ 'header4', 10, { :type => :string } ],
163
+ [ 'spacer', 1 ],
164
+ [ 'header5', 10, { :type => :string } ],
165
+ [ 'spacer', 12 ],
166
+ [ 'header6', 10, { :type => :string } ]])
167
+
168
+ # no blank headers
169
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
170
+ # correct values
171
+ t.rows.each_with_index do |row, index|
172
+ assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
173
+ end
174
+ end
175
+
176
+ should "have the same row hash across formats" do
177
+ csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
178
+ ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
179
+ xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
180
+ fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
181
+ :format => :fixed_width,
182
+ :skip => 1,
183
+ :schema => [[ 'header1', 10, { :type => :string } ],
184
+ [ 'spacer', 1 ],
185
+ [ 'header2', 10, { :type => :string } ],
186
+ [ 'spacer', 12 ],
187
+ [ 'header3', 10, { :type => :string } ]])
188
+
189
+ csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
190
+ ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
191
+ xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
192
+ fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
193
+ :format => :fixed_width,
194
+ :skip => 1,
195
+ :schema => [[ 'spacer', 11 ],
196
+ [ 'header2', 10, { :type => :string } ],
197
+ [ 'spacer', 1 ],
198
+ [ 'header3', 10, { :type => :string } ],
199
+ [ 'spacer', 1 ],
200
+ [ 'header1', 10, { :type => :string } ]])
201
+
202
+
203
+ reference = csv.rows[0]['row_hash']
204
+
205
+ # same row hashes
206
+ assert_equal reference, ods.rows[0]['row_hash']
207
+ assert_equal reference, xls.rows[0]['row_hash']
208
+ assert_equal reference, fixed_width.rows[0]['row_hash']
209
+ # same row hashes with different order
210
+ assert_equal reference, csv2.rows[0]['row_hash']
211
+ assert_equal reference, ods2.rows[0]['row_hash']
212
+ assert_equal reference, xls2.rows[0]['row_hash']
213
+ assert_equal reference, fixed_width2.rows[0]['row_hash']
214
+ end
215
+
62
216
  should "open an ODS" do
63
- t = RemoteTable.new(:url => 'http://static.brighterplanet.com/science/profiler/footprint_model.ods', :sheet => 'Export')
64
- assert_equal 'automobiles', t.rows.first['component']
65
- assert_equal 2005.0, t.rows.first['period'].to_f
217
+ t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
218
+
219
+ assert_equal 'Central Africa', t.rows[5]['name']
220
+ assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
66
221
  end
67
222
 
68
223
  should "open a CSV inside a zip file" do
@@ -79,10 +234,10 @@ class RemoteTableTest < Test::Unit::TestCase
79
234
  :format => :fixed_width,
80
235
  :crop => 21..26, # inclusive
81
236
  :cut => '2-',
82
- :select => lambda { |row| /\A[A-Z]/.match row[:code] },
83
- :schema => [[ :code, 2, { :type => :string } ],
84
- [ :spacer, 2 ],
85
- [ :name, 52, { :type => :string } ]])
237
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
238
+ :schema => [[ 'code', 2, { :type => :string } ],
239
+ [ 'spacer', 2 ],
240
+ [ 'name', 52, { :type => :string } ]])
86
241
  assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
87
242
  assert_equal 'R', t.rows.first['code']
88
243
  assert_equal 'electricity', t.rows.last['name']
@@ -92,10 +247,23 @@ class RemoteTableTest < Test::Unit::TestCase
92
247
  should "open an XLS with a parser" do
93
248
  ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
94
249
  ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
95
-
250
+
96
251
  t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
97
252
  :transform => { :class => FuelOilParser })
98
- assert_equal ma_1990_01, t.rows[0]
99
- assert_equal ga_1990_01, t.rows[1]
253
+ assert t.rows.include?(ma_1990_01)
254
+ assert t.rows.include?(ga_1990_01)
255
+ end
256
+
257
+ should "provide a row_hash on demand" do
258
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
259
+ :filename => 'Gd6-dsc.txt',
260
+ :format => :fixed_width,
261
+ :crop => 21..26, # inclusive
262
+ :cut => '2-',
263
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
264
+ :schema => [[ 'code', 2, { :type => :string } ],
265
+ [ 'spacer', 2 ],
266
+ [ 'name', 52, { :type => :string } ]])
267
+ assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
100
268
  end
101
269
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -10,18 +10,18 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-11-05 00:00:00 -05:00
13
+ date: 2010-02-24 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: activesupport
17
+ name: roo
18
18
  type: :runtime
19
19
  version_requirement:
20
20
  version_requirements: !ruby/object:Gem::Requirement
21
21
  requirements:
22
- - - ">="
22
+ - - ~>
23
23
  - !ruby/object:Gem::Version
24
- version: "0"
24
+ version: 1.3.11
25
25
  version:
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: fastercsv
@@ -29,29 +29,29 @@ dependencies:
29
29
  version_requirement:
30
30
  version_requirements: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - ">="
32
+ - - ~>
33
33
  - !ruby/object:Gem::Version
34
- version: "0"
34
+ version: 1.5.0
35
35
  version:
36
36
  - !ruby/object:Gem::Dependency
37
- name: ryanwood-slither
37
+ name: activesupport
38
38
  type: :runtime
39
39
  version_requirement:
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  requirements:
42
- - - ">="
42
+ - - ~>
43
43
  - !ruby/object:Gem::Version
44
- version: "0"
44
+ version: 2.3.4
45
45
  version:
46
46
  - !ruby/object:Gem::Dependency
47
- name: roo
47
+ name: ryanwood-slither
48
48
  type: :runtime
49
49
  version_requirement:
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "="
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
- version: 1.3.11
54
+ version: 0.99.3
55
55
  version:
56
56
  description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
57
57
  email: seamus@abshere.net