remote_table 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -24,4 +24,4 @@ See the test file and also data_miner examples of custom parsers.
24
24
 
25
25
  == Copyright
26
26
 
27
- Copyright (c) 2009 Brighter Planet. See LICENSE for details.
27
+ Copyright (c) 2010 Brighter Planet. See LICENSE for details.
data/Rakefile CHANGED
@@ -10,8 +10,10 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/remote_table"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- %w{ activesupport fastercsv ryanwood-slither }.each { |name| gem.add_dependency name }
14
- gem.add_dependency 'roo', '1.3.11'
13
+ gem.add_dependency 'roo', '~>1.3.11'
14
+ gem.add_dependency 'fastercsv', '~>1.5.0'
15
+ gem.add_dependency 'activesupport', '~>2.3.4'
16
+ gem.add_dependency 'ryanwood-slither', '~>0.99.3'
15
17
  gem.require_path = "lib"
16
18
  gem.files.include %w(lib/remote_table) unless gem.files.empty? # seems to fail once it's in the wild
17
19
  gem.rdoc_options << '--line-numbers' << '--inline-source'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 0.2.0
data/lib/remote_table.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'rubygems'
2
- require 'activesupport'
2
+ require 'active_support'
3
3
  require 'tempfile'
4
4
  require 'fastercsv'
5
5
  require 'slither'
@@ -2,6 +2,7 @@ class RemoteTable
2
2
  class File
3
3
  attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
4
4
  attr_accessor :path
5
+ attr_accessor :keep_blank_rows
5
6
 
6
7
  def initialize(bus)
7
8
  @filename = bus[:filename]
@@ -9,6 +10,7 @@ class RemoteTable
9
10
  @delimiter = bus[:delimiter]
10
11
  @sheet = bus[:sheet] || 0
11
12
  @skip = bus[:skip] # rows
13
+ @keep_blank_rows = bus[:keep_blank_rows] || false
12
14
  @crop = bus[:crop] # rows
13
15
  @cut = bus[:cut] # columns
14
16
  @headers = bus[:headers]
@@ -26,7 +28,7 @@ class RemoteTable
26
28
 
27
29
  private
28
30
 
29
- # doesn't support trap or spacer
31
+ # doesn't support trap
30
32
  def define_fixed_width_schema!
31
33
  raise "can't define both schema_name and schema" if !schema_name.blank?
32
34
  self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
@@ -35,7 +37,7 @@ class RemoteTable
35
37
  d.rows do |row|
36
38
  row.trap(&trap)
37
39
  schema.each do |name, width, options|
38
- if name == :spacer
40
+ if name == 'spacer'
39
41
  row.spacer width
40
42
  else
41
43
  row.column name, width, options
@@ -3,15 +3,28 @@ class RemoteTable
3
3
  def each_row(&block)
4
4
  skip_rows!
5
5
  FasterCSV.foreach(path, fastercsv_options) do |row|
6
- if row.respond_to?(:fields) # it's a traditional fastercsv row hash
7
- next if row.fields.compact.blank?
8
- hash = HashWithIndifferentAccess.new(row.to_hash)
9
- else # it's an array, which i think happens if you're using :headers => nil or :col_sep
10
- next if row.compact.blank?
6
+ ordered_hash = ActiveSupport::OrderedHash.new
7
+ filled_values = 0
8
+ case row
9
+ when FasterCSV::Row
10
+ row.each do |header, value|
11
+ next if header.blank?
12
+ value = '' if value.nil?
13
+ ordered_hash[header] = value
14
+ filled_values += 1 if value.present?
15
+ end
16
+ when Array
11
17
  index = 0
12
- hash = row.inject(ActiveSupport::OrderedHash.new) { |memo, element| memo[index] = element; index += 1; memo }
18
+ row.each do |value|
19
+ value = '' if value.nil?
20
+ ordered_hash[index] = value
21
+ filled_values += 1 if value.present?
22
+ index += 1
23
+ end
24
+ else
25
+ raise "Unexpected #{row.inspect}"
13
26
  end
14
- yield hash
27
+ yield ordered_hash if keep_blank_rows or filled_values.nonzero?
15
28
  end
16
29
  ensure
17
30
  restore_rows!
@@ -20,7 +33,7 @@ class RemoteTable
20
33
  private
21
34
 
22
35
  def fastercsv_options
23
- fastercsv_options = { :skip_blanks => true, :header_converters => lambda { |k| k.toutf8 } }
36
+ fastercsv_options = { :skip_blanks => !keep_blank_rows, :header_converters => lambda { |k| k.to_s.toutf8 } }
24
37
  if headers == false
25
38
  fastercsv_options.merge!(:headers => nil)
26
39
  else
@@ -5,7 +5,10 @@ class RemoteTable
5
5
  skip_rows!
6
6
  cut_columns!
7
7
  a = Slither.parse(path, schema_name)
8
- a[:rows].each { |row| yield HashWithIndifferentAccess.new(row) }
8
+ a[:rows].each do |hash|
9
+ hash.reject! { |k, v| k.blank? }
10
+ yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
11
+ end
9
12
  ensure
10
13
  uncut_columns!
11
14
  unskip_rows!
@@ -1,19 +1,20 @@
1
1
  class RemoteTable
2
2
  module RooSpreadsheet
3
3
  def each_row(&block)
4
- headers = {}
4
+ headers = Hash.new
5
5
  oo = roo_klass.new(path, nil, :ignore)
6
6
  oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
7
7
  for col in (1..oo.last_column)
8
8
  headers[col] = oo.cell(header_row, col)
9
9
  headers[col] = oo.cell(header_row - 1, col) if headers[col].blank? # look up
10
10
  end
11
- first_data_row.upto(oo.last_row) do |row|
12
- values = {}
11
+ first_data_row.upto(oo.last_row) do |raw_row|
12
+ ordered_hash = ActiveSupport::OrderedHash.new
13
13
  for col in (1..oo.last_column)
14
- values[headers[col]] = oo.cell(row, col).to_s.gsub(/<[^>]+>/, '').strip
14
+ next if headers[col].blank?
15
+ ordered_hash[headers[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
15
16
  end
16
- yield HashWithIndifferentAccess.new(values)
17
+ yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
17
18
  end
18
19
  end
19
20
 
@@ -55,7 +55,7 @@ class RemoteTable
55
55
  # in C but not in the others, we can default to the basename of the package
56
56
  # in order to do this we'll need to mv the uncompressed file on top of the original file
57
57
  def identify(path)
58
- ::File.mv(path, file_path(path)) if !packing and [ nil, :bz2, :gz ].include?(compression)
58
+ FileUtils.mv(path, file_path(path)) if !packing and [ nil, :bz2, :gz ].include?(compression)
59
59
  end
60
60
 
61
61
  def file_path(path)
@@ -18,8 +18,16 @@ class RemoteTable
18
18
  self
19
19
  end
20
20
 
21
+ # - convert OrderedHash to a Hash (otherwise field ordering will be saved)
22
+ # - dump it
23
+ # - digest it
24
+ def self.row_hash(row)
25
+ Digest::MD5.hexdigest Marshal.dump(Hash.new.replace(row))
26
+ end
27
+
21
28
  def each_row(&block)
22
29
  raw_table.each_row do |row|
30
+ row['row_hash'] = self.class.row_hash(row)
23
31
  virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
24
32
  Array.wrap(virtual_rows).each do |virtual_row|
25
33
  next if select and !select.call(virtual_row)
data/remote_table.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{remote_table}
8
- s.version = "0.1.6"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2009-11-05}
12
+ s.date = %q{2010-02-24}
13
13
  s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -55,21 +55,21 @@ Gem::Specification.new do |s|
55
55
  s.specification_version = 3
56
56
 
57
57
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
58
- s.add_runtime_dependency(%q<activesupport>, [">= 0"])
59
- s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
60
- s.add_runtime_dependency(%q<ryanwood-slither>, [">= 0"])
61
- s.add_runtime_dependency(%q<roo>, ["= 1.3.11"])
58
+ s.add_runtime_dependency(%q<roo>, ["~> 1.3.11"])
59
+ s.add_runtime_dependency(%q<fastercsv>, ["~> 1.5.0"])
60
+ s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
61
+ s.add_runtime_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
62
62
  else
63
- s.add_dependency(%q<activesupport>, [">= 0"])
64
- s.add_dependency(%q<fastercsv>, [">= 0"])
65
- s.add_dependency(%q<ryanwood-slither>, [">= 0"])
66
- s.add_dependency(%q<roo>, ["= 1.3.11"])
63
+ s.add_dependency(%q<roo>, ["~> 1.3.11"])
64
+ s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
65
+ s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
66
+ s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
67
67
  end
68
68
  else
69
- s.add_dependency(%q<activesupport>, [">= 0"])
70
- s.add_dependency(%q<fastercsv>, [">= 0"])
71
- s.add_dependency(%q<ryanwood-slither>, [">= 0"])
72
- s.add_dependency(%q<roo>, ["= 1.3.11"])
69
+ s.add_dependency(%q<roo>, ["~> 1.3.11"])
70
+ s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
71
+ s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
72
+ s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
73
73
  end
74
74
  end
75
75
 
@@ -7,7 +7,7 @@ class FuelOilParser
7
7
  def add_hints!(bus)
8
8
  bus[:sheet] = 'Data 1'
9
9
  bus[:skip] = 2
10
- bus[:select] = lambda { |row| row[:year] > 1989 }
10
+ bus[:select] = lambda { |row| row['year'] > 1989 }
11
11
  end
12
12
  def apply(row)
13
13
  virtual_rows = []
@@ -23,18 +23,31 @@ class FuelOilParser
23
23
  locatable = "#{$1} (State)"
24
24
  end
25
25
  date = Time.parse(date)
26
- virtual_rows << HashWithIndifferentAccess.new(
27
- :locatable => locatable,
28
- :cost => cost,
29
- :year => date.year,
30
- :month => date.month
31
- )
26
+ virtual_rows << {
27
+ 'locatable' => locatable,
28
+ 'cost' => cost,
29
+ 'year' => date.year,
30
+ 'month' => date.month
31
+ }
32
32
  end
33
33
  virtual_rows
34
34
  end
35
35
  end
36
36
 
37
37
  class RemoteTableTest < Test::Unit::TestCase
38
+ def setup
39
+ @test2_rows_with_blanks = [
40
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
41
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
42
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
43
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
44
+ ]
45
+ @test2_rows = [
46
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
47
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
48
+ ]
49
+ end
50
+
38
51
  should "open an XLS inside a zip file" do
39
52
  t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
40
53
  assert_equal 'ACURA', t.rows.first['Manufacturer']
@@ -43,15 +56,26 @@ class RemoteTableTest < Test::Unit::TestCase
43
56
  assert_equal 'V70 XC AWD', t.rows.last['carline name']
44
57
  end
45
58
 
46
- should "have indifferent hash access" do
59
+ should "not have indifferent string/symbol hash access" do
47
60
  t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
48
- assert_equal 'ACURA', t.rows.first['Manufacturer'.to_sym]
49
- assert_equal 'NSX', t.rows.first['carline name'.to_sym]
50
- assert_equal 'VOLVO', t.rows.last['Manufacturer'.to_sym]
51
- assert_equal 'V70 XC AWD', t.rows.last['carline name'.to_sym]
61
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
62
+ assert_equal nil, t.rows.first[:Manufacturer]
63
+ end
64
+
65
+ should "hash rows without paying attention to order" do
66
+ x = ActiveSupport::OrderedHash.new
67
+ x[:a] = 1
68
+ x[:b] = 2
69
+
70
+ y = ActiveSupport::OrderedHash.new
71
+ y[:b] = 2
72
+ y[:a] = 1
73
+
74
+ assert Marshal.dump(x) != Marshal.dump(y)
75
+ assert RemoteTable::Transform.row_hash(x) == RemoteTable::Transform.row_hash(y)
52
76
  end
53
77
 
54
- should "open a Google Docs url" do
78
+ should "open a Google Docs url (as a CSV)" do
55
79
  t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
56
80
  assert_equal 'Gulf Coast', t.rows.first['PAD district name']
57
81
  assert_equal 'AL', t.rows.first['State']
@@ -59,10 +83,141 @@ class RemoteTableTest < Test::Unit::TestCase
59
83
  assert_equal 'WY', t.rows.last['State']
60
84
  end
61
85
 
86
+ should "open a Google Docs url as a CSV without headers" do
87
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
88
+ assert_equal 'AL', t.rows.first[0]
89
+ assert_equal 'Gulf Coast', t.rows.first[4]
90
+ assert_equal 'WY', t.rows.last[0]
91
+ assert_equal 'Rocky Mountain', t.rows.last[4]
92
+ end
93
+
94
+ should "take the last of values if the header is duplicated" do
95
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
96
+ assert_equal '2', t.rows.first['dup_header']
97
+ end
98
+
99
+ should "respect field order in CSVs without headers" do
100
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
101
+ last_k = -1
102
+ saw_string = false
103
+ t.rows.each do |row|
104
+ row.each do |k, v|
105
+ if k.is_a?(Fixnum) and last_k.is_a?(Fixnum)
106
+ assert !saw_string
107
+ assert k > last_k
108
+ end
109
+ last_k = k
110
+ saw_string = k.is_a?(String)
111
+ end
112
+ end
113
+ end
114
+
115
+ %w{ csv ods xls }.each do |format|
116
+ eval %{
117
+ should "read #{format}" do
118
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}')
119
+ # no blank headers
120
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
121
+ # correct values
122
+ t.rows.each_with_index do |row, index|
123
+ assert_equal row.except('row_hash'), @test2_rows[index]
124
+ end
125
+ end
126
+
127
+ should "read #{format}, keeping blank rows" do
128
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}', :keep_blank_rows => true)
129
+ # no blank headers
130
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
131
+ # correct values
132
+ t.rows.each_with_index do |row, index|
133
+ assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
134
+ end
135
+ end
136
+ }
137
+ end
138
+
139
+ should "read fixed width correctly" do
140
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
141
+ :format => :fixed_width,
142
+ :skip => 1,
143
+ :schema => [[ 'header4', 10, { :type => :string } ],
144
+ [ 'spacer', 1 ],
145
+ [ 'header5', 10, { :type => :string } ],
146
+ [ 'spacer', 12 ],
147
+ [ 'header6', 10, { :type => :string } ]])
148
+
149
+ # no blank headers
150
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
151
+ # correct values
152
+ t.rows.each_with_index do |row, index|
153
+ assert_equal row.except('row_hash'), @test2_rows[index]
154
+ end
155
+ end
156
+
157
+ should "read fixed width correctly, keeping blank rows" do
158
+ t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
159
+ :format => :fixed_width,
160
+ :keep_blank_rows => true,
161
+ :skip => 1,
162
+ :schema => [[ 'header4', 10, { :type => :string } ],
163
+ [ 'spacer', 1 ],
164
+ [ 'header5', 10, { :type => :string } ],
165
+ [ 'spacer', 12 ],
166
+ [ 'header6', 10, { :type => :string } ]])
167
+
168
+ # no blank headers
169
+ assert t.rows.all? { |row| row.keys.all?(&:present?) }
170
+ # correct values
171
+ t.rows.each_with_index do |row, index|
172
+ assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
173
+ end
174
+ end
175
+
176
+ should "have the same row hash across formats" do
177
+ csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
178
+ ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
179
+ xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
180
+ fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
181
+ :format => :fixed_width,
182
+ :skip => 1,
183
+ :schema => [[ 'header1', 10, { :type => :string } ],
184
+ [ 'spacer', 1 ],
185
+ [ 'header2', 10, { :type => :string } ],
186
+ [ 'spacer', 12 ],
187
+ [ 'header3', 10, { :type => :string } ]])
188
+
189
+ csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
190
+ ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
191
+ xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
192
+ fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
193
+ :format => :fixed_width,
194
+ :skip => 1,
195
+ :schema => [[ 'spacer', 11 ],
196
+ [ 'header2', 10, { :type => :string } ],
197
+ [ 'spacer', 1 ],
198
+ [ 'header3', 10, { :type => :string } ],
199
+ [ 'spacer', 1 ],
200
+ [ 'header1', 10, { :type => :string } ]])
201
+
202
+
203
+ reference = csv.rows[0]['row_hash']
204
+
205
+ # same row hashes
206
+ assert_equal reference, ods.rows[0]['row_hash']
207
+ assert_equal reference, xls.rows[0]['row_hash']
208
+ assert_equal reference, fixed_width.rows[0]['row_hash']
209
+ # same row hashes with different order
210
+ assert_equal reference, csv2.rows[0]['row_hash']
211
+ assert_equal reference, ods2.rows[0]['row_hash']
212
+ assert_equal reference, xls2.rows[0]['row_hash']
213
+ assert_equal reference, fixed_width2.rows[0]['row_hash']
214
+ end
215
+
62
216
  should "open an ODS" do
63
- t = RemoteTable.new(:url => 'http://static.brighterplanet.com/science/profiler/footprint_model.ods', :sheet => 'Export')
64
- assert_equal 'automobiles', t.rows.first['component']
65
- assert_equal 2005.0, t.rows.first['period'].to_f
217
+ t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
218
+
219
+ assert_equal 'Central Africa', t.rows[5]['name']
220
+ assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
66
221
  end
67
222
 
68
223
  should "open a CSV inside a zip file" do
@@ -79,10 +234,10 @@ class RemoteTableTest < Test::Unit::TestCase
79
234
  :format => :fixed_width,
80
235
  :crop => 21..26, # inclusive
81
236
  :cut => '2-',
82
- :select => lambda { |row| /\A[A-Z]/.match row[:code] },
83
- :schema => [[ :code, 2, { :type => :string } ],
84
- [ :spacer, 2 ],
85
- [ :name, 52, { :type => :string } ]])
237
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
238
+ :schema => [[ 'code', 2, { :type => :string } ],
239
+ [ 'spacer', 2 ],
240
+ [ 'name', 52, { :type => :string } ]])
86
241
  assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
87
242
  assert_equal 'R', t.rows.first['code']
88
243
  assert_equal 'electricity', t.rows.last['name']
@@ -92,10 +247,23 @@ class RemoteTableTest < Test::Unit::TestCase
92
247
  should "open an XLS with a parser" do
93
248
  ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
94
249
  ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
95
-
250
+
96
251
  t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
97
252
  :transform => { :class => FuelOilParser })
98
- assert_equal ma_1990_01, t.rows[0]
99
- assert_equal ga_1990_01, t.rows[1]
253
+ assert t.rows.include?(ma_1990_01)
254
+ assert t.rows.include?(ga_1990_01)
255
+ end
256
+
257
+ should "provide a row_hash on demand" do
258
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
259
+ :filename => 'Gd6-dsc.txt',
260
+ :format => :fixed_width,
261
+ :crop => 21..26, # inclusive
262
+ :cut => '2-',
263
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
264
+ :schema => [[ 'code', 2, { :type => :string } ],
265
+ [ 'spacer', 2 ],
266
+ [ 'name', 52, { :type => :string } ]])
267
+ assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
100
268
  end
101
269
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -10,18 +10,18 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-11-05 00:00:00 -05:00
13
+ date: 2010-02-24 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: activesupport
17
+ name: roo
18
18
  type: :runtime
19
19
  version_requirement:
20
20
  version_requirements: !ruby/object:Gem::Requirement
21
21
  requirements:
22
- - - ">="
22
+ - - ~>
23
23
  - !ruby/object:Gem::Version
24
- version: "0"
24
+ version: 1.3.11
25
25
  version:
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: fastercsv
@@ -29,29 +29,29 @@ dependencies:
29
29
  version_requirement:
30
30
  version_requirements: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - ">="
32
+ - - ~>
33
33
  - !ruby/object:Gem::Version
34
- version: "0"
34
+ version: 1.5.0
35
35
  version:
36
36
  - !ruby/object:Gem::Dependency
37
- name: ryanwood-slither
37
+ name: activesupport
38
38
  type: :runtime
39
39
  version_requirement:
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  requirements:
42
- - - ">="
42
+ - - ~>
43
43
  - !ruby/object:Gem::Version
44
- version: "0"
44
+ version: 2.3.4
45
45
  version:
46
46
  - !ruby/object:Gem::Dependency
47
- name: roo
47
+ name: ryanwood-slither
48
48
  type: :runtime
49
49
  version_requirement:
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "="
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
- version: 1.3.11
54
+ version: 0.99.3
55
55
  version:
56
56
  description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
57
57
  email: seamus@abshere.net