remote_table 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +17 -0
- data/README.markdown +59 -37
- data/lib/remote_table.rb +478 -56
- data/lib/remote_table/delimited.rb +91 -0
- data/lib/remote_table/fixed_width.rb +81 -0
- data/lib/remote_table/html.rb +13 -0
- data/lib/remote_table/{local_file.rb → local_copy.rb} +26 -22
- data/lib/remote_table/ods.rb +17 -0
- data/lib/remote_table/plaintext.rb +67 -0
- data/lib/remote_table/processed_by_nokogiri.rb +76 -0
- data/lib/remote_table/processed_by_roo.rb +97 -0
- data/lib/remote_table/transformer.rb +9 -5
- data/lib/remote_table/version.rb +1 -1
- data/lib/remote_table/xls.rb +11 -0
- data/lib/remote_table/xlsx.rb +11 -0
- data/lib/remote_table/xml.rb +13 -0
- data/lib/remote_table/yaml.rb +14 -0
- data/remote_table.gemspec +2 -2
- data/test/test_big.rb +1 -1
- data/test/test_remote_table.rb +26 -21
- metadata +19 -20
- data/lib/remote_table/config.rb +0 -251
- data/lib/remote_table/format.rb +0 -49
- data/lib/remote_table/format/delimited.rb +0 -60
- data/lib/remote_table/format/excel.rb +0 -10
- data/lib/remote_table/format/excelx.rb +0 -10
- data/lib/remote_table/format/fixed_width.rb +0 -60
- data/lib/remote_table/format/html.rb +0 -12
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +0 -70
- data/lib/remote_table/format/mixins/processed_by_roo.rb +0 -63
- data/lib/remote_table/format/mixins/textual.rb +0 -43
- data/lib/remote_table/format/open_office.rb +0 -13
- data/lib/remote_table/format/xml.rb +0 -12
- data/lib/remote_table/format/yaml.rb +0 -14
data/CHANGELOG
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
2.0.0 / 2012-05-08
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* New names for options... (not really breaking, these deprecated options are still accepted)
|
6
|
+
:errata -> :errata_settings
|
7
|
+
:transform -> :transform_settings
|
8
|
+
:select -> :pre_select (to avoid conflict with Enumerable#select)
|
9
|
+
:reject -> :pre_reject
|
10
|
+
:encoding -> :internal_encoding
|
11
|
+
|
12
|
+
* Enhancements
|
13
|
+
|
14
|
+
* Every option is documented
|
15
|
+
* Refactored to simplify and DRY
|
16
|
+
* Thread safe
|
17
|
+
|
1
18
|
1.4.0 / 2012-04-12
|
2
19
|
|
3
20
|
* Enhancements
|
data/README.markdown
CHANGED
@@ -1,25 +1,38 @@
|
|
1
1
|
# remote_table
|
2
2
|
|
3
|
-
Open local or remote XLSX, XLS, ODS, CSV
|
3
|
+
Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma separated), TSV (tab separated), other delimited, fixed-width files.
|
4
4
|
|
5
|
-
|
5
|
+
Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7+. Thread-safe.
|
6
6
|
|
7
|
-
|
7
|
+
## Real-world usage
|
8
|
+
|
9
|
+
<p><a href="http://brighterplanet.com"><img src="https://s3.amazonaws.com/static.brighterplanet.com/assets/logos/flush-left/inline/green/rasterized/brighter_planet-160-transparent.png" alt="Brighter Planet logo"/></a></p>
|
10
|
+
|
11
|
+
We use `remote_table` for [data science at Brighter Planet](http://brighterplanet.com/research) and in production at
|
12
|
+
|
13
|
+
* [Brighter Planet's impact estimate web service](http://impact.brighterplanet.com)
|
14
|
+
* [Brighter Planet's reference data web service](http://data.brighterplanet.com)
|
15
|
+
|
16
|
+
It's also a big part of
|
17
|
+
|
18
|
+
* the [`data_miner`](https://github.com/seamusabshere/data_miner) library
|
19
|
+
* the [`earth`](https://github.com/brighterplanet/earth) library
|
8
20
|
|
9
21
|
## Example
|
10
22
|
|
11
|
-
|
12
|
-
|
23
|
+
>> require 'remote_table'
|
24
|
+
remote_table.rb:8:in `<top (required)>': iconv will be deprecated in the future, use String#encode instead.
|
25
|
+
[remote_table] Apologies - using iconv because Ruby 1.9.x's String#encode doesn't have transliteration tables (yet)
|
13
26
|
=> true
|
14
|
-
|
15
|
-
=> #<RemoteTable:
|
16
|
-
|
27
|
+
>> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
|
28
|
+
=> #<RemoteTable:0x00000101b87390 @download_count_mutex=#<Mutex:0x00000101b87228>, @iconv_mutex=#<Mutex:0x00000101b87200>, @extend_bang_mutex=#<Mutex:0x00000101b871d8>, @errata_mutex=#<Mutex:0x00000101b871b0>, @cache=[], @download_count=0, @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip", @format=nil, @headers=:first_row, @compression=:zip, @packing=nil, @streaming=false, @warn_on_multiple_downloads=true, @delimiter=",", @sheet=nil, @keep_blank_rows=false, @form_data=nil, @skip=0, @internal_encoding="UTF-8", @row_xpath=nil, @column_xpath=nil, @row_css=nil, @column_css=nil, @glob=nil, @filename="98guide6.csv", @transform_settings=nil, @cut=nil, @crop=nil, @schema=nil, @schema_name=nil, @pre_select=nil, @pre_reject=nil, @errata_settings=nil, @other_options={}, @transformer=#<RemoteTable::Transformer:0x00000101b8c2f0 @t=#<RemoteTable:0x00000101b87390 ...>, @legacy_transformer_mutex=#<Mutex:0x00000101b8c2a0>>, @local_copy=#<RemoteTable::LocalCopy:0x00000101b8bf58 @t=#<RemoteTable:0x00000101b87390 ...>, @encoded_io_mutex=#<Mutex:0x00000101b8be18>, @generate_mutex=#<Mutex:0x00000101b8bdc8>>>
|
29
|
+
>> t.rows.length
|
17
30
|
=> 806
|
18
|
-
|
31
|
+
>> t.rows.first.length
|
19
32
|
=> 26
|
20
|
-
|
33
|
+
>> require 'pp'
|
21
34
|
=> true
|
22
|
-
|
35
|
+
>> pp t[23]
|
23
36
|
{"Class"=>"TWO SEATERS",
|
24
37
|
"Manufacturer"=>"PORSCHE",
|
25
38
|
"carline name"=>"BOXSTER",
|
@@ -47,7 +60,19 @@ Used by [the Brighter Planet Reference Data web service](http://data.brighterpla
|
|
47
60
|
"eng dscr"=>"",
|
48
61
|
"trans dscr"=>""}
|
49
62
|
|
50
|
-
|
63
|
+
## Columns and rows
|
64
|
+
|
65
|
+
* If there are headers, you get an <code>Array</code> of <code>Hash</code>es with **string keys**.
|
66
|
+
* If you set <code>:headers => false</code>, then you get an <code>Array</code> of <code>Array</code>s.
|
67
|
+
|
68
|
+
## Row keys
|
69
|
+
|
70
|
+
Row keys are **strings**. Row keys are NOT symbolized.
|
71
|
+
|
72
|
+
row['foobar'] # correct
|
73
|
+
row[:foobar] # incorrect
|
74
|
+
|
75
|
+
You can call <code>symbolize_keys</code> yourself, but we don't do it automatically to avoid creating loads of garbage symbols.
|
51
76
|
|
52
77
|
## Supported formats
|
53
78
|
|
@@ -59,7 +84,7 @@ You get an <code>Array</code> of <code>Hash</code>es with **string keys**. If yo
|
|
59
84
|
</tr>
|
60
85
|
<tr>
|
61
86
|
<td>Delimited (CSV, TSV, etc.)</td>
|
62
|
-
<td>All <code>RemoteTable::
|
87
|
+
<td>All <code>RemoteTable::Delimited::PASSTHROUGH_CSV_SETTINGS</code>, for example <code>:col_sep</code>, are passed directly to fastercsv.</td>
|
63
88
|
<td>
|
64
89
|
<a href="http://fastercsv.rubyforge.org/">fastercsv</a> (1.8);
|
65
90
|
<a href="http://www.ruby-doc.org/stdlib-1.9.3/libdoc/csv/rdoc/index.html">stdlib</a> (1.9)
|
@@ -147,12 +172,12 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
147
172
|
:row_xpath => '//table/tr[2]/td/table/tr',
|
148
173
|
:column_xpath => 'td',
|
149
174
|
:errata => { RemoteTable.new('https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdGVBRnhkRGhSaVptSDJ5bXJGbkpUSWc&output=csv', :responder => Aircraft::Guru.new },
|
150
|
-
:select =>
|
175
|
+
:select => proc { |record| manufacturer_whitelist? record['Manufacturer'] })
|
151
176
|
|
152
177
|
# OpenFlights.org airports database
|
153
178
|
RemoteTable.new('https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat',
|
154
179
|
:headers => %w{ id name city country_name iata_code icao_code latitude longitude altitude timezone daylight_savings },
|
155
|
-
:select =>
|
180
|
+
:select => proc { |record| record['iata_code'].present? },
|
156
181
|
:errata => { RemoteTable.new('https://spreadsheets.google.com/pub?key=0AoQJbWqPrREqdFc2UzhQYU5PWEQ0N21yWFZGNmc2a3c&gid=0&output=csv', :responder => Airport::Guru.new }) # see https://github.com/brighterplanet/earth/blob/master/lib/earth/air/aircraft/data_miner.rb
|
157
182
|
|
158
183
|
# T100 flight segment data for #{month.strftime('%B %Y')}
|
@@ -162,7 +187,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
162
187
|
:compression => :zip,
|
163
188
|
:glob => '/*.csv',
|
164
189
|
:errata => { RemoteTable.new('https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdGxpYU1qWFR3d0syTVMyQVVOaDd0V3c&output=csv', :responder => FlightSegment::Guru.new },
|
165
|
-
:select =>
|
190
|
+
:select => proc { |record| record['DEPARTURES_PERFORMED'].to_i > 0 })
|
166
191
|
|
167
192
|
# 1995 Fuel Economy Guide
|
168
193
|
# for definition of `:fuel_economy_guide_b` and `AutomobileMakeModelYearVariant::ParserB` see https://github.com/brighterplanet/earth/blob/master/lib/earth/automobile/automobile_make_model_year_variant/data_miner.rb
|
@@ -171,7 +196,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
171
196
|
:format => :fixed_width,
|
172
197
|
:cut => '13-',
|
173
198
|
:schema_name => :fuel_economy_guide_b,
|
174
|
-
:select =>
|
199
|
+
:select => proc { |row| row['model'].present? and (row['suppress_code'].blank? or row['suppress_code'].to_f == 0) and row['state_code'] == 'F' },
|
175
200
|
:transform => { :class => AutomobileMakeModelYearVariant::ParserB, :year => 1995 },
|
176
201
|
:errata => { :url => "https://docs.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdDkxTElWRVlvUXB3Uy04SDhSYWkzakE&output=csv", :responder => AutomobileMakeModelYearVariant::Guru.new })
|
177
202
|
|
@@ -181,30 +206,30 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
181
206
|
:filename => '98guide6.csv',
|
182
207
|
:transform => { :class => AutomobileMakeModelYearVariant::ParserC, :year => 1998 },
|
183
208
|
:errata => { :url => "https://docs.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdDkxTElWRVlvUXB3Uy04SDhSYWkzakE&output=csv", :responder => AutomobileMakeModelYearVariant::Guru.new },
|
184
|
-
:select =>
|
209
|
+
:select => proc { |row| row['model'].present? })
|
185
210
|
|
186
211
|
# annual corporate average fuel economy data for domestic and imported vehicle fleets from the NHTSA
|
187
212
|
RemoteTable.new('https://spreadsheets.google.com/pub?key=0AoQJbWqPrREqdEdXWXB6dkVLWkowLXhYSFVUT01sS2c&hl=en&gid=0&output=csv',
|
188
213
|
:errata => { 'url' => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
|
189
|
-
:select =>
|
214
|
+
:select => proc { |row| row['volume'].to_i > 0 })
|
190
215
|
|
191
216
|
# total vehicle miles travelled by gasoline passenger cars from the 2010 EPA GHG Inventory
|
192
217
|
RemoteTable.new('http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
193
218
|
:filename => 'Annex Tables/Annex 3/Table A-87.csv',
|
194
219
|
:skip => 1,
|
195
|
-
:select =>
|
220
|
+
:select => proc { |row| row['Year'].to_i.to_s == row['Year'] })
|
196
221
|
|
197
222
|
# total vehicle miles travelled from the 2010 EPA GHG Inventory
|
198
223
|
RemoteTable.new('http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
199
224
|
:filename => 'Annex Tables/Annex 3/Table A-87.csv',
|
200
225
|
:skip => 1,
|
201
|
-
:select =>
|
226
|
+
:select => proc { |row| row['Year'].to_i.to_s == row['Year'] })
|
202
227
|
|
203
228
|
# total travel distribution from the 2010 EPA GHG Inventory
|
204
229
|
RemoteTable.new('http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
205
230
|
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
206
231
|
:skip => 1,
|
207
|
-
:select =>
|
232
|
+
:select => proc { |row| row['Vehicle Age'].to_i.to_s == row['Vehicle Age'] })
|
208
233
|
|
209
234
|
# building characteristics from the 2003 EIA Commercial Building Energy Consumption Survey
|
210
235
|
RemoteTable.new('http://www.eia.gov/emeu/cbecs/cbecs2003/public_use_2003/data/FILE02.csv',
|
@@ -215,7 +240,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
215
240
|
# for definition of `CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER` see https://github.com/brighterplanet/earth/blob/master/lib/earth/industry/cbecs_energy_intensity/data_miner.rb
|
216
241
|
RemoteTable.new("http://www.eia.gov/emeu/cbecs/cbecs2003/detailed_tables_2003/2003set10/2003excel/C17.xls",
|
217
242
|
:headers => false,
|
218
|
-
:select =>
|
243
|
+
:select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
|
219
244
|
:crop => (21..37))
|
220
245
|
|
221
246
|
# U.S. Census 2002 NAICS code list
|
@@ -238,13 +263,13 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
238
263
|
RemoteTable.new('http://www.census.gov/popest/about/geo/state_geocodes_v2009.txt',
|
239
264
|
:skip => 6,
|
240
265
|
:headers => %w{ Region Division FIPS Name },
|
241
|
-
:select =>
|
266
|
+
:select => proc { |row| row['Division'].to_i > 0 and row['FIPS'].to_i == 0 })
|
242
267
|
|
243
268
|
# state census divisions from the U.S. Census
|
244
269
|
RemoteTable.new('http://www.census.gov/popest/about/geo/state_geocodes_v2009.txt',
|
245
270
|
:skip => 8,
|
246
271
|
:headers => ['Region', 'Division', 'State FIPS', 'Name'],
|
247
|
-
:select =>
|
272
|
+
:select => proc { |row| row['State FIPS'].to_i > 0 })
|
248
273
|
|
249
274
|
# OpenGeoCode.org's Country Codes to Country Names list
|
250
275
|
RemoteTable.new('http://opengeocode.org/download/countrynames.txt',
|
@@ -267,19 +292,19 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
267
292
|
RemoteTable.new('http://www.epa.gov/cleanenergy/documents/egridzips/eGRID2010V1_1_STIE_USGC.xls',
|
268
293
|
:sheet => 'STIE07',
|
269
294
|
:skip => 4,
|
270
|
-
:select =>
|
295
|
+
:select => proc { |row| row['eGRID2010 year 2007 file state sequence number'].to_i.between?(1, 51) })
|
271
296
|
|
272
297
|
# eGRID 2010 subregions and electricity emission factors
|
273
298
|
RemoteTable.new('http://www.epa.gov/cleanenergy/documents/egridzips/eGRID2010_Version1-1_xls_only.zip',
|
274
299
|
:filename => 'eGRID2010V1_1_year07_AGGREGATION.xls',
|
275
300
|
:sheet => 'SRL07',
|
276
301
|
:skip => 4,
|
277
|
-
:select =>
|
302
|
+
:select => proc { |row| row['SEQSRL07'].to_i.between?(1, 26) })
|
278
303
|
|
279
304
|
# U.S. Census State ANSI Code file
|
280
305
|
RemoteTable.new('http://www.census.gov/geo/www/ansi/state.txt',
|
281
306
|
:delimiter => '|',
|
282
|
-
:select =>
|
307
|
+
:select => proc { |record| record['STATE'].to_i < 60 })
|
283
308
|
|
284
309
|
# Mapping Hacks zipcode database
|
285
310
|
RemoteTable.new('http://mappinghacks.com/data/zipcode.zip',
|
@@ -295,18 +320,18 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
295
320
|
# Brighter Planet's list of cat and dog breeds, genders, and weights
|
296
321
|
RemoteTable.new('http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv',
|
297
322
|
:encoding => 'ISO-8859-1',
|
298
|
-
:select =>
|
323
|
+
:select => proc { |row| row['gender'].present? })
|
299
324
|
|
300
325
|
# residential electricity prices from the EIA
|
301
326
|
RemoteTable.new('http://www.eia.doe.gov/cneaf/electricity/page/sales_revenue.xls',
|
302
|
-
:select =>
|
327
|
+
:select => proc { |row| row['Year'].to_s.first(4).to_i > 1989 })
|
303
328
|
|
304
329
|
# residential natural gas prices from the EIA
|
305
330
|
# for definition of `NaturalGasParser` see https://github.com/brighterplanet/earth/blob/master/lib/earth/residence/residence_fuel_price/data_miner.rb
|
306
331
|
RemoteTable.new('http://tonto.eia.doe.gov/dnav/ng/xls/ng_pri_sum_a_EPG0_FWA_DMcf_a.xls',
|
307
332
|
:sheet => 'Data 1',
|
308
333
|
:skip => 2,
|
309
|
-
:select =>
|
334
|
+
:select => proc { |row| row['year'].to_i > 1989 },
|
310
335
|
:transform => { :class => NaturalGasParser })
|
311
336
|
|
312
337
|
# 2005 EIA Residential Energy Consumption Survey microdata
|
@@ -375,7 +400,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
375
400
|
:format => :fixed_width,
|
376
401
|
:crop => 21..26, # inclusive
|
377
402
|
:cut => '2-',
|
378
|
-
:select =>
|
403
|
+
:select => proc { |row| /\A[A-Z]/.match row['code'] },
|
379
404
|
:schema => [[ 'code', 2, { :type => :string } ],
|
380
405
|
[ 'spacer', 2 ],
|
381
406
|
[ 'name', 52, { :type => :string } ]]
|
@@ -420,14 +445,11 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
420
445
|
|
421
446
|
## Requirements
|
422
447
|
|
423
|
-
*
|
424
|
-
* Unix tools like curl, iconv, perl, cat, cut, tail, etc. accessible from `ENV['PATH']`
|
425
|
-
|
426
|
-
As this library matures, that requirement should go away.
|
448
|
+
* Unix tools like curl, iconv, perl, cat, cut, tail, etc. accessible from your `$PATH`
|
427
449
|
|
428
450
|
## Wishlist
|
429
451
|
|
430
|
-
*
|
452
|
+
* Win32 compat
|
431
453
|
* The new "custom parser" syntax (aka transformer) hasn't been defined yet... only the old-style syntax is available
|
432
454
|
|
433
455
|
## Authors
|
data/lib/remote_table.rb
CHANGED
@@ -3,6 +3,14 @@ if ::RUBY_VERSION < '1.9' and $KCODE != 'UTF8'
|
|
3
3
|
$KCODE = 'UTF8'
|
4
4
|
end
|
5
5
|
|
6
|
+
require 'thread'
|
7
|
+
|
8
|
+
require 'iconv'
|
9
|
+
if RUBY_VERSION >= '1.9'
|
10
|
+
# for an excellent explanation see http://blog.segment7.net/2010/12/17/from-iconv-iconv-to-string-encode
|
11
|
+
Kernel.warn "[remote_table] Apologies - using iconv because Ruby 1.9.x's String#encode doesn't have transliteration tables (yet)"
|
12
|
+
end
|
13
|
+
|
6
14
|
require 'active_support'
|
7
15
|
require 'active_support/version'
|
8
16
|
if ::ActiveSupport::VERSION::MAJOR >= 3
|
@@ -10,78 +18,432 @@ if ::ActiveSupport::VERSION::MAJOR >= 3
|
|
10
18
|
end
|
11
19
|
require 'hash_digest'
|
12
20
|
|
13
|
-
require 'remote_table/
|
14
|
-
require 'remote_table/config'
|
15
|
-
require 'remote_table/local_file'
|
21
|
+
require 'remote_table/local_copy'
|
16
22
|
require 'remote_table/transformer'
|
17
23
|
|
24
|
+
require 'remote_table/plaintext'
|
25
|
+
require 'remote_table/processed_by_roo'
|
26
|
+
require 'remote_table/processed_by_nokogiri'
|
27
|
+
require 'remote_table/xls'
|
28
|
+
require 'remote_table/xlsx'
|
29
|
+
require 'remote_table/delimited'
|
30
|
+
require 'remote_table/ods'
|
31
|
+
require 'remote_table/fixed_width'
|
32
|
+
require 'remote_table/html'
|
33
|
+
require 'remote_table/xml'
|
34
|
+
require 'remote_table/yaml'
|
35
|
+
|
18
36
|
class Hash
|
37
|
+
# Added by remote_table to store a hash (think checksum) of the data with which a particular Hash is initialized.
|
38
|
+
# @return [String]
|
19
39
|
attr_accessor :row_hash
|
20
40
|
end
|
21
41
|
|
22
42
|
class Array
|
43
|
+
# Added by remote_table to store a hash (think checksum) of the data with which a particular Array is initialized.
|
44
|
+
# @return [String]
|
23
45
|
attr_accessor :row_hash
|
24
46
|
end
|
25
47
|
|
26
|
-
|
27
|
-
|
48
|
+
# Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma separated), TSV (tab separated), other delimited, fixed-width files.
|
49
|
+
class RemoteTable
|
50
|
+
class << self
|
51
|
+
# Guess compression based on URL. Used internally.
|
52
|
+
# @return [Symbol,nil]
|
53
|
+
def guess_compression(url)
|
54
|
+
extname = ::File.extname(::URI.parse(url).path).downcase
|
55
|
+
case extname
|
56
|
+
when /gz/, /gunzip/
|
57
|
+
:gz
|
58
|
+
when /zip/
|
59
|
+
:zip
|
60
|
+
when /bz2/, /bunzip2/
|
61
|
+
:bz2
|
62
|
+
when /exe/
|
63
|
+
:exe
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Guess packing from URL. Used internally.
|
68
|
+
# @return [Symbol,nil]
|
69
|
+
def guess_packing(url)
|
70
|
+
basename = ::File.basename(::URI.parse(url).path).downcase
|
71
|
+
if basename.include?('.tar') or basename.include?('.tgz')
|
72
|
+
:tar
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# Guess file format from the basename. Since a file might be decompressed and/or pulled out of an archive with a glob, this usually can't be called until a file is downloaded.
|
77
|
+
# @return [Symbol,nil]
|
78
|
+
def guess_format(basename)
|
79
|
+
case basename.to_s.downcase
|
80
|
+
when /ods/, /open_?office/
|
81
|
+
:ods
|
82
|
+
when /xlsx/, /excelx/
|
83
|
+
:xlsx
|
84
|
+
when /xls/, /excel/
|
85
|
+
:xls
|
86
|
+
when /csv/, /tsv/, /delimited/
|
87
|
+
# note that there is no RemoteTable::Csv class - it's normalized to :delimited
|
88
|
+
:delimited
|
89
|
+
when /fixed_?width/
|
90
|
+
:fixed_width
|
91
|
+
when /htm/
|
92
|
+
:html
|
93
|
+
when /xml/
|
94
|
+
:xml
|
95
|
+
when /yaml/, /yml/
|
96
|
+
:yaml
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
# Given a Google Docs spreadsheet URL, make sure it uses CSV output.
|
101
|
+
# @return [String]
|
102
|
+
def google_spreadsheet_csv_url(url)
|
103
|
+
uri = ::URI.parse url
|
104
|
+
params = uri.query.split('&')
|
105
|
+
params.delete_if { |param| param.start_with?('output=') }
|
106
|
+
params << 'output=csv'
|
107
|
+
uri.query = params.join('&')
|
108
|
+
uri.to_s
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# @private
|
113
|
+
# Here to support legacy code.
|
28
114
|
class Transform
|
29
115
|
def self.row_hash(row)
|
30
116
|
::HashDigest.hexdigest row
|
31
117
|
end
|
32
118
|
end
|
33
119
|
|
120
|
+
EXTERNAL_ENCODING = 'UTF-8'
|
121
|
+
EXTERNAL_ENCODING_ICONV = 'UTF-8//TRANSLIT'
|
122
|
+
GOOGLE_DOCS_SPREADSHEET = [
|
123
|
+
/docs.google.com/i,
|
124
|
+
/spreadsheets.google.com/i
|
125
|
+
]
|
126
|
+
VALID = {
|
127
|
+
:compression => [:gz, :zip, :bz2, :exe],
|
128
|
+
:packing => [:tar],
|
129
|
+
:format => [:xlsx, :xls, :delimited, :ods, :fixed_width, :html, :xml, :yaml, :csv],
|
130
|
+
}
|
131
|
+
DEFAULT = {
|
132
|
+
:streaming => false,
|
133
|
+
:warn_on_multiple_downloads => true,
|
134
|
+
:headers => :first_row,
|
135
|
+
:keep_blank_rows => false,
|
136
|
+
:skip => 0,
|
137
|
+
:internal_encoding => 'UTF-8',
|
138
|
+
:delimiter => ','
|
139
|
+
}
|
140
|
+
OLD_SETTING_NAMES = {
|
141
|
+
:internal_encoding => [:encoding],
|
142
|
+
:transform_settings => [:transform],
|
143
|
+
:pre_select => [:select],
|
144
|
+
:pre_reject => [:reject],
|
145
|
+
:errata_settings => [:errata],
|
146
|
+
}
|
147
|
+
|
34
148
|
include ::Enumerable
|
35
149
|
|
150
|
+
# The URL of the local or remote file.
|
151
|
+
#
|
152
|
+
# * Local: "file:///Users/myuser/Desktop/holidays.csv"
|
153
|
+
# * Remote: "http://data.brighterplanet.com/countries.csv"
|
154
|
+
#
|
155
|
+
# @return [String]
|
36
156
|
attr_reader :url
|
37
|
-
|
157
|
+
|
158
|
+
# @private
|
159
|
+
# A cache of rows, created unless +:streaming+ is enabled.
|
160
|
+
# @return [Array<Hash,Array>]
|
161
|
+
attr_reader :cache
|
162
|
+
|
163
|
+
# @private
|
164
|
+
# How many times this file has been downloaded. RemoteTable will emit a warning if you download it more than once.
|
165
|
+
# @return [Integer]
|
166
|
+
attr_reader :download_count
|
167
|
+
|
168
|
+
# @private
|
169
|
+
# Used internally to access the transformer (aka parser).
|
170
|
+
attr_reader :transformer
|
171
|
+
|
172
|
+
# @private
|
173
|
+
# Used internally to access a downloaded copy of the file.
|
174
|
+
# @return [RemoteTable::LocalCopy]
|
175
|
+
attr_reader :local_copy
|
176
|
+
|
177
|
+
# Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you enumerate its rows. Defaults to false.
|
178
|
+
# @return [true,false]
|
179
|
+
attr_reader :streaming
|
180
|
+
|
181
|
+
# Whether to warn the user on multiple downloads. Defaults to true.
|
182
|
+
# @return [true,false]
|
183
|
+
attr_reader :warn_on_multiple_downloads
|
184
|
+
|
185
|
+
# Headers specified by the user: +:first_row+ (the default), +false+, or a list of headers.
|
186
|
+
# @return [:first_row,false,Array<String>]
|
187
|
+
attr_reader :headers
|
188
|
+
|
189
|
+
# The sheet specified by the user as a number or a string.
|
190
|
+
# @return [String,Integer]
|
191
|
+
attr_reader :sheet
|
192
|
+
|
193
|
+
# Whether to keep blank rows. Default is false.
|
194
|
+
# @return [true,false]
|
195
|
+
attr_reader :keep_blank_rows
|
196
|
+
|
197
|
+
# Form data to POST in the download request. It should probably be in +application/x-www-form-urlencoded+.
|
198
|
+
# @return [String]
|
199
|
+
attr_reader :form_data
|
200
|
+
|
201
|
+
# How many rows to skip at the beginning of the file or table. Default is 0.
|
202
|
+
# @return [Integer]
|
203
|
+
attr_reader :skip
|
204
|
+
|
205
|
+
# The original encoding of the source file. Default is UTF-8. Previously passed as +:encoding+.
|
206
|
+
# @return [String]
|
207
|
+
attr_reader :internal_encoding
|
208
|
+
|
209
|
+
# The delimiter, a.k.a. column separator. Passed to Ruby CSV as +:col_sep+. Default is a comma (",").
|
210
|
+
# @return [String]
|
211
|
+
attr_reader :delimiter
|
212
|
+
|
213
|
+
# The XPath used to find rows in HTML or XML.
|
214
|
+
# @return [String]
|
215
|
+
attr_reader :row_xpath
|
216
|
+
|
217
|
+
# The XPath used to find columns in HTML or XML.
|
218
|
+
# @return [String]
|
219
|
+
attr_reader :column_xpath
|
220
|
+
|
221
|
+
# The CSS selector used to find rows in HTML or XML.
|
222
|
+
# @return [String]
|
223
|
+
attr_reader :row_css
|
224
|
+
|
225
|
+
# The CSS selector used to find columns in HTML or XML.
|
226
|
+
# @return [String]
|
227
|
+
attr_reader :column_css
|
228
|
+
|
229
|
+
# The format of the source file. Can be +:xlsx+, +:xls+, +:delimited+, +:ods+, +:fixed_width+, +:html+, +:xml+, +:yaml+.
|
230
|
+
# @return [Symbol]
|
231
|
+
attr_reader :format
|
232
|
+
|
233
|
+
# The compression type. Guessed from URL if not provided. +:gz+, +:zip+, +:bz2+, and +:exe+ (treated as +:zip+) are supported.
|
234
|
+
# @return [Symbol]
|
235
|
+
attr_reader :compression
|
236
|
+
|
237
|
+
# The packing type. Guessed from URL if not provided. Only +:tar+ is supported.
|
238
|
+
# @return [Symbol]
|
239
|
+
attr_reader :packing
|
240
|
+
|
241
|
+
# The glob used to pick a file out of an archive.
|
242
|
+
#
|
243
|
+
# @return [String]
|
244
|
+
#
|
245
|
+
# @example Pick out the only CSV in a ZIP file
|
246
|
+
# RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
247
|
+
attr_reader :glob
|
248
|
+
|
249
|
+
# The filename, which can be used to pick a file out of an archive.
|
250
|
+
#
|
251
|
+
# @return [String]
|
252
|
+
#
|
253
|
+
# @example Specify the filename to get out of a ZIP file
|
254
|
+
# RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
|
255
|
+
attr_reader :filename
|
256
|
+
|
257
|
+
# Pick specific columns out of a plaintext file using an argument to the UNIX [+cut+ utility](http://en.wikipedia.org/wiki/Cut_%28Unix%29).
|
258
|
+
#
|
259
|
+
# @return [String]
|
260
|
+
#
|
261
|
+
# @example Pick ALMOST out of ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
262
|
+
# # $ echo ABCDEFGHIJKLMNOPQRSTUVWXYZ | cut -c '1,12,13,15,19,20'
|
263
|
+
# # ALMOST
|
264
|
+
# RemoteTable.new 'file:///atoz.txt', :cut => '1,12,13,15,19,20'
|
265
|
+
attr_reader :cut
|
266
|
+
|
267
|
+
# Use a range of rows in a plaintext file.
|
268
|
+
#
|
269
|
+
# @return [Range]
|
270
|
+
#
|
271
|
+
# @example Only take rows 21 through 37
|
272
|
+
# RemoteTable.new("http://www.eia.gov/emeu/cbecs/cbecs2003/detailed_tables_2003/2003set10/2003excel/C17.xls",
|
273
|
+
# :headers => false,
|
274
|
+
# :select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
|
275
|
+
# :crop => (21..37))
|
276
|
+
attr_reader :crop
|
277
|
+
|
278
|
+
# The fixed-width schema, given as a multi-dimensional array.
|
279
|
+
#
|
280
|
+
# @return [Array<Array{String,Integer,Hash}>]
|
281
|
+
#
|
282
|
+
# @example From the tests
|
283
|
+
# RemoteTable.new('http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
284
|
+
# :format => :fixed_width,
|
285
|
+
# :skip => 1,
|
286
|
+
# :schema => [[ 'header4', 10, { :type => :string } ],
|
287
|
+
# [ 'spacer', 1 ],
|
288
|
+
# [ 'header5', 10, { :type => :string } ],
|
289
|
+
# [ 'spacer', 12 ],
|
290
|
+
# [ 'header6', 10, { :type => :string } ]])
|
291
|
+
attr_reader :schema
|
38
292
|
|
39
|
-
#
|
293
|
+
# If you somehow already defined a fixed-width schema (so you can re-use it?), specify it here.
|
294
|
+
# @return [String,Symbol]
|
295
|
+
attr_reader :schema_name
|
296
|
+
|
297
|
+
# A proc that decides whether to include a row. Previously passed as +:select+.
|
298
|
+
# @return [Proc]
|
299
|
+
attr_reader :pre_select
|
300
|
+
|
301
|
+
# A proc that decides whether to exclude a row. Previously passed as +:reject+.
|
302
|
+
# @return [Proc]
|
303
|
+
attr_reader :pre_reject
|
304
|
+
|
305
|
+
# Settings to create a transformer.
|
306
|
+
# @return [Hash]
|
307
|
+
attr_reader :transform_settings
|
308
|
+
|
309
|
+
# A hash of settings to initialize an Errata instance to be used on every row. Previously passed as +:errata+.
|
310
|
+
#
|
311
|
+
# See the Errata library at https://github.com/seamusabshere/errata
|
312
|
+
#
|
313
|
+
# @return [Hash]
|
314
|
+
attr_reader :errata_settings
|
315
|
+
|
316
|
+
# The format of the source file. Can be specified as: :xlsx, :xls, :delimited (aka :csv), :ods, :fixed_width, :html, :xml, :yaml
|
317
|
+
#
|
318
|
+
# Note: treats all +docs.google.com+ and +spreadsheets.google.com+ URLs as +:delimited+.
|
40
319
|
#
|
41
|
-
#
|
320
|
+
# Default: guessed from file extension (which is usually the same as the URL, but sometimes not if you pick out a specific file from an archive)
|
42
321
|
#
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
#
|
322
|
+
# @return [Symbol]
|
323
|
+
attr_reader :format
|
324
|
+
|
325
|
+
# Options passed by the user that may be passed through to the underlying parsing library.
|
326
|
+
# @return [Hash]
|
327
|
+
attr_reader :other_options
|
328
|
+
|
329
|
+
# Create a new RemoteTable, which is an Enumerable.
|
330
|
+
#
|
331
|
+
# Does not immediately download/parse... it's lazy-loading.
|
332
|
+
#
|
333
|
+
# @overload initialize(settings)
|
334
|
+
# @param [Hash] settings Settings including +:url+.
|
47
335
|
#
|
48
|
-
#
|
336
|
+
# @overload initialize(url, settings)
|
337
|
+
# @param [String] url The URL to the local or remote file.
|
338
|
+
# @param [Hash] settings Settings.
|
339
|
+
#
|
340
|
+
# @example Open an XLSX
|
341
|
+
# RemoteTable.new('http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
342
|
+
#
|
343
|
+
# @example Open a CSV inside a ZIP file
|
344
|
+
# RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
345
|
+
# :filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
346
|
+
# :skip => 1,
|
347
|
+
# :pre_select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
|
49
348
|
def initialize(*args)
  # Locks used by the lazy initialisers and the download counter.
  @download_count_mutex = ::Mutex.new
  @iconv_mutex = ::Mutex.new
  @extend_bang_mutex = ::Mutex.new
  @errata_mutex = ::Mutex.new

  @cache = []
  @download_count = 0

  # A trailing Hash is the settings; every `grab` below removes the key it reads.
  last_arg = args.last
  settings = last_arg.is_a?(::Hash) ? last_arg.symbolize_keys : {}

  # The URL is either the leading String argument or the :url setting.
  first_arg = args.first
  @url = first_arg.is_a?(::String) ? first_arg : grab(settings, :url)

  @format = RemoteTable.guess_format grab(settings, :format)
  # Google Docs spreadsheet URLs are rewritten to their CSV export and read as delimited text.
  if GOOGLE_DOCS_SPREADSHEET.any? { |regex| regex =~ url }
    @url = RemoteTable.google_spreadsheet_csv_url url
    @format = :delimited
  end

  @headers = grab settings, :headers
  if headers.is_a?(::Array) and headers.any?(&:blank?)
    raise ::ArgumentError, "[remote_table] If you specify headers, none of them can be blank"
  end

  # Explicit settings win; otherwise fall back to guessing from the URL.
  @compression = grab(settings, :compression) || RemoteTable.guess_compression(url)
  @packing = grab(settings, :packing) || RemoteTable.guess_packing(url)

  # Settings that are simply stored under an instance variable of the same name.
  [
    :streaming, :warn_on_multiple_downloads, :delimiter, :sheet, :keep_blank_rows,
    :form_data, :skip, :internal_encoding, :row_xpath, :column_xpath, :row_css,
    :column_css, :glob, :filename, :transform_settings, :cut, :crop, :schema,
    :schema_name, :pre_select, :pre_reject, :errata_settings
  ].each do |name|
    instance_variable_set "@#{name}", grab(settings, name)
  end

  # Whatever was not consumed above is kept for the underlying parsing library.
  @other_options = settings

  @transformer = Transformer.new self
  @local_copy = LocalCopy.new self
end
|
59
|
-
|
60
|
-
#
|
61
|
-
|
406
|
+
|
407
|
+
# Yield each row.
|
408
|
+
#
|
409
|
+
# @return [nil]
|
410
|
+
#
|
411
|
+
# @yield [Hash,Array] A hash or an array depending on whether the RemoteTable has named headers (column names).
|
412
|
+
def each
  extend!
  if fully_cached?
    # Everything was read on an earlier pass; replay it without touching the source.
    cache.each do |row|
      yield row
    end
  else
    mark_download!
    # An earlier pass that was cut short (break, exception, Enumerable#first, ...)
    # leaves its rows in the cache without marking it complete. Drop them so this
    # pass does not append the same rows a second time.
    cache.clear
    _each do |row|
      transformer.transform(row).each do |virtual_row|
        virtual_row.row_hash = ::HashDigest.hexdigest row
        if errata
          next if errata.rejects? virtual_row
          errata.correct! virtual_row
        end
        next if pre_select and !pre_select.call(virtual_row)
        next if pre_reject and pre_reject.call(virtual_row)
        # Streaming mode never retains rows.
        cache.push virtual_row unless streaming
        yield virtual_row
      end
    end
    # Only reached when the pass ran to completion.
    fully_cached! unless streaming
  end
  nil
end
|
442
|
+
|
443
|
+
# @deprecated
|
83
444
|
alias :each_row :each
|
84
445
|
|
446
|
+
# @return [Array<Hash,Array>] All rows.
|
85
447
|
def to_a
|
86
448
|
if fully_cached?
|
87
449
|
cache.dup
|
@@ -89,9 +451,13 @@ class RemoteTable
|
|
89
451
|
map { |row| row }
|
90
452
|
end
|
91
453
|
end
|
454
|
+
|
455
|
+
# @deprecated
|
92
456
|
alias :rows :to_a
|
93
457
|
|
94
|
-
# Get a row by row number
|
458
|
+
# Get a row by row number. Zero-based.
|
459
|
+
#
|
460
|
+
# @return [Hash,Array]
|
95
461
|
def [](row_number)
|
96
462
|
if fully_cached?
|
97
463
|
cache[row_number]
|
@@ -100,35 +466,37 @@ class RemoteTable
|
|
100
466
|
end
|
101
467
|
end
|
102
468
|
|
103
|
-
#
|
469
|
+
# Clear the row cache in case it helps your GC.
|
470
|
+
#
|
471
|
+
# @return [nil]
|
104
472
|
def free
  # Forget that the cache was complete and drop the memoized errata,
  # then empty the cache array in place.
  @fully_cached, @errata = false, nil
  cache.clear
  nil
end
|
108
|
-
|
109
|
-
#
|
110
|
-
def
|
111
|
-
@
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
@transformer ||= Transformer.new self
|
478
|
+
|
479
|
+
# @private
|
480
|
+
# Memoized errata built from +errata_settings+.
#
# An Errata instance is used as-is (with a deprecation warning), a Hash is
# passed to +Errata.new+, and anything else yields nil.
#
# @return [Errata, nil]
def errata
  return @errata if @errata
  @errata_mutex.synchronize do
    # Another caller may have built it while we waited for the lock.
    next @errata if @errata
    source = errata_settings
    @errata = if defined?(::Errata) and source.is_a?(::Errata)
      ::Kernel.warn %{[remote_table] Passing :errata_settings as an Errata object is deprecated. Please pass a Hash of settings instead.}
      source
    elsif source.is_a?(::Hash)
      ::Errata.new source
    end
  end
end
|
123
|
-
|
124
|
-
attr_reader :download_count
|
125
|
-
|
492
|
+
|
126
493
|
private
|
127
494
|
|
128
495
|
# Count one more download and, when +warn_on_multiple_downloads+ is set,
# warn once the source has been downloaded more than once.
#
# @return [nil]
def mark_download!
  # Use the value produced under the lock: re-reading the counter afterwards
  # could report an increment made by another thread in the meantime.
  count = @download_count_mutex.synchronize { @download_count += 1 }
  if warn_on_multiple_downloads and count > 1
    ::Kernel.warn "[remote_table] #{url} has been downloaded #{count} times."
  end
end
|
@@ -140,8 +508,62 @@ class RemoteTable
|
|
140
508
|
# @return [Boolean] true once @fully_cached holds a truthy value
def fully_cached?
  @fully_cached ? true : false
end
|
143
|
-
|
144
|
-
def
|
145
|
-
@
|
511
|
+
|
512
|
+
# Memoized Iconv converter from +internal_encoding+ to EXTERNAL_ENCODING_ICONV.
def iconv
  return @iconv if @iconv
  @iconv_mutex.synchronize do
    # Re-check under the lock so the converter is only built once.
    @iconv ||= ::Iconv.new(EXTERNAL_ENCODING_ICONV, internal_encoding)
  end
end
|
517
|
+
|
518
|
+
# Run +str+ through the memoized Iconv converter.
#
# @param [Object] str
# @return [String, nil] the converted text, or nil when +str+ is not a String
def transliterate_to_utf8(str)
  return unless str.is_a?(::String)
  converted = iconv.iconv(str)
  # A nil argument asks the converter for whatever it still has to emit.
  remainder = iconv.iconv(nil)
  [ converted, remainder ].join
end
|
523
|
+
|
524
|
+
# On Ruby 1.9+, transcode a String in place to EXTERNAL_ENCODING; anything
# else (non-Strings, or any value on older Rubies) is returned untouched.
#
# @param [Object] str
# @return [Object] +str+ itself
def assume_utf8(str)
  return str unless str.is_a?(::String) and ::RUBY_VERSION >= '1.9'
  str.encode! EXTERNAL_ENCODING
end
|
531
|
+
|
532
|
+
# Remove setting +k+ from +settings+ and return its value.
#
# Precedence: the current name +k+, then any deprecated names listed in
# OLD_SETTING_NAMES (first non-nil value), then DEFAULT. Deprecated keys are
# always removed from +settings+, even when +k+ itself is present.
#
# @param [Hash] settings mutated: consumed keys are deleted
# @param [Symbol] k
# @return [Object, nil]
# @raise [ArgumentError] if VALID lists allowed values for +k+ and the value is not among them
def grab(settings, k)
  legacy_keys = (OLD_SETTING_NAMES[k] || []).select { |old_k| settings.has_key?(old_k) }
  legacy_values = legacy_keys.map { |old_k| settings.delete(old_k) }

  value = if settings.has_key?(k)
    settings.delete k
  elsif !legacy_keys.empty?
    legacy_values.compact.first
  elsif DEFAULT.has_key?(k)
    # Defaults only apply when the user gave neither the new nor an old name.
    DEFAULT[k]
  end

  allowed = value && VALID[k]
  if allowed and not allowed.include?(value.to_sym)
    raise ::ArgumentError, %{[remote_table] #{k.inspect} => #{value.inspect} is not a valid setting. Valid settings are #{allowed.inspect}.}
  end
  value
end
|
551
|
+
|
552
|
+
# Mix the format-specific module into this instance, once.
#
# The module is chosen from +format+ when set; otherwise the format is guessed
# from the local copy's path (and stored in @format); otherwise Delimited is
# used. +after_extend+ is called afterwards if the module provides it.
#
# @return [nil]
def extend!
  return if @extend_bang
  @extend_bang_mutex.synchronize do
    return if @extend_bang
    format_module = if format
      RemoteTable.const_get format.to_s.camelcase
    elsif (guessed = RemoteTable.guess_format(local_copy.path))
      @format = guessed
      RemoteTable.const_get guessed.to_s.camelcase
    else
      Delimited
    end
    extend format_module
    after_extend if respond_to?(:after_extend)
    # Set the flag last: a failure above (e.g. while fetching the local copy)
    # must leave the object retryable, and the unlocked fast-path check must
    # not let another thread through before the module is mixed in.
    # NOTE(review): assumes neither local_copy.path nor after_extend calls
    # extend! again (the mutex is not reentrant) - confirm.
    @extend_bang = true
  end
  nil
end
|
147
569
|
end
|