remote_table 1.4.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +17 -0
- data/README.markdown +59 -37
- data/lib/remote_table.rb +478 -56
- data/lib/remote_table/delimited.rb +91 -0
- data/lib/remote_table/fixed_width.rb +81 -0
- data/lib/remote_table/html.rb +13 -0
- data/lib/remote_table/{local_file.rb → local_copy.rb} +26 -22
- data/lib/remote_table/ods.rb +17 -0
- data/lib/remote_table/plaintext.rb +67 -0
- data/lib/remote_table/processed_by_nokogiri.rb +76 -0
- data/lib/remote_table/processed_by_roo.rb +97 -0
- data/lib/remote_table/transformer.rb +9 -5
- data/lib/remote_table/version.rb +1 -1
- data/lib/remote_table/xls.rb +11 -0
- data/lib/remote_table/xlsx.rb +11 -0
- data/lib/remote_table/xml.rb +13 -0
- data/lib/remote_table/yaml.rb +14 -0
- data/remote_table.gemspec +2 -2
- data/test/test_big.rb +1 -1
- data/test/test_remote_table.rb +26 -21
- metadata +19 -20
- data/lib/remote_table/config.rb +0 -251
- data/lib/remote_table/format.rb +0 -49
- data/lib/remote_table/format/delimited.rb +0 -60
- data/lib/remote_table/format/excel.rb +0 -10
- data/lib/remote_table/format/excelx.rb +0 -10
- data/lib/remote_table/format/fixed_width.rb +0 -60
- data/lib/remote_table/format/html.rb +0 -12
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +0 -70
- data/lib/remote_table/format/mixins/processed_by_roo.rb +0 -63
- data/lib/remote_table/format/mixins/textual.rb +0 -43
- data/lib/remote_table/format/open_office.rb +0 -13
- data/lib/remote_table/format/xml.rb +0 -12
- data/lib/remote_table/format/yaml.rb +0 -14
data/CHANGELOG
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
2.0.0 / 2012-05-08
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* New names for options... (not really breaking, these deprecated options are still accepted)
|
6
|
+
:errata -> :errata_settings
|
7
|
+
:transform -> :transform_settings
|
8
|
+
:select -> :pre_select (to avoid conflict with Enumerable#select)
|
9
|
+
:reject -> :pre_reject
|
10
|
+
:encoding -> :internal_encoding
|
11
|
+
|
12
|
+
* Enhancements
|
13
|
+
|
14
|
+
* Every option is documented
|
15
|
+
* Refactored to simplify and DRY
|
16
|
+
* Thread safe
|
17
|
+
|
1
18
|
1.4.0 / 2012-04-12
|
2
19
|
|
3
20
|
* Enhancements
|
data/README.markdown
CHANGED
@@ -1,25 +1,38 @@
|
|
1
1
|
# remote_table
|
2
2
|
|
3
|
-
Open local or remote XLSX, XLS, ODS, CSV
|
3
|
+
Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma separated), TSV (tab separated), other delimited, fixed-width files.
|
4
4
|
|
5
|
-
|
5
|
+
Tested on MRI 1.8, MRI 1.9, and JRuby 1.6.7+. Thread-safe.
|
6
6
|
|
7
|
-
|
7
|
+
## Real-world usage
|
8
|
+
|
9
|
+
<p><a href="http://brighterplanet.com"><img src="https://s3.amazonaws.com/static.brighterplanet.com/assets/logos/flush-left/inline/green/rasterized/brighter_planet-160-transparent.png" alt="Brighter Planet logo"/></a></p>
|
10
|
+
|
11
|
+
We use `remote_table` for [data science at Brighter Planet](http://brighterplanet.com/research) and in production at
|
12
|
+
|
13
|
+
* [Brighter Planet's impact estimate web service](http://impact.brighterplanet.com)
|
14
|
+
* [Brighter Planet's reference data web service](http://data.brighterplanet.com)
|
15
|
+
|
16
|
+
It's also a big part of
|
17
|
+
|
18
|
+
* the [`data_miner`](https://github.com/seamusabshere/data_miner) library
|
19
|
+
* the [`earth`](https://github.com/brighterplanet/earth) library
|
8
20
|
|
9
21
|
## Example
|
10
22
|
|
11
|
-
|
12
|
-
|
23
|
+
>> require 'remote_table'
|
24
|
+
remote_table.rb:8:in `<top (required)>': iconv will be deprecated in the future, use String#encode instead.
|
25
|
+
[remote_table] Apologies - using iconv because Ruby 1.9.x's String#encode doesn't have transliteration tables (yet)
|
13
26
|
=> true
|
14
|
-
|
15
|
-
=> #<RemoteTable:
|
16
|
-
|
27
|
+
>> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
|
28
|
+
=> #<RemoteTable:0x00000101b87390 @download_count_mutex=#<Mutex:0x00000101b87228>, @iconv_mutex=#<Mutex:0x00000101b87200>, @extend_bang_mutex=#<Mutex:0x00000101b871d8>, @errata_mutex=#<Mutex:0x00000101b871b0>, @cache=[], @download_count=0, @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip", @format=nil, @headers=:first_row, @compression=:zip, @packing=nil, @streaming=false, @warn_on_multiple_downloads=true, @delimiter=",", @sheet=nil, @keep_blank_rows=false, @form_data=nil, @skip=0, @internal_encoding="UTF-8", @row_xpath=nil, @column_xpath=nil, @row_css=nil, @column_css=nil, @glob=nil, @filename="98guide6.csv", @transform_settings=nil, @cut=nil, @crop=nil, @schema=nil, @schema_name=nil, @pre_select=nil, @pre_reject=nil, @errata_settings=nil, @other_options={}, @transformer=#<RemoteTable::Transformer:0x00000101b8c2f0 @t=#<RemoteTable:0x00000101b87390 ...>, @legacy_transformer_mutex=#<Mutex:0x00000101b8c2a0>>, @local_copy=#<RemoteTable::LocalCopy:0x00000101b8bf58 @t=#<RemoteTable:0x00000101b87390 ...>, @encoded_io_mutex=#<Mutex:0x00000101b8be18>, @generate_mutex=#<Mutex:0x00000101b8bdc8>>>
|
29
|
+
>> t.rows.length
|
17
30
|
=> 806
|
18
|
-
|
31
|
+
>> t.rows.first.length
|
19
32
|
=> 26
|
20
|
-
|
33
|
+
>> require 'pp'
|
21
34
|
=> true
|
22
|
-
|
35
|
+
>> pp t[23]
|
23
36
|
{"Class"=>"TWO SEATERS",
|
24
37
|
"Manufacturer"=>"PORSCHE",
|
25
38
|
"carline name"=>"BOXSTER",
|
@@ -47,7 +60,19 @@ Used by [the Brighter Planet Reference Data web service](http://data.brighterpla
|
|
47
60
|
"eng dscr"=>"",
|
48
61
|
"trans dscr"=>""}
|
49
62
|
|
50
|
-
|
63
|
+
## Columns and rows
|
64
|
+
|
65
|
+
* If there are headers, you get an <code>Array</code> of <code>Hash</code>es with **string keys**.
|
66
|
+
* If you set <code>:headers => false</code>, then you get an <code>Array</code> of <code>Array</code>s.
|
67
|
+
|
68
|
+
## Row keys
|
69
|
+
|
70
|
+
Row keys are **strings**. Row keys are NOT symbolized.
|
71
|
+
|
72
|
+
row['foobar'] # correct
|
73
|
+
row[:foobar] # incorrect
|
74
|
+
|
75
|
+
You can call <code>symbolize_keys</code> yourself, but we don't do it automatically to avoid creating loads of garbage symbols.
|
51
76
|
|
52
77
|
## Supported formats
|
53
78
|
|
@@ -59,7 +84,7 @@ You get an <code>Array</code> of <code>Hash</code>es with **string keys**. If yo
|
|
59
84
|
</tr>
|
60
85
|
<tr>
|
61
86
|
<td>Delimited (CSV, TSV, etc.)</td>
|
62
|
-
<td>All <code>RemoteTable::
|
87
|
+
<td>All <code>RemoteTable::Delimited::PASSTHROUGH_CSV_SETTINGS</code>, for example <code>:col_sep</code>, are passed directly to fastercsv.</td>
|
63
88
|
<td>
|
64
89
|
<a href="http://fastercsv.rubyforge.org/">fastercsv</a> (1.8);
|
65
90
|
<a href="http://www.ruby-doc.org/stdlib-1.9.3/libdoc/csv/rdoc/index.html">stdlib</a> (1.9)
|
@@ -147,12 +172,12 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
147
172
|
:row_xpath => '//table/tr[2]/td/table/tr',
|
148
173
|
:column_xpath => 'td',
|
149
174
|
:errata => { RemoteTable.new('https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdGVBRnhkRGhSaVptSDJ5bXJGbkpUSWc&output=csv', :responder => Aircraft::Guru.new },
|
150
|
-
:select =>
|
175
|
+
:select => proc { |record| manufacturer_whitelist? record['Manufacturer'] })
|
151
176
|
|
152
177
|
# OpenFlights.org airports database
|
153
178
|
RemoteTable.new('https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat',
|
154
179
|
:headers => %w{ id name city country_name iata_code icao_code latitude longitude altitude timezone daylight_savings },
|
155
|
-
:select =>
|
180
|
+
:select => proc { |record| record['iata_code'].present? },
|
156
181
|
:errata => { RemoteTable.new('https://spreadsheets.google.com/pub?key=0AoQJbWqPrREqdFc2UzhQYU5PWEQ0N21yWFZGNmc2a3c&gid=0&output=csv', :responder => Airport::Guru.new }) # see https://github.com/brighterplanet/earth/blob/master/lib/earth/air/aircraft/data_miner.rb
|
157
182
|
|
158
183
|
# T100 flight segment data for #{month.strftime('%B %Y')}
|
@@ -162,7 +187,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
162
187
|
:compression => :zip,
|
163
188
|
:glob => '/*.csv',
|
164
189
|
:errata => { RemoteTable.new('https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdGxpYU1qWFR3d0syTVMyQVVOaDd0V3c&output=csv', :responder => FlightSegment::Guru.new },
|
165
|
-
:select =>
|
190
|
+
:select => proc { |record| record['DEPARTURES_PERFORMED'].to_i > 0 })
|
166
191
|
|
167
192
|
# 1995 Fuel Economy Guide
|
168
193
|
# for definition of `:fuel_economy_guide_b` and `AutomobileMakeModelYearVariant::ParserB` see https://github.com/brighterplanet/earth/blob/master/lib/earth/automobile/automobile_make_model_year_variant/data_miner.rb
|
@@ -171,7 +196,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
171
196
|
:format => :fixed_width,
|
172
197
|
:cut => '13-',
|
173
198
|
:schema_name => :fuel_economy_guide_b,
|
174
|
-
:select =>
|
199
|
+
:select => proc { |row| row['model'].present? and (row['suppress_code'].blank? or row['suppress_code'].to_f == 0) and row['state_code'] == 'F' },
|
175
200
|
:transform => { :class => AutomobileMakeModelYearVariant::ParserB, :year => 1995 },
|
176
201
|
:errata => { :url => "https://docs.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdDkxTElWRVlvUXB3Uy04SDhSYWkzakE&output=csv", :responder => AutomobileMakeModelYearVariant::Guru.new })
|
177
202
|
|
@@ -181,30 +206,30 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
181
206
|
:filename => '98guide6.csv',
|
182
207
|
:transform => { :class => AutomobileMakeModelYearVariant::ParserC, :year => 1998 },
|
183
208
|
:errata => { :url => "https://docs.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdDkxTElWRVlvUXB3Uy04SDhSYWkzakE&output=csv", :responder => AutomobileMakeModelYearVariant::Guru.new },
|
184
|
-
:select =>
|
209
|
+
:select => proc { |row| row['model'].present? })
|
185
210
|
|
186
211
|
# annual corporate average fuel economy data for domestic and imported vehicle fleets from the NHTSA
|
187
212
|
RemoteTable.new('https://spreadsheets.google.com/pub?key=0AoQJbWqPrREqdEdXWXB6dkVLWkowLXhYSFVUT01sS2c&hl=en&gid=0&output=csv',
|
188
213
|
:errata => { 'url' => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
|
189
|
-
:select =>
|
214
|
+
:select => proc { |row| row['volume'].to_i > 0 })
|
190
215
|
|
191
216
|
# total vehicle miles travelled by gasoline passenger cars from the 2010 EPA GHG Inventory
|
192
217
|
RemoteTable.new('http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
193
218
|
:filename => 'Annex Tables/Annex 3/Table A-87.csv',
|
194
219
|
:skip => 1,
|
195
|
-
:select =>
|
220
|
+
:select => proc { |row| row['Year'].to_i.to_s == row['Year'] })
|
196
221
|
|
197
222
|
# total vehicle miles travelled from the 2010 EPA GHG Inventory
|
198
223
|
RemoteTable.new('http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
199
224
|
:filename => 'Annex Tables/Annex 3/Table A-87.csv',
|
200
225
|
:skip => 1,
|
201
|
-
:select =>
|
226
|
+
:select => proc { |row| row['Year'].to_i.to_s == row['Year'] })
|
202
227
|
|
203
228
|
# total travel distribution from the 2010 EPA GHG Inventory
|
204
229
|
RemoteTable.new('http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
205
230
|
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
206
231
|
:skip => 1,
|
207
|
-
:select =>
|
232
|
+
:select => proc { |row| row['Vehicle Age'].to_i.to_s == row['Vehicle Age'] })
|
208
233
|
|
209
234
|
# building characteristics from the 2003 EIA Commercial Building Energy Consumption Survey
|
210
235
|
RemoteTable.new('http://www.eia.gov/emeu/cbecs/cbecs2003/public_use_2003/data/FILE02.csv',
|
@@ -215,7 +240,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
215
240
|
# for definition of `CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER` see https://github.com/brighterplanet/earth/blob/master/lib/earth/industry/cbecs_energy_intensity/data_miner.rb
|
216
241
|
RemoteTable.new("http://www.eia.gov/emeu/cbecs/cbecs2003/detailed_tables_2003/2003set10/2003excel/C17.xls",
|
217
242
|
:headers => false,
|
218
|
-
:select =>
|
243
|
+
:select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
|
219
244
|
:crop => (21..37))
|
220
245
|
|
221
246
|
# U.S. Census 2002 NAICS code list
|
@@ -238,13 +263,13 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
238
263
|
RemoteTable.new('http://www.census.gov/popest/about/geo/state_geocodes_v2009.txt',
|
239
264
|
:skip => 6,
|
240
265
|
:headers => %w{ Region Division FIPS Name },
|
241
|
-
:select =>
|
266
|
+
:select => proc { |row| row['Division'].to_i > 0 and row['FIPS'].to_i == 0 })
|
242
267
|
|
243
268
|
# state census divisions from the U.S. Census
|
244
269
|
RemoteTable.new('http://www.census.gov/popest/about/geo/state_geocodes_v2009.txt',
|
245
270
|
:skip => 8,
|
246
271
|
:headers => ['Region', 'Division', 'State FIPS', 'Name'],
|
247
|
-
:select =>
|
272
|
+
:select => proc { |row| row['State FIPS'].to_i > 0 })
|
248
273
|
|
249
274
|
# OpenGeoCode.org's Country Codes to Country Names list
|
250
275
|
RemoteTable.new('http://opengeocode.org/download/countrynames.txt',
|
@@ -267,19 +292,19 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
267
292
|
RemoteTable.new('http://www.epa.gov/cleanenergy/documents/egridzips/eGRID2010V1_1_STIE_USGC.xls',
|
268
293
|
:sheet => 'STIE07',
|
269
294
|
:skip => 4,
|
270
|
-
:select =>
|
295
|
+
:select => proc { |row| row['eGRID2010 year 2007 file state sequence number'].to_i.between?(1, 51) })
|
271
296
|
|
272
297
|
# eGRID 2010 subregions and electricity emission factors
|
273
298
|
RemoteTable.new('http://www.epa.gov/cleanenergy/documents/egridzips/eGRID2010_Version1-1_xls_only.zip',
|
274
299
|
:filename => 'eGRID2010V1_1_year07_AGGREGATION.xls',
|
275
300
|
:sheet => 'SRL07',
|
276
301
|
:skip => 4,
|
277
|
-
:select =>
|
302
|
+
:select => proc { |row| row['SEQSRL07'].to_i.between?(1, 26) })
|
278
303
|
|
279
304
|
# U.S. Census State ANSI Code file
|
280
305
|
RemoteTable.new('http://www.census.gov/geo/www/ansi/state.txt',
|
281
306
|
:delimiter => '|',
|
282
|
-
:select =>
|
307
|
+
:select => proc { |record| record['STATE'].to_i < 60 })
|
283
308
|
|
284
309
|
# Mapping Hacks zipcode database
|
285
310
|
RemoteTable.new('http://mappinghacks.com/data/zipcode.zip',
|
@@ -295,18 +320,18 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
295
320
|
# Brighter Planet's list of cat and dog breeds, genders, and weights
|
296
321
|
RemoteTable.new('http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv',
|
297
322
|
:encoding => 'ISO-8859-1',
|
298
|
-
:select =>
|
323
|
+
:select => proc { |row| row['gender'].present? })
|
299
324
|
|
300
325
|
# residential electricity prices from the EIA
|
301
326
|
RemoteTable.new('http://www.eia.doe.gov/cneaf/electricity/page/sales_revenue.xls',
|
302
|
-
:select =>
|
327
|
+
:select => proc { |row| row['Year'].to_s.first(4).to_i > 1989 })
|
303
328
|
|
304
329
|
# residential natural gas prices from the EIA
|
305
330
|
# for definition of `NaturalGasParser` see https://github.com/brighterplanet/earth/blob/master/lib/earth/residence/residence_fuel_price/data_miner.rb
|
306
331
|
RemoteTable.new('http://tonto.eia.doe.gov/dnav/ng/xls/ng_pri_sum_a_EPG0_FWA_DMcf_a.xls',
|
307
332
|
:sheet => 'Data 1',
|
308
333
|
:skip => 2,
|
309
|
-
:select =>
|
334
|
+
:select => proc { |row| row['year'].to_i > 1989 },
|
310
335
|
:transform => { :class => NaturalGasParser })
|
311
336
|
|
312
337
|
# 2005 EIA Residential Energy Consumption Survey microdata
|
@@ -375,7 +400,7 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
375
400
|
:format => :fixed_width,
|
376
401
|
:crop => 21..26, # inclusive
|
377
402
|
:cut => '2-',
|
378
|
-
:select =>
|
403
|
+
:select => proc { |row| /\A[A-Z]/.match row['code'] },
|
379
404
|
:schema => [[ 'code', 2, { :type => :string } ],
|
380
405
|
[ 'spacer', 2 ],
|
381
406
|
[ 'name', 52, { :type => :string } ]]
|
@@ -420,14 +445,11 @@ Everything is forced into UTF-8. You can improve the quality of the conversion b
|
|
420
445
|
|
421
446
|
## Requirements
|
422
447
|
|
423
|
-
*
|
424
|
-
* Unix tools like curl, iconv, perl, cat, cut, tail, etc. accessible from `ENV['PATH']`
|
425
|
-
|
426
|
-
As this library matures, that requirement should go away.
|
448
|
+
* Unix tools like curl, iconv, perl, cat, cut, tail, etc. accessible from your `$PATH`
|
427
449
|
|
428
450
|
## Wishlist
|
429
451
|
|
430
|
-
*
|
452
|
+
* Win32 compat
|
431
453
|
* The new "custom parser" syntax (aka transformer) hasn't been defined yet... only the old-style syntax is available
|
432
454
|
|
433
455
|
## Authors
|
data/lib/remote_table.rb
CHANGED
@@ -3,6 +3,14 @@ if ::RUBY_VERSION < '1.9' and $KCODE != 'UTF8'
|
|
3
3
|
$KCODE = 'UTF8'
|
4
4
|
end
|
5
5
|
|
6
|
+
require 'thread'
|
7
|
+
|
8
|
+
require 'iconv'
|
9
|
+
if RUBY_VERSION >= '1.9'
|
10
|
+
# for an excellent explanation see http://blog.segment7.net/2010/12/17/from-iconv-iconv-to-string-encode
|
11
|
+
Kernel.warn "[remote_table] Apologies - using iconv because Ruby 1.9.x's String#encode doesn't have transliteration tables (yet)"
|
12
|
+
end
|
13
|
+
|
6
14
|
require 'active_support'
|
7
15
|
require 'active_support/version'
|
8
16
|
if ::ActiveSupport::VERSION::MAJOR >= 3
|
@@ -10,78 +18,432 @@ if ::ActiveSupport::VERSION::MAJOR >= 3
|
|
10
18
|
end
|
11
19
|
require 'hash_digest'
|
12
20
|
|
13
|
-
require 'remote_table/
|
14
|
-
require 'remote_table/config'
|
15
|
-
require 'remote_table/local_file'
|
21
|
+
require 'remote_table/local_copy'
|
16
22
|
require 'remote_table/transformer'
|
17
23
|
|
24
|
+
require 'remote_table/plaintext'
|
25
|
+
require 'remote_table/processed_by_roo'
|
26
|
+
require 'remote_table/processed_by_nokogiri'
|
27
|
+
require 'remote_table/xls'
|
28
|
+
require 'remote_table/xlsx'
|
29
|
+
require 'remote_table/delimited'
|
30
|
+
require 'remote_table/ods'
|
31
|
+
require 'remote_table/fixed_width'
|
32
|
+
require 'remote_table/html'
|
33
|
+
require 'remote_table/xml'
|
34
|
+
require 'remote_table/yaml'
|
35
|
+
|
18
36
|
class Hash
|
37
|
+
# Added by remote_table to store a hash (think checksum) of the data with which a particular Hash is initialized.
|
38
|
+
# @return [String]
|
19
39
|
attr_accessor :row_hash
|
20
40
|
end
|
21
41
|
|
22
42
|
class Array
|
43
|
+
# Added by remote_table to store a hash (think checksum) of the data with which a particular Array is initialized.
|
44
|
+
# @return [String]
|
23
45
|
attr_accessor :row_hash
|
24
46
|
end
|
25
47
|
|
26
|
-
|
27
|
-
|
48
|
+
# Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma separated), TSV (tab separated), other delimited, fixed-width files.
|
49
|
+
class RemoteTable
|
50
|
+
class << self
|
51
|
+
# Guess compression based on URL. Used internally.
|
52
|
+
# @return [Symbol,nil]
|
53
|
+
def guess_compression(url)
|
54
|
+
extname = ::File.extname(::URI.parse(url).path).downcase
|
55
|
+
case extname
|
56
|
+
when /gz/, /gunzip/
|
57
|
+
:gz
|
58
|
+
when /zip/
|
59
|
+
:zip
|
60
|
+
when /bz2/, /bunzip2/
|
61
|
+
:bz2
|
62
|
+
when /exe/
|
63
|
+
:exe
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Guess packing from URL. Used internally.
|
68
|
+
# @return [Symbol,nil]
|
69
|
+
def guess_packing(url)
|
70
|
+
basename = ::File.basename(::URI.parse(url).path).downcase
|
71
|
+
if basename.include?('.tar') or basename.include?('.tgz')
|
72
|
+
:tar
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# Guess file format from the basename. Since a file might be decompressed and/or pulled out of an archive with a glob, this usually can't be called until a file is downloaded.
|
77
|
+
# @return [Symbol,nil]
|
78
|
+
def guess_format(basename)
|
79
|
+
case basename.to_s.downcase
|
80
|
+
when /ods/, /open_?office/
|
81
|
+
:ods
|
82
|
+
when /xlsx/, /excelx/
|
83
|
+
:xlsx
|
84
|
+
when /xls/, /excel/
|
85
|
+
:xls
|
86
|
+
when /csv/, /tsv/, /delimited/
|
87
|
+
# note that there is no RemoteTable::Csv class - it's normalized to :delimited
|
88
|
+
:delimited
|
89
|
+
when /fixed_?width/
|
90
|
+
:fixed_width
|
91
|
+
when /htm/
|
92
|
+
:html
|
93
|
+
when /xml/
|
94
|
+
:xml
|
95
|
+
when /yaml/, /yml/
|
96
|
+
:yaml
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
# Given a Google Docs spreadsheet URL, make sure it uses CSV output.
|
101
|
+
# @return [String]
|
102
|
+
def google_spreadsheet_csv_url(url)
|
103
|
+
uri = ::URI.parse url
|
104
|
+
params = uri.query.split('&')
|
105
|
+
params.delete_if { |param| param.start_with?('output=') }
|
106
|
+
params << 'output=csv'
|
107
|
+
uri.query = params.join('&')
|
108
|
+
uri.to_s
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# @private
|
113
|
+
# Here to support legacy code.
|
28
114
|
class Transform
|
29
115
|
def self.row_hash(row)
|
30
116
|
::HashDigest.hexdigest row
|
31
117
|
end
|
32
118
|
end
|
33
119
|
|
120
|
+
EXTERNAL_ENCODING = 'UTF-8'
|
121
|
+
EXTERNAL_ENCODING_ICONV = 'UTF-8//TRANSLIT'
|
122
|
+
GOOGLE_DOCS_SPREADSHEET = [
|
123
|
+
/docs.google.com/i,
|
124
|
+
/spreadsheets.google.com/i
|
125
|
+
]
|
126
|
+
VALID = {
|
127
|
+
:compression => [:gz, :zip, :bz2, :exe],
|
128
|
+
:packing => [:tar],
|
129
|
+
:format => [:xlsx, :xls, :delimited, :ods, :fixed_width, :html, :xml, :yaml, :csv],
|
130
|
+
}
|
131
|
+
DEFAULT = {
|
132
|
+
:streaming => false,
|
133
|
+
:warn_on_multiple_downloads => true,
|
134
|
+
:headers => :first_row,
|
135
|
+
:keep_blank_rows => false,
|
136
|
+
:skip => 0,
|
137
|
+
:internal_encoding => 'UTF-8',
|
138
|
+
:delimiter => ','
|
139
|
+
}
|
140
|
+
OLD_SETTING_NAMES = {
|
141
|
+
:internal_encoding => [:encoding],
|
142
|
+
:transform_settings => [:transform],
|
143
|
+
:pre_select => [:select],
|
144
|
+
:pre_reject => [:reject],
|
145
|
+
:errata_settings => [:errata],
|
146
|
+
}
|
147
|
+
|
34
148
|
include ::Enumerable
|
35
149
|
|
150
|
+
# The URL of the local or remote file.
|
151
|
+
#
|
152
|
+
# * Local: "file:///Users/myuser/Desktop/holidays.csv"
|
153
|
+
# * Remote: "http://data.brighterplanet.com/countries.csv"
|
154
|
+
#
|
155
|
+
# @return [String]
|
36
156
|
attr_reader :url
|
37
|
-
|
157
|
+
|
158
|
+
# @private
|
159
|
+
# A cache of rows, created unless +:streaming+ is enabled.
|
160
|
+
# @return [Array<Hash,Array>]
|
161
|
+
attr_reader :cache
|
162
|
+
|
163
|
+
# @private
|
164
|
+
# How many times this file has been downloaded. RemoteTable will emit a warning if you download it more than once.
|
165
|
+
# @return [Integer]
|
166
|
+
attr_reader :download_count
|
167
|
+
|
168
|
+
# @private
|
169
|
+
# Used internally to access the transformer (aka parser).
|
170
|
+
attr_reader :transformer
|
171
|
+
|
172
|
+
# @private
|
173
|
+
# Used internally to access a downloaded copy of the file.
|
174
|
+
# @return [RemoteTable::LocalCopy]
|
175
|
+
attr_reader :local_copy
|
176
|
+
|
177
|
+
# Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you enumerate its rows. Defaults to false.
|
178
|
+
# @return [true,false]
|
179
|
+
attr_reader :streaming
|
180
|
+
|
181
|
+
# Whether to warn the user on multiple downloads. Defaults to true.
|
182
|
+
# @return [true,false]
|
183
|
+
attr_reader :warn_on_multiple_downloads
|
184
|
+
|
185
|
+
# Headers specified by the user: +:first_row+ (the default), +false+, or a list of headers.
|
186
|
+
# @return [:first_row,false,Array<String>]
|
187
|
+
attr_reader :headers
|
188
|
+
|
189
|
+
# The sheet specified by the user as a number or a string.
|
190
|
+
# @return [String,Integer]
|
191
|
+
attr_reader :sheet
|
192
|
+
|
193
|
+
# Whether to keep blank rows. Default is false.
|
194
|
+
# @return [true,false]
|
195
|
+
attr_reader :keep_blank_rows
|
196
|
+
|
197
|
+
# Form data to POST in the download request. It should probably be in +application/x-www-form-urlencoded+.
|
198
|
+
# @return [String]
|
199
|
+
attr_reader :form_data
|
200
|
+
|
201
|
+
# How many rows to skip at the beginning of the file or table. Default is 0.
|
202
|
+
# @return [Integer]
|
203
|
+
attr_reader :skip
|
204
|
+
|
205
|
+
# The original encoding of the source file. Default is UTF-8. Previously passed as +:encoding+.
|
206
|
+
# @return [String]
|
207
|
+
attr_reader :internal_encoding
|
208
|
+
|
209
|
+
# The delimiter, a.k.a. column separator. Passed to Ruby CSV as +:col_sep+. Default is a comma (",").
|
210
|
+
# @return [String]
|
211
|
+
attr_reader :delimiter
|
212
|
+
|
213
|
+
# The XPath used to find rows in HTML or XML.
|
214
|
+
# @return [String]
|
215
|
+
attr_reader :row_xpath
|
216
|
+
|
217
|
+
# The XPath used to find columns in HTML or XML.
|
218
|
+
# @return [String]
|
219
|
+
attr_reader :column_xpath
|
220
|
+
|
221
|
+
# The CSS selector used to find rows in HTML or XML.
|
222
|
+
# @return [String]
|
223
|
+
attr_reader :row_css
|
224
|
+
|
225
|
+
# The CSS selector used to find columns in HTML or XML.
|
226
|
+
# @return [String]
|
227
|
+
attr_reader :column_css
|
228
|
+
|
229
|
+
# The format of the source file. Can be +:xlsx+, +:xls+, +:delimited+, +:ods+, +:fixed_width+, +:html+, +:xml+, +:yaml+.
|
230
|
+
# @return [Symbol]
|
231
|
+
attr_reader :format
|
232
|
+
|
233
|
+
# The compression type. Guessed from URL if not provided. +:gz+, +:zip+, +:bz2+, and +:exe+ (treated as +:zip+) are supported.
|
234
|
+
# @return [Symbol]
|
235
|
+
attr_reader :compression
|
236
|
+
|
237
|
+
# The packing type. Guessed from URL if not provided. Only +:tar+ is supported.
|
238
|
+
# @return [Symbol]
|
239
|
+
attr_reader :packing
|
240
|
+
|
241
|
+
# The glob used to pick a file out of an archive.
|
242
|
+
#
|
243
|
+
# @return [String]
|
244
|
+
#
|
245
|
+
# @example Pick out the only CSV in a ZIP file
|
246
|
+
# RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
247
|
+
attr_reader :glob
|
248
|
+
|
249
|
+
# The filename, which can be used to pick a file out of an archive.
|
250
|
+
#
|
251
|
+
# @return [String]
|
252
|
+
#
|
253
|
+
# @example Specify the filename to get out of a ZIP file
|
254
|
+
# RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
|
255
|
+
attr_reader :filename
|
256
|
+
|
257
|
+
# Pick specific columns out of a plaintext file using an argument to the UNIX [+cut+ utility](http://en.wikipedia.org/wiki/Cut_%28Unix%29).
|
258
|
+
#
|
259
|
+
# @return [String]
|
260
|
+
#
|
261
|
+
# @example Pick ALMOST out of ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
262
|
+
# # $ echo ABCDEFGHIJKLMNOPQRSTUVWXYZ | cut -c '1,12,13,15,19,20'
|
263
|
+
# # ALMOST
|
264
|
+
# RemoteTable.new 'file:///atoz.txt', :cut => '1,12,13,15,19,20'
|
265
|
+
attr_reader :cut
|
266
|
+
|
267
|
+
# Use a range of rows in a plaintext file.
|
268
|
+
#
|
269
|
+
# @return [Range]
|
270
|
+
#
|
271
|
+
# @example Only take rows 21 through 37
|
272
|
+
# RemoteTable.new("http://www.eia.gov/emeu/cbecs/cbecs2003/detailed_tables_2003/2003set10/2003excel/C17.xls",
|
273
|
+
# :headers => false,
|
274
|
+
# :select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
|
275
|
+
# :crop => (21..37))
|
276
|
+
attr_reader :crop
|
277
|
+
|
278
|
+
# The fixed-width schema, given as a multi-dimensional array.
|
279
|
+
#
|
280
|
+
# @return [Array<Array{String,Integer,Hash}>]
|
281
|
+
#
|
282
|
+
# @example From the tests
|
283
|
+
# RemoteTable.new('http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
284
|
+
# :format => :fixed_width,
|
285
|
+
# :skip => 1,
|
286
|
+
# :schema => [[ 'header4', 10, { :type => :string } ],
|
287
|
+
# [ 'spacer', 1 ],
|
288
|
+
# [ 'header5', 10, { :type => :string } ],
|
289
|
+
# [ 'spacer', 12 ],
|
290
|
+
# [ 'header6', 10, { :type => :string } ]])
|
291
|
+
attr_reader :schema
|
38
292
|
|
39
|
-
#
|
293
|
+
# If you somehow already defined a fixed-width schema (so you can re-use it?), specify it here.
|
294
|
+
# @return [String,Symbol]
|
295
|
+
attr_reader :schema_name
|
296
|
+
|
297
|
+
# A proc that decides whether to include a row. Previously passed as +:select+.
|
298
|
+
# @return [Proc]
|
299
|
+
attr_reader :pre_select
|
300
|
+
|
301
|
+
# A proc that decides whether to exclude a row. Previously passed as +:reject+.
|
302
|
+
# @return [Proc]
|
303
|
+
attr_reader :pre_reject
|
304
|
+
|
305
|
+
# Settings to create a transformer.
|
306
|
+
# @return [Hash]
|
307
|
+
attr_reader :transform_settings
|
308
|
+
|
309
|
+
# A hash of settings to initialize an Errata instance to be used on every row. Previously passed as +:errata+.
|
310
|
+
#
|
311
|
+
# See the Errata library at https://github.com/seamusabshere/errata
|
312
|
+
#
|
313
|
+
# @return [Hash]
|
314
|
+
attr_reader :errata_settings
|
315
|
+
|
316
|
+
# The format of the source file. Can be specified as: :xlsx, :xls, :delimited (aka :csv), :ods, :fixed_width, :html, :xml, :yaml
|
317
|
+
#
|
318
|
+
# Note: treats all +docs.google.com+ and +spreadsheets.google.com+ URLs as +:delimited+.
|
40
319
|
#
|
41
|
-
#
|
320
|
+
# Default: guessed from file extension (which is usually the same as the URL, but sometimes not if you pick out a specific file from an archive)
|
42
321
|
#
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
#
|
322
|
+
# @return [Symbol]
|
323
|
+
attr_reader :format
|
324
|
+
|
325
|
+
# Options passed by the user that may be passed through to the underlying parsing library.
|
326
|
+
# @return [Hash]
|
327
|
+
attr_reader :other_options
|
328
|
+
|
329
|
+
# Create a new RemoteTable, which is an Enumerable.
|
330
|
+
#
|
331
|
+
# Does not immediately download/parse... it's lazy-loading.
|
332
|
+
#
|
333
|
+
# @overload initialize(settings)
|
334
|
+
# @param [Hash] settings Settings including +:url+.
|
47
335
|
#
|
48
|
-
#
|
336
|
+
# @overload initialize(url, settings)
|
337
|
+
# @param [String] url The URL to the local or remote file.
|
338
|
+
# @param [Hash] settings Settings.
|
339
|
+
#
|
340
|
+
# @example Open an XLSX
|
341
|
+
# RemoteTable.new('http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
342
|
+
#
|
343
|
+
# @example Open a CSV inside a ZIP file
|
344
|
+
# RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
345
|
+
# :filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
346
|
+
# :skip => 1,
|
347
|
+
# :pre_select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
|
49
348
|
def initialize(*args)
|
50
|
-
|
51
|
-
|
349
|
+
@download_count_mutex = ::Mutex.new
|
350
|
+
@iconv_mutex = ::Mutex.new
|
351
|
+
@extend_bang_mutex = ::Mutex.new
|
352
|
+
@errata_mutex = ::Mutex.new
|
353
|
+
|
354
|
+
@cache = []
|
355
|
+
@download_count = 0
|
356
|
+
|
357
|
+
settings = args.last.is_a?(::Hash) ? args.last.symbolize_keys : {}
|
358
|
+
|
52
359
|
@url = if args.first.is_a? ::String
|
53
|
-
args.first
|
360
|
+
args.first
|
54
361
|
else
|
55
|
-
|
362
|
+
grab settings, :url
|
363
|
+
end
|
364
|
+
@format = RemoteTable.guess_format grab(settings, :format)
|
365
|
+
if GOOGLE_DOCS_SPREADSHEET.any? { |regex| regex =~ url }
|
366
|
+
@url = RemoteTable.google_spreadsheet_csv_url url
|
367
|
+
@format = :delimited
|
368
|
+
end
|
369
|
+
|
370
|
+
@headers = grab settings, :headers
|
371
|
+
if headers.is_a?(::Array) and headers.any?(&:blank?)
|
372
|
+
raise ::ArgumentError, "[remote_table] If you specify headers, none of them can be blank"
|
56
373
|
end
|
57
|
-
|
374
|
+
|
375
|
+
@compression = grab(settings, :compression) || RemoteTable.guess_compression(url)
|
376
|
+
@packing = grab(settings, :packing) || RemoteTable.guess_packing(url)
|
377
|
+
|
378
|
+
@streaming = grab settings, :streaming
|
379
|
+
@warn_on_multiple_downloads = grab settings, :warn_on_multiple_downloads
|
380
|
+
@delimiter = grab settings, :delimiter
|
381
|
+
@sheet = grab settings, :sheet
|
382
|
+
@keep_blank_rows = grab settings, :keep_blank_rows
|
383
|
+
@form_data = grab settings, :form_data
|
384
|
+
@skip = grab settings, :skip
|
385
|
+
@internal_encoding = grab settings, :internal_encoding
|
386
|
+
@row_xpath = grab settings, :row_xpath
|
387
|
+
@column_xpath = grab settings, :column_xpath
|
388
|
+
@row_css = grab settings, :row_css
|
389
|
+
@column_css = grab settings, :column_css
|
390
|
+
@glob = grab settings, :glob
|
391
|
+
@filename = grab settings, :filename
|
392
|
+
@transform_settings = grab settings, :transform_settings
|
393
|
+
@cut = grab settings, :cut
|
394
|
+
@crop = grab settings, :crop
|
395
|
+
@schema = grab settings, :schema
|
396
|
+
@schema_name = grab settings, :schema_name
|
397
|
+
@pre_select = grab settings, :pre_select
|
398
|
+
@pre_reject = grab settings, :pre_reject
|
399
|
+
@errata_settings = grab settings, :errata_settings
|
400
|
+
|
401
|
+
@other_options = settings
|
402
|
+
|
403
|
+
@transformer = Transformer.new self
|
404
|
+
@local_copy = LocalCopy.new self
|
58
405
|
end
|
59
|
-
|
60
|
-
#
|
61
|
-
|
406
|
+
|
407
|
+
# Yield each row.
|
408
|
+
#
|
409
|
+
# @return [nil]
|
410
|
+
#
|
411
|
+
# @yield [Hash,Array] A hash or an array depending on whether the RemoteTable has named headers (column names).
|
412
|
+
def each
|
413
|
+
extend!
|
62
414
|
if fully_cached?
|
63
|
-
cache.each
|
415
|
+
cache.each do |row|
|
416
|
+
yield row
|
417
|
+
end
|
64
418
|
else
|
65
419
|
mark_download!
|
66
|
-
|
420
|
+
memo = _each do |row|
|
67
421
|
transformer.transform(row).each do |virtual_row|
|
68
422
|
virtual_row.row_hash = ::HashDigest.hexdigest row
|
69
|
-
if
|
70
|
-
next if
|
71
|
-
|
423
|
+
if errata
|
424
|
+
next if errata.rejects? virtual_row
|
425
|
+
errata.correct! virtual_row
|
426
|
+
end
|
427
|
+
next if pre_select and !pre_select.call(virtual_row)
|
428
|
+
next if pre_reject and pre_reject.call(virtual_row)
|
429
|
+
unless streaming
|
430
|
+
cache.push virtual_row
|
72
431
|
end
|
73
|
-
next if config.select and !config.select.call(virtual_row)
|
74
|
-
next if config.reject and config.reject.call(virtual_row)
|
75
|
-
cache.push virtual_row unless config.streaming
|
76
432
|
yield virtual_row
|
77
433
|
end
|
78
434
|
end
|
79
|
-
|
80
|
-
|
435
|
+
unless streaming
|
436
|
+
fully_cached!
|
437
|
+
end
|
438
|
+
memo
|
81
439
|
end
|
440
|
+
nil
|
82
441
|
end
|
442
|
+
|
443
|
+
# @deprecated
|
83
444
|
alias :each_row :each
|
84
445
|
|
446
|
+
# @return [Array<Hash,Array>] All rows.
|
85
447
|
def to_a
|
86
448
|
if fully_cached?
|
87
449
|
cache.dup
|
@@ -89,9 +451,13 @@ class RemoteTable
|
|
89
451
|
map { |row| row }
|
90
452
|
end
|
91
453
|
end
|
454
|
+
|
455
|
+
# @deprecated
|
92
456
|
alias :rows :to_a
|
93
457
|
|
94
|
-
# Get a row by row number
|
458
|
+
# Get a row by row number. Zero-based.
|
459
|
+
#
|
460
|
+
# @return [Hash,Array]
|
95
461
|
def [](row_number)
|
96
462
|
if fully_cached?
|
97
463
|
cache[row_number]
|
@@ -100,35 +466,37 @@ class RemoteTable
|
|
100
466
|
end
|
101
467
|
end
|
102
468
|
|
103
|
-
#
|
469
|
+
# Clear the row cache in case it helps your GC.
|
470
|
+
#
|
471
|
+
# @return [nil]
|
104
472
|
def free
|
473
|
+
@fully_cached = false
|
474
|
+
@errata = nil
|
105
475
|
cache.clear
|
106
476
|
nil
|
107
477
|
end
|
108
|
-
|
109
|
-
#
|
110
|
-
def
|
111
|
-
@
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
@transformer ||= Transformer.new self
|
478
|
+
|
479
|
+
# @private
|
480
|
+
def errata
|
481
|
+
@errata || @errata_mutex.synchronize do
|
482
|
+
@errata ||= begin
|
483
|
+
if defined?(::Errata) and errata_settings.is_a?(::Errata)
|
484
|
+
::Kernel.warn %{[remote_table] Passing :errata_settings as an Errata object is deprecated. Please pass a Hash of settings instead.}
|
485
|
+
errata_settings
|
486
|
+
elsif errata_settings.is_a?(::Hash)
|
487
|
+
::Errata.new errata_settings
|
488
|
+
end
|
489
|
+
end
|
490
|
+
end
|
122
491
|
end
|
123
|
-
|
124
|
-
attr_reader :download_count
|
125
|
-
|
492
|
+
|
126
493
|
private
|
127
494
|
|
128
495
|
def mark_download!
|
129
|
-
@
|
130
|
-
|
131
|
-
|
496
|
+
@download_count_mutex.synchronize do
|
497
|
+
@download_count += 1
|
498
|
+
end
|
499
|
+
if warn_on_multiple_downloads and download_count > 1
|
132
500
|
::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
|
133
501
|
end
|
134
502
|
end
|
@@ -140,8 +508,62 @@ class RemoteTable
|
|
140
508
|
def fully_cached?
|
141
509
|
!!@fully_cached
|
142
510
|
end
|
143
|
-
|
144
|
-
def
|
145
|
-
@
|
511
|
+
|
512
|
+
def iconv
|
513
|
+
@iconv || @iconv_mutex.synchronize do
|
514
|
+
@iconv ||= ::Iconv.new(EXTERNAL_ENCODING_ICONV, internal_encoding)
|
515
|
+
end
|
516
|
+
end
|
517
|
+
|
518
|
+
def transliterate_to_utf8(str)
|
519
|
+
if str.is_a?(::String)
|
520
|
+
[ iconv.iconv(str), iconv.iconv(nil) ].join
|
521
|
+
end
|
522
|
+
end
|
523
|
+
|
524
|
+
def assume_utf8(str)
|
525
|
+
if str.is_a?(::String) and ::RUBY_VERSION >= '1.9'
|
526
|
+
str.encode! EXTERNAL_ENCODING
|
527
|
+
else
|
528
|
+
str
|
529
|
+
end
|
530
|
+
end
|
531
|
+
|
532
|
+
def grab(settings, k)
|
533
|
+
user_specified = false
|
534
|
+
memo = nil
|
535
|
+
if (old_names = OLD_SETTING_NAMES[k]) and old_names.any? { |old_k| settings.has_key?(old_k) }
|
536
|
+
user_specified = true
|
537
|
+
memo = old_names.map { |old_k| settings.delete(old_k) }.compact.first
|
538
|
+
end
|
539
|
+
if settings.has_key?(k)
|
540
|
+
user_specified = true
|
541
|
+
memo = settings.delete k
|
542
|
+
end
|
543
|
+
if not user_specified and DEFAULT.has_key?(k)
|
544
|
+
memo = DEFAULT[k]
|
545
|
+
end
|
546
|
+
if memo and (valid = VALID[k]) and not valid.include?(memo.to_sym)
|
547
|
+
raise ::ArgumentError, %{[remote_table] #{k.inspect} => #{memo.inspect} is not a valid setting. Valid settings are #{valid.inspect}.}
|
548
|
+
end
|
549
|
+
memo
|
550
|
+
end
|
551
|
+
|
552
|
+
def extend!
|
553
|
+
return if @extend_bang
|
554
|
+
@extend_bang_mutex.synchronize do
|
555
|
+
return if @extend_bang
|
556
|
+
@extend_bang = true
|
557
|
+
format_module = if format
|
558
|
+
RemoteTable.const_get format.to_s.camelcase
|
559
|
+
elsif format = RemoteTable.guess_format(local_copy.path)
|
560
|
+
@format = format
|
561
|
+
RemoteTable.const_get format.to_s.camelcase
|
562
|
+
else
|
563
|
+
Delimited
|
564
|
+
end
|
565
|
+
extend format_module
|
566
|
+
after_extend if respond_to?(:after_extend)
|
567
|
+
end
|
146
568
|
end
|
147
569
|
end
|