remote_table 2.1.1 → 2.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +9 -9
- data/CHANGELOG +6 -0
- data/lib/remote_table.rb +41 -39
- data/lib/remote_table/processed_by_roo.rb +4 -6
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +2 -2
- data/test/test_remote_table.rb +29 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NDMzYjM5MmViMDA2OTRjMDMwMjljYmJjMmJhNmExODNmMTkzYmI3OA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
7
|
-
|
6
|
+
MDkxZmI3MWM4ZTJjYWM3NDYxOGQzMzRiYzIyMGQ2ODAyOWY0NTNjNg==
|
7
|
+
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OWU5NzM2OWM1MDljMTg3MzJiOWZjN2MxMzliYjhhZDdmNDYyY2E5ZWFhOTE2
|
10
|
+
MWI4YjA1NWM5YjhiMDYxMjA1ZDBkZTFiZTQ4OGJiMDlmNGVhNjg5Nzg2ZTkz
|
11
|
+
MmIxZTZmOGQyMmRhNTI4OTk5YjU1ODM0NjI1ZmIzYzMyZmFiNmI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTFhMTk4ZjAyZTdkM2RmZmM1ZmEyZmI5ZWQzMWFkZTliZWIxNmMxMjdhYTBm
|
14
|
+
NDg0YTQ3YThlYzgxNTRhZTQ5YTYzMDU5ZGYxODkzNmIxOWMwMzk5YjMwZTA4
|
15
|
+
ODVmOGRkNDBmNmExMjUyOWRmNTMzN2U1MjFmZjUyNzZmMzM5OTE=
|
data/CHANGELOG
CHANGED
data/lib/remote_table.rb
CHANGED
@@ -72,25 +72,25 @@ class RemoteTable
|
|
72
72
|
# Guess file format from the basename. Since a file might be decompressed and/or pulled out of an archive with a glob, this usually can't be called until a file is downloaded.
|
73
73
|
# @return [Symbol,nil]
|
74
74
|
def guess_format(basename)
|
75
|
-
case basename.to_s.downcase
|
76
|
-
when /ods/, /open_?office/
|
75
|
+
case basename.to_s.downcase.strip
|
76
|
+
when /ods\z/, /open_?office\z/
|
77
77
|
:ods
|
78
|
-
when /xlsx/, /excelx/
|
78
|
+
when /xlsx\z/, /excelx\z/
|
79
79
|
:xlsx
|
80
|
-
when /xls/, /excel/
|
80
|
+
when /xls\z/, /excel\z/
|
81
81
|
:xls
|
82
|
-
when /csv/, /tsv/, /delimited/
|
82
|
+
when /csv\z/, /tsv\z/, /delimited\z/
|
83
83
|
# note that there is no RemoteTable::Csv class - it's normalized to :delimited
|
84
84
|
:delimited
|
85
|
-
when /fixed_?width/
|
85
|
+
when /fixed_?width\z/
|
86
86
|
:fixed_width
|
87
|
-
when /
|
87
|
+
when /html?\z/
|
88
88
|
:html
|
89
|
-
when /xml/
|
89
|
+
when /xml\z/
|
90
90
|
:xml
|
91
|
-
when /yaml/, /yml/
|
91
|
+
when /yaml\z/, /yml\z/
|
92
92
|
:yaml
|
93
|
-
when /shp/
|
93
|
+
when /shp\z/
|
94
94
|
:shp
|
95
95
|
end
|
96
96
|
end
|
@@ -144,7 +144,7 @@ class RemoteTable
|
|
144
144
|
}
|
145
145
|
|
146
146
|
include ::Enumerable
|
147
|
-
|
147
|
+
|
148
148
|
# The URL of the local or remote file.
|
149
149
|
#
|
150
150
|
# @example Local
|
@@ -185,23 +185,23 @@ class RemoteTable
|
|
185
185
|
# Whether to warn the user on multiple downloads. Defaults to true.
|
186
186
|
# @return [true,false]
|
187
187
|
attr_reader :warn_on_multiple_downloads
|
188
|
-
|
188
|
+
|
189
189
|
# Headers specified by the user: +:first_row+ (the default), +false+, or a list of headers.
|
190
190
|
# @return [:first_row,false,Array<String>]
|
191
191
|
attr_reader :headers
|
192
|
-
|
192
|
+
|
193
193
|
# The sheet specified by the user as a number or a string.
|
194
194
|
# @return [String,Integer]
|
195
195
|
attr_reader :sheet
|
196
|
-
|
196
|
+
|
197
197
|
# Whether to keep blank rows. Default is false.
|
198
198
|
# @return [true,false]
|
199
199
|
attr_reader :keep_blank_rows
|
200
|
-
|
200
|
+
|
201
201
|
# Form data to POST in the download request. It should probably be in +application/x-www-form-urlencoded+.
|
202
202
|
# @return [String]
|
203
203
|
attr_reader :form_data
|
204
|
-
|
204
|
+
|
205
205
|
# How many rows to skip at the beginning of the file or table. Default is 0.
|
206
206
|
# @return [Integer]
|
207
207
|
attr_reader :skip
|
@@ -209,15 +209,15 @@ class RemoteTable
|
|
209
209
|
# The original encoding of the source file. Default is UTF-8. Previously passed as +:encoding+.
|
210
210
|
# @return [String]
|
211
211
|
attr_reader :internal_encoding
|
212
|
-
|
212
|
+
|
213
213
|
# The delimiter, a.k.a. column separator. Passed to Ruby CSV as +:col_sep+. Default is :delimited.
|
214
214
|
# @return [String]
|
215
215
|
attr_reader :delimiter
|
216
|
-
|
216
|
+
|
217
217
|
# The XPath used to find rows in HTML or XML.
|
218
218
|
# @return [String]
|
219
219
|
attr_reader :row_xpath
|
220
|
-
|
220
|
+
|
221
221
|
# The XPath used to find columns in HTML or XML.
|
222
222
|
# @return [String]
|
223
223
|
attr_reader :column_xpath
|
@@ -225,11 +225,11 @@ class RemoteTable
|
|
225
225
|
# The CSS selector used to find rows in HTML or XML.
|
226
226
|
# @return [String]
|
227
227
|
attr_reader :row_css
|
228
|
-
|
228
|
+
|
229
229
|
# The CSS selector used to find columns in HTML or XML.
|
230
230
|
# @return [String]
|
231
231
|
attr_reader :column_css
|
232
|
-
|
232
|
+
|
233
233
|
# The format of the source file. Can be +:xlsx+, +:xls+, +:delimited+, +:ods+, +:fixed_width+, +:html+, +:xml+, +:yaml+.
|
234
234
|
# @return [Symbol]
|
235
235
|
attr_reader :format
|
@@ -241,7 +241,7 @@ class RemoteTable
|
|
241
241
|
# The packing type. Guessed from URL if not provided. Only +:tar+ is supported.
|
242
242
|
# @return [Symbol]
|
243
243
|
attr_reader :packing
|
244
|
-
|
244
|
+
|
245
245
|
# The glob used to pick a file out of an archive.
|
246
246
|
#
|
247
247
|
# @return [String]
|
@@ -249,7 +249,7 @@ class RemoteTable
|
|
249
249
|
# @example Pick out the only CSV in a ZIP file
|
250
250
|
# RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
251
251
|
attr_reader :glob
|
252
|
-
|
252
|
+
|
253
253
|
# The filename, which can be used to pick a file out of an archive.
|
254
254
|
#
|
255
255
|
# @return [String]
|
@@ -267,7 +267,7 @@ class RemoteTable
|
|
267
267
|
# # ALMOST
|
268
268
|
# RemoteTable.new 'file:///atoz.txt', :cut => '1,12,13,15,19,20'
|
269
269
|
attr_reader :cut
|
270
|
-
|
270
|
+
|
271
271
|
# Use a range of rows in a plaintext file.
|
272
272
|
#
|
273
273
|
# @return [Range]
|
@@ -278,7 +278,7 @@ class RemoteTable
|
|
278
278
|
# :select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
|
279
279
|
# :crop => (21..37))
|
280
280
|
attr_reader :crop
|
281
|
-
|
281
|
+
|
282
282
|
# The fixed-width schema, given as a multi-dimensional array.
|
283
283
|
#
|
284
284
|
# @return [Array<Array{String,Integer,Hash}>]
|
@@ -293,15 +293,15 @@ class RemoteTable
|
|
293
293
|
# [ 'spacer', 12 ],
|
294
294
|
# [ 'header6', 10, { :type => :string } ]])
|
295
295
|
attr_reader :schema
|
296
|
-
|
296
|
+
|
297
297
|
# If you somehow already defined a fixed-width schema (so you can re-use it?), specify it here.
|
298
298
|
# @return [String,Symbol]
|
299
299
|
attr_reader :schema_name
|
300
|
-
|
300
|
+
|
301
301
|
# A proc that decides whether to include a row. Previously passed as +:select+.
|
302
302
|
# @return [Proc]
|
303
303
|
attr_reader :pre_select
|
304
|
-
|
304
|
+
|
305
305
|
# A proc that decides whether to include a row. Previously passed as +:reject+.
|
306
306
|
# @return [Proc]
|
307
307
|
attr_reader :pre_reject
|
@@ -309,14 +309,14 @@ class RemoteTable
|
|
309
309
|
# Settings to create a transformer.
|
310
310
|
# @return [Hash]
|
311
311
|
attr_reader :transform_settings
|
312
|
-
|
313
|
-
# A hash
|
312
|
+
|
313
|
+
# A hash to initialize an Errata instance to be used on every row. Applied after creating +row_hash+ and before passing to +:synthesize+ procs, etc. Previously passed as +:errata+.
|
314
314
|
#
|
315
315
|
# See the Errata library at https://github.com/seamusabshere/errata
|
316
316
|
#
|
317
317
|
# @return [Hash]
|
318
318
|
attr_reader :errata_settings
|
319
|
-
|
319
|
+
|
320
320
|
# The format of the source file. Can be specified as: :xlsx, :xls, :delimited (aka :csv), :ods, :fixed_width, :html, :xml, :yaml
|
321
321
|
#
|
322
322
|
# Note: treats all +docs.google.com+ and +spreadsheets.google.com+ URLs as +:delimited+.
|
@@ -332,6 +332,8 @@ class RemoteTable
|
|
332
332
|
|
333
333
|
# Create a new RemoteTable, which is an Enumerable.
|
334
334
|
#
|
335
|
+
# Options are set at creation using any of the attributes listed... RDoc will say they're "read-only" because you can't set/change them after creation.
|
336
|
+
#
|
335
337
|
# Does not immediately download/parse... it's lazy-loading.
|
336
338
|
#
|
337
339
|
# @overload initialize(settings)
|
@@ -402,7 +404,7 @@ class RemoteTable
|
|
402
404
|
@errata_settings = grab settings, :errata_settings
|
403
405
|
|
404
406
|
@other_options = settings
|
405
|
-
|
407
|
+
|
406
408
|
@transformer = Transformer.new self
|
407
409
|
@local_copy = LocalCopy.new self
|
408
410
|
end
|
@@ -445,7 +447,7 @@ class RemoteTable
|
|
445
447
|
|
446
448
|
# @deprecated
|
447
449
|
alias :each_row :each
|
448
|
-
|
450
|
+
|
449
451
|
# @return [Array<Hash,Array>] All rows.
|
450
452
|
def to_a
|
451
453
|
if fully_cached?
|
@@ -457,7 +459,7 @@ class RemoteTable
|
|
457
459
|
|
458
460
|
# @deprecated
|
459
461
|
alias :rows :to_a
|
460
|
-
|
462
|
+
|
461
463
|
# Get a row by row number. Zero-based.
|
462
464
|
#
|
463
465
|
# @return [Hash,Array]
|
@@ -468,7 +470,7 @@ class RemoteTable
|
|
468
470
|
to_a[row_number]
|
469
471
|
end
|
470
472
|
end
|
471
|
-
|
473
|
+
|
472
474
|
# Clear the row cache in case it helps your GC.
|
473
475
|
#
|
474
476
|
# @return [nil]
|
@@ -494,7 +496,7 @@ class RemoteTable
|
|
494
496
|
end
|
495
497
|
|
496
498
|
private
|
497
|
-
|
499
|
+
|
498
500
|
def mark_download!
|
499
501
|
@download_count_mutex.synchronize do
|
500
502
|
@download_count += 1
|
@@ -502,12 +504,12 @@ class RemoteTable
|
|
502
504
|
if warn_on_multiple_downloads and download_count > 1
|
503
505
|
::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
|
504
506
|
end
|
505
|
-
end
|
506
|
-
|
507
|
+
end
|
508
|
+
|
507
509
|
def fully_cached!
|
508
510
|
@fully_cached = true
|
509
511
|
end
|
510
|
-
|
512
|
+
|
511
513
|
def fully_cached?
|
512
514
|
!!@fully_cached
|
513
515
|
end
|
@@ -6,27 +6,25 @@ class RemoteTable
|
|
6
6
|
|
7
7
|
# Yield each row using Roo.
|
8
8
|
def _each
|
9
|
-
# sometimes Roo forgets to require iconv.
|
10
|
-
require 'iconv'
|
11
9
|
require 'roo'
|
12
10
|
|
13
11
|
spreadsheet = roo_class.new local_copy.path, nil, :ignore
|
14
12
|
if sheet
|
15
13
|
spreadsheet.default_sheet = sheet
|
16
14
|
end
|
17
|
-
|
15
|
+
|
18
16
|
first_row = if crop
|
19
17
|
crop.first + 1
|
20
18
|
else
|
21
19
|
skip + 1
|
22
20
|
end
|
23
|
-
|
21
|
+
|
24
22
|
last_row = if crop
|
25
23
|
crop.last
|
26
24
|
else
|
27
25
|
spreadsheet.last_row
|
28
26
|
end
|
29
|
-
|
27
|
+
|
30
28
|
if not headers
|
31
29
|
|
32
30
|
# create an array to represent this row
|
@@ -48,7 +46,7 @@ class RemoteTable
|
|
48
46
|
end
|
49
47
|
|
50
48
|
else
|
51
|
-
|
49
|
+
|
52
50
|
# create a hash to represent this row
|
53
51
|
current_headers = ::ActiveSupport::OrderedHash.new
|
54
52
|
if headers == :first_row
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.files = `git ls-files`.split("\n")
|
17
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
18
|
s.require_paths = ["lib"]
|
19
|
-
|
19
|
+
|
20
20
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
21
|
-
s.add_runtime_dependency 'roo', '>= 1.
|
21
|
+
s.add_runtime_dependency 'roo', '>= 1.11'
|
22
22
|
s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
|
23
23
|
s.add_runtime_dependency 'i18n' # activesupport?
|
24
24
|
s.add_runtime_dependency 'unix_utils', '>=0.0.8'
|
data/test/test_remote_table.rb
CHANGED
@@ -165,4 +165,33 @@ describe RemoteTable do
|
|
165
165
|
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
|
166
166
|
t[0]['Class'].must_equal 'TWO SEATERS'
|
167
167
|
end
|
168
|
+
|
169
|
+
{
|
170
|
+
'foo.ods' => :ods,
|
171
|
+
'foo.open_office' => :ods,
|
172
|
+
'foo.xlsx' => :xlsx,
|
173
|
+
'foo.excelx' => :xlsx,
|
174
|
+
'foo.xls' => :xls,
|
175
|
+
'foo.excel' => :xls,
|
176
|
+
'foo.csv' => :delimited,
|
177
|
+
'foo.tsv' => :delimited,
|
178
|
+
'foo.delimited' => :delimited,
|
179
|
+
'foo.fixed_width' => :fixed_width,
|
180
|
+
'foo.htm' => :html,
|
181
|
+
'foo.html' => :html,
|
182
|
+
'foo.xml' => :xml,
|
183
|
+
'foo.yaml' => :yaml,
|
184
|
+
'foo.yml' => :yaml,
|
185
|
+
'foo.shp' => :shp
|
186
|
+
}.each do |basename, format|
|
187
|
+
it "detects the #{format} format from the filename #{basename}" do
|
188
|
+
RemoteTable.guess_format(basename).must_equal format
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
it "detects the correct extension name without confusion from basename" do
|
193
|
+
[ 'foo.xls', 'xlsx.xls', 'foo_xls' ].each do |basename|
|
194
|
+
RemoteTable.guess_format(basename).must_equal :xls
|
195
|
+
end
|
196
|
+
end
|
168
197
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.1
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -31,14 +31,14 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - ! '>='
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 1.
|
34
|
+
version: '1.11'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - ! '>='
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: 1.
|
41
|
+
version: '1.11'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: fixed_width-multibyte
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -283,7 +283,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
283
283
|
version: '0'
|
284
284
|
requirements: []
|
285
285
|
rubyforge_project: remotetable
|
286
|
-
rubygems_version: 2.
|
286
|
+
rubygems_version: 2.1.5
|
287
287
|
signing_key:
|
288
288
|
specification_version: 4
|
289
289
|
summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma
|