remote_table 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +9 -9
- data/CHANGELOG +6 -0
- data/lib/remote_table.rb +41 -39
- data/lib/remote_table/processed_by_roo.rb +4 -6
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +2 -2
- data/test/test_remote_table.rb +29 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NDMzYjM5MmViMDA2OTRjMDMwMjljYmJjMmJhNmExODNmMTkzYmI3OA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
7
|
-
|
6
|
+
MDkxZmI3MWM4ZTJjYWM3NDYxOGQzMzRiYzIyMGQ2ODAyOWY0NTNjNg==
|
7
|
+
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OWU5NzM2OWM1MDljMTg3MzJiOWZjN2MxMzliYjhhZDdmNDYyY2E5ZWFhOTE2
|
10
|
+
MWI4YjA1NWM5YjhiMDYxMjA1ZDBkZTFiZTQ4OGJiMDlmNGVhNjg5Nzg2ZTkz
|
11
|
+
MmIxZTZmOGQyMmRhNTI4OTk5YjU1ODM0NjI1ZmIzYzMyZmFiNmI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZTFhMTk4ZjAyZTdkM2RmZmM1ZmEyZmI5ZWQzMWFkZTliZWIxNmMxMjdhYTBm
|
14
|
+
NDg0YTQ3YThlYzgxNTRhZTQ5YTYzMDU5ZGYxODkzNmIxOWMwMzk5YjMwZTA4
|
15
|
+
ODVmOGRkNDBmNmExMjUyOWRmNTMzN2U1MjFmZjUyNzZmMzM5OTE=
|
data/CHANGELOG
CHANGED
data/lib/remote_table.rb
CHANGED
@@ -72,25 +72,25 @@ class RemoteTable
|
|
72
72
|
# Guess file format from the basename. Since a file might be decompressed and/or pulled out of an archive with a glob, this usually can't be called until a file is downloaded.
|
73
73
|
# @return [Symbol,nil]
|
74
74
|
def guess_format(basename)
|
75
|
-
case basename.to_s.downcase
|
76
|
-
when /ods/, /open_?office/
|
75
|
+
case basename.to_s.downcase.strip
|
76
|
+
when /ods\z/, /open_?office\z/
|
77
77
|
:ods
|
78
|
-
when /xlsx/, /excelx/
|
78
|
+
when /xlsx\z/, /excelx\z/
|
79
79
|
:xlsx
|
80
|
-
when /xls/, /excel/
|
80
|
+
when /xls\z/, /excel\z/
|
81
81
|
:xls
|
82
|
-
when /csv/, /tsv/, /delimited/
|
82
|
+
when /csv\z/, /tsv\z/, /delimited\z/
|
83
83
|
# note that there is no RemoteTable::Csv class - it's normalized to :delimited
|
84
84
|
:delimited
|
85
|
-
when /fixed_?width/
|
85
|
+
when /fixed_?width\z/
|
86
86
|
:fixed_width
|
87
|
-
when /
|
87
|
+
when /html?\z/
|
88
88
|
:html
|
89
|
-
when /xml/
|
89
|
+
when /xml\z/
|
90
90
|
:xml
|
91
|
-
when /yaml/, /yml/
|
91
|
+
when /yaml\z/, /yml\z/
|
92
92
|
:yaml
|
93
|
-
when /shp/
|
93
|
+
when /shp\z/
|
94
94
|
:shp
|
95
95
|
end
|
96
96
|
end
|
@@ -144,7 +144,7 @@ class RemoteTable
|
|
144
144
|
}
|
145
145
|
|
146
146
|
include ::Enumerable
|
147
|
-
|
147
|
+
|
148
148
|
# The URL of the local or remote file.
|
149
149
|
#
|
150
150
|
# @example Local
|
@@ -185,23 +185,23 @@ class RemoteTable
|
|
185
185
|
# Whether to warn the user on multiple downloads. Defaults to true.
|
186
186
|
# @return [true,false]
|
187
187
|
attr_reader :warn_on_multiple_downloads
|
188
|
-
|
188
|
+
|
189
189
|
# Headers specified by the user: +:first_row+ (the default), +false+, or a list of headers.
|
190
190
|
# @return [:first_row,false,Array<String>]
|
191
191
|
attr_reader :headers
|
192
|
-
|
192
|
+
|
193
193
|
# The sheet specified by the user as a number or a string.
|
194
194
|
# @return[String,Integer]
|
195
195
|
attr_reader :sheet
|
196
|
-
|
196
|
+
|
197
197
|
# Whether to keep blank rows. Default is false.
|
198
198
|
# @return [true,false]
|
199
199
|
attr_reader :keep_blank_rows
|
200
|
-
|
200
|
+
|
201
201
|
# Form data to POST in the download request. It should probably be in +application/x-www-form-urlencoded+.
|
202
202
|
# @return [String]
|
203
203
|
attr_reader :form_data
|
204
|
-
|
204
|
+
|
205
205
|
# How many rows to skip at the beginning of the file or table. Default is 0.
|
206
206
|
# @return [Integer]
|
207
207
|
attr_reader :skip
|
@@ -209,15 +209,15 @@ class RemoteTable
|
|
209
209
|
# The original encoding of the source file. Default is UTF-8. Previously passed as +:encoding+.
|
210
210
|
# @return [String]
|
211
211
|
attr_reader :internal_encoding
|
212
|
-
|
212
|
+
|
213
213
|
# The delimiter, a.k.a. column separator. Passed to Ruby CSV as +:col_sep+. Default is :delimited.
|
214
214
|
# @return [String]
|
215
215
|
attr_reader :delimiter
|
216
|
-
|
216
|
+
|
217
217
|
# The XPath used to find rows in HTML or XML.
|
218
218
|
# @return [String]
|
219
219
|
attr_reader :row_xpath
|
220
|
-
|
220
|
+
|
221
221
|
# The XPath used to find columns in HTML or XML.
|
222
222
|
# @return [String]
|
223
223
|
attr_reader :column_xpath
|
@@ -225,11 +225,11 @@ class RemoteTable
|
|
225
225
|
# The CSS selector used to find rows in HTML or XML.
|
226
226
|
# @return [String]
|
227
227
|
attr_reader :row_css
|
228
|
-
|
228
|
+
|
229
229
|
# The CSS selector used to find columns in HTML or XML.
|
230
230
|
# @return [String]
|
231
231
|
attr_reader :column_css
|
232
|
-
|
232
|
+
|
233
233
|
# The format of the source file. Can be +:xlsx+, +:xls+, +:delimited+, +:ods+, +:fixed_width+, +:html+, +:xml+, +:yaml+.
|
234
234
|
# @return [Symbol]
|
235
235
|
attr_reader :format
|
@@ -241,7 +241,7 @@ class RemoteTable
|
|
241
241
|
# The packing type. Guessed from URL if not provided. Only +:tar+ is supported.
|
242
242
|
# @return [Symbol]
|
243
243
|
attr_reader :packing
|
244
|
-
|
244
|
+
|
245
245
|
# The glob used to pick a file out of an archive.
|
246
246
|
#
|
247
247
|
# @return [String]
|
@@ -249,7 +249,7 @@ class RemoteTable
|
|
249
249
|
# @example Pick out the only CSV in a ZIP file
|
250
250
|
# RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
251
251
|
attr_reader :glob
|
252
|
-
|
252
|
+
|
253
253
|
# The filename, which can be used to pick a file out of an archive.
|
254
254
|
#
|
255
255
|
# @return [String]
|
@@ -267,7 +267,7 @@ class RemoteTable
|
|
267
267
|
# # ALMOST
|
268
268
|
# RemoteTable.new 'file:///atoz.txt', :cut => '1,12,13,15,19,20'
|
269
269
|
attr_reader :cut
|
270
|
-
|
270
|
+
|
271
271
|
# Use a range of rows in a plaintext file.
|
272
272
|
#
|
273
273
|
# @return [Range]
|
@@ -278,7 +278,7 @@ class RemoteTable
|
|
278
278
|
# :select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
|
279
279
|
# :crop => (21..37))
|
280
280
|
attr_reader :crop
|
281
|
-
|
281
|
+
|
282
282
|
# The fixed-width schema, given as a multi-dimensional array.
|
283
283
|
#
|
284
284
|
# @return [Array<Array{String,Integer,Hash}>]
|
@@ -293,15 +293,15 @@ class RemoteTable
|
|
293
293
|
# [ 'spacer', 12 ],
|
294
294
|
# [ 'header6', 10, { :type => :string } ]])
|
295
295
|
attr_reader :schema
|
296
|
-
|
296
|
+
|
297
297
|
# If you somehow already defined a fixed-width schema (so you can re-use it?), specify it here.
|
298
298
|
# @return [String,Symbol]
|
299
299
|
attr_reader :schema_name
|
300
|
-
|
300
|
+
|
301
301
|
# A proc that decides whether to include a row. Previously passed as +:select+.
|
302
302
|
# @return [Proc]
|
303
303
|
attr_reader :pre_select
|
304
|
-
|
304
|
+
|
305
305
|
# A proc that decides whether to include a row. Previously passed as +:reject+.
|
306
306
|
# @return [Proc]
|
307
307
|
attr_reader :pre_reject
|
@@ -309,14 +309,14 @@ class RemoteTable
|
|
309
309
|
# Settings to create a transformer.
|
310
310
|
# @return [Hash]
|
311
311
|
attr_reader :transform_settings
|
312
|
-
|
313
|
-
# A hash
|
312
|
+
|
313
|
+
# A hash to initialize an Errata instance to be used on every row. Applied after creating +row_hash+ and before passing to +:synthesize+ procs, etc. Previously passed as +:errata+.
|
314
314
|
#
|
315
315
|
# See the Errata library at https://github.com/seamusabshere/errata
|
316
316
|
#
|
317
317
|
# @return [Hash]
|
318
318
|
attr_reader :errata_settings
|
319
|
-
|
319
|
+
|
320
320
|
# The format of the source file. Can be specified as: :xlsx, :xls, :delimited (aka :csv), :ods, :fixed_width, :html, :xml, :yaml
|
321
321
|
#
|
322
322
|
# Note: treats all +docs.google.com+ and +spreadsheets.google.com+ URLs as +:delimited+.
|
@@ -332,6 +332,8 @@ class RemoteTable
|
|
332
332
|
|
333
333
|
# Create a new RemoteTable, which is an Enumerable.
|
334
334
|
#
|
335
|
+
# Options are set at creation using any of the attributes listed... RDoc will say they're "read-only" because you can't set/change them after creation.
|
336
|
+
#
|
335
337
|
# Does not immediately download/parse... it's lazy-loading.
|
336
338
|
#
|
337
339
|
# @overload initialize(settings)
|
@@ -402,7 +404,7 @@ class RemoteTable
|
|
402
404
|
@errata_settings = grab settings, :errata_settings
|
403
405
|
|
404
406
|
@other_options = settings
|
405
|
-
|
407
|
+
|
406
408
|
@transformer = Transformer.new self
|
407
409
|
@local_copy = LocalCopy.new self
|
408
410
|
end
|
@@ -445,7 +447,7 @@ class RemoteTable
|
|
445
447
|
|
446
448
|
# @deprecated
|
447
449
|
alias :each_row :each
|
448
|
-
|
450
|
+
|
449
451
|
# @return [Array<Hash,Array>] All rows.
|
450
452
|
def to_a
|
451
453
|
if fully_cached?
|
@@ -457,7 +459,7 @@ class RemoteTable
|
|
457
459
|
|
458
460
|
# @deprecated
|
459
461
|
alias :rows :to_a
|
460
|
-
|
462
|
+
|
461
463
|
# Get a row by row number. Zero-based.
|
462
464
|
#
|
463
465
|
# @return [Hash,Array]
|
@@ -468,7 +470,7 @@ class RemoteTable
|
|
468
470
|
to_a[row_number]
|
469
471
|
end
|
470
472
|
end
|
471
|
-
|
473
|
+
|
472
474
|
# Clear the row cache in case it helps your GC.
|
473
475
|
#
|
474
476
|
# @return [nil]
|
@@ -494,7 +496,7 @@ class RemoteTable
|
|
494
496
|
end
|
495
497
|
|
496
498
|
private
|
497
|
-
|
499
|
+
|
498
500
|
def mark_download!
|
499
501
|
@download_count_mutex.synchronize do
|
500
502
|
@download_count += 1
|
@@ -502,12 +504,12 @@ class RemoteTable
|
|
502
504
|
if warn_on_multiple_downloads and download_count > 1
|
503
505
|
::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
|
504
506
|
end
|
505
|
-
end
|
506
|
-
|
507
|
+
end
|
508
|
+
|
507
509
|
def fully_cached!
|
508
510
|
@fully_cached = true
|
509
511
|
end
|
510
|
-
|
512
|
+
|
511
513
|
def fully_cached?
|
512
514
|
!!@fully_cached
|
513
515
|
end
|
@@ -6,27 +6,25 @@ class RemoteTable
|
|
6
6
|
|
7
7
|
# Yield each row using Roo.
|
8
8
|
def _each
|
9
|
-
# sometimes Roo forgets to require iconv.
|
10
|
-
require 'iconv'
|
11
9
|
require 'roo'
|
12
10
|
|
13
11
|
spreadsheet = roo_class.new local_copy.path, nil, :ignore
|
14
12
|
if sheet
|
15
13
|
spreadsheet.default_sheet = sheet
|
16
14
|
end
|
17
|
-
|
15
|
+
|
18
16
|
first_row = if crop
|
19
17
|
crop.first + 1
|
20
18
|
else
|
21
19
|
skip + 1
|
22
20
|
end
|
23
|
-
|
21
|
+
|
24
22
|
last_row = if crop
|
25
23
|
crop.last
|
26
24
|
else
|
27
25
|
spreadsheet.last_row
|
28
26
|
end
|
29
|
-
|
27
|
+
|
30
28
|
if not headers
|
31
29
|
|
32
30
|
# create an array to represent this row
|
@@ -48,7 +46,7 @@ class RemoteTable
|
|
48
46
|
end
|
49
47
|
|
50
48
|
else
|
51
|
-
|
49
|
+
|
52
50
|
# create a hash to represent this row
|
53
51
|
current_headers = ::ActiveSupport::OrderedHash.new
|
54
52
|
if headers == :first_row
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.files = `git ls-files`.split("\n")
|
17
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
18
|
s.require_paths = ["lib"]
|
19
|
-
|
19
|
+
|
20
20
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
21
|
-
s.add_runtime_dependency 'roo', '>= 1.
|
21
|
+
s.add_runtime_dependency 'roo', '>= 1.11'
|
22
22
|
s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
|
23
23
|
s.add_runtime_dependency 'i18n' # activesupport?
|
24
24
|
s.add_runtime_dependency 'unix_utils', '>=0.0.8'
|
data/test/test_remote_table.rb
CHANGED
@@ -165,4 +165,33 @@ describe RemoteTable do
|
|
165
165
|
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
|
166
166
|
t[0]['Class'].must_equal 'TWO SEATERS'
|
167
167
|
end
|
168
|
+
|
169
|
+
{
|
170
|
+
'foo.ods' => :ods,
|
171
|
+
'foo.open_office' => :ods,
|
172
|
+
'foo.xlsx' => :xlsx,
|
173
|
+
'foo.excelx' => :xlsx,
|
174
|
+
'foo.xls' => :xls,
|
175
|
+
'foo.excel' => :xls,
|
176
|
+
'foo.csv' => :delimited,
|
177
|
+
'foo.tsv' => :delimited,
|
178
|
+
'foo.delimited' => :delimited,
|
179
|
+
'foo.fixed_width' => :fixed_width,
|
180
|
+
'foo.htm' => :html,
|
181
|
+
'foo.html' => :html,
|
182
|
+
'foo.xml' => :xml,
|
183
|
+
'foo.yaml' => :yaml,
|
184
|
+
'foo.yml' => :yaml,
|
185
|
+
'foo.shp' => :shp
|
186
|
+
}.each do |basename, format|
|
187
|
+
it "detects the #{format} format from the filename #{basename}" do
|
188
|
+
RemoteTable.guess_format(basename).must_equal format
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
it "detects the correct extension name without confusion from basename" do
|
193
|
+
[ 'foo.xls', 'xlsx.xls', 'foo_xls' ].each do |basename|
|
194
|
+
RemoteTable.guess_format(basename).must_equal :xls
|
195
|
+
end
|
196
|
+
end
|
168
197
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.1
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -31,14 +31,14 @@ dependencies:
|
|
31
31
|
requirements:
|
32
32
|
- - ! '>='
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: 1.
|
34
|
+
version: '1.11'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - ! '>='
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: 1.
|
41
|
+
version: '1.11'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: fixed_width-multibyte
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -283,7 +283,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
283
283
|
version: '0'
|
284
284
|
requirements: []
|
285
285
|
rubyforge_project: remotetable
|
286
|
-
rubygems_version: 2.
|
286
|
+
rubygems_version: 2.1.5
|
287
287
|
signing_key:
|
288
288
|
specification_version: 4
|
289
289
|
summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma
|