remote_table 3.0.0.rc2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +35 -0
- data/lib/remote_table.rb +36 -36
- data/lib/remote_table/delimited.rb +1 -1
- data/lib/remote_table/processed_by_roo.rb +8 -7
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +2 -3
- data/test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx +0 -0
- data/test/data/bom.csv +2 -0
- data/test/data/faa-aircraft.html.bz2 +0 -0
- data/test/helper.rb +0 -4
- data/test/test_old_syntax.rb +10 -9
- data/test/test_remote.rb +2 -12
- data/test/test_remote_table.rb +30 -3
- metadata +15 -57
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NmMzNjkxNDIyZmJlOGQ0MjNlNjZhZjIwMTE1NDNkNWMzN2I2ZGYxNg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZWQ4MDg4YzQwNTEwMTlkMjk3OTM1NzQzOTI5YzNhODBiOWU3NDVmMA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YTg0N2ZiZjBhNTllNTg2MmEzMDQ0OGZkZDQwODNkMDg3YjUzY2M1YWMwZTdj
|
10
|
+
ODcwMWZhNzQxMzI0ZDY5MDU2MTY5ZDVkNjEwMzZkY2Q3YzExMmFmMzUwNzcz
|
11
|
+
MzI3ZjY1N2M2ZTYzYjY4YmNhN2RlODQzZTIwMDkyYmEyM2Q1Zjk=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MWY4YjMyNjA3Yzc1ZTRiMDYxNTEyMjk4M2Q5YmFhZDc2OTUwNTBiODVhZWVi
|
14
|
+
YTIwNTEzYmRmMjdiMGRjODkzNTIyM2FlM2FkMWNhNDhjNDk3YWQ4NTk3YzJl
|
15
|
+
OWVlMjkwY2UyMTZiNzk0MjM0Mjg3ZTFhNTcwYWZiNjVhNDMyMzM=
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,32 @@
|
|
1
|
+
3.0.0 / 2013-11-26
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Include columns with blank headers as "untitled_N"
|
6
|
+
* skip blank lines at the top of CSV files when looking for headers [rc2]
|
7
|
+
* require rubyzip2 for Roo compatibility with Ruby 1.9 [rc2]
|
8
|
+
* :parser object must respond to #call(row), not #parse(row) [rc1]
|
9
|
+
* Whitespace now automatically "normalized" (/\s+/ -> ' ', stripped) for CSV [rc1]
|
10
|
+
* Strip whitespace from headers unless you provide them as an array [beta]
|
11
|
+
* Not passing any options on to CSV... use :delimiter instead of :col_sep [beta]
|
12
|
+
* Just use :encoding to specify a file's internal/initial/original encoding. No more :internal_encoding, this was jargony. [alpha]
|
13
|
+
* No more shapefile support - hard to imagine a shapefile as a table [alpha]
|
14
|
+
* No more :transform, legacy or not [alpha]
|
15
|
+
* For :errata, now you pass an object that responds to #rejects?(row) and #correct!(row) [alpha]
|
16
|
+
* No more :errata_settings [alpha]
|
17
|
+
|
18
|
+
* Enhancements
|
19
|
+
|
20
|
+
* eagerly load format-specific methods (instead of waiting for #each to be called) [rc2]
|
21
|
+
* use new Roo class names (Roo::OpenOffice instead of Roo::Openoffice) [rc2]
|
22
|
+
* Support relative paths with spaces [beta]
|
23
|
+
* :parser option takes an object that responds to #parse(row) and returns an array of one or more rows. [alpha]
|
24
|
+
* RemoteTable.transpose(url, key_key, value_key) helper [alpha]
|
25
|
+
|
26
|
+
* Bug fixes
|
27
|
+
|
28
|
+
* pass CSV options to header parser - before, options like delimiter weren't passed so it couldn't read TSVs [rc2]
|
29
|
+
|
1
30
|
3.0.0.rc2 / 2013-08-30
|
2
31
|
|
3
32
|
* Breaking changes
|
@@ -48,6 +77,12 @@
|
|
48
77
|
* :parser option takes an object that responds to #parse(row) and returns an array of one or more rows.
|
49
78
|
* RemoteTable.transpose(url, key_key, value_key) helper
|
50
79
|
|
80
|
+
2.1.2 / 2013-10-08
|
81
|
+
|
82
|
+
* Bug fixes
|
83
|
+
|
84
|
+
* Correctly detect format from filename - don't get confused if a filename has "xls" in it (thanks @activefx #10)
|
85
|
+
|
51
86
|
2.1.1 / 2013-03-25
|
52
87
|
|
53
88
|
* Enhancements
|
data/lib/remote_table.rb
CHANGED
@@ -78,23 +78,23 @@ class RemoteTable
|
|
78
78
|
# Guess file format from the basename. Since a file might be decompressed and/or pulled out of an archive with a glob, this usually can't be called until a file is downloaded.
|
79
79
|
# @return [Symbol,nil]
|
80
80
|
def guess_format(basename)
|
81
|
-
case basename.to_s.downcase
|
82
|
-
when /ods/, /open_?office/
|
81
|
+
case basename.to_s.downcase.strip
|
82
|
+
when /ods\z/, /open_?office\z/
|
83
83
|
:ods
|
84
|
-
when /xlsx/, /excelx/
|
84
|
+
when /xlsx\z/, /excelx\z/
|
85
85
|
:xlsx
|
86
|
-
when /xls/, /excel/
|
86
|
+
when /xls\z/, /excel\z/
|
87
87
|
:xls
|
88
|
-
when /csv/, /tsv/, /delimited/
|
88
|
+
when /csv\z/, /tsv\z/, /delimited\z/
|
89
89
|
# note that there is no RemoteTable::Csv class - it's normalized to :delimited
|
90
90
|
:delimited
|
91
|
-
when /fixed_?width/
|
91
|
+
when /fixed_?width\z/
|
92
92
|
:fixed_width
|
93
|
-
when /
|
93
|
+
when /html?\z/
|
94
94
|
:html
|
95
|
-
when /xml/
|
95
|
+
when /xml\z/
|
96
96
|
:xml
|
97
|
-
when /yaml/, /yml/
|
97
|
+
when /yaml\z/, /yml\z/
|
98
98
|
:yaml
|
99
99
|
end
|
100
100
|
end
|
@@ -166,7 +166,7 @@ class RemoteTable
|
|
166
166
|
}
|
167
167
|
|
168
168
|
include ::Enumerable
|
169
|
-
|
169
|
+
|
170
170
|
# The URL of the local or remote file.
|
171
171
|
#
|
172
172
|
# @example Local
|
@@ -203,7 +203,7 @@ class RemoteTable
|
|
203
203
|
# Whether to warn the user on multiple downloads. Defaults to true.
|
204
204
|
# @return [true,false]
|
205
205
|
attr_reader :warn_on_multiple_downloads
|
206
|
-
|
206
|
+
|
207
207
|
# Headers specified by the user: +:first_row+ (the default), +false+, or a list of headers.
|
208
208
|
# @return [:first_row,false,Array<String>]
|
209
209
|
attr_reader :headers
|
@@ -214,19 +214,19 @@ class RemoteTable
|
|
214
214
|
#
|
215
215
|
# @return [String]
|
216
216
|
attr_reader :quote_char
|
217
|
-
|
217
|
+
|
218
218
|
# The sheet specified by the user as a number or a string.
|
219
219
|
# @return [String,Integer]
|
220
220
|
attr_reader :sheet
|
221
|
-
|
221
|
+
|
222
222
|
# Whether to keep blank rows. Default is false.
|
223
223
|
# @return [true,false]
|
224
224
|
attr_reader :keep_blank_rows
|
225
|
-
|
225
|
+
|
226
226
|
# Form data to POST in the download request. It should probably be in +application/x-www-form-urlencoded+.
|
227
227
|
# @return [String]
|
228
228
|
attr_reader :form_data
|
229
|
-
|
229
|
+
|
230
230
|
# How many rows to skip at the beginning of the file or table. Default is 0.
|
231
231
|
# @return [Integer]
|
232
232
|
attr_reader :skip
|
@@ -234,15 +234,15 @@ class RemoteTable
|
|
234
234
|
# The original encoding of the source file. Default is UTF-8.
|
235
235
|
# @return [String]
|
236
236
|
attr_reader :encoding
|
237
|
-
|
237
|
+
|
238
238
|
# The delimiter, a.k.a. column separator. Passed to Ruby CSV as +:col_sep+. Default is ','.
|
239
239
|
# @return [String]
|
240
240
|
attr_reader :delimiter
|
241
|
-
|
241
|
+
|
242
242
|
# The XPath used to find rows in HTML or XML.
|
243
243
|
# @return [String]
|
244
244
|
attr_reader :row_xpath
|
245
|
-
|
245
|
+
|
246
246
|
# The XPath used to find columns in HTML or XML.
|
247
247
|
# @return [String]
|
248
248
|
attr_reader :column_xpath
|
@@ -250,11 +250,11 @@ class RemoteTable
|
|
250
250
|
# The CSS selector used to find rows in HTML or XML.
|
251
251
|
# @return [String]
|
252
252
|
attr_reader :row_css
|
253
|
-
|
253
|
+
|
254
254
|
# The CSS selector used to find columns in HTML or XML.
|
255
255
|
# @return [String]
|
256
256
|
attr_reader :column_css
|
257
|
-
|
257
|
+
|
258
258
|
# The format of the source file. Can be +:xlsx+, +:xls+, +:delimited+, +:ods+, +:fixed_width+, +:html+, +:xml+, +:yaml+.
|
259
259
|
# @return [Symbol]
|
260
260
|
attr_reader :format
|
@@ -266,7 +266,7 @@ class RemoteTable
|
|
266
266
|
# The packing type. Guessed from URL if not provided. Only +:tar+ is supported.
|
267
267
|
# @return [Symbol]
|
268
268
|
attr_reader :packing
|
269
|
-
|
269
|
+
|
270
270
|
# The glob used to pick a file out of an archive.
|
271
271
|
#
|
272
272
|
# @return [String]
|
@@ -274,7 +274,7 @@ class RemoteTable
|
|
274
274
|
# @example Pick out the only CSV in a ZIP file
|
275
275
|
# RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
276
276
|
attr_reader :glob
|
277
|
-
|
277
|
+
|
278
278
|
# The filename, which can be used to pick a file out of an archive.
|
279
279
|
#
|
280
280
|
# @return [String]
|
@@ -292,7 +292,7 @@ class RemoteTable
|
|
292
292
|
# # ALMOST
|
293
293
|
# RemoteTable.new 'file:///atoz.txt', :cut => '1,12,13,15,19,20'
|
294
294
|
attr_reader :cut
|
295
|
-
|
295
|
+
|
296
296
|
# Use a range of rows in a plaintext file.
|
297
297
|
#
|
298
298
|
# @return [Range]
|
@@ -303,7 +303,7 @@ class RemoteTable
|
|
303
303
|
# :select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
|
304
304
|
# :crop => (21..37))
|
305
305
|
attr_reader :crop
|
306
|
-
|
306
|
+
|
307
307
|
# The fixed-width schema, given as a multi-dimensional array.
|
308
308
|
#
|
309
309
|
# @return [Array<Array{String,Integer,Hash}>]
|
@@ -318,15 +318,15 @@ class RemoteTable
|
|
318
318
|
# [ 'spacer', 12 ],
|
319
319
|
# [ 'header6', 10, { :type => :string } ]])
|
320
320
|
attr_reader :schema
|
321
|
-
|
321
|
+
|
322
322
|
# If you somehow already defined a fixed-width schema (so you can re-use it?), specify it here.
|
323
323
|
# @return [String,Symbol]
|
324
324
|
attr_reader :schema_name
|
325
|
-
|
325
|
+
|
326
326
|
# A proc that decides whether to include a row. Previously passed as +:select+.
|
327
327
|
# @return [Proc]
|
328
328
|
attr_reader :pre_select
|
329
|
-
|
329
|
+
|
330
330
|
# A proc that decides whether to include a row. Previously passed as +:reject+.
|
331
331
|
# @return [Proc]
|
332
332
|
attr_reader :pre_reject
|
@@ -340,7 +340,7 @@ class RemoteTable
|
|
340
340
|
#
|
341
341
|
# @return [Hash]
|
342
342
|
attr_reader :errata
|
343
|
-
|
343
|
+
|
344
344
|
# The format of the source file. Can be specified as: :xlsx, :xls, :delimited (aka :csv), :ods, :fixed_width, :html, :xml, :yaml
|
345
345
|
#
|
346
346
|
# Note: treats all +docs.google.com+ and +spreadsheets.google.com+ URLs as +:delimited+.
|
@@ -442,7 +442,7 @@ class RemoteTable
|
|
442
442
|
@parser = grab settings, :parser
|
443
443
|
|
444
444
|
@other_options = settings
|
445
|
-
|
445
|
+
|
446
446
|
@local_copy = LocalCopy.new self
|
447
447
|
extend!
|
448
448
|
end
|
@@ -485,7 +485,7 @@ class RemoteTable
|
|
485
485
|
|
486
486
|
# @deprecated
|
487
487
|
alias :each_row :each
|
488
|
-
|
488
|
+
|
489
489
|
# @return [Array<Hash,Array>] All rows.
|
490
490
|
def to_a
|
491
491
|
if fully_cached?
|
@@ -497,7 +497,7 @@ class RemoteTable
|
|
497
497
|
|
498
498
|
# @deprecated
|
499
499
|
alias :rows :to_a
|
500
|
-
|
500
|
+
|
501
501
|
# Get a row by row number. Zero-based.
|
502
502
|
#
|
503
503
|
# @return [Hash,Array]
|
@@ -508,7 +508,7 @@ class RemoteTable
|
|
508
508
|
to_a[row_number]
|
509
509
|
end
|
510
510
|
end
|
511
|
-
|
511
|
+
|
512
512
|
# Clear the row cache in case it helps your GC.
|
513
513
|
#
|
514
514
|
# @return [nil]
|
@@ -523,7 +523,7 @@ class RemoteTable
|
|
523
523
|
def preprocess!
|
524
524
|
# noop, overridden sometimes
|
525
525
|
end
|
526
|
-
|
526
|
+
|
527
527
|
def mark_download!
|
528
528
|
@download_count_mutex.synchronize do
|
529
529
|
@download_count += 1
|
@@ -531,12 +531,12 @@ class RemoteTable
|
|
531
531
|
if warn_on_multiple_downloads and download_count > 1
|
532
532
|
::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
|
533
533
|
end
|
534
|
-
end
|
535
|
-
|
534
|
+
end
|
535
|
+
|
536
536
|
def fully_cached!
|
537
537
|
@fully_cached = true
|
538
538
|
end
|
539
|
-
|
539
|
+
|
540
540
|
def fully_cached?
|
541
541
|
!!@fully_cached
|
542
542
|
end
|
@@ -84,7 +84,7 @@ class RemoteTable
|
|
84
84
|
if proto_headers
|
85
85
|
proto_headers.map do |v|
|
86
86
|
header = RemoteTable.normalize_whitespace v
|
87
|
-
header.present? ? header : "
|
87
|
+
header.present? ? header : "untitled_#{i+=1}"
|
88
88
|
end
|
89
89
|
else
|
90
90
|
raise "No headers found in first line: #{line.inspect}"
|
@@ -6,27 +6,25 @@ class RemoteTable
|
|
6
6
|
|
7
7
|
# Yield each row using Roo.
|
8
8
|
def _each
|
9
|
-
# sometimes Roo forgets to require iconv.
|
10
|
-
require 'iconv'
|
11
9
|
require 'roo'
|
12
10
|
|
13
|
-
spreadsheet = roo_class.new local_copy.path,
|
11
|
+
spreadsheet = roo_class.new local_copy.path, :file_warning => :ignore
|
14
12
|
if sheet
|
15
13
|
spreadsheet.default_sheet = sheet
|
16
14
|
end
|
17
|
-
|
15
|
+
|
18
16
|
first_row = if crop
|
19
17
|
crop.first + 1
|
20
18
|
else
|
21
19
|
skip + 1
|
22
20
|
end
|
23
|
-
|
21
|
+
|
24
22
|
last_row = if crop
|
25
23
|
crop.last
|
26
24
|
else
|
27
25
|
spreadsheet.last_row
|
28
26
|
end
|
29
|
-
|
27
|
+
|
30
28
|
if not headers
|
31
29
|
|
32
30
|
# create an array to represent this row
|
@@ -48,9 +46,10 @@ class RemoteTable
|
|
48
46
|
end
|
49
47
|
|
50
48
|
else
|
51
|
-
|
49
|
+
|
52
50
|
# create a hash to represent this row
|
53
51
|
current_headers = ::ActiveSupport::OrderedHash.new
|
52
|
+
i = 0
|
54
53
|
if headers == :first_row
|
55
54
|
(1..spreadsheet.last_column).each do |x|
|
56
55
|
v = spreadsheet.cell(first_row, x)
|
@@ -62,6 +61,8 @@ class RemoteTable
|
|
62
61
|
v = assume_utf8 v
|
63
62
|
# 'foobar' is found at column 6
|
64
63
|
current_headers[v] = x
|
64
|
+
else
|
65
|
+
current_headers["untitled_#{i+=1}"] = x
|
65
66
|
end
|
66
67
|
end
|
67
68
|
# "advance the cursor"
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.files = `git ls-files`.split("\n")
|
17
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
18
|
s.require_paths = ["lib"]
|
19
|
-
|
19
|
+
|
20
20
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
21
|
-
s.add_runtime_dependency 'roo', '>= 1.
|
21
|
+
s.add_runtime_dependency 'roo', '>= 1.11'
|
22
22
|
s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
|
23
23
|
s.add_runtime_dependency 'i18n' # activesupport?
|
24
24
|
s.add_runtime_dependency 'unix_utils', '>=0.0.8'
|
@@ -28,7 +28,6 @@ Gem::Specification.new do |s|
|
|
28
28
|
|
29
29
|
s.add_development_dependency 'errata', '>=0.2.0'
|
30
30
|
s.add_development_dependency 'minitest'
|
31
|
-
s.add_development_dependency 'minitest-reporters'
|
32
31
|
s.add_development_dependency 'rake'
|
33
32
|
s.add_development_dependency 'yard'
|
34
33
|
s.add_development_dependency 'pry'
|
Binary file
|
data/test/data/bom.csv
ADDED
Binary file
|
data/test/helper.rb
CHANGED
@@ -1,11 +1,7 @@
|
|
1
1
|
require 'bundler/setup'
|
2
2
|
|
3
|
-
require 'minitest/spec'
|
4
|
-
require 'minitest/reporters'
|
5
3
|
require 'minitest/autorun'
|
6
4
|
# require 'pry-rescue/minitest'
|
7
|
-
#MiniTest::Unit.runner = MiniTest::SuiteRunner.new
|
8
|
-
#MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
9
5
|
require 'remote_table'
|
10
6
|
|
11
7
|
class MiniTest::Spec
|
data/test/test_old_syntax.rb
CHANGED
@@ -5,30 +5,30 @@ $test2_rows_with_blanks = [
|
|
5
5
|
{ 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
|
6
6
|
{ 'header4' => '', 'header5' => '', 'header6' => '' },
|
7
7
|
{ 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
|
8
|
-
]
|
8
|
+
].map { |hsh| hsh.merge('untitled_1' => '') }
|
9
9
|
$test2_rows = [
|
10
10
|
{ 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
|
11
11
|
{ 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
|
12
|
-
]
|
12
|
+
].map { |hsh| hsh.merge('untitled_1' => '') }
|
13
13
|
$test2_rows_with_blanks.freeze
|
14
14
|
$test2_rows.freeze
|
15
15
|
|
16
16
|
describe RemoteTable do
|
17
17
|
describe "when using old-style syntax" do
|
18
18
|
it "open an XLSX like an array (numbered columns)" do
|
19
|
-
t = RemoteTable.new(
|
19
|
+
t = RemoteTable.new('test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx', :headers => false)
|
20
20
|
t.rows[0][0].must_equal "Requirements"
|
21
21
|
t.rows[5][0].must_equal "Software-As-A-Service"
|
22
22
|
end
|
23
23
|
|
24
24
|
it "open an XLSX with custom headers" do
|
25
|
-
t = RemoteTable.new(
|
25
|
+
t = RemoteTable.new('test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx', :headers => %w{foo bar baz})
|
26
26
|
t.rows[0]['foo'].must_equal "Requirements"
|
27
27
|
t.rows[5]['foo'].must_equal "Software-As-A-Service"
|
28
28
|
end
|
29
29
|
|
30
30
|
it "open an XLSX" do
|
31
|
-
t = RemoteTable.new(
|
31
|
+
t = RemoteTable.new('test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx')
|
32
32
|
t.rows[5]["Requirements"].must_equal "Secure encryption of all data"
|
33
33
|
end
|
34
34
|
|
@@ -43,8 +43,9 @@ describe RemoteTable do
|
|
43
43
|
end
|
44
44
|
|
45
45
|
it "ignore UTF-8 byte order marks" do
|
46
|
-
t = RemoteTable.new
|
47
|
-
t.rows
|
46
|
+
t = RemoteTable.new 'test/data/bom.csv'
|
47
|
+
t.rows[0]['one'].must_equal '1'
|
48
|
+
t.rows[0]['two'].must_equal '2'
|
48
49
|
end
|
49
50
|
|
50
51
|
# this will die with an error about libcurl if your curl doesn't support ssl
|
@@ -139,7 +140,7 @@ describe RemoteTable do
|
|
139
140
|
t.rows.all? { |row| row.keys.all?(&:present?) }.must_equal true
|
140
141
|
# correct values
|
141
142
|
t.rows.each_with_index do |row, index|
|
142
|
-
$test2_rows[index].must_equal row.except('row_hash')
|
143
|
+
$test2_rows[index].except('untitled_1').must_equal row.except('row_hash')
|
143
144
|
end
|
144
145
|
end
|
145
146
|
|
@@ -158,7 +159,7 @@ describe RemoteTable do
|
|
158
159
|
t.rows.all? { |row| row.keys.all?(&:present?) }.must_equal true
|
159
160
|
# correct values
|
160
161
|
t.rows.each_with_index do |row, index|
|
161
|
-
$test2_rows_with_blanks[index].must_equal row.except('row_hash')
|
162
|
+
$test2_rows_with_blanks[index].except('untitled_1').must_equal row.except('row_hash')
|
162
163
|
end
|
163
164
|
end
|
164
165
|
|
data/test/test_remote.rb
CHANGED
@@ -4,16 +4,6 @@ require 'tempfile'
|
|
4
4
|
|
5
5
|
describe RemoteTable do
|
6
6
|
describe 'used on remote files' do
|
7
|
-
it "open an XLSX" do
|
8
|
-
t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
9
|
-
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
10
|
-
end
|
11
|
-
|
12
|
-
it "does its best to download urls without http://" do
|
13
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
14
|
-
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
15
|
-
end
|
16
|
-
|
17
7
|
it "add a row hash to every row" do
|
18
8
|
t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
19
9
|
t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
|
@@ -91,11 +81,11 @@ describe RemoteTable do
|
|
91
81
|
end
|
92
82
|
|
93
83
|
it "read only certain rows of an XLSX" do
|
94
|
-
t = RemoteTable.new '
|
84
|
+
t = RemoteTable.new 'test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
|
95
85
|
t[0][0].must_equal "Permissioning and access groups for all content"
|
96
86
|
t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
|
97
87
|
|
98
|
-
t = RemoteTable.new '
|
88
|
+
t = RemoteTable.new 'test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
|
99
89
|
t[0]['col1'].must_equal "Permissioning and access groups for all content"
|
100
90
|
t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
|
101
91
|
end
|
data/test/test_remote_table.rb
CHANGED
@@ -74,10 +74,37 @@ describe RemoteTable do
|
|
74
74
|
end
|
75
75
|
|
76
76
|
# fixes ArgumentError: invalid byte sequence in UTF-8
|
77
|
-
# disabled because xpath not be somehow broken - works in chrome
|
78
77
|
it %{safely strip soft hyphens and read windows-1252 html} do
|
79
|
-
row_xpath = '/html/body/table[2]/
|
80
|
-
t = RemoteTable.new 'test/data/faa-aircraft.html', :row_xpath => row_xpath, :column_xpath => 'td', :encoding => 'windows-1252'
|
78
|
+
row_xpath = '/html/body/table[2]/tr/td/center/table/tr[3]/td/table/tr'
|
79
|
+
t = RemoteTable.new 'test/data/faa-aircraft.html.bz2', :row_xpath => row_xpath, :column_xpath => 'td', :encoding => 'windows-1252', format: :html
|
81
80
|
t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
|
82
81
|
end
|
82
|
+
|
83
|
+
{
|
84
|
+
'foo.ods' => :ods,
|
85
|
+
'foo.open_office' => :ods,
|
86
|
+
'foo.xlsx' => :xlsx,
|
87
|
+
'foo.excelx' => :xlsx,
|
88
|
+
'foo.xls' => :xls,
|
89
|
+
'foo.excel' => :xls,
|
90
|
+
'foo.csv' => :delimited,
|
91
|
+
'foo.tsv' => :delimited,
|
92
|
+
'foo.delimited' => :delimited,
|
93
|
+
'foo.fixed_width' => :fixed_width,
|
94
|
+
'foo.htm' => :html,
|
95
|
+
'foo.html' => :html,
|
96
|
+
'foo.xml' => :xml,
|
97
|
+
'foo.yaml' => :yaml,
|
98
|
+
'foo.yml' => :yaml
|
99
|
+
}.each do |basename, format|
|
100
|
+
it "detects the #{format} format from the filename #{basename}" do
|
101
|
+
RemoteTable.guess_format(basename).must_equal format
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
it "detects the correct extension name without confusion from basename" do
|
106
|
+
[ 'foo.xls', 'xlsx.xls', 'foo_xls' ].each do |basename|
|
107
|
+
RemoteTable.guess_format(basename).must_equal :xls
|
108
|
+
end
|
109
|
+
end
|
83
110
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
5
|
-
prerelease: 6
|
4
|
+
version: 3.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Seamus Abshere
|
@@ -10,12 +9,11 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date: 2013-
|
12
|
+
date: 2013-11-26 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: activesupport
|
17
16
|
requirement: !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
17
|
requirements:
|
20
18
|
- - ! '>='
|
21
19
|
- !ruby/object:Gem::Version
|
@@ -23,7 +21,6 @@ dependencies:
|
|
23
21
|
type: :runtime
|
24
22
|
prerelease: false
|
25
23
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
27
24
|
requirements:
|
28
25
|
- - ! '>='
|
29
26
|
- !ruby/object:Gem::Version
|
@@ -31,23 +28,20 @@ dependencies:
|
|
31
28
|
- !ruby/object:Gem::Dependency
|
32
29
|
name: roo
|
33
30
|
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
31
|
requirements:
|
36
32
|
- - ! '>='
|
37
33
|
- !ruby/object:Gem::Version
|
38
|
-
version: 1.
|
34
|
+
version: '1.11'
|
39
35
|
type: :runtime
|
40
36
|
prerelease: false
|
41
37
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
38
|
requirements:
|
44
39
|
- - ! '>='
|
45
40
|
- !ruby/object:Gem::Version
|
46
|
-
version: 1.
|
41
|
+
version: '1.11'
|
47
42
|
- !ruby/object:Gem::Dependency
|
48
43
|
name: fixed_width-multibyte
|
49
44
|
requirement: !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
45
|
requirements:
|
52
46
|
- - ! '>='
|
53
47
|
- !ruby/object:Gem::Version
|
@@ -55,7 +49,6 @@ dependencies:
|
|
55
49
|
type: :runtime
|
56
50
|
prerelease: false
|
57
51
|
version_requirements: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
52
|
requirements:
|
60
53
|
- - ! '>='
|
61
54
|
- !ruby/object:Gem::Version
|
@@ -63,7 +56,6 @@ dependencies:
|
|
63
56
|
- !ruby/object:Gem::Dependency
|
64
57
|
name: i18n
|
65
58
|
requirement: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
59
|
requirements:
|
68
60
|
- - ! '>='
|
69
61
|
- !ruby/object:Gem::Version
|
@@ -71,7 +63,6 @@ dependencies:
|
|
71
63
|
type: :runtime
|
72
64
|
prerelease: false
|
73
65
|
version_requirements: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
66
|
requirements:
|
76
67
|
- - ! '>='
|
77
68
|
- !ruby/object:Gem::Version
|
@@ -79,7 +70,6 @@ dependencies:
|
|
79
70
|
- !ruby/object:Gem::Dependency
|
80
71
|
name: unix_utils
|
81
72
|
requirement: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
73
|
requirements:
|
84
74
|
- - ! '>='
|
85
75
|
- !ruby/object:Gem::Version
|
@@ -87,7 +77,6 @@ dependencies:
|
|
87
77
|
type: :runtime
|
88
78
|
prerelease: false
|
89
79
|
version_requirements: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
80
|
requirements:
|
92
81
|
- - ! '>='
|
93
82
|
- !ruby/object:Gem::Version
|
@@ -95,7 +84,6 @@ dependencies:
|
|
95
84
|
- !ruby/object:Gem::Dependency
|
96
85
|
name: fastercsv
|
97
86
|
requirement: !ruby/object:Gem::Requirement
|
98
|
-
none: false
|
99
87
|
requirements:
|
100
88
|
- - ! '>='
|
101
89
|
- !ruby/object:Gem::Version
|
@@ -103,7 +91,6 @@ dependencies:
|
|
103
91
|
type: :runtime
|
104
92
|
prerelease: false
|
105
93
|
version_requirements: !ruby/object:Gem::Requirement
|
106
|
-
none: false
|
107
94
|
requirements:
|
108
95
|
- - ! '>='
|
109
96
|
- !ruby/object:Gem::Version
|
@@ -111,7 +98,6 @@ dependencies:
|
|
111
98
|
- !ruby/object:Gem::Dependency
|
112
99
|
name: hash_digest
|
113
100
|
requirement: !ruby/object:Gem::Requirement
|
114
|
-
none: false
|
115
101
|
requirements:
|
116
102
|
- - ! '>='
|
117
103
|
- !ruby/object:Gem::Version
|
@@ -119,7 +105,6 @@ dependencies:
|
|
119
105
|
type: :runtime
|
120
106
|
prerelease: false
|
121
107
|
version_requirements: !ruby/object:Gem::Requirement
|
122
|
-
none: false
|
123
108
|
requirements:
|
124
109
|
- - ! '>='
|
125
110
|
- !ruby/object:Gem::Version
|
@@ -127,7 +112,6 @@ dependencies:
|
|
127
112
|
- !ruby/object:Gem::Dependency
|
128
113
|
name: rubyzip2
|
129
114
|
requirement: !ruby/object:Gem::Requirement
|
130
|
-
none: false
|
131
115
|
requirements:
|
132
116
|
- - ! '>='
|
133
117
|
- !ruby/object:Gem::Version
|
@@ -135,7 +119,6 @@ dependencies:
|
|
135
119
|
type: :runtime
|
136
120
|
prerelease: false
|
137
121
|
version_requirements: !ruby/object:Gem::Requirement
|
138
|
-
none: false
|
139
122
|
requirements:
|
140
123
|
- - ! '>='
|
141
124
|
- !ruby/object:Gem::Version
|
@@ -143,7 +126,6 @@ dependencies:
|
|
143
126
|
- !ruby/object:Gem::Dependency
|
144
127
|
name: errata
|
145
128
|
requirement: !ruby/object:Gem::Requirement
|
146
|
-
none: false
|
147
129
|
requirements:
|
148
130
|
- - ! '>='
|
149
131
|
- !ruby/object:Gem::Version
|
@@ -151,7 +133,6 @@ dependencies:
|
|
151
133
|
type: :development
|
152
134
|
prerelease: false
|
153
135
|
version_requirements: !ruby/object:Gem::Requirement
|
154
|
-
none: false
|
155
136
|
requirements:
|
156
137
|
- - ! '>='
|
157
138
|
- !ruby/object:Gem::Version
|
@@ -159,7 +140,6 @@ dependencies:
|
|
159
140
|
- !ruby/object:Gem::Dependency
|
160
141
|
name: minitest
|
161
142
|
requirement: !ruby/object:Gem::Requirement
|
162
|
-
none: false
|
163
143
|
requirements:
|
164
144
|
- - ! '>='
|
165
145
|
- !ruby/object:Gem::Version
|
@@ -167,23 +147,6 @@ dependencies:
|
|
167
147
|
type: :development
|
168
148
|
prerelease: false
|
169
149
|
version_requirements: !ruby/object:Gem::Requirement
|
170
|
-
none: false
|
171
|
-
requirements:
|
172
|
-
- - ! '>='
|
173
|
-
- !ruby/object:Gem::Version
|
174
|
-
version: '0'
|
175
|
-
- !ruby/object:Gem::Dependency
|
176
|
-
name: minitest-reporters
|
177
|
-
requirement: !ruby/object:Gem::Requirement
|
178
|
-
none: false
|
179
|
-
requirements:
|
180
|
-
- - ! '>='
|
181
|
-
- !ruby/object:Gem::Version
|
182
|
-
version: '0'
|
183
|
-
type: :development
|
184
|
-
prerelease: false
|
185
|
-
version_requirements: !ruby/object:Gem::Requirement
|
186
|
-
none: false
|
187
150
|
requirements:
|
188
151
|
- - ! '>='
|
189
152
|
- !ruby/object:Gem::Version
|
@@ -191,7 +154,6 @@ dependencies:
|
|
191
154
|
- !ruby/object:Gem::Dependency
|
192
155
|
name: rake
|
193
156
|
requirement: !ruby/object:Gem::Requirement
|
194
|
-
none: false
|
195
157
|
requirements:
|
196
158
|
- - ! '>='
|
197
159
|
- !ruby/object:Gem::Version
|
@@ -199,7 +161,6 @@ dependencies:
|
|
199
161
|
type: :development
|
200
162
|
prerelease: false
|
201
163
|
version_requirements: !ruby/object:Gem::Requirement
|
202
|
-
none: false
|
203
164
|
requirements:
|
204
165
|
- - ! '>='
|
205
166
|
- !ruby/object:Gem::Version
|
@@ -207,7 +168,6 @@ dependencies:
|
|
207
168
|
- !ruby/object:Gem::Dependency
|
208
169
|
name: yard
|
209
170
|
requirement: !ruby/object:Gem::Requirement
|
210
|
-
none: false
|
211
171
|
requirements:
|
212
172
|
- - ! '>='
|
213
173
|
- !ruby/object:Gem::Version
|
@@ -215,7 +175,6 @@ dependencies:
|
|
215
175
|
type: :development
|
216
176
|
prerelease: false
|
217
177
|
version_requirements: !ruby/object:Gem::Requirement
|
218
|
-
none: false
|
219
178
|
requirements:
|
220
179
|
- - ! '>='
|
221
180
|
- !ruby/object:Gem::Version
|
@@ -223,7 +182,6 @@ dependencies:
|
|
223
182
|
- !ruby/object:Gem::Dependency
|
224
183
|
name: pry
|
225
184
|
requirement: !ruby/object:Gem::Requirement
|
226
|
-
none: false
|
227
185
|
requirements:
|
228
186
|
- - ! '>='
|
229
187
|
- !ruby/object:Gem::Version
|
@@ -231,7 +189,6 @@ dependencies:
|
|
231
189
|
type: :development
|
232
190
|
prerelease: false
|
233
191
|
version_requirements: !ruby/object:Gem::Requirement
|
234
|
-
none: false
|
235
192
|
requirements:
|
236
193
|
- - ! '>='
|
237
194
|
- !ruby/object:Gem::Version
|
@@ -239,7 +196,6 @@ dependencies:
|
|
239
196
|
- !ruby/object:Gem::Dependency
|
240
197
|
name: pry-rescue
|
241
198
|
requirement: !ruby/object:Gem::Requirement
|
242
|
-
none: false
|
243
199
|
requirements:
|
244
200
|
- - ! '>='
|
245
201
|
- !ruby/object:Gem::Version
|
@@ -247,7 +203,6 @@ dependencies:
|
|
247
203
|
type: :development
|
248
204
|
prerelease: false
|
249
205
|
version_requirements: !ruby/object:Gem::Requirement
|
250
|
-
none: false
|
251
206
|
requirements:
|
252
207
|
- - ! '>='
|
253
208
|
- !ruby/object:Gem::Version
|
@@ -255,7 +210,6 @@ dependencies:
|
|
255
210
|
- !ruby/object:Gem::Dependency
|
256
211
|
name: pry-stack_explorer
|
257
212
|
requirement: !ruby/object:Gem::Requirement
|
258
|
-
none: false
|
259
213
|
requirements:
|
260
214
|
- - ! '>='
|
261
215
|
- !ruby/object:Gem::Version
|
@@ -263,7 +217,6 @@ dependencies:
|
|
263
217
|
type: :development
|
264
218
|
prerelease: false
|
265
219
|
version_requirements: !ruby/object:Gem::Requirement
|
266
|
-
none: false
|
267
220
|
requirements:
|
268
221
|
- - ! '>='
|
269
222
|
- !ruby/object:Gem::Version
|
@@ -301,8 +254,11 @@ files:
|
|
301
254
|
- lib/remote_table/yaml.rb
|
302
255
|
- remote_table.gemspec
|
303
256
|
- test/data/airports.utf8.csv
|
257
|
+
- test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx
|
258
|
+
- test/data/bom.csv
|
304
259
|
- test/data/color.csv
|
305
260
|
- test/data/data.yml
|
261
|
+
- test/data/faa-aircraft.html.bz2
|
306
262
|
- test/data/list-en1-semic-3.neooffice.binary.ods
|
307
263
|
- test/data/list-en1-semic-3.neooffice.iso-8859-1.csv
|
308
264
|
- test/data/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
|
@@ -331,33 +287,35 @@ files:
|
|
331
287
|
- test/test_transpose.rb
|
332
288
|
homepage: https://github.com/seamusabshere/remote_table
|
333
289
|
licenses: []
|
290
|
+
metadata: {}
|
334
291
|
post_install_message:
|
335
292
|
rdoc_options: []
|
336
293
|
require_paths:
|
337
294
|
- lib
|
338
295
|
required_ruby_version: !ruby/object:Gem::Requirement
|
339
|
-
none: false
|
340
296
|
requirements:
|
341
297
|
- - ! '>='
|
342
298
|
- !ruby/object:Gem::Version
|
343
299
|
version: '0'
|
344
300
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
345
|
-
none: false
|
346
301
|
requirements:
|
347
|
-
- - ! '
|
302
|
+
- - ! '>='
|
348
303
|
- !ruby/object:Gem::Version
|
349
|
-
version:
|
304
|
+
version: '0'
|
350
305
|
requirements: []
|
351
306
|
rubyforge_project: remotetable
|
352
|
-
rubygems_version: 1.
|
307
|
+
rubygems_version: 2.1.11
|
353
308
|
signing_key:
|
354
|
-
specification_version:
|
309
|
+
specification_version: 4
|
355
310
|
summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma
|
356
311
|
separated), TSV (tab separated), other delimited, fixed-width files, and shapefiles.
|
357
312
|
test_files:
|
358
313
|
- test/data/airports.utf8.csv
|
314
|
+
- test/data/backup/http___www.customerreferenceprogram.org_uploads_CRP_RFP_template.xlsx
|
315
|
+
- test/data/bom.csv
|
359
316
|
- test/data/color.csv
|
360
317
|
- test/data/data.yml
|
318
|
+
- test/data/faa-aircraft.html.bz2
|
361
319
|
- test/data/list-en1-semic-3.neooffice.binary.ods
|
362
320
|
- test/data/list-en1-semic-3.neooffice.iso-8859-1.csv
|
363
321
|
- test/data/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
|