remote_table 2.1.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MWUyNDhjYmNhYmEyM2I0Yzc4ZjZmMjJjYTA5NTkzODM5N2Q3MjI1Ng==
4
+ NDMzYjM5MmViMDA2OTRjMDMwMjljYmJjMmJhNmExODNmMTkzYmI3OA==
5
5
  data.tar.gz: !binary |-
6
- MWRlNzI0YTEzMjc5NWRkOTY1Nzk5Y2Q4NzgxYTc2MzliZTBmMDFmOA==
7
- !binary "U0hBNTEy":
6
+ MDkxZmI3MWM4ZTJjYWM3NDYxOGQzMzRiYzIyMGQ2ODAyOWY0NTNjNg==
7
+ SHA512:
8
8
  metadata.gz: !binary |-
9
- MDBlMDg5NTE2YzM3YTk5ZjI4M2U5OWNkZmFlOWQ3ZmUwOWI3MGM5NjZjYTgx
10
- MDQ4Yjk1OTBhYzk2NjQxYzJiZGZiMDU0MGU3MmU5MTg4ZmU2ZWExNzEwYjYy
11
- ODdiN2EzNWNjODA0NTJlOGVjMWNmY2ZmYjA0NzMyZGU5YWYzOGM=
9
+ OWU5NzM2OWM1MDljMTg3MzJiOWZjN2MxMzliYjhhZDdmNDYyY2E5ZWFhOTE2
10
+ MWI4YjA1NWM5YjhiMDYxMjA1ZDBkZTFiZTQ4OGJiMDlmNGVhNjg5Nzg2ZTkz
11
+ MmIxZTZmOGQyMmRhNTI4OTk5YjU1ODM0NjI1ZmIzYzMyZmFiNmI=
12
12
  data.tar.gz: !binary |-
13
- YjkyMDdjYWYwMGQ2Mzg3OTU0N2Q4NDMzNmNkZGVkMzE1NGFiMWMzYWUzYzJh
14
- NjBhNjdkYTQ2ZTc2Nzg3NTU0YzhkYTNlOTMyZTkyYTBjM2Q0YTk1NGVkMDgy
15
- NTNjZGU5MzEyMTEyNDhmZGJiNmJkY2VjZmZjYmMxMmEyYjNjNWQ=
13
+ ZTFhMTk4ZjAyZTdkM2RmZmM1ZmEyZmI5ZWQzMWFkZTliZWIxNmMxMjdhYTBm
14
+ NDg0YTQ3YThlYzgxNTRhZTQ5YTYzMDU5ZGYxODkzNmIxOWMwMzk5YjMwZTA4
15
+ ODVmOGRkNDBmNmExMjUyOWRmNTMzN2U1MjFmZjUyNzZmMzM5OTE=
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 2.1.2 / 2013-10-08
2
+
3
+ * Bug fixes
4
+
5
+ * Correctly detect format from filename - don't get confused if a filename has "xls" in it (thanks @activefx #10)
6
+
1
7
  2.1.1 / 2013-03-25
2
8
 
3
9
  * Enhancements
data/lib/remote_table.rb CHANGED
@@ -72,25 +72,25 @@ class RemoteTable
72
72
  # Guess file format from the basename. Since a file might be decompressed and/or pulled out of an archive with a glob, this usually can't be called until a file is downloaded.
73
73
  # @return [Symbol,nil]
74
74
  def guess_format(basename)
75
- case basename.to_s.downcase
76
- when /ods/, /open_?office/
75
+ case basename.to_s.downcase.strip
76
+ when /ods\z/, /open_?office\z/
77
77
  :ods
78
- when /xlsx/, /excelx/
78
+ when /xlsx\z/, /excelx\z/
79
79
  :xlsx
80
- when /xls/, /excel/
80
+ when /xls\z/, /excel\z/
81
81
  :xls
82
- when /csv/, /tsv/, /delimited/
82
+ when /csv\z/, /tsv\z/, /delimited\z/
83
83
  # note that there is no RemoteTable::Csv class - it's normalized to :delimited
84
84
  :delimited
85
- when /fixed_?width/
85
+ when /fixed_?width\z/
86
86
  :fixed_width
87
- when /htm/
87
+ when /html?\z/
88
88
  :html
89
- when /xml/
89
+ when /xml\z/
90
90
  :xml
91
- when /yaml/, /yml/
91
+ when /yaml\z/, /yml\z/
92
92
  :yaml
93
- when /shp/
93
+ when /shp\z/
94
94
  :shp
95
95
  end
96
96
  end
@@ -144,7 +144,7 @@ class RemoteTable
144
144
  }
145
145
 
146
146
  include ::Enumerable
147
-
147
+
148
148
  # The URL of the local or remote file.
149
149
  #
150
150
  # @example Local
@@ -185,23 +185,23 @@ class RemoteTable
185
185
  # Whether to warn the user on multiple downloads. Defaults to true.
186
186
  # @return [true,false]
187
187
  attr_reader :warn_on_multiple_downloads
188
-
188
+
189
189
  # Headers specified by the user: +:first_row+ (the default), +false+, or a list of headers.
190
190
  # @return [:first_row,false,Array<String>]
191
191
  attr_reader :headers
192
-
192
+
193
193
  # The sheet specified by the user as a number or a string.
194
194
  # @return[String,Integer]
195
195
  attr_reader :sheet
196
-
196
+
197
197
  # Whether to keep blank rows. Default is false.
198
198
  # @return [true,false]
199
199
  attr_reader :keep_blank_rows
200
-
200
+
201
201
  # Form data to POST in the download request. It should probably be in +application/x-www-form-urlencoded+.
202
202
  # @return [String]
203
203
  attr_reader :form_data
204
-
204
+
205
205
  # How many rows to skip at the beginning of the file or table. Default is 0.
206
206
  # @return [Integer]
207
207
  attr_reader :skip
@@ -209,15 +209,15 @@ class RemoteTable
209
209
  # The original encoding of the source file. Default is UTF-8. Previously passed as +:encoding+.
210
210
  # @return [String]
211
211
  attr_reader :internal_encoding
212
-
212
+
213
213
  # The delimiter, a.k.a. column separator. Passed to Ruby CSV as +:col_sep+. Default is :delimited.
214
214
  # @return [String]
215
215
  attr_reader :delimiter
216
-
216
+
217
217
  # The XPath used to find rows in HTML or XML.
218
218
  # @return [String]
219
219
  attr_reader :row_xpath
220
-
220
+
221
221
  # The XPath used to find columns in HTML or XML.
222
222
  # @return [String]
223
223
  attr_reader :column_xpath
@@ -225,11 +225,11 @@ class RemoteTable
225
225
  # The CSS selector used to find rows in HTML or XML.
226
226
  # @return [String]
227
227
  attr_reader :row_css
228
-
228
+
229
229
  # The CSS selector used to find columns in HTML or XML.
230
230
  # @return [String]
231
231
  attr_reader :column_css
232
-
232
+
233
233
  # The format of the source file. Can be +:xlsx+, +:xls+, +:delimited+, +:ods+, +:fixed_width+, +:html+, +:xml+, +:yaml+.
234
234
  # @return [Symbol]
235
235
  attr_reader :format
@@ -241,7 +241,7 @@ class RemoteTable
241
241
  # The packing type. Guessed from URL if not provided. Only +:tar+ is supported.
242
242
  # @return [Symbol]
243
243
  attr_reader :packing
244
-
244
+
245
245
  # The glob used to pick a file out of an archive.
246
246
  #
247
247
  # @return [String]
@@ -249,7 +249,7 @@ class RemoteTable
249
249
  # @example Pick out the only CSV in a ZIP file
250
250
  # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
251
251
  attr_reader :glob
252
-
252
+
253
253
  # The filename, which can be used to pick a file out of an archive.
254
254
  #
255
255
  # @return [String]
@@ -267,7 +267,7 @@ class RemoteTable
267
267
  # # ALMOST
268
268
  # RemoteTable.new 'file:///atoz.txt', :cut => '1,12,13,15,19,20'
269
269
  attr_reader :cut
270
-
270
+
271
271
  # Use a range of rows in a plaintext file.
272
272
  #
273
273
  # @return [Range]
@@ -278,7 +278,7 @@ class RemoteTable
278
278
  # :select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
279
279
  # :crop => (21..37))
280
280
  attr_reader :crop
281
-
281
+
282
282
  # The fixed-width schema, given as a multi-dimensional array.
283
283
  #
284
284
  # @return [Array<Array{String,Integer,Hash}>]
@@ -293,15 +293,15 @@ class RemoteTable
293
293
  # [ 'spacer', 12 ],
294
294
  # [ 'header6', 10, { :type => :string } ]])
295
295
  attr_reader :schema
296
-
296
+
297
297
  # If you somehow already defined a fixed-width schema (so you can re-use it?), specify it here.
298
298
  # @return [String,Symbol]
299
299
  attr_reader :schema_name
300
-
300
+
301
301
  # A proc that decides whether to include a row. Previously passed as +:select+.
302
302
  # @return [Proc]
303
303
  attr_reader :pre_select
304
-
304
+
305
305
  # A proc that decides whether to include a row. Previously passed as +:reject+.
306
306
  # @return [Proc]
307
307
  attr_reader :pre_reject
@@ -309,14 +309,14 @@ class RemoteTable
309
309
  # Settings to create a transformer.
310
310
  # @return [Hash]
311
311
  attr_reader :transform_settings
312
-
313
- # A hash of settings to initialize an Errata instance to be used on every row. Previously passed as +:errata+.
312
+
313
+ # A hash to initialize an Errata instance to be used on every row. Applied after creating +row_hash+ and before passing to +:synthesize+ procs, etc. Previously passed as +:errata+.
314
314
  #
315
315
  # See the Errata library at https://github.com/seamusabshere/errata
316
316
  #
317
317
  # @return [Hash]
318
318
  attr_reader :errata_settings
319
-
319
+
320
320
  # The format of the source file. Can be specified as: :xlsx, :xls, :delimited (aka :csv), :ods, :fixed_width, :html, :xml, :yaml
321
321
  #
322
322
  # Note: treats all +docs.google.com+ and +spreadsheets.google.com+ URLs as +:delimited+.
@@ -332,6 +332,8 @@ class RemoteTable
332
332
 
333
333
  # Create a new RemoteTable, which is an Enumerable.
334
334
  #
335
+ # Options are set at creation using any of the attributes listed... RDoc will say they're "read-only" because you can't set/change them after creation.
336
+ #
335
337
  # Does not immediately download/parse... it's lazy-loading.
336
338
  #
337
339
  # @overload initialize(settings)
@@ -402,7 +404,7 @@ class RemoteTable
402
404
  @errata_settings = grab settings, :errata_settings
403
405
 
404
406
  @other_options = settings
405
-
407
+
406
408
  @transformer = Transformer.new self
407
409
  @local_copy = LocalCopy.new self
408
410
  end
@@ -445,7 +447,7 @@ class RemoteTable
445
447
 
446
448
  # @deprecated
447
449
  alias :each_row :each
448
-
450
+
449
451
  # @return [Array<Hash,Array>] All rows.
450
452
  def to_a
451
453
  if fully_cached?
@@ -457,7 +459,7 @@ class RemoteTable
457
459
 
458
460
  # @deprecated
459
461
  alias :rows :to_a
460
-
462
+
461
463
  # Get a row by row number. Zero-based.
462
464
  #
463
465
  # @return [Hash,Array]
@@ -468,7 +470,7 @@ class RemoteTable
468
470
  to_a[row_number]
469
471
  end
470
472
  end
471
-
473
+
472
474
  # Clear the row cache in case it helps your GC.
473
475
  #
474
476
  # @return [nil]
@@ -494,7 +496,7 @@ class RemoteTable
494
496
  end
495
497
 
496
498
  private
497
-
499
+
498
500
  def mark_download!
499
501
  @download_count_mutex.synchronize do
500
502
  @download_count += 1
@@ -502,12 +504,12 @@ class RemoteTable
502
504
  if warn_on_multiple_downloads and download_count > 1
503
505
  ::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
504
506
  end
505
- end
506
-
507
+ end
508
+
507
509
  def fully_cached!
508
510
  @fully_cached = true
509
511
  end
510
-
512
+
511
513
  def fully_cached?
512
514
  !!@fully_cached
513
515
  end
@@ -6,27 +6,25 @@ class RemoteTable
6
6
 
7
7
  # Yield each row using Roo.
8
8
  def _each
9
- # sometimes Roo forgets to require iconv.
10
- require 'iconv'
11
9
  require 'roo'
12
10
 
13
11
  spreadsheet = roo_class.new local_copy.path, nil, :ignore
14
12
  if sheet
15
13
  spreadsheet.default_sheet = sheet
16
14
  end
17
-
15
+
18
16
  first_row = if crop
19
17
  crop.first + 1
20
18
  else
21
19
  skip + 1
22
20
  end
23
-
21
+
24
22
  last_row = if crop
25
23
  crop.last
26
24
  else
27
25
  spreadsheet.last_row
28
26
  end
29
-
27
+
30
28
  if not headers
31
29
 
32
30
  # create an array to represent this row
@@ -48,7 +46,7 @@ class RemoteTable
48
46
  end
49
47
 
50
48
  else
51
-
49
+
52
50
  # create a hash to represent this row
53
51
  current_headers = ::ActiveSupport::OrderedHash.new
54
52
  if headers == :first_row
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "2.1.1"
2
+ VERSION = "2.1.2"
3
3
  end
data/remote_table.gemspec CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
16
16
  s.files = `git ls-files`.split("\n")
17
17
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
18
  s.require_paths = ["lib"]
19
-
19
+
20
20
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
21
- s.add_runtime_dependency 'roo', '>= 1.10.3'
21
+ s.add_runtime_dependency 'roo', '>= 1.11'
22
22
  s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
23
23
  s.add_runtime_dependency 'i18n' # activesupport?
24
24
  s.add_runtime_dependency 'unix_utils', '>=0.0.8'
@@ -165,4 +165,33 @@ describe RemoteTable do
165
165
  t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
166
166
  t[0]['Class'].must_equal 'TWO SEATERS'
167
167
  end
168
+
169
+ {
170
+ 'foo.ods' => :ods,
171
+ 'foo.open_office' => :ods,
172
+ 'foo.xlsx' => :xlsx,
173
+ 'foo.excelx' => :xlsx,
174
+ 'foo.xls' => :xls,
175
+ 'foo.excel' => :xls,
176
+ 'foo.csv' => :delimited,
177
+ 'foo.tsv' => :delimited,
178
+ 'foo.delimited' => :delimited,
179
+ 'foo.fixed_width' => :fixed_width,
180
+ 'foo.htm' => :html,
181
+ 'foo.html' => :html,
182
+ 'foo.xml' => :xml,
183
+ 'foo.yaml' => :yaml,
184
+ 'foo.yml' => :yaml,
185
+ 'foo.shp' => :shp
186
+ }.each do |basename, format|
187
+ it "detects the #{format} format from the filename #{basename}" do
188
+ RemoteTable.guess_format(basename).must_equal format
189
+ end
190
+ end
191
+
192
+ it "detects the correct extension name without confusion from basename" do
193
+ [ 'foo.xls', 'xlsx.xls', 'foo_xls' ].each do |basename|
194
+ RemoteTable.guess_format(basename).must_equal :xls
195
+ end
196
+ end
168
197
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 2.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-25 00:00:00.000000000 Z
12
+ date: 2013-10-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -31,14 +31,14 @@ dependencies:
31
31
  requirements:
32
32
  - - ! '>='
33
33
  - !ruby/object:Gem::Version
34
- version: 1.10.3
34
+ version: '1.11'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - ! '>='
40
40
  - !ruby/object:Gem::Version
41
- version: 1.10.3
41
+ version: '1.11'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: fixed_width-multibyte
44
44
  requirement: !ruby/object:Gem::Requirement
@@ -283,7 +283,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
283
283
  version: '0'
284
284
  requirements: []
285
285
  rubyforge_project: remotetable
286
- rubygems_version: 2.0.3
286
+ rubygems_version: 2.1.5
287
287
  signing_key:
288
288
  specification_version: 4
289
289
  summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma