remote_table 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MWUyNDhjYmNhYmEyM2I0Yzc4ZjZmMjJjYTA5NTkzODM5N2Q3MjI1Ng==
4
+ NDMzYjM5MmViMDA2OTRjMDMwMjljYmJjMmJhNmExODNmMTkzYmI3OA==
5
5
  data.tar.gz: !binary |-
6
- MWRlNzI0YTEzMjc5NWRkOTY1Nzk5Y2Q4NzgxYTc2MzliZTBmMDFmOA==
7
- !binary "U0hBNTEy":
6
+ MDkxZmI3MWM4ZTJjYWM3NDYxOGQzMzRiYzIyMGQ2ODAyOWY0NTNjNg==
7
+ SHA512:
8
8
  metadata.gz: !binary |-
9
- MDBlMDg5NTE2YzM3YTk5ZjI4M2U5OWNkZmFlOWQ3ZmUwOWI3MGM5NjZjYTgx
10
- MDQ4Yjk1OTBhYzk2NjQxYzJiZGZiMDU0MGU3MmU5MTg4ZmU2ZWExNzEwYjYy
11
- ODdiN2EzNWNjODA0NTJlOGVjMWNmY2ZmYjA0NzMyZGU5YWYzOGM=
9
+ OWU5NzM2OWM1MDljMTg3MzJiOWZjN2MxMzliYjhhZDdmNDYyY2E5ZWFhOTE2
10
+ MWI4YjA1NWM5YjhiMDYxMjA1ZDBkZTFiZTQ4OGJiMDlmNGVhNjg5Nzg2ZTkz
11
+ MmIxZTZmOGQyMmRhNTI4OTk5YjU1ODM0NjI1ZmIzYzMyZmFiNmI=
12
12
  data.tar.gz: !binary |-
13
- YjkyMDdjYWYwMGQ2Mzg3OTU0N2Q4NDMzNmNkZGVkMzE1NGFiMWMzYWUzYzJh
14
- NjBhNjdkYTQ2ZTc2Nzg3NTU0YzhkYTNlOTMyZTkyYTBjM2Q0YTk1NGVkMDgy
15
- NTNjZGU5MzEyMTEyNDhmZGJiNmJkY2VjZmZjYmMxMmEyYjNjNWQ=
13
+ ZTFhMTk4ZjAyZTdkM2RmZmM1ZmEyZmI5ZWQzMWFkZTliZWIxNmMxMjdhYTBm
14
+ NDg0YTQ3YThlYzgxNTRhZTQ5YTYzMDU5ZGYxODkzNmIxOWMwMzk5YjMwZTA4
15
+ ODVmOGRkNDBmNmExMjUyOWRmNTMzN2U1MjFmZjUyNzZmMzM5OTE=
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 2.1.2 / 2013-10-08
2
+
3
+ * Bug fixes
4
+
5
+ * Correctly detect format from filename - don't get confused if a filename has "xls" in it (thanks @activefx #10)
6
+
1
7
  2.1.1 / 2013-03-25
2
8
 
3
9
  * Enhancements
data/lib/remote_table.rb CHANGED
@@ -72,25 +72,25 @@ class RemoteTable
72
72
  # Guess file format from the basename. Since a file might be decompressed and/or pulled out of an archive with a glob, this usually can't be called until a file is downloaded.
73
73
  # @return [Symbol,nil]
74
74
  def guess_format(basename)
75
- case basename.to_s.downcase
76
- when /ods/, /open_?office/
75
+ case basename.to_s.downcase.strip
76
+ when /ods\z/, /open_?office\z/
77
77
  :ods
78
- when /xlsx/, /excelx/
78
+ when /xlsx\z/, /excelx\z/
79
79
  :xlsx
80
- when /xls/, /excel/
80
+ when /xls\z/, /excel\z/
81
81
  :xls
82
- when /csv/, /tsv/, /delimited/
82
+ when /csv\z/, /tsv\z/, /delimited\z/
83
83
  # note that there is no RemoteTable::Csv class - it's normalized to :delimited
84
84
  :delimited
85
- when /fixed_?width/
85
+ when /fixed_?width\z/
86
86
  :fixed_width
87
- when /htm/
87
+ when /html?\z/
88
88
  :html
89
- when /xml/
89
+ when /xml\z/
90
90
  :xml
91
- when /yaml/, /yml/
91
+ when /yaml\z/, /yml\z/
92
92
  :yaml
93
- when /shp/
93
+ when /shp\z/
94
94
  :shp
95
95
  end
96
96
  end
@@ -144,7 +144,7 @@ class RemoteTable
144
144
  }
145
145
 
146
146
  include ::Enumerable
147
-
147
+
148
148
  # The URL of the local or remote file.
149
149
  #
150
150
  # @example Local
@@ -185,23 +185,23 @@ class RemoteTable
185
185
  # Whether to warn the user on multiple downloads. Defaults to true.
186
186
  # @return [true,false]
187
187
  attr_reader :warn_on_multiple_downloads
188
-
188
+
189
189
  # Headers specified by the user: +:first_row+ (the default), +false+, or a list of headers.
190
190
  # @return [:first_row,false,Array<String>]
191
191
  attr_reader :headers
192
-
192
+
193
193
  # The sheet specified by the user as a number or a string.
194
194
  # @return [String,Integer]
195
195
  attr_reader :sheet
196
-
196
+
197
197
  # Whether to keep blank rows. Default is false.
198
198
  # @return [true,false]
199
199
  attr_reader :keep_blank_rows
200
-
200
+
201
201
  # Form data to POST in the download request. It should probably be in +application/x-www-form-urlencoded+.
202
202
  # @return [String]
203
203
  attr_reader :form_data
204
-
204
+
205
205
  # How many rows to skip at the beginning of the file or table. Default is 0.
206
206
  # @return [Integer]
207
207
  attr_reader :skip
@@ -209,15 +209,15 @@ class RemoteTable
209
209
  # The original encoding of the source file. Default is UTF-8. Previously passed as +:encoding+.
210
210
  # @return [String]
211
211
  attr_reader :internal_encoding
212
-
212
+
213
213
  # The delimiter, a.k.a. column separator. Passed to Ruby CSV as +:col_sep+. Default is :delimited.
214
214
  # @return [String]
215
215
  attr_reader :delimiter
216
-
216
+
217
217
  # The XPath used to find rows in HTML or XML.
218
218
  # @return [String]
219
219
  attr_reader :row_xpath
220
-
220
+
221
221
  # The XPath used to find columns in HTML or XML.
222
222
  # @return [String]
223
223
  attr_reader :column_xpath
@@ -225,11 +225,11 @@ class RemoteTable
225
225
  # The CSS selector used to find rows in HTML or XML.
226
226
  # @return [String]
227
227
  attr_reader :row_css
228
-
228
+
229
229
  # The CSS selector used to find columns in HTML or XML.
230
230
  # @return [String]
231
231
  attr_reader :column_css
232
-
232
+
233
233
  # The format of the source file. Can be +:xlsx+, +:xls+, +:delimited+, +:ods+, +:fixed_width+, +:html+, +:xml+, +:yaml+.
234
234
  # @return [Symbol]
235
235
  attr_reader :format
@@ -241,7 +241,7 @@ class RemoteTable
241
241
  # The packing type. Guessed from URL if not provided. Only +:tar+ is supported.
242
242
  # @return [Symbol]
243
243
  attr_reader :packing
244
-
244
+
245
245
  # The glob used to pick a file out of an archive.
246
246
  #
247
247
  # @return [String]
@@ -249,7 +249,7 @@ class RemoteTable
249
249
  # @example Pick out the only CSV in a ZIP file
250
250
  # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
251
251
  attr_reader :glob
252
-
252
+
253
253
  # The filename, which can be used to pick a file out of an archive.
254
254
  #
255
255
  # @return [String]
@@ -267,7 +267,7 @@ class RemoteTable
267
267
  # # ALMOST
268
268
  # RemoteTable.new 'file:///atoz.txt', :cut => '1,12,13,15,19,20'
269
269
  attr_reader :cut
270
-
270
+
271
271
  # Use a range of rows in a plaintext file.
272
272
  #
273
273
  # @return [Range]
@@ -278,7 +278,7 @@ class RemoteTable
278
278
  # :select => proc { |row| CbecsEnergyIntensity::NAICS_CODE_SYNTHESIZER.call(row) },
279
279
  # :crop => (21..37))
280
280
  attr_reader :crop
281
-
281
+
282
282
  # The fixed-width schema, given as a multi-dimensional array.
283
283
  #
284
284
  # @return [Array<Array{String,Integer,Hash}>]
@@ -293,15 +293,15 @@ class RemoteTable
293
293
  # [ 'spacer', 12 ],
294
294
  # [ 'header6', 10, { :type => :string } ]])
295
295
  attr_reader :schema
296
-
296
+
297
297
  # If you somehow already defined a fixed-width schema (so you can re-use it?), specify it here.
298
298
  # @return [String,Symbol]
299
299
  attr_reader :schema_name
300
-
300
+
301
301
  # A proc that decides whether to include a row. Previously passed as +:select+.
302
302
  # @return [Proc]
303
303
  attr_reader :pre_select
304
-
304
+
305
305
  # A proc that decides whether to reject (skip) a row. Previously passed as +:reject+.
306
306
  # @return [Proc]
307
307
  attr_reader :pre_reject
@@ -309,14 +309,14 @@ class RemoteTable
309
309
  # Settings to create a transformer.
310
310
  # @return [Hash]
311
311
  attr_reader :transform_settings
312
-
313
- # A hash of settings to initialize an Errata instance to be used on every row. Previously passed as +:errata+.
312
+
313
+ # A hash to initialize an Errata instance to be used on every row. Applied after creating +row_hash+ and before passing to +:synthesize+ procs, etc. Previously passed as +:errata+.
314
314
  #
315
315
  # See the Errata library at https://github.com/seamusabshere/errata
316
316
  #
317
317
  # @return [Hash]
318
318
  attr_reader :errata_settings
319
-
319
+
320
320
  # The format of the source file. Can be specified as: :xlsx, :xls, :delimited (aka :csv), :ods, :fixed_width, :html, :xml, :yaml
321
321
  #
322
322
  # Note: treats all +docs.google.com+ and +spreadsheets.google.com+ URLs as +:delimited+.
@@ -332,6 +332,8 @@ class RemoteTable
332
332
 
333
333
  # Create a new RemoteTable, which is an Enumerable.
334
334
  #
335
+ # Options are set at creation using any of the attributes listed... RDoc will say they're "read-only" because you can't set/change them after creation.
336
+ #
335
337
  # Does not immediately download/parse... it's lazy-loading.
336
338
  #
337
339
  # @overload initialize(settings)
@@ -402,7 +404,7 @@ class RemoteTable
402
404
  @errata_settings = grab settings, :errata_settings
403
405
 
404
406
  @other_options = settings
405
-
407
+
406
408
  @transformer = Transformer.new self
407
409
  @local_copy = LocalCopy.new self
408
410
  end
@@ -445,7 +447,7 @@ class RemoteTable
445
447
 
446
448
  # @deprecated
447
449
  alias :each_row :each
448
-
450
+
449
451
  # @return [Array<Hash,Array>] All rows.
450
452
  def to_a
451
453
  if fully_cached?
@@ -457,7 +459,7 @@ class RemoteTable
457
459
 
458
460
  # @deprecated
459
461
  alias :rows :to_a
460
-
462
+
461
463
  # Get a row by row number. Zero-based.
462
464
  #
463
465
  # @return [Hash,Array]
@@ -468,7 +470,7 @@ class RemoteTable
468
470
  to_a[row_number]
469
471
  end
470
472
  end
471
-
473
+
472
474
  # Clear the row cache in case it helps your GC.
473
475
  #
474
476
  # @return [nil]
@@ -494,7 +496,7 @@ class RemoteTable
494
496
  end
495
497
 
496
498
  private
497
-
499
+
498
500
  def mark_download!
499
501
  @download_count_mutex.synchronize do
500
502
  @download_count += 1
@@ -502,12 +504,12 @@ class RemoteTable
502
504
  if warn_on_multiple_downloads and download_count > 1
503
505
  ::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
504
506
  end
505
- end
506
-
507
+ end
508
+
507
509
  def fully_cached!
508
510
  @fully_cached = true
509
511
  end
510
-
512
+
511
513
  def fully_cached?
512
514
  !!@fully_cached
513
515
  end
@@ -6,27 +6,25 @@ class RemoteTable
6
6
 
7
7
  # Yield each row using Roo.
8
8
  def _each
9
- # sometimes Roo forgets to require iconv.
10
- require 'iconv'
11
9
  require 'roo'
12
10
 
13
11
  spreadsheet = roo_class.new local_copy.path, nil, :ignore
14
12
  if sheet
15
13
  spreadsheet.default_sheet = sheet
16
14
  end
17
-
15
+
18
16
  first_row = if crop
19
17
  crop.first + 1
20
18
  else
21
19
  skip + 1
22
20
  end
23
-
21
+
24
22
  last_row = if crop
25
23
  crop.last
26
24
  else
27
25
  spreadsheet.last_row
28
26
  end
29
-
27
+
30
28
  if not headers
31
29
 
32
30
  # create an array to represent this row
@@ -48,7 +46,7 @@ class RemoteTable
48
46
  end
49
47
 
50
48
  else
51
-
49
+
52
50
  # create a hash to represent this row
53
51
  current_headers = ::ActiveSupport::OrderedHash.new
54
52
  if headers == :first_row
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "2.1.1"
2
+ VERSION = "2.1.2"
3
3
  end
data/remote_table.gemspec CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
16
16
  s.files = `git ls-files`.split("\n")
17
17
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
18
  s.require_paths = ["lib"]
19
-
19
+
20
20
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
21
- s.add_runtime_dependency 'roo', '>= 1.10.3'
21
+ s.add_runtime_dependency 'roo', '>= 1.11'
22
22
  s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
23
23
  s.add_runtime_dependency 'i18n' # activesupport?
24
24
  s.add_runtime_dependency 'unix_utils', '>=0.0.8'
@@ -165,4 +165,33 @@ describe RemoteTable do
165
165
  t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
166
166
  t[0]['Class'].must_equal 'TWO SEATERS'
167
167
  end
168
+
169
+ {
170
+ 'foo.ods' => :ods,
171
+ 'foo.open_office' => :ods,
172
+ 'foo.xlsx' => :xlsx,
173
+ 'foo.excelx' => :xlsx,
174
+ 'foo.xls' => :xls,
175
+ 'foo.excel' => :xls,
176
+ 'foo.csv' => :delimited,
177
+ 'foo.tsv' => :delimited,
178
+ 'foo.delimited' => :delimited,
179
+ 'foo.fixed_width' => :fixed_width,
180
+ 'foo.htm' => :html,
181
+ 'foo.html' => :html,
182
+ 'foo.xml' => :xml,
183
+ 'foo.yaml' => :yaml,
184
+ 'foo.yml' => :yaml,
185
+ 'foo.shp' => :shp
186
+ }.each do |basename, format|
187
+ it "detects the #{format} format from the filename #{basename}" do
188
+ RemoteTable.guess_format(basename).must_equal format
189
+ end
190
+ end
191
+
192
+ it "detects the correct extension name without confusion from basename" do
193
+ [ 'foo.xls', 'xlsx.xls', 'foo_xls' ].each do |basename|
194
+ RemoteTable.guess_format(basename).must_equal :xls
195
+ end
196
+ end
168
197
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 2.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-25 00:00:00.000000000 Z
12
+ date: 2013-10-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -31,14 +31,14 @@ dependencies:
31
31
  requirements:
32
32
  - - ! '>='
33
33
  - !ruby/object:Gem::Version
34
- version: 1.10.3
34
+ version: '1.11'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - ! '>='
40
40
  - !ruby/object:Gem::Version
41
- version: 1.10.3
41
+ version: '1.11'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: fixed_width-multibyte
44
44
  requirement: !ruby/object:Gem::Requirement
@@ -283,7 +283,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
283
283
  version: '0'
284
284
  requirements: []
285
285
  rubyforge_project: remotetable
286
- rubygems_version: 2.0.3
286
+ rubygems_version: 2.1.5
287
287
  signing_key:
288
288
  specification_version: 4
289
289
  summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV (comma