remote_table 1.1.6 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -21,3 +21,5 @@ begin
21
21
  rescue LoadError
22
22
  puts "Rdoc is not available"
23
23
  end
24
+
25
+ task :default => :test
@@ -1,3 +1,7 @@
1
+ if ::RUBY_VERSION >= '1.9'
2
+ require 'ensure/encoding'
3
+ end
4
+
1
5
  class RemoteTable
2
6
  class Format
3
7
  autoload :Excel, 'remote_table/format/excel'
@@ -16,6 +20,14 @@ class RemoteTable
16
20
  @t = t
17
21
  end
18
22
 
23
+ def utf8(str)
24
+ if ::RUBY_VERSION >= '1.9'
25
+ str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
26
+ else
27
+ str
28
+ end
29
+ end
30
+
19
31
  include ::Enumerable
20
32
  def each
21
33
  raise "must be defined by format"
@@ -1,9 +1,10 @@
1
1
  if RUBY_VERSION >= '1.9'
2
2
  require 'csv'
3
- ::FasterCSV = ::CSV
3
+ ::RemoteTable::CSV = ::CSV
4
4
  else
5
5
  begin
6
6
  require 'fastercsv'
7
+ ::RemoteTable::CSV = ::FasterCSV
7
8
  rescue ::LoadError
8
9
  $stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
9
10
  raise $!
@@ -15,25 +16,24 @@ class RemoteTable
15
16
  class Delimited < Format
16
17
  include Textual
17
18
  def each(&blk)
18
- convert_file_to_utf8!
19
19
  remove_useless_characters!
20
20
  skip_rows!
21
- ::FasterCSV.foreach(t.local_file.path, fastercsv_options) do |row|
21
+ CSV.foreach(t.local_file.path, fastercsv_options) do |row|
22
22
  ordered_hash = ::ActiveSupport::OrderedHash.new
23
23
  filled_values = 0
24
24
  case row
25
- when ::FasterCSV::Row
25
+ when CSV::Row
26
26
  row.each do |header, value|
27
27
  next if header.blank?
28
28
  value = '' if value.nil?
29
- ordered_hash[header] = value
29
+ ordered_hash[header] = utf8 value
30
30
  filled_values += 1 if value.present?
31
31
  end
32
32
  when ::Array
33
33
  index = 0
34
34
  row.each do |value|
35
35
  value = '' if value.nil?
36
- ordered_hash[index] = value
36
+ ordered_hash[index] = utf8 value
37
37
  filled_values += 1 if value.present?
38
38
  index += 1
39
39
  end
@@ -4,13 +4,15 @@ class RemoteTable
4
4
  class FixedWidth < Format
5
5
  include Textual
6
6
  def each(&blk)
7
- convert_file_to_utf8!
8
7
  remove_useless_characters!
9
8
  crop_rows!
10
9
  skip_rows!
11
10
  cut_columns!
12
11
  parser.parse[:rows].each do |hash|
13
12
  hash.reject! { |k, v| k.blank? }
13
+ hash.each do |k, v|
14
+ hash[k] = utf8 v
15
+ end
14
16
  yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
15
17
  end
16
18
  ensure
@@ -5,11 +5,10 @@ class RemoteTable
5
5
  class HTML < Format
6
6
  include Textual
7
7
  def each(&blk)
8
- convert_file_to_utf8!
9
8
  remove_useless_characters!
10
9
  html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
11
10
  ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
12
- values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
11
+ values = row.xpath(t.properties.column_xpath).map { |td| utf8 td.content.gsub(/\s+/, ' ').strip }
13
12
  if html_headers.nil?
14
13
  html_headers = values
15
14
  next
@@ -2,11 +2,7 @@ require 'fileutils'
2
2
  require 'escape'
3
3
  class RemoteTable
4
4
  class Format
5
- module Textual
6
- def convert_file_to_utf8!
7
- ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.encoding} -t UTF-8"
8
- end
9
-
5
+ module Textual
10
6
  USELESS_CHARACTERS = [
11
7
  '\xef\xbb\xbf', # UTF-8 byte order mark
12
8
  '\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
@@ -48,11 +48,11 @@ class RemoteTable
48
48
  t.options['skip'].to_i
49
49
  end
50
50
 
51
- # The encoding
51
+ # Likely external encoding
52
52
  #
53
53
  # Default: "UTF-8"
54
54
  def encoding
55
- t.options['encoding'] || 'UTF-8'
55
+ @encoding ||= ::Array.wrap(t.options['encoding'] || [ 'UTF-8', 'US-ASCII', 'ASCII-8BIT' ])
56
56
  end
57
57
 
58
58
  # The delimiter
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "1.1.6"
2
+ VERSION = "1.1.7"
3
3
  end
data/remote_table.gemspec CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |s|
30
30
  s.add_dependency 'google-spreadsheet-ruby' #roo
31
31
  s.add_dependency 'escape', '>=0.0.4'
32
32
  s.add_dependency 'posix-spawn'
33
+ s.add_dependency 'ensure-encoding'
33
34
  unless RUBY_VERSION >= '1.9'
34
35
  s.add_dependency 'fastercsv', '>=1.5.0'
35
36
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 1
9
- - 6
10
- version: 1.1.6
9
+ - 7
10
+ version: 1.1.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-03-31 00:00:00 -05:00
19
+ date: 2011-04-20 00:00:00 -05:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
@@ -183,9 +183,23 @@ dependencies:
183
183
  type: :runtime
184
184
  version_requirements: *id011
185
185
  - !ruby/object:Gem::Dependency
186
- name: fastercsv
186
+ name: ensure-encoding
187
187
  prerelease: false
188
188
  requirement: &id012 !ruby/object:Gem::Requirement
189
+ none: false
190
+ requirements:
191
+ - - ">="
192
+ - !ruby/object:Gem::Version
193
+ hash: 3
194
+ segments:
195
+ - 0
196
+ version: "0"
197
+ type: :runtime
198
+ version_requirements: *id012
199
+ - !ruby/object:Gem::Dependency
200
+ name: fastercsv
201
+ prerelease: false
202
+ requirement: &id013 !ruby/object:Gem::Requirement
189
203
  none: false
190
204
  requirements:
191
205
  - - ">="
@@ -197,11 +211,11 @@ dependencies:
197
211
  - 0
198
212
  version: 1.5.0
199
213
  type: :runtime
200
- version_requirements: *id012
214
+ version_requirements: *id013
201
215
  - !ruby/object:Gem::Dependency
202
216
  name: errata
203
217
  prerelease: false
204
- requirement: &id013 !ruby/object:Gem::Requirement
218
+ requirement: &id014 !ruby/object:Gem::Requirement
205
219
  none: false
206
220
  requirements:
207
221
  - - ">="
@@ -213,11 +227,11 @@ dependencies:
213
227
  - 0
214
228
  version: 0.2.0
215
229
  type: :development
216
- version_requirements: *id013
230
+ version_requirements: *id014
217
231
  - !ruby/object:Gem::Dependency
218
232
  name: test-unit
219
233
  prerelease: false
220
- requirement: &id014 !ruby/object:Gem::Requirement
234
+ requirement: &id015 !ruby/object:Gem::Requirement
221
235
  none: false
222
236
  requirements:
223
237
  - - ">="
@@ -227,11 +241,11 @@ dependencies:
227
241
  - 0
228
242
  version: "0"
229
243
  type: :development
230
- version_requirements: *id014
244
+ version_requirements: *id015
231
245
  - !ruby/object:Gem::Dependency
232
246
  name: shoulda
233
247
  prerelease: false
234
- requirement: &id015 !ruby/object:Gem::Requirement
248
+ requirement: &id016 !ruby/object:Gem::Requirement
235
249
  none: false
236
250
  requirements:
237
251
  - - ">="
@@ -241,11 +255,11 @@ dependencies:
241
255
  - 0
242
256
  version: "0"
243
257
  type: :development
244
- version_requirements: *id015
258
+ version_requirements: *id016
245
259
  - !ruby/object:Gem::Dependency
246
260
  name: ruby-debug
247
261
  prerelease: false
248
- requirement: &id016 !ruby/object:Gem::Requirement
262
+ requirement: &id017 !ruby/object:Gem::Requirement
249
263
  none: false
250
264
  requirements:
251
265
  - - ">="
@@ -255,7 +269,7 @@ dependencies:
255
269
  - 0
256
270
  version: "0"
257
271
  type: :development
258
- version_requirements: *id016
272
+ version_requirements: *id017
259
273
  description: Gives you a standard way to parse various formats and treat them as an array of hashes.
260
274
  email:
261
275
  - seamus@abshere.net
@@ -268,7 +282,6 @@ extra_rdoc_files: []
268
282
  files:
269
283
  - .document
270
284
  - .gitignore
271
- - CHANGELOG
272
285
  - Gemfile
273
286
  - LICENSE
274
287
  - README.rdoc
data/CHANGELOG DELETED
@@ -1,12 +0,0 @@
1
- 1.1.0
2
- * Fixed difference in row hashes between Ruby 1.8 and 1.9
3
- * Properly required fastercsv depending on Ruby version
4
- 1.0.0
5
- * Using bundler and gemspec instead of jeweler
6
- * Refactored to follow more Ruby conventions
7
- * Suggesting new syntax that looks more like an Enumerable... t[5] instead of t.rows[5]
8
- * Switching to string option keys (but old syntax is supported)
9
- [...no changelog for 0.1.6--1.0.0...sorry]
10
- 0.1.6
11
- * For CSVs, force convert headers using String#toutf8. :encoding => 'N'|'U' didn't work.
12
- * Fix handling of long urls when passing off to Tempfile.