remote_table 1.1.6 → 1.1.7

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -21,3 +21,5 @@ begin
21
21
  rescue LoadError
22
22
  puts "Rdoc is not available"
23
23
  end
24
+
25
+ task :default => :test
@@ -1,3 +1,7 @@
1
+ if ::RUBY_VERSION >= '1.9'
2
+ require 'ensure/encoding'
3
+ end
4
+
1
5
  class RemoteTable
2
6
  class Format
3
7
  autoload :Excel, 'remote_table/format/excel'
@@ -16,6 +20,14 @@ class RemoteTable
16
20
  @t = t
17
21
  end
18
22
 
23
+ def utf8(str)
24
+ if ::RUBY_VERSION >= '1.9'
25
+ str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
26
+ else
27
+ str
28
+ end
29
+ end
30
+
19
31
  include ::Enumerable
20
32
  def each
21
33
  raise "must be defined by format"
@@ -1,9 +1,10 @@
1
1
  if RUBY_VERSION >= '1.9'
2
2
  require 'csv'
3
- ::FasterCSV = ::CSV
3
+ ::RemoteTable::CSV = ::CSV
4
4
  else
5
5
  begin
6
6
  require 'fastercsv'
7
+ ::RemoteTable::CSV = ::FasterCSV
7
8
  rescue ::LoadError
8
9
  $stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
9
10
  raise $!
@@ -15,25 +16,24 @@ class RemoteTable
15
16
  class Delimited < Format
16
17
  include Textual
17
18
  def each(&blk)
18
- convert_file_to_utf8!
19
19
  remove_useless_characters!
20
20
  skip_rows!
21
- ::FasterCSV.foreach(t.local_file.path, fastercsv_options) do |row|
21
+ CSV.foreach(t.local_file.path, fastercsv_options) do |row|
22
22
  ordered_hash = ::ActiveSupport::OrderedHash.new
23
23
  filled_values = 0
24
24
  case row
25
- when ::FasterCSV::Row
25
+ when CSV::Row
26
26
  row.each do |header, value|
27
27
  next if header.blank?
28
28
  value = '' if value.nil?
29
- ordered_hash[header] = value
29
+ ordered_hash[header] = utf8 value
30
30
  filled_values += 1 if value.present?
31
31
  end
32
32
  when ::Array
33
33
  index = 0
34
34
  row.each do |value|
35
35
  value = '' if value.nil?
36
- ordered_hash[index] = value
36
+ ordered_hash[index] = utf8 value
37
37
  filled_values += 1 if value.present?
38
38
  index += 1
39
39
  end
@@ -4,13 +4,15 @@ class RemoteTable
4
4
  class FixedWidth < Format
5
5
  include Textual
6
6
  def each(&blk)
7
- convert_file_to_utf8!
8
7
  remove_useless_characters!
9
8
  crop_rows!
10
9
  skip_rows!
11
10
  cut_columns!
12
11
  parser.parse[:rows].each do |hash|
13
12
  hash.reject! { |k, v| k.blank? }
13
+ hash.each do |k, v|
14
+ hash[k] = utf8 v
15
+ end
14
16
  yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
15
17
  end
16
18
  ensure
@@ -5,11 +5,10 @@ class RemoteTable
5
5
  class HTML < Format
6
6
  include Textual
7
7
  def each(&blk)
8
- convert_file_to_utf8!
9
8
  remove_useless_characters!
10
9
  html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
11
10
  ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
12
- values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
11
+ values = row.xpath(t.properties.column_xpath).map { |td| utf8 td.content.gsub(/\s+/, ' ').strip }
13
12
  if html_headers.nil?
14
13
  html_headers = values
15
14
  next
@@ -2,11 +2,7 @@ require 'fileutils'
2
2
  require 'escape'
3
3
  class RemoteTable
4
4
  class Format
5
- module Textual
6
- def convert_file_to_utf8!
7
- ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.encoding} -t UTF-8"
8
- end
9
-
5
+ module Textual
10
6
  USELESS_CHARACTERS = [
11
7
  '\xef\xbb\xbf', # UTF-8 byte order mark
12
8
  '\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
@@ -48,11 +48,11 @@ class RemoteTable
48
48
  t.options['skip'].to_i
49
49
  end
50
50
 
51
- # The encoding
51
+ # Likely external encoding
52
52
  #
53
53
  # Default: "UTF-8"
54
54
  def encoding
55
- t.options['encoding'] || 'UTF-8'
55
+ @encoding ||= ::Array.wrap(t.options['encoding'] || [ 'UTF-8', 'US-ASCII', 'ASCII-8BIT' ])
56
56
  end
57
57
 
58
58
  # The delimiter
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "1.1.6"
2
+ VERSION = "1.1.7"
3
3
  end
data/remote_table.gemspec CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |s|
30
30
  s.add_dependency 'google-spreadsheet-ruby' #roo
31
31
  s.add_dependency 'escape', '>=0.0.4'
32
32
  s.add_dependency 'posix-spawn'
33
+ s.add_dependency 'ensure-encoding'
33
34
  unless RUBY_VERSION >= '1.9'
34
35
  s.add_dependency 'fastercsv', '>=1.5.0'
35
36
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 1
9
- - 6
10
- version: 1.1.6
9
+ - 7
10
+ version: 1.1.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-03-31 00:00:00 -05:00
19
+ date: 2011-04-20 00:00:00 -05:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
@@ -183,9 +183,23 @@ dependencies:
183
183
  type: :runtime
184
184
  version_requirements: *id011
185
185
  - !ruby/object:Gem::Dependency
186
- name: fastercsv
186
+ name: ensure-encoding
187
187
  prerelease: false
188
188
  requirement: &id012 !ruby/object:Gem::Requirement
189
+ none: false
190
+ requirements:
191
+ - - ">="
192
+ - !ruby/object:Gem::Version
193
+ hash: 3
194
+ segments:
195
+ - 0
196
+ version: "0"
197
+ type: :runtime
198
+ version_requirements: *id012
199
+ - !ruby/object:Gem::Dependency
200
+ name: fastercsv
201
+ prerelease: false
202
+ requirement: &id013 !ruby/object:Gem::Requirement
189
203
  none: false
190
204
  requirements:
191
205
  - - ">="
@@ -197,11 +211,11 @@ dependencies:
197
211
  - 0
198
212
  version: 1.5.0
199
213
  type: :runtime
200
- version_requirements: *id012
214
+ version_requirements: *id013
201
215
  - !ruby/object:Gem::Dependency
202
216
  name: errata
203
217
  prerelease: false
204
- requirement: &id013 !ruby/object:Gem::Requirement
218
+ requirement: &id014 !ruby/object:Gem::Requirement
205
219
  none: false
206
220
  requirements:
207
221
  - - ">="
@@ -213,11 +227,11 @@ dependencies:
213
227
  - 0
214
228
  version: 0.2.0
215
229
  type: :development
216
- version_requirements: *id013
230
+ version_requirements: *id014
217
231
  - !ruby/object:Gem::Dependency
218
232
  name: test-unit
219
233
  prerelease: false
220
- requirement: &id014 !ruby/object:Gem::Requirement
234
+ requirement: &id015 !ruby/object:Gem::Requirement
221
235
  none: false
222
236
  requirements:
223
237
  - - ">="
@@ -227,11 +241,11 @@ dependencies:
227
241
  - 0
228
242
  version: "0"
229
243
  type: :development
230
- version_requirements: *id014
244
+ version_requirements: *id015
231
245
  - !ruby/object:Gem::Dependency
232
246
  name: shoulda
233
247
  prerelease: false
234
- requirement: &id015 !ruby/object:Gem::Requirement
248
+ requirement: &id016 !ruby/object:Gem::Requirement
235
249
  none: false
236
250
  requirements:
237
251
  - - ">="
@@ -241,11 +255,11 @@ dependencies:
241
255
  - 0
242
256
  version: "0"
243
257
  type: :development
244
- version_requirements: *id015
258
+ version_requirements: *id016
245
259
  - !ruby/object:Gem::Dependency
246
260
  name: ruby-debug
247
261
  prerelease: false
248
- requirement: &id016 !ruby/object:Gem::Requirement
262
+ requirement: &id017 !ruby/object:Gem::Requirement
249
263
  none: false
250
264
  requirements:
251
265
  - - ">="
@@ -255,7 +269,7 @@ dependencies:
255
269
  - 0
256
270
  version: "0"
257
271
  type: :development
258
- version_requirements: *id016
272
+ version_requirements: *id017
259
273
  description: Gives you a standard way to parse various formats and treat them as an array of hashes.
260
274
  email:
261
275
  - seamus@abshere.net
@@ -268,7 +282,6 @@ extra_rdoc_files: []
268
282
  files:
269
283
  - .document
270
284
  - .gitignore
271
- - CHANGELOG
272
285
  - Gemfile
273
286
  - LICENSE
274
287
  - README.rdoc
data/CHANGELOG DELETED
@@ -1,12 +0,0 @@
1
- 1.1.0
2
- * Fixed difference in row hashes between Ruby 1.8 and 1.9
3
- * Properly required fastercsv depending on Ruby version
4
- 1.0.0
5
- * Using bundler and gemspec instead of jeweler
6
- * Refactored to follow more Ruby conventions
7
- * Suggesting new syntax that looks more like an Enumerable... t[5] instead of t.rows[5]
8
- * Switching to string option keys (but old syntax is supported)
9
- [...no changelog for 0.1.6--1.0.0...sorry]
10
- 0.1.6
11
- * For CSVs, force convert headers using String#toutf8. :encoding => 'N'|'U' didn't work.
12
- * Fix handling of long urls when passing off to Tempfile.