remote_table 1.1.6 → 1.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +2 -0
- data/lib/remote_table/format.rb +12 -0
- data/lib/remote_table/format/delimited.rb +6 -6
- data/lib/remote_table/format/fixed_width.rb +3 -1
- data/lib/remote_table/format/html.rb +1 -2
- data/lib/remote_table/format/mixins/textual.rb +1 -5
- data/lib/remote_table/properties.rb +2 -2
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +1 -0
- metadata +28 -15
- data/CHANGELOG +0 -12
data/Rakefile
CHANGED
data/lib/remote_table/format.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
if ::RUBY_VERSION >= '1.9'
|
2
|
+
require 'ensure/encoding'
|
3
|
+
end
|
4
|
+
|
1
5
|
class RemoteTable
|
2
6
|
class Format
|
3
7
|
autoload :Excel, 'remote_table/format/excel'
|
@@ -16,6 +20,14 @@ class RemoteTable
|
|
16
20
|
@t = t
|
17
21
|
end
|
18
22
|
|
23
|
+
def utf8(str)
|
24
|
+
if ::RUBY_VERSION >= '1.9'
|
25
|
+
str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
|
26
|
+
else
|
27
|
+
str
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
19
31
|
include ::Enumerable
|
20
32
|
def each
|
21
33
|
raise "must be defined by format"
|
@@ -1,9 +1,10 @@
|
|
1
1
|
if RUBY_VERSION >= '1.9'
|
2
2
|
require 'csv'
|
3
|
-
::
|
3
|
+
::RemoteTable::CSV = ::CSV
|
4
4
|
else
|
5
5
|
begin
|
6
6
|
require 'fastercsv'
|
7
|
+
::RemoteTable::CSV = ::FasterCSV
|
7
8
|
rescue ::LoadError
|
8
9
|
$stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
|
9
10
|
raise $!
|
@@ -15,25 +16,24 @@ class RemoteTable
|
|
15
16
|
class Delimited < Format
|
16
17
|
include Textual
|
17
18
|
def each(&blk)
|
18
|
-
convert_file_to_utf8!
|
19
19
|
remove_useless_characters!
|
20
20
|
skip_rows!
|
21
|
-
|
21
|
+
CSV.foreach(t.local_file.path, fastercsv_options) do |row|
|
22
22
|
ordered_hash = ::ActiveSupport::OrderedHash.new
|
23
23
|
filled_values = 0
|
24
24
|
case row
|
25
|
-
when ::
|
25
|
+
when CSV::Row
|
26
26
|
row.each do |header, value|
|
27
27
|
next if header.blank?
|
28
28
|
value = '' if value.nil?
|
29
|
-
ordered_hash[header] = value
|
29
|
+
ordered_hash[header] = utf8 value
|
30
30
|
filled_values += 1 if value.present?
|
31
31
|
end
|
32
32
|
when ::Array
|
33
33
|
index = 0
|
34
34
|
row.each do |value|
|
35
35
|
value = '' if value.nil?
|
36
|
-
ordered_hash[index] = value
|
36
|
+
ordered_hash[index] = utf8 value
|
37
37
|
filled_values += 1 if value.present?
|
38
38
|
index += 1
|
39
39
|
end
|
@@ -4,13 +4,15 @@ class RemoteTable
|
|
4
4
|
class FixedWidth < Format
|
5
5
|
include Textual
|
6
6
|
def each(&blk)
|
7
|
-
convert_file_to_utf8!
|
8
7
|
remove_useless_characters!
|
9
8
|
crop_rows!
|
10
9
|
skip_rows!
|
11
10
|
cut_columns!
|
12
11
|
parser.parse[:rows].each do |hash|
|
13
12
|
hash.reject! { |k, v| k.blank? }
|
13
|
+
hash.each do |k, v|
|
14
|
+
hash[k] = utf8 v
|
15
|
+
end
|
14
16
|
yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
|
15
17
|
end
|
16
18
|
ensure
|
@@ -5,11 +5,10 @@ class RemoteTable
|
|
5
5
|
class HTML < Format
|
6
6
|
include Textual
|
7
7
|
def each(&blk)
|
8
|
-
convert_file_to_utf8!
|
9
8
|
remove_useless_characters!
|
10
9
|
html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
|
11
10
|
::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
|
12
|
-
values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
11
|
+
values = row.xpath(t.properties.column_xpath).map { |td| utf8 td.content.gsub(/\s+/, ' ').strip }
|
13
12
|
if html_headers.nil?
|
14
13
|
html_headers = values
|
15
14
|
next
|
@@ -2,11 +2,7 @@ require 'fileutils'
|
|
2
2
|
require 'escape'
|
3
3
|
class RemoteTable
|
4
4
|
class Format
|
5
|
-
module Textual
|
6
|
-
def convert_file_to_utf8!
|
7
|
-
::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.encoding} -t UTF-8"
|
8
|
-
end
|
9
|
-
|
5
|
+
module Textual
|
10
6
|
USELESS_CHARACTERS = [
|
11
7
|
'\xef\xbb\xbf', # UTF-8 byte order mark
|
12
8
|
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: ­)
|
@@ -48,11 +48,11 @@ class RemoteTable
|
|
48
48
|
t.options['skip'].to_i
|
49
49
|
end
|
50
50
|
|
51
|
-
#
|
51
|
+
# Likely external encoding
|
52
52
|
#
|
53
53
|
# Default: "UTF-8"
|
54
54
|
def encoding
|
55
|
-
t.options['encoding'] || 'UTF-8'
|
55
|
+
@encoding ||= ::Array.wrap(t.options['encoding'] || [ 'UTF-8', 'US-ASCII', 'ASCII-8BIT' ])
|
56
56
|
end
|
57
57
|
|
58
58
|
# The delimiter
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |s|
|
|
30
30
|
s.add_dependency 'google-spreadsheet-ruby' #roo
|
31
31
|
s.add_dependency 'escape', '>=0.0.4'
|
32
32
|
s.add_dependency 'posix-spawn'
|
33
|
+
s.add_dependency 'ensure-encoding'
|
33
34
|
unless RUBY_VERSION >= '1.9'
|
34
35
|
s.add_dependency 'fastercsv', '>=1.5.0'
|
35
36
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 1
|
9
|
-
- 6
|
10
|
-
version: 1.1.6
|
9
|
+
- 7
|
10
|
+
version: 1.1.7
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seamus Abshere
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-04-20 00:00:00 -05:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -183,9 +183,23 @@ dependencies:
|
|
183
183
|
type: :runtime
|
184
184
|
version_requirements: *id011
|
185
185
|
- !ruby/object:Gem::Dependency
|
186
|
-
name: fastercsv
|
186
|
+
name: ensure-encoding
|
187
187
|
prerelease: false
|
188
188
|
requirement: &id012 !ruby/object:Gem::Requirement
|
189
|
+
none: false
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
hash: 3
|
194
|
+
segments:
|
195
|
+
- 0
|
196
|
+
version: "0"
|
197
|
+
type: :runtime
|
198
|
+
version_requirements: *id012
|
199
|
+
- !ruby/object:Gem::Dependency
|
200
|
+
name: fastercsv
|
201
|
+
prerelease: false
|
202
|
+
requirement: &id013 !ruby/object:Gem::Requirement
|
189
203
|
none: false
|
190
204
|
requirements:
|
191
205
|
- - ">="
|
@@ -197,11 +211,11 @@ dependencies:
|
|
197
211
|
- 0
|
198
212
|
version: 1.5.0
|
199
213
|
type: :runtime
|
200
|
-
version_requirements: *id012
|
214
|
+
version_requirements: *id013
|
201
215
|
- !ruby/object:Gem::Dependency
|
202
216
|
name: errata
|
203
217
|
prerelease: false
|
204
|
-
requirement: &id013 !ruby/object:Gem::Requirement
|
218
|
+
requirement: &id014 !ruby/object:Gem::Requirement
|
205
219
|
none: false
|
206
220
|
requirements:
|
207
221
|
- - ">="
|
@@ -213,11 +227,11 @@ dependencies:
|
|
213
227
|
- 0
|
214
228
|
version: 0.2.0
|
215
229
|
type: :development
|
216
|
-
version_requirements: *id013
|
230
|
+
version_requirements: *id014
|
217
231
|
- !ruby/object:Gem::Dependency
|
218
232
|
name: test-unit
|
219
233
|
prerelease: false
|
220
|
-
requirement: &id014 !ruby/object:Gem::Requirement
|
234
|
+
requirement: &id015 !ruby/object:Gem::Requirement
|
221
235
|
none: false
|
222
236
|
requirements:
|
223
237
|
- - ">="
|
@@ -227,11 +241,11 @@ dependencies:
|
|
227
241
|
- 0
|
228
242
|
version: "0"
|
229
243
|
type: :development
|
230
|
-
version_requirements: *id014
|
244
|
+
version_requirements: *id015
|
231
245
|
- !ruby/object:Gem::Dependency
|
232
246
|
name: shoulda
|
233
247
|
prerelease: false
|
234
|
-
requirement: &id015 !ruby/object:Gem::Requirement
|
248
|
+
requirement: &id016 !ruby/object:Gem::Requirement
|
235
249
|
none: false
|
236
250
|
requirements:
|
237
251
|
- - ">="
|
@@ -241,11 +255,11 @@ dependencies:
|
|
241
255
|
- 0
|
242
256
|
version: "0"
|
243
257
|
type: :development
|
244
|
-
version_requirements: *id015
|
258
|
+
version_requirements: *id016
|
245
259
|
- !ruby/object:Gem::Dependency
|
246
260
|
name: ruby-debug
|
247
261
|
prerelease: false
|
248
|
-
requirement: &id016 !ruby/object:Gem::Requirement
|
262
|
+
requirement: &id017 !ruby/object:Gem::Requirement
|
249
263
|
none: false
|
250
264
|
requirements:
|
251
265
|
- - ">="
|
@@ -255,7 +269,7 @@ dependencies:
|
|
255
269
|
- 0
|
256
270
|
version: "0"
|
257
271
|
type: :development
|
258
|
-
version_requirements: *id016
|
272
|
+
version_requirements: *id017
|
259
273
|
description: Gives you a standard way to parse various formats and treat them as an array of hashes.
|
260
274
|
email:
|
261
275
|
- seamus@abshere.net
|
@@ -268,7 +282,6 @@ extra_rdoc_files: []
|
|
268
282
|
files:
|
269
283
|
- .document
|
270
284
|
- .gitignore
|
271
|
-
- CHANGELOG
|
272
285
|
- Gemfile
|
273
286
|
- LICENSE
|
274
287
|
- README.rdoc
|
data/CHANGELOG
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
1.1.0
|
2
|
-
* Fixed difference in row hashes between Ruby 1.8 and 1.9
|
3
|
-
* Properly required fastercsv depending on Ruby version
|
4
|
-
1.0.0
|
5
|
-
* Using bundler and gemspec instead of jeweler
|
6
|
-
* Refactored to follow more Ruby conventions
|
7
|
-
* Suggesting new syntax that looks more like an Enumerable... t[5] instead of t.rows[5]
|
8
|
-
* Switching to string option keys (but old syntax is supported)
|
9
|
-
[...no changelog for 0.1.6--1.0.0...sorry]
|
10
|
-
0.1.6
|
11
|
-
* For CSVs, force convert headers using String#toutf8. :encoding => 'N'|'U' didn't work.
|
12
|
-
* Fix handling of long urls when passing off to Tempfile.
|