remote_table 1.1.6 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -0
- data/lib/remote_table/format.rb +12 -0
- data/lib/remote_table/format/delimited.rb +6 -6
- data/lib/remote_table/format/fixed_width.rb +3 -1
- data/lib/remote_table/format/html.rb +1 -2
- data/lib/remote_table/format/mixins/textual.rb +1 -5
- data/lib/remote_table/properties.rb +2 -2
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +1 -0
- metadata +28 -15
- data/CHANGELOG +0 -12
data/Rakefile
CHANGED
data/lib/remote_table/format.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
if ::RUBY_VERSION >= '1.9'
|
2
|
+
require 'ensure/encoding'
|
3
|
+
end
|
4
|
+
|
1
5
|
class RemoteTable
|
2
6
|
class Format
|
3
7
|
autoload :Excel, 'remote_table/format/excel'
|
@@ -16,6 +20,14 @@ class RemoteTable
|
|
16
20
|
@t = t
|
17
21
|
end
|
18
22
|
|
23
|
+
def utf8(str)
|
24
|
+
if ::RUBY_VERSION >= '1.9'
|
25
|
+
str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
|
26
|
+
else
|
27
|
+
str
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
19
31
|
include ::Enumerable
|
20
32
|
def each
|
21
33
|
raise "must be defined by format"
|
@@ -1,9 +1,10 @@
|
|
1
1
|
if RUBY_VERSION >= '1.9'
|
2
2
|
require 'csv'
|
3
|
-
::
|
3
|
+
::RemoteTable::CSV = ::CSV
|
4
4
|
else
|
5
5
|
begin
|
6
6
|
require 'fastercsv'
|
7
|
+
::RemoteTable::CSV = ::FasterCSV
|
7
8
|
rescue ::LoadError
|
8
9
|
$stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
|
9
10
|
raise $!
|
@@ -15,25 +16,24 @@ class RemoteTable
|
|
15
16
|
class Delimited < Format
|
16
17
|
include Textual
|
17
18
|
def each(&blk)
|
18
|
-
convert_file_to_utf8!
|
19
19
|
remove_useless_characters!
|
20
20
|
skip_rows!
|
21
|
-
|
21
|
+
CSV.foreach(t.local_file.path, fastercsv_options) do |row|
|
22
22
|
ordered_hash = ::ActiveSupport::OrderedHash.new
|
23
23
|
filled_values = 0
|
24
24
|
case row
|
25
|
-
when ::
|
25
|
+
when CSV::Row
|
26
26
|
row.each do |header, value|
|
27
27
|
next if header.blank?
|
28
28
|
value = '' if value.nil?
|
29
|
-
ordered_hash[header] = value
|
29
|
+
ordered_hash[header] = utf8 value
|
30
30
|
filled_values += 1 if value.present?
|
31
31
|
end
|
32
32
|
when ::Array
|
33
33
|
index = 0
|
34
34
|
row.each do |value|
|
35
35
|
value = '' if value.nil?
|
36
|
-
ordered_hash[index] = value
|
36
|
+
ordered_hash[index] = utf8 value
|
37
37
|
filled_values += 1 if value.present?
|
38
38
|
index += 1
|
39
39
|
end
|
@@ -4,13 +4,15 @@ class RemoteTable
|
|
4
4
|
class FixedWidth < Format
|
5
5
|
include Textual
|
6
6
|
def each(&blk)
|
7
|
-
convert_file_to_utf8!
|
8
7
|
remove_useless_characters!
|
9
8
|
crop_rows!
|
10
9
|
skip_rows!
|
11
10
|
cut_columns!
|
12
11
|
parser.parse[:rows].each do |hash|
|
13
12
|
hash.reject! { |k, v| k.blank? }
|
13
|
+
hash.each do |k, v|
|
14
|
+
hash[k] = utf8 v
|
15
|
+
end
|
14
16
|
yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
|
15
17
|
end
|
16
18
|
ensure
|
@@ -5,11 +5,10 @@ class RemoteTable
|
|
5
5
|
class HTML < Format
|
6
6
|
include Textual
|
7
7
|
def each(&blk)
|
8
|
-
convert_file_to_utf8!
|
9
8
|
remove_useless_characters!
|
10
9
|
html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
|
11
10
|
::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
|
12
|
-
values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
11
|
+
values = row.xpath(t.properties.column_xpath).map { |td| utf8 td.content.gsub(/\s+/, ' ').strip }
|
13
12
|
if html_headers.nil?
|
14
13
|
html_headers = values
|
15
14
|
next
|
@@ -2,11 +2,7 @@ require 'fileutils'
|
|
2
2
|
require 'escape'
|
3
3
|
class RemoteTable
|
4
4
|
class Format
|
5
|
-
module Textual
|
6
|
-
def convert_file_to_utf8!
|
7
|
-
::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.encoding} -t UTF-8"
|
8
|
-
end
|
9
|
-
|
5
|
+
module Textual
|
10
6
|
USELESS_CHARACTERS = [
|
11
7
|
'\xef\xbb\xbf', # UTF-8 byte order mark
|
12
8
|
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
|
@@ -48,11 +48,11 @@ class RemoteTable
|
|
48
48
|
t.options['skip'].to_i
|
49
49
|
end
|
50
50
|
|
51
|
-
#
|
51
|
+
# Likely external encoding
|
52
52
|
#
|
53
53
|
# Default: "UTF-8"
|
54
54
|
def encoding
|
55
|
-
t.options['encoding'] || 'UTF-8'
|
55
|
+
@encoding ||= ::Array.wrap(t.options['encoding'] || [ 'UTF-8', 'US-ASCII', 'ASCII-8BIT' ])
|
56
56
|
end
|
57
57
|
|
58
58
|
# The delimiter
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -30,6 +30,7 @@ Gem::Specification.new do |s|
|
|
30
30
|
s.add_dependency 'google-spreadsheet-ruby' #roo
|
31
31
|
s.add_dependency 'escape', '>=0.0.4'
|
32
32
|
s.add_dependency 'posix-spawn'
|
33
|
+
s.add_dependency 'ensure-encoding'
|
33
34
|
unless RUBY_VERSION >= '1.9'
|
34
35
|
s.add_dependency 'fastercsv', '>=1.5.0'
|
35
36
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 1
|
9
|
-
- 6
|
10
|
-
version: 1.1.6
|
9
|
+
- 7
|
10
|
+
version: 1.1.7
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seamus Abshere
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-04-20 00:00:00 -05:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
@@ -183,9 +183,23 @@ dependencies:
|
|
183
183
|
type: :runtime
|
184
184
|
version_requirements: *id011
|
185
185
|
- !ruby/object:Gem::Dependency
|
186
|
-
name: fastercsv
|
186
|
+
name: ensure-encoding
|
187
187
|
prerelease: false
|
188
188
|
requirement: &id012 !ruby/object:Gem::Requirement
|
189
|
+
none: false
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
hash: 3
|
194
|
+
segments:
|
195
|
+
- 0
|
196
|
+
version: "0"
|
197
|
+
type: :runtime
|
198
|
+
version_requirements: *id012
|
199
|
+
- !ruby/object:Gem::Dependency
|
200
|
+
name: fastercsv
|
201
|
+
prerelease: false
|
202
|
+
requirement: &id013 !ruby/object:Gem::Requirement
|
189
203
|
none: false
|
190
204
|
requirements:
|
191
205
|
- - ">="
|
@@ -197,11 +211,11 @@ dependencies:
|
|
197
211
|
- 0
|
198
212
|
version: 1.5.0
|
199
213
|
type: :runtime
|
200
|
-
version_requirements: *id012
|
214
|
+
version_requirements: *id013
|
201
215
|
- !ruby/object:Gem::Dependency
|
202
216
|
name: errata
|
203
217
|
prerelease: false
|
204
|
-
requirement: &id013 !ruby/object:Gem::Requirement
|
218
|
+
requirement: &id014 !ruby/object:Gem::Requirement
|
205
219
|
none: false
|
206
220
|
requirements:
|
207
221
|
- - ">="
|
@@ -213,11 +227,11 @@ dependencies:
|
|
213
227
|
- 0
|
214
228
|
version: 0.2.0
|
215
229
|
type: :development
|
216
|
-
version_requirements: *id013
|
230
|
+
version_requirements: *id014
|
217
231
|
- !ruby/object:Gem::Dependency
|
218
232
|
name: test-unit
|
219
233
|
prerelease: false
|
220
|
-
requirement: &id014 !ruby/object:Gem::Requirement
|
234
|
+
requirement: &id015 !ruby/object:Gem::Requirement
|
221
235
|
none: false
|
222
236
|
requirements:
|
223
237
|
- - ">="
|
@@ -227,11 +241,11 @@ dependencies:
|
|
227
241
|
- 0
|
228
242
|
version: "0"
|
229
243
|
type: :development
|
230
|
-
version_requirements: *id014
|
244
|
+
version_requirements: *id015
|
231
245
|
- !ruby/object:Gem::Dependency
|
232
246
|
name: shoulda
|
233
247
|
prerelease: false
|
234
|
-
requirement: &id015 !ruby/object:Gem::Requirement
|
248
|
+
requirement: &id016 !ruby/object:Gem::Requirement
|
235
249
|
none: false
|
236
250
|
requirements:
|
237
251
|
- - ">="
|
@@ -241,11 +255,11 @@ dependencies:
|
|
241
255
|
- 0
|
242
256
|
version: "0"
|
243
257
|
type: :development
|
244
|
-
version_requirements: *id015
|
258
|
+
version_requirements: *id016
|
245
259
|
- !ruby/object:Gem::Dependency
|
246
260
|
name: ruby-debug
|
247
261
|
prerelease: false
|
248
|
-
requirement: &id016 !ruby/object:Gem::Requirement
|
262
|
+
requirement: &id017 !ruby/object:Gem::Requirement
|
249
263
|
none: false
|
250
264
|
requirements:
|
251
265
|
- - ">="
|
@@ -255,7 +269,7 @@ dependencies:
|
|
255
269
|
- 0
|
256
270
|
version: "0"
|
257
271
|
type: :development
|
258
|
-
version_requirements: *id016
|
272
|
+
version_requirements: *id017
|
259
273
|
description: Gives you a standard way to parse various formats and treat them as an array of hashes.
|
260
274
|
email:
|
261
275
|
- seamus@abshere.net
|
@@ -268,7 +282,6 @@ extra_rdoc_files: []
|
|
268
282
|
files:
|
269
283
|
- .document
|
270
284
|
- .gitignore
|
271
|
-
- CHANGELOG
|
272
285
|
- Gemfile
|
273
286
|
- LICENSE
|
274
287
|
- README.rdoc
|
data/CHANGELOG
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
1.1.0
|
2
|
-
* Fixed difference in row hashes between Ruby 1.8 and 1.9
|
3
|
-
* Properly required fastercsv depending on Ruby version
|
4
|
-
1.0.0
|
5
|
-
* Using bundler and gemspec instead of jeweler
|
6
|
-
* Refactored to follow more Ruby conventions
|
7
|
-
* Suggesting new syntax that looks more like an Enumerable... t[5] instead of t.rows[5]
|
8
|
-
* Switching to string option keys (but old syntax is supported)
|
9
|
-
[...no changelog for 0.1.6--1.0.0...sorry]
|
10
|
-
0.1.6
|
11
|
-
* For CSVs, force convert headers using String#toutf8. :encoding => 'N'|'U' didn't work.
|
12
|
-
* Fix handling of long urls when passing off to Tempfile.
|