remote_table 1.1.10 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/remote_table.rb +64 -20
- data/lib/remote_table/format.rb +11 -2
- data/lib/remote_table/format/delimited.rb +12 -20
- data/lib/remote_table/format/excel.rb +1 -1
- data/lib/remote_table/format/excelx.rb +1 -1
- data/lib/remote_table/format/fixed_width.rb +8 -5
- data/lib/remote_table/format/html.rb +4 -31
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +67 -0
- data/lib/remote_table/format/mixins/processed_by_roo.rb +52 -0
- data/lib/remote_table/format/open_office.rb +1 -1
- data/lib/remote_table/format/xml.rb +14 -0
- data/lib/remote_table/hasher.rb +10 -4
- data/lib/remote_table/properties.rb +37 -2
- data/lib/remote_table/transformer.rb +1 -1
- data/lib/remote_table/version.rb +1 -1
- data/test/test_old_syntax.rb +2 -8
- data/test/test_remote_table.rb +29 -8
- metadata +8 -6
- data/lib/remote_table/format/mixins/rooable.rb +0 -49
data/lib/remote_table.rb
CHANGED
@@ -4,11 +4,19 @@ require 'active_support/version'
|
|
4
4
|
active_support/core_ext/hash
|
5
5
|
active_support/core_ext/string
|
6
6
|
active_support/core_ext/module
|
7
|
-
active_support/core_ext/array
|
7
|
+
active_support/core_ext/array
|
8
8
|
}.each do |active_support_3_requirement|
|
9
9
|
require active_support_3_requirement
|
10
10
|
end if ::ActiveSupport::VERSION::MAJOR == 3
|
11
11
|
|
12
|
+
class Hash
|
13
|
+
attr_accessor :row_hash
|
14
|
+
end
|
15
|
+
|
16
|
+
class Array
|
17
|
+
attr_accessor :row_hash
|
18
|
+
end
|
19
|
+
|
12
20
|
class RemoteTable
|
13
21
|
autoload :Format, 'remote_table/format'
|
14
22
|
autoload :Properties, 'remote_table/properties'
|
@@ -53,40 +61,52 @@ class RemoteTable
|
|
53
61
|
@options.freeze
|
54
62
|
end
|
55
63
|
|
64
|
+
# not thread safe
|
56
65
|
def each(&blk)
|
57
|
-
|
66
|
+
if fully_cached?
|
67
|
+
cache.each(&blk)
|
68
|
+
else
|
69
|
+
mark_download!
|
70
|
+
retval = format.each do |row|
|
71
|
+
row.row_hash = ::RemoteTable.hasher.hash row
|
72
|
+
transformer.transform(row).each do |virtual_row|
|
73
|
+
if properties.errata
|
74
|
+
next if properties.errata.rejects? virtual_row
|
75
|
+
properties.errata.correct! virtual_row
|
76
|
+
end
|
77
|
+
next if properties.select and !properties.select.call(virtual_row)
|
78
|
+
next if properties.reject and properties.reject.call(virtual_row)
|
79
|
+
cache.push virtual_row unless properties.streaming
|
80
|
+
yield virtual_row
|
81
|
+
end
|
82
|
+
end
|
83
|
+
fully_cached! unless properties.streaming
|
84
|
+
retval
|
85
|
+
end
|
58
86
|
end
|
59
87
|
alias :each_row :each
|
60
88
|
|
61
89
|
def to_a
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
# allow the transformer to return multiple "virtual rows" for every real row
|
67
|
-
::Array.wrap(transformer.transform(row)).each do |virtual_row|
|
68
|
-
if properties.errata
|
69
|
-
next if properties.errata.rejects? virtual_row
|
70
|
-
properties.errata.correct! virtual_row
|
71
|
-
end
|
72
|
-
next if properties.select and !properties.select.call(virtual_row)
|
73
|
-
next if properties.reject and properties.reject.call(virtual_row)
|
74
|
-
@to_a.push virtual_row
|
75
|
-
end
|
90
|
+
if fully_cached?
|
91
|
+
cache.dup
|
92
|
+
else
|
93
|
+
map { |row| row }
|
76
94
|
end
|
77
|
-
@to_a
|
78
95
|
end
|
79
96
|
alias :rows :to_a
|
80
97
|
|
81
98
|
# Get a row by row number
|
82
99
|
def [](row_number)
|
83
|
-
|
100
|
+
if fully_cached?
|
101
|
+
cache[row_number]
|
102
|
+
else
|
103
|
+
to_a[row_number]
|
104
|
+
end
|
84
105
|
end
|
85
106
|
|
86
107
|
# clear the row cache to save memory
|
87
108
|
def free
|
88
|
-
|
89
|
-
@to_a = nil
|
109
|
+
cache.clear
|
90
110
|
::GC.start
|
91
111
|
nil
|
92
112
|
end
|
@@ -120,4 +140,28 @@ class RemoteTable
|
|
120
140
|
def transformer
|
121
141
|
@transformer ||= Transformer.new self
|
122
142
|
end
|
143
|
+
|
144
|
+
attr_reader :download_count
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
def mark_download!
|
149
|
+
@download_count ||= 0
|
150
|
+
@download_count += 1
|
151
|
+
if properties.warn_on_multiple_downloads and download_count > 1
|
152
|
+
$stderr.puts "[remote_table] Warning: #{url} has been downloaded #{download_count} times."
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def fully_cached!
|
157
|
+
@fully_cached = true
|
158
|
+
end
|
159
|
+
|
160
|
+
def fully_cached?
|
161
|
+
!!@fully_cached
|
162
|
+
end
|
163
|
+
|
164
|
+
def cache
|
165
|
+
@cache ||= []
|
166
|
+
end
|
123
167
|
end
|
data/lib/remote_table/format.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
if ::RUBY_VERSION >= '1.9'
|
2
2
|
require 'ensure/encoding'
|
3
3
|
else
|
4
|
+
$KCODE = 'U'
|
4
5
|
require 'iconv'
|
5
6
|
end
|
6
7
|
|
@@ -12,9 +13,11 @@ class RemoteTable
|
|
12
13
|
autoload :OpenOffice, 'remote_table/format/open_office'
|
13
14
|
autoload :FixedWidth, 'remote_table/format/fixed_width'
|
14
15
|
autoload :HTML, 'remote_table/format/html'
|
16
|
+
autoload :XML, 'remote_table/format/xml'
|
15
17
|
|
16
18
|
autoload :Textual, 'remote_table/format/mixins/textual'
|
17
|
-
autoload :Rooable, 'remote_table/format/mixins/rooable'
|
19
|
+
autoload :ProcessedByRoo, 'remote_table/format/mixins/processed_by_roo'
|
20
|
+
autoload :ProcessedByNokogiri, 'remote_table/format/mixins/processed_by_nokogiri'
|
18
21
|
|
19
22
|
attr_reader :t
|
20
23
|
|
@@ -26,7 +29,13 @@ class RemoteTable
|
|
26
29
|
if ::RUBY_VERSION >= '1.9'
|
27
30
|
str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
|
28
31
|
else
|
29
|
-
|
32
|
+
return str if t.properties.encoding[0] =~ /utf.?8/i
|
33
|
+
begin
|
34
|
+
::Iconv.conv('UTF-8//TRANSLIT', t.properties.encoding[0], str + ' ')[0..-2]
|
35
|
+
rescue ::Iconv::IllegalSequence
|
36
|
+
$stderr.puts "[remote_table] Unable to transliterate #{str} into UTF-8 given #{t.properties.encoding[0]}"
|
37
|
+
str
|
38
|
+
end
|
30
39
|
end
|
31
40
|
end
|
32
41
|
|
@@ -6,7 +6,7 @@ else
|
|
6
6
|
require 'fastercsv'
|
7
7
|
::RemoteTable::CSV = ::FasterCSV
|
8
8
|
rescue ::LoadError
|
9
|
-
$stderr.puts "[remote_table
|
9
|
+
$stderr.puts "[remote_table] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
|
10
10
|
raise $!
|
11
11
|
end
|
12
12
|
end
|
@@ -19,26 +19,18 @@ class RemoteTable
|
|
19
19
|
remove_useless_characters!
|
20
20
|
skip_rows!
|
21
21
|
CSV.foreach(t.local_file.path, fastercsv_options) do |row|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
ordered_hash[header] = utf8 value
|
30
|
-
filled_values += 1 if value.present?
|
31
|
-
end
|
32
|
-
when ::Array
|
33
|
-
index = 0
|
34
|
-
row.each do |value|
|
35
|
-
value = '' if value.nil?
|
36
|
-
ordered_hash[index] = utf8 value
|
37
|
-
filled_values += 1 if value.present?
|
38
|
-
index += 1
|
22
|
+
if row.is_a?(CSV::Row)
|
23
|
+
output = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (key, value)|
|
24
|
+
if key.present?
|
25
|
+
value = '' if value.nil?
|
26
|
+
memo[key] = utf8 value
|
27
|
+
end
|
28
|
+
memo
|
39
29
|
end
|
30
|
+
yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
|
31
|
+
else
|
32
|
+
yield row if t.properties.keep_blank_rows or row.any? { |v| v.present? }
|
40
33
|
end
|
41
|
-
yield ordered_hash if t.properties.keep_blank_rows or filled_values > 0
|
42
34
|
end
|
43
35
|
ensure
|
44
36
|
t.local_file.delete
|
@@ -62,7 +54,7 @@ class RemoteTable
|
|
62
54
|
def fastercsv_options
|
63
55
|
hsh = t.options.slice *FASTERCSV_OPTIONS
|
64
56
|
hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
|
65
|
-
hsh.reverse_merge! 'headers' =>
|
57
|
+
hsh.reverse_merge! 'headers' => t.properties.headers
|
66
58
|
hsh.reverse_merge! 'col_sep' => t.properties.delimiter
|
67
59
|
hsh.symbolize_keys
|
68
60
|
end
|
@@ -8,20 +8,23 @@ class RemoteTable
|
|
8
8
|
crop_rows!
|
9
9
|
skip_rows!
|
10
10
|
cut_columns!
|
11
|
-
parser.parse[:rows].each do |
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
parser.parse[:rows].each do |row|
|
12
|
+
row.reject! { |k, v| k.blank? }
|
13
|
+
row.each do |k, v|
|
14
|
+
row[k] = utf8 v
|
15
15
|
end
|
16
|
-
yield
|
16
|
+
yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
|
17
17
|
end
|
18
18
|
ensure
|
19
19
|
t.local_file.delete
|
20
20
|
end
|
21
|
+
|
21
22
|
private
|
23
|
+
|
22
24
|
def parser
|
23
25
|
@parser ||= ::Slither::Parser.new definition, t.local_file.path
|
24
26
|
end
|
27
|
+
|
25
28
|
def definition
|
26
29
|
@definition ||= if t.properties.schema_name.is_a?(::String) or t.properties.schema_name.is_a?(::Symbol)
|
27
30
|
::Slither.send :definition, t.properties.schema_name
|
@@ -4,37 +4,10 @@ class RemoteTable
|
|
4
4
|
class Format
|
5
5
|
class HTML < Format
|
6
6
|
include Textual
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
::Nokogiri::HTML
|
11
|
-
values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
12
|
-
if html_headers.nil?
|
13
|
-
html_headers = values
|
14
|
-
next
|
15
|
-
end
|
16
|
-
hash = zip html_headers, values
|
17
|
-
yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
|
18
|
-
end
|
19
|
-
ensure
|
20
|
-
t.local_file.delete
|
21
|
-
end
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
# http://snippets.dzone.com/posts/show/406
|
26
|
-
def zip(keys, values)
|
27
|
-
hash = ::Hash.new
|
28
|
-
keys.zip(values) { |k,v| hash[k]=v }
|
29
|
-
hash
|
30
|
-
end
|
31
|
-
|
32
|
-
# should we be doing this in ruby?
|
33
|
-
def unescaped_html_without_soft_hyphens
|
34
|
-
str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
|
35
|
-
# get rid of MS Office baddies
|
36
|
-
str.gsub! '­', ''
|
37
|
-
str
|
7
|
+
include ProcessedByNokogiri
|
8
|
+
|
9
|
+
def nokogiri_class
|
10
|
+
::Nokogiri::HTML::Document
|
38
11
|
end
|
39
12
|
end
|
40
13
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'cgi'
|
3
|
+
class RemoteTable
|
4
|
+
class Format
|
5
|
+
module ProcessedByNokogiri
|
6
|
+
def each
|
7
|
+
remove_useless_characters!
|
8
|
+
first_row = true
|
9
|
+
keys = t.properties.headers if t.properties.headers.is_a?(::Array)
|
10
|
+
xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
|
11
|
+
(row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)).each do |row|
|
12
|
+
values = if column_css?
|
13
|
+
row.css(t.properties.column_css)
|
14
|
+
elsif column_xpath?
|
15
|
+
row.xpath(t.properties.column_xpath)
|
16
|
+
else
|
17
|
+
[row]
|
18
|
+
end.map { |cell| cell.content.gsub(/\s+/, ' ').strip }
|
19
|
+
if first_row and t.properties.use_first_row_as_header?
|
20
|
+
keys = values
|
21
|
+
first_row = false
|
22
|
+
next
|
23
|
+
end
|
24
|
+
output = if t.properties.output_class == ::Array
|
25
|
+
values
|
26
|
+
else
|
27
|
+
zip keys, values
|
28
|
+
end
|
29
|
+
if t.properties.keep_blank_rows or values.any?
|
30
|
+
yield output
|
31
|
+
end
|
32
|
+
end
|
33
|
+
ensure
|
34
|
+
t.local_file.delete
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def row_css?
|
40
|
+
!!t.properties.row_css
|
41
|
+
end
|
42
|
+
|
43
|
+
def column_css?
|
44
|
+
!!t.properties.column_css
|
45
|
+
end
|
46
|
+
|
47
|
+
def column_xpath?
|
48
|
+
!!t.properties.column_xpath
|
49
|
+
end
|
50
|
+
|
51
|
+
# http://snippets.dzone.com/posts/show/406
|
52
|
+
def zip(keys, values)
|
53
|
+
hash = ::ActiveSupport::OrderedHash.new
|
54
|
+
keys.zip(values) { |k,v| hash[k]=v }
|
55
|
+
hash
|
56
|
+
end
|
57
|
+
|
58
|
+
# should we be doing this in ruby?
|
59
|
+
def unescaped_xml_without_soft_hyphens
|
60
|
+
str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
|
61
|
+
# get rid of MS Office baddies
|
62
|
+
str.gsub! '­', ''
|
63
|
+
str
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'roo'
|
2
|
+
class RemoteTable
|
3
|
+
class Format
|
4
|
+
module ProcessedByRoo
|
5
|
+
def each(&blk)
|
6
|
+
spreadsheet = roo_class.new t.local_file.path, nil, :ignore
|
7
|
+
spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
|
8
|
+
if t.properties.output_class == ::Array
|
9
|
+
(first_data_row..spreadsheet.last_row).each do |y|
|
10
|
+
output = (1..spreadsheet.last_column).map do |x|
|
11
|
+
spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
|
12
|
+
end
|
13
|
+
yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
|
14
|
+
end
|
15
|
+
else
|
16
|
+
keys = {}
|
17
|
+
if t.properties.use_first_row_as_header?
|
18
|
+
(1..spreadsheet.last_column).each do |x|
|
19
|
+
keys[x] = spreadsheet.cell(header_row, x)
|
20
|
+
keys[x] = spreadsheet.cell(header_row - 1, x) if keys[x].blank? # look up
|
21
|
+
end
|
22
|
+
else
|
23
|
+
(1..spreadsheet.last_column).each do |x|
|
24
|
+
keys[x] = t.properties.headers[x - 1]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
(first_data_row..spreadsheet.last_row).each do |y|
|
28
|
+
output = (1..spreadsheet.last_column).inject(::ActiveSupport::OrderedHash.new) do |memo, x|
|
29
|
+
if keys[x].present?
|
30
|
+
memo[keys[x]] = spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
|
31
|
+
end
|
32
|
+
memo
|
33
|
+
end
|
34
|
+
yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
|
35
|
+
end
|
36
|
+
end
|
37
|
+
ensure
|
38
|
+
t.local_file.delete
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def header_row
|
44
|
+
1 + t.properties.skip
|
45
|
+
end
|
46
|
+
|
47
|
+
def first_data_row
|
48
|
+
1 + header_row
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/remote_table/hasher.rb
CHANGED
@@ -12,10 +12,16 @@ class RemoteTable
|
|
12
12
|
class Hasher
|
13
13
|
include ::Singleton
|
14
14
|
def hash(row)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
str = if row.is_a?(::Array)
|
16
|
+
tmp_ary = []
|
17
|
+
row.each_with_index do |v, i|
|
18
|
+
tmp_ary.push v.to_query(i.to_s)
|
19
|
+
end
|
20
|
+
tmp_ary
|
21
|
+
else
|
22
|
+
row.stringify_keys.keys.sort.map do |k|
|
23
|
+
row[k].to_query k
|
24
|
+
end
|
19
25
|
end.join('&')
|
20
26
|
::Digest::MD5.hexdigest str
|
21
27
|
end
|
@@ -17,9 +17,32 @@ class RemoteTable
|
|
17
17
|
@uri
|
18
18
|
end
|
19
19
|
|
20
|
+
# Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you...
|
21
|
+
# * call []
|
22
|
+
# * call each
|
23
|
+
# Defaults to false.
|
24
|
+
def streaming
|
25
|
+
t.options['streaming'] || false
|
26
|
+
end
|
27
|
+
|
28
|
+
# Defaults to true.
|
29
|
+
def warn_on_multiple_downloads
|
30
|
+
t.options['warn_on_multiple_downloads'] != false
|
31
|
+
end
|
32
|
+
|
20
33
|
# The headers specified by the user
|
34
|
+
#
|
35
|
+
# Default: :first_row
|
21
36
|
def headers
|
22
|
-
t.options['headers']
|
37
|
+
t.options['headers'].nil? ? :first_row : t.options['headers']
|
38
|
+
end
|
39
|
+
|
40
|
+
def use_first_row_as_header?
|
41
|
+
headers == :first_row
|
42
|
+
end
|
43
|
+
|
44
|
+
def output_class
|
45
|
+
headers == false ? ::Array : ::ActiveSupport::OrderedHash
|
23
46
|
end
|
24
47
|
|
25
48
|
# The sheet specified by the user as a number or a string
|
@@ -52,7 +75,7 @@ class RemoteTable
|
|
52
75
|
#
|
53
76
|
# Default: "UTF-8"
|
54
77
|
def encoding
|
55
|
-
@encoding ||= ::Array.wrap(t.options['encoding'] || [ '
|
78
|
+
@encoding ||= ::Array.wrap(t.options['encoding'] || [ 'ISO-8859-1', 'US-ASCII', 'WINDOWS-1252', 'ASCII-8BIT', 'UTF-8' ])
|
56
79
|
end
|
57
80
|
|
58
81
|
# The delimiter
|
@@ -71,6 +94,16 @@ class RemoteTable
|
|
71
94
|
def column_xpath
|
72
95
|
t.options['column_xpath']
|
73
96
|
end
|
97
|
+
|
98
|
+
# The CSS selector used to find rows
|
99
|
+
def row_css
|
100
|
+
t.options['row_css']
|
101
|
+
end
|
102
|
+
|
103
|
+
# The CSS selector used to find columns
|
104
|
+
def column_css
|
105
|
+
t.options['column_css']
|
106
|
+
end
|
74
107
|
|
75
108
|
# The compression type.
|
76
109
|
#
|
@@ -205,6 +238,8 @@ class RemoteTable
|
|
205
238
|
Format::FixedWidth
|
206
239
|
when /htm/
|
207
240
|
Format::HTML
|
241
|
+
when /xml/
|
242
|
+
Format::XML
|
208
243
|
else
|
209
244
|
Format::Delimited
|
210
245
|
end
|
data/lib/remote_table/version.rb
CHANGED
data/test/test_old_syntax.rb
CHANGED
@@ -106,16 +106,10 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
106
106
|
assert_equal '2', t.rows.first['dup_header']
|
107
107
|
end
|
108
108
|
|
109
|
-
should "
|
109
|
+
should "return an Array when instructed not to use headers" do
|
110
110
|
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
111
111
|
t.rows.each do |row|
|
112
|
-
|
113
|
-
row.each do |column_number, v|
|
114
|
-
next if column_number == 'row_hash'
|
115
|
-
assert column_number.is_a?(Numeric)
|
116
|
-
assert(column_number > last_column_number)
|
117
|
-
last_column_number = column_number
|
118
|
-
end
|
112
|
+
assert row.is_a?(::Array)
|
119
113
|
end
|
120
114
|
end
|
121
115
|
|
data/test/test_remote_table.rb
CHANGED
@@ -9,7 +9,7 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
9
9
|
|
10
10
|
should "add a row hash to every row" do
|
11
11
|
t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
12
|
-
assert_equal "06d8a738551c17735e2731e25c8d0461", t[5]
|
12
|
+
assert_equal "06d8a738551c17735e2731e25c8d0461", t[5].row_hash
|
13
13
|
end
|
14
14
|
|
15
15
|
should "open a google doc" do
|
@@ -56,19 +56,40 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
56
56
|
should 'allow itself to be cleared for save memory' do
|
57
57
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
58
58
|
t.to_a
|
59
|
-
|
59
|
+
assert t.send(:cache).length > 0
|
60
60
|
t.free
|
61
|
-
|
61
|
+
assert t.send(:cache).length == 0
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
# fixes ArgumentError: invalid byte sequence in UTF-8
|
65
|
-
should %{safely strip soft hyphens and read
|
66
|
-
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :
|
65
|
+
should %{safely strip soft hyphens and read windows-1252 html} do
|
66
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
|
67
67
|
assert t.rows.detect { |row| row['Model'] == 'A300B4600' }
|
68
68
|
end
|
69
69
|
|
70
|
-
should %{transliterate characters
|
71
|
-
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv'
|
70
|
+
should %{transliterate characters from ISO-8859-1} do
|
71
|
+
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv'
|
72
72
|
assert t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }
|
73
73
|
end
|
74
|
+
|
75
|
+
should %{read xml with css selectors} do
|
76
|
+
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
77
|
+
assert /(AM|PM)/.match(t[0][0])
|
78
|
+
end
|
79
|
+
|
80
|
+
should %{optionally stream rows instead of caching them} do
|
81
|
+
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
82
|
+
time1 = t[0][0]
|
83
|
+
assert /\d\d\d\d-\d\d-\d\d/.match(time1)
|
84
|
+
sleep 1
|
85
|
+
time2 = t[0][0]
|
86
|
+
assert(time1 != time2)
|
87
|
+
end
|
88
|
+
|
89
|
+
should %{not die when it reads Åland Islands} do
|
90
|
+
t = RemoteTable.new :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';'
|
91
|
+
assert_nothing_raised do
|
92
|
+
t[1][0]
|
93
|
+
end
|
94
|
+
end
|
74
95
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
- 1
|
9
|
-
- 10
|
10
|
-
version: 1.1.10
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 1.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seamus Abshere
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-05-
|
19
|
+
date: 2011-05-05 00:00:00 Z
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
name: activesupport
|
@@ -293,9 +293,11 @@ files:
|
|
293
293
|
- lib/remote_table/format/excelx.rb
|
294
294
|
- lib/remote_table/format/fixed_width.rb
|
295
295
|
- lib/remote_table/format/html.rb
|
296
|
-
- lib/remote_table/format/mixins/rooable.rb
|
296
|
+
- lib/remote_table/format/mixins/processed_by_nokogiri.rb
|
297
|
+
- lib/remote_table/format/mixins/processed_by_roo.rb
|
297
298
|
- lib/remote_table/format/mixins/textual.rb
|
298
299
|
- lib/remote_table/format/open_office.rb
|
300
|
+
- lib/remote_table/format/xml.rb
|
299
301
|
- lib/remote_table/hasher.rb
|
300
302
|
- lib/remote_table/local_file.rb
|
301
303
|
- lib/remote_table/properties.rb
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require 'roo'
|
2
|
-
class RemoteTable
|
3
|
-
class Format
|
4
|
-
module Rooable
|
5
|
-
def each(&blk)
|
6
|
-
spreadsheet = roo_class.new t.local_file.path, nil, :ignore
|
7
|
-
spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
|
8
|
-
column_references = ::Hash.new
|
9
|
-
if t.properties.headers == false
|
10
|
-
# zero-based numeric keys
|
11
|
-
for col in (1..spreadsheet.last_column)
|
12
|
-
column_references[col] = col - 1
|
13
|
-
end
|
14
|
-
elsif t.properties.headers.is_a? ::Array
|
15
|
-
# names
|
16
|
-
for col in (1..spreadsheet.last_column)
|
17
|
-
column_references[col] = t.properties.headers[col - 1]
|
18
|
-
end
|
19
|
-
else
|
20
|
-
# read t.properties.headers from the file itself
|
21
|
-
for col in (1..spreadsheet.last_column)
|
22
|
-
column_references[col] = spreadsheet.cell(header_row, col)
|
23
|
-
column_references[col] = spreadsheet.cell(header_row - 1, col) if column_references[col].blank? # lspreadsheetk up
|
24
|
-
end
|
25
|
-
end
|
26
|
-
first_data_row.upto(spreadsheet.last_row) do |raw_row|
|
27
|
-
ordered_hash = ::ActiveSupport::OrderedHash.new
|
28
|
-
for col in (1..spreadsheet.last_column)
|
29
|
-
next if column_references[col].blank?
|
30
|
-
ordered_hash[column_references[col]] = spreadsheet.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
|
31
|
-
end
|
32
|
-
yield ordered_hash if t.properties.keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
|
33
|
-
end
|
34
|
-
ensure
|
35
|
-
t.local_file.delete
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def header_row
|
41
|
-
1 + t.properties.skip
|
42
|
-
end
|
43
|
-
|
44
|
-
def first_data_row
|
45
|
-
1 + header_row
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|