remote_table 1.1.10 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/remote_table.rb +64 -20
- data/lib/remote_table/format.rb +11 -2
- data/lib/remote_table/format/delimited.rb +12 -20
- data/lib/remote_table/format/excel.rb +1 -1
- data/lib/remote_table/format/excelx.rb +1 -1
- data/lib/remote_table/format/fixed_width.rb +8 -5
- data/lib/remote_table/format/html.rb +4 -31
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +67 -0
- data/lib/remote_table/format/mixins/processed_by_roo.rb +52 -0
- data/lib/remote_table/format/open_office.rb +1 -1
- data/lib/remote_table/format/xml.rb +14 -0
- data/lib/remote_table/hasher.rb +10 -4
- data/lib/remote_table/properties.rb +37 -2
- data/lib/remote_table/transformer.rb +1 -1
- data/lib/remote_table/version.rb +1 -1
- data/test/test_old_syntax.rb +2 -8
- data/test/test_remote_table.rb +29 -8
- metadata +8 -6
- data/lib/remote_table/format/mixins/rooable.rb +0 -49
data/lib/remote_table.rb
CHANGED
@@ -4,11 +4,19 @@ require 'active_support/version'
|
|
4
4
|
active_support/core_ext/hash
|
5
5
|
active_support/core_ext/string
|
6
6
|
active_support/core_ext/module
|
7
|
-
active_support/core_ext/array
|
7
|
+
active_support/core_ext/array
|
8
8
|
}.each do |active_support_3_requirement|
|
9
9
|
require active_support_3_requirement
|
10
10
|
end if ::ActiveSupport::VERSION::MAJOR == 3
|
11
11
|
|
12
|
+
class Hash
|
13
|
+
attr_accessor :row_hash
|
14
|
+
end
|
15
|
+
|
16
|
+
class Array
|
17
|
+
attr_accessor :row_hash
|
18
|
+
end
|
19
|
+
|
12
20
|
class RemoteTable
|
13
21
|
autoload :Format, 'remote_table/format'
|
14
22
|
autoload :Properties, 'remote_table/properties'
|
@@ -53,40 +61,52 @@ class RemoteTable
|
|
53
61
|
@options.freeze
|
54
62
|
end
|
55
63
|
|
64
|
+
# not thread safe
|
56
65
|
def each(&blk)
|
57
|
-
|
66
|
+
if fully_cached?
|
67
|
+
cache.each(&blk)
|
68
|
+
else
|
69
|
+
mark_download!
|
70
|
+
retval = format.each do |row|
|
71
|
+
row.row_hash = ::RemoteTable.hasher.hash row
|
72
|
+
transformer.transform(row).each do |virtual_row|
|
73
|
+
if properties.errata
|
74
|
+
next if properties.errata.rejects? virtual_row
|
75
|
+
properties.errata.correct! virtual_row
|
76
|
+
end
|
77
|
+
next if properties.select and !properties.select.call(virtual_row)
|
78
|
+
next if properties.reject and properties.reject.call(virtual_row)
|
79
|
+
cache.push virtual_row unless properties.streaming
|
80
|
+
yield virtual_row
|
81
|
+
end
|
82
|
+
end
|
83
|
+
fully_cached! unless properties.streaming
|
84
|
+
retval
|
85
|
+
end
|
58
86
|
end
|
59
87
|
alias :each_row :each
|
60
88
|
|
61
89
|
def to_a
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
# allow the transformer to return multiple "virtual rows" for every real row
|
67
|
-
::Array.wrap(transformer.transform(row)).each do |virtual_row|
|
68
|
-
if properties.errata
|
69
|
-
next if properties.errata.rejects? virtual_row
|
70
|
-
properties.errata.correct! virtual_row
|
71
|
-
end
|
72
|
-
next if properties.select and !properties.select.call(virtual_row)
|
73
|
-
next if properties.reject and properties.reject.call(virtual_row)
|
74
|
-
@to_a.push virtual_row
|
75
|
-
end
|
90
|
+
if fully_cached?
|
91
|
+
cache.dup
|
92
|
+
else
|
93
|
+
map { |row| row }
|
76
94
|
end
|
77
|
-
@to_a
|
78
95
|
end
|
79
96
|
alias :rows :to_a
|
80
97
|
|
81
98
|
# Get a row by row number
|
82
99
|
def [](row_number)
|
83
|
-
|
100
|
+
if fully_cached?
|
101
|
+
cache[row_number]
|
102
|
+
else
|
103
|
+
to_a[row_number]
|
104
|
+
end
|
84
105
|
end
|
85
106
|
|
86
107
|
# clear the row cache to save memory
|
87
108
|
def free
|
88
|
-
|
89
|
-
@to_a = nil
|
109
|
+
cache.clear
|
90
110
|
::GC.start
|
91
111
|
nil
|
92
112
|
end
|
@@ -120,4 +140,28 @@ class RemoteTable
|
|
120
140
|
def transformer
|
121
141
|
@transformer ||= Transformer.new self
|
122
142
|
end
|
143
|
+
|
144
|
+
attr_reader :download_count
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
def mark_download!
|
149
|
+
@download_count ||= 0
|
150
|
+
@download_count += 1
|
151
|
+
if properties.warn_on_multiple_downloads and download_count > 1
|
152
|
+
$stderr.puts "[remote_table] Warning: #{url} has been downloaded #{download_count} times."
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def fully_cached!
|
157
|
+
@fully_cached = true
|
158
|
+
end
|
159
|
+
|
160
|
+
def fully_cached?
|
161
|
+
!!@fully_cached
|
162
|
+
end
|
163
|
+
|
164
|
+
def cache
|
165
|
+
@cache ||= []
|
166
|
+
end
|
123
167
|
end
|
data/lib/remote_table/format.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
if ::RUBY_VERSION >= '1.9'
|
2
2
|
require 'ensure/encoding'
|
3
3
|
else
|
4
|
+
$KCODE = 'U'
|
4
5
|
require 'iconv'
|
5
6
|
end
|
6
7
|
|
@@ -12,9 +13,11 @@ class RemoteTable
|
|
12
13
|
autoload :OpenOffice, 'remote_table/format/open_office'
|
13
14
|
autoload :FixedWidth, 'remote_table/format/fixed_width'
|
14
15
|
autoload :HTML, 'remote_table/format/html'
|
16
|
+
autoload :XML, 'remote_table/format/xml'
|
15
17
|
|
16
18
|
autoload :Textual, 'remote_table/format/mixins/textual'
|
17
|
-
autoload :Rooable, 'remote_table/format/mixins/rooable'
|
19
|
+
autoload :ProcessedByRoo, 'remote_table/format/mixins/processed_by_roo'
|
20
|
+
autoload :ProcessedByNokogiri, 'remote_table/format/mixins/processed_by_nokogiri'
|
18
21
|
|
19
22
|
attr_reader :t
|
20
23
|
|
@@ -26,7 +29,13 @@ class RemoteTable
|
|
26
29
|
if ::RUBY_VERSION >= '1.9'
|
27
30
|
str.ensure_encoding 'UTF-8', :external_encoding => t.properties.encoding, :invalid_characters => :transcode
|
28
31
|
else
|
29
|
-
|
32
|
+
return str if t.properties.encoding[0] =~ /utf.?8/i
|
33
|
+
begin
|
34
|
+
::Iconv.conv('UTF-8//TRANSLIT', t.properties.encoding[0], str + ' ')[0..-2]
|
35
|
+
rescue ::Iconv::IllegalSequence
|
36
|
+
$stderr.puts "[remote_table] Unable to transliterate #{str} into UTF-8 given #{t.properties.encoding[0]}"
|
37
|
+
str
|
38
|
+
end
|
30
39
|
end
|
31
40
|
end
|
32
41
|
|
@@ -6,7 +6,7 @@ else
|
|
6
6
|
require 'fastercsv'
|
7
7
|
::RemoteTable::CSV = ::FasterCSV
|
8
8
|
rescue ::LoadError
|
9
|
-
$stderr.puts "[remote_table
|
9
|
+
$stderr.puts "[remote_table] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
|
10
10
|
raise $!
|
11
11
|
end
|
12
12
|
end
|
@@ -19,26 +19,18 @@ class RemoteTable
|
|
19
19
|
remove_useless_characters!
|
20
20
|
skip_rows!
|
21
21
|
CSV.foreach(t.local_file.path, fastercsv_options) do |row|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
ordered_hash[header] = utf8 value
|
30
|
-
filled_values += 1 if value.present?
|
31
|
-
end
|
32
|
-
when ::Array
|
33
|
-
index = 0
|
34
|
-
row.each do |value|
|
35
|
-
value = '' if value.nil?
|
36
|
-
ordered_hash[index] = utf8 value
|
37
|
-
filled_values += 1 if value.present?
|
38
|
-
index += 1
|
22
|
+
if row.is_a?(CSV::Row)
|
23
|
+
output = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (key, value)|
|
24
|
+
if key.present?
|
25
|
+
value = '' if value.nil?
|
26
|
+
memo[key] = utf8 value
|
27
|
+
end
|
28
|
+
memo
|
39
29
|
end
|
30
|
+
yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
|
31
|
+
else
|
32
|
+
yield row if t.properties.keep_blank_rows or row.any? { |v| v.present? }
|
40
33
|
end
|
41
|
-
yield ordered_hash if t.properties.keep_blank_rows or filled_values > 0
|
42
34
|
end
|
43
35
|
ensure
|
44
36
|
t.local_file.delete
|
@@ -62,7 +54,7 @@ class RemoteTable
|
|
62
54
|
def fastercsv_options
|
63
55
|
hsh = t.options.slice *FASTERCSV_OPTIONS
|
64
56
|
hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
|
65
|
-
hsh.reverse_merge! 'headers' =>
|
57
|
+
hsh.reverse_merge! 'headers' => t.properties.headers
|
66
58
|
hsh.reverse_merge! 'col_sep' => t.properties.delimiter
|
67
59
|
hsh.symbolize_keys
|
68
60
|
end
|
@@ -8,20 +8,23 @@ class RemoteTable
|
|
8
8
|
crop_rows!
|
9
9
|
skip_rows!
|
10
10
|
cut_columns!
|
11
|
-
parser.parse[:rows].each do |
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
parser.parse[:rows].each do |row|
|
12
|
+
row.reject! { |k, v| k.blank? }
|
13
|
+
row.each do |k, v|
|
14
|
+
row[k] = utf8 v
|
15
15
|
end
|
16
|
-
yield
|
16
|
+
yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
|
17
17
|
end
|
18
18
|
ensure
|
19
19
|
t.local_file.delete
|
20
20
|
end
|
21
|
+
|
21
22
|
private
|
23
|
+
|
22
24
|
def parser
|
23
25
|
@parser ||= ::Slither::Parser.new definition, t.local_file.path
|
24
26
|
end
|
27
|
+
|
25
28
|
def definition
|
26
29
|
@definition ||= if t.properties.schema_name.is_a?(::String) or t.properties.schema_name.is_a?(::Symbol)
|
27
30
|
::Slither.send :definition, t.properties.schema_name
|
@@ -4,37 +4,10 @@ class RemoteTable
|
|
4
4
|
class Format
|
5
5
|
class HTML < Format
|
6
6
|
include Textual
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
::Nokogiri::HTML
|
11
|
-
values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
12
|
-
if html_headers.nil?
|
13
|
-
html_headers = values
|
14
|
-
next
|
15
|
-
end
|
16
|
-
hash = zip html_headers, values
|
17
|
-
yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
|
18
|
-
end
|
19
|
-
ensure
|
20
|
-
t.local_file.delete
|
21
|
-
end
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
# http://snippets.dzone.com/posts/show/406
|
26
|
-
def zip(keys, values)
|
27
|
-
hash = ::Hash.new
|
28
|
-
keys.zip(values) { |k,v| hash[k]=v }
|
29
|
-
hash
|
30
|
-
end
|
31
|
-
|
32
|
-
# should we be doing this in ruby?
|
33
|
-
def unescaped_html_without_soft_hyphens
|
34
|
-
str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
|
35
|
-
# get rid of MS Office baddies
|
36
|
-
str.gsub! '­', ''
|
37
|
-
str
|
7
|
+
include ProcessedByNokogiri
|
8
|
+
|
9
|
+
def nokogiri_class
|
10
|
+
::Nokogiri::HTML::Document
|
38
11
|
end
|
39
12
|
end
|
40
13
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'cgi'
|
3
|
+
class RemoteTable
|
4
|
+
class Format
|
5
|
+
module ProcessedByNokogiri
|
6
|
+
def each
|
7
|
+
remove_useless_characters!
|
8
|
+
first_row = true
|
9
|
+
keys = t.properties.headers if t.properties.headers.is_a?(::Array)
|
10
|
+
xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
|
11
|
+
(row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)).each do |row|
|
12
|
+
values = if column_css?
|
13
|
+
row.css(t.properties.column_css)
|
14
|
+
elsif column_xpath?
|
15
|
+
row.xpath(t.properties.column_xpath)
|
16
|
+
else
|
17
|
+
[row]
|
18
|
+
end.map { |cell| cell.content.gsub(/\s+/, ' ').strip }
|
19
|
+
if first_row and t.properties.use_first_row_as_header?
|
20
|
+
keys = values
|
21
|
+
first_row = false
|
22
|
+
next
|
23
|
+
end
|
24
|
+
output = if t.properties.output_class == ::Array
|
25
|
+
values
|
26
|
+
else
|
27
|
+
zip keys, values
|
28
|
+
end
|
29
|
+
if t.properties.keep_blank_rows or values.any?
|
30
|
+
yield output
|
31
|
+
end
|
32
|
+
end
|
33
|
+
ensure
|
34
|
+
t.local_file.delete
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def row_css?
|
40
|
+
!!t.properties.row_css
|
41
|
+
end
|
42
|
+
|
43
|
+
def column_css?
|
44
|
+
!!t.properties.column_css
|
45
|
+
end
|
46
|
+
|
47
|
+
def column_xpath?
|
48
|
+
!!t.properties.column_xpath
|
49
|
+
end
|
50
|
+
|
51
|
+
# http://snippets.dzone.com/posts/show/406
|
52
|
+
def zip(keys, values)
|
53
|
+
hash = ::ActiveSupport::OrderedHash.new
|
54
|
+
keys.zip(values) { |k,v| hash[k]=v }
|
55
|
+
hash
|
56
|
+
end
|
57
|
+
|
58
|
+
# should we be doing this in ruby?
|
59
|
+
def unescaped_xml_without_soft_hyphens
|
60
|
+
str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
|
61
|
+
# get rid of MS Office baddies
|
62
|
+
str.gsub! '­', ''
|
63
|
+
str
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'roo'
|
2
|
+
class RemoteTable
|
3
|
+
class Format
|
4
|
+
module ProcessedByRoo
|
5
|
+
def each(&blk)
|
6
|
+
spreadsheet = roo_class.new t.local_file.path, nil, :ignore
|
7
|
+
spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
|
8
|
+
if t.properties.output_class == ::Array
|
9
|
+
(first_data_row..spreadsheet.last_row).each do |y|
|
10
|
+
output = (1..spreadsheet.last_column).map do |x|
|
11
|
+
spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
|
12
|
+
end
|
13
|
+
yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
|
14
|
+
end
|
15
|
+
else
|
16
|
+
keys = {}
|
17
|
+
if t.properties.use_first_row_as_header?
|
18
|
+
(1..spreadsheet.last_column).each do |x|
|
19
|
+
keys[x] = spreadsheet.cell(header_row, x)
|
20
|
+
keys[x] = spreadsheet.cell(header_row - 1, x) if keys[x].blank? # look up
|
21
|
+
end
|
22
|
+
else
|
23
|
+
(1..spreadsheet.last_column).each do |x|
|
24
|
+
keys[x] = t.properties.headers[x - 1]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
(first_data_row..spreadsheet.last_row).each do |y|
|
28
|
+
output = (1..spreadsheet.last_column).inject(::ActiveSupport::OrderedHash.new) do |memo, x|
|
29
|
+
if keys[x].present?
|
30
|
+
memo[keys[x]] = spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
|
31
|
+
end
|
32
|
+
memo
|
33
|
+
end
|
34
|
+
yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
|
35
|
+
end
|
36
|
+
end
|
37
|
+
ensure
|
38
|
+
t.local_file.delete
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def header_row
|
44
|
+
1 + t.properties.skip
|
45
|
+
end
|
46
|
+
|
47
|
+
def first_data_row
|
48
|
+
1 + header_row
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/remote_table/hasher.rb
CHANGED
@@ -12,10 +12,16 @@ class RemoteTable
|
|
12
12
|
class Hasher
|
13
13
|
include ::Singleton
|
14
14
|
def hash(row)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
str = if row.is_a?(::Array)
|
16
|
+
tmp_ary = []
|
17
|
+
row.each_with_index do |v, i|
|
18
|
+
tmp_ary.push v.to_query(i.to_s)
|
19
|
+
end
|
20
|
+
tmp_ary
|
21
|
+
else
|
22
|
+
row.stringify_keys.keys.sort.map do |k|
|
23
|
+
row[k].to_query k
|
24
|
+
end
|
19
25
|
end.join('&')
|
20
26
|
::Digest::MD5.hexdigest str
|
21
27
|
end
|
@@ -17,9 +17,32 @@ class RemoteTable
|
|
17
17
|
@uri
|
18
18
|
end
|
19
19
|
|
20
|
+
# Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you...
|
21
|
+
# * call []
|
22
|
+
# * call each
|
23
|
+
# Defaults to false.
|
24
|
+
def streaming
|
25
|
+
t.options['streaming'] || false
|
26
|
+
end
|
27
|
+
|
28
|
+
# Defaults to true.
|
29
|
+
def warn_on_multiple_downloads
|
30
|
+
t.options['warn_on_multiple_downloads'] != false
|
31
|
+
end
|
32
|
+
|
20
33
|
# The headers specified by the user
|
34
|
+
#
|
35
|
+
# Default: :first_row
|
21
36
|
def headers
|
22
|
-
t.options['headers']
|
37
|
+
t.options['headers'].nil? ? :first_row : t.options['headers']
|
38
|
+
end
|
39
|
+
|
40
|
+
def use_first_row_as_header?
|
41
|
+
headers == :first_row
|
42
|
+
end
|
43
|
+
|
44
|
+
def output_class
|
45
|
+
headers == false ? ::Array : ::ActiveSupport::OrderedHash
|
23
46
|
end
|
24
47
|
|
25
48
|
# The sheet specified by the user as a number or a string
|
@@ -52,7 +75,7 @@ class RemoteTable
|
|
52
75
|
#
|
53
76
|
# Default: "UTF-8"
|
54
77
|
def encoding
|
55
|
-
@encoding ||= ::Array.wrap(t.options['encoding'] || [ '
|
78
|
+
@encoding ||= ::Array.wrap(t.options['encoding'] || [ 'ISO-8859-1', 'US-ASCII', 'WINDOWS-1252', 'ASCII-8BIT', 'UTF-8' ])
|
56
79
|
end
|
57
80
|
|
58
81
|
# The delimiter
|
@@ -71,6 +94,16 @@ class RemoteTable
|
|
71
94
|
def column_xpath
|
72
95
|
t.options['column_xpath']
|
73
96
|
end
|
97
|
+
|
98
|
+
# The CSS selector used to find rows
|
99
|
+
def row_css
|
100
|
+
t.options['row_css']
|
101
|
+
end
|
102
|
+
|
103
|
+
# The CSS selector used to find columns
|
104
|
+
def column_css
|
105
|
+
t.options['column_css']
|
106
|
+
end
|
74
107
|
|
75
108
|
# The compression type.
|
76
109
|
#
|
@@ -205,6 +238,8 @@ class RemoteTable
|
|
205
238
|
Format::FixedWidth
|
206
239
|
when /htm/
|
207
240
|
Format::HTML
|
241
|
+
when /xml/
|
242
|
+
Format::XML
|
208
243
|
else
|
209
244
|
Format::Delimited
|
210
245
|
end
|
data/lib/remote_table/version.rb
CHANGED
data/test/test_old_syntax.rb
CHANGED
@@ -106,16 +106,10 @@ class TestOldSyntax < Test::Unit::TestCase
|
|
106
106
|
assert_equal '2', t.rows.first['dup_header']
|
107
107
|
end
|
108
108
|
|
109
|
-
should "
|
109
|
+
should "return an Array when instructed not to use headers" do
|
110
110
|
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
111
111
|
t.rows.each do |row|
|
112
|
-
|
113
|
-
row.each do |column_number, v|
|
114
|
-
next if column_number == 'row_hash'
|
115
|
-
assert column_number.is_a?(Numeric)
|
116
|
-
assert(column_number > last_column_number)
|
117
|
-
last_column_number = column_number
|
118
|
-
end
|
112
|
+
assert row.is_a?(::Array)
|
119
113
|
end
|
120
114
|
end
|
121
115
|
|
data/test/test_remote_table.rb
CHANGED
@@ -9,7 +9,7 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
9
9
|
|
10
10
|
should "add a row hash to every row" do
|
11
11
|
t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
12
|
-
assert_equal "06d8a738551c17735e2731e25c8d0461", t[5]
|
12
|
+
assert_equal "06d8a738551c17735e2731e25c8d0461", t[5].row_hash
|
13
13
|
end
|
14
14
|
|
15
15
|
should "open a google doc" do
|
@@ -56,19 +56,40 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
56
56
|
should 'allow itself to be cleared for save memory' do
|
57
57
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
58
58
|
t.to_a
|
59
|
-
|
59
|
+
assert t.send(:cache).length > 0
|
60
60
|
t.free
|
61
|
-
|
61
|
+
assert t.send(:cache).length == 0
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
# fixes ArgumentError: invalid byte sequence in UTF-8
|
65
|
-
should %{safely strip soft hyphens and read
|
66
|
-
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :
|
65
|
+
should %{safely strip soft hyphens and read windows-1252 html} do
|
66
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
|
67
67
|
assert t.rows.detect { |row| row['Model'] == 'A300B4600' }
|
68
68
|
end
|
69
69
|
|
70
|
-
should %{transliterate characters
|
71
|
-
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv'
|
70
|
+
should %{transliterate characters from ISO-8859-1} do
|
71
|
+
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv'
|
72
72
|
assert t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }
|
73
73
|
end
|
74
|
+
|
75
|
+
should %{read xml with css selectors} do
|
76
|
+
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
77
|
+
assert /(AM|PM)/.match(t[0][0])
|
78
|
+
end
|
79
|
+
|
80
|
+
should %{optionally stream rows instead of caching them} do
|
81
|
+
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
82
|
+
time1 = t[0][0]
|
83
|
+
assert /\d\d\d\d-\d\d-\d\d/.match(time1)
|
84
|
+
sleep 1
|
85
|
+
time2 = t[0][0]
|
86
|
+
assert(time1 != time2)
|
87
|
+
end
|
88
|
+
|
89
|
+
should %{not die when it reads Åland Islands} do
|
90
|
+
t = RemoteTable.new :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';'
|
91
|
+
assert_nothing_raised do
|
92
|
+
t[1][0]
|
93
|
+
end
|
94
|
+
end
|
74
95
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
- 1
|
9
|
-
- 10
|
10
|
-
version: 1.1.10
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 1.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seamus Abshere
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-05-
|
19
|
+
date: 2011-05-05 00:00:00 Z
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
name: activesupport
|
@@ -293,9 +293,11 @@ files:
|
|
293
293
|
- lib/remote_table/format/excelx.rb
|
294
294
|
- lib/remote_table/format/fixed_width.rb
|
295
295
|
- lib/remote_table/format/html.rb
|
296
|
-
- lib/remote_table/format/mixins/rooable.rb
|
296
|
+
- lib/remote_table/format/mixins/processed_by_nokogiri.rb
|
297
|
+
- lib/remote_table/format/mixins/processed_by_roo.rb
|
297
298
|
- lib/remote_table/format/mixins/textual.rb
|
298
299
|
- lib/remote_table/format/open_office.rb
|
300
|
+
- lib/remote_table/format/xml.rb
|
299
301
|
- lib/remote_table/hasher.rb
|
300
302
|
- lib/remote_table/local_file.rb
|
301
303
|
- lib/remote_table/properties.rb
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require 'roo'
|
2
|
-
class RemoteTable
|
3
|
-
class Format
|
4
|
-
module Rooable
|
5
|
-
def each(&blk)
|
6
|
-
spreadsheet = roo_class.new t.local_file.path, nil, :ignore
|
7
|
-
spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
|
8
|
-
column_references = ::Hash.new
|
9
|
-
if t.properties.headers == false
|
10
|
-
# zero-based numeric keys
|
11
|
-
for col in (1..spreadsheet.last_column)
|
12
|
-
column_references[col] = col - 1
|
13
|
-
end
|
14
|
-
elsif t.properties.headers.is_a? ::Array
|
15
|
-
# names
|
16
|
-
for col in (1..spreadsheet.last_column)
|
17
|
-
column_references[col] = t.properties.headers[col - 1]
|
18
|
-
end
|
19
|
-
else
|
20
|
-
# read t.properties.headers from the file itself
|
21
|
-
for col in (1..spreadsheet.last_column)
|
22
|
-
column_references[col] = spreadsheet.cell(header_row, col)
|
23
|
-
column_references[col] = spreadsheet.cell(header_row - 1, col) if column_references[col].blank? # lspreadsheetk up
|
24
|
-
end
|
25
|
-
end
|
26
|
-
first_data_row.upto(spreadsheet.last_row) do |raw_row|
|
27
|
-
ordered_hash = ::ActiveSupport::OrderedHash.new
|
28
|
-
for col in (1..spreadsheet.last_column)
|
29
|
-
next if column_references[col].blank?
|
30
|
-
ordered_hash[column_references[col]] = spreadsheet.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
|
31
|
-
end
|
32
|
-
yield ordered_hash if t.properties.keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
|
33
|
-
end
|
34
|
-
ensure
|
35
|
-
t.local_file.delete
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def header_row
|
41
|
-
1 + t.properties.skip
|
42
|
-
end
|
43
|
-
|
44
|
-
def first_data_row
|
45
|
-
1 + header_row
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|