remote_table 0.1.6 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/Rakefile +4 -2
- data/VERSION +1 -1
- data/lib/remote_table.rb +1 -1
- data/lib/remote_table/file.rb +4 -2
- data/lib/remote_table/file/csv.rb +21 -8
- data/lib/remote_table/file/fixed_width.rb +4 -1
- data/lib/remote_table/file/roo_spreadsheet.rb +6 -5
- data/lib/remote_table/package.rb +1 -1
- data/lib/remote_table/transform.rb +8 -0
- data/remote_table.gemspec +14 -14
- data/test/remote_table_test.rb +191 -23
- metadata +13 -13
data/README.rdoc
CHANGED
data/Rakefile
CHANGED
@@ -10,8 +10,10 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/remote_table"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
|
14
|
-
gem.add_dependency '
|
13
|
+
gem.add_dependency 'roo', '~>1.3.11'
|
14
|
+
gem.add_dependency 'fastercsv', '~>1.5.0'
|
15
|
+
gem.add_dependency 'activesupport', '~>2.3.4'
|
16
|
+
gem.add_dependency 'ryanwood-slither', '~>0.99.3'
|
15
17
|
gem.require_path = "lib"
|
16
18
|
gem.files.include %w(lib/remote_table) unless gem.files.empty? # seems to fail once it's in the wild
|
17
19
|
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/remote_table.rb
CHANGED
data/lib/remote_table/file.rb
CHANGED
@@ -2,6 +2,7 @@ class RemoteTable
|
|
2
2
|
class File
|
3
3
|
attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
|
4
4
|
attr_accessor :path
|
5
|
+
attr_accessor :keep_blank_rows
|
5
6
|
|
6
7
|
def initialize(bus)
|
7
8
|
@filename = bus[:filename]
|
@@ -9,6 +10,7 @@ class RemoteTable
|
|
9
10
|
@delimiter = bus[:delimiter]
|
10
11
|
@sheet = bus[:sheet] || 0
|
11
12
|
@skip = bus[:skip] # rows
|
13
|
+
@keep_blank_rows = bus[:keep_blank_rows] || false
|
12
14
|
@crop = bus[:crop] # rows
|
13
15
|
@cut = bus[:cut] # columns
|
14
16
|
@headers = bus[:headers]
|
@@ -26,7 +28,7 @@ class RemoteTable
|
|
26
28
|
|
27
29
|
private
|
28
30
|
|
29
|
-
# doesn't support trap
|
31
|
+
# doesn't support trap
|
30
32
|
def define_fixed_width_schema!
|
31
33
|
raise "can't define both schema_name and schema" if !schema_name.blank?
|
32
34
|
self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
|
@@ -35,7 +37,7 @@ class RemoteTable
|
|
35
37
|
d.rows do |row|
|
36
38
|
row.trap(&trap)
|
37
39
|
schema.each do |name, width, options|
|
38
|
-
if name ==
|
40
|
+
if name == 'spacer'
|
39
41
|
row.spacer width
|
40
42
|
else
|
41
43
|
row.column name, width, options
|
@@ -3,15 +3,28 @@ class RemoteTable
|
|
3
3
|
def each_row(&block)
|
4
4
|
skip_rows!
|
5
5
|
FasterCSV.foreach(path, fastercsv_options) do |row|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
ordered_hash = ActiveSupport::OrderedHash.new
|
7
|
+
filled_values = 0
|
8
|
+
case row
|
9
|
+
when FasterCSV::Row
|
10
|
+
row.each do |header, value|
|
11
|
+
next if header.blank?
|
12
|
+
value = '' if value.nil?
|
13
|
+
ordered_hash[header] = value
|
14
|
+
filled_values += 1 if value.present?
|
15
|
+
end
|
16
|
+
when Array
|
11
17
|
index = 0
|
12
|
-
|
18
|
+
row.each do |value|
|
19
|
+
value = '' if value.nil?
|
20
|
+
ordered_hash[index] = value
|
21
|
+
filled_values += 1 if value.present?
|
22
|
+
index += 1
|
23
|
+
end
|
24
|
+
else
|
25
|
+
raise "Unexpected #{row.inspect}"
|
13
26
|
end
|
14
|
-
yield
|
27
|
+
yield ordered_hash if keep_blank_rows or filled_values.nonzero?
|
15
28
|
end
|
16
29
|
ensure
|
17
30
|
restore_rows!
|
@@ -20,7 +33,7 @@ class RemoteTable
|
|
20
33
|
private
|
21
34
|
|
22
35
|
def fastercsv_options
|
23
|
-
fastercsv_options = { :skip_blanks =>
|
36
|
+
fastercsv_options = { :skip_blanks => !keep_blank_rows, :header_converters => lambda { |k| k.to_s.toutf8 } }
|
24
37
|
if headers == false
|
25
38
|
fastercsv_options.merge!(:headers => nil)
|
26
39
|
else
|
@@ -5,7 +5,10 @@ class RemoteTable
|
|
5
5
|
skip_rows!
|
6
6
|
cut_columns!
|
7
7
|
a = Slither.parse(path, schema_name)
|
8
|
-
a[:rows].each
|
8
|
+
a[:rows].each do |hash|
|
9
|
+
hash.reject! { |k, v| k.blank? }
|
10
|
+
yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
|
11
|
+
end
|
9
12
|
ensure
|
10
13
|
uncut_columns!
|
11
14
|
unskip_rows!
|
@@ -1,19 +1,20 @@
|
|
1
1
|
class RemoteTable
|
2
2
|
module RooSpreadsheet
|
3
3
|
def each_row(&block)
|
4
|
-
headers =
|
4
|
+
headers = Hash.new
|
5
5
|
oo = roo_klass.new(path, nil, :ignore)
|
6
6
|
oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
|
7
7
|
for col in (1..oo.last_column)
|
8
8
|
headers[col] = oo.cell(header_row, col)
|
9
9
|
headers[col] = oo.cell(header_row - 1, col) if headers[col].blank? # look up
|
10
10
|
end
|
11
|
-
first_data_row.upto(oo.last_row) do |
|
12
|
-
|
11
|
+
first_data_row.upto(oo.last_row) do |raw_row|
|
12
|
+
ordered_hash = ActiveSupport::OrderedHash.new
|
13
13
|
for col in (1..oo.last_column)
|
14
|
-
|
14
|
+
next if headers[col].blank?
|
15
|
+
ordered_hash[headers[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
|
15
16
|
end
|
16
|
-
yield
|
17
|
+
yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
data/lib/remote_table/package.rb
CHANGED
@@ -55,7 +55,7 @@ class RemoteTable
|
|
55
55
|
# in C but not in the others, we can default to the basename of the package
|
56
56
|
# in order to do this we'll need to mv the uncompressed file on top of the original file
|
57
57
|
def identify(path)
|
58
|
-
|
58
|
+
FileUtils.mv(path, file_path(path)) if !packing and [ nil, :bz2, :gz ].include?(compression)
|
59
59
|
end
|
60
60
|
|
61
61
|
def file_path(path)
|
@@ -18,8 +18,16 @@ class RemoteTable
|
|
18
18
|
self
|
19
19
|
end
|
20
20
|
|
21
|
+
# - convert OrderedHash to a Hash (otherwise field ordering will be saved)
|
22
|
+
# - dump it
|
23
|
+
# - digest it
|
24
|
+
def self.row_hash(row)
|
25
|
+
Digest::MD5.hexdigest Marshal.dump(Hash.new.replace(row))
|
26
|
+
end
|
27
|
+
|
21
28
|
def each_row(&block)
|
22
29
|
raw_table.each_row do |row|
|
30
|
+
row['row_hash'] = self.class.row_hash(row)
|
23
31
|
virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
|
24
32
|
Array.wrap(virtual_rows).each do |virtual_row|
|
25
33
|
next if select and !select.call(virtual_row)
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-02-24}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -55,21 +55,21 @@ Gem::Specification.new do |s|
|
|
55
55
|
s.specification_version = 3
|
56
56
|
|
57
57
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
58
|
-
s.add_runtime_dependency(%q<
|
59
|
-
s.add_runtime_dependency(%q<fastercsv>, ["
|
60
|
-
s.add_runtime_dependency(%q<
|
61
|
-
s.add_runtime_dependency(%q<
|
58
|
+
s.add_runtime_dependency(%q<roo>, ["~> 1.3.11"])
|
59
|
+
s.add_runtime_dependency(%q<fastercsv>, ["~> 1.5.0"])
|
60
|
+
s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
|
61
|
+
s.add_runtime_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
|
62
62
|
else
|
63
|
-
s.add_dependency(%q<
|
64
|
-
s.add_dependency(%q<fastercsv>, ["
|
65
|
-
s.add_dependency(%q<
|
66
|
-
s.add_dependency(%q<
|
63
|
+
s.add_dependency(%q<roo>, ["~> 1.3.11"])
|
64
|
+
s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
|
65
|
+
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
66
|
+
s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
|
67
67
|
end
|
68
68
|
else
|
69
|
-
s.add_dependency(%q<
|
70
|
-
s.add_dependency(%q<fastercsv>, ["
|
71
|
-
s.add_dependency(%q<
|
72
|
-
s.add_dependency(%q<
|
69
|
+
s.add_dependency(%q<roo>, ["~> 1.3.11"])
|
70
|
+
s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
|
71
|
+
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
72
|
+
s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
data/test/remote_table_test.rb
CHANGED
@@ -7,7 +7,7 @@ class FuelOilParser
|
|
7
7
|
def add_hints!(bus)
|
8
8
|
bus[:sheet] = 'Data 1'
|
9
9
|
bus[:skip] = 2
|
10
|
-
bus[:select] = lambda { |row| row[
|
10
|
+
bus[:select] = lambda { |row| row['year'] > 1989 }
|
11
11
|
end
|
12
12
|
def apply(row)
|
13
13
|
virtual_rows = []
|
@@ -23,18 +23,31 @@ class FuelOilParser
|
|
23
23
|
locatable = "#{$1} (State)"
|
24
24
|
end
|
25
25
|
date = Time.parse(date)
|
26
|
-
virtual_rows <<
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
26
|
+
virtual_rows << {
|
27
|
+
'locatable' => locatable,
|
28
|
+
'cost' => cost,
|
29
|
+
'year' => date.year,
|
30
|
+
'month' => date.month
|
31
|
+
}
|
32
32
|
end
|
33
33
|
virtual_rows
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
37
|
class RemoteTableTest < Test::Unit::TestCase
|
38
|
+
def setup
|
39
|
+
@test2_rows_with_blanks = [
|
40
|
+
{ 'header4' => '', 'header5' => '', 'header6' => '' },
|
41
|
+
{ 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
|
42
|
+
{ 'header4' => '', 'header5' => '', 'header6' => '' },
|
43
|
+
{ 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
|
44
|
+
]
|
45
|
+
@test2_rows = [
|
46
|
+
{ 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
|
47
|
+
{ 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
|
48
|
+
]
|
49
|
+
end
|
50
|
+
|
38
51
|
should "open an XLS inside a zip file" do
|
39
52
|
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
40
53
|
assert_equal 'ACURA', t.rows.first['Manufacturer']
|
@@ -43,15 +56,26 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
43
56
|
assert_equal 'V70 XC AWD', t.rows.last['carline name']
|
44
57
|
end
|
45
58
|
|
46
|
-
should "have indifferent hash access" do
|
59
|
+
should "not have indifferent string/symbol hash access" do
|
47
60
|
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
48
|
-
assert_equal 'ACURA', t.rows.first['Manufacturer'
|
49
|
-
assert_equal
|
50
|
-
|
51
|
-
|
61
|
+
assert_equal 'ACURA', t.rows.first['Manufacturer']
|
62
|
+
assert_equal nil, t.rows.first[:Manufacturer]
|
63
|
+
end
|
64
|
+
|
65
|
+
should "hash rows without paying attention to order" do
|
66
|
+
x = ActiveSupport::OrderedHash.new
|
67
|
+
x[:a] = 1
|
68
|
+
x[:b] = 2
|
69
|
+
|
70
|
+
y = ActiveSupport::OrderedHash.new
|
71
|
+
y[:b] = 2
|
72
|
+
y[:a] = 1
|
73
|
+
|
74
|
+
assert Marshal.dump(x) != Marshal.dump(y)
|
75
|
+
assert RemoteTable::Transform.row_hash(x) == RemoteTable::Transform.row_hash(y)
|
52
76
|
end
|
53
77
|
|
54
|
-
should "open a Google Docs url" do
|
78
|
+
should "open a Google Docs url (as a CSV)" do
|
55
79
|
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
56
80
|
assert_equal 'Gulf Coast', t.rows.first['PAD district name']
|
57
81
|
assert_equal 'AL', t.rows.first['State']
|
@@ -59,10 +83,141 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
59
83
|
assert_equal 'WY', t.rows.last['State']
|
60
84
|
end
|
61
85
|
|
86
|
+
should "open a Google Docs url as a CSV without headers" do
|
87
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
88
|
+
assert_equal 'AL', t.rows.first[0]
|
89
|
+
assert_equal 'Gulf Coast', t.rows.first[4]
|
90
|
+
assert_equal 'WY', t.rows.last[0]
|
91
|
+
assert_equal 'Rocky Mountain', t.rows.last[4]
|
92
|
+
end
|
93
|
+
|
94
|
+
should "take the last of values if the header is duplicated" do
|
95
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
|
96
|
+
assert_equal '2', t.rows.first['dup_header']
|
97
|
+
end
|
98
|
+
|
99
|
+
should "respect field order in CSVs without headers" do
|
100
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
101
|
+
last_k = -1
|
102
|
+
saw_string = false
|
103
|
+
t.rows.each do |row|
|
104
|
+
row.each do |k, v|
|
105
|
+
if k.is_a?(Fixnum) and last_k.is_a?(Fixnum)
|
106
|
+
assert !saw_string
|
107
|
+
assert k > last_k
|
108
|
+
end
|
109
|
+
last_k = k
|
110
|
+
saw_string = k.is_a?(String)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
%w{ csv ods xls }.each do |format|
|
116
|
+
eval %{
|
117
|
+
should "read #{format}" do
|
118
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}')
|
119
|
+
# no blank headers
|
120
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
121
|
+
# correct values
|
122
|
+
t.rows.each_with_index do |row, index|
|
123
|
+
assert_equal row.except('row_hash'), @test2_rows[index]
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
should "read #{format}, keeping blank rows" do
|
128
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}', :keep_blank_rows => true)
|
129
|
+
# no blank headers
|
130
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
131
|
+
# correct values
|
132
|
+
t.rows.each_with_index do |row, index|
|
133
|
+
assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
|
134
|
+
end
|
135
|
+
end
|
136
|
+
}
|
137
|
+
end
|
138
|
+
|
139
|
+
should "read fixed width correctly" do
|
140
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
141
|
+
:format => :fixed_width,
|
142
|
+
:skip => 1,
|
143
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
144
|
+
[ 'spacer', 1 ],
|
145
|
+
[ 'header5', 10, { :type => :string } ],
|
146
|
+
[ 'spacer', 12 ],
|
147
|
+
[ 'header6', 10, { :type => :string } ]])
|
148
|
+
|
149
|
+
# no blank headers
|
150
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
151
|
+
# correct values
|
152
|
+
t.rows.each_with_index do |row, index|
|
153
|
+
assert_equal row.except('row_hash'), @test2_rows[index]
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
should "read fixed width correctly, keeping blank rows" do
|
158
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
159
|
+
:format => :fixed_width,
|
160
|
+
:keep_blank_rows => true,
|
161
|
+
:skip => 1,
|
162
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
163
|
+
[ 'spacer', 1 ],
|
164
|
+
[ 'header5', 10, { :type => :string } ],
|
165
|
+
[ 'spacer', 12 ],
|
166
|
+
[ 'header6', 10, { :type => :string } ]])
|
167
|
+
|
168
|
+
# no blank headers
|
169
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
170
|
+
# correct values
|
171
|
+
t.rows.each_with_index do |row, index|
|
172
|
+
assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
should "have the same row hash across formats" do
|
177
|
+
csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
|
178
|
+
ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
|
179
|
+
xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
|
180
|
+
fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
|
181
|
+
:format => :fixed_width,
|
182
|
+
:skip => 1,
|
183
|
+
:schema => [[ 'header1', 10, { :type => :string } ],
|
184
|
+
[ 'spacer', 1 ],
|
185
|
+
[ 'header2', 10, { :type => :string } ],
|
186
|
+
[ 'spacer', 12 ],
|
187
|
+
[ 'header3', 10, { :type => :string } ]])
|
188
|
+
|
189
|
+
csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
|
190
|
+
ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
|
191
|
+
xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
|
192
|
+
fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
|
193
|
+
:format => :fixed_width,
|
194
|
+
:skip => 1,
|
195
|
+
:schema => [[ 'spacer', 11 ],
|
196
|
+
[ 'header2', 10, { :type => :string } ],
|
197
|
+
[ 'spacer', 1 ],
|
198
|
+
[ 'header3', 10, { :type => :string } ],
|
199
|
+
[ 'spacer', 1 ],
|
200
|
+
[ 'header1', 10, { :type => :string } ]])
|
201
|
+
|
202
|
+
|
203
|
+
reference = csv.rows[0]['row_hash']
|
204
|
+
|
205
|
+
# same row hashes
|
206
|
+
assert_equal reference, ods.rows[0]['row_hash']
|
207
|
+
assert_equal reference, xls.rows[0]['row_hash']
|
208
|
+
assert_equal reference, fixed_width.rows[0]['row_hash']
|
209
|
+
# same row hashes with different order
|
210
|
+
assert_equal reference, csv2.rows[0]['row_hash']
|
211
|
+
assert_equal reference, ods2.rows[0]['row_hash']
|
212
|
+
assert_equal reference, xls2.rows[0]['row_hash']
|
213
|
+
assert_equal reference, fixed_width2.rows[0]['row_hash']
|
214
|
+
end
|
215
|
+
|
62
216
|
should "open an ODS" do
|
63
|
-
t = RemoteTable.new(:url => 'http://
|
64
|
-
|
65
|
-
assert_equal
|
217
|
+
t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
|
218
|
+
|
219
|
+
assert_equal 'Central Africa', t.rows[5]['name']
|
220
|
+
assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
|
66
221
|
end
|
67
222
|
|
68
223
|
should "open a CSV inside a zip file" do
|
@@ -79,10 +234,10 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
79
234
|
:format => :fixed_width,
|
80
235
|
:crop => 21..26, # inclusive
|
81
236
|
:cut => '2-',
|
82
|
-
:select => lambda { |row| /\A[A-Z]/.match row[
|
83
|
-
:schema => [[
|
84
|
-
[
|
85
|
-
[
|
237
|
+
:select => lambda { |row| /\A[A-Z]/.match row['code'] },
|
238
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
239
|
+
[ 'spacer', 2 ],
|
240
|
+
[ 'name', 52, { :type => :string } ]])
|
86
241
|
assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
|
87
242
|
assert_equal 'R', t.rows.first['code']
|
88
243
|
assert_equal 'electricity', t.rows.last['name']
|
@@ -92,10 +247,23 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
92
247
|
should "open an XLS with a parser" do
|
93
248
|
ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
|
94
249
|
ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
|
95
|
-
|
250
|
+
|
96
251
|
t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
|
97
252
|
:transform => { :class => FuelOilParser })
|
98
|
-
|
99
|
-
|
253
|
+
assert t.rows.include?(ma_1990_01)
|
254
|
+
assert t.rows.include?(ga_1990_01)
|
255
|
+
end
|
256
|
+
|
257
|
+
should "provide a row_hash on demand" do
|
258
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
259
|
+
:filename => 'Gd6-dsc.txt',
|
260
|
+
:format => :fixed_width,
|
261
|
+
:crop => 21..26, # inclusive
|
262
|
+
:cut => '2-',
|
263
|
+
:select => lambda { |row| /\A[A-Z]/.match row['code'] },
|
264
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
265
|
+
[ 'spacer', 2 ],
|
266
|
+
[ 'name', 52, { :type => :string } ]])
|
267
|
+
assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
|
100
268
|
end
|
101
269
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -10,18 +10,18 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date:
|
13
|
+
date: 2010-02-24 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name:
|
17
|
+
name: roo
|
18
18
|
type: :runtime
|
19
19
|
version_requirement:
|
20
20
|
version_requirements: !ruby/object:Gem::Requirement
|
21
21
|
requirements:
|
22
|
-
- -
|
22
|
+
- - ~>
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
24
|
+
version: 1.3.11
|
25
25
|
version:
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: fastercsv
|
@@ -29,29 +29,29 @@ dependencies:
|
|
29
29
|
version_requirement:
|
30
30
|
version_requirements: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ~>
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
34
|
+
version: 1.5.0
|
35
35
|
version:
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
37
|
+
name: activesupport
|
38
38
|
type: :runtime
|
39
39
|
version_requirement:
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
requirements:
|
42
|
-
- -
|
42
|
+
- - ~>
|
43
43
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
44
|
+
version: 2.3.4
|
45
45
|
version:
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
47
|
+
name: ryanwood-slither
|
48
48
|
type: :runtime
|
49
49
|
version_requirement:
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 0.99.3
|
55
55
|
version:
|
56
56
|
description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
|
57
57
|
email: seamus@abshere.net
|