remote_table 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/Rakefile +4 -2
- data/VERSION +1 -1
- data/lib/remote_table.rb +1 -1
- data/lib/remote_table/file.rb +4 -2
- data/lib/remote_table/file/csv.rb +21 -8
- data/lib/remote_table/file/fixed_width.rb +4 -1
- data/lib/remote_table/file/roo_spreadsheet.rb +6 -5
- data/lib/remote_table/package.rb +1 -1
- data/lib/remote_table/transform.rb +8 -0
- data/remote_table.gemspec +14 -14
- data/test/remote_table_test.rb +191 -23
- metadata +13 -13
data/README.rdoc
CHANGED
data/Rakefile
CHANGED
@@ -10,8 +10,10 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/remote_table"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
|
14
|
-
gem.add_dependency '
|
13
|
+
gem.add_dependency 'roo', '~>1.3.11'
|
14
|
+
gem.add_dependency 'fastercsv', '~>1.5.0'
|
15
|
+
gem.add_dependency 'activesupport', '~>2.3.4'
|
16
|
+
gem.add_dependency 'ryanwood-slither', '~>0.99.3'
|
15
17
|
gem.require_path = "lib"
|
16
18
|
gem.files.include %w(lib/remote_table) unless gem.files.empty? # seems to fail once it's in the wild
|
17
19
|
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/remote_table.rb
CHANGED
data/lib/remote_table/file.rb
CHANGED
@@ -2,6 +2,7 @@ class RemoteTable
|
|
2
2
|
class File
|
3
3
|
attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
|
4
4
|
attr_accessor :path
|
5
|
+
attr_accessor :keep_blank_rows
|
5
6
|
|
6
7
|
def initialize(bus)
|
7
8
|
@filename = bus[:filename]
|
@@ -9,6 +10,7 @@ class RemoteTable
|
|
9
10
|
@delimiter = bus[:delimiter]
|
10
11
|
@sheet = bus[:sheet] || 0
|
11
12
|
@skip = bus[:skip] # rows
|
13
|
+
@keep_blank_rows = bus[:keep_blank_rows] || false
|
12
14
|
@crop = bus[:crop] # rows
|
13
15
|
@cut = bus[:cut] # columns
|
14
16
|
@headers = bus[:headers]
|
@@ -26,7 +28,7 @@ class RemoteTable
|
|
26
28
|
|
27
29
|
private
|
28
30
|
|
29
|
-
# doesn't support trap
|
31
|
+
# doesn't support trap
|
30
32
|
def define_fixed_width_schema!
|
31
33
|
raise "can't define both schema_name and schema" if !schema_name.blank?
|
32
34
|
self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
|
@@ -35,7 +37,7 @@ class RemoteTable
|
|
35
37
|
d.rows do |row|
|
36
38
|
row.trap(&trap)
|
37
39
|
schema.each do |name, width, options|
|
38
|
-
if name ==
|
40
|
+
if name == 'spacer'
|
39
41
|
row.spacer width
|
40
42
|
else
|
41
43
|
row.column name, width, options
|
@@ -3,15 +3,28 @@ class RemoteTable
|
|
3
3
|
def each_row(&block)
|
4
4
|
skip_rows!
|
5
5
|
FasterCSV.foreach(path, fastercsv_options) do |row|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
ordered_hash = ActiveSupport::OrderedHash.new
|
7
|
+
filled_values = 0
|
8
|
+
case row
|
9
|
+
when FasterCSV::Row
|
10
|
+
row.each do |header, value|
|
11
|
+
next if header.blank?
|
12
|
+
value = '' if value.nil?
|
13
|
+
ordered_hash[header] = value
|
14
|
+
filled_values += 1 if value.present?
|
15
|
+
end
|
16
|
+
when Array
|
11
17
|
index = 0
|
12
|
-
|
18
|
+
row.each do |value|
|
19
|
+
value = '' if value.nil?
|
20
|
+
ordered_hash[index] = value
|
21
|
+
filled_values += 1 if value.present?
|
22
|
+
index += 1
|
23
|
+
end
|
24
|
+
else
|
25
|
+
raise "Unexpected #{row.inspect}"
|
13
26
|
end
|
14
|
-
yield
|
27
|
+
yield ordered_hash if keep_blank_rows or filled_values.nonzero?
|
15
28
|
end
|
16
29
|
ensure
|
17
30
|
restore_rows!
|
@@ -20,7 +33,7 @@ class RemoteTable
|
|
20
33
|
private
|
21
34
|
|
22
35
|
def fastercsv_options
|
23
|
-
fastercsv_options = { :skip_blanks =>
|
36
|
+
fastercsv_options = { :skip_blanks => !keep_blank_rows, :header_converters => lambda { |k| k.to_s.toutf8 } }
|
24
37
|
if headers == false
|
25
38
|
fastercsv_options.merge!(:headers => nil)
|
26
39
|
else
|
@@ -5,7 +5,10 @@ class RemoteTable
|
|
5
5
|
skip_rows!
|
6
6
|
cut_columns!
|
7
7
|
a = Slither.parse(path, schema_name)
|
8
|
-
a[:rows].each
|
8
|
+
a[:rows].each do |hash|
|
9
|
+
hash.reject! { |k, v| k.blank? }
|
10
|
+
yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
|
11
|
+
end
|
9
12
|
ensure
|
10
13
|
uncut_columns!
|
11
14
|
unskip_rows!
|
@@ -1,19 +1,20 @@
|
|
1
1
|
class RemoteTable
|
2
2
|
module RooSpreadsheet
|
3
3
|
def each_row(&block)
|
4
|
-
headers =
|
4
|
+
headers = Hash.new
|
5
5
|
oo = roo_klass.new(path, nil, :ignore)
|
6
6
|
oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
|
7
7
|
for col in (1..oo.last_column)
|
8
8
|
headers[col] = oo.cell(header_row, col)
|
9
9
|
headers[col] = oo.cell(header_row - 1, col) if headers[col].blank? # look up
|
10
10
|
end
|
11
|
-
first_data_row.upto(oo.last_row) do |
|
12
|
-
|
11
|
+
first_data_row.upto(oo.last_row) do |raw_row|
|
12
|
+
ordered_hash = ActiveSupport::OrderedHash.new
|
13
13
|
for col in (1..oo.last_column)
|
14
|
-
|
14
|
+
next if headers[col].blank?
|
15
|
+
ordered_hash[headers[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
|
15
16
|
end
|
16
|
-
yield
|
17
|
+
yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
data/lib/remote_table/package.rb
CHANGED
@@ -55,7 +55,7 @@ class RemoteTable
|
|
55
55
|
# in C but not in the others, we can default to the basename of the package
|
56
56
|
# in order to do this we'll need to mv the uncompressed file on top of the original file
|
57
57
|
def identify(path)
|
58
|
-
|
58
|
+
FileUtils.mv(path, file_path(path)) if !packing and [ nil, :bz2, :gz ].include?(compression)
|
59
59
|
end
|
60
60
|
|
61
61
|
def file_path(path)
|
@@ -18,8 +18,16 @@ class RemoteTable
|
|
18
18
|
self
|
19
19
|
end
|
20
20
|
|
21
|
+
# - convert OrderedHash to a Hash (otherwise field ordering will be saved)
|
22
|
+
# - dump it
|
23
|
+
# - digest it
|
24
|
+
def self.row_hash(row)
|
25
|
+
Digest::MD5.hexdigest Marshal.dump(Hash.new.replace(row))
|
26
|
+
end
|
27
|
+
|
21
28
|
def each_row(&block)
|
22
29
|
raw_table.each_row do |row|
|
30
|
+
row['row_hash'] = self.class.row_hash(row)
|
23
31
|
virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
|
24
32
|
Array.wrap(virtual_rows).each do |virtual_row|
|
25
33
|
next if select and !select.call(virtual_row)
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-02-24}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -55,21 +55,21 @@ Gem::Specification.new do |s|
|
|
55
55
|
s.specification_version = 3
|
56
56
|
|
57
57
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
58
|
-
s.add_runtime_dependency(%q<
|
59
|
-
s.add_runtime_dependency(%q<fastercsv>, ["
|
60
|
-
s.add_runtime_dependency(%q<
|
61
|
-
s.add_runtime_dependency(%q<
|
58
|
+
s.add_runtime_dependency(%q<roo>, ["~> 1.3.11"])
|
59
|
+
s.add_runtime_dependency(%q<fastercsv>, ["~> 1.5.0"])
|
60
|
+
s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
|
61
|
+
s.add_runtime_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
|
62
62
|
else
|
63
|
-
s.add_dependency(%q<
|
64
|
-
s.add_dependency(%q<fastercsv>, ["
|
65
|
-
s.add_dependency(%q<
|
66
|
-
s.add_dependency(%q<
|
63
|
+
s.add_dependency(%q<roo>, ["~> 1.3.11"])
|
64
|
+
s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
|
65
|
+
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
66
|
+
s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
|
67
67
|
end
|
68
68
|
else
|
69
|
-
s.add_dependency(%q<
|
70
|
-
s.add_dependency(%q<fastercsv>, ["
|
71
|
-
s.add_dependency(%q<
|
72
|
-
s.add_dependency(%q<
|
69
|
+
s.add_dependency(%q<roo>, ["~> 1.3.11"])
|
70
|
+
s.add_dependency(%q<fastercsv>, ["~> 1.5.0"])
|
71
|
+
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
72
|
+
s.add_dependency(%q<ryanwood-slither>, ["~> 0.99.3"])
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
data/test/remote_table_test.rb
CHANGED
@@ -7,7 +7,7 @@ class FuelOilParser
|
|
7
7
|
def add_hints!(bus)
|
8
8
|
bus[:sheet] = 'Data 1'
|
9
9
|
bus[:skip] = 2
|
10
|
-
bus[:select] = lambda { |row| row[
|
10
|
+
bus[:select] = lambda { |row| row['year'] > 1989 }
|
11
11
|
end
|
12
12
|
def apply(row)
|
13
13
|
virtual_rows = []
|
@@ -23,18 +23,31 @@ class FuelOilParser
|
|
23
23
|
locatable = "#{$1} (State)"
|
24
24
|
end
|
25
25
|
date = Time.parse(date)
|
26
|
-
virtual_rows <<
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
26
|
+
virtual_rows << {
|
27
|
+
'locatable' => locatable,
|
28
|
+
'cost' => cost,
|
29
|
+
'year' => date.year,
|
30
|
+
'month' => date.month
|
31
|
+
}
|
32
32
|
end
|
33
33
|
virtual_rows
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
37
|
class RemoteTableTest < Test::Unit::TestCase
|
38
|
+
def setup
|
39
|
+
@test2_rows_with_blanks = [
|
40
|
+
{ 'header4' => '', 'header5' => '', 'header6' => '' },
|
41
|
+
{ 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
|
42
|
+
{ 'header4' => '', 'header5' => '', 'header6' => '' },
|
43
|
+
{ 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
|
44
|
+
]
|
45
|
+
@test2_rows = [
|
46
|
+
{ 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
|
47
|
+
{ 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
|
48
|
+
]
|
49
|
+
end
|
50
|
+
|
38
51
|
should "open an XLS inside a zip file" do
|
39
52
|
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
40
53
|
assert_equal 'ACURA', t.rows.first['Manufacturer']
|
@@ -43,15 +56,26 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
43
56
|
assert_equal 'V70 XC AWD', t.rows.last['carline name']
|
44
57
|
end
|
45
58
|
|
46
|
-
should "have indifferent hash access" do
|
59
|
+
should "not have indifferent string/symbol hash access" do
|
47
60
|
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
48
|
-
assert_equal 'ACURA', t.rows.first['Manufacturer'
|
49
|
-
assert_equal
|
50
|
-
|
51
|
-
|
61
|
+
assert_equal 'ACURA', t.rows.first['Manufacturer']
|
62
|
+
assert_equal nil, t.rows.first[:Manufacturer]
|
63
|
+
end
|
64
|
+
|
65
|
+
should "hash rows without paying attention to order" do
|
66
|
+
x = ActiveSupport::OrderedHash.new
|
67
|
+
x[:a] = 1
|
68
|
+
x[:b] = 2
|
69
|
+
|
70
|
+
y = ActiveSupport::OrderedHash.new
|
71
|
+
y[:b] = 2
|
72
|
+
y[:a] = 1
|
73
|
+
|
74
|
+
assert Marshal.dump(x) != Marshal.dump(y)
|
75
|
+
assert RemoteTable::Transform.row_hash(x) == RemoteTable::Transform.row_hash(y)
|
52
76
|
end
|
53
77
|
|
54
|
-
should "open a Google Docs url" do
|
78
|
+
should "open a Google Docs url (as a CSV)" do
|
55
79
|
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
56
80
|
assert_equal 'Gulf Coast', t.rows.first['PAD district name']
|
57
81
|
assert_equal 'AL', t.rows.first['State']
|
@@ -59,10 +83,141 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
59
83
|
assert_equal 'WY', t.rows.last['State']
|
60
84
|
end
|
61
85
|
|
86
|
+
should "open a Google Docs url as a CSV without headers" do
|
87
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
88
|
+
assert_equal 'AL', t.rows.first[0]
|
89
|
+
assert_equal 'Gulf Coast', t.rows.first[4]
|
90
|
+
assert_equal 'WY', t.rows.last[0]
|
91
|
+
assert_equal 'Rocky Mountain', t.rows.last[4]
|
92
|
+
end
|
93
|
+
|
94
|
+
should "take the last of values if the header is duplicated" do
|
95
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
|
96
|
+
assert_equal '2', t.rows.first['dup_header']
|
97
|
+
end
|
98
|
+
|
99
|
+
should "respect field order in CSVs without headers" do
|
100
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
|
101
|
+
last_k = -1
|
102
|
+
saw_string = false
|
103
|
+
t.rows.each do |row|
|
104
|
+
row.each do |k, v|
|
105
|
+
if k.is_a?(Fixnum) and last_k.is_a?(Fixnum)
|
106
|
+
assert !saw_string
|
107
|
+
assert k > last_k
|
108
|
+
end
|
109
|
+
last_k = k
|
110
|
+
saw_string = k.is_a?(String)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
%w{ csv ods xls }.each do |format|
|
116
|
+
eval %{
|
117
|
+
should "read #{format}" do
|
118
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}')
|
119
|
+
# no blank headers
|
120
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
121
|
+
# correct values
|
122
|
+
t.rows.each_with_index do |row, index|
|
123
|
+
assert_equal row.except('row_hash'), @test2_rows[index]
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
should "read #{format}, keeping blank rows" do
|
128
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}', :keep_blank_rows => true)
|
129
|
+
# no blank headers
|
130
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
131
|
+
# correct values
|
132
|
+
t.rows.each_with_index do |row, index|
|
133
|
+
assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
|
134
|
+
end
|
135
|
+
end
|
136
|
+
}
|
137
|
+
end
|
138
|
+
|
139
|
+
should "read fixed width correctly" do
|
140
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
141
|
+
:format => :fixed_width,
|
142
|
+
:skip => 1,
|
143
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
144
|
+
[ 'spacer', 1 ],
|
145
|
+
[ 'header5', 10, { :type => :string } ],
|
146
|
+
[ 'spacer', 12 ],
|
147
|
+
[ 'header6', 10, { :type => :string } ]])
|
148
|
+
|
149
|
+
# no blank headers
|
150
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
151
|
+
# correct values
|
152
|
+
t.rows.each_with_index do |row, index|
|
153
|
+
assert_equal row.except('row_hash'), @test2_rows[index]
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
should "read fixed width correctly, keeping blank rows" do
|
158
|
+
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
159
|
+
:format => :fixed_width,
|
160
|
+
:keep_blank_rows => true,
|
161
|
+
:skip => 1,
|
162
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
163
|
+
[ 'spacer', 1 ],
|
164
|
+
[ 'header5', 10, { :type => :string } ],
|
165
|
+
[ 'spacer', 12 ],
|
166
|
+
[ 'header6', 10, { :type => :string } ]])
|
167
|
+
|
168
|
+
# no blank headers
|
169
|
+
assert t.rows.all? { |row| row.keys.all?(&:present?) }
|
170
|
+
# correct values
|
171
|
+
t.rows.each_with_index do |row, index|
|
172
|
+
assert_equal row.except('row_hash'), @test2_rows_with_blanks[index]
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
should "have the same row hash across formats" do
|
177
|
+
csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
|
178
|
+
ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
|
179
|
+
xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
|
180
|
+
fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
|
181
|
+
:format => :fixed_width,
|
182
|
+
:skip => 1,
|
183
|
+
:schema => [[ 'header1', 10, { :type => :string } ],
|
184
|
+
[ 'spacer', 1 ],
|
185
|
+
[ 'header2', 10, { :type => :string } ],
|
186
|
+
[ 'spacer', 12 ],
|
187
|
+
[ 'header3', 10, { :type => :string } ]])
|
188
|
+
|
189
|
+
csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
|
190
|
+
ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
|
191
|
+
xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
|
192
|
+
fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
|
193
|
+
:format => :fixed_width,
|
194
|
+
:skip => 1,
|
195
|
+
:schema => [[ 'spacer', 11 ],
|
196
|
+
[ 'header2', 10, { :type => :string } ],
|
197
|
+
[ 'spacer', 1 ],
|
198
|
+
[ 'header3', 10, { :type => :string } ],
|
199
|
+
[ 'spacer', 1 ],
|
200
|
+
[ 'header1', 10, { :type => :string } ]])
|
201
|
+
|
202
|
+
|
203
|
+
reference = csv.rows[0]['row_hash']
|
204
|
+
|
205
|
+
# same row hashes
|
206
|
+
assert_equal reference, ods.rows[0]['row_hash']
|
207
|
+
assert_equal reference, xls.rows[0]['row_hash']
|
208
|
+
assert_equal reference, fixed_width.rows[0]['row_hash']
|
209
|
+
# same row hashes with different order
|
210
|
+
assert_equal reference, csv2.rows[0]['row_hash']
|
211
|
+
assert_equal reference, ods2.rows[0]['row_hash']
|
212
|
+
assert_equal reference, xls2.rows[0]['row_hash']
|
213
|
+
assert_equal reference, fixed_width2.rows[0]['row_hash']
|
214
|
+
end
|
215
|
+
|
62
216
|
should "open an ODS" do
|
63
|
-
t = RemoteTable.new(:url => 'http://
|
64
|
-
|
65
|
-
assert_equal
|
217
|
+
t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
|
218
|
+
|
219
|
+
assert_equal 'Central Africa', t.rows[5]['name']
|
220
|
+
assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
|
66
221
|
end
|
67
222
|
|
68
223
|
should "open a CSV inside a zip file" do
|
@@ -79,10 +234,10 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
79
234
|
:format => :fixed_width,
|
80
235
|
:crop => 21..26, # inclusive
|
81
236
|
:cut => '2-',
|
82
|
-
:select => lambda { |row| /\A[A-Z]/.match row[
|
83
|
-
:schema => [[
|
84
|
-
[
|
85
|
-
[
|
237
|
+
:select => lambda { |row| /\A[A-Z]/.match row['code'] },
|
238
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
239
|
+
[ 'spacer', 2 ],
|
240
|
+
[ 'name', 52, { :type => :string } ]])
|
86
241
|
assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
|
87
242
|
assert_equal 'R', t.rows.first['code']
|
88
243
|
assert_equal 'electricity', t.rows.last['name']
|
@@ -92,10 +247,23 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
92
247
|
should "open an XLS with a parser" do
|
93
248
|
ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
|
94
249
|
ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
|
95
|
-
|
250
|
+
|
96
251
|
t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
|
97
252
|
:transform => { :class => FuelOilParser })
|
98
|
-
|
99
|
-
|
253
|
+
assert t.rows.include?(ma_1990_01)
|
254
|
+
assert t.rows.include?(ga_1990_01)
|
255
|
+
end
|
256
|
+
|
257
|
+
should "provide a row_hash on demand" do
|
258
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
259
|
+
:filename => 'Gd6-dsc.txt',
|
260
|
+
:format => :fixed_width,
|
261
|
+
:crop => 21..26, # inclusive
|
262
|
+
:cut => '2-',
|
263
|
+
:select => lambda { |row| /\A[A-Z]/.match row['code'] },
|
264
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
265
|
+
[ 'spacer', 2 ],
|
266
|
+
[ 'name', 52, { :type => :string } ]])
|
267
|
+
assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
|
100
268
|
end
|
101
269
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -10,18 +10,18 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date:
|
13
|
+
date: 2010-02-24 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name:
|
17
|
+
name: roo
|
18
18
|
type: :runtime
|
19
19
|
version_requirement:
|
20
20
|
version_requirements: !ruby/object:Gem::Requirement
|
21
21
|
requirements:
|
22
|
-
- -
|
22
|
+
- - ~>
|
23
23
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
24
|
+
version: 1.3.11
|
25
25
|
version:
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: fastercsv
|
@@ -29,29 +29,29 @@ dependencies:
|
|
29
29
|
version_requirement:
|
30
30
|
version_requirements: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ~>
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
34
|
+
version: 1.5.0
|
35
35
|
version:
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
37
|
+
name: activesupport
|
38
38
|
type: :runtime
|
39
39
|
version_requirement:
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
requirements:
|
42
|
-
- -
|
42
|
+
- - ~>
|
43
43
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
44
|
+
version: 2.3.4
|
45
45
|
version:
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
47
|
+
name: ryanwood-slither
|
48
48
|
type: :runtime
|
49
49
|
version_requirement:
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 0.99.3
|
55
55
|
version:
|
56
56
|
description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
|
57
57
|
email: seamus@abshere.net
|