remote_table 2.1.2 → 3.0.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -3
- data/README.markdown +1 -8
- data/lib/remote_table.rb +72 -87
- data/lib/remote_table/fixed_width.rb +5 -5
- data/lib/remote_table/local_copy.rb +1 -1
- data/lib/remote_table/plaintext.rb +3 -3
- data/lib/remote_table/processed_by_roo.rb +6 -4
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +3 -4
- data/test/{support → data}/airports.utf8.csv +0 -0
- data/test/data/color.csv +3 -0
- data/test/{fixtures → data}/data.yml +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.binary.ods +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.csv +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64 +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.csv +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.fixed_width-62 +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.html +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.xml +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html +0 -0
- data/test/{support → data}/list-en1-semic-3.original.iso-8859-1.csv +0 -0
- data/test/data/ranges.csv +4 -0
- data/test/test_errata.rb +2 -2
- data/test/test_local.rb +10 -0
- data/test/test_old_syntax.rb +0 -13
- data/test/test_parser.rb +24 -0
- data/test/test_remote.rb +113 -0
- data/test/test_remote_table.rb +30 -165
- data/test/test_transpose.rb +11 -0
- metadata +86 -66
- checksums.yaml +0 -15
- data/lib/remote_table/shp.rb +0 -30
- data/lib/remote_table/transformer.rb +0 -29
- data/test/test_old_transform.rb +0 -47
- data/test/test_shapefile.rb +0 -13
@@ -78,7 +78,7 @@ class RemoteTable
|
|
78
78
|
def encoded_io
|
79
79
|
@encoded_io || @encoded_io_mutex.synchronize do
|
80
80
|
@encoded_io ||= if ::RUBY_VERSION >= '1.9'
|
81
|
-
::File.open path, 'rb', :internal_encoding => t.
|
81
|
+
::File.open path, 'rb', :internal_encoding => t.encoding, :external_encoding => RemoteTable::EXTERNAL_ENCODING
|
82
82
|
else
|
83
83
|
::File.open path, 'rb'
|
84
84
|
end
|
@@ -25,7 +25,7 @@ class RemoteTable
|
|
25
25
|
|
26
26
|
# Remove bytes that are both useless and harmful in the vast majority of cases.
|
27
27
|
def delete_harmful!
|
28
|
-
harmful = [ Plaintext.soft_hyphen(
|
28
|
+
harmful = [ Plaintext.soft_hyphen(encoding), UTF8_BOM ]
|
29
29
|
local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
|
30
30
|
end
|
31
31
|
|
@@ -35,12 +35,12 @@ class RemoteTable
|
|
35
35
|
# iconv -c -t UTF-8//TRANSLIT -f WINDOWS-1252
|
36
36
|
def transliterate_whole_file_to_utf8!
|
37
37
|
if ::UnixUtils.available?('iconv')
|
38
|
-
local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV,
|
38
|
+
local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV, encoding
|
39
39
|
else
|
40
40
|
::Kernel.warn %{[remote_table] iconv not available in your $PATH, not performing transliteration}
|
41
41
|
end
|
42
42
|
# now that we've force-transliterated to UTF-8, act as though this is what the user had specified
|
43
|
-
@
|
43
|
+
@encoding = RemoteTable::EXTERNAL_ENCODING
|
44
44
|
end
|
45
45
|
|
46
46
|
# No matter what the EOL are SUPPOSED to be, run it through Perl with a regex that will convert all EOLS to \n
|
@@ -6,25 +6,27 @@ class RemoteTable
|
|
6
6
|
|
7
7
|
# Yield each row using Roo.
|
8
8
|
def _each
|
9
|
+
# sometimes Roo forgets to require iconv.
|
10
|
+
require 'iconv'
|
9
11
|
require 'roo'
|
10
12
|
|
11
13
|
spreadsheet = roo_class.new local_copy.path, nil, :ignore
|
12
14
|
if sheet
|
13
15
|
spreadsheet.default_sheet = sheet
|
14
16
|
end
|
15
|
-
|
17
|
+
|
16
18
|
first_row = if crop
|
17
19
|
crop.first + 1
|
18
20
|
else
|
19
21
|
skip + 1
|
20
22
|
end
|
21
|
-
|
23
|
+
|
22
24
|
last_row = if crop
|
23
25
|
crop.last
|
24
26
|
else
|
25
27
|
spreadsheet.last_row
|
26
28
|
end
|
27
|
-
|
29
|
+
|
28
30
|
if not headers
|
29
31
|
|
30
32
|
# create an array to represent this row
|
@@ -46,7 +48,7 @@ class RemoteTable
|
|
46
48
|
end
|
47
49
|
|
48
50
|
else
|
49
|
-
|
51
|
+
|
50
52
|
# create a hash to represent this row
|
51
53
|
current_headers = ::ActiveSupport::OrderedHash.new
|
52
54
|
if headers == :first_row
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.files = `git ls-files`.split("\n")
|
17
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
18
|
s.require_paths = ["lib"]
|
19
|
-
|
19
|
+
|
20
20
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
21
|
-
s.add_runtime_dependency 'roo', '>= 1.
|
21
|
+
s.add_runtime_dependency 'roo', '>= 1.10.3'
|
22
22
|
s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
|
23
23
|
s.add_runtime_dependency 'i18n' # activesupport?
|
24
24
|
s.add_runtime_dependency 'unix_utils', '>=0.0.8'
|
@@ -26,10 +26,9 @@ Gem::Specification.new do |s|
|
|
26
26
|
s.add_runtime_dependency 'hash_digest'
|
27
27
|
|
28
28
|
s.add_development_dependency 'errata', '>=0.2.0'
|
29
|
-
s.add_development_dependency 'georuby'
|
30
|
-
s.add_development_dependency 'dbf'
|
31
29
|
s.add_development_dependency 'minitest'
|
32
30
|
s.add_development_dependency 'minitest-reporters'
|
33
31
|
s.add_development_dependency 'rake'
|
34
32
|
s.add_development_dependency 'yard'
|
33
|
+
s.add_development_dependency 'pry'
|
35
34
|
end
|
File without changes
|
data/test/data/color.csv
ADDED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/test/test_errata.rb
CHANGED
@@ -50,8 +50,8 @@ describe RemoteTable do
|
|
50
50
|
:encoding => 'windows-1252',
|
51
51
|
:row_xpath => '//table[2]//table[1]//tr[3]//tr',
|
52
52
|
:column_xpath => 'td',
|
53
|
-
:errata =>
|
54
|
-
:responder => AircraftGuru.new
|
53
|
+
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
54
|
+
:responder => AircraftGuru.new)
|
55
55
|
g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
|
56
56
|
g1.wont_be_nil
|
57
57
|
g1['Manufacturer'].must_equal 'GULFSTREAM AEROSPACE'
|
data/test/test_local.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
describe RemoteTable do
|
5
|
+
describe 'used on local files' do
|
6
|
+
it "understands relative paths" do
|
7
|
+
RemoteTable.new('test/data/color.csv').to_a.must_equal RemoteTable.new(File.expand_path('../../test/data/color.csv', __FILE__)).to_a
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
data/test/test_old_syntax.rb
CHANGED
@@ -67,19 +67,6 @@ describe RemoteTable do
|
|
67
67
|
t.rows.last['Model'].must_equal 'EZ King Cobra'
|
68
68
|
end
|
69
69
|
|
70
|
-
it "hash rows without paying attention to order" do
|
71
|
-
x = ActiveSupport::OrderedHash.new
|
72
|
-
x[:a] = 1
|
73
|
-
x[:b] = 2
|
74
|
-
|
75
|
-
y = ActiveSupport::OrderedHash.new
|
76
|
-
y[:b] = 2
|
77
|
-
y[:a] = 1
|
78
|
-
|
79
|
-
Marshal.dump(x).wont_equal Marshal.dump(y)
|
80
|
-
RemoteTable::Transform.row_hash(y).must_equal RemoteTable::Transform.row_hash(x)
|
81
|
-
end
|
82
|
-
|
83
70
|
it "open a Google Docs url (as a CSV)" do
|
84
71
|
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
85
72
|
t.rows.first['PAD district name'].must_equal 'Gulf Coast'
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe RemoteTable do
|
4
|
+
describe ":parser option" do
|
5
|
+
it "takes a parser object that responds to #parse(row) and returns an array of rows" do
|
6
|
+
class GradeRangeParser
|
7
|
+
def parse(row)
|
8
|
+
row['range'].split('-').map do |subrange|
|
9
|
+
virtual_row = row.dup
|
10
|
+
virtual_row.delete 'range'
|
11
|
+
virtual_row['grade'] = subrange
|
12
|
+
virtual_row
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
t = RemoteTable.new "file://#{File.expand_path('../data/ranges.csv', __FILE__)}", parser: GradeRangeParser.new
|
17
|
+
t[0].must_equal 'description' => 'great', 'grade' => 'A'
|
18
|
+
t[1].must_equal 'description' => 'great', 'grade' => 'B'
|
19
|
+
t[2].must_equal 'description' => 'ok', 'grade' => 'C'
|
20
|
+
t[3].must_equal 'description' => 'bad', 'grade' => 'D'
|
21
|
+
t[4].must_equal 'description' => 'bad', 'grade' => 'F'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/test/test_remote.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'helper'
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
describe RemoteTable do
|
6
|
+
describe 'used on remote files' do
|
7
|
+
it "open an XLSX" do
|
8
|
+
t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
9
|
+
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
10
|
+
end
|
11
|
+
|
12
|
+
it "does its best to download urls without http://" do
|
13
|
+
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
14
|
+
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
15
|
+
end
|
16
|
+
|
17
|
+
it "add a row hash to every row" do
|
18
|
+
t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
19
|
+
t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
|
20
|
+
end
|
21
|
+
|
22
|
+
it "open a google doc" do
|
23
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
24
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
25
|
+
end
|
26
|
+
|
27
|
+
it "open a csv with custom headers" do
|
28
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
|
29
|
+
t[0]['col2'].must_equal 'name'
|
30
|
+
t[1]['col2'].must_equal 'Seamus Abshere'
|
31
|
+
end
|
32
|
+
|
33
|
+
it "return an ordered hash" do
|
34
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
35
|
+
t[0].class.must_equal ::ActiveSupport::OrderedHash
|
36
|
+
end
|
37
|
+
|
38
|
+
it "open a csv inside a zip file" do
|
39
|
+
t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
40
|
+
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
41
|
+
:skip => 1,
|
42
|
+
:select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
|
43
|
+
t[0]['LDGV'].must_equal '9.09%'
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'not blow up if each is called twice' do
|
47
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
48
|
+
count = 0
|
49
|
+
t.each { |row| count += 1 }
|
50
|
+
first_run = count
|
51
|
+
t.each { |row| count += 1}
|
52
|
+
count.must_equal first_run*2
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'allow itself to be cleared for save memory' do
|
56
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
57
|
+
t.to_a
|
58
|
+
t.send(:cache).length.must_be :>, 0
|
59
|
+
t.free
|
60
|
+
t.send(:cache).length.must_equal 0
|
61
|
+
end
|
62
|
+
|
63
|
+
# fixes ArgumentError: invalid byte sequence in UTF-8
|
64
|
+
it %{safely strip soft hyphens and read windows-1252 html} do
|
65
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
|
66
|
+
t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
|
67
|
+
end
|
68
|
+
|
69
|
+
it %{transliterate characters from ISO-8859-1} do
|
70
|
+
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
|
71
|
+
t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
|
72
|
+
end
|
73
|
+
|
74
|
+
it %{read xml with css selectors} do
|
75
|
+
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
76
|
+
/(AM|PM)/.match(t[0][0]).wont_equal nil
|
77
|
+
end
|
78
|
+
|
79
|
+
it %{optionally stream rows instead of caching them} do
|
80
|
+
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
81
|
+
time1 = t[0][0]
|
82
|
+
/\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
|
83
|
+
sleep 1
|
84
|
+
time2 = t[0][0]
|
85
|
+
time1.wont_equal time2
|
86
|
+
end
|
87
|
+
|
88
|
+
it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
|
89
|
+
t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
|
90
|
+
t[1][0].must_equal %{ÅLAND ISLANDS}
|
91
|
+
end
|
92
|
+
|
93
|
+
it %{parse a big CSV that is not UTF-8} do
|
94
|
+
t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
|
95
|
+
t[0][1].must_equal 'Goroka'
|
96
|
+
end
|
97
|
+
|
98
|
+
it "read only certain rows of an XLSX" do
|
99
|
+
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
|
100
|
+
t[0][0].must_equal "Permissioning and access groups for all content"
|
101
|
+
t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
|
102
|
+
|
103
|
+
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
|
104
|
+
t[0]['col1'].must_equal "Permissioning and access groups for all content"
|
105
|
+
t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
|
106
|
+
end
|
107
|
+
|
108
|
+
it "doesn't get confused by :format => nil" do
|
109
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
|
110
|
+
t[0]['Class'].must_equal 'TWO SEATERS'
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/test/test_remote_table.rb
CHANGED
@@ -3,130 +3,32 @@ require 'helper'
|
|
3
3
|
require 'tempfile'
|
4
4
|
|
5
5
|
describe RemoteTable do
|
6
|
-
it "open an XLSX" do
|
7
|
-
t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
8
|
-
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
9
|
-
end
|
10
|
-
|
11
6
|
it "doesn't screw up UTF-8" do
|
12
|
-
t = RemoteTable.new "file://#{File.expand_path('../
|
7
|
+
t = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
|
13
8
|
t[3]['city'].must_equal "Puerto Inírida"
|
14
9
|
end
|
15
10
|
|
16
11
|
it "likes paths as much as urls for local files" do
|
17
|
-
by_url = RemoteTable.new "file://#{File.expand_path('../
|
18
|
-
by_path = RemoteTable.new File.expand_path('../
|
12
|
+
by_url = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
|
13
|
+
by_path = RemoteTable.new File.expand_path('../data/airports.utf8.csv', __FILE__)
|
19
14
|
by_path.rows.must_equal by_url.rows
|
20
15
|
end
|
21
16
|
|
22
|
-
it "does its best to download urls without http://" do
|
23
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
24
|
-
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
25
|
-
end
|
26
|
-
|
27
|
-
it "add a row hash to every row" do
|
28
|
-
t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
29
|
-
t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
|
30
|
-
end
|
31
|
-
|
32
|
-
it "open a google doc" do
|
33
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
34
|
-
t[0]['name'].must_equal 'Seamus Abshere'
|
35
|
-
end
|
36
|
-
|
37
|
-
it "open a csv with custom headers" do
|
38
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
|
39
|
-
t[0]['col2'].must_equal 'name'
|
40
|
-
t[1]['col2'].must_equal 'Seamus Abshere'
|
41
|
-
end
|
42
|
-
|
43
|
-
it "open a yaml" do
|
44
|
-
t = RemoteTable.new "file://#{File.expand_path('../fixtures/data.yml', __FILE__)}"
|
45
|
-
t[0]['name'].must_equal 'Seamus Abshere'
|
46
|
-
t[0]['city'].must_equal 'Madison'
|
47
|
-
t[1]['name'].must_equal 'Derek Kastner'
|
48
|
-
t[1]['city'].must_equal 'Lansing'
|
49
|
-
end
|
50
|
-
|
51
|
-
it "return an ordered hash" do
|
52
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
53
|
-
t[0].class.must_equal ::ActiveSupport::OrderedHash
|
54
|
-
end
|
55
|
-
|
56
|
-
it "pass through fastercsv options" do
|
57
|
-
f = Tempfile.new 'pass-through-fastercsv-options'
|
58
|
-
f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
|
59
|
-
f.flush
|
60
|
-
t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
|
61
|
-
t[0][2].must_equal %{Body example with a <a href="">link</a>}
|
62
|
-
f.close
|
63
|
-
end
|
64
|
-
|
65
|
-
it "open a csv inside a zip file" do
|
66
|
-
t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
67
|
-
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
68
|
-
:skip => 1,
|
69
|
-
:select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
|
70
|
-
t[0]['LDGV'].must_equal '9.09%'
|
71
|
-
end
|
72
|
-
|
73
|
-
it 'not blow up if each is called twice' do
|
74
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
75
|
-
count = 0
|
76
|
-
t.each { |row| count += 1 }
|
77
|
-
first_run = count
|
78
|
-
t.each { |row| count += 1}
|
79
|
-
count.must_equal first_run*2
|
80
|
-
end
|
81
|
-
|
82
|
-
it 'allow itself to be cleared for save memory' do
|
83
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
84
|
-
t.to_a
|
85
|
-
t.send(:cache).length.must_be :>, 0
|
86
|
-
t.free
|
87
|
-
t.send(:cache).length.must_equal 0
|
88
|
-
end
|
89
|
-
|
90
|
-
# fixes ArgumentError: invalid byte sequence in UTF-8
|
91
|
-
it %{safely strip soft hyphens and read windows-1252 html} do
|
92
|
-
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
|
93
|
-
t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
|
94
|
-
end
|
95
|
-
|
96
|
-
it %{transliterate characters from ISO-8859-1} do
|
97
|
-
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
|
98
|
-
t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
|
99
|
-
end
|
100
|
-
|
101
|
-
it %{read xml with css selectors} do
|
102
|
-
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
103
|
-
/(AM|PM)/.match(t[0][0]).wont_equal nil
|
104
|
-
end
|
105
|
-
|
106
|
-
it %{optionally stream rows instead of caching them} do
|
107
|
-
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
108
|
-
time1 = t[0][0]
|
109
|
-
/\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
|
110
|
-
sleep 1
|
111
|
-
time2 = t[0][0]
|
112
|
-
time1.wont_equal time2
|
113
|
-
end
|
114
|
-
|
115
17
|
{
|
116
|
-
# IMPOSSIBLE "../
|
117
|
-
"../
|
118
|
-
"../
|
119
|
-
"../
|
120
|
-
"../
|
121
|
-
"../
|
122
|
-
# TODO "../
|
123
|
-
# TODO "../
|
124
|
-
# TODO "../
|
125
|
-
"../
|
126
|
-
"../
|
127
|
-
"../
|
128
|
-
"../
|
129
|
-
"../
|
18
|
+
# IMPOSSIBLE "../data/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls" => {:format=>"xls", :encoding=>"binary"},
|
19
|
+
"../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx" => {:format=>"xlsx"},
|
20
|
+
"../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls" => {:format=>"xls"},
|
21
|
+
"../data/list-en1-semic-3.neooffice.binary.ods" => {:format=>"ods"},
|
22
|
+
"../data/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64" => {:format=>"fixed_width", :encoding=>"iso-8859-1", :schema => [['name', 63, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
|
23
|
+
"../data/list-en1-semic-3.neooffice.utf-8.fixed_width-62" => {:format=>"fixed_width", :schema => [['name', 61, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
|
24
|
+
# TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html" => {:format=>"html" },
|
25
|
+
# TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html" => {:format=>"html", :encoding=>"iso-8859-1"},
|
26
|
+
# TODO "../data/list-en1-semic-3.neooffice.utf-8.html" => {:format=>"html" },
|
27
|
+
"../data/list-en1-semic-3.neooffice.utf-8.xml" => {:format=>"xml", :row_css=>'Row', :column_css => 'Data', :select => proc { |row| row[1].to_s =~ /[A-Z]{2}/ }},
|
28
|
+
"../data/list-en1-semic-3.neooffice.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
|
29
|
+
"../data/list-en1-semic-3.original.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
|
30
|
+
"../data/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
|
31
|
+
"../data/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
|
130
32
|
}.each do |k, v|
|
131
33
|
it %{open #{k} with encoding #{v[:encoding] || 'default'}} do
|
132
34
|
options = v.merge(:headers => false, :skip => 2)
|
@@ -141,57 +43,20 @@ describe RemoteTable do
|
|
141
43
|
end
|
142
44
|
end
|
143
45
|
|
144
|
-
it
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
t[0][1].must_equal 'Goroka'
|
152
|
-
end
|
153
|
-
|
154
|
-
it "read only certain rows of an XLSX" do
|
155
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
|
156
|
-
t[0][0].must_equal "Permissioning and access groups for all content"
|
157
|
-
t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
|
158
|
-
|
159
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
|
160
|
-
t[0]['col1'].must_equal "Permissioning and access groups for all content"
|
161
|
-
t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
|
162
|
-
end
|
163
|
-
|
164
|
-
it "doesn't get confused by :format => nil" do
|
165
|
-
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
|
166
|
-
t[0]['Class'].must_equal 'TWO SEATERS'
|
167
|
-
end
|
168
|
-
|
169
|
-
{
|
170
|
-
'foo.ods' => :ods,
|
171
|
-
'foo.open_office' => :ods,
|
172
|
-
'foo.xlsx' => :xlsx,
|
173
|
-
'foo.excelx' => :xlsx,
|
174
|
-
'foo.xls' => :xls,
|
175
|
-
'foo.excel' => :xls,
|
176
|
-
'foo.csv' => :delimited,
|
177
|
-
'foo.tsv' => :delimited,
|
178
|
-
'foo.delimited' => :delimited,
|
179
|
-
'foo.fixed_width' => :fixed_width,
|
180
|
-
'foo.htm' => :html,
|
181
|
-
'foo.html' => :html,
|
182
|
-
'foo.xml' => :xml,
|
183
|
-
'foo.yaml' => :yaml,
|
184
|
-
'foo.yml' => :yaml,
|
185
|
-
'foo.shp' => :shp
|
186
|
-
}.each do |basename, format|
|
187
|
-
it "detects the #{format} format from the filename #{basename}" do
|
188
|
-
RemoteTable.guess_format(basename).must_equal format
|
189
|
-
end
|
46
|
+
it "pass through fastercsv options" do
|
47
|
+
f = Tempfile.new 'pass-through-fastercsv-options'
|
48
|
+
f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
|
49
|
+
f.flush
|
50
|
+
t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
|
51
|
+
t[0][2].must_equal %{Body example with a <a href="">link</a>}
|
52
|
+
f.close
|
190
53
|
end
|
191
54
|
|
192
|
-
it "
|
193
|
-
|
194
|
-
|
195
|
-
|
55
|
+
it "open a yaml" do
|
56
|
+
t = RemoteTable.new "file://#{File.expand_path('../data/data.yml', __FILE__)}"
|
57
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
58
|
+
t[0]['city'].must_equal 'Madison'
|
59
|
+
t[1]['name'].must_equal 'Derek Kastner'
|
60
|
+
t[1]['city'].must_equal 'Lansing'
|
196
61
|
end
|
197
62
|
end
|