remote_table 2.1.2 → 3.0.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -3
- data/README.markdown +1 -8
- data/lib/remote_table.rb +72 -87
- data/lib/remote_table/fixed_width.rb +5 -5
- data/lib/remote_table/local_copy.rb +1 -1
- data/lib/remote_table/plaintext.rb +3 -3
- data/lib/remote_table/processed_by_roo.rb +6 -4
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +3 -4
- data/test/{support → data}/airports.utf8.csv +0 -0
- data/test/data/color.csv +3 -0
- data/test/{fixtures → data}/data.yml +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.binary.ods +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.csv +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64 +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.csv +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.fixed_width-62 +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.html +0 -0
- data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.xml +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma +0 -0
- data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html +0 -0
- data/test/{support → data}/list-en1-semic-3.original.iso-8859-1.csv +0 -0
- data/test/data/ranges.csv +4 -0
- data/test/test_errata.rb +2 -2
- data/test/test_local.rb +10 -0
- data/test/test_old_syntax.rb +0 -13
- data/test/test_parser.rb +24 -0
- data/test/test_remote.rb +113 -0
- data/test/test_remote_table.rb +30 -165
- data/test/test_transpose.rb +11 -0
- metadata +86 -66
- checksums.yaml +0 -15
- data/lib/remote_table/shp.rb +0 -30
- data/lib/remote_table/transformer.rb +0 -29
- data/test/test_old_transform.rb +0 -47
- data/test/test_shapefile.rb +0 -13
@@ -78,7 +78,7 @@ class RemoteTable
|
|
78
78
|
def encoded_io
|
79
79
|
@encoded_io || @encoded_io_mutex.synchronize do
|
80
80
|
@encoded_io ||= if ::RUBY_VERSION >= '1.9'
|
81
|
-
::File.open path, 'rb', :internal_encoding => t.
|
81
|
+
::File.open path, 'rb', :internal_encoding => t.encoding, :external_encoding => RemoteTable::EXTERNAL_ENCODING
|
82
82
|
else
|
83
83
|
::File.open path, 'rb'
|
84
84
|
end
|
@@ -25,7 +25,7 @@ class RemoteTable
|
|
25
25
|
|
26
26
|
# Remove bytes that are both useless and harmful in the vast majority of cases.
|
27
27
|
def delete_harmful!
|
28
|
-
harmful = [ Plaintext.soft_hyphen(
|
28
|
+
harmful = [ Plaintext.soft_hyphen(encoding), UTF8_BOM ]
|
29
29
|
local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
|
30
30
|
end
|
31
31
|
|
@@ -35,12 +35,12 @@ class RemoteTable
|
|
35
35
|
# iconv -c -t UTF-8//TRANSLIT -f WINDOWS-1252
|
36
36
|
def transliterate_whole_file_to_utf8!
|
37
37
|
if ::UnixUtils.available?('iconv')
|
38
|
-
local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV,
|
38
|
+
local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV, encoding
|
39
39
|
else
|
40
40
|
::Kernel.warn %{[remote_table] iconv not available in your $PATH, not performing transliteration}
|
41
41
|
end
|
42
42
|
# now that we've force-transliterated to UTF-8, act as though this is what the user had specified
|
43
|
-
@
|
43
|
+
@encoding = RemoteTable::EXTERNAL_ENCODING
|
44
44
|
end
|
45
45
|
|
46
46
|
# No matter what the EOL are SUPPOSED to be, run it through Perl with a regex that will convert all EOLS to \n
|
@@ -6,25 +6,27 @@ class RemoteTable
|
|
6
6
|
|
7
7
|
# Yield each row using Roo.
|
8
8
|
def _each
|
9
|
+
# sometimes Roo forgets to require iconv.
|
10
|
+
require 'iconv'
|
9
11
|
require 'roo'
|
10
12
|
|
11
13
|
spreadsheet = roo_class.new local_copy.path, nil, :ignore
|
12
14
|
if sheet
|
13
15
|
spreadsheet.default_sheet = sheet
|
14
16
|
end
|
15
|
-
|
17
|
+
|
16
18
|
first_row = if crop
|
17
19
|
crop.first + 1
|
18
20
|
else
|
19
21
|
skip + 1
|
20
22
|
end
|
21
|
-
|
23
|
+
|
22
24
|
last_row = if crop
|
23
25
|
crop.last
|
24
26
|
else
|
25
27
|
spreadsheet.last_row
|
26
28
|
end
|
27
|
-
|
29
|
+
|
28
30
|
if not headers
|
29
31
|
|
30
32
|
# create an array to represent this row
|
@@ -46,7 +48,7 @@ class RemoteTable
|
|
46
48
|
end
|
47
49
|
|
48
50
|
else
|
49
|
-
|
51
|
+
|
50
52
|
# create a hash to represent this row
|
51
53
|
current_headers = ::ActiveSupport::OrderedHash.new
|
52
54
|
if headers == :first_row
|
data/lib/remote_table/version.rb
CHANGED
data/remote_table.gemspec
CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.files = `git ls-files`.split("\n")
|
17
17
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
18
|
s.require_paths = ["lib"]
|
19
|
-
|
19
|
+
|
20
20
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
21
|
-
s.add_runtime_dependency 'roo', '>= 1.
|
21
|
+
s.add_runtime_dependency 'roo', '>= 1.10.3'
|
22
22
|
s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
|
23
23
|
s.add_runtime_dependency 'i18n' # activesupport?
|
24
24
|
s.add_runtime_dependency 'unix_utils', '>=0.0.8'
|
@@ -26,10 +26,9 @@ Gem::Specification.new do |s|
|
|
26
26
|
s.add_runtime_dependency 'hash_digest'
|
27
27
|
|
28
28
|
s.add_development_dependency 'errata', '>=0.2.0'
|
29
|
-
s.add_development_dependency 'georuby'
|
30
|
-
s.add_development_dependency 'dbf'
|
31
29
|
s.add_development_dependency 'minitest'
|
32
30
|
s.add_development_dependency 'minitest-reporters'
|
33
31
|
s.add_development_dependency 'rake'
|
34
32
|
s.add_development_dependency 'yard'
|
33
|
+
s.add_development_dependency 'pry'
|
35
34
|
end
|
File without changes
|
data/test/data/color.csv
ADDED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/test/test_errata.rb
CHANGED
@@ -50,8 +50,8 @@ describe RemoteTable do
|
|
50
50
|
:encoding => 'windows-1252',
|
51
51
|
:row_xpath => '//table[2]//table[1]//tr[3]//tr',
|
52
52
|
:column_xpath => 'td',
|
53
|
-
:errata =>
|
54
|
-
:responder => AircraftGuru.new
|
53
|
+
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
54
|
+
:responder => AircraftGuru.new)
|
55
55
|
g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
|
56
56
|
g1.wont_be_nil
|
57
57
|
g1['Manufacturer'].must_equal 'GULFSTREAM AEROSPACE'
|
data/test/test_local.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
describe RemoteTable do
|
5
|
+
describe 'used on local files' do
|
6
|
+
it "understands relative paths" do
|
7
|
+
RemoteTable.new('test/data/color.csv').to_a.must_equal RemoteTable.new(File.expand_path('../../test/data/color.csv', __FILE__)).to_a
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
data/test/test_old_syntax.rb
CHANGED
@@ -67,19 +67,6 @@ describe RemoteTable do
|
|
67
67
|
t.rows.last['Model'].must_equal 'EZ King Cobra'
|
68
68
|
end
|
69
69
|
|
70
|
-
it "hash rows without paying attention to order" do
|
71
|
-
x = ActiveSupport::OrderedHash.new
|
72
|
-
x[:a] = 1
|
73
|
-
x[:b] = 2
|
74
|
-
|
75
|
-
y = ActiveSupport::OrderedHash.new
|
76
|
-
y[:b] = 2
|
77
|
-
y[:a] = 1
|
78
|
-
|
79
|
-
Marshal.dump(x).wont_equal Marshal.dump(y)
|
80
|
-
RemoteTable::Transform.row_hash(y).must_equal RemoteTable::Transform.row_hash(x)
|
81
|
-
end
|
82
|
-
|
83
70
|
it "open a Google Docs url (as a CSV)" do
|
84
71
|
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
85
72
|
t.rows.first['PAD district name'].must_equal 'Gulf Coast'
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe RemoteTable do
|
4
|
+
describe ":parser option" do
|
5
|
+
it "takes a parser object that responds to #parse(row) and returns an array of rows" do
|
6
|
+
class GradeRangeParser
|
7
|
+
def parse(row)
|
8
|
+
row['range'].split('-').map do |subrange|
|
9
|
+
virtual_row = row.dup
|
10
|
+
virtual_row.delete 'range'
|
11
|
+
virtual_row['grade'] = subrange
|
12
|
+
virtual_row
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
t = RemoteTable.new "file://#{File.expand_path('../data/ranges.csv', __FILE__)}", parser: GradeRangeParser.new
|
17
|
+
t[0].must_equal 'description' => 'great', 'grade' => 'A'
|
18
|
+
t[1].must_equal 'description' => 'great', 'grade' => 'B'
|
19
|
+
t[2].must_equal 'description' => 'ok', 'grade' => 'C'
|
20
|
+
t[3].must_equal 'description' => 'bad', 'grade' => 'D'
|
21
|
+
t[4].must_equal 'description' => 'bad', 'grade' => 'F'
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/test/test_remote.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'helper'
|
3
|
+
require 'tempfile'
|
4
|
+
|
5
|
+
describe RemoteTable do
|
6
|
+
describe 'used on remote files' do
|
7
|
+
it "open an XLSX" do
|
8
|
+
t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
9
|
+
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
10
|
+
end
|
11
|
+
|
12
|
+
it "does its best to download urls without http://" do
|
13
|
+
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
14
|
+
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
15
|
+
end
|
16
|
+
|
17
|
+
it "add a row hash to every row" do
|
18
|
+
t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
19
|
+
t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
|
20
|
+
end
|
21
|
+
|
22
|
+
it "open a google doc" do
|
23
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
24
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
25
|
+
end
|
26
|
+
|
27
|
+
it "open a csv with custom headers" do
|
28
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
|
29
|
+
t[0]['col2'].must_equal 'name'
|
30
|
+
t[1]['col2'].must_equal 'Seamus Abshere'
|
31
|
+
end
|
32
|
+
|
33
|
+
it "return an ordered hash" do
|
34
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
35
|
+
t[0].class.must_equal ::ActiveSupport::OrderedHash
|
36
|
+
end
|
37
|
+
|
38
|
+
it "open a csv inside a zip file" do
|
39
|
+
t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
40
|
+
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
41
|
+
:skip => 1,
|
42
|
+
:select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
|
43
|
+
t[0]['LDGV'].must_equal '9.09%'
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'not blow up if each is called twice' do
|
47
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
48
|
+
count = 0
|
49
|
+
t.each { |row| count += 1 }
|
50
|
+
first_run = count
|
51
|
+
t.each { |row| count += 1}
|
52
|
+
count.must_equal first_run*2
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'allow itself to be cleared for save memory' do
|
56
|
+
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
57
|
+
t.to_a
|
58
|
+
t.send(:cache).length.must_be :>, 0
|
59
|
+
t.free
|
60
|
+
t.send(:cache).length.must_equal 0
|
61
|
+
end
|
62
|
+
|
63
|
+
# fixes ArgumentError: invalid byte sequence in UTF-8
|
64
|
+
it %{safely strip soft hyphens and read windows-1252 html} do
|
65
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
|
66
|
+
t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
|
67
|
+
end
|
68
|
+
|
69
|
+
it %{transliterate characters from ISO-8859-1} do
|
70
|
+
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
|
71
|
+
t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
|
72
|
+
end
|
73
|
+
|
74
|
+
it %{read xml with css selectors} do
|
75
|
+
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
76
|
+
/(AM|PM)/.match(t[0][0]).wont_equal nil
|
77
|
+
end
|
78
|
+
|
79
|
+
it %{optionally stream rows instead of caching them} do
|
80
|
+
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
81
|
+
time1 = t[0][0]
|
82
|
+
/\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
|
83
|
+
sleep 1
|
84
|
+
time2 = t[0][0]
|
85
|
+
time1.wont_equal time2
|
86
|
+
end
|
87
|
+
|
88
|
+
it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
|
89
|
+
t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
|
90
|
+
t[1][0].must_equal %{ÅLAND ISLANDS}
|
91
|
+
end
|
92
|
+
|
93
|
+
it %{parse a big CSV that is not UTF-8} do
|
94
|
+
t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
|
95
|
+
t[0][1].must_equal 'Goroka'
|
96
|
+
end
|
97
|
+
|
98
|
+
it "read only certain rows of an XLSX" do
|
99
|
+
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
|
100
|
+
t[0][0].must_equal "Permissioning and access groups for all content"
|
101
|
+
t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
|
102
|
+
|
103
|
+
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
|
104
|
+
t[0]['col1'].must_equal "Permissioning and access groups for all content"
|
105
|
+
t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
|
106
|
+
end
|
107
|
+
|
108
|
+
it "doesn't get confused by :format => nil" do
|
109
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
|
110
|
+
t[0]['Class'].must_equal 'TWO SEATERS'
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/test/test_remote_table.rb
CHANGED
@@ -3,130 +3,32 @@ require 'helper'
|
|
3
3
|
require 'tempfile'
|
4
4
|
|
5
5
|
describe RemoteTable do
|
6
|
-
it "open an XLSX" do
|
7
|
-
t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
8
|
-
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
9
|
-
end
|
10
|
-
|
11
6
|
it "doesn't screw up UTF-8" do
|
12
|
-
t = RemoteTable.new "file://#{File.expand_path('../
|
7
|
+
t = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
|
13
8
|
t[3]['city'].must_equal "Puerto Inírida"
|
14
9
|
end
|
15
10
|
|
16
11
|
it "likes paths as much as urls for local files" do
|
17
|
-
by_url = RemoteTable.new "file://#{File.expand_path('../
|
18
|
-
by_path = RemoteTable.new File.expand_path('../
|
12
|
+
by_url = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
|
13
|
+
by_path = RemoteTable.new File.expand_path('../data/airports.utf8.csv', __FILE__)
|
19
14
|
by_path.rows.must_equal by_url.rows
|
20
15
|
end
|
21
16
|
|
22
|
-
it "does its best to download urls without http://" do
|
23
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
24
|
-
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
25
|
-
end
|
26
|
-
|
27
|
-
it "add a row hash to every row" do
|
28
|
-
t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
29
|
-
t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
|
30
|
-
end
|
31
|
-
|
32
|
-
it "open a google doc" do
|
33
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
34
|
-
t[0]['name'].must_equal 'Seamus Abshere'
|
35
|
-
end
|
36
|
-
|
37
|
-
it "open a csv with custom headers" do
|
38
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
|
39
|
-
t[0]['col2'].must_equal 'name'
|
40
|
-
t[1]['col2'].must_equal 'Seamus Abshere'
|
41
|
-
end
|
42
|
-
|
43
|
-
it "open a yaml" do
|
44
|
-
t = RemoteTable.new "file://#{File.expand_path('../fixtures/data.yml', __FILE__)}"
|
45
|
-
t[0]['name'].must_equal 'Seamus Abshere'
|
46
|
-
t[0]['city'].must_equal 'Madison'
|
47
|
-
t[1]['name'].must_equal 'Derek Kastner'
|
48
|
-
t[1]['city'].must_equal 'Lansing'
|
49
|
-
end
|
50
|
-
|
51
|
-
it "return an ordered hash" do
|
52
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
53
|
-
t[0].class.must_equal ::ActiveSupport::OrderedHash
|
54
|
-
end
|
55
|
-
|
56
|
-
it "pass through fastercsv options" do
|
57
|
-
f = Tempfile.new 'pass-through-fastercsv-options'
|
58
|
-
f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
|
59
|
-
f.flush
|
60
|
-
t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
|
61
|
-
t[0][2].must_equal %{Body example with a <a href="">link</a>}
|
62
|
-
f.close
|
63
|
-
end
|
64
|
-
|
65
|
-
it "open a csv inside a zip file" do
|
66
|
-
t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
67
|
-
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
68
|
-
:skip => 1,
|
69
|
-
:select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
|
70
|
-
t[0]['LDGV'].must_equal '9.09%'
|
71
|
-
end
|
72
|
-
|
73
|
-
it 'not blow up if each is called twice' do
|
74
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
75
|
-
count = 0
|
76
|
-
t.each { |row| count += 1 }
|
77
|
-
first_run = count
|
78
|
-
t.each { |row| count += 1}
|
79
|
-
count.must_equal first_run*2
|
80
|
-
end
|
81
|
-
|
82
|
-
it 'allow itself to be cleared for save memory' do
|
83
|
-
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
84
|
-
t.to_a
|
85
|
-
t.send(:cache).length.must_be :>, 0
|
86
|
-
t.free
|
87
|
-
t.send(:cache).length.must_equal 0
|
88
|
-
end
|
89
|
-
|
90
|
-
# fixes ArgumentError: invalid byte sequence in UTF-8
|
91
|
-
it %{safely strip soft hyphens and read windows-1252 html} do
|
92
|
-
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
|
93
|
-
t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
|
94
|
-
end
|
95
|
-
|
96
|
-
it %{transliterate characters from ISO-8859-1} do
|
97
|
-
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
|
98
|
-
t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
|
99
|
-
end
|
100
|
-
|
101
|
-
it %{read xml with css selectors} do
|
102
|
-
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
103
|
-
/(AM|PM)/.match(t[0][0]).wont_equal nil
|
104
|
-
end
|
105
|
-
|
106
|
-
it %{optionally stream rows instead of caching them} do
|
107
|
-
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
108
|
-
time1 = t[0][0]
|
109
|
-
/\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
|
110
|
-
sleep 1
|
111
|
-
time2 = t[0][0]
|
112
|
-
time1.wont_equal time2
|
113
|
-
end
|
114
|
-
|
115
17
|
{
|
116
|
-
# IMPOSSIBLE "../
|
117
|
-
"../
|
118
|
-
"../
|
119
|
-
"../
|
120
|
-
"../
|
121
|
-
"../
|
122
|
-
# TODO "../
|
123
|
-
# TODO "../
|
124
|
-
# TODO "../
|
125
|
-
"../
|
126
|
-
"../
|
127
|
-
"../
|
128
|
-
"../
|
129
|
-
"../
|
18
|
+
# IMPOSSIBLE "../data/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls" => {:format=>"xls", :encoding=>"binary"},
|
19
|
+
"../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx" => {:format=>"xlsx"},
|
20
|
+
"../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls" => {:format=>"xls"},
|
21
|
+
"../data/list-en1-semic-3.neooffice.binary.ods" => {:format=>"ods"},
|
22
|
+
"../data/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64" => {:format=>"fixed_width", :encoding=>"iso-8859-1", :schema => [['name', 63, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
|
23
|
+
"../data/list-en1-semic-3.neooffice.utf-8.fixed_width-62" => {:format=>"fixed_width", :schema => [['name', 61, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
|
24
|
+
# TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html" => {:format=>"html" },
|
25
|
+
# TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html" => {:format=>"html", :encoding=>"iso-8859-1"},
|
26
|
+
# TODO "../data/list-en1-semic-3.neooffice.utf-8.html" => {:format=>"html" },
|
27
|
+
"../data/list-en1-semic-3.neooffice.utf-8.xml" => {:format=>"xml", :row_css=>'Row', :column_css => 'Data', :select => proc { |row| row[1].to_s =~ /[A-Z]{2}/ }},
|
28
|
+
"../data/list-en1-semic-3.neooffice.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
|
29
|
+
"../data/list-en1-semic-3.original.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
|
30
|
+
"../data/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
|
31
|
+
"../data/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
|
130
32
|
}.each do |k, v|
|
131
33
|
it %{open #{k} with encoding #{v[:encoding] || 'default'}} do
|
132
34
|
options = v.merge(:headers => false, :skip => 2)
|
@@ -141,57 +43,20 @@ describe RemoteTable do
|
|
141
43
|
end
|
142
44
|
end
|
143
45
|
|
144
|
-
it
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
t[0][1].must_equal 'Goroka'
|
152
|
-
end
|
153
|
-
|
154
|
-
it "read only certain rows of an XLSX" do
|
155
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
|
156
|
-
t[0][0].must_equal "Permissioning and access groups for all content"
|
157
|
-
t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
|
158
|
-
|
159
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
|
160
|
-
t[0]['col1'].must_equal "Permissioning and access groups for all content"
|
161
|
-
t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
|
162
|
-
end
|
163
|
-
|
164
|
-
it "doesn't get confused by :format => nil" do
|
165
|
-
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
|
166
|
-
t[0]['Class'].must_equal 'TWO SEATERS'
|
167
|
-
end
|
168
|
-
|
169
|
-
{
|
170
|
-
'foo.ods' => :ods,
|
171
|
-
'foo.open_office' => :ods,
|
172
|
-
'foo.xlsx' => :xlsx,
|
173
|
-
'foo.excelx' => :xlsx,
|
174
|
-
'foo.xls' => :xls,
|
175
|
-
'foo.excel' => :xls,
|
176
|
-
'foo.csv' => :delimited,
|
177
|
-
'foo.tsv' => :delimited,
|
178
|
-
'foo.delimited' => :delimited,
|
179
|
-
'foo.fixed_width' => :fixed_width,
|
180
|
-
'foo.htm' => :html,
|
181
|
-
'foo.html' => :html,
|
182
|
-
'foo.xml' => :xml,
|
183
|
-
'foo.yaml' => :yaml,
|
184
|
-
'foo.yml' => :yaml,
|
185
|
-
'foo.shp' => :shp
|
186
|
-
}.each do |basename, format|
|
187
|
-
it "detects the #{format} format from the filename #{basename}" do
|
188
|
-
RemoteTable.guess_format(basename).must_equal format
|
189
|
-
end
|
46
|
+
it "pass through fastercsv options" do
|
47
|
+
f = Tempfile.new 'pass-through-fastercsv-options'
|
48
|
+
f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
|
49
|
+
f.flush
|
50
|
+
t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
|
51
|
+
t[0][2].must_equal %{Body example with a <a href="">link</a>}
|
52
|
+
f.close
|
190
53
|
end
|
191
54
|
|
192
|
-
it "
|
193
|
-
|
194
|
-
|
195
|
-
|
55
|
+
it "open a yaml" do
|
56
|
+
t = RemoteTable.new "file://#{File.expand_path('../data/data.yml', __FILE__)}"
|
57
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
58
|
+
t[0]['city'].must_equal 'Madison'
|
59
|
+
t[1]['name'].must_equal 'Derek Kastner'
|
60
|
+
t[1]['city'].must_equal 'Lansing'
|
196
61
|
end
|
197
62
|
end
|