remote_table 2.1.2 → 3.0.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/CHANGELOG +12 -3
  2. data/README.markdown +1 -8
  3. data/lib/remote_table.rb +72 -87
  4. data/lib/remote_table/fixed_width.rb +5 -5
  5. data/lib/remote_table/local_copy.rb +1 -1
  6. data/lib/remote_table/plaintext.rb +3 -3
  7. data/lib/remote_table/processed_by_roo.rb +6 -4
  8. data/lib/remote_table/version.rb +1 -1
  9. data/remote_table.gemspec +3 -4
  10. data/test/{support → data}/airports.utf8.csv +0 -0
  11. data/test/data/color.csv +3 -0
  12. data/test/{fixtures → data}/data.yml +0 -0
  13. data/test/{support → data}/list-en1-semic-3.neooffice.binary.ods +0 -0
  14. data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.csv +0 -0
  15. data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64 +0 -0
  16. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.csv +0 -0
  17. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.fixed_width-62 +0 -0
  18. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.html +0 -0
  19. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.xml +0 -0
  20. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls +0 -0
  21. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls +0 -0
  22. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx +0 -0
  23. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html +0 -0
  24. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma +0 -0
  25. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html +0 -0
  26. data/test/{support → data}/list-en1-semic-3.original.iso-8859-1.csv +0 -0
  27. data/test/data/ranges.csv +4 -0
  28. data/test/test_errata.rb +2 -2
  29. data/test/test_local.rb +10 -0
  30. data/test/test_old_syntax.rb +0 -13
  31. data/test/test_parser.rb +24 -0
  32. data/test/test_remote.rb +113 -0
  33. data/test/test_remote_table.rb +30 -165
  34. data/test/test_transpose.rb +11 -0
  35. metadata +86 -66
  36. checksums.yaml +0 -15
  37. data/lib/remote_table/shp.rb +0 -30
  38. data/lib/remote_table/transformer.rb +0 -29
  39. data/test/test_old_transform.rb +0 -47
  40. data/test/test_shapefile.rb +0 -13
@@ -78,7 +78,7 @@ class RemoteTable
78
78
  def encoded_io
79
79
  @encoded_io || @encoded_io_mutex.synchronize do
80
80
  @encoded_io ||= if ::RUBY_VERSION >= '1.9'
81
- ::File.open path, 'rb', :internal_encoding => t.internal_encoding, :external_encoding => RemoteTable::EXTERNAL_ENCODING
81
+ ::File.open path, 'rb', :internal_encoding => t.encoding, :external_encoding => RemoteTable::EXTERNAL_ENCODING
82
82
  else
83
83
  ::File.open path, 'rb'
84
84
  end
@@ -25,7 +25,7 @@ class RemoteTable
25
25
 
26
26
  # Remove bytes that are both useless and harmful in the vast majority of cases.
27
27
  def delete_harmful!
28
- harmful = [ Plaintext.soft_hyphen(internal_encoding), UTF8_BOM ]
28
+ harmful = [ Plaintext.soft_hyphen(encoding), UTF8_BOM ]
29
29
  local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
30
30
  end
31
31
 
@@ -35,12 +35,12 @@ class RemoteTable
35
35
  # iconv -c -t UTF-8//TRANSLIT -f WINDOWS-1252
36
36
  def transliterate_whole_file_to_utf8!
37
37
  if ::UnixUtils.available?('iconv')
38
- local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV, internal_encoding
38
+ local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV, encoding
39
39
  else
40
40
  ::Kernel.warn %{[remote_table] iconv not available in your $PATH, not performing transliteration}
41
41
  end
42
42
  # now that we've force-transliterated to UTF-8, act as though this is what the user had specified
43
- @internal_encoding = RemoteTable::EXTERNAL_ENCODING
43
+ @encoding = RemoteTable::EXTERNAL_ENCODING
44
44
  end
45
45
 
46
46
  # No matter what the EOL are SUPPOSED to be, run it through Perl with a regex that will convert all EOLS to \n
@@ -6,25 +6,27 @@ class RemoteTable
6
6
 
7
7
  # Yield each row using Roo.
8
8
  def _each
9
+ # sometimes Roo forgets to require iconv.
10
+ require 'iconv'
9
11
  require 'roo'
10
12
 
11
13
  spreadsheet = roo_class.new local_copy.path, nil, :ignore
12
14
  if sheet
13
15
  spreadsheet.default_sheet = sheet
14
16
  end
15
-
17
+
16
18
  first_row = if crop
17
19
  crop.first + 1
18
20
  else
19
21
  skip + 1
20
22
  end
21
-
23
+
22
24
  last_row = if crop
23
25
  crop.last
24
26
  else
25
27
  spreadsheet.last_row
26
28
  end
27
-
29
+
28
30
  if not headers
29
31
 
30
32
  # create an array to represent this row
@@ -46,7 +48,7 @@ class RemoteTable
46
48
  end
47
49
 
48
50
  else
49
-
51
+
50
52
  # create a hash to represent this row
51
53
  current_headers = ::ActiveSupport::OrderedHash.new
52
54
  if headers == :first_row
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "2.1.2"
2
+ VERSION = '3.0.0.alpha'
3
3
  end
data/remote_table.gemspec CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
16
16
  s.files = `git ls-files`.split("\n")
17
17
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
18
  s.require_paths = ["lib"]
19
-
19
+
20
20
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
21
- s.add_runtime_dependency 'roo', '>= 1.11'
21
+ s.add_runtime_dependency 'roo', '>= 1.10.3'
22
22
  s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
23
23
  s.add_runtime_dependency 'i18n' # activesupport?
24
24
  s.add_runtime_dependency 'unix_utils', '>=0.0.8'
@@ -26,10 +26,9 @@ Gem::Specification.new do |s|
26
26
  s.add_runtime_dependency 'hash_digest'
27
27
 
28
28
  s.add_development_dependency 'errata', '>=0.2.0'
29
- s.add_development_dependency 'georuby'
30
- s.add_development_dependency 'dbf'
31
29
  s.add_development_dependency 'minitest'
32
30
  s.add_development_dependency 'minitest-reporters'
33
31
  s.add_development_dependency 'rake'
34
32
  s.add_development_dependency 'yard'
33
+ s.add_development_dependency 'pry'
35
34
  end
File without changes
@@ -0,0 +1,3 @@
1
+ en,es,ru
2
+ green,verde,зеленый
3
+ red,rojo,красный
File without changes
@@ -0,0 +1,4 @@
1
+ description,range
2
+ great,A-B
3
+ ok,C
4
+ bad,D-F
data/test/test_errata.rb CHANGED
@@ -50,8 +50,8 @@ describe RemoteTable do
50
50
  :encoding => 'windows-1252',
51
51
  :row_xpath => '//table[2]//table[1]//tr[3]//tr',
52
52
  :column_xpath => 'td',
53
- :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
54
- :responder => AircraftGuru.new }
53
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
54
+ :responder => AircraftGuru.new)
55
55
  g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
56
56
  g1.wont_be_nil
57
57
  g1['Manufacturer'].must_equal 'GULFSTREAM AEROSPACE'
@@ -0,0 +1,10 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+
4
+ describe RemoteTable do
5
+ describe 'used on local files' do
6
+ it "understands relative paths" do
7
+ RemoteTable.new('test/data/color.csv').to_a.must_equal RemoteTable.new(File.expand_path('../../test/data/color.csv', __FILE__)).to_a
8
+ end
9
+ end
10
+ end
@@ -67,19 +67,6 @@ describe RemoteTable do
67
67
  t.rows.last['Model'].must_equal 'EZ King Cobra'
68
68
  end
69
69
 
70
- it "hash rows without paying attention to order" do
71
- x = ActiveSupport::OrderedHash.new
72
- x[:a] = 1
73
- x[:b] = 2
74
-
75
- y = ActiveSupport::OrderedHash.new
76
- y[:b] = 2
77
- y[:a] = 1
78
-
79
- Marshal.dump(x).wont_equal Marshal.dump(y)
80
- RemoteTable::Transform.row_hash(y).must_equal RemoteTable::Transform.row_hash(x)
81
- end
82
-
83
70
  it "open a Google Docs url (as a CSV)" do
84
71
  t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
85
72
  t.rows.first['PAD district name'].must_equal 'Gulf Coast'
@@ -0,0 +1,24 @@
1
+ require 'helper'
2
+
3
+ describe RemoteTable do
4
+ describe ":parser option" do
5
+ it "takes a parser object that responds to #parse(row) and returns an array of rows" do
6
+ class GradeRangeParser
7
+ def parse(row)
8
+ row['range'].split('-').map do |subrange|
9
+ virtual_row = row.dup
10
+ virtual_row.delete 'range'
11
+ virtual_row['grade'] = subrange
12
+ virtual_row
13
+ end
14
+ end
15
+ end
16
+ t = RemoteTable.new "file://#{File.expand_path('../data/ranges.csv', __FILE__)}", parser: GradeRangeParser.new
17
+ t[0].must_equal 'description' => 'great', 'grade' => 'A'
18
+ t[1].must_equal 'description' => 'great', 'grade' => 'B'
19
+ t[2].must_equal 'description' => 'ok', 'grade' => 'C'
20
+ t[3].must_equal 'description' => 'bad', 'grade' => 'D'
21
+ t[4].must_equal 'description' => 'bad', 'grade' => 'F'
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,113 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+ require 'tempfile'
4
+
5
+ describe RemoteTable do
6
+ describe 'used on remote files' do
7
+ it "open an XLSX" do
8
+ t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
9
+ t[5]["Requirements"].must_equal "Secure encryption of all data"
10
+ end
11
+
12
+ it "does its best to download urls without http://" do
13
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
14
+ t[5]["Requirements"].must_equal "Secure encryption of all data"
15
+ end
16
+
17
+ it "add a row hash to every row" do
18
+ t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
19
+ t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
20
+ end
21
+
22
+ it "open a google doc" do
23
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
24
+ t[0]['name'].must_equal 'Seamus Abshere'
25
+ end
26
+
27
+ it "open a csv with custom headers" do
28
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
29
+ t[0]['col2'].must_equal 'name'
30
+ t[1]['col2'].must_equal 'Seamus Abshere'
31
+ end
32
+
33
+ it "return an ordered hash" do
34
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
35
+ t[0].class.must_equal ::ActiveSupport::OrderedHash
36
+ end
37
+
38
+ it "open a csv inside a zip file" do
39
+ t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
40
+ :filename => 'Annex Tables/Annex 3/Table A-93.csv',
41
+ :skip => 1,
42
+ :select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
43
+ t[0]['LDGV'].must_equal '9.09%'
44
+ end
45
+
46
+ it 'not blow up if each is called twice' do
47
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
48
+ count = 0
49
+ t.each { |row| count += 1 }
50
+ first_run = count
51
+ t.each { |row| count += 1}
52
+ count.must_equal first_run*2
53
+ end
54
+
55
+ it 'allow itself to be cleared for save memory' do
56
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
57
+ t.to_a
58
+ t.send(:cache).length.must_be :>, 0
59
+ t.free
60
+ t.send(:cache).length.must_equal 0
61
+ end
62
+
63
+ # fixes ArgumentError: invalid byte sequence in UTF-8
64
+ it %{safely strip soft hyphens and read windows-1252 html} do
65
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
66
+ t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
67
+ end
68
+
69
+ it %{transliterate characters from ISO-8859-1} do
70
+ t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
71
+ t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
72
+ end
73
+
74
+ it %{read xml with css selectors} do
75
+ t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
76
+ /(AM|PM)/.match(t[0][0]).wont_equal nil
77
+ end
78
+
79
+ it %{optionally stream rows instead of caching them} do
80
+ t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
81
+ time1 = t[0][0]
82
+ /\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
83
+ sleep 1
84
+ time2 = t[0][0]
85
+ time1.wont_equal time2
86
+ end
87
+
88
+ it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
89
+ t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
90
+ t[1][0].must_equal %{ÅLAND ISLANDS}
91
+ end
92
+
93
+ it %{parse a big CSV that is not UTF-8} do
94
+ t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
95
+ t[0][1].must_equal 'Goroka'
96
+ end
97
+
98
+ it "read only certain rows of an XLSX" do
99
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
100
+ t[0][0].must_equal "Permissioning and access groups for all content"
101
+ t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
102
+
103
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
104
+ t[0]['col1'].must_equal "Permissioning and access groups for all content"
105
+ t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
106
+ end
107
+
108
+ it "doesn't get confused by :format => nil" do
109
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
110
+ t[0]['Class'].must_equal 'TWO SEATERS'
111
+ end
112
+ end
113
+ end
@@ -3,130 +3,32 @@ require 'helper'
3
3
  require 'tempfile'
4
4
 
5
5
  describe RemoteTable do
6
- it "open an XLSX" do
7
- t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
8
- t[5]["Requirements"].must_equal "Secure encryption of all data"
9
- end
10
-
11
6
  it "doesn't screw up UTF-8" do
12
- t = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
7
+ t = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
13
8
  t[3]['city'].must_equal "Puerto Inírida"
14
9
  end
15
10
 
16
11
  it "likes paths as much as urls for local files" do
17
- by_url = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
18
- by_path = RemoteTable.new File.expand_path('../support/airports.utf8.csv', __FILE__)
12
+ by_url = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
13
+ by_path = RemoteTable.new File.expand_path('../data/airports.utf8.csv', __FILE__)
19
14
  by_path.rows.must_equal by_url.rows
20
15
  end
21
16
 
22
- it "does its best to download urls without http://" do
23
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
24
- t[5]["Requirements"].must_equal "Secure encryption of all data"
25
- end
26
-
27
- it "add a row hash to every row" do
28
- t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
29
- t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
30
- end
31
-
32
- it "open a google doc" do
33
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
34
- t[0]['name'].must_equal 'Seamus Abshere'
35
- end
36
-
37
- it "open a csv with custom headers" do
38
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
39
- t[0]['col2'].must_equal 'name'
40
- t[1]['col2'].must_equal 'Seamus Abshere'
41
- end
42
-
43
- it "open a yaml" do
44
- t = RemoteTable.new "file://#{File.expand_path('../fixtures/data.yml', __FILE__)}"
45
- t[0]['name'].must_equal 'Seamus Abshere'
46
- t[0]['city'].must_equal 'Madison'
47
- t[1]['name'].must_equal 'Derek Kastner'
48
- t[1]['city'].must_equal 'Lansing'
49
- end
50
-
51
- it "return an ordered hash" do
52
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
53
- t[0].class.must_equal ::ActiveSupport::OrderedHash
54
- end
55
-
56
- it "pass through fastercsv options" do
57
- f = Tempfile.new 'pass-through-fastercsv-options'
58
- f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
59
- f.flush
60
- t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
61
- t[0][2].must_equal %{Body example with a <a href="">link</a>}
62
- f.close
63
- end
64
-
65
- it "open a csv inside a zip file" do
66
- t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
67
- :filename => 'Annex Tables/Annex 3/Table A-93.csv',
68
- :skip => 1,
69
- :select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
70
- t[0]['LDGV'].must_equal '9.09%'
71
- end
72
-
73
- it 'not blow up if each is called twice' do
74
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
75
- count = 0
76
- t.each { |row| count += 1 }
77
- first_run = count
78
- t.each { |row| count += 1}
79
- count.must_equal first_run*2
80
- end
81
-
82
- it 'allow itself to be cleared for save memory' do
83
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
84
- t.to_a
85
- t.send(:cache).length.must_be :>, 0
86
- t.free
87
- t.send(:cache).length.must_equal 0
88
- end
89
-
90
- # fixes ArgumentError: invalid byte sequence in UTF-8
91
- it %{safely strip soft hyphens and read windows-1252 html} do
92
- t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
93
- t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
94
- end
95
-
96
- it %{transliterate characters from ISO-8859-1} do
97
- t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
98
- t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
99
- end
100
-
101
- it %{read xml with css selectors} do
102
- t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
103
- /(AM|PM)/.match(t[0][0]).wont_equal nil
104
- end
105
-
106
- it %{optionally stream rows instead of caching them} do
107
- t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
108
- time1 = t[0][0]
109
- /\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
110
- sleep 1
111
- time2 = t[0][0]
112
- time1.wont_equal time2
113
- end
114
-
115
17
  {
116
- # IMPOSSIBLE "../support/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls" => {:format=>"xls", :encoding=>"binary"},
117
- "../support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx" => {:format=>"xlsx"},
118
- "../support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls" => {:format=>"xls"},
119
- "../support/list-en1-semic-3.neooffice.binary.ods" => {:format=>"ods"},
120
- "../support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64" => {:format=>"fixed_width", :encoding=>"iso-8859-1", :schema => [['name', 63, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
121
- "../support/list-en1-semic-3.neooffice.utf-8.fixed_width-62" => {:format=>"fixed_width", :schema => [['name', 61, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
122
- # TODO "../support/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html" => {:format=>"html" },
123
- # TODO "../support/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html" => {:format=>"html", :encoding=>"iso-8859-1"},
124
- # TODO "../support/list-en1-semic-3.neooffice.utf-8.html" => {:format=>"html" },
125
- "../support/list-en1-semic-3.neooffice.utf-8.xml" => {:format=>"xml", :row_css=>'Row', :column_css => 'Data', :select => proc { |row| row[1].to_s =~ /[A-Z]{2}/ }},
126
- "../support/list-en1-semic-3.neooffice.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
127
- "../support/list-en1-semic-3.original.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
128
- "../support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
129
- "../support/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
18
+ # IMPOSSIBLE "../data/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls" => {:format=>"xls", :encoding=>"binary"},
19
+ "../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx" => {:format=>"xlsx"},
20
+ "../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls" => {:format=>"xls"},
21
+ "../data/list-en1-semic-3.neooffice.binary.ods" => {:format=>"ods"},
22
+ "../data/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64" => {:format=>"fixed_width", :encoding=>"iso-8859-1", :schema => [['name', 63, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
23
+ "../data/list-en1-semic-3.neooffice.utf-8.fixed_width-62" => {:format=>"fixed_width", :schema => [['name', 61, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
24
+ # TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html" => {:format=>"html" },
25
+ # TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html" => {:format=>"html", :encoding=>"iso-8859-1"},
26
+ # TODO "../data/list-en1-semic-3.neooffice.utf-8.html" => {:format=>"html" },
27
+ "../data/list-en1-semic-3.neooffice.utf-8.xml" => {:format=>"xml", :row_css=>'Row', :column_css => 'Data', :select => proc { |row| row[1].to_s =~ /[A-Z]{2}/ }},
28
+ "../data/list-en1-semic-3.neooffice.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
29
+ "../data/list-en1-semic-3.original.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
30
+ "../data/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
31
+ "../data/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
130
32
  }.each do |k, v|
131
33
  it %{open #{k} with encoding #{v[:encoding] || 'default'}} do
132
34
  options = v.merge(:headers => false, :skip => 2)
@@ -141,57 +43,20 @@ describe RemoteTable do
141
43
  end
142
44
  end
143
45
 
144
- it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
145
- t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
146
- t[1][0].must_equal %{ÅLAND ISLANDS}
147
- end
148
-
149
- it %{parse a big CSV that is not UTF-8} do
150
- t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
151
- t[0][1].must_equal 'Goroka'
152
- end
153
-
154
- it "read only certain rows of an XLSX" do
155
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
156
- t[0][0].must_equal "Permissioning and access groups for all content"
157
- t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
158
-
159
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
160
- t[0]['col1'].must_equal "Permissioning and access groups for all content"
161
- t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
162
- end
163
-
164
- it "doesn't get confused by :format => nil" do
165
- t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
166
- t[0]['Class'].must_equal 'TWO SEATERS'
167
- end
168
-
169
- {
170
- 'foo.ods' => :ods,
171
- 'foo.open_office' => :ods,
172
- 'foo.xlsx' => :xlsx,
173
- 'foo.excelx' => :xlsx,
174
- 'foo.xls' => :xls,
175
- 'foo.excel' => :xls,
176
- 'foo.csv' => :delimited,
177
- 'foo.tsv' => :delimited,
178
- 'foo.delimited' => :delimited,
179
- 'foo.fixed_width' => :fixed_width,
180
- 'foo.htm' => :html,
181
- 'foo.html' => :html,
182
- 'foo.xml' => :xml,
183
- 'foo.yaml' => :yaml,
184
- 'foo.yml' => :yaml,
185
- 'foo.shp' => :shp
186
- }.each do |basename, format|
187
- it "detects the #{format} format from the filename #{basename}" do
188
- RemoteTable.guess_format(basename).must_equal format
189
- end
46
+ it "pass through fastercsv options" do
47
+ f = Tempfile.new 'pass-through-fastercsv-options'
48
+ f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
49
+ f.flush
50
+ t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
51
+ t[0][2].must_equal %{Body example with a <a href="">link</a>}
52
+ f.close
190
53
  end
191
54
 
192
- it "detects the correct extension name without confusion from basename" do
193
- [ 'foo.xls', 'xlsx.xls', 'foo_xls' ].each do |basename|
194
- RemoteTable.guess_format(basename).must_equal :xls
195
- end
55
+ it "open a yaml" do
56
+ t = RemoteTable.new "file://#{File.expand_path('../data/data.yml', __FILE__)}"
57
+ t[0]['name'].must_equal 'Seamus Abshere'
58
+ t[0]['city'].must_equal 'Madison'
59
+ t[1]['name'].must_equal 'Derek Kastner'
60
+ t[1]['city'].must_equal 'Lansing'
196
61
  end
197
62
  end