remote_table 2.1.2 → 3.0.0.alpha

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Files changed (40)
  1. data/CHANGELOG +12 -3
  2. data/README.markdown +1 -8
  3. data/lib/remote_table.rb +72 -87
  4. data/lib/remote_table/fixed_width.rb +5 -5
  5. data/lib/remote_table/local_copy.rb +1 -1
  6. data/lib/remote_table/plaintext.rb +3 -3
  7. data/lib/remote_table/processed_by_roo.rb +6 -4
  8. data/lib/remote_table/version.rb +1 -1
  9. data/remote_table.gemspec +3 -4
  10. data/test/{support → data}/airports.utf8.csv +0 -0
  11. data/test/data/color.csv +3 -0
  12. data/test/{fixtures → data}/data.yml +0 -0
  13. data/test/{support → data}/list-en1-semic-3.neooffice.binary.ods +0 -0
  14. data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.csv +0 -0
  15. data/test/{support → data}/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64 +0 -0
  16. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.csv +0 -0
  17. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.fixed_width-62 +0 -0
  18. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.html +0 -0
  19. data/test/{support → data}/list-en1-semic-3.neooffice.utf-8.xml +0 -0
  20. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls +0 -0
  21. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls +0 -0
  22. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx +0 -0
  23. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html +0 -0
  24. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma +0 -0
  25. data/test/{support → data}/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html +0 -0
  26. data/test/{support → data}/list-en1-semic-3.original.iso-8859-1.csv +0 -0
  27. data/test/data/ranges.csv +4 -0
  28. data/test/test_errata.rb +2 -2
  29. data/test/test_local.rb +10 -0
  30. data/test/test_old_syntax.rb +0 -13
  31. data/test/test_parser.rb +24 -0
  32. data/test/test_remote.rb +113 -0
  33. data/test/test_remote_table.rb +30 -165
  34. data/test/test_transpose.rb +11 -0
  35. metadata +86 -66
  36. checksums.yaml +0 -15
  37. data/lib/remote_table/shp.rb +0 -30
  38. data/lib/remote_table/transformer.rb +0 -29
  39. data/test/test_old_transform.rb +0 -47
  40. data/test/test_shapefile.rb +0 -13
@@ -78,7 +78,7 @@ class RemoteTable
78
78
  def encoded_io
79
79
  @encoded_io || @encoded_io_mutex.synchronize do
80
80
  @encoded_io ||= if ::RUBY_VERSION >= '1.9'
81
- ::File.open path, 'rb', :internal_encoding => t.internal_encoding, :external_encoding => RemoteTable::EXTERNAL_ENCODING
81
+ ::File.open path, 'rb', :internal_encoding => t.encoding, :external_encoding => RemoteTable::EXTERNAL_ENCODING
82
82
  else
83
83
  ::File.open path, 'rb'
84
84
  end
@@ -25,7 +25,7 @@ class RemoteTable
25
25
 
26
26
  # Remove bytes that are both useless and harmful in the vast majority of cases.
27
27
  def delete_harmful!
28
- harmful = [ Plaintext.soft_hyphen(internal_encoding), UTF8_BOM ]
28
+ harmful = [ Plaintext.soft_hyphen(encoding), UTF8_BOM ]
29
29
  local_copy.in_place :perl, "s/#{harmful.join('//g; s/')}//g"
30
30
  end
31
31
 
@@ -35,12 +35,12 @@ class RemoteTable
35
35
  # iconv -c -t UTF-8//TRANSLIT -f WINDOWS-1252
36
36
  def transliterate_whole_file_to_utf8!
37
37
  if ::UnixUtils.available?('iconv')
38
- local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV, internal_encoding
38
+ local_copy.in_place :iconv, RemoteTable::EXTERNAL_ENCODING_ICONV, encoding
39
39
  else
40
40
  ::Kernel.warn %{[remote_table] iconv not available in your $PATH, not performing transliteration}
41
41
  end
42
42
  # now that we've force-transliterated to UTF-8, act as though this is what the user had specified
43
- @internal_encoding = RemoteTable::EXTERNAL_ENCODING
43
+ @encoding = RemoteTable::EXTERNAL_ENCODING
44
44
  end
45
45
 
46
46
  # No matter what the EOL are SUPPOSED to be, run it through Perl with a regex that will convert all EOLS to \n
@@ -6,25 +6,27 @@ class RemoteTable
6
6
 
7
7
  # Yield each row using Roo.
8
8
  def _each
9
+ # sometimes Roo forgets to require iconv.
10
+ require 'iconv'
9
11
  require 'roo'
10
12
 
11
13
  spreadsheet = roo_class.new local_copy.path, nil, :ignore
12
14
  if sheet
13
15
  spreadsheet.default_sheet = sheet
14
16
  end
15
-
17
+
16
18
  first_row = if crop
17
19
  crop.first + 1
18
20
  else
19
21
  skip + 1
20
22
  end
21
-
23
+
22
24
  last_row = if crop
23
25
  crop.last
24
26
  else
25
27
  spreadsheet.last_row
26
28
  end
27
-
29
+
28
30
  if not headers
29
31
 
30
32
  # create an array to represent this row
@@ -46,7 +48,7 @@ class RemoteTable
46
48
  end
47
49
 
48
50
  else
49
-
51
+
50
52
  # create a hash to represent this row
51
53
  current_headers = ::ActiveSupport::OrderedHash.new
52
54
  if headers == :first_row
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "2.1.2"
2
+ VERSION = '3.0.0.alpha'
3
3
  end
data/remote_table.gemspec CHANGED
@@ -16,9 +16,9 @@ Gem::Specification.new do |s|
16
16
  s.files = `git ls-files`.split("\n")
17
17
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
18
  s.require_paths = ["lib"]
19
-
19
+
20
20
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
21
- s.add_runtime_dependency 'roo', '>= 1.11'
21
+ s.add_runtime_dependency 'roo', '>= 1.10.3'
22
22
  s.add_runtime_dependency 'fixed_width-multibyte', '>=0.2.3'
23
23
  s.add_runtime_dependency 'i18n' # activesupport?
24
24
  s.add_runtime_dependency 'unix_utils', '>=0.0.8'
@@ -26,10 +26,9 @@ Gem::Specification.new do |s|
26
26
  s.add_runtime_dependency 'hash_digest'
27
27
 
28
28
  s.add_development_dependency 'errata', '>=0.2.0'
29
- s.add_development_dependency 'georuby'
30
- s.add_development_dependency 'dbf'
31
29
  s.add_development_dependency 'minitest'
32
30
  s.add_development_dependency 'minitest-reporters'
33
31
  s.add_development_dependency 'rake'
34
32
  s.add_development_dependency 'yard'
33
+ s.add_development_dependency 'pry'
35
34
  end
File without changes
@@ -0,0 +1,3 @@
1
+ en,es,ru
2
+ green,verde,зеленый
3
+ red,rojo,красный
File without changes
@@ -0,0 +1,4 @@
1
+ description,range
2
+ great,A-B
3
+ ok,C
4
+ bad,D-F
data/test/test_errata.rb CHANGED
@@ -50,8 +50,8 @@ describe RemoteTable do
50
50
  :encoding => 'windows-1252',
51
51
  :row_xpath => '//table[2]//table[1]//tr[3]//tr',
52
52
  :column_xpath => 'td',
53
- :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
54
- :responder => AircraftGuru.new }
53
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
54
+ :responder => AircraftGuru.new)
55
55
  g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
56
56
  g1.wont_be_nil
57
57
  g1['Manufacturer'].must_equal 'GULFSTREAM AEROSPACE'
@@ -0,0 +1,10 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+
4
+ describe RemoteTable do
5
+ describe 'used on local files' do
6
+ it "understands relative paths" do
7
+ RemoteTable.new('test/data/color.csv').to_a.must_equal RemoteTable.new(File.expand_path('../../test/data/color.csv', __FILE__)).to_a
8
+ end
9
+ end
10
+ end
@@ -67,19 +67,6 @@ describe RemoteTable do
67
67
  t.rows.last['Model'].must_equal 'EZ King Cobra'
68
68
  end
69
69
 
70
- it "hash rows without paying attention to order" do
71
- x = ActiveSupport::OrderedHash.new
72
- x[:a] = 1
73
- x[:b] = 2
74
-
75
- y = ActiveSupport::OrderedHash.new
76
- y[:b] = 2
77
- y[:a] = 1
78
-
79
- Marshal.dump(x).wont_equal Marshal.dump(y)
80
- RemoteTable::Transform.row_hash(y).must_equal RemoteTable::Transform.row_hash(x)
81
- end
82
-
83
70
  it "open a Google Docs url (as a CSV)" do
84
71
  t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
85
72
  t.rows.first['PAD district name'].must_equal 'Gulf Coast'
@@ -0,0 +1,24 @@
1
+ require 'helper'
2
+
3
+ describe RemoteTable do
4
+ describe ":parser option" do
5
+ it "takes a parser object that responds to #parse(row) and returns an array of rows" do
6
+ class GradeRangeParser
7
+ def parse(row)
8
+ row['range'].split('-').map do |subrange|
9
+ virtual_row = row.dup
10
+ virtual_row.delete 'range'
11
+ virtual_row['grade'] = subrange
12
+ virtual_row
13
+ end
14
+ end
15
+ end
16
+ t = RemoteTable.new "file://#{File.expand_path('../data/ranges.csv', __FILE__)}", parser: GradeRangeParser.new
17
+ t[0].must_equal 'description' => 'great', 'grade' => 'A'
18
+ t[1].must_equal 'description' => 'great', 'grade' => 'B'
19
+ t[2].must_equal 'description' => 'ok', 'grade' => 'C'
20
+ t[3].must_equal 'description' => 'bad', 'grade' => 'D'
21
+ t[4].must_equal 'description' => 'bad', 'grade' => 'F'
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,113 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+ require 'tempfile'
4
+
5
+ describe RemoteTable do
6
+ describe 'used on remote files' do
7
+ it "open an XLSX" do
8
+ t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
9
+ t[5]["Requirements"].must_equal "Secure encryption of all data"
10
+ end
11
+
12
+ it "does its best to download urls without http://" do
13
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
14
+ t[5]["Requirements"].must_equal "Secure encryption of all data"
15
+ end
16
+
17
+ it "add a row hash to every row" do
18
+ t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
19
+ t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
20
+ end
21
+
22
+ it "open a google doc" do
23
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
24
+ t[0]['name'].must_equal 'Seamus Abshere'
25
+ end
26
+
27
+ it "open a csv with custom headers" do
28
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
29
+ t[0]['col2'].must_equal 'name'
30
+ t[1]['col2'].must_equal 'Seamus Abshere'
31
+ end
32
+
33
+ it "return an ordered hash" do
34
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
35
+ t[0].class.must_equal ::ActiveSupport::OrderedHash
36
+ end
37
+
38
+ it "open a csv inside a zip file" do
39
+ t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
40
+ :filename => 'Annex Tables/Annex 3/Table A-93.csv',
41
+ :skip => 1,
42
+ :select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
43
+ t[0]['LDGV'].must_equal '9.09%'
44
+ end
45
+
46
+ it 'not blow up if each is called twice' do
47
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
48
+ count = 0
49
+ t.each { |row| count += 1 }
50
+ first_run = count
51
+ t.each { |row| count += 1}
52
+ count.must_equal first_run*2
53
+ end
54
+
55
+ it 'allow itself to be cleared for save memory' do
56
+ t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
57
+ t.to_a
58
+ t.send(:cache).length.must_be :>, 0
59
+ t.free
60
+ t.send(:cache).length.must_equal 0
61
+ end
62
+
63
+ # fixes ArgumentError: invalid byte sequence in UTF-8
64
+ it %{safely strip soft hyphens and read windows-1252 html} do
65
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
66
+ t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
67
+ end
68
+
69
+ it %{transliterate characters from ISO-8859-1} do
70
+ t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
71
+ t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
72
+ end
73
+
74
+ it %{read xml with css selectors} do
75
+ t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
76
+ /(AM|PM)/.match(t[0][0]).wont_equal nil
77
+ end
78
+
79
+ it %{optionally stream rows instead of caching them} do
80
+ t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
81
+ time1 = t[0][0]
82
+ /\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
83
+ sleep 1
84
+ time2 = t[0][0]
85
+ time1.wont_equal time2
86
+ end
87
+
88
+ it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
89
+ t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
90
+ t[1][0].must_equal %{ÅLAND ISLANDS}
91
+ end
92
+
93
+ it %{parse a big CSV that is not UTF-8} do
94
+ t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
95
+ t[0][1].must_equal 'Goroka'
96
+ end
97
+
98
+ it "read only certain rows of an XLSX" do
99
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
100
+ t[0][0].must_equal "Permissioning and access groups for all content"
101
+ t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
102
+
103
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
104
+ t[0]['col1'].must_equal "Permissioning and access groups for all content"
105
+ t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
106
+ end
107
+
108
+ it "doesn't get confused by :format => nil" do
109
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
110
+ t[0]['Class'].must_equal 'TWO SEATERS'
111
+ end
112
+ end
113
+ end
@@ -3,130 +3,32 @@ require 'helper'
3
3
  require 'tempfile'
4
4
 
5
5
  describe RemoteTable do
6
- it "open an XLSX" do
7
- t = RemoteTable.new 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
8
- t[5]["Requirements"].must_equal "Secure encryption of all data"
9
- end
10
-
11
6
  it "doesn't screw up UTF-8" do
12
- t = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
7
+ t = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
13
8
  t[3]['city'].must_equal "Puerto Inírida"
14
9
  end
15
10
 
16
11
  it "likes paths as much as urls for local files" do
17
- by_url = RemoteTable.new "file://#{File.expand_path('../support/airports.utf8.csv', __FILE__)}"
18
- by_path = RemoteTable.new File.expand_path('../support/airports.utf8.csv', __FILE__)
12
+ by_url = RemoteTable.new "file://#{File.expand_path('../data/airports.utf8.csv', __FILE__)}"
13
+ by_path = RemoteTable.new File.expand_path('../data/airports.utf8.csv', __FILE__)
19
14
  by_path.rows.must_equal by_url.rows
20
15
  end
21
16
 
22
- it "does its best to download urls without http://" do
23
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
24
- t[5]["Requirements"].must_equal "Secure encryption of all data"
25
- end
26
-
27
- it "add a row hash to every row" do
28
- t = RemoteTable.new(:url => 'http://www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
29
- t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
30
- end
31
-
32
- it "open a google doc" do
33
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
34
- t[0]['name'].must_equal 'Seamus Abshere'
35
- end
36
-
37
- it "open a csv with custom headers" do
38
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
39
- t[0]['col2'].must_equal 'name'
40
- t[1]['col2'].must_equal 'Seamus Abshere'
41
- end
42
-
43
- it "open a yaml" do
44
- t = RemoteTable.new "file://#{File.expand_path('../fixtures/data.yml', __FILE__)}"
45
- t[0]['name'].must_equal 'Seamus Abshere'
46
- t[0]['city'].must_equal 'Madison'
47
- t[1]['name'].must_equal 'Derek Kastner'
48
- t[1]['city'].must_equal 'Lansing'
49
- end
50
-
51
- it "return an ordered hash" do
52
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
53
- t[0].class.must_equal ::ActiveSupport::OrderedHash
54
- end
55
-
56
- it "pass through fastercsv options" do
57
- f = Tempfile.new 'pass-through-fastercsv-options'
58
- f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
59
- f.flush
60
- t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
61
- t[0][2].must_equal %{Body example with a <a href="">link</a>}
62
- f.close
63
- end
64
-
65
- it "open a csv inside a zip file" do
66
- t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
67
- :filename => 'Annex Tables/Annex 3/Table A-93.csv',
68
- :skip => 1,
69
- :select => proc { |row| row['Vehicle Age'].strip =~ /^\d+$/ }
70
- t[0]['LDGV'].must_equal '9.09%'
71
- end
72
-
73
- it 'not blow up if each is called twice' do
74
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
75
- count = 0
76
- t.each { |row| count += 1 }
77
- first_run = count
78
- t.each { |row| count += 1}
79
- count.must_equal first_run*2
80
- end
81
-
82
- it 'allow itself to be cleared for save memory' do
83
- t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
84
- t.to_a
85
- t.send(:cache).length.must_be :>, 0
86
- t.free
87
- t.send(:cache).length.must_equal 0
88
- end
89
-
90
- # fixes ArgumentError: invalid byte sequence in UTF-8
91
- it %{safely strip soft hyphens and read windows-1252 html} do
92
- t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
93
- t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
94
- end
95
-
96
- it %{transliterate characters from ISO-8859-1} do
97
- t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
98
- t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
99
- end
100
-
101
- it %{read xml with css selectors} do
102
- t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
103
- /(AM|PM)/.match(t[0][0]).wont_equal nil
104
- end
105
-
106
- it %{optionally stream rows instead of caching them} do
107
- t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
108
- time1 = t[0][0]
109
- /\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
110
- sleep 1
111
- time2 = t[0][0]
112
- time1.wont_equal time2
113
- end
114
-
115
17
  {
116
- # IMPOSSIBLE "../support/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls" => {:format=>"xls", :encoding=>"binary"},
117
- "../support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx" => {:format=>"xlsx"},
118
- "../support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls" => {:format=>"xls"},
119
- "../support/list-en1-semic-3.neooffice.binary.ods" => {:format=>"ods"},
120
- "../support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64" => {:format=>"fixed_width", :encoding=>"iso-8859-1", :schema => [['name', 63, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
121
- "../support/list-en1-semic-3.neooffice.utf-8.fixed_width-62" => {:format=>"fixed_width", :schema => [['name', 61, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
122
- # TODO "../support/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html" => {:format=>"html" },
123
- # TODO "../support/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html" => {:format=>"html", :encoding=>"iso-8859-1"},
124
- # TODO "../support/list-en1-semic-3.neooffice.utf-8.html" => {:format=>"html" },
125
- "../support/list-en1-semic-3.neooffice.utf-8.xml" => {:format=>"xml", :row_css=>'Row', :column_css => 'Data', :select => proc { |row| row[1].to_s =~ /[A-Z]{2}/ }},
126
- "../support/list-en1-semic-3.neooffice.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
127
- "../support/list-en1-semic-3.original.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
128
- "../support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
129
- "../support/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
18
+ # IMPOSSIBLE "../data/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls" => {:format=>"xls", :encoding=>"binary"},
19
+ "../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx" => {:format=>"xlsx"},
20
+ "../data/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls" => {:format=>"xls"},
21
+ "../data/list-en1-semic-3.neooffice.binary.ods" => {:format=>"ods"},
22
+ "../data/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64" => {:format=>"fixed_width", :encoding=>"iso-8859-1", :schema => [['name', 63, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
23
+ "../data/list-en1-semic-3.neooffice.utf-8.fixed_width-62" => {:format=>"fixed_width", :schema => [['name', 61, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
24
+ # TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html" => {:format=>"html" },
25
+ # TODO "../data/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html" => {:format=>"html", :encoding=>"iso-8859-1"},
26
+ # TODO "../data/list-en1-semic-3.neooffice.utf-8.html" => {:format=>"html" },
27
+ "../data/list-en1-semic-3.neooffice.utf-8.xml" => {:format=>"xml", :row_css=>'Row', :column_css => 'Data', :select => proc { |row| row[1].to_s =~ /[A-Z]{2}/ }},
28
+ "../data/list-en1-semic-3.neooffice.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
29
+ "../data/list-en1-semic-3.original.iso-8859-1.csv" => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
30
+ "../data/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
31
+ "../data/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
130
32
  }.each do |k, v|
131
33
  it %{open #{k} with encoding #{v[:encoding] || 'default'}} do
132
34
  options = v.merge(:headers => false, :skip => 2)
@@ -141,57 +43,20 @@ describe RemoteTable do
141
43
  end
142
44
  end
143
45
 
144
- it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
145
- t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
146
- t[1][0].must_equal %{ÅLAND ISLANDS}
147
- end
148
-
149
- it %{parse a big CSV that is not UTF-8} do
150
- t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
151
- t[0][1].must_equal 'Goroka'
152
- end
153
-
154
- it "read only certain rows of an XLSX" do
155
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
156
- t[0][0].must_equal "Permissioning and access groups for all content"
157
- t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
158
-
159
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
160
- t[0]['col1'].must_equal "Permissioning and access groups for all content"
161
- t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
162
- end
163
-
164
- it "doesn't get confused by :format => nil" do
165
- t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls', :format => nil
166
- t[0]['Class'].must_equal 'TWO SEATERS'
167
- end
168
-
169
- {
170
- 'foo.ods' => :ods,
171
- 'foo.open_office' => :ods,
172
- 'foo.xlsx' => :xlsx,
173
- 'foo.excelx' => :xlsx,
174
- 'foo.xls' => :xls,
175
- 'foo.excel' => :xls,
176
- 'foo.csv' => :delimited,
177
- 'foo.tsv' => :delimited,
178
- 'foo.delimited' => :delimited,
179
- 'foo.fixed_width' => :fixed_width,
180
- 'foo.htm' => :html,
181
- 'foo.html' => :html,
182
- 'foo.xml' => :xml,
183
- 'foo.yaml' => :yaml,
184
- 'foo.yml' => :yaml,
185
- 'foo.shp' => :shp
186
- }.each do |basename, format|
187
- it "detects the #{format} format from the filename #{basename}" do
188
- RemoteTable.guess_format(basename).must_equal format
189
- end
46
+ it "pass through fastercsv options" do
47
+ f = Tempfile.new 'pass-through-fastercsv-options'
48
+ f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
49
+ f.flush
50
+ t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil # this should really be "headers => false"
51
+ t[0][2].must_equal %{Body example with a <a href="">link</a>}
52
+ f.close
190
53
  end
191
54
 
192
- it "detects the correct extension name without confusion from basename" do
193
- [ 'foo.xls', 'xlsx.xls', 'foo_xls' ].each do |basename|
194
- RemoteTable.guess_format(basename).must_equal :xls
195
- end
55
+ it "open a yaml" do
56
+ t = RemoteTable.new "file://#{File.expand_path('../data/data.yml', __FILE__)}"
57
+ t[0]['name'].must_equal 'Seamus Abshere'
58
+ t[0]['city'].must_equal 'Madison'
59
+ t[1]['name'].must_equal 'Derek Kastner'
60
+ t[1]['city'].must_equal 'Lansing'
196
61
  end
197
62
  end