remote_table 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/CHANGELOG +19 -0
- data/Gemfile +7 -1
- data/README.markdown +440 -0
- data/Rakefile +6 -14
- data/lib/remote_table.rb +27 -38
- data/lib/remote_table/{properties.rb → config.rb} +39 -43
- data/lib/remote_table/format.rb +24 -27
- data/lib/remote_table/format/delimited.rb +17 -21
- data/lib/remote_table/format/fixed_width.rb +9 -9
- data/lib/remote_table/format/html.rb +0 -2
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +13 -12
- data/lib/remote_table/format/mixins/processed_by_roo.rb +17 -13
- data/lib/remote_table/format/mixins/textual.rb +13 -13
- data/lib/remote_table/format/open_office.rb +3 -0
- data/lib/remote_table/format/xml.rb +0 -2
- data/lib/remote_table/format/yaml.rb +14 -0
- data/lib/remote_table/local_file.rb +69 -7
- data/lib/remote_table/transformer.rb +7 -4
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +5 -13
- data/test/fixtures/data.yml +4 -0
- data/test/helper.rb +8 -9
- data/test/test_big.rb +43 -53
- data/test/test_errata.rb +27 -25
- data/test/test_old_syntax.rb +193 -191
- data/test/test_old_transform.rb +12 -10
- data/test/test_remote_table.rb +57 -47
- metadata +48 -64
- data/.document +0 -5
- data/README.rdoc +0 -167
- data/lib/remote_table/utils.rb +0 -157
data/test/test_old_transform.rb
CHANGED
@@ -31,15 +31,17 @@ class NaturalGasParser
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
34
|
+
describe RemoteTable do
|
35
|
+
describe "when using old-style parser" do
|
36
|
+
it "open an XLS with a parser" do
|
37
|
+
t = RemoteTable.new(:url => 'http://www.eia.gov/dnav/ng/xls/ng_pri_sum_a_EPG0_FWA_DMcf_a.xls',
|
38
|
+
:sheet => 'Data 1',
|
39
|
+
:skip => 2,
|
40
|
+
:select => proc { |row| row['year'].to_i > 1989 },
|
41
|
+
:transform => { :class => NaturalGasParser })
|
42
|
+
t[0]['locatable_type'].must_equal 'Country'
|
43
|
+
t[0]['locatable_id'].must_equal 'US'
|
44
|
+
t[0].row_hash.must_be :present?
|
45
|
+
end
|
44
46
|
end
|
45
47
|
end
|
data/test/test_remote_table.rb
CHANGED
@@ -1,89 +1,99 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'helper'
|
3
|
+
require 'tempfile'
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
describe RemoteTable do
|
6
|
+
it "open an XLSX" do
|
6
7
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
7
|
-
|
8
|
+
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
8
9
|
end
|
9
10
|
|
10
|
-
|
11
|
+
it "add a row hash to every row" do
|
11
12
|
t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
12
|
-
|
13
|
+
t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
|
13
14
|
end
|
14
15
|
|
15
|
-
|
16
|
+
it "open a google doc" do
|
16
17
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
17
|
-
|
18
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
18
19
|
end
|
19
20
|
|
20
|
-
|
21
|
+
it "open a csv with custom headers" do
|
21
22
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
|
22
|
-
|
23
|
-
|
23
|
+
t[0]['col2'].must_equal 'name'
|
24
|
+
t[1]['col2'].must_equal 'Seamus Abshere'
|
25
|
+
end
|
26
|
+
|
27
|
+
it "open a yaml" do
|
28
|
+
t = RemoteTable.new "file://#{File.expand_path('../fixtures/data.yml', __FILE__)}"
|
29
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
30
|
+
t[0]['city'].must_equal 'Madison'
|
31
|
+
t[1]['name'].must_equal 'Derek Kastner'
|
32
|
+
t[1]['city'].must_equal 'Lansing'
|
24
33
|
end
|
25
34
|
|
26
|
-
|
35
|
+
it "return an ordered hash" do
|
27
36
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
28
|
-
|
37
|
+
t[0].class.must_equal ::ActiveSupport::OrderedHash
|
29
38
|
end
|
30
39
|
|
31
|
-
|
40
|
+
it "pass through fastercsv options" do
|
32
41
|
f = Tempfile.new 'pass-through-fastercsv-options'
|
33
42
|
f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
|
34
43
|
f.flush
|
35
44
|
t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
|
36
|
-
|
45
|
+
t[0][2].must_equal %{Body example with a <a href="">link</a>}
|
37
46
|
f.close
|
38
47
|
end
|
39
48
|
|
40
|
-
|
49
|
+
it "open a csv inside a zip file" do
|
41
50
|
t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
42
51
|
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
43
52
|
:skip => 1,
|
44
53
|
:select => lambda { |row| row['Vehicle Age'].to_i.to_s == row['Vehicle Age'] }
|
45
|
-
|
54
|
+
t[0]['LDGV'].must_equal '9.09%'
|
46
55
|
end
|
47
56
|
|
48
|
-
|
57
|
+
it 'not blow up if each is called twice' do
|
49
58
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
59
|
+
count = 0
|
60
|
+
t.each { |row| count += 1 }
|
61
|
+
first_run = count
|
62
|
+
t.each { |row| count += 1}
|
63
|
+
count.must_equal first_run*2
|
54
64
|
end
|
55
65
|
|
56
|
-
|
66
|
+
it 'allow itself to be cleared for save memory' do
|
57
67
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
58
68
|
t.to_a
|
59
|
-
|
69
|
+
t.send(:cache).length.must_be :>, 0
|
60
70
|
t.free
|
61
|
-
|
71
|
+
t.send(:cache).length.must_equal 0
|
62
72
|
end
|
63
73
|
|
64
74
|
# fixes ArgumentError: invalid byte sequence in UTF-8
|
65
|
-
|
66
|
-
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table
|
67
|
-
|
75
|
+
it %{safely strip soft hyphens and read windows-1252 html} do
|
76
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
|
77
|
+
t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
|
68
78
|
end
|
69
79
|
|
70
|
-
|
80
|
+
it %{transliterate characters from ISO-8859-1} do
|
71
81
|
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
|
72
|
-
|
82
|
+
t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
|
73
83
|
end
|
74
84
|
|
75
|
-
|
85
|
+
it %{read xml with css selectors} do
|
76
86
|
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
77
|
-
|
87
|
+
/(AM|PM)/.match(t[0][0]).wont_equal nil
|
78
88
|
end
|
79
89
|
|
80
|
-
|
90
|
+
it %{optionally stream rows instead of caching them} do
|
81
91
|
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
82
92
|
time1 = t[0][0]
|
83
|
-
|
93
|
+
/\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
|
84
94
|
sleep 1
|
85
95
|
time2 = t[0][0]
|
86
|
-
|
96
|
+
time1.wont_equal time2
|
87
97
|
end
|
88
98
|
|
89
99
|
{
|
@@ -102,37 +112,37 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
102
112
|
"../support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
|
103
113
|
"../support/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
|
104
114
|
}.each do |k, v|
|
105
|
-
|
115
|
+
it %{open #{k} with encoding #{v[:encoding] || 'default'}} do
|
106
116
|
options = v.merge(:headers => false, :skip => 2)
|
107
117
|
t = RemoteTable.new "file://#{File.expand_path(k, __FILE__)}", options
|
108
118
|
a = %{ÅLAND ISLANDS}
|
109
119
|
b = (t[1].is_a?(::Array) ? t[1][0] : t[1]['name'])
|
110
120
|
if RUBY_VERSION >= '1.9'
|
111
|
-
|
112
|
-
|
121
|
+
a.encoding.to_s.must_equal 'UTF-8'
|
122
|
+
b.encoding.to_s.must_equal 'UTF-8'
|
113
123
|
end
|
114
|
-
|
124
|
+
b.must_equal a
|
115
125
|
end
|
116
126
|
end
|
117
127
|
|
118
|
-
|
128
|
+
it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
|
119
129
|
t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
|
120
|
-
|
130
|
+
t[1][0].must_equal %{ÅLAND ISLANDS}
|
121
131
|
end
|
122
132
|
|
123
|
-
|
133
|
+
it %{parse a big CSV that is not UTF-8} do
|
124
134
|
t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
|
125
|
-
|
135
|
+
t[0][1].must_equal 'Goroka'
|
126
136
|
end
|
127
137
|
|
128
|
-
|
138
|
+
it "read only certain rows of an XLSX" do
|
129
139
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
|
130
|
-
|
131
|
-
|
140
|
+
t[0][0].must_equal "Permissioning and access groups for all content"
|
141
|
+
t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
|
132
142
|
|
133
143
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
|
134
|
-
|
135
|
-
|
144
|
+
t[0]['col1'].must_equal "Permissioning and access groups for all content"
|
145
|
+
t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
|
136
146
|
end
|
137
147
|
|
138
148
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2012-04-12 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activesupport
|
17
|
-
requirement:
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,15 @@ dependencies:
|
|
22
22
|
version: 2.3.4
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements:
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ! '>='
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: 2.3.4
|
26
31
|
- !ruby/object:Gem::Dependency
|
27
32
|
name: roo
|
28
|
-
requirement:
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
29
34
|
none: false
|
30
35
|
requirements:
|
31
36
|
- - ! '>='
|
@@ -33,32 +38,31 @@ dependencies:
|
|
33
38
|
version: '1.9'
|
34
39
|
type: :runtime
|
35
40
|
prerelease: false
|
36
|
-
version_requirements:
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.9'
|
37
47
|
- !ruby/object:Gem::Dependency
|
38
48
|
name: fixed_width-multibyte
|
39
|
-
requirement:
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
40
50
|
none: false
|
41
51
|
requirements:
|
42
52
|
- - ! '>='
|
43
53
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
54
|
+
version: 0.2.3
|
45
55
|
type: :runtime
|
46
56
|
prerelease: false
|
47
|
-
version_requirements:
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
name: i18n
|
50
|
-
requirement: &2165418440 !ruby/object:Gem::Requirement
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
58
|
none: false
|
52
59
|
requirements:
|
53
60
|
- - ! '>='
|
54
61
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
56
|
-
type: :runtime
|
57
|
-
prerelease: false
|
58
|
-
version_requirements: *2165418440
|
62
|
+
version: 0.2.3
|
59
63
|
- !ruby/object:Gem::Dependency
|
60
|
-
name:
|
61
|
-
requirement:
|
64
|
+
name: i18n
|
65
|
+
requirement: !ruby/object:Gem::Requirement
|
62
66
|
none: false
|
63
67
|
requirements:
|
64
68
|
- - ! '>='
|
@@ -66,84 +70,60 @@ dependencies:
|
|
66
70
|
version: '0'
|
67
71
|
type: :runtime
|
68
72
|
prerelease: false
|
69
|
-
version_requirements:
|
70
|
-
- !ruby/object:Gem::Dependency
|
71
|
-
name: ensure-encoding
|
72
|
-
requirement: &2165407140 !ruby/object:Gem::Requirement
|
73
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
74
|
none: false
|
74
75
|
requirements:
|
75
76
|
- - ! '>='
|
76
77
|
- !ruby/object:Gem::Version
|
77
78
|
version: '0'
|
78
|
-
type: :runtime
|
79
|
-
prerelease: false
|
80
|
-
version_requirements: *2165407140
|
81
79
|
- !ruby/object:Gem::Dependency
|
82
|
-
name:
|
83
|
-
requirement:
|
80
|
+
name: unix_utils
|
81
|
+
requirement: !ruby/object:Gem::Requirement
|
84
82
|
none: false
|
85
83
|
requirements:
|
86
84
|
- - ! '>='
|
87
85
|
- !ruby/object:Gem::Version
|
88
|
-
version:
|
86
|
+
version: 0.0.8
|
89
87
|
type: :runtime
|
90
88
|
prerelease: false
|
91
|
-
version_requirements:
|
92
|
-
- !ruby/object:Gem::Dependency
|
93
|
-
name: hash_digest
|
94
|
-
requirement: &2165405380 !ruby/object:Gem::Requirement
|
89
|
+
version_requirements: !ruby/object:Gem::Requirement
|
95
90
|
none: false
|
96
91
|
requirements:
|
97
92
|
- - ! '>='
|
98
93
|
- !ruby/object:Gem::Version
|
99
|
-
version:
|
100
|
-
type: :runtime
|
101
|
-
prerelease: false
|
102
|
-
version_requirements: *2165405380
|
94
|
+
version: 0.0.8
|
103
95
|
- !ruby/object:Gem::Dependency
|
104
|
-
name:
|
105
|
-
requirement:
|
96
|
+
name: fastercsv
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
106
98
|
none: false
|
107
99
|
requirements:
|
108
100
|
- - ! '>='
|
109
101
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
111
|
-
type: :
|
102
|
+
version: 1.5.0
|
103
|
+
type: :runtime
|
112
104
|
prerelease: false
|
113
|
-
version_requirements:
|
114
|
-
- !ruby/object:Gem::Dependency
|
115
|
-
name: test-unit
|
116
|
-
requirement: &2165404080 !ruby/object:Gem::Requirement
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
117
106
|
none: false
|
118
107
|
requirements:
|
119
108
|
- - ! '>='
|
120
109
|
- !ruby/object:Gem::Version
|
121
|
-
version:
|
122
|
-
type: :development
|
123
|
-
prerelease: false
|
124
|
-
version_requirements: *2165404080
|
110
|
+
version: 1.5.0
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
127
|
-
requirement:
|
112
|
+
name: hash_digest
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
128
114
|
none: false
|
129
115
|
requirements:
|
130
116
|
- - ! '>='
|
131
117
|
- !ruby/object:Gem::Version
|
132
118
|
version: '0'
|
133
|
-
type: :
|
119
|
+
type: :runtime
|
134
120
|
prerelease: false
|
135
|
-
version_requirements:
|
136
|
-
- !ruby/object:Gem::Dependency
|
137
|
-
name: rake
|
138
|
-
requirement: &2165403060 !ruby/object:Gem::Requirement
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
122
|
none: false
|
140
123
|
requirements:
|
141
124
|
- - ! '>='
|
142
125
|
- !ruby/object:Gem::Version
|
143
126
|
version: '0'
|
144
|
-
type: :development
|
145
|
-
prerelease: false
|
146
|
-
version_requirements: *2165403060
|
147
127
|
description: Gives you a standard way to parse various formats and treat them as an
|
148
128
|
array of hashes.
|
149
129
|
email:
|
@@ -152,14 +132,15 @@ executables: []
|
|
152
132
|
extensions: []
|
153
133
|
extra_rdoc_files: []
|
154
134
|
files:
|
155
|
-
- .document
|
156
135
|
- .gitattributes
|
157
136
|
- .gitignore
|
137
|
+
- CHANGELOG
|
158
138
|
- Gemfile
|
159
139
|
- LICENSE
|
160
|
-
- README.
|
140
|
+
- README.markdown
|
161
141
|
- Rakefile
|
162
142
|
- lib/remote_table.rb
|
143
|
+
- lib/remote_table/config.rb
|
163
144
|
- lib/remote_table/format.rb
|
164
145
|
- lib/remote_table/format/delimited.rb
|
165
146
|
- lib/remote_table/format/excel.rb
|
@@ -171,12 +152,12 @@ files:
|
|
171
152
|
- lib/remote_table/format/mixins/textual.rb
|
172
153
|
- lib/remote_table/format/open_office.rb
|
173
154
|
- lib/remote_table/format/xml.rb
|
155
|
+
- lib/remote_table/format/yaml.rb
|
174
156
|
- lib/remote_table/local_file.rb
|
175
|
-
- lib/remote_table/properties.rb
|
176
157
|
- lib/remote_table/transformer.rb
|
177
|
-
- lib/remote_table/utils.rb
|
178
158
|
- lib/remote_table/version.rb
|
179
159
|
- remote_table.gemspec
|
160
|
+
- test/fixtures/data.yml
|
180
161
|
- test/helper.rb
|
181
162
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
182
163
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
@@ -217,11 +198,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
217
198
|
version: '0'
|
218
199
|
requirements: []
|
219
200
|
rubyforge_project: remotetable
|
220
|
-
rubygems_version: 1.8.
|
201
|
+
rubygems_version: 1.8.21
|
221
202
|
signing_key:
|
222
203
|
specification_version: 3
|
223
|
-
summary: Open local or remote XLSX, XLS, ODS, CSV
|
204
|
+
summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV, TSV (tab
|
205
|
+
separated), other delimited, fixed-width files.
|
224
206
|
test_files:
|
207
|
+
- test/fixtures/data.yml
|
225
208
|
- test/helper.rb
|
226
209
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
227
210
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
@@ -242,3 +225,4 @@ test_files:
|
|
242
225
|
- test/test_old_syntax.rb
|
243
226
|
- test/test_old_transform.rb
|
244
227
|
- test/test_remote_table.rb
|
228
|
+
has_rdoc:
|