remote_table 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/CHANGELOG +19 -0
- data/Gemfile +7 -1
- data/README.markdown +440 -0
- data/Rakefile +6 -14
- data/lib/remote_table.rb +27 -38
- data/lib/remote_table/{properties.rb → config.rb} +39 -43
- data/lib/remote_table/format.rb +24 -27
- data/lib/remote_table/format/delimited.rb +17 -21
- data/lib/remote_table/format/fixed_width.rb +9 -9
- data/lib/remote_table/format/html.rb +0 -2
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +13 -12
- data/lib/remote_table/format/mixins/processed_by_roo.rb +17 -13
- data/lib/remote_table/format/mixins/textual.rb +13 -13
- data/lib/remote_table/format/open_office.rb +3 -0
- data/lib/remote_table/format/xml.rb +0 -2
- data/lib/remote_table/format/yaml.rb +14 -0
- data/lib/remote_table/local_file.rb +69 -7
- data/lib/remote_table/transformer.rb +7 -4
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +5 -13
- data/test/fixtures/data.yml +4 -0
- data/test/helper.rb +8 -9
- data/test/test_big.rb +43 -53
- data/test/test_errata.rb +27 -25
- data/test/test_old_syntax.rb +193 -191
- data/test/test_old_transform.rb +12 -10
- data/test/test_remote_table.rb +57 -47
- metadata +48 -64
- data/.document +0 -5
- data/README.rdoc +0 -167
- data/lib/remote_table/utils.rb +0 -157
data/test/test_old_transform.rb
CHANGED
@@ -31,15 +31,17 @@ class NaturalGasParser
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
34
|
+
describe RemoteTable do
|
35
|
+
describe "when using old-style parser" do
|
36
|
+
it "open an XLS with a parser" do
|
37
|
+
t = RemoteTable.new(:url => 'http://www.eia.gov/dnav/ng/xls/ng_pri_sum_a_EPG0_FWA_DMcf_a.xls',
|
38
|
+
:sheet => 'Data 1',
|
39
|
+
:skip => 2,
|
40
|
+
:select => proc { |row| row['year'].to_i > 1989 },
|
41
|
+
:transform => { :class => NaturalGasParser })
|
42
|
+
t[0]['locatable_type'].must_equal 'Country'
|
43
|
+
t[0]['locatable_id'].must_equal 'US'
|
44
|
+
t[0].row_hash.must_be :present?
|
45
|
+
end
|
44
46
|
end
|
45
47
|
end
|
data/test/test_remote_table.rb
CHANGED
@@ -1,89 +1,99 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'helper'
|
3
|
+
require 'tempfile'
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
describe RemoteTable do
|
6
|
+
it "open an XLSX" do
|
6
7
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
7
|
-
|
8
|
+
t[5]["Requirements"].must_equal "Secure encryption of all data"
|
8
9
|
end
|
9
10
|
|
10
|
-
|
11
|
+
it "add a row hash to every row" do
|
11
12
|
t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
|
12
|
-
|
13
|
+
t[5].row_hash.must_equal "06d8a738551c17735e2731e25c8d0461"
|
13
14
|
end
|
14
15
|
|
15
|
-
|
16
|
+
it "open a google doc" do
|
16
17
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
17
|
-
|
18
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
18
19
|
end
|
19
20
|
|
20
|
-
|
21
|
+
it "open a csv with custom headers" do
|
21
22
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
|
22
|
-
|
23
|
-
|
23
|
+
t[0]['col2'].must_equal 'name'
|
24
|
+
t[1]['col2'].must_equal 'Seamus Abshere'
|
25
|
+
end
|
26
|
+
|
27
|
+
it "open a yaml" do
|
28
|
+
t = RemoteTable.new "file://#{File.expand_path('../fixtures/data.yml', __FILE__)}"
|
29
|
+
t[0]['name'].must_equal 'Seamus Abshere'
|
30
|
+
t[0]['city'].must_equal 'Madison'
|
31
|
+
t[1]['name'].must_equal 'Derek Kastner'
|
32
|
+
t[1]['city'].must_equal 'Lansing'
|
24
33
|
end
|
25
34
|
|
26
|
-
|
35
|
+
it "return an ordered hash" do
|
27
36
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
28
|
-
|
37
|
+
t[0].class.must_equal ::ActiveSupport::OrderedHash
|
29
38
|
end
|
30
39
|
|
31
|
-
|
40
|
+
it "pass through fastercsv options" do
|
32
41
|
f = Tempfile.new 'pass-through-fastercsv-options'
|
33
42
|
f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
|
34
43
|
f.flush
|
35
44
|
t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
|
36
|
-
|
45
|
+
t[0][2].must_equal %{Body example with a <a href="">link</a>}
|
37
46
|
f.close
|
38
47
|
end
|
39
48
|
|
40
|
-
|
49
|
+
it "open a csv inside a zip file" do
|
41
50
|
t = RemoteTable.new 'http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
|
42
51
|
:filename => 'Annex Tables/Annex 3/Table A-93.csv',
|
43
52
|
:skip => 1,
|
44
53
|
:select => lambda { |row| row['Vehicle Age'].to_i.to_s == row['Vehicle Age'] }
|
45
|
-
|
54
|
+
t[0]['LDGV'].must_equal '9.09%'
|
46
55
|
end
|
47
56
|
|
48
|
-
|
57
|
+
it 'not blow up if each is called twice' do
|
49
58
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
59
|
+
count = 0
|
60
|
+
t.each { |row| count += 1 }
|
61
|
+
first_run = count
|
62
|
+
t.each { |row| count += 1}
|
63
|
+
count.must_equal first_run*2
|
54
64
|
end
|
55
65
|
|
56
|
-
|
66
|
+
it 'allow itself to be cleared for save memory' do
|
57
67
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
58
68
|
t.to_a
|
59
|
-
|
69
|
+
t.send(:cache).length.must_be :>, 0
|
60
70
|
t.free
|
61
|
-
|
71
|
+
t.send(:cache).length.must_equal 0
|
62
72
|
end
|
63
73
|
|
64
74
|
# fixes ArgumentError: invalid byte sequence in UTF-8
|
65
|
-
|
66
|
-
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table
|
67
|
-
|
75
|
+
it %{safely strip soft hyphens and read windows-1252 html} do
|
76
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table[2]//table[1]//tr[3]//tr', :column_xpath => 'td', :encoding => 'windows-1252'
|
77
|
+
t.rows.detect { |row| row['Model'] == 'A300B4600' }.wont_equal nil
|
68
78
|
end
|
69
79
|
|
70
|
-
|
80
|
+
it %{transliterate characters from ISO-8859-1} do
|
71
81
|
t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
|
72
|
-
|
82
|
+
t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }.wont_equal nil
|
73
83
|
end
|
74
84
|
|
75
|
-
|
85
|
+
it %{read xml with css selectors} do
|
76
86
|
t = RemoteTable.new 'http://www.nanonull.com/TimeService/TimeService.asmx/getCityTime?city=Chicago', :format => :xml, :row_css => 'string', :headers => false
|
77
|
-
|
87
|
+
/(AM|PM)/.match(t[0][0]).wont_equal nil
|
78
88
|
end
|
79
89
|
|
80
|
-
|
90
|
+
it %{optionally stream rows instead of caching them} do
|
81
91
|
t = RemoteTable.new 'http://www.earthtools.org/timezone/40.71417/-74.00639', :format => :xml, :row_xpath => '//timezone/isotime', :headers => false, :streaming => true
|
82
92
|
time1 = t[0][0]
|
83
|
-
|
93
|
+
/\d\d\d\d-\d\d-\d\d/.match(time1).wont_equal nil
|
84
94
|
sleep 1
|
85
95
|
time2 = t[0][0]
|
86
|
-
|
96
|
+
time1.wont_equal time2
|
87
97
|
end
|
88
98
|
|
89
99
|
{
|
@@ -102,37 +112,37 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
102
112
|
"../support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma" => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
|
103
113
|
"../support/list-en1-semic-3.neooffice.utf-8.csv" => {:format=>"csv", :delimiter => ';'}
|
104
114
|
}.each do |k, v|
|
105
|
-
|
115
|
+
it %{open #{k} with encoding #{v[:encoding] || 'default'}} do
|
106
116
|
options = v.merge(:headers => false, :skip => 2)
|
107
117
|
t = RemoteTable.new "file://#{File.expand_path(k, __FILE__)}", options
|
108
118
|
a = %{ÅLAND ISLANDS}
|
109
119
|
b = (t[1].is_a?(::Array) ? t[1][0] : t[1]['name'])
|
110
120
|
if RUBY_VERSION >= '1.9'
|
111
|
-
|
112
|
-
|
121
|
+
a.encoding.to_s.must_equal 'UTF-8'
|
122
|
+
b.encoding.to_s.must_equal 'UTF-8'
|
113
123
|
end
|
114
|
-
|
124
|
+
b.must_equal a
|
115
125
|
end
|
116
126
|
end
|
117
127
|
|
118
|
-
|
128
|
+
it %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
|
119
129
|
t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
|
120
|
-
|
130
|
+
t[1][0].must_equal %{ÅLAND ISLANDS}
|
121
131
|
end
|
122
132
|
|
123
|
-
|
133
|
+
it %{parse a big CSV that is not UTF-8} do
|
124
134
|
t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
|
125
|
-
|
135
|
+
t[0][1].must_equal 'Goroka'
|
126
136
|
end
|
127
137
|
|
128
|
-
|
138
|
+
it "read only certain rows of an XLSX" do
|
129
139
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => false
|
130
|
-
|
131
|
-
|
140
|
+
t[0][0].must_equal "Permissioning and access groups for all content"
|
141
|
+
t[4][0].must_equal "Manage Multiple Incentive Programs for Participants"
|
132
142
|
|
133
143
|
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :crop => 11..16, :headers => %w{ col1 }
|
134
|
-
|
135
|
-
|
144
|
+
t[0]['col1'].must_equal "Permissioning and access groups for all content"
|
145
|
+
t[4]['col1'].must_equal "Manage Multiple Incentive Programs for Participants"
|
136
146
|
end
|
137
147
|
|
138
148
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2012-04-12 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activesupport
|
17
|
-
requirement:
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,15 @@ dependencies:
|
|
22
22
|
version: 2.3.4
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements:
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ! '>='
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: 2.3.4
|
26
31
|
- !ruby/object:Gem::Dependency
|
27
32
|
name: roo
|
28
|
-
requirement:
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
29
34
|
none: false
|
30
35
|
requirements:
|
31
36
|
- - ! '>='
|
@@ -33,32 +38,31 @@ dependencies:
|
|
33
38
|
version: '1.9'
|
34
39
|
type: :runtime
|
35
40
|
prerelease: false
|
36
|
-
version_requirements:
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.9'
|
37
47
|
- !ruby/object:Gem::Dependency
|
38
48
|
name: fixed_width-multibyte
|
39
|
-
requirement:
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
40
50
|
none: false
|
41
51
|
requirements:
|
42
52
|
- - ! '>='
|
43
53
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
54
|
+
version: 0.2.3
|
45
55
|
type: :runtime
|
46
56
|
prerelease: false
|
47
|
-
version_requirements:
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
name: i18n
|
50
|
-
requirement: &2165418440 !ruby/object:Gem::Requirement
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
58
|
none: false
|
52
59
|
requirements:
|
53
60
|
- - ! '>='
|
54
61
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
56
|
-
type: :runtime
|
57
|
-
prerelease: false
|
58
|
-
version_requirements: *2165418440
|
62
|
+
version: 0.2.3
|
59
63
|
- !ruby/object:Gem::Dependency
|
60
|
-
name:
|
61
|
-
requirement:
|
64
|
+
name: i18n
|
65
|
+
requirement: !ruby/object:Gem::Requirement
|
62
66
|
none: false
|
63
67
|
requirements:
|
64
68
|
- - ! '>='
|
@@ -66,84 +70,60 @@ dependencies:
|
|
66
70
|
version: '0'
|
67
71
|
type: :runtime
|
68
72
|
prerelease: false
|
69
|
-
version_requirements:
|
70
|
-
- !ruby/object:Gem::Dependency
|
71
|
-
name: ensure-encoding
|
72
|
-
requirement: &2165407140 !ruby/object:Gem::Requirement
|
73
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
74
|
none: false
|
74
75
|
requirements:
|
75
76
|
- - ! '>='
|
76
77
|
- !ruby/object:Gem::Version
|
77
78
|
version: '0'
|
78
|
-
type: :runtime
|
79
|
-
prerelease: false
|
80
|
-
version_requirements: *2165407140
|
81
79
|
- !ruby/object:Gem::Dependency
|
82
|
-
name:
|
83
|
-
requirement:
|
80
|
+
name: unix_utils
|
81
|
+
requirement: !ruby/object:Gem::Requirement
|
84
82
|
none: false
|
85
83
|
requirements:
|
86
84
|
- - ! '>='
|
87
85
|
- !ruby/object:Gem::Version
|
88
|
-
version:
|
86
|
+
version: 0.0.8
|
89
87
|
type: :runtime
|
90
88
|
prerelease: false
|
91
|
-
version_requirements:
|
92
|
-
- !ruby/object:Gem::Dependency
|
93
|
-
name: hash_digest
|
94
|
-
requirement: &2165405380 !ruby/object:Gem::Requirement
|
89
|
+
version_requirements: !ruby/object:Gem::Requirement
|
95
90
|
none: false
|
96
91
|
requirements:
|
97
92
|
- - ! '>='
|
98
93
|
- !ruby/object:Gem::Version
|
99
|
-
version:
|
100
|
-
type: :runtime
|
101
|
-
prerelease: false
|
102
|
-
version_requirements: *2165405380
|
94
|
+
version: 0.0.8
|
103
95
|
- !ruby/object:Gem::Dependency
|
104
|
-
name:
|
105
|
-
requirement:
|
96
|
+
name: fastercsv
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
106
98
|
none: false
|
107
99
|
requirements:
|
108
100
|
- - ! '>='
|
109
101
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
111
|
-
type: :
|
102
|
+
version: 1.5.0
|
103
|
+
type: :runtime
|
112
104
|
prerelease: false
|
113
|
-
version_requirements:
|
114
|
-
- !ruby/object:Gem::Dependency
|
115
|
-
name: test-unit
|
116
|
-
requirement: &2165404080 !ruby/object:Gem::Requirement
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
117
106
|
none: false
|
118
107
|
requirements:
|
119
108
|
- - ! '>='
|
120
109
|
- !ruby/object:Gem::Version
|
121
|
-
version:
|
122
|
-
type: :development
|
123
|
-
prerelease: false
|
124
|
-
version_requirements: *2165404080
|
110
|
+
version: 1.5.0
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
127
|
-
requirement:
|
112
|
+
name: hash_digest
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
128
114
|
none: false
|
129
115
|
requirements:
|
130
116
|
- - ! '>='
|
131
117
|
- !ruby/object:Gem::Version
|
132
118
|
version: '0'
|
133
|
-
type: :
|
119
|
+
type: :runtime
|
134
120
|
prerelease: false
|
135
|
-
version_requirements:
|
136
|
-
- !ruby/object:Gem::Dependency
|
137
|
-
name: rake
|
138
|
-
requirement: &2165403060 !ruby/object:Gem::Requirement
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
122
|
none: false
|
140
123
|
requirements:
|
141
124
|
- - ! '>='
|
142
125
|
- !ruby/object:Gem::Version
|
143
126
|
version: '0'
|
144
|
-
type: :development
|
145
|
-
prerelease: false
|
146
|
-
version_requirements: *2165403060
|
147
127
|
description: Gives you a standard way to parse various formats and treat them as an
|
148
128
|
array of hashes.
|
149
129
|
email:
|
@@ -152,14 +132,15 @@ executables: []
|
|
152
132
|
extensions: []
|
153
133
|
extra_rdoc_files: []
|
154
134
|
files:
|
155
|
-
- .document
|
156
135
|
- .gitattributes
|
157
136
|
- .gitignore
|
137
|
+
- CHANGELOG
|
158
138
|
- Gemfile
|
159
139
|
- LICENSE
|
160
|
-
- README.
|
140
|
+
- README.markdown
|
161
141
|
- Rakefile
|
162
142
|
- lib/remote_table.rb
|
143
|
+
- lib/remote_table/config.rb
|
163
144
|
- lib/remote_table/format.rb
|
164
145
|
- lib/remote_table/format/delimited.rb
|
165
146
|
- lib/remote_table/format/excel.rb
|
@@ -171,12 +152,12 @@ files:
|
|
171
152
|
- lib/remote_table/format/mixins/textual.rb
|
172
153
|
- lib/remote_table/format/open_office.rb
|
173
154
|
- lib/remote_table/format/xml.rb
|
155
|
+
- lib/remote_table/format/yaml.rb
|
174
156
|
- lib/remote_table/local_file.rb
|
175
|
-
- lib/remote_table/properties.rb
|
176
157
|
- lib/remote_table/transformer.rb
|
177
|
-
- lib/remote_table/utils.rb
|
178
158
|
- lib/remote_table/version.rb
|
179
159
|
- remote_table.gemspec
|
160
|
+
- test/fixtures/data.yml
|
180
161
|
- test/helper.rb
|
181
162
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
182
163
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
@@ -217,11 +198,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
217
198
|
version: '0'
|
218
199
|
requirements: []
|
219
200
|
rubyforge_project: remotetable
|
220
|
-
rubygems_version: 1.8.
|
201
|
+
rubygems_version: 1.8.21
|
221
202
|
signing_key:
|
222
203
|
specification_version: 3
|
223
|
-
summary: Open local or remote XLSX, XLS, ODS, CSV
|
204
|
+
summary: Open Google Docs spreadsheets, local or remote XLSX, XLS, ODS, CSV, TSV (tab
|
205
|
+
separated), other delimited, fixed-width files.
|
224
206
|
test_files:
|
207
|
+
- test/fixtures/data.yml
|
225
208
|
- test/helper.rb
|
226
209
|
- test/support/list-en1-semic-3.neooffice.binary.ods
|
227
210
|
- test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
|
@@ -242,3 +225,4 @@ test_files:
|
|
242
225
|
- test/test_old_syntax.rb
|
243
226
|
- test/test_old_transform.rb
|
244
227
|
- test/test_remote_table.rb
|
228
|
+
has_rdoc:
|