remote_table 0.2.8 → 0.2.9
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/remote_table.rb +5 -0
- data/lib/remote_table/request.rb +28 -33
- data/remote_table.gemspec +2 -2
- data/test/remote_table_test.rb +11 -8
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.8
|
1
|
+
0.2.9
|
data/lib/remote_table.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'digest/md5'
|
2
2
|
require 'iconv'
|
3
3
|
require 'active_support'
|
4
|
+
begin; require 'active_support/core_ext/object/blank'; rescue MissingSourceFile; end
|
5
|
+
begin; require 'active_support/core_ext/string/inflections'; rescue MissingSourceFile; end
|
6
|
+
begin; require 'active_support/core_ext/array/wrap'; rescue MissingSourceFile; end
|
7
|
+
begin; require 'active_support/core_ext/hash/except'; rescue MissingSourceFile; end
|
8
|
+
begin; require 'active_support/core_ext/string/starts_ends_with'; rescue MissingSourceFile; end
|
4
9
|
require 'tempfile'
|
5
10
|
require 'fastercsv'
|
6
11
|
require 'slither'
|
data/lib/remote_table/request.rb
CHANGED
@@ -1,51 +1,46 @@
|
|
1
1
|
class RemoteTable
|
2
2
|
class Request
|
3
|
-
attr_accessor :
|
3
|
+
attr_accessor :parsed_url, :post_data, :username, :password
|
4
4
|
attr_accessor :form_data
|
5
5
|
|
6
6
|
# TODO: support post_data
|
7
7
|
# TODO: support HTTP basic auth
|
8
8
|
def initialize(bus)
|
9
|
-
@
|
9
|
+
@parsed_url = URI.parse(bus[:url]) or raise "need url"
|
10
|
+
if @parsed_url.host == 'spreadsheets.google.com' and (bus[:format].blank? or bus[:format].to_s == 'csv')
|
11
|
+
@parsed_url.query = 'output=csv&' + @parsed_url.query.sub(/\&*output=.*(\&|\z)/, '')
|
12
|
+
end
|
10
13
|
@form_data = bus[:form_data]
|
11
14
|
end
|
12
15
|
|
13
16
|
def download
|
14
|
-
path = ::File.join
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
17
|
+
path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
|
18
|
+
if parsed_url.scheme == 'file'
|
19
|
+
parsed_url.path
|
20
|
+
else
|
21
|
+
cmd = %{
|
22
|
+
curl \
|
23
|
+
--silent \
|
24
|
+
--header "Expect: " \
|
25
|
+
--location \
|
26
|
+
#{"--data \"#{form_data}\"" if form_data.present?} \
|
27
|
+
"#{parsed_url}" \
|
28
|
+
--output "#{path}"
|
29
|
+
}
|
30
|
+
`#{cmd}`
|
31
|
+
path
|
32
|
+
end
|
26
33
|
end
|
27
34
|
|
28
35
|
private
|
29
36
|
|
30
|
-
def staging_dir_path
|
31
|
-
|
32
|
-
|
33
|
-
FileUtils.
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
def tempfile_path_from_url
|
39
|
-
Tempfile.open(url.gsub(/[^a-z0-9]+/i, '_')[0,100]).path
|
40
|
-
end
|
41
|
-
|
42
|
-
def url_with_google_docs_handling
|
43
|
-
url = self.url
|
44
|
-
if url.include?('spreadsheets.google.com')
|
45
|
-
url = url.gsub(/\&output=.*(\&|\z)/, '')
|
46
|
-
url << "&output=csv"
|
47
|
-
end
|
48
|
-
url
|
37
|
+
def staging_dir_path
|
38
|
+
return @_staging_dir_path if @_staging_dir_path
|
39
|
+
@_staging_dir_path = Tempfile.open(parsed_url.to_s.gsub(/[^a-z0-9]+/i, '_')[0,100]).path
|
40
|
+
FileUtils.rm_f @_staging_dir_path
|
41
|
+
FileUtils.mkdir @_staging_dir_path
|
42
|
+
at_exit { FileUtils.rm_rf @_staging_dir_path }
|
43
|
+
@_staging_dir_path
|
49
44
|
end
|
50
45
|
end
|
51
46
|
end
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.2.8"
|
8
|
+
s.version = "0.2.9"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-04-15}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/remote_table_test.rb
CHANGED
@@ -11,16 +11,18 @@ class FuelOilParser
|
|
11
11
|
end
|
12
12
|
def apply(row)
|
13
13
|
virtual_rows = []
|
14
|
-
row.keys.grep(/(
|
14
|
+
row.keys.grep(/(.+) Residual Fuel Oil/) do |location_column_name|
|
15
|
+
first_part = $1
|
15
16
|
next if (cost = row[location_column_name]).blank? or (date = row['Date']).blank?
|
16
|
-
if
|
17
|
+
if first_part.starts_with?('U.S.')
|
17
18
|
locatable = "united_states (Country)"
|
18
|
-
elsif
|
19
|
-
/\(PADD (.*)\)/.match(
|
20
|
-
|
21
|
-
|
19
|
+
elsif first_part.include?('PADD')
|
20
|
+
/\(PADD (.*)\)/.match(first_part)
|
21
|
+
padd_part = $1
|
22
|
+
next if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
|
23
|
+
locatable = "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
|
22
24
|
else
|
23
|
-
locatable = "#{
|
25
|
+
locatable = "#{first_part} (State)"
|
24
26
|
end
|
25
27
|
date = Time.parse(date)
|
26
28
|
virtual_rows << {
|
@@ -160,7 +162,7 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
160
162
|
assert_equal row.except('row_hash'), @test2_rows[index]
|
161
163
|
end
|
162
164
|
end
|
163
|
-
|
165
|
+
|
164
166
|
should "read fixed width correctly, keeping blank rows" do
|
165
167
|
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
166
168
|
:format => :fixed_width,
|
@@ -257,6 +259,7 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
257
259
|
|
258
260
|
t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
|
259
261
|
:transform => { :class => FuelOilParser })
|
262
|
+
|
260
263
|
assert t.rows.include?(ma_1990_01)
|
261
264
|
assert t.rows.include?(ga_1990_01)
|
262
265
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 8
|
9
|
-
version: 0.2.8
|
8
|
+
- 9
|
9
|
+
version: 0.2.9
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-04-15 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|