remote_table 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/remote_table.rb +5 -0
- data/lib/remote_table/request.rb +28 -33
- data/remote_table.gemspec +2 -2
- data/test/remote_table_test.rb +11 -8
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.8
|
1
|
+
0.2.9
|
data/lib/remote_table.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'digest/md5'
|
2
2
|
require 'iconv'
|
3
3
|
require 'active_support'
|
4
|
+
begin; require 'active_support/core_ext/object/blank'; rescue MissingSourceFile; end
|
5
|
+
begin; require 'active_support/core_ext/string/inflections'; rescue MissingSourceFile; end
|
6
|
+
begin; require 'active_support/core_ext/array/wrap'; rescue MissingSourceFile; end
|
7
|
+
begin; require 'active_support/core_ext/hash/except'; rescue MissingSourceFile; end
|
8
|
+
begin; require 'active_support/core_ext/string/starts_ends_with'; rescue MissingSourceFile; end
|
4
9
|
require 'tempfile'
|
5
10
|
require 'fastercsv'
|
6
11
|
require 'slither'
|
data/lib/remote_table/request.rb
CHANGED
@@ -1,51 +1,46 @@
|
|
1
1
|
class RemoteTable
|
2
2
|
class Request
|
3
|
-
attr_accessor :
|
3
|
+
attr_accessor :parsed_url, :post_data, :username, :password
|
4
4
|
attr_accessor :form_data
|
5
5
|
|
6
6
|
# TODO: support post_data
|
7
7
|
# TODO: support HTTP basic auth
|
8
8
|
def initialize(bus)
|
9
|
-
@
|
9
|
+
@parsed_url = URI.parse(bus[:url]) or raise "need url"
|
10
|
+
if @parsed_url.host == 'spreadsheets.google.com' and (bus[:format].blank? or bus[:format].to_s == 'csv')
|
11
|
+
@parsed_url.query = 'output=csv&' + @parsed_url.query.sub(/\&*output=.*(\&|\z)/, '')
|
12
|
+
end
|
10
13
|
@form_data = bus[:form_data]
|
11
14
|
end
|
12
15
|
|
13
16
|
def download
|
14
|
-
path = ::File.join
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
17
|
+
path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
|
18
|
+
if parsed_url.scheme == 'file'
|
19
|
+
parsed_url.path
|
20
|
+
else
|
21
|
+
cmd = %{
|
22
|
+
curl \
|
23
|
+
--silent \
|
24
|
+
--header "Expect: " \
|
25
|
+
--location \
|
26
|
+
#{"--data \"#{form_data}\"" if form_data.present?} \
|
27
|
+
"#{parsed_url}" \
|
28
|
+
--output "#{path}"
|
29
|
+
}
|
30
|
+
`#{cmd}`
|
31
|
+
path
|
32
|
+
end
|
26
33
|
end
|
27
34
|
|
28
35
|
private
|
29
36
|
|
30
|
-
def staging_dir_path
|
31
|
-
|
32
|
-
|
33
|
-
FileUtils.
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
def tempfile_path_from_url
|
39
|
-
Tempfile.open(url.gsub(/[^a-z0-9]+/i, '_')[0,100]).path
|
40
|
-
end
|
41
|
-
|
42
|
-
def url_with_google_docs_handling
|
43
|
-
url = self.url
|
44
|
-
if url.include?('spreadsheets.google.com')
|
45
|
-
url = url.gsub(/\&output=.*(\&|\z)/, '')
|
46
|
-
url << "&output=csv"
|
47
|
-
end
|
48
|
-
url
|
37
|
+
def staging_dir_path
|
38
|
+
return @_staging_dir_path if @_staging_dir_path
|
39
|
+
@_staging_dir_path = Tempfile.open(parsed_url.to_s.gsub(/[^a-z0-9]+/i, '_')[0,100]).path
|
40
|
+
FileUtils.rm_f @_staging_dir_path
|
41
|
+
FileUtils.mkdir @_staging_dir_path
|
42
|
+
at_exit { FileUtils.rm_rf @_staging_dir_path }
|
43
|
+
@_staging_dir_path
|
49
44
|
end
|
50
45
|
end
|
51
46
|
end
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.2.8"
|
8
|
+
s.version = "0.2.9"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-04-15}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/remote_table_test.rb
CHANGED
@@ -11,16 +11,18 @@ class FuelOilParser
|
|
11
11
|
end
|
12
12
|
def apply(row)
|
13
13
|
virtual_rows = []
|
14
|
-
row.keys.grep(/(
|
14
|
+
row.keys.grep(/(.+) Residual Fuel Oil/) do |location_column_name|
|
15
|
+
first_part = $1
|
15
16
|
next if (cost = row[location_column_name]).blank? or (date = row['Date']).blank?
|
16
|
-
if
|
17
|
+
if first_part.starts_with?('U.S.')
|
17
18
|
locatable = "united_states (Country)"
|
18
|
-
elsif
|
19
|
-
/\(PADD (.*)\)/.match(
|
20
|
-
|
21
|
-
|
19
|
+
elsif first_part.include?('PADD')
|
20
|
+
/\(PADD (.*)\)/.match(first_part)
|
21
|
+
padd_part = $1
|
22
|
+
next if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
|
23
|
+
locatable = "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
|
22
24
|
else
|
23
|
-
locatable = "#{
|
25
|
+
locatable = "#{first_part} (State)"
|
24
26
|
end
|
25
27
|
date = Time.parse(date)
|
26
28
|
virtual_rows << {
|
@@ -160,7 +162,7 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
160
162
|
assert_equal row.except('row_hash'), @test2_rows[index]
|
161
163
|
end
|
162
164
|
end
|
163
|
-
|
165
|
+
|
164
166
|
should "read fixed width correctly, keeping blank rows" do
|
165
167
|
t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
166
168
|
:format => :fixed_width,
|
@@ -257,6 +259,7 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
257
259
|
|
258
260
|
t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
|
259
261
|
:transform => { :class => FuelOilParser })
|
262
|
+
|
260
263
|
assert t.rows.include?(ma_1990_01)
|
261
264
|
assert t.rows.include?(ga_1990_01)
|
262
265
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 8
|
9
|
-
version: 0.2.8
|
8
|
+
- 9
|
9
|
+
version: 0.2.9
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-04-15 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|