remote_table 0.2.8 → 0.2.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.8
1
+ 0.2.9
data/lib/remote_table.rb CHANGED
@@ -1,6 +1,11 @@
1
1
  require 'digest/md5'
2
2
  require 'iconv'
3
3
  require 'active_support'
4
+ begin; require 'active_support/core_ext/object/blank'; rescue MissingSourceFile; end
5
+ begin; require 'active_support/core_ext/string/inflections'; rescue MissingSourceFile; end
6
+ begin; require 'active_support/core_ext/array/wrap'; rescue MissingSourceFile; end
7
+ begin; require 'active_support/core_ext/hash/except'; rescue MissingSourceFile; end
8
+ begin; require 'active_support/core_ext/string/starts_ends_with'; rescue MissingSourceFile; end
4
9
  require 'tempfile'
5
10
  require 'fastercsv'
6
11
  require 'slither'
@@ -1,51 +1,46 @@
1
1
  class RemoteTable
2
2
  class Request
3
- attr_accessor :url, :post_data, :username, :password
3
+ attr_accessor :parsed_url, :post_data, :username, :password
4
4
  attr_accessor :form_data
5
5
 
6
6
  # TODO: support post_data
7
7
  # TODO: support HTTP basic auth
8
8
  def initialize(bus)
9
- @url = bus[:url] or raise "need url"
9
+ @parsed_url = URI.parse(bus[:url]) or raise "need url"
10
+ if @parsed_url.host == 'spreadsheets.google.com' and (bus[:format].blank? or bus[:format].to_s == 'csv')
11
+ @parsed_url.query = 'output=csv&' + @parsed_url.query.sub(/\&*output=.*(\&|\z)/, '')
12
+ end
10
13
  @form_data = bus[:form_data]
11
14
  end
12
15
 
13
16
  def download
14
- path = ::File.join(staging_dir_path, 'REMOTE_TABLE_PACKAGE')
15
- cmd = %{
16
- curl \
17
- --silent \
18
- --header "Expect: " \
19
- --location \
20
- #{"--data \"#{form_data}\"" if form_data.present?} \
21
- "#{url_with_google_docs_handling}" \
22
- --output "#{path}"
23
- }
24
- `#{cmd}`
25
- path
17
+ path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
18
+ if parsed_url.scheme == 'file'
19
+ parsed_url.path
20
+ else
21
+ cmd = %{
22
+ curl \
23
+ --silent \
24
+ --header "Expect: " \
25
+ --location \
26
+ #{"--data \"#{form_data}\"" if form_data.present?} \
27
+ "#{parsed_url}" \
28
+ --output "#{path}"
29
+ }
30
+ `#{cmd}`
31
+ path
32
+ end
26
33
  end
27
34
 
28
35
  private
29
36
 
30
- def staging_dir_path
31
- path = tempfile_path_from_url
32
- FileUtils.rm_f(path)
33
- FileUtils.mkdir(path)
34
- at_exit { FileUtils.rm_rf(path) }
35
- path
36
- end
37
-
38
- def tempfile_path_from_url
39
- Tempfile.open(url.gsub(/[^a-z0-9]+/i, '_')[0,100]).path
40
- end
41
-
42
- def url_with_google_docs_handling
43
- url = self.url
44
- if url.include?('spreadsheets.google.com')
45
- url = url.gsub(/\&output=.*(\&|\z)/, '')
46
- url << "&output=csv"
47
- end
48
- url
37
+ def staging_dir_path
38
+ return @_staging_dir_path if @_staging_dir_path
39
+ @_staging_dir_path = Tempfile.open(parsed_url.to_s.gsub(/[^a-z0-9]+/i, '_')[0,100]).path
40
+ FileUtils.rm_f @_staging_dir_path
41
+ FileUtils.mkdir @_staging_dir_path
42
+ at_exit { FileUtils.rm_rf @_staging_dir_path }
43
+ @_staging_dir_path
49
44
  end
50
45
  end
51
46
  end
data/remote_table.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{remote_table}
8
- s.version = "0.2.8"
8
+ s.version = "0.2.9"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-03-30}
12
+ s.date = %q{2010-04-15}
13
13
  s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -11,16 +11,18 @@ class FuelOilParser
11
11
  end
12
12
  def apply(row)
13
13
  virtual_rows = []
14
- row.keys.grep(/(.*) Residual Fuel Oil/) do |location_column_name|
14
+ row.keys.grep(/(.+) Residual Fuel Oil/) do |location_column_name|
15
+ first_part = $1
15
16
  next if (cost = row[location_column_name]).blank? or (date = row['Date']).blank?
16
- if $1.starts_with?('U.S.')
17
+ if first_part.starts_with?('U.S.')
17
18
  locatable = "united_states (Country)"
18
- elsif $1.include?('PADD')
19
- /\(PADD (.*)\)/.match($1)
20
- next if $1 == '1' # skip PADD 1 because we always prefer subdistricts
21
- locatable = "#{$1} (PetroleumAdministrationForDefenseDistrict)"
19
+ elsif first_part.include?('PADD')
20
+ /\(PADD (.*)\)/.match(first_part)
21
+ padd_part = $1
22
+ next if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
23
+ locatable = "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
22
24
  else
23
- locatable = "#{$1} (State)"
25
+ locatable = "#{first_part} (State)"
24
26
  end
25
27
  date = Time.parse(date)
26
28
  virtual_rows << {
@@ -160,7 +162,7 @@ class RemoteTableTest < Test::Unit::TestCase
160
162
  assert_equal row.except('row_hash'), @test2_rows[index]
161
163
  end
162
164
  end
163
-
165
+
164
166
  should "read fixed width correctly, keeping blank rows" do
165
167
  t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
166
168
  :format => :fixed_width,
@@ -257,6 +259,7 @@ class RemoteTableTest < Test::Unit::TestCase
257
259
 
258
260
  t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
259
261
  :transform => { :class => FuelOilParser })
262
+
260
263
  assert t.rows.include?(ma_1990_01)
261
264
  assert t.rows.include?(ga_1990_01)
262
265
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 8
9
- version: 0.2.8
8
+ - 9
9
+ version: 0.2.9
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-03-30 00:00:00 -04:00
18
+ date: 2010-04-15 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency