remote_table 1.2.4 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -15,7 +15,7 @@ As this library matures, those should go away.
15
15
 
16
16
  ==Example
17
17
 
18
- ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
18
+ ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
19
19
  => #<RemoteTable:0x359da50 [...]>
20
20
  ?> t[0]
21
21
  => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
data/lib/remote_table.rb CHANGED
@@ -28,9 +28,7 @@ class RemoteTable
28
28
  autoload :Properties, 'remote_table/properties'
29
29
  autoload :LocalFile, 'remote_table/local_file'
30
30
  autoload :Transformer, 'remote_table/transformer'
31
-
32
- # singletons
33
- autoload :Executor, 'remote_table/executor'
31
+ autoload :Utils, 'remote_table/utils'
34
32
 
35
33
  # Legacy
36
34
  class Transform
@@ -49,18 +47,17 @@ class RemoteTable
49
47
  # RemoteTable.new(url, options = {})
50
48
  #
51
49
  # New syntax:
52
- # RemoteTable.new('www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', 'foo' => 'bar')
50
+ # RemoteTable.new('www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :foo => 'bar')
53
51
  # Old syntax:
54
52
  # RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :foo => 'bar')
55
53
  #
56
54
  # See the <tt>Properties</tt> object for the sorts of options you can pass.
57
55
  def initialize(*args)
58
- @options = args.last.is_a?(::Hash) ? args.last.dup : {}
59
- @options.stringify_keys!
56
+ @options = args.last.is_a?(::Hash) ? args.last.symbolize_keys : {}
60
57
  @url = if args.first.is_a? ::String
61
58
  args.first.dup
62
59
  else
63
- @options['url'].dup
60
+ @options[:url].dup
64
61
  end
65
62
  @url.freeze
66
63
  @options.freeze
@@ -112,15 +109,9 @@ class RemoteTable
112
109
  # clear the row cache to save memory
113
110
  def free
114
111
  cache.clear
115
- ::GC.start
116
112
  nil
117
113
  end
118
114
 
119
- # Used internally to execute stuff in shells.
120
- def self.executor
121
- Executor.instance
122
- end
123
-
124
115
  # Used internally to access a downloaded copy of the file
125
116
  def local_file
126
117
  @local_file ||= LocalFile.new self
@@ -1,10 +1,10 @@
1
1
  if RUBY_VERSION >= '1.9'
2
2
  require 'csv'
3
- ::RemoteTable::CSV = ::CSV
3
+ ::RemoteTable::MyCSV = ::CSV
4
4
  else
5
5
  begin
6
6
  require 'fastercsv'
7
- ::RemoteTable::CSV = ::FasterCSV
7
+ ::RemoteTable::MyCSV = ::FasterCSV
8
8
  rescue ::LoadError
9
9
  $stderr.puts "[remote_table] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
10
10
  raise $!
@@ -20,8 +20,8 @@ class RemoteTable
20
20
  fix_newlines!
21
21
  transliterate_whole_file_to_utf8!
22
22
  skip_rows!
23
- CSV.new(t.local_file.encoded_io, fastercsv_options).each do |row|
24
- if row.is_a?(CSV::Row)
23
+ MyCSV.new(t.local_file.encoded_io, fastercsv_options).each do |row|
24
+ if row.is_a?(MyCSV::Row)
25
25
  hash = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (k, v)|
26
26
  if k.present?
27
27
  memo[k] = v.to_s
@@ -35,30 +35,29 @@ class RemoteTable
35
35
  end
36
36
  end
37
37
  ensure
38
- t.local_file.delete
38
+ t.local_file.cleanup
39
39
  end
40
40
 
41
41
  private
42
42
 
43
- FASTERCSV_OPTIONS = %w{
44
- unconverted_fields
45
- col_sep
46
- headers
47
- row_sep
48
- return_headers
49
- header_converters
50
- quote_char
51
- skip_blanks
52
- converters
53
- force_quotes
54
- }
43
+ FASTERCSV_OPTIONS = [
44
+ :unconverted_fields,
45
+ :col_sep,
46
+ :headers,
47
+ :row_sep,
48
+ :return_headers,
49
+ :header_converters,
50
+ :quote_char,
51
+ :skip_blanks,
52
+ :converters,
53
+ :force_quotes,
54
+ ]
55
55
 
56
56
  def fastercsv_options
57
57
  hsh = t.options.slice *FASTERCSV_OPTIONS
58
- hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
59
- hsh.reverse_merge! 'headers' => t.properties.headers
60
- hsh.reverse_merge! 'col_sep' => t.properties.delimiter
61
- hsh.symbolize_keys
58
+ hsh.merge! :skip_blanks => !t.properties.keep_blank_rows
59
+ hsh.reverse_merge! :headers => t.properties.headers
60
+ hsh.reverse_merge! :col_sep => t.properties.delimiter
62
61
  end
63
62
  end
64
63
  end
@@ -19,7 +19,7 @@ class RemoteTable
19
19
  yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
20
20
  end
21
21
  ensure
22
- t.local_file.delete
22
+ t.local_file.cleanup
23
23
  end
24
24
 
25
25
  private
@@ -42,6 +42,7 @@ class RemoteTable
42
42
  d.rows do |row|
43
43
  row.trap(&everything)
44
44
  t.properties.schema.each do |name, width, options|
45
+ name = name.to_s
45
46
  if name == 'spacer'
46
47
  row.spacer width
47
48
  else
@@ -7,8 +7,9 @@ class RemoteTable
7
7
  raise "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML" unless t.properties.row_css or t.properties.row_xpath
8
8
  remove_useless_characters!
9
9
  transliterate_whole_file_to_utf8!
10
- first_row = true
11
- keys = t.properties.headers if t.properties.headers.is_a?(::Array)
10
+
11
+ headers = t.properties.headers
12
+
12
13
  xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
13
14
  (row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)).each do |row|
14
15
  values = if column_css?
@@ -18,22 +19,21 @@ class RemoteTable
18
19
  else
19
20
  [row]
20
21
  end.map { |cell| assume_utf8 cell.content.gsub(/\s+/, ' ').strip }
21
- if first_row and t.properties.use_first_row_as_header?
22
- keys = values
23
- first_row = false
22
+ if headers == :first_row
23
+ headers = values.select(&:present?)
24
24
  next
25
25
  end
26
26
  output = if t.properties.output_class == ::Array
27
27
  values
28
28
  else
29
- zip keys, values
29
+ zip headers, values
30
30
  end
31
31
  if t.properties.keep_blank_rows or values.any?
32
32
  yield output
33
33
  end
34
34
  end
35
35
  ensure
36
- t.local_file.delete
36
+ t.local_file.cleanup
37
37
  end
38
38
 
39
39
  private
@@ -5,44 +5,54 @@ class RemoteTable
5
5
  def each(&blk)
6
6
  spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
7
  spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
8
+
9
+ first_row = if t.properties.crop
10
+ t.properties.crop.first + 1
11
+ else
12
+ t.properties.skip + 1
13
+ end
14
+
15
+ last_row = if t.properties.crop
16
+ t.properties.crop.last
17
+ else
18
+ spreadsheet.last_row
19
+ end
20
+
8
21
  if t.properties.output_class == ::Array
9
- (first_row..spreadsheet.last_row).each do |y|
22
+ (first_row..last_row).each do |y|
10
23
  output = (1..spreadsheet.last_column).map do |x|
11
24
  assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
12
25
  end
13
26
  yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
14
27
  end
15
28
  else
16
- keys = {}
29
+ headers = {}
17
30
  if t.properties.use_first_row_as_header?
18
31
  (1..spreadsheet.last_column).each do |x|
19
- keys[x] = spreadsheet.cell(first_row, x)
20
- keys[x] = spreadsheet.cell(first_row - 1, x) if keys[x].blank? # look up
21
- keys[x] = assume_utf8 keys[x]
32
+ v = spreadsheet.cell(first_row, x)
33
+ v = spreadsheet.cell(first_row - 1, x) if v.blank? # look up
34
+ if v.present?
35
+ v = assume_utf8 v
36
+ headers[v] = x # 'foobar' is found at column 6
37
+ end
22
38
  end
39
+ # "advance the cursor"
40
+ first_row += 1
23
41
  else
24
- (1..spreadsheet.last_column).each do |x|
25
- keys[x] = assume_utf8 t.properties.headers[x - 1]
42
+ t.properties.headers.each_with_index do |k, i|
43
+ headers[k] = i + 1
26
44
  end
27
45
  end
28
- (first_row+1..spreadsheet.last_row).each do |y|
29
- output = (1..spreadsheet.last_column).inject(::ActiveSupport::OrderedHash.new) do |memo, x|
30
- if keys[x].present?
31
- memo[keys[x]] = assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
32
- end
33
- memo
46
+ (first_row..last_row).each do |y|
47
+ output = ::ActiveSupport::OrderedHash.new
48
+ headers.each do |k, x|
49
+ output[k] = assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
34
50
  end
35
51
  yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
36
52
  end
37
53
  end
38
54
  ensure
39
- t.local_file.delete
40
- end
41
-
42
- private
43
-
44
- def first_row
45
- 1 + t.properties.skip
55
+ t.local_file.cleanup
46
56
  end
47
57
  end
48
58
  end
@@ -1,5 +1,4 @@
1
1
  require 'fileutils'
2
- require 'escape'
3
2
  class RemoteTable
4
3
  class Format
5
4
  module Textual
@@ -8,35 +7,36 @@ class RemoteTable
8
7
  '\xc2\xad', # soft hyphen, often inserted by MS Office (html: &shy;)
9
8
  ]
10
9
  def remove_useless_characters!
11
- ::RemoteTable.executor.bang t.local_file.path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
10
+ Utils.in_place t.local_file.path, 'perl', '-pe', "s/#{USELESS_CHARACTERS.join '//g; s/'}//g"
12
11
  if t.properties.internal_encoding =~ /windows.?1252/i
13
12
  # soft hyphen again, as I have seen it appear in windows 1252
14
- ::RemoteTable.executor.bang t.local_file.path, %q{perl -pe 's/\xad//g'}
13
+ Utils.in_place t.local_file.path, 'perl', '-pe', 's/\xad//g'
15
14
  end
16
15
  end
17
16
 
18
17
  def transliterate_whole_file_to_utf8!
19
- ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.internal_encoding} -t #{::Escape.shell_single_word t.properties.external_encoding_iconv}"
20
- t.properties.update 'encoding' => t.properties.external_encoding
18
+ Utils.in_place t.local_file.path, 'iconv', '-c', '-f', t.properties.internal_encoding, '-t', t.properties.external_encoding_iconv, :ignore_error => true
19
+ t.properties.update :encoding => t.properties.external_encoding
21
20
  end
22
21
 
23
22
  def fix_newlines!
24
- ::RemoteTable.executor.bang t.local_file.path, %q{perl -pe 's/\r\n|\n|\r/\n/g'}
23
+ Utils.in_place t.local_file.path, 'perl', '-pe', 's/\r\n|\n|\r/\n/g'
25
24
  end
26
25
 
27
26
  def skip_rows!
28
27
  return unless t.properties.skip > 0
29
- ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{t.properties.skip + 1}"
28
+ Utils.in_place t.local_file.path, 'tail', '-n', "+#{t.properties.skip + 1}"
30
29
  end
31
30
 
32
31
  def crop_rows!
33
32
  return unless t.properties.crop
34
- ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{::Escape.shell_single_word t.properties.crop.first.to_s} | head -n #{t.properties.crop.last - t.properties.crop.first + 1}"
33
+ Utils.in_place t.local_file.path, 'tail', '-n', "+#{t.properties.crop.first}"
34
+ Utils.in_place t.local_file.path, 'head', '-n', (t.properties.crop.last - t.properties.crop.first + 1).to_s
35
35
  end
36
36
 
37
37
  def cut_columns!
38
38
  return unless t.properties.cut
39
- ::RemoteTable.executor.bang t.local_file.path, "cut -c #{::Escape.shell_single_word t.properties.cut.to_s}"
39
+ Utils.in_place t.local_file.path, 'cut', '-c', t.properties.cut.to_s
40
40
  end
41
41
  end
42
42
  end
@@ -1,6 +1,5 @@
1
1
  require 'fileutils'
2
- require 'escape'
3
- require 'tmpdir'
2
+
4
3
  class RemoteTable
5
4
  class LocalFile #:nodoc:all
6
5
 
@@ -11,7 +10,7 @@ class RemoteTable
11
10
  end
12
11
 
13
12
  def path
14
- save_locally
13
+ generate unless generated?
15
14
  @path
16
15
  end
17
16
 
@@ -23,95 +22,34 @@ class RemoteTable
23
22
  end
24
23
  end
25
24
 
26
- def delete
25
+ def cleanup
27
26
  if @encoded_io.respond_to?(:closed?) and !@encoded_io.closed?
28
27
  @encoded_io.close
29
28
  end
30
- ::FileUtils.rm_rf staging_dir_path
31
29
  @encoded_io = nil
30
+ if @path and ::File.exist?(@path)
31
+ ::FileUtils.rm_f @path
32
+ end
32
33
  @path = nil
33
- @staging_dir_path = nil
34
+ @generated = nil
34
35
  end
35
36
 
36
37
  private
37
38
 
38
- def staging_dir_path #:nodoc:
39
- return @staging_dir_path if @staging_dir_path.is_a?(::String)
40
- srand # in case this was forked by resque
41
- @staging_dir_path = ::File.join ::Dir.tmpdir, 'remote_table_gem', rand.to_s
42
- ::FileUtils.mkdir_p @staging_dir_path
43
- @staging_dir_path
44
- end
45
-
46
- def save_locally
47
- return if @path.is_a?(::String)
48
- @path = ::File.join(staging_dir_path, ::File.basename(t.properties.uri.path))
49
- download
50
- decompress
51
- unpack
52
- pick
53
- @path
54
- end
55
-
56
- def download
57
- if t.properties.uri.scheme == 'file'
58
- ::FileUtils.cp t.properties.uri.path, @path
59
- else
60
- # sabshere 1/20/11 FIXME: ::RemoteTable.config.curl_bin_path or smth
61
- # sabshere 7/20/11 make web requests move more slowly so you don't get accused of DOS
62
- sleep t.properties.delay_between_requests if t.properties.delay_between_requests
63
- $stderr.puts "[remote_table] Downloading #{t.properties.uri.to_s}"
64
- ::RemoteTable.executor.backtick_with_reporting %{
65
- curl
66
- --silent
67
- --show-error
68
- --location
69
- --header "Expect: "
70
- #{"--data #{::Escape.shell_single_word t.properties.form_data}" if t.properties.form_data.present?}
71
- --output #{::Escape.shell_single_word @path}
72
- #{::Escape.shell_single_word t.properties.uri.to_s}
73
- 2>&1
74
- }
75
- end
76
- end
77
-
78
- def decompress
79
- return unless t.properties.compression
80
- new_path = @path.chomp ".#{t.properties.compression}"
81
- raise_on_error = true
82
- cmd = case t.properties.compression
83
- when 'zip', 'exe'
84
- # can't set path yet because there may be multiple files
85
- raise_on_error = false
86
- "unzip -qq -n #{::Escape.shell_single_word @path} -d #{::File.dirname(@path)}"
87
- when 'bz2'
88
- @path = new_path
89
- "bunzip2 --stdout #{::Escape.shell_single_word @path} > #{::Escape.shell_single_word new_path}"
90
- when 'gz'
91
- @path = new_path
92
- "gunzip --stdout #{::Escape.shell_single_word @path} > #{::Escape.shell_single_word new_path}"
93
- end
94
- ::RemoteTable.executor.backtick_with_reporting cmd, raise_on_error
39
+ def generated?
40
+ @generated == true
95
41
  end
96
-
97
- def unpack
98
- return unless t.properties.packing
99
- cmd = case t.properties.packing
100
- when 'tar'
101
- "tar -xf #{::Escape.shell_single_word @path} -C #{::File.dirname(@path)}"
42
+
43
+ def generate
44
+ tmp_path = Utils.download t.properties.uri, t.properties.form_data
45
+ if compression = t.properties.compression
46
+ tmp_path = Utils.decompress tmp_path, compression
102
47
  end
103
- ::RemoteTable.executor.backtick_with_reporting cmd
104
- end
105
-
106
- # ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
107
- # ex. B: 2007-01.tar.gz (packing)
108
- # ex. C: 2007-01.zip (compression capable of storing multiple files)
109
- def pick
110
- if t.properties.filename.present?
111
- @path = ::File.join ::File.dirname(@path), t.properties.filename
112
- elsif t.properties.glob.present?
113
- @path = ::Dir[::File.dirname(@path)+t.properties.glob].first
48
+ if packing = t.properties.packing
49
+ tmp_path = Utils.unpack tmp_path, packing
114
50
  end
51
+ @path = Utils.pick tmp_path, :filename => t.properties.filename, :glob => t.properties.glob
52
+ @generated = true
115
53
  end
116
54
  end
117
55
  end
@@ -7,22 +7,18 @@ class RemoteTable
7
7
 
8
8
  def initialize(t)
9
9
  @t = t
10
- @current_options = t.options.dup
10
+ @current_options = t.options.symbolize_keys
11
11
  end
12
12
 
13
13
  def update(options)
14
14
  current_options.update options
15
15
  end
16
-
17
- def delay_between_requests
18
- current_options['delay_between_requests'] || (::ENV.has_key?('REMOTE_TABLE_DELAY_BETWEEN_REQUESTS') ? ::ENV['REMOTE_TABLE_DELAY_BETWEEN_REQUESTS'].to_i : nil)
19
- end
20
-
16
+
21
17
  # The parsed URI of the file to get.
22
18
  def uri
23
19
  return @uri if @uri.is_a?(::URI)
24
20
  @uri = ::URI.parse t.url
25
- if @uri.host == 'spreadsheets.google.com'
21
+ if @uri.host == 'spreadsheets.google.com' or @uri.host == 'docs.google.com'
26
22
  @uri.query = 'output=csv&' + @uri.query.sub(/\&?output=.*?(\&|\z)/, '\1')
27
23
  end
28
24
  @uri
@@ -33,19 +29,19 @@ class RemoteTable
33
29
  # * call each
34
30
  # Defaults to false.
35
31
  def streaming
36
- current_options['streaming'] || false
32
+ current_options[:streaming] || false
37
33
  end
38
34
 
39
35
  # Defaults to true.
40
36
  def warn_on_multiple_downloads
41
- current_options['warn_on_multiple_downloads'] != false
37
+ current_options[:warn_on_multiple_downloads] != false
42
38
  end
43
39
 
44
40
  # The headers specified by the user
45
41
  #
46
42
  # Default: :first_row
47
43
  def headers
48
- current_options['headers'].nil? ? :first_row : current_options['headers']
44
+ current_options[:headers].nil? ? :first_row : current_options[:headers]
49
45
  end
50
46
 
51
47
  def use_first_row_as_header?
@@ -60,30 +56,30 @@ class RemoteTable
60
56
  #
61
57
  # Default: 0
62
58
  def sheet
63
- current_options['sheet'] || 0
59
+ current_options[:sheet] || 0
64
60
  end
65
61
 
66
62
  # Whether to keep blank rows
67
63
  #
68
64
  # Default: false
69
65
  def keep_blank_rows
70
- current_options['keep_blank_rows'] || false
66
+ current_options[:keep_blank_rows] || false
71
67
  end
72
68
 
73
69
  # Form data to send in with the download request
74
70
  def form_data
75
- current_options['form_data']
71
+ current_options[:form_data]
76
72
  end
77
73
 
78
74
  # How many rows to skip
79
75
  #
80
76
  # Default: 0
81
77
  def skip
82
- current_options['skip'].to_i
78
+ current_options[:skip] || 0
83
79
  end
84
80
 
85
81
  def internal_encoding
86
- (current_options['encoding'] || 'UTF-8').upcase
82
+ (current_options[:encoding] || 'UTF-8').upcase
87
83
  end
88
84
 
89
85
  def external_encoding
@@ -98,49 +94,47 @@ class RemoteTable
98
94
  #
99
95
  # Default: ","
100
96
  def delimiter
101
- current_options['delimiter'] || ','
97
+ current_options[:delimiter] || ','
102
98
  end
103
99
 
104
100
  # The XPath used to find rows
105
101
  def row_xpath
106
- current_options['row_xpath']
102
+ current_options[:row_xpath]
107
103
  end
108
104
 
109
105
  # The XPath used to find columns
110
106
  def column_xpath
111
- current_options['column_xpath']
107
+ current_options[:column_xpath]
112
108
  end
113
109
 
114
110
  # The CSS selector used to find rows
115
111
  def row_css
116
- current_options['row_css']
112
+ current_options[:row_css]
117
113
  end
118
114
 
119
115
  # The CSS selector used to find columns
120
116
  def column_css
121
- current_options['column_css']
117
+ current_options[:column_css]
122
118
  end
123
119
 
124
120
  # The compression type.
125
121
  #
126
122
  # Default: guessed from URI.
127
123
  #
128
- # Can be specified as: "gz", "zip", "bz2", "exe" (treated as "zip")
124
+ # Can be specified as: :gz, :zip, :bz2, :exe (treated as :zip)
129
125
  def compression
130
- clue = if current_options['compression']
131
- current_options['compression'].to_s
132
- else
133
- ::File.extname uri.path
126
+ if current_options.has_key?(:compression)
127
+ return current_options[:compression]
134
128
  end
135
- case clue.downcase
129
+ case ::File.extname(uri.path).downcase
136
130
  when /gz/, /gunzip/
137
- 'gz'
131
+ :gz
138
132
  when /zip/
139
- 'zip'
133
+ :zip
140
134
  when /bz2/, /bunzip2/
141
- 'bz2'
135
+ :bz2
142
136
  when /exe/
143
- 'exe'
137
+ :exe
144
138
  end
145
139
  end
146
140
 
@@ -148,82 +142,79 @@ class RemoteTable
148
142
  #
149
143
  # Default: guessed from URI.
150
144
  #
151
- # Can be specified as: "tar"
145
+ # Can be specified as: :tar
152
146
  def packing
153
- clue = if current_options['packing']
154
- current_options['packing'].to_s
155
- else
156
- ::File.extname(uri.path.sub(/\.#{compression}\z/, ''))
147
+ if current_options.has_key?(:packing)
148
+ return current_options[:packing]
157
149
  end
158
- case clue.downcase
159
- when /tar/
160
- 'tar'
150
+ if uri.path =~ %r{\.tar(?:\.|$)}i
151
+ :tar
161
152
  end
162
153
  end
163
154
 
164
155
  # The glob used to pick a file out of an archive.
165
156
  #
166
157
  # Example:
167
- # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'glob' => '/*.csv'
158
+ # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
168
159
  def glob
169
- current_options['glob']
160
+ current_options[:glob]
170
161
  end
171
162
 
172
163
  # The filename, which can be used to pick a file out of an archive.
173
164
  #
174
165
  # Example:
175
- # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'filename' => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
166
+ # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
176
167
  def filename
177
- current_options['filename']
168
+ current_options[:filename]
178
169
  end
179
170
 
180
171
  # Cut columns up to this character
181
172
  def cut
182
- current_options['cut']
173
+ current_options[:cut]
183
174
  end
184
175
 
185
176
  # Keep only the rows in this line range (crop.first through crop.last)
186
177
  def crop
187
- current_options['crop']
178
+ current_options[:crop]
188
179
  end
189
180
 
190
181
  # The fixed-width schema, given as an array
191
182
  #
192
183
  # Example:
193
184
  # RemoteTable.new('http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
194
- # 'format' => 'fixed_width',
195
- # 'skip' => 1,
196
- # 'schema' => [[ 'header4', 10, { :type => :string } ],
185
+ # :format => :fixed_width,
186
+ # :skip => 1,
187
+ # :schema => [[ 'header4', 10, { :type => :string } ],
197
188
  # [ 'spacer', 1 ],
198
189
  # [ 'header5', 10, { :type => :string } ],
199
190
  # [ 'spacer', 12 ],
200
191
  # [ 'header6', 10, { :type => :string } ]])
201
192
  def schema
202
- current_options['schema']
193
+ current_options[:schema]
203
194
  end
204
195
 
205
196
  # The name of the fixed-width schema according to FixedWidth
206
197
  def schema_name
207
- current_options['schema_name']
198
+ current_options[:schema_name]
208
199
  end
209
200
 
210
201
  # A proc to call to decide whether to return a row.
211
202
  def select
212
- current_options['select']
203
+ current_options[:select]
213
204
  end
214
205
 
215
206
  # A proc to call to decide whether to return a row.
216
207
  def reject
217
- current_options['reject']
208
+ current_options[:reject]
218
209
  end
219
210
 
220
211
  # A hash of options to create a new Errata instance (see the Errata gem at http://github.com/seamusabshere/errata) to be used on every row.
221
212
  def errata
222
- return unless current_options.has_key? 'errata'
223
- @errata ||= if current_options['errata'].is_a? ::Hash
224
- ::Errata.new current_options['errata']
213
+ return unless current_options.has_key? :errata
214
+ @errata ||= if current_options[:errata].is_a? ::Hash
215
+ ::Errata.new current_options[:errata]
225
216
  else
226
- current_options['errata']
217
+ current_options[:errata]
227
218
  end
228
219
  end
229
220
 
@@ -233,15 +224,15 @@ class RemoteTable
233
224
  #
234
225
  # Default: guessed from file extension (which is usually the same as the URI, but sometimes not if you pick out a specific file from an archive)
235
226
  #
236
- # Can be specified as: "xlsx", "xls", "csv", "ods", "fixed_width", "html"
227
+ # Can be specified as: :xlsx, :xls, :delimited (aka :csv and :tsv), :ods, :fixed_width, :html
237
228
  def format
238
- return Format::Delimited if uri.host == 'spreadsheets.google.com'
239
- clue = if current_options['format']
240
- current_options['format'].to_s
229
+ return Format::Delimited if uri.host == 'spreadsheets.google.com' or @uri.host == 'docs.google.com'
230
+ clue = if current_options.has_key?(:format)
231
+ current_options[:format]
241
232
  else
242
- ::File.extname t.local_file.path
233
+ t.local_file.path
243
234
  end
244
- case clue.downcase
235
+ case clue.to_s.downcase
245
236
  when /xlsx/, /excelx/
246
237
  Format::Excelx
247
238
  when /xls/, /excel/