remote_table 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -15,7 +15,7 @@ As this library matures, those should go away.
15
15
 
16
16
  ==Example
17
17
 
18
- ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
18
+ ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
19
19
  => #<RemoteTable:0x359da50 [...]>
20
20
  ?> t[0]
21
21
  => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
data/lib/remote_table.rb CHANGED
@@ -28,9 +28,7 @@ class RemoteTable
28
28
  autoload :Properties, 'remote_table/properties'
29
29
  autoload :LocalFile, 'remote_table/local_file'
30
30
  autoload :Transformer, 'remote_table/transformer'
31
-
32
- # singletons
33
- autoload :Executor, 'remote_table/executor'
31
+ autoload :Utils, 'remote_table/utils'
34
32
 
35
33
  # Legacy
36
34
  class Transform
@@ -49,18 +47,17 @@ class RemoteTable
49
47
  # RemoteTable.new(url, options = {})
50
48
  #
51
49
  # New syntax:
52
- # RemoteTable.new('www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', 'foo' => 'bar')
50
+ # RemoteTable.new('www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :foo => 'bar')
53
51
  # Old syntax:
54
52
  # RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :foo => 'bar')
55
53
  #
56
54
  # See the <tt>Properties</tt> object for the sorts of options you can pass.
57
55
  def initialize(*args)
58
- @options = args.last.is_a?(::Hash) ? args.last.dup : {}
59
- @options.stringify_keys!
56
+ @options = args.last.is_a?(::Hash) ? args.last.symbolize_keys : {}
60
57
  @url = if args.first.is_a? ::String
61
58
  args.first.dup
62
59
  else
63
- @options['url'].dup
60
+ @options[:url].dup
64
61
  end
65
62
  @url.freeze
66
63
  @options.freeze
@@ -112,15 +109,9 @@ class RemoteTable
112
109
  # clear the row cache to save memory
113
110
  def free
114
111
  cache.clear
115
- ::GC.start
116
112
  nil
117
113
  end
118
114
 
119
- # Used internally to execute stuff in shells.
120
- def self.executor
121
- Executor.instance
122
- end
123
-
124
115
  # Used internally to access a downloaded copy of the file
125
116
  def local_file
126
117
  @local_file ||= LocalFile.new self
@@ -1,10 +1,10 @@
1
1
  if RUBY_VERSION >= '1.9'
2
2
  require 'csv'
3
- ::RemoteTable::CSV = ::CSV
3
+ ::RemoteTable::MyCSV = ::CSV
4
4
  else
5
5
  begin
6
6
  require 'fastercsv'
7
- ::RemoteTable::CSV = ::FasterCSV
7
+ ::RemoteTable::MyCSV = ::FasterCSV
8
8
  rescue ::LoadError
9
9
  $stderr.puts "[remote_table] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
10
10
  raise $!
@@ -20,8 +20,8 @@ class RemoteTable
20
20
  fix_newlines!
21
21
  transliterate_whole_file_to_utf8!
22
22
  skip_rows!
23
- CSV.new(t.local_file.encoded_io, fastercsv_options).each do |row|
24
- if row.is_a?(CSV::Row)
23
+ MyCSV.new(t.local_file.encoded_io, fastercsv_options).each do |row|
24
+ if row.is_a?(MyCSV::Row)
25
25
  hash = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (k, v)|
26
26
  if k.present?
27
27
  memo[k] = v.to_s
@@ -35,30 +35,29 @@ class RemoteTable
35
35
  end
36
36
  end
37
37
  ensure
38
- t.local_file.delete
38
+ t.local_file.cleanup
39
39
  end
40
40
 
41
41
  private
42
42
 
43
- FASTERCSV_OPTIONS = %w{
44
- unconverted_fields
45
- col_sep
46
- headers
47
- row_sep
48
- return_headers
49
- header_converters
50
- quote_char
51
- skip_blanks
52
- converters
53
- force_quotes
54
- }
43
+ FASTERCSV_OPTIONS = [
44
+ :unconverted_fields,
45
+ :col_sep,
46
+ :headers,
47
+ :row_sep,
48
+ :return_headers,
49
+ :header_converters,
50
+ :quote_char,
51
+ :skip_blanks,
52
+ :converters,
53
+ :force_quotes,
54
+ ]
55
55
 
56
56
  def fastercsv_options
57
57
  hsh = t.options.slice *FASTERCSV_OPTIONS
58
- hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
59
- hsh.reverse_merge! 'headers' => t.properties.headers
60
- hsh.reverse_merge! 'col_sep' => t.properties.delimiter
61
- hsh.symbolize_keys
58
+ hsh.merge! :skip_blanks => !t.properties.keep_blank_rows
59
+ hsh.reverse_merge! :headers => t.properties.headers
60
+ hsh.reverse_merge! :col_sep => t.properties.delimiter
62
61
  end
63
62
  end
64
63
  end
@@ -19,7 +19,7 @@ class RemoteTable
19
19
  yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
20
20
  end
21
21
  ensure
22
- t.local_file.delete
22
+ t.local_file.cleanup
23
23
  end
24
24
 
25
25
  private
@@ -42,6 +42,7 @@ class RemoteTable
42
42
  d.rows do |row|
43
43
  row.trap(&everything)
44
44
  t.properties.schema.each do |name, width, options|
45
+ name = name.to_s
45
46
  if name == 'spacer'
46
47
  row.spacer width
47
48
  else
@@ -7,8 +7,9 @@ class RemoteTable
7
7
  raise "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML" unless t.properties.row_css or t.properties.row_xpath
8
8
  remove_useless_characters!
9
9
  transliterate_whole_file_to_utf8!
10
- first_row = true
11
- keys = t.properties.headers if t.properties.headers.is_a?(::Array)
10
+
11
+ headers = t.properties.headers
12
+
12
13
  xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
13
14
  (row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)).each do |row|
14
15
  values = if column_css?
@@ -18,22 +19,21 @@ class RemoteTable
18
19
  else
19
20
  [row]
20
21
  end.map { |cell| assume_utf8 cell.content.gsub(/\s+/, ' ').strip }
21
- if first_row and t.properties.use_first_row_as_header?
22
- keys = values
23
- first_row = false
22
+ if headers == :first_row
23
+ headers = values.select(&:present?)
24
24
  next
25
25
  end
26
26
  output = if t.properties.output_class == ::Array
27
27
  values
28
28
  else
29
- zip keys, values
29
+ zip headers, values
30
30
  end
31
31
  if t.properties.keep_blank_rows or values.any?
32
32
  yield output
33
33
  end
34
34
  end
35
35
  ensure
36
- t.local_file.delete
36
+ t.local_file.cleanup
37
37
  end
38
38
 
39
39
  private
@@ -5,44 +5,54 @@ class RemoteTable
5
5
  def each(&blk)
6
6
  spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
7
  spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
8
+
9
+ first_row = if t.properties.crop
10
+ t.properties.crop.first + 1
11
+ else
12
+ t.properties.skip + 1
13
+ end
14
+
15
+ last_row = if t.properties.crop
16
+ t.properties.crop.last
17
+ else
18
+ spreadsheet.last_row
19
+ end
20
+
8
21
  if t.properties.output_class == ::Array
9
- (first_row..spreadsheet.last_row).each do |y|
22
+ (first_row..last_row).each do |y|
10
23
  output = (1..spreadsheet.last_column).map do |x|
11
24
  assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
12
25
  end
13
26
  yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
14
27
  end
15
28
  else
16
- keys = {}
29
+ headers = {}
17
30
  if t.properties.use_first_row_as_header?
18
31
  (1..spreadsheet.last_column).each do |x|
19
- keys[x] = spreadsheet.cell(first_row, x)
20
- keys[x] = spreadsheet.cell(first_row - 1, x) if keys[x].blank? # look up
21
- keys[x] = assume_utf8 keys[x]
32
+ v = spreadsheet.cell(first_row, x)
33
+ v = spreadsheet.cell(first_row - 1, x) if v.blank? # look up
34
+ if v.present?
35
+ v = assume_utf8 v
36
+ headers[v] = x # 'foobar' is found at column 6
37
+ end
22
38
  end
39
+ # "advance the cursor"
40
+ first_row += 1
23
41
  else
24
- (1..spreadsheet.last_column).each do |x|
25
- keys[x] = assume_utf8 t.properties.headers[x - 1]
42
+ t.properties.headers.each_with_index do |k, i|
43
+ headers[k] = i + 1
26
44
  end
27
45
  end
28
- (first_row+1..spreadsheet.last_row).each do |y|
29
- output = (1..spreadsheet.last_column).inject(::ActiveSupport::OrderedHash.new) do |memo, x|
30
- if keys[x].present?
31
- memo[keys[x]] = assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
32
- end
33
- memo
46
+ (first_row..last_row).each do |y|
47
+ output = ::ActiveSupport::OrderedHash.new
48
+ headers.each do |k, x|
49
+ output[k] = assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
34
50
  end
35
51
  yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
36
52
  end
37
53
  end
38
54
  ensure
39
- t.local_file.delete
40
- end
41
-
42
- private
43
-
44
- def first_row
45
- 1 + t.properties.skip
55
+ t.local_file.cleanup
46
56
  end
47
57
  end
48
58
  end
@@ -1,5 +1,4 @@
1
1
  require 'fileutils'
2
- require 'escape'
3
2
  class RemoteTable
4
3
  class Format
5
4
  module Textual
@@ -8,35 +7,36 @@ class RemoteTable
8
7
  '\xc2\xad', # soft hyphen, often inserted by MS Office (html: &shy;)
9
8
  ]
10
9
  def remove_useless_characters!
11
- ::RemoteTable.executor.bang t.local_file.path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
10
+ Utils.in_place t.local_file.path, 'perl', '-pe', "s/#{USELESS_CHARACTERS.join '//g; s/'}//g"
12
11
  if t.properties.internal_encoding =~ /windows.?1252/i
13
12
  # soft hyphen again, as I have seen it appear in windows 1252
14
- ::RemoteTable.executor.bang t.local_file.path, %q{perl -pe 's/\xad//g'}
13
+ Utils.in_place t.local_file.path, 'perl', '-pe', 's/\xad//g'
15
14
  end
16
15
  end
17
16
 
18
17
  def transliterate_whole_file_to_utf8!
19
- ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.internal_encoding} -t #{::Escape.shell_single_word t.properties.external_encoding_iconv}"
20
- t.properties.update 'encoding' => t.properties.external_encoding
18
+ Utils.in_place t.local_file.path, 'iconv', '-c', '-f', t.properties.internal_encoding, '-t', t.properties.external_encoding_iconv, :ignore_error => true
19
+ t.properties.update :encoding => t.properties.external_encoding
21
20
  end
22
21
 
23
22
  def fix_newlines!
24
- ::RemoteTable.executor.bang t.local_file.path, %q{perl -pe 's/\r\n|\n|\r/\n/g'}
23
+ Utils.in_place t.local_file.path, 'perl', '-pe', 's/\r\n|\n|\r/\n/g'
25
24
  end
26
25
 
27
26
  def skip_rows!
28
27
  return unless t.properties.skip > 0
29
- ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{t.properties.skip + 1}"
28
+ Utils.in_place t.local_file.path, 'tail', '-n', "+#{t.properties.skip + 1}"
30
29
  end
31
30
 
32
31
  def crop_rows!
33
32
  return unless t.properties.crop
34
- ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{::Escape.shell_single_word t.properties.crop.first.to_s} | head -n #{t.properties.crop.last - t.properties.crop.first + 1}"
33
+ Utils.in_place t.local_file.path, 'tail', '-n', "+#{t.properties.crop.first}"
34
+ Utils.in_place t.local_file.path, 'head', '-n', (t.properties.crop.last - t.properties.crop.first + 1).to_s
35
35
  end
36
36
 
37
37
  def cut_columns!
38
38
  return unless t.properties.cut
39
- ::RemoteTable.executor.bang t.local_file.path, "cut -c #{::Escape.shell_single_word t.properties.cut.to_s}"
39
+ Utils.in_place t.local_file.path, 'cut', '-c', t.properties.cut.to_s
40
40
  end
41
41
  end
42
42
  end
@@ -1,6 +1,5 @@
1
1
  require 'fileutils'
2
- require 'escape'
3
- require 'tmpdir'
2
+
4
3
  class RemoteTable
5
4
  class LocalFile #:nodoc:all
6
5
 
@@ -11,7 +10,7 @@ class RemoteTable
11
10
  end
12
11
 
13
12
  def path
14
- save_locally
13
+ generate unless generated?
15
14
  @path
16
15
  end
17
16
 
@@ -23,95 +22,34 @@ class RemoteTable
23
22
  end
24
23
  end
25
24
 
26
- def delete
25
+ def cleanup
27
26
  if @encoded_io.respond_to?(:closed?) and !@encoded_io.closed?
28
27
  @encoded_io.close
29
28
  end
30
- ::FileUtils.rm_rf staging_dir_path
31
29
  @encoded_io = nil
30
+ if @path and ::File.exist?(@path)
31
+ ::FileUtils.rm_f @path
32
+ end
32
33
  @path = nil
33
- @staging_dir_path = nil
34
+ @generated = nil
34
35
  end
35
36
 
36
37
  private
37
38
 
38
- def staging_dir_path #:nodoc:
39
- return @staging_dir_path if @staging_dir_path.is_a?(::String)
40
- srand # in case this was forked by resque
41
- @staging_dir_path = ::File.join ::Dir.tmpdir, 'remote_table_gem', rand.to_s
42
- ::FileUtils.mkdir_p @staging_dir_path
43
- @staging_dir_path
44
- end
45
-
46
- def save_locally
47
- return if @path.is_a?(::String)
48
- @path = ::File.join(staging_dir_path, ::File.basename(t.properties.uri.path))
49
- download
50
- decompress
51
- unpack
52
- pick
53
- @path
54
- end
55
-
56
- def download
57
- if t.properties.uri.scheme == 'file'
58
- ::FileUtils.cp t.properties.uri.path, @path
59
- else
60
- # sabshere 1/20/11 FIXME: ::RemoteTable.config.curl_bin_path or smth
61
- # sabshere 7/20/11 make web requests move more slowly so you don't get accused of DOS
62
- sleep t.properties.delay_between_requests if t.properties.delay_between_requests
63
- $stderr.puts "[remote_table] Downloading #{t.properties.uri.to_s}"
64
- ::RemoteTable.executor.backtick_with_reporting %{
65
- curl
66
- --silent
67
- --show-error
68
- --location
69
- --header "Expect: "
70
- #{"--data #{::Escape.shell_single_word t.properties.form_data}" if t.properties.form_data.present?}
71
- --output #{::Escape.shell_single_word @path}
72
- #{::Escape.shell_single_word t.properties.uri.to_s}
73
- 2>&1
74
- }
75
- end
76
- end
77
-
78
- def decompress
79
- return unless t.properties.compression
80
- new_path = @path.chomp ".#{t.properties.compression}"
81
- raise_on_error = true
82
- cmd = case t.properties.compression
83
- when 'zip', 'exe'
84
- # can't set path yet because there may be multiple files
85
- raise_on_error = false
86
- "unzip -qq -n #{::Escape.shell_single_word @path} -d #{::File.dirname(@path)}"
87
- when 'bz2'
88
- @path = new_path
89
- "bunzip2 --stdout #{::Escape.shell_single_word @path} > #{::Escape.shell_single_word new_path}"
90
- when 'gz'
91
- @path = new_path
92
- "gunzip --stdout #{::Escape.shell_single_word @path} > #{::Escape.shell_single_word new_path}"
93
- end
94
- ::RemoteTable.executor.backtick_with_reporting cmd, raise_on_error
39
+ def generated?
40
+ @generated == true
95
41
  end
96
-
97
- def unpack
98
- return unless t.properties.packing
99
- cmd = case t.properties.packing
100
- when 'tar'
101
- "tar -xf #{::Escape.shell_single_word @path} -C #{::File.dirname(@path)}"
42
+
43
+ def generate
44
+ tmp_path = Utils.download t.properties.uri, t.properties.form_data
45
+ if compression = t.properties.compression
46
+ tmp_path = Utils.decompress tmp_path, compression
102
47
  end
103
- ::RemoteTable.executor.backtick_with_reporting cmd
104
- end
105
-
106
- # ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
107
- # ex. B: 2007-01.tar.gz (packing)
108
- # ex. C: 2007-01.zip (compression capable of storing multiple files)
109
- def pick
110
- if t.properties.filename.present?
111
- @path = ::File.join ::File.dirname(@path), t.properties.filename
112
- elsif t.properties.glob.present?
113
- @path = ::Dir[::File.dirname(@path)+t.properties.glob].first
48
+ if packing = t.properties.packing
49
+ tmp_path = Utils.unpack tmp_path, packing
114
50
  end
51
+ @path = Utils.pick tmp_path, :filename => t.properties.filename, :glob => t.properties.glob
52
+ @generated = true
115
53
  end
116
54
  end
117
55
  end
@@ -7,22 +7,18 @@ class RemoteTable
7
7
 
8
8
  def initialize(t)
9
9
  @t = t
10
- @current_options = t.options.dup
10
+ @current_options = t.options.symbolize_keys
11
11
  end
12
12
 
13
13
  def update(options)
14
14
  current_options.update options
15
15
  end
16
-
17
- def delay_between_requests
18
- current_options['delay_between_requests'] || (::ENV.has_key?('REMOTE_TABLE_DELAY_BETWEEN_REQUESTS') ? ::ENV['REMOTE_TABLE_DELAY_BETWEEN_REQUESTS'].to_i : nil)
19
- end
20
-
16
+
21
17
  # The parsed URI of the file to get.
22
18
  def uri
23
19
  return @uri if @uri.is_a?(::URI)
24
20
  @uri = ::URI.parse t.url
25
- if @uri.host == 'spreadsheets.google.com'
21
+ if @uri.host == 'spreadsheets.google.com' or @uri.host == 'docs.google.com'
26
22
  @uri.query = 'output=csv&' + @uri.query.sub(/\&?output=.*?(\&|\z)/, '\1')
27
23
  end
28
24
  @uri
@@ -33,19 +29,19 @@ class RemoteTable
33
29
  # * call each
34
30
  # Defaults to false.
35
31
  def streaming
36
- current_options['streaming'] || false
32
+ current_options[:streaming] || false
37
33
  end
38
34
 
39
35
  # Defaults to true.
40
36
  def warn_on_multiple_downloads
41
- current_options['warn_on_multiple_downloads'] != false
37
+ current_options[:warn_on_multiple_downloads] != false
42
38
  end
43
39
 
44
40
  # The headers specified by the user
45
41
  #
46
42
  # Default: :first_row
47
43
  def headers
48
- current_options['headers'].nil? ? :first_row : current_options['headers']
44
+ current_options[:headers].nil? ? :first_row : current_options[:headers]
49
45
  end
50
46
 
51
47
  def use_first_row_as_header?
@@ -60,30 +56,30 @@ class RemoteTable
60
56
  #
61
57
  # Default: 0
62
58
  def sheet
63
- current_options['sheet'] || 0
59
+ current_options[:sheet] || 0
64
60
  end
65
61
 
66
62
  # Whether to keep blank rows
67
63
  #
68
64
  # Default: false
69
65
  def keep_blank_rows
70
- current_options['keep_blank_rows'] || false
66
+ current_options[:keep_blank_rows] || false
71
67
  end
72
68
 
73
69
  # Form data to send in with the download request
74
70
  def form_data
75
- current_options['form_data']
71
+ current_options[:form_data]
76
72
  end
77
73
 
78
74
  # How many rows to skip
79
75
  #
80
76
  # Default: 0
81
77
  def skip
82
- current_options['skip'].to_i
78
+ current_options[:skip] || 0
83
79
  end
84
80
 
85
81
  def internal_encoding
86
- (current_options['encoding'] || 'UTF-8').upcase
82
+ (current_options[:encoding] || 'UTF-8').upcase
87
83
  end
88
84
 
89
85
  def external_encoding
@@ -98,49 +94,47 @@ class RemoteTable
98
94
  #
99
95
  # Default: ","
100
96
  def delimiter
101
- current_options['delimiter'] || ','
97
+ current_options[:delimiter] || ','
102
98
  end
103
99
 
104
100
  # The XPath used to find rows
105
101
  def row_xpath
106
- current_options['row_xpath']
102
+ current_options[:row_xpath]
107
103
  end
108
104
 
109
105
  # The XPath used to find columns
110
106
  def column_xpath
111
- current_options['column_xpath']
107
+ current_options[:column_xpath]
112
108
  end
113
109
 
114
110
  # The CSS selector used to find rows
115
111
  def row_css
116
- current_options['row_css']
112
+ current_options[:row_css]
117
113
  end
118
114
 
119
115
  # The CSS selector used to find columns
120
116
  def column_css
121
- current_options['column_css']
117
+ current_options[:column_css]
122
118
  end
123
119
 
124
120
  # The compression type.
125
121
  #
126
122
  # Default: guessed from URI.
127
123
  #
128
- # Can be specified as: "gz", "zip", "bz2", "exe" (treated as "zip")
124
+ # Can be specified as: :gz, :zip, :bz2, :exe (treated as :zip)
129
125
  def compression
130
- clue = if current_options['compression']
131
- current_options['compression'].to_s
132
- else
133
- ::File.extname uri.path
126
+ if current_options.has_key?(:compression)
127
+ return current_options[:compression]
134
128
  end
135
- case clue.downcase
129
+ case ::File.extname(uri.path).downcase
136
130
  when /gz/, /gunzip/
137
- 'gz'
131
+ :gz
138
132
  when /zip/
139
- 'zip'
133
+ :zip
140
134
  when /bz2/, /bunzip2/
141
- 'bz2'
135
+ :bz2
142
136
  when /exe/
143
- 'exe'
137
+ :exe
144
138
  end
145
139
  end
146
140
 
@@ -148,82 +142,79 @@ class RemoteTable
148
142
  #
149
143
  # Default: guessed from URI.
150
144
  #
151
- # Can be specified as: "tar"
145
+ # Can be specified as: :tar
152
146
  def packing
153
- clue = if current_options['packing']
154
- current_options['packing'].to_s
155
- else
156
- ::File.extname(uri.path.sub(/\.#{compression}\z/, ''))
147
+ if current_options.has_key?(:packing)
148
+ return current_options[:packing]
157
149
  end
158
- case clue.downcase
159
- when /tar/
160
- 'tar'
150
+ if uri.path =~ %r{\.tar(?:\.|$)}i
151
+ :tar
161
152
  end
162
153
  end
163
154
 
164
155
  # The glob used to pick a file out of an archive.
165
156
  #
166
157
  # Example:
167
- # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'glob' => '/*.csv'
158
+ # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
168
159
  def glob
169
- current_options['glob']
160
+ current_options[:glob]
170
161
  end
171
162
 
172
163
  # The filename, which can be used to pick a file out of an archive.
173
164
  #
174
165
  # Example:
175
- # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'filename' => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
166
+ # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
176
167
  def filename
177
- current_options['filename']
168
+ current_options[:filename]
178
169
  end
179
170
 
180
171
  # Cut columns up to this character
181
172
  def cut
182
- current_options['cut']
173
+ current_options[:cut]
183
174
  end
184
175
 
185
176
  # Crop rows to this range of lines (the code reads crop.first and crop.last)
186
177
  def crop
187
- current_options['crop']
178
+ current_options[:crop]
188
179
  end
189
180
 
190
181
  # The fixed-width schema, given as an array
191
182
  #
192
183
  # Example:
193
184
  # RemoteTable.new('http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
194
- # 'format' => 'fixed_width',
195
- # 'skip' => 1,
196
- # 'schema' => [[ 'header4', 10, { :type => :string } ],
185
+ # :format => :fixed_width,
186
+ # :skip => 1,
187
+ # :schema => [[ 'header4', 10, { :type => :string } ],
197
188
  # [ 'spacer', 1 ],
198
189
  # [ 'header5', 10, { :type => :string } ],
199
190
  # [ 'spacer', 12 ],
200
191
  # [ 'header6', 10, { :type => :string } ]])
201
192
  def schema
202
- current_options['schema']
193
+ current_options[:schema]
203
194
  end
204
195
 
205
196
  # The name of the fixed-width schema according to FixedWidth
206
197
  def schema_name
207
- current_options['schema_name']
198
+ current_options[:schema_name]
208
199
  end
209
200
 
210
201
  # A proc to call to decide whether to return a row.
211
202
  def select
212
- current_options['select']
203
+ current_options[:select]
213
204
  end
214
205
 
215
206
  # A proc to call to decide whether to return a row.
216
207
  def reject
217
- current_options['reject']
208
+ current_options[:reject]
218
209
  end
219
210
 
220
211
  # A hash of options to create a new Errata instance (see the Errata gem at http://github.com/seamusabshere/errata) to be used on every row.
221
212
  def errata
222
- return unless current_options.has_key? 'errata'
223
- @errata ||= if current_options['errata'].is_a? ::Hash
224
- ::Errata.new current_options['errata']
213
+ return unless current_options.has_key? :errata
214
+ @errata ||= if current_options[:errata].is_a? ::Hash
215
+ ::Errata.new current_options[:errata]
225
216
  else
226
- current_options['errata']
217
+ current_options[:errata]
227
218
  end
228
219
  end
229
220
 
@@ -233,15 +224,15 @@ class RemoteTable
233
224
  #
234
225
  # Default: guessed from file extension (which is usually the same as the URI, but sometimes not if you pick out a specific file from an archive)
235
226
  #
236
- # Can be specified as: "xlsx", "xls", "csv", "ods", "fixed_width", "html"
227
+ # Can be specified as: :xlsx, :xls, :delimited (aka :csv and :tsv), :ods, :fixed_width, :html
237
228
  def format
238
- return Format::Delimited if uri.host == 'spreadsheets.google.com'
239
- clue = if current_options['format']
240
- current_options['format'].to_s
229
+ return Format::Delimited if uri.host == 'spreadsheets.google.com' or @uri.host == 'docs.google.com'
230
+ clue = if current_options.has_key?(:format)
231
+ current_options[:format]
241
232
  else
242
- ::File.extname t.local_file.path
233
+ t.local_file.path
243
234
  end
244
- case clue.downcase
235
+ case clue.to_s.downcase
245
236
  when /xlsx/, /excelx/
246
237
  Format::Excelx
247
238
  when /xls/, /excel/