remote_table 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Brighter Planet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,27 @@
1
+ =remote_table
2
+
3
+ Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
4
+
5
+ ==Real-life usage
6
+
7
+ Used by data_miner (http://github.com/seamusabshere/data_miner)
8
+
9
+ ==Example
10
+
11
+ Taken from <tt>#{GEMDIR}/test/remote_table_test.rb</tt>:
12
+
13
+ >> t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
14
+ => #<RemoteTable:0x359da50 @transform=#<RemoteTable::Transform:0x359d154 @select=nil, @reject=nil>, @file=#<RemoteTable::File:0x35970c4 @delimiter=nil, @headers=nil, @cut=nil, @filename="98guide6.csv", @skip=nil, @schema_name=nil, @crop=nil, @format=:csv, @trap=nil, @sheet=0, @schema=nil>, @package=#<RemoteTable::Package:0x359c538 @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip", @filename="98guide6.csv", @compression=:zip, @packing=nil>, @request=#<RemoteTable::Request:0x3596bec @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip">>
15
+ >> t.rows.first
16
+ => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
17
+
18
+ See the test file, and the data_miner project, for examples of custom parsers.
19
+
20
+ ==Authors
21
+
22
+ * Seamus Abshere <seamus@abshere.net>
23
+ * Andy Rossmeissl <andy@rossmeissl.net>
24
+
25
+ == Copyright
26
+
27
+ Copyright (c) 2009 Brighter Planet. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,66 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "remote_table"
8
+ gem.summary = %Q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
9
+ gem.description = %Q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
10
+ gem.email = "seamus@abshere.net"
11
+ gem.homepage = "http://github.com/seamusabshere/remote_table"
12
+ gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
+ %w{ activesupport fastercsv ryanwood-slither }.each { |name| gem.add_dependency name }
14
+ gem.add_dependency 'roo', '1.3.11'
15
+ gem.require_path = "lib"
16
+ gem.files.include %w(lib/remote_table) unless gem.files.empty? # seems to fail once it's in the wild
17
+ gem.rdoc_options << '--line-numbers' << '--inline-source'
18
+ gem.requirements << 'curl'
19
+ gem.rubyforge_project = "remotetable"
20
+ end
21
+ Jeweler::GemcutterTasks.new
22
+ Jeweler::RubyforgeTasks.new do |rubyforge|
23
+ rubyforge.doc_task = "rdoc"
24
+ end
25
+ rescue LoadError
26
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
27
+ end
28
+
29
+ require 'rake/testtask'
30
+ Rake::TestTask.new(:test) do |test|
31
+ test.libs << 'lib' << 'test'
32
+ test.pattern = 'test/**/*_test.rb'
33
+ test.verbose = true
34
+ end
35
+
36
+ begin
37
+ require 'rcov/rcovtask'
38
+ Rcov::RcovTask.new do |test|
39
+ test.libs << 'test'
40
+ test.pattern = 'test/**/*_test.rb'
41
+ test.verbose = true
42
+ end
43
+ rescue LoadError
44
+ task :rcov do
45
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
46
+ end
47
+ end
48
+
49
+
50
+
51
+
52
+ task :default => :test
53
+
54
+ require 'rake/rdoctask'
55
+ Rake::RDocTask.new do |rdoc|
56
+ if File.exist?('VERSION')
57
+ version = File.read('VERSION')
58
+ else
59
+ version = ""
60
+ end
61
+
62
+ rdoc.rdoc_dir = 'rdoc'
63
+ rdoc.title = "remote_table #{version}"
64
+ rdoc.rdoc_files.include('README*')
65
+ rdoc.rdoc_files.include('lib/**/*.rb')
66
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.5
@@ -0,0 +1,45 @@
1
require 'fileutils'

class RemoteTable
  # Row iteration for delimited text files, parsed with FasterCSV.
  #
  # Meant to be mixed into an object that supplies +path+, +skip+, +headers+
  # and +delimiter+ (RemoteTable::File does).
  module Csv
    # Yields one hash per non-blank row of the file at +path+.
    #
    # With headers in use (the default) FasterCSV hands back row objects, which
    # are yielded as HashWithIndifferentAccess keyed by column name. With
    # <tt>headers == false</tt> it hands back plain arrays, which are yielded
    # as an ActiveSupport::OrderedHash keyed by zero-based column index.
    #
    # When +skip+ is set, the first +skip+ lines are stripped from the file
    # for the duration of the parse; the original content is put back in the
    # +ensure+ clause, even if parsing or the caller's block raises.
    def each_row
      skip_rows!
      # ::File, because a bare File resolves to RemoteTable::File in this
      # namespace. The block form closes the handle when parsing is done.
      ::File.open(path) do |io|
        FasterCSV.parse(io, fastercsv_options) do |row|
          if row.respond_to?(:fields) # a FasterCSV row object (headers in use)
            next if row.fields.compact.blank?
            hash = HashWithIndifferentAccess.new(row.to_hash)
          else # a plain Array (:headers => nil)
            next if row.compact.blank?
            hash = ActiveSupport::OrderedHash.new
            row.each_with_index { |element, index| hash[index] = element }
          end
          yield hash
        end
      end
    ensure
      restore_rows!
    end

    private

    # Builds the option hash handed to FasterCSV.parse from +headers+ and
    # +delimiter+. Blank lines are always skipped.
    def fastercsv_options
      options = { :skip_blanks => true }
      options[:headers] = (headers == false) ? nil : :first_row
      options[:col_sep] = delimiter if delimiter
      options
    end

    # Backs the file up to "<path>.original" and rewrites +path+ without its
    # first +skip+ lines. Done in Ruby rather than through a shell pipeline so
    # that paths containing spaces or shell metacharacters are handled safely.
    def skip_rows!
      return unless skip
      original = "#{path}.original"
      FileUtils.cp(path, original)
      ::File.open(original, 'rb') do |source|
        ::File.open(path, 'wb') do |target|
          Integer(skip).times { source.gets }
          while (line = source.gets)
            target.write(line)
          end
        end
      end
    end

    # Moves the "<path>.original" backup back over +path+. Does nothing when
    # no backup exists, so that running from an +ensure+ clause never masks
    # the exception that prevented the backup from being made.
    def restore_rows!
      return unless skip
      original = "#{path}.original"
      FileUtils.mv(original, path) if ::File.exist?(original)
    end
  end
end
@@ -0,0 +1,53 @@
1
require 'fileutils'
require 'shellwords'

class RemoteTable
  # Row iteration for fixed-width text files, parsed with Slither.
  #
  # Meant to be mixed into an object that supplies +path+, +schema_name+,
  # +crop+, +skip+ and +cut+ (RemoteTable::File does).
  module FixedWidth
    # Yields one HashWithIndifferentAccess per row of the <tt>:rows</tt>
    # section that Slither parses out of the file at +path+.
    #
    # Before parsing, the file is rewritten in place in this order:
    # +crop+ (keep only a range of lines), +skip+ (drop leading lines) and
    # +cut+ (keep only some character columns). Each step leaves a backup
    # beside the file; the +ensure+ clause restores them in reverse order so
    # the file ends up with its original content.
    def each_row
      crop_rows!
      skip_rows!
      cut_columns!
      parsed = Slither.parse(path, schema_name)
      parsed[:rows].each { |row| yield HashWithIndifferentAccess.new(row) }
    ensure
      uncut_columns!
      unskip_rows!
      uncrop_rows!
    end

    private

    # Keeps only the character columns named by +cut+, a list in the syntax
    # of <tt>cut -c</tt> (for example '2-'). Delegated to cut(1); every
    # argument is shell-escaped.
    def cut_columns!
      return unless cut
      original = "#{path}.uncut"
      FileUtils.cp(path, original)
      `cut -c #{Shellwords.escape(cut.to_s)} #{Shellwords.escape(original)} > #{Shellwords.escape(path)}`
    end

    # Undoes cut_columns!.
    def uncut_columns!
      restore_backup('.uncut') if cut
    end

    # Drops the first +skip+ lines of the file.
    def skip_rows!
      return unless skip
      rewrite_from_backup('.unskipped') do |source, target|
        copy_lines(source, target, Integer(skip))
      end
    end

    # Undoes skip_rows!.
    def unskip_rows!
      restore_backup('.unskipped') if skip
    end

    # Keeps only lines crop.first through crop.last (1-based, inclusive).
    def crop_rows!
      return unless crop
      first_line = Integer(crop.first)
      wanted = Integer(crop.last) - first_line + 1
      rewrite_from_backup('.uncropped') do |source, target|
        copy_lines(source, target, first_line - 1, wanted)
      end
    end

    # Undoes crop_rows!.
    def uncrop_rows!
      restore_backup('.uncropped') if crop
    end

    # Copies +path+ to "<path><suffix>", then yields the backup opened for
    # reading and +path+ opened (truncated) for writing.
    def rewrite_from_backup(suffix)
      backup = "#{path}#{suffix}"
      FileUtils.cp(path, backup)
      ::File.open(backup, 'rb') do |source|
        ::File.open(path, 'wb') { |target| yield source, target }
      end
    end

    # Moves "<path><suffix>" back over +path+. Does nothing when the backup
    # is missing, so that the +ensure+ clause in each_row cannot mask the
    # error that kept a rewrite step from running.
    def restore_backup(suffix)
      backup = "#{path}#{suffix}"
      FileUtils.mv(backup, path) if ::File.exist?(backup)
    end

    # Discards +skipped+ lines from +source+, then copies lines to +target+:
    # all remaining ones, or at most +limit+ of them when +limit+ is given.
    def copy_lines(source, target, skipped, limit = nil)
      skipped.times { source.gets }
      copied = 0
      while (limit.nil? || copied < limit) && (line = source.gets)
        target.write(line)
        copied += 1
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
class RemoteTable
  # OpenDocument spreadsheet format. Row iteration itself comes from
  # RooSpreadsheet; this module only names the reader class to use.
  module Ods
    class << self
      # Hook run when an object extends Ods: the same object is also
      # extended with RooSpreadsheet.
      def extended(base)
        base.extend(RooSpreadsheet)
      end
    end

    # The reader class that RooSpreadsheet instantiates for this format.
    def roo_klass
      Openoffice
    end
  end
end
@@ -0,0 +1,30 @@
1
class RemoteTable
  # Row iteration shared by the spreadsheet formats. The including object
  # supplies +roo_klass+ (the reader class), +path+, +sheet+ and +skip+.
  module RooSpreadsheet
    # Opens the spreadsheet at +path+ and yields one HashWithIndifferentAccess
    # per data row, keyed by column header.
    #
    # +sheet+ selects the worksheet: a number is used as an index into the
    # reader's sheet list, anything else is used as given. Headers come from
    # the row after the +skip+ped ones; a blank header falls back to the cell
    # directly above it. Every value is stringified, stripped of anything
    # that looks like an <...> tag, and trimmed of surrounding whitespace.
    def each_row
      book = roo_klass.new(path, nil, :ignore)
      book.default_sheet = sheet.is_a?(Numeric) ? book.sheets[sheet] : sheet

      column_names = {}
      (1..book.last_column).each do |col|
        name = book.cell(header_row, col)
        name = book.cell(header_row - 1, col) if name.blank? # look up
        column_names[col] = name
      end

      first_data_row.upto(book.last_row) do |row|
        record = {}
        (1..book.last_column).each do |col|
          text = book.cell(row, col).to_s
          record[column_names[col]] = text.gsub(/<[^>]+>/, '').strip
        end
        yield HashWithIndifferentAccess.new(record)
      end
    end

    private

    # 1-based number of the header row: the first row after the skipped ones.
    def header_row
      skip.to_i + 1
    end

    # 1-based number of the first row of data, directly below the header row.
    def first_data_row
      header_row + 1
    end
  end
end
@@ -0,0 +1,11 @@
1
class RemoteTable
  # Excel XLS format. Row iteration itself comes from RooSpreadsheet; this
  # module only names the reader class to use.
  module Xls
    class << self
      # Hook run when an object extends Xls: the same object is also
      # extended with RooSpreadsheet.
      def extended(base)
        base.extend(RooSpreadsheet)
      end
    end

    # The reader class that RooSpreadsheet instantiates for this format.
    def roo_klass
      Excel
    end
  end
end
@@ -0,0 +1,56 @@
1
class RemoteTable
  # Describes the table file inside a downloaded package: its name, its
  # format and the options needed to parse it.
  class File
    attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
    attr_accessor :path

    # Reads its settings from the +bus+ options hash:
    #
    # <tt>:filename</tt>::    name of the table file
    # <tt>:format</tt>::      format symbol; guessed from the filename when omitted
    # <tt>:delimiter</tt>::   column separator
    # <tt>:sheet</tt>::       worksheet name or index (default 0)
    # <tt>:skip</tt>::        number of leading rows to skip
    # <tt>:crop</tt>::        range of rows to keep
    # <tt>:cut</tt>::         character columns to keep
    # <tt>:headers</tt>::     +false+ for files without a header row
    # <tt>:schema</tt>::      inline fixed-width schema (see define_fixed_width_schema!)
    # <tt>:schema_name</tt>:: name of a fixed-width schema defined elsewhere
    # <tt>:trap</tt>::        row trap for the inline fixed-width schema
    #
    # The instance then extends itself with the module named after the
    # format (for example RemoteTable::Csv for <tt>:csv</tt>).
    def initialize(bus)
      @filename = bus[:filename]
      @format = bus[:format] || format_from_filename
      @delimiter = bus[:delimiter]
      @sheet = bus[:sheet] || 0
      @skip = bus[:skip] # rows
      @crop = bus[:crop] # rows
      @cut = bus[:cut]   # columns
      @headers = bus[:headers]
      @schema = bus[:schema]
      @schema_name = bus[:schema_name]
      @trap = bus[:trap]
      extend "RemoteTable::#{format.to_s.camelcase}".constantize
    end

    # Records where the table file lives on disk and returns +self+. For the
    # fixed-width format with an inline (Array) schema, the schema is first
    # registered with Slither.
    def tabulate(path)
      define_fixed_width_schema! if format == :fixed_width && schema.is_a?(Array) # TODO move to generic subclass callback
      self.path = path
      self
    end

    private

    # Registers the inline +schema+ with Slither under an autogenerated name
    # derived from +filename+. Each schema entry is
    # <tt>[name, width, options]</tt>; an entry named <tt>:spacer</tt> becomes
    # a spacer of that width.
    #
    # Raises if +schema_name+ was also given.
    def define_fixed_width_schema!
      raise "can't define both schema_name and schema" unless schema_name.blank?
      self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
      # Default trap accepts every line. It declares a parameter because a
      # strict-arity lambda would raise on Ruby >= 1.9 when called with the
      # line (presumably how Slither invokes traps -- TODO confirm).
      self.trap ||= lambda { |_line| true }
      Slither.define schema_name do |d|
        d.rows do |row|
          row.trap(&trap)
          schema.each do |name, width, options|
            if name == :spacer
              row.spacer width
            else
              row.column name, width, options
            end
          end
        end
      end
    end

    # Guesses the format from the extension of +filename+, ignoring case:
    # <tt>:xls</tt> or <tt>:ods</tt> when the extension says so, otherwise
    # <tt>:csv</tt> (including when there is no extension or no filename).
    def format_from_filename
      extname = ::File.extname(filename.to_s).delete('.').downcase
      [ :xls, :ods ].detect { |known| known.to_s == extname } || :csv
    end
  end
end
@@ -0,0 +1,84 @@
1
require 'fileutils'
require 'shellwords'
require 'uri'

class RemoteTable
  # Describes how the downloaded package is wrapped (compression and
  # packing) and turns it into the table file on disk.
  class Package
    attr_accessor :url, :compression, :packing, :filename

    # Reads its settings from the +bus+ options hash. <tt>:url</tt> is
    # required. <tt>:compression</tt>, <tt>:packing</tt> and
    # <tt>:filename</tt> are guessed from the last path segment of the URL
    # when omitted. The guessed filename is written back into +bus+ as a hint
    # for later consumers of the same hash.
    #
    # Raises RuntimeError ("need url") when <tt>:url</tt> is missing.
    def initialize(bus)
      @url = bus[:url] || raise("need url")
      @compression = bus[:compression] || compression_from_basename
      @packing = bus[:packing] || packing_from_basename_and_compression
      @filename = bus[:filename] || filename_from_basename_and_compression_and_packing
      add_hints!(bus)
    end

    # Stores +filename+ in +hash+ unless the hash already has that key.
    def add_hints!(hash)
      hash[:filename] = filename unless hash.has_key?(:filename)
    end

    # Decompresses and unpacks the package at +path+ in its own directory and
    # returns the path of the table file (+filename+ inside that directory).
    def stage(path)
      decompress(path)
      unpack(path)
      identify(path)
      file_path(path)
    end

    private

    # Runs the decompressor matching +compression+, if any. zip/exe archives
    # are extracted into the package's directory; bz2/gz leave the
    # decompressed data at +path+.
    def decompress(path)
      return unless compression
      cmd, args = case compression
                  when :zip, :exe
                    ['unzip', "-d #{Shellwords.escape(::File.dirname(path))}"]
                  when :bz2
                    ['bunzip2', nil]
                  when :gz
                    ['gunzip', nil]
                  else
                    raise ArgumentError, "unsupported compression: #{compression.inspect}"
                  end
      move_and_process path, compression, cmd, args
    end

    # Extracts the archive matching +packing+, if any, into the package's
    # directory.
    def unpack(path)
      return unless packing
      cmd, args = case packing
                  when :tar
                    ['tar -xf', "-C #{Shellwords.escape(::File.dirname(path))}"]
                  else
                    raise ArgumentError, "unsupported packing: #{packing.inspect}"
                  end
      move_and_process path, packing, cmd, args
    end

    # Renames +path+ to "<path>.<extname>" (the tools above want to see the
    # extension) and runs +cmd+ on the renamed file. +cmd+ and +args+ are
    # trusted strings built in this class; the file path is shell-escaped.
    def move_and_process(path, extname, cmd, args)
      staged = "#{path}.#{extname}"
      FileUtils.mv(path, staged)
      `#{cmd} #{Shellwords.escape(staged)} #{args}`
    end

    # ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
    # ex. B: 2007-01.tar.gz (packing)
    # ex. C: 2007-01.zip (compression capable of storing multiple files)
    # In A (and for uncompressed downloads) the table data sits at +path+
    # itself, so it is moved to its proper filename. In B and C the archive
    # already produced correctly named files.
    def identify(path)
      return if packing
      return unless [ nil, :bz2, :gz ].include?(compression)
      target = file_path(path)
      FileUtils.mv(path, target) unless target == path
    end

    # Path of the table file: +filename+ in the same directory as +path+.
    def file_path(path)
      ::File.join(::File.dirname(path), filename)
    end

    # Dot-separated parts of the URL's last path segment, as symbols.
    def basename_parts
      ::File.basename(URI.parse(url).path).split('.').map(&:to_sym)
    end

    # The compression named by the last extension, or nil.
    def compression_from_basename
      [ :zip, :exe, :bz2, :gz ].detect { |i| i == basename_parts.last }
    end

    # The packing named by the extension just before the compression
    # extension (or by the last extension when there is no compression), or nil.
    def packing_from_basename_and_compression
      [ :tar ].detect { |i| i == ((basename_parts.last == compression) ? basename_parts[-2] : basename_parts.last) }
    end

    # The URL's basename with compression and packing extensions removed.
    def filename_from_basename_and_compression_and_packing
      ary = basename_parts
      ary.pop if ary.last == compression
      ary.pop if ary.last == packing
      ary.join('.')
    end
  end
end
@@ -0,0 +1,36 @@
1
require 'fileutils'
require 'shellwords'
require 'tmpdir'

class RemoteTable
  # Downloads the package behind a URL into a private staging directory.
  class Request
    attr_accessor :url, :post_data, :username, :password

    # TODO: support post_data
    # TODO: support HTTP basic auth
    #
    # Takes the <tt>:url</tt> entry of the +bus+ options hash.
    # Raises RuntimeError ("need url") when it is missing.
    def initialize(bus)
      @url = bus[:url] || raise("need url")
    end

    # Fetches the URL with curl into a file named REMOTE_TABLE_PACKAGE inside
    # a fresh staging directory and returns that file's path. Both the URL
    # and the path are shell-escaped, so characters such as &, $ or backticks
    # in a URL reach curl verbatim instead of being interpreted by the shell.
    def download
      path = ::File.join(staging_dir_path, 'REMOTE_TABLE_PACKAGE')
      `curl --silent #{Shellwords.escape(url_with_google_docs_handling)} > #{Shellwords.escape(path)}`
      path
    end

    private

    # Creates a new temporary directory whose name starts with a sanitized
    # form of the URL, schedules its removal at interpreter exit and returns
    # its path. The prefix is capped so that long URLs cannot exceed file
    # name length limits.
    def staging_dir_path
      prefix = url.gsub(/[^a-z0-9]+/i, '_')[0, 100]
      path = Dir.mktmpdir(prefix)
      at_exit { FileUtils.rm_rf(path) }
      path
    end

    # For Google spreadsheet URLs, returns a copy of the URL with any
    # existing <tt>&output=...</tt> parameter replaced by
    # <tt>&output=csv</tt>; other query parameters are kept. Any other URL is
    # returned as is.
    def url_with_google_docs_handling
      return url unless url.include?('spreadsheets.google.com')
      # [^&]* stops at the next parameter; a greedy .* would swallow every
      # parameter that follows output=.
      "#{url.gsub(/&output=[^&]*/, '')}&output=csv"
    end
  end
end
@@ -0,0 +1,32 @@
1
class RemoteTable
  # Wraps a raw table and post-processes its rows: an optional custom
  # transform object first, then optional select/reject filters.
  class Transform
    attr_accessor :select, :reject, :transform_class, :transform_options, :transform, :raw_table

    # Reads its settings from the +bus+ options hash:
    #
    # <tt>:transform</tt>:: hash whose <tt>:class</tt> entry is the transform
    #                       class; the remaining entries are passed to its
    #                       constructor. The new instance is then asked to
    #                       <tt>add_hints!(bus)</tt>, which may add further
    #                       options (including :select / :reject) to +bus+.
    # <tt>:select</tt>::    callable; rows for which it returns false are dropped
    # <tt>:reject</tt>::    callable; rows for which it returns true are dropped
    #
    # The <tt>:transform</tt> entry and its nested hash are read without
    # being modified, so the caller can reuse the same options.
    def initialize(bus)
      if (transform_params = bus[:transform])
        @transform_class = transform_params[:class]
        # to_s so that string keys are recognised as well as symbol keys
        @transform_options = transform_params.reject { |key, _value| key.to_s == 'class' }
        @transform = @transform_class.new(@transform_options)
        @transform.add_hints!(bus)
      end
      # read after add_hints!, which may have supplied them
      @select = bus[:select]
      @reject = bus[:reject]
    end

    # Remembers +raw_table+ (anything responding to +each_row+) and returns
    # +self+, which then serves as the transformed table.
    def apply(raw_table)
      self.raw_table = raw_table
      self
    end

    # Yields every row of the raw table that survives the filters. With a
    # transform, each raw row is replaced by whatever <tt>transform.apply</tt>
    # returns for it, which may be one row or an array of rows.
    def each_row
      raw_table.each_row do |row|
        virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
        Array.wrap(virtual_rows).each do |virtual_row|
          next if select && !select.call(virtual_row)
          next if reject && reject.call(virtual_row)
          yield virtual_row
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'activesupport'
3
+ require 'tempfile'
4
+ require 'fastercsv'
5
+ require 'slither'
6
+ require 'roo'
7
+ require 'remote_table/transform'
8
+ require 'remote_table/request'
9
+ require 'remote_table/package'
10
+ require 'remote_table/file'
11
+ require 'remote_table/file/csv'
12
+ require 'remote_table/file/fixed_width'
13
+ require 'remote_table/file/roo_spreadsheet'
14
+ require 'remote_table/file/ods'
15
+ require 'remote_table/file/xls'
16
+
17
+ class RemoteTable
18
+ attr_accessor :request, :package, :file, :transform
19
+ attr_accessor :table
20
+
21
+ def initialize(bus)
22
+ @transform = Transform.new(bus)
23
+ @package = Package.new(bus)
24
+ @request = Request.new(bus)
25
+ @file = File.new(bus)
26
+ end
27
+
28
+ def each_row
29
+ finish_table! unless table
30
+ table.each_row { |row| yield row }
31
+ end
32
+
33
+ def rows
34
+ cache_rows! if @_row_cache.nil?
35
+ @_row_cache
36
+ end
37
+
38
+ private
39
+
40
+ def finish_table!
41
+ package_path = request.download
42
+ file_path = package.stage(package_path)
43
+ raw_table = file.tabulate(file_path)
44
+ self.table = transform.apply(raw_table) # must return something that responds to each_row
45
+ end
46
+
47
+ def cache_rows!
48
+ @_row_cache = []
49
+ each_row { |row| @_row_cache << row }
50
+ end
51
+ end
@@ -0,0 +1,74 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{remote_table}
8
+ s.version = "0.1.5"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
+ s.date = %q{2009-11-02}
13
+ s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
14
+ s.email = %q{seamus@abshere.net}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/remote_table.rb",
27
+ "lib/remote_table/file.rb",
28
+ "lib/remote_table/file/csv.rb",
29
+ "lib/remote_table/file/fixed_width.rb",
30
+ "lib/remote_table/file/ods.rb",
31
+ "lib/remote_table/file/roo_spreadsheet.rb",
32
+ "lib/remote_table/file/xls.rb",
33
+ "lib/remote_table/package.rb",
34
+ "lib/remote_table/request.rb",
35
+ "lib/remote_table/transform.rb",
36
+ "remote_table.gemspec",
37
+ "test/remote_table_test.rb",
38
+ "test/test_helper.rb"
39
+ ]
40
+ s.homepage = %q{http://github.com/seamusabshere/remote_table}
41
+ s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
42
+ s.require_paths = ["lib"]
43
+ s.requirements = ["curl"]
44
+ s.rubyforge_project = %q{remotetable}
45
+ s.rubygems_version = %q{1.3.5}
46
+ s.summary = %q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
47
+ s.test_files = [
48
+ "test/remote_table_test.rb",
49
+ "test/test_helper.rb"
50
+ ]
51
+
52
+ if s.respond_to? :specification_version then
53
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
+ s.specification_version = 3
55
+
56
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
57
+ s.add_runtime_dependency(%q<activesupport>, [">= 0"])
58
+ s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
59
+ s.add_runtime_dependency(%q<ryanwood-slither>, [">= 0"])
60
+ s.add_runtime_dependency(%q<roo>, ["= 1.3.11"])
61
+ else
62
+ s.add_dependency(%q<activesupport>, [">= 0"])
63
+ s.add_dependency(%q<fastercsv>, [">= 0"])
64
+ s.add_dependency(%q<ryanwood-slither>, [">= 0"])
65
+ s.add_dependency(%q<roo>, ["= 1.3.11"])
66
+ end
67
+ else
68
+ s.add_dependency(%q<activesupport>, [">= 0"])
69
+ s.add_dependency(%q<fastercsv>, [">= 0"])
70
+ s.add_dependency(%q<ryanwood-slither>, [">= 0"])
71
+ s.add_dependency(%q<roo>, ["= 1.3.11"])
72
+ end
73
+ end
74
+
@@ -0,0 +1,101 @@
1
+ require 'test_helper'
2
+
3
+ class FuelOilParser
4
+ def initialize(options = {})
5
+ # nothing
6
+ end
7
+ def add_hints!(bus)
8
+ bus[:sheet] = 'Data 1'
9
+ bus[:skip] = 2
10
+ bus[:select] = lambda { |row| row[:year] > 1989 }
11
+ end
12
+ def apply(row)
13
+ virtual_rows = []
14
+ row.keys.grep(/(.*) Residual Fuel Oil/) do |location_column_name|
15
+ next if (cost = row[location_column_name]).blank? or (date = row['Date']).blank?
16
+ if $1.starts_with?('U.S.')
17
+ locatable = "united_states (Country)"
18
+ elsif $1.include?('PADD')
19
+ /\(PADD (.*)\)/.match($1)
20
+ next if $1 == '1' # skip PADD 1 because we always prefer subdistricts
21
+ locatable = "#{$1} (PetroleumAdministrationForDefenseDistrict)"
22
+ else
23
+ locatable = "#{$1} (State)"
24
+ end
25
+ date = Time.parse(date)
26
+ virtual_rows << HashWithIndifferentAccess.new(
27
+ :locatable => locatable,
28
+ :cost => cost,
29
+ :year => date.year,
30
+ :month => date.month
31
+ )
32
+ end
33
+ virtual_rows
34
+ end
35
+ end
36
+
37
+ class RemoteTableTest < Test::Unit::TestCase
38
+ should "open an XLS inside a zip file" do
39
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
40
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
41
+ assert_equal 'NSX', t.rows.first['carline name']
42
+ assert_equal 'VOLVO', t.rows.last['Manufacturer']
43
+ assert_equal 'V70 XC AWD', t.rows.last['carline name']
44
+ end
45
+
46
+ should "have indifferent hash access" do
47
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
48
+ assert_equal 'ACURA', t.rows.first['Manufacturer'.to_sym]
49
+ assert_equal 'NSX', t.rows.first['carline name'.to_sym]
50
+ assert_equal 'VOLVO', t.rows.last['Manufacturer'.to_sym]
51
+ assert_equal 'V70 XC AWD', t.rows.last['carline name'.to_sym]
52
+ end
53
+
54
+ should "open a Google Docs url" do
55
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
56
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
57
+ assert_equal 'AL', t.rows.first['State']
58
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
59
+ assert_equal 'WY', t.rows.last['State']
60
+ end
61
+
62
+ should "open an ODS" do
63
+ t = RemoteTable.new(:url => 'http://static.brighterplanet.com/science/profiler/footprint_model.ods', :sheet => 'Export')
64
+ assert_equal 'automobiles', t.rows.first['component']
65
+ assert_equal 2005.0, t.rows.first['period'].to_f
66
+ end
67
+
68
+ should "open a CSV inside a zip file" do
69
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
70
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
71
+ assert_equal 'NSX', t.rows.first['carline name']
72
+ assert_equal 'TOYOTA', t.rows.last['Manufacturer']
73
+ assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
74
+ end
75
+
76
+ should "open a fixed-width file with an inline schema inside a zip file" do
77
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
78
+ :filename => 'Gd6-dsc.txt',
79
+ :format => :fixed_width,
80
+ :crop => 21..26, # inclusive
81
+ :cut => '2-',
82
+ :select => lambda { |row| /\A[A-Z]/.match row[:code] },
83
+ :schema => [[ :code, 2, { :type => :string } ],
84
+ [ :spacer, 2 ],
85
+ [ :name, 52, { :type => :string } ]])
86
+ assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
87
+ assert_equal 'R', t.rows.first['code']
88
+ assert_equal 'electricity', t.rows.last['name']
89
+ assert_equal 'El', t.rows.last['code']
90
+ end
91
+
92
+ should "open an XLS with a parser" do
93
+ ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
94
+ ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
95
+
96
+ t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
97
+ :transform => { :class => FuelOilParser })
98
+ assert_equal ma_1990_01, t.rows[0]
99
+ assert_equal ga_1990_01, t.rows[1]
100
+ end
101
+ end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'remote_table'
8
+
9
+ class Test::Unit::TestCase
10
+ end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: remote_table
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.5
5
+ platform: ruby
6
+ authors:
7
+ - Seamus Abshere
8
+ - Andy Rossmeissl
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-11-02 00:00:00 -05:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activesupport
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
26
+ - !ruby/object:Gem::Dependency
27
+ name: fastercsv
28
+ type: :runtime
29
+ version_requirement:
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ version:
36
+ - !ruby/object:Gem::Dependency
37
+ name: ryanwood-slither
38
+ type: :runtime
39
+ version_requirement:
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ - !ruby/object:Gem::Dependency
47
+ name: roo
48
+ type: :runtime
49
+ version_requirement:
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.3.11
55
+ version:
56
+ description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
57
+ email: seamus@abshere.net
58
+ executables: []
59
+
60
+ extensions: []
61
+
62
+ extra_rdoc_files:
63
+ - LICENSE
64
+ - README.rdoc
65
+ files:
66
+ - .document
67
+ - .gitignore
68
+ - LICENSE
69
+ - README.rdoc
70
+ - Rakefile
71
+ - VERSION
72
+ - lib/remote_table.rb
73
+ - lib/remote_table/file.rb
74
+ - lib/remote_table/file/csv.rb
75
+ - lib/remote_table/file/fixed_width.rb
76
+ - lib/remote_table/file/ods.rb
77
+ - lib/remote_table/file/roo_spreadsheet.rb
78
+ - lib/remote_table/file/xls.rb
79
+ - lib/remote_table/package.rb
80
+ - lib/remote_table/request.rb
81
+ - lib/remote_table/transform.rb
82
+ - remote_table.gemspec
83
+ - test/remote_table_test.rb
84
+ - test/test_helper.rb
85
+ has_rdoc: true
86
+ homepage: http://github.com/seamusabshere/remote_table
87
+ licenses: []
88
+
89
+ post_install_message:
90
+ rdoc_options:
91
+ - --charset=UTF-8
92
+ - --line-numbers
93
+ - --inline-source
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: "0"
101
+ version:
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: "0"
107
+ version:
108
+ requirements:
109
+ - curl
110
+ rubyforge_project: remotetable
111
+ rubygems_version: 1.3.5
112
+ signing_key:
113
+ specification_version: 3
114
+ summary: Remotely open and parse XLS, ODS, CSV and fixed-width tables.
115
+ test_files:
116
+ - test/remote_table_test.rb
117
+ - test/test_helper.rb