remote_table 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Brighter Planet
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,27 @@
1
+ =remote_table
2
+
3
+ Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
4
+
5
+ ==Real-life usage
6
+
7
+ Used by data_miner (http://github.com/seamusabshere/data_miner)
8
+
9
+ ==Example
10
+
11
+ Taken from <tt>#{GEMDIR}/test/remote_table_test.rb</tt>:
12
+
13
+ >> t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
14
+ => #<RemoteTable:0x359da50 @transform=#<RemoteTable::Transform:0x359d154 @select=nil, @reject=nil>, @file=#<RemoteTable::File:0x35970c4 @delimiter=nil, @headers=nil, @cut=nil, @filename="98guide6.csv", @skip=nil, @schema_name=nil, @crop=nil, @format=:csv, @trap=nil, @sheet=0, @schema=nil>, @package=#<RemoteTable::Package:0x359c538 @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip", @filename="98guide6.csv", @compression=:zip, @packing=nil>, @request=#<RemoteTable::Request:0x3596bec @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip">>
15
+ >> t.rows.first
16
+ => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
17
+
18
+ See the test file, and the data_miner project, for examples of custom parsers.
19
+
20
+ ==Authors
21
+
22
+ * Seamus Abshere <seamus@abshere.net>
23
+ * Andy Rossmeissl <andy@rossmeissl.net>
24
+
25
+ == Copyright
26
+
27
+ Copyright (c) 2009 Brighter Planet. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,66 @@
1
require 'rubygems'
require 'rake'

# Gem packaging and release tasks. Jeweler is optional: without it only the
# test, rcov and rdoc tasks below are defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "remote_table"
    gem.summary = %Q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
    gem.description = %Q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
    gem.email = "seamus@abshere.net"
    gem.homepage = "http://github.com/seamusabshere/remote_table"
    gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
    %w{ activesupport fastercsv ryanwood-slither }.each { |name| gem.add_dependency name }
    gem.add_dependency 'roo', '1.3.11'
    gem.require_path = "lib"
    gem.files.include %w(lib/remote_table) unless gem.files.empty? # seems to fail once it's in the wild
    gem.rdoc_options << '--line-numbers' << '--inline-source'
    gem.requirements << 'curl'
    gem.rubyforge_project = "remotetable"
  end
  Jeweler::GemcutterTasks.new
  Jeweler::RubyforgeTasks.new do |rubyforge|
    rubyforge.doc_task = "rdoc"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# `rake test` runs every test/**/*_test.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# `rake rcov` produces coverage when rcov is installed; otherwise the task
# aborts with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# rake/rdoctask was removed from newer Rake releases in favour of rdoc/task.
# Try the current location first and fall back to the legacy one, so a missing
# file no longer aborts loading of the whole Rakefile.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip so the trailing newline of the VERSION file stays out of the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "remote_table #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.5
@@ -0,0 +1,45 @@
1
class RemoteTable
  # Row reader for delimited text. Meant to be mixed into an object that
  # provides +path+, +skip+, +headers+ and +delimiter+ (RemoteTable::File
  # extends itself with the module matching its format).
  module Csv
    # Parses the file at +path+ with FasterCSV and yields one hash per
    # non-blank row.
    #
    # Rows that FasterCSV returns as row objects (header mode) are yielded as
    # HashWithIndifferentAccess keyed by column name. Rows returned as plain
    # arrays are yielded as ActiveSupport::OrderedHash keyed by zero-based
    # column index.
    #
    # When +skip+ is set, that many leading lines are temporarily removed from
    # the file; the original content is put back afterwards, even if parsing
    # raises.
    def each_row(&block)
      skip_rows!
      # Block form guarantees the handle is closed. ::File is spelled out
      # because a bare File resolves to RemoteTable::File in this scope.
      ::File.open(path) do |io|
        FasterCSV.parse(io, fastercsv_options) do |row|
          if row.respond_to?(:fields) # it's a traditional fastercsv row hash
            next if row.fields.compact.blank?
            hash = HashWithIndifferentAccess.new(row.to_hash)
          else # it's an array, which i think happens if you're using :headers => nil or :col_sep
            next if row.compact.blank?
            hash = ActiveSupport::OrderedHash.new
            row.each_with_index { |element, index| hash[index] = element }
          end
          yield hash
        end
      end
    ensure
      restore_rows!
    end

    private

    # Options handed to FasterCSV.parse: blank lines are skipped, the first row
    # is used as headers unless +headers+ is exactly false, and +delimiter+
    # (when set) becomes the column separator.
    def fastercsv_options
      options = { :skip_blanks => true } # ...and this will skip []
      options[:headers] = (headers == false) ? nil : :first_row
      options[:col_sep] = delimiter if delimiter
      options
    end

    # Backs the file up to "<path>.original" and rewrites +path+ without its
    # first +skip+ lines. Done in Ruby rather than through a shell pipeline so
    # paths containing spaces or shell metacharacters are handled correctly.
    def skip_rows!
      return unless skip
      original = "#{path}.original"
      FileUtils.cp(path, original)
      ::File.open(original, 'rb') do |source|
        ::File.open(path, 'wb') do |target|
          skip.times { source.gets }
          while (chunk = source.read(65_536))
            target.write(chunk)
          end
        end
      end
    end

    # Moves the backup made by skip_rows! back into place. Does nothing when no
    # backup exists, so a failure before the backup was made is not masked by a
    # second error raised from the ensure clause.
    def restore_rows!
      return unless skip
      original = "#{path}.original"
      FileUtils.mv(original, path) if ::File.exist?(original)
    end
  end
end
@@ -0,0 +1,53 @@
1
class RemoteTable
  # Row reader for fixed-width text. Meant to be mixed into an object that
  # provides +path+, +schema_name+, +crop+, +skip+ and +cut+.
  module FixedWidth
    # Parses the file at +path+ with Slither (using the schema registered under
    # +schema_name+) and yields each entry of the :rows section as a
    # HashWithIndifferentAccess.
    #
    # Before parsing the file is narrowed in three stages: +crop+ keeps only a
    # range of lines, +skip+ drops leading lines, and +cut+ keeps only selected
    # columns. Each stage backs up the previous state of the file; the ensure
    # clause undoes them in reverse order so the original file is restored.
    def each_row(&block)
      crop_rows!
      skip_rows!
      cut_columns!
      a = Slither.parse(path, schema_name)
      a[:rows].each { |row| yield HashWithIndifferentAccess.new(row) }
    ensure
      uncut_columns!
      unskip_rows!
      uncrop_rows!
    end

    private

    # Keeps only the columns named by +cut+, a list in the style accepted by
    # `cut -c`: comma-separated items of the form "N", "N-M", "N-" or "-M",
    # 1-based. Columns are counted in bytes because the file is read in binary
    # mode.
    def cut_columns!
      return unless cut
      spans = cut_spans
      rewrite_from_backup('uncut') do |source, target|
        source.each_line do |line|
          target.write(cut_line(line.chomp("\n"), spans) + "\n")
        end
      end
    end

    def uncut_columns!
      return unless cut
      restore_backup('uncut')
    end

    # Drops the first +skip+ lines of the file.
    def skip_rows!
      return unless skip
      rewrite_from_backup('unskipped') do |source, target|
        skip.times { source.gets }
        copy_remaining(source, target)
      end
    end

    def unskip_rows!
      return unless skip
      restore_backup('unskipped')
    end

    # Keeps lines crop.first through crop.last (1-based, inclusive).
    def crop_rows!
      return unless crop
      start = [crop.first, 1].max
      count = crop.last - crop.first + 1
      rewrite_from_backup('uncropped') do |source, target|
        (start - 1).times { source.gets }
        count.times do
          line = source.gets
          break if line.nil?
          target.write(line)
        end
      end
    end

    def uncrop_rows!
      return unless crop
      restore_backup('uncropped')
    end

    # Copies +path+ to "<path>.<suffix>", then yields the backup (opened for
    # reading) and +path+ (opened for writing, truncated) so the caller can
    # write the filtered content. Pure Ruby, so neither the path nor any
    # user-supplied option ever reaches a shell.
    def rewrite_from_backup(suffix)
      backup = "#{path}.#{suffix}"
      FileUtils.cp(path, backup)
      ::File.open(backup, 'rb') do |source|
        ::File.open(path, 'wb') { |target| yield source, target }
      end
    end

    # Moves "<path>.<suffix>" back over +path+. Does nothing when the backup is
    # missing, so a failure in an earlier stage is not masked by a second error
    # raised from the ensure clause.
    def restore_backup(suffix)
      backup = "#{path}.#{suffix}"
      FileUtils.mv(backup, path) if ::File.exist?(backup)
    end

    # Copies everything left in +source+ to +target+ in fixed-size chunks.
    def copy_remaining(source, target)
      while (chunk = source.read(65_536))
        target.write(chunk)
      end
    end

    # Parses +cut+ into [low, high] pairs of 1-based column numbers; a nil high
    # means "through the end of the line".
    def cut_spans
      cut.to_s.split(',').map do |item|
        match = /\A(\d*)(-?)(\d*)\z/.match(item.strip)
        if match.nil? || (match[1].empty? && match[3].empty?)
          raise ArgumentError, "invalid cut specification: #{cut.inspect}"
        end
        low = match[1].empty? ? 1 : match[1].to_i
        high = if match[2].empty?
          low               # "N"
        elsif match[3].empty?
          nil               # "N-"
        else
          match[3].to_i     # "N-M" or "-M"
        end
        [low, high]
      end
    end

    # Returns the characters of +line+ whose 1-based position falls inside any
    # of +spans+, in their original order.
    def cut_line(line, spans)
      kept = []
      line.split(//).each_with_index do |char, index|
        column = index + 1
        kept << char if spans.any? { |low, high| column >= low && (high.nil? || column <= high) }
      end
      kept.join
    end
  end
end
@@ -0,0 +1,11 @@
1
class RemoteTable
  # Format module for .ods files. Extending an object with Ods also extends it
  # with RooSpreadsheet, which does the row reading.
  module Ods
    class << self
      # Hook run when an object is extended with Ods: pulls in the shared
      # roo-backed reader as well.
      def extended(base)
        base.extend(RooSpreadsheet)
      end
    end

    # Spreadsheet class that RooSpreadsheet instantiates for this format.
    def roo_klass
      Openoffice
    end
  end
end
@@ -0,0 +1,30 @@
1
class RemoteTable
  # Shared row reader for roo-backed spreadsheet formats. The host object must
  # provide +path+, +sheet+, +skip+ and +roo_klass+.
  module RooSpreadsheet
    # Opens the spreadsheet at +path+ with roo_klass, selects +sheet+ (looked
    # up by position when numeric, used as given otherwise) and yields every
    # row below the header row as a HashWithIndifferentAccess keyed by header.
    #
    # A blank header cell falls back to the cell directly above it. Cell values
    # are stringified, stripped of anything that looks like a markup tag, and
    # trimmed of surrounding whitespace.
    def each_row(&block)
      titles = {}
      book = roo_klass.new(path, nil, :ignore)
      book.default_sheet = sheet.is_a?(Numeric) ? book.sheets[sheet] : sheet
      (1..book.last_column).each do |column|
        title = book.cell(header_row, column)
        title = book.cell(header_row - 1, column) if title.blank? # look up
        titles[column] = title
      end
      first_data_row.upto(book.last_row) do |line|
        record = (1..book.last_column).inject({}) do |memo, column|
          memo[titles[column]] = book.cell(line, column).to_s.gsub(/<[^>]+>/, '').strip
          memo
        end
        yield HashWithIndifferentAccess.new(record)
      end
    end

    private

    # 1-based row holding the column headers: the first row after +skip+ rows.
    def header_row
      skip.to_i + 1
    end

    # 1-based row where data starts: the row right after the headers.
    def first_data_row
      header_row + 1
    end
  end
end
@@ -0,0 +1,11 @@
1
class RemoteTable
  # Format module for .xls files. Extending an object with Xls also extends it
  # with RooSpreadsheet, which does the row reading.
  module Xls
    class << self
      # Hook run when an object is extended with Xls: pulls in the shared
      # roo-backed reader as well.
      def extended(base)
        base.extend(RooSpreadsheet)
      end
    end

    # Spreadsheet class that RooSpreadsheet instantiates for this format.
    def roo_klass
      Excel
    end
  end
end
@@ -0,0 +1,56 @@
1
class RemoteTable
  # Describes how one staged file should be read: its format plus the
  # format-specific reading options. On construction the instance extends
  # itself with the module named after the format (RemoteTable::Csv,
  # RemoteTable::FixedWidth, RemoteTable::Xls, RemoteTable::Ods), which
  # supplies each_row.
  class File
    attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
    attr_accessor :path

    # +bus+ is the options hash shared by all RemoteTable components. Keys read
    # here: :filename, :format, :delimiter, :sheet, :skip, :crop, :cut,
    # :headers, :schema, :schema_name and :trap. When :format is missing it is
    # derived from the filename's extension.
    def initialize(bus)
      @filename = bus[:filename]
      @format = bus[:format] || format_from_filename
      @delimiter = bus[:delimiter]
      @sheet = bus[:sheet] || 0
      @skip = bus[:skip] # rows
      @crop = bus[:crop] # rows
      @cut = bus[:cut]   # columns
      @headers = bus[:headers]
      @schema = bus[:schema]
      @schema_name = bus[:schema_name]
      @trap = bus[:trap]
      extend "RemoteTable::#{format.to_s.camelcase}".constantize
    end

    # Points this object at the staged file and returns self, which responds to
    # each_row. For fixed-width files with an inline (Array) schema, the
    # Slither schema is registered first.
    def tabulate(path)
      define_fixed_width_schema! if format == :fixed_width && schema.is_a?(Array) # TODO move to generic subclass callback
      self.path = path
      self
    end

    private

    # Registers a Slither schema built from the inline +schema+ array under an
    # autogenerated name derived from the filename. Each schema entry is
    # [name, width, options]; entries named :spacer become spacers. Raises when
    # the caller supplied both an inline schema and a schema_name.
    def define_fixed_width_schema!
      # The name assigned below would otherwise trip the guard on a second
      # tabulate call.
      return if @fixed_width_schema_defined
      raise "can't define both schema_name and schema" if !schema_name.blank?
      self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
      # Default trap accepts every line. It takes one argument because lambdas
      # enforce arity on Ruby >= 1.9; presumably Slither calls the trap with
      # the raw line -- TODO confirm against Slither.
      self.trap ||= lambda { |_line| true }
      # Captured as locals so the blocks below do not depend on what self is
      # when Slither runs them.
      row_trap = trap
      columns = schema
      Slither.define schema_name do |d|
        d.rows do |row|
          row.trap(&row_trap)
          columns.each do |name, width, options|
            if name == :spacer
              row.spacer width
            else
              # entries may omit the options hash
              row.column name, width, options || {}
            end
          end
        end
      end
      @fixed_width_schema_defined = true
    end

    # Guesses the format from the filename's extension, ignoring case: :xls or
    # :ods when the extension says so, :csv for anything else (including no
    # extension or no filename).
    def format_from_filename
      extname = ::File.extname(filename.to_s).delete('.').downcase
      [ :xls, :ods ].detect { |known| known.to_s == extname } || :csv
    end
  end
end
@@ -0,0 +1,84 @@
1
class RemoteTable
  # Knows how a downloaded package is wrapped (compression and/or tar packing)
  # and turns it into the single file that should be parsed.
  class Package
    attr_accessor :url, :compression, :packing, :filename

    # +bus+ is the options hash shared by all RemoteTable components. :url is
    # required (RuntimeError "need url" otherwise). :compression, :packing and
    # :filename are derived from the URL's basename when not given. The
    # resulting filename is written back to +bus+ as a hint.
    def initialize(bus)
      @url = bus[:url] || raise("need url")
      @compression = bus[:compression] || compression_from_basename
      @packing = bus[:packing] || packing_from_basename_and_compression
      @filename = bus[:filename] || filename_from_basename_and_compression_and_packing
      add_hints!(bus)
    end

    # Stores the filename in +hash+ unless the caller already supplied one.
    def add_hints!(hash)
      hash[:filename] = filename unless hash.has_key?(:filename)
    end

    # Decompresses and unpacks the download at +path+ in place and returns the
    # path, inside the same directory, of the file named +filename+.
    def stage(path)
      decompress(path)
      unpack(path)
      identify(path)
      file_path(path)
    end

    private

    # Renames the download to carry the compression's extension and runs the
    # matching decompressor on it. zip/exe archives are extracted into the
    # download's directory; bunzip2/gunzip strip the extension again, which
    # leaves the decompressed data back at +path+.
    def decompress(path)
      return unless compression
      archive = "#{path}.#{compression}"
      argv = case compression
      when :zip, :exe
        # -qq keeps unzip's file listing off stdout
        ['unzip', '-qq', archive, '-d', ::File.dirname(path)]
      when :bz2
        ['bunzip2', archive]
      when :gz
        ['gunzip', archive]
      end
      move_and_process path, archive, argv
    end

    # Renames the (decompressed) download to carry the packing's extension and
    # extracts it into the download's directory.
    def unpack(path)
      return unless packing
      archive = "#{path}.#{packing}"
      argv = case packing
      when :tar
        ['tar', '-xf', archive, '-C', ::File.dirname(path)]
      end
      move_and_process path, archive, argv
    end

    # Moves +path+ to +archive+ and runs +argv+ as a command. The command is
    # given to Kernel#system as an argument list, so no shell is involved and
    # paths need no quoting. An unsupported compression or packing raises
    # instead of silently running a malformed command.
    def move_and_process(path, archive, argv)
      raise "don't know how to process #{archive}" if argv.nil?
      FileUtils.mv(path, archive)
      system(*argv)
    end

    # ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
    # ex. B: 2007-01.tar.gz (packing)
    # ex. C: 2007-01.zip (compression capable of storing multiple files)
    # in C but not in the others, we can default to the basename of the package
    # in order to do this we'll need to mv the uncompressed file on top of the original file
    def identify(path)
      return if packing
      return unless [ nil, :bz2, :gz ].include?(compression)
      target = file_path(path)
      # FileUtils.mv rather than File.mv: the latter only exists with Ruby
      # 1.8's ftools loaded.
      FileUtils.mv(path, target) unless target == path
    end

    # Path of the file named +filename+ in the same directory as +path+.
    def file_path(path)
      ::File.join(::File.dirname(path), filename)
    end

    # Dot-separated parts of the URL path's basename, as symbols.
    def basename_parts
      ::File.basename(URI.parse(url).path).split('.').map { |part| part.to_sym }
    end

    # Compression implied by the basename's last extension, or nil.
    def compression_from_basename
      [ :zip, :exe, :bz2, :gz ].detect { |i| i == basename_parts.last }
    end

    # Packing implied by the extension just before the compression extension
    # (or by the last extension when there is no compression), or nil.
    def packing_from_basename_and_compression
      parts = basename_parts
      candidate = (parts.last == compression) ? parts[-2] : parts.last
      [ :tar ].detect { |i| i == candidate }
    end

    # Basename of the URL with the compression and packing extensions removed.
    def filename_from_basename_and_compression_and_packing
      ary = basename_parts
      ary.pop if ary.last == compression
      ary.pop if ary.last == packing
      ary.join('.')
    end
  end
end
@@ -0,0 +1,36 @@
1
class RemoteTable
  # Downloads the package behind a URL into a private staging directory.
  class Request
    attr_accessor :url, :post_data, :username, :password

    # TODO: support post_data
    # TODO: support HTTP basic auth
    #
    # +bus+ is the options hash shared by all RemoteTable components; only :url
    # is read here and it is required (RuntimeError "need url" otherwise).
    def initialize(bus)
      @url = bus[:url] || raise("need url")
    end

    # Fetches the URL with curl into a file named REMOTE_TABLE_PACKAGE inside a
    # fresh staging directory and returns that file's path.
    #
    # curl is run through the argument-list form of Kernel#system, so the URL
    # is never interpreted by a shell. Passing it via --url also keeps a URL
    # that starts with "-" from being read as a curl option.
    def download
      path = ::File.join(staging_dir_path, 'REMOTE_TABLE_PACKAGE')
      system('curl', '--silent', '--output', path, '--url', url_with_google_docs_handling)
      # the returned path always exists, even when curl wrote nothing
      FileUtils.touch(path) unless ::File.exist?(path)
      path
    end

    private

    # Creates a new temporary directory, schedules its removal at process exit
    # and returns its path. The directory name starts with a sanitized prefix
    # of the URL, truncated so that long URLs cannot exceed file-name length
    # limits. Dir.mktmpdir comes from 'tmpdir', which 'tempfile' loads.
    def staging_dir_path
      prefix = url.gsub(/[^a-z0-9]+/i, '_')[0, 64]
      path = Dir.mktmpdir(prefix)
      at_exit { FileUtils.rm_rf(path) }
      path
    end

    # For Google Docs spreadsheet URLs, replaces any existing "&output=..."
    # parameter with "&output=csv", leaving all other parameters untouched.
    # Other URLs are returned unchanged. Never modifies +url+ itself.
    def url_with_google_docs_handling
      return url unless url.include?('spreadsheets.google.com')
      "#{url.gsub(/&output=[^&]*/, '')}&output=csv"
    end
  end
end
@@ -0,0 +1,32 @@
1
class RemoteTable
  # Optional post-processing applied to every raw row: a custom transform
  # object that may turn one row into several, followed by :select / :reject
  # filters.
  class Transform
    attr_accessor :select, :reject, :transform_class, :transform_options, :transform, :raw_table

    # +bus+ is the options hash shared by all RemoteTable components.
    #
    # bus[:transform], when present, is removed from +bus+ and must be a hash
    # with a :class entry. That class is instantiated with the remaining
    # entries and then given the chance to add options to +bus+ through
    # add_hints!. The :transform hash itself is left unmodified, so callers can
    # reuse it.
    #
    # bus[:select] and bus[:reject] are read after the hints were added, so a
    # transform can supply them.
    def initialize(bus)
      transform_params = bus.delete(:transform)
      if transform_params
        @transform_class = transform_params[:class]
        # to_s so string-keyed (indifferent-access) hashes are handled too
        @transform_options = transform_params.reject { |key, _value| key.to_s == 'class' }
        @transform = @transform_class.new(@transform_options)
        @transform.add_hints!(bus)
      end
      @select = bus[:select]
      @reject = bus[:reject]
    end

    # Remembers the raw table (anything responding to each_row) and returns
    # self, which then acts as the transformed table.
    def apply(raw_table)
      self.raw_table = raw_table
      self
    end

    # Yields every row of the raw table after transformation and filtering:
    # rows for which +select+ returns false, or +reject+ returns true, are
    # dropped.
    def each_row(&block)
      raw_table.each_row do |row|
        virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
        Array.wrap(virtual_rows).each do |virtual_row|
          next if select && !select.call(virtual_row)
          next if reject && reject.call(virtual_row)
          yield virtual_row
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'activesupport'
3
+ require 'tempfile'
4
+ require 'fastercsv'
5
+ require 'slither'
6
+ require 'roo'
7
+ require 'remote_table/transform'
8
+ require 'remote_table/request'
9
+ require 'remote_table/package'
10
+ require 'remote_table/file'
11
+ require 'remote_table/file/csv'
12
+ require 'remote_table/file/fixed_width'
13
+ require 'remote_table/file/roo_spreadsheet'
14
+ require 'remote_table/file/ods'
15
+ require 'remote_table/file/xls'
16
+
17
+ class RemoteTable
18
+ attr_accessor :request, :package, :file, :transform
19
+ attr_accessor :table
20
+
21
+ def initialize(bus)
22
+ @transform = Transform.new(bus)
23
+ @package = Package.new(bus)
24
+ @request = Request.new(bus)
25
+ @file = File.new(bus)
26
+ end
27
+
28
+ def each_row
29
+ finish_table! unless table
30
+ table.each_row { |row| yield row }
31
+ end
32
+
33
+ def rows
34
+ cache_rows! if @_row_cache.nil?
35
+ @_row_cache
36
+ end
37
+
38
+ private
39
+
40
+ def finish_table!
41
+ package_path = request.download
42
+ file_path = package.stage(package_path)
43
+ raw_table = file.tabulate(file_path)
44
+ self.table = transform.apply(raw_table) # must return something that responds to each_row
45
+ end
46
+
47
+ def cache_rows!
48
+ @_row_cache = []
49
+ each_row { |row| @_row_cache << row }
50
+ end
51
+ end
@@ -0,0 +1,74 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{remote_table}
8
+ s.version = "0.1.5"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
+ s.date = %q{2009-11-02}
13
+ s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
14
+ s.email = %q{seamus@abshere.net}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/remote_table.rb",
27
+ "lib/remote_table/file.rb",
28
+ "lib/remote_table/file/csv.rb",
29
+ "lib/remote_table/file/fixed_width.rb",
30
+ "lib/remote_table/file/ods.rb",
31
+ "lib/remote_table/file/roo_spreadsheet.rb",
32
+ "lib/remote_table/file/xls.rb",
33
+ "lib/remote_table/package.rb",
34
+ "lib/remote_table/request.rb",
35
+ "lib/remote_table/transform.rb",
36
+ "remote_table.gemspec",
37
+ "test/remote_table_test.rb",
38
+ "test/test_helper.rb"
39
+ ]
40
+ s.homepage = %q{http://github.com/seamusabshere/remote_table}
41
+ s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
42
+ s.require_paths = ["lib"]
43
+ s.requirements = ["curl"]
44
+ s.rubyforge_project = %q{remotetable}
45
+ s.rubygems_version = %q{1.3.5}
46
+ s.summary = %q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
47
+ s.test_files = [
48
+ "test/remote_table_test.rb",
49
+ "test/test_helper.rb"
50
+ ]
51
+
52
+ if s.respond_to? :specification_version then
53
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
+ s.specification_version = 3
55
+
56
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
57
+ s.add_runtime_dependency(%q<activesupport>, [">= 0"])
58
+ s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
59
+ s.add_runtime_dependency(%q<ryanwood-slither>, [">= 0"])
60
+ s.add_runtime_dependency(%q<roo>, ["= 1.3.11"])
61
+ else
62
+ s.add_dependency(%q<activesupport>, [">= 0"])
63
+ s.add_dependency(%q<fastercsv>, [">= 0"])
64
+ s.add_dependency(%q<ryanwood-slither>, [">= 0"])
65
+ s.add_dependency(%q<roo>, ["= 1.3.11"])
66
+ end
67
+ else
68
+ s.add_dependency(%q<activesupport>, [">= 0"])
69
+ s.add_dependency(%q<fastercsv>, [">= 0"])
70
+ s.add_dependency(%q<ryanwood-slither>, [">= 0"])
71
+ s.add_dependency(%q<roo>, ["= 1.3.11"])
72
+ end
73
+ end
74
+
@@ -0,0 +1,101 @@
1
+ require 'test_helper'
2
+
3
+ class FuelOilParser
4
+ def initialize(options = {})
5
+ # nothing
6
+ end
7
+ def add_hints!(bus)
8
+ bus[:sheet] = 'Data 1'
9
+ bus[:skip] = 2
10
+ bus[:select] = lambda { |row| row[:year] > 1989 }
11
+ end
12
+ def apply(row)
13
+ virtual_rows = []
14
+ row.keys.grep(/(.*) Residual Fuel Oil/) do |location_column_name|
15
+ next if (cost = row[location_column_name]).blank? or (date = row['Date']).blank?
16
+ if $1.starts_with?('U.S.')
17
+ locatable = "united_states (Country)"
18
+ elsif $1.include?('PADD')
19
+ /\(PADD (.*)\)/.match($1)
20
+ next if $1 == '1' # skip PADD 1 because we always prefer subdistricts
21
+ locatable = "#{$1} (PetroleumAdministrationForDefenseDistrict)"
22
+ else
23
+ locatable = "#{$1} (State)"
24
+ end
25
+ date = Time.parse(date)
26
+ virtual_rows << HashWithIndifferentAccess.new(
27
+ :locatable => locatable,
28
+ :cost => cost,
29
+ :year => date.year,
30
+ :month => date.month
31
+ )
32
+ end
33
+ virtual_rows
34
+ end
35
+ end
36
+
37
+ class RemoteTableTest < Test::Unit::TestCase
38
+ should "open an XLS inside a zip file" do
39
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
40
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
41
+ assert_equal 'NSX', t.rows.first['carline name']
42
+ assert_equal 'VOLVO', t.rows.last['Manufacturer']
43
+ assert_equal 'V70 XC AWD', t.rows.last['carline name']
44
+ end
45
+
46
+ should "have indifferent hash access" do
47
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
48
+ assert_equal 'ACURA', t.rows.first['Manufacturer'.to_sym]
49
+ assert_equal 'NSX', t.rows.first['carline name'.to_sym]
50
+ assert_equal 'VOLVO', t.rows.last['Manufacturer'.to_sym]
51
+ assert_equal 'V70 XC AWD', t.rows.last['carline name'.to_sym]
52
+ end
53
+
54
+ should "open a Google Docs url" do
55
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
56
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
57
+ assert_equal 'AL', t.rows.first['State']
58
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
59
+ assert_equal 'WY', t.rows.last['State']
60
+ end
61
+
62
+ should "open an ODS" do
63
+ t = RemoteTable.new(:url => 'http://static.brighterplanet.com/science/profiler/footprint_model.ods', :sheet => 'Export')
64
+ assert_equal 'automobiles', t.rows.first['component']
65
+ assert_equal 2005.0, t.rows.first['period'].to_f
66
+ end
67
+
68
+ should "open a CSV inside a zip file" do
69
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
70
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
71
+ assert_equal 'NSX', t.rows.first['carline name']
72
+ assert_equal 'TOYOTA', t.rows.last['Manufacturer']
73
+ assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
74
+ end
75
+
76
+ should "open a fixed-width file with an inline schema inside a zip file" do
77
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
78
+ :filename => 'Gd6-dsc.txt',
79
+ :format => :fixed_width,
80
+ :crop => 21..26, # inclusive
81
+ :cut => '2-',
82
+ :select => lambda { |row| /\A[A-Z]/.match row[:code] },
83
+ :schema => [[ :code, 2, { :type => :string } ],
84
+ [ :spacer, 2 ],
85
+ [ :name, 52, { :type => :string } ]])
86
+ assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
87
+ assert_equal 'R', t.rows.first['code']
88
+ assert_equal 'electricity', t.rows.last['name']
89
+ assert_equal 'El', t.rows.last['code']
90
+ end
91
+
92
+ should "open an XLS with a parser" do
93
+ ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
94
+ ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
95
+
96
+ t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
97
+ :transform => { :class => FuelOilParser })
98
+ assert_equal ma_1990_01, t.rows[0]
99
+ assert_equal ga_1990_01, t.rows[1]
100
+ end
101
+ end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'remote_table'
8
+
9
+ class Test::Unit::TestCase
10
+ end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: remote_table
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.5
5
+ platform: ruby
6
+ authors:
7
+ - Seamus Abshere
8
+ - Andy Rossmeissl
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-11-02 00:00:00 -05:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activesupport
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ version:
26
+ - !ruby/object:Gem::Dependency
27
+ name: fastercsv
28
+ type: :runtime
29
+ version_requirement:
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ version:
36
+ - !ruby/object:Gem::Dependency
37
+ name: ryanwood-slither
38
+ type: :runtime
39
+ version_requirement:
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: "0"
45
+ version:
46
+ - !ruby/object:Gem::Dependency
47
+ name: roo
48
+ type: :runtime
49
+ version_requirement:
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.3.11
55
+ version:
56
+ description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
57
+ email: seamus@abshere.net
58
+ executables: []
59
+
60
+ extensions: []
61
+
62
+ extra_rdoc_files:
63
+ - LICENSE
64
+ - README.rdoc
65
+ files:
66
+ - .document
67
+ - .gitignore
68
+ - LICENSE
69
+ - README.rdoc
70
+ - Rakefile
71
+ - VERSION
72
+ - lib/remote_table.rb
73
+ - lib/remote_table/file.rb
74
+ - lib/remote_table/file/csv.rb
75
+ - lib/remote_table/file/fixed_width.rb
76
+ - lib/remote_table/file/ods.rb
77
+ - lib/remote_table/file/roo_spreadsheet.rb
78
+ - lib/remote_table/file/xls.rb
79
+ - lib/remote_table/package.rb
80
+ - lib/remote_table/request.rb
81
+ - lib/remote_table/transform.rb
82
+ - remote_table.gemspec
83
+ - test/remote_table_test.rb
84
+ - test/test_helper.rb
85
+ has_rdoc: true
86
+ homepage: http://github.com/seamusabshere/remote_table
87
+ licenses: []
88
+
89
+ post_install_message:
90
+ rdoc_options:
91
+ - --charset=UTF-8
92
+ - --line-numbers
93
+ - --inline-source
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: "0"
101
+ version:
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: "0"
107
+ version:
108
+ requirements:
109
+ - curl
110
+ rubyforge_project: remotetable
111
+ rubygems_version: 1.3.5
112
+ signing_key:
113
+ specification_version: 3
114
+ summary: Remotely open and parse XLS, ODS, CSV and fixed-width tables.
115
+ test_files:
116
+ - test/remote_table_test.rb
117
+ - test/test_helper.rb