seamusabshere-remote_table 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +5 -0
- data/LICENSE +20 -0
- data/README.rdoc +18 -0
- data/Rakefile +66 -0
- data/VERSION +1 -0
- data/lib/remote_table.rb +52 -0
- data/lib/remote_table/file.rb +56 -0
- data/lib/remote_table/file/csv.rb +45 -0
- data/lib/remote_table/file/fixed_width.rb +53 -0
- data/lib/remote_table/file/ods.rb +11 -0
- data/lib/remote_table/file/roo_spreadsheet.rb +30 -0
- data/lib/remote_table/file/xls.rb +11 -0
- data/lib/remote_table/package.rb +84 -0
- data/lib/remote_table/request.rb +36 -0
- data/lib/remote_table/transform.rb +32 -0
- data/remote_table.gemspec +81 -0
- data/test/remote_table_test.rb +101 -0
- data/test/test_helper.rb +10 -0
- metadata +116 -0
data/.document
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Seamus Abshere
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
= remote_table
|
2
|
+
|
3
|
+
Remotely open and parse XLS, ODS, CSV and fixed-width tables.
|
4
|
+
|
5
|
+
== Note on Patches/Pull Requests
|
6
|
+
|
7
|
+
* Fork the project.
|
8
|
+
* Make your feature addition or bug fix.
|
9
|
+
* Add tests for it. This is important so I don't break it in a
|
10
|
+
future version unintentionally.
|
11
|
+
* Commit, do not mess with rakefile, version, or history.
|
12
|
+
(if you want to have your own version, that is fine but
|
13
|
+
bump version in a commit by itself I can ignore when I pull)
|
14
|
+
* Send me a pull request. Bonus points for topic branches.
|
15
|
+
|
16
|
+
== Copyright
|
17
|
+
|
18
|
+
Copyright (c) 2009 Seamus Abshere. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Packaging tasks. Jeweler is optional: when it cannot be loaded we only print
# a hint so that the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "remote_table"
    gem.summary = %Q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
    gem.description = %Q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
    gem.email = "seamus@abshere.net"
    gem.homepage = "http://github.com/seamusabshere/remote_table"
    gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
    %w{ activesupport roo fastercsv ryanwood-slither }.each { |name| gem.add_dependency name } # TODO: do I need to include activesupport, etc.?
    gem.require_path = "lib"
    gem.files.include %w(lib/remote_table/**/*) unless gem.files.empty? # seems to fail once it's in the wild
    gem.rdoc_options << '--line-numbers' << '--inline-source'
    gem.requirements << 'curl'
    # gem.rubyforge_project = "remotetable"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::RubyforgeTasks.new do |rubyforge|
    rubyforge.doc_task = "rdoc"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage. RCov is optional; without it the :rcov task explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# API documentation. Prefer RDoc's own task class and fall back to the legacy
# Rake::RDocTask; like the optional tools above, a missing library must not
# prevent the rest of the Rakefile from loading.
rdoc_task_class = begin
  require 'rdoc/task'
  RDoc::Task
rescue LoadError
  begin
    require 'rake/rdoctask'
    Rake::RDocTask
  rescue LoadError
    nil
  end
end

if rdoc_task_class
  rdoc_task_class.new do |rdoc|
    # strip: VERSION ends with a newline that does not belong in the title
    version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

    rdoc.rdoc_dir = 'rdoc'
    rdoc.title = "remote_table #{version}"
    rdoc.rdoc_files.include('README*')
    rdoc.rdoc_files.include('lib/**/*.rb')
  end
else
  task :rdoc do
    abort "RDoc is not available. In order to run rdoc, you must: sudo gem install rdoc"
  end
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/remote_table.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activesupport'
|
3
|
+
require 'activerecord'
|
4
|
+
require 'tempfile'
|
5
|
+
require 'fastercsv'
|
6
|
+
require 'slither' # config.gem 'ryanwood-slither', :lib => 'slither', :source => 'http://gems.github.com'
|
7
|
+
require 'roo'
|
8
|
+
require 'remote_table/transform'
|
9
|
+
require 'remote_table/request'
|
10
|
+
require 'remote_table/package'
|
11
|
+
require 'remote_table/file'
|
12
|
+
require 'remote_table/file/csv'
|
13
|
+
require 'remote_table/file/fixed_width'
|
14
|
+
require 'remote_table/file/roo_spreadsheet'
|
15
|
+
require 'remote_table/file/ods'
|
16
|
+
require 'remote_table/file/xls'
|
17
|
+
|
18
|
+
# Downloads a remote table (XLS, ODS, CSV or fixed-width), stages it locally
# and exposes its rows.
#
# The options hash given to #initialize (the "bus") is handed to every
# collaborator in turn; collaborators may add hints to it for the ones built
# after them.
class RemoteTable
  attr_accessor :request, :package, :file, :transform
  attr_accessor :table

  # bus - options hash (:url is required by Package and Request).
  #
  # Construction order matters: Transform and Package both write hints into
  # the bus that File reads afterwards.
  def initialize(bus)
    @transform = Transform.new(bus)
    @package = Package.new(bus)
    @request = Request.new(bus)
    @file = File.new(bus)
  end

  # Yields every row of the table, downloading and staging it on first use.
  # Without a block, returns an Enumerator over the rows.
  def each_row
    return enum_for(:each_row) unless block_given?
    finish_table! unless table
    table.each_row { |row| yield row }
  end

  # Returns all rows as an Array. The array is built once and memoized.
  def rows
    cache_rows! if @_row_cache.nil?
    @_row_cache
  end

  private

  # Runs the pipeline: download -> stage (decompress/unpack) -> tabulate ->
  # transform, and remembers the result in #table.
  def finish_table!
    package_path = request.download
    file_path = package.stage(package_path)
    raw_table = file.tabulate(file_path)
    self.table = transform.apply(raw_table) # must return something that responds to each_row
  end

  # Collects into a local first so that an exception raised while iterating
  # never leaves a partially filled cache behind.
  def cache_rows!
    collected = []
    each_row { |row| collected << row }
    @_row_cache = collected
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
class RemoteTable
  # Describes the table file found inside a staged package: its name, format
  # and the parsing options taken from the bus. On construction the instance
  # is extended with the format module (RemoteTable::Csv, ::FixedWidth, ...)
  # whose name is derived from #format.
  class File
    attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
    attr_accessor :path

    # bus - options hash; :format falls back to a guess based on :filename.
    def initialize(bus)
      @filename = bus[:filename]
      @format = bus[:format] || format_from_filename
      @delimiter = bus[:delimiter]
      @sheet = bus[:sheet] || 0
      @skip = bus[:skip] # rows
      @crop = bus[:crop] # rows
      @cut = bus[:cut] # columns
      @headers = bus[:headers]
      @schema = bus[:schema]
      @schema_name = bus[:schema_name]
      @trap = bus[:trap]
      extend "RemoteTable::#{format.to_s.camelcase}".constantize
    end

    # Remembers where the staged file lives and returns self, which responds
    # to each_row through the format module. For fixed-width files with an
    # inline (Array) schema, the Slither definition is registered first.
    def tabulate(path)
      # TODO move to generic subclass callback
      if format == :fixed_width && schema.is_a?(Array) && !@fixed_width_schema_defined
        define_fixed_width_schema!
      end
      self.path = path
      self
    end

    private

    # Registers a Slither definition built from the inline schema, an Array of
    # [name, width, options] entries; an entry named :spacer becomes a spacer.
    # Raises if a schema_name was supplied as well.
    def define_fixed_width_schema!
      raise "can't define both schema_name and schema" if !schema_name.blank?
      self.schema_name = "autogenerated_#{filename.to_s.gsub(/[^a-z0-9_]/i, '')}".to_sym
      # The trap receives the raw line, so the default has to accept one argument.
      self.trap ||= lambda { |_line| true }
      trap_block = trap
      columns = schema
      Slither.define schema_name do |d|
        d.rows do |row|
          row.trap(&trap_block)
          columns.each do |name, width, options|
            if name == :spacer
              row.spacer width
            else
              row.column name, width, options || {}
            end
          end
        end
      end
      # Remember that the name was generated here, so a repeated #tabulate
      # does not mistake it for a user-supplied schema_name.
      @fixed_width_schema_defined = true
    end

    # Guesses the format from the filename extension (case-insensitively):
    # :xls or :ods when recognised, :csv otherwise.
    def format_from_filename
      extname = ::File.extname(filename.to_s).delete('.').downcase
      [ :xls, :ods ].detect { |known| known.to_s == extname } || :csv
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class RemoteTable
  # Row iteration for delimited text files. Extended onto a RemoteTable::File,
  # whose path, skip, headers and delimiter accessors it reads.
  module Csv
    # Yields one hash per non-blank row: keyed by header when FasterCSV
    # returns header-aware rows, keyed by column index (0-based) when it
    # returns plain arrays. Without a block, returns an Enumerator.
    #
    # When skip is set, the file at path is temporarily rewritten without its
    # first rows and restored afterwards, also when parsing raises.
    def each_row
      return enum_for(:each_row) unless block_given?
      skip_rows!
      # ::File, because plain File resolves to RemoteTable::File in here. The
      # block form closes the handle once parsing is done.
      ::File.open(path) do |io|
        FasterCSV.parse(io, fastercsv_options) do |row|
          if row.respond_to?(:fields) # it's a traditional fastercsv row hash
            next if row.fields.compact.blank?
            hash = HashWithIndifferentAccess.new(row.to_hash)
          else # it's an array, which i think happens if you're using :headers => nil or :col_sep
            next if row.compact.blank?
            hash = ActiveSupport::OrderedHash.new
            row.each_with_index { |element, index| hash[index] = element }
          end
          yield hash
        end
      end
    ensure
      restore_rows!
    end

    private

    # Options handed to FasterCSV: blank lines are skipped, the first row is
    # treated as headers unless headers == false, and the delimiter (if any)
    # becomes the column separator.
    def fastercsv_options
      options = { :skip_blanks => true } # ...and this will skip []
      options[:headers] = (headers == false) ? nil : :first_row
      options[:col_sep] = delimiter if delimiter
      options
    end

    # Keeps a copy of the file as "<path>.original" and rewrites path without
    # its first `skip` lines. Done in Ruby so the path never reaches a shell.
    def skip_rows!
      return unless skip
      original = "#{path}.original"
      FileUtils.cp(path, original)
      ::File.open(original, 'rb') do |source|
        ::File.open(path, 'wb') do |target|
          source.each_line.with_index { |line, index| target.write(line) if index >= skip }
        end
      end
    end

    # Puts the untouched copy back. A no-op when no copy exists, so that it is
    # safe to call from `ensure` even if skip_rows! never ran or failed early.
    def restore_rows!
      return unless skip
      original = "#{path}.original"
      FileUtils.mv(original, path) if ::File.exist?(original)
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'fileutils'
require 'shellwords'

class RemoteTable
  # Row iteration for fixed-width text files, parsed with Slither. Extended
  # onto a RemoteTable::File, whose path, crop, skip, cut and schema_name
  # accessors it reads.
  module FixedWidth
    # Yields one HashWithIndifferentAccess per row of Slither's :rows section.
    # Without a block, returns an Enumerator.
    #
    # The file at path is temporarily rewritten (crop, then skip, then cut)
    # and the original is put back afterwards, also when parsing raises.
    def each_row
      return enum_for(:each_row) unless block_given?
      crop_rows!
      skip_rows!
      cut_columns!
      parsed = Slither.parse(path, schema_name)
      # guard against a missing :rows section (e.g. no line matched)
      (parsed[:rows] || []).each { |row| yield HashWithIndifferentAccess.new(row) }
    ensure
      # undo in reverse order; each step is a no-op if its backup is absent
      uncut_columns!
      unskip_rows!
      uncrop_rows!
    end

    private

    # Keeps only the character positions listed in `cut` (a cut(1) -c list
    # such as '2-') on every line. Still delegates to cut(1), but every
    # argument is shell-escaped.
    def cut_columns!
      return unless cut
      original = "#{path}.uncut"
      FileUtils.cp(path, original)
      system("cut -c #{Shellwords.escape(cut.to_s)} < #{Shellwords.escape(original)} > #{Shellwords.escape(path)}")
    end

    def uncut_columns!
      return unless cut
      restore_from "#{path}.uncut"
    end

    # Drops the first `skip` lines.
    def skip_rows!
      return unless skip
      rewrite_keeping_lines("#{path}.unskipped") { |index| index >= skip }
    end

    def unskip_rows!
      return unless skip
      restore_from "#{path}.unskipped"
    end

    # Keeps lines crop.first through crop.last (1-based, inclusive).
    def crop_rows!
      return unless crop
      first_index = [crop.first, 1].max - 1
      count = crop.last - crop.first + 1
      rewrite_keeping_lines("#{path}.uncropped") { |index| index >= first_index && index < first_index + count }
    end

    def uncrop_rows!
      return unless crop
      restore_from "#{path}.uncropped"
    end

    # Copies path to backup, then rewrites path with only those lines for
    # whose 0-based index the block returns true. Pure Ruby, binary mode.
    def rewrite_keeping_lines(backup)
      FileUtils.cp(path, backup)
      ::File.open(backup, 'rb') do |source|
        ::File.open(path, 'wb') do |target|
          source.each_line.with_index { |line, index| target.write(line) if yield(index) }
        end
      end
    end

    # Moves backup over path when the backup exists.
    def restore_from(backup)
      FileUtils.mv(backup, path) if ::File.exist?(backup)
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class RemoteTable
  # Row iteration shared by the spreadsheet formats. The host must provide
  # roo_klass (the Roo class to instantiate) next to the path, sheet and skip
  # accessors of RemoteTable::File.
  module RooSpreadsheet
    # Yields one HashWithIndifferentAccess per data row, keyed by column
    # header. Cell values are stringified, have anything that looks like a
    # markup tag removed and are stripped. Without a block, returns an
    # Enumerator.
    def each_row
      return enum_for(:each_row) unless block_given?
      oo = roo_klass.new(path, nil, :ignore)
      # a numeric sheet is an index into the sheet list, anything else a name
      oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet

      # to_i: a nil bound (presumably an empty sheet -- confirm against roo)
      # would otherwise produce an endless range on modern Ruby.
      columns = 1..oo.last_column.to_i
      last_row = oo.last_row.to_i

      column_names = {}
      columns.each do |col|
        name = oo.cell(header_row, col)
        name = oo.cell(header_row - 1, col) if name.blank? # look up
        column_names[col] = name
      end

      first_data_row.upto(last_row) do |row|
        values = {}
        columns.each do |col|
          values[column_names[col]] = oo.cell(row, col).to_s.gsub(/<[^>]+>/, '').strip
        end
        yield HashWithIndifferentAccess.new(values)
      end
    end

    private

    # 1-based row holding the headers: the first row after the skipped ones.
    def header_row
      1 + skip.to_i
    end

    # 1-based row of the first data record, directly below the headers.
    def first_data_row
      1 + header_row
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'fileutils'

class RemoteTable
  # Knows how the downloaded package is wrapped (compression and/or packing,
  # guessed from the URL's basename unless given) and turns it into a plain
  # file on disk.
  class Package
    attr_accessor :url, :compression, :packing, :filename

    # bus - options hash. :url is required; :compression, :packing and
    # :filename override what would be derived from the URL. The resulting
    # filename is written back into the bus as a hint unless one was given.
    def initialize(bus)
      @url = bus[:url] || raise("need url")
      @compression = bus[:compression] || compression_from_basename
      @packing = bus[:packing] || packing_from_basename_and_compression
      @filename = bus[:filename] || filename_from_basename_and_compression_and_packing
      add_hints!(bus)
    end

    # Publishes the filename in hash unless the key is already present.
    def add_hints!(hash)
      hash[:filename] = filename unless hash.has_key?(:filename)
    end

    # Decompresses and unpacks the package at path (in its own directory) and
    # returns the path of the table file inside that directory.
    def stage(path)
      decompress(path)
      unpack(path)
      identify(path)
      file_path(path)
    end

    private

    # Renames the package to carry its compression extension and runs the
    # matching tool on it. Raises ArgumentError for an unknown compression.
    def decompress(path)
      return unless compression
      archive = "#{path}.#{compression}"
      argv = case compression
      when :zip, :exe
        # -q: unzip's listing used to be swallowed by the backticks
        ['unzip', '-q', archive, '-d', ::File.dirname(path)]
      when :bz2
        ['bunzip2', archive]
      when :gz
        ['gunzip', archive]
      else
        raise ArgumentError, "unsupported compression: #{compression.inspect}"
      end
      move_and_process path, archive, argv
    end

    # Same as decompress, for archive formats. Raises ArgumentError for an
    # unknown packing.
    def unpack(path)
      return unless packing
      archive = "#{path}.#{packing}"
      argv = case packing
      when :tar
        ['tar', '-xf', archive, '-C', ::File.dirname(path)]
      else
        raise ArgumentError, "unsupported packing: #{packing.inspect}"
      end
      move_and_process path, archive, argv
    end

    # Moves path to archive, then runs argv without going through a shell, so
    # paths containing spaces or metacharacters are safe. The exit status is
    # returned but deliberately not enforced, as before.
    def move_and_process(path, archive, argv)
      FileUtils.mv(path, archive)
      system(*argv)
    end

    # ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
    # ex. B: 2007-01.tar.gz (packing)
    # ex. C: 2007-01.zip (compression capable of storing multiple files)
    # in C but not in the others, we can default to the basename of the package
    # in order to do this we'll need to mv the uncompressed file on top of the original file
    def identify(path)
      # FileUtils.mv: File.mv only exists with Ruby 1.8's ftools loaded
      FileUtils.mv(path, file_path(path)) if !packing && [ nil, :bz2, :gz ].include?(compression)
    end

    # Where the table file is expected: filename inside path's directory.
    def file_path(path)
      ::File.join(::File.dirname(path), filename)
    end

    # Dot-separated parts of the URL path's basename, as symbols.
    def basename_parts
      ::File.basename(URI.parse(url).path).split('.').map(&:to_sym)
    end

    # Compression named by the last extension, if it is a known one.
    def compression_from_basename
      last_part = basename_parts.last
      [ :zip, :exe, :bz2, :gz ].detect { |known| known == last_part }
    end

    # Packing named by the extension just before the compression extension
    # (or by the last extension when there is no compression).
    def packing_from_basename_and_compression
      parts = basename_parts
      candidate = (parts.last == compression) ? parts[-2] : parts.last
      [ :tar ].detect { |known| known == candidate }
    end

    # Basename with the compression and packing extensions removed.
    def filename_from_basename_and_compression_and_packing
      parts = basename_parts
      parts.pop if parts.last == compression
      parts.pop if parts.last == packing
      parts.join('.')
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'tmpdir'
require 'fileutils'

class RemoteTable
  # Downloads the package behind a URL into a private staging directory.
  class Request
    attr_accessor :url, :post_data, :username, :password

    # TODO: support post_data
    # TODO: support HTTP basic auth
    def initialize(bus)
      @url = bus[:url] || raise("need url")
    end

    # Fetches the URL with curl into <staging dir>/REMOTE_TABLE_PACKAGE and
    # returns that path. Raises a RuntimeError when curl cannot be run or
    # reports a failure.
    def download
      path = ::File.join(staging_dir_path, 'REMOTE_TABLE_PACKAGE')
      # Argument list instead of a shell string: the URL is never interpreted
      # by a shell, and --url keeps it from being parsed as an option.
      fetched = system('curl', '--silent', '--output', path, '--url', url_with_google_docs_handling)
      raise "could not download #{url}" unless fetched
      # an empty response may leave no file behind; the shell redirect used to create one
      FileUtils.touch(path) unless ::File.exist?(path)
      path
    end

    private

    # Creates a fresh temporary directory, named after the URL and removed
    # when the process exits, and returns its path.
    def staging_dir_path
      # truncated so that long URLs cannot exceed the file name length limit
      prefix = url.gsub(/[^a-z0-9]+/i, '_')[0, 64]
      path = Dir.mktmpdir(prefix)
      at_exit { FileUtils.rm_rf(path) }
      path
    end

    # For Google Docs spreadsheet URLs, forces CSV output: any existing
    # output parameter is dropped (the other parameters are kept) and
    # "&output=csv" is appended. Other URLs are returned as they are.
    def url_with_google_docs_handling
      return url unless url.include?('spreadsheets.google.com')
      url.gsub(/&output=[^&]*/, '') + '&output=csv'
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
class RemoteTable
  # Post-processes the rows of a raw table: an optional user-supplied
  # transform object may turn every raw row into zero or more "virtual" rows,
  # which are then filtered through the optional select/reject callables.
  class Transform
    attr_accessor :select, :reject, :transform_class, :transform_options, :transform, :raw_table

    # bus - options hash.
    #   :transform - hash with the transform's :class; the remaining entries
    #                are passed to that class's constructor. The key is
    #                removed from the bus.
    #   :select    - callable; rows for which it returns false/nil are dropped.
    #   :reject    - callable; rows for which it returns true are dropped.
    #
    # The transform object gets a chance to write hints into the bus (via
    # add_hints!, when it defines it) before :select and :reject are read.
    # Raises ArgumentError when :transform lacks a :class.
    def initialize(bus)
      if transform_params = bus.delete(:transform)
        # work on a copy so the caller's hash keeps its :class entry
        transform_params = transform_params.dup
        @transform_class = transform_params.delete(:class)
        raise ArgumentError, "transform needs a :class" unless @transform_class
        @transform_options = transform_params
        @transform = @transform_class.new(@transform_options)
        @transform.add_hints!(bus) if @transform.respond_to?(:add_hints!)
      end
      @select = bus[:select]
      @reject = bus[:reject]
    end

    # Remembers the raw table (anything responding to each_row) and returns
    # self, which responds to each_row as well.
    def apply(raw_table)
      self.raw_table = raw_table
      self
    end

    # Yields every transformed row that passes the filters. Without a block,
    # returns an Enumerator.
    def each_row
      return enum_for(:each_row) unless block_given?
      raw_table.each_row do |row|
        virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
        Array.wrap(virtual_rows).each do |virtual_row|
          next if select && !select.call(virtual_row)
          next if reject && reject.call(virtual_row)
          yield virtual_row
        end
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{remote_table}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
+
s.date = %q{2009-08-18}
|
13
|
+
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
|
+
s.email = %q{seamus@abshere.net}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/remote_table.rb",
|
27
|
+
"lib/remote_table/file.rb",
|
28
|
+
"lib/remote_table/file.rb",
|
29
|
+
"lib/remote_table/file/csv.rb",
|
30
|
+
"lib/remote_table/file/csv.rb",
|
31
|
+
"lib/remote_table/file/fixed_width.rb",
|
32
|
+
"lib/remote_table/file/fixed_width.rb",
|
33
|
+
"lib/remote_table/file/ods.rb",
|
34
|
+
"lib/remote_table/file/ods.rb",
|
35
|
+
"lib/remote_table/file/roo_spreadsheet.rb",
|
36
|
+
"lib/remote_table/file/roo_spreadsheet.rb",
|
37
|
+
"lib/remote_table/file/xls.rb",
|
38
|
+
"lib/remote_table/file/xls.rb",
|
39
|
+
"lib/remote_table/package.rb",
|
40
|
+
"lib/remote_table/package.rb",
|
41
|
+
"lib/remote_table/request.rb",
|
42
|
+
"lib/remote_table/request.rb",
|
43
|
+
"lib/remote_table/transform.rb",
|
44
|
+
"lib/remote_table/transform.rb",
|
45
|
+
"remote_table.gemspec",
|
46
|
+
"test/remote_table_test.rb",
|
47
|
+
"test/test_helper.rb"
|
48
|
+
]
|
49
|
+
s.homepage = %q{http://github.com/seamusabshere/remote_table}
|
50
|
+
s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
|
51
|
+
s.require_paths = ["lib"]
|
52
|
+
s.requirements = ["curl"]
|
53
|
+
s.rubygems_version = %q{1.3.5}
|
54
|
+
s.summary = %q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
|
55
|
+
s.test_files = [
|
56
|
+
"test/remote_table_test.rb",
|
57
|
+
"test/test_helper.rb"
|
58
|
+
]
|
59
|
+
|
60
|
+
if s.respond_to? :specification_version then
|
61
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
62
|
+
s.specification_version = 3
|
63
|
+
|
64
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
65
|
+
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
66
|
+
s.add_runtime_dependency(%q<roo>, [">= 0"])
|
67
|
+
s.add_runtime_dependency(%q<fastercsv>, [">= 0"])
|
68
|
+
s.add_runtime_dependency(%q<ryanwood-slither>, [">= 0"])
|
69
|
+
else
|
70
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
71
|
+
s.add_dependency(%q<roo>, [">= 0"])
|
72
|
+
s.add_dependency(%q<fastercsv>, [">= 0"])
|
73
|
+
s.add_dependency(%q<ryanwood-slither>, [">= 0"])
|
74
|
+
end
|
75
|
+
else
|
76
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
77
|
+
s.add_dependency(%q<roo>, [">= 0"])
|
78
|
+
s.add_dependency(%q<fastercsv>, [">= 0"])
|
79
|
+
s.add_dependency(%q<ryanwood-slither>, [">= 0"])
|
80
|
+
end
|
81
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
require 'time'

# Example transform used by the tests: turns one wide spreadsheet row (a
# 'Date' column plus one "<location> Residual Fuel Oil ..." column per
# location) into one row per location.
class FuelOilParser
  COLUMN_PATTERN = /(.*) Residual Fuel Oil/
  PADD_PATTERN = /\(PADD (.*)\)/

  def initialize(options = {})
    # nothing
  end

  # Tells RemoteTable which sheet to read, how many rows to skip and to keep
  # only rows from 1990 onwards.
  def add_hints!(bus)
    bus[:sheet] = 'Data 1'
    bus[:skip] = 2
    bus[:select] = lambda { |row| row[:year] > 1989 }
  end

  # Returns an Array of hashes (:locatable, :cost, :year, :month), one per
  # location column that has a cost. Rows without a date yield nothing.
  def apply(row)
    virtual_rows = []
    row.keys.each do |location_column_name|
      column = COLUMN_PATTERN.match(location_column_name.to_s)
      next unless column
      next if (cost = row[location_column_name]).blank? || (date = row['Date']).blank?
      next unless locatable = locatable_for(column[1])
      date = Time.parse(date)
      virtual_rows << HashWithIndifferentAccess.new(
        :locatable => locatable,
        :cost => cost,
        :year => date.year,
        :month => date.month
      )
    end
    virtual_rows
  end

  private

  # Maps a column's location prefix to a locatable label, or nil when the
  # column has to be skipped.
  def locatable_for(location)
    if location.start_with?('U.S.')
      "united_states (Country)"
    elsif location.include?('PADD')
      padd = PADD_PATTERN.match(location)
      # no district code to extract: skip rather than emit an empty label
      return nil unless padd
      return nil if padd[1] == '1' # skip PADD 1 because we always prefer subdistricts
      "#{padd[1]} (PetroleumAdministrationForDefenseDistrict)"
    else
      "#{location} (State)"
    end
  end
end
|
36
|
+
|
37
|
+
class RemoteTableTest < Test::Unit::TestCase
|
38
|
+
should "open an XLS inside a zip file" do
|
39
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
40
|
+
assert_equal 'ACURA', t.rows.first['Manufacturer']
|
41
|
+
assert_equal 'NSX', t.rows.first['carline name']
|
42
|
+
assert_equal 'VOLVO', t.rows.last['Manufacturer']
|
43
|
+
assert_equal 'V70 XC AWD', t.rows.last['carline name']
|
44
|
+
end
|
45
|
+
|
46
|
+
should "have indifferent hash access" do
|
47
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
|
48
|
+
assert_equal 'ACURA', t.rows.first['Manufacturer'.to_sym]
|
49
|
+
assert_equal 'NSX', t.rows.first['carline name'.to_sym]
|
50
|
+
assert_equal 'VOLVO', t.rows.last['Manufacturer'.to_sym]
|
51
|
+
assert_equal 'V70 XC AWD', t.rows.last['carline name'.to_sym]
|
52
|
+
end
|
53
|
+
|
54
|
+
should "open a Google Docs url" do
|
55
|
+
t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
|
56
|
+
assert_equal 'Gulf Coast', t.rows.first['PAD district name']
|
57
|
+
assert_equal 'AL', t.rows.first['State']
|
58
|
+
assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
|
59
|
+
assert_equal 'WY', t.rows.last['State']
|
60
|
+
end
|
61
|
+
|
62
|
+
should "open an ODS" do
|
63
|
+
t = RemoteTable.new(:url => 'http://static.brighterplanet.com/science/profiler/footprint_model.ods', :sheet => 'Export')
|
64
|
+
assert_equal 'automobiles', t.rows.first['component']
|
65
|
+
assert_equal 2005.0, t.rows.first['period'].to_f
|
66
|
+
end
|
67
|
+
|
68
|
+
should "open a CSV inside a zip file" do
|
69
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
|
70
|
+
assert_equal 'ACURA', t.rows.first['Manufacturer']
|
71
|
+
assert_equal 'NSX', t.rows.first['carline name']
|
72
|
+
assert_equal 'TOYOTA', t.rows.last['Manufacturer']
|
73
|
+
assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
|
74
|
+
end
|
75
|
+
|
76
|
+
should "open a fixed-width file with an inline schema inside a zip file" do
|
77
|
+
t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
78
|
+
:filename => 'Gd6-dsc.txt',
|
79
|
+
:format => :fixed_width,
|
80
|
+
:crop => 21..26, # inclusive
|
81
|
+
:cut => '2-',
|
82
|
+
:select => lambda { |row| /\A[A-Z]/.match row[:code] },
|
83
|
+
:schema => [[ :code, 2, { :type => :string } ],
|
84
|
+
[ :spacer, 2 ],
|
85
|
+
[ :name, 52, { :type => :string } ]])
|
86
|
+
assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
|
87
|
+
assert_equal 'R', t.rows.first['code']
|
88
|
+
assert_equal 'electricity', t.rows.last['name']
|
89
|
+
assert_equal 'El', t.rows.last['code']
|
90
|
+
end
|
91
|
+
|
92
|
+
should "open an XLS with a parser" do
|
93
|
+
ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
|
94
|
+
ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
|
95
|
+
|
96
|
+
t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
|
97
|
+
:transform => { :class => FuelOilParser })
|
98
|
+
assert_equal ma_1990_01, t.rows[0]
|
99
|
+
assert_equal ga_1990_01, t.rows[1]
|
100
|
+
end
|
101
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: seamusabshere-remote_table
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Seamus Abshere
|
8
|
+
- Andy Rossmeissl
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-08-18 00:00:00 -07:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: activesupport
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: roo
|
28
|
+
type: :runtime
|
29
|
+
version_requirement:
|
30
|
+
version_requirements: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
version:
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: fastercsv
|
38
|
+
type: :runtime
|
39
|
+
version_requirement:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: "0"
|
45
|
+
version:
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: ryanwood-slither
|
48
|
+
type: :runtime
|
49
|
+
version_requirement:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
|
57
|
+
email: seamus@abshere.net
|
58
|
+
executables: []
|
59
|
+
|
60
|
+
extensions: []
|
61
|
+
|
62
|
+
extra_rdoc_files:
|
63
|
+
- LICENSE
|
64
|
+
- README.rdoc
|
65
|
+
files:
|
66
|
+
- .document
|
67
|
+
- .gitignore
|
68
|
+
- LICENSE
|
69
|
+
- README.rdoc
|
70
|
+
- Rakefile
|
71
|
+
- VERSION
|
72
|
+
- lib/remote_table.rb
|
73
|
+
- lib/remote_table/file.rb
|
74
|
+
- lib/remote_table/file/csv.rb
|
75
|
+
- lib/remote_table/file/fixed_width.rb
|
76
|
+
- lib/remote_table/file/ods.rb
|
77
|
+
- lib/remote_table/file/roo_spreadsheet.rb
|
78
|
+
- lib/remote_table/file/xls.rb
|
79
|
+
- lib/remote_table/package.rb
|
80
|
+
- lib/remote_table/request.rb
|
81
|
+
- lib/remote_table/transform.rb
|
82
|
+
- remote_table.gemspec
|
83
|
+
- test/remote_table_test.rb
|
84
|
+
- test/test_helper.rb
|
85
|
+
has_rdoc: false
|
86
|
+
homepage: http://github.com/seamusabshere/remote_table
|
87
|
+
licenses:
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options:
|
90
|
+
- --charset=UTF-8
|
91
|
+
- --line-numbers
|
92
|
+
- --inline-source
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: "0"
|
100
|
+
version:
|
101
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: "0"
|
106
|
+
version:
|
107
|
+
requirements:
|
108
|
+
- curl
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 1.3.5
|
111
|
+
signing_key:
|
112
|
+
specification_version: 3
|
113
|
+
summary: Remotely open and parse XLS, ODS, CSV and fixed-width tables.
|
114
|
+
test_files:
|
115
|
+
- test/remote_table_test.rb
|
116
|
+
- test/test_helper.rb
|