remote_table-ruby19 0.2.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
class RemoteTable
  # Format mixin for .ods files. Extending an object with Ods also gives it
  # the RooSpreadsheet row reader and tells that reader which class to build.
  module Ods
    # Ruby calls this hook with the object that just extended Ods.
    def self.extended(host)
      host.extend(RooSpreadsheet)
    end

    # The reader class RooSpreadsheet#each_row instantiates for this format
    # (presumably roo's OpenOffice reader; it is defined outside this file).
    def roo_klass
      Openoffice
    end
  end
end
@@ -0,0 +1,44 @@
1
class RemoteTable
  # Row iteration shared by the spreadsheet formats that are read through a
  # roo reader class.
  #
  # The extending object has to supply +roo_klass+, +path+, +sheet+,
  # +headers+, +keep_blank_rows+ and +skip+; they are called here but not
  # defined here.
  module RooSpreadsheet
    # Yields one ActiveSupport::OrderedHash per data row, keyed by column
    # name (or zero-based column number when +headers+ is false).
    #
    # Cell values are stringified, stripped of anything that looks like a
    # markup tag, and trimmed. Rows whose values are all blank are skipped
    # unless +keep_blank_rows+ is set.
    def each_row(&block)
      oo = roo_klass.new(path, nil, :ignore)
      oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
      # to_i turns a nil bound into 0, which makes the loops below empty.
      # NOTE(review): assumes the reader reports nil bounds for an empty
      # sheet -- confirm against the roo version in use.
      last_column = oo.last_column.to_i
      column_references = roo_column_references(oo, last_column)
      first_data_row.upto(oo.last_row.to_i) do |raw_row|
        ordered_hash = ActiveSupport::OrderedHash.new
        1.upto(last_column) do |col|
          key = column_references[col]
          next if key.blank?
          ordered_hash[key] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
        end
        yield ordered_hash if keep_blank_rows || ordered_hash.any? { |_, value| value.present? }
      end
    end

    private

    # Maps each 1-based spreadsheet column to the key used in yielded rows.
    def roo_column_references(oo, last_column)
      references = {}
      1.upto(last_column) do |col|
        references[col] =
          if headers == false
            # zero-based numeric keys
            col - 1
          elsif headers.is_a?(Array)
            # names
            headers[col - 1]
          else
            # read headers from the file itself
            roo_header_cell(oo, col)
          end
      end
      references
    end

    # Header text for +col+. When the header cell is blank, looks one row up,
    # but never above the first row of the sheet.
    def roo_header_cell(oo, col)
      name = oo.cell(header_row, col)
      name = oo.cell(header_row - 1, col) if name.blank? && header_row > 1
      name
    end

    # 1-based row holding the column headers, after +skip+ leading rows.
    def header_row
      1 + skip.to_i
    end

    # 1-based row where data starts (the row right after the header row).
    def first_data_row
      1 + header_row
    end
  end
end
@@ -0,0 +1,11 @@
1
class RemoteTable
  # Format mixin for .xls files. Extending an object with Xls also gives it
  # the RooSpreadsheet row reader and tells that reader which class to build.
  module Xls
    # Ruby calls this hook with the object that just extended Xls.
    def self.extended(host)
      host.extend(RooSpreadsheet)
    end

    # The reader class RooSpreadsheet#each_row instantiates for this format
    # (presumably roo's Excel reader; it is defined outside this file).
    def roo_klass
      Excel
    end
  end
end
@@ -0,0 +1,11 @@
1
class RemoteTable
  # Format mixin for .xlsx files. Extending an object with Xlsx also gives it
  # the RooSpreadsheet row reader and tells that reader which class to build.
  module Xlsx
    # Ruby calls this hook with the object that just extended Xlsx.
    def self.extended(host)
      host.extend(RooSpreadsheet)
    end

    # The reader class RooSpreadsheet#each_row instantiates for this format
    # (presumably roo's Excel 2007 reader; it is defined outside this file).
    def roo_klass
      Excelx
    end
  end
end
@@ -0,0 +1,89 @@
1
class RemoteTable
  # Describes how a downloaded file is wrapped (compressed and/or packed) and
  # unwraps it in place, so that the wanted file ends up next to the download.
  #
  # Whatever is not given explicitly in the options hash is guessed from the
  # extensions of the URL's basename.
  class Package
    attr_accessor :url, :compression, :packing, :filename, :glob

    # bus - options hash. Reads :url (required), :compression, :packing,
    #       :filename and :glob; writes :filename back when it was absent.
    #
    # Raises RuntimeError when :url is missing.
    def initialize(bus)
      @url = bus[:url]
      raise "need url" unless @url
      # order matters: each guess relies on the attributes assigned before it
      @compression = bus[:compression] || compression_from_basename
      @packing = bus[:packing] || packing_from_basename_and_compression
      @filename = bus[:filename] || filename_from_basename_and_compression_and_packing
      @glob = bus[:glob]
      add_hints!(bus)
    end

    # Publishes the resolved filename into +hash+ unless the key already exists.
    def add_hints!(hash)
      hash[:filename] = filename unless hash.has_key?(:filename)
    end

    # Unwraps the download at +path+ and returns where the wanted file now is
    # (same directory as +path+, named +filename+).
    def stage(path)
      decompress(path)
      unpack(path)
      identify(path)
      file_path(path)
    end

    private

    # Runs the external decompressor for +compression+, if any.
    #
    # Raises ArgumentError for a compression this class has no command for;
    # without that check the shell would be asked to run the download itself.
    def decompress(path)
      return unless compression
      cmd, args =
        case compression
        when :zip, :exe
          ['unzip', "-d #{Escape.shell_single_word ::File.dirname(path)}"]
        when :bz2
          ['bunzip2', nil]
        when :gz
          ['gunzip', nil]
        else
          raise ArgumentError, "unsupported compression: #{compression.inspect}"
        end
      move_and_process path, compression, cmd, args
    end

    # Runs the external unpacker for +packing+, if any.
    #
    # Raises ArgumentError for a packing this class has no command for.
    def unpack(path)
      return unless packing
      cmd, args =
        case packing
        when :tar
          # the target directory is escaped just like in the unzip branch
          ['tar -xf', "-C #{Escape.shell_single_word ::File.dirname(path)}"]
        else
          raise ArgumentError, "unsupported packing: #{packing.inspect}"
        end
      move_and_process path, packing, cmd, args
    end

    # Renames +path+ to carry +extname+ as its extension, then runs
    # "<cmd> <renamed path> <args>" through RemoteTable.backtick_with_reporting.
    def move_and_process(path, extname, cmd, args)
      new_path = "#{path}.#{extname}"
      FileUtils.mv path, new_path
      RemoteTable.backtick_with_reporting "#{cmd} #{Escape.shell_single_word new_path} #{args}"
    end

    # ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
    # ex. B: 2007-01.tar.gz (packing)
    # ex. C: 2007-01.zip (compression capable of storing multiple files)
    #
    # Moves the wanted file to +file_path+: the first glob match when a glob
    # is given, otherwise the single file itself in case A / no wrapping.
    #
    # Raises RuntimeError when a glob was given but matched nothing.
    def identify(path)
      if glob.present?
        match = Dir[::File.dirname(path) + glob].first
        raise "no file matched glob #{glob.inspect} in #{::File.dirname(path)}" unless match
        FileUtils.mv match, file_path(path)
      elsif !packing && [ nil, :bz2, :gz ].include?(compression)
        FileUtils.mv path, file_path(path)
      end
    end

    # +filename+ placed in the directory of +path+.
    def file_path(path)
      ::File.join(::File.dirname(path), filename)
    end

    # Dot-separated pieces of the URL's basename, as symbols.
    def basename_parts
      ::File.basename(URI.parse(url).path).split('.').map(&:to_sym)
    end

    # The known compression named by the last extension, or nil.
    def compression_from_basename
      last_part = basename_parts.last
      [ :zip, :exe, :bz2, :gz ].detect { |candidate| candidate == last_part }
    end

    # The known packing named by the extension just inside the compression
    # extension (or by the last extension when there is no compression), or nil.
    def packing_from_basename_and_compression
      parts = basename_parts
      relevant_part = (parts.last == compression) ? parts[-2] : parts.last
      [ :tar ].detect { |candidate| candidate == relevant_part }
    end

    # The basename with the compression and packing extensions removed.
    def filename_from_basename_and_compression_and_packing
      ary = basename_parts
      ary.pop if ary.last == compression
      ary.pop if ary.last == packing
      ary.join('.')
    end
  end
end
@@ -0,0 +1,44 @@
1
class RemoteTable
  # Knows where a table lives and how to fetch it into a private staging
  # directory.
  class Request
    attr_accessor :parsed_url, :post_data, :username, :password
    attr_accessor :form_data

    # TODO: support HTTP basic auth
    #
    # bus - options hash. Reads :url (required), :format and :form_data.
    #
    # URLs on spreadsheets.google.com are rewritten to request CSV output
    # unless a non-CSV :format was asked for.
    #
    # Raises ArgumentError when :url is blank.
    def initialize(bus)
      raise(ArgumentError, "RemoteTable needs :url option") unless bus[:url].present?
      @parsed_url = URI.parse bus[:url]
      if @parsed_url.host == 'spreadsheets.google.com' && (bus[:format].blank? || bus[:format].to_s == 'csv')
        force_csv_output!
      end
      @form_data = bus[:form_data]
    end

    # Copies (file:// URLs) or downloads (everything else, via curl) the
    # resource into the staging directory and returns the local path.
    def download
      path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
      if parsed_url.scheme == 'file'
        ::FileUtils.cp parsed_url.path, path
      else
        RemoteTable.backtick_with_reporting %{
          curl
          --header "Expect: "
          --location
          #{"--data #{Escape.shell_single_word form_data}" if form_data.present?}
          #{Escape.shell_single_word parsed_url.to_s}
          --output #{Escape.shell_single_word path}
          2>&1
        }
      end
      path
    end

    # Per-request scratch directory under <tmpdir>/remote_table_gem, created
    # on first use and registered with RemoteTable.remove_at_exit.
    #
    # Dir.mktmpdir creates the directory atomically under a unique name,
    # rather than trusting a rand-based name not to exist already.
    def staging_dir_path
      return @_staging_dir_path if @_staging_dir_path
      parent = ::File.join Dir.tmpdir, 'remote_table_gem'
      FileUtils.mkdir_p parent
      @_staging_dir_path = Dir.mktmpdir 'staging', parent
      RemoteTable.remove_at_exit @_staging_dir_path
      @_staging_dir_path
    end

    private

    # Replaces any output=... query parameter with output=csv. Works on the
    # parameter list instead of a regex so that a missing query, a leading
    # output= parameter, or an output= parameter that is the only one do not
    # produce an exception, "&&" or a trailing "&".
    def force_csv_output!
      kept = @parsed_url.query.to_s.split('&').reject do |param|
        param.empty? || param.start_with?('output=')
      end
      @parsed_url.query = (['output=csv'] + kept).join('&')
    end
  end
end
@@ -0,0 +1,47 @@
1
class RemoteTable
  # Wraps a raw table and post-processes every row it yields: stamps a
  # row hash, optionally expands the row through a user-supplied transform,
  # applies errata, then filters with the select/reject callables.
  class Transform
    attr_accessor :select, :reject, :transform_class, :transform_options, :transform, :raw_table
    attr_accessor :errata

    # bus - options hash. :transform is removed from it and, when present,
    #       must be a hash holding :class plus the options handed to
    #       class.new; the resulting object may add hints to +bus+ before
    #       :select, :reject and :errata are read from it.
    #
    # The caller's transform hash is copied first, so taking :class out of it
    # does not break a second RemoteTable built from the same options.
    def initialize(bus)
      if transform_params = bus.delete(:transform)
        @transform_options = transform_params.dup
        @transform_class = @transform_options.delete(:class)
        @transform = @transform_class.new(@transform_options)
        @transform.add_hints!(bus)
      end
      @select = bus[:select]
      @reject = bus[:reject]
      @errata = bus[:errata]
    end

    # the null transformation
    #
    # Remembers +raw_table+ as the row source and returns self.
    def apply(raw_table)
      self.raw_table = raw_table
      self
    end

    # Digest of a row that does not depend on field order.
    #
    # - copy the pairs into a plain Hash in a canonical (sorted by key) order;
    #   hashes keep insertion order, so copying alone would leave the field
    #   ordering in the dump
    # - dump it
    # - digest it
    #
    # Rows whose keys were already inserted in sorted order get the same
    # digest as before this canonicalisation was added.
    def self.row_hash(row)
      sorted_pairs = row.to_a.sort_by { |key, _| [key.to_s, key.class.to_s] }
      Digest::MD5.hexdigest Marshal.dump(Hash[sorted_pairs])
    end

    # Yields every surviving (virtual) row of the raw table.
    def each_row(&block)
      raw_table.each_row do |row|
        row['row_hash'] = self.class.row_hash(row)
        virtual_rows = transform ? transform.apply(row) : row # allow transform.apply(row) to return multiple rows
        Array.wrap(virtual_rows).each do |virtual_row|
          if errata
            next if errata.rejects? virtual_row
            errata.correct! virtual_row
          end
          next if select && !select.call(virtual_row)
          next if reject && reject.call(virtual_row)
          yield virtual_row
        end
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# NOTE(review): the file header says this gemspec is generated by jeweler;
# changes made here should also be carried over to the Rakefile.
Gem::Specification.new do |s|
  s.name = %q{remote_table-ruby19}
  s.version = "0.2.30"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
  s.date = %q{2010-09-08}
  s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
  s.email = %q{seamus@abshere.net}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "CHANGELOG",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/remote_table.rb",
    "lib/remote_table/file.rb",
    "lib/remote_table/file/csv.rb",
    "lib/remote_table/file/fixed_width.rb",
    "lib/remote_table/file/html.rb",
    "lib/remote_table/file/ods.rb",
    "lib/remote_table/file/roo_spreadsheet.rb",
    "lib/remote_table/file/xls.rb",
    "lib/remote_table/file/xlsx.rb",
    "lib/remote_table/package.rb",
    "lib/remote_table/request.rb",
    "lib/remote_table/transform.rb",
    "remote_table.gemspec",
    "test/remote_table_test.rb",
    "test/test_helper.rb"
  ]
  s.homepage = %q{http://github.com/seamusabshere/remote_table}
  s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
  s.require_paths = ["lib"]
  s.requirements = ["curl"]
  # rubyforge_project= is deprecated in RubyGems; guard it like the other
  # optional setters so the spec still loads where the setter is absent.
  s.rubyforge_project = %q{remotetable} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
  s.test_files = [
    "test/remote_table_test.rb",
    "test/test_helper.rb"
  ]

  # [gem name, version requirement], in declaration order
  runtime_dependencies = [
    ['roo', '= 1.3.11'],
    ['fastercsv', '>= 1.5.0'],
    ['activesupport', '>= 2.3.4'],
    ['slither', '>= 0.99.3'],
    ['nokogiri', '>= 1.4.1'],
    ['escape', '>= 0.0.4']
  ]
  development_dependencies = [
    ['errata', '>= 0.2.0']
  ]

  s.specification_version = 3 if s.respond_to? :specification_version

  # Only RubyGems >= 1.2.0 (with specification versions) can tell runtime and
  # development dependencies apart; older ones get plain dependencies.
  typed_dependencies = s.respond_to?(:specification_version) &&
                       Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')

  runtime_dependencies.each do |name, requirement|
    if typed_dependencies
      s.add_runtime_dependency(name, [requirement])
    else
      s.add_dependency(name, [requirement])
    end
  end

  development_dependencies.each do |name, requirement|
    if typed_dependencies
      s.add_development_dependency(name, [requirement])
    else
      s.add_dependency(name, [requirement])
    end
  end
end
86
+
@@ -0,0 +1,386 @@
1
+ require 'test_helper'
2
+
3
# Test transform: turns one wide spreadsheet row (a 'Date' column plus one
# "<place> Residual Fuel Oil..." column per location) into one row per
# location with 'locatable', 'cost', 'year' and 'month' keys.
class FuelOilParser
  # captures the location text that precedes " Residual Fuel Oil"
  LOCATION_COLUMN = /(.+) Residual Fuel Oil/
  # captures the district identifier inside "(PADD ...)"
  PADD_IDENTIFIER = /\(PADD (.*)\)/

  def initialize(options = {})
    # nothing
  end

  # Tells RemoteTable which sheet to read, how many rows to skip, and to keep
  # only the generated rows whose year is after 1989.
  def add_hints!(bus)
    bus[:sheet] = 'Data 1'
    bus[:skip] = 2
    bus[:select] = lambda { |row| row['year'] > 1989 }
  end

  # Returns an array of generated rows (possibly empty) for one raw row.
  # Columns with a blank cost are skipped, as is everything when 'Date' is
  # blank.
  def apply(row)
    raw_date = row['Date']
    date = nil
    virtual_rows = []
    row.keys.grep(LOCATION_COLUMN).each do |location_column_name|
      cost = row[location_column_name]
      next if cost.blank? || raw_date.blank?
      locatable = locatable_for(location_column_name[LOCATION_COLUMN, 1])
      next unless locatable
      # parsed lazily so a bad date only raises when a row would be produced
      date ||= Time.parse(raw_date)
      virtual_rows << {
        'locatable' => locatable,
        'cost' => cost,
        'year' => date.year,
        'month' => date.month
      }
    end
    virtual_rows
  end

  private

  # Label for the location named by +first_part+, or nil when the column
  # must be ignored.
  def locatable_for(first_part)
    if first_part.start_with?('U.S.')
      "united_states (Country)"
    elsif first_part.include?('PADD')
      padd_part = first_part[PADD_IDENTIFIER, 1]
      return nil if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
      "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
    else
      "#{first_part} (State)"
    end
  end
end
38
+
39
# Test responder: answers yes/no questions about an aircraft row. Besides the
# explicit predicates it answers any is_attributed_to_<manufacturer>? /
# is_not_attributed_to_<manufacturer>? question dynamically.
class AircraftGuru
  # captures the manufacturer part of a dynamic predicate name
  ATTRIBUTION_QUERY = /\Ais_n?o?t?_?attributed_to_([^\?]+)/

  # Truthy when the row's 'Designator' starts with "DC" followed by a digit.
  def is_a_dc_plane?(row)
    row['Designator'] =~ /^DC\d/i
  end

  # def is_a_crj_900?(row)
  #   row['Designator'].downcase == 'crj9'
  # end

  # Truthy when the row's 'Designator' is exactly "G159".
  def is_a_g159?(row)
    row['Designator'] =~ /^G159$/
  end

  # Truthy when the row's 'Designator' is exactly "GALX".
  def is_a_galx?(row)
    row['Designator'] =~ /^GALX$/
  end

  # Handles the dynamic attribution predicates. Underscores in the
  # manufacturer part match an optional space, case-insensitively, against
  # the 'Manufacturer' value of the row passed as first argument. Anything
  # else goes to the default behaviour (NoMethodError).
  def method_missing(method_id, *args, &block)
    query = ATTRIBUTION_QUERY.match(method_id.to_s)
    return super unless query
    manufacturer_regexp = Regexp.new(query[1].gsub('_', ' ?'), Regexp::IGNORECASE)
    matches = manufacturer_regexp.match(args.first['Manufacturer']) # row['Manufacturer'] =~ /mcdonnell douglas/i
    method_id.to_s.include?('not_attributed') ? matches.nil? : !matches.nil?
  end

  # Keeps respond_to? and method(...) truthful about the names that
  # method_missing answers.
  def respond_to_missing?(method_id, include_private = false)
    !ATTRIBUTION_QUERY.match(method_id.to_s).nil? || super
  end
end
67
+
68
+ class RemoteTableTest < Test::Unit::TestCase
69
+ def setup
70
+ @test2_rows_with_blanks = [
71
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
72
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
73
+ { 'header4' => '', 'header5' => '', 'header6' => '' },
74
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
75
+ ]
76
+ @test2_rows = [
77
+ { 'header4' => '1 at 4', 'header5' => '1 at 5', 'header6' => '1 at 6' },
78
+ { 'header4' => '2 at 4', 'header5' => '2 at 5', 'header6' => '2 at 6' },
79
+ ]
80
+ end
81
+
82
+ if ENV['ALL'] == 'true' or ENV['SLOW'] == 'true'
83
+ should "open an XLS inside a zip file" do
84
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
85
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
86
+ assert_equal 'NSX', t.rows.first['carline name']
87
+ assert_equal 'VOLVO', t.rows.last['Manufacturer']
88
+ assert_equal 'V70 XC AWD', t.rows.last['carline name']
89
+ end
90
+
91
+ should "not have indifferent string/symbol hash access" do
92
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls')
93
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
94
+ assert_equal nil, t.rows.first[:Manufacturer]
95
+ end
96
+
97
+ should "open a CSV inside a zip file" do
98
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
99
+ assert_equal 'ACURA', t.rows.first['Manufacturer']
100
+ assert_equal 'NSX', t.rows.first['carline name']
101
+ assert_equal 'TOYOTA', t.rows.last['Manufacturer']
102
+ assert_equal 'RAV4 SOFT TOP 4WD', t.rows.last['carline name']
103
+ end
104
+
105
+ should "open a fixed-width file with an inline schema inside a zip file" do
106
+ t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
107
+ :filename => 'Gd6-dsc.txt',
108
+ :format => :fixed_width,
109
+ :crop => 21..26, # inclusive
110
+ :cut => '2-',
111
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
112
+ :schema => [[ 'code', 2, { :type => :string } ],
113
+ [ 'spacer', 2 ],
114
+ [ 'name', 52, { :type => :string } ]])
115
+ assert_equal 'regular grade gasoline (octane number of 87)', t.rows.first['name']
116
+ assert_equal 'R', t.rows.first['code']
117
+ assert_equal 'electricity', t.rows.last['name']
118
+ assert_equal 'El', t.rows.last['code']
119
+ end
120
+
121
+ should "send form data, follow redirects and use a filename glob" do
122
+ url = 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0'
123
+ form_data = 'UserTableName=T_100_Segment__All_Carriers&DBShortName=Air_Carriers&RawDataTable=T_T100_SEGMENT_ALL_CARRIER&sqlstr=+SELECT+DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE+FROM++T_T100_SEGMENT_ALL_CARRIER+WHERE+Month+%3D1+AND+YEAR%3D2008&varlist=DEPARTURES_SCHEDULED%2CDEPARTURES_PERFORMED%2CPAYLOAD%2CSEATS%2CPASSENGERS%2CFREIGHT%2CMAIL%2CDISTANCE%2CRAMP_TO_RAMP%2CAIR_TIME%2CUNIQUE_CARRIER%2CAIRLINE_ID%2CUNIQUE_CARRIER_NAME%2CUNIQUE_CARRIER_ENTITY%2CREGION%2CCARRIER%2CCARRIER_NAME%2CCARRIER_GROUP%2CCARRIER_GROUP_NEW%2CORIGIN%2CORIGIN_CITY_NAME%2CORIGIN_CITY_NUM%2CORIGIN_STATE_ABR%2CORIGIN_STATE_FIPS%2CORIGIN_STATE_NM%2CORIGIN_COUNTRY%2CORIGIN_COUNTRY_NAME%2CORIGIN_WAC%2CDEST%2CDEST_CITY_NAME%2CDEST_CITY_NUM%2CDEST_STATE_ABR%2CDEST_STATE_FIPS%2CDEST_STATE_NM%2CDEST_COUNTRY%2CDEST_COUNTRY_NAME%2CDEST_WAC%2CAIRCRAFT_GROUP%2CAIRCRAFT_TYPE%2CAIRCRAFT_CONFIG%2CYEAR%2CQUARTER%2CMONTH%2CDISTANCE_GROUP%2CCLASS%2CDATA_SOURCE&grouplist=&suml=&sumRegion=&filter1=title%3D&filter2=title%3D&geo=All%A0&time=January&timename=Month&GEOGRAPHY=All&XYEAR=2008&FREQUENCY=1&AllVars=All&VarName=DEPARTURES_SCHEDULED&VarDesc=DepScheduled&VarType=Num&VarName=DEPARTURES_PERFORMED&VarDesc=DepPerformed&VarType=Num&VarName=PAYLOAD&VarDesc=Payload&VarType=Num&VarName=SEATS&VarDesc=Seats&VarType=Num&VarName=PASSENGERS&VarDesc=Passengers&VarType=Num&VarName=FREIGHT&V
arDesc=Freight&VarType=Num&VarName=MAIL&VarDesc=Mail&VarType=Num&VarName=DISTANCE&VarDesc=Distance&VarType=Num&VarName=RAMP_TO_RAMP&VarDesc=RampToRamp&VarType=Num&VarName=AIR_TIME&VarDesc=AirTime&VarType=Num&VarName=UNIQUE_CARRIER&VarDesc=UniqueCarrier&VarType=Char&VarName=AIRLINE_ID&VarDesc=AirlineID&VarType=Num&VarName=UNIQUE_CARRIER_NAME&VarDesc=UniqueCarrierName&VarType=Char&VarName=UNIQUE_CARRIER_ENTITY&VarDesc=UniqCarrierEntity&VarType=Char&VarName=REGION&VarDesc=CarrierRegion&VarType=Char&VarName=CARRIER&VarDesc=Carrier&VarType=Char&VarName=CARRIER_NAME&VarDesc=CarrierName&VarType=Char&VarName=CARRIER_GROUP&VarDesc=CarrierGroup&VarType=Num&VarName=CARRIER_GROUP_NEW&VarDesc=CarrierGroupNew&VarType=Num&VarName=ORIGIN&VarDesc=Origin&VarType=Char&VarName=ORIGIN_CITY_NAME&VarDesc=OriginCityName&VarType=Char&VarName=ORIGIN_CITY_NUM&VarDesc=OriginCityNum&VarType=Num&VarName=ORIGIN_STATE_ABR&VarDesc=OriginState&VarType=Char&VarName=ORIGIN_STATE_FIPS&VarDesc=OriginStateFips&VarType=Char&VarName=ORIGIN_STATE_NM&VarDesc=OriginStateName&VarType=Char&VarName=ORIGIN_COUNTRY&VarDesc=OriginCountry&VarType=Char&VarName=ORIGIN_COUNTRY_NAME&VarDesc=OriginCountryName&VarType=Char&VarName=ORIGIN_WAC&VarDesc=OriginWac&VarType=Num&VarName=DEST&VarDesc=Dest&VarType=Char&VarName=DEST_CITY_NAME&VarDesc=DestCityName&VarType=Char&VarName=DEST_CITY_NUM&VarDesc=DestCityNum&VarType=Num&VarName=DEST_STATE_ABR&VarDesc=DestState&VarType=Char&VarName=DEST_STATE_FIPS&VarDesc=DestStateFips&VarType=Char&VarName=DEST_STATE_NM&VarDesc=DestStateName&VarType=Char&VarName=DEST_COUNTRY&VarDesc=DestCountry&VarType=Char&VarName=DEST_COUNTRY_NAME&VarDesc=DestCountryName&VarType=Char&VarName=DEST_WAC&VarDesc=DestWac&VarType=Num&VarName=AIRCRAFT_GROUP&VarDesc=AircraftGroup&VarType=Num&VarName=AIRCRAFT_TYPE&VarDesc=AircraftType&VarType=Char&VarName=AIRCRAFT_CONFIG&VarDesc=AircraftConfig&VarType=Num&VarName=YEAR&VarDesc=Year&VarType=Num&VarName=QUARTER&VarDesc=Quarter&VarType=Num&VarName=MONTH&VarDesc=Month&V
arType=Num&VarName=DISTANCE_GROUP&VarDesc=DistanceGroup&VarType=Num&VarName=CLASS&VarDesc=Class&VarType=Char&VarName=DATA_SOURCE&VarDesc=DataSource&VarType=Char'
124
+ t = RemoteTable.new :url => url, :form_data => form_data, :compression => :zip, :glob => '/*.csv'
125
+ assert_equal 'United States of America', t.rows.first['DEST_COUNTRY_NAME']
126
+ end
127
+ end
128
+
129
+ if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
130
+ end
131
+
132
+ if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
133
+ should "open an XLSX like an array (numbered columns)" do
134
+ t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false)
135
+ assert_equal "Secure encryption of all data", t.rows[5][0]
136
+ end
137
+
138
+ should "open an XLSX with custom headers" do
139
+ t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz})
140
+ assert_equal "Secure encryption of all data", t.rows[5]['foo']
141
+ end
142
+
143
+ should "open an XLSX" do
144
+ t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx')
145
+ assert_equal "Secure encryption of all data", t.rows[5]["Requirements"]
146
+ end
147
+
148
+ should "work on filenames with spaces, using globbing" do
149
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
150
+ assert_equal 'ASTON MARTIN', t.rows.first['MFR']
151
+ end
152
+
153
+ should "work on filenames with spaces" do
154
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
155
+ assert_equal 'ASTON MARTIN', t.rows.first['MFR']
156
+ end
157
+
158
+ should "ignore UTF-8 byte order marks" do
159
+ t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
160
+ assert_equal 'Tawleed', t.rows.first['name']
161
+ end
162
+
163
+ should "be able to apply errata files" do
164
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
165
+ :encoding => 'windows-1252',
166
+ :row_xpath => '//table/tr[2]/td/table/tr',
167
+ :column_xpath => 'td',
168
+ :errata => Errata.new(:table => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'),
169
+ :responder => AircraftGuru.new)
170
+ g1 = t.rows.detect { |row| row['Model'] =~ /Gulfstream I/ }
171
+ assert g1
172
+ assert_equal 'GULFSTREAM AEROSPACE', g1['Manufacturer']
173
+ assert_equal 'Gulfstream I', g1['Model']
174
+ end
175
+
176
+ # this will die with an error about libcurl if your curl doesn't support ssl
177
+ should "connect using HTTPS if available" do
178
+ t = RemoteTable.new(:url => 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
179
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
180
+ assert_equal 'AL', t.rows.first['State']
181
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
182
+ assert_equal 'WY', t.rows.last['State']
183
+ end
184
+
185
+ should "read an HTML table made with frontpage" do
186
+ t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
187
+ :encoding => 'US-ASCII',
188
+ :row_xpath => '//table/tr[2]/td/table/tr',
189
+ :column_xpath => 'td'
190
+ assert_equal 'E110', t.rows.first['Designator']
191
+ assert_equal 'EMBRAER', t.rows.first['Manufacturer']
192
+ assert_equal 'EZKC', t.rows.last['Designator']
193
+ assert_equal 'EZ King Cobra', t.rows.last['Model']
194
+ end
195
+
196
+ should "hash rows without paying attention to order" do
197
+ x = ActiveSupport::OrderedHash.new
198
+ x[:a] = 1
199
+ x[:b] = 2
200
+
201
+ y = ActiveSupport::OrderedHash.new
202
+ y[:b] = 2
203
+ y[:a] = 1
204
+
205
+ assert Marshal.dump(x) != Marshal.dump(y)
206
+ assert RemoteTable::Transform.row_hash(x) == RemoteTable::Transform.row_hash(y)
207
+ end
208
+
209
+ should "open a Google Docs url (as a CSV)" do
210
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA')
211
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
212
+ assert_equal 'AL', t.rows.first['State']
213
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
214
+ assert_equal 'WY', t.rows.last['State']
215
+ end
216
+
217
+ should "open a Google Docs url (as a CSV, with sheet options)" do
218
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0')
219
+ assert_equal 'Gulf Coast', t.rows.first['PAD district name']
220
+ assert_equal 'AL', t.rows.first['State']
221
+ assert_equal 'Rocky Mountain', t.rows.last['PAD district name']
222
+ assert_equal 'WY', t.rows.last['State']
223
+ end
224
+
225
+ should "open a Google Docs url as a CSV without headers" do
226
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
227
+ assert_equal 'AL', t.rows.first[0]
228
+ assert_equal 'Gulf Coast', t.rows.first[4]
229
+ assert_equal 'WY', t.rows.last[0]
230
+ assert_equal 'Rocky Mountain', t.rows.last[4]
231
+ end
232
+
233
+ should "take the last of values if the header is duplicated" do
234
+ t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg')
235
+ assert_equal '2', t.rows.first['dup_header']
236
+ end
237
+
238
# Within a row, numeric keys must come in increasing order and before any
# string key. Integer is used instead of Fixnum, which newer Rubies have
# deprecated and then removed; every Fixnum is an Integer, so the check is
# unchanged on older Rubies.
should "respect field order in CSVs without headers" do
  t = RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false)
  last_k = -1
  saw_string = false
  t.rows.each do |row|
    row.each do |k, v|
      if k.is_a?(Integer) && last_k.is_a?(Integer)
        assert !saw_string
        assert k > last_k
      end
      last_k = k
      saw_string = k.is_a?(String)
    end
  end
end
253
+
254
# One pair of tests per format. The blocks close over +format+, so no
# string eval is needed to generate them.
%w{ csv ods xls }.each do |format|
  should "read #{format}" do
    t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}")
    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values (expected value first, so failure messages read correctly)
    t.rows.each_with_index do |row, index|
      assert_equal @test2_rows[index], row.except('row_hash')
    end
  end

  should "read #{format}, keeping blank rows" do
    t = RemoteTable.new(:url => "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}", :keep_blank_rows => true)
    # no blank headers
    assert t.rows.all? { |row| row.keys.all?(&:present?) }
    # correct values (expected value first, so failure messages read correctly)
    t.rows.each_with_index do |row, index|
      assert_equal @test2_rows_with_blanks[index], row.except('row_hash')
    end
  end
end
277
+
278
# Reads the fixed-width fixture with an inline schema and compares every row
# (minus the generated 'row_hash') with the expected non-blank rows.
should "read fixed width correctly" do
  t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                      :format => :fixed_width,
                      :skip => 1,
                      :schema => [[ 'header4', 10, { :type => :string } ],
                                  [ 'spacer',  1 ],
                                  [ 'header5', 10, { :type => :string } ],
                                  [ 'spacer',  12 ],
                                  [ 'header6', 10, { :type => :string } ]])

  # no blank headers
  assert t.rows.all? { |row| row.keys.all?(&:present?) }
  # correct values (expected value first, so failure messages read correctly)
  t.rows.each_with_index do |row, index|
    assert_equal @test2_rows[index], row.except('row_hash')
  end
end
295
+
296
# Same as the plain fixed-width test, but with :keep_blank_rows, so the
# all-blank rows of the fixture are expected too.
should "read fixed width correctly, keeping blank rows" do
  t = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                      :format => :fixed_width,
                      :keep_blank_rows => true,
                      :skip => 1,
                      :schema => [[ 'header4', 10, { :type => :string } ],
                                  [ 'spacer',  1 ],
                                  [ 'header5', 10, { :type => :string } ],
                                  [ 'spacer',  12 ],
                                  [ 'header6', 10, { :type => :string } ]])

  # no blank headers
  assert t.rows.all? { |row| row.keys.all?(&:present?) }
  # correct values (expected value first, so failure messages read correctly)
  t.rows.each_with_index do |row, index|
    assert_equal @test2_rows_with_blanks[index], row.except('row_hash')
  end
end
314
+
315
+ should "have the same row hash across formats" do
316
+ csv = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv')
317
+ ods = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods')
318
+ xls = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls')
319
+ fixed_width = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
320
+ :format => :fixed_width,
321
+ :skip => 1,
322
+ :schema => [[ 'header1', 10, { :type => :string } ],
323
+ [ 'spacer', 1 ],
324
+ [ 'header2', 10, { :type => :string } ],
325
+ [ 'spacer', 12 ],
326
+ [ 'header3', 10, { :type => :string } ]])
327
+
328
+ csv2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv')
329
+ ods2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods')
330
+ xls2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
331
+ fixed_width2 = RemoteTable.new(:url => 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
332
+ :format => :fixed_width,
333
+ :skip => 1,
334
+ :schema => [[ 'spacer', 11 ],
335
+ [ 'header2', 10, { :type => :string } ],
336
+ [ 'spacer', 1 ],
337
+ [ 'header3', 10, { :type => :string } ],
338
+ [ 'spacer', 1 ],
339
+ [ 'header1', 10, { :type => :string } ]])
340
+
341
+
342
+ reference = csv.rows[0]['row_hash']
343
+
344
+ # same row hashes
345
+ assert_equal reference, ods.rows[0]['row_hash']
346
+ assert_equal reference, xls.rows[0]['row_hash']
347
+ assert_equal reference, fixed_width.rows[0]['row_hash']
348
+ # same row hashes with different order
349
+ assert_equal reference, csv2.rows[0]['row_hash']
350
+ assert_equal reference, ods2.rows[0]['row_hash']
351
+ assert_equal reference, xls2.rows[0]['row_hash']
352
+ assert_equal reference, fixed_width2.rows[0]['row_hash']
353
+ end
354
+
355
+ should "open an ODS" do
356
+ t = RemoteTable.new(:url => 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true)
357
+
358
+ assert_equal 'Central Africa', t.rows[5]['name']
359
+ assert_equal 99, t.rows[5]['MAP DATA population (millions) 2002'].to_i
360
+ end
361
+
362
+ should "open an XLS with a parser" do
363
+ ma_1990_01 = {"month"=>1, "cost"=>"54.0", "locatable"=>"Massachusetts (State)", "year"=>1990}
364
+ ga_1990_01 = {"month"=>1, "cost"=>"50.7", "locatable"=>"Georgia (State)", "year"=>1990}
365
+
366
+ t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls',
367
+ :transform => { :class => FuelOilParser })
368
+
369
+ assert t.rows.include?(ma_1990_01)
370
+ assert t.rows.include?(ga_1990_01)
371
+ end
372
+
373
+ # should "provide a row_hash on demand" do
374
+ # t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
375
+ # :filename => 'Gd6-dsc.txt',
376
+ # :format => :fixed_width,
377
+ # :crop => 21..26, # inclusive
378
+ # :cut => '2-',
379
+ # :select => lambda { |row| /\A[A-Z]/.match row['code'] },
380
+ # :schema => [[ 'code', 2, { :type => :string } ],
381
+ # [ 'spacer', 2 ],
382
+ # [ 'name', 52, { :type => :string } ]])
383
+ # assert_equal 'a8a5d7f17b56772723c657eb62b0f238', t.rows.first['row_hash']
384
+ # end
385
+ end
386
+ end