remote_table 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -4,24 +4,140 @@ Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
4
4
 
5
5
  ==Real-life usage
6
6
 
7
- Used by data_miner (http://github.com/seamusabshere/data_miner)
7
+ Used by http://data.brighterplanet.com and the data_miner gem (http://github.com/seamusabshere/data_miner)
8
8
 
9
9
  ==Example
10
10
 
11
- Taken from <tt>#{GEMDIR}/test/test_remote_table.rb</tt>:
12
-
13
- should "open an XLSX" do
14
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
15
- assert_equal "Secure encryption of all data", t[5]["Requirements"]
16
- end
17
-
18
- or on the console
19
-
20
11
  ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
21
12
  => #<RemoteTable:0x359da50 [...]>
22
13
  ?> t[0]
23
14
  => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
24
15
 
16
+ More examples:
17
+
18
+ RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
19
+
20
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv'
21
+
22
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods'
23
+
24
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls'
25
+
26
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv'
27
+
28
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods'
29
+
30
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls'
31
+
32
+ RemoteTable.new "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}"
33
+
34
+ RemoteTable.new "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}", :keep_blank_rows => true
35
+
36
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0'
37
+
38
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
39
+
40
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false
41
+
42
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
43
+
44
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
45
+
46
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg'
47
+
48
+ RemoteTable.new 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls', :transform => { :class => FuelOilParser }
49
+
50
+ RemoteTable.new 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
51
+
52
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls'
53
+
54
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
55
+
56
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
57
+
58
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
59
+
60
+ RemoteTable.new 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true
61
+
62
+ RemoteTable.new 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
63
+
64
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
65
+
66
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz}
67
+
68
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false
69
+
70
+ RemoteTable.new 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0', :form_data => 'UserTableName=T_100_Segment__All_Carriers&[...]', :compression => :zip, :glob => '/*.csv'
71
+
72
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
73
+ :encoding => 'US-ASCII',
74
+ :row_xpath => '//table/tr[2]/td/table/tr',
75
+ :column_xpath => 'td'
76
+
77
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
78
+ :encoding => 'windows-1252',
79
+ :row_xpath => '//table/tr[2]/td/table/tr',
80
+ :column_xpath => 'td',
81
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
82
+ :responder => AircraftGuru.new)
83
+
84
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
85
+ :encoding => 'windows-1252',
86
+ :row_xpath => '//table/tr[2]/td/table/tr',
87
+ :column_xpath => 'td',
88
+ :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
89
+ :responder => AircraftGuru.new }
90
+
91
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
92
+ :filename => 'Gd6-dsc.txt',
93
+ :format => :fixed_width,
94
+ :crop => 21..26, # inclusive
95
+ :cut => '2-',
96
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
97
+ :schema => [[ 'code', 2, { :type => :string } ],
98
+ [ 'spacer', 2 ],
99
+ [ 'name', 52, { :type => :string } ]]
100
+
101
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
102
+ :format => :fixed_width,
103
+ :skip => 1,
104
+ :schema => [[ 'header4', 10, { :type => :string } ],
105
+ [ 'spacer', 1 ],
106
+ [ 'header5', 10, { :type => :string } ],
107
+ [ 'spacer', 12 ],
108
+ [ 'header6', 10, { :type => :string } ]]
109
+
110
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
111
+ :format => :fixed_width,
112
+ :keep_blank_rows => true,
113
+ :skip => 1,
114
+ :schema => [[ 'header4', 10, { :type => :string } ],
115
+ [ 'spacer', 1 ],
116
+ [ 'header5', 10, { :type => :string } ],
117
+ [ 'spacer', 12 ],
118
+ [ 'header6', 10, { :type => :string } ]]
119
+
120
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
121
+ :format => :fixed_width,
122
+ :skip => 1,
123
+ :schema => [[ 'header1', 10, { :type => :string } ],
124
+ [ 'spacer', 1 ],
125
+ [ 'header2', 10, { :type => :string } ],
126
+ [ 'spacer', 12 ],
127
+ [ 'header3', 10, { :type => :string } ]]
128
+
129
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
130
+ :format => :fixed_width,
131
+ :skip => 1,
132
+ :schema => [[ 'spacer', 11 ],
133
+ [ 'header2', 10, { :type => :string } ],
134
+ [ 'spacer', 1 ],
135
+ [ 'header3', 10, { :type => :string } ],
136
+ [ 'spacer', 1 ],
137
+ [ 'header1', 10, { :type => :string } ]]
138
+
139
+ ==Custom parsers
140
+
25
141
  See the test file, and the data_miner examples, for custom parsers.
26
142
 
27
143
  ==Wishlist
@@ -47,17 +47,25 @@ class RemoteTable
47
47
 
48
48
  private
49
49
 
50
+ FASTERCSV_OPTIONS = %w{
51
+ unconverted_fields
52
+ col_sep
53
+ headers
54
+ row_sep
55
+ return_headers
56
+ header_converters
57
+ quote_char
58
+ skip_blanks
59
+ converters
60
+ force_quotes
61
+ }
62
+
50
63
  def fastercsv_options
51
- fastercsv_options = { :skip_blanks => !t.properties.keep_blank_rows }
52
- if t.properties.headers == false
53
- fastercsv_options.merge!(:headers => nil)
54
- elsif t.properties.headers.is_a?(::Array)
55
- fastercsv_options.merge!(:headers => t.properties.headers)
56
- else
57
- fastercsv_options.merge!(:headers => :first_row)
58
- end
59
- fastercsv_options.merge!(:col_sep => t.properties.delimiter) if t.properties.delimiter
60
- fastercsv_options
64
+ hsh = t.options.slice *FASTERCSV_OPTIONS
65
+ hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
66
+ hsh.reverse_merge! 'headers' => :first_row
67
+ hsh.reverse_merge! 'col_sep' => t.properties.delimiter
68
+ hsh.symbolize_keys
61
69
  end
62
70
  end
63
71
  end
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "1.1.1"
2
+ VERSION = "1.1.2"
3
3
  end
data/test/helper.rb CHANGED
@@ -4,6 +4,7 @@ Bundler.setup
4
4
  require 'test/unit'
5
5
  require 'shoulda'
6
6
  require 'ruby-debug'
7
+ require 'tempfile'
7
8
 
8
9
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
9
10
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -26,4 +26,13 @@ class TestRemoteTable < Test::Unit::TestCase
26
26
  t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
27
27
  assert_equal ::ActiveSupport::OrderedHash, t[0].class
28
28
  end
29
+
30
+ should "pass through fastercsv options" do
31
+ f = Tempfile.new 'pass-through-fastercsv-options'
32
+ f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
33
+ f.flush
34
+ t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
35
+ assert_equal %{Body example with a <a href="">link</a>}, t[0][2]
36
+ f.close
37
+ end
29
38
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 1
9
- - 1
10
- version: 1.1.1
9
+ - 2
10
+ version: 1.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-03-11 00:00:00 -06:00
19
+ date: 2011-03-24 00:00:00 -05:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency