remote_table 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -4,24 +4,140 @@ Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
4
4
 
5
5
  ==Real-life usage
6
6
 
7
- Used by data_miner (http://github.com/seamusabshere/data_miner)
7
+ Used by http://data.brighterplanet.com and the data_miner gem (http://github.com/seamusabshere/data_miner)
8
8
 
9
9
  ==Example
10
10
 
11
- Taken from <tt>#{GEMDIR}/test/test_remote_table.rb</tt>:
12
-
13
- should "open an XLSX" do
14
- t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
15
- assert_equal "Secure encryption of all data", t[5]["Requirements"]
16
- end
17
-
18
- or on the console
19
-
20
11
  ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
21
12
  => #<RemoteTable:0x359da50 [...]>
22
13
  ?> t[0]
23
14
  => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
24
15
 
16
+ More examples:
17
+
18
+ RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
19
+
20
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv'
21
+
22
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods'
23
+
24
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls'
25
+
26
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv'
27
+
28
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods'
29
+
30
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls'
31
+
32
+ RemoteTable.new "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}"
33
+
34
+ RemoteTable.new "http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}", :keep_blank_rows => true
35
+
36
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0'
37
+
38
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
39
+
40
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false
41
+
42
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
43
+
44
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
45
+
46
+ RemoteTable.new 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg'
47
+
48
+ RemoteTable.new 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls', :transform => { :class => FuelOilParser }
49
+
50
+ RemoteTable.new 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
51
+
52
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls'
53
+
54
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
55
+
56
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
57
+
58
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
59
+
60
+ RemoteTable.new 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true
61
+
62
+ RemoteTable.new 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
63
+
64
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
65
+
66
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz}
67
+
68
+ RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false
69
+
70
+ RemoteTable.new 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0', :form_data => 'UserTableName=T_100_Segment__All_Carriers&[...]', :compression => :zip, :glob => '/*.csv'
71
+
72
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
73
+ :encoding => 'US-ASCII',
74
+ :row_xpath => '//table/tr[2]/td/table/tr',
75
+ :column_xpath => 'td'
76
+
77
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
78
+ :encoding => 'windows-1252',
79
+ :row_xpath => '//table/tr[2]/td/table/tr',
80
+ :column_xpath => 'td',
81
+ :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
82
+ :responder => AircraftGuru.new)
83
+
84
+ RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
85
+ :encoding => 'windows-1252',
86
+ :row_xpath => '//table/tr[2]/td/table/tr',
87
+ :column_xpath => 'td',
88
+ :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
89
+ :responder => AircraftGuru.new }
90
+
91
+ RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
92
+ :filename => 'Gd6-dsc.txt',
93
+ :format => :fixed_width,
94
+ :crop => 21..26, # inclusive
95
+ :cut => '2-',
96
+ :select => lambda { |row| /\A[A-Z]/.match row['code'] },
97
+ :schema => [[ 'code', 2, { :type => :string } ],
98
+ [ 'spacer', 2 ],
99
+ [ 'name', 52, { :type => :string } ]]
100
+
101
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
102
+ :format => :fixed_width,
103
+ :skip => 1,
104
+ :schema => [[ 'header4', 10, { :type => :string } ],
105
+ [ 'spacer', 1 ],
106
+ [ 'header5', 10, { :type => :string } ],
107
+ [ 'spacer', 12 ],
108
+ [ 'header6', 10, { :type => :string } ]]
109
+
110
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
111
+ :format => :fixed_width,
112
+ :keep_blank_rows => true,
113
+ :skip => 1,
114
+ :schema => [[ 'header4', 10, { :type => :string } ],
115
+ [ 'spacer', 1 ],
116
+ [ 'header5', 10, { :type => :string } ],
117
+ [ 'spacer', 12 ],
118
+ [ 'header6', 10, { :type => :string } ]]
119
+
120
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
121
+ :format => :fixed_width,
122
+ :skip => 1,
123
+ :schema => [[ 'header1', 10, { :type => :string } ],
124
+ [ 'spacer', 1 ],
125
+ [ 'header2', 10, { :type => :string } ],
126
+ [ 'spacer', 12 ],
127
+ [ 'header3', 10, { :type => :string } ]]
128
+
129
+ RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
130
+ :format => :fixed_width,
131
+ :skip => 1,
132
+ :schema => [[ 'spacer', 11 ],
133
+ [ 'header2', 10, { :type => :string } ],
134
+ [ 'spacer', 1 ],
135
+ [ 'header3', 10, { :type => :string } ],
136
+ [ 'spacer', 1 ],
137
+ [ 'header1', 10, { :type => :string } ]]
138
+
139
+ ==Custom parsers
140
+
25
141
  See the test file and the data_miner gem for examples of custom parsers.
26
142
 
27
143
  ==Wishlist
@@ -47,17 +47,25 @@ class RemoteTable
47
47
 
48
48
  private
49
49
 
50
+ FASTERCSV_OPTIONS = %w{
51
+ unconverted_fields
52
+ col_sep
53
+ headers
54
+ row_sep
55
+ return_headers
56
+ header_converters
57
+ quote_char
58
+ skip_blanks
59
+ converters
60
+ force_quotes
61
+ }
62
+
50
63
  def fastercsv_options
51
- fastercsv_options = { :skip_blanks => !t.properties.keep_blank_rows }
52
- if t.properties.headers == false
53
- fastercsv_options.merge!(:headers => nil)
54
- elsif t.properties.headers.is_a?(::Array)
55
- fastercsv_options.merge!(:headers => t.properties.headers)
56
- else
57
- fastercsv_options.merge!(:headers => :first_row)
58
- end
59
- fastercsv_options.merge!(:col_sep => t.properties.delimiter) if t.properties.delimiter
60
- fastercsv_options
64
+ hsh = t.options.slice *FASTERCSV_OPTIONS
65
+ hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
66
+ hsh.reverse_merge! 'headers' => :first_row
67
+ hsh.reverse_merge! 'col_sep' => t.properties.delimiter
68
+ hsh.symbolize_keys
61
69
  end
62
70
  end
63
71
  end
@@ -1,3 +1,3 @@
1
1
  class RemoteTable
2
- VERSION = "1.1.1"
2
+ VERSION = "1.1.2"
3
3
  end
data/test/helper.rb CHANGED
@@ -4,6 +4,7 @@ Bundler.setup
4
4
  require 'test/unit'
5
5
  require 'shoulda'
6
6
  require 'ruby-debug'
7
+ require 'tempfile'
7
8
 
8
9
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
9
10
  $LOAD_PATH.unshift(File.dirname(__FILE__))
@@ -26,4 +26,13 @@ class TestRemoteTable < Test::Unit::TestCase
26
26
  t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
27
27
  assert_equal ::ActiveSupport::OrderedHash, t[0].class
28
28
  end
29
+
30
+ should "pass through fastercsv options" do
31
+ f = Tempfile.new 'pass-through-fastercsv-options'
32
+ f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
33
+ f.flush
34
+ t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
35
+ assert_equal %{Body example with a <a href="">link</a>}, t[0][2]
36
+ f.close
37
+ end
29
38
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 1
9
- - 1
10
- version: 1.1.1
9
+ - 2
10
+ version: 1.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-03-11 00:00:00 -06:00
19
+ date: 2011-03-24 00:00:00 -05:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency