remote_table 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +126 -10
- data/lib/remote_table/format/delimited.rb +18 -10
- data/lib/remote_table/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/test_remote_table.rb +9 -0
- metadata +4 -4
data/README.rdoc
CHANGED
@@ -4,24 +4,140 @@ Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
|
|
4
4
|
|
5
5
|
==Real-life usage
|
6
6
|
|
7
|
-
Used by data_miner (http://github.com/seamusabshere/data_miner)
|
7
|
+
Used by http://data.brighterplanet.com and the data_miner gem (http://github.com/seamusabshere/data_miner)
|
8
8
|
|
9
9
|
==Example
|
10
10
|
|
11
|
-
Taken from <tt>#{GEMDIR}/test/test_remote_table.rb</tt>:
|
12
|
-
|
13
|
-
should "open an XLSX" do
|
14
|
-
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
15
|
-
assert_equal "Secure encryption of all data", t[5]["Requirements"]
|
16
|
-
end
|
17
|
-
|
18
|
-
or on the console
|
19
|
-
|
20
11
|
?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
|
21
12
|
=> #<RemoteTable:0x359da50 [...]>
|
22
13
|
?> t[0]
|
23
14
|
=> {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
|
24
15
|
|
16
|
+
More examples:
|
17
|
+
|
18
|
+
RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
|
19
|
+
|
20
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.csv'
|
21
|
+
|
22
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.ods'
|
23
|
+
|
24
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls'
|
25
|
+
|
26
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.csv'
|
27
|
+
|
28
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.ods'
|
29
|
+
|
30
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.xls'
|
31
|
+
|
32
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}'
|
33
|
+
|
34
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.#{format}', :keep_blank_rows => true
|
35
|
+
|
36
|
+
RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA&single=true&gid=0'
|
37
|
+
|
38
|
+
RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
|
39
|
+
|
40
|
+
RemoteTable.new 'http://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA', :skip => 1, :headers => false
|
41
|
+
|
42
|
+
RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
43
|
+
|
44
|
+
RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :headers => %w{ col1 col2 col3 }
|
45
|
+
|
46
|
+
RemoteTable.new 'http://spreadsheets.google.com/pub?key=tujrgUOwDSLWb-P4KCt1qBg'
|
47
|
+
|
48
|
+
RemoteTable.new 'http://tonto.eia.doe.gov/dnav/pet/xls/PET_PRI_RESID_A_EPPR_PTA_CPGAL_M.xls', :transform => { :class => FuelOilParser }
|
49
|
+
|
50
|
+
RemoteTable.new 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
51
|
+
|
52
|
+
RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls'
|
53
|
+
|
54
|
+
RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
|
55
|
+
|
56
|
+
RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
57
|
+
|
58
|
+
RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv'
|
59
|
+
|
60
|
+
RemoteTable.new 'http://www.worldmapper.org/data/opendoc/2_worldmapper_data.ods', :sheet => 'Data', :keep_blank_rows => true
|
61
|
+
|
62
|
+
RemoteTable.new 'https://spreadsheets.google.com/pub?key=t5HM1KbaRngmTUbntg8JwPA'
|
63
|
+
|
64
|
+
RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
65
|
+
|
66
|
+
RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => %w{foo bar baz}
|
67
|
+
|
68
|
+
RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false
|
69
|
+
|
70
|
+
RemoteTable.new 'http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=293&Has_Group=3&Is_Zipped=0', :form_data => 'UserTableName=T_100_Segment__All_Carriers&[...]', :compression => :zip, :glob => '/*.csv'
|
71
|
+
|
72
|
+
RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-E.htm",
|
73
|
+
:encoding => 'US-ASCII',
|
74
|
+
:row_xpath => '//table/tr[2]/td/table/tr',
|
75
|
+
:column_xpath => 'td'
|
76
|
+
|
77
|
+
RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
|
78
|
+
:encoding => 'windows-1252',
|
79
|
+
:row_xpath => '//table/tr[2]/td/table/tr',
|
80
|
+
:column_xpath => 'td',
|
81
|
+
:errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
82
|
+
:responder => AircraftGuru.new)
|
83
|
+
|
84
|
+
RemoteTable.new "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-G.htm",
|
85
|
+
:encoding => 'windows-1252',
|
86
|
+
:row_xpath => '//table/tr[2]/td/table/tr',
|
87
|
+
:column_xpath => 'td',
|
88
|
+
:errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
89
|
+
:responder => AircraftGuru.new }
|
90
|
+
|
91
|
+
RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/00data.zip',
|
92
|
+
:filename => 'Gd6-dsc.txt',
|
93
|
+
:format => :fixed_width,
|
94
|
+
:crop => 21..26, # inclusive
|
95
|
+
:cut => '2-',
|
96
|
+
:select => lambda { |row| /\A[A-Z]/.match row['code'] },
|
97
|
+
:schema => [[ 'code', 2, { :type => :string } ],
|
98
|
+
[ 'spacer', 2 ],
|
99
|
+
[ 'name', 52, { :type => :string } ]]
|
100
|
+
|
101
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
102
|
+
:format => :fixed_width,
|
103
|
+
:skip => 1,
|
104
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
105
|
+
[ 'spacer', 1 ],
|
106
|
+
[ 'header5', 10, { :type => :string } ],
|
107
|
+
[ 'spacer', 12 ],
|
108
|
+
[ 'header6', 10, { :type => :string } ]]
|
109
|
+
|
110
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
|
111
|
+
:format => :fixed_width,
|
112
|
+
:keep_blank_rows => true,
|
113
|
+
:skip => 1,
|
114
|
+
:schema => [[ 'header4', 10, { :type => :string } ],
|
115
|
+
[ 'spacer', 1 ],
|
116
|
+
[ 'header5', 10, { :type => :string } ],
|
117
|
+
[ 'spacer', 12 ],
|
118
|
+
[ 'header6', 10, { :type => :string } ]]
|
119
|
+
|
120
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.fixed_width.txt',
|
121
|
+
:format => :fixed_width,
|
122
|
+
:skip => 1,
|
123
|
+
:schema => [[ 'header1', 10, { :type => :string } ],
|
124
|
+
[ 'spacer', 1 ],
|
125
|
+
[ 'header2', 10, { :type => :string } ],
|
126
|
+
[ 'spacer', 12 ],
|
127
|
+
[ 'header3', 10, { :type => :string } ]]
|
128
|
+
|
129
|
+
RemoteTable.new 'http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.fixed_width.txt',
|
130
|
+
:format => :fixed_width,
|
131
|
+
:skip => 1,
|
132
|
+
:schema => [[ 'spacer', 11 ],
|
133
|
+
[ 'header2', 10, { :type => :string } ],
|
134
|
+
[ 'spacer', 1 ],
|
135
|
+
[ 'header3', 10, { :type => :string } ],
|
136
|
+
[ 'spacer', 1 ],
|
137
|
+
[ 'header1', 10, { :type => :string } ]]
|
138
|
+
|
139
|
+
==Custom parsers
|
140
|
+
|
25
141
|
See the test file and also data_miner examples of custom parsers.
|
26
142
|
|
27
143
|
==Wishlist
|
@@ -47,17 +47,25 @@ class RemoteTable
|
|
47
47
|
|
48
48
|
private
|
49
49
|
|
50
|
+
FASTERCSV_OPTIONS = %w{
|
51
|
+
unconverted_fields
|
52
|
+
col_sep
|
53
|
+
headers
|
54
|
+
row_sep
|
55
|
+
return_headers
|
56
|
+
header_converters
|
57
|
+
quote_char
|
58
|
+
skip_blanks
|
59
|
+
converters
|
60
|
+
force_quotes
|
61
|
+
}
|
62
|
+
|
50
63
|
def fastercsv_options
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
else
|
57
|
-
fastercsv_options.merge!(:headers => :first_row)
|
58
|
-
end
|
59
|
-
fastercsv_options.merge!(:col_sep => t.properties.delimiter) if t.properties.delimiter
|
60
|
-
fastercsv_options
|
64
|
+
hsh = t.options.slice *FASTERCSV_OPTIONS
|
65
|
+
hsh.merge! 'skip_blanks' => !t.properties.keep_blank_rows
|
66
|
+
hsh.reverse_merge! 'headers' => :first_row
|
67
|
+
hsh.reverse_merge! 'col_sep' => t.properties.delimiter
|
68
|
+
hsh.symbolize_keys
|
61
69
|
end
|
62
70
|
end
|
63
71
|
end
|
data/lib/remote_table/version.rb
CHANGED
data/test/helper.rb
CHANGED
data/test/test_remote_table.rb
CHANGED
@@ -26,4 +26,13 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
26
26
|
t = RemoteTable.new 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw'
|
27
27
|
assert_equal ::ActiveSupport::OrderedHash, t[0].class
|
28
28
|
end
|
29
|
+
|
30
|
+
should "pass through fastercsv options" do
|
31
|
+
f = Tempfile.new 'pass-through-fastercsv-options'
|
32
|
+
f.write %{3,Title example,Body example with a <a href="">link</a>,test category}
|
33
|
+
f.flush
|
34
|
+
t = RemoteTable.new "file://#{f.path}", :quote_char => %{'}, :headers => nil
|
35
|
+
assert_equal %{Body example with a <a href="">link</a>}, t[0][2]
|
36
|
+
f.close
|
37
|
+
end
|
29
38
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 1
|
9
|
-
- 1
|
10
|
-
version: 1.1.1
|
9
|
+
- 2
|
10
|
+
version: 1.1.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seamus Abshere
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-03-
|
19
|
+
date: 2011-03-24 00:00:00 -05:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|