cartodb-importer 0.2.14 → 0.2.15
Sign up to get free protection for your applications and to get access to all the features.
- data/cartodb-importer.gemspec +12 -10
- data/lib/cartodb-exporter.rb +2 -0
- data/lib/cartodb-importer.rb +2 -0
- data/lib/cartodb-importer/importer.rb +72 -29
- data/lib/cartodb-importer/version.rb +1 -1
- data/spec/import_spec.rb +49 -3
- metadata +63 -41
- data/clubbing.shp +0 -0
data/cartodb-importer.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
|
|
6
6
|
s.name = "cartodb-importer"
|
7
7
|
s.version = CartoDB::Importer::VERSION
|
8
8
|
s.platform = Gem::Platform::RUBY
|
9
|
-
s.authors = ["Fernando Blat", "Andrew Hill", "Javier de la Torre"]
|
9
|
+
s.authors = ["Fernando Blat", "Andrew Hill", "Javier de la Torre", "Simon Tokumine"]
|
10
10
|
s.email = ["andrew@vizzuality.com"]
|
11
11
|
s.homepage = ""
|
12
12
|
s.summary = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
|
@@ -20,14 +20,16 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
22
|
s.add_runtime_dependency "pg", "~> 0.11"
|
23
|
-
s.add_runtime_dependency "sequel"
|
24
|
-
s.add_runtime_dependency "roo"
|
25
|
-
s.add_runtime_dependency "spreadsheet"
|
26
|
-
s.add_runtime_dependency "google-spreadsheet-ruby"
|
27
|
-
s.add_runtime_dependency "rubyzip"
|
28
|
-
s.add_runtime_dependency "builder"
|
23
|
+
s.add_runtime_dependency "sequel", "~> 3.28.0"
|
24
|
+
s.add_runtime_dependency "roo", "~> 1.9.7"
|
25
|
+
s.add_runtime_dependency "spreadsheet", "~> 0.6.5.9"
|
26
|
+
s.add_runtime_dependency "google-spreadsheet-ruby", "~> 0.1.5"
|
27
|
+
s.add_runtime_dependency "rubyzip", "~> 0.9.4"
|
28
|
+
s.add_runtime_dependency "builder", "~> 3.0.0"
|
29
|
+
s.add_runtime_dependency "rgeo", "~> 0.3.2"
|
30
|
+
s.add_runtime_dependency "rgeo-geojson", "~> 0.2.1"
|
29
31
|
|
30
|
-
s.add_development_dependency "rspec"
|
31
|
-
s.add_development_dependency "mocha"
|
32
|
-
s.add_development_dependency "ruby-debug19"
|
32
|
+
s.add_development_dependency "rspec", "~> 2.6.0"
|
33
|
+
s.add_development_dependency "mocha", "~> 0.10.0"
|
34
|
+
s.add_development_dependency "ruby-debug19", "~> 0.11.6"
|
33
35
|
end
|
data/lib/cartodb-exporter.rb
CHANGED
data/lib/cartodb-importer.rb
CHANGED
@@ -175,7 +175,7 @@ module CartoDB
|
|
175
175
|
|
176
176
|
@table_created = true
|
177
177
|
|
178
|
-
FileUtils.rm_rf(path)
|
178
|
+
FileUtils.rm_rf(Dir.glob(path))
|
179
179
|
rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
|
180
180
|
|
181
181
|
return OpenStruct.new({
|
@@ -209,39 +209,82 @@ module CartoDB
|
|
209
209
|
end
|
210
210
|
end
|
211
211
|
|
212
|
-
#
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
212
|
+
# Importing CartoDB CSV exports
|
213
|
+
# ===============================
|
214
|
+
# * if there is a column already called the_geom
|
215
|
+
# * if there is geojson in it
|
216
|
+
# * rename column to the_geom_orig
|
217
|
+
# * create a new column with the correct type (Assume 4326) "the_geom_temp"
|
218
|
+
# * loop over table and parse geojson into postgis geometries
|
219
|
+
# * drop the_geom_orig
|
220
|
+
#
|
221
|
+
# TODO: move the geom over using ST_FromGeoJSON once inside PostGIS 2.0
|
222
|
+
if column_names.include? "the_geom"
|
223
|
+
if res = @db_connection["select the_geom from #{@suggested_name} limit 1"].first
|
224
|
+
|
225
|
+
# attempt to read as geojson. If it fails, continue
|
226
|
+
begin
|
227
|
+
geojson = RGeo::GeoJSON.decode(res[:the_geom], :json_parser => :json)
|
228
|
+
geometry_type = geojson.geometry_type.type_name.upcase
|
229
|
+
|
230
|
+
if geometry_type
|
231
|
+
# move original geometry column around
|
232
|
+
@db_connection.run("ALTER TABLE #{@suggested_name} RENAME COLUMN the_geom TO the_geom_orig;")
|
233
|
+
@db_connection.run("SELECT AddGeometryColumn('#{@suggested_name}','the_geom',4326, '#{geometry_type}', 2)")
|
234
|
+
@db_connection.run("CREATE INDEX #{@suggested_name}_the_geom_gist ON #{@suggested_name} USING GIST (the_geom)")
|
235
|
+
|
236
|
+
# loop through old geom parsing into the_geom.
|
237
|
+
# TODO: Should probably window this
|
238
|
+
@db_connection["select the_geom_orig from #{@suggested_name}"].each do |res|
|
239
|
+
begin
|
240
|
+
geojson = RGeo::GeoJSON.decode(res[:the_geom_orig], :json_parser => :json)
|
241
|
+
@db_connection.run("UPDATE #{@suggested_name} SET the_geom = ST_GeomFromText('#{geojson.as_text}', 4326) WHERE the_geom_orig = '#{res[:the_geom_orig]}'")
|
242
|
+
rescue => e
|
243
|
+
runlog.err << "silently fail conversion #{geojson.inspect} to #{@suggested_name}. #{e.inspect}"
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
# Drop original the_geom column
|
248
|
+
@db_connection.run("ALTER TABLE #{@suggested_name} DROP COLUMN the_geom_orig")
|
249
|
+
end
|
250
|
+
rescue => e
|
251
|
+
runlog.err << "failed to read geojson for #{@suggested_name}. #{e.inspect}"
|
252
|
+
end
|
253
|
+
end
|
236
254
|
end
|
237
255
|
|
238
|
-
|
239
|
-
|
240
|
-
|
256
|
+
# if there is no the_geom, and there are latitude and longitude columns, create the_geom
|
257
|
+
unless column_names.include? "the_geom"
|
258
|
+
|
259
|
+
latitude_possible_names = "'latitude','lat','latitudedecimal','latitud','lati'"
|
260
|
+
longitude_possible_names = "'longitude','lon','lng','longitudedecimal','longitud','long'"
|
261
|
+
|
262
|
+
matching_latitude = nil
|
263
|
+
res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
|
264
|
+
and lower(column_name) in (#{latitude_possible_names}) LIMIT 1"]
|
265
|
+
if !res.first.nil?
|
266
|
+
matching_latitude= res.first[:column_name]
|
267
|
+
end
|
268
|
+
matching_longitude = nil
|
269
|
+
res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
|
270
|
+
and lower(column_name) in (#{longitude_possible_names}) LIMIT 1"]
|
271
|
+
if !res.first.nil?
|
272
|
+
matching_longitude= res.first[:column_name]
|
273
|
+
end
|
274
|
+
|
275
|
+
|
276
|
+
if matching_latitude and matching_longitude
|
277
|
+
#we know there is a latitude/longitude columns
|
278
|
+
@db_connection.run("SELECT AddGeometryColumn('#{@suggested_name}','the_geom',4326, 'POINT', 2);")
|
279
|
+
@db_connection.run("UPDATE \"#{@suggested_name}\" SET the_geom = ST_GeomFromText('POINT('|| \"#{matching_longitude}\" ||' '|| \"#{matching_latitude}\" ||')',4326)
|
280
|
+
WHERE \"#{matching_longitude}\" IS NOT NULL AND \"#{matching_latitude}\" IS NOT NULL AND \"#{matching_longitude}\"<>'' AND \"#{matching_latitude}\"<>''")
|
281
|
+
@db_connection.run("CREATE INDEX \"#{@suggested_name}_the_geom_gist\" ON \"#{@suggested_name}\" USING GIST (the_geom)")
|
282
|
+
end
|
283
|
+
end
|
241
284
|
|
242
285
|
@table_created = true
|
243
286
|
|
244
|
-
FileUtils.rm_rf(path)
|
287
|
+
FileUtils.rm_rf(Dir.glob(path))
|
245
288
|
rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
|
246
289
|
|
247
290
|
return OpenStruct.new({
|
data/spec/import_spec.rb
CHANGED
@@ -254,7 +254,7 @@ describe CartoDB::Importer do
|
|
254
254
|
columns = db_connection.schema(:vizzuality).map{|s| s[0].to_s}
|
255
255
|
|
256
256
|
expected_columns = ["gid", "subclass", "x", "y", "length", "area", "angle", "name",
|
257
|
-
"pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida"
|
257
|
+
"pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida"]
|
258
258
|
(columns & expected_columns).sort.should == expected_columns.sort
|
259
259
|
end
|
260
260
|
|
@@ -346,7 +346,7 @@ describe CartoDB::Importer do
|
|
346
346
|
end
|
347
347
|
|
348
348
|
describe "Import CSV with latidude/logitude" do
|
349
|
-
it "should import
|
349
|
+
it "should import walmart.csv" do
|
350
350
|
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/walmart.csv", __FILE__),
|
351
351
|
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
352
352
|
:host => 'localhost', :port => 5432, :suggested_name => 'walmart'
|
@@ -358,7 +358,7 @@ describe CartoDB::Importer do
|
|
358
358
|
end
|
359
359
|
|
360
360
|
describe "Import CSV with lat/lon" do
|
361
|
-
it "should import
|
361
|
+
it "should import walmart.csv" do
|
362
362
|
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/walmart_latlon.csv", __FILE__),
|
363
363
|
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
364
364
|
:host => 'localhost', :port => 5432, :suggested_name => 'walmart_latlon'
|
@@ -368,6 +368,52 @@ describe CartoDB::Importer do
|
|
368
368
|
result.import_type.should == '.csv'
|
369
369
|
end
|
370
370
|
end
|
371
|
+
|
372
|
+
describe "Import CartoDB CSV export with lat/lon" do
|
373
|
+
it "should import CartoDB_csv_export.zip" do
|
374
|
+
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/CartoDB_csv_export.zip", __FILE__),
|
375
|
+
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
376
|
+
:host => 'localhost', :port => 5432, :suggested_name => 'cartodb_csv_export'
|
377
|
+
result = importer.import!
|
378
|
+
result.name.should == 'cartodb_csv_export'
|
379
|
+
result.rows_imported.should == 155
|
380
|
+
result.import_type.should == '.csv'
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
# TODO: check that the_geom is now a real geometry built from geojson.
|
385
|
+
describe "Import CartoDB CSV export with the_geom in geojson" do
|
386
|
+
it "should import CartoDB_csv_multipoly_export.zip" do
|
387
|
+
opt = {:import_from_file => File.expand_path("../support/data/CartoDB_csv_multipoly_export.zip", __FILE__),
|
388
|
+
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
389
|
+
:host => 'localhost', :port => 5432, :suggested_name => 'cartodb_csv_multipoly_export'}
|
390
|
+
importer = CartoDB::Importer.new opt
|
391
|
+
result = importer.import!
|
392
|
+
result.name.should == 'cartodb_csv_multipoly_export'
|
393
|
+
result.rows_imported.should == 601
|
394
|
+
result.import_type.should == '.csv'
|
395
|
+
|
396
|
+
# test geometry returned is legit
|
397
|
+
pg = "postgres://#{opt[:username]}:#{opt[:password]}@#{opt[:host]}:#{opt[:port]}/#{opt[:database]}"
|
398
|
+
sql = "select ST_AsGeoJSON(the_geom,0) as geom from cartodb_csv_multipoly_export limit 1"
|
399
|
+
db_connection = Sequel.connect(pg)
|
400
|
+
res = db_connection[sql].first[:geom]
|
401
|
+
res.should == '{"type":"MultiPolygon","coordinates":[[[[2,39],[2,39],[2,39],[2,39],[2,39]]]]}'
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
describe "Import CartoDB SHP export with lat/lon" do
|
406
|
+
it "should import CartoDB_shp_export.zip" do
|
407
|
+
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/CartoDB_shp_export.zip", __FILE__),
|
408
|
+
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
409
|
+
:host => 'localhost', :port => 5432, :suggested_name => 'cartodb_shp_export'
|
410
|
+
result = importer.import!
|
411
|
+
result.name.should == 'cartodb_shp_export'
|
412
|
+
result.rows_imported.should == 155
|
413
|
+
result.import_type.should == '.shp'
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
371
417
|
|
372
418
|
|
373
419
|
end
|
metadata
CHANGED
@@ -1,21 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cartodb-importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.14
|
4
|
+
version: 0.2.15
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Fernando Blat
|
9
9
|
- Andrew Hill
|
10
10
|
- Javier de la Torre
|
11
|
+
- Simon Tokumine
|
11
12
|
autorequire:
|
12
13
|
bindir: bin
|
13
14
|
cert_chain: []
|
14
|
-
date: 2011-10-
|
15
|
+
date: 2011-10-04 00:00:00.000000000Z
|
15
16
|
dependencies:
|
16
17
|
- !ruby/object:Gem::Dependency
|
17
18
|
name: pg
|
18
|
-
requirement: &
|
19
|
+
requirement: &70233688939180 !ruby/object:Gem::Requirement
|
19
20
|
none: false
|
20
21
|
requirements:
|
21
22
|
- - ~>
|
@@ -23,106 +24,128 @@ dependencies:
|
|
23
24
|
version: '0.11'
|
24
25
|
type: :runtime
|
25
26
|
prerelease: false
|
26
|
-
version_requirements: *
|
27
|
+
version_requirements: *70233688939180
|
27
28
|
- !ruby/object:Gem::Dependency
|
28
29
|
name: sequel
|
29
|
-
requirement: &
|
30
|
+
requirement: &70233688938600 !ruby/object:Gem::Requirement
|
30
31
|
none: false
|
31
32
|
requirements:
|
32
|
-
- -
|
33
|
+
- - ~>
|
33
34
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
35
|
+
version: 3.28.0
|
35
36
|
type: :runtime
|
36
37
|
prerelease: false
|
37
|
-
version_requirements: *
|
38
|
+
version_requirements: *70233688938600
|
38
39
|
- !ruby/object:Gem::Dependency
|
39
40
|
name: roo
|
40
|
-
requirement: &
|
41
|
+
requirement: &70233688938100 !ruby/object:Gem::Requirement
|
41
42
|
none: false
|
42
43
|
requirements:
|
43
|
-
- -
|
44
|
+
- - ~>
|
44
45
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
46
|
+
version: 1.9.7
|
46
47
|
type: :runtime
|
47
48
|
prerelease: false
|
48
|
-
version_requirements: *
|
49
|
+
version_requirements: *70233688938100
|
49
50
|
- !ruby/object:Gem::Dependency
|
50
51
|
name: spreadsheet
|
51
|
-
requirement: &
|
52
|
+
requirement: &70233688937600 !ruby/object:Gem::Requirement
|
52
53
|
none: false
|
53
54
|
requirements:
|
54
|
-
- -
|
55
|
+
- - ~>
|
55
56
|
- !ruby/object:Gem::Version
|
56
|
-
version:
|
57
|
+
version: 0.6.5.9
|
57
58
|
type: :runtime
|
58
59
|
prerelease: false
|
59
|
-
version_requirements: *
|
60
|
+
version_requirements: *70233688937600
|
60
61
|
- !ruby/object:Gem::Dependency
|
61
62
|
name: google-spreadsheet-ruby
|
62
|
-
requirement: &
|
63
|
+
requirement: &70233688937120 !ruby/object:Gem::Requirement
|
63
64
|
none: false
|
64
65
|
requirements:
|
65
|
-
- -
|
66
|
+
- - ~>
|
66
67
|
- !ruby/object:Gem::Version
|
67
|
-
version:
|
68
|
+
version: 0.1.5
|
68
69
|
type: :runtime
|
69
70
|
prerelease: false
|
70
|
-
version_requirements: *
|
71
|
+
version_requirements: *70233688937120
|
71
72
|
- !ruby/object:Gem::Dependency
|
72
73
|
name: rubyzip
|
73
|
-
requirement: &
|
74
|
+
requirement: &70233688936400 !ruby/object:Gem::Requirement
|
74
75
|
none: false
|
75
76
|
requirements:
|
76
|
-
- -
|
77
|
+
- - ~>
|
77
78
|
- !ruby/object:Gem::Version
|
78
|
-
version:
|
79
|
+
version: 0.9.4
|
79
80
|
type: :runtime
|
80
81
|
prerelease: false
|
81
|
-
version_requirements: *
|
82
|
+
version_requirements: *70233688936400
|
82
83
|
- !ruby/object:Gem::Dependency
|
83
84
|
name: builder
|
84
|
-
requirement: &
|
85
|
+
requirement: &70233688935720 !ruby/object:Gem::Requirement
|
85
86
|
none: false
|
86
87
|
requirements:
|
87
|
-
- -
|
88
|
+
- - ~>
|
88
89
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
90
|
+
version: 3.0.0
|
90
91
|
type: :runtime
|
91
92
|
prerelease: false
|
92
|
-
version_requirements: *
|
93
|
+
version_requirements: *70233688935720
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: rgeo
|
96
|
+
requirement: &70233688935020 !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 0.3.2
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: *70233688935020
|
105
|
+
- !ruby/object:Gem::Dependency
|
106
|
+
name: rgeo-geojson
|
107
|
+
requirement: &70233688934360 !ruby/object:Gem::Requirement
|
108
|
+
none: false
|
109
|
+
requirements:
|
110
|
+
- - ~>
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 0.2.1
|
113
|
+
type: :runtime
|
114
|
+
prerelease: false
|
115
|
+
version_requirements: *70233688934360
|
93
116
|
- !ruby/object:Gem::Dependency
|
94
117
|
name: rspec
|
95
|
-
requirement: &
|
118
|
+
requirement: &70233688933680 !ruby/object:Gem::Requirement
|
96
119
|
none: false
|
97
120
|
requirements:
|
98
|
-
- -
|
121
|
+
- - ~>
|
99
122
|
- !ruby/object:Gem::Version
|
100
|
-
version:
|
123
|
+
version: 2.6.0
|
101
124
|
type: :development
|
102
125
|
prerelease: false
|
103
|
-
version_requirements: *
|
126
|
+
version_requirements: *70233688933680
|
104
127
|
- !ruby/object:Gem::Dependency
|
105
128
|
name: mocha
|
106
|
-
requirement: &
|
129
|
+
requirement: &70233688932800 !ruby/object:Gem::Requirement
|
107
130
|
none: false
|
108
131
|
requirements:
|
109
|
-
- -
|
132
|
+
- - ~>
|
110
133
|
- !ruby/object:Gem::Version
|
111
|
-
version:
|
134
|
+
version: 0.10.0
|
112
135
|
type: :development
|
113
136
|
prerelease: false
|
114
|
-
version_requirements: *
|
137
|
+
version_requirements: *70233688932800
|
115
138
|
- !ruby/object:Gem::Dependency
|
116
139
|
name: ruby-debug19
|
117
|
-
requirement: &
|
140
|
+
requirement: &70233688931720 !ruby/object:Gem::Requirement
|
118
141
|
none: false
|
119
142
|
requirements:
|
120
|
-
- -
|
143
|
+
- - ~>
|
121
144
|
- !ruby/object:Gem::Version
|
122
|
-
version:
|
145
|
+
version: 0.11.6
|
123
146
|
type: :development
|
124
147
|
prerelease: false
|
125
|
-
version_requirements: *
|
148
|
+
version_requirements: *70233688931720
|
126
149
|
description: Import CSV, SHP, and other files with data into a PostgreSQL table
|
127
150
|
email:
|
128
151
|
- andrew@vizzuality.com
|
@@ -137,7 +160,6 @@ files:
|
|
137
160
|
- README.md
|
138
161
|
- Rakefile
|
139
162
|
- cartodb-importer.gemspec
|
140
|
-
- clubbing.shp
|
141
163
|
- lib/cartodb-exporter.rb
|
142
164
|
- lib/cartodb-exporter/exporter.rb
|
143
165
|
- lib/cartodb-exporter/version.rb
|
data/clubbing.shp
DELETED
File without changes
|