cartodb-importer 0.2.14 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/cartodb-importer.gemspec +12 -10
- data/lib/cartodb-exporter.rb +2 -0
- data/lib/cartodb-importer.rb +2 -0
- data/lib/cartodb-importer/importer.rb +72 -29
- data/lib/cartodb-importer/version.rb +1 -1
- data/spec/import_spec.rb +49 -3
- metadata +63 -41
- data/clubbing.shp +0 -0
data/cartodb-importer.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
|
|
6
6
|
s.name = "cartodb-importer"
|
7
7
|
s.version = CartoDB::Importer::VERSION
|
8
8
|
s.platform = Gem::Platform::RUBY
|
9
|
-
s.authors = ["Fernando Blat", "Andrew Hill", "Javier de la Torre"]
|
9
|
+
s.authors = ["Fernando Blat", "Andrew Hill", "Javier de la Torre", "Simon Tokumine"]
|
10
10
|
s.email = ["andrew@vizzuality.com"]
|
11
11
|
s.homepage = ""
|
12
12
|
s.summary = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
|
@@ -20,14 +20,16 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
22
|
s.add_runtime_dependency "pg", "~> 0.11"
|
23
|
-
s.add_runtime_dependency "sequel"
|
24
|
-
s.add_runtime_dependency "roo"
|
25
|
-
s.add_runtime_dependency "spreadsheet"
|
26
|
-
s.add_runtime_dependency "google-spreadsheet-ruby"
|
27
|
-
s.add_runtime_dependency "rubyzip"
|
28
|
-
s.add_runtime_dependency "builder"
|
23
|
+
s.add_runtime_dependency "sequel", "~> 3.28.0"
|
24
|
+
s.add_runtime_dependency "roo", "~> 1.9.7"
|
25
|
+
s.add_runtime_dependency "spreadsheet", "~> 0.6.5.9"
|
26
|
+
s.add_runtime_dependency "google-spreadsheet-ruby", "~> 0.1.5"
|
27
|
+
s.add_runtime_dependency "rubyzip", "~> 0.9.4"
|
28
|
+
s.add_runtime_dependency "builder", "~> 3.0.0"
|
29
|
+
s.add_runtime_dependency "rgeo", "~> 0.3.2"
|
30
|
+
s.add_runtime_dependency "rgeo-geojson", "~> 0.2.1"
|
29
31
|
|
30
|
-
s.add_development_dependency "rspec"
|
31
|
-
s.add_development_dependency "mocha"
|
32
|
-
s.add_development_dependency "ruby-debug19"
|
32
|
+
s.add_development_dependency "rspec", "~> 2.6.0"
|
33
|
+
s.add_development_dependency "mocha", "~> 0.10.0"
|
34
|
+
s.add_development_dependency "ruby-debug19", "~> 0.11.6"
|
33
35
|
end
|
data/lib/cartodb-exporter.rb
CHANGED
data/lib/cartodb-importer.rb
CHANGED
@@ -175,7 +175,7 @@ module CartoDB
|
|
175
175
|
|
176
176
|
@table_created = true
|
177
177
|
|
178
|
-
FileUtils.rm_rf(path)
|
178
|
+
FileUtils.rm_rf(Dir.glob(path))
|
179
179
|
rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
|
180
180
|
|
181
181
|
return OpenStruct.new({
|
@@ -209,39 +209,82 @@ module CartoDB
|
|
209
209
|
end
|
210
210
|
end
|
211
211
|
|
212
|
-
#
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
212
|
+
# Importing CartoDB CSV exports
|
213
|
+
# ===============================
|
214
|
+
# * if there is a column already called the_geom
|
215
|
+
# * if there is geojson in it
|
216
|
+
# * rename column to the_geom_orig
|
217
|
+
# * create a new column with the correct type (Assume 4326) "the_geom_temp"
|
218
|
+
# * loop over table and parse geojson into postgis geometries
|
219
|
+
# * drop the_geom_orig
|
220
|
+
#
|
221
|
+
# TODO: move the geom over using ST_FromGeoJSON once inside PostGIS 2.0
|
222
|
+
if column_names.include? "the_geom"
|
223
|
+
if res = @db_connection["select the_geom from #{@suggested_name} limit 1"].first
|
224
|
+
|
225
|
+
# attempt to read as geojson. If it fails, continue
|
226
|
+
begin
|
227
|
+
geojson = RGeo::GeoJSON.decode(res[:the_geom], :json_parser => :json)
|
228
|
+
geometry_type = geojson.geometry_type.type_name.upcase
|
229
|
+
|
230
|
+
if geometry_type
|
231
|
+
# move original geometry column around
|
232
|
+
@db_connection.run("ALTER TABLE #{@suggested_name} RENAME COLUMN the_geom TO the_geom_orig;")
|
233
|
+
@db_connection.run("SELECT AddGeometryColumn('#{@suggested_name}','the_geom',4326, '#{geometry_type}', 2)")
|
234
|
+
@db_connection.run("CREATE INDEX #{@suggested_name}_the_geom_gist ON #{@suggested_name} USING GIST (the_geom)")
|
235
|
+
|
236
|
+
# loop through old geom parsing into the_geom.
|
237
|
+
# TODO: Should probably window this
|
238
|
+
@db_connection["select the_geom_orig from #{@suggested_name}"].each do |res|
|
239
|
+
begin
|
240
|
+
geojson = RGeo::GeoJSON.decode(res[:the_geom_orig], :json_parser => :json)
|
241
|
+
@db_connection.run("UPDATE #{@suggested_name} SET the_geom = ST_GeomFromText('#{geojson.as_text}', 4326) WHERE the_geom_orig = '#{res[:the_geom_orig]}'")
|
242
|
+
rescue => e
|
243
|
+
runlog.err << "silently fail conversion #{geojson.inspect} to #{@suggested_name}. #{e.inspect}"
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
# Drop original the_geom column
|
248
|
+
@db_connection.run("ALTER TABLE #{@suggested_name} DROP COLUMN the_geom_orig")
|
249
|
+
end
|
250
|
+
rescue => e
|
251
|
+
runlog.err << "failed to read geojson for #{@suggested_name}. #{e.inspect}"
|
252
|
+
end
|
253
|
+
end
|
236
254
|
end
|
237
255
|
|
238
|
-
|
239
|
-
|
240
|
-
|
256
|
+
# if there is no the_geom, and there are latitude and longitude columns, create the_geom
|
257
|
+
unless column_names.include? "the_geom"
|
258
|
+
|
259
|
+
latitude_possible_names = "'latitude','lat','latitudedecimal','latitud','lati'"
|
260
|
+
longitude_possible_names = "'longitude','lon','lng','longitudedecimal','longitud','long'"
|
261
|
+
|
262
|
+
matching_latitude = nil
|
263
|
+
res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
|
264
|
+
and lower(column_name) in (#{latitude_possible_names}) LIMIT 1"]
|
265
|
+
if !res.first.nil?
|
266
|
+
matching_latitude= res.first[:column_name]
|
267
|
+
end
|
268
|
+
matching_longitude = nil
|
269
|
+
res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
|
270
|
+
and lower(column_name) in (#{longitude_possible_names}) LIMIT 1"]
|
271
|
+
if !res.first.nil?
|
272
|
+
matching_longitude= res.first[:column_name]
|
273
|
+
end
|
274
|
+
|
275
|
+
|
276
|
+
if matching_latitude and matching_longitude
|
277
|
+
#we know there is a latitude/longitude columns
|
278
|
+
@db_connection.run("SELECT AddGeometryColumn('#{@suggested_name}','the_geom',4326, 'POINT', 2);")
|
279
|
+
@db_connection.run("UPDATE \"#{@suggested_name}\" SET the_geom = ST_GeomFromText('POINT('|| \"#{matching_longitude}\" ||' '|| \"#{matching_latitude}\" ||')',4326)
|
280
|
+
WHERE \"#{matching_longitude}\" IS NOT NULL AND \"#{matching_latitude}\" IS NOT NULL AND \"#{matching_longitude}\"<>'' AND \"#{matching_latitude}\"<>''")
|
281
|
+
@db_connection.run("CREATE INDEX \"#{@suggested_name}_the_geom_gist\" ON \"#{@suggested_name}\" USING GIST (the_geom)")
|
282
|
+
end
|
283
|
+
end
|
241
284
|
|
242
285
|
@table_created = true
|
243
286
|
|
244
|
-
FileUtils.rm_rf(path)
|
287
|
+
FileUtils.rm_rf(Dir.glob(path))
|
245
288
|
rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
|
246
289
|
|
247
290
|
return OpenStruct.new({
|
data/spec/import_spec.rb
CHANGED
@@ -254,7 +254,7 @@ describe CartoDB::Importer do
|
|
254
254
|
columns = db_connection.schema(:vizzuality).map{|s| s[0].to_s}
|
255
255
|
|
256
256
|
expected_columns = ["gid", "subclass", "x", "y", "length", "area", "angle", "name",
|
257
|
-
"pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida"
|
257
|
+
"pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida"]
|
258
258
|
(columns & expected_columns).sort.should == expected_columns.sort
|
259
259
|
end
|
260
260
|
|
@@ -346,7 +346,7 @@ describe CartoDB::Importer do
|
|
346
346
|
end
|
347
347
|
|
348
348
|
describe "Import CSV with latidude/logitude" do
|
349
|
-
it "should import
|
349
|
+
it "should import walmart.csv" do
|
350
350
|
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/walmart.csv", __FILE__),
|
351
351
|
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
352
352
|
:host => 'localhost', :port => 5432, :suggested_name => 'walmart'
|
@@ -358,7 +358,7 @@ describe CartoDB::Importer do
|
|
358
358
|
end
|
359
359
|
|
360
360
|
describe "Import CSV with lat/lon" do
|
361
|
-
it "should import
|
361
|
+
it "should import walmart.csv" do
|
362
362
|
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/walmart_latlon.csv", __FILE__),
|
363
363
|
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
364
364
|
:host => 'localhost', :port => 5432, :suggested_name => 'walmart_latlon'
|
@@ -368,6 +368,52 @@ describe CartoDB::Importer do
|
|
368
368
|
result.import_type.should == '.csv'
|
369
369
|
end
|
370
370
|
end
|
371
|
+
|
372
|
+
describe "Import CartoDB CSV export with lat/lon" do
|
373
|
+
it "should import CartoDB_csv_export.zip" do
|
374
|
+
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/CartoDB_csv_export.zip", __FILE__),
|
375
|
+
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
376
|
+
:host => 'localhost', :port => 5432, :suggested_name => 'cartodb_csv_export'
|
377
|
+
result = importer.import!
|
378
|
+
result.name.should == 'cartodb_csv_export'
|
379
|
+
result.rows_imported.should == 155
|
380
|
+
result.import_type.should == '.csv'
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
# TODO: check that the_geom is now a real geometry built from geojson.
|
385
|
+
describe "Import CartoDB CSV export with the_geom in geojson" do
|
386
|
+
it "should import CartoDB_csv_multipoly_export.zip" do
|
387
|
+
opt = {:import_from_file => File.expand_path("../support/data/CartoDB_csv_multipoly_export.zip", __FILE__),
|
388
|
+
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
389
|
+
:host => 'localhost', :port => 5432, :suggested_name => 'cartodb_csv_multipoly_export'}
|
390
|
+
importer = CartoDB::Importer.new opt
|
391
|
+
result = importer.import!
|
392
|
+
result.name.should == 'cartodb_csv_multipoly_export'
|
393
|
+
result.rows_imported.should == 601
|
394
|
+
result.import_type.should == '.csv'
|
395
|
+
|
396
|
+
# test geometry returned is legit
|
397
|
+
pg = "postgres://#{opt[:username]}:#{opt[:password]}@#{opt[:host]}:#{opt[:port]}/#{opt[:database]}"
|
398
|
+
sql = "select ST_AsGeoJSON(the_geom,0) as geom from cartodb_csv_multipoly_export limit 1"
|
399
|
+
db_connection = Sequel.connect(pg)
|
400
|
+
res = db_connection[sql].first[:geom]
|
401
|
+
res.should == '{"type":"MultiPolygon","coordinates":[[[[2,39],[2,39],[2,39],[2,39],[2,39]]]]}'
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
405
|
+
describe "Import CartoDB SHP export with lat/lon" do
|
406
|
+
it "should import CartoDB_shp_export.zip" do
|
407
|
+
importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/CartoDB_shp_export.zip", __FILE__),
|
408
|
+
:database => "cartodb_importer_test", :username => 'postgres', :password => '',
|
409
|
+
:host => 'localhost', :port => 5432, :suggested_name => 'cartodb_shp_export'
|
410
|
+
result = importer.import!
|
411
|
+
result.name.should == 'cartodb_shp_export'
|
412
|
+
result.rows_imported.should == 155
|
413
|
+
result.import_type.should == '.shp'
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
371
417
|
|
372
418
|
|
373
419
|
end
|
metadata
CHANGED
@@ -1,21 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cartodb-importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.14
|
4
|
+
version: 0.2.15
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Fernando Blat
|
9
9
|
- Andrew Hill
|
10
10
|
- Javier de la Torre
|
11
|
+
- Simon Tokumine
|
11
12
|
autorequire:
|
12
13
|
bindir: bin
|
13
14
|
cert_chain: []
|
14
|
-
date: 2011-10-
|
15
|
+
date: 2011-10-04 00:00:00.000000000Z
|
15
16
|
dependencies:
|
16
17
|
- !ruby/object:Gem::Dependency
|
17
18
|
name: pg
|
18
|
-
requirement: &
|
19
|
+
requirement: &70233688939180 !ruby/object:Gem::Requirement
|
19
20
|
none: false
|
20
21
|
requirements:
|
21
22
|
- - ~>
|
@@ -23,106 +24,128 @@ dependencies:
|
|
23
24
|
version: '0.11'
|
24
25
|
type: :runtime
|
25
26
|
prerelease: false
|
26
|
-
version_requirements: *
|
27
|
+
version_requirements: *70233688939180
|
27
28
|
- !ruby/object:Gem::Dependency
|
28
29
|
name: sequel
|
29
|
-
requirement: &
|
30
|
+
requirement: &70233688938600 !ruby/object:Gem::Requirement
|
30
31
|
none: false
|
31
32
|
requirements:
|
32
|
-
- -
|
33
|
+
- - ~>
|
33
34
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
35
|
+
version: 3.28.0
|
35
36
|
type: :runtime
|
36
37
|
prerelease: false
|
37
|
-
version_requirements: *
|
38
|
+
version_requirements: *70233688938600
|
38
39
|
- !ruby/object:Gem::Dependency
|
39
40
|
name: roo
|
40
|
-
requirement: &
|
41
|
+
requirement: &70233688938100 !ruby/object:Gem::Requirement
|
41
42
|
none: false
|
42
43
|
requirements:
|
43
|
-
- -
|
44
|
+
- - ~>
|
44
45
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
46
|
+
version: 1.9.7
|
46
47
|
type: :runtime
|
47
48
|
prerelease: false
|
48
|
-
version_requirements: *
|
49
|
+
version_requirements: *70233688938100
|
49
50
|
- !ruby/object:Gem::Dependency
|
50
51
|
name: spreadsheet
|
51
|
-
requirement: &
|
52
|
+
requirement: &70233688937600 !ruby/object:Gem::Requirement
|
52
53
|
none: false
|
53
54
|
requirements:
|
54
|
-
- -
|
55
|
+
- - ~>
|
55
56
|
- !ruby/object:Gem::Version
|
56
|
-
version:
|
57
|
+
version: 0.6.5.9
|
57
58
|
type: :runtime
|
58
59
|
prerelease: false
|
59
|
-
version_requirements: *
|
60
|
+
version_requirements: *70233688937600
|
60
61
|
- !ruby/object:Gem::Dependency
|
61
62
|
name: google-spreadsheet-ruby
|
62
|
-
requirement: &
|
63
|
+
requirement: &70233688937120 !ruby/object:Gem::Requirement
|
63
64
|
none: false
|
64
65
|
requirements:
|
65
|
-
- -
|
66
|
+
- - ~>
|
66
67
|
- !ruby/object:Gem::Version
|
67
|
-
version:
|
68
|
+
version: 0.1.5
|
68
69
|
type: :runtime
|
69
70
|
prerelease: false
|
70
|
-
version_requirements: *
|
71
|
+
version_requirements: *70233688937120
|
71
72
|
- !ruby/object:Gem::Dependency
|
72
73
|
name: rubyzip
|
73
|
-
requirement: &
|
74
|
+
requirement: &70233688936400 !ruby/object:Gem::Requirement
|
74
75
|
none: false
|
75
76
|
requirements:
|
76
|
-
- -
|
77
|
+
- - ~>
|
77
78
|
- !ruby/object:Gem::Version
|
78
|
-
version:
|
79
|
+
version: 0.9.4
|
79
80
|
type: :runtime
|
80
81
|
prerelease: false
|
81
|
-
version_requirements: *
|
82
|
+
version_requirements: *70233688936400
|
82
83
|
- !ruby/object:Gem::Dependency
|
83
84
|
name: builder
|
84
|
-
requirement: &
|
85
|
+
requirement: &70233688935720 !ruby/object:Gem::Requirement
|
85
86
|
none: false
|
86
87
|
requirements:
|
87
|
-
- -
|
88
|
+
- - ~>
|
88
89
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
90
|
+
version: 3.0.0
|
90
91
|
type: :runtime
|
91
92
|
prerelease: false
|
92
|
-
version_requirements: *
|
93
|
+
version_requirements: *70233688935720
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: rgeo
|
96
|
+
requirement: &70233688935020 !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 0.3.2
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: *70233688935020
|
105
|
+
- !ruby/object:Gem::Dependency
|
106
|
+
name: rgeo-geojson
|
107
|
+
requirement: &70233688934360 !ruby/object:Gem::Requirement
|
108
|
+
none: false
|
109
|
+
requirements:
|
110
|
+
- - ~>
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 0.2.1
|
113
|
+
type: :runtime
|
114
|
+
prerelease: false
|
115
|
+
version_requirements: *70233688934360
|
93
116
|
- !ruby/object:Gem::Dependency
|
94
117
|
name: rspec
|
95
|
-
requirement: &
|
118
|
+
requirement: &70233688933680 !ruby/object:Gem::Requirement
|
96
119
|
none: false
|
97
120
|
requirements:
|
98
|
-
- -
|
121
|
+
- - ~>
|
99
122
|
- !ruby/object:Gem::Version
|
100
|
-
version:
|
123
|
+
version: 2.6.0
|
101
124
|
type: :development
|
102
125
|
prerelease: false
|
103
|
-
version_requirements: *
|
126
|
+
version_requirements: *70233688933680
|
104
127
|
- !ruby/object:Gem::Dependency
|
105
128
|
name: mocha
|
106
|
-
requirement: &
|
129
|
+
requirement: &70233688932800 !ruby/object:Gem::Requirement
|
107
130
|
none: false
|
108
131
|
requirements:
|
109
|
-
- -
|
132
|
+
- - ~>
|
110
133
|
- !ruby/object:Gem::Version
|
111
|
-
version:
|
134
|
+
version: 0.10.0
|
112
135
|
type: :development
|
113
136
|
prerelease: false
|
114
|
-
version_requirements: *
|
137
|
+
version_requirements: *70233688932800
|
115
138
|
- !ruby/object:Gem::Dependency
|
116
139
|
name: ruby-debug19
|
117
|
-
requirement: &
|
140
|
+
requirement: &70233688931720 !ruby/object:Gem::Requirement
|
118
141
|
none: false
|
119
142
|
requirements:
|
120
|
-
- -
|
143
|
+
- - ~>
|
121
144
|
- !ruby/object:Gem::Version
|
122
|
-
version:
|
145
|
+
version: 0.11.6
|
123
146
|
type: :development
|
124
147
|
prerelease: false
|
125
|
-
version_requirements: *
|
148
|
+
version_requirements: *70233688931720
|
126
149
|
description: Import CSV, SHP, and other files with data into a PostgreSQL table
|
127
150
|
email:
|
128
151
|
- andrew@vizzuality.com
|
@@ -137,7 +160,6 @@ files:
|
|
137
160
|
- README.md
|
138
161
|
- Rakefile
|
139
162
|
- cartodb-importer.gemspec
|
140
|
-
- clubbing.shp
|
141
163
|
- lib/cartodb-exporter.rb
|
142
164
|
- lib/cartodb-exporter/exporter.rb
|
143
165
|
- lib/cartodb-exporter/version.rb
|
data/clubbing.shp
DELETED
File without changes
|