cartodb-importer 0.2.14 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
6
6
  s.name = "cartodb-importer"
7
7
  s.version = CartoDB::Importer::VERSION
8
8
  s.platform = Gem::Platform::RUBY
9
- s.authors = ["Fernando Blat", "Andrew Hill", "Javier de la Torre"]
9
+ s.authors = ["Fernando Blat", "Andrew Hill", "Javier de la Torre", "Simon Tokumine"]
10
10
  s.email = ["andrew@vizzuality.com"]
11
11
  s.homepage = ""
12
12
  s.summary = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
@@ -20,14 +20,16 @@ Gem::Specification.new do |s|
20
20
  s.require_paths = ["lib"]
21
21
 
22
22
  s.add_runtime_dependency "pg", "~> 0.11"
23
- s.add_runtime_dependency "sequel"
24
- s.add_runtime_dependency "roo"
25
- s.add_runtime_dependency "spreadsheet"
26
- s.add_runtime_dependency "google-spreadsheet-ruby"
27
- s.add_runtime_dependency "rubyzip"
28
- s.add_runtime_dependency "builder"
23
+ s.add_runtime_dependency "sequel", "~> 3.28.0"
24
+ s.add_runtime_dependency "roo", "~> 1.9.7"
25
+ s.add_runtime_dependency "spreadsheet", "~> 0.6.5.9"
26
+ s.add_runtime_dependency "google-spreadsheet-ruby", "~> 0.1.5"
27
+ s.add_runtime_dependency "rubyzip", "~> 0.9.4"
28
+ s.add_runtime_dependency "builder", "~> 3.0.0"
29
+ s.add_runtime_dependency "rgeo", "~> 0.3.2"
30
+ s.add_runtime_dependency "rgeo-geojson", "~> 0.2.1"
29
31
 
30
- s.add_development_dependency "rspec"
31
- s.add_development_dependency "mocha"
32
- s.add_development_dependency "ruby-debug19"
32
+ s.add_development_dependency "rspec", "~> 2.6.0"
33
+ s.add_development_dependency "mocha", "~> 0.10.0"
34
+ s.add_development_dependency "ruby-debug19", "~> 0.11.6"
33
35
  end
@@ -4,6 +4,8 @@ require 'rubygems'
4
4
  require 'bundler'
5
5
  Bundler.setup
6
6
 
7
+ require 'rgeo'
8
+ require 'rgeo/geo_json'
7
9
  require 'roo'
8
10
  require 'csv'
9
11
  require 'tempfile'
@@ -4,6 +4,8 @@ require 'rubygems'
4
4
  require 'bundler'
5
5
  Bundler.setup
6
6
 
7
+ require 'rgeo'
8
+ require 'rgeo/geo_json'
7
9
  require 'roo'
8
10
  require 'csv'
9
11
  require 'tempfile'
@@ -175,7 +175,7 @@ module CartoDB
175
175
 
176
176
  @table_created = true
177
177
 
178
- FileUtils.rm_rf(path)
178
+ FileUtils.rm_rf(Dir.glob(path))
179
179
  rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
180
180
 
181
181
  return OpenStruct.new({
@@ -209,39 +209,82 @@ module CartoDB
209
209
  end
210
210
  end
211
211
 
212
- #Now, if there is a ltitude and longitude column, lets create a the_geom for it
213
- latitude_possible_names = "'latitude','lat','latitudedecimal','latitud','lati'"
214
- longitude_possible_names = "'longitude','lon','lng','longitudedecimal','longitud','long'"
215
-
216
- matching_latitude = nil
217
- res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
218
- and lower(column_name) in (#{latitude_possible_names}) LIMIT 1"]
219
- if !res.first.nil?
220
- matching_latitude= res.first[:column_name]
221
- end
222
- matching_longitude = nil
223
- res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
224
- and lower(column_name) in (#{longitude_possible_names}) LIMIT 1"]
225
- if !res.first.nil?
226
- matching_longitude= res.first[:column_name]
227
- end
228
-
229
-
230
- if matching_latitude and matching_longitude
231
- #we know there is a latitude/longitude columns
232
- @db_connection.run("SELECT AddGeometryColumn('#{@suggested_name}','the_geom',4326, 'POINT', 2);")
233
- @db_connection.run("UPDATE \"#{@suggested_name}\" SET the_geom = ST_GeomFromText('POINT('|| \"#{matching_longitude}\" ||' '|| \"#{matching_latitude}\" ||')',4326)
234
- WHERE \"#{matching_longitude}\" IS NOT NULL AND \"#{matching_latitude}\" IS NOT NULL AND \"#{matching_longitude}\"<>'' AND \"#{matching_latitude}\"<>''")
235
- @db_connection.run("CREATE INDEX \"#{@suggested_name}_the_geom_gist\" ON \"#{@suggested_name}\" USING GIST (the_geom)")
212
+ # Importing CartoDB CSV exports
213
+ # ===============================
214
+ # * if there is a column already called the_geom
215
+ # * if there is geojson in it
216
+ # * rename column to the_geom_orig
217
+ # * create a new column with the correct type (Assume 4326) "the_geom_temp"
218
+ # * loop over table and parse geojson into postgis geometries
219
+ # * drop the_geom_orig
220
+ #
221
+ # TODO: move the geom over using ST_FromGeoJSON once inside PostGIS 2.0
222
+ if column_names.include? "the_geom"
223
+ if res = @db_connection["select the_geom from #{@suggested_name} limit 1"].first
224
+
225
+ # attempt to read as geojson. If it fails, continue
226
+ begin
227
+ geojson = RGeo::GeoJSON.decode(res[:the_geom], :json_parser => :json)
228
+ geometry_type = geojson.geometry_type.type_name.upcase
229
+
230
+ if geometry_type
231
+ # move original geometry column around
232
+ @db_connection.run("ALTER TABLE #{@suggested_name} RENAME COLUMN the_geom TO the_geom_orig;")
233
+ @db_connection.run("SELECT AddGeometryColumn('#{@suggested_name}','the_geom',4326, '#{geometry_type}', 2)")
234
+ @db_connection.run("CREATE INDEX #{@suggested_name}_the_geom_gist ON #{@suggested_name} USING GIST (the_geom)")
235
+
236
+ # loop through old geom parsing into the_geom.
237
+ # TODO: Should probably window this
238
+ @db_connection["select the_geom_orig from #{@suggested_name}"].each do |res|
239
+ begin
240
+ geojson = RGeo::GeoJSON.decode(res[:the_geom_orig], :json_parser => :json)
241
+ @db_connection.run("UPDATE #{@suggested_name} SET the_geom = ST_GeomFromText('#{geojson.as_text}', 4326) WHERE the_geom_orig = '#{res[:the_geom_orig]}'")
242
+ rescue => e
243
+ runlog.err << "silently fail conversion #{geojson.inspect} to #{@suggested_name}. #{e.inspect}"
244
+ end
245
+ end
246
+
247
+ # Drop original the_geom column
248
+ @db_connection.run("ALTER TABLE #{@suggested_name} DROP COLUMN the_geom_orig")
249
+ end
250
+ rescue => e
251
+ runlog.err << "failed to read geojson for #{@suggested_name}. #{e.inspect}"
252
+ end
253
+ end
236
254
  end
237
255
 
238
-
239
-
240
-
256
+ # if there is no the_geom, and there are latitude and longitude columns, create the_geom
257
+ unless column_names.include? "the_geom"
258
+
259
+ latitude_possible_names = "'latitude','lat','latitudedecimal','latitud','lati'"
260
+ longitude_possible_names = "'longitude','lon','lng','longitudedecimal','longitud','long'"
261
+
262
+ matching_latitude = nil
263
+ res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
264
+ and lower(column_name) in (#{latitude_possible_names}) LIMIT 1"]
265
+ if !res.first.nil?
266
+ matching_latitude= res.first[:column_name]
267
+ end
268
+ matching_longitude = nil
269
+ res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
270
+ and lower(column_name) in (#{longitude_possible_names}) LIMIT 1"]
271
+ if !res.first.nil?
272
+ matching_longitude= res.first[:column_name]
273
+ end
274
+
275
+
276
+ if matching_latitude and matching_longitude
277
+ #we know there is a latitude/longitude columns
278
+ @db_connection.run("SELECT AddGeometryColumn('#{@suggested_name}','the_geom',4326, 'POINT', 2);")
279
+ @db_connection.run("UPDATE \"#{@suggested_name}\" SET the_geom = ST_GeomFromText('POINT('|| \"#{matching_longitude}\" ||' '|| \"#{matching_latitude}\" ||')',4326)
280
+ WHERE \"#{matching_longitude}\" IS NOT NULL AND \"#{matching_latitude}\" IS NOT NULL AND \"#{matching_longitude}\"<>'' AND \"#{matching_latitude}\"<>''")
281
+ @db_connection.run("CREATE INDEX \"#{@suggested_name}_the_geom_gist\" ON \"#{@suggested_name}\" USING GIST (the_geom)")
282
+ end
283
+ end
241
284
 
242
285
  @table_created = true
243
286
 
244
- FileUtils.rm_rf(path)
287
+ FileUtils.rm_rf(Dir.glob(path))
245
288
  rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
246
289
 
247
290
  return OpenStruct.new({
@@ -1,6 +1,6 @@
1
1
  module CartoDB
2
2
  class Importer
3
- VERSION = "0.2.14"
3
+ VERSION = "0.2.15"
4
4
  end
5
5
  end
6
6
 
data/spec/import_spec.rb CHANGED
@@ -254,7 +254,7 @@ describe CartoDB::Importer do
254
254
  columns = db_connection.schema(:vizzuality).map{|s| s[0].to_s}
255
255
 
256
256
  expected_columns = ["gid", "subclass", "x", "y", "length", "area", "angle", "name",
257
- "pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida", "the_geom"]
257
+ "pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida"]
258
258
  (columns & expected_columns).sort.should == expected_columns.sort
259
259
  end
260
260
 
@@ -346,7 +346,7 @@ describe CartoDB::Importer do
346
346
  end
347
347
 
348
348
  describe "Import CSV with latidude/logitude" do
349
- it "should import estaciones2.csv" do
349
+ it "should import walmart.csv" do
350
350
  importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/walmart.csv", __FILE__),
351
351
  :database => "cartodb_importer_test", :username => 'postgres', :password => '',
352
352
  :host => 'localhost', :port => 5432, :suggested_name => 'walmart'
@@ -358,7 +358,7 @@ describe CartoDB::Importer do
358
358
  end
359
359
 
360
360
  describe "Import CSV with lat/lon" do
361
- it "should import estaciones2.csv" do
361
+ it "should import walmart.csv" do
362
362
  importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/walmart_latlon.csv", __FILE__),
363
363
  :database => "cartodb_importer_test", :username => 'postgres', :password => '',
364
364
  :host => 'localhost', :port => 5432, :suggested_name => 'walmart_latlon'
@@ -368,6 +368,52 @@ describe CartoDB::Importer do
368
368
  result.import_type.should == '.csv'
369
369
  end
370
370
  end
371
+
372
+ describe "Import CartoDB CSV export with lat/lon" do
373
+ it "should import CartoDB_csv_export.zip" do
374
+ importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/CartoDB_csv_export.zip", __FILE__),
375
+ :database => "cartodb_importer_test", :username => 'postgres', :password => '',
376
+ :host => 'localhost', :port => 5432, :suggested_name => 'cartodb_csv_export'
377
+ result = importer.import!
378
+ result.name.should == 'cartodb_csv_export'
379
+ result.rows_imported.should == 155
380
+ result.import_type.should == '.csv'
381
+ end
382
+ end
383
+
384
+ # TODO: check that the_geom is now a real geometry built from geojson.
385
+ describe "Import CartoDB CSV export with the_geom in geojson" do
386
+ it "should import CartoDB_csv_multipoly_export.zip" do
387
+ opt = {:import_from_file => File.expand_path("../support/data/CartoDB_csv_multipoly_export.zip", __FILE__),
388
+ :database => "cartodb_importer_test", :username => 'postgres', :password => '',
389
+ :host => 'localhost', :port => 5432, :suggested_name => 'cartodb_csv_multipoly_export'}
390
+ importer = CartoDB::Importer.new opt
391
+ result = importer.import!
392
+ result.name.should == 'cartodb_csv_multipoly_export'
393
+ result.rows_imported.should == 601
394
+ result.import_type.should == '.csv'
395
+
396
+ # test geometry returned is legit
397
+ pg = "postgres://#{opt[:username]}:#{opt[:password]}@#{opt[:host]}:#{opt[:port]}/#{opt[:database]}"
398
+ sql = "select ST_AsGeoJSON(the_geom,0) as geom from cartodb_csv_multipoly_export limit 1"
399
+ db_connection = Sequel.connect(pg)
400
+ res = db_connection[sql].first[:geom]
401
+ res.should == '{"type":"MultiPolygon","coordinates":[[[[2,39],[2,39],[2,39],[2,39],[2,39]]]]}'
402
+ end
403
+ end
404
+
405
+ describe "Import CartoDB SHP export with lat/lon" do
406
+ it "should import CartoDB_shp_export.zip" do
407
+ importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/CartoDB_shp_export.zip", __FILE__),
408
+ :database => "cartodb_importer_test", :username => 'postgres', :password => '',
409
+ :host => 'localhost', :port => 5432, :suggested_name => 'cartodb_shp_export'
410
+ result = importer.import!
411
+ result.name.should == 'cartodb_shp_export'
412
+ result.rows_imported.should == 155
413
+ result.import_type.should == '.shp'
414
+ end
415
+ end
416
+
371
417
 
372
418
 
373
419
  end
metadata CHANGED
@@ -1,21 +1,22 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cartodb-importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.14
4
+ version: 0.2.15
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Fernando Blat
9
9
  - Andrew Hill
10
10
  - Javier de la Torre
11
+ - Simon Tokumine
11
12
  autorequire:
12
13
  bindir: bin
13
14
  cert_chain: []
14
- date: 2011-10-03 00:00:00.000000000Z
15
+ date: 2011-10-04 00:00:00.000000000Z
15
16
  dependencies:
16
17
  - !ruby/object:Gem::Dependency
17
18
  name: pg
18
- requirement: &70348306305600 !ruby/object:Gem::Requirement
19
+ requirement: &70233688939180 !ruby/object:Gem::Requirement
19
20
  none: false
20
21
  requirements:
21
22
  - - ~>
@@ -23,106 +24,128 @@ dependencies:
23
24
  version: '0.11'
24
25
  type: :runtime
25
26
  prerelease: false
26
- version_requirements: *70348306305600
27
+ version_requirements: *70233688939180
27
28
  - !ruby/object:Gem::Dependency
28
29
  name: sequel
29
- requirement: &70348306288700 !ruby/object:Gem::Requirement
30
+ requirement: &70233688938600 !ruby/object:Gem::Requirement
30
31
  none: false
31
32
  requirements:
32
- - - ! '>='
33
+ - - ~>
33
34
  - !ruby/object:Gem::Version
34
- version: '0'
35
+ version: 3.28.0
35
36
  type: :runtime
36
37
  prerelease: false
37
- version_requirements: *70348306288700
38
+ version_requirements: *70233688938600
38
39
  - !ruby/object:Gem::Dependency
39
40
  name: roo
40
- requirement: &70348306287760 !ruby/object:Gem::Requirement
41
+ requirement: &70233688938100 !ruby/object:Gem::Requirement
41
42
  none: false
42
43
  requirements:
43
- - - ! '>='
44
+ - - ~>
44
45
  - !ruby/object:Gem::Version
45
- version: '0'
46
+ version: 1.9.7
46
47
  type: :runtime
47
48
  prerelease: false
48
- version_requirements: *70348306287760
49
+ version_requirements: *70233688938100
49
50
  - !ruby/object:Gem::Dependency
50
51
  name: spreadsheet
51
- requirement: &70348306287120 !ruby/object:Gem::Requirement
52
+ requirement: &70233688937600 !ruby/object:Gem::Requirement
52
53
  none: false
53
54
  requirements:
54
- - - ! '>='
55
+ - - ~>
55
56
  - !ruby/object:Gem::Version
56
- version: '0'
57
+ version: 0.6.5.9
57
58
  type: :runtime
58
59
  prerelease: false
59
- version_requirements: *70348306287120
60
+ version_requirements: *70233688937600
60
61
  - !ruby/object:Gem::Dependency
61
62
  name: google-spreadsheet-ruby
62
- requirement: &70348306286400 !ruby/object:Gem::Requirement
63
+ requirement: &70233688937120 !ruby/object:Gem::Requirement
63
64
  none: false
64
65
  requirements:
65
- - - ! '>='
66
+ - - ~>
66
67
  - !ruby/object:Gem::Version
67
- version: '0'
68
+ version: 0.1.5
68
69
  type: :runtime
69
70
  prerelease: false
70
- version_requirements: *70348306286400
71
+ version_requirements: *70233688937120
71
72
  - !ruby/object:Gem::Dependency
72
73
  name: rubyzip
73
- requirement: &70348306285500 !ruby/object:Gem::Requirement
74
+ requirement: &70233688936400 !ruby/object:Gem::Requirement
74
75
  none: false
75
76
  requirements:
76
- - - ! '>='
77
+ - - ~>
77
78
  - !ruby/object:Gem::Version
78
- version: '0'
79
+ version: 0.9.4
79
80
  type: :runtime
80
81
  prerelease: false
81
- version_requirements: *70348306285500
82
+ version_requirements: *70233688936400
82
83
  - !ruby/object:Gem::Dependency
83
84
  name: builder
84
- requirement: &70348306284860 !ruby/object:Gem::Requirement
85
+ requirement: &70233688935720 !ruby/object:Gem::Requirement
85
86
  none: false
86
87
  requirements:
87
- - - ! '>='
88
+ - - ~>
88
89
  - !ruby/object:Gem::Version
89
- version: '0'
90
+ version: 3.0.0
90
91
  type: :runtime
91
92
  prerelease: false
92
- version_requirements: *70348306284860
93
+ version_requirements: *70233688935720
94
+ - !ruby/object:Gem::Dependency
95
+ name: rgeo
96
+ requirement: &70233688935020 !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 0.3.2
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: *70233688935020
105
+ - !ruby/object:Gem::Dependency
106
+ name: rgeo-geojson
107
+ requirement: &70233688934360 !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - ~>
111
+ - !ruby/object:Gem::Version
112
+ version: 0.2.1
113
+ type: :runtime
114
+ prerelease: false
115
+ version_requirements: *70233688934360
93
116
  - !ruby/object:Gem::Dependency
94
117
  name: rspec
95
- requirement: &70348306284240 !ruby/object:Gem::Requirement
118
+ requirement: &70233688933680 !ruby/object:Gem::Requirement
96
119
  none: false
97
120
  requirements:
98
- - - ! '>='
121
+ - - ~>
99
122
  - !ruby/object:Gem::Version
100
- version: '0'
123
+ version: 2.6.0
101
124
  type: :development
102
125
  prerelease: false
103
- version_requirements: *70348306284240
126
+ version_requirements: *70233688933680
104
127
  - !ruby/object:Gem::Dependency
105
128
  name: mocha
106
- requirement: &70348306283580 !ruby/object:Gem::Requirement
129
+ requirement: &70233688932800 !ruby/object:Gem::Requirement
107
130
  none: false
108
131
  requirements:
109
- - - ! '>='
132
+ - - ~>
110
133
  - !ruby/object:Gem::Version
111
- version: '0'
134
+ version: 0.10.0
112
135
  type: :development
113
136
  prerelease: false
114
- version_requirements: *70348306283580
137
+ version_requirements: *70233688932800
115
138
  - !ruby/object:Gem::Dependency
116
139
  name: ruby-debug19
117
- requirement: &70348306282880 !ruby/object:Gem::Requirement
140
+ requirement: &70233688931720 !ruby/object:Gem::Requirement
118
141
  none: false
119
142
  requirements:
120
- - - ! '>='
143
+ - - ~>
121
144
  - !ruby/object:Gem::Version
122
- version: '0'
145
+ version: 0.11.6
123
146
  type: :development
124
147
  prerelease: false
125
- version_requirements: *70348306282880
148
+ version_requirements: *70233688931720
126
149
  description: Import CSV, SHP, and other files with data into a PostgreSQL table
127
150
  email:
128
151
  - andrew@vizzuality.com
@@ -137,7 +160,6 @@ files:
137
160
  - README.md
138
161
  - Rakefile
139
162
  - cartodb-importer.gemspec
140
- - clubbing.shp
141
163
  - lib/cartodb-exporter.rb
142
164
  - lib/cartodb-exporter/exporter.rb
143
165
  - lib/cartodb-exporter/version.rb
data/clubbing.shp DELETED
File without changes