cartodb-importer 0.2.8 → 0.2.9

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2011, Vizzuality
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ 1. Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ 2. Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+ 3. All advertising materials mentioning features or use of this software
12
+ must display the following acknowledgement:
13
+ This product includes software developed by Vizzuality.
14
+ 4. Neither the name of Vizzuality nor the
15
+ names of its contributors may be used to endorse or promote products
16
+ derived from this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY
19
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
22
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md CHANGED
@@ -9,9 +9,9 @@ CartoDB importer is a Ruby gem that makes your life easier when importing data f
9
9
 
10
10
  ## Installation and dependencies ##
11
11
 
12
- To install Ruby dependencies just install `bundler` gem and run the command `bundle install` in your shell.
12
+ To install Ruby dependencies just install the `bundler` gem and run the command `bundle install` in your shell.
13
13
 
14
- There are also some dependencies of external Python libraries (WTF!). You should install `pip` before:
14
+ There are also some dependencies of external Python libraries. You should install `pip` before:
15
15
 
16
16
  - In Debian / Ubuntu: `apt-get install python-pip`
17
17
 
@@ -10,7 +10,7 @@ module CartoDB
10
10
  end
11
11
  @@debug = true
12
12
 
13
- attr_accessor :import_from_file, :suggested_name,
13
+ attr_accessor :import_from_file,:import_from_url, :suggested_name,
14
14
  :ext, :db_configuration, :db_connection
15
15
 
16
16
  attr_reader :table_created, :force_name
@@ -18,7 +18,18 @@ module CartoDB
18
18
  def initialize(options = {})
19
19
  @@debug = options[:debug] if options[:debug]
20
20
  @table_created = nil
21
- @import_from_file = options[:import_from_file]
21
+
22
+ if !options[:import_from_url].blank?
23
+ #download from internet first
24
+ potential_name = File.basename(options[:import_from_url])
25
+ curl_cmd = "curl -0 \"#{options[:import_from_url]}\" > /tmp/#{potential_name}"
26
+ #log curl_cmd
27
+ `#{curl_cmd}`
28
+ @import_from_file = "/tmp/#{potential_name}"
29
+ else
30
+ @import_from_file = options[:import_from_file]
31
+ end
32
+
22
33
  raise "import_from_file value can't be nil" if @import_from_file.nil?
23
34
 
24
35
  @db_configuration = options.slice(:database, :username, :password, :host, :port)
@@ -91,6 +102,10 @@ module CartoDB
91
102
  end
92
103
 
93
104
  import_type = @ext
105
+ runlog = OpenStruct.new
106
+ runlog.log = Array.new
107
+ runlog.stdout = Array.new
108
+ runlog.err = Array.new
94
109
 
95
110
  # These types of files are converted to CSV
96
111
  if %W{ .xls .xlsx .ods }.include?(@ext)
@@ -103,6 +118,7 @@ module CartoDB
103
118
  when '.ods'
104
119
  Openoffice.new(path)
105
120
  else
121
+ runlog.log << "Don't know how to open file #{new_path}"
106
122
  raise ArgumentError, "Don't know how to open file #{new_path}"
107
123
  end.to_csv(new_path)
108
124
  @import_from_file = File.open(new_path,'r')
@@ -114,11 +130,15 @@ module CartoDB
114
130
 
115
131
  ogr2ogr_bin_path = `which ogr2ogr`.strip
116
132
  ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "PostgreSQL" PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{path} -nln #{@suggested_name}}
117
-
118
- output = `#{ogr2ogr_command} &> /dev/null`
133
+
134
+ out = `#{ogr2ogr_command}`
135
+ if 0 < out.strip.length
136
+ runlog.stdout << out
137
+ end
119
138
 
120
139
  # Check if the file had data; if not, raise an error because probably something went wrong
121
140
  if @db_connection["SELECT * from #{@suggested_name} LIMIT 1"].first.nil?
141
+ runlog.err << "Empty table"
122
142
  raise "Empty table"
123
143
  end
124
144
 
@@ -134,13 +154,13 @@ module CartoDB
134
154
 
135
155
  FileUtils.rm_rf(path)
136
156
  rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
137
-
157
+
138
158
  return OpenStruct.new({
139
159
  :name => @suggested_name,
140
160
  :rows_imported => rows_imported,
141
- :import_type => import_type
161
+ :import_type => import_type,
162
+ :log => runlog
142
163
  })
143
-
144
164
  end
145
165
  if @ext == '.shp'
146
166
 
@@ -152,71 +172,101 @@ module CartoDB
152
172
  random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"
153
173
 
154
174
  normalizer_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/shp_normalizer.py", __FILE__)} #{path} #{random_table_name}"
155
- shp_args_command = `#{normalizer_command}`.split( /, */, 4 )
156
-
157
- #print "-e -i -I -g the_geom -W %s %s %s %s" %(srid,encoding,shp_file,name,flag)
158
-
175
+ out = `#{normalizer_command}`
176
+ shp_args_command = out.split( /, */, 4 )
177
+
159
178
  if shp_args_command.length != 4
160
- raise "Error running python shp_normalizer script: #{normalizer_command}"
179
+ runlog.log << "Error running python shp_normalizer script: #{normalizer_command}"
180
+ runlog.stdout << out
181
+ raise "Error running python shp_normalizer script: #{normalizer_command}"
161
182
  end
162
- full_shp_command = "#{shp2pgsql_bin_path} -s #{shp_args_command[0]} -e -i -I -g the_geom -W #{shp_args_command[1]} #{shp_args_command[2]} #{shp_args_command[3].strip} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
183
+
184
+ full_shp_command = "#{shp2pgsql_bin_path} -s #{shp_args_command[0]} -e -i -g the_geom -W #{shp_args_command[1]} #{shp_args_command[2]} #{shp_args_command[3].strip} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
163
185
  log "Running shp2pgsql: #{full_shp_command}"
164
- %x[#{full_shp_command}]
186
+
187
+ out = `#{full_shp_command}`
188
+ if 0 < out.strip.length
189
+ runlog.stdout << out
190
+ end
191
+
165
192
  if shp_args_command[1] != '4326'
166
-
167
- @db_connection.run("SELECT UpdateGeometrySRID('#{random_table_name}', 'the_geom', 4326)")
168
- @db_connection.run("UPDATE #{random_table_name} SET the_geom = ST_Transform(the_geom, 4326)")
193
+ begin
194
+ @db_connection.run("SELECT UpdateGeometrySRID('#{random_table_name}', 'the_geom', 4326)")
195
+ @db_connection.run("UPDATE \"#{random_table_name}\" SET the_geom = ST_Transform(the_geom, 4326)")
196
+ @db_connection.run("CREATE INDEX \"#{random_table_name}_the_geom_gist\" ON \"#{random_table_name}\" USING GIST (the_geom)")
197
+ rescue Exception => msg
198
+ runlog.err << msg
199
+ end
169
200
  end
170
- @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
171
- @db_connection.run("DROP TABLE #{random_table_name}")
172
- @table_created = true
173
201
 
202
+ begin
203
+ @db_connection.run("ALTER TABLE \"#{random_table_name}\" RENAME TO \"#{@suggested_name}\"")
204
+ @table_created = true
205
+ rescue Exception => msg
206
+ runlog.err << msg
207
+ end
174
208
  entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
175
- rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
209
+ rows_imported = @db_connection["SELECT count(*) as count from \"#{@suggested_name}\""].first[:count]
176
210
  @import_from_file.unlink
177
211
 
178
212
  return OpenStruct.new({
179
- :name => @suggested_name,
180
- :rows_imported => rows_imported,
181
- :import_type => import_type
213
+ :name => @suggested_name,
214
+ :rows_imported => rows_imported,
215
+ :import_type => import_type,
216
+ :log => runlog
182
217
  })
183
218
  end
184
219
  if %W{ .tif .tiff }.include?(@ext)
185
220
  log "Importing raster file: #{path}"
221
+
186
222
  raster2pgsql_bin_path = `which raster2pgsql.py`.strip
187
223
 
188
224
  host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
189
225
  port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
190
- #@suggested_name = get_valid_name(File.basename(path).tr('.','_').downcase.sanitize) unless @force_name
226
+
191
227
  random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"
192
228
 
193
229
  gdal_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/srid_from_gdal.py", __FILE__)} #{path}"
194
230
  rast_srid_command = `#{gdal_command}`.strip
195
231
 
232
+ if 0 < rast_srid_command.strip.length
233
+ runlog.stdout << rast_srid_command
234
+ end
235
+
236
+
196
237
  log "SRID : #{rast_srid_command}"
197
238
 
198
239
  blocksize = "180x180"
199
240
  full_rast_command = "#{raster2pgsql_bin_path} -I -s #{rast_srid_command.strip} -k #{blocksize} -t #{random_table_name} -r #{path} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
200
241
  log "Running raster2pgsql: #{raster2pgsql_bin_path} #{full_rast_command}"
201
- %x[#{full_rast_command}]
242
+ out = `#{full_rast_command}`
243
+ if 0 < out.strip.length
244
+ runlog.stdout << out
245
+ end
202
246
 
203
- @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
204
- @db_connection.run("DROP TABLE #{random_table_name}")
247
+ begin
248
+ @db_connection.run("CREATE TABLE \"#{@suggested_name}\" AS SELECT * FROM \"#{random_table_name}\"")
249
+ @db_connection.run("DROP TABLE \"#{random_table_name}\"")
250
+ @table_created = true
251
+ rescue Exception => msg
252
+ runlog.err << msg
253
+ end
205
254
 
206
255
  entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
207
- rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
256
+ rows_imported = @db_connection["SELECT count(*) as count from \"#{@suggested_name}\""].first[:count]
208
257
  @import_from_file.unlink
209
258
 
210
259
  @table_created = true
211
260
 
212
261
  entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
213
- rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
262
+ rows_imported = @db_connection["SELECT count(*) as count from \"#{@suggested_name}\""].first[:count]
214
263
  @import_from_file.unlink
215
264
 
216
265
  return OpenStruct.new({
217
266
  :name => @suggested_name,
218
267
  :rows_imported => rows_imported,
219
- :import_type => import_type
268
+ :import_type => import_type,
269
+ :log => runlog
220
270
  })
221
271
 
222
272
  end
@@ -1,6 +1,6 @@
1
1
  module CartoDB
2
2
  class Importer
3
- VERSION = "0.2.8"
3
+ VERSION = "0.2.9"
4
4
  end
5
5
  end
6
6
 
data/spec/import_spec.rb CHANGED
@@ -64,7 +64,7 @@ describe CartoDB::Importer do
64
64
  :database => "cartodb_importer_test", :username => 'postgres', :password => '',
65
65
  :host => 'localhost', :port => 5432, :suggested_name => 'prefered_name'
66
66
  result = importer.import!
67
- result.name.should == 'prefered_name_2'
67
+ result.name.should == 'prefered_name_1'
68
68
  result.rows_imported.should == 1998
69
69
  result.import_type.should == '.csv'
70
70
  end
@@ -182,14 +182,6 @@ describe CartoDB::Importer do
182
182
  result.name.should == 'estaciones2'
183
183
  result.rows_imported.should == 30
184
184
  result.import_type.should == '.csv'
185
-
186
- importer2 = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/estaciones2.csv", __FILE__),
187
- :database => "cartodb_importer_test", :username => 'postgres', :password => '',
188
- :host => 'localhost', :port => 5432, :suggested_name => 'estaciones2'
189
- result2 = importer2.import!
190
- result2.name.should == 'estaciones2_0'
191
- result2.rows_imported.should == 30
192
- result2.import_type.should == '.csv'
193
185
  end
194
186
  end
195
187
 
@@ -266,14 +258,28 @@ describe CartoDB::Importer do
266
258
  result.name.should == 'tm_world_borders_simpl_0_3'
267
259
  #result.rows_imported.should == 4365
268
260
  result.import_type.should == '.shp'
269
-
270
- importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/TM_WORLD_BORDERS_SIMPL-0.3.zip", __FILE__),
261
+ end
262
+ end
263
+ describe "Natural Earth Polygons" do
264
+ it "should import Natural Earth Polygons" do
265
+ importer = CartoDB::Importer.new :import_from_file => File.expand_path("../support/data/110m-glaciated-areas.zip", __FILE__),
266
+ :database => "cartodb_importer_test", :username => 'postgres', :password => '',
267
+ :host => 'localhost', :port => 5432
268
+ result = importer.import!
269
+ #result.rows_imported.should == 4365
270
+ result.import_type.should == '.shp'
271
+ end
272
+ end
273
+
274
+ describe "Import from URL" do
275
+ it "should import a shapefile from NaturalEarthData.com" do
276
+ importer = CartoDB::Importer.new :import_from_url => "http://www.nacis.org/naturalearth/10m/cultural/10m_parks_and_protected_areas.zip",
271
277
  :database => "cartodb_importer_test", :username => 'postgres', :password => '',
272
278
  :host => 'localhost', :port => 5432
273
- result2 = importer.import!
274
- result2.name.should == 'tm_world_borders_simpl_0_3_0'
279
+ result = importer.import!
275
280
  #result.rows_imported.should == 4365
276
- result2.import_type.should == '.shp'
281
+ result.import_type.should == '.shp'
277
282
  end
278
283
  end
284
+
279
285
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cartodb-importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2011-09-14 00:00:00.000000000Z
14
+ date: 2011-09-19 00:00:00.000000000Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: pg
18
- requirement: &70263347758260 !ruby/object:Gem::Requirement
18
+ requirement: &70265809904660 !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ~>
@@ -23,10 +23,10 @@ dependencies:
23
23
  version: '0.11'
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *70263347758260
26
+ version_requirements: *70265809904660
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sequel
29
- requirement: &70263347757840 !ruby/object:Gem::Requirement
29
+ requirement: &70265809904080 !ruby/object:Gem::Requirement
30
30
  none: false
31
31
  requirements:
32
32
  - - ! '>='
@@ -34,10 +34,10 @@ dependencies:
34
34
  version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
- version_requirements: *70263347757840
37
+ version_requirements: *70265809904080
38
38
  - !ruby/object:Gem::Dependency
39
39
  name: roo
40
- requirement: &70263347757380 !ruby/object:Gem::Requirement
40
+ requirement: &70265809903620 !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
43
  - - ! '>='
@@ -45,10 +45,10 @@ dependencies:
45
45
  version: '0'
46
46
  type: :runtime
47
47
  prerelease: false
48
- version_requirements: *70263347757380
48
+ version_requirements: *70265809903620
49
49
  - !ruby/object:Gem::Dependency
50
50
  name: spreadsheet
51
- requirement: &70263347756940 !ruby/object:Gem::Requirement
51
+ requirement: &70265809903200 !ruby/object:Gem::Requirement
52
52
  none: false
53
53
  requirements:
54
54
  - - ! '>='
@@ -56,10 +56,10 @@ dependencies:
56
56
  version: '0'
57
57
  type: :runtime
58
58
  prerelease: false
59
- version_requirements: *70263347756940
59
+ version_requirements: *70265809903200
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: google-spreadsheet-ruby
62
- requirement: &70263347756360 !ruby/object:Gem::Requirement
62
+ requirement: &70265809902760 !ruby/object:Gem::Requirement
63
63
  none: false
64
64
  requirements:
65
65
  - - ! '>='
@@ -67,10 +67,10 @@ dependencies:
67
67
  version: '0'
68
68
  type: :runtime
69
69
  prerelease: false
70
- version_requirements: *70263347756360
70
+ version_requirements: *70265809902760
71
71
  - !ruby/object:Gem::Dependency
72
72
  name: rubyzip
73
- requirement: &70263347755840 !ruby/object:Gem::Requirement
73
+ requirement: &70265809902340 !ruby/object:Gem::Requirement
74
74
  none: false
75
75
  requirements:
76
76
  - - ! '>='
@@ -78,10 +78,10 @@ dependencies:
78
78
  version: '0'
79
79
  type: :runtime
80
80
  prerelease: false
81
- version_requirements: *70263347755840
81
+ version_requirements: *70265809902340
82
82
  - !ruby/object:Gem::Dependency
83
83
  name: builder
84
- requirement: &70263347755180 !ruby/object:Gem::Requirement
84
+ requirement: &70265809901920 !ruby/object:Gem::Requirement
85
85
  none: false
86
86
  requirements:
87
87
  - - ! '>='
@@ -89,10 +89,10 @@ dependencies:
89
89
  version: '0'
90
90
  type: :runtime
91
91
  prerelease: false
92
- version_requirements: *70263347755180
92
+ version_requirements: *70265809901920
93
93
  - !ruby/object:Gem::Dependency
94
94
  name: rspec
95
- requirement: &70263347754620 !ruby/object:Gem::Requirement
95
+ requirement: &70265809901500 !ruby/object:Gem::Requirement
96
96
  none: false
97
97
  requirements:
98
98
  - - ! '>='
@@ -100,10 +100,10 @@ dependencies:
100
100
  version: '0'
101
101
  type: :development
102
102
  prerelease: false
103
- version_requirements: *70263347754620
103
+ version_requirements: *70265809901500
104
104
  - !ruby/object:Gem::Dependency
105
105
  name: mocha
106
- requirement: &70263347754140 !ruby/object:Gem::Requirement
106
+ requirement: &70265809901080 !ruby/object:Gem::Requirement
107
107
  none: false
108
108
  requirements:
109
109
  - - ! '>='
@@ -111,10 +111,10 @@ dependencies:
111
111
  version: '0'
112
112
  type: :development
113
113
  prerelease: false
114
- version_requirements: *70263347754140
114
+ version_requirements: *70265809901080
115
115
  - !ruby/object:Gem::Dependency
116
116
  name: ruby-debug19
117
- requirement: &70263347753660 !ruby/object:Gem::Requirement
117
+ requirement: &70265809900620 !ruby/object:Gem::Requirement
118
118
  none: false
119
119
  requirements:
120
120
  - - ! '>='
@@ -122,7 +122,7 @@ dependencies:
122
122
  version: '0'
123
123
  type: :development
124
124
  prerelease: false
125
- version_requirements: *70263347753660
125
+ version_requirements: *70265809900620
126
126
  description: Import CSV, SHP, and other files with data into a PostgreSQL table
127
127
  email:
128
128
  - andrew@vizzuality.com
@@ -133,6 +133,7 @@ files:
133
133
  - .gitignore
134
134
  - .rvmrc
135
135
  - Gemfile
136
+ - LICENSE
136
137
  - README.md
137
138
  - Rakefile
138
139
  - cartodb-importer.gemspec