cartodb-importer 0.2.2
- data/.gitignore +7 -0
- data/.rvmrc +2 -0
- data/Gemfile +4 -0
- data/README.md +59 -0
- data/Rakefile +2 -0
- data/cartodb-importer.gemspec +33 -0
- data/clubbing.shp +0 -0
- data/lib/cartodb-exporter/exporter.rb +197 -0
- data/lib/cartodb-exporter/version.rb +6 -0
- data/lib/cartodb-exporter.rb +14 -0
- data/lib/cartodb-importer/importer.rb +336 -0
- data/lib/cartodb-importer/version.rb +6 -0
- data/lib/cartodb-importer.rb +14 -0
- data/lib/core_ext/.DS_Store +0 -0
- data/lib/core_ext/blank.rb +3 -0
- data/lib/core_ext/hash.rb +10 -0
- data/lib/core_ext/string.rb +91 -0
- data/misc/csv_normalizer.py +27 -0
- data/misc/dbfUtils.py +113 -0
- data/misc/shp_normalizer.py +58 -0
- data/misc/srid_from_gdal.py +11 -0
- data/spec/export_spec.rb +60 -0
- data/spec/import_spec.rb +252 -0
- data/spec/spec_helper.rb +19 -0
- metadata +184 -0
data/.gitignore
ADDED
data/.rvmrc
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,59 @@
# CartoDB importer #

CartoDB importer is a Ruby gem that makes your life easier when importing data from a file into a PostGIS database. The accepted formats for input files are:

- CSV
- SHP (in a zip file)
- ODS
- XLS(X)

## Installation and dependencies ##

To install the Ruby dependencies, install the `bundler` gem and run `bundle install` in your shell.

The gem also depends on some external Python libraries. Install `pip` first:

- On Debian / Ubuntu: `apt-get install python-pip`
- On Mac OS X: `easy_install pip`

And then run:

    pip install GDAL chardet==1.0.1 argparse==1.2.1 brewery==0.6

## How to use it? ##

To use this gem, initialize an object of class CartoDB::Importer with the appropriate parameters:

    importer = CartoDB::Importer.new :import_from_file => "path to CSV file", :srid => 4326, :database => "...",
                                     :username => "...", :password => "..."
    result = importer.import!

If everything works fine, a new table will exist in the given database. A `result` object is returned with some information about the import, such as the number of rows imported or the name of the table:

    puts result.rows_imported
    # > 43243

If an error occurs, an exception is raised.

These are the available constructor options (a short usage sketch follows the list):

- import_from_file: a file descriptor, a Tempfile, or a URL from which to import the data
- srid: the value of the SRID
- database: the name of the database into which the data is imported
- username: the owner of the database
- password: the password to connect to the database
- extra_columns: a SQL string with extra columns that should be added to the imported table. If any of these columns already exists, an error will be raised
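
For example, the same constructor handles a zipped shapefile together with `extra_columns`. This is a minimal sketch; the file path, credentials, and column definitions are placeholders, not values from the gem:

    # Sketch: import a zipped SHP and append two extra columns to the new table.
    importer = CartoDB::Importer.new :import_from_file => "/tmp/clubbing.zip",
                                     :srid             => 4326,
                                     :database         => "cartodb_importer_test",
                                     :username         => "postgres",
                                     :password         => "",
                                     :extra_columns    => "created_at timestamp, updated_at timestamp"
    result = importer.import!
    puts result.name           # name of the table created from the shapefile
    puts result.rows_imported  # number of rows loaded into it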
## Running the specs ##

CartoDB Importer has a suite of specs which define its specification. To run this suite, a database named `cartodb_importer_test` must exist. You can create this database by running:

    CREATE DATABASE cartodb_importer_test
      WITH TEMPLATE = template_postgis
           OWNER = postgres

Then, to run the specs, just run this command:

    bundle exec rspec spec/import_spec.rb

data/Rakefile
ADDED
data/cartodb-importer.gemspec
ADDED
@@ -0,0 +1,33 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "cartodb-importer/version"

Gem::Specification.new do |s|
  s.name        = "cartodb-importer"
  s.version     = CartoDB::Importer::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Fernando Blat"]
  s.email       = ["ferblape@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
  s.description = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}

  s.rubyforge_project = "cartodb-importer"

  s.files         = `git ls-files`.split("\n").reject{|fn| fn =~ /spec\/support\/data/}
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n").reject{|fn| fn =~ /spec\/support\/data/}
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_runtime_dependency "pg", "0.11"
  s.add_runtime_dependency "sequel"
  s.add_runtime_dependency "roo"
  s.add_runtime_dependency "spreadsheet"
  s.add_runtime_dependency "google-spreadsheet-ruby"
  s.add_runtime_dependency "rubyzip"
  s.add_runtime_dependency "builder"

  s.add_development_dependency "rspec"
  s.add_development_dependency "mocha"
  s.add_development_dependency "ruby-debug19"
end
data/clubbing.shp
ADDED
File without changes
data/lib/cartodb-exporter/exporter.rb
ADDED
@@ -0,0 +1,197 @@
# coding: UTF-8

module CartoDB
  class Exporter
    SUPPORTED_FORMATS = %W{ .csv .shp .kml }
    OUTPUT_FILE_LOCATION = "/tmp"

    class << self
      attr_accessor :debug
    end
    @@debug = true

    attr_accessor :export_to_file, :type, :suggested_name,
                  :ext, :db_configuration, :db_connection

    attr_reader :table_created, :force_name

    def initialize(options = {})
      log "options: #{options}"
      @@debug = options[:debug] if options[:debug]
      @table_created = nil
      @export_to_file = options[:export_to_file]
      @type = options[:type]
      raise "export_to_file value can't be nil" if @export_to_file.nil?

      @db_configuration = options.slice(:database, :username, :password, :host, :port)
      @db_configuration[:port] ||= 5432
      @db_configuration[:host] ||= '127.0.0.1'
      @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")

      unless options[:suggested_name].nil? || options[:suggested_name].blank?
        @force_name = true
        @suggested_name = get_valid_name(options[:suggested_name])
      else
        @force_name = false
      end

    rescue => e
      log $!
      log e.backtrace
      raise e
    end

    def export!
      path = "#{OUTPUT_FILE_LOCATION}/exporting_#{Time.now.to_i}_#{@export_to_file}"

      python_bin_path = `which python`.strip
      psql_bin_path = `which psql`.strip

      entries = []

      export_type = ".#{@type}"

      if @type == 'csv'
        ogr2ogr_bin_path = `which ogr2ogr`.strip
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "CSV" #{path} PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}

        output = `#{ogr2ogr_command} &> /dev/null`

        Zip::ZipOutputStream.open("#{path}.zip") do |zia|
          zia.put_next_entry("#{@export_to_file}.#{type}")
          zia.print IO.read("#{path}/#{@export_to_file}.#{type}")
        end
        FileUtils.rm_rf(path)

        log "path: #{path}"
        return OpenStruct.new({
          :name => @export_to_file,
          :import_type => export_type,
          :path => "#{path}.#{type}"
        })
      end

      if @type == 'kml'
        ogr2ogr_bin_path = `which ogr2ogr`.strip
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "KML" #{path}.kml PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}

        output = `#{ogr2ogr_command} &> /dev/null`

        Zip::ZipOutputStream.open("#{path}.kmz") do |zia|
          zia.put_next_entry("doc.kml")
          zia.print IO.read("#{path}.kml")
        end
        FileUtils.rm_rf("#{path}.kml")

        log "path: #{path}"
        return OpenStruct.new({
          :name => @export_to_file,
          :import_type => export_type,
          :path => "#{path}.#{type}"
        })
      end

      if @type == 'shp'
        ogr2ogr_bin_path = `which ogr2ogr`.strip
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "ESRI Shapefile" #{path}.shp PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}

        output = `#{ogr2ogr_command} &> /dev/null`

        Zip::ZipOutputStream.open("#{path}.zip") do |zia|
          begin
            zia.put_next_entry("#{export_to_file}.shp")
            zia.print IO.read("#{path}.shp")
            FileUtils.rm_rf("#{path}.shp")
          rescue Exception => e
            # handle e
            log "info #{e}"
          end

          begin
            zia.put_next_entry("#{export_to_file}.shx")
            zia.print IO.read("#{path}.shx")
            FileUtils.rm_rf("#{path}.shx")
          rescue Exception => e
            # handle e
            log "info #{e}"
          end

          begin
            zia.put_next_entry("#{export_to_file}.dbf")
            zia.print IO.read("#{path}.dbf")
            FileUtils.rm_rf("#{path}.dbf")
          rescue Exception => e
            # handle e
            log "info #{e}"
          end

          begin
            zia.put_next_entry("#{export_to_file}.prj")
            zia.print IO.read("#{path}.prj")
            FileUtils.rm_rf("#{path}.prj")
          rescue Exception => e
            # handle e
            log "info #{e}"
          end

          begin
            zia.put_next_entry("#{export_to_file}.sbn")
            zia.print IO.read("#{path}.sbn")
            FileUtils.rm_rf("#{path}.sbn")
          rescue Exception => e
            # handle e
            log "info #{e}"
          end
        end

        return OpenStruct.new({
          :name => @export_to_file,
          :import_type => export_type,
          :path => "#{path}.#{type}"
        })
      end
    rescue => e
      log "====================="
      log $!
      log e.backtrace
      log "====================="
      if !@table_created.nil?
        @db_connection.drop_table(@suggested_name)
      end
      raise e
    ensure
      @db_connection.disconnect
    end

    private

    def get_valid_name(name)
      candidates = @db_connection.tables.map{ |t| t.to_s }.select{ |t| t.match(/^#{name}/) }
      if candidates.any?
        max_candidate = candidates.max
        if max_candidate =~ /(.+)_(\d+)$/
          return $1 + "_#{$2.to_i + 1}"
        else
          return max_candidate + "_2"
        end
      else
        return name
      end
    end

    def log(str)
      if @@debug
        puts str
      end
    end
  end
end
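
The exporter above reverses the flow: it dumps a PostGIS table to CSV, KML, or SHP through ogr2ogr and zips the result under /tmp. A minimal usage sketch, assuming a local PostGIS database, an existing table named clubbing, and placeholder credentials (none of these values come from the gem itself):

    # Sketch: export the "clubbing" table as KML (written under /tmp and zipped into a .kmz).
    exporter = CartoDB::Exporter.new :export_to_file => "clubbing",
                                     :type     => "kml",
                                     :database => "cartodb_importer_test",
                                     :username => "postgres",
                                     :password => ""
    result = exporter.export!
    puts result.path  # reported output path under /tmp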
data/lib/cartodb-exporter.rb
ADDED
@@ -0,0 +1,14 @@
# coding: UTF-8

require 'rubygems'
require 'bundler'
Bundler.setup

require 'roo'
require 'csv'
require 'tempfile'
require 'ostruct'
require 'cartodb-exporter/exporter'
require 'core_ext/string'
require 'core_ext/hash'
require 'core_ext/blank'
data/lib/cartodb-importer/importer.rb
ADDED
@@ -0,0 +1,336 @@
# coding: UTF-8

module CartoDB
  class Importer
    RESERVED_COLUMN_NAMES = %W{ oid tableoid xmin cmin xmax cmax ctid }
    SUPPORTED_FORMATS = %W{ .csv .shp .ods .xls .xlsx .tif .tiff }

    class << self
      attr_accessor :debug
    end
    @@debug = true

    attr_accessor :import_from_file, :suggested_name,
                  :ext, :db_configuration, :db_connection

    attr_reader :table_created, :force_name

    def initialize(options = {})
      @@debug = options[:debug] if options[:debug]
      @table_created = nil
      @import_from_file = options[:import_from_file]
      raise "import_from_file value can't be nil" if @import_from_file.nil?

      @db_configuration = options.slice(:database, :username, :password, :host, :port)
      @db_configuration[:port] ||= 5432
      @db_configuration[:host] ||= '127.0.0.1'
      @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")

      unless options[:suggested_name].nil? || options[:suggested_name].blank?
        @force_name = true
        @suggested_name = get_valid_name(options[:suggested_name])
      else
        @force_name = false
      end

      if @import_from_file.is_a?(String)
        if @import_from_file =~ /^http/
          @import_from_file = URI.escape(@import_from_file)
        end
        open(@import_from_file) do |res|
          file_name = File.basename(import_from_file)
          @ext = File.extname(file_name)
          @suggested_name ||= get_valid_name(File.basename(import_from_file, @ext).downcase.sanitize)
          @import_from_file = Tempfile.new([@suggested_name, @ext])
          @import_from_file.write res.read.force_encoding('utf-8')
          @import_from_file.close
        end
      else
        original_filename = if @import_from_file.respond_to?(:original_filename)
          @import_from_file.original_filename
        else
          @import_from_file.path
        end
        @ext = File.extname(original_filename)
        @suggested_name ||= get_valid_name(File.basename(original_filename,@ext).tr('.','_').downcase.sanitize)
        @ext ||= File.extname(original_filename)
      end
    rescue => e
      log $!
      log e.backtrace
      raise e
    end

    def import!
      path = if @import_from_file.respond_to?(:tempfile)
        @import_from_file.tempfile.path
      else
        @import_from_file.path
      end
      python_bin_path = `which python`.strip
      psql_bin_path = `which psql`.strip

      entries = []
      if @ext == '.zip'
        log "Importing zip file: #{path}"
        Zip::ZipFile.foreach(path) do |entry|
          name = entry.name.split('/').last
          next if name =~ /^(\.|\_{2})/
          entries << "/tmp/#{name}"
          if SUPPORTED_FORMATS.include?(File.extname(name))
            @ext = File.extname(name)
            @suggested_name = get_valid_name(File.basename(name,@ext).tr('.','_').downcase.sanitize) unless @force_name
            path = "/tmp/#{name}"
            log "Found original @ext file named #{name} in path #{path}"
          end
          if File.file?("/tmp/#{name}")
            FileUtils.rm("/tmp/#{name}")
          end
          entry.extract("/tmp/#{name}")
        end
      end

      import_type = @ext
      # These types of files are converted to CSV
      if %W{ .xls .xlsx .ods }.include?(@ext)
        new_path = "/tmp/#{@suggested_name}.csv"
        case @ext
          when '.xls'
            Excel.new(path)
          when '.xlsx'
            Excelx.new(path)
          when '.ods'
            Openoffice.new(path)
          else
            raise ArgumentError, "Don't know how to open file #{new_path}"
        end.to_csv(new_path)
        @import_from_file = File.open(new_path,'r')
        @ext = '.csv'
        path = @import_from_file.path
      end

      if @ext == '.csv'
        ogr2ogr_bin_path = `which ogr2ogr`.strip
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "PostgreSQL" PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{path} -nln #{@suggested_name}}

        output = `#{ogr2ogr_command} &> /dev/null`

        # Check if the file had data; if not, raise an error because probably something went wrong
        if @db_connection["SELECT * from #{@suggested_name} LIMIT 1"].first.nil?
          raise "Empty table"
        end

        # Sanitize column names where needed
        column_names = @db_connection.schema(@suggested_name).map{ |s| s[0].to_s }
        need_sanitizing = column_names.each do |column_name|
          if column_name != column_name.sanitize_column_name
            @db_connection.run("ALTER TABLE #{@suggested_name} RENAME COLUMN \"#{column_name}\" TO #{column_name.sanitize_column_name}")
          end
        end

        @table_created = true

        FileUtils.rm_rf(path)
        rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]

        return OpenStruct.new({
          :name => @suggested_name,
          :rows_imported => rows_imported,
          :import_type => import_type
        })
      end

      if @ext == '.shp'
        shp2pgsql_bin_path = `which shp2pgsql`.strip

        host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
        port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
        @suggested_name = get_valid_name(File.basename(path).tr('.','_').downcase.sanitize) unless @force_name
        random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"

        normalizer_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/shp_normalizer.py", __FILE__)} #{path} #{random_table_name}"
        shp_args_command = `#{normalizer_command}`
        if shp_args_command.strip.blank?
          raise "Error running python shp_normalizer script: #{normalizer_command}"
        end
        full_shp_command = "#{shp2pgsql_bin_path} #{shp_args_command.strip} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
        log "Running shp2pgsql: #{full_shp_command}"
        %x[#{full_shp_command}]

        @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
        @db_connection.run("DROP TABLE #{random_table_name}")
        @table_created = true

        entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
        rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
        @import_from_file.unlink

        return OpenStruct.new({
          :name => @suggested_name,
          :rows_imported => rows_imported,
          :import_type => import_type
        })
      end

      if %W{ .tif .tiff }.include?(@ext)
        log "Importing raster file: #{path}"
        raster2pgsql_bin_path = `which raster2pgsql.py`.strip

        host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
        port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
        @suggested_name = get_valid_name(File.basename(path).tr('.','_').downcase.sanitize) unless @force_name
        random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"

        gdal_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/srid_from_gdal.py", __FILE__)} #{path}"
        rast_srid_command = `#{gdal_command}`.strip

        log "SRID : #{rast_srid_command}"

        blocksize = "180x180"
        full_rast_command = "#{raster2pgsql_bin_path} -I -s #{rast_srid_command.strip} -k #{blocksize} -t #{random_table_name} -r #{path} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
        log "Running raster2pgsql: #{raster2pgsql_bin_path} #{full_rast_command}"
        %x[#{full_rast_command}]

        @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
        @db_connection.run("DROP TABLE #{random_table_name}")

        entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
        rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
        @import_from_file.unlink

        @table_created = true

        entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
        rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
        @import_from_file.unlink

        return OpenStruct.new({
          :name => @suggested_name,
          :rows_imported => rows_imported,
          :import_type => import_type
        })
      end
    rescue => e
      log "====================="
      log $!
      log e.backtrace
      log "====================="
      if @table_created == nil
        @db_connection.drop_table(@suggested_name)
      end
      raise e
    ensure
      @db_connection.disconnect
      if @import_from_file.is_a?(File)
        File.unlink(@import_from_file) if File.file?(@import_from_file.path)
      elsif @import_from_file.is_a?(Tempfile)
        @import_from_file.unlink
      end
    end

    private

    def guess_schema(path)
      @col_separator = ','
      options = {:col_sep => @col_separator}
      schemas = []
      uk_column_counter = 0

      csv = CSV.open(path, options)
      column_names = csv.gets

      if column_names.size == 1
        candidate_col_separators = {}
        column_names.first.scan(/([^\w\s])/i).flatten.uniq.each do |candidate|
          candidate_col_separators[candidate] = 0
        end
        candidate_col_separators.keys.each do |candidate|
          csv = CSV.open(path, options.merge(:col_sep => candidate))
          column_names = csv.gets
          candidate_col_separators[candidate] = column_names.size
        end
        @col_separator = candidate_col_separators.sort{|a,b| a[1]<=>b[1]}.last.first
        csv = CSV.open(path, options.merge(:col_sep => @col_separator))
        column_names = csv.gets
      end

      column_names = column_names.map do |c|
        if c.blank?
          uk_column_counter += 1
          "unknow_name_#{uk_column_counter}"
        else
          c = c.force_encoding('utf-8').encode
          results = c.scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
          if results.size == 2 && results[0] == results[1]
            @quote = $1
          end
          c.sanitize_column_name
        end
      end

      while (line = csv.gets)
        line.each_with_index do |field, i|
          next if line[i].blank?
          unless @quote
            results = line[i].scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
            if results.size == 2 && results[0] == results[1]
              @quote = $1
            end
          end
          if schemas[i].nil?
            if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
              schemas[i] = "float"
            elsif line[i] =~ /^[0-9]+$/
              schemas[i] = "integer"
            else
              schemas[i] = "varchar"
            end
          else
            case schemas[i]
              when "integer"
                if line[i] !~ /^[0-9]+$/
                  if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
                    schemas[i] = "float"
                  else
                    schemas[i] = "varchar"
                  end
                elsif line[i].to_i > 2147483647
                  schemas[i] = "float"
                end
            end
          end
        end
      end

      result = []
      column_names.each_with_index do |column_name, i|
        if RESERVED_COLUMN_NAMES.include?(column_name.to_s)
          column_name = "_#{column_name}"
        end
        result << "#{column_name} #{schemas[i] || "varchar"}"
      end
      return result
    end

    def get_valid_name(name)
      candidates = @db_connection.tables.map{ |t| t.to_s }.select{ |t| t.match(/^#{name}/) }
      if candidates.any?
        max_candidate = candidates.max
        if max_candidate =~ /(.+)_(\d+)$/
          return $1 + "_#{$2.to_i + 1}"
        else
          return max_candidate + "_2"
        end
      else
        return name
      end
    end

    def log(str)
      if @@debug
        puts str
      end
    end
  end
end
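
Spreadsheet formats take a slightly different path through import!: the file is first converted to CSV with roo (Excel, Excelx, or Openoffice) and then loaded through the same ogr2ogr branch. A minimal sketch, assuming a hypothetical local file /tmp/sales.xls and placeholder credentials:

    # Sketch only: the .xls file is converted to /tmp/<name>.csv via roo,
    # then loaded into PostgreSQL with ogr2ogr under a sanitized table name.
    importer = CartoDB::Importer.new :import_from_file => "/tmp/sales.xls",
                                     :database => "cartodb_importer_test",
                                     :username => "postgres",
                                     :password => ""
    result = importer.import!
    puts result.import_type   # => ".xls" (the original extension is kept as import_type)
    puts result.rows_imported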