cartodb-importer 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.rvmrc +2 -0
- data/Gemfile +4 -0
- data/README.md +59 -0
- data/Rakefile +2 -0
- data/cartodb-importer.gemspec +33 -0
- data/clubbing.shp +0 -0
- data/lib/cartodb-exporter/exporter.rb +197 -0
- data/lib/cartodb-exporter/version.rb +6 -0
- data/lib/cartodb-exporter.rb +14 -0
- data/lib/cartodb-importer/importer.rb +336 -0
- data/lib/cartodb-importer/version.rb +6 -0
- data/lib/cartodb-importer.rb +14 -0
- data/lib/core_ext/.DS_Store +0 -0
- data/lib/core_ext/blank.rb +3 -0
- data/lib/core_ext/hash.rb +10 -0
- data/lib/core_ext/string.rb +91 -0
- data/misc/csv_normalizer.py +27 -0
- data/misc/dbfUtils.py +113 -0
- data/misc/shp_normalizer.py +58 -0
- data/misc/srid_from_gdal.py +11 -0
- data/spec/export_spec.rb +60 -0
- data/spec/import_spec.rb +252 -0
- data/spec/spec_helper.rb +19 -0
- metadata +184 -0
data/.gitignore
ADDED
data/.rvmrc
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# CartoDB importer #
|
2
|
+
|
3
|
+
CartoDB importer is a Ruby gem that makes your life easier when importing data from a file into a PostGIS database. The accepted formats for input files are:
|
4
|
+
|
5
|
+
- CSV
|
6
|
+
- SHP (in a zip file)
|
7
|
+
- ODS
|
8
|
+
- XLS(X)
|
9
|
+
|
10
|
+
## Installation and dependencies ##
|
11
|
+
|
12
|
+
To install Ruby dependencies just install `bundler` gem and run the command `bundle install` in your shell.
|
13
|
+
|
14
|
+
There are also some dependencies on external Python libraries. You should install `pip` first:
|
15
|
+
|
16
|
+
- In Debian / Ubuntu: `apt-get install python-pip`
|
17
|
+
|
18
|
+
- In MacosX: `easy_install pip`
|
19
|
+
|
20
|
+
And then run:
|
21
|
+
|
22
|
+
pip install GDAL chardet==1.0.1 argparse==1.2.1 brewery==0.6
|
23
|
+
|
24
|
+
## How to use it? ##
|
25
|
+
|
26
|
+
The way to use this gem is to initialize an object of the class CartoDB::Importer using the appropriate parameters.
|
27
|
+
|
28
|
+
importer = CartoDB::Importer.new :import_from_file => "path to CSV file", :srid => 4326, :database => "...",
|
29
|
+
:username => "...", :password => "..."
|
30
|
+
result = importer.import!
|
31
|
+
|
32
|
+
If everything works fine, a new table will exist in the given database. A `result` object is returned with some information about the import, such as the number of rows, or the name of the table.
|
33
|
+
|
34
|
+
puts result.rows_imported
|
35
|
+
# > 43243
|
36
|
+
|
37
|
+
If any error happens, an exception could be raised.
|
38
|
+
|
39
|
+
This is the list with all the available options to use in the constructor:
|
40
|
+
|
41
|
+
- import_from_file: a file descriptor, Tempfile or URL with the URL from which import the data
|
42
|
+
- srid: the value of the SRID
|
43
|
+
- database: the name of the database where import the data
|
44
|
+
- username: the owner of the database
|
45
|
+
- password: the password to connect to the database
|
46
|
+
- extra_columns: a SQL string with some extra columns that should be added to the imported table. If any of these columns already exists an error will be raised
|
47
|
+
|
48
|
+
## Running the specs ##
|
49
|
+
|
50
|
+
CartoDB Importer has a suite of specs which define its specification. To run this suite a database named cartodb_importer_test must exist. You can create this database by running:
|
51
|
+
|
52
|
+
CREATE DATABASE cartodb_importer_test
|
53
|
+
WITH TEMPLATE = template_postgis
|
54
|
+
OWNER = postgres
|
55
|
+
|
56
|
+
Then, to run the specs just run this command:
|
57
|
+
|
58
|
+
bundle exec rspec spec/import_spec.rb
|
59
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "cartodb-importer/version"

# Gem packaging manifest for cartodb-importer.
Gem::Specification.new do |spec|
  spec.name        = "cartodb-importer"
  spec.version     = CartoDB::Importer::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Fernando Blat"]
  spec.email       = ["ferblape@gmail.com"]
  spec.homepage    = ""
  spec.summary     = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
  spec.description = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}

  spec.rubyforge_project = "cartodb-importer"

  # Package every git-tracked file except the bulky spec fixtures.
  excluded_fixtures = /spec\/support\/data/
  spec.files         = `git ls-files`.split("\n").reject { |fn| fn =~ excluded_fixtures }
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n").reject { |fn| fn =~ excluded_fixtures }
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies. pg is pinned; the rest float.
  spec.add_runtime_dependency "pg", "0.11"
  %w[sequel roo spreadsheet google-spreadsheet-ruby rubyzip builder].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end

  # Development-only dependencies.
  %w[rspec mocha ruby-debug19].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
|
data/clubbing.shp
ADDED
File without changes
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# coding: UTF-8

module CartoDB
  # Exports a PostgreSQL/PostGIS table to a downloadable archive.
  #
  # Supported output types are "csv" (zipped), "kml" (packed as a KMZ) and
  # "shp" (all shapefile side files zipped together). The export shells out
  # to `ogr2ogr`, so GDAL must be installed and on the PATH.
  class Exporter
    SUPPORTED_FORMATS = %W{ .csv .shp .kml }
    OUTPUT_FILE_LOCATION = "/tmp"
    # Side files that make up an ESRI shapefile; each existing one is added
    # to the zip archive.
    SHAPEFILE_EXTENSIONS = %w{ shp shx dbf prj sbn }

    class << self
      # Class-wide debug flag: when truthy, #log prints to stdout.
      attr_accessor :debug
    end
    @@debug = true

    attr_accessor :export_to_file, :type, :suggested_name,
                  :ext, :db_configuration, :db_connection

    attr_reader :table_created, :force_name

    # options:
    #   :export_to_file - (required) name of the table to export
    #   :type           - output format: "csv", "kml" or "shp"
    #   :database, :username, :password, :host, :port - connection settings
    #                     (host defaults to 127.0.0.1, port to 5432)
    #   :suggested_name - optional base name used to derive a unique table name
    #   :debug          - overrides the class-wide debug flag
    #
    # Raises when :export_to_file is nil or the database connection fails.
    def initialize(options = {})
      log "options: #{options}"
      @@debug = options[:debug] if options[:debug]
      @table_created = nil
      @export_to_file = options[:export_to_file]
      @type = options[:type]
      raise "export_to_file value can't be nil" if @export_to_file.nil?

      @db_configuration = options.slice(:database, :username, :password, :host, :port)
      @db_configuration[:port] ||= 5432
      @db_configuration[:host] ||= '127.0.0.1'
      @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")

      unless options[:suggested_name].nil? || options[:suggested_name].blank?
        @force_name = true
        @suggested_name = get_valid_name(options[:suggested_name])
      else
        @force_name = false
      end
    rescue => e
      log $!
      log e.backtrace
      raise e
    end

    # Runs the export. Returns an OpenStruct with :name (the table name),
    # :import_type (".csv"/".kml"/".shp") and :path (location of the
    # generated archive). Returns nil when @type is not a supported type.
    def export!
      path = "#{OUTPUT_FILE_LOCATION}/exporting_#{Time.now.to_i}_#{@export_to_file}"
      ogr2ogr_bin_path = `which ogr2ogr`.strip
      export_type = ".#{@type}"

      if @type == 'csv'
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "CSV" #{path} PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}

        # BUGFIX: `&>` is a bashism and backticks run /bin/sh, so use the
        # portable POSIX redirection form.
        `#{ogr2ogr_command} > /dev/null 2>&1`

        # ogr2ogr writes a directory containing <table>.csv; zip that single
        # file and remove the working directory afterwards.
        Zip::ZipOutputStream.open("#{path}.zip") do |zip|
          zip.put_next_entry("#{@export_to_file}.#{type}")
          zip.print IO.read("#{path}/#{@export_to_file}.#{type}")
        end
        FileUtils.rm_rf(path)

        log "path: #{path}"
        # BUGFIX: report the archive that actually exists ("#{path}.zip");
        # the previous "#{path}.#{type}" pointed at a deleted file.
        return OpenStruct.new({
          :name => @export_to_file,
          :import_type => export_type,
          :path => "#{path}.zip"
        })
      end

      if @type == 'kml'
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "KML" #{path}.kml PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}

        `#{ogr2ogr_command} > /dev/null 2>&1`

        # A KMZ is a zip archive whose main document is named doc.kml.
        Zip::ZipOutputStream.open("#{path}.kmz") do |zip|
          zip.put_next_entry("doc.kml")
          zip.print IO.read("#{path}.kml")
        end
        FileUtils.rm_rf("#{path}.kml")

        log "path: #{path}"
        # BUGFIX: the generated archive is "#{path}.kmz"; the previous
        # "#{path}.kml" had just been removed.
        return OpenStruct.new({
          :name => @export_to_file,
          :import_type => export_type,
          :path => "#{path}.kmz"
        })
      end

      if @type == 'shp'
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "ESRI Shapefile" #{path}.shp PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}

        `#{ogr2ogr_command} > /dev/null 2>&1`

        Zip::ZipOutputStream.open("#{path}.zip") do |zip|
          # The five copy/pasted begin/rescue blocks collapsed into one loop.
          # Side files such as .prj or .sbn may legitimately be missing, so
          # a failure to read one is logged and skipped.
          SHAPEFILE_EXTENSIONS.each do |extension|
            begin
              zip.put_next_entry("#{export_to_file}.#{extension}")
              zip.print IO.read("#{path}.#{extension}")
              FileUtils.rm_rf("#{path}.#{extension}")
            rescue => e
              # BUGFIX: was `rescue Exception`, which also swallowed
              # SignalException/SystemExit; StandardError is enough here.
              log "info #{e}"
            end
          end
        end

        # BUGFIX: the archive is "#{path}.zip"; "#{path}.shp" was already
        # deleted inside the loop above.
        return OpenStruct.new({
          :name => @export_to_file,
          :import_type => export_type,
          :path => "#{path}.zip"
        })
      end
    rescue => e
      log "====================="
      log $!
      log e.backtrace
      log "====================="
      # Clean up the working table if one was created before the failure.
      @db_connection.drop_table(@suggested_name) unless @table_created.nil?
      raise e
    ensure
      @db_connection.disconnect
    end

    private

    # Returns a table name derived from +name+ that does not collide with an
    # existing table: appends (or increments) a trailing "_<n>" suffix.
    def get_valid_name(name)
      candidates = @db_connection.tables.map { |t| t.to_s }.select { |t| t.match(/^#{name}/) }
      return name if candidates.empty?

      max_candidate = candidates.max
      if max_candidate =~ /(.+)_(\d+)$/
        "#{$1}_#{$2.to_i + 1}"
      else
        "#{max_candidate}_2"
      end
    end

    # Prints +str+ when debugging is enabled.
    def log(str)
      puts str if @@debug
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
Bundler.setup
|
6
|
+
|
7
|
+
require 'roo'
|
8
|
+
require 'csv'
|
9
|
+
require 'tempfile'
|
10
|
+
require 'ostruct'
|
11
|
+
require 'cartodb-exporter/exporter'
|
12
|
+
require 'core_ext/string'
|
13
|
+
require 'core_ext/hash'
|
14
|
+
require 'core_ext/blank'
|
@@ -0,0 +1,336 @@
|
|
1
|
+
# coding: UTF-8

module CartoDB
  # Imports a data file (CSV, SHP, ODS, XLS(X) or GeoTIFF — optionally
  # packed in a zip) into a PostgreSQL/PostGIS table.
  #
  # Shells out to ogr2ogr, shp2pgsql, psql, raster2pgsql.py and the bundled
  # Python helpers in misc/, so those binaries must be on the PATH.
  class Importer
    # Column names PostgreSQL reserves for system columns; user columns
    # that collide are prefixed with "_" by #guess_schema.
    RESERVED_COLUMN_NAMES = %W{ oid tableoid xmin cmin xmax cmax ctid }
    SUPPORTED_FORMATS = %W{ .csv .shp .ods .xls .xlsx .tif .tiff }

    class << self
      # Class-wide debug flag: when truthy, #log prints to stdout.
      attr_accessor :debug
    end
    @@debug = true

    attr_accessor :import_from_file, :suggested_name,
                  :ext, :db_configuration, :db_connection

    attr_reader :table_created, :force_name

    # options:
    #   :import_from_file - (required) local path, http(s) URL, File,
    #                       Tempfile, or an uploaded file responding to
    #                       #original_filename
    #   :database, :username, :password, :host, :port - connection settings
    #                       (host defaults to 127.0.0.1, port to 5432)
    #   :suggested_name   - optional fixed name for the target table
    #   :debug            - overrides the class-wide debug flag
    #
    # Raises when :import_from_file is nil or the source cannot be opened.
    def initialize(options = {})
      @@debug = options[:debug] if options[:debug]
      @table_created = nil
      @import_from_file = options[:import_from_file]
      raise "import_from_file value can't be nil" if @import_from_file.nil?

      @db_configuration = options.slice(:database, :username, :password, :host, :port)
      @db_configuration[:port] ||= 5432
      @db_configuration[:host] ||= '127.0.0.1'
      @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")

      unless options[:suggested_name].nil? || options[:suggested_name].blank?
        @force_name = true
        @suggested_name = get_valid_name(options[:suggested_name])
      else
        @force_name = false
      end

      if @import_from_file.is_a?(String)
        # NOTE(review): URI.escape was removed in Ruby 3.0; this code
        # assumes an older Ruby — verify before upgrading.
        if @import_from_file =~ /^http/
          @import_from_file = URI.escape(@import_from_file)
        end
        # Download (or open) the source and buffer it into a Tempfile.
        open(@import_from_file) do |res|
          file_name = File.basename(import_from_file)
          @ext = File.extname(file_name)
          @suggested_name ||= get_valid_name(File.basename(import_from_file, @ext).downcase.sanitize)
          @import_from_file = Tempfile.new([@suggested_name, @ext])
          @import_from_file.write res.read.force_encoding('utf-8')
          @import_from_file.close
        end
      else
        # File-like object: derive the extension and table name from its
        # original (upload) name when available, otherwise from its path.
        original_filename = if @import_from_file.respond_to?(:original_filename)
          @import_from_file.original_filename
        else
          @import_from_file.path
        end
        @ext = File.extname(original_filename)
        @suggested_name ||= get_valid_name(File.basename(original_filename, @ext).tr('.', '_').downcase.sanitize)
        # (dead `@ext ||= ...` re-assignment removed: @ext is always set above)
      end
    rescue => e
      log $!
      log e.backtrace
      raise e
    end

    # Runs the import. Returns an OpenStruct with :name (table name),
    # :rows_imported and :import_type (the original file extension).
    # Raises "Empty table" when the CSV import produced no rows; other
    # failures propagate after cleanup. Returns nil for unsupported types.
    def import!
      path = if @import_from_file.respond_to?(:tempfile)
        @import_from_file.tempfile.path
      else
        @import_from_file.path
      end
      python_bin_path = `which python`.strip
      psql_bin_path = `which psql`.strip

      entries = []
      if @ext == '.zip'
        log "Importing zip file: #{path}"
        Zip::ZipFile.foreach(path) do |entry|
          name = entry.name.split('/').last
          # Skip hidden files and __MACOSX resource-fork entries.
          next if name =~ /^(\.|\_{2})/
          entries << "/tmp/#{name}"
          if SUPPORTED_FORMATS.include?(File.extname(name))
            @ext = File.extname(name)
            @suggested_name = get_valid_name(File.basename(name, @ext).tr('.', '_').downcase.sanitize) unless @force_name
            path = "/tmp/#{name}"
            log "Found original @ext file named #{name} in path #{path}"
          end
          FileUtils.rm("/tmp/#{name}") if File.file?("/tmp/#{name}")
          entry.extract("/tmp/#{name}")
        end
      end

      import_type = @ext
      # Spreadsheet formats are first converted to CSV (via roo) and then
      # fall through to the CSV branch below.
      if %W{ .xls .xlsx .ods }.include?(@ext)
        new_path = "/tmp/#{@suggested_name}.csv"
        case @ext
        when '.xls'
          Excel.new(path)
        when '.xlsx'
          Excelx.new(path)
        when '.ods'
          Openoffice.new(path)
        else
          raise ArgumentError, "Don't know how to open file #{new_path}"
        end.to_csv(new_path)
        @import_from_file = File.open(new_path, 'r')
        @ext = '.csv'
        path = @import_from_file.path
      end

      if @ext == '.csv'
        ogr2ogr_bin_path = `which ogr2ogr`.strip
        ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "PostgreSQL" PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{path} -nln #{@suggested_name}}

        # BUGFIX: `&>` is a bashism and backticks run /bin/sh, so use the
        # portable POSIX redirection form.
        `#{ogr2ogr_command} > /dev/null 2>&1`

        # Check if the file had data; if not raise an error because
        # probably something went wrong.
        if @db_connection["SELECT * from #{@suggested_name} LIMIT 1"].first.nil?
          raise "Empty table"
        end

        # Sanitize column names where needed (the useless `need_sanitizing`
        # assignment was removed; only the iteration's side effect matters).
        column_names = @db_connection.schema(@suggested_name).map { |s| s[0].to_s }
        column_names.each do |column_name|
          if column_name != column_name.sanitize_column_name
            @db_connection.run("ALTER TABLE #{@suggested_name} RENAME COLUMN \"#{column_name}\" TO #{column_name.sanitize_column_name}")
          end
        end

        @table_created = true

        FileUtils.rm_rf(path)
        rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]

        return OpenStruct.new({
          :name => @suggested_name,
          :rows_imported => rows_imported,
          :import_type => import_type
        })
      end

      if @ext == '.shp'
        shp2pgsql_bin_path = `which shp2pgsql`.strip

        host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
        port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
        @suggested_name = get_valid_name(File.basename(path).tr('.', '_').downcase.sanitize) unless @force_name
        random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"

        # The Python helper prints the shp2pgsql arguments (srid, encoding…).
        normalizer_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/shp_normalizer.py", __FILE__)} #{path} #{random_table_name}"
        shp_args_command = `#{normalizer_command}`
        if shp_args_command.strip.blank?
          raise "Error running python shp_normalizer script: #{normalizer_command}"
        end
        full_shp_command = "#{shp2pgsql_bin_path} #{shp_args_command.strip} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
        log "Running shp2pgsql: #{full_shp_command}"
        %x[#{full_shp_command}]

        # Import went into a throwaway table; copy it to the final name.
        @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
        @db_connection.run("DROP TABLE #{random_table_name}")
        @table_created = true

        entries.each { |e| FileUtils.rm_rf(e) } if entries.any?
        rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
        @import_from_file.unlink

        return OpenStruct.new({
          :name => @suggested_name,
          :rows_imported => rows_imported,
          :import_type => import_type
        })
      end

      if %W{ .tif .tiff }.include?(@ext)
        log "Importing raster file: #{path}"
        raster2pgsql_bin_path = `which raster2pgsql.py`.strip

        host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
        port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
        @suggested_name = get_valid_name(File.basename(path).tr('.', '_').downcase.sanitize) unless @force_name
        random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"

        # The Python helper prints the raster's SRID via GDAL.
        gdal_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/srid_from_gdal.py", __FILE__)} #{path}"
        rast_srid_command = `#{gdal_command}`.strip

        log "SRID : #{rast_srid_command}"

        blocksize = "180x180"
        full_rast_command = "#{raster2pgsql_bin_path} -I -s #{rast_srid_command.strip} -k #{blocksize} -t #{random_table_name} -r #{path} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
        log "Running raster2pgsql: #{raster2pgsql_bin_path} #{full_rast_command}"
        %x[#{full_rast_command}]

        @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
        @db_connection.run("DROP TABLE #{random_table_name}")
        @table_created = true

        # BUGFIX: the cleanup/count/unlink sequence appeared twice here
        # (copy/paste); the second pass re-unlinked the tempfile. Run once.
        entries.each { |e| FileUtils.rm_rf(e) } if entries.any?
        rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
        @import_from_file.unlink

        return OpenStruct.new({
          :name => @suggested_name,
          :rows_imported => rows_imported,
          :import_type => import_type
        })
      end
    rescue => e
      log "====================="
      log $!
      log e.backtrace
      log "====================="
      # NOTE(review): the table is dropped when @table_created is still nil —
      # this cleans the table ogr2ogr creates before the "Empty table" raise,
      # but will itself error if no table exists yet. Confirm against specs
      # before inverting (Exporter uses the opposite condition).
      if @table_created == nil
        @db_connection.drop_table(@suggested_name)
      end
      raise e
    ensure
      @db_connection.disconnect
      if @import_from_file.is_a?(File)
        File.unlink(@import_from_file) if File.file?(@import_from_file.path)
      elsif @import_from_file.is_a?(Tempfile)
        @import_from_file.unlink
      end
    end

    private

    # Infers a SQL schema ("name type" strings) from a CSV at +path+.
    # Detects the column separator when the header parses as one column,
    # remembers any quote character seen, and promotes column types
    # varchar <- integer <- float based on the data actually seen.
    def guess_schema(path)
      @col_separator = ','
      options = {:col_sep => @col_separator}
      schemas = []
      uk_column_counter = 0

      csv = CSV.open(path, options)
      column_names = csv.gets

      if column_names.size == 1
        # Header didn't split: try every non-word character present in it as
        # a candidate separator and keep the one yielding the most columns.
        candidate_col_separators = {}
        column_names.first.scan(/([^\w\s])/i).flatten.uniq.each do |candidate|
          candidate_col_separators[candidate] = 0
        end
        candidate_col_separators.keys.each do |candidate|
          csv = CSV.open(path, options.merge(:col_sep => candidate))
          column_names = csv.gets
          candidate_col_separators[candidate] = column_names.size
        end
        @col_separator = candidate_col_separators.sort { |a, b| a[1] <=> b[1] }.last.first
        csv = CSV.open(path, options.merge(:col_sep => @col_separator))
        column_names = csv.gets
      end

      column_names = column_names.map do |c|
        if c.blank?
          uk_column_counter += 1
          "unknow_name_#{uk_column_counter}"
        else
          c = c.force_encoding('utf-8').encode
          # Remember the quote character if the header value is quoted.
          results = c.scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
          if results.size == 2 && results[0] == results[1]
            @quote = $1
          end
          c.sanitize_column_name
        end
      end

      while (line = csv.gets)
        line.each_with_index do |field, i|
          next if line[i].blank?
          unless @quote
            results = line[i].scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
            if results.size == 2 && results[0] == results[1]
              @quote = $1
            end
          end
          if schemas[i].nil?
            # First non-blank value decides the initial type guess.
            if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
              schemas[i] = "float"
            elsif line[i] =~ /^[0-9]+$/
              schemas[i] = "integer"
            else
              schemas[i] = "varchar"
            end
          else
            # Later values may only widen the type, never narrow it.
            case schemas[i]
            when "integer"
              if line[i] !~ /^[0-9]+$/
                if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
                  schemas[i] = "float"
                else
                  schemas[i] = "varchar"
                end
              elsif line[i].to_i > 2147483647
                # Exceeds the int4 range; store as float instead.
                schemas[i] = "float"
              end
            end
          end
        end
      end

      result = []
      column_names.each_with_index do |column_name, i|
        # Prefix names that collide with PostgreSQL system columns.
        if RESERVED_COLUMN_NAMES.include?(column_name.to_s)
          column_name = "_#{column_name}"
        end
        result << "#{column_name} #{schemas[i] || "varchar"}"
      end
      return result
    end

    # Returns a table name derived from +name+ that does not collide with an
    # existing table: appends (or increments) a trailing "_<n>" suffix.
    def get_valid_name(name)
      candidates = @db_connection.tables.map { |t| t.to_s }.select { |t| t.match(/^#{name}/) }
      return name if candidates.empty?

      max_candidate = candidates.max
      if max_candidate =~ /(.+)_(\d+)$/
        "#{$1}_#{$2.to_i + 1}"
      else
        "#{max_candidate}_2"
      end
    end

    # Prints +str+ when debugging is enabled.
    def log(str)
      puts str if @@debug
    end
  end
end
|