cartodb-importer 0.2.2

data/.gitignore ADDED
@@ -0,0 +1,7 @@
+ *.gem
+ .bundle
+ Gemfile.lock
+ pkg/*
+ misc/*.pyc
+ bin/*
+ build/*
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
+ rvm use --create 1.9.2@cartodb > /dev/null
+ rvm wrapper current textmate
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source :rubygems
+
+ # Specify your gem's dependencies in cartodb-importer.gemspec
+ gemspec
data/README.md ADDED
@@ -0,0 +1,59 @@
+ # CartoDB importer #
+
+ CartoDB importer is a Ruby gem that makes your life easier when importing data from a file into a PostGIS database. The accepted input file formats are:
+
+ - CSV
+ - SHP (in a zip file)
+ - ODS
+ - XLS(X)
+
+ ## Installation and dependencies ##
+
+ To install the Ruby dependencies, just install the `bundler` gem and run `bundle install` in your shell.
+
+ There are also some dependencies on external Python libraries. You should install `pip` first:
+
+ - On Debian / Ubuntu: `apt-get install python-pip`
+
+ - On Mac OS X: `easy_install pip`
+
+ And then run:
+
+     pip install GDAL chardet==1.0.1 argparse==1.2.1 brewery==0.6
+
+ ## How to use it? ##
+
+ To use this gem, initialize an object of the class CartoDB::Importer with the appropriate parameters.
+
+     importer = CartoDB::Importer.new :import_from_file => "path to CSV file", :srid => 4326, :database => "...",
+                                      :username => "...", :password => "..."
+     result = importer.import!
+
+ If everything works fine, a new table will exist in the given database. A `result` object is returned with some information about the import, such as the number of rows or the name of the table.
+
+     puts result.rows_imported
+     # > 43243
+
+ If any error happens, an exception is raised; you can handle it as shown below.
+
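+ For instance, a minimal sketch of guarding the call (the rescue below is illustrative; "Empty table" is one of the messages the gem itself raises):
+
+     begin
+       result = importer.import!
+     rescue => e
+       # The importer logs the original error and re-raises it,
+       # e.g. "Empty table" when the imported file contained no rows.
+       puts "Import failed: #{e.message}"
+     end
+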
+ This is the list of all the available options for the constructor (a fuller usage sketch follows the list):
+
+ - import_from_file: a file descriptor, a Tempfile, or a URL from which to import the data
+ - srid: the value of the SRID
+ - database: the name of the database into which the data is imported
+ - username: the owner of the database
+ - password: the password to connect to the database
+ - extra_columns: a SQL string with extra columns that should be added to the imported table. If any of these columns already exists, an error will be raised
+
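+ As a sketch of how these options fit together (the file path, credentials, and extra column definition below are purely illustrative):
+
+     importer = CartoDB::Importer.new :import_from_file => "/tmp/clubbing.zip",
+                                      :srid             => 4326,
+                                      :database         => "cartodb_importer_test",
+                                      :username         => "postgres",
+                                      :password         => "...",
+                                      :extra_columns    => "annotation varchar"
+     result = importer.import!
+     puts result.name   # the name of the newly created table
+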
+ ## Running the specs ##
+
+ CartoDB Importer has a suite of specs that defines its expected behaviour. To run this suite, a database named `cartodb_importer_test` must exist. You can create this database by running:
+
+     CREATE DATABASE cartodb_importer_test
+       WITH TEMPLATE = template_postgis
+       OWNER = postgres
+
+ Then, to run the specs, just run this command:
+
+     bundle exec rspec spec/import_spec.rb
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
data/cartodb-importer.gemspec ADDED
@@ -0,0 +1,33 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "cartodb-importer/version"
+
+ Gem::Specification.new do |s|
+   s.name = "cartodb-importer"
+   s.version = CartoDB::Importer::VERSION
+   s.platform = Gem::Platform::RUBY
+   s.authors = ["Fernando Blat"]
+   s.email = ["ferblape@gmail.com"]
+   s.homepage = ""
+   s.summary = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
+   s.description = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
+
+   s.rubyforge_project = "cartodb-importer"
+
+   s.files = `git ls-files`.split("\n").reject{|fn| fn =~ /spec\/support\/data/}
+   s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n").reject{|fn| fn =~ /spec\/support\/data/}
+   s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_runtime_dependency "pg", "0.11"
+   s.add_runtime_dependency "sequel"
+   s.add_runtime_dependency "roo"
+   s.add_runtime_dependency "spreadsheet"
+   s.add_runtime_dependency "google-spreadsheet-ruby"
+   s.add_runtime_dependency "rubyzip"
+   s.add_runtime_dependency "builder"
+
+   s.add_development_dependency "rspec"
+   s.add_development_dependency "mocha"
+   s.add_development_dependency "ruby-debug19"
+ end
data/clubbing.shp ADDED
File without changes
@@ -0,0 +1,197 @@
+ # coding: UTF-8
+
+ module CartoDB
+   class Exporter
+     SUPPORTED_FORMATS = %W{ .csv .shp .kml }
+     OUTPUT_FILE_LOCATION = "/tmp"
+     class << self
+       attr_accessor :debug
+     end
+     @@debug = true
+
+     attr_accessor :export_to_file, :type, :suggested_name,
+                   :ext, :db_configuration, :db_connection
+
+     attr_reader :table_created, :force_name
+
+     def initialize(options = {})
+       log "options: #{options}"
+       @@debug = options[:debug] if options[:debug]
+       @table_created = nil
+       @export_to_file = options[:export_to_file]
+       @type = options[:type]
+       raise "export_to_file value can't be nil" if @export_to_file.nil?
+
+       @db_configuration = options.slice(:database, :username, :password, :host, :port)
+       @db_configuration[:port] ||= 5432
+       @db_configuration[:host] ||= '127.0.0.1'
+       @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")
+
+       unless options[:suggested_name].nil? || options[:suggested_name].blank?
+         @force_name = true
+         @suggested_name = get_valid_name(options[:suggested_name])
+       else
+         @force_name = false
+       end
+
+     rescue => e
+       log $!
+       log e.backtrace
+       raise e
+     end
+
+     def export!
+       path = "#{OUTPUT_FILE_LOCATION}/exporting_#{Time.now.to_i}_#{@export_to_file}"
+
+       python_bin_path = `which python`.strip
+       psql_bin_path = `which psql`.strip
+
+       entries = []
+
+       export_type = ".#{@type}"
+
+       if @type == 'csv'
+
+         ogr2ogr_bin_path = `which ogr2ogr`.strip
+         ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "CSV" #{path} PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}
+
+         output = `#{ogr2ogr_command} &> /dev/null`
+
+         Zip::ZipOutputStream.open("#{path}.zip") do |zia|
+           zia.put_next_entry("#{@export_to_file}.#{type}")
+           zia.print IO.read("#{path}/#{@export_to_file}.#{type}")
+         end
+         FileUtils.rm_rf(path)
+
+         log "path: #{path}"
+         return OpenStruct.new({
+           :name => @export_to_file,
+           :import_type => export_type,
+           :path => "#{path}.#{type}"
+         })
+
+       end
+       if @type == 'kml'
+
+         ogr2ogr_bin_path = `which ogr2ogr`.strip
+         ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "KML" #{path}.kml PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}
+
+         output = `#{ogr2ogr_command} &> /dev/null`
+
+         Zip::ZipOutputStream.open("#{path}.kmz") do |zia|
+           zia.put_next_entry("doc.kml")
+           zia.print IO.read("#{path}.kml")
+         end
+         FileUtils.rm_rf("#{path}.kml")
+
+         log "path: #{path}"
+         return OpenStruct.new({
+           :name => @export_to_file,
+           :import_type => export_type,
+           :path => "#{path}.#{type}"
+         })
+
+       end
+       if @type == 'shp'
+
+         ogr2ogr_bin_path = `which ogr2ogr`.strip
+         ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "ESRI Shapefile" #{path}.shp PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}
+
+         output = `#{ogr2ogr_command} &> /dev/null`
+
+         Zip::ZipOutputStream.open("#{path}.zip") do |zia|
+
+           begin
+             zia.put_next_entry("#{export_to_file}.shp")
+             zia.print IO.read("#{path}.shp")
+             FileUtils.rm_rf("#{path}.shp")
+           rescue Exception=>e
+             # handle e
+             log "info #{e}"
+           end
+
+
+           begin
+             zia.put_next_entry("#{export_to_file}.shx")
+             zia.print IO.read("#{path}.shx")
+             FileUtils.rm_rf("#{path}.shx")
+           rescue Exception=>e
+             # handle e
+             log "info #{e}"
+           end
+
+
+           begin
+             zia.put_next_entry("#{export_to_file}.dbf")
+             zia.print IO.read("#{path}.dbf")
+             FileUtils.rm_rf("#{path}.dbf")
+           rescue Exception=>e
+             # handle e
+             log "info #{e}"
+           end
+
+
+           begin
+             zia.put_next_entry("#{export_to_file}.prj")
+             zia.print IO.read("#{path}.prj")
+             FileUtils.rm_rf("#{path}.prj")
+           rescue Exception=>e
+             # handle e
+             log "info #{e}"
+           end
+
+
+           begin
+             zia.put_next_entry("#{export_to_file}.sbn")
+             zia.print IO.read("#{path}.sbn")
+             FileUtils.rm_rf("#{path}.sbn")
+           rescue Exception=>e
+             # handle e
+             log "info #{e}"
+           end
+
+         end
+
+         return OpenStruct.new({
+           :name => @export_to_file,
+           :import_type => export_type,
+           :path => "#{path}.#{type}"
+         })
+
+       end
+     rescue => e
+       log "====================="
+       log $!
+       log e.backtrace
+       log "====================="
+       if !@table_created.nil?
+         @db_connection.drop_table(@suggested_name)
+       end
+       raise e
+     ensure
+       @db_connection.disconnect
+     end
+
+     private
+
+     def get_valid_name(name)
+       candidates = @db_connection.tables.map{ |t| t.to_s }.select{ |t| t.match(/^#{name}/) }
+       if candidates.any?
+         max_candidate = candidates.max
+         if max_candidate =~ /(.+)_(\d+)$/
+           return $1 + "_#{$2.to_i + 1}"
+         else
+           return max_candidate + "_2"
+         end
+       else
+         return name
+       end
+     end
+
+     def log(str)
+       if @@debug
+         puts str
+       end
+     end
+   end
+ end
@@ -0,0 +1,6 @@
+ module CartoDB
+   class Importer
+     VERSION = "0.1.9"
+   end
+ end
+
@@ -0,0 +1,14 @@
+ # coding: UTF-8
+
+ require 'rubygems'
+ require 'bundler'
+ Bundler.setup
+
+ require 'roo'
+ require 'csv'
+ require 'tempfile'
+ require 'ostruct'
+ require 'cartodb-exporter/exporter'
+ require 'core_ext/string'
+ require 'core_ext/hash'
+ require 'core_ext/blank'
@@ -0,0 +1,336 @@
+ # coding: UTF-8
+
+ module CartoDB
+   class Importer
+     RESERVED_COLUMN_NAMES = %W{ oid tableoid xmin cmin xmax cmax ctid }
+     SUPPORTED_FORMATS = %W{ .csv .shp .ods .xls .xlsx .tif .tiff }
+
+     class << self
+       attr_accessor :debug
+     end
+     @@debug = true
+
+     attr_accessor :import_from_file, :suggested_name,
+                   :ext, :db_configuration, :db_connection
+
+     attr_reader :table_created, :force_name
+
+     def initialize(options = {})
+       @@debug = options[:debug] if options[:debug]
+       @table_created = nil
+       @import_from_file = options[:import_from_file]
+       raise "import_from_file value can't be nil" if @import_from_file.nil?
+
+       @db_configuration = options.slice(:database, :username, :password, :host, :port)
+       @db_configuration[:port] ||= 5432
+       @db_configuration[:host] ||= '127.0.0.1'
+       @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")
+
+       unless options[:suggested_name].nil? || options[:suggested_name].blank?
+         @force_name = true
+         @suggested_name = get_valid_name(options[:suggested_name])
+       else
+         @force_name = false
+       end
+
+       if @import_from_file.is_a?(String)
+         if @import_from_file =~ /^http/
+           @import_from_file = URI.escape(@import_from_file)
+         end
+         open(@import_from_file) do |res|
+           file_name = File.basename(import_from_file)
+           @ext = File.extname(file_name)
+           @suggested_name ||= get_valid_name(File.basename(import_from_file, @ext).downcase.sanitize)
+           @import_from_file = Tempfile.new([@suggested_name, @ext])
+           @import_from_file.write res.read.force_encoding('utf-8')
+           @import_from_file.close
+         end
+       else
+         original_filename = if @import_from_file.respond_to?(:original_filename)
+           @import_from_file.original_filename
+         else
+           @import_from_file.path
+         end
+         @ext = File.extname(original_filename)
+         @suggested_name ||= get_valid_name(File.basename(original_filename,@ext).tr('.','_').downcase.sanitize)
+         @ext ||= File.extname(original_filename)
+       end
+     rescue => e
+       log $!
+       log e.backtrace
+       raise e
+     end
+
+     def import!
+       path = if @import_from_file.respond_to?(:tempfile)
+         @import_from_file.tempfile.path
+       else
+         @import_from_file.path
+       end
+       python_bin_path = `which python`.strip
+       psql_bin_path = `which psql`.strip
+
+       entries = []
+       if @ext == '.zip'
+         log "Importing zip file: #{path}"
+         Zip::ZipFile.foreach(path) do |entry|
+           name = entry.name.split('/').last
+           next if name =~ /^(\.|\_{2})/
+           entries << "/tmp/#{name}"
+           if SUPPORTED_FORMATS.include?(File.extname(name))
+             @ext = File.extname(name)
+             @suggested_name = get_valid_name(File.basename(name,@ext).tr('.','_').downcase.sanitize) unless @force_name
+             path = "/tmp/#{name}"
+             log "Found original @ext file named #{name} in path #{path}"
+           end
+           if File.file?("/tmp/#{name}")
+             FileUtils.rm("/tmp/#{name}")
+           end
+           entry.extract("/tmp/#{name}")
+         end
+       end
+
+       import_type = @ext
+       # These types of files are converted to CSV
+       if %W{ .xls .xlsx .ods }.include?(@ext)
+         new_path = "/tmp/#{@suggested_name}.csv"
+         case @ext
+         when '.xls'
+           Excel.new(path)
+         when '.xlsx'
+           Excelx.new(path)
+         when '.ods'
+           Openoffice.new(path)
+         else
+           raise ArgumentError, "Don't know how to open file #{new_path}"
+         end.to_csv(new_path)
+         @import_from_file = File.open(new_path,'r')
+         @ext = '.csv'
+         path = @import_from_file.path
+       end
+
+       if @ext == '.csv'
+         ogr2ogr_bin_path = `which ogr2ogr`.strip
+         ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "PostgreSQL" PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{path} -nln #{@suggested_name}}
+
+         output = `#{ogr2ogr_command} &> /dev/null`
+
+         # Check if the file had data; if not, raise an error because probably something went wrong
+         if @db_connection["SELECT * from #{@suggested_name} LIMIT 1"].first.nil?
+           raise "Empty table"
+         end
+
+         # Sanitize column names where needed
+         column_names = @db_connection.schema(@suggested_name).map{ |s| s[0].to_s }
+         need_sanitizing = column_names.each do |column_name|
+           if column_name != column_name.sanitize_column_name
+             @db_connection.run("ALTER TABLE #{@suggested_name} RENAME COLUMN \"#{column_name}\" TO #{column_name.sanitize_column_name}")
+           end
+         end
+
+         @table_created = true
+
+         FileUtils.rm_rf(path)
+         rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
+
+         return OpenStruct.new({
+           :name => @suggested_name,
+           :rows_imported => rows_imported,
+           :import_type => import_type
+         })
+
+       end
+       if @ext == '.shp'
+
+         shp2pgsql_bin_path = `which shp2pgsql`.strip
+
+         host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
+         port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
+         @suggested_name = get_valid_name(File.basename(path).tr('.','_').downcase.sanitize) unless @force_name
+         random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"
+
+         normalizer_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/shp_normalizer.py", __FILE__)} #{path} #{random_table_name}"
+         shp_args_command = `#{normalizer_command}`
+         if shp_args_command.strip.blank?
+           raise "Error running python shp_normalizer script: #{normalizer_command}"
+         end
+         full_shp_command = "#{shp2pgsql_bin_path} #{shp_args_command.strip} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
+         log "Running shp2pgsql: #{full_shp_command}"
+         %x[#{full_shp_command}]
+
+         @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
+         @db_connection.run("DROP TABLE #{random_table_name}")
+         @table_created = true
+
+         entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
+         rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
+         @import_from_file.unlink
+
+         return OpenStruct.new({
+           :name => @suggested_name,
+           :rows_imported => rows_imported,
+           :import_type => import_type
+         })
+       end
+       if %W{ .tif .tiff }.include?(@ext)
+         log "Importing raster file: #{path}"
+         raster2pgsql_bin_path = `which raster2pgsql.py`.strip
+
+         host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
+         port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
+         @suggested_name = get_valid_name(File.basename(path).tr('.','_').downcase.sanitize) unless @force_name
+         random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"
+
+         gdal_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/srid_from_gdal.py", __FILE__)} #{path}"
+         rast_srid_command = `#{gdal_command}`.strip
+
+         log "SRID : #{rast_srid_command}"
+
+         blocksize = "180x180"
+         full_rast_command = "#{raster2pgsql_bin_path} -I -s #{rast_srid_command.strip} -k #{blocksize} -t #{random_table_name} -r #{path} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
+         log "Running raster2pgsql: #{raster2pgsql_bin_path} #{full_rast_command}"
+         %x[#{full_rast_command}]
+
+         @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
+         @db_connection.run("DROP TABLE #{random_table_name}")
+
+         entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
+         rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
+         @import_from_file.unlink
+
+         @table_created = true
+
+         entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
+         rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
+         @import_from_file.unlink
+
+         return OpenStruct.new({
+           :name => @suggested_name,
+           :rows_imported => rows_imported,
+           :import_type => import_type
+         })
+
+       end
+     rescue => e
+       log "====================="
+       log $!
+       log e.backtrace
+       log "====================="
+       if @table_created == nil
+         @db_connection.drop_table(@suggested_name)
+       end
+       raise e
+     ensure
+       @db_connection.disconnect
+       if @import_from_file.is_a?(File)
+         File.unlink(@import_from_file) if File.file?(@import_from_file.path)
+       elsif @import_from_file.is_a?(Tempfile)
+         @import_from_file.unlink
+       end
+     end
+
+     private
+
+     def guess_schema(path)
+       @col_separator = ','
+       options = {:col_sep => @col_separator}
+       schemas = []
+       uk_column_counter = 0
+
+       csv = CSV.open(path, options)
+       column_names = csv.gets
+
+       if column_names.size == 1
+         candidate_col_separators = {}
+         column_names.first.scan(/([^\w\s])/i).flatten.uniq.each do |candidate|
+           candidate_col_separators[candidate] = 0
+         end
+         candidate_col_separators.keys.each do |candidate|
+           csv = CSV.open(path, options.merge(:col_sep => candidate))
+           column_names = csv.gets
+           candidate_col_separators[candidate] = column_names.size
+         end
+         @col_separator = candidate_col_separators.sort{|a,b| a[1]<=>b[1]}.last.first
+         csv = CSV.open(path, options.merge(:col_sep => @col_separator))
+         column_names = csv.gets
+       end
+
+       column_names = column_names.map do |c|
+         if c.blank?
+           uk_column_counter += 1
+           "unknow_name_#{uk_column_counter}"
+         else
+           c = c.force_encoding('utf-8').encode
+           results = c.scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
+           if results.size == 2 && results[0] == results[1]
+             @quote = $1
+           end
+           c.sanitize_column_name
+         end
+       end
+
+       while (line = csv.gets)
+         line.each_with_index do |field, i|
+           next if line[i].blank?
+           unless @quote
+             results = line[i].scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
+             if results.size == 2 && results[0] == results[1]
+               @quote = $1
+             end
+           end
+           if schemas[i].nil?
+             if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
+               schemas[i] = "float"
+             elsif line[i] =~ /^[0-9]+$/
+               schemas[i] = "integer"
+             else
+               schemas[i] = "varchar"
+             end
+           else
+             case schemas[i]
+             when "integer"
+               if line[i] !~ /^[0-9]+$/
+                 if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
+                   schemas[i] = "float"
+                 else
+                   schemas[i] = "varchar"
+                 end
+               elsif line[i].to_i > 2147483647
+                 schemas[i] = "float"
+               end
+             end
+           end
+         end
+       end
+
+       result = []
+       column_names.each_with_index do |column_name, i|
+         if RESERVED_COLUMN_NAMES.include?(column_name.to_s)
+           column_name = "_#{column_name}"
+         end
+         result << "#{column_name} #{schemas[i] || "varchar"}"
+       end
+       return result
+     end
+
+     def get_valid_name(name)
+       candidates = @db_connection.tables.map{ |t| t.to_s }.select{ |t| t.match(/^#{name}/) }
+       if candidates.any?
+         max_candidate = candidates.max
+         if max_candidate =~ /(.+)_(\d+)$/
+           return $1 + "_#{$2.to_i + 1}"
+         else
+           return max_candidate + "_2"
+         end
+       else
+         return name
+       end
+     end
+
+     def log(str)
+       if @@debug
+         puts str
+       end
+     end
+   end
+ end
@@ -0,0 +1,6 @@
+ module CartoDB
+   class Importer
+     VERSION = "0.2.2"
+   end
+ end
+