cartodb-importer 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ misc/*.pyc
6
+ bin/*
7
+ build/*
data/.rvmrc ADDED
@@ -0,0 +1,2 @@
1
+ rvm use --create 1.9.2@cartodb > /dev/null
2
+ rvm wrapper current textmate
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source :rubygems
2
+
3
+ # Specify your gem's dependencies in cartodb-importer.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # CartoDB importer #
2
+
3
+ CartoDB importer is a Ruby gem that makes your life easier when importing data from a file into a PostGIS database. The accepted formats for input files are:
4
+
5
+ - CSV
6
+ - SHP (in a zip file)
7
+ - ODS
8
+ - XLS(X)
9
+
10
+ ## Installation and dependencies ##
11
+
12
+ To install the Ruby dependencies, just install the `bundler` gem and run `bundle install` in your shell.
13
+
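+ For example (assuming RubyGems is already available):
+
+ gem install bundler
+ bundle install
+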
14
+ There are also some dependencies on external Python libraries (WTF!). You should install `pip` first:
15
+
16
+ - On Debian / Ubuntu: `apt-get install python-pip`
17
+
18
+ - On Mac OS X: `easy_install pip`
19
+
20
+ And then run:
21
+
22
+ pip install GDAL chardet==1.0.1 argparse==1.2.1 brewery==0.6
23
+
24
+ ## How to use it? ##
25
+
26
+ To use this gem, initialize an object of the CartoDB::Importer class with the appropriate parameters.
27
+
28
+ importer = CartoDB::Importer.new :import_from_file => "path to CSV file", :srid => 4326, :database => "...",
29
+ :username => "...", :password => "..."
30
+ result = importer.import!
31
+
32
+ If everything works fine, a new table will exist in the given database. A `result` object is returned with some information about the import, such as the number of rows imported or the name of the table.
33
+
34
+ puts result.rows_imported
35
+ # > 43243
36
+
37
+ If any error occurs, an exception is raised.
38
+
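+ For instance, a failed import can be handled like this (a minimal sketch):
+
+ begin
+ result = importer.import!
+ rescue => e
+ puts "Import failed: #{e.message}"
+ end
+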
39
+ This is the list of all the options available in the constructor (a fuller example follows the list):
40
+
41
+ - import_from_file: a file descriptor, a Tempfile, or a URL from which to import the data
42
+ - srid: the value of the SRID
43
+ - database: the name of the database into which the data will be imported
44
+ - username: the database user to connect as
45
+ - password: the password to connect to the database
46
+ - extra_columns: a SQL string with extra columns to add to the imported table. If any of these columns already exists, an error will be raised
47
+
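+ As a fuller sketch, here is how a zipped Shapefile could be imported from a URL (the URL, credentials and printed values below are placeholders):
+
+ importer = CartoDB::Importer.new :import_from_file => "http://example.com/clubbing.zip",
+ :srid => 4326, :database => "cartodb_importer_test",
+ :username => "postgres", :password => "..."
+ result = importer.import!
+ puts result.name # e.g. "clubbing"
+ puts result.import_type # e.g. ".shp"
+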
48
+ ## Running the specs ##
49
+
50
+ CartoDB Importer has a suite of specs that define its expected behavior. To run this suite, a database named `cartodb_importer_test` must exist. You can create this database by running:
51
+
52
+ CREATE DATABASE cartodb_importer_test
53
+ WITH TEMPLATE = template_postgis
54
+ OWNER = postgres
55
+
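+ For example, with `psql` (assuming a local server with a `postgres` superuser and a `template_postgis` template database):
+
+ psql -U postgres -c "CREATE DATABASE cartodb_importer_test WITH TEMPLATE = template_postgis OWNER = postgres"
+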
56
+ Then run the specs with this command:
57
+
58
+ bundle exec rspec spec/import_spec.rb
59
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "cartodb-importer/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "cartodb-importer"
7
+ s.version = CartoDB::Importer::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Fernando Blat"]
10
+ s.email = ["ferblape@gmail.com"]
11
+ s.homepage = ""
12
+ s.summary = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
13
+ s.description = %q{Import CSV, SHP, and other files with data into a PostgreSQL table}
14
+
15
+ s.rubyforge_project = "cartodb-importer"
16
+
17
+ s.files = `git ls-files`.split("\n").reject{|fn| fn =~ /spec\/support\/data/}
18
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n").reject{|fn| fn =~ /spec\/support\/data/}
19
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
+ s.require_paths = ["lib"]
21
+
22
+ s.add_runtime_dependency "pg", "0.11"
23
+ s.add_runtime_dependency "sequel"
24
+ s.add_runtime_dependency "roo"
25
+ s.add_runtime_dependency "spreadsheet"
26
+ s.add_runtime_dependency "google-spreadsheet-ruby"
27
+ s.add_runtime_dependency "rubyzip"
28
+ s.add_runtime_dependency "builder"
29
+
30
+ s.add_development_dependency "rspec"
31
+ s.add_development_dependency "mocha"
32
+ s.add_development_dependency "ruby-debug19"
33
+ end
data/clubbing.shp ADDED
File without changes
@@ -0,0 +1,197 @@
1
+ # coding: UTF-8
2
+
3
+ module CartoDB
4
+ class Exporter
5
+ SUPPORTED_FORMATS = %W{ .csv .shp .kml }
6
+ OUTPUT_FILE_LOCATION = "/tmp"
7
+ class << self
8
+ attr_accessor :debug
9
+ end
10
+ @@debug = true
11
+
12
+ attr_accessor :export_to_file, :type, :suggested_name,
13
+ :ext, :db_configuration, :db_connection
14
+
15
+ attr_reader :table_created, :force_name
16
+
17
+ def initialize(options = {})
18
+ log "options: #{options}"
19
+ @@debug = options[:debug] if options[:debug]
20
+ @table_created = nil
21
+ @export_to_file = options[:export_to_file]
22
+ @type = options[:type]
23
+ raise "export_to_file value can't be nil" if @export_to_file.nil?
24
+
25
+ @db_configuration = options.slice(:database, :username, :password, :host, :port)
26
+ @db_configuration[:port] ||= 5432
27
+ @db_configuration[:host] ||= '127.0.0.1'
28
+ @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")
29
+
30
+ unless options[:suggested_name].nil? || options[:suggested_name].blank?
31
+ @force_name = true
32
+ @suggested_name = get_valid_name(options[:suggested_name])
33
+ else
34
+ @force_name = false
35
+ end
36
+
37
+ rescue => e
38
+ log $!
39
+ log e.backtrace
40
+ raise e
41
+ end
42
+
43
+ def export!
44
+ path = "#{OUTPUT_FILE_LOCATION}/exporting_#{Time.now.to_i}_#{@export_to_file}"
45
+
46
+ python_bin_path = `which python`.strip
47
+ psql_bin_path = `which psql`.strip
48
+
49
+ entries = []
50
+
51
+ export_type = ".#{@type}"
52
+
53
+ if @type == 'csv'
54
+
55
+ ogr2ogr_bin_path = `which ogr2ogr`.strip
56
+ ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "CSV" #{path} PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}
57
+
58
+ output = `#{ogr2ogr_command} &> /dev/null`
59
+
60
+ Zip::ZipOutputStream.open("#{path}.zip") do |zia|
61
+ zia.put_next_entry("#{@export_to_file}.#{type}")
62
+ zia.print IO.read("#{path}/#{@export_to_file}.#{type}")
63
+ end
64
+ FileUtils.rm_rf(path)
65
+
66
+ log "path: #{path}"
67
+ return OpenStruct.new({
68
+ :name => @export_to_file,
69
+ :import_type => export_type,
70
+ :path => "#{path}.#{type}"
71
+ })
72
+
73
+ end
74
+ if @type == 'kml'
75
+
76
+ ogr2ogr_bin_path = `which ogr2ogr`.strip
77
+ ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "KML" #{path}.kml PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}
78
+
79
+ output = `#{ogr2ogr_command} &> /dev/null`
80
+
81
+ Zip::ZipOutputStream.open("#{path}.kmz") do |zia|
82
+ zia.put_next_entry("doc.kml")
83
+ zia.print IO.read("#{path}.kml")
84
+ end
85
+ FileUtils.rm_rf("#{path}.kml")
86
+
87
+ log "path: #{path}"
88
+ return OpenStruct.new({
89
+ :name => @export_to_file,
90
+ :import_type => export_type,
91
+ :path => "#{path}.#{type}"
92
+ })
93
+
94
+ end
95
+ if @type == 'shp'
96
+
97
+ ogr2ogr_bin_path = `which ogr2ogr`.strip
98
+ ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "ESRI Shapefile" #{path}.shp PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{@export_to_file}}
99
+
100
+ output = `#{ogr2ogr_command} &> /dev/null`
101
+
102
+ Zip::ZipOutputStream.open("#{path}.zip") do |zia|
103
+
104
+ begin
105
+ zia.put_next_entry("#{export_to_file}.shp")
106
+ zia.print IO.read("#{path}.shp")
107
+ FileUtils.rm_rf("#{path}.shp")
108
+ rescue Exception=>e
109
+ # handle e
110
+ log "info #{e}"
111
+ end
112
+
113
+
114
+ begin
115
+ zia.put_next_entry("#{export_to_file}.shx")
116
+ zia.print IO.read("#{path}.shx")
117
+ FileUtils.rm_rf("#{path}.shx")
118
+ rescue Exception=>e
119
+ # handle e
120
+ log "info #{e}"
121
+ end
122
+
123
+
124
+ begin
125
+ zia.put_next_entry("#{export_to_file}.dbf")
126
+ zia.print IO.read("#{path}.dbf")
127
+ FileUtils.rm_rf("#{path}.dbf")
128
+ rescue Exception=>e
129
+ # handle e
130
+ log "info #{e}"
131
+ end
132
+
133
+
134
+ begin
135
+ zia.put_next_entry("#{export_to_file}.prj")
136
+ zia.print IO.read("#{path}.prj")
137
+ FileUtils.rm_rf("#{path}.prj")
138
+ rescue Exception=>e
139
+ # handle e
140
+ log "info #{e}"
141
+ end
142
+
143
+
144
+ begin
145
+ zia.put_next_entry("#{export_to_file}.sbn")
146
+ zia.print IO.read("#{path}.sbn")
147
+ FileUtils.rm_rf("#{path}.sbn")
148
+ rescue Exception=>e
149
+ # handle e
150
+ log "info #{e}"
151
+ end
152
+
153
+ end
154
+
155
+ return OpenStruct.new({
156
+ :name => @export_to_file,
157
+ :import_type => export_type,
158
+ :path => "#{path}.#{type}"
159
+ })
160
+
161
+ end
162
+ rescue => e
163
+ log "====================="
164
+ log $!
165
+ log e.backtrace
166
+ log "====================="
167
+ if !@table_created.nil?
168
+ @db_connection.drop_table(@suggested_name)
169
+ end
170
+ raise e
171
+ ensure
172
+ @db_connection.disconnect
173
+ end
174
+
175
+ private
176
+
177
+ def get_valid_name(name)
178
+ candidates = @db_connection.tables.map{ |t| t.to_s }.select{ |t| t.match(/^#{name}/) }
179
+ if candidates.any?
180
+ max_candidate = candidates.max
181
+ if max_candidate =~ /(.+)_(\d+)$/
182
+ return $1 + "_#{$2.to_i + 1}"
183
+ else
184
+ return max_candidate + "_2"
185
+ end
186
+ else
187
+ return name
188
+ end
189
+ end
190
+
191
+ def log(str)
192
+ if @@debug
193
+ puts str
194
+ end
195
+ end
196
+ end
197
+ end
@@ -0,0 +1,6 @@
1
+ module CartoDB
2
+ class Importer
3
+ VERSION = "0.1.9"
4
+ end
5
+ end
6
+
@@ -0,0 +1,14 @@
1
+ # coding: UTF-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ Bundler.setup
6
+
7
+ require 'roo'
8
+ require 'csv'
9
+ require 'tempfile'
10
+ require 'ostruct'
11
+ require 'cartodb-exporter/exporter'
12
+ require 'core_ext/string'
13
+ require 'core_ext/hash'
14
+ require 'core_ext/blank'
@@ -0,0 +1,336 @@
1
+ # coding: UTF-8
2
+
3
+ module CartoDB
4
+ class Importer
5
+ RESERVED_COLUMN_NAMES = %W{ oid tableoid xmin cmin xmax cmax ctid }
6
+ SUPPORTED_FORMATS = %W{ .csv .shp .ods .xls .xlsx .tif .tiff }
7
+
8
+ class << self
9
+ attr_accessor :debug
10
+ end
11
+ @@debug = true
12
+
13
+ attr_accessor :import_from_file, :suggested_name,
14
+ :ext, :db_configuration, :db_connection
15
+
16
+ attr_reader :table_created, :force_name
17
+
18
+ def initialize(options = {})
19
+ @@debug = options[:debug] if options[:debug]
20
+ @table_created = nil
21
+ @import_from_file = options[:import_from_file]
22
+ raise "import_from_file value can't be nil" if @import_from_file.nil?
23
+
24
+ @db_configuration = options.slice(:database, :username, :password, :host, :port)
25
+ @db_configuration[:port] ||= 5432
26
+ @db_configuration[:host] ||= '127.0.0.1'
27
+ @db_connection = Sequel.connect("postgres://#{@db_configuration[:username]}:#{@db_configuration[:password]}@#{@db_configuration[:host]}:#{@db_configuration[:port]}/#{@db_configuration[:database]}")
28
+
29
+ unless options[:suggested_name].nil? || options[:suggested_name].blank?
30
+ @force_name = true
31
+ @suggested_name = get_valid_name(options[:suggested_name])
32
+ else
33
+ @force_name = false
34
+ end
35
+
36
+ if @import_from_file.is_a?(String)
37
+ if @import_from_file =~ /^http/
38
+ @import_from_file = URI.escape(@import_from_file)
39
+ end
40
+ open(@import_from_file) do |res|
41
+ file_name = File.basename(import_from_file)
42
+ @ext = File.extname(file_name)
43
+ @suggested_name ||= get_valid_name(File.basename(import_from_file, @ext).downcase.sanitize)
44
+ @import_from_file = Tempfile.new([@suggested_name, @ext])
45
+ @import_from_file.write res.read.force_encoding('utf-8')
46
+ @import_from_file.close
47
+ end
48
+ else
49
+ original_filename = if @import_from_file.respond_to?(:original_filename)
50
+ @import_from_file.original_filename
51
+ else
52
+ @import_from_file.path
53
+ end
54
+ @ext = File.extname(original_filename)
55
+ @suggested_name ||= get_valid_name(File.basename(original_filename,@ext).tr('.','_').downcase.sanitize)
56
+ @ext ||= File.extname(original_filename)
57
+ end
58
+ rescue => e
59
+ log $!
60
+ log e.backtrace
61
+ raise e
62
+ end
63
+
64
+ def import!
65
+ path = if @import_from_file.respond_to?(:tempfile)
66
+ @import_from_file.tempfile.path
67
+ else
68
+ @import_from_file.path
69
+ end
70
+ python_bin_path = `which python`.strip
71
+ psql_bin_path = `which psql`.strip
72
+
73
+ entries = []
74
+ if @ext == '.zip'
75
+ log "Importing zip file: #{path}"
76
+ Zip::ZipFile.foreach(path) do |entry|
77
+ name = entry.name.split('/').last
78
+ next if name =~ /^(\.|\_{2})/
79
+ entries << "/tmp/#{name}"
80
+ if SUPPORTED_FORMATS.include?(File.extname(name))
81
+ @ext = File.extname(name)
82
+ @suggested_name = get_valid_name(File.basename(name,@ext).tr('.','_').downcase.sanitize) unless @force_name
83
+ path = "/tmp/#{name}"
84
+ log "Found original @ext file named #{name} in path #{path}"
85
+ end
86
+ if File.file?("/tmp/#{name}")
87
+ FileUtils.rm("/tmp/#{name}")
88
+ end
89
+ entry.extract("/tmp/#{name}")
90
+ end
91
+ end
92
+
93
+ import_type = @ext
94
+ # These types of files are converted to CSV
95
+ if %W{ .xls .xlsx .ods }.include?(@ext)
96
+ new_path = "/tmp/#{@suggested_name}.csv"
97
+ case @ext
98
+ when '.xls'
99
+ Excel.new(path)
100
+ when '.xlsx'
101
+ Excelx.new(path)
102
+ when '.ods'
103
+ Openoffice.new(path)
104
+ else
105
+ raise ArgumentError, "Don't know how to open file #{new_path}"
106
+ end.to_csv(new_path)
107
+ @import_from_file = File.open(new_path,'r')
108
+ @ext = '.csv'
109
+ path = @import_from_file.path
110
+ end
111
+
112
+ if @ext == '.csv'
113
+ ogr2ogr_bin_path = `which ogr2ogr`.strip
114
+ ogr2ogr_command = %Q{#{ogr2ogr_bin_path} -f "PostgreSQL" PG:"host=#{@db_configuration[:host]} port=#{@db_configuration[:port]} user=#{@db_configuration[:username]} dbname=#{@db_configuration[:database]}" #{path} -nln #{@suggested_name}}
115
+
116
+ output = `#{ogr2ogr_command} &> /dev/null`
117
+
118
+ # Check if the file had data; if not, raise an error because something probably went wrong
119
+ if @db_connection["SELECT * from #{@suggested_name} LIMIT 1"].first.nil?
120
+ raise "Empty table"
121
+ end
122
+
123
+ # Sanitize column names where needed
124
+ column_names = @db_connection.schema(@suggested_name).map{ |s| s[0].to_s }
125
+ need_sanitizing = column_names.each do |column_name|
126
+ if column_name != column_name.sanitize_column_name
127
+ @db_connection.run("ALTER TABLE #{@suggested_name} RENAME COLUMN \"#{column_name}\" TO #{column_name.sanitize_column_name}")
128
+ end
129
+ end
130
+
131
+ @table_created = true
132
+
133
+ FileUtils.rm_rf(path)
134
+ rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
135
+
136
+ return OpenStruct.new({
137
+ :name => @suggested_name,
138
+ :rows_imported => rows_imported,
139
+ :import_type => import_type
140
+ })
141
+
142
+ end
143
+ if @ext == '.shp'
144
+
145
+ shp2pgsql_bin_path = `which shp2pgsql`.strip
146
+
147
+ host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
148
+ port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
149
+ @suggested_name = get_valid_name(File.basename(path).tr('.','_').downcase.sanitize) unless @force_name
150
+ random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"
151
+
152
+ normalizer_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/shp_normalizer.py", __FILE__)} #{path} #{random_table_name}"
153
+ shp_args_command = `#{normalizer_command}`
154
+ if shp_args_command.strip.blank?
155
+ raise "Error running python shp_normalizer script: #{normalizer_command}"
156
+ end
157
+ full_shp_command = "#{shp2pgsql_bin_path} #{shp_args_command.strip} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
158
+ log "Running shp2pgsql: #{full_shp_command}"
159
+ %x[#{full_shp_command}]
160
+
161
+ @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
162
+ @db_connection.run("DROP TABLE #{random_table_name}")
163
+ @table_created = true
164
+
165
+ entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
166
+ rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
167
+ @import_from_file.unlink
168
+
169
+ return OpenStruct.new({
170
+ :name => @suggested_name,
171
+ :rows_imported => rows_imported,
172
+ :import_type => import_type
173
+ })
174
+ end
175
+ if %W{ .tif .tiff }.include?(@ext)
176
+ log "Importing raster file: #{path}"
177
+ raster2pgsql_bin_path = `which raster2pgsql.py`.strip
178
+
179
+ host = @db_configuration[:host] ? "-h #{@db_configuration[:host]}" : ""
180
+ port = @db_configuration[:port] ? "-p #{@db_configuration[:port]}" : ""
181
+ @suggested_name = get_valid_name(File.basename(path).tr('.','_').downcase.sanitize) unless @force_name
182
+ random_table_name = "importing_#{Time.now.to_i}_#{@suggested_name}"
183
+
184
+ gdal_command = "#{python_bin_path} -Wignore #{File.expand_path("../../../misc/srid_from_gdal.py", __FILE__)} #{path}"
185
+ rast_srid_command = `#{gdal_command}`.strip
186
+
187
+ log "SRID : #{rast_srid_command}"
188
+
189
+ blocksize = "180x180"
190
+ full_rast_command = "#{raster2pgsql_bin_path} -I -s #{rast_srid_command.strip} -k #{blocksize} -t #{random_table_name} -r #{path} | #{psql_bin_path} #{host} #{port} -U #{@db_configuration[:username]} -w -d #{@db_configuration[:database]}"
191
+ log "Running raster2pgsql: #{raster2pgsql_bin_path} #{full_rast_command}"
192
+ %x[#{full_rast_command}]
193
+
194
+ @db_connection.run("CREATE TABLE #{@suggested_name} AS SELECT * FROM #{random_table_name}")
195
+ @db_connection.run("DROP TABLE #{random_table_name}")
196
+
197
+ entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
198
+ rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
199
+ @import_from_file.unlink
200
+
201
+ @table_created = true
202
+
203
+ entries.each{ |e| FileUtils.rm_rf(e) } if entries.any?
204
+ rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]
205
+ @import_from_file.unlink
206
+
207
+ return OpenStruct.new({
208
+ :name => @suggested_name,
209
+ :rows_imported => rows_imported,
210
+ :import_type => import_type
211
+ })
212
+
213
+ end
214
+ rescue => e
215
+ log "====================="
216
+ log $!
217
+ log e.backtrace
218
+ log "====================="
219
+ if @table_created == nil
220
+ @db_connection.drop_table(@suggested_name)
221
+ end
222
+ raise e
223
+ ensure
224
+ @db_connection.disconnect
225
+ if @import_from_file.is_a?(File)
226
+ File.unlink(@import_from_file) if File.file?(@import_from_file.path)
227
+ elsif @import_from_file.is_a?(Tempfile)
228
+ @import_from_file.unlink
229
+ end
230
+ end
231
+
232
+ private
233
+
234
+ def guess_schema(path)
235
+ @col_separator = ','
236
+ options = {:col_sep => @col_separator}
237
+ schemas = []
238
+ uk_column_counter = 0
239
+
240
+ csv = CSV.open(path, options)
241
+ column_names = csv.gets
242
+
243
+ if column_names.size == 1
244
+ candidate_col_separators = {}
245
+ column_names.first.scan(/([^\w\s])/i).flatten.uniq.each do |candidate|
246
+ candidate_col_separators[candidate] = 0
247
+ end
248
+ candidate_col_separators.keys.each do |candidate|
249
+ csv = CSV.open(path, options.merge(:col_sep => candidate))
250
+ column_names = csv.gets
251
+ candidate_col_separators[candidate] = column_names.size
252
+ end
253
+ @col_separator = candidate_col_separators.sort{|a,b| a[1]<=>b[1]}.last.first
254
+ csv = CSV.open(path, options.merge(:col_sep => @col_separator))
255
+ column_names = csv.gets
256
+ end
257
+
258
+ column_names = column_names.map do |c|
259
+ if c.blank?
260
+ uk_column_counter += 1
261
+ "unknow_name_#{uk_column_counter}"
262
+ else
263
+ c = c.force_encoding('utf-8').encode
264
+ results = c.scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
265
+ if results.size == 2 && results[0] == results[1]
266
+ @quote = $1
267
+ end
268
+ c.sanitize_column_name
269
+ end
270
+ end
271
+
272
+ while (line = csv.gets)
273
+ line.each_with_index do |field, i|
274
+ next if line[i].blank?
275
+ unless @quote
276
+ results = line[i].scan(/^(["`\'])[^"`\']+(["`\'])$/).flatten
277
+ if results.size == 2 && results[0] == results[1]
278
+ @quote = $1
279
+ end
280
+ end
281
+ if schemas[i].nil?
282
+ if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
283
+ schemas[i] = "float"
284
+ elsif line[i] =~ /^[0-9]+$/
285
+ schemas[i] = "integer"
286
+ else
287
+ schemas[i] = "varchar"
288
+ end
289
+ else
290
+ case schemas[i]
291
+ when "integer"
292
+ if line[i] !~ /^[0-9]+$/
293
+ if line[i] =~ /^\-?[0-9]+[\.|\,][0-9]+$/
294
+ schemas[i] = "float"
295
+ else
296
+ schemas[i] = "varchar"
297
+ end
298
+ elsif line[i].to_i > 2147483647
299
+ schemas[i] = "float"
300
+ end
301
+ end
302
+ end
303
+ end
304
+ end
305
+
306
+ result = []
307
+ column_names.each_with_index do |column_name, i|
308
+ if RESERVED_COLUMN_NAMES.include?(column_name.to_s)
309
+ column_name = "_#{column_name}"
310
+ end
311
+ result << "#{column_name} #{schemas[i] || "varchar"}"
312
+ end
313
+ return result
314
+ end
315
+
316
+ def get_valid_name(name)
317
+ candidates = @db_connection.tables.map{ |t| t.to_s }.select{ |t| t.match(/^#{name}/) }
318
+ if candidates.any?
319
+ max_candidate = candidates.max
320
+ if max_candidate =~ /(.+)_(\d+)$/
321
+ return $1 + "_#{$2.to_i + 1}"
322
+ else
323
+ return max_candidate + "_2"
324
+ end
325
+ else
326
+ return name
327
+ end
328
+ end
329
+
330
+ def log(str)
331
+ if @@debug
332
+ puts str
333
+ end
334
+ end
335
+ end
336
+ end
@@ -0,0 +1,6 @@
1
+ module CartoDB
2
+ class Importer
3
+ VERSION = "0.2.2"
4
+ end
5
+ end
6
+