cartodb-importer 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.rvmrc +2 -0
- data/Gemfile +4 -0
- data/README.md +59 -0
- data/Rakefile +2 -0
- data/cartodb-importer.gemspec +33 -0
- data/clubbing.shp +0 -0
- data/lib/cartodb-exporter/exporter.rb +197 -0
- data/lib/cartodb-exporter/version.rb +6 -0
- data/lib/cartodb-exporter.rb +14 -0
- data/lib/cartodb-importer/importer.rb +336 -0
- data/lib/cartodb-importer/version.rb +6 -0
- data/lib/cartodb-importer.rb +14 -0
- data/lib/core_ext/.DS_Store +0 -0
- data/lib/core_ext/blank.rb +3 -0
- data/lib/core_ext/hash.rb +10 -0
- data/lib/core_ext/string.rb +91 -0
- data/misc/csv_normalizer.py +27 -0
- data/misc/dbfUtils.py +113 -0
- data/misc/shp_normalizer.py +58 -0
- data/misc/srid_from_gdal.py +11 -0
- data/spec/export_spec.rb +60 -0
- data/spec/import_spec.rb +252 -0
- data/spec/spec_helper.rb +19 -0
- metadata +184 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
Bundler.setup
|
6
|
+
|
7
|
+
require 'roo'
|
8
|
+
require 'csv'
|
9
|
+
require 'tempfile'
|
10
|
+
require 'ostruct'
|
11
|
+
require 'cartodb-importer/importer'
|
12
|
+
require 'core_ext/string'
|
13
|
+
require 'core_ext/hash'
|
14
|
+
require 'core_ext/blank'
|
Binary file
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
class String
|
4
|
+
|
5
|
+
POSTGRESQL_RESERVED_WORDS = %W{ ALL ANALYSE ANALYZE AND ANY ARRAY AS ASC ASYMMETRIC AUTHORIZATION BETWEEN BINARY BOTH CASE CAST
|
6
|
+
CHECK COLLATE COLUMN CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP
|
7
|
+
CURRENT_USER DEFAULT DEFERRABLE DESC DISTINCT DO ELSE END EXCEPT FALSE FOR FOREIGN FREEZE FROM FULL
|
8
|
+
GRANT GROUP HAVING ILIKE IN INITIALLY INNER INTERSECT INTO IS ISNULL JOIN LEADING LEFT LIKE LIMIT LOCALTIME
|
9
|
+
LOCALTIMESTAMP NATURAL NEW NOT NOTNULL NULL OFF OFFSET OLD ON ONLY OR ORDER OUTER OVERLAPS PLACING PRIMARY
|
10
|
+
REFERENCES RIGHT SELECT SESSION_USER SIMILAR SOME SYMMETRIC TABLE THEN TO TRAILING TRUE UNION UNIQUE USER
|
11
|
+
USING VERBOSE WHEN WHERE }
|
12
|
+
|
13
|
+
|
14
|
+
def blank?
|
15
|
+
self !~ /\S/
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.random(length=10)
|
19
|
+
('a'..'z').sort_by {rand}[0,length].join
|
20
|
+
end
|
21
|
+
|
22
|
+
def normalize
|
23
|
+
str = self.downcase
|
24
|
+
return '' if str.blank?
|
25
|
+
n = str.force_encoding("UTF-8")
|
26
|
+
n.gsub!(/[àáâãäåāă]/, 'a')
|
27
|
+
n.gsub!(/æ/, 'ae')
|
28
|
+
n.gsub!(/[ďđ]/, 'd')
|
29
|
+
n.gsub!(/[çćčĉċ]/, 'c')
|
30
|
+
n.gsub!(/[èéêëēęěĕė]/, 'e')
|
31
|
+
n.gsub!(/ƒ/, 'f')
|
32
|
+
n.gsub!(/[ĝğġģ]/, 'g')
|
33
|
+
n.gsub!(/[ĥħ]/, 'h')
|
34
|
+
n.gsub!(/[ììíîïīĩĭ]/, 'i')
|
35
|
+
n.gsub!(/[įıijĵ]/, 'j')
|
36
|
+
n.gsub!(/[ķĸ]/, 'k')
|
37
|
+
n.gsub!(/[łľĺļŀ]/, 'l')
|
38
|
+
n.gsub!(/[ñńňņʼnŋ]/, 'n')
|
39
|
+
n.gsub!(/[òóôõöøōőŏŏ]/, 'o')
|
40
|
+
n.gsub!(/œ/, 'oe')
|
41
|
+
n.gsub!(/ą/, 'q')
|
42
|
+
n.gsub!(/[ŕřŗ]/, 'r')
|
43
|
+
n.gsub!(/[śšşŝș]/, 's')
|
44
|
+
n.gsub!(/[ťţŧț]/, 't')
|
45
|
+
n.gsub!(/[ùúûüūůűŭũų]/, 'u')
|
46
|
+
n.gsub!(/ŵ/, 'w')
|
47
|
+
n.gsub!(/[ýÿŷ]/, 'y')
|
48
|
+
n.gsub!(/[žżź]/, 'z')
|
49
|
+
n.gsub!(/[ÀÁÂÃÄÅĀĂ]/i, 'A')
|
50
|
+
n.gsub!(/Æ/i, 'AE')
|
51
|
+
n.gsub!(/[ĎĐ]/i, 'D')
|
52
|
+
n.gsub!(/[ÇĆČĈĊ]/i, 'C')
|
53
|
+
n.gsub!(/[ÈÉÊËĒĘĚĔĖ]/i, 'E')
|
54
|
+
n.gsub!(/Ƒ/i, 'F')
|
55
|
+
n.gsub!(/[ĜĞĠĢ]/i, 'G')
|
56
|
+
n.gsub!(/[ĤĦ]/i, 'H')
|
57
|
+
n.gsub!(/[ÌÌÍÎÏĪĨĬ]/i, 'I')
|
58
|
+
n.gsub!(/[IJĴ]/i, 'J')
|
59
|
+
n.gsub!(/[Ķĸ]/i, 'J')
|
60
|
+
n.gsub!(/[ŁĽĹĻĿ]/i, 'L')
|
61
|
+
n.gsub!(/[ÑŃŇŅʼnŊ]/i, 'M')
|
62
|
+
n.gsub!(/[ÒÓÔÕÖØŌŐŎŎ]/i, 'N')
|
63
|
+
n.gsub!(/Œ/i, 'OE')
|
64
|
+
n.gsub!(/Ą/i, 'Q')
|
65
|
+
n.gsub!(/[ŔŘŖ]/i, 'R')
|
66
|
+
n.gsub!(/[ŚŠŞŜȘ]/i, 'S')
|
67
|
+
n.gsub!(/[ŤŢŦȚ]/i, 'T')
|
68
|
+
n.gsub!(/[ÙÚÛÜŪŮŰŬŨŲ]/i, 'U')
|
69
|
+
n.gsub!(/Ŵ/i, 'W')
|
70
|
+
n.gsub!(/[ÝŸŶ]/i, 'Y')
|
71
|
+
n.gsub!(/[ŽŻŹ]/i, 'Z')
|
72
|
+
n
|
73
|
+
end
|
74
|
+
|
75
|
+
def sanitize
|
76
|
+
return if self.blank?
|
77
|
+
self.gsub(/<[^>]+>/m,'').normalize.downcase.gsub(/&.+?;/,'-').
|
78
|
+
gsub(/[^a-z0-9 _-]/,'-').strip.gsub(/\s+/,'-').gsub(/-+/,'-').
|
79
|
+
gsub(/-/,' ').strip.gsub(/ /,'-').gsub(/-/,'_')
|
80
|
+
end
|
81
|
+
|
82
|
+
def sanitize_column_name
|
83
|
+
temporal_name = self.sanitize
|
84
|
+
if temporal_name !~ /^[a-zA-Z_]/ || POSTGRESQL_RESERVED_WORDS.include?(self.upcase)
|
85
|
+
return '_' + temporal_name
|
86
|
+
else
|
87
|
+
temporal_name
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import sys
|
2
|
+
import brewery.ds as ds
|
3
|
+
import brewery.dq as dq
|
4
|
+
from chardet.universaldetector import UniversalDetector
|
5
|
+
|
6
|
+
# Re-encode a CSV file as UTF-8, comma-separated output on stdout.
# Usage: csv_normalizer.py <path-to-csv>
filename = sys.argv[1]

# Sniff the character encoding by feeding raw lines to chardet until it is
# confident. The file is opened in a `with` block so the handle is always
# released, even if detection raises.
detector = UniversalDetector()
with open(filename, 'rb') as raw:
    for line in raw:
        detector.feed(line)
        if detector.done: break
detector.close()
encoding = detector.result["encoding"]

# First try a comma delimiter; if that yields a single column, assume the
# file is semicolon-separated and re-open it accordingly.
src = ds.CSVDataSource(filename, read_header = True, encoding=encoding, delimiter=',' )
src.initialize()
if len(src.field_names) == 1:
    src.finalize()
    src = ds.CSVDataSource(filename, read_header = True, encoding=encoding, delimiter=';' )
    src.initialize()

# Stream every record to stdout as UTF-8 CSV with the same field names.
out = ds.CSVDataTarget(sys.stdout, encoding='utf-8')
out.fields = ds.fieldlist(src.field_names)
out.initialize()
for record in src.records():
    out.append(record)
src.finalize()
out.finalize()
|
data/misc/dbfUtils.py
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
import struct, datetime, decimal, itertools
|
2
|
+
|
3
|
+
def dbfreader(f):
    """Returns an iterator over records in a Xbase DBF file.

    The first row returned contains the field names.
    The second row contains field specs: (type, size, decimal places).
    Subsequent rows contain the data records.
    If a record is marked as deleted, it is skipped.

    File should be opened for binary reads.

    Value conversion per field type, as implemented below:
      N -- 0 when blank, decimal.Decimal when the field declares decimal
           places, otherwise the stripped text is returned unchanged
      D -- datetime.date built from the YYYYMMDD text
      L -- 'T', 'F' or '?'
      anything else -- the raw fixed-width text

    Written for Python 2 (uses xrange and itertools.izip).
    """
    # See DBF format spec at:
    #     http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    # File header: skip 4 bytes, then record count (uint32) and header length
    # (uint16); the remaining 22 bytes of the 32-byte header are ignored.
    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    # The header is 32 bytes + one 32-byte descriptor per field + 1 terminator byte.
    numfields = (lenheader - 33) // 32

    fields = []
    for fieldno in xrange(numfields):
        # Field descriptor: 11-byte name, 1-byte type, 4 skipped bytes,
        # size and decimal count (one byte each), 14 skipped bytes.
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace('\0', '')       # eliminate NULs from string
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == '\r'

    # Every record starts with a one-byte deletion flag; model it as an extra
    # leading field so a single struct format covers the whole record.
    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    for i in xrange(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != ' ':
            continue                        # deleted record
        result = []
        for (name, typ, size, deci), value in itertools.izip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == "N":
                value = value.replace('\0', '').lstrip()
                if value == '':
                    value = 0
                elif deci:
                    value = decimal.Decimal(value)
                else:
                    # Integer fields are deliberately left as text here
                    # (no int() conversion is applied).
                    value = value
            elif typ == 'D':
                # NOTE(review): int() raises ValueError on a blank date
                # field -- confirm whether empty dates can occur in inputs.
                y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                value = datetime.date(y, m, d)
            elif typ == 'L':
                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
            result.append(value)
        yield result
|
57
|
+
|
58
|
+
|
59
|
+
def dbfwriter(f, fieldnames, fieldspecs, records):
    """ Write a complete binary dbf file (header, field descriptors, records
    and end-of-file marker) to f. Nothing is returned.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records must be a sequence of records (sequences of field values);
    len(records) is called, so a plain generator will not work.

    Written for Python 2 (uses itertools.izip).
    """
    # header info
    ver = 3
    now = datetime.datetime.now()
    # The header stores the year as an offset from 1900 in a single byte.
    yr, mon, day = now.year-1900, now.month, now.day
    numrec = len(records)
    numfields = len(fieldspecs)
    # 32-byte file header + 32 bytes per field descriptor + 1 terminator byte.
    lenheader = numfields * 32 + 33
    # Sum of the field widths plus one byte for the deletion flag.
    lenrecord = sum(field[1] for field in fieldspecs) + 1
    hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
    f.write(hdr)

    # field specs
    for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
        name = name.ljust(11, '\x00')
        fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
        f.write(fld)

    # terminator
    f.write('\r')

    # records
    for record in records:
        f.write(' ')                        # deletion flag
        for (typ, size, deci), value in itertools.izip(fieldspecs, record):
            if typ == "N":
                value = str(value).rjust(size, ' ')
            elif typ == 'D':
                value = value.strftime('%Y%m%d')
            elif typ == 'L':
                value = str(value)[0].upper()
            else:
                value = str(value)[:size].ljust(size, ' ')
            # Every field must occupy exactly its declared width; N, D and L
            # values are not truncated above, so an oversized value fails here.
            assert len(value) == size
            f.write(value)

    # End of file
    f.write('\x1A')
|
@@ -0,0 +1,58 @@
|
|
1
|
+
from chardet.universaldetector import UniversalDetector
|
2
|
+
import os.path
|
3
|
+
import sys
|
4
|
+
import dbfUtils
|
5
|
+
import sys
|
6
|
+
from osgeo import osr
|
7
|
+
from urllib import urlencode
|
8
|
+
from urllib2 import urlopen
|
9
|
+
import json
|
10
|
+
import subprocess
|
11
|
+
|
12
|
+
# Print the command-line options for loading a shapefile, after detecting
# its SRID and the character encoding of its attribute table.
# Usage: shp_normalizer.py <path-to-shp> <table-name>
shp_file = sys.argv[1]
name = sys.argv[2]

# The sibling files share the shapefile's base name (".shp" is 4 characters).
dbf_file = shp_file[0:-4] + '.dbf'
prj_file = shp_file[0:-4] + '.prj'


#Try detecting the SRID, by default we set to 4326 and hope the best
# The default must be assigned up front: without it, a missing .prj file or
# two failed lookups left `srid` undefined and the final print raised NameError.
srid = 4326
if os.path.isfile(prj_file):
    with open(prj_file, 'r') as prj_filef:
        prj_txt = prj_filef.read()
    srs = osr.SpatialReference()
    srs.ImportFromESRI([prj_txt])
    srs.AutoIdentifyEPSG()
    code = srs.GetAuthorityCode(None)
    if code:
        srid = code
    else:
        #Ok, no luck, lets try with the OpenGeo service
        query = urlencode({
            'exact' : True,
            'error' : True,
            'mode' : 'wkt',
            'terms' : prj_txt})
        webres = urlopen('http://prj2epsg.org/search.json', query)
        jres = json.loads(webres.read())
        if jres['codes']:
            srid = int(jres['codes'][0]['code'])

#Try to detect the encoding
# Feed the textual form of each DBF row to chardet until it is confident.
detector = UniversalDetector()
with open(dbf_file, 'rb') as dbf:
    for row in dbfUtils.dbfreader(dbf):
        detector.feed(str(row))
        if detector.done: break
detector.close()

# Pure ASCII data is reported as LATIN1; the same fallback is used when
# chardet could not determine any encoding (result is None), so that the
# -W option never receives the literal text "None".
encoding = detector.result["encoding"]
if encoding is None or encoding == "ascii":
    encoding = "LATIN1"

print(" -r %s -s 4326 -e -i -I -g the_geom -W %s %s %s" % (srid, encoding, shp_file, name))
|
data/spec/export_spec.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require File.expand_path("../../lib/cartodb-importer", __FILE__)
|
5
|
+
require File.expand_path("../../lib/cartodb-exporter", __FILE__)
|
6
|
+
|
7
|
+
describe CartoDB::Exporter do

  # Connection settings of the test database, shared by every importer and
  # exporter built in this file.
  def connection_options
    { :database => "cartodb_importer_test", :username => 'postgres', :password => '',
      :host => 'localhost', :port => 5432 }
  end

  # Imports a fixture from spec/support/data so there is a table to export.
  def import_fixture(file_name)
    source = File.expand_path("../support/data/#{file_name}", __FILE__)
    CartoDB::Importer.new(connection_options.merge(:import_from_file => source)).import!
  end

  # Exports +table+ in the given format and returns the exporter's result.
  def export_table(table, format)
    CartoDB::Exporter.new(connection_options.merge(:export_to_file => table, :type => format)).export!
  end

  describe "#CSV" do
    it "should export a ZIP archive containing a CSV file to the /tmp directory named with a unique string" do
      import_fixture("clubbing.csv")

      result = export_table('clubbing', 'csv')
      result.name.should == 'clubbing'
      result.import_type.should == '.csv'
    end
  end

  describe "#KML" do
    it "should export a KMZ file to the /tmp directory named with a unique string" do
      import_fixture("clubbing.csv")

      result = export_table('clubbing', 'kml')
      result.name.should == 'clubbing'
      result.import_type.should == '.kml'
    end
  end

  describe "#SHP" do
    it "should export a SHP file set as a ZIP to the /tmp directory named with a unique string" do
      import_fixture("EjemploVizzuality.zip")

      result = export_table('vizzuality_shp', 'shp')
      result.name.should == 'vizzuality_shp'
      result.import_type.should == '.shp'
    end
  end
end
|
data/spec/import_spec.rb
ADDED
@@ -0,0 +1,252 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require File.expand_path("../../lib/cartodb-importer", __FILE__)
|
5
|
+
|
6
|
+
describe CartoDB::Importer do

  # Absolute path of a fixture stored in spec/support/data.
  def data_file(file_name)
    File.expand_path("../support/data/#{file_name}", __FILE__)
  end

  # Importer options for the given fixture against the test database;
  # +extra+ is merged on top (e.g. :suggested_name).
  def import_options(file_name, extra = {})
    { :import_from_file => data_file(file_name),
      :database => "cartodb_importer_test", :username => 'postgres', :password => '',
      :host => 'localhost', :port => 5432 }.merge(extra)
  end

  # Builds an importer for the fixture, runs it and returns its result.
  def import_fixture(file_name, extra = {})
    CartoDB::Importer.new(import_options(file_name, extra)).import!
  end

  # Yields a Sequel connection to the database described by +options+.
  # The block form of Sequel.connect disconnects when the block returns, so
  # examples no longer leave a connection open behind them.
  def with_test_database(options)
    url = "postgres://#{options[:username]}:#{options[:password]}@#{options[:host]}:#{options[:port]}/#{options[:database]}"
    Sequel.connect(url) { |db| yield db }
  end

  it "should raise an error if :import_from_file option is blank" do
    lambda {
      CartoDB::Importer.new
    }.should raise_error("import_from_file value can't be nil")
  end

  it "should get the name from the options" do
    result = import_fixture("clubbing.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name'
    result.rows_imported.should == 1998
    result.import_type.should == '.csv'
  end

  it "should remove the table from the database if an exception happens" do
    options = import_options("empty.csv")
    importer = CartoDB::Importer.new options
    lambda {
      importer.import!
    }.should raise_error

    with_test_database(options) do |db|
      db.tables.should_not include(:empty)
    end
  end

  it "should keep existing tables when trying to import a new one with the same name as an existing one and fails" do
    options = import_options("empty.csv", :suggested_name => "testing")

    # A successful import first creates the "testing" table ...
    importer = CartoDB::Importer.new(options.merge(:import_from_file => data_file("clubbing.csv")))
    result = importer.import!
    result.import_type.should == '.csv'

    # ... which must survive a later failed import that suggests the same name.
    importer = CartoDB::Importer.new(options)
    lambda {
      importer.import!
    }.should raise_error

    with_test_database(options) do |db|
      db.tables.should include(:testing)
    end
  end

  it "should suggest a new table name of the format _n if the previous table exists" do
    result = import_fixture("clubbing.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name'
    result.rows_imported.should == 1998
    result.import_type.should == '.csv'

    result = import_fixture("clubbing.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name_2'
    result.rows_imported.should == 1998
    result.import_type.should == '.csv'
  end

  it "should sanitize column names" do
    options = import_options("twitters.csv", :suggested_name => 'prefered_name')
    importer = CartoDB::Importer.new(options)
    result = importer.import!
    result.name.should == 'prefered_name'
    result.rows_imported.should == 7
    result.import_type.should == '.csv'

    with_test_database(options) do |db|
      db.tables.should include(:prefered_name)
      columns = db.schema(:prefered_name).map{|s| s[0].to_s}
      expected_columns = ["url","login","country","followers_count"]
      (columns & expected_columns).sort.should == expected_columns.sort
    end
  end

  pending "should escape reserved column names" do
    options = import_options("reserved_columns.csv", :suggested_name => 'prefered_name')
    importer = CartoDB::Importer.new(options)
    result = importer.import!
    result.name.should == 'prefered_name'
    result.rows_imported.should == 7
    result.import_type.should == '.csv'

    with_test_database(options) do |db|
      db.tables.should include(:prefered_name)
      columns = db.schema(:prefered_name).map{|s| s[0].to_s}
      expected_columns = ["url","login","country","followers_count", "_xmin"]
      (columns & expected_columns).sort.should == expected_columns.sort
    end
  end

  describe "#ZIP" do
    it "should import CSV even from a ZIP file" do
      result = import_fixture("pino.zip")
      result.name.should == 'data'
      result.rows_imported.should == 4
      result.import_type.should == '.csv'
    end

    it "should import CSV even from a ZIP file with the given name" do
      result = import_fixture("pino.zip", :suggested_name => "table123")
      result.name.should == 'table123'
      result.rows_imported.should == 4
      result.import_type.should == '.csv'
    end
  end

  describe "#CSV" do
    it "should import a CSV file in the given database in a table named like the file" do
      result = import_fixture("clubbing.csv")
      result.name.should == 'clubbing'
      result.rows_imported.should == 1998
      result.import_type.should == '.csv'
    end
    it "should import Food Security Aid Map_projects.csv" do
      result = import_fixture("Food Security Aid Map_projects.csv")
      result.name.should == 'food_security_aid_map_projects'
      result.rows_imported.should == 827
      result.import_type.should == '.csv'
    end
    it "should import world_heritage_list.csv" do
      result = import_fixture("world_heritage_list.csv")
      result.name.should == 'world_heritage_list'
      result.rows_imported.should == 937
      result.import_type.should == '.csv'
    end
    it "should import cp_vizzuality_export.csv" do
      result = import_fixture("cp_vizzuality_export.csv")
      result.name.should == 'cp_vizzuality_export'
      result.rows_imported.should == 19235
      result.import_type.should == '.csv'
    end

    # Not supported by cartodb-importer ~ v0.2.1
    # File in format different than UTF-8
    pending "should import estaciones.csv" do
      result = import_fixture("estaciones.csv")
      result.name.should == 'estaciones'
      result.rows_imported.should == 29
      result.import_type.should == '.csv'
    end
    it "should import estaciones2.csv" do
      result = import_fixture("estaciones2.csv")
      result.name.should == 'estaciones2'
      result.rows_imported.should == 30
      result.import_type.should == '.csv'
    end
  end

  describe "#XLSX" do
    it "should import a XLSX file in the given database in a table named like the file" do
      result = import_fixture("ngos.xlsx")
      result.name.should == 'ngos'
      result.rows_imported.should == 76
      result.import_type.should == '.xlsx'
    end
  end

  describe "#SHP" do
    it "should import a SHP file in the given database in a table named like the file" do
      options = import_options("EjemploVizzuality.zip")

      importer = CartoDB::Importer.new(options)
      result = importer.import!
      result.name.should == 'vizzuality_shp'
      result.rows_imported.should == 11
      result.import_type.should == '.shp'

      with_test_database(options) do |db|
        db.tables.should include(:vizzuality_shp)
        columns = db.schema(:vizzuality_shp).map{|s| s[0].to_s}

        expected_columns = ["gid", "subclass", "x", "y", "length", "area", "angle", "name",
          "pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida", "the_geom"]
        (columns & expected_columns).sort.should == expected_columns.sort
      end
    end

    it "should import SHP file TM_WORLD_BORDERS_SIMPL-0.3.zip" do
      result = import_fixture("TM_WORLD_BORDERS_SIMPL-0.3.zip")
      result.name.should == 'tm_world_borders_simpl_0_3_shp'
      result.rows_imported.should == 246
      result.import_type.should == '.shp'
    end

    it "should import SHP file TM_WORLD_BORDERS_SIMPL-0.3.zip but set the given name" do
      result = import_fixture("TM_WORLD_BORDERS_SIMPL-0.3.zip", :suggested_name => 'borders')
      result.name.should == 'borders'
      result.rows_imported.should == 246
      result.import_type.should == '.shp'
    end
  end
  describe "#GTIFF" do
    it "should import a GTIFF file in the given database in a table named like the file" do
      result = import_fixture("GLOBAL_ELEVATION_SIMPLE.zip")
      result.name.should == 'global_elevation_simple_tif'
      result.rows_imported.should == 1500
      result.import_type.should == '.tif'
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
Bundler.setup
|
6
|
+
require 'sequel'
|
7
|
+
|
8
|
+
require File.expand_path("../support/database_connection", __FILE__)
|
9
|
+
|
10
|
+
RSpec.configure do |config|
  config.mock_with :mocha

  # Before each example, drop every table the connection reports except the
  # ones named below (presumably the spatial metadata tables installed with
  # the database extension -- confirm), so examples start from a clean slate.
  config.before(:each) do
    preserved_tables = %w[raster_columns raster_overviews geography_columns geometry_columns spatial_ref_sys]

    CartoDB::DatabaseConnection.connection.tables.each do |table|
      unless preserved_tables.include?(table.to_s)
        CartoDB::DatabaseConnection.connection.drop_table(table)
      end
    end
  end
end
|