cartodb-importer 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.rvmrc +2 -0
- data/Gemfile +4 -0
- data/README.md +59 -0
- data/Rakefile +2 -0
- data/cartodb-importer.gemspec +33 -0
- data/clubbing.shp +0 -0
- data/lib/cartodb-exporter/exporter.rb +197 -0
- data/lib/cartodb-exporter/version.rb +6 -0
- data/lib/cartodb-exporter.rb +14 -0
- data/lib/cartodb-importer/importer.rb +336 -0
- data/lib/cartodb-importer/version.rb +6 -0
- data/lib/cartodb-importer.rb +14 -0
- data/lib/core_ext/.DS_Store +0 -0
- data/lib/core_ext/blank.rb +3 -0
- data/lib/core_ext/hash.rb +10 -0
- data/lib/core_ext/string.rb +91 -0
- data/misc/csv_normalizer.py +27 -0
- data/misc/dbfUtils.py +113 -0
- data/misc/shp_normalizer.py +58 -0
- data/misc/srid_from_gdal.py +11 -0
- data/spec/export_spec.rb +60 -0
- data/spec/import_spec.rb +252 -0
- data/spec/spec_helper.rb +19 -0
- metadata +184 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
Bundler.setup
|
6
|
+
|
7
|
+
require 'roo'
|
8
|
+
require 'csv'
|
9
|
+
require 'tempfile'
|
10
|
+
require 'ostruct'
|
11
|
+
require 'cartodb-importer/importer'
|
12
|
+
require 'core_ext/string'
|
13
|
+
require 'core_ext/hash'
|
14
|
+
require 'core_ext/blank'
|
Binary file
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
class String

  # Key words PostgreSQL reserves; a column may not use one of them as a bare name.
  POSTGRESQL_RESERVED_WORDS = %w[
    ALL ANALYSE ANALYZE AND ANY ARRAY AS ASC ASYMMETRIC AUTHORIZATION BETWEEN BINARY BOTH CASE CAST
    CHECK COLLATE COLUMN CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP
    CURRENT_USER DEFAULT DEFERRABLE DESC DISTINCT DO ELSE END EXCEPT FALSE FOR FOREIGN FREEZE FROM FULL
    GRANT GROUP HAVING ILIKE IN INITIALLY INNER INTERSECT INTO IS ISNULL JOIN LEADING LEFT LIKE LIMIT LOCALTIME
    LOCALTIMESTAMP NATURAL NEW NOT NOTNULL NULL OFF OFFSET OLD ON ONLY OR ORDER OUTER OVERLAPS PLACING PRIMARY
    REFERENCES RIGHT SELECT SESSION_USER SIMILAR SOME SYMMETRIC TABLE THEN TO TRAILING TRUE UNION UNIQUE USER
    USING VERBOSE WHEN WHERE
  ].freeze

  # Names of PostgreSQL's implicit system columns; they are not key words, but
  # a user column cannot be called like one of them either.
  POSTGRESQL_SYSTEM_COLUMNS = %w[oid tableoid xmin cmin xmax cmax ctid].freeze

  # ASCII replacement => accented characters it stands for.
  # Characters that have a Unicode compatibility decomposition (ij, IJ, 'n,
  # l-middle-dot) are written as \u escapes so that a re-encoded copy of this
  # file cannot silently turn them into plain ASCII letters.
  transliterations = {
    'a'  => 'àáâãäåāăą',
    'ae' => 'æ',
    'c'  => 'çćčĉċ',
    'd'  => 'ďđ',
    'e'  => 'èéêëēęěĕė',
    'f'  => 'ƒ',
    'g'  => 'ĝğġģ',
    'h'  => 'ĥħ',
    'i'  => 'ìíîïīĩĭįı',
    'ij' => "\u0133",
    'j'  => 'ĵ',
    'k'  => 'ķĸ',
    'l'  => "łľĺļ\u0140",
    'n'  => "ñńňņŋ\u0149",
    'o'  => 'òóôõöøōőŏ',
    'oe' => 'œ',
    'r'  => 'ŕřŗ',
    's'  => 'śšşŝș',
    't'  => 'ťţŧț',
    'u'  => 'ùúûüūůűŭũų',
    'w'  => 'ŵ',
    'y'  => 'ýÿŷ',
    'z'  => 'žżź',
    'A'  => 'ÀÁÂÃÄÅĀĂĄ',
    'AE' => 'Æ',
    'C'  => 'ÇĆČĈĊ',
    'D'  => 'ĎĐ',
    'E'  => 'ÈÉÊËĒĘĚĔĖ',
    'F'  => 'Ƒ',
    'G'  => 'ĜĞĠĢ',
    'H'  => 'ĤĦ',
    'I'  => 'ÌÍÎÏĪĨĬĮİ',
    'IJ' => "\u0132",
    'J'  => 'Ĵ',
    'K'  => 'Ķ',
    'L'  => "ŁĽĹĻ\u013F",
    'N'  => 'ÑŃŇŅŊ',
    'O'  => 'ÒÓÔÕÖØŌŐŎ',
    'OE' => 'Œ',
    'R'  => 'ŔŘŖ',
    'S'  => 'ŚŠŞŜȘ',
    'T'  => 'ŤŢŦȚ',
    'U'  => 'ÙÚÛÜŪŮŰŬŨŲ',
    'W'  => 'Ŵ',
    'Y'  => 'ÝŸŶ',
    'Z'  => 'ŽŻŹ'
  }

  # Accented character => ASCII replacement, derived from the table above.
  TRANSLITERATION_TABLE = transliterations.each_with_object({}) do |(ascii, accented), table|
    accented.each_char { |char| table[char] = ascii }
  end.freeze

  # Matches any single character that has an entry in TRANSLITERATION_TABLE.
  TRANSLITERATION_PATTERN = Regexp.union(TRANSLITERATION_TABLE.keys).freeze

  # True when the string is empty or contains only whitespace.
  def blank?
    self !~ /\S/
  end

  # Returns a random string of +length+ lowercase ASCII letters.
  # Letters may repeat, so any length is honoured (shuffling the alphabet, as
  # this method used to do, silently capped the result at 26 characters).
  def self.random(length=10)
    letters = ('a'..'z').to_a
    Array.new(length) { letters[rand(letters.size)] }.join
  end

  # Returns a downcased copy with accented Latin characters replaced by their
  # closest ASCII spelling ("ñandú" => "nandu"). Blank strings yield ''.
  #
  # The receiver is not modified. Uppercase accented characters that survive
  # +downcase+ (it only handles ASCII on old Rubies) map to uppercase ASCII.
  def normalize
    str = self.downcase
    return '' if str.blank?
    # The bytes are assumed to be UTF-8; gsub raises ArgumentError if they are not.
    str.force_encoding("UTF-8").gsub(TRANSLITERATION_PATTERN, TRANSLITERATION_TABLE)
  end

  # Returns an identifier-friendly version of the string: HTML tags removed,
  # accents transliterated, everything outside [a-z0-9_] collapsed into single
  # underscores, and no separator left at either end caused by spaces/dashes.
  # Returns nil when the string is blank.
  def sanitize
    return if self.blank?

    # 1. drop tags, transliterate, turn HTML entities and illegal chars into '-'
    # 2. collapse whitespace and runs of '-' into a single '-'
    # 3. trim separators from both ends (via the ' ' round trip), then use '_'
    self.gsub(/<[^>]+>/m,'').normalize.downcase.gsub(/&.+?;/,'-').
      gsub(/[^a-z0-9 _-]/,'-').strip.gsub(/\s+/,'-').gsub(/-+/,'-').
      gsub(/-/,' ').strip.gsub(/ /,'-').gsub(/-/,'_')
  end

  # Returns the sanitized string as a name that is safe to use for a
  # PostgreSQL column: names that do not start with a letter or underscore,
  # reserved key words and system column names get a leading underscore.
  # A blank string yields '_'.
  def sanitize_column_name
    # sanitize returns nil for blank input; treat that as an empty name
    temporal_name = self.sanitize.to_s
    # The checks run on the sanitized name, so " Select " is caught as well.
    if temporal_name !~ /\A[a-zA-Z_]/ ||
       POSTGRESQL_RESERVED_WORDS.include?(temporal_name.upcase) ||
       POSTGRESQL_SYSTEM_COLUMNS.include?(temporal_name)
      '_' + temporal_name
    else
      temporal_name
    end
  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import sys
|
2
|
+
import brewery.ds as ds
|
3
|
+
import brewery.dq as dq
|
4
|
+
from chardet.universaldetector import UniversalDetector
|
5
|
+
|
6
|
+
# Usage: csv_normalizer.py <csv file>
# Re-emits the CSV file given as first argument on stdout, encoded as UTF-8.
filename = sys.argv[1]

# Guess the character encoding by feeding the raw lines to chardet until it
# is confident (or the file ends). The handle is closed as soon as we are done.
detector = UniversalDetector()
with open(filename, 'rb') as raw_file:
    for line in raw_file:
        detector.feed(line)
        if detector.done: break
detector.close()


def open_source(delimiter):
    """Return an initialized CSV data source reading `filename` with `delimiter`."""
    source = ds.CSVDataSource(filename, read_header = True, encoding=detector.result["encoding"], delimiter=delimiter )
    source.initialize()
    return source


# Try comma-separated first; a single detected column presumably means the
# file uses another separator, so reopen it as semicolon-separated.
src = open_source(',')
if len(src.field_names) == 1:
    src.finalize()
    src = open_source(';')

# Copy every record, unchanged, to a UTF-8 CSV written on stdout.
out = ds.CSVDataTarget(sys.stdout, encoding='utf-8')
out.fields = ds.fieldlist(src.field_names)
out.initialize()
for record in src.records():
    out.append(record)
src.finalize()
out.finalize()
|
data/misc/dbfUtils.py
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
import struct, datetime, decimal, itertools
|
2
|
+
|
3
|
+
def dbfreader(f):
    """Returns an iterator over records in a Xbase DBF file.

    The first row returned contains the field names.
    The second row contains field specs: (type, size, decimal places).
    Subsequent rows contain the data records.
    If a record is marked as deleted, it is skipped.

    File should be opened for binary reads.

    Names, types and character data are yielded as byte strings exactly as
    stored in the file (plain `str` on Python 2, `bytes` on Python 3).
    Field values are converted as follows:
        N  blank -> 0; with decimal places -> decimal.Decimal; otherwise the
           digits are returned as a left-stripped byte string
        D  datetime.date, or None when the field is blank
        L  'T', 'F' or '?' (as a byte string)
        anything else is returned untouched

    Raises AssertionError when the header terminator is missing and
    struct.error when the file ends before the announced number of records.
    """
    # See DBF format spec at:
    #     http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    # Header: record count and header length; the rest is skipped.
    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    # 32 bytes of fixed header + 1 terminator byte, 32 bytes per field.
    numfields = (lenheader - 33) // 32

    fields = []
    for _ in range(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace(b'\0', b'')       # eliminate NULs from string
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == b'\r'

    # Every record starts with a one-byte deletion flag followed by the
    # fixed-width fields, so one struct format describes a whole record.
    fmt = '1s' + ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    # itertools.repeat keeps the loop lazy on both Python 2 and 3.
    for _ in itertools.repeat(None, numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != b' ':
            continue                        # deleted record
        result = []
        for (name, typ, size, deci), value in zip(fields, record[1:]):
            if typ == b"N":
                value = value.replace(b'\0', b'').lstrip()
                if value == b'':
                    value = 0
                elif deci:
                    # Decimal needs text; Python 2 byte strings already are.
                    text = value if isinstance(value, str) else value.decode('ascii')
                    value = decimal.Decimal(text)
                # integers without decimals are deliberately left as digits
            elif typ == b'D':
                if not value.strip(b' \0'):
                    # blank date: no value was stored for this record
                    value = None
                else:
                    y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                    value = datetime.date(y, m, d)
            elif typ == b'L':
                value = (value in b'YyTt' and b'T') or (value in b'NnFf' and b'F') or b'?'
            result.append(value)
        yield result
|
57
|
+
|
58
|
+
|
59
|
+
def dbfwriter(f, fieldnames, fieldspecs, records):
    """ Write a complete dbf file (header, field specs, records) to f.

    Nothing is returned; all output goes to f.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \\x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records can be an iterable over the records (sequences of field values);
    it is read into memory first because the header stores the record count.

    Raises AssertionError when a formatted value does not fit its field size.
    """
    def as_bytes(value):
        # Byte strings pass through untouched; anything else is rendered with
        # str() and, where that yields text (Python 3), encoded as ASCII.
        if isinstance(value, bytes):
            return value
        text = str(value)
        return text if isinstance(text, bytes) else text.encode('ascii')

    # Generators have no len(); materialize so any iterable is accepted.
    records = list(records)

    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year-1900, now.month, now.day
    numrec = len(records)
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1   # +1: deletion flag
    hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
    f.write(hdr)

    # field specs
    for name, (typ, size, deci) in zip(fieldnames, fieldspecs):
        name = as_bytes(name).ljust(11, b'\x00')
        fld = struct.pack('<11sc4xBB14x', name, as_bytes(typ), size, deci)
        f.write(fld)

    # terminator
    f.write(b'\r')

    # records
    for record in records:
        f.write(b' ')                        # deletion flag
        for (typ, size, deci), value in zip(fieldspecs, record):
            typ = as_bytes(typ)
            if typ == b"N":
                value = as_bytes(value).rjust(size, b' ')
            elif typ == b'D':
                value = as_bytes(value.strftime('%Y%m%d'))
            elif typ == b'L':
                # slice (not index) so the result stays a byte string
                value = as_bytes(value)[0:1].upper()
            else:
                value = as_bytes(value)[:size].ljust(size, b' ')
            assert len(value) == size
            f.write(value)

    # End of file
    f.write(b'\x1A')
|
@@ -0,0 +1,58 @@
|
|
1
|
+
from chardet.universaldetector import UniversalDetector
|
2
|
+
import os.path
|
3
|
+
import sys
|
4
|
+
import dbfUtils
|
5
|
+
import sys
|
6
|
+
from osgeo import osr
|
7
|
+
from urllib import urlencode
|
8
|
+
from urllib2 import urlopen
|
9
|
+
import json
|
10
|
+
import subprocess
|
11
|
+
|
12
|
+
# Usage: shp_normalizer.py <file.shp> <table name>
# Prints, on stdout, the argument string built at the end of this script
# (source SRID, DBF encoding, shapefile path and table name).
shp_file = sys.argv[1]
name = sys.argv[2]

# The companion files share the shapefile's path minus its 4-char extension.
dbf_file = shp_file[0:-4] + '.dbf'
prj_file = shp_file[0:-4] + '.prj'


# Try detecting the SRID from the .prj file. There is deliberately no default:
# when nothing can be determined the script stops with an error further down.
srid = None
if os.path.isfile(prj_file):
    prj_filef = open(prj_file, 'r')
    try:
        prj_txt = prj_filef.read()
    finally:
        prj_filef.close()
    srs = osr.SpatialReference()
    srs.ImportFromESRI([prj_txt])
    srs.AutoIdentifyEPSG()
    code = srs.GetAuthorityCode(None)
    if code:
        srid = code
    else:
        #Ok, no luck, lets try with the OpenGeo service
        query = urlencode({
            'exact' : True,
            'error' : True,
            'mode' : 'wkt',
            'terms' : prj_txt})
        webres = urlopen('http://prj2epsg.org/search.json', query)
        jres = json.loads(webres.read())
        if jres['codes']:
            srid = int(jres['codes'][0]['code'])

if srid is None:
    # Previously this surfaced as a NameError on the final line. Keep failing
    # (non-zero exit, nothing on stdout) but say why.
    sys.exit("shp_normalizer: unable to determine the SRID of %s" % shp_file)

#Try to detect the encoding
detector = UniversalDetector()
dbf = open(dbf_file, 'rb')
try:
    # Feed the textual form of each row until chardet is confident.
    for row in dbfUtils.dbfreader(dbf):
        detector.feed(str(row))
        if detector.done: break
    detector.close()
finally:
    dbf.close()

encoding = detector.result["encoding"]
# Plain ASCII and "could not tell" (None) are both handled as LATIN1, which
# also covers ASCII; printing the literal "None" would not name an encoding.
if encoding is None or encoding=="ascii":
    encoding="LATIN1"

sys.stdout.write(" -r %s -s 4326 -e -i -I -g the_geom -W %s %s %s" %(srid,encoding,shp_file,name) + "\n")
|
data/spec/export_spec.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require File.expand_path("../../lib/cartodb-importer", __FILE__)
|
5
|
+
require File.expand_path("../../lib/cartodb-exporter", __FILE__)
|
6
|
+
|
7
|
+
describe CartoDB::Exporter do

  # Connection settings of the test database shared by importer and exporter.
  def database_options
    { :database => "cartodb_importer_test", :username => 'postgres', :password => '',
      :host => 'localhost', :port => 5432 }
  end

  # Imports the fixture spec/support/data/<file_name> so there is a table to export.
  def import_fixture(file_name)
    path = File.expand_path("../support/data/#{file_name}", __FILE__)
    CartoDB::Importer.new(database_options.merge(:import_from_file => path)).import!
  end

  # Exports +table_name+ in the given format and returns the exporter's result.
  def export_table(table_name, type)
    CartoDB::Exporter.new(database_options.merge(:export_to_file => table_name, :type => type)).export!
  end

  describe "#CSV" do
    it "should export a ZIP archive containing a CSV file to the /tmp directory named with a unique string" do
      import_fixture("clubbing.csv")

      result = export_table('clubbing', 'csv')
      result.name.should == 'clubbing'
      result.import_type.should == '.csv'
    end
  end

  describe "#KML" do
    it "should export a KMZ file to the /tmp directory named with a unique string" do
      import_fixture("clubbing.csv")

      result = export_table('clubbing', 'kml')
      result.name.should == 'clubbing'
      result.import_type.should == '.kml'
    end
  end

  describe "#SHP" do
    it "should export a SHP file set as a ZIP to the /tmp directory named with a unique string" do
      import_fixture("EjemploVizzuality.zip")

      result = export_table('vizzuality_shp', 'shp')
      result.name.should == 'vizzuality_shp'
      result.import_type.should == '.shp'
    end
  end
end
|
data/spec/import_spec.rb
ADDED
@@ -0,0 +1,252 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
require File.expand_path("../../lib/cartodb-importer", __FILE__)
|
5
|
+
|
6
|
+
describe CartoDB::Importer do

  # Connection settings of the test database used by every example.
  def database_options
    { :database => "cartodb_importer_test", :username => 'postgres', :password => '',
      :host => 'localhost', :port => 5432 }
  end

  # Importer options for the fixture spec/support/data/<file_name>;
  # +extra+ adds or overrides options such as :suggested_name.
  def import_options(file_name, extra = {})
    path = File.expand_path("../support/data/#{file_name}", __FILE__)
    database_options.merge(:import_from_file => path).merge(extra)
  end

  # Imports a fixture and returns the importer's result.
  def import_fixture(file_name, extra = {})
    CartoDB::Importer.new(import_options(file_name, extra)).import!
  end

  # Yields a Sequel connection to the test database. The block form of
  # Sequel.connect disconnects when the block exits, even if an expectation
  # inside it fails, so examples no longer leave connections open.
  def with_database
    opts = database_options
    url = "postgres://#{opts[:username]}:#{opts[:password]}@#{opts[:host]}:#{opts[:port]}/#{opts[:database]}"
    Sequel.connect(url) { |db| yield db }
  end

  # Names of the columns of +table+, as strings.
  def column_names(db, table)
    db.schema(table).map{|s| s[0].to_s}
  end

  it "should raise an error if :import_from_file option is blank" do
    lambda {
      CartoDB::Importer.new
    }.should raise_error("import_from_file value can't be nil")
  end

  it "should get the name from the options" do
    result = import_fixture("clubbing.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name'
    result.rows_imported.should == 1998
    result.import_type.should == '.csv'
  end

  it "should remove the table from the database if an exception happens" do
    importer = CartoDB::Importer.new(import_options("empty.csv"))
    lambda {
      importer.import!
    }.should raise_error

    with_database do |db|
      db.tables.should_not include(:empty)
    end
  end

  it "should keep existing tables when trying to import a new one with the same name as an existing one and fails" do
    result = import_fixture("clubbing.csv", :suggested_name => "testing")
    result.import_type.should == '.csv'

    importer = CartoDB::Importer.new(import_options("empty.csv", :suggested_name => "testing"))
    lambda {
      importer.import!
    }.should raise_error

    with_database do |db|
      db.tables.should include(:testing)
    end
  end

  it "should suggest a new table name of the format _n if the previous table exists" do
    result = import_fixture("clubbing.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name'
    result.rows_imported.should == 1998
    result.import_type.should == '.csv'

    result = import_fixture("clubbing.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name_2'
    result.rows_imported.should == 1998
    result.import_type.should == '.csv'
  end

  it "should sanitize column names" do
    result = import_fixture("twitters.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name'
    result.rows_imported.should == 7
    result.import_type.should == '.csv'

    with_database do |db|
      db.tables.should include(:prefered_name)
      columns = column_names(db, :prefered_name)
      expected_columns = ["url","login","country","followers_count"]
      (columns & expected_columns).sort.should == expected_columns.sort
    end
  end

  pending "should escape reserved column names" do
    result = import_fixture("reserved_columns.csv", :suggested_name => 'prefered_name')
    result.name.should == 'prefered_name'
    result.rows_imported.should == 7
    result.import_type.should == '.csv'

    with_database do |db|
      db.tables.should include(:prefered_name)
      columns = column_names(db, :prefered_name)
      expected_columns = ["url","login","country","followers_count", "_xmin"]
      (columns & expected_columns).sort.should == expected_columns.sort
    end
  end

  describe "#ZIP" do
    it "should import CSV even from a ZIP file" do
      result = import_fixture("pino.zip")
      result.name.should == 'data'
      result.rows_imported.should == 4
      result.import_type.should == '.csv'
    end

    it "should import CSV even from a ZIP file with the given name" do
      result = import_fixture("pino.zip", :suggested_name => "table123")
      result.name.should == 'table123'
      result.rows_imported.should == 4
      result.import_type.should == '.csv'
    end
  end

  describe "#CSV" do
    it "should import a CSV file in the given database in a table named like the file" do
      result = import_fixture("clubbing.csv")
      result.name.should == 'clubbing'
      result.rows_imported.should == 1998
      result.import_type.should == '.csv'
    end

    it "should import Food Security Aid Map_projects.csv" do
      result = import_fixture("Food Security Aid Map_projects.csv")
      result.name.should == 'food_security_aid_map_projects'
      result.rows_imported.should == 827
      result.import_type.should == '.csv'
    end

    it "should import world_heritage_list.csv" do
      result = import_fixture("world_heritage_list.csv")
      result.name.should == 'world_heritage_list'
      result.rows_imported.should == 937
      result.import_type.should == '.csv'
    end

    it "should import cp_vizzuality_export.csv" do
      result = import_fixture("cp_vizzuality_export.csv")
      result.name.should == 'cp_vizzuality_export'
      result.rows_imported.should == 19235
      result.import_type.should == '.csv'
    end

    # Not supported by cartodb-importer ~ v0.2.1
    # File in format different than UTF-8
    pending "should import estaciones.csv" do
      result = import_fixture("estaciones.csv")
      result.name.should == 'estaciones'
      result.rows_imported.should == 29
      result.import_type.should == '.csv'
    end

    it "should import estaciones2.csv" do
      result = import_fixture("estaciones2.csv")
      result.name.should == 'estaciones2'
      result.rows_imported.should == 30
      result.import_type.should == '.csv'
    end
  end

  describe "#XLSX" do
    it "should import a XLSX file in the given database in a table named like the file" do
      result = import_fixture("ngos.xlsx")
      result.name.should == 'ngos'
      result.rows_imported.should == 76
      result.import_type.should == '.xlsx'
    end
  end

  describe "#SHP" do
    it "should import a SHP file in the given database in a table named like the file" do
      result = import_fixture("EjemploVizzuality.zip")
      result.name.should == 'vizzuality_shp'
      result.rows_imported.should == 11
      result.import_type.should == '.shp'

      with_database do |db|
        db.tables.should include(:vizzuality_shp)
        columns = column_names(db, :vizzuality_shp)

        expected_columns = ["gid", "subclass", "x", "y", "length", "area", "angle", "name",
                            "pid", "lot_navteq", "version_na", "vitesse_sp", "id", "nombrerest", "tipocomida", "the_geom"]
        (columns & expected_columns).sort.should == expected_columns.sort
      end
    end

    it "should import SHP file TM_WORLD_BORDERS_SIMPL-0.3.zip" do
      result = import_fixture("TM_WORLD_BORDERS_SIMPL-0.3.zip")
      result.name.should == 'tm_world_borders_simpl_0_3_shp'
      result.rows_imported.should == 246
      result.import_type.should == '.shp'
    end

    it "should import SHP file TM_WORLD_BORDERS_SIMPL-0.3.zip but set the given name" do
      result = import_fixture("TM_WORLD_BORDERS_SIMPL-0.3.zip", :suggested_name => 'borders')
      result.name.should == 'borders'
      result.rows_imported.should == 246
      result.import_type.should == '.shp'
    end
  end

  describe "#GTIFF" do
    it "should import a GTIFF file in the given database in a table named like the file" do
      result = import_fixture("GLOBAL_ELEVATION_SIMPLE.zip")
      result.name.should == 'global_elevation_simple_tif'
      result.rows_imported.should == 1500
      result.import_type.should == '.tif'
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# coding: UTF-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
Bundler.setup
|
6
|
+
require 'sequel'
|
7
|
+
|
8
|
+
require File.expand_path("../support/database_connection", __FILE__)
|
9
|
+
|
10
|
+
RSpec.configure do |config|
  config.mock_with :mocha

  # Start every example from a clean database: drop each table except the
  # ones named below (presumably the spatial extension's own metadata tables).
  config.before(:each) do
    database = CartoDB::DatabaseConnection
    database.connection.tables.each do |table_name|
      preserved = %W{ raster_columns raster_overviews geography_columns geometry_columns spatial_ref_sys }
      database.connection.drop_table(table_name) unless preserved.include?(table_name.to_s)
    end
  end
end
|