geo_coder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +12 -0
- data/Gemfile.lock +32 -0
- data/History.txt +6 -0
- data/Makefile +13 -0
- data/Manifest.txt +18 -0
- data/README.rdoc +197 -0
- data/Rakefile +53 -0
- data/TODO.txt +8 -0
- data/VERSION +1 -0
- data/bin/build_indexes +8 -0
- data/bin/rebuild_cluster +22 -0
- data/bin/rebuild_metaphones +23 -0
- data/bin/tiger_import +59 -0
- data/demos/demo/app/ext/geocodewrap.rb +84 -0
- data/demos/demo/app/views/index.builder +13 -0
- data/demos/demo/app/views/index.erb +71 -0
- data/demos/demo/config.ru +12 -0
- data/demos/demo/config/bootstraps.rb +130 -0
- data/demos/demo/config/geoenvironment.rb +25 -0
- data/demos/demo/geocoder_helper.rb +12 -0
- data/demos/demo/geocom_geocode.rb +10 -0
- data/demos/demo/main.rb +3 -0
- data/demos/demo/rakefile.rb +17 -0
- data/demos/demo/tmp/restart.txt +0 -0
- data/demos/simpledemo/views/index.builder +13 -0
- data/demos/simpledemo/views/index.erb +69 -0
- data/demos/simpledemo/ws.rb +83 -0
- data/doc/Makefile +7 -0
- data/doc/html4css1.css +279 -0
- data/doc/lookup.rst +193 -0
- data/doc/parsing.rst +125 -0
- data/doc/voidspace.css +147 -0
- data/geo_coder.gemspec +172 -0
- data/lib/geocoder/us.rb +21 -0
- data/lib/geocoder/us/address.rb +290 -0
- data/lib/geocoder/us/constants.rb +670 -0
- data/lib/geocoder/us/database.rb +745 -0
- data/lib/geocoder/us/import.rb +181 -0
- data/lib/geocoder/us/import/tiger.rb +13 -0
- data/lib/geocoder/us/numbers.rb +58 -0
- data/navteq/README +4 -0
- data/navteq/convert.sql +37 -0
- data/navteq/navteq_import +39 -0
- data/navteq/prepare.sql +92 -0
- data/sql/cluster.sql +16 -0
- data/sql/convert.sql +80 -0
- data/sql/create.sql +37 -0
- data/sql/index.sql +12 -0
- data/sql/place.csv +104944 -0
- data/sql/place.sql +104948 -0
- data/sql/setup.sql +78 -0
- data/src/Makefile +13 -0
- data/src/README +14 -0
- data/src/liblwgeom/Makefile +75 -0
- data/src/liblwgeom/box2d.c +54 -0
- data/src/liblwgeom/lex.yy.c +4799 -0
- data/src/liblwgeom/liblwgeom.h +1405 -0
- data/src/liblwgeom/lwalgorithm.c +946 -0
- data/src/liblwgeom/lwalgorithm.h +52 -0
- data/src/liblwgeom/lwcircstring.c +759 -0
- data/src/liblwgeom/lwcollection.c +541 -0
- data/src/liblwgeom/lwcompound.c +118 -0
- data/src/liblwgeom/lwcurvepoly.c +86 -0
- data/src/liblwgeom/lwgeom.c +886 -0
- data/src/liblwgeom/lwgeom_api.c +2201 -0
- data/src/liblwgeom/lwgparse.c +1219 -0
- data/src/liblwgeom/lwgunparse.c +1054 -0
- data/src/liblwgeom/lwline.c +525 -0
- data/src/liblwgeom/lwmcurve.c +125 -0
- data/src/liblwgeom/lwmline.c +137 -0
- data/src/liblwgeom/lwmpoint.c +138 -0
- data/src/liblwgeom/lwmpoly.c +141 -0
- data/src/liblwgeom/lwmsurface.c +129 -0
- data/src/liblwgeom/lwpoint.c +439 -0
- data/src/liblwgeom/lwpoly.c +579 -0
- data/src/liblwgeom/lwsegmentize.c +1047 -0
- data/src/liblwgeom/lwutil.c +369 -0
- data/src/liblwgeom/measures.c +861 -0
- data/src/liblwgeom/postgis_config.h +93 -0
- data/src/liblwgeom/ptarray.c +847 -0
- data/src/liblwgeom/vsprintf.c +179 -0
- data/src/liblwgeom/wktparse.h +126 -0
- data/src/liblwgeom/wktparse.lex +74 -0
- data/src/liblwgeom/wktparse.tab.c +2353 -0
- data/src/liblwgeom/wktparse.tab.h +145 -0
- data/src/liblwgeom/wktparse.y +385 -0
- data/src/libsqlite3_geocoder/Makefile +22 -0
- data/src/libsqlite3_geocoder/Makefile.nix +15 -0
- data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
- data/src/libsqlite3_geocoder/extension.c +121 -0
- data/src/libsqlite3_geocoder/extension.h +13 -0
- data/src/libsqlite3_geocoder/levenshtein.c +42 -0
- data/src/libsqlite3_geocoder/metaphon.c +278 -0
- data/src/libsqlite3_geocoder/util.c +37 -0
- data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
- data/src/metaphone/Makefile +7 -0
- data/src/metaphone/README +49 -0
- data/src/metaphone/extension.c +37 -0
- data/src/metaphone/metaphon.c +251 -0
- data/src/shp2sqlite/Makefile +37 -0
- data/src/shp2sqlite/Makefile.nix +36 -0
- data/src/shp2sqlite/Makefile.redhat +35 -0
- data/src/shp2sqlite/dbfopen.c +1595 -0
- data/src/shp2sqlite/getopt.c +695 -0
- data/src/shp2sqlite/getopt.h +127 -0
- data/src/shp2sqlite/shapefil.h +500 -0
- data/src/shp2sqlite/shp2sqlite.c +1974 -0
- data/src/shp2sqlite/shpopen.c +1894 -0
- data/tests/address.rb +236 -0
- data/tests/benchmark.rb +20 -0
- data/tests/constants.rb +57 -0
- data/tests/data/address-sample.csv +52 -0
- data/tests/data/db-test.csv +57 -0
- data/tests/data/locations.csv +4 -0
- data/tests/database.rb +137 -0
- data/tests/generate.rb +34 -0
- data/tests/numbers.rb +46 -0
- data/tests/run.rb +11 -0
- metadata +237 -0
data/Gemfile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
gem "sqlite3-ruby", "= 1.2.5"
|
3
|
+
gem "text"
|
4
|
+
|
5
|
+
# Add dependencies to develop your gem here.
|
6
|
+
# Include everything needed to run rake, tests, features, etc.
|
7
|
+
group :development do
|
8
|
+
gem "bundler", "~> 1.0.0"
|
9
|
+
gem "jeweler", "~> 1.6.4"
|
10
|
+
gem "rcov"
|
11
|
+
gem "rspec"
|
12
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
diff-lcs (1.1.3)
|
5
|
+
git (1.2.5)
|
6
|
+
jeweler (1.6.4)
|
7
|
+
bundler (~> 1.0)
|
8
|
+
git (>= 1.2.5)
|
9
|
+
rake
|
10
|
+
rake (0.9.2)
|
11
|
+
rcov (0.9.10)
|
12
|
+
rspec (2.6.0)
|
13
|
+
rspec-core (~> 2.6.0)
|
14
|
+
rspec-expectations (~> 2.6.0)
|
15
|
+
rspec-mocks (~> 2.6.0)
|
16
|
+
rspec-core (2.6.4)
|
17
|
+
rspec-expectations (2.6.0)
|
18
|
+
diff-lcs (~> 1.1.2)
|
19
|
+
rspec-mocks (2.6.0)
|
20
|
+
sqlite3-ruby (1.2.5)
|
21
|
+
text (0.2.0)
|
22
|
+
|
23
|
+
PLATFORMS
|
24
|
+
ruby
|
25
|
+
|
26
|
+
DEPENDENCIES
|
27
|
+
bundler (~> 1.0.0)
|
28
|
+
jeweler (~> 1.6.4)
|
29
|
+
rcov
|
30
|
+
rspec
|
31
|
+
sqlite3-ruby (= 1.2.5)
|
32
|
+
text
|
data/History.txt
ADDED
data/Makefile
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
lib/geocoder/us/database.rb
|
6
|
+
lib/geocoder/us/numbers.rb
|
7
|
+
lib/geocoder/us/address.rb
|
8
|
+
lib/geocoder/us/constants.rb
|
9
|
+
tests/database.rb
|
10
|
+
tests/numbers.rb
|
11
|
+
tests/generate.rb
|
12
|
+
tests/run.rb
|
13
|
+
tests/address.rb
|
14
|
+
tests/benchmark.rb
|
15
|
+
tests/constants.rb
|
16
|
+
tests/data/address-sample.csv
|
17
|
+
tests/data/locations.csv
|
18
|
+
tests/data/db-test.csv
|
data/README.rdoc
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
= Geocoder::US
|
2
|
+
|
3
|
+
Geocoder::US 2.0 is a software package designed to geocode US street
|
4
|
+
addresses. Although it is primarily intended for use with the US Census
|
5
|
+
Bureau's free TIGER/Line dataset, it uses an abstract US address data model
|
6
|
+
that can be employed with other sources of US street address range data.
|
7
|
+
|
8
|
+
Geocoder::US 2.0 implements a Ruby interface to parse US street addresses, and
|
9
|
+
perform fuzzy lookup against an SQLite 3 database. Geocoder::US is designed to
|
10
|
+
return the best matches found, with geographic coordinates interpolated from
|
11
|
+
the street range dataset. Geocoder::US will fill in missing information, and
|
12
|
+
it knows about standard and common non-standard postal abbreviations, ordinal
|
13
|
+
versus cardinal numbers, and more.
|
14
|
+
|
15
|
+
Geocoder::US 2.0 is shipped with a free US ZIP code data set, compiled from
|
16
|
+
public domain sources.
|
17
|
+
|
18
|
+
== Synopsis
|
19
|
+
|
20
|
+
>> require 'geocoder/us'
|
21
|
+
>> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db")
|
22
|
+
>> p db.geocode("1600 Pennsylvania Av, Washington DC")
|
23
|
+
|
24
|
+
[{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502",
|
25
|
+
:lon=>-77.037528, :number=>"1600", :fips_county=>"11001", :predir=>"",
|
26
|
+
:precision=>:range, :city=>"Washington", :lat=>38.898746, :suftyp=>"Ave",
|
27
|
+
:state=>"DC", :prequal=>"", :sufqual=>"", :score=>0.906, :prenum=>""}]
|
28
|
+
|
29
|
+
== Prerequisites
|
30
|
+
|
31
|
+
To build Geocoder::US, you will need gcc/g++, make, bash or equivalent, the
|
32
|
+
standard *NIX 'unzip' utility, and the SQLite 3 executable and development
|
33
|
+
files installed on your system.
|
34
|
+
|
35
|
+
To use the Ruby interface, you will need the 'Text' gem installed from
|
36
|
+
rubyforge. To run the tests, you will also need the 'fastercsv' gem.
|
37
|
+
|
38
|
+
Additionally, you will need a custom build of the 'sqlite3-ruby' gem that
|
39
|
+
supports loading extension modules in SQLite. You can get a patched version of
|
40
|
+
this gem from http://github.com/schuyler/sqlite3-ruby/. Until the sqlite3-ruby
|
41
|
+
maintainers roll in the relevant patch, you will need *this* version.
|
42
|
+
|
43
|
+
*NOTE*: If you do not have /usr/include/sqlite3ext.h installed, then your
|
44
|
+
sqlite3 binaries are probably not configured to support dynamic extension
|
45
|
+
loading. If not, you *must* compile and install SQLite from source, or rebuild
|
46
|
+
your system packages. This is not believed to be a problem on Debian/Ubuntu,
|
47
|
+
but is known to be a problem with Red Hat/CentOS.
|
48
|
+
|
49
|
+
*NOTE*: If you *do* have to install from source, make sure that the
|
50
|
+
source-installed 'sqlite3' program is in your path before proceeding (and not
|
51
|
+
the system-installed version), using `which sqlite3`. Also, be sure that you've
|
52
|
+
added your source install prefix (usually /usr/local) to /etc/ld.so.conf (or
|
53
|
+
its moral equivalent) and that you've run /sbin/ldconfig.
|
54
|
+
|
55
|
+
== Thread safety
|
56
|
+
|
57
|
+
SQLite 3 is not designed for concurrent use of a single database handle across
|
58
|
+
multiple threads. Therefore, to prevent segfaults, Geocoder::US::Database
|
59
|
+
implements a global mutex that wraps all database access. The use of this mutex
|
60
|
+
will ensure stability in multi-threaded applications, but incurs a performance
|
61
|
+
penalty. However, since the database is read-only from Ruby, there's no reason
|
62
|
+
in principle why multi-threaded apps can't each have their own database handle.
|
63
|
+
|
64
|
+
To disable the mutex for better performance, you can do the following:
|
65
|
+
|
66
|
+
* Read the following and make sure you understand them:
|
67
|
+
* http://www.sqlite.org/faq.html#q6
|
68
|
+
* http://www.sqlite.org/cvstrac/wiki?p=MultiThreading
|
69
|
+
* Make sure you have compiled SQLite 3 with thread safety enabled.
|
70
|
+
* Instantiate a separate Geocoder::US::Database object for *each* thread
|
71
|
+
in your Ruby script, and pass :threadsafe => true to new() to disable mutex
|
72
|
+
synchronization.
|
73
|
+
|
74
|
+
Per the SQLite 3 documentation, do *not* attempt to retain a
|
75
|
+
Geocoder::US::Database object across a fork! "Problems will result if you do."
|
76
|
+
|
77
|
+
== Building Geocoder::US
|
78
|
+
|
79
|
+
Unpack the source and run 'make'. This will compile the SQLite 3 extension
|
80
|
+
needed by Geocoder::US, the Shapefile import utility, and the Geocoder-US
|
81
|
+
gem.
|
82
|
+
|
83
|
+
You can run 'make install' as root to install the gem systemwide.
|
84
|
+
|
85
|
+
== Generating a Geocoder::US Database
|
86
|
+
|
87
|
+
Build the package from source as described above. Generating the database
|
88
|
+
involves three basic steps:
|
89
|
+
|
90
|
+
* Import the Shapefile data into an SQLite database.
|
91
|
+
* Build the database indexes.
|
92
|
+
* Optionally, rebuild the database to cluster indexed rows.
|
93
|
+
|
94
|
+
We will presume that you are building a Geocoder::US database from TIGER/Line,
|
95
|
+
and that you have obtained the complete set of TIGER/Line ZIP files, and put
|
96
|
+
the entire tree in /opt/tiger. Please adjust these instructions as needed.
|
97
|
+
|
98
|
+
A full TIGER/Line database import takes ten hours to run on a normal Amazon
|
99
|
+
EC2 instance, and takes up a little over 5 gigabytes after all is said and
|
100
|
+
done. You will need to have at least 12 gigabytes of free disk space *after*
|
101
|
+
downloading the TIGER/Line dataset, if you are building the full database.
|
102
|
+
|
103
|
+
=== Import TIGER/Line
|
104
|
+
|
105
|
+
From inside the Geocoder::US source tree, run the following:
|
106
|
+
|
107
|
+
$ bin/tiger_import /opt/tiger/geocoder.db /opt/tiger
|
108
|
+
|
109
|
+
This will unpack each TIGER/Line ZIP file to a temporary directory, and
|
110
|
+
perform the extract/transform/load sequence to incrementally build the
|
111
|
+
database. The process takes about 10-12 hours on a normal Amazon EC2 instance,
|
112
|
+
or about 5 CPU hours flat out on a modern PC. Note that not all TIGER/Line
|
113
|
+
source files contain address range information, so you will see error messages
|
114
|
+
for some counties, but this is normal.
|
115
|
+
|
116
|
+
If you only want to import specific counties, you can pipe a list of
|
117
|
+
TIGER/Line county directories to tiger_import on stdin. For example,
|
118
|
+
the following will install just the data for the state of Delaware:
|
119
|
+
|
120
|
+
$ ls -d /opt/tiger/10_DELAWARE/1* | bin/tiger_import ~/delaware.db
|
121
|
+
|
122
|
+
The tiger_import process uses a binary utility, shp2sqlite, which is derived
|
123
|
+
from shp2pgsql, which ships with PostGIS. The shp2sqlite utility converts
|
124
|
+
.shp and .dbf files into SQL suitable for import into SQLite. This SQL
|
125
|
+
is then piped into the sqlite3 command line tool, where it is loaded into
|
126
|
+
temporary tables, and then a set of static SQL statements (kept in the sql/
|
127
|
+
directory) are used to transform this data and import it into the database
|
128
|
+
itself.
|
129
|
+
|
130
|
+
=== Build metaphones using Ruby metaphone
|
131
|
+
|
132
|
+
run bin/rebuild_metaphones /opt/tiger/geocoder.db
|
133
|
+
|
134
|
+
This creates the metaphones using Ruby's metaphone function and will produce better geocoding results.
|
135
|
+
|
136
|
+
=== Build the indexes
|
137
|
+
|
138
|
+
After the database import is complete, you will want to construct the database
|
139
|
+
indexes:
|
140
|
+
|
141
|
+
$ bin/build_indexes /opt/tiger/geocoder.db
|
142
|
+
|
143
|
+
This process takes 25 minutes on an EC2 instance (8 CPU minutes), but it's a
|
144
|
+
*lot* faster than building the indexes incrementally during the import
|
145
|
+
process. Basically, this process simply feeds SQL statements to the sqlite3
|
146
|
+
utility to construct the indexes on the existing database.
|
147
|
+
|
148
|
+
=== Cluster the database tables (optional)
|
149
|
+
|
150
|
+
As a final optional step, you can cluster the database tables according to
|
151
|
+
their indexes, which will make the database smaller, and lookups faster. This
|
152
|
+
process will take an hour or two, and may be a micro-optimization.
|
153
|
+
|
154
|
+
$ bin/rebuild_cluster /opt/tiger/geocoder.db
|
155
|
+
|
156
|
+
You will need as much free disk space to run rebuild_cluster as the database
|
157
|
+
takes up, because the process essentially reconstructs the database in a new
|
158
|
+
file, and then it renames the new database over top of the old.
|
159
|
+
|
160
|
+
== Running the unit tests
|
161
|
+
|
162
|
+
From within the source tree, you can run the following:
|
163
|
+
|
164
|
+
$ ruby tests/run.rb
|
165
|
+
|
166
|
+
This tests the libraries, except for the database routines. If you have a
|
167
|
+
database built, you can run the test harness like so:
|
168
|
+
|
169
|
+
$ ruby tests/run.rb /opt/tiger/geocoder.db
|
170
|
+
|
171
|
+
The full test suite may take 30 or so seconds to run completely.
|
172
|
+
|
173
|
+
== License
|
174
|
+
|
175
|
+
Geocoder::US 2.0 was based on earlier work by Schuyler Erle on
|
176
|
+
a Perl module of the same name. You can find it at
|
177
|
+
http://search.cpan.org/~sderle/.
|
178
|
+
|
179
|
+
Geocoder::US 2.0 was written by Schuyler Erle, of Entropy Free LLC,
|
180
|
+
with the gracious support of FortiusOne, Inc. Please send bug reports,
|
181
|
+
patches, kudos, etc. to patches at geocoder.us.
|
182
|
+
|
183
|
+
Copyright (c) 2009 FortiusOne, Inc.
|
184
|
+
|
185
|
+
This program is free software: you can redistribute it and/or modify
|
186
|
+
it under the terms of the GNU General Public License as published by
|
187
|
+
the Free Software Foundation, either version 3 of the License, or
|
188
|
+
(at your option) any later version.
|
189
|
+
|
190
|
+
This program is distributed in the hope that it will be useful,
|
191
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
192
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
193
|
+
GNU General Public License for more details.
|
194
|
+
|
195
|
+
You should have received a copy of the GNU General Public License
|
196
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
197
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "geo_coder"
|
18
|
+
gem.homepage = "http://github.com/kornypoet/geo_coder"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = "Geocoder based upon the Geocommons Geocoder."
|
21
|
+
gem.description = "Geocode a text address."
|
22
|
+
gem.email = "dempsey.travis@gmail.com"
|
23
|
+
gem.authors = ["Travis Dempsey"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'rcov/rcovtask'
|
36
|
+
Rcov::RcovTask.new do |test|
|
37
|
+
test.libs << 'test'
|
38
|
+
test.pattern = 'test/**/test_*.rb'
|
39
|
+
test.verbose = true
|
40
|
+
test.rcov_opts << '--exclude "gems/*"'
|
41
|
+
end
|
42
|
+
|
43
|
+
task :default => :test
|
44
|
+
|
45
|
+
require 'rdoc/task'
|
46
|
+
Rake::RDocTask.new do |rdoc|
|
47
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
48
|
+
|
49
|
+
rdoc.rdoc_dir = 'rdoc'
|
50
|
+
rdoc.title = "geo_coder #{version}"
|
51
|
+
rdoc.rdoc_files.include('README*')
|
52
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
53
|
+
end
|
data/TODO.txt
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
1. Check interpolate measure: scale longitude or not?
|
2
|
+
5. Intersections...
|
3
|
+
- import ALL linestrings (even those without ranges)
|
4
|
+
- throw away internal points on lines that don't have ranges
|
5
|
+
7. Documentation (*)
|
6
|
+
8. Make SQLite memory cache size an option to the Database constructor
|
7
|
+
9. Precision and accuracy measure
|
8
|
+
10. Street line set back
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/bin/build_indexes
ADDED
data/bin/rebuild_cluster
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
BASE=$(dirname $0)
|
4
|
+
PATH=$PATH:$BASE/bin
|
5
|
+
SQL="$BASE/../sql"
|
6
|
+
|
7
|
+
OLD_DB=$1
|
8
|
+
DATABASE=${OLD_DB}.$$
|
9
|
+
|
10
|
+
[ -r $DATABASE ] && echo "$DATABASE already exists." && exit -1
|
11
|
+
[ ! -r $OLD_DB ] && echo "Can't read $OLD_DB." && exit -1
|
12
|
+
|
13
|
+
# Create a shiny new database, attach the old one,
|
14
|
+
# extract the data from it, and then index that.
|
15
|
+
# Finally, overwrite the old database with the new one.
|
16
|
+
( cat ${SQL}/create.sql && \
|
17
|
+
echo "ATTACH DATABASE '${OLD_DB}' AS old;" && \
|
18
|
+
cat ${SQL}/cluster.sql && \
|
19
|
+
echo "DETACH DATABASE old;" && \
|
20
|
+
cat ${SQL}/index.sql && \
|
21
|
+
echo "ANALYZE;" ) | sqlite3 $DATABASE \
|
22
|
+
&& mv $DATABASE $OLD_DB
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'sqlite3'
|
5
|
+
require 'text'
|
6
|
+
|
7
|
+
@db = SQLite3::Database.new("../geocoderdata/geocoder.db")
|
8
|
+
@db.create_function("metaphone", 2) do |func, string, len|
|
9
|
+
test = string.to_s.gsub(/\W/o, "")
|
10
|
+
if test =~ /^(\d+)/o
|
11
|
+
mph = $1
|
12
|
+
elsif test =~ /^([wy])$/io
|
13
|
+
mph = $1
|
14
|
+
else
|
15
|
+
mph = Text::Metaphone.metaphone test
|
16
|
+
end
|
17
|
+
func.result = mph[0...len.to_i]
|
18
|
+
end
|
19
|
+
sql = "update place set city_phone = metaphone(city,5)"
|
20
|
+
|
21
|
+
@db.execute sql
|
22
|
+
|
23
|
+
@db.close
|
data/bin/tiger_import
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
TMP="/tmp/tiger-import.$$"
|
4
|
+
SHPS="edges"
|
5
|
+
DBFS="featnames addr"
|
6
|
+
BASE=$(dirname $0)
|
7
|
+
PATH=$PATH:$BASE
|
8
|
+
SQL="$BASE/../sql"
|
9
|
+
HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"
|
10
|
+
DATABASE=$1
|
11
|
+
shift
|
12
|
+
|
13
|
+
mkdir -p $TMP || exit 1
|
14
|
+
|
15
|
+
# Initialize the database if it doesn't exist.
|
16
|
+
[ ! -r $DATABASE ] && cat ${SQL}/{create,place}.sql | sqlite3 $DATABASE
|
17
|
+
|
18
|
+
# Marshal the county directories to import.
|
19
|
+
#
|
20
|
+
# If no directory was given on the command-line, read a list from STDIN.
|
21
|
+
if [ x"$1" = x"" ]; then
|
22
|
+
cat
|
23
|
+
else
|
24
|
+
# Otherwise, find all of the contents of each state directory.
|
25
|
+
ls -d $1/[0-9]* | while read state; do
|
26
|
+
ls -d ${state}/[0-9]*
|
27
|
+
done
|
28
|
+
fi | while read county; do
|
29
|
+
echo "--- $county"
|
30
|
+
# Unpack the county files into the temp directory.
|
31
|
+
for file in $SHPS $DBFS; do
|
32
|
+
ZIP=$(ls ${county}/*_${file}.zip 2>/dev/null)
|
33
|
+
SHP=$(ls ${county}/*_${file}.* 2>/dev/null)
|
34
|
+
if [ x"$ZIP" != x"" ]; then
|
35
|
+
unzip -q $ZIP -d $TMP
|
36
|
+
elif [ x"$SHP" != x"" ]; then
|
37
|
+
ln -s $SHP $TMP
|
38
|
+
fi
|
39
|
+
done
|
40
|
+
# Generate an SQL stream to feed into the sqlite3 binary.
|
41
|
+
# Start by loading the helper libs and initializing the temporary tables
|
42
|
+
# that will hold the TIGER data before ETL.
|
43
|
+
(echo ".load $HELPER_LIB" && \
|
44
|
+
cat ${SQL}/setup.sql && \
|
45
|
+
for file in $SHPS; do
|
46
|
+
# Convert each Shapefile into SQL statements.
|
47
|
+
shp2sqlite -aS ${TMP}/*_${file}.shp tiger_${file}
|
48
|
+
done && \
|
49
|
+
for file in $DBFS; do
|
50
|
+
# Convert each DBF into SQL statements likewise.
|
51
|
+
shp2sqlite -an ${TMP}/*_${file}.dbf tiger_${file}
|
52
|
+
done && \
|
53
|
+
cat ${SQL}/convert.sql) | sqlite3 $DATABASE
|
54
|
+
# Finally, do the transform/load phase (convert.sql)
|
55
|
+
# and clean up the temporary files.
|
56
|
+
rm -f $TMP/*
|
57
|
+
done 2>&1 | tee import-$$.log
|
58
|
+
rm -rf $TMP
|
59
|
+
|