geo_coder 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119) hide show
  1. data/Gemfile +12 -0
  2. data/Gemfile.lock +32 -0
  3. data/History.txt +6 -0
  4. data/Makefile +13 -0
  5. data/Manifest.txt +18 -0
  6. data/README.rdoc +197 -0
  7. data/Rakefile +53 -0
  8. data/TODO.txt +8 -0
  9. data/VERSION +1 -0
  10. data/bin/build_indexes +8 -0
  11. data/bin/rebuild_cluster +22 -0
  12. data/bin/rebuild_metaphones +23 -0
  13. data/bin/tiger_import +59 -0
  14. data/demos/demo/app/ext/geocodewrap.rb +84 -0
  15. data/demos/demo/app/views/index.builder +13 -0
  16. data/demos/demo/app/views/index.erb +71 -0
  17. data/demos/demo/config.ru +12 -0
  18. data/demos/demo/config/bootstraps.rb +130 -0
  19. data/demos/demo/config/geoenvironment.rb +25 -0
  20. data/demos/demo/geocoder_helper.rb +12 -0
  21. data/demos/demo/geocom_geocode.rb +10 -0
  22. data/demos/demo/main.rb +3 -0
  23. data/demos/demo/rakefile.rb +17 -0
  24. data/demos/demo/tmp/restart.txt +0 -0
  25. data/demos/simpledemo/views/index.builder +13 -0
  26. data/demos/simpledemo/views/index.erb +69 -0
  27. data/demos/simpledemo/ws.rb +83 -0
  28. data/doc/Makefile +7 -0
  29. data/doc/html4css1.css +279 -0
  30. data/doc/lookup.rst +193 -0
  31. data/doc/parsing.rst +125 -0
  32. data/doc/voidspace.css +147 -0
  33. data/geo_coder.gemspec +172 -0
  34. data/lib/geocoder/us.rb +21 -0
  35. data/lib/geocoder/us/address.rb +290 -0
  36. data/lib/geocoder/us/constants.rb +670 -0
  37. data/lib/geocoder/us/database.rb +745 -0
  38. data/lib/geocoder/us/import.rb +181 -0
  39. data/lib/geocoder/us/import/tiger.rb +13 -0
  40. data/lib/geocoder/us/numbers.rb +58 -0
  41. data/navteq/README +4 -0
  42. data/navteq/convert.sql +37 -0
  43. data/navteq/navteq_import +39 -0
  44. data/navteq/prepare.sql +92 -0
  45. data/sql/cluster.sql +16 -0
  46. data/sql/convert.sql +80 -0
  47. data/sql/create.sql +37 -0
  48. data/sql/index.sql +12 -0
  49. data/sql/place.csv +104944 -0
  50. data/sql/place.sql +104948 -0
  51. data/sql/setup.sql +78 -0
  52. data/src/Makefile +13 -0
  53. data/src/README +14 -0
  54. data/src/liblwgeom/Makefile +75 -0
  55. data/src/liblwgeom/box2d.c +54 -0
  56. data/src/liblwgeom/lex.yy.c +4799 -0
  57. data/src/liblwgeom/liblwgeom.h +1405 -0
  58. data/src/liblwgeom/lwalgorithm.c +946 -0
  59. data/src/liblwgeom/lwalgorithm.h +52 -0
  60. data/src/liblwgeom/lwcircstring.c +759 -0
  61. data/src/liblwgeom/lwcollection.c +541 -0
  62. data/src/liblwgeom/lwcompound.c +118 -0
  63. data/src/liblwgeom/lwcurvepoly.c +86 -0
  64. data/src/liblwgeom/lwgeom.c +886 -0
  65. data/src/liblwgeom/lwgeom_api.c +2201 -0
  66. data/src/liblwgeom/lwgparse.c +1219 -0
  67. data/src/liblwgeom/lwgunparse.c +1054 -0
  68. data/src/liblwgeom/lwline.c +525 -0
  69. data/src/liblwgeom/lwmcurve.c +125 -0
  70. data/src/liblwgeom/lwmline.c +137 -0
  71. data/src/liblwgeom/lwmpoint.c +138 -0
  72. data/src/liblwgeom/lwmpoly.c +141 -0
  73. data/src/liblwgeom/lwmsurface.c +129 -0
  74. data/src/liblwgeom/lwpoint.c +439 -0
  75. data/src/liblwgeom/lwpoly.c +579 -0
  76. data/src/liblwgeom/lwsegmentize.c +1047 -0
  77. data/src/liblwgeom/lwutil.c +369 -0
  78. data/src/liblwgeom/measures.c +861 -0
  79. data/src/liblwgeom/postgis_config.h +93 -0
  80. data/src/liblwgeom/ptarray.c +847 -0
  81. data/src/liblwgeom/vsprintf.c +179 -0
  82. data/src/liblwgeom/wktparse.h +126 -0
  83. data/src/liblwgeom/wktparse.lex +74 -0
  84. data/src/liblwgeom/wktparse.tab.c +2353 -0
  85. data/src/liblwgeom/wktparse.tab.h +145 -0
  86. data/src/liblwgeom/wktparse.y +385 -0
  87. data/src/libsqlite3_geocoder/Makefile +22 -0
  88. data/src/libsqlite3_geocoder/Makefile.nix +15 -0
  89. data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
  90. data/src/libsqlite3_geocoder/extension.c +121 -0
  91. data/src/libsqlite3_geocoder/extension.h +13 -0
  92. data/src/libsqlite3_geocoder/levenshtein.c +42 -0
  93. data/src/libsqlite3_geocoder/metaphon.c +278 -0
  94. data/src/libsqlite3_geocoder/util.c +37 -0
  95. data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
  96. data/src/metaphone/Makefile +7 -0
  97. data/src/metaphone/README +49 -0
  98. data/src/metaphone/extension.c +37 -0
  99. data/src/metaphone/metaphon.c +251 -0
  100. data/src/shp2sqlite/Makefile +37 -0
  101. data/src/shp2sqlite/Makefile.nix +36 -0
  102. data/src/shp2sqlite/Makefile.redhat +35 -0
  103. data/src/shp2sqlite/dbfopen.c +1595 -0
  104. data/src/shp2sqlite/getopt.c +695 -0
  105. data/src/shp2sqlite/getopt.h +127 -0
  106. data/src/shp2sqlite/shapefil.h +500 -0
  107. data/src/shp2sqlite/shp2sqlite.c +1974 -0
  108. data/src/shp2sqlite/shpopen.c +1894 -0
  109. data/tests/address.rb +236 -0
  110. data/tests/benchmark.rb +20 -0
  111. data/tests/constants.rb +57 -0
  112. data/tests/data/address-sample.csv +52 -0
  113. data/tests/data/db-test.csv +57 -0
  114. data/tests/data/locations.csv +4 -0
  115. data/tests/database.rb +137 -0
  116. data/tests/generate.rb +34 -0
  117. data/tests/numbers.rb +46 -0
  118. data/tests/run.rb +11 -0
  119. metadata +237 -0
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
+ source "http://rubygems.org"
2
+ gem "sqlite3-ruby", "= 1.2.5"
3
+ gem "text"
4
+
5
+ # Add dependencies to develop your gem here.
6
+ # Include everything needed to run rake, tests, features, etc.
7
+ group :development do
8
+ gem "bundler", "~> 1.0.0"
9
+ gem "jeweler", "~> 1.6.4"
10
+ gem "rcov"
11
+ gem "rspec"
12
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,32 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.6.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rake (0.9.2)
11
+ rcov (0.9.10)
12
+ rspec (2.6.0)
13
+ rspec-core (~> 2.6.0)
14
+ rspec-expectations (~> 2.6.0)
15
+ rspec-mocks (~> 2.6.0)
16
+ rspec-core (2.6.4)
17
+ rspec-expectations (2.6.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.6.0)
20
+ sqlite3-ruby (1.2.5)
21
+ text (0.2.0)
22
+
23
+ PLATFORMS
24
+ ruby
25
+
26
+ DEPENDENCIES
27
+ bundler (~> 1.0.0)
28
+ jeweler (~> 1.6.4)
29
+ rcov
30
+ rspec
31
+ sqlite3-ruby (= 1.2.5)
32
+ text
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2009-06-02
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
data/Makefile ADDED
@@ -0,0 +1,13 @@
1
+ all:
2
+ make -C src install
3
+ gem build gemspec
4
+
5
+ test: all
6
+ ruby -Ilib tests/run.rb
7
+
8
+ install: all
9
+ gem install *.gem
10
+
11
+ clean:
12
+ make -C src clean
13
+ rm *.gem
data/Manifest.txt ADDED
@@ -0,0 +1,18 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ lib/geocoder/us/database.rb
6
+ lib/geocoder/us/numbers.rb
7
+ lib/geocoder/us/address.rb
8
+ lib/geocoder/us/constants.rb
9
+ tests/database.rb
10
+ tests/numbers.rb
11
+ tests/generate.rb
12
+ tests/run.rb
13
+ tests/address.rb
14
+ tests/benchmark.rb
15
+ tests/constants.rb
16
+ tests/data/address-sample.csv
17
+ tests/data/locations.csv
18
+ tests/data/db-test.csv
data/README.rdoc ADDED
@@ -0,0 +1,197 @@
1
+ = Geocoder::US
2
+
3
+ Geocoder::US 2.0 is a software package designed to geocode US street
4
+ addresses. Although it is primarily intended for use with the US Census
5
+ Bureau's free TIGER/Line dataset, it uses an abstract US address data model
6
+ that can be employed with other sources of US street address range data.
7
+
8
+ Geocoder::US 2.0 implements a Ruby interface to parse US street addresses, and
9
+ perform fuzzy lookup against an SQLite 3 database. Geocoder::US is designed to
10
+ return the best matches found, with geographic coordinates interpolated from
11
+ the street range dataset. Geocoder::US will fill in missing information, and
12
+ it knows about standard and common non-standard postal abbreviations, ordinal
13
+ versus cardinal numbers, and more.
14
+
15
+ Geocoder::US 2.0 is shipped with a free US ZIP code data set, compiled from
16
+ public domain sources.
17
+
18
+ == Synopsis
19
+
20
+ >> require 'geocoder/us'
21
+ >> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db")
22
+ >> p db.geocode("1600 Pennsylvania Av, Washington DC")
23
+
24
+ [{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502",
25
+ :lon=>-77.037528, :number=>"1600", :fips_county=>"11001", :predir=>"",
26
+ :precision=>:range, :city=>"Washington", :lat=>38.898746, :suftyp=>"Ave",
27
+ :state=>"DC", :prequal=>"", :sufqual=>"", :score=>0.906, :prenum=>""}]
28
+
29
+ == Prerequisites
30
+
31
+ To build Geocoder::US, you will need gcc/g++, make, bash or equivalent, the
32
+ standard *NIX 'unzip' utility, and the SQLite 3 executable and development
33
+ files installed on your system.
34
+
35
+ To use the Ruby interface, you will need the 'Text' gem installed from
36
+ rubyforge. To run the tests, you will also need the 'fastercsv' gem.
37
+
38
+ Additionally, you will need a custom build of the 'sqlite3-ruby' gem that
39
+ supports loading extension modules in SQLite. You can get a patched version of
40
+ this gem from http://github.com/schuyler/sqlite3-ruby/. Until the sqlite3-ruby
41
+ maintainers roll in the relevant patch, you will need *this* version.
42
+
43
+ *NOTE*: If you do not have /usr/include/sqlite3ext.h installed, then your
44
+ sqlite3 binaries are probably not configured to support dynamic extension
45
+ loading. If not, you *must* compile and install SQLite from source, or rebuild
46
+ your system packages. This is not believed to be a problem on Debian/Ubuntu,
47
+ but is known to be a problem with Red Hat/CentOS.
48
+
49
+ *NOTE*: If you *do* have to install from source, make sure that the
50
+ source-installed 'sqlite3' program is in your path before proceeding (and not
51
+ the system-installed version), using `which sqlite3`. Also, be sure that you've
52
+ added your source install prefix (usually /usr/local) to /etc/ld.so.conf (or
53
+ its moral equivalent) and that you've run /sbin/ldconfig.
54
+
55
+ == Thread safety
56
+
57
+ SQLite 3 is not designed for concurrent use of a single database handle across
58
+ multiple threads. Therefore, to prevent segfaults, Geocoder::US::Database
59
+ implements a global mutex that wraps all database access. The use of this mutex
60
+ will ensure stability in multi-threaded applications, but incurs a performance
61
+ penalty. However, since the database is read-only from Ruby, there's no reason
62
+ in principle why multi-threaded apps can't each have their own database handle.
63
+
64
+ To disable the mutex for better performance, you can do the following:
65
+
66
+ * Read the following and make sure you understand them:
67
+ * http://www.sqlite.org/faq.html#q6
68
+ * http://www.sqlite.org/cvstrac/wiki?p=MultiThreading
69
+ * Make sure you have compiled SQLite 3 with thread safety enabled.
70
+ * Instantiate a separate Geocoder::US::Database object for *each* thread
71
+ in your Ruby script, and pass :threadsafe => true to new() to disable mutex
72
+ synchronization.
73
+
74
+ Per the SQLite 3 documentation, do *not* attempt to retain a
75
+ Geocoder::US::Database object across a fork! "Problems will result if you do."
76
+
77
+ == Building Geocoder::US
78
+
79
+ Unpack the source and run 'make'. This will compile the SQLite 3 extension
80
+ needed by Geocoder::US, the Shapefile import utility, and the Geocoder-US
81
+ gem.
82
+
83
+ You can run 'make install' as root to install the gem systemwide.
84
+
85
+ == Generating a Geocoder::US Database
86
+
87
+ Build the package from source as described above. Generating the database
88
+ involves three basic steps:
89
+
90
+ * Import the Shapefile data into an SQLite database.
91
+ * Build the database indexes.
92
+ * Optionally, rebuild the database to cluster indexed rows.
93
+
94
+ We will presume that you are building a Geocoder::US database from TIGER/Line,
95
+ and that you have obtained the complete set of TIGER/Line ZIP files, and put
96
+ the entire tree in /opt/tiger. Please adjust these instructions as needed.
97
+
98
+ A full TIGER/Line database import takes ten hours to run on a normal Amazon
99
+ EC2 instance, and takes up a little over 5 gigabytes after all is said and
100
+ done. You will need to have at least 12 gigabytes of free disk space *after*
101
+ downloading the TIGER/Line dataset, if you are building the full database.
102
+
103
+ === Import TIGER/Line
104
+
105
+ From inside the Geocoder::US source tree, run the following:
106
+
107
+ $ bin/tiger_import /opt/tiger/geocoder.db /opt/tiger
108
+
109
+ This will unpack each TIGER/Line ZIP file to a temporary directory, and
110
+ perform the extract/transform/load sequence to incrementally build the
111
+ database. The process takes about 10-12 hours on a normal Amazon EC2 instance,
112
+ or about 5 CPU hours flat out on a modern PC. Note that not all TIGER/Line
113
+ source files contain address range information, so you will see error messages
114
+ for some counties, but this is normal.
115
+
116
+ If you only want to import specific counties, you can pipe a list of
117
+ TIGER/Line county directories to tiger_import on stdin. For example,
118
+ the following will install just the data for the state of Delaware:
119
+
120
+ $ ls -d /opt/tiger/10_DELAWARE/1* | bin/tiger_import ~/delaware.db
121
+
122
+ The tiger_import process uses a binary utility, shp2sqlite, which is derived
123
+ from shp2pgsql, which ships with PostGIS. The shp2sqlite utility converts
124
+ .shp and .dbf files into SQL suitable for import into SQLite. This SQL
125
+ is then piped into the sqlite3 command line tool, where it is loaded into
126
+ temporary tables, and then a set of static SQL statements (kept in the sql/
127
+ directory) are used to transform this data and import it into the database
128
+ itself.
129
+
130
+ === Build metaphones using Ruby metaphone
131
+
132
+ run bin/rebuild_metaphones /opt/tiger/geocoder.db
133
+
134
+ This creates the metaphones using Ruby's metaphone function and will produce better geocoding results.
135
+
136
+ === Build the indexes
137
+
138
+ After the database import is complete, you will want to construct the database
139
+ indexes:
140
+
141
+ $ bin/build_indexes /opt/tiger/geocoder.db
142
+
143
+ This process takes 25 minutes on an EC2 instance (8 CPU minutes), but it's a
144
+ *lot* faster than building the indexes incrementally during the import
145
+ process. Basically, this process simply feeds SQL statements to the sqlite3
146
+ utility to construct the indexes on the existing database.
147
+
148
+ === Cluster the database tables (optional)
149
+
150
+ As a final optional step, you can cluster the database tables according to
151
+ their indexes, which will make the database smaller, and lookups faster. This
152
+ process will take an hour or two, and may be a micro-optimization.
153
+
154
+ $ bin/rebuild_cluster /opt/tiger/geocoder.db
155
+
156
+ You will need as much free disk space to run rebuild_cluster as the database
157
+ takes up, because the process essentially reconstructs the database in a new
158
+ file, and then it renames the new database over top of the old.
159
+
160
+ == Running the unit tests
161
+
162
+ From within the source tree, you can run the following:
163
+
164
+ $ ruby tests/run.rb
165
+
166
+ This tests the libraries, except for the database routines. If you have a
167
+ database built, you can run the test harness like so:
168
+
169
+ $ ruby tests/run.rb /opt/tiger/geocoder.db
170
+
171
+ The full test suite may take 30 or so seconds to run completely.
172
+
173
+ == License
174
+
175
+ Geocoder::US 2.0 was based on earlier work by Schuyler Erle on
176
+ a Perl module of the same name. You can find it at
177
+ http://search.cpan.org/~sderle/.
178
+
179
+ Geocoder::US 2.0 was written by Schuyler Erle, of Entropy Free LLC,
180
+ with the gracious support of FortiusOne, Inc. Please send bug reports,
181
+ patches, kudos, etc. to patches at geocoder.us.
182
+
183
+ Copyright (c) 2009 FortiusOne, Inc.
184
+
185
+ This program is free software: you can redistribute it and/or modify
186
+ it under the terms of the GNU General Public License as published by
187
+ the Free Software Foundation, either version 3 of the License, or
188
+ (at your option) any later version.
189
+
190
+ This program is distributed in the hope that it will be useful,
191
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
192
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
193
+ GNU General Public License for more details.
194
+
195
+ You should have received a copy of the GNU General Public License
196
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
197
+
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "geo_coder"
18
+ gem.homepage = "http://github.com/kornypoet/geo_coder"
19
+ gem.license = "MIT"
20
+ gem.summary = "Geocoder based upon the Geocommons Geocoder."
21
+ gem.description = "Geocode a text address."
22
+ gem.email = "dempsey.travis@gmail.com"
23
+ gem.authors = ["Travis Dempsey"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ require 'rcov/rcovtask'
36
+ Rcov::RcovTask.new do |test|
37
+ test.libs << 'test'
38
+ test.pattern = 'test/**/test_*.rb'
39
+ test.verbose = true
40
+ test.rcov_opts << '--exclude "gems/*"'
41
+ end
42
+
43
+ task :default => :test
44
+
45
+ require 'rdoc/task'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "geo_coder #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/TODO.txt ADDED
@@ -0,0 +1,8 @@
1
+ 1. Check interpolate measure: scale longitude or not?
2
+ 5. Intersections...
3
+ - import ALL linestrings (even those without ranges)
4
+ - throw away internal points on lines that don't have ranges
5
+ 7. Documentation (*)
6
+ 8. Make SQLite memory cache size an option to the Database constructor
7
+ 9. Precision and accuracy measure
8
+ 10. Street line set back
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bin/build_indexes ADDED
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+
3
+ BASE=$(dirname $0)
4
+ PATH=$PATH:$BASE/bin
5
+ SQL="$BASE/../sql"
6
+
7
+ # Just run the SQL that constructs the indexes.
8
+ sqlite3 $1 < ${SQL}/index.sql
data/bin/rebuild_cluster ADDED
@@ -0,0 +1,22 @@
1
+ #!/bin/bash
2
+
3
+ BASE=$(dirname $0)
4
+ PATH=$PATH:$BASE/bin
5
+ SQL="$BASE/../sql"
6
+
7
+ OLD_DB=$1
8
+ DATABASE=${OLD_DB}.$$
9
+
10
+ [ -r $DATABASE ] && echo "$DATABASE already exists." && exit -1
11
+ [ ! -r $OLD_DB ] && echo "Can't read $OLD_DB." && exit -1
12
+
13
+ # Create a shiny new database, attach the old one,
14
+ # extract the data from it, and then index that.
15
+ # Finally, overwrite the old database with the new one.
16
+ ( cat ${SQL}/create.sql && \
17
+ echo "ATTACH DATABASE '${OLD_DB}' AS old;" && \
18
+ cat ${SQL}/cluster.sql && \
19
+ echo "DETACH DATABASE old;" && \
20
+ cat ${SQL}/index.sql && \
21
+ echo "ANALYZE;" ) | sqlite3 $DATABASE \
22
+ && mv $DATABASE $OLD_DB
data/bin/rebuild_metaphones ADDED
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'sqlite3'
5
+ require 'text'
6
+
7
+ @db = SQLite3::Database.new("../geocoderdata/geocoder.db")
8
+ @db.create_function("metaphone", 2) do |func, string, len|
9
+ test = string.to_s.gsub(/\W/o, "")
10
+ if test =~ /^(\d+)/o
11
+ mph = $1
12
+ elsif test =~ /^([wy])$/io
13
+ mph = $1
14
+ else
15
+ mph = Text::Metaphone.metaphone test
16
+ end
17
+ func.result = mph[0...len.to_i]
18
+ end
19
+ sql = "update place set city_phone = metaphone(city,5)"
20
+
21
+ @db.execute sql
22
+
23
+ @db.close
data/bin/tiger_import ADDED
@@ -0,0 +1,59 @@
1
+ #!/bin/bash
2
+
3
+ TMP="/tmp/tiger-import.$$"
4
+ SHPS="edges"
5
+ DBFS="featnames addr"
6
+ BASE=$(dirname $0)
7
+ PATH=$PATH:$BASE
8
+ SQL="$BASE/../sql"
9
+ HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"
10
+ DATABASE=$1
11
+ shift
12
+
13
+ mkdir -p $TMP || exit 1
14
+
15
+ # Initialize the database if it doesn't exist.
16
+ [ ! -r $DATABASE ] && cat ${SQL}/{create,place}.sql | sqlite3 $DATABASE
17
+
18
+ # Marshal the county directories to import.
19
+ #
20
+ # If no directory was given on the command-line, read a list from STDIN.
21
+ if [ x"$1" = x"" ]; then
22
+ cat
23
+ else
24
+ # Otherwise, find all of the contents of each state directory.
25
+ ls -d $1/[0-9]* | while read state; do
26
+ ls -d ${state}/[0-9]*
27
+ done
28
+ fi | while read county; do
29
+ echo "--- $county"
30
+ # Unpack the county files into the temp directory.
31
+ for file in $SHPS $DBFS; do
32
+ ZIP=$(ls ${county}/*_${file}.zip 2>/dev/null)
33
+ SHP=$(ls ${county}/*_${file}.* 2>/dev/null)
34
+ if [ x"$ZIP" != x"" ]; then
35
+ unzip -q $ZIP -d $TMP
36
+ elif [ x"$SHP" != x"" ]; then
37
+ ln -s $SHP $TMP
38
+ fi
39
+ done
40
+ # Generate an SQL stream to feed into the sqlite3 binary.
41
+ # Start by loading the helper libs and initializing the temporary tables
42
+ # that will hold the TIGER data before ETL.
43
+ (echo ".load $HELPER_LIB" && \
44
+ cat ${SQL}/setup.sql && \
45
+ for file in $SHPS; do
46
+ # Convert each Shapefile into SQL statements.
47
+ shp2sqlite -aS ${TMP}/*_${file}.shp tiger_${file}
48
+ done && \
49
+ for file in $DBFS; do
50
+ # Convert each DBF into SQL statements likewise.
51
+ shp2sqlite -an ${TMP}/*_${file}.dbf tiger_${file}
52
+ done && \
53
+ cat ${SQL}/convert.sql) | sqlite3 $DATABASE
54
+ # Finally, do the transform/load phase (convert.sql)
55
+ # and clean up the temporary files.
56
+ rm -f $TMP/*
57
+ done 2>&1 | tee import-$$.log
58
+ rm -rf $TMP
59
+