geo_coder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. data/Gemfile +12 -0
  2. data/Gemfile.lock +32 -0
  3. data/History.txt +6 -0
  4. data/Makefile +13 -0
  5. data/Manifest.txt +18 -0
  6. data/README.rdoc +197 -0
  7. data/Rakefile +53 -0
  8. data/TODO.txt +8 -0
  9. data/VERSION +1 -0
  10. data/bin/build_indexes +8 -0
  11. data/bin/rebuild_cluster +22 -0
  12. data/bin/rebuild_metaphones +23 -0
  13. data/bin/tiger_import +59 -0
  14. data/demos/demo/app/ext/geocodewrap.rb +84 -0
  15. data/demos/demo/app/views/index.builder +13 -0
  16. data/demos/demo/app/views/index.erb +71 -0
  17. data/demos/demo/config.ru +12 -0
  18. data/demos/demo/config/bootstraps.rb +130 -0
  19. data/demos/demo/config/geoenvironment.rb +25 -0
  20. data/demos/demo/geocoder_helper.rb +12 -0
  21. data/demos/demo/geocom_geocode.rb +10 -0
  22. data/demos/demo/main.rb +3 -0
  23. data/demos/demo/rakefile.rb +17 -0
  24. data/demos/demo/tmp/restart.txt +0 -0
  25. data/demos/simpledemo/views/index.builder +13 -0
  26. data/demos/simpledemo/views/index.erb +69 -0
  27. data/demos/simpledemo/ws.rb +83 -0
  28. data/doc/Makefile +7 -0
  29. data/doc/html4css1.css +279 -0
  30. data/doc/lookup.rst +193 -0
  31. data/doc/parsing.rst +125 -0
  32. data/doc/voidspace.css +147 -0
  33. data/geo_coder.gemspec +172 -0
  34. data/lib/geocoder/us.rb +21 -0
  35. data/lib/geocoder/us/address.rb +290 -0
  36. data/lib/geocoder/us/constants.rb +670 -0
  37. data/lib/geocoder/us/database.rb +745 -0
  38. data/lib/geocoder/us/import.rb +181 -0
  39. data/lib/geocoder/us/import/tiger.rb +13 -0
  40. data/lib/geocoder/us/numbers.rb +58 -0
  41. data/navteq/README +4 -0
  42. data/navteq/convert.sql +37 -0
  43. data/navteq/navteq_import +39 -0
  44. data/navteq/prepare.sql +92 -0
  45. data/sql/cluster.sql +16 -0
  46. data/sql/convert.sql +80 -0
  47. data/sql/create.sql +37 -0
  48. data/sql/index.sql +12 -0
  49. data/sql/place.csv +104944 -0
  50. data/sql/place.sql +104948 -0
  51. data/sql/setup.sql +78 -0
  52. data/src/Makefile +13 -0
  53. data/src/README +14 -0
  54. data/src/liblwgeom/Makefile +75 -0
  55. data/src/liblwgeom/box2d.c +54 -0
  56. data/src/liblwgeom/lex.yy.c +4799 -0
  57. data/src/liblwgeom/liblwgeom.h +1405 -0
  58. data/src/liblwgeom/lwalgorithm.c +946 -0
  59. data/src/liblwgeom/lwalgorithm.h +52 -0
  60. data/src/liblwgeom/lwcircstring.c +759 -0
  61. data/src/liblwgeom/lwcollection.c +541 -0
  62. data/src/liblwgeom/lwcompound.c +118 -0
  63. data/src/liblwgeom/lwcurvepoly.c +86 -0
  64. data/src/liblwgeom/lwgeom.c +886 -0
  65. data/src/liblwgeom/lwgeom_api.c +2201 -0
  66. data/src/liblwgeom/lwgparse.c +1219 -0
  67. data/src/liblwgeom/lwgunparse.c +1054 -0
  68. data/src/liblwgeom/lwline.c +525 -0
  69. data/src/liblwgeom/lwmcurve.c +125 -0
  70. data/src/liblwgeom/lwmline.c +137 -0
  71. data/src/liblwgeom/lwmpoint.c +138 -0
  72. data/src/liblwgeom/lwmpoly.c +141 -0
  73. data/src/liblwgeom/lwmsurface.c +129 -0
  74. data/src/liblwgeom/lwpoint.c +439 -0
  75. data/src/liblwgeom/lwpoly.c +579 -0
  76. data/src/liblwgeom/lwsegmentize.c +1047 -0
  77. data/src/liblwgeom/lwutil.c +369 -0
  78. data/src/liblwgeom/measures.c +861 -0
  79. data/src/liblwgeom/postgis_config.h +93 -0
  80. data/src/liblwgeom/ptarray.c +847 -0
  81. data/src/liblwgeom/vsprintf.c +179 -0
  82. data/src/liblwgeom/wktparse.h +126 -0
  83. data/src/liblwgeom/wktparse.lex +74 -0
  84. data/src/liblwgeom/wktparse.tab.c +2353 -0
  85. data/src/liblwgeom/wktparse.tab.h +145 -0
  86. data/src/liblwgeom/wktparse.y +385 -0
  87. data/src/libsqlite3_geocoder/Makefile +22 -0
  88. data/src/libsqlite3_geocoder/Makefile.nix +15 -0
  89. data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
  90. data/src/libsqlite3_geocoder/extension.c +121 -0
  91. data/src/libsqlite3_geocoder/extension.h +13 -0
  92. data/src/libsqlite3_geocoder/levenshtein.c +42 -0
  93. data/src/libsqlite3_geocoder/metaphon.c +278 -0
  94. data/src/libsqlite3_geocoder/util.c +37 -0
  95. data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
  96. data/src/metaphone/Makefile +7 -0
  97. data/src/metaphone/README +49 -0
  98. data/src/metaphone/extension.c +37 -0
  99. data/src/metaphone/metaphon.c +251 -0
  100. data/src/shp2sqlite/Makefile +37 -0
  101. data/src/shp2sqlite/Makefile.nix +36 -0
  102. data/src/shp2sqlite/Makefile.redhat +35 -0
  103. data/src/shp2sqlite/dbfopen.c +1595 -0
  104. data/src/shp2sqlite/getopt.c +695 -0
  105. data/src/shp2sqlite/getopt.h +127 -0
  106. data/src/shp2sqlite/shapefil.h +500 -0
  107. data/src/shp2sqlite/shp2sqlite.c +1974 -0
  108. data/src/shp2sqlite/shpopen.c +1894 -0
  109. data/tests/address.rb +236 -0
  110. data/tests/benchmark.rb +20 -0
  111. data/tests/constants.rb +57 -0
  112. data/tests/data/address-sample.csv +52 -0
  113. data/tests/data/db-test.csv +57 -0
  114. data/tests/data/locations.csv +4 -0
  115. data/tests/database.rb +137 -0
  116. data/tests/generate.rb +34 -0
  117. data/tests/numbers.rb +46 -0
  118. data/tests/run.rb +11 -0
  119. metadata +237 -0
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
+ source "http://rubygems.org"
2
+ gem "sqlite3-ruby", "= 1.2.5"
3
+ gem "text"
4
+
5
+ # Add dependencies to develop your gem here.
6
+ # Include everything needed to run rake, tests, features, etc.
7
+ group :development do
8
+ gem "bundler", "~> 1.0.0"
9
+ gem "jeweler", "~> 1.6.4"
10
+ gem "rcov"
11
+ gem "rspec"
12
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,32 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.6.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rake (0.9.2)
11
+ rcov (0.9.10)
12
+ rspec (2.6.0)
13
+ rspec-core (~> 2.6.0)
14
+ rspec-expectations (~> 2.6.0)
15
+ rspec-mocks (~> 2.6.0)
16
+ rspec-core (2.6.4)
17
+ rspec-expectations (2.6.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.6.0)
20
+ sqlite3-ruby (1.2.5)
21
+ text (0.2.0)
22
+
23
+ PLATFORMS
24
+ ruby
25
+
26
+ DEPENDENCIES
27
+ bundler (~> 1.0.0)
28
+ jeweler (~> 1.6.4)
29
+ rcov
30
+ rspec
31
+ sqlite3-ruby (= 1.2.5)
32
+ text
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2009-06-02
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
data/Makefile ADDED
@@ -0,0 +1,13 @@
1
+ all:
2
+ make -C src install
3
+ gem build gemspec
4
+
5
+ test: all
6
+ ruby -Ilib tests/run.rb
7
+
8
+ install: all
9
+ gem install *.gem
10
+
11
+ clean:
12
+ make -C src clean
13
+ rm *.gem
data/Manifest.txt ADDED
@@ -0,0 +1,18 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ lib/geocoder/us/database.rb
6
+ lib/geocoder/us/numbers.rb
7
+ lib/geocoder/us/address.rb
8
+ lib/geocoder/us/constants.rb
9
+ tests/database.rb
10
+ tests/numbers.rb
11
+ tests/generate.rb
12
+ tests/run.rb
13
+ tests/address.rb
14
+ tests/benchmark.rb
15
+ tests/constants.rb
16
+ tests/data/address-sample.csv
17
+ tests/data/locations.csv
18
+ tests/data/db-test.csv
data/README.rdoc ADDED
@@ -0,0 +1,197 @@
1
+ = Geocoder::US
2
+
3
+ Geocoder::US 2.0 is a software package designed to geocode US street
4
+ addresses. Although it is primarily intended for use with the US Census
5
+ Bureau's free TIGER/Line dataset, it uses an abstract US address data model
6
+ that can be employed with other sources of US street address range data.
7
+
8
+ Geocoder::US 2.0 implements a Ruby interface to parse US street addresses, and
9
+ perform fuzzy lookup against an SQLite 3 database. Geocoder::US is designed to
10
+ return the best matches found, with geographic coordinates interpolated from
11
+ the street range dataset. Geocoder::US will fill in missing information, and
12
+ it knows about standard and common non-standard postal abbreviations, ordinal
13
+ versus cardinal numbers, and more.
14
+
15
+ Geocoder::US 2.0 is shipped with a free US ZIP code data set, compiled from
16
+ public domain sources.
17
+
18
+ == Synopsis
19
+
20
+ >> require 'geocoder/us'
21
+ >> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db")
22
+ >> p db.geocode("1600 Pennsylvania Av, Washington DC")
23
+
24
+ [{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502",
25
+ :lon=>-77.037528, :number=>"1600", :fips_county=>"11001", :predir=>"",
26
+ :precision=>:range, :city=>"Washington", :lat=>38.898746, :suftyp=>"Ave",
27
+ :state=>"DC", :prequal=>"", :sufqual=>"", :score=>0.906, :prenum=>""}]
28
+
29
+ == Prerequisites
30
+
31
+ To build Geocoder::US, you will need gcc/g++, make, bash or equivalent, the
32
+ standard *NIX 'unzip' utility, and the SQLite 3 executable and development
33
+ files installed on your system.
34
+
35
+ To use the Ruby interface, you will need the 'Text' gem installed from
36
+ rubyforge. To run the tests, you will also need the 'fastercsv' gem.
37
+
38
+ Additionally, you will need a custom build of the 'sqlite3-ruby' gem that
39
+ supports loading extension modules in SQLite. You can get a patched version of
40
+ this gem from http://github.com/schuyler/sqlite3-ruby/. Until the sqlite3-ruby
41
+ maintainers roll in the relevant patch, you will need *this* version.
42
+
43
+ *NOTE*: If you do not have /usr/include/sqlite3ext.h installed, then your
44
+ sqlite3 binaries are probably not configured to support dynamic extension
45
+ loading. If not, you *must* compile and install SQLite from source, or rebuild
46
+ your system packages. This is not believed to be a problem on Debian/Ubuntu,
47
+ but is known to be a problem with Red Hat/CentOS.
48
+
49
+ *NOTE*: If you *do* have to install from source, make sure that the
50
+ source-installed 'sqlite3' program is in your path before proceeding (and not
51
+ the system-installed version), using `which sqlite3`. Also, be sure that you've
52
+ added your source install prefix (usually /usr/local) to /etc/ld.so.conf (or
53
+ its moral equivalent) and that you've run /sbin/ldconfig.
54
+
55
+ == Thread safety
56
+
57
+ SQLite 3 is not designed for concurrent use of a single database handle across
58
+ multiple threads. Therefore, to prevent segfaults, Geocoder::US::Database
59
+ implements a global mutex that wraps all database access. The use of this mutex
60
+ will ensure stability in multi-threaded applications, but incurs a performance
61
+ penalty. However, since the database is read-only from Ruby, there's no reason
62
+ in principle why each thread of a multi-threaded app can't have its own database handle.
63
+
64
+ To disable the mutex for better performance, you can do the following:
65
+
66
+ * Read the following and make sure you understand them:
67
+ * http://www.sqlite.org/faq.html#q6
68
+ * http://www.sqlite.org/cvstrac/wiki?p=MultiThreading
69
+ * Make sure you have compiled SQLite 3 with thread safety enabled.
70
+ * Instantiate a separate Geocoder::US::Database object for *each* thread
71
+ in your Ruby script, and pass :threadsafe => true to new() to disable mutex
72
+ synchronization.
73
+
74
+ Per the SQLite 3 documentation, do *not* attempt to retain a
75
+ Geocoder::US::Database object across a fork! "Problems will result if you do."
76
+
77
+ == Building Geocoder::US
78
+
79
+ Unpack the source and run 'make'. This will compile the SQLite 3 extension
80
+ needed by Geocoder::US, the Shapefile import utility, and the Geocoder-US
81
+ gem.
82
+
83
+ You can run 'make install' as root to install the gem systemwide.
84
+
85
+ == Generating a Geocoder::US Database
86
+
87
+ Build the package from source as described above. Generating the database
88
+ involves three basic steps:
89
+
90
+ * Import the Shapefile data into an SQLite database.
91
+ * Build the database indexes.
92
+ * Optionally, rebuild the database to cluster indexed rows.
93
+
94
+ We will presume that you are building a Geocoder::US database from TIGER/Line,
95
+ and that you have obtained the complete set of TIGER/Line ZIP files, and put
96
+ the entire tree in /opt/tiger. Please adjust these instructions as needed.
97
+
98
+ A full TIGER/Line database import takes ten hours to run on a normal Amazon
99
+ EC2 instance, and takes up a little over 5 gigabytes after all is said and
100
+ done. You will need to have at least 12 gigabytes of free disk space *after*
101
+ downloading the TIGER/Line dataset, if you are building the full database.
102
+
103
+ === Import TIGER/Line
104
+
105
+ From inside the Geocoder::US source tree, run the following:
106
+
107
+ $ bin/tiger_import /opt/tiger/geocoder.db /opt/tiger
108
+
109
+ This will unpack each TIGER/Line ZIP file to a temporary directory, and
110
+ perform the extract/transform/load sequence to incrementally build the
111
+ database. The process takes about 10-12 hours on a normal Amazon EC2 instance,
112
+ or about 5 CPU hours flat out on a modern PC. Note that not all TIGER/Line
113
+ source files contain address range information, so you will see error messages
114
+ for some counties, but this is normal.
115
+
116
+ If you only want to import specific counties, you can pipe a list of
117
+ TIGER/Line county directories to tiger_import on stdin. For example,
118
+ the following will install just the data for the state of Delaware:
119
+
120
+ $ ls -d /opt/tiger/10_DELAWARE/1* | bin/tiger_import ~/delaware.db
121
+
122
+ The tiger_import process uses a binary utility, shp2sqlite, which is derived
123
+ from shp2pgsql, which ships with PostGIS. The shp2sqlite utility converts
124
+ .shp and .dbf files into SQL suitable for import into SQLite. This SQL
125
+ is then piped into the sqlite3 command line tool, where it is loaded into
126
+ temporary tables, and then a set of static SQL statements (kept in the sql/
127
+ directory) are used to transform this data and import it into the database
128
+ itself.
129
+
130
+ === Build metaphones using Ruby metaphone
131
+
132
+ run bin/rebuild_metaphones /opt/tiger/geocoder.db
133
+
134
+ This creates the metaphones using the Metaphone implementation from the Ruby 'Text' gem, which will produce better geocoding results.
135
+
136
+ === Build the indexes
137
+
138
+ After the database import is complete, you will want to construct the database
139
+ indexes:
140
+
141
+ $ bin/build_indexes /opt/tiger/geocoder.db
142
+
143
+ This process takes 25 minutes on an EC2 instance (8 CPU minutes), but it's a
144
+ *lot* faster than building the indexes incrementally during the import
145
+ process. Basically, this process simply feeds SQL statements to the sqlite3
146
+ utility to construct the indexes on the existing database.
147
+
148
+ === Cluster the database tables (optional)
149
+
150
+ As a final optional step, you can cluster the database tables according to
151
+ their indexes, which will make the database smaller, and lookups faster. This
152
+ process will take an hour or two, and may be a micro-optimization.
153
+
154
+ $ bin/rebuild_cluster /opt/tiger/geocoder.db
155
+
156
+ You will need as much free disk space to run rebuild_cluster as the database
157
+ takes up, because the process essentially reconstructs the database in a new
158
+ file, and then it renames the new database over the old one.
159
+
160
+ == Running the unit tests
161
+
162
+ From within the source tree, you can run the following:
163
+
164
+ $ ruby tests/run.rb
165
+
166
+ This tests the libraries, except for the database routines. If you have a
167
+ database built, you can run the test harness like so:
168
+
169
+ $ ruby tests/run.rb /opt/tiger/geocoder.db
170
+
171
+ The full test suite may take 30 or so seconds to run completely.
172
+
173
+ == License
174
+
175
+ Geocoder::US 2.0 was based on earlier work by Schuyler Erle on
176
+ a Perl module of the same name. You can find it at
177
+ http://search.cpan.org/~sderle/.
178
+
179
+ Geocoder::US 2.0 was written by Schuyler Erle, of Entropy Free LLC,
180
+ with the gracious support of FortiusOne, Inc. Please send bug reports,
181
+ patches, kudos, etc. to patches at geocoder.us.
182
+
183
+ Copyright (c) 2009 FortiusOne, Inc.
184
+
185
+ This program is free software: you can redistribute it and/or modify
186
+ it under the terms of the GNU General Public License as published by
187
+ the Free Software Foundation, either version 3 of the License, or
188
+ (at your option) any later version.
189
+
190
+ This program is distributed in the hope that it will be useful,
191
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
192
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
193
+ GNU General Public License for more details.
194
+
195
+ You should have received a copy of the GNU General Public License
196
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
197
+
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "geo_coder"
18
+ gem.homepage = "http://github.com/kornypoet/geo_coder"
19
+ gem.license = "MIT"
20
+ gem.summary = "Geocoder based upon the Geocommons Geocoder."
21
+ gem.description = "Geocode a text address."
22
+ gem.email = "dempsey.travis@gmail.com"
23
+ gem.authors = ["Travis Dempsey"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ require 'rcov/rcovtask'
36
+ Rcov::RcovTask.new do |test|
37
+ test.libs << 'test'
38
+ test.pattern = 'test/**/test_*.rb'
39
+ test.verbose = true
40
+ test.rcov_opts << '--exclude "gems/*"'
41
+ end
42
+
43
+ task :default => :test
44
+
45
+ require 'rdoc/task'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "geo_coder #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/TODO.txt ADDED
@@ -0,0 +1,8 @@
1
+ 1. Check interpolate measure: scale longitude or not?
2
+ 5. Intersections...
3
+ - import ALL linestrings (even those without ranges)
4
+ - throw away internal points on lines that don't have ranges
5
+ 7. Documentation (*)
6
+ 8. Make SQLite memory cache size an option to the Database constructor
7
+ 9. Precision and accuracy measure
8
+ 10. Street line set back
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bin/build_indexes ADDED
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+
3
+ BASE=$(dirname $0)
4
+ PATH=$PATH:$BASE/bin
5
+ SQL="$BASE/../sql"
6
+
7
+ # Just run the SQL that constructs the indexes.
8
+ sqlite3 $1 < ${SQL}/index.sql
@@ -0,0 +1,22 @@
1
+ #!/bin/bash
2
+
3
+ BASE=$(dirname $0)
4
+ PATH=$PATH:$BASE/bin
5
+ SQL="$BASE/../sql"
6
+
7
+ OLD_DB=$1
8
+ DATABASE=${OLD_DB}.$$
9
+
10
+ [ -r $DATABASE ] && echo "$DATABASE already exists." && exit -1
11
+ [ ! -r $OLD_DB ] && echo "Can't read $OLD_DB." && exit -1
12
+
13
+ # Create a shiny new database, attach the old one,
14
+ # extract the data from it, and then index that.
15
+ # Finally, overwrite the old database with the new one.
16
+ ( cat ${SQL}/create.sql && \
17
+ echo "ATTACH DATABASE '${OLD_DB}' AS old;" && \
18
+ cat ${SQL}/cluster.sql && \
19
+ echo "DETACH DATABASE old;" && \
20
+ cat ${SQL}/index.sql && \
21
+ echo "ANALYZE;" ) | sqlite3 $DATABASE \
22
+ && mv $DATABASE $OLD_DB
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'sqlite3'
5
+ require 'text'
6
+
7
+ @db = SQLite3::Database.new("../geocoderdata/geocoder.db")
8
+ @db.create_function("metaphone", 2) do |func, string, len|
9
+ test = string.to_s.gsub(/\W/o, "")
10
+ if test =~ /^(\d+)/o
11
+ mph = $1
12
+ elsif test =~ /^([wy])$/io
13
+ mph = $1
14
+ else
15
+ mph = Text::Metaphone.metaphone test
16
+ end
17
+ func.result = mph[0...len.to_i]
18
+ end
19
+ sql = "update place set city_phone = metaphone(city,5)"
20
+
21
+ @db.execute sql
22
+
23
+ @db.close
data/bin/tiger_import ADDED
@@ -0,0 +1,59 @@
1
+ #!/bin/bash
2
+
3
+ TMP="/tmp/tiger-import.$$"
4
+ SHPS="edges"
5
+ DBFS="featnames addr"
6
+ BASE=$(dirname $0)
7
+ PATH=$PATH:$BASE
8
+ SQL="$BASE/../sql"
9
+ HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"
10
+ DATABASE=$1
11
+ shift
12
+
13
+ mkdir -p $TMP || exit 1
14
+
15
+ # Initialize the database if it doesn't exist.
16
+ [ ! -r $DATABASE ] && cat ${SQL}/{create,place}.sql | sqlite3 $DATABASE
17
+
18
+ # Marshal the county directories to import.
19
+ #
20
+ # If no directory was given on the command-line, read a list from STDIN.
21
+ if [ x"$1" = x"" ]; then
22
+ cat
23
+ else
24
+ # Otherwise, find all of the contents of each state directory.
25
+ ls -d $1/[0-9]* | while read state; do
26
+ ls -d ${state}/[0-9]*
27
+ done
28
+ fi | while read county; do
29
+ echo "--- $county"
30
+ # Unpack the county files into the temp directory.
31
+ for file in $SHPS $DBFS; do
32
+ ZIP=$(ls ${county}/*_${file}.zip 2>/dev/null)
33
+ SHP=$(ls ${county}/*_${file}.* 2>/dev/null)
34
+ if [ x"$ZIP" != x"" ]; then
35
+ unzip -q $ZIP -d $TMP
36
+ elif [ x"$SHP" != x"" ]; then
37
+ ln -s $SHP $TMP
38
+ fi
39
+ done
40
+ # Generate an SQL stream to feed into the sqlite3 binary.
41
+ # Start by loading the helper libs and initializing the temporary tables
42
+ # that will hold the TIGER data before ETL.
43
+ (echo ".load $HELPER_LIB" && \
44
+ cat ${SQL}/setup.sql && \
45
+ for file in $SHPS; do
46
+ # Convert each Shapefile into SQL statements.
47
+ shp2sqlite -aS ${TMP}/*_${file}.shp tiger_${file}
48
+ done && \
49
+ for file in $DBFS; do
50
+ # Convert each DBF into SQL statements likewise.
51
+ shp2sqlite -an ${TMP}/*_${file}.dbf tiger_${file}
52
+ done && \
53
+ cat ${SQL}/convert.sql) | sqlite3 $DATABASE
54
+ # Finally, do the transform/load phase (convert.sql)
55
+ # and clean up the temporary files.
56
+ rm -f $TMP/*
57
+ done 2>&1 | tee import-$$.log
58
+ rm -rf $TMP
59
+