geo_coder 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +12 -0
- data/Gemfile.lock +32 -0
- data/History.txt +6 -0
- data/Makefile +13 -0
- data/Manifest.txt +18 -0
- data/README.rdoc +197 -0
- data/Rakefile +53 -0
- data/TODO.txt +8 -0
- data/VERSION +1 -0
- data/bin/build_indexes +8 -0
- data/bin/rebuild_cluster +22 -0
- data/bin/rebuild_metaphones +23 -0
- data/bin/tiger_import +59 -0
- data/demos/demo/app/ext/geocodewrap.rb +84 -0
- data/demos/demo/app/views/index.builder +13 -0
- data/demos/demo/app/views/index.erb +71 -0
- data/demos/demo/config.ru +12 -0
- data/demos/demo/config/bootstraps.rb +130 -0
- data/demos/demo/config/geoenvironment.rb +25 -0
- data/demos/demo/geocoder_helper.rb +12 -0
- data/demos/demo/geocom_geocode.rb +10 -0
- data/demos/demo/main.rb +3 -0
- data/demos/demo/rakefile.rb +17 -0
- data/demos/demo/tmp/restart.txt +0 -0
- data/demos/simpledemo/views/index.builder +13 -0
- data/demos/simpledemo/views/index.erb +69 -0
- data/demos/simpledemo/ws.rb +83 -0
- data/doc/Makefile +7 -0
- data/doc/html4css1.css +279 -0
- data/doc/lookup.rst +193 -0
- data/doc/parsing.rst +125 -0
- data/doc/voidspace.css +147 -0
- data/geo_coder.gemspec +172 -0
- data/lib/geocoder/us.rb +21 -0
- data/lib/geocoder/us/address.rb +290 -0
- data/lib/geocoder/us/constants.rb +670 -0
- data/lib/geocoder/us/database.rb +745 -0
- data/lib/geocoder/us/import.rb +181 -0
- data/lib/geocoder/us/import/tiger.rb +13 -0
- data/lib/geocoder/us/numbers.rb +58 -0
- data/navteq/README +4 -0
- data/navteq/convert.sql +37 -0
- data/navteq/navteq_import +39 -0
- data/navteq/prepare.sql +92 -0
- data/sql/cluster.sql +16 -0
- data/sql/convert.sql +80 -0
- data/sql/create.sql +37 -0
- data/sql/index.sql +12 -0
- data/sql/place.csv +104944 -0
- data/sql/place.sql +104948 -0
- data/sql/setup.sql +78 -0
- data/src/Makefile +13 -0
- data/src/README +14 -0
- data/src/liblwgeom/Makefile +75 -0
- data/src/liblwgeom/box2d.c +54 -0
- data/src/liblwgeom/lex.yy.c +4799 -0
- data/src/liblwgeom/liblwgeom.h +1405 -0
- data/src/liblwgeom/lwalgorithm.c +946 -0
- data/src/liblwgeom/lwalgorithm.h +52 -0
- data/src/liblwgeom/lwcircstring.c +759 -0
- data/src/liblwgeom/lwcollection.c +541 -0
- data/src/liblwgeom/lwcompound.c +118 -0
- data/src/liblwgeom/lwcurvepoly.c +86 -0
- data/src/liblwgeom/lwgeom.c +886 -0
- data/src/liblwgeom/lwgeom_api.c +2201 -0
- data/src/liblwgeom/lwgparse.c +1219 -0
- data/src/liblwgeom/lwgunparse.c +1054 -0
- data/src/liblwgeom/lwline.c +525 -0
- data/src/liblwgeom/lwmcurve.c +125 -0
- data/src/liblwgeom/lwmline.c +137 -0
- data/src/liblwgeom/lwmpoint.c +138 -0
- data/src/liblwgeom/lwmpoly.c +141 -0
- data/src/liblwgeom/lwmsurface.c +129 -0
- data/src/liblwgeom/lwpoint.c +439 -0
- data/src/liblwgeom/lwpoly.c +579 -0
- data/src/liblwgeom/lwsegmentize.c +1047 -0
- data/src/liblwgeom/lwutil.c +369 -0
- data/src/liblwgeom/measures.c +861 -0
- data/src/liblwgeom/postgis_config.h +93 -0
- data/src/liblwgeom/ptarray.c +847 -0
- data/src/liblwgeom/vsprintf.c +179 -0
- data/src/liblwgeom/wktparse.h +126 -0
- data/src/liblwgeom/wktparse.lex +74 -0
- data/src/liblwgeom/wktparse.tab.c +2353 -0
- data/src/liblwgeom/wktparse.tab.h +145 -0
- data/src/liblwgeom/wktparse.y +385 -0
- data/src/libsqlite3_geocoder/Makefile +22 -0
- data/src/libsqlite3_geocoder/Makefile.nix +15 -0
- data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
- data/src/libsqlite3_geocoder/extension.c +121 -0
- data/src/libsqlite3_geocoder/extension.h +13 -0
- data/src/libsqlite3_geocoder/levenshtein.c +42 -0
- data/src/libsqlite3_geocoder/metaphon.c +278 -0
- data/src/libsqlite3_geocoder/util.c +37 -0
- data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
- data/src/metaphone/Makefile +7 -0
- data/src/metaphone/README +49 -0
- data/src/metaphone/extension.c +37 -0
- data/src/metaphone/metaphon.c +251 -0
- data/src/shp2sqlite/Makefile +37 -0
- data/src/shp2sqlite/Makefile.nix +36 -0
- data/src/shp2sqlite/Makefile.redhat +35 -0
- data/src/shp2sqlite/dbfopen.c +1595 -0
- data/src/shp2sqlite/getopt.c +695 -0
- data/src/shp2sqlite/getopt.h +127 -0
- data/src/shp2sqlite/shapefil.h +500 -0
- data/src/shp2sqlite/shp2sqlite.c +1974 -0
- data/src/shp2sqlite/shpopen.c +1894 -0
- data/tests/address.rb +236 -0
- data/tests/benchmark.rb +20 -0
- data/tests/constants.rb +57 -0
- data/tests/data/address-sample.csv +52 -0
- data/tests/data/db-test.csv +57 -0
- data/tests/data/locations.csv +4 -0
- data/tests/database.rb +137 -0
- data/tests/generate.rb +34 -0
- data/tests/numbers.rb +46 -0
- data/tests/run.rb +11 -0
- metadata +237 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'sinatra'
|
|
3
|
+
require 'geocoder/us/database'
|
|
4
|
+
require 'fastercsv'
|
|
5
|
+
require 'json'
|
|
6
|
+
|
|
7
|
+
# Port the demo web service listens on. Defaults to 8080; set GEOCODER_PORT
# in the environment to override it without editing this file.
set :port, Integer(ENV.fetch('GEOCODER_PORT', 8080))

# Geocoder database handle used by the route handlers in this script.
# Defaults to the original hard-coded location; set GEOCODER_DB to point at a
# different database file.
# NOTE(review): Ruby 3.0+ rejects class-variable access from the toplevel
# scope; if this demo must run there, replace @@db (here and in the routes)
# with a constant or a Sinatra setting — confirm against the target Ruby.
@@db = Geocoder::US::Database.new(ENV.fetch('GEOCODER_DB', "/fortiusone/geocoder/geocoder.db"))
|
|
9
|
+
# GET / — geocode the optional `address` parameter and render the result.
#
# The `format` parameter picks the representation: anything matching /xml/
# renders the :index builder template, anything matching /atom/ renders the
# :atom builder template, and everything else renders the :index ERB view.
get '/' do
  address = params[:address]
  # @records stays unset when no address was supplied.
  @records = @@db.geocode(address) unless address.nil?

  # Resolve the rendering engine and template first, then dispatch once.
  # The /xml/ test comes before /atom/, as in the original ordering.
  engine, template =
    case params[:format]
    when /xml/  then [:builder, :index]
    when /atom/ then [:builder, :atom]
    else             [:erb, :index]
    end
  send(engine, template)
end
|
|
23
|
+
|
|
24
|
+
require 'open-uri'
|
|
25
|
+
# GET /link.:format — geocode the rows of a CSV document and render the matches.
#
# The CSV comes from one of two request parameters:
#   * `url`      — a remote document fetched through open-uri, or
#   * `filename` — the local file uploads/<filename>.csv.
# The first row is treated as the header. For every other row, the value in
# the second column (index 1) is geocoded and the first match is kept; rows
# with no match, or whose lookup raises, are dropped from @records.
#
# Responds 400 when `url` is not a fetchable remote URL or when `filename`
# contains a directory component.
get '/link.:format' do
  # Use `params[:url]` rather than `params.include?(:url)`: Sinatra releases
  # that store params under String keys only translate Symbol keys on `[]`,
  # so the include? test was never true there and this branch was dead.
  remote = params[:url]
  if remote
    begin
      source = URI.parse(remote)
    rescue URI::InvalidURIError
      halt 400, "invalid url"
    end
    # Only URI classes extended by open-uri respond to #open. Going through
    # the URI object (never Kernel#open) means a value such as "| cmd" or a
    # local path can no longer spawn a process or read an arbitrary file.
    # NOTE(review): this still fetches any remote host the caller names;
    # add an allow-list if the service is exposed to untrusted clients.
    halt 400, "unsupported url" unless source.respond_to?(:open)
    csv_text = source.read
  else
    name = params[:filename].to_s
    # Reject directory components so the lookup cannot leave uploads/.
    halt 400, "invalid filename" unless name == File.basename(name)
    # File.read closes the handle; the original left the opened IO dangling.
    csv_text = File.read("uploads/#{name}.csv")
  end

  csv = FasterCSV.parse(csv_text)
  headers = csv[0]

  @records = csv.map do |record|
    # Skip the header row (and any later row identical to it).
    next if record == headers
    begin
      (@@db.geocode record[1]).first
    rescue StandardError => e
      # Best effort: report the failing row and carry on with the rest.
      # StandardError (not Exception) so signals and exit still propagate.
      puts e.message
      nil
    end
  end.compact

  # /atom/ is tested before /xml/ here, as in the original ordering.
  case params[:format]
  when /atom/
    builder :atom
  when /xml/
    builder :index
  else
    erb :index
  end
end
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# POST /batch — geocode a batch of addresses sent in the request body.
#
# The body is parsed as delimited text with "*" between rows and "|" between
# columns. The first row is the header. For every other row, column 1 is
# geocoded and the first match is merged with { <first header> => <column 0> },
# so each result carries the caller's own identifier for the row.
# NOTE(review): assumes the geocoder returns Hash-like matches (they must
# respond to #merge and, for the JSON branch, serialize via #to_json).
#
# Rows with no match, or whose lookup raises, are omitted from @records.
# `format` selects XML, Atom, JSON or (by default) the HTML index view.
post '/batch' do
  payload = request.env["rack.input"].read
  rows = FasterCSV.parse(payload, :row_sep => "*", :col_sep => "|")
  headers = rows[0]

  @records = rows.map do |record|
    # Skip the header row (and any later row identical to it).
    next if record == headers
    begin
      match = (@@db.geocode record[1]).first
      # No match is an expected outcome, not an error: yield nil so the row
      # is compacted away instead of raising NoMethodError on nil.
      match && match.merge(headers[0] => record[0])
    rescue StandardError => e
      # Best effort: report the failing row and carry on with the rest.
      # StandardError (not Exception) so signals and exit still propagate.
      puts e.message
      nil
    end
  end.compact

  case params[:format]
  when /xml/
    builder :index
  when /atom/
    builder :atom
  when /json/
    # Label the payload as JSON rather than the default HTML content type.
    content_type :json
    @records.to_json
  else
    erb :index
  end
end
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
|
data/doc/Makefile
ADDED
data/doc/html4css1.css
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/*
|
|
2
|
+
:Author: David Goodger
|
|
3
|
+
:Contact: goodger@users.sourceforge.net
|
|
4
|
+
:Date: $Date: 2005-12-18 01:56:14 +0100 (Sun, 18 Dec 2005) $
|
|
5
|
+
:Revision: $Revision: 4224 $
|
|
6
|
+
:Copyright: This stylesheet has been placed in the public domain.
|
|
7
|
+
|
|
8
|
+
Default cascading style sheet for the HTML output of Docutils.
|
|
9
|
+
|
|
10
|
+
See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
|
|
11
|
+
customize this style sheet.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/* used to remove borders from tables and images */
|
|
15
|
+
.borderless, table.borderless td, table.borderless th {
|
|
16
|
+
border: 0 }
|
|
17
|
+
|
|
18
|
+
table.borderless td, table.borderless th {
|
|
19
|
+
/* Override padding for "table.docutils td" with "! important".
|
|
20
|
+
The right padding separates the table cells. */
|
|
21
|
+
padding: 0 0.5em 0 0 ! important }
|
|
22
|
+
|
|
23
|
+
.first {
|
|
24
|
+
/* Override more specific margin styles with "! important". */
|
|
25
|
+
margin-top: 0 ! important }
|
|
26
|
+
|
|
27
|
+
.last, .with-subtitle {
|
|
28
|
+
margin-bottom: 0 ! important }
|
|
29
|
+
|
|
30
|
+
.hidden {
|
|
31
|
+
display: none }
|
|
32
|
+
|
|
33
|
+
a.toc-backref {
|
|
34
|
+
text-decoration: none ;
|
|
35
|
+
color: black }
|
|
36
|
+
|
|
37
|
+
blockquote.epigraph {
|
|
38
|
+
margin: 2em 5em ; }
|
|
39
|
+
|
|
40
|
+
dl.docutils dd {
|
|
41
|
+
margin-bottom: 0.5em }
|
|
42
|
+
|
|
43
|
+
/* Uncomment (and remove this text!) to get bold-faced definition list terms
|
|
44
|
+
dl.docutils dt {
|
|
45
|
+
font-weight: bold }
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
div.abstract {
|
|
49
|
+
margin: 2em 5em }
|
|
50
|
+
|
|
51
|
+
div.abstract p.topic-title {
|
|
52
|
+
font-weight: bold ;
|
|
53
|
+
text-align: center }
|
|
54
|
+
|
|
55
|
+
div.admonition, div.attention, div.caution, div.danger, div.error,
|
|
56
|
+
div.hint, div.important, div.note, div.tip, div.warning {
|
|
57
|
+
margin: 2em ;
|
|
58
|
+
border: medium outset ;
|
|
59
|
+
padding: 1em }
|
|
60
|
+
|
|
61
|
+
div.admonition p.admonition-title, div.hint p.admonition-title,
|
|
62
|
+
div.important p.admonition-title, div.note p.admonition-title,
|
|
63
|
+
div.tip p.admonition-title {
|
|
64
|
+
font-weight: bold ;
|
|
65
|
+
font-family: sans-serif }
|
|
66
|
+
|
|
67
|
+
div.attention p.admonition-title, div.caution p.admonition-title,
|
|
68
|
+
div.danger p.admonition-title, div.error p.admonition-title,
|
|
69
|
+
div.warning p.admonition-title {
|
|
70
|
+
color: red ;
|
|
71
|
+
font-weight: bold ;
|
|
72
|
+
font-family: sans-serif }
|
|
73
|
+
|
|
74
|
+
/* Uncomment (and remove this text!) to get reduced vertical space in
|
|
75
|
+
compound paragraphs.
|
|
76
|
+
div.compound .compound-first, div.compound .compound-middle {
|
|
77
|
+
margin-bottom: 0.5em }
|
|
78
|
+
|
|
79
|
+
div.compound .compound-last, div.compound .compound-middle {
|
|
80
|
+
margin-top: 0.5em }
|
|
81
|
+
*/
|
|
82
|
+
|
|
83
|
+
div.dedication {
|
|
84
|
+
margin: 2em 5em ;
|
|
85
|
+
text-align: center ;
|
|
86
|
+
font-style: italic }
|
|
87
|
+
|
|
88
|
+
div.dedication p.topic-title {
|
|
89
|
+
font-weight: bold ;
|
|
90
|
+
font-style: normal }
|
|
91
|
+
|
|
92
|
+
div.figure {
|
|
93
|
+
margin-left: 2em ;
|
|
94
|
+
margin-right: 2em }
|
|
95
|
+
|
|
96
|
+
div.footer, div.header {
|
|
97
|
+
clear: both;
|
|
98
|
+
font-size: smaller }
|
|
99
|
+
|
|
100
|
+
div.line-block {
|
|
101
|
+
display: block ;
|
|
102
|
+
margin-top: 1em ;
|
|
103
|
+
margin-bottom: 1em }
|
|
104
|
+
|
|
105
|
+
div.line-block div.line-block {
|
|
106
|
+
margin-top: 0 ;
|
|
107
|
+
margin-bottom: 0 ;
|
|
108
|
+
margin-left: 1.5em }
|
|
109
|
+
|
|
110
|
+
div.sidebar {
|
|
111
|
+
margin-left: 1em ;
|
|
112
|
+
border: medium outset ;
|
|
113
|
+
padding: 1em ;
|
|
114
|
+
background-color: #ffffee ;
|
|
115
|
+
width: 40% ;
|
|
116
|
+
float: right ;
|
|
117
|
+
clear: right }
|
|
118
|
+
|
|
119
|
+
div.sidebar p.rubric {
|
|
120
|
+
font-family: sans-serif ;
|
|
121
|
+
font-size: medium }
|
|
122
|
+
|
|
123
|
+
div.system-messages {
|
|
124
|
+
margin: 5em }
|
|
125
|
+
|
|
126
|
+
div.system-messages h1 {
|
|
127
|
+
color: red }
|
|
128
|
+
|
|
129
|
+
div.system-message {
|
|
130
|
+
border: medium outset ;
|
|
131
|
+
padding: 1em }
|
|
132
|
+
|
|
133
|
+
div.system-message p.system-message-title {
|
|
134
|
+
color: red ;
|
|
135
|
+
font-weight: bold }
|
|
136
|
+
|
|
137
|
+
div.topic {
|
|
138
|
+
margin: 2em }
|
|
139
|
+
|
|
140
|
+
h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
|
|
141
|
+
h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
|
|
142
|
+
margin-top: 0.4em }
|
|
143
|
+
|
|
144
|
+
h1.title {
|
|
145
|
+
text-align: center }
|
|
146
|
+
|
|
147
|
+
h2.subtitle {
|
|
148
|
+
text-align: center }
|
|
149
|
+
|
|
150
|
+
hr.docutils {
|
|
151
|
+
width: 75% }
|
|
152
|
+
|
|
153
|
+
img.align-left {
|
|
154
|
+
clear: left }
|
|
155
|
+
|
|
156
|
+
img.align-right {
|
|
157
|
+
clear: right }
|
|
158
|
+
|
|
159
|
+
ol.simple, ul.simple {
|
|
160
|
+
margin-bottom: 1em }
|
|
161
|
+
|
|
162
|
+
ol.arabic {
|
|
163
|
+
list-style: decimal }
|
|
164
|
+
|
|
165
|
+
ol.loweralpha {
|
|
166
|
+
list-style: lower-alpha }
|
|
167
|
+
|
|
168
|
+
ol.upperalpha {
|
|
169
|
+
list-style: upper-alpha }
|
|
170
|
+
|
|
171
|
+
ol.lowerroman {
|
|
172
|
+
list-style: lower-roman }
|
|
173
|
+
|
|
174
|
+
ol.upperroman {
|
|
175
|
+
list-style: upper-roman }
|
|
176
|
+
|
|
177
|
+
p.attribution {
|
|
178
|
+
text-align: right ;
|
|
179
|
+
margin-left: 50% }
|
|
180
|
+
|
|
181
|
+
p.caption {
|
|
182
|
+
font-style: italic }
|
|
183
|
+
|
|
184
|
+
p.credits {
|
|
185
|
+
font-style: italic ;
|
|
186
|
+
font-size: smaller }
|
|
187
|
+
|
|
188
|
+
p.label {
|
|
189
|
+
white-space: nowrap }
|
|
190
|
+
|
|
191
|
+
p.rubric {
|
|
192
|
+
font-weight: bold ;
|
|
193
|
+
font-size: larger ;
|
|
194
|
+
color: maroon ;
|
|
195
|
+
text-align: center }
|
|
196
|
+
|
|
197
|
+
p.sidebar-title {
|
|
198
|
+
font-family: sans-serif ;
|
|
199
|
+
font-weight: bold ;
|
|
200
|
+
font-size: larger }
|
|
201
|
+
|
|
202
|
+
p.sidebar-subtitle {
|
|
203
|
+
font-family: sans-serif ;
|
|
204
|
+
font-weight: bold }
|
|
205
|
+
|
|
206
|
+
p.topic-title {
|
|
207
|
+
font-weight: bold }
|
|
208
|
+
|
|
209
|
+
pre.address {
|
|
210
|
+
margin-bottom: 0 ;
|
|
211
|
+
margin-top: 0 ;
|
|
212
|
+
font-family: serif ;
|
|
213
|
+
font-size: 100% }
|
|
214
|
+
|
|
215
|
+
pre.literal-block, pre.doctest-block {
|
|
216
|
+
margin-left: 2em ;
|
|
217
|
+
margin-right: 2em ;
|
|
218
|
+
background-color: #eeeeee }
|
|
219
|
+
|
|
220
|
+
span.classifier {
|
|
221
|
+
font-family: sans-serif ;
|
|
222
|
+
font-style: oblique }
|
|
223
|
+
|
|
224
|
+
span.classifier-delimiter {
|
|
225
|
+
font-family: sans-serif ;
|
|
226
|
+
font-weight: bold }
|
|
227
|
+
|
|
228
|
+
span.interpreted {
|
|
229
|
+
font-family: sans-serif }
|
|
230
|
+
|
|
231
|
+
span.option {
|
|
232
|
+
white-space: nowrap }
|
|
233
|
+
|
|
234
|
+
span.pre {
|
|
235
|
+
white-space: pre }
|
|
236
|
+
|
|
237
|
+
span.problematic {
|
|
238
|
+
color: red }
|
|
239
|
+
|
|
240
|
+
span.section-subtitle {
|
|
241
|
+
/* font-size relative to parent (h1..h6 element) */
|
|
242
|
+
font-size: 80% }
|
|
243
|
+
|
|
244
|
+
table.citation {
|
|
245
|
+
border-left: solid 1px gray;
|
|
246
|
+
margin-left: 1px }
|
|
247
|
+
|
|
248
|
+
table.docinfo {
|
|
249
|
+
margin: 2em 4em }
|
|
250
|
+
|
|
251
|
+
table.docutils {
|
|
252
|
+
margin-top: 0.5em ;
|
|
253
|
+
margin-bottom: 0.5em }
|
|
254
|
+
|
|
255
|
+
table.footnote {
|
|
256
|
+
border-left: solid 1px black;
|
|
257
|
+
margin-left: 1px }
|
|
258
|
+
|
|
259
|
+
table.docutils td, table.docutils th,
|
|
260
|
+
table.docinfo td, table.docinfo th {
|
|
261
|
+
padding-left: 0.5em ;
|
|
262
|
+
padding-right: 0.5em ;
|
|
263
|
+
vertical-align: top }
|
|
264
|
+
|
|
265
|
+
table.docutils th.field-name, table.docinfo th.docinfo-name {
|
|
266
|
+
font-weight: bold ;
|
|
267
|
+
text-align: left ;
|
|
268
|
+
white-space: nowrap ;
|
|
269
|
+
padding-left: 0 }
|
|
270
|
+
|
|
271
|
+
h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
|
|
272
|
+
h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
|
|
273
|
+
font-size: 100% }
|
|
274
|
+
|
|
275
|
+
tt.docutils {
|
|
276
|
+
background-color: #eeeeee }
|
|
277
|
+
|
|
278
|
+
ul.auto-toc {
|
|
279
|
+
list-style-type: none }
|
data/doc/lookup.rst
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
.. _lookup:
|
|
2
|
+
|
|
3
|
+
===================================
|
|
4
|
+
Geocoder.us Address Lookup Strategy
|
|
5
|
+
===================================
|
|
6
|
+
|
|
7
|
+
:Author: Schuyler Erle
|
|
8
|
+
:Contact: schuyler at geocoder dot us
|
|
9
|
+
:Created: 2009/03/13
|
|
10
|
+
:Edited: 2009/03/14
|
|
11
|
+
|
|
12
|
+
Definitions
|
|
13
|
+
-----------
|
|
14
|
+
|
|
15
|
+
Edge
|
|
16
|
+
Database representation of a street segment, consisting of a linestring
|
|
17
|
+
geometry and an edge ID. Each edge relates to many ranges and many features
|
|
18
|
+
through its ID.
|
|
19
|
+
|
|
20
|
+
Feature
|
|
21
|
+
Database representation of a named street, consisting of street name
|
|
22
|
+
and modifier elements, a reference ZIP code, and a primary/alternate flag.
|
|
23
|
+
|
|
24
|
+
Range
|
|
25
|
+
Database representation of a range of address numbers on a given
|
|
26
|
+
street, consisting of range start and end numbers, an optional prefix
|
|
27
|
+
ending with a non-numeric character, and a delivery ZIP code for that
|
|
28
|
+
range.
|
|
29
|
+
|
|
30
|
+
Place
|
|
31
|
+
Database representation of a ZIP code, consisting of a city name,
|
|
32
|
+
state abbreviation, a ZIP code, and a primary/alternate flag.
|
|
33
|
+
|
|
34
|
+
Address record
|
|
35
|
+
A set consisting of exactly one edge, one feature, and one range, related
|
|
36
|
+
through the edge ID.
|
|
37
|
+
|
|
38
|
+
Address query
|
|
39
|
+
An ordered set of {Number Prefix, Number, Directional Prefix, Type Prefix,
|
|
40
|
+
Qualifier Prefix, Street Name, Qualifier Suffix, Type Suffix, Directional
|
|
41
|
+
Suffix, City, State, ZIP}. All of the elements are optional except Number and
|
|
42
|
+
Street Name. Either ZIP or City must also be present. The State element
|
|
43
|
+
and all of the prefix and suffix elements are assumed to be normalized to
|
|
44
|
+
standard postal abbreviations.
|
|
45
|
+
|
|
46
|
+
Address string
|
|
47
|
+
A string including some or all of the elements of an address.
|
|
48
|
+
|
|
49
|
+
Address Lookup Strategy
|
|
50
|
+
-----------------------
|
|
51
|
+
|
|
52
|
+
1. Given an address query, initialize an empty set of candidate places,
|
|
53
|
+
and an empty set of candidate address records.
|
|
54
|
+
|
|
55
|
+
#. If a ZIP was given, look up `the place from the ZIP`_, and add the
|
|
56
|
+
place, if any, to the candidate place set.
|
|
57
|
+
|
|
58
|
+
#. If a city was given, look up all `the places matching the metaphone hash
|
|
59
|
+
of the city name`_, and add them, if any, to the candidate place set.
|
|
60
|
+
|
|
61
|
+
#. Generate a unique set of ZIPs from the set of candidate places, since a ZIP
|
|
62
|
+
may have one or more names associated with it.
|
|
63
|
+
|
|
64
|
+
#. Generate `a list of candidate address records`_ by fetching all the street
|
|
65
|
+
features matching the metaphone hash of the street name and one of the ZIPs
|
|
66
|
+
in the query set, along with the ranges matching the edge ID of each
|
|
67
|
+
feature, where the given number is in the range. The edge does not
|
|
68
|
+
need to be fetched yet.
|
|
69
|
+
|
|
70
|
+
#. If the lookup generates no results, optionally generate `more candidate
|
|
71
|
+
records`_ by looking up all the street features matching the metaphone hash
|
|
72
|
+
of the street name, along with the ranges matching the edge ID of each
|
|
73
|
+
feature, where the given number is in the range. This may be a very time
|
|
74
|
+
consuming database query, because some street names are quite common.
|
|
75
|
+
|
|
76
|
+
#. Score each of the candidate records as follows:
|
|
77
|
+
|
|
78
|
+
a. Score one point for every provided element of the address query that it
|
|
79
|
+
matches exactly.
|
|
80
|
+
#. Optionally, compute the scaled Damerau-Levenshtein distance (or
|
|
81
|
+
alternately the simple Levenshtein distance) between each provided
|
|
82
|
+
element of the address query and the corresponding element in the
|
|
83
|
+
candidate. Score one minus the scaled distance, which yields a fraction
|
|
84
|
+
of a point.
|
|
85
|
+
#. Score one point if the parity of the starting range number matches the parity
|
|
86
|
+
of the queried address number.
|
|
87
|
+
#. Note that the maximum possible score is equal to the number of provided
|
|
88
|
+
elements in the address query. Divide the score by the maximum possible.
|
|
89
|
+
This is the confidence value of the candidate.
|
|
90
|
+
|
|
91
|
+
#. Sort the candidate address records by confidence. Retain only the records
|
|
92
|
+
that share the highest confidence as candidates.
|
|
93
|
+
|
|
94
|
+
#. Fetch `the edges and primary feature names`_ matching the edge IDs of
|
|
95
|
+
the remaining candidate address records.
|
|
96
|
+
|
|
97
|
+
#. For each remaining candidate record:
|
|
98
|
+
|
|
99
|
+
a. Replace the candidate record feature elements with those of the
|
|
100
|
+
primary feature name for that edge.
|
|
101
|
+
#. Fetch `all of the ranges for the edge ID`_ of the candidate, sorted by
|
|
102
|
+
starting number.
|
|
103
|
+
#. Compute the sum of the differences of the starting and ending house
|
|
104
|
+
number for each range. This is the total number width of the edge.
|
|
105
|
+
#. Take the difference between the candidate starting number and the lowest
|
|
106
|
+
starting number, add the difference between the queried number and the
|
|
107
|
+
candidate starting number, and divide by the total number width. This is
|
|
108
|
+
the interpolation distance.
|
|
109
|
+
#. Optionally, find the local UTM zone and project the edge into it.
|
|
110
|
+
#. Find the point along the line at the interpolation distance.
|
|
111
|
+
#. If the edge was projected, unproject the point.
|
|
112
|
+
#. Assign the point as the geocoded location of the query to the candidate
|
|
113
|
+
record.
|
|
114
|
+
|
|
115
|
+
#. Construct a set of result ZIPs from the remaining candidates, and look up
|
|
116
|
+
`the primary name and state for each ZIP`_ in the set. Assign the matching
|
|
117
|
+
primary city and state to each candidate.
|
|
118
|
+
|
|
119
|
+
#. Return the set of candidate records as the result of the query.
|
|
120
|
+
|
|
121
|
+
SQL Statements
|
|
122
|
+
--------------
|
|
123
|
+
|
|
124
|
+
the place from the ZIP
|
|
125
|
+
~~~~~~~~~~~~~~~~~~~~~~~
|
|
126
|
+
|
|
127
|
+
::
|
|
128
|
+
|
|
129
|
+
SELECT * FROM place WHERE zip = '...';
|
|
130
|
+
|
|
131
|
+
the places matching the metaphone hash of the city name
|
|
132
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
133
|
+
|
|
134
|
+
::
|
|
135
|
+
|
|
136
|
+
SELECT * FROM place WHERE city_phone = metaphone('...');
|
|
137
|
+
|
|
138
|
+
a list of candidate address records
|
|
139
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
140
|
+
|
|
141
|
+
::
|
|
142
|
+
|
|
143
|
+
SELECT feature.*, range.* FROM feature, range
|
|
144
|
+
WHERE name_phone = metaphone('...') AND feature.zip IN (...)
|
|
145
|
+
AND range.tlid = feature.tlid
|
|
146
|
+
AND fromhn <= ... AND tohn >= ...;
|
|
147
|
+
|
|
148
|
+
more candidate records
|
|
149
|
+
~~~~~~~~~~~~~~~~~~~~~~
|
|
150
|
+
|
|
151
|
+
::
|
|
152
|
+
|
|
153
|
+
SELECT feature.*, range.* FROM feature, range
|
|
154
|
+
WHERE name_phone = metaphone('...')
|
|
155
|
+
AND range.tlid = feature.tlid
|
|
156
|
+
AND fromhn <= ... AND tohn >= ...;
|
|
157
|
+
|
|
158
|
+
the edges and primary feature names
|
|
159
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
160
|
+
|
|
161
|
+
::
|
|
162
|
+
|
|
163
|
+
SELECT feature.*, edge.* FROM feature, edge
|
|
164
|
+
WHERE feature.tlid = ... AND paflag = 'P'
|
|
165
|
+
AND edge.tlid = feature.tlid;
|
|
166
|
+
|
|
167
|
+
-- or
|
|
168
|
+
|
|
169
|
+
SELECT feature.*, edge.* FROM feature, edge
|
|
170
|
+
WHERE feature.tlid IN (...)
|
|
171
|
+
AND paflag = 'P'
|
|
172
|
+
AND edge.tlid = feature.tlid;
|
|
173
|
+
|
|
174
|
+
all of the ranges for the edge ID
|
|
175
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
176
|
+
|
|
177
|
+
::
|
|
178
|
+
|
|
179
|
+
SELECT * FROM range WHERE range.tlid = ...;
|
|
180
|
+
|
|
181
|
+
-- or
|
|
182
|
+
|
|
183
|
+
SELECT * FROM range WHERE range.tlid IN (...);
|
|
184
|
+
|
|
185
|
+
the primary name and state for each ZIP
|
|
186
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
::
|
|
190
|
+
|
|
191
|
+
SELECT * FROM place WHERE zip IN (...) AND paflag = 'P';
|
|
192
|
+
|
|
193
|
+
= 30 =
|