geo_coder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +12 -0
- data/Gemfile.lock +32 -0
- data/History.txt +6 -0
- data/Makefile +13 -0
- data/Manifest.txt +18 -0
- data/README.rdoc +197 -0
- data/Rakefile +53 -0
- data/TODO.txt +8 -0
- data/VERSION +1 -0
- data/bin/build_indexes +8 -0
- data/bin/rebuild_cluster +22 -0
- data/bin/rebuild_metaphones +23 -0
- data/bin/tiger_import +59 -0
- data/demos/demo/app/ext/geocodewrap.rb +84 -0
- data/demos/demo/app/views/index.builder +13 -0
- data/demos/demo/app/views/index.erb +71 -0
- data/demos/demo/config.ru +12 -0
- data/demos/demo/config/bootstraps.rb +130 -0
- data/demos/demo/config/geoenvironment.rb +25 -0
- data/demos/demo/geocoder_helper.rb +12 -0
- data/demos/demo/geocom_geocode.rb +10 -0
- data/demos/demo/main.rb +3 -0
- data/demos/demo/rakefile.rb +17 -0
- data/demos/demo/tmp/restart.txt +0 -0
- data/demos/simpledemo/views/index.builder +13 -0
- data/demos/simpledemo/views/index.erb +69 -0
- data/demos/simpledemo/ws.rb +83 -0
- data/doc/Makefile +7 -0
- data/doc/html4css1.css +279 -0
- data/doc/lookup.rst +193 -0
- data/doc/parsing.rst +125 -0
- data/doc/voidspace.css +147 -0
- data/geo_coder.gemspec +172 -0
- data/lib/geocoder/us.rb +21 -0
- data/lib/geocoder/us/address.rb +290 -0
- data/lib/geocoder/us/constants.rb +670 -0
- data/lib/geocoder/us/database.rb +745 -0
- data/lib/geocoder/us/import.rb +181 -0
- data/lib/geocoder/us/import/tiger.rb +13 -0
- data/lib/geocoder/us/numbers.rb +58 -0
- data/navteq/README +4 -0
- data/navteq/convert.sql +37 -0
- data/navteq/navteq_import +39 -0
- data/navteq/prepare.sql +92 -0
- data/sql/cluster.sql +16 -0
- data/sql/convert.sql +80 -0
- data/sql/create.sql +37 -0
- data/sql/index.sql +12 -0
- data/sql/place.csv +104944 -0
- data/sql/place.sql +104948 -0
- data/sql/setup.sql +78 -0
- data/src/Makefile +13 -0
- data/src/README +14 -0
- data/src/liblwgeom/Makefile +75 -0
- data/src/liblwgeom/box2d.c +54 -0
- data/src/liblwgeom/lex.yy.c +4799 -0
- data/src/liblwgeom/liblwgeom.h +1405 -0
- data/src/liblwgeom/lwalgorithm.c +946 -0
- data/src/liblwgeom/lwalgorithm.h +52 -0
- data/src/liblwgeom/lwcircstring.c +759 -0
- data/src/liblwgeom/lwcollection.c +541 -0
- data/src/liblwgeom/lwcompound.c +118 -0
- data/src/liblwgeom/lwcurvepoly.c +86 -0
- data/src/liblwgeom/lwgeom.c +886 -0
- data/src/liblwgeom/lwgeom_api.c +2201 -0
- data/src/liblwgeom/lwgparse.c +1219 -0
- data/src/liblwgeom/lwgunparse.c +1054 -0
- data/src/liblwgeom/lwline.c +525 -0
- data/src/liblwgeom/lwmcurve.c +125 -0
- data/src/liblwgeom/lwmline.c +137 -0
- data/src/liblwgeom/lwmpoint.c +138 -0
- data/src/liblwgeom/lwmpoly.c +141 -0
- data/src/liblwgeom/lwmsurface.c +129 -0
- data/src/liblwgeom/lwpoint.c +439 -0
- data/src/liblwgeom/lwpoly.c +579 -0
- data/src/liblwgeom/lwsegmentize.c +1047 -0
- data/src/liblwgeom/lwutil.c +369 -0
- data/src/liblwgeom/measures.c +861 -0
- data/src/liblwgeom/postgis_config.h +93 -0
- data/src/liblwgeom/ptarray.c +847 -0
- data/src/liblwgeom/vsprintf.c +179 -0
- data/src/liblwgeom/wktparse.h +126 -0
- data/src/liblwgeom/wktparse.lex +74 -0
- data/src/liblwgeom/wktparse.tab.c +2353 -0
- data/src/liblwgeom/wktparse.tab.h +145 -0
- data/src/liblwgeom/wktparse.y +385 -0
- data/src/libsqlite3_geocoder/Makefile +22 -0
- data/src/libsqlite3_geocoder/Makefile.nix +15 -0
- data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
- data/src/libsqlite3_geocoder/extension.c +121 -0
- data/src/libsqlite3_geocoder/extension.h +13 -0
- data/src/libsqlite3_geocoder/levenshtein.c +42 -0
- data/src/libsqlite3_geocoder/metaphon.c +278 -0
- data/src/libsqlite3_geocoder/util.c +37 -0
- data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
- data/src/metaphone/Makefile +7 -0
- data/src/metaphone/README +49 -0
- data/src/metaphone/extension.c +37 -0
- data/src/metaphone/metaphon.c +251 -0
- data/src/shp2sqlite/Makefile +37 -0
- data/src/shp2sqlite/Makefile.nix +36 -0
- data/src/shp2sqlite/Makefile.redhat +35 -0
- data/src/shp2sqlite/dbfopen.c +1595 -0
- data/src/shp2sqlite/getopt.c +695 -0
- data/src/shp2sqlite/getopt.h +127 -0
- data/src/shp2sqlite/shapefil.h +500 -0
- data/src/shp2sqlite/shp2sqlite.c +1974 -0
- data/src/shp2sqlite/shpopen.c +1894 -0
- data/tests/address.rb +236 -0
- data/tests/benchmark.rb +20 -0
- data/tests/constants.rb +57 -0
- data/tests/data/address-sample.csv +52 -0
- data/tests/data/db-test.csv +57 -0
- data/tests/data/locations.csv +4 -0
- data/tests/database.rb +137 -0
- data/tests/generate.rb +34 -0
- data/tests/numbers.rb +46 -0
- data/tests/run.rb +11 -0
- metadata +237 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'sinatra'
|
3
|
+
require 'geocoder/us/database'
|
4
|
+
require 'fastercsv'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
set :port, 8080
|
8
|
+
@@db = Geocoder::US::Database.new("/fortiusone/geocoder/geocoder.db")
|
9
|
+
get '/' do
|
10
|
+
unless params[:address].nil?
|
11
|
+
@records = @@db.geocode params[:address]
|
12
|
+
end
|
13
|
+
|
14
|
+
case params[:format]
|
15
|
+
when /xml/
|
16
|
+
builder :index
|
17
|
+
when /atom/
|
18
|
+
builder :atom
|
19
|
+
else
|
20
|
+
erb :index
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
require 'open-uri'
|
25
|
+
get '/link.:format' do
|
26
|
+
if(params.include?(:url))
|
27
|
+
csv_file = params[:url]
|
28
|
+
else
|
29
|
+
csv_file = "uploads/#{params[:filename]}.csv"
|
30
|
+
end
|
31
|
+
csv = FasterCSV.parse(open(csv_file))
|
32
|
+
headers = csv[0]
|
33
|
+
|
34
|
+
@records = csv.collect do |record|
|
35
|
+
next if record == headers
|
36
|
+
begin
|
37
|
+
(@@db.geocode record[1]).first
|
38
|
+
rescue Exception => e
|
39
|
+
puts e.message
|
40
|
+
next
|
41
|
+
end
|
42
|
+
end.compact
|
43
|
+
case params[:format]
|
44
|
+
when /atom/
|
45
|
+
builder :atom
|
46
|
+
when /xml/
|
47
|
+
builder :index
|
48
|
+
else
|
49
|
+
erb :index
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
|
55
|
+
post '/batch' do
|
56
|
+
csv_file = request.env["rack.input"].read
|
57
|
+
csv = FasterCSV.parse(csv_file, :row_sep => "*", :col_sep => "|")
|
58
|
+
headers = csv[0]
|
59
|
+
@records = csv.collect do |record|
|
60
|
+
next if record == headers
|
61
|
+
begin
|
62
|
+
(@@db.geocode record[1]).first.merge(headers[0] => record[0])
|
63
|
+
rescue Exception => e
|
64
|
+
puts e.message
|
65
|
+
next
|
66
|
+
end
|
67
|
+
end.compact
|
68
|
+
case params[:format]
|
69
|
+
when /xml/
|
70
|
+
builder :index
|
71
|
+
when /atom/
|
72
|
+
builder :atom
|
73
|
+
when /json/
|
74
|
+
@records.to_json
|
75
|
+
else
|
76
|
+
erb :index
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
data/doc/Makefile
ADDED
data/doc/html4css1.css
ADDED
@@ -0,0 +1,279 @@
|
|
1
|
+
/*
|
2
|
+
:Author: David Goodger
|
3
|
+
:Contact: goodger@users.sourceforge.net
|
4
|
+
:Date: $Date: 2005-12-18 01:56:14 +0100 (Sun, 18 Dec 2005) $
|
5
|
+
:Revision: $Revision: 4224 $
|
6
|
+
:Copyright: This stylesheet has been placed in the public domain.
|
7
|
+
|
8
|
+
Default cascading style sheet for the HTML output of Docutils.
|
9
|
+
|
10
|
+
See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
|
11
|
+
customize this style sheet.
|
12
|
+
*/
|
13
|
+
|
14
|
+
/* used to remove borders from tables and images */
|
15
|
+
.borderless, table.borderless td, table.borderless th {
|
16
|
+
border: 0 }
|
17
|
+
|
18
|
+
table.borderless td, table.borderless th {
|
19
|
+
/* Override padding for "table.docutils td" with "! important".
|
20
|
+
The right padding separates the table cells. */
|
21
|
+
padding: 0 0.5em 0 0 ! important }
|
22
|
+
|
23
|
+
.first {
|
24
|
+
/* Override more specific margin styles with "! important". */
|
25
|
+
margin-top: 0 ! important }
|
26
|
+
|
27
|
+
.last, .with-subtitle {
|
28
|
+
margin-bottom: 0 ! important }
|
29
|
+
|
30
|
+
.hidden {
|
31
|
+
display: none }
|
32
|
+
|
33
|
+
a.toc-backref {
|
34
|
+
text-decoration: none ;
|
35
|
+
color: black }
|
36
|
+
|
37
|
+
blockquote.epigraph {
|
38
|
+
margin: 2em 5em ; }
|
39
|
+
|
40
|
+
dl.docutils dd {
|
41
|
+
margin-bottom: 0.5em }
|
42
|
+
|
43
|
+
/* Uncomment (and remove this text!) to get bold-faced definition list terms
|
44
|
+
dl.docutils dt {
|
45
|
+
font-weight: bold }
|
46
|
+
*/
|
47
|
+
|
48
|
+
div.abstract {
|
49
|
+
margin: 2em 5em }
|
50
|
+
|
51
|
+
div.abstract p.topic-title {
|
52
|
+
font-weight: bold ;
|
53
|
+
text-align: center }
|
54
|
+
|
55
|
+
div.admonition, div.attention, div.caution, div.danger, div.error,
|
56
|
+
div.hint, div.important, div.note, div.tip, div.warning {
|
57
|
+
margin: 2em ;
|
58
|
+
border: medium outset ;
|
59
|
+
padding: 1em }
|
60
|
+
|
61
|
+
div.admonition p.admonition-title, div.hint p.admonition-title,
|
62
|
+
div.important p.admonition-title, div.note p.admonition-title,
|
63
|
+
div.tip p.admonition-title {
|
64
|
+
font-weight: bold ;
|
65
|
+
font-family: sans-serif }
|
66
|
+
|
67
|
+
div.attention p.admonition-title, div.caution p.admonition-title,
|
68
|
+
div.danger p.admonition-title, div.error p.admonition-title,
|
69
|
+
div.warning p.admonition-title {
|
70
|
+
color: red ;
|
71
|
+
font-weight: bold ;
|
72
|
+
font-family: sans-serif }
|
73
|
+
|
74
|
+
/* Uncomment (and remove this text!) to get reduced vertical space in
|
75
|
+
compound paragraphs.
|
76
|
+
div.compound .compound-first, div.compound .compound-middle {
|
77
|
+
margin-bottom: 0.5em }
|
78
|
+
|
79
|
+
div.compound .compound-last, div.compound .compound-middle {
|
80
|
+
margin-top: 0.5em }
|
81
|
+
*/
|
82
|
+
|
83
|
+
div.dedication {
|
84
|
+
margin: 2em 5em ;
|
85
|
+
text-align: center ;
|
86
|
+
font-style: italic }
|
87
|
+
|
88
|
+
div.dedication p.topic-title {
|
89
|
+
font-weight: bold ;
|
90
|
+
font-style: normal }
|
91
|
+
|
92
|
+
div.figure {
|
93
|
+
margin-left: 2em ;
|
94
|
+
margin-right: 2em }
|
95
|
+
|
96
|
+
div.footer, div.header {
|
97
|
+
clear: both;
|
98
|
+
font-size: smaller }
|
99
|
+
|
100
|
+
div.line-block {
|
101
|
+
display: block ;
|
102
|
+
margin-top: 1em ;
|
103
|
+
margin-bottom: 1em }
|
104
|
+
|
105
|
+
div.line-block div.line-block {
|
106
|
+
margin-top: 0 ;
|
107
|
+
margin-bottom: 0 ;
|
108
|
+
margin-left: 1.5em }
|
109
|
+
|
110
|
+
div.sidebar {
|
111
|
+
margin-left: 1em ;
|
112
|
+
border: medium outset ;
|
113
|
+
padding: 1em ;
|
114
|
+
background-color: #ffffee ;
|
115
|
+
width: 40% ;
|
116
|
+
float: right ;
|
117
|
+
clear: right }
|
118
|
+
|
119
|
+
div.sidebar p.rubric {
|
120
|
+
font-family: sans-serif ;
|
121
|
+
font-size: medium }
|
122
|
+
|
123
|
+
div.system-messages {
|
124
|
+
margin: 5em }
|
125
|
+
|
126
|
+
div.system-messages h1 {
|
127
|
+
color: red }
|
128
|
+
|
129
|
+
div.system-message {
|
130
|
+
border: medium outset ;
|
131
|
+
padding: 1em }
|
132
|
+
|
133
|
+
div.system-message p.system-message-title {
|
134
|
+
color: red ;
|
135
|
+
font-weight: bold }
|
136
|
+
|
137
|
+
div.topic {
|
138
|
+
margin: 2em }
|
139
|
+
|
140
|
+
h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
|
141
|
+
h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
|
142
|
+
margin-top: 0.4em }
|
143
|
+
|
144
|
+
h1.title {
|
145
|
+
text-align: center }
|
146
|
+
|
147
|
+
h2.subtitle {
|
148
|
+
text-align: center }
|
149
|
+
|
150
|
+
hr.docutils {
|
151
|
+
width: 75% }
|
152
|
+
|
153
|
+
img.align-left {
|
154
|
+
clear: left }
|
155
|
+
|
156
|
+
img.align-right {
|
157
|
+
clear: right }
|
158
|
+
|
159
|
+
ol.simple, ul.simple {
|
160
|
+
margin-bottom: 1em }
|
161
|
+
|
162
|
+
ol.arabic {
|
163
|
+
list-style: decimal }
|
164
|
+
|
165
|
+
ol.loweralpha {
|
166
|
+
list-style: lower-alpha }
|
167
|
+
|
168
|
+
ol.upperalpha {
|
169
|
+
list-style: upper-alpha }
|
170
|
+
|
171
|
+
ol.lowerroman {
|
172
|
+
list-style: lower-roman }
|
173
|
+
|
174
|
+
ol.upperroman {
|
175
|
+
list-style: upper-roman }
|
176
|
+
|
177
|
+
p.attribution {
|
178
|
+
text-align: right ;
|
179
|
+
margin-left: 50% }
|
180
|
+
|
181
|
+
p.caption {
|
182
|
+
font-style: italic }
|
183
|
+
|
184
|
+
p.credits {
|
185
|
+
font-style: italic ;
|
186
|
+
font-size: smaller }
|
187
|
+
|
188
|
+
p.label {
|
189
|
+
white-space: nowrap }
|
190
|
+
|
191
|
+
p.rubric {
|
192
|
+
font-weight: bold ;
|
193
|
+
font-size: larger ;
|
194
|
+
color: maroon ;
|
195
|
+
text-align: center }
|
196
|
+
|
197
|
+
p.sidebar-title {
|
198
|
+
font-family: sans-serif ;
|
199
|
+
font-weight: bold ;
|
200
|
+
font-size: larger }
|
201
|
+
|
202
|
+
p.sidebar-subtitle {
|
203
|
+
font-family: sans-serif ;
|
204
|
+
font-weight: bold }
|
205
|
+
|
206
|
+
p.topic-title {
|
207
|
+
font-weight: bold }
|
208
|
+
|
209
|
+
pre.address {
|
210
|
+
margin-bottom: 0 ;
|
211
|
+
margin-top: 0 ;
|
212
|
+
font-family: serif ;
|
213
|
+
font-size: 100% }
|
214
|
+
|
215
|
+
pre.literal-block, pre.doctest-block {
|
216
|
+
margin-left: 2em ;
|
217
|
+
margin-right: 2em ;
|
218
|
+
background-color: #eeeeee }
|
219
|
+
|
220
|
+
span.classifier {
|
221
|
+
font-family: sans-serif ;
|
222
|
+
font-style: oblique }
|
223
|
+
|
224
|
+
span.classifier-delimiter {
|
225
|
+
font-family: sans-serif ;
|
226
|
+
font-weight: bold }
|
227
|
+
|
228
|
+
span.interpreted {
|
229
|
+
font-family: sans-serif }
|
230
|
+
|
231
|
+
span.option {
|
232
|
+
white-space: nowrap }
|
233
|
+
|
234
|
+
span.pre {
|
235
|
+
white-space: pre }
|
236
|
+
|
237
|
+
span.problematic {
|
238
|
+
color: red }
|
239
|
+
|
240
|
+
span.section-subtitle {
|
241
|
+
/* font-size relative to parent (h1..h6 element) */
|
242
|
+
font-size: 80% }
|
243
|
+
|
244
|
+
table.citation {
|
245
|
+
border-left: solid 1px gray;
|
246
|
+
margin-left: 1px }
|
247
|
+
|
248
|
+
table.docinfo {
|
249
|
+
margin: 2em 4em }
|
250
|
+
|
251
|
+
table.docutils {
|
252
|
+
margin-top: 0.5em ;
|
253
|
+
margin-bottom: 0.5em }
|
254
|
+
|
255
|
+
table.footnote {
|
256
|
+
border-left: solid 1px black;
|
257
|
+
margin-left: 1px }
|
258
|
+
|
259
|
+
table.docutils td, table.docutils th,
|
260
|
+
table.docinfo td, table.docinfo th {
|
261
|
+
padding-left: 0.5em ;
|
262
|
+
padding-right: 0.5em ;
|
263
|
+
vertical-align: top }
|
264
|
+
|
265
|
+
table.docutils th.field-name, table.docinfo th.docinfo-name {
|
266
|
+
font-weight: bold ;
|
267
|
+
text-align: left ;
|
268
|
+
white-space: nowrap ;
|
269
|
+
padding-left: 0 }
|
270
|
+
|
271
|
+
h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
|
272
|
+
h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
|
273
|
+
font-size: 100% }
|
274
|
+
|
275
|
+
tt.docutils {
|
276
|
+
background-color: #eeeeee }
|
277
|
+
|
278
|
+
ul.auto-toc {
|
279
|
+
list-style-type: none }
|
data/doc/lookup.rst
ADDED
@@ -0,0 +1,193 @@
|
|
1
|
+
.. _lookup:
|
2
|
+
|
3
|
+
===================================
|
4
|
+
Geocoder.us Address Lookup Strategy
|
5
|
+
===================================
|
6
|
+
|
7
|
+
:Author: Schuyler Erle
|
8
|
+
:Contact: schuyler at geocoder dot us
|
9
|
+
:Created: 2009/03/13
|
10
|
+
:Edited: 2009/03/14
|
11
|
+
|
12
|
+
Definitions
|
13
|
+
-----------
|
14
|
+
|
15
|
+
Edge
|
16
|
+
Database representation of a street segment, consisting of a linestring
|
17
|
+
geometry and an edge ID. Edges relate to many ranges and many features
|
18
|
+
through their IDs.
|
19
|
+
|
20
|
+
Feature
|
21
|
+
Database representation of a named street, consisting of street name
|
22
|
+
and modifier elements, a reference ZIP code, and a primary/alternate flag.
|
23
|
+
|
24
|
+
Range
|
25
|
+
Database representation of a range of address numbers on a given
|
26
|
+
street, consisting of range start and end numbers, an optional prefix
|
27
|
+
ending with a non-numeric character, and a delivery ZIP code for that
|
28
|
+
range.
|
29
|
+
|
30
|
+
Place
|
31
|
+
Database representation of a ZIP code, consisting of a city name,
|
32
|
+
state abbreviation, a ZIP code, and a primary/alternate flag.
|
33
|
+
|
34
|
+
Address record
|
35
|
+
A set consisting of exactly one edge, one feature, and one range, related
|
36
|
+
through the edge ID.
|
37
|
+
|
38
|
+
Address query
|
39
|
+
An ordered set of {Number Prefix, Number, Directional Prefix, Type Prefix,
|
40
|
+
Qualifier Prefix, Street Name, Qualifier Suffix, Type Suffix, Directional
|
41
|
+
Suffix, City, State, ZIP}. All of the elements are optional except Number and
|
42
|
+
Street Name. Either ZIP or City must also be present. The State element
|
43
|
+
and all of the prefix and suffix elements are assumed to be normalized to
|
44
|
+
standard postal abbreviations.
|
45
|
+
|
46
|
+
Address string
|
47
|
+
A string including some or all of the elements of an address.
|
48
|
+
|
49
|
+
Address Lookup Strategy
|
50
|
+
-----------------------
|
51
|
+
|
52
|
+
1. Given an address query, initialize an empty set of candidate places,
|
53
|
+
and an empty set of candidate address records.
|
54
|
+
|
55
|
+
#. If a ZIP was given, look up `the place from the ZIP`_, and add the
|
56
|
+
place, if any, to the candidate place set.
|
57
|
+
|
58
|
+
#. If a city was given, look up all `the places matching the metaphone hash
|
59
|
+
of the city name`_, and add them, if any, to the candidate place set.
|
60
|
+
|
61
|
+
#. Generate a unique set of ZIPs from the set of candidate places, since a ZIP
|
62
|
+
may have one or more names associated with it.
|
63
|
+
|
64
|
+
#. Generate `a list of candidate address records`_ by fetching all the street
|
65
|
+
features matching the metaphone hash of the street name and one of the ZIPs
|
66
|
+
in the query set, along with the ranges matching the edge ID of each
|
67
|
+
feature, where the given number is in the range. The edge does not
|
68
|
+
need to be fetched yet.
|
69
|
+
|
70
|
+
#. If the lookup generates no results, optionally generate `more candidate
|
71
|
+
records`_ by looking up all the street features matching the metaphone hash
|
72
|
+
of the street name, along with the ranges matching the edge ID of each
|
73
|
+
feature, where the given number is in the range. This may be a very time
|
74
|
+
consuming database query, because some street names are quite common.
|
75
|
+
|
76
|
+
#. Score each of the candidate records as follows:
|
77
|
+
|
78
|
+
a. Score one point for every provided element of the address query that it
|
79
|
+
matches exactly.
|
80
|
+
#. Optionally, compute the scaled Damerau-Levenshtein distance (or
|
81
|
+
alternately the simple Levenshtein distance) between each provided
|
82
|
+
element of the address query and the corresponding element in the
|
83
|
+
candidate. Score one minus the scaled distance, which yields a fraction
|
84
|
+
of a point.
|
85
|
+
#. Score one point if the parity of the starting range number matches the parity
|
86
|
+
of the queried address number.
|
87
|
+
#. Note that the maximum possible score is equal to the number of provided
|
88
|
+
elements in the address query. Divide the score by the maximum possible.
|
89
|
+
This is the confidence value of the candidate.
|
90
|
+
|
91
|
+
#. Sort the candidate address records by confidence. Retain only the records
|
92
|
+
that share the highest confidence as candidates.
|
93
|
+
|
94
|
+
#. Fetch `the edges and primary feature names`_ matching the edge IDs of
|
95
|
+
the remaining candidate address records.
|
96
|
+
|
97
|
+
#. For each remaining candidate record:
|
98
|
+
|
99
|
+
a. Replace the candidate record feature elements with those of the
|
100
|
+
primary feature name for that edge.
|
101
|
+
#. Fetch `all of the ranges for the edge ID`_ of the candidate, sorted by
|
102
|
+
starting number.
|
103
|
+
#. Compute the sum of the differences of the starting and ending house
|
104
|
+
number for each range. This is the total number width of the edge.
|
105
|
+
#. Take the difference between the candidate starting number and the lowest
|
106
|
+
starting number, add the difference between the queried number and the
|
107
|
+
candidate starting number, and divide by the total number width. This is
|
108
|
+
the interpolation distance.
|
109
|
+
#. Optionally, find the local UTM zone and project the edge into it.
|
110
|
+
#. Find the point along the line at the interpolation distance.
|
111
|
+
#. If the edge was projected, unproject the point.
|
112
|
+
#. Assign the point as the geocoded location of the query to the candidate
|
113
|
+
record.
|
114
|
+
|
115
|
+
#. Construct a set of result ZIPs from the remaining candidates, and look up
|
116
|
+
`the primary name and state for each ZIP`_ in the set. Assign the matching
|
117
|
+
primary city and state to each candidate.
|
118
|
+
|
119
|
+
#. Return the set of candidate records as the result of the query.
|
120
|
+
|
121
|
+
SQL Statements
|
122
|
+
--------------
|
123
|
+
|
124
|
+
the place from the ZIP
|
125
|
+
~~~~~~~~~~~~~~~~~~~~~~~
|
126
|
+
|
127
|
+
::
|
128
|
+
|
129
|
+
SELECT * FROM place WHERE zip = '...';
|
130
|
+
|
131
|
+
the places matching the metaphone hash of the city name
|
132
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
133
|
+
|
134
|
+
::
|
135
|
+
|
136
|
+
SELECT * FROM place WHERE city_phone = metaphone('...');
|
137
|
+
|
138
|
+
a list of candidate address records
|
139
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
140
|
+
|
141
|
+
::
|
142
|
+
|
143
|
+
SELECT feature.*, range.* FROM feature, range
|
144
|
+
WHERE name_phone = metaphone('...') AND feature.zip IN (...)
|
145
|
+
AND range.tlid = feature.tlid
|
146
|
+
AND fromhn <= ... AND tohn >= ...;
|
147
|
+
|
148
|
+
more candidate records
|
149
|
+
~~~~~~~~~~~~~~~~~~~~~~
|
150
|
+
|
151
|
+
::
|
152
|
+
|
153
|
+
SELECT feature.*, range.* FROM feature, range
|
154
|
+
WHERE name_phone = metaphone('...')
|
155
|
+
AND range.tlid = feature.tlid
|
156
|
+
AND fromhn <= ... AND tohn >= ...;
|
157
|
+
|
158
|
+
the edges and primary feature names
|
159
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
160
|
+
|
161
|
+
::
|
162
|
+
|
163
|
+
SELECT feature.*, edge.* FROM feature, edge
|
164
|
+
WHERE feature.tlid = ... AND paflag = 'P'
|
165
|
+
AND edge.tlid = feature.tlid;
|
166
|
+
|
167
|
+
-- or
|
168
|
+
|
169
|
+
SELECT feature.*, edge.* FROM feature, edge
|
170
|
+
WHERE feature.tlid IN (...)
|
171
|
+
AND paflag = 'P'
|
172
|
+
AND edge.tlid = feature.tlid;
|
173
|
+
|
174
|
+
all of the ranges for the edge ID
|
175
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
176
|
+
|
177
|
+
::
|
178
|
+
|
179
|
+
SELECT * FROM range WHERE range.tlid = ...;
|
180
|
+
|
181
|
+
-- or
|
182
|
+
|
183
|
+
SELECT * FROM range WHERE range.tlid IN (...);
|
184
|
+
|
185
|
+
the primary name and state for each ZIP
|
186
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
187
|
+
|
188
|
+
|
189
|
+
::
|
190
|
+
|
191
|
+
SELECT * FROM place WHERE zip IN (...) AND paflag = 'P';
|
192
|
+
|
193
|
+
= 30 =
|