geo_coder 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119) hide show
  1. data/Gemfile +12 -0
  2. data/Gemfile.lock +32 -0
  3. data/History.txt +6 -0
  4. data/Makefile +13 -0
  5. data/Manifest.txt +18 -0
  6. data/README.rdoc +197 -0
  7. data/Rakefile +53 -0
  8. data/TODO.txt +8 -0
  9. data/VERSION +1 -0
  10. data/bin/build_indexes +8 -0
  11. data/bin/rebuild_cluster +22 -0
  12. data/bin/rebuild_metaphones +23 -0
  13. data/bin/tiger_import +59 -0
  14. data/demos/demo/app/ext/geocodewrap.rb +84 -0
  15. data/demos/demo/app/views/index.builder +13 -0
  16. data/demos/demo/app/views/index.erb +71 -0
  17. data/demos/demo/config.ru +12 -0
  18. data/demos/demo/config/bootstraps.rb +130 -0
  19. data/demos/demo/config/geoenvironment.rb +25 -0
  20. data/demos/demo/geocoder_helper.rb +12 -0
  21. data/demos/demo/geocom_geocode.rb +10 -0
  22. data/demos/demo/main.rb +3 -0
  23. data/demos/demo/rakefile.rb +17 -0
  24. data/demos/demo/tmp/restart.txt +0 -0
  25. data/demos/simpledemo/views/index.builder +13 -0
  26. data/demos/simpledemo/views/index.erb +69 -0
  27. data/demos/simpledemo/ws.rb +83 -0
  28. data/doc/Makefile +7 -0
  29. data/doc/html4css1.css +279 -0
  30. data/doc/lookup.rst +193 -0
  31. data/doc/parsing.rst +125 -0
  32. data/doc/voidspace.css +147 -0
  33. data/geo_coder.gemspec +172 -0
  34. data/lib/geocoder/us.rb +21 -0
  35. data/lib/geocoder/us/address.rb +290 -0
  36. data/lib/geocoder/us/constants.rb +670 -0
  37. data/lib/geocoder/us/database.rb +745 -0
  38. data/lib/geocoder/us/import.rb +181 -0
  39. data/lib/geocoder/us/import/tiger.rb +13 -0
  40. data/lib/geocoder/us/numbers.rb +58 -0
  41. data/navteq/README +4 -0
  42. data/navteq/convert.sql +37 -0
  43. data/navteq/navteq_import +39 -0
  44. data/navteq/prepare.sql +92 -0
  45. data/sql/cluster.sql +16 -0
  46. data/sql/convert.sql +80 -0
  47. data/sql/create.sql +37 -0
  48. data/sql/index.sql +12 -0
  49. data/sql/place.csv +104944 -0
  50. data/sql/place.sql +104948 -0
  51. data/sql/setup.sql +78 -0
  52. data/src/Makefile +13 -0
  53. data/src/README +14 -0
  54. data/src/liblwgeom/Makefile +75 -0
  55. data/src/liblwgeom/box2d.c +54 -0
  56. data/src/liblwgeom/lex.yy.c +4799 -0
  57. data/src/liblwgeom/liblwgeom.h +1405 -0
  58. data/src/liblwgeom/lwalgorithm.c +946 -0
  59. data/src/liblwgeom/lwalgorithm.h +52 -0
  60. data/src/liblwgeom/lwcircstring.c +759 -0
  61. data/src/liblwgeom/lwcollection.c +541 -0
  62. data/src/liblwgeom/lwcompound.c +118 -0
  63. data/src/liblwgeom/lwcurvepoly.c +86 -0
  64. data/src/liblwgeom/lwgeom.c +886 -0
  65. data/src/liblwgeom/lwgeom_api.c +2201 -0
  66. data/src/liblwgeom/lwgparse.c +1219 -0
  67. data/src/liblwgeom/lwgunparse.c +1054 -0
  68. data/src/liblwgeom/lwline.c +525 -0
  69. data/src/liblwgeom/lwmcurve.c +125 -0
  70. data/src/liblwgeom/lwmline.c +137 -0
  71. data/src/liblwgeom/lwmpoint.c +138 -0
  72. data/src/liblwgeom/lwmpoly.c +141 -0
  73. data/src/liblwgeom/lwmsurface.c +129 -0
  74. data/src/liblwgeom/lwpoint.c +439 -0
  75. data/src/liblwgeom/lwpoly.c +579 -0
  76. data/src/liblwgeom/lwsegmentize.c +1047 -0
  77. data/src/liblwgeom/lwutil.c +369 -0
  78. data/src/liblwgeom/measures.c +861 -0
  79. data/src/liblwgeom/postgis_config.h +93 -0
  80. data/src/liblwgeom/ptarray.c +847 -0
  81. data/src/liblwgeom/vsprintf.c +179 -0
  82. data/src/liblwgeom/wktparse.h +126 -0
  83. data/src/liblwgeom/wktparse.lex +74 -0
  84. data/src/liblwgeom/wktparse.tab.c +2353 -0
  85. data/src/liblwgeom/wktparse.tab.h +145 -0
  86. data/src/liblwgeom/wktparse.y +385 -0
  87. data/src/libsqlite3_geocoder/Makefile +22 -0
  88. data/src/libsqlite3_geocoder/Makefile.nix +15 -0
  89. data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
  90. data/src/libsqlite3_geocoder/extension.c +121 -0
  91. data/src/libsqlite3_geocoder/extension.h +13 -0
  92. data/src/libsqlite3_geocoder/levenshtein.c +42 -0
  93. data/src/libsqlite3_geocoder/metaphon.c +278 -0
  94. data/src/libsqlite3_geocoder/util.c +37 -0
  95. data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
  96. data/src/metaphone/Makefile +7 -0
  97. data/src/metaphone/README +49 -0
  98. data/src/metaphone/extension.c +37 -0
  99. data/src/metaphone/metaphon.c +251 -0
  100. data/src/shp2sqlite/Makefile +37 -0
  101. data/src/shp2sqlite/Makefile.nix +36 -0
  102. data/src/shp2sqlite/Makefile.redhat +35 -0
  103. data/src/shp2sqlite/dbfopen.c +1595 -0
  104. data/src/shp2sqlite/getopt.c +695 -0
  105. data/src/shp2sqlite/getopt.h +127 -0
  106. data/src/shp2sqlite/shapefil.h +500 -0
  107. data/src/shp2sqlite/shp2sqlite.c +1974 -0
  108. data/src/shp2sqlite/shpopen.c +1894 -0
  109. data/tests/address.rb +236 -0
  110. data/tests/benchmark.rb +20 -0
  111. data/tests/constants.rb +57 -0
  112. data/tests/data/address-sample.csv +52 -0
  113. data/tests/data/db-test.csv +57 -0
  114. data/tests/data/locations.csv +4 -0
  115. data/tests/database.rb +137 -0
  116. data/tests/generate.rb +34 -0
  117. data/tests/numbers.rb +46 -0
  118. data/tests/run.rb +11 -0
  119. metadata +237 -0
@@ -0,0 +1,290 @@
1
+ require 'geocoder/us/constants'
2
+
3
+ module Geocoder::US
4
# Defines the matching of parsed address tokens. Each entry is a Regexp
# applied to the cleaned address text:
#   :number - optional prefix, house number digits and optional one-letter
#             suffix at the start of a line (three capture groups)
#   :street - a run of numeric or alphabetic tokens
#   :city   - a run of alphabetic tokens
#   :state  - any key or value of the State map at the end of a line
#   :zip    - a 5-digit ZIP (captured), optionally followed by -NNNN, at the
#             end of a line
#   :at     - a separator word or symbol between two street names
#   :po_box - the various spellings of "PO Box"
Match = {
  # FIXME: shouldn't have to anchor :number and :zip at start/end
  :number => /^(\d+\W|[a-z]+)?(\d+)([a-z]?)\b/io,
  :street => /(?:\b(?:\d+\w*|[a-z'-]+)\s*)+/io,
  :city   => /(?:\b[a-z'-]+\s*)+/io,
  # Single quotes are required here: inside a double-quoted Ruby string
  # "\s" is a literal space, not the regexp whitespace class.
  :state  => Regexp.new(State.regexp.source + '\s*$', Regexp::IGNORECASE),
  :zip    => /(\d{5})(?:-\d{4})?\s*$/o,
  :at     => /\s(at|@|and|&)\s/io,
  # Character classes list alternatives without "|"; writing [P|p] would
  # also accept a literal pipe character.
  :po_box => /\b[Pp]*(OST|ost)*\.*\s*[Oo0]*(ffice|FFICE)*\.*\s*[Bb][Oo0][Xx]\b/
}
15
+
16
+ # The Address class takes a US street address or place name and
17
+ # constructs a list of possible structured parses of the address
18
+ # string.
19
+ class Address
20
+ attr_accessor :text
21
+ attr_accessor :prenum, :number, :sufnum
22
+ attr_accessor :street
23
+ attr_accessor :city
24
+ attr_accessor :state
25
+ attr_accessor :zip, :plus4
26
+
27
# Takes an address or place name as its sole argument. The argument is
# either a free-form String, which is cleaned and parsed, or a Hash of
# already separated components, which is handed to assign_text_to_address.
#
# Raises ArgumentError when the argument is nil/false or empty.
def initialize(text)
  raise ArgumentError, "no text provided" unless text and !text.empty?
  # is_a? instead of an exact class comparison, so that Hash subclasses
  # take the structured path rather than failing inside clean.
  if text.is_a?(Hash)
    @text = ""
    assign_text_to_address(text)
  else
    @text = clean(text)
    parse
  end
end
38
+
39
# Removes any characters that aren't strictly part of an address string.
#
# Whitespace runs (including tabs and newlines) are first turned into single
# spaces so that they keep separating words; only then are characters
# outside letters, digits, space and , ' & @ / - dropped. Spaces left
# adjacent by the removal are collapsed and the result is trimmed.
#
# value - the raw address String.
#
# Returns a new String; value itself is not modified.
def clean(value)
  text = value.gsub(/\s+/o, " ")
  text = text.gsub(/[^a-z0-9 ,'&@\/-]+/io, "")
  text.gsub(/ +/o, " ").strip
end
45
+
46
+
47
# Fills in the address fields from a Hash of components.
#
# When text[:address] is present it is cleaned and parsed exactly like a
# free-form string and all other keys are ignored. Otherwise the fields are
# read from these keys:
#   :prenum, :sufnum  - stored as given
#   :street           - scanned with Match[:street], then expanded
#   :number           - house number; when absent it is taken from the
#                       start of each street string and removed from it
#   :city             - stored as a one-element list; also becomes @text
#   :region, :country, :state - the first one present (in that order)
#                       becomes @state; a :region longer than two
#                       characters is looked up in State
#   :postal_code, :plus4 - both reset to "" when no postal code is given
def assign_text_to_address(text)
  unless text[:address].nil?
    @text = clean(text[:address])
    return parse
  end

  @prenum = text[:prenum]
  @sufnum = text[:sufnum]
  @street = text[:street].nil? ? [] : text[:street].scan(Match[:street])

  @number = ""
  if text[:number].nil?
    @street.map! do |single_street|
      street = single_street.downcase
      # String#[] with a Regexp yields the whole matched text, or nil when
      # there is no match. Calling to_s on the Array of capture groups
      # would produce its inspect form on Ruby 1.9 and later.
      @number = street[Match[:number]].to_s
      street.sub(@number, "").sub(/^\s*,?\s*/o, "")
    end
  else
    @number = text[:number].to_s
  end
  @street = expand_streets(@street)

  if text[:city].nil?
    @city = [""]
  else
    @city = [text[:city]]
    @text = text[:city].to_s
  end

  if !text[:region].nil?
    @state = text[:region]
    # Anything longer than a two-letter code is looked up in the State map.
    @state = State[@state] if @state.length > 2
  elsif !text[:country].nil?
    @state = text[:country]
  elsif !text[:state].nil?
    @state = text[:state]
  end

  @zip = text[:postal_code]
  @plus4 = text[:plus4]
  @zip = @plus4 = "" unless @zip
end
100
+
101
# Expands the first number found in a string into its alternative
# spellings.
#
# The string is searched, in this order, for a numeral with an optional
# st/nd/rd/th ending, for a match of Ordinals.regexp and for a match of
# Cardinals.regexp. When the resulting number is below 100 the method
# returns three copies of the string with the matched token replaced by the
# digits, by Ordinals[number] and by Cardinals[number]. Otherwise it returns
# the string unchanged, wrapped in an Array.
# NOTE(review): Ordinals and Cardinals are not defined in this file;
# presumably they come from geocoder/us/numbers - confirm.
def expand_numbers(string)
  if (found = /\b\d+(?:st|nd|rd|th)?\b/o.match(string))
    token = found[0]
    value = token.to_i
  elsif (found = Ordinals.regexp.match(string))
    token = found[0]
    value = Ordinals[token]
  elsif (found = Cardinals.regexp.match(string))
    token = found[0]
    value = Cardinals[token]
  end

  return [string] unless value && value < 100

  [value.to_s, Ordinals[value], Cardinals[value]].map do |replacement|
    string.sub(token, replacement)
  end
end
125
+
126
# Removes the ZIP code from the end of the working text and stores it.
#
# regex_match - the full text that Match[:zip] matched, e.g. "12534-1234"
# text        - the working address String; modified in place
#
# Expects @zip to hold the capture groups of that match (an Array whose
# first element is the 5-digit ZIP). Afterwards @zip is the stripped ZIP
# String and @plus4 is the four-digit add-on, or nil when there is none.
#
# Returns text with the ZIP and any separating comma/whitespace removed.
def parse_zip(regex_match, text)
  idx = text.rindex(regex_match)
  text[idx...idx + regex_match.length] = ""
  text.sub!(/\s*,?\s*$/o, "")

  zip, plus4 = @zip
  # Match[:zip] keeps the add-on in a non-capturing group, so it never
  # arrives through @zip; read it from the matched text instead.
  plus4 ||= regex_match[/-(\d{4})/, 1]
  @zip = zip.strip
  @plus4 = plus4 && plus4.strip
  text
end
133
+
134
# Removes the state from the end of the working text and stores it.
#
# regex_match - the full text that Match[:state] matched
# text        - the working address String; modified in place
#
# Expects @state to hold the capture groups of that match. Afterwards
# @full_state is the stripped matched name and @state is its entry in the
# State map.
#
# Returns text with the state and any separating comma/whitespace removed.
def parse_state(regex_match, text)
  start = text.rindex(regex_match)
  text[start, regex_match.length] = ""
  text.sub!(/\s*,?\s*$/o, "")
  @full_state = @state.first.strip # special case: New York
  @state = State[@full_state]
  text
end
142
+
143
# Removes the house number from the working text and stores its parts.
#
# regex_match - the full text that Match[:number] matched
# text        - the working address String; modified in place
#
# Expects @number to hold the three capture groups of that match. They are
# stripped (nil groups stay nil) and spread over @prenum, @number and
# @sufnum.
#
# Returns text with the number and any separating comma/whitespace removed.
def parse_number(regex_match, text)
  # FIXME: What if this string appears twice?
  start = text.index(regex_match)
  text[start, regex_match.length] = ""
  text.sub!(/^\s*,?\s*/o, "")
  @prenum, @number, @sufnum = @number.map { |part| part && part.strip }
  text
end
151
+
152
# Splits @text into its components, working on a downcased copy.
#
# The ZIP and the state are taken off the end of the text and the house
# number off the start; whatever is left is scanned for street and city
# candidates. Sets @zip, @plus4, @state, @full_state, @prenum, @number,
# @sufnum, @street and @city.
def parse
  remaining = @text.clone.downcase

  @zip = remaining.scan(Match[:zip]).last
  if @zip
    remaining = parse_zip(Regexp.last_match(0), remaining)
  else
    @zip = @plus4 = ""
  end

  @state = remaining.scan(Match[:state]).last
  if @state
    remaining = parse_state(Regexp.last_match(0), remaining)
  else
    @full_state = ""
    @state = ""
  end

  @number = remaining.scan(Match[:number]).first
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  if @number # and not intersection?
    remaining = parse_number(Regexp.last_match(0), remaining)
  else
    @prenum = @number = @sufnum = ""
  end

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  @street = expand_streets(remaining.scan(Match[:street]))
  # SPECIAL CASE: 1600 Pennsylvania 20050
  @street << @full_state if @street.empty? && @state.downcase != @full_state.downcase

  city_candidates = remaining.scan(Match[:city])
  if city_candidates.empty?
    @city = []
  else
    # Only the last candidate is kept, together with its expanded spelling.
    last_city = city_candidates.last.strip
    expanded = last_city.gsub(Name_Abbr.regexp) { |abbr| Name_Abbr[abbr] }
    @city = ([last_city] | [expanded]).map { |name| name.downcase }.uniq
  end

  # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
  @city << @full_state if @state.downcase != @full_state.downcase

  # SPECIAL CASE: if given a single city string, and it's not the
  # same as the street string, remove it from the street parts
  self.city = @city if @city.length == 1 && @city != @street
end
206
+
207
# Builds the list of alternative spellings for the given street strings.
#
# street - Array of street Strings; its elements are stripped in place.
#
# Each string is kept as is and also added with Name_Abbr and Std_Abbr
# matches replaced by their mapped form; every variant is then run through
# expand_numbers. Returns the downcased, de-duplicated list, or an empty
# Array when street is empty or its first element is nil.
def expand_streets(street)
  return [] if street.empty? || street[0].nil?

  street.map! { |name| name.strip }
  with_names = street | street.map { |name| name.gsub(Name_Abbr.regexp) { |abbr| Name_Abbr[abbr] } }
  with_types = with_names | with_names.map { |name| name.gsub(Std_Abbr.regexp) { |abbr| Std_Abbr[abbr] } }
  variants = with_types.map { |name| expand_numbers(name) }.flatten
  variants.map { |name| name.downcase }.uniq
end
223
+
224
# Returns the candidate street-name fragments for lookup: every contiguous
# run of whitespace-delimited words from each entry of @street, passed
# through remove_noise_words. When every remaining fragment is a key of
# Std_Abbr or Name_Abbr, @number is appended as an extra candidate. The
# result is de-duplicated.
def street_parts
  # Get all the substrings delimited by whitespace
  fragments = @street.inject([]) do |found, name|
    words = name.split(" ")
    runs = (0...words.length).map do |from|
      (from...words.length).map { |to| words[from..to].join(" ") }
    end
    found | runs.flatten
  end
  fragments = remove_noise_words(fragments)

  # Try a simpler case of adding the @number in case everything is an abbr.
  only_abbreviations = fragments.all? { |s| Std_Abbr.key?(s) || Name_Abbr.key?(s) }
  fragments += [@number] if only_abbreviations
  fragments.uniq
end
238
+
239
# Strips leading/trailing directionals and street types from each string.
#
# strings - Array of street-name fragments; not modified.
#
# Returns the stripped fragments with empty results dropped. When nothing
# is left after stripping, the original list is returned instead.
def remove_noise_words(strings)
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on simple things like "Prairie St" or "Front St"
  #
  # The whitespace pieces are single-quoted on purpose: in a double-quoted
  # Ruby string "\s" is a literal space, not the regexp whitespace class.
  prefix = Regexp.new("^" + Prefix_Type.regexp.source + '\s*', Regexp::IGNORECASE)
  suffix = Regexp.new('\s*' + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)
  predxn = Regexp.new("^" + Directional.regexp.source + '\s*', Regexp::IGNORECASE)
  sufdxn = Regexp.new('\s*' + Directional.regexp.source + "$", Regexp::IGNORECASE)

  good_strings = strings.map do |s|
    s.gsub(predxn, "").gsub(sufdxn, "").gsub(prefix, "").gsub(suffix, "")
  end
  good_strings.reject! { |s| s.empty? }

  # The earlier version passed a block to Array#empty?, which ignores
  # blocks; the effective rule has always been "use the stripped list
  # unless it is empty".
  good_strings.empty? ? strings : good_strings
end
259
+
260
# Returns the candidate city-name fragments for lookup: for each entry of
# @city, every run of whitespace-delimited words, generated from the last
# starting word back to the first. Fragments that are keys of Std_Abbr are
# dropped unless that would leave nothing. The result is de-duplicated.
def city_parts
  fragments = []
  @city.each do |name|
    words = name.split(" ")
    runs = (words.length - 1).downto(0).map do |from|
      (from...words.length).map { |to| words[from..to].join(" ") }
    end
    fragments |= runs.flatten
  end

  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on "Prairie"
  kept = fragments.reject { |s| Std_Abbr.key?(s) }
  fragments = kept unless kept.empty?
  fragments.uniq
end
274
+
275
# Removes a trailing city name from every street candidate.
#
# strings - Array of city name Strings.
#
# Any of the names found as whole words at the end of a street string
# (case-insensitively) is cut off, and street strings that become empty are
# dropped. Note that this writer only rewrites @street; it does not assign
# @city.
def city=(strings)
  # NOTE: This will still fail on: 100 Broome St, 33333 (if 33333 is
  # Broome, MT or what)
  # The names are escaped so that characters such as "." or "+" in a city
  # name are matched literally instead of acting as regexp operators.
  alternatives = strings.map { |name| Regexp.escape(name) }.join("|")
  match = Regexp.new('\s*\b(?:' + alternatives + ')\b\s*$', Regexp::IGNORECASE)
  @street = @street.map { |string| string.gsub(match, '') }.reject { |s| s.empty? }
end
281
+
282
# Tests @text against Match[:po_box]. Returns the MatchData when the text
# contains a PO Box spelling, nil otherwise.
def po_box?
  po_box_pattern = Match[:po_box]
  po_box_pattern.match(@text)
end
285
+
286
# Tests @text against Match[:at]. Returns the MatchData when the text
# contains an "at", "@", "and" or "&" separator surrounded by whitespace,
# nil otherwise.
def intersection?
  separator_pattern = Match[:at]
  separator_pattern.match(@text)
end
289
+ end
290
+ end
@@ -0,0 +1,670 @@
1
+ # coding: utf-8
2
+ require 'set'
3
+ require 'geocoder/us/numbers'
4
+
5
+ module Geocoder
6
+ end
7
+
8
+ module Geocoder::US
9
# The Map class provides a two-way mapping between postal abbreviations
# and their fully written equivalents. Lookups through [] and key? are
# case-insensitive. Instances are created with Map[...] and come back
# frozen.
class Map < Hash
  # Regexp matching any key or value of the map as a whole word,
  # case-insensitively. Set by build_match.
  attr_accessor :regexp

  # Builds a frozen Map from the same arguments Hash.[] accepts.
  #
  # Besides the given pairs, every key is also stored downcased (pointing
  # at the same value) and every value is stored downcased as a key that
  # points at the value itself.
  def self.[](*items)
    hash = super(*items)
    hash.build_match
    hash.keys.each { |k| hash[k.downcase] = hash.fetch(k) }
    hash.values.each { |v| hash[v.downcase] = v }
    hash.freeze
  end

  # The build_partial method constructs the set of downcased keys and
  # values of the two-way Map together with each of their
  # whitespace-delimited words. It assigns an instance variable, so it can
  # only be called before the Map is frozen.
  def build_partial
    @partial = partial_tokens
  end

  # Builds the whole-word, case-insensitive regexp from the current keys
  # and values. The items are inserted unescaped.
  def build_match
    @regexp = Regexp.new(
      '\b(' + [keys, values].flatten.join("|") + ')\b',
      Regexp::IGNORECASE)
  end

  # The partial? method returns true if the key, ignoring case, is a key
  # or value of the Map or one whitespace-delimited word of one.
  #
  # Map[...] does not call build_partial and freezes the instance, so when
  # no set has been stored it is computed for the call instead of failing
  # on a missing @partial.
  def partial?(key)
    (@partial || partial_tokens).member?(key.downcase)
  end

  # Case-insensitive Hash#key?. Keys that cannot be downcased (nil, for
  # example) are looked up unchanged rather than raising.
  def key?(key)
    super(normalize_key(key))
  end

  # Case-insensitive Hash#[]. Keys that cannot be downcased (nil, for
  # example) are looked up unchanged rather than raising.
  def [](key)
    super(normalize_key(key))
  end

  private

  # Collects every key and value, downcased, plus each of their words.
  def partial_tokens
    tokens = Set.new
    [keys, values].flatten.each do |item|
      lowered = item.downcase
      tokens << lowered
      lowered.split.each { |token| tokens << token }
    end
    tokens
  end

  # Downcases the key when it supports it.
  def normalize_key(key)
    key.respond_to?(:downcase) ? key.downcase : key
  end
end
48
+
49
+ # The Directional constant maps compass direction words in English and
50
+ # Spanish to their 1- or 2- letter abbreviations. See 2008 TIGER/Line
51
+ # technical documentation Appendix C for more details.
52
+ Directional = Map[
53
+ "North" => "N",
54
+ "South" => "S",
55
+ "East" => "E",
56
+ "West" => "W",
57
+ "Northeast" => "NE",
58
+ "Northwest" => "NW",
59
+ "Southeast" => "SE",
60
+ "Southwest" => "SW",
61
+ "Norte" => "N",
62
+ "Sur" => "S",
63
+ "Este" => "E",
64
+ "Oeste" => "O",
65
+ "Noreste" => "NE",
66
+ "Noroeste" => "NO",
67
+ "Sudeste" => "SE",
68
+ "Sudoeste" => "SO"
69
+ ]
70
+
71
+ # The Prefix_Qualifier constant maps feature prefix qualifiers to their
72
+ # abbreviations. See 2008 TIGER/Line technical documentation Appendix D.
73
+ Prefix_Qualifier = Map[
74
+ "Alternate" => "Alt",
75
+ "Business" => "Bus",
76
+ "Bypass" => "Byp",
77
+ "Extended" => "Exd",
78
+ "Historic" => "Hst",
79
+ "Loop" => "Lp",
80
+ "Old" => "Old",
81
+ "Private" => "Pvt",
82
+ "Public" => "Pub",
83
+ "Spur" => "Spr",
84
+ ]
85
+
86
+ # The Suffix_Qualifier constant maps feature suffix qualifiers to their
87
+ # abbreviations. See 2008 TIGER/Line technical documentation Appendix D.
88
+ Suffix_Qualifier = Map[
89
+ "Access" => "Acc",
90
+ "Alternate" => "Alt",
91
+ "Business" => "Bus",
92
+ "Bypass" => "Byp",
93
+ "Connector" => "Con",
94
+ "Extended" => "Exd",
95
+ "Extension" => "Exn",
96
+ "Loop" => "Lp",
97
+ "Private" => "Pvt",
98
+ "Public" => "Pub",
99
+ "Scenic" => "Scn",
100
+ "Spur" => "Spr",
101
+ "Ramp" => "Rmp",
102
+ "Underpass" => "Unp",
103
+ "Overpass" => "Ovp",
104
+ ]
105
+
106
+ # The Prefix_Canonical constant maps canonical TIGER/Line street type
107
+ # prefixes to their abbreviations. This list is the subset of the list from
108
+ # 2008 TIGER/Line technical documentation Appendix E that was extracted from
109
+ # a TIGER/Line database import.
110
+ Prefix_Canonical = {
111
+ "Arcade" => "Arc",
112
+ "Autopista" => "Autopista",
113
+ "Avenida" => "Ave",
114
+ "Avenue" => "Ave",
115
+ "Boulevard" => "Blvd",
116
+ "Bulevar" => "Bulevar",
117
+ "Bureau of Indian Affairs Highway" => "BIA Hwy",
118
+ "Bureau of Indian Affairs Road" => "BIA Rd",
119
+ "Bureau of Indian Affairs Route" => "BIA Rte",
120
+ "Bureau of Land Management Road" => "BLM Rd",
121
+ "Bypass" => "Byp",
122
+ "Calle" => "Cll",
123
+ "Calleja" => "Calleja",
124
+ "Callejón" => "Callejón",
125
+ "Caminito" => "Cmt",
126
+ "Camino" => "Cam",
127
+ "Carretera" => "Carr",
128
+ "Cerrada" => "Cer",
129
+ "Círculo" => "Cír",
130
+ "Commons" => "Cmns",
131
+ "Corte" => "Corte",
132
+ "County Highway" => "Co Hwy",
133
+ "County Lane" => "Co Ln",
134
+ "County Road" => "Co Rd",
135
+ "County Route" => "Co Rte",
136
+ "County State Aid Highway" => "Co St Aid Hwy",
137
+ "County Trunk Highway" => "Co Trunk Hwy",
138
+ "County Trunk Road" => "Co Trunk Rd",
139
+ "Court" => "Ct",
140
+ "Delta Road" => "Delta Rd",
141
+ "District of Columbia Highway" => "DC Hwy",
142
+ "Driveway" => "Driveway",
143
+ "Entrada" => "Ent",
144
+ "Expreso" => "Expreso",
145
+ "Expressway" => "Expy",
146
+ "Farm Road" => "Farm Rd",
147
+ "Farm-to-Market Road" => "FM",
148
+ "Fire Control Road" => "Fire Cntrl Rd",
149
+ "Fire District Road" => "Fire Dist Rd",
150
+ "Fire Lane" => "Fire Ln",
151
+ "Fire Road" => "Fire Rd",
152
+ "Fire Route" => "Fire Rte",
153
+ "Fire Trail" => "Fire Trl",
154
+ "Forest Highway" => "Forest Hwy",
155
+ "Forest Road" => "Forest Rd",
156
+ "Forest Route" => "Forest Rte",
157
+ "Forest Service Road" => "FS Rd",
158
+ "Highway" => "Hwy",
159
+ "Indian Route" => "Indian Rte",
160
+ "Indian Service Route" => "Indian Svc Rte",
161
+ "Interstate Highway" => "I-",
162
+ "Lane" => "Ln",
163
+ "Logging Road" => "Logging Rd",
164
+ "Loop" => "Loop",
165
+ "National Forest Development Road" => "Nat For Dev Rd",
166
+ "Navajo Service Route" => "Navajo Svc Rte",
167
+ "Parish Road" => "Parish Rd",
168
+ "Pasaje" => "Pasaje",
169
+ "Paseo" => "Pso",
170
+ "Passage" => "Psge",
171
+ "Placita" => "Pla",
172
+ "Plaza" => "Plz",
173
+ "Point" => "Pt",
174
+ "Puente" => "Puente",
175
+ "Ranch Road" => "Ranch Rd",
176
+ "Ranch to Market Road" => "RM",
177
+ "Reservation Highway" => "Resvn Hwy",
178
+ "Road" => "Rd",
179
+ "Route" => "Rte",
180
+ "Row" => "Row",
181
+ "Rue" => "Rue",
182
+ "Ruta" => "Ruta",
183
+ "Sector" => "Sec",
184
+ "Sendero" => "Sendero",
185
+ "Service Road" => "Svc Rd",
186
+ "Skyway" => "Skwy",
187
+ "Square" => "Sq",
188
+ "State Forest Service Road" => "St FS Rd",
189
+ "State Highway" => "State Hwy",
190
+ "State Loop" => "State Loop",
191
+ "State Road" => "State Rd",
192
+ "State Route" => "State Rte",
193
+ "State Spur" => "State Spur",
194
+ "State Trunk Highway" => "St Trunk Hwy",
195
+ "Terrace" => "Ter",
196
+ "Town Highway" => "Town Hwy",
197
+ "Town Road" => "Town Rd",
198
+ "Township Highway" => "Twp Hwy",
199
+ "Township Road" => "Twp Rd",
200
+ "Trail" => "Trl",
201
+ "Tribal Road" => "Tribal Rd",
202
+ "Tunnel" => "Tunl",
203
+ "US Forest Service Highway" => "USFS Hwy",
204
+ "US Forest Service Road" => "USFS Rd",
205
+ "US Highway" => "US Hwy",
206
+ "US Route" => "US Rte",
207
+ "Vereda" => "Ver",
208
+ "Via" => "Via",
209
+ "Vista" => "Vis",
210
+ }
211
+
212
+ # The Prefix_Alternate constant maps alternate prefix street types to
213
+ # their canonical abbreviations. This list was merged in from the USPS
214
+ # list at http://www.usps.com/ncsc/lookups/abbr_suffix.txt.
215
+ Prefix_Alternate = {
216
+ "Av" => "Ave",
217
+ "Aven" => "Ave",
218
+ "Avenu" => "Ave",
219
+ "Avenue" => "Ave",
220
+ "Avn" => "Ave",
221
+ "Avnue" => "Ave",
222
+ "Boul" => "Blvd",
223
+ "Boulv" => "Blvd",
224
+ "Bypa" => "Byp",
225
+ "Bypas" => "Byp",
226
+ "Byps" => "Byp",
227
+ "Crt" => "Ct",
228
+ "Exp" => "Expy",
229
+ "Expr" => "Expy",
230
+ "Express" => "Expy",
231
+ "Expw" => "Expy",
232
+ "Highwy" => "Hwy",
233
+ "Hiway" => "Hwy",
234
+ "Hiwy" => "Hwy",
235
+ "Hway" => "Hwy",
236
+ "La" => "Ln",
237
+ "Lanes" => "Ln",
238
+ "Loops" => "Loop",
239
+ "Plza" => "Plz",
240
+ "Sqr" => "Sq",
241
+ "Sqre" => "Sq",
242
+ "Squ" => "Sq",
243
+ "Terr" => "Ter",
244
+ "Tr" => "Trl",
245
+ "Trails" => "Trl",
246
+ "Trls" => "Trl",
247
+ "Tunel" => "Tunl",
248
+ "Tunls" => "Tunl",
249
+ "Tunnels" => "Tunl",
250
+ "Tunnl" => "Tunl",
251
+ "Vdct" => "Via",
252
+ "Viadct" => "Via",
253
+ "Viaduct" => "Via",
254
+ "Vist" => "Vis",
255
+ "Vst" => "Vis",
256
+ "Vsta" => "Vis"
257
+ }
258
+
259
+ # The Prefix_Type constant merges the canonical prefix type abbreviations
260
+ # with their USPS accepted alternates.
261
+ Prefix_Type = Map[ Prefix_Canonical.merge(Prefix_Alternate) ]
262
+
263
+ # The Suffix_Canonical constant maps canonical TIGER/Line street type
264
+ # suffixes to their abbreviations. This list is the subset of the list from
265
+ # 2008 TIGER/Line technical documentation Appendix E that was extracted from
266
+ # a TIGER/Line database import.
267
+ Suffix_Canonical = {
268
+ "Alley" => "Aly",
269
+ "Arcade" => "Arc",
270
+ "Avenida" => "Ave",
271
+ "Avenue" => "Ave",
272
+ "Beltway" => "Beltway",
273
+ "Boulevard" => "Blvd",
274
+ "Bridge" => "Brg",
275
+ "Bypass" => "Byp",
276
+ "Causeway" => "Cswy",
277
+ "Circle" => "Cir",
278
+ "Common" => "Cmn",
279
+ "Commons" => "Cmns",
280
+ "Corners" => "Cors",
281
+ "Court" => "Ct",
282
+ "Courts" => "Cts",
283
+ "Crescent" => "Cres",
284
+ "Crest" => "Crst",
285
+ "Crossing" => "Xing",
286
+ "Cutoff" => "Cutoff",
287
+ "Drive" => "Dr",
288
+ "Driveway" => "Driveway",
289
+ "Esplanade" => "Esplanade",
290
+ "Estates" => "Ests",
291
+ "Expressway" => "Expy",
292
+ "Forest Highway" => "Forest Hwy",
293
+ "Fork" => "Frk",
294
+ "Four-Wheel Drive Trail" => "4WD Trl",
295
+ "Freeway" => "Fwy",
296
+ "Grade" => "Grade",
297
+ "Heights" => "Hts",
298
+ "Highway" => "Hwy",
299
+ "Jeep Trail" => "Jeep Trl",
300
+ "Landing" => "Lndg",
301
+ "Lane" => "Ln",
302
+ "Logging Road" => "Logging Rd",
303
+ "Loop" => "Loop",
304
+ "Motorway" => "Mtwy",
305
+ "Oval" => "Oval",
306
+ "Overpass" => "Opas",
307
+ "Parkway" => "Pkwy",
308
+ "Pass" => "Pass",
309
+ "Passage" => "Psge",
310
+ "Path" => "Path",
311
+ "Pike" => "Pike",
312
+ "Place" => "Pl",
313
+ "Plaza" => "Plz",
314
+ "Point" => "Pt",
315
+ "Pointe" => "Pointe",
316
+ "Promenade" => "Promenade",
317
+ "Railroad" => "RR",
318
+ "Railway" => "Rlwy",
319
+ "Ramp" => "Ramp",
320
+ "River" => "Riv",
321
+ "Road" => "Rd",
322
+ "Roadway" => "Roadway",
323
+ "Route" => "Rte",
324
+ "Row" => "Row",
325
+ "Rue" => "Rue",
326
+ "Service Road" => "Svc Rd",
327
+ "Skyway" => "Skwy",
328
+ "Spur" => "Spur",
329
+ "Square" => "Sq",
330
+ "Stravenue" => "Stra",
331
+ "Street" => "St",
332
+ "Strip" => "Strip",
333
+ "Terrace" => "Ter",
334
+ "Thoroughfare" => "Thoroughfare",
335
+ "Tollway" => "Tollway",
336
+ "Trace" => "Trce",
337
+ "Trafficway" => "Trfy",
338
+ "Trail" => "Trl",
339
+ "Trolley" => "Trolley",
340
+ "Truck Trail" => "Truck Trl",
341
+ "Tunnel" => "Tunl",
342
+ "Turnpike" => "Tpke",
343
+ "Viaduct" => "Viaduct",
344
+ "View" => "Vw",
345
+ "Vista" => "Vis",
346
+ "Walk" => "Walk",
347
+ "Walkway" => "Walkway",
348
+ "Way" => "Way",
349
+ }
350
+
351
+ # The Suffix_Alternate constant maps alternate suffix street types to
352
+ # their canonical abbreviations. This list was merged in from the USPS
353
+ # list at http://www.usps.com/ncsc/lookups/abbr_suffix.txt.
354
+ Suffix_Alternate = {
355
+ "Allee" => "Aly",
356
+ "Ally" => "Aly",
357
+ "Av" => "Ave",
358
+ "Aven" => "Ave",
359
+ "Avenu" => "Ave",
360
+ "Avenue" => "Ave",
361
+ "Avn" => "Ave",
362
+ "Avnue" => "Ave",
363
+ "Boul" => "Blvd",
364
+ "Boulv" => "Blvd",
365
+ "Brdge" => "Brg",
366
+ "Bypa" => "Byp",
367
+ "Bypas" => "Byp",
368
+ "Byps" => "Byp",
369
+ "Causway" => "Cswy",
370
+ "Circ" => "Cir",
371
+ "Circl" => "Cir",
372
+ "Crcl" => "Cir",
373
+ "Crcle" => "Cir",
374
+ "Crecent" => "Cres",
375
+ "Cresent" => "Cres",
376
+ "Crscnt" => "Cres",
377
+ "Crsent" => "Cres",
378
+ "Crsnt" => "Cres",
379
+ "Crssing" => "Xing",
380
+ "Crssng" => "Xing",
381
+ "Crt" => "Ct",
382
+ "Driv" => "Dr",
383
+ "Drv" => "Dr",
384
+ "Exp" => "Expy",
385
+ "Expr" => "Expy",
386
+ "Express" => "Expy",
387
+ "Expw" => "Expy",
388
+ "Freewy" => "Fwy",
389
+ "Frway" => "Fwy",
390
+ "Frwy" => "Fwy",
391
+ "Height" => "Hts",
392
+ "Hgts" => "Hts",
393
+ "Highwy" => "Hwy",
394
+ "Hiway" => "Hwy",
395
+ "Hiwy" => "Hwy",
396
+ "Ht" => "Hts",
397
+ "Hway" => "Hwy",
398
+ "La" => "Ln",
399
+ "Lanes" => "Ln",
400
+ "Lndng" => "Lndg",
401
+ "Loops" => "Loop",
402
+ "Ovl" => "Oval",
403
+ "Parkways" => "Pkwy",
404
+ "Parkwy" => "Pkwy",
405
+ "Paths" => "Path",
406
+ "Pikes" => "Pike",
407
+ "Pkway" => "Pkwy",
408
+ "Pkwys" => "Pkwy",
409
+ "Pky" => "Pkwy",
410
+ "Plza" => "Plz",
411
+ "Rivr" => "Riv",
412
+ "Rvr" => "Riv",
413
+ "Spurs" => "Spur",
414
+ "Sqr" => "Sq",
415
+ "Sqre" => "Sq",
416
+ "Squ" => "Sq",
417
+ "Str" => "St",
418
+ "Strav" => "Stra",
419
+ "Strave" => "Stra",
420
+ "Straven" => "Stra",
421
+ "Stravn" => "Stra",
422
+ "Strt" => "St",
423
+ "Strvn" => "Stra",
424
+ "Strvnue" => "Stra",
425
+ "Terr" => "Ter",
426
+ "Tpk" => "Tpke",
427
+ "Tr" => "Trl",
428
+ "Traces" => "Trce",
429
+ "Trails" => "Trl",
430
+ "Trls" => "Trl",
431
+ "Trnpk" => "Tpke",
432
+ "Trpk" => "Tpke",
433
+ "Tunel" => "Tunl",
434
+ "Tunls" => "Tunl",
435
+ "Tunnels" => "Tunl",
436
+ "Tunnl" => "Tunl",
437
+ "Turnpk" => "Tpke",
438
+ "Vist" => "Vis",
439
+ "Vst" => "Vis",
440
+ "Vsta" => "Vis",
441
+ "Walks" => "Walk",
442
+ "Wy" => "Way",
443
+ }
444
+
445
+ # The Suffix_Type constant merges the canonical suffix type abbreviations
446
+ # with their USPS accepted alternates.
447
+ Suffix_Type = Map[ Suffix_Canonical.merge(Suffix_Alternate) ]
448
+
449
+ # The Unit_Type constant lists acceptable USPS unit type abbreviations
450
+ # from http://www.usps.com/ncsc/lookups/abbr_sud.txt.
451
+ Unit_Type = Map[
452
+ "Apartment" => "Apt",
453
+ "Basement" => "Bsmt",
454
+ "Building" => "Bldg",
455
+ "Department"=> "Dept",
456
+ "Floor" => "Fl",
457
+ "Front" => "Frnt",
458
+ "Hangar" => "Hngr",
459
+ "Lobby" => "Lbby",
460
+ "Lot" => "Lot",
461
+ "Lower" => "Lowr",
462
+ "Office" => "Ofc",
463
+ "Penthouse" => "Ph",
464
+ "Pier" => "Pier",
465
+ "Rear" => "Rear",
466
+ "Room" => "Rm",
467
+ "Side" => "Side",
468
+ "Slip" => "Slip",
469
+ "Space" => "Spc",
470
+ "Stop" => "Stop",
471
+ "Suite" => "Ste",
472
+ "Trailer" => "Trlr",
473
+ "Unit" => "Unit",
474
+ "Upper" => "Uppr",
475
+ ]
476
+
477
# The Std_Abbr constant merges the directional, prefix/suffix qualifier and
# prefix/suffix type maps into a single Map. When the same key occurs in
# several of them, the entry from the map listed later wins.
Std_Abbr = Map[
  [Directional, Prefix_Qualifier, Suffix_Qualifier, Prefix_Type, Suffix_Type].
    inject({}) { |merged, table| merged.merge(table) }
]
481
+
482
+ # The Name_Abbr constant maps common toponym abbreviations to their
483
+ # full word equivalents. This list was constructed partly by hand, and
484
+ # partly by matching USPS alternate abbreviations with feature names
485
+ # found in the TIGER/Line dataset.
486
+ Name_Abbr = Map[
487
+ "Av" => "Avenue",
488
+ "Ave" => "Avenue",
489
+ "Blvd" => "Boulevard",
490
+ "Bot" => "Bottom",
491
+ "Boul" => "Boulevard",
492
+ "Boulv" => "Boulevard",
493
+ "Br" => "Branch",
494
+ "Brg" => "Bridge",
495
+ "Canyn" => "Canyon",
496
+ "Cen" => "Center",
497
+ "Cent" => "Center",
498
+ "Cir" => "Circle",
499
+ "Circ" => "Circle",
500
+ "Ck" => "Creek",
501
+ "Cnter" => "Center",
502
+ "Cntr" => "Center",
503
+ "Cnyn" => "Canyon",
504
+ "Cor" => "Corner",
505
+ "Cors" => "Corners",
506
+ "Cp" => "Camp",
507
+ "Cr" => "Creek",
508
+ "Crcl" => "Circle",
509
+ "Crcle" => "Circle",
510
+ "Cres" => "Crescent",
511
+ "Crscnt" => "Crescent",
512
+ "Ct" => "Court",
513
+ "Ctr" => "Center",
514
+ "Cts" => "Courts",
515
+ "Cyn" => "Canyon",
516
+ "Div" => "Divide",
517
+ "Dr" => "Drive",
518
+ "Dv" => "Divide",
519
+ "Est" => "Estate",
520
+ "Ests" => "Estates",
521
+ "Ext" => "Extension",
522
+ "Extn" => "Extension",
523
+ "Extnsn" => "Extension",
524
+ "Forests" => "Forest",
525
+ "Forg" => "Forge",
526
+ "Frg" => "Forge",
527
+ "Ft" => "Fort",
528
+ "Gatewy" => "Gateway",
529
+ "Gdn" => "Garden",
530
+ "Gdns" => "Gardens",
531
+ "Gtwy" => "Gateway",
532
+ "Harb" => "Harbor",
533
+ "Hbr" => "Harbor",
534
+ "Height" => "Heights",
535
+ "Hgts" => "Heights",
536
+ "Highwy" => "Highway",
537
+ "Hiway" => "Highway",
538
+ "Hiwy" => "Highway",
539
+ "Holws" => "Hollow",
540
+ "Ht" => "Heights",
541
+ "Hway" => "Highway",
542
+ "Hwy" => "Highway",
543
+ "Is" => "Island",
544
+ "Iss" => "Islands",
545
+ "Jct" => "Junction",
546
+ "Jction" => "Junction",
547
+ "Jctn" => "Junction",
548
+ "Junctn" => "Junction",
549
+ "Juncton" => "Junction",
550
+ "Ldg" => "Lodge",
551
+ "Lgt" => "Light",
552
+ "Lndg" => "Landing",
553
+ "Lodg" => "Lodge",
554
+ "Loops" => "Loop",
555
+ "Mt" => "Mount",
556
+ "Mtin" => "Mountain",
557
+ "Mtn" => "Mountain",
558
+ "Orch" => "Orchard",
559
+ "Parkwy" => "Parkway",
560
+ "Pk" => "Park",
561
+ "Pkway" => "Parkway",
562
+ "Pkwy" => "Parkway",
563
+ "Pky" => "Parkway",
564
+ "Pl" => "Place",
565
+ "Pnes" => "Pines",
566
+ "Pr" => "Prairie",
567
+ "Prr" => "Prairie",
568
+ "Pt" => "Point",
569
+ "Pts" => "Points",
570
+ "Rdg" => "Ridge",
571
+ "Riv" => "River",
572
+ "Rnchs" => "Ranch",
573
+ "Spg" => "Spring",
574
+ "Spgs" => "Springs",
575
+ "Spng" => "Spring",
576
+ "Spngs" => "Springs",
577
+ "Sq" => "Square",
578
+ "Squ" => "Square",
579
+ # "St" => "Saint",
580
+ "Sta" => "Station",
581
+ "Statn" => "Station",
582
+ "Ste" => "Sainte",
583
+ "Stn" => "Station",
584
+ "Str" => "Street",
585
+ "Ter" => "Terrace",
586
+ "Terr" => "Terrace",
587
+ "Tpk" => "Turnpike",
588
+ "Tpke" => "Turnpike",
589
+ "Tr" => "Trail",
590
+ "Trls" => "Trail",
591
+ "Trpk" => "Turnpike",
592
+ "Tunls" => "Tunnel",
593
+ "Un" => "Union",
594
+ "Vill" => "Village",
595
+ "Villag" => "Village",
596
+ "Villg" => "Village",
597
+ "Vis" => "Vista",
598
+ "Vlg" => "Village",
599
+ "Vlgs" => "Villages",
600
+ "Wls" => "Wells",
601
+ "Wy" => "Way",
602
+ "Xing" => "Crossing",
603
+ ]
604
+
605
+ # The State constant maps US state and territory names to their 2-letter
606
+ # USPS abbreviations.
607
+ State = Map[
608
+ "Alabama" => "AL",
609
+ "Alaska" => "AK",
610
+ "American Samoa" => "AS",
611
+ "Arizona" => "AZ",
612
+ "Arkansas" => "AR",
613
+ "California" => "CA",
614
+ "Colorado" => "CO",
615
+ "Connecticut" => "CT",
616
+ "Delaware" => "DE",
617
+ "District of Columbia" => "DC",
618
+ "Federated States of Micronesia" => "FM",
619
+ "Florida" => "FL",
620
+ "Georgia" => "GA",
621
+ "Guam" => "GU",
622
+ "Hawaii" => "HI",
623
+ "Idaho" => "ID",
624
+ "Illinois" => "IL",
625
+ "Indiana" => "IN",
626
+ "Iowa" => "IA",
627
+ "Kansas" => "KS",
628
+ "Kentucky" => "KY",
629
+ "Louisiana" => "LA",
630
+ "Maine" => "ME",
631
+ "Marshall Islands" => "MH",
632
+ "Maryland" => "MD",
633
+ "Massachusetts" => "MA",
634
+ "Michigan" => "MI",
635
+ "Minnesota" => "MN",
636
+ "Mississippi" => "MS",
637
+ "Missouri" => "MO",
638
+ "Montana" => "MT",
639
+ "Nebraska" => "NE",
640
+ "Nevada" => "NV",
641
+ "New Hampshire" => "NH",
642
+ "New Jersey" => "NJ",
643
+ "New Mexico" => "NM",
644
+ "New York" => "NY",
645
+ "North Carolina" => "NC",
646
+ "North Dakota" => "ND",
647
+ "Northern Mariana Islands" => "MP",
648
+ "Ohio" => "OH",
649
+ "Oklahoma" => "OK",
650
+ "Oregon" => "OR",
651
+ "Palau" => "PW",
652
+ "Pennsylvania" => "PA",
653
+ "Puerto Rico" => "PR",
654
+ "Rhode Island" => "RI",
655
+ "South Carolina" => "SC",
656
+ "South Dakota" => "SD",
657
+ "Tennessee" => "TN",
658
+ "Texas" => "TX",
659
+ "Utah" => "UT",
660
+ "Vermont" => "VT",
661
+ "Virgin Islands" => "VI",
662
+ "Virginia" => "VA",
663
+ "Washington" => "WA",
664
+ "West Virginia" => "WV",
665
+ "Wisconsin" => "WI",
666
+ "Wyoming" => "WY"
667
+ ]
668
+
669
+
670
+ end