geo_coder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. data/Gemfile +12 -0
  2. data/Gemfile.lock +32 -0
  3. data/History.txt +6 -0
  4. data/Makefile +13 -0
  5. data/Manifest.txt +18 -0
  6. data/README.rdoc +197 -0
  7. data/Rakefile +53 -0
  8. data/TODO.txt +8 -0
  9. data/VERSION +1 -0
  10. data/bin/build_indexes +8 -0
  11. data/bin/rebuild_cluster +22 -0
  12. data/bin/rebuild_metaphones +23 -0
  13. data/bin/tiger_import +59 -0
  14. data/demos/demo/app/ext/geocodewrap.rb +84 -0
  15. data/demos/demo/app/views/index.builder +13 -0
  16. data/demos/demo/app/views/index.erb +71 -0
  17. data/demos/demo/config.ru +12 -0
  18. data/demos/demo/config/bootstraps.rb +130 -0
  19. data/demos/demo/config/geoenvironment.rb +25 -0
  20. data/demos/demo/geocoder_helper.rb +12 -0
  21. data/demos/demo/geocom_geocode.rb +10 -0
  22. data/demos/demo/main.rb +3 -0
  23. data/demos/demo/rakefile.rb +17 -0
  24. data/demos/demo/tmp/restart.txt +0 -0
  25. data/demos/simpledemo/views/index.builder +13 -0
  26. data/demos/simpledemo/views/index.erb +69 -0
  27. data/demos/simpledemo/ws.rb +83 -0
  28. data/doc/Makefile +7 -0
  29. data/doc/html4css1.css +279 -0
  30. data/doc/lookup.rst +193 -0
  31. data/doc/parsing.rst +125 -0
  32. data/doc/voidspace.css +147 -0
  33. data/geo_coder.gemspec +172 -0
  34. data/lib/geocoder/us.rb +21 -0
  35. data/lib/geocoder/us/address.rb +290 -0
  36. data/lib/geocoder/us/constants.rb +670 -0
  37. data/lib/geocoder/us/database.rb +745 -0
  38. data/lib/geocoder/us/import.rb +181 -0
  39. data/lib/geocoder/us/import/tiger.rb +13 -0
  40. data/lib/geocoder/us/numbers.rb +58 -0
  41. data/navteq/README +4 -0
  42. data/navteq/convert.sql +37 -0
  43. data/navteq/navteq_import +39 -0
  44. data/navteq/prepare.sql +92 -0
  45. data/sql/cluster.sql +16 -0
  46. data/sql/convert.sql +80 -0
  47. data/sql/create.sql +37 -0
  48. data/sql/index.sql +12 -0
  49. data/sql/place.csv +104944 -0
  50. data/sql/place.sql +104948 -0
  51. data/sql/setup.sql +78 -0
  52. data/src/Makefile +13 -0
  53. data/src/README +14 -0
  54. data/src/liblwgeom/Makefile +75 -0
  55. data/src/liblwgeom/box2d.c +54 -0
  56. data/src/liblwgeom/lex.yy.c +4799 -0
  57. data/src/liblwgeom/liblwgeom.h +1405 -0
  58. data/src/liblwgeom/lwalgorithm.c +946 -0
  59. data/src/liblwgeom/lwalgorithm.h +52 -0
  60. data/src/liblwgeom/lwcircstring.c +759 -0
  61. data/src/liblwgeom/lwcollection.c +541 -0
  62. data/src/liblwgeom/lwcompound.c +118 -0
  63. data/src/liblwgeom/lwcurvepoly.c +86 -0
  64. data/src/liblwgeom/lwgeom.c +886 -0
  65. data/src/liblwgeom/lwgeom_api.c +2201 -0
  66. data/src/liblwgeom/lwgparse.c +1219 -0
  67. data/src/liblwgeom/lwgunparse.c +1054 -0
  68. data/src/liblwgeom/lwline.c +525 -0
  69. data/src/liblwgeom/lwmcurve.c +125 -0
  70. data/src/liblwgeom/lwmline.c +137 -0
  71. data/src/liblwgeom/lwmpoint.c +138 -0
  72. data/src/liblwgeom/lwmpoly.c +141 -0
  73. data/src/liblwgeom/lwmsurface.c +129 -0
  74. data/src/liblwgeom/lwpoint.c +439 -0
  75. data/src/liblwgeom/lwpoly.c +579 -0
  76. data/src/liblwgeom/lwsegmentize.c +1047 -0
  77. data/src/liblwgeom/lwutil.c +369 -0
  78. data/src/liblwgeom/measures.c +861 -0
  79. data/src/liblwgeom/postgis_config.h +93 -0
  80. data/src/liblwgeom/ptarray.c +847 -0
  81. data/src/liblwgeom/vsprintf.c +179 -0
  82. data/src/liblwgeom/wktparse.h +126 -0
  83. data/src/liblwgeom/wktparse.lex +74 -0
  84. data/src/liblwgeom/wktparse.tab.c +2353 -0
  85. data/src/liblwgeom/wktparse.tab.h +145 -0
  86. data/src/liblwgeom/wktparse.y +385 -0
  87. data/src/libsqlite3_geocoder/Makefile +22 -0
  88. data/src/libsqlite3_geocoder/Makefile.nix +15 -0
  89. data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
  90. data/src/libsqlite3_geocoder/extension.c +121 -0
  91. data/src/libsqlite3_geocoder/extension.h +13 -0
  92. data/src/libsqlite3_geocoder/levenshtein.c +42 -0
  93. data/src/libsqlite3_geocoder/metaphon.c +278 -0
  94. data/src/libsqlite3_geocoder/util.c +37 -0
  95. data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
  96. data/src/metaphone/Makefile +7 -0
  97. data/src/metaphone/README +49 -0
  98. data/src/metaphone/extension.c +37 -0
  99. data/src/metaphone/metaphon.c +251 -0
  100. data/src/shp2sqlite/Makefile +37 -0
  101. data/src/shp2sqlite/Makefile.nix +36 -0
  102. data/src/shp2sqlite/Makefile.redhat +35 -0
  103. data/src/shp2sqlite/dbfopen.c +1595 -0
  104. data/src/shp2sqlite/getopt.c +695 -0
  105. data/src/shp2sqlite/getopt.h +127 -0
  106. data/src/shp2sqlite/shapefil.h +500 -0
  107. data/src/shp2sqlite/shp2sqlite.c +1974 -0
  108. data/src/shp2sqlite/shpopen.c +1894 -0
  109. data/tests/address.rb +236 -0
  110. data/tests/benchmark.rb +20 -0
  111. data/tests/constants.rb +57 -0
  112. data/tests/data/address-sample.csv +52 -0
  113. data/tests/data/db-test.csv +57 -0
  114. data/tests/data/locations.csv +4 -0
  115. data/tests/database.rb +137 -0
  116. data/tests/generate.rb +34 -0
  117. data/tests/numbers.rb +46 -0
  118. data/tests/run.rb +11 -0
  119. metadata +237 -0
@@ -0,0 +1,290 @@
1
+ require 'geocoder/us/constants'
2
+
3
+ module Geocoder::US
4
# Defines the matching of parsed address tokens.
#
# Each value is a Regexp applied to the cleaned, downcased address text.
Match = {
  # FIXME: shouldn't have to anchor :number and :zip at start/end
  # Captures, in order: optional prefix, house number, optional
  # single-letter suffix.
  :number => /^(\d+\W|[a-z]+)?(\d+)([a-z]?)\b/i,
  :street => /(?:\b(?:\d+\w*|[a-z'-]+)\s*)+/i,
  :city   => /(?:\b[a-z'-]+\s*)+/i,
  # The suffix is single-quoted on purpose: inside a double-quoted Ruby
  # string "\s" is a literal space character, not the regexp whitespace
  # class, so the pattern would only tolerate trailing spaces.
  :state  => Regexp.new(State.regexp.source + '\s*$', Regexp::IGNORECASE),
  # Captures the 5-digit ZIP only; the optional +4 suffix is matched but
  # not captured.
  :zip    => /(\d{5})(?:-\d{4})?\s*$/,
  :at     => /\s(at|@|and|&)\s/i,
  # Character classes list alternatives directly; a "|" inside [...] is a
  # literal pipe, not alternation.
  :po_box => /\b[Pp]*(OST|ost)*\.*\s*[Oo0]*(ffice|FFICE)*\.*\s*[Bb][Oo0][Xx]\b/
}
15
+
16
+ # The Address class takes a US street address or place name and
17
+ # constructs a list of possible structured parses of the address
18
+ # string.
19
+ class Address
20
+ attr_accessor :text
21
+ attr_accessor :prenum, :number, :sufnum
22
+ attr_accessor :street
23
+ attr_accessor :city
24
+ attr_accessor :state
25
+ attr_accessor :zip, :plus4
26
+
27
# Takes an address or place name as its sole argument.
#
# +text+ is either a free-form String, which is cleaned and parsed, or a
# Hash (or any Hash subclass) of already-separated address fields, which is
# handed to #assign_text_to_address.
#
# Raises ArgumentError when +text+ is nil or empty.
def initialize(text)
  raise ArgumentError, "no text provided" unless text && !text.empty?
  # is_a? rather than an exact class comparison so that Hash subclasses are
  # routed to the structured path instead of being treated as a String.
  if text.is_a?(Hash)
    @text = ""
    assign_text_to_address(text)
  else
    @text = clean(text)
    parse
  end
end
38
+
39
# Removes any characters that aren't strictly part of an address string.
#
# Steps, in order:
# 1. every run of whitespace (tabs and newlines included) becomes a single
#    space, so that tokens separated only by a tab or newline are not fused
#    together when non-address characters are dropped;
# 2. every character other than letters, digits, space and , ' & @ / - is
#    removed;
# 3. runs of spaces left behind by step 2 are collapsed and the result is
#    trimmed.
#
# Returns a new String; +value+ is not modified.
def clean(value)
  spaced   = value.gsub(/\s+/, " ")
  filtered = spaced.gsub(/[^a-z0-9 ,'&@\/-]+/i, "")
  filtered.gsub(/ +/, " ").strip
end
45
+
46
+
47
# Populates the address from a Hash of fields.
#
# When +text+ has an :address entry, that value is cleaned and parsed as a
# free-form address and every other key is ignored. Otherwise the fields
# are taken from :prenum, :sufnum, :street, :number, :city, :region /
# :country / :state, :postal_code and :plus4.
def assign_text_to_address(text)
  if !text[:address].nil?
    @text = clean(text[:address])
    parse
  else
    @prenum = text[:prenum]
    @sufnum = text[:sufnum]
    @street = text[:street].nil? ? [] : text[:street].scan(Match[:street])
    @number = ""
    if text[:number].nil?
      # No explicit number: peel a leading house number off each street
      # candidate. The last candidate processed determines @number.
      @street.map! do |single_street|
        single_street.downcase!
        # Use the whole matched text. Calling to_s on the capture array
        # returned by scan yields an inspect-style string on Ruby >= 1.9,
        # which never occurs in the street and so was never removed.
        found = Match[:number].match(single_street)
        @number = found ? found[0] : ""
        single_street.sub!(@number, "")
        single_street.sub!(/^\s*,?\s*/, "")
        single_street
      end
    else
      @number = text[:number].to_s
    end
    @street = expand_streets(@street)

    @city = []
    if text[:city].nil?
      @city.push("")
    else
      @city.push(text[:city])
      @text = text[:city].to_s
    end

    if !text[:region].nil?
      @state = text[:region]
      # Anything longer than a 2-letter code is looked up as a state name
      # (special case: New York); an unknown name yields nil.
      @state = State[@state] if @state.length > 2
    elsif !text[:country].nil?
      # NOTE(review): :country is stored as the state and takes precedence
      # over :state -- confirm this is intended.
      @state = text[:country]
    elsif !text[:state].nil?
      @state = text[:state]
    end

    @zip = text[:postal_code]
    @plus4 = text[:plus4]
    @zip = @plus4 = "" unless @zip
  end
end
100
+
101
# Returns the alternative spellings of +string+ obtained by rewriting the
# first number found in it.
#
# The number is looked for as digits with an optional st/nd/rd/th suffix,
# then with Ordinals.regexp, then with Cardinals.regexp (both defined
# outside this file; presumably ordinal and cardinal number words). When a
# number below 100 is found, three variants are returned with the match
# replaced by the digits, by Ordinals[n] and by Cardinals[n]. Otherwise the
# result is a one-element list holding +string+ unchanged.
def expand_numbers(string)
  found = number = nil
  if (hit = /\b\d+(?:st|nd|rd|th)?\b/.match(string))
    found = hit[0]
    number = found.to_i
  elsif (hit = Ordinals.regexp.match(string))
    found = hit[0]
    number = Ordinals[found]
  elsif (hit = Cardinals.regexp.match(string))
    found = hit[0]
    number = Cardinals[found]
  end

  return [string] unless number && number < 100

  variants = []
  [number.to_s, Ordinals[number], Cardinals[number]].each do |spelling|
    variants << string.sub(found, spelling)
  end
  variants
end
125
+
126
# Removes the matched ZIP text from the end of +text+ and stores its parts.
#
# +regex_match+ is the full text matched by the ZIP pattern; @zip must hold
# the capture array for that match on entry. The last occurrence of
# +regex_match+ and any separating comma/whitespace before it are removed
# from +text+ in place.
#
# Sets @zip to the 5-digit code and @plus4 to the 4-digit extension (nil
# when the input had none). Returns the trimmed +text+.
def parse_zip(regex_match, text)
  idx = text.rindex(regex_match)
  text[idx...idx + regex_match.length] = ""
  text.sub!(/\s*,?\s*$/, "")
  # nil-safe strip: an optional capture group that did not participate
  # comes back as nil.
  zip5, plus4 = @zip.map { |s| s && s.strip }
  @zip = zip5
  # The ZIP pattern matches the +4 suffix without capturing it, so recover
  # it from the matched text when the captures did not supply one.
  @plus4 = plus4 || regex_match[/-(\d{4})/, 1]
  text
end
133
+
134
# Removes the matched state text from the end of +text+ and records it.
#
# +regex_match+ is the full text matched by the state pattern; @state must
# hold the capture array for that match on entry. The last occurrence of
# +regex_match+ and any comma/whitespace before it are removed from +text+
# in place.
#
# Sets @full_state to the state text as written (kept for the special
# case: New York) and @state to State[@full_state]. Returns +text+.
def parse_state(regex_match, text)
  start = text.rindex(regex_match)
  finish = start + regex_match.length
  text[start...finish] = ""
  text.sub!(/\s*,?\s*$/o, "")
  written = @state[0].strip
  @full_state = written
  @state = State[written]
  text
end
142
+
143
# Removes the matched house number from +text+ and stores its parts.
#
# +regex_match+ is the full text matched by the number pattern; @number
# must hold the three captures (prefix, number, suffix) on entry. The first
# occurrence of +regex_match+ and any comma/whitespace following it are
# removed from +text+ in place.
#
# Sets @prenum, @number and @sufnum to the stripped captures (nil captures
# stay nil). Returns +text+.
def parse_number(regex_match, text)
  # FIXME: What if this string appears twice?
  start = text.index(regex_match)
  finish = start + regex_match.length
  text[start...finish] = ""
  text.sub!(/^\s*,?\s*/o, "")
  pieces = @number.map { |piece| piece and piece.strip }
  @prenum, @number, @sufnum = pieces
  text
end
151
+
152
# Parses @text into zip, state, number, street candidates and city
# candidates.
#
# Works on a downcased copy of @text and consumes it from the outside in:
# ZIP and state are taken off the end, the house number off the front, and
# whatever remains is scanned for street and city candidates.
def parse
  remainder = @text.clone.downcase

  # ZIP: last match wins.
  @zip = remainder.scan(Match[:zip]).last
  if @zip
    remainder = parse_zip(Regexp.last_match(0), remainder)
  else
    @zip = @plus4 = ""
  end

  # State: last match wins.
  @state = remainder.scan(Match[:state]).last
  if @state
    remainder = parse_state(Regexp.last_match(0), remainder)
  else
    @full_state = ""
    @state = ""
  end

  # House number: first match wins.
  @number = remainder.scan(Match[:number]).first
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  if @number # and not intersection?
    remainder = parse_number(Regexp.last_match(0), remainder)
  else
    @prenum = @number = @sufnum = ""
  end

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  @street = expand_streets(remainder.scan(Match[:street]))
  # SPECIAL CASE: 1600 Pennsylvania 20050 -- the street name was consumed
  # as a state name, so offer it back as the street.
  if @street.empty? and @state.downcase != @full_state.downcase
    @street << @full_state
  end

  city_hits = remainder.scan(Match[:city])
  if city_hits.empty?
    @city = []
  else
    # Only the last city-like run is kept, plus its Name_Abbr rewrite.
    written = city_hits.last.strip
    rewritten = written.gsub(Name_Abbr.regexp) { |abbr| Name_Abbr[abbr] }
    @city = ([written] | [rewritten]).map { |name| name.downcase }.uniq
  end

  # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
  @city << @full_state if @state.downcase != @full_state.downcase

  # SPECIAL CASE: if given a single city string, and it's not the
  # same as the street string, remove it from the street parts
  self.city = @city if @city.length == 1 and @city != @street
end
206
+
207
# Expands a list of street strings into every spelling variant to try.
#
# Returns [] when +street+ is empty or its first element is nil. Otherwise
# the elements of +street+ are stripped in place, variants are added with
# Name_Abbr and then Std_Abbr matches rewritten, each variant is passed
# through #expand_numbers, and the result is downcased and de-duplicated.
def expand_streets(street)
  return [] if street.empty? || street[0].nil?

  street.map!(&:strip)
  named = street | street.map { |name|
    name.gsub(Name_Abbr.regexp) { |abbr| Name_Abbr[abbr] }
  }
  typed = named | named.map { |name|
    name.gsub(Std_Abbr.regexp) { |abbr| Std_Abbr[abbr] }
  }
  numbered = typed.map { |name| expand_numbers(name) }.flatten
  numbered.map { |name| name.downcase }.uniq
end
223
+
224
# Returns the distinct search strings derived from @street.
#
# Every contiguous run of whitespace-delimited words of every street
# candidate is generated, passed through #remove_noise_words, and
# de-duplicated. @street itself is not modified.
def street_parts
  phrases = @street.inject([]) do |seen, name|
    words = name.split(" ")
    runs = (0...words.length).flat_map do |from|
      (from...words.length).map { |to| words[from..to].join(" ") }
    end
    seen | runs
  end
  phrases = remove_noise_words(phrases)

  # Try a simpler case of adding the @number in case everything is an abbr.
  only_abbrs = phrases.all? { |s| Std_Abbr.key?(s) || Name_Abbr.key?(s) }
  phrases += [@number] if only_abbrs
  phrases.uniq
end
238
+
239
# Strips leading/trailing directionals and street-type words from each
# string.
#
# For every element of +strings+ a copy is made with, in order, a leading
# Directional, a trailing Directional, a leading Prefix_Type and a trailing
# Suffix_Type removed (together with the adjoining whitespace). Copies that
# end up empty are dropped. If nothing is left, the original +strings+ is
# returned unchanged so the caller still has something to search for.
#
# An earlier rule, "don't return strings that consist solely of
# abbreviations", broke on simple things like "Prairie St" or "Front St"
# and is not applied.
def remove_noise_words(strings)
  # The \s* fragments are single-quoted on purpose: inside a double-quoted
  # Ruby string "\s" is a literal space, not the regexp whitespace class.
  prefix = Regexp.new("^" + Prefix_Type.regexp.source + '\s*', Regexp::IGNORECASE)
  suffix = Regexp.new('\s*' + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)
  predxn = Regexp.new("^" + Directional.regexp.source + '\s*', Regexp::IGNORECASE)
  sufdxn = Regexp.new('\s*' + Directional.regexp.source + "$", Regexp::IGNORECASE)

  good_strings = strings.map do |s|
    s.gsub(predxn, "").gsub(sufdxn, "").gsub(prefix, "").gsub(suffix, "")
  end
  good_strings.reject! { |s| s.empty? }

  good_strings.empty? ? strings : good_strings
end
259
+
260
# Returns the distinct search strings derived from @city.
#
# For every city candidate, each word run that starts at a given word is
# generated, beginning with runs that start at the last word and working
# back to the first. Strings that are Std_Abbr keys are dropped unless that
# would leave nothing (dropping them unconditionally breaks on "Prairie").
def city_parts
  phrases = []
  @city.each do |name|
    words = name.split(" ")
    runs = (words.length - 1).downto(0).flat_map do |from|
      (from...words.length).map { |to| words[from..to].join(" ") }
    end
    phrases |= runs
  end

  kept = phrases.reject { |s| Std_Abbr.key?(s) }
  (kept.empty? ? phrases : kept).uniq
end
274
+
275
# Removes any of the given city +strings+ from the end of every street
# candidate and drops candidates that become empty.
#
# NOTE(review): this definition replaces the writer generated by
# attr_accessor :city and does not assign @city itself -- confirm that is
# intended.
#
# NOTE: This will still fail on: 100 Broome St, 33333 (if 33333 is
# Broome, MT or what)
def city=(strings)
  # Escape each name so punctuation in it is matched literally instead of
  # being interpreted as regexp syntax.
  names = strings.map { |name| Regexp.escape(name) }.join("|")
  match = Regexp.new('\s*\b(?:' + names + ')\b\s*$', Regexp::IGNORECASE)
  @street = @street.map { |string| string.gsub(match, '') }.reject { |s| s.empty? }
end
281
+
282
# Tests whether the cleaned address text mentions a PO box.
#
# Returns the MatchData for Match[:po_box] against @text, or nil when the
# pattern does not match.
def po_box?
  pattern = Match[:po_box]
  pattern.match(@text)
end
285
+
286
# Tests whether the cleaned address text looks like an intersection, i.e.
# contains "at", "@", "and" or "&" surrounded by whitespace.
#
# Returns the MatchData for Match[:at] against @text, or nil when the
# pattern does not match.
def intersection?
  pattern = Match[:at]
  pattern.match(@text)
end
289
+ end
290
+ end
@@ -0,0 +1,670 @@
1
+ # coding: utf-8
2
+ require 'set'
3
+ require 'geocoder/us/numbers'
4
+
5
+ module Geocoder
6
+ end
7
+
8
+ module Geocoder::US
9
# The Map class provides a two-way mapping between postal abbreviations
# and their fully written equivalents.
#
# Lookups through #[] and #key? are case-insensitive. Instances built with
# Map[...] are frozen.
class Map < Hash
  # Regexp matching any key or value of the Map as a whole word,
  # ignoring case.
  attr_accessor :regexp

  # Builds a frozen Map from the same arguments Hash[] accepts.
  #
  # The match regexp is compiled from the keys and values as given. Then,
  # for case-insensitive lookup, every key is also stored downcased
  # (pointing at its value) and every value is stored downcased (pointing
  # at itself).
  def self.[](*items)
    hash = super(*items)
    hash.build_match
    # fetch, not [], because Map#[] downcases the key before looking it up.
    hash.keys.each { |k| hash[k.downcase] = hash.fetch(k) }
    hash.values.each { |v| hash[v.downcase] = v }
    hash.freeze
  end

  # Builds the set consulted by #partial?: every key and value downcased,
  # plus each of their whitespace-delimited tokens.
  #
  # The set is cached in @partial when the Map is not frozen, and is
  # returned either way.
  def build_partial
    partial = Set.new
    [keys, values].flatten.each do |item|
      lowered = item.downcase
      partial << lowered
      lowered.split.each { |token| partial << token }
    end
    @partial = partial unless frozen?
    partial
  end

  # Compiles #regexp from the current keys and values. Each entry is
  # escaped so that punctuation in it is matched literally.
  def build_match
    alternatives = [keys, values].flatten.map { |item| Regexp.escape(item) }
    @regexp = Regexp.new(
      '\b(' + alternatives.join("|") + ')\b',
      Regexp::IGNORECASE)
  end

  # The partial? method returns true if the key, ignoring case, is a key or
  # value of the Map or one of their whitespace-delimited tokens.
  #
  # Maps built with Map[...] are frozen and carry no cached set, so the set
  # is rebuilt on each call instead of failing on a nil @partial.
  def partial?(key)
    (@partial || build_partial).member?(key.downcase)
  end

  # Case-insensitive Hash#key?.
  def key?(key)
    super(key.downcase)
  end

  # Case-insensitive Hash#[].
  def [](key)
    super(key.downcase)
  end
end
48
+
49
+ # The Directional constant maps compass direction words in English and
50
+ # Spanish to their 1- or 2- letter abbreviations. See 2008 TIGER/Line
51
+ # technical documentation Appendix C for more details.
52
+ Directional = Map[
53
+ "North" => "N",
54
+ "South" => "S",
55
+ "East" => "E",
56
+ "West" => "W",
57
+ "Northeast" => "NE",
58
+ "Northwest" => "NW",
59
+ "Southeast" => "SE",
60
+ "Southwest" => "SW",
61
+ "Norte" => "N",
62
+ "Sur" => "S",
63
+ "Este" => "E",
64
+ "Oeste" => "O",
65
+ "Noreste" => "NE",
66
+ "Noroeste" => "NO",
67
+ "Sudeste" => "SE",
68
+ "Sudoeste" => "SO"
69
+ ]
70
+
71
+ # The Prefix_Qualifier constant maps feature prefix qualifiers to their
72
+ # abbreviations. See 2008 TIGER/Line technical documentation Appendix D.
73
+ Prefix_Qualifier = Map[
74
+ "Alternate" => "Alt",
75
+ "Business" => "Bus",
76
+ "Bypass" => "Byp",
77
+ "Extended" => "Exd",
78
+ "Historic" => "Hst",
79
+ "Loop" => "Lp",
80
+ "Old" => "Old",
81
+ "Private" => "Pvt",
82
+ "Public" => "Pub",
83
+ "Spur" => "Spr",
84
+ ]
85
+
86
+ # The Suffix_Qualifier constant maps feature suffix qualifiers to their
87
+ # abbreviations. See 2008 TIGER/Line technical documentation Appendix D.
88
+ Suffix_Qualifier = Map[
89
+ "Access" => "Acc",
90
+ "Alternate" => "Alt",
91
+ "Business" => "Bus",
92
+ "Bypass" => "Byp",
93
+ "Connector" => "Con",
94
+ "Extended" => "Exd",
95
+ "Extension" => "Exn",
96
+ "Loop" => "Lp",
97
+ "Private" => "Pvt",
98
+ "Public" => "Pub",
99
+ "Scenic" => "Scn",
100
+ "Spur" => "Spr",
101
+ "Ramp" => "Rmp",
102
+ "Underpass" => "Unp",
103
+ "Overpass" => "Ovp",
104
+ ]
105
+
106
+ # The Prefix_Canonical constant maps canonical TIGER/Line street type
107
+ # prefixes to their abbreviations. This list is the subset of the list from
108
+ # 2008 TIGER/Line technical documentation Appendix E that was extracted from
109
+ # a TIGER/Line database import.
110
+ Prefix_Canonical = {
111
+ "Arcade" => "Arc",
112
+ "Autopista" => "Autopista",
113
+ "Avenida" => "Ave",
114
+ "Avenue" => "Ave",
115
+ "Boulevard" => "Blvd",
116
+ "Bulevar" => "Bulevar",
117
+ "Bureau of Indian Affairs Highway" => "BIA Hwy",
118
+ "Bureau of Indian Affairs Road" => "BIA Rd",
119
+ "Bureau of Indian Affairs Route" => "BIA Rte",
120
+ "Bureau of Land Management Road" => "BLM Rd",
121
+ "Bypass" => "Byp",
122
+ "Calle" => "Cll",
123
+ "Calleja" => "Calleja",
124
+ "Callejón" => "Callejón",
125
+ "Caminito" => "Cmt",
126
+ "Camino" => "Cam",
127
+ "Carretera" => "Carr",
128
+ "Cerrada" => "Cer",
129
+ "Círculo" => "Cír",
130
+ "Commons" => "Cmns",
131
+ "Corte" => "Corte",
132
+ "County Highway" => "Co Hwy",
133
+ "County Lane" => "Co Ln",
134
+ "County Road" => "Co Rd",
135
+ "County Route" => "Co Rte",
136
+ "County State Aid Highway" => "Co St Aid Hwy",
137
+ "County Trunk Highway" => "Co Trunk Hwy",
138
+ "County Trunk Road" => "Co Trunk Rd",
139
+ "Court" => "Ct",
140
+ "Delta Road" => "Delta Rd",
141
+ "District of Columbia Highway" => "DC Hwy",
142
+ "Driveway" => "Driveway",
143
+ "Entrada" => "Ent",
144
+ "Expreso" => "Expreso",
145
+ "Expressway" => "Expy",
146
+ "Farm Road" => "Farm Rd",
147
+ "Farm-to-Market Road" => "FM",
148
+ "Fire Control Road" => "Fire Cntrl Rd",
149
+ "Fire District Road" => "Fire Dist Rd",
150
+ "Fire Lane" => "Fire Ln",
151
+ "Fire Road" => "Fire Rd",
152
+ "Fire Route" => "Fire Rte",
153
+ "Fire Trail" => "Fire Trl",
154
+ "Forest Highway" => "Forest Hwy",
155
+ "Forest Road" => "Forest Rd",
156
+ "Forest Route" => "Forest Rte",
157
+ "Forest Service Road" => "FS Rd",
158
+ "Highway" => "Hwy",
159
+ "Indian Route" => "Indian Rte",
160
+ "Indian Service Route" => "Indian Svc Rte",
161
+ "Interstate Highway" => "I-",
162
+ "Lane" => "Ln",
163
+ "Logging Road" => "Logging Rd",
164
+ "Loop" => "Loop",
165
+ "National Forest Development Road" => "Nat For Dev Rd",
166
+ "Navajo Service Route" => "Navajo Svc Rte",
167
+ "Parish Road" => "Parish Rd",
168
+ "Pasaje" => "Pasaje",
169
+ "Paseo" => "Pso",
170
+ "Passage" => "Psge",
171
+ "Placita" => "Pla",
172
+ "Plaza" => "Plz",
173
+ "Point" => "Pt",
174
+ "Puente" => "Puente",
175
+ "Ranch Road" => "Ranch Rd",
176
+ "Ranch to Market Road" => "RM",
177
+ "Reservation Highway" => "Resvn Hwy",
178
+ "Road" => "Rd",
179
+ "Route" => "Rte",
180
+ "Row" => "Row",
181
+ "Rue" => "Rue",
182
+ "Ruta" => "Ruta",
183
+ "Sector" => "Sec",
184
+ "Sendero" => "Sendero",
185
+ "Service Road" => "Svc Rd",
186
+ "Skyway" => "Skwy",
187
+ "Square" => "Sq",
188
+ "State Forest Service Road" => "St FS Rd",
189
+ "State Highway" => "State Hwy",
190
+ "State Loop" => "State Loop",
191
+ "State Road" => "State Rd",
192
+ "State Route" => "State Rte",
193
+ "State Spur" => "State Spur",
194
+ "State Trunk Highway" => "St Trunk Hwy",
195
+ "Terrace" => "Ter",
196
+ "Town Highway" => "Town Hwy",
197
+ "Town Road" => "Town Rd",
198
+ "Township Highway" => "Twp Hwy",
199
+ "Township Road" => "Twp Rd",
200
+ "Trail" => "Trl",
201
+ "Tribal Road" => "Tribal Rd",
202
+ "Tunnel" => "Tunl",
203
+ "US Forest Service Highway" => "USFS Hwy",
204
+ "US Forest Service Road" => "USFS Rd",
205
+ "US Highway" => "US Hwy",
206
+ "US Route" => "US Rte",
207
+ "Vereda" => "Ver",
208
+ "Via" => "Via",
209
+ "Vista" => "Vis",
210
+ }
211
+
212
+ # The Prefix_Alternate constant maps alternate prefix street types to
213
+ # their canonical abbreviations. This list was merged in from the USPS
214
+ # list at http://www.usps.com/ncsc/lookups/abbr_suffix.txt.
215
+ Prefix_Alternate = {
216
+ "Av" => "Ave",
217
+ "Aven" => "Ave",
218
+ "Avenu" => "Ave",
219
+ "Avenue" => "Ave",
220
+ "Avn" => "Ave",
221
+ "Avnue" => "Ave",
222
+ "Boul" => "Blvd",
223
+ "Boulv" => "Blvd",
224
+ "Bypa" => "Byp",
225
+ "Bypas" => "Byp",
226
+ "Byps" => "Byp",
227
+ "Crt" => "Ct",
228
+ "Exp" => "Expy",
229
+ "Expr" => "Expy",
230
+ "Express" => "Expy",
231
+ "Expw" => "Expy",
232
+ "Highwy" => "Hwy",
233
+ "Hiway" => "Hwy",
234
+ "Hiwy" => "Hwy",
235
+ "Hway" => "Hwy",
236
+ "La" => "Ln",
237
+ "Lanes" => "Ln",
238
+ "Loops" => "Loop",
239
+ "Plza" => "Plz",
240
+ "Sqr" => "Sq",
241
+ "Sqre" => "Sq",
242
+ "Squ" => "Sq",
243
+ "Terr" => "Ter",
244
+ "Tr" => "Trl",
245
+ "Trails" => "Trl",
246
+ "Trls" => "Trl",
247
+ "Tunel" => "Tunl",
248
+ "Tunls" => "Tunl",
249
+ "Tunnels" => "Tunl",
250
+ "Tunnl" => "Tunl",
251
+ "Vdct" => "Via",
252
+ "Viadct" => "Via",
253
+ "Viaduct" => "Via",
254
+ "Vist" => "Vis",
255
+ "Vst" => "Vis",
256
+ "Vsta" => "Vis"
257
+ }
258
+
259
+ # The Prefix_Type constant merges the canonical prefix type abbreviations
260
+ # with their USPS accepted alternates.
261
+ Prefix_Type = Map[ Prefix_Canonical.merge(Prefix_Alternate) ]
262
+
263
+ # The Suffix_Canonical constant maps canonical TIGER/Line street type
264
+ # suffixes to their abbreviations. This list is the subset of the list from
265
+ # 2008 TIGER/Line technical documentation Appendix E that was extracted from
266
+ # a TIGER/Line database import.
267
+ Suffix_Canonical = {
268
+ "Alley" => "Aly",
269
+ "Arcade" => "Arc",
270
+ "Avenida" => "Ave",
271
+ "Avenue" => "Ave",
272
+ "Beltway" => "Beltway",
273
+ "Boulevard" => "Blvd",
274
+ "Bridge" => "Brg",
275
+ "Bypass" => "Byp",
276
+ "Causeway" => "Cswy",
277
+ "Circle" => "Cir",
278
+ "Common" => "Cmn",
279
+ "Commons" => "Cmns",
280
+ "Corners" => "Cors",
281
+ "Court" => "Ct",
282
+ "Courts" => "Cts",
283
+ "Crescent" => "Cres",
284
+ "Crest" => "Crst",
285
+ "Crossing" => "Xing",
286
+ "Cutoff" => "Cutoff",
287
+ "Drive" => "Dr",
288
+ "Driveway" => "Driveway",
289
+ "Esplanade" => "Esplanade",
290
+ "Estates" => "Ests",
291
+ "Expressway" => "Expy",
292
+ "Forest Highway" => "Forest Hwy",
293
+ "Fork" => "Frk",
294
+ "Four-Wheel Drive Trail" => "4WD Trl",
295
+ "Freeway" => "Fwy",
296
+ "Grade" => "Grade",
297
+ "Heights" => "Hts",
298
+ "Highway" => "Hwy",
299
+ "Jeep Trail" => "Jeep Trl",
300
+ "Landing" => "Lndg",
301
+ "Lane" => "Ln",
302
+ "Logging Road" => "Logging Rd",
303
+ "Loop" => "Loop",
304
+ "Motorway" => "Mtwy",
305
+ "Oval" => "Oval",
306
+ "Overpass" => "Opas",
307
+ "Parkway" => "Pkwy",
308
+ "Pass" => "Pass",
309
+ "Passage" => "Psge",
310
+ "Path" => "Path",
311
+ "Pike" => "Pike",
312
+ "Place" => "Pl",
313
+ "Plaza" => "Plz",
314
+ "Point" => "Pt",
315
+ "Pointe" => "Pointe",
316
+ "Promenade" => "Promenade",
317
+ "Railroad" => "RR",
318
+ "Railway" => "Rlwy",
319
+ "Ramp" => "Ramp",
320
+ "River" => "Riv",
321
+ "Road" => "Rd",
322
+ "Roadway" => "Roadway",
323
+ "Route" => "Rte",
324
+ "Row" => "Row",
325
+ "Rue" => "Rue",
326
+ "Service Road" => "Svc Rd",
327
+ "Skyway" => "Skwy",
328
+ "Spur" => "Spur",
329
+ "Square" => "Sq",
330
+ "Stravenue" => "Stra",
331
+ "Street" => "St",
332
+ "Strip" => "Strip",
333
+ "Terrace" => "Ter",
334
+ "Thoroughfare" => "Thoroughfare",
335
+ "Tollway" => "Tollway",
336
+ "Trace" => "Trce",
337
+ "Trafficway" => "Trfy",
338
+ "Trail" => "Trl",
339
+ "Trolley" => "Trolley",
340
+ "Truck Trail" => "Truck Trl",
341
+ "Tunnel" => "Tunl",
342
+ "Turnpike" => "Tpke",
343
+ "Viaduct" => "Viaduct",
344
+ "View" => "Vw",
345
+ "Vista" => "Vis",
346
+ "Walk" => "Walk",
347
+ "Walkway" => "Walkway",
348
+ "Way" => "Way",
349
+ }
350
+
351
+ # The Suffix_Alternate constant maps alternate suffix street types to
352
+ # their canonical abbreviations. This list was merged in from the USPS
353
+ # list at http://www.usps.com/ncsc/lookups/abbr_suffix.txt.
354
+ Suffix_Alternate = {
355
+ "Allee" => "Aly",
356
+ "Ally" => "Aly",
357
+ "Av" => "Ave",
358
+ "Aven" => "Ave",
359
+ "Avenu" => "Ave",
360
+ "Avenue" => "Ave",
361
+ "Avn" => "Ave",
362
+ "Avnue" => "Ave",
363
+ "Boul" => "Blvd",
364
+ "Boulv" => "Blvd",
365
+ "Brdge" => "Brg",
366
+ "Bypa" => "Byp",
367
+ "Bypas" => "Byp",
368
+ "Byps" => "Byp",
369
+ "Causway" => "Cswy",
370
+ "Circ" => "Cir",
371
+ "Circl" => "Cir",
372
+ "Crcl" => "Cir",
373
+ "Crcle" => "Cir",
374
+ "Crecent" => "Cres",
375
+ "Cresent" => "Cres",
376
+ "Crscnt" => "Cres",
377
+ "Crsent" => "Cres",
378
+ "Crsnt" => "Cres",
379
+ "Crssing" => "Xing",
380
+ "Crssng" => "Xing",
381
+ "Crt" => "Ct",
382
+ "Driv" => "Dr",
383
+ "Drv" => "Dr",
384
+ "Exp" => "Expy",
385
+ "Expr" => "Expy",
386
+ "Express" => "Expy",
387
+ "Expw" => "Expy",
388
+ "Freewy" => "Fwy",
389
+ "Frway" => "Fwy",
390
+ "Frwy" => "Fwy",
391
+ "Height" => "Hts",
392
+ "Hgts" => "Hts",
393
+ "Highwy" => "Hwy",
394
+ "Hiway" => "Hwy",
395
+ "Hiwy" => "Hwy",
396
+ "Ht" => "Hts",
397
+ "Hway" => "Hwy",
398
+ "La" => "Ln",
399
+ "Lanes" => "Ln",
400
+ "Lndng" => "Lndg",
401
+ "Loops" => "Loop",
402
+ "Ovl" => "Oval",
403
+ "Parkways" => "Pkwy",
404
+ "Parkwy" => "Pkwy",
405
+ "Paths" => "Path",
406
+ "Pikes" => "Pike",
407
+ "Pkway" => "Pkwy",
408
+ "Pkwys" => "Pkwy",
409
+ "Pky" => "Pkwy",
410
+ "Plza" => "Plz",
411
+ "Rivr" => "Riv",
412
+ "Rvr" => "Riv",
413
+ "Spurs" => "Spur",
414
+ "Sqr" => "Sq",
415
+ "Sqre" => "Sq",
416
+ "Squ" => "Sq",
417
+ "Str" => "St",
418
+ "Strav" => "Stra",
419
+ "Strave" => "Stra",
420
+ "Straven" => "Stra",
421
+ "Stravn" => "Stra",
422
+ "Strt" => "St",
423
+ "Strvn" => "Stra",
424
+ "Strvnue" => "Stra",
425
+ "Terr" => "Ter",
426
+ "Tpk" => "Tpke",
427
+ "Tr" => "Trl",
428
+ "Traces" => "Trce",
429
+ "Trails" => "Trl",
430
+ "Trls" => "Trl",
431
+ "Trnpk" => "Tpke",
432
+ "Trpk" => "Tpke",
433
+ "Tunel" => "Tunl",
434
+ "Tunls" => "Tunl",
435
+ "Tunnels" => "Tunl",
436
+ "Tunnl" => "Tunl",
437
+ "Turnpk" => "Tpke",
438
+ "Vist" => "Vis",
439
+ "Vst" => "Vis",
440
+ "Vsta" => "Vis",
441
+ "Walks" => "Walk",
442
+ "Wy" => "Way",
443
+ }
444
+
445
+ # The Suffix_Type constant merges the canonical suffix type abbreviations
446
+ # with their USPS accepted alternates.
447
+ Suffix_Type = Map[ Suffix_Canonical.merge(Suffix_Alternate) ]
448
+
449
+ # The Unit_Type constant lists acceptable USPS unit type abbreviations
450
+ # from http://www.usps.com/ncsc/lookups/abbr_sud.txt.
451
+ Unit_Type = Map[
452
+ "Apartment" => "Apt",
453
+ "Basement" => "Bsmt",
454
+ "Building" => "Bldg",
455
+ "Department"=> "Dept",
456
+ "Floor" => "Fl",
457
+ "Front" => "Frnt",
458
+ "Hangar" => "Hngr",
459
+ "Lobby" => "Lbby",
460
+ "Lot" => "Lot",
461
+ "Lower" => "Lowr",
462
+ "Office" => "Ofc",
463
+ "Penthouse" => "Ph",
464
+ "Pier" => "Pier",
465
+ "Rear" => "Rear",
466
+ "Room" => "Rm",
467
+ "Side" => "Side",
468
+ "Slip" => "Slip",
469
+ "Space" => "Spc",
470
+ "Stop" => "Stop",
471
+ "Suite" => "Ste",
472
+ "Trailer" => "Trlr",
473
+ "Unit" => "Unit",
474
+ "Upper" => "Uppr",
475
+ ]
476
+
477
+ Std_Abbr = Map[
478
+ [Directional, Prefix_Qualifier, Suffix_Qualifier,
479
+ Prefix_Type, Suffix_Type].inject({}) {|x,y|x.merge y}
480
+ ]
481
+
482
+ # The Name_Abbr constant maps common toponym abbreviations to their
483
+ # full word equivalents. This list was constructed partly by hand, and
484
+ # partly by matching USPS alternate abbreviations with feature names
485
+ # found in the TIGER/Line dataset.
486
+ Name_Abbr = Map[
487
+ "Av" => "Avenue",
488
+ "Ave" => "Avenue",
489
+ "Blvd" => "Boulevard",
490
+ "Bot" => "Bottom",
491
+ "Boul" => "Boulevard",
492
+ "Boulv" => "Boulevard",
493
+ "Br" => "Branch",
494
+ "Brg" => "Bridge",
495
+ "Canyn" => "Canyon",
496
+ "Cen" => "Center",
497
+ "Cent" => "Center",
498
+ "Cir" => "Circle",
499
+ "Circ" => "Circle",
500
+ "Ck" => "Creek",
501
+ "Cnter" => "Center",
502
+ "Cntr" => "Center",
503
+ "Cnyn" => "Canyon",
504
+ "Cor" => "Corner",
505
+ "Cors" => "Corners",
506
+ "Cp" => "Camp",
507
+ "Cr" => "Creek",
508
+ "Crcl" => "Circle",
509
+ "Crcle" => "Circle",
510
+ "Cres" => "Crescent",
511
+ "Crscnt" => "Crescent",
512
+ "Ct" => "Court",
513
+ "Ctr" => "Center",
514
+ "Cts" => "Courts",
515
+ "Cyn" => "Canyon",
516
+ "Div" => "Divide",
517
+ "Dr" => "Drive",
518
+ "Dv" => "Divide",
519
+ "Est" => "Estate",
520
+ "Ests" => "Estates",
521
+ "Ext" => "Extension",
522
+ "Extn" => "Extension",
523
+ "Extnsn" => "Extension",
524
+ "Forests" => "Forest",
525
+ "Forg" => "Forge",
526
+ "Frg" => "Forge",
527
+ "Ft" => "Fort",
528
+ "Gatewy" => "Gateway",
529
+ "Gdn" => "Garden",
530
+ "Gdns" => "Gardens",
531
+ "Gtwy" => "Gateway",
532
+ "Harb" => "Harbor",
533
+ "Hbr" => "Harbor",
534
+ "Height" => "Heights",
535
+ "Hgts" => "Heights",
536
+ "Highwy" => "Highway",
537
+ "Hiway" => "Highway",
538
+ "Hiwy" => "Highway",
539
+ "Holws" => "Hollow",
540
+ "Ht" => "Heights",
541
+ "Hway" => "Highway",
542
+ "Hwy" => "Highway",
543
+ "Is" => "Island",
544
+ "Iss" => "Islands",
545
+ "Jct" => "Junction",
546
+ "Jction" => "Junction",
547
+ "Jctn" => "Junction",
548
+ "Junctn" => "Junction",
549
+ "Juncton" => "Junction",
550
+ "Ldg" => "Lodge",
551
+ "Lgt" => "Light",
552
+ "Lndg" => "Landing",
553
+ "Lodg" => "Lodge",
554
+ "Loops" => "Loop",
555
+ "Mt" => "Mount",
556
+ "Mtin" => "Mountain",
557
+ "Mtn" => "Mountain",
558
+ "Orch" => "Orchard",
559
+ "Parkwy" => "Parkway",
560
+ "Pk" => "Park",
561
+ "Pkway" => "Parkway",
562
+ "Pkwy" => "Parkway",
563
+ "Pky" => "Parkway",
564
+ "Pl" => "Place",
565
+ "Pnes" => "Pines",
566
+ "Pr" => "Prairie",
567
+ "Prr" => "Prairie",
568
+ "Pt" => "Point",
569
+ "Pts" => "Points",
570
+ "Rdg" => "Ridge",
571
+ "Riv" => "River",
572
+ "Rnchs" => "Ranch",
573
+ "Spg" => "Spring",
574
+ "Spgs" => "Springs",
575
+ "Spng" => "Spring",
576
+ "Spngs" => "Springs",
577
+ "Sq" => "Square",
578
+ "Squ" => "Square",
579
+ # "St" => "Saint",
580
+ "Sta" => "Station",
581
+ "Statn" => "Station",
582
+ "Ste" => "Sainte",
583
+ "Stn" => "Station",
584
+ "Str" => "Street",
585
+ "Ter" => "Terrace",
586
+ "Terr" => "Terrace",
587
+ "Tpk" => "Turnpike",
588
+ "Tpke" => "Turnpike",
589
+ "Tr" => "Trail",
590
+ "Trls" => "Trail",
591
+ "Trpk" => "Turnpike",
592
+ "Tunls" => "Tunnel",
593
+ "Un" => "Union",
594
+ "Vill" => "Village",
595
+ "Villag" => "Village",
596
+ "Villg" => "Village",
597
+ "Vis" => "Vista",
598
+ "Vlg" => "Village",
599
+ "Vlgs" => "Villages",
600
+ "Wls" => "Wells",
601
+ "Wy" => "Way",
602
+ "Xing" => "Crossing",
603
+ ]
604
+
605
+ # The State constant maps US state and territory names to their 2-letter
606
+ # USPS abbreviations.
607
+ State = Map[
608
+ "Alabama" => "AL",
609
+ "Alaska" => "AK",
610
+ "American Samoa" => "AS",
611
+ "Arizona" => "AZ",
612
+ "Arkansas" => "AR",
613
+ "California" => "CA",
614
+ "Colorado" => "CO",
615
+ "Connecticut" => "CT",
616
+ "Delaware" => "DE",
617
+ "District of Columbia" => "DC",
618
+ "Federated States of Micronesia" => "FM",
619
+ "Florida" => "FL",
620
+ "Georgia" => "GA",
621
+ "Guam" => "GU",
622
+ "Hawaii" => "HI",
623
+ "Idaho" => "ID",
624
+ "Illinois" => "IL",
625
+ "Indiana" => "IN",
626
+ "Iowa" => "IA",
627
+ "Kansas" => "KS",
628
+ "Kentucky" => "KY",
629
+ "Louisiana" => "LA",
630
+ "Maine" => "ME",
631
+ "Marshall Islands" => "MH",
632
+ "Maryland" => "MD",
633
+ "Massachusetts" => "MA",
634
+ "Michigan" => "MI",
635
+ "Minnesota" => "MN",
636
+ "Mississippi" => "MS",
637
+ "Missouri" => "MO",
638
+ "Montana" => "MT",
639
+ "Nebraska" => "NE",
640
+ "Nevada" => "NV",
641
+ "New Hampshire" => "NH",
642
+ "New Jersey" => "NJ",
643
+ "New Mexico" => "NM",
644
+ "New York" => "NY",
645
+ "North Carolina" => "NC",
646
+ "North Dakota" => "ND",
647
+ "Northern Mariana Islands" => "MP",
648
+ "Ohio" => "OH",
649
+ "Oklahoma" => "OK",
650
+ "Oregon" => "OR",
651
+ "Palau" => "PW",
652
+ "Pennsylvania" => "PA",
653
+ "Puerto Rico" => "PR",
654
+ "Rhode Island" => "RI",
655
+ "South Carolina" => "SC",
656
+ "South Dakota" => "SD",
657
+ "Tennessee" => "TN",
658
+ "Texas" => "TX",
659
+ "Utah" => "UT",
660
+ "Vermont" => "VT",
661
+ "Virgin Islands" => "VI",
662
+ "Virginia" => "VA",
663
+ "Washington" => "WA",
664
+ "West Virginia" => "WV",
665
+ "Wisconsin" => "WI",
666
+ "Wyoming" => "WY"
667
+ ]
668
+
669
+
670
+ end