RubyGems - geo_coder - Versions diffs - 0.1.0 - Mend

geo_coder 0.1.0

Files changed (119) hide show

data/Gemfile +12 -0
data/Gemfile.lock +32 -0
data/History.txt +6 -0
data/Makefile +13 -0
data/Manifest.txt +18 -0
data/README.rdoc +197 -0
data/Rakefile +53 -0
data/TODO.txt +8 -0
data/VERSION +1 -0
data/bin/build_indexes +8 -0
data/bin/rebuild_cluster +22 -0
data/bin/rebuild_metaphones +23 -0
data/bin/tiger_import +59 -0
data/demos/demo/app/ext/geocodewrap.rb +84 -0
data/demos/demo/app/views/index.builder +13 -0
data/demos/demo/app/views/index.erb +71 -0
data/demos/demo/config.ru +12 -0
data/demos/demo/config/bootstraps.rb +130 -0
data/demos/demo/config/geoenvironment.rb +25 -0
data/demos/demo/geocoder_helper.rb +12 -0
data/demos/demo/geocom_geocode.rb +10 -0
data/demos/demo/main.rb +3 -0
data/demos/demo/rakefile.rb +17 -0
data/demos/demo/tmp/restart.txt +0 -0
data/demos/simpledemo/views/index.builder +13 -0
data/demos/simpledemo/views/index.erb +69 -0
data/demos/simpledemo/ws.rb +83 -0
data/doc/Makefile +7 -0
data/doc/html4css1.css +279 -0
data/doc/lookup.rst +193 -0
data/doc/parsing.rst +125 -0
data/doc/voidspace.css +147 -0
data/geo_coder.gemspec +172 -0
data/lib/geocoder/us.rb +21 -0
data/lib/geocoder/us/address.rb +290 -0
data/lib/geocoder/us/constants.rb +670 -0
data/lib/geocoder/us/database.rb +745 -0
data/lib/geocoder/us/import.rb +181 -0
data/lib/geocoder/us/import/tiger.rb +13 -0
data/lib/geocoder/us/numbers.rb +58 -0
data/navteq/README +4 -0
data/navteq/convert.sql +37 -0
data/navteq/navteq_import +39 -0
data/navteq/prepare.sql +92 -0
data/sql/cluster.sql +16 -0
data/sql/convert.sql +80 -0
data/sql/create.sql +37 -0
data/sql/index.sql +12 -0
data/sql/place.csv +104944 -0
data/sql/place.sql +104948 -0
data/sql/setup.sql +78 -0
data/src/Makefile +13 -0
data/src/README +14 -0
data/src/liblwgeom/Makefile +75 -0
data/src/liblwgeom/box2d.c +54 -0
data/src/liblwgeom/lex.yy.c +4799 -0
data/src/liblwgeom/liblwgeom.h +1405 -0
data/src/liblwgeom/lwalgorithm.c +946 -0
data/src/liblwgeom/lwalgorithm.h +52 -0
data/src/liblwgeom/lwcircstring.c +759 -0
data/src/liblwgeom/lwcollection.c +541 -0
data/src/liblwgeom/lwcompound.c +118 -0
data/src/liblwgeom/lwcurvepoly.c +86 -0
data/src/liblwgeom/lwgeom.c +886 -0
data/src/liblwgeom/lwgeom_api.c +2201 -0
data/src/liblwgeom/lwgparse.c +1219 -0
data/src/liblwgeom/lwgunparse.c +1054 -0
data/src/liblwgeom/lwline.c +525 -0
data/src/liblwgeom/lwmcurve.c +125 -0
data/src/liblwgeom/lwmline.c +137 -0
data/src/liblwgeom/lwmpoint.c +138 -0
data/src/liblwgeom/lwmpoly.c +141 -0
data/src/liblwgeom/lwmsurface.c +129 -0
data/src/liblwgeom/lwpoint.c +439 -0
data/src/liblwgeom/lwpoly.c +579 -0
data/src/liblwgeom/lwsegmentize.c +1047 -0
data/src/liblwgeom/lwutil.c +369 -0
data/src/liblwgeom/measures.c +861 -0
data/src/liblwgeom/postgis_config.h +93 -0
data/src/liblwgeom/ptarray.c +847 -0
data/src/liblwgeom/vsprintf.c +179 -0
data/src/liblwgeom/wktparse.h +126 -0
data/src/liblwgeom/wktparse.lex +74 -0
data/src/liblwgeom/wktparse.tab.c +2353 -0
data/src/liblwgeom/wktparse.tab.h +145 -0
data/src/liblwgeom/wktparse.y +385 -0
data/src/libsqlite3_geocoder/Makefile +22 -0
data/src/libsqlite3_geocoder/Makefile.nix +15 -0
data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
data/src/libsqlite3_geocoder/extension.c +121 -0
data/src/libsqlite3_geocoder/extension.h +13 -0
data/src/libsqlite3_geocoder/levenshtein.c +42 -0
data/src/libsqlite3_geocoder/metaphon.c +278 -0
data/src/libsqlite3_geocoder/util.c +37 -0
data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
data/src/metaphone/Makefile +7 -0
data/src/metaphone/README +49 -0
data/src/metaphone/extension.c +37 -0
data/src/metaphone/metaphon.c +251 -0
data/src/shp2sqlite/Makefile +37 -0
data/src/shp2sqlite/Makefile.nix +36 -0
data/src/shp2sqlite/Makefile.redhat +35 -0
data/src/shp2sqlite/dbfopen.c +1595 -0
data/src/shp2sqlite/getopt.c +695 -0
data/src/shp2sqlite/getopt.h +127 -0
data/src/shp2sqlite/shapefil.h +500 -0
data/src/shp2sqlite/shp2sqlite.c +1974 -0
data/src/shp2sqlite/shpopen.c +1894 -0
data/tests/address.rb +236 -0
data/tests/benchmark.rb +20 -0
data/tests/constants.rb +57 -0
data/tests/data/address-sample.csv +52 -0
data/tests/data/db-test.csv +57 -0
data/tests/data/locations.csv +4 -0
data/tests/database.rb +137 -0
data/tests/generate.rb +34 -0
data/tests/numbers.rb +46 -0
data/tests/run.rb +11 -0
metadata +237 -0

data/lib/geocoder/us/address.rb ADDED Viewed

@@ -0,0 +1,290 @@
+require 'geocoder/us/constants'
+module Geocoder::US
+  # Defines the matching of parsed address tokens.
+  Match = {
+    # FIXME: shouldn't have to anchor :number and :zip at start/end
+    :number   => /^(\d+\W|[a-z]+)?(\d+)([a-z]?)\b/io,
+    :street   => /(?:\b(?:\d+\w*|[a-z'-]+)\s*)+/io,
+    :city     => /(?:\b[a-z'-]+\s*)+/io,
+    :state    => Regexp.new(State.regexp.source + "\s*$", Regexp::IGNORECASE),
+    :zip      => /(\d{5})(?:-\d{4})?\s*$/o,
+    :at       => /\s(at|@|and|&)\s/io,
+    :po_box => /\b[P|p]*(OST|ost)*\.*\s*[O|o|0]*(ffice|FFICE)*\.*\s*[B|b][O|o|0][X|x]\b/
+  }
+  # The Address class takes a US street address or place name and
+  # constructs a list of possible structured parses of the address
+  # string.
+  class Address
+    attr_accessor :text
+    attr_accessor :prenum, :number, :sufnum
+    attr_accessor :street
+    attr_accessor :city
+    attr_accessor :state
+    attr_accessor :zip, :plus4
+    # Takes an address or place name string as its sole argument.
+    def initialize (text)
+      raise ArgumentError, "no text provided" unless text and !text.empty?
+      if text.class == Hash
+        @text = ""
+        assign_text_to_address text
+      else
+        @text = clean text
+        parse
+      end
+    end
+    # Removes any characters that aren't strictly part of an address string.
+    def clean (value)
+      value.strip \
+           .gsub(/[^a-z0-9 ,'&@\/-]+/io, "") \
+           .gsub(/\s+/o, " ")
+    end
+    def assign_text_to_address(text)
+      if !text[:address].nil?
+        @text = clean text[:address]
+        parse
+      else
+        @street = []
+        @prenum = text[:prenum]
+        @sufnum = text[:sufnum]
+        if !text[:street].nil?
+          @street = text[:street].scan(Match[:street])
+        end
+        @number = ""
+        if !@street.nil?
+          if text[:number].nil?
+             @street.map! { |single_street|
+               single_street.downcase!
+               @number = single_street.scan(Match[:number])[0].to_s
+               single_street.sub! @number, ""
+               single_street.sub! /^\s*,?\s*/o, ""
+              }
+         else
+            @number = text[:number].to_s
+          end
+         @street = expand_streets(@street)
+          street_parts
+        end
+        @city = []
+        if !text[:city].nil?
+          @city.push(text[:city])
+          @text = text[:city].to_s
+        else
+          @city.push("")
+        end
+        if !text[:region].nil?
+         # @state = []
+         @state = text[:region]
+          if @state.length > 2
+           # full_state = @state.strip # special case: New York
+            @state = State[@state]
+          end
+        elsif !text[:country].nil?
+          @state = text[:country]
+        elsif !text[:state].nil?
+          @state = text[:state]
+        end
+        @zip = text[:postal_code]
+        @plus4 = text[:plus4]
+        if !@zip
+           @zip = @plus4 = ""
+        end
+      end
+    end
+    # Expands a token into a list of possible strings based on
+    # the Geocoder::US::Name_Abbr constant, and expands numerals and
+    # number words into their possible equivalents.
+    def expand_numbers (string)
+      if /\b\d+(?:st|nd|rd|th)?\b/o.match string
+        match = $&
+        num = $&.to_i
+      elsif Ordinals.regexp.match string
+        num = Ordinals[$&]
+        match = $&
+      elsif Cardinals.regexp.match string
+        num = Cardinals[$&]
+        match = $&
+      end
+      strings = []
+      if num and num < 100
+        [num.to_s, Ordinals[num], Cardinals[num]].each {|replace|
+          strings << string.sub(match, replace)
+        }
+      else
+        strings << string
+      end
+      strings
+    end
+    def parse_zip(regex_match, text)
+      idx = text.rindex(regex_match)
+      text[idx...idx+regex_match.length] = ""
+      text.sub! /\s*,?\s*$/o, ""
+      @zip, @plus4 = @zip.map {|s|s.strip}
+      text
+    end
+    def parse_state(regex_match, text)
+      idx = text.rindex(regex_match)
+      text[idx...idx+regex_match.length] = ""
+      text.sub! /\s*,?\s*$/o, ""
+      @full_state = @state[0].strip # special case: New York
+      @state = State[@full_state]
+      text
+    end
+    def parse_number(regex_match, text)
+      # FIXME: What if this string appears twice?
+      idx = text.index(regex_match)
+      text[idx...idx+regex_match.length] = ""
+      text.sub! /^\s*,?\s*/o, ""
+      @prenum, @number, @sufnum = @number.map {|s| s and s.strip}
+      text
+    end
+    # Splits @text into ZIP, state, house number, street and city candidates.
+    #
+    # Works on a lower-cased copy of @text and peels components off in a
+    # fixed order: ZIP, then state, then house number. Whatever text is left
+    # is scanned for both street and city candidates.
+    #
+    # Each `$&` below is read immediately after the corresponding scan call
+    # and is passed on as the matched substring, so those statement pairs
+    # must stay adjacent.
+    def parse
+      text = @text.clone.downcase
+      # Last ZIP-looking match in the text, as an Array of capture groups.
+      @zip = text.scan(Match[:zip])[-1]
+      if @zip
+        text = parse_zip($&, text)
+      else
+        @zip = @plus4 = ""
+      end
+      # Last state match in what remains once the ZIP is gone.
+      @state = text.scan(Match[:state])[-1]
+      if @state
+        text = parse_state($&, text)
+      else
+        @full_state = ""
+        @state = ""
+      end
+      # First house-number match in what remains.
+      @number = text.scan(Match[:number])[0]
+      # FIXME: 230 Fish And Game Rd, Hudson NY 12534
+      if @number # and not intersection?
+        text = parse_number($&, text)
+      else
+        @prenum = @number = @sufnum = ""
+      end
+      # FIXME: special case: Name_Abbr gets a bit aggressive
+      # about replacing St with Saint. exceptional case:
+      # Sault Ste. Marie
+      # FIXME: PO Box should geocode to ZIP
+      @street = text.scan(Match[:street])
+      @street = expand_streets(@street)
+      # SPECIAL CASE: 1600 Pennsylvania 20050
+      @street << @full_state if @street.empty? and @state.downcase != @full_state.downcase
+      @city = text.scan(Match[:city])
+      if !@city.empty?
+        # Only the last city-shaped match is kept, together with its
+        # Name_Abbr-substituted variant.
+        @city = [@city[-1].strip]
+        add = @city.map {|item| item.gsub(Name_Abbr.regexp) {|m| Name_Abbr[m]}}
+        @city |= add
+        @city.map! {|s| s.downcase}
+        @city.uniq!
+      else
+        @city = []
+      end
+      # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
+      @city << @full_state if @state.downcase != @full_state.downcase
+      # SPECIAL CASE: if given a single city string, and it's not the
+      # same as the street string, remove it from the street parts
+      self.city= @city if @city.length == 1 and @city != @street
+    end
+    def expand_streets(street)
+      if !street.empty? && !street[0].nil?
+        street.map! {|s|s.strip}
+        add = street.map {|item| item.gsub(Name_Abbr.regexp) {|m| Name_Abbr[m]}}
+        street |= add
+        add = street.map {|item| item.gsub(Std_Abbr.regexp) {|m| Std_Abbr[m]}}
+        street |= add
+        street.map! {|item| expand_numbers(item)}
+        street.flatten!
+        street.map! {|s| s.downcase}
+        street.uniq!
+      else
+        street = []
+      end
+      street
+    end
+    def street_parts
+      strings = []
+      # Get all the substrings delimited by whitespace
+      @street.each {|string|
+        tokens = string.split(" ")
+        strings |= (0...tokens.length).map {|i|
+                   (i...tokens.length).map {|j| tokens[i..j].join(" ")}}.flatten
+      }
+      strings = remove_noise_words(strings)
+      # Try a simpler case of adding the @number in case everything is an abbr.
+      strings += [@number] if strings.all? {|s| Std_Abbr.key? s or Name_Abbr.key? s}
+      strings.uniq
+    end
+    def remove_noise_words(strings)
+      # Don't return strings that consist solely of abbreviations.
+      # NOTE: Is this a micro-optimization that has edge cases that will break?
+      # Answer: Yes, it breaks on simple things like "Prairie St" or "Front St"
+      prefix = Regexp.new("^" + Prefix_Type.regexp.source + "\s*", Regexp::IGNORECASE)
+      suffix = Regexp.new("\s*" + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)
+      predxn = Regexp.new("^" + Directional.regexp.source + "\s*", Regexp::IGNORECASE)
+      sufdxn = Regexp.new("\s*" + Directional.regexp.source + "$", Regexp::IGNORECASE)
+      good_strings = strings.map {|s|
+        s = s.clone
+        s.gsub!(predxn, "")
+        s.gsub!(sufdxn, "")
+        s.gsub!(prefix, "")
+        s.gsub!(suffix, "")
+        s
+      }
+      good_strings.reject! {|s| s.empty?}
+      strings = good_strings if !good_strings.empty? {|s| not Std_Abbr.key?(s) and not Name_Abbr.key?(s)}
+      strings
+    end
+    def city_parts
+      strings = []
+      @city.map {|string|
+        tokens = string.split(" ")
+        strings |= (0...tokens.length).to_a.reverse.map {|i|
+                   (i...tokens.length).map {|j| tokens[i..j].join(" ")}}.flatten
+      }
+      # Don't return strings that consist solely of abbreviations.
+      # NOTE: Is this a micro-optimization that has edge cases that will break?
+      # Answer: Yes, it breaks on "Prairie"
+      good_strings = strings.reject {|s| Std_Abbr.key? s}
+      strings = good_strings if !good_strings.empty?
+      strings.uniq
+    end
+    def city= (strings)
+      # NOTE: This will still fail on: 100 Broome St, 33333 (if 33333 is
+      # Broome, MT or what)
+      match = Regexp.new('\s*\b(?:' + strings.join("|") + ')\b\s*$', Regexp::IGNORECASE)
+      @street = @street.map {|string| string.gsub(match, '')}.select {|s|!s.empty?}
+    end
+    def po_box?
+      Match[:po_box].match @text
+    end
+    def intersection?
+      Match[:at].match @text
+    end
+  end
+end

data/lib/geocoder/us/constants.rb ADDED Viewed

@@ -0,0 +1,670 @@
+# coding: utf-8
+require 'set'
+require 'geocoder/us/numbers'
+module Geocoder
+end
+module Geocoder::US
+  class Map < Hash
+    # The Map class provides a two-way mapping between postal abbreviations
+    # and their fully written equivalents.
+    #attr_accessor :partial
+    attr_accessor :regexp
+    def self.[] (*items)
+      hash = super(*items)
+      #hash.build_partial
+      hash.build_match
+      hash.keys.each {|k| hash[k.downcase] = hash.fetch(k)}
+      hash.values.each {|v| hash[v.downcase] = v}
+      hash.freeze
+    end
+    # The build_partial method constructs a hash of case-insensitive,
+    # whitespace-delimited prefixes to keys and values in the two-way Map.
+    def build_partial
+      @partial = Set.new()
+      [keys, values].flatten.each {|item|
+        @partial << item.downcase
+        item.downcase.split.each {|token| @partial << token}
+      }
+    end
+    def build_match
+      @regexp = Regexp.new(
+        '\b(' + [keys,values].flatten.join("|") + ')\b',
+        Regexp::IGNORECASE)
+    end
+    # The partial? method returns true if the key is a prefix of some
+    # key in the Map.
+    def partial? (key)
+      @partial.member? key.downcase
+    end
+    def key? (key)
+      super(key.downcase)
+    end
+    def [] (key)
+      super(key.downcase)
+    end
+  end
+  # The Directional constant maps compass direction words in English and
+  # Spanish to their 1- or 2- letter abbreviations.  See 2008 TIGER/Line
+  # technical documentation Appendix C for more details.
+  Directional = Map[
+    "North"	=> "N",
+    "South"	=> "S",
+    "East"	=> "E",
+    "West"	=> "W",
+    "Northeast"	=> "NE",
+    "Northwest"	=> "NW",
+    "Southeast"	=> "SE",
+    "Southwest"	=> "SW",
+    "Norte"	=> "N",
+    "Sur"	=> "S",
+    "Este"	=> "E",
+    "Oeste"	=> "O",
+    "Noreste"	=> "NE",
+    "Noroeste"	=> "NO",
+    "Sudeste"	=> "SE",
+    "Sudoeste"	=> "SO"
+  ]
+  # The Prefix_Qualifier constant maps feature prefix qualifiers to their
+  # abbreviations. See 2008 TIGER/Line technical documentation Appendix D.
+  Prefix_Qualifier = Map[
+    "Alternate"	=> "Alt",
+    "Business"	=> "Bus",
+    "Bypass"	=> "Byp",
+    "Extended"	=> "Exd",
+    "Historic"	=> "Hst",
+    "Loop"	=> "Lp",
+    "Old"	=> "Old",
+    "Private"	=> "Pvt",
+    "Public"	=> "Pub",
+    "Spur"	=> "Spr",
+  ]
+  # The Suffix_Qualifier constant maps feature suffix qualifiers to their
+  # abbreviations. See 2008 TIGER/Line technical documentation Appendix D.
+  Suffix_Qualifier = Map[
+    "Access"	=> "Acc",
+    "Alternate"	=> "Alt",
+    "Business"	=> "Bus",
+    "Bypass"	=> "Byp",
+    "Connector"	=> "Con",
+    "Extended"	=> "Exd",
+    "Extension"	=> "Exn",
+    "Loop"	=> "Lp",
+    "Private"	=> "Pvt",
+    "Public"	=> "Pub",
+    "Scenic"	=> "Scn",
+    "Spur"	=> "Spr",
+    "Ramp"	=> "Rmp",
+    "Underpass"	=> "Unp",
+    "Overpass"	=> "Ovp",
+  ]
+  # The Prefix_Canonical constant maps canonical TIGER/Line street type
+  # prefixes to their abbreviations. This list is the subset of the list from
+  # 2008 TIGER/Line technical documentation Appendix E that was extracted from
+  # a TIGER/Line database import.
+  Prefix_Canonical = {
+    "Arcade"                            => "Arc",
+    "Autopista"                         => "Autopista",
+    "Avenida"                           => "Ave",
+    "Avenue"                            => "Ave",
+    "Boulevard"                         => "Blvd",
+    "Bulevar"                           => "Bulevar",
+    "Bureau of Indian Affairs Highway"  => "BIA Hwy",
+    "Bureau of Indian Affairs Road"     => "BIA Rd",
+    "Bureau of Indian Affairs Route"    => "BIA Rte",
+    "Bureau of Land Management Road"    => "BLM Rd",
+    "Bypass"                            => "Byp",
+    "Calle"                             => "Cll",
+    "Calleja"                           => "Calleja",
+    "Callejón"                          => "Callejón",
+    "Caminito"                          => "Cmt",
+    "Camino"                            => "Cam",
+    "Carretera"                         => "Carr",
+    "Cerrada"                           => "Cer",
+    "Círculo"                           => "Cír",
+    "Commons"                           => "Cmns",
+    "Corte"                             => "Corte",
+    "County Highway"                    => "Co Hwy",
+    "County Lane"                       => "Co Ln",
+    "County Road"                       => "Co Rd",
+    "County Route"                      => "Co Rte",
+    "County State Aid Highway"          => "Co St Aid Hwy",
+    "County Trunk Highway"              => "Co Trunk Hwy",
+    "County Trunk Road"                 => "Co Trunk Rd",
+    "Court"                             => "Ct",
+    "Delta Road"                        => "Delta Rd",
+    "District of Columbia Highway"      => "DC Hwy",
+    "Driveway"                          => "Driveway",
+    "Entrada"                           => "Ent",
+    "Expreso"                           => "Expreso",
+    "Expressway"                        => "Expy",
+    "Farm Road"                         => "Farm Rd",
+    "Farm-to-Market Road"               => "FM",
+    "Fire Control Road"                 => "Fire Cntrl Rd",
+    "Fire District Road"                => "Fire Dist Rd",
+    "Fire Lane"                         => "Fire Ln",
+    "Fire Road"                         => "Fire Rd",
+    "Fire Route"                        => "Fire Rte",
+    "Fire Trail"                        => "Fire Trl",
+    "Forest Highway"                    => "Forest Hwy",
+    "Forest Road"                       => "Forest Rd",
+    "Forest Route"                      => "Forest Rte",
+    "Forest Service Road"               => "FS Rd",
+    "Highway"                           => "Hwy",
+    "Indian Route"                      => "Indian Rte",
+    "Indian Service Route"              => "Indian Svc Rte",
+    "Interstate Highway"                => "I-",
+    "Lane"                              => "Ln",
+    "Logging Road"                      => "Logging Rd",
+    "Loop"                              => "Loop",
+    "National Forest Development Road"  => "Nat For Dev Rd",
+    "Navajo Service Route"              => "Navajo Svc Rte",
+    "Parish Road"                       => "Parish Rd",
+    "Pasaje"                            => "Pasaje",
+    "Paseo"                             => "Pso",
+    "Passage"                           => "Psge",
+    "Placita"                           => "Pla",
+    "Plaza"                             => "Plz",
+    "Point"                             => "Pt",
+    "Puente"                            => "Puente",
+    "Ranch Road"                        => "Ranch Rd",
+    "Ranch to Market Road"              => "RM",
+    "Reservation Highway"               => "Resvn Hwy",
+    "Road"                              => "Rd",
+    "Route"                             => "Rte",
+    "Row"                               => "Row",
+    "Rue"                               => "Rue",
+    "Ruta"                              => "Ruta",
+    "Sector"                            => "Sec",
+    "Sendero"                           => "Sendero",
+    "Service Road"                      => "Svc Rd",
+    "Skyway"                            => "Skwy",
+    "Square"                            => "Sq",
+    "State Forest Service Road"         => "St FS Rd",
+    "State Highway"                     => "State Hwy",
+    "State Loop"                        => "State Loop",
+    "State Road"                        => "State Rd",
+    "State Route"                       => "State Rte",
+    "State Spur"                        => "State Spur",
+    "State Trunk Highway"               => "St Trunk Hwy",
+    "Terrace"                           => "Ter",
+    "Town Highway"                      => "Town Hwy",
+    "Town Road"                         => "Town Rd",
+    "Township Highway"                  => "Twp Hwy",
+    "Township Road"                     => "Twp Rd",
+    "Trail"                             => "Trl",
+    "Tribal Road"                       => "Tribal Rd",
+    "Tunnel"                            => "Tunl",
+    "US Forest Service Highway"         => "USFS Hwy",
+    "US Forest Service Road"            => "USFS Rd",
+    "US Highway"                        => "US Hwy",
+    "US Route"                          => "US Rte",
+    "Vereda"                            => "Ver",
+    "Via"                               => "Via",
+    "Vista"                             => "Vis",
+  }
+  # The Prefix_Alternate constant maps alternate prefix street types to
+  # their canonical abbreviations. This list was merged in from the USPS
+  # list at http://www.usps.com/ncsc/lookups/abbr_suffix.txt.
+  Prefix_Alternate = {
+    "Av"			=> "Ave",
+    "Aven"			=> "Ave",
+    "Avenu"			=> "Ave",
+    "Avenue"			=> "Ave",
+    "Avn"			=> "Ave",
+    "Avnue"			=> "Ave",
+    "Boul"			=> "Blvd",
+    "Boulv"			=> "Blvd",
+    "Bypa"			=> "Byp",
+    "Bypas"			=> "Byp",
+    "Byps"			=> "Byp",
+    "Crt"			=> "Ct",
+    "Exp"			=> "Expy",
+    "Expr"			=> "Expy",
+    "Express"			=> "Expy",
+    "Expw"			=> "Expy",
+    "Highwy"			=> "Hwy",
+    "Hiway"			=> "Hwy",
+    "Hiwy"			=> "Hwy",
+    "Hway"			=> "Hwy",
+    "La"			=> "Ln",
+    "Lanes"			=> "Ln",
+    "Loops"			=> "Loop",
+    "Plza"			=> "Plz",
+    "Sqr"			=> "Sq",
+    "Sqre"			=> "Sq",
+    "Squ"			=> "Sq",
+    "Terr"			=> "Ter",
+    "Tr"			=> "Trl",
+    "Trails"			=> "Trl",
+    "Trls"			=> "Trl",
+    "Tunel"			=> "Tunl",
+    "Tunls"			=> "Tunl",
+    "Tunnels"			=> "Tunl",
+    "Tunnl"			=> "Tunl",
+    "Vdct"			=> "Via",
+    "Viadct"			=> "Via",
+    "Viaduct"			=> "Via",
+    "Vist"			=> "Vis",
+    "Vst"			=> "Vis",
+    "Vsta"			=> "Vis"
+  }
+  # The Prefix_Type constant merges the canonical prefix type abbreviations
+  # with their USPS accepted alternates.
+  Prefix_Type = Map[ Prefix_Canonical.merge(Prefix_Alternate) ]
+  # The Suffix_Canonical constant maps canonical TIGER/Line street type
+  # suffixes to their abbreviations. This list is the subset of the list from
+  # 2008 TIGER/Line technical documentation Appendix E that was extracted from
+  # a TIGER/Line database import.
+  Suffix_Canonical = {
+    "Alley"                             => "Aly",
+    "Arcade"                            => "Arc",
+    "Avenida"                           => "Ave",
+    "Avenue"                            => "Ave",
+    "Beltway"                           => "Beltway",
+    "Boulevard"                         => "Blvd",
+    "Bridge"                            => "Brg",
+    "Bypass"                            => "Byp",
+    "Causeway"                          => "Cswy",
+    "Circle"                            => "Cir",
+    "Common"                            => "Cmn",
+    "Commons"                           => "Cmns",
+    "Corners"                           => "Cors",
+    "Court"                             => "Ct",
+    "Courts"                            => "Cts",
+    "Crescent"                          => "Cres",
+    "Crest"                             => "Crst",
+    "Crossing"                          => "Xing",
+    "Cutoff"                            => "Cutoff",
+    "Drive"                             => "Dr",
+    "Driveway"                          => "Driveway",
+    "Esplanade"                         => "Esplanade",
+    "Estates"                           => "Ests",
+    "Expressway"                        => "Expy",
+    "Forest Highway"                    => "Forest Hwy",
+    "Fork"                              => "Frk",
+    "Four-Wheel Drive Trail"            => "4WD Trl",
+    "Freeway"                           => "Fwy",
+    "Grade"                             => "Grade",
+    "Heights"                           => "Hts",
+    "Highway"                           => "Hwy",
+    "Jeep Trail"                        => "Jeep Trl",
+    "Landing"                           => "Lndg",
+    "Lane"                              => "Ln",
+    "Logging Road"                      => "Logging Rd",
+    "Loop"                              => "Loop",
+    "Motorway"                          => "Mtwy",
+    "Oval"                              => "Oval",
+    "Overpass"                          => "Opas",
+    "Parkway"                           => "Pkwy",
+    "Pass"                              => "Pass",
+    "Passage"                           => "Psge",
+    "Path"                              => "Path",
+    "Pike"                              => "Pike",
+    "Place"                             => "Pl",
+    "Plaza"                             => "Plz",
+    "Point"                             => "Pt",
+    "Pointe"                            => "Pointe",
+    "Promenade"                         => "Promenade",
+    "Railroad"                          => "RR",
+    "Railway"                           => "Rlwy",
+    "Ramp"                              => "Ramp",
+    "River"                             => "Riv",
+    "Road"                              => "Rd",
+    "Roadway"                           => "Roadway",
+    "Route"                             => "Rte",
+    "Row"                               => "Row",
+    "Rue"                               => "Rue",
+    "Service Road"                      => "Svc Rd",
+    "Skyway"                            => "Skwy",
+    "Spur"                              => "Spur",
+    "Square"                            => "Sq",
+    "Stravenue"                         => "Stra",
+    "Street"                            => "St",
+    "Strip"                             => "Strip",
+    "Terrace"                           => "Ter",
+    "Thoroughfare"                      => "Thoroughfare",
+    "Tollway"                           => "Tollway",
+    "Trace"                             => "Trce",
+    "Trafficway"                        => "Trfy",
+    "Trail"                             => "Trl",
+    "Trolley"                           => "Trolley",
+    "Truck Trail"                       => "Truck Trl",
+    "Tunnel"                            => "Tunl",
+    "Turnpike"                          => "Tpke",
+    "Viaduct"                           => "Viaduct",
+    "View"                              => "Vw",
+    "Vista"                             => "Vis",
+    "Walk"                              => "Walk",
+    "Walkway"                           => "Walkway",
+    "Way"                               => "Way",
+  }
+  # The Suffix_Alternate constant maps alternate suffix street types to
+  # their canonical abbreviations. This list was merged in from the USPS
+  # list at http://www.usps.com/ncsc/lookups/abbr_suffix.txt.
+  Suffix_Alternate = {
+    "Allee"			=> "Aly",
+    "Ally"			=> "Aly",
+    "Av"			=> "Ave",
+    "Aven"			=> "Ave",
+    "Avenu"			=> "Ave",
+    "Avenue"			=> "Ave",
+    "Avn"			=> "Ave",
+    "Avnue"			=> "Ave",
+    "Boul"			=> "Blvd",
+    "Boulv"			=> "Blvd",
+    "Brdge"			=> "Brg",
+    "Bypa"			=> "Byp",
+    "Bypas"			=> "Byp",
+    "Byps"			=> "Byp",
+    "Causway"			=> "Cswy",
+    "Circ"			=> "Cir",
+    "Circl"			=> "Cir",
+    "Crcl"			=> "Cir",
+    "Crcle"			=> "Cir",
+    "Crecent"			=> "Cres",
+    "Cresent"			=> "Cres",
+    "Crscnt"			=> "Cres",
+    "Crsent"			=> "Cres",
+    "Crsnt"			=> "Cres",
+    "Crssing"			=> "Xing",
+    "Crssng"			=> "Xing",
+    "Crt"			=> "Ct",
+    "Driv"			=> "Dr",
+    "Drv"			=> "Dr",
+    "Exp"			=> "Expy",
+    "Expr"			=> "Expy",
+    "Express"			=> "Expy",
+    "Expw"			=> "Expy",
+    "Freewy"			=> "Fwy",
+    "Frway"			=> "Fwy",
+    "Frwy"			=> "Fwy",
+    "Height"			=> "Hts",
+    "Hgts"			=> "Hts",
+    "Highwy"			=> "Hwy",
+    "Hiway"			=> "Hwy",
+    "Hiwy"			=> "Hwy",
+    "Ht"			=> "Hts",
+    "Hway"			=> "Hwy",
+    "La"			=> "Ln",
+    "Lanes"			=> "Ln",
+    "Lndng"			=> "Lndg",
+    "Loops"			=> "Loop",
+    "Ovl"			=> "Oval",
+    "Parkways"			=> "Pkwy",
+    "Parkwy"			=> "Pkwy",
+    "Paths"			=> "Path",
+    "Pikes"			=> "Pike",
+    "Pkway"			=> "Pkwy",
+    "Pkwys"			=> "Pkwy",
+    "Pky"			=> "Pkwy",
+    "Plza"			=> "Plz",
+    "Rivr"			=> "Riv",
+    "Rvr"			=> "Riv",
+    "Spurs"			=> "Spur",
+    "Sqr"			=> "Sq",
+    "Sqre"			=> "Sq",
+    "Squ"			=> "Sq",
+    "Str"			=> "St",
+    "Strav"			=> "Stra",
+    "Strave"			=> "Stra",
+    "Straven"			=> "Stra",
+    "Stravn"			=> "Stra",
+    "Strt"			=> "St",
+    "Strvn"			=> "Stra",
+    "Strvnue"			=> "Stra",
+    "Terr"			=> "Ter",
+    "Tpk"			=> "Tpke",
+    "Tr"			=> "Trl",
+    "Traces"			=> "Trce",
+    "Trails"			=> "Trl",
+    "Trls"			=> "Trl",
+    "Trnpk"			=> "Tpke",
+    "Trpk"			=> "Tpke",
+    "Tunel"			=> "Tunl",
+    "Tunls"			=> "Tunl",
+    "Tunnels"			=> "Tunl",
+    "Tunnl"			=> "Tunl",
+    "Turnpk"			=> "Tpke",
+    "Vist"			=> "Vis",
+    "Vst"			=> "Vis",
+    "Vsta"			=> "Vis",
+    "Walks"			=> "Walk",
+    "Wy"			=> "Way",
+  }
+  # The Suffix_Type constant merges the canonical suffix type abbreviations
+  # with their USPS accepted alternates.
+  Suffix_Type = Map[ Suffix_Canonical.merge(Suffix_Alternate) ]
+  # The Unit_Type constant lists acceptable USPS unit type abbreviations
+  # from http://www.usps.com/ncsc/lookups/abbr_sud.txt.
+  Unit_Type = Map[
+    "Apartment"	=> "Apt",
+    "Basement"	=> "Bsmt",
+    "Building"	=> "Bldg",
+    "Department"=> "Dept",
+    "Floor"	=> "Fl",
+    "Front"	=> "Frnt",
+    "Hangar"	=> "Hngr",
+    "Lobby"	=> "Lbby",
+    "Lot"	=> "Lot",
+    "Lower"	=> "Lowr",
+    "Office"	=> "Ofc",
+    "Penthouse"	=> "Ph",
+    "Pier"	=> "Pier",
+    "Rear"	=> "Rear",
+    "Room"	=> "Rm",
+    "Side"	=> "Side",
+    "Slip"	=> "Slip",
+    "Space"	=> "Spc",
+    "Stop"	=> "Stop",
+    "Suite"	=> "Ste",
+    "Trailer"	=> "Trlr",
+    "Unit"	=> "Unit",
+    "Upper"	=> "Uppr",
+  ]
+  Std_Abbr = Map[
+    [Directional, Prefix_Qualifier, Suffix_Qualifier,
+     Prefix_Type, Suffix_Type].inject({}) {|x,y|x.merge y}
+  ]
+  # The Name_Abbr constant maps common toponym abbreviations to their
+  # full word equivalents. This list was constructed partly by hand, and
+  # partly by matching USPS alternate abbreviations with feature names
+  # found in the TIGER/Line dataset.
+  Name_Abbr = Map[
+    "Av"	=> "Avenue",
+    "Ave"	=> "Avenue",
+    "Blvd"	=> "Boulevard",
+    "Bot"	=> "Bottom",
+    "Boul"	=> "Boulevard",
+    "Boulv"	=> "Boulevard",
+    "Br"	=> "Branch",
+    "Brg"	=> "Bridge",
+    "Canyn"	=> "Canyon",
+    "Cen"	=> "Center",
+    "Cent"	=> "Center",
+    "Cir"	=> "Circle",
+    "Circ"	=> "Circle",
+    "Ck"	=> "Creek",
+    "Cnter"	=> "Center",
+    "Cntr"	=> "Center",
+    "Cnyn"	=> "Canyon",
+    "Cor"	=> "Corner",
+    "Cors"	=> "Corners",
+    "Cp"	=> "Camp",
+    "Cr"	=> "Creek",
+    "Crcl"	=> "Circle",
+    "Crcle"	=> "Circle",
+    "Cres"	=> "Crescent",
+    "Crscnt"	=> "Crescent",
+    "Ct"	=> "Court",
+    "Ctr"	=> "Center",
+    "Cts"	=> "Courts",
+    "Cyn"	=> "Canyon",
+    "Div"	=> "Divide",
+    "Dr"	=> "Drive",
+    "Dv"	=> "Divide",
+    "Est"	=> "Estate",
+    "Ests"	=> "Estates",
+    "Ext"	=> "Extension",
+    "Extn"	=> "Extension",
+    "Extnsn"	=> "Extension",
+    "Forests"	=> "Forest",
+    "Forg"	=> "Forge",
+    "Frg"	=> "Forge",
+    "Ft"	=> "Fort",
+    "Gatewy"	=> "Gateway",
+    "Gdn"	=> "Garden",
+    "Gdns"	=> "Gardens",
+    "Gtwy"	=> "Gateway",
+    "Harb"	=> "Harbor",
+    "Hbr"	=> "Harbor",
+    "Height"	=> "Heights",
+    "Hgts"	=> "Heights",
+    "Highwy"	=> "Highway",
+    "Hiway"	=> "Highway",
+    "Hiwy"	=> "Highway",
+    "Holws"	=> "Hollow",
+    "Ht"	=> "Heights",
+    "Hway"	=> "Highway",
+    "Hwy"	=> "Highway",
+    "Is"	=> "Island",
+    "Iss"	=> "Islands",
+    "Jct"	=> "Junction",
+    "Jction"	=> "Junction",
+    "Jctn"	=> "Junction",
+    "Junctn"	=> "Junction",
+    "Juncton"	=> "Junction",
+    "Ldg"	=> "Lodge",
+    "Lgt"	=> "Light",
+    "Lndg"	=> "Landing",
+    "Lodg"	=> "Lodge",
+    "Loops"	=> "Loop",
+    "Mt"	=> "Mount",
+    "Mtin"	=> "Mountain",
+    "Mtn"	=> "Mountain",
+    "Orch"	=> "Orchard",
+    "Parkwy"	=> "Parkway",
+    "Pk"	=> "Park",
+    "Pkway"	=> "Parkway",
+    "Pkwy"	=> "Parkway",
+    "Pky"	=> "Parkway",
+    "Pl"	=> "Place",
+    "Pnes"	=> "Pines",
+    "Pr"	=> "Prairie",
+    "Prr"	=> "Prairie",
+    "Pt"	=> "Point",
+    "Pts"	=> "Points",
+    "Rdg"	=> "Ridge",
+    "Riv"	=> "River",
+    "Rnchs"	=> "Ranch",
+    "Spg"	=> "Spring",
+    "Spgs"	=> "Springs",
+    "Spng"	=> "Spring",
+    "Spngs"	=> "Springs",
+    "Sq"	=> "Square",
+    "Squ"	=> "Square",
+#    "St"	=> "Saint",
+    "Sta"	=> "Station",
+    "Statn"	=> "Station",
+    "Ste"	=> "Sainte",
+    "Stn"	=> "Station",
+    "Str"	=> "Street",
+    "Ter"	=> "Terrace",
+    "Terr"	=> "Terrace",
+    "Tpk"	=> "Turnpike",
+    "Tpke"	=> "Turnpike",
+    "Tr"	=> "Trail",
+    "Trls"	=> "Trail",
+    "Trpk"	=> "Turnpike",
+    "Tunls"	=> "Tunnel",
+    "Un"	=> "Union",
+    "Vill"	=> "Village",
+    "Villag"	=> "Village",
+    "Villg"	=> "Village",
+    "Vis"	=> "Vista",
+    "Vlg"	=> "Village",
+    "Vlgs"	=> "Villages",
+    "Wls"	=> "Wells",
+    "Wy"	=> "Way",
+    "Xing"	=> "Crossing",
+  ]
+  # The State constant maps US state and territory names to their 2-letter
+  # USPS abbreviations.
+  State = Map[
+    "Alabama"		=> "AL",
+    "Alaska"		=> "AK",
+    "American Samoa"	=> "AS",
+    "Arizona"		=> "AZ",
+    "Arkansas"		=> "AR",
+    "California"	=> "CA",
+    "Colorado"		=> "CO",
+    "Connecticut"	=> "CT",
+    "Delaware"		=> "DE",
+    "District of Columbia" => "DC",
+    "Federated States of Micronesia" => "FM",
+    "Florida"		=> "FL",
+    "Georgia"		=> "GA",
+    "Guam"		=> "GU",
+    "Hawaii"		=> "HI",
+    "Idaho"		=> "ID",
+    "Illinois"		=> "IL",
+    "Indiana"		=> "IN",
+    "Iowa"		=> "IA",
+    "Kansas"		=> "KS",
+    "Kentucky"		=> "KY",
+    "Louisiana"		=> "LA",
+    "Maine"		=> "ME",
+    "Marshall Islands"	=> "MH",
+    "Maryland"		=> "MD",
+    "Massachusetts"	=> "MA",
+    "Michigan"		=> "MI",
+    "Minnesota"		=> "MN",
+    "Mississippi"	=> "MS",
+    "Missouri"		=> "MO",
+    "Montana"		=> "MT",
+    "Nebraska"		=> "NE",
+    "Nevada"		=> "NV",
+    "New Hampshire"	=> "NH",
+    "New Jersey"	=> "NJ",
+    "New Mexico"	=> "NM",
+    "New York"		=> "NY",
+    "North Carolina"	=> "NC",
+    "North Dakota"	=> "ND",
+    "Northern Mariana Islands"	=> "MP",
+    "Ohio"		=> "OH",
+    "Oklahoma"		=> "OK",
+    "Oregon"		=> "OR",
+    "Palau"		=> "PW",
+    "Pennsylvania"	=> "PA",
+    "Puerto Rico"	=> "PR",
+    "Rhode Island"	=> "RI",
+    "South Carolina"	=> "SC",
+    "South Dakota"	=> "SD",
+    "Tennessee"		=> "TN",
+    "Texas"		=> "TX",
+    "Utah"		=> "UT",
+    "Vermont"		=> "VT",
+    "Virgin Islands"	=> "VI",
+    "Virginia"		=> "VA",
+    "Washington"	=> "WA",
+    "West Virginia"	=> "WV",
+    "Wisconsin"		=> "WI",
+    "Wyoming"		=> "WY"
+  ]
+end