mcmire-address_standardization 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,76 @@
1
+ = address_standardization
2
+
3
+ == Summary
4
+
5
+ A tiny Ruby library to quickly standardize a postal address. Right now this supports Google Maps and MelissaData.
6
+
7
+ == Installation
8
+
9
+ Put this in your environment.rb:
10
+
11
+ config.gem 'mcmire-address_standardization', :lib => "address_standardization", :source => "http://gems.github.com"
12
+
13
+ Then run <tt>rake gems:install</tt> to install the gem.
14
+
15
+ == Usage
16
+
17
+ Right now there are two methods supported: MelissaData and Google Maps.
18
+
19
+ MelissaData provides two services: {US address lookup}[http://www.melissadata.com/lookups/AddressVerify.asp] and {Canadian address lookup}[http://www.melissadata.com/lookups/CanadianAddressVerify.asp]. They both work the same way, however. First, here's how to standardize a US address:
20
+
21
+ addr = AddressStandardization::MelissaData::USAddress.standardize(
22
+ :street => "1 Infinite Loop",
23
+ :city => "Cupertino",
24
+ :state => "CA"
25
+ )
26
+
27
+ This submits the address to MelissaData. If the address can't be found, you'll get back +nil+. But if the address can be found (as in this case), you'll get an instance of <tt>AddressStandardization::MelissaData::USAddress</tt>. If you store the instance, you can refer to the individual fields like so:
28
+
29
+ addr.street #=> "1 INFINITE LOOP"
30
+ addr.city #=> "CUPERTINO"
31
+ addr.state #=> "CA"
32
+ addr.zip #=> "95014-2083"
33
+
34
+ And standardizing a Canadian address:
35
+
36
+ addr = AddressStandardization::MelissaData::CanadianAddress.standardize(
37
+ :street => "103 Metig St",
38
+ :city => "Sault Ste Marie",
39
+ :province => "ON"
40
+ )
41
+ addr.street #=> "103 METIG ST RR 4"
42
+ addr.city #=> "SAULT STE MARIE"
43
+ addr.province #=> "ON"
44
+ addr.postalcode #=> "P6A 5K9"
45
+
46
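+ Using Google Maps to standardize an address is just as easy. First set your API key with <tt>AddressStandardization::GoogleMaps.api_key = "..."</tt> (<tt>standardize</tt> raises an error if no key is set), then: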
47
+
48
+ addr = AddressStandardization::GoogleMaps::Address.standardize(
49
+ :street => "1600 Amphitheatre Parkway",
50
+ :city => "Mountain View",
51
+ :state => "CA"
52
+ )
53
+ addr.street #=> "1600 AMPHITHEATRE PKWY"
54
+ addr.city #=> "MOUNTAIN VIEW"
55
+ addr.state #=> "CA"
56
+ addr.zip #=> "94043"
57
+ addr.country #=> "USA"
58
+
59
+ And, again, a Canadian address:
60
+
61
+ addr = AddressStandardization::GoogleMaps::Address.standardize(
62
+ :street => "1770 Stenson Blvd.",
63
+ :city => "Peterborough",
64
+ :province => "ON"
65
+ )
66
+ addr.street #=> "1770 STENSON BLVD"
67
+ addr.city #=> "PETERBOROUGH"
68
+ addr.province #=> "ON"
69
+ addr.postalcode #=> "K9K"
70
+ addr.country #=> "CANADA"
71
+
72
+ You'll notice that, for some reason, the Google Maps API doesn't return the full postal code for Canadian addresses. If you know why this is, please let me know (my email address is below).
73
+
74
+ == Author
75
+
76
+ (c) 2008 Elliot Winkler (elliot dot winkler at gmail dot com). Released under the MIT license.
data/Rakefile ADDED
@@ -0,0 +1,40 @@
1
# Rake tasks for the address_standardization gem: running the tests (the
# default task), packaging the gem, and writing out a gemspec file.
require 'rubygems'
require 'rake/gempackagetask'
require 'rake/testtask'

# Load the version file relative to this Rakefile instead of relying on the
# current directory being on the load path (it is not on Ruby 1.9.2+).
require File.dirname(__FILE__) + '/lib/address_standardization/version'

task :default => :test

# Gem specification shared by the packaging task and the :github task below.
spec = Gem::Specification.new do |s|
  s.name             = 'address_standardization'
  s.version          = AddressStandardization::Version.to_s
  s.has_rdoc         = true
  s.extra_rdoc_files = %w(README.rdoc)
  s.rdoc_options     = %w(--main README.rdoc)
  s.summary          = "A tiny Ruby library to quickly standardize a postal address"
  s.author           = 'Elliot Winkler'
  s.email            = 'elliot.winkler@gmail.com'
  s.homepage         = 'http://github.com/mcmire/address_standardization'
  s.files            = %w(README.rdoc Rakefile) + Dir.glob("{lib,test}/**/*")
  # s.executables    = ['address_standardization']

  s.add_dependency('mechanize')
end

# Packaging tasks built from the specification above.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

# `rake test` runs every test/**/*_test.rb file with test/ on the load path.
Rake::TestTask.new do |t|
  t.libs << 'test'
  t.test_files = FileList["test/**/*_test.rb"]
  t.verbose = true
end

desc 'Generate the gemspec to serve this Gem from Github'
task :github do
  # Write <name>.gemspec next to this Rakefile.
  file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
  File.open(file, 'w') {|f| f << spec.to_ruby }
  puts "Created gemspec: #{file}"
end
@@ -0,0 +1,9 @@
1
+ # address_standardization: A tiny Ruby library to quickly standardize a postal address
2
+ # Copyright (C) 2008 Elliot Winkler. Released under the MIT license.
3
+
4
+ require File.dirname(__FILE__)+'/address_standardization/ruby_ext'
5
+ require File.dirname(__FILE__)+'/address_standardization/class_level_inheritable_attributes'
6
+
7
+ require File.dirname(__FILE__)+'/address_standardization/abstract_address'
8
+ require File.dirname(__FILE__)+'/address_standardization/melissa_data'
9
+ require File.dirname(__FILE__)+'/address_standardization/google_maps'
@@ -0,0 +1,58 @@
1
module AddressStandardization
  # Error class reserved for standardization failures.
  class StandardizationError < StandardError; end

  # Base class for the service-specific address classes.
  #
  # A subclass must set +valid_keys+ (an array of attribute names as strings)
  # and override +.standardize+. An instance wraps a hash of address fields
  # whose keys are stringified and whose values are whitespace-normalized.
  # Each valid key can be read as +addr.key+ and written as +addr.key(value)+.
  class AbstractAddress

    include ClassLevelInheritableAttributes
    inheritable_attributes :valid_keys

    # Placeholder that subclasses override. It accepts any arguments so that
    # calling it with an address raises NotImplementedError rather than an
    # arity ArgumentError.
    def self.standardize(*args)
      raise NotImplementedError, "You must override .standardize in a subclass"
    end

    # The normalized address hash (string keys).
    attr_reader :address_info

    # address_info - Hash of address fields; keys may be symbols or strings.
    #
    # Raises NotImplementedError if the class has no valid_keys, and
    # ArgumentError if the hash is empty or contains a key not in valid_keys.
    def initialize(address_info)
      raise NotImplementedError, "You must define valid_keys" unless self.class.valid_keys
      raise ArgumentError, "No address given!" if address_info.empty?
      address_info = address_info.inject({}) {|h,(k,v)| h[k.to_s] = v; h } # stringify keys
      validate_keys(address_info)
      standardize_values!(address_info)
      @address_info = address_info
    end

    # Raises ArgumentError if +hash+ has a key that is not in valid_keys.
    def validate_keys(hash)
      # assume keys are already stringified
      invalid_keys = hash.keys - self.class.valid_keys
      unless invalid_keys.empty?
        raise ArgumentError, "Invalid keys: #{invalid_keys.join(', ')}. Valid keys are: #{self.class.valid_keys.join(', ')}"
      end
    end

    # Provides a reader/writer for every valid key: with no argument the
    # stored value is returned, with an argument the normalized value is
    # stored. Any other method name goes to the default handling.
    def method_missing(name, *args)
      name = name.to_s
      if self.class.valid_keys.include?(name)
        if args.empty?
          @address_info[name]
        else
          @address_info[name] = standardize_value(args.first)
        end
      else
        super(name.to_sym, *args)
      end
    end

    # Keeps respond_to? consistent with the accessors served by method_missing.
    def respond_to_missing?(name, include_private = false)
      keys = self.class.valid_keys
      (keys && keys.include?(name.to_s)) || super
    end

    # Two addresses are equal when their normalized hashes are equal.
    def ==(other)
      other.kind_of?(AbstractAddress) && @address_info == other.address_info
    end

  private
    # Normalizes every value of +hash+ in place.
    def standardize_values!(hash)
      hash.each {|k,v| hash[k] = standardize_value(v) }
    end

    # nil/false become ""; anything else is converted to a string (so numeric
    # values such as a zip code do not blow up) and whitespace-normalized.
    def standardize_value(value)
      value ? value.to_s.strip_whitespace : ""
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # from <http://railstips.org/2006/11/18/class-and-instance-variables-in-ruby>
2
# Mixin that gives a class "inheritable" class-level attributes: accessors
# defined on the class itself whose current values are copied to each subclass
# at the moment the subclass is created. Values are copied by reference, and
# later reassignment on either class does not affect the other.
module ClassLevelInheritableAttributes
  def self.included(base)
    base.extend(ClassMethods)
  end

  module ClassMethods
    # Declares one or more inheritable class-level attributes and returns the
    # full list of registered attribute names.
    def inheritable_attributes(*args)
      # The list itself is registered so that it is handed down to subclasses.
      @inheritable_attributes ||= [:inheritable_attributes]
      @inheritable_attributes += args
      metaclass = (class << self; self; end)
      args.each do |arg|
        metaclass.send(:attr_accessor, arg)
      end
      @inheritable_attributes
    end

    # Copies the current value of every registered attribute to +subclass+.
    def inherited(subclass)
      # Let any other inherited hooks further up the chain run as well.
      super
      # A class may include the module without declaring any attributes, in
      # which case there is nothing to copy.
      (@inheritable_attributes || []).each do |inheritable_attribute|
        instance_var = "@#{inheritable_attribute}"
        subclass.instance_variable_set(instance_var, instance_variable_get(instance_var))
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+
4
module AddressStandardization
  # Address standardization backed by the Google Maps geocoding endpoint.
  module GoogleMaps
    class << self
      # API key sent with every request; must be set before calling
      # Address.standardize.
      attr_accessor :api_key
    end

    class Address < AbstractAddress
      self.valid_keys = %w(street city state province postalcode zip country full_address precision)

      class << self
        # much of this code was borrowed from GeoKit, thanks...
        #
        # Looks up an address and returns a new Address built from the
        # response, or nil when the HTTP request is unsuccessful or the
        # response does not carry status code 200.
        #
        # address_info - Hash read with the symbol keys :street, :city,
        #                :state (or :province) and :zip.
        #
        # Raises RuntimeError if GoogleMaps.api_key has not been set.
        def standardize(address_info)
          raise "API key not specified.\nCall AddressStandardization::GoogleMaps.api_key = '...' before you call .standardize()." unless GoogleMaps.api_key

          address_str = "%s, %s, %s %s" % [
            address_info[:street],
            address_info[:city],
            (address_info[:state] || address_info[:province]),
            address_info[:zip]
          ]
          url = "http://maps.google.com/maps/geo?q=#{address_str.url_escape}&output=xml&key=#{GoogleMaps.api_key}&oe=utf-8"
          uri = URI.parse(url)
          res = Net::HTTP.get_response(uri)
          # A failed request simply yields nil; no debug file is written into
          # the caller's working directory.
          return nil unless res.is_a?(Net::HTTPSuccess)

          xml = Hpricot::XML(res.body)

          # Treat a response without a status node like any other failure.
          status = xml.at("//kml/Response/Status/code")
          return nil unless status && status.inner_text == "200"

          addr = {}

          addr[:street]   = get_inner_text(xml, '//ThoroughfareName')
          addr[:city]     = get_inner_text(xml, '//LocalityName')
          # The same value is exposed under both the US and Canadian names.
          addr[:province] = addr[:state]      = get_inner_text(xml, '//AdministrativeAreaName')
          addr[:zip]      = addr[:postalcode] = get_inner_text(xml, '//PostalCodeNumber')
          addr[:country]  = get_inner_text(xml, '//CountryName')

          new(addr)
        end

      private
        # Upcased text of the first node matching +xpath+, or nil if none.
        def get_inner_text(xml, xpath)
          node = xml.at(xpath)
          node && node.inner_text.upcase
        end
      end
    end # Address
  end # GoogleMaps
end # AddressStandardization
@@ -0,0 +1,68 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
module AddressStandardization
  # Address standardization backed by MelissaData's web lookup forms.
  class MelissaData
    # Behaviour shared by the US and Canadian lookups. A subclass sets
    # +start_url+ (the page holding the lookup form) and +valid_keys+.
    class BaseAddress < AbstractAddress
      inheritable_attributes :start_url

      # Raises NotImplementedError unless the class defines start_url.
      def initialize(address_info)
        unless self.class.start_url
          raise NotImplementedError, "You must define start_url"
        end
        super(address_info)
      end

      class << self
      protected
        # Submits the address through the form whose action is +action+ and
        # scrapes the results page.
        #
        # address_info    - Hash of address fields, validated via .new.
        # action          - form action that identifies the lookup form.
        # attrs_to_fields - Hash mapping address attributes to form field names.
        #
        # Returns [street, city, state, zip], all upcased, or nil when the
        # results page does not say "Address Verified".
        def standardize(address_info, action, attrs_to_fields)
          canadian = (action =~ /Canadian/)
          address = new(address_info)
          result = nil
          WWW::Mechanize.new do |agent|
            lookup_form = agent.get(start_url).form_with(:action => action) do |f|
              attrs_to_fields.each do |attr, field|
                f[field] = address.send(attr)
              end
            end
            results_page = lookup_form.submit(lookup_form.buttons.first)

            table = results_page.search("form")[2].next_sibling.next_sibling
            # The status cell and result row are located differently for the
            # Canadian lookup than for the US one.
            status_cell = table.search(canadian ? "td" : "th")[0]
            verified = status_cell && status_cell.inner_text =~ /Address Verified/
            return unless verified
            row = canadian ? 1 : 2
            strongs = table.search("tr:eq(#{row})/td:eq(1)").search("strong")
            street = strongs.first.inner_text
            # Remove the matched links from the second <strong> before reading it.
            strongs[1].search("a:gt(0)").remove
            city, state, zip = strongs[1].inner_html.strip_newlines.strip_html.split(/(?:&\w+;)+/)
            result = [street, city, state, zip].map { |part| part.upcase }
          end
          result
        end
      end
    end

    # US address lookup.
    class USAddress < BaseAddress
      self.start_url = 'http://www.melissadata.com/lookups/AddressVerify.asp'
      self.valid_keys = %w(street city state zip)

      # Returns a standardized USAddress, or nil if the address is not verified.
      def self.standardize(address_info)
        fields = super(address_info, "AddressVerify.asp", :street => 'Address', :city => 'city', :state => 'state', :zip => 'zip')
        return nil unless fields
        street, city, state, zip = fields
        new(:street => street, :city => city, :state => state, :zip => zip)
      end
    end

    # Canadian address lookup.
    class CanadianAddress < BaseAddress
      self.start_url = 'http://www.melissadata.com/lookups/CanadianAddressVerify.asp'
      self.valid_keys = %w(street city province postalcode)

      # Returns a standardized CanadianAddress, or nil if the address is not verified.
      def self.standardize(address_info)
        fields = super(address_info, "CanadianAddressVerify.asp", :street => 'Street', :city => 'city', :province => 'Province', :postalcode => 'Postcode')
        return nil unless fields
        street, city, province, postalcode = fields
        new(:street => street, :city => city, :province => province, :postalcode => postalcode)
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
# String helpers used by the address classes.
class String
  # Returns a copy with everything that looks like an HTML/XML tag removed.
  def strip_html
    gsub(/<\/?([^>]+)>/, '')
  end

  # Returns a copy with all carriage returns and newlines removed.
  def strip_newlines
    gsub(/[\r\n]+/, '')
  end

  # Removes newlines, collapses runs of spaces into a single space, and trims
  # leading and trailing whitespace.
  def strip_whitespace
    strip_newlines.squeeze(" ").strip
  end

  # Escapes the string for use in a URL query: spaces become "+", and every
  # character other than letters, digits, "_", "." and "-" becomes one %XX
  # sequence per byte.
  def url_escape
    gsub(/[^ a-zA-Z0-9_.-]+/) do |unsafe|
      # Escape byte by byte so that multibyte characters are fully encoded
      # ("é" => "%C3%A9") rather than cut off after the first byte.
      unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
    end.tr(' ', '+')
  end
end
@@ -0,0 +1,13 @@
1
module AddressStandardization
  # Version of the library, kept in step with the released gem (0.2.0).
  module Version

    MAJOR = 0
    MINOR = 2
    TINY  = 0

    # Returns the dotted version string, e.g. "0.2.0".
    def self.to_s # :nodoc:
      [MAJOR, MINOR, TINY].join('.')
    end

  end
end
@@ -0,0 +1,48 @@
1
+ require 'test_helper'
2
+
3
# Tests for the Google Maps backend. These call GoogleMaps::Address.standardize
# directly, which performs a real HTTP request.
class GoogleMapsTest < Test::Unit::TestCase
  before do
    # NOTE(review): a real-looking API key is committed here; consider loading
    # it from the environment instead.
    AddressStandardization::GoogleMaps.api_key = "ABQIAAAALHg3jKnK9wN9K3_ArJA6TxSTZ2OgdK08l2h0_gdsozNQ-6zpaxQvIY84J7Mh1fAHQrYGI4W27qKZaw"
  end

  # standardize stores the postal code under both "zip" and "postalcode" (and
  # the region under both "state" and "province"), and address equality
  # compares the whole hash, so the expected addresses must list both keys.
  test "A valid US address" do
    addr = AddressStandardization::GoogleMaps::Address.standardize(
      :street => "1600 Amphitheatre Parkway",
      :city => "Mountain View",
      :state => "CA"
    )
    addr.should == AddressStandardization::GoogleMaps::Address.new(
      "street" => "1600 AMPHITHEATRE PKWY",
      "city" => "MOUNTAIN VIEW",
      "state" => "CA",
      "province" => "CA",
      "zip" => "94043",
      "postalcode" => "94043",
      "country" => "USA"
    )
  end

  test "A valid Canadian address" do
    addr = AddressStandardization::GoogleMaps::Address.standardize(
      :street => "1770 Stenson Boulevard",
      :city => "Peterborough",
      :province => "ON"
    )
    addr.should == AddressStandardization::GoogleMaps::Address.new(
      "street" => "1770 STENSON BLVD",
      "city" => "PETERBOROUGH",
      "state" => "ON",
      "province" => "ON",
      "zip" => "K9K",
      "postalcode" => "K9K",
      "country" => "CANADA"
    )
  end

  test "An invalid address" do
    addr = AddressStandardization::GoogleMaps::Address.standardize(
      :street => "123 Imaginary Lane",
      :city => "Some Town",
      :state => "AK"
    )
    addr.should == nil
  end
end
@@ -0,0 +1,49 @@
1
+ require 'test_helper'
2
+
3
# Tests for the MelissaData backend. Each test calls .standardize on one of
# the MelissaData address classes and compares the result with a hand-built
# address, or with nil for an address that should not be verified.
class MelissaDataTest < Test::Unit::TestCase
  test "Valid USA address" do
    klass = AddressStandardization::MelissaData::USAddress
    expected = klass.new(
      "street" => "1 INFINITE LOOP",
      "city" => "CUPERTINO",
      "state" => "CA",
      "zip" => "95014-2083"
    )
    actual = klass.standardize(
      :street => "1 Infinite Loop",
      :city => "Cupertino",
      :state => "CA"
    )
    actual.should == expected
  end

  test "Invalid USA address" do
    klass = AddressStandardization::MelissaData::USAddress
    actual = klass.standardize(
      :street => "123 Imaginary Lane",
      :city => "Some Town",
      :state => "AK"
    )
    actual.should == nil
  end

  test "Valid Canadian address" do
    klass = AddressStandardization::MelissaData::CanadianAddress
    expected = klass.new(
      "street" => "103 METIG ST RR 4",
      "province" => "ON",
      "city" => "SAULT STE MARIE",
      "postalcode" => "P6A 5K9"
    )
    actual = klass.standardize(
      :street => "103 Metig St",
      :city => "Sault Ste Marie",
      :province => "ON"
    )
    actual.should == expected
  end

  test "Invalid Canadian address" do
    klass = AddressStandardization::MelissaData::CanadianAddress
    actual = klass.standardize(
      :street => "123 Imaginary Lane",
      :city => "Some Town",
      :province => "BC"
    )
    actual.should == nil
  end
end
@@ -0,0 +1,12 @@
1
# Drop any TextMate entries from the load path
# (see http://sneaq.net/textmate-wtf).
$LOAD_PATH.delete_if { |entry| entry.include?('TextMate') }

# Put the library directory (../lib relative to this file) first on the load path.
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
7
+
8
+ require 'rubygems'
9
+ require 'context'
10
+ require 'matchy'
11
+
12
+ require 'address_standardization'
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mcmire-address_standardization
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Elliot Winkler
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-05 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: mechanize
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0"
23
+ version:
24
+ description:
25
+ email: elliot.winkler@gmail.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files:
31
+ - README.rdoc
32
+ files:
33
+ - README.rdoc
34
+ - Rakefile
35
+ - lib/address_standardization
36
+ - lib/address_standardization/abstract_address.rb
37
+ - lib/address_standardization/class_level_inheritable_attributes.rb
38
+ - lib/address_standardization/google_maps.rb
39
+ - lib/address_standardization/melissa_data.rb
40
+ - lib/address_standardization/ruby_ext.rb
41
+ - lib/address_standardization/version.rb
42
+ - lib/address_standardization.rb
43
+ - test/google_maps_test.rb
44
+ - test/melissa_data_test.rb
45
+ - test/test.xml
46
+ - test/test_helper.rb
47
+ has_rdoc: true
48
+ homepage: http://github.com/mcmire/address_standardization
49
+ post_install_message:
50
+ rdoc_options:
51
+ - --main
52
+ - README.rdoc
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.2.0
71
+ signing_key:
72
+ specification_version: 2
73
+ summary: A tiny Ruby library to quickly standardize a postal address
74
+ test_files: []
75
+