normalic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +6 -0
- data/README.rdoc +15 -0
- data/Rakefile +15 -0
- data/lib/constants.rb +42337 -0
- data/lib/normalic.rb +131 -0
- data/normalic.gemspec +29 -0
- data/spec/normalic_spec.rb +78 -0
- metadata +79 -0
data/lib/normalic.rb
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
# Only handles U.S. addresses.
|
2
|
+
require 'constants'
|
3
|
+
|
4
|
+
module Normalic
  # A U.S. street address split into its components.
  #
  # Instances are normally built with Address.parse, which peels the country,
  # zipcode, state, city, unit, street type and direction off the end of a
  # free-form string and keeps what is left as the number and street name.
  # Components can be read and written through the accessors
  # (addr.street) or hash-style (addr[:street]).
  class Address

    # Components that can be read and written through [] / []=.
    FIELDS = [:number, :direction, :street, :type, :city, :state, :zipcode].freeze
    FIELD_NAMES = FIELDS.map { |field| field.to_s }.freeze
    # Names accepted by []: the stored components plus the derived line1.
    READABLE_NAMES = (FIELD_NAMES + ["line1"]).freeze

    attr_accessor :number, :direction, :street, :type, :city, :state, :zipcode

    # fields - Hash keyed by the symbols in FIELDS; missing keys (or a nil
    #          hash) leave the corresponding component nil.
    def initialize(fields={})
      fields ||= {}
      @number = fields[:number]
      @direction = fields[:direction]
      @street = fields[:street]
      @type = fields[:type]
      @city = fields[:city]
      @state = fields[:state]
      @zipcode = fields[:zipcode]
    end

    # Capitalizes every word in str ("new york" -> "New York").
    # Returns nil when str is nil.
    def self.titlize(str)
      return nil unless str
      str.gsub(/\w+/) { |word| word.capitalize }
    end

    # Hash-style reader. Accepts a symbol or string naming one of FIELDS (or
    # "line1") and returns nil for anything else.
    #
    # The name is checked against a whitelist instead of being passed
    # straight to #send: with a blind send, addr[:exit] would terminate the
    # process and addr[:class] would expose internals.
    def [](field_name)
      name = field_name.to_s
      READABLE_NAMES.include?(name) ? send(name) : nil
    end

    # Hash-style writer. Assigns value to the named component and returns it;
    # names outside FIELDS are ignored and yield nil.
    def []=(field_name, value)
      name = field_name.to_s
      FIELD_NAMES.include?(name) ? send("#{name}=", value) : nil
    end

    # Full single-line form: "<line1>, <city>, <state> <zipcode>".
    # Missing or empty components are skipped together with their separator,
    # so a partial address never starts with ", " or contains doubled spaces.
    def to_s
      locality = [line1, city, state].reject { |part| part.to_s.empty? }.join(", ")
      [locality, zipcode].reject { |part| part.to_s.empty? }.join(" ").strip
    end

    # Street line: "<number> <direction> <street> <type>", skipping missing
    # or empty components so the result has no leading or doubled spaces.
    def line1
      [number, direction, street, type].reject { |part| part.to_s.empty? }.join(" ")
    end

    # Parses a free-form U.S. address by iteratively taking chunks off the
    # end of the string.
    #
    # address - the address text. It is never modified (frozen strings are
    #           fine); nil is treated as an empty string.
    #
    # Returns a new Address with street, type and city title-cased and
    # direction and state upper-cased. Components that cannot be found are
    # nil. The zipcode is stored as digits only (a ZIP+4 loses its hyphen).
    def self.parse(address)
      # String#strip returns a copy, so the gsub! calls below only ever
      # touch our private working string.
      address = address.to_s.strip
      regex = patterns

      #get rid of USA at the end
      address.gsub!(regex[:country], "")

      zipcode = address[regex[:zipcode]]
      address.gsub!(regex[:zipcode], "")
      zipcode = zipcode.gsub(/\W/, "") if zipcode

      state = address[regex[:state]]
      address.gsub!(regex[:state], "")
      state = state.gsub(/(^\W*|\W*$)/, "").downcase if state
      state = StateCodes[state] || state

      # A ZIP+4 has already lost its hyphen above, so fall back to its first
      # five digits for the city lookup.
      # NOTE(review): assumes ZipCityMap is keyed by 5-digit strings - confirm
      # against constants.rb.
      mapped_city = ZipCityMap[zipcode]
      mapped_city ||= ZipCityMap[zipcode[0, 5]] if zipcode && zipcode.length > 5

      if mapped_city
        # Escape the name so characters such as "." in a city name are
        # matched literally instead of being read as regex syntax.
        city_regex = Regexp.new("\\W+" + Regexp.escape(mapped_city) + "$", Regexp::IGNORECASE)
        # City text that disagrees with the zipcode: drop everything after
        # the first comma and trust the zipcode.
        city_regex = /,.*$/ unless address[city_regex]
        city = mapped_city
      else
        city_regex = /,.*$/
        city = address[city_regex]
        city = city.gsub(/(^\W*|\W*$)/, "").downcase if city
      end

      address.gsub!(city_regex, "")
      address.gsub!(regex[:unit], "")
      address.gsub!(regex[:trailing_direction], "")

      type = address[regex[:type]]
      address.gsub!(regex[:type], "")
      if type
        type = type.gsub(/(^\W*|\W*$)/, "").downcase
        type = StreetTypes[type] || type
      end

      if address =~ /(\Wand\W|\W\&\W)/
        #intersections. print as is
        address.gsub!(/(\Wand\W|\W\&\W)/, " and ")
        # Shaped like the match array below: the whole intersection text
        # lands in slot 1 (number) and the street is left empty.
        arr = ["", address, "", ""]
      else
        # Slots: 1 = number, 2 = direction (with trailing separator),
        # 4 = street name.
        arr = regex[:address].match(address).to_a
      end

      number = arr[1].strip if arr[1]
      dir = nil
      street = nil
      if arr[2] && (!arr[4] || arr[4].empty?)
        # Nothing follows the "direction", so it is really the street name
        # (e.g. "West Street").
        street = arr[2].strip.downcase
      elsif arr[2]
        dir = Directional[arr[2].strip.downcase] || arr[2].strip.downcase
        # Non-destructive gsub: dir may be a value owned by the Directional
        # constant and must not be modified in place.
        dir = dir.gsub(/\W/, "")
      end
      street = arr[4].strip.downcase if arr[4] && !street

      new(
        :number => number,
        :direction => dir ? dir.upcase : nil,
        :street => titlize(street),
        :type => titlize(type),
        :city => titlize(city),
        :state => state ? state.upcase : nil,
        :zipcode => zipcode
      )
    end

    # Builds the regular expressions used by parse once and caches them; the
    # alternations over the constant tables are too large to recompile on
    # every call.
    def self.patterns
      @patterns ||= begin
        direct = Regexp.new(Directional.keys * '|' + '|' + Directional.values * '\.?|', Regexp::IGNORECASE)
        type = Regexp.new('(' + StreetTypes_list * '|' + ')\\W*?$', Regexp::IGNORECASE)
        number = /\d+-?\d*/
        fraction = /\d+\/\d+/
        street = Regexp.new('((' + direct.source + ')\\W)?\\W*(.*)\\W*(' + type.source + ')?', Regexp::IGNORECASE)
        {
          :unit => /(((\#?\w*)?\W*(su?i?te|p\W*[om]\W*b(?:ox)?|dept|department|ro*m|floor|fl|apt|apartment|unit|box))$)|(\W((su?i?te|p\W*[om]\W*b(?:ox)?|dept|department|ro*m|floor|fl|apt|apartment|unit|box)\W*(\#?\w*)?)\W{0,3}$)/i,
          :direct => direct,
          :type => type,
          :number => number,
          :fraction => fraction,
          :country => /\W+USA$/,
          :zipcode => /\W+(\d{5}|\d{5}-\d{4})$/,
          :state => Regexp.new('\W+(' + StateCodes.values * '|' + '|' + StateCodes.keys * '|' + ')$', Regexp::IGNORECASE),
          :street => street,
          # A direction left dangling at the end once city/unit are removed.
          :trailing_direction => Regexp.new('\W(' + direct.source + ')\\W{0,3}$', Regexp::IGNORECASE),
          # Optional number, optional fraction ("1/2"), then the street part.
          :address => Regexp.new('^\W*(' + number.source + '\\W)?\W*(?:' + fraction.source + '\W*)?' + street.source, Regexp::IGNORECASE)
        }.freeze
      end
    end
    private_class_method :patterns
  end
end
|
data/normalic.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for normalic 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{normalic}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = [%q{Eric Tang}]
  s.date = %q{2011-07-31}
  s.description = %q{Normalize U.S addresses}
  s.email = %q{eric.x.tang@gmail.com}
  s.extra_rdoc_files = [%q{README.rdoc}, %q{lib/constants.rb}, %q{lib/normalic.rb}]
  s.files = [%q{README.rdoc}, %q{Rakefile}, %q{lib/constants.rb}, %q{lib/normalic.rb}, %q{spec/normalic_spec.rb}, %q{Manifest}, %q{normalic.gemspec}]
  s.homepage = %q{http://github.com/ericxtang/normalic}
  s.rdoc_options = [%q{--line-numbers}, %q{--inline-source}, %q{--title}, %q{Normalic}, %q{--main}, %q{README.rdoc}]
  s.require_paths = [%q{lib}]
  # Guarded like the other version-dependent attributes in this file, so the
  # spec still loads if the running RubyGems does not provide this setter.
  s.rubyforge_project = %q{normalic} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.8.6}
  s.summary = %q{Normalize U.S addresses}

  # The generated nested RubyGems version check had empty branches (the gem
  # declares no dependencies), so only the guarded assignment is kept.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Put the project's lib/ directory on the load path explicitly so the spec
# does not depend on the current working directory being on $LOAD_PATH
# (lib/normalic.rb itself requires 'constants' from that directory).
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
require 'normalic'

# Address.parse title-cases street, type and city and upper-cases direction
# and state before building the Address, so the field-level expectations
# below use that normalized casing (the same casing the line1 and to_s
# examples at the bottom rely on).
describe "Normalic test" do

  it "should parse an address with unit(floor) information" do
    addr = Normalic::Address.parse("201 Varick St. floor 12th, New York, NY 10014")
    addr[:number].should == "201"
    addr[:street].should == "Varick"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with direction information" do
    addr = Normalic::Address.parse("167 West 4th Street, New York, NY 10014")
    addr[:number].should == "167"
    addr[:street].should == "4th"
    addr[:direction].should == "W"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with incorrect state info" do
    addr = Normalic::Address.parse("871 Washington Street, New York, NewYork 10014")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    # NOTE(review): this only holds if the state tables in constants.rb
    # recognize "NewYork" - confirm against constants.rb.
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with floor info and without city info" do
    addr = Normalic::Address.parse("201 Varick St. floor 12th")
    addr[:number].should == "201"
    addr[:street].should == "Varick"
    addr[:type].should == "St"
  end

  it "should parse an address with no city info" do
    addr = Normalic::Address.parse("871 Washington Street")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:type].should == "St"
  end


  it "should parse an address with direction info and no city info" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:direction].should == "W"
    addr[:type].should == "St"
  end

  it "should use dot notation" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr.number.should == "871"
  end

  it "should return nil if a bad field is passed in" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr[:bad_name].should == nil
  end

  it "should return a line1" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr.line1.should == "871 W Washington St"
  end

  it "should have a to_s method" do
    addr = Normalic::Address.parse("167 West 4th Street, New York, NY 10014")
    addr.to_s.should == "167 W 4th St, New York, NY 10014"
  end
end
|
metadata
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: normalic
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Eric Tang
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-07-31 00:00:00 Z
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: Normalize U.S addresses
|
22
|
+
email: eric.x.tang@gmail.com
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README.rdoc
|
29
|
+
- lib/constants.rb
|
30
|
+
- lib/normalic.rb
|
31
|
+
files:
|
32
|
+
- README.rdoc
|
33
|
+
- Rakefile
|
34
|
+
- lib/constants.rb
|
35
|
+
- lib/normalic.rb
|
36
|
+
- spec/normalic_spec.rb
|
37
|
+
- Manifest
|
38
|
+
- normalic.gemspec
|
39
|
+
homepage: http://github.com/ericxtang/normalic
|
40
|
+
licenses: []
|
41
|
+
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options:
|
44
|
+
- --line-numbers
|
45
|
+
- --inline-source
|
46
|
+
- --title
|
47
|
+
- Normalic
|
48
|
+
- --main
|
49
|
+
- README.rdoc
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 11
|
67
|
+
segments:
|
68
|
+
- 1
|
69
|
+
- 2
|
70
|
+
version: "1.2"
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
rubyforge_project: normalic
|
74
|
+
rubygems_version: 1.8.10
|
75
|
+
signing_key:
|
76
|
+
specification_version: 3
|
77
|
+
summary: Normalize U.S addresses
|
78
|
+
test_files: []
|
79
|
+
|