normalic 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,131 @@
1
+ # Only handles U.S. addresses.
2
+ require 'constants'
3
+
4
module Normalic
  # A U.S. street address split into components, plus a best-effort parser
  # (Address.parse) that builds one from a free-form string.
  #
  # The parser reads the lookup tables Directional, StreetTypes,
  # StreetTypes_list, StateCodes and ZipCityMap, which are not defined in this
  # file (presumably they come from the required `constants` file).
  class Address
    # Component names; these are the keys read by #initialize and the only
    # names writable through #[]=.
    FIELDS = %w(number direction street type city state zipcode).freeze

    # Names readable through #[]: every field plus the derived #line1.
    READABLE = (FIELDS + %w(line1)).freeze

    attr_accessor :number, :direction, :street, :type, :city, :state, :zipcode

    # fields - Hash keyed by the symbols :number, :direction, :street, :type,
    #          :city, :state and :zipcode. Missing keys leave the attribute nil.
    def initialize(fields={})
      FIELDS.each { |name| instance_variable_set("@#{name}", fields[name.to_sym]) }
    end

    # Capitalizes every run of word characters in +str+ ("new york" becomes
    # "New York"). Returns nil when +str+ is nil or false.
    def self.titlize(str)
      return nil unless str
      str.gsub(/\w+/) { |word| word.capitalize }
    end

    # Hash-style reader. Returns the named component (or line1), or nil for
    # any other name. Only whitelisted names are dispatched, so a key such as
    # :class or :exit can no longer invoke an arbitrary method.
    def [](field_name)
      key = field_name.to_s
      READABLE.include?(key) ? send(key) : nil
    end

    # Hash-style writer. Assigns +value+ to the named component; names that
    # are not address fields are ignored and nil is returned.
    def []=(field_name, value)
      key = field_name.to_s
      send("#{key}=", value) if FIELDS.include?(key)
    end

    # Renders "line1, city, state zipcode", leaving out whichever of city,
    # state and zipcode are nil.
    def to_s
      text = line1
      text += ", #{city}" if city
      text += ", #{state}" if state
      text += " #{zipcode}" if zipcode
      text.strip
    end

    # Street line: number, direction, street and type joined by single spaces.
    # nil and empty components are skipped so no leading, trailing or doubled
    # spaces appear (the intersection path of parse stores an empty street).
    def line1
      [number, direction, street, type].compact.map { |part| part.to_s }.reject { |part| part.empty? }.join(" ")
    end

    # Parses a free-form U.S. address by repeatedly cutting a recognized chunk
    # off the end of the text: country, zipcode, state, city, unit, trailing
    # direction and street type. What is left is split into number, direction
    # and street name.
    #
    # address - the text to parse. It is copied first, so the caller's string
    #           is never modified and frozen strings are accepted; nil is
    #           treated as an empty string.
    #
    # Returns a new Address. Street, type and city are title-cased, direction
    # and state are upper-cased, and zipcode has its non-word characters
    # removed (so the hyphen of a ZIP+4 code is dropped).
    def self.parse(address)
      rest = address.to_s.strip
      patterns = build_patterns

      # Drop a trailing "USA"; its value is not used.
      cut(rest, patterns[:country])

      zipcode = cut(rest, patterns[:zipcode])
      zipcode = zipcode.gsub(/\W/, "") if zipcode

      state = cut(rest, patterns[:state])
      state = tidy(state) if state
      # Map a full state name to its code when the table knows it.
      state = StateCodes[state] || state

      known_city = ZipCityMap[zipcode]
      if known_city
        # The zipcode decides the city. Remove that name from the text if it
        # is there, otherwise remove everything after the first comma.
        city_pattern = Regexp.new("\\W+" + Regexp.escape(known_city) + "$", Regexp::IGNORECASE)
        city_pattern = /,.*$/ unless rest[city_pattern]
        city = known_city
      else
        city_pattern = /,.*$/
        city = rest[city_pattern]
        city = tidy(city) if city
      end

      rest.gsub!(city_pattern, "")
      rest.gsub!(patterns[:unit], "")
      # A direction that trails the street (after the unit is gone) is discarded.
      rest.gsub!(Regexp.new('\W(' + patterns[:direct].source + ')\\W{0,3}$', Regexp::IGNORECASE), "")

      type = cut(rest, patterns[:type])
      if type
        type = tidy(type)
        type = StreetTypes[type] || type
      end

      if rest =~ /(\Wand\W|\W\&\W)/
        # Intersection: keep the text as is; it ends up in the number slot.
        rest.gsub!(/(\Wand\W|\W\&\W)/, " and ")
        parts = ["", rest, "", ""]
      else
        # Captures: 1 = house number, 2 = direction (with its trailing
        # separator), 4 = remaining street text.
        full = Regexp.new('^\W*(' + patterns[:number].source + '\\W)?\W*(?:' + patterns[:fraction].source + '\W*)?' + patterns[:street].source, Regexp::IGNORECASE)
        parts = full.match(rest).to_a
      end

      street = nil
      dir = nil
      number = parts[1].strip if parts[1]
      if parts[2] && (!parts[4] || parts[4].empty?)
        # Nothing follows the direction word, so it is the street name itself.
        street = parts[2].strip.downcase
      elsif parts[2]
        token = parts[2].strip.downcase
        # Non-destructive gsub: the value may be a string owned by Directional.
        dir = (Directional[token] || token).gsub(/\W/, "")
      end
      street = parts[4].strip.downcase if parts[4] && !street

      new(
        :number => number,
        :direction => dir ? dir.upcase : nil,
        :street => titlize(street),
        :type => titlize(type),
        :city => titlize(city),
        :state => state ? state.upcase : nil,
        :zipcode => zipcode
      )
    end

    # Builds the table of regular expressions used by parse. Built on each
    # call because it depends on the externally defined lookup constants.
    def self.build_patterns
      patterns = {
        :unit => /(((\#?\w*)?\W*(su?i?te|p\W*[om]\W*b(?:ox)?|dept|department|ro*m|floor|fl|apt|apartment|unit|box))$)|(\W((su?i?te|p\W*[om]\W*b(?:ox)?|dept|department|ro*m|floor|fl|apt|apartment|unit|box)\W*(\#?\w*)?)\W{0,3}$)/i,
        :direct => Regexp.new(Directional.keys.join('|') + '|' + Directional.values.join('\.?|'), Regexp::IGNORECASE),
        :type => Regexp.new('(' + StreetTypes_list.join('|') + ')\\W*?$', Regexp::IGNORECASE),
        :number => /\d+-?\d*/,
        :fraction => /\d+\/\d+/,
        :country => /\W+USA$/,
        :zipcode => /\W+(\d{5}|\d{5}-\d{4})$/,
        :state => Regexp.new('\W+(' + StateCodes.values.join('|') + '|' + StateCodes.keys.join('|') + ')$', Regexp::IGNORECASE)
      }
      patterns[:street] = Regexp.new('((' + patterns[:direct].source + ')\\W)?\\W*(.*)\\W*(' + patterns[:type].source + ')?', Regexp::IGNORECASE)
      patterns
    end

    # Returns the first match of +pattern+ in +buffer+ (nil if none) and
    # deletes every match from +buffer+ in place. +buffer+ is always the
    # parser's private working copy.
    def self.cut(buffer, pattern)
      found = buffer[pattern]
      buffer.gsub!(pattern, "")
      found
    end

    # Strips leading/trailing non-word characters and lower-cases the token,
    # returning a new string.
    def self.tidy(token)
      token.gsub(/(^\W*|\W*$)/, "").downcase
    end

    private_class_method :build_patterns, :cut, :tidy
  end
end
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for normalic 0.1.0.
#
# Attributes that are not available on every RubyGems version are assigned
# behind respond_to? guards so the specification still loads where they are
# missing.
Gem::Specification.new do |s|
  s.name = "normalic"
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Eric Tang"]
  s.date = "2011-07-31"
  s.description = "Normalize U.S addresses"
  s.email = "eric.x.tang@gmail.com"
  s.extra_rdoc_files = ["README.rdoc", "lib/constants.rb", "lib/normalic.rb"]
  s.files = ["README.rdoc", "Rakefile", "lib/constants.rb", "lib/normalic.rb", "spec/normalic_spec.rb", "Manifest", "normalic.gemspec"]
  s.homepage = "http://github.com/ericxtang/normalic"
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Normalic", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # rubyforge_project is a deprecated attribute; only set it where the
  # running RubyGems still defines the writer.
  s.rubyforge_project = "normalic" if s.respond_to? :rubyforge_project=
  s.rubygems_version = "1.8.6"
  s.summary = "Normalize U.S addresses"

  # The generated file also branched on the RubyGems version here, but both
  # branches were empty (the gem declares no dependencies), so only the
  # specification version assignment remains.
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,78 @@
1
+ require 'lib/normalic'
2
+
3
# Examples for Normalic::Address.parse and the Address accessors.
#
# Address.parse stores normalized components: street, type and city are
# title-cased, direction and state are upper-cased. The per-field
# expectations below use that casing, which is the same casing the line1 and
# to_s examples at the bottom rely on.
describe "Normalic test" do

  it "should parse an address with unit(floor) information" do
    addr = Normalic::Address.parse("201 Varick St. floor 12th, New York, NY 10014")
    addr[:number].should == "201"
    addr[:street].should == "Varick"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with direction information" do
    addr = Normalic::Address.parse("167 West 4th Street, New York, NY 10014")
    addr[:number].should == "167"
    addr[:street].should == "4th"
    addr[:direction].should == "W"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with incorrect state info" do
    addr = Normalic::Address.parse("871 Washington Street, New York, NewYork 10014")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with floor info and without city info" do
    addr = Normalic::Address.parse("201 Varick St. floor 12th")
    addr[:number].should == "201"
    addr[:street].should == "Varick"
    addr[:type].should == "St"
  end

  it "should parse an address with no city info" do
    addr = Normalic::Address.parse("871 Washington Street")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:type].should == "St"
  end

  it "should parse an address with direction info and no city info" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:direction].should == "W"
    addr[:type].should == "St"
  end

  it "should use dot notation" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr.number.should == "871"
  end

  it "should return nil if a bad field is passed in" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr[:bad_name].should == nil
  end

  it "should return a line1" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr.line1.should == "871 W Washington St"
  end

  it "should have a to_s method" do
    addr = Normalic::Address.parse("167 West 4th Street, New York, NY 10014")
    addr.to_s.should == "167 W 4th St, New York, NY 10014"
  end
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: normalic
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Eric Tang
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-07-31 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: Normalize U.S addresses
22
+ email: eric.x.tang@gmail.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README.rdoc
29
+ - lib/constants.rb
30
+ - lib/normalic.rb
31
+ files:
32
+ - README.rdoc
33
+ - Rakefile
34
+ - lib/constants.rb
35
+ - lib/normalic.rb
36
+ - spec/normalic_spec.rb
37
+ - Manifest
38
+ - normalic.gemspec
39
+ homepage: http://github.com/ericxtang/normalic
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --line-numbers
45
+ - --inline-source
46
+ - --title
47
+ - Normalic
48
+ - --main
49
+ - README.rdoc
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 11
67
+ segments:
68
+ - 1
69
+ - 2
70
+ version: "1.2"
71
+ requirements: []
72
+
73
+ rubyforge_project: normalic
74
+ rubygems_version: 1.8.10
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Normalize U.S addresses
78
+ test_files: []
79
+