normalic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
+ # Only handles U.S. addresses.
2
+ require 'constants'
3
+
4
module Normalic
  # A U.S. street address split into its components.
  #
  # Instances are plain value holders; Address.parse builds one from free-form
  # text by repeatedly peeling recognised chunks off the end of the string
  # (country, ZIP code, state, city, unit, street type) and then matching what
  # is left against a "number / direction / street" pattern.
  #
  # Parsing relies on lookup tables that are defined outside this class:
  # Directional, StreetTypes, StreetTypes_list, StateCodes and ZipCityMap.
  class Address
    # The components an address is made of, in display order.
    FIELDS = [:number, :direction, :street, :type, :city, :state, :zipcode].freeze

    # String form of FIELDS, used to validate names given to #[] and #[]=
    # without converting caller-supplied text into symbols.
    FIELD_NAMES = FIELDS.map { |field| field.to_s }.freeze

    # Secondary unit designators (suite, apt, floor, PO box, ...) at the end of the text.
    UNIT_PATTERN = /(((\#?\w*)?\W*(su?i?te|p\W*[om]\W*b(?:ox)?|dept|department|ro*m|floor|fl|apt|apartment|unit|box))$)|(\W((su?i?te|p\W*[om]\W*b(?:ox)?|dept|department|ro*m|floor|fl|apt|apartment|unit|box)\W*(\#?\w*)?)\W{0,3}$)/i
    NUMBER_PATTERN = /\d+-?\d*/
    FRACTION_PATTERN = /\d+\/\d+/
    # Trailing country marker; matched case-insensitively so "usa" is dropped too.
    COUNTRY_PATTERN = /\W+USA$/i
    # Trailing ZIP or ZIP+4. The longer alternative is listed first for clarity.
    ZIPCODE_PATTERN = /\W+(\d{5}-\d{4}|\d{5})$/
    # Fallback city matcher: everything from the first comma to the end of the line.
    GENERIC_CITY_PATTERN = /,.*$/
    INTERSECTION_PATTERN = /(\Wand\W|\W\&\W)/

    attr_accessor(*FIELDS)

    # fields - Hash keyed by the symbols in FIELDS; missing keys leave the
    #          corresponding attribute nil.
    def initialize(fields = {})
      @number    = fields[:number]
      @direction = fields[:direction]
      @street    = fields[:street]
      @type      = fields[:type]
      @city      = fields[:city]
      @state     = fields[:state]
      @zipcode   = fields[:zipcode]
    end

    # Capitalizes every word in str ("new york" -> "New York").
    # Returns nil when str is nil.
    def self.titlize(str)
      str.gsub(/\w+/) { |word| word.capitalize } if str
    end

    # Hash-style reader. field_name may be a Symbol or String naming one of
    # FIELDS; any other name yields nil. Names are checked against a fixed list
    # instead of being passed to `send`, so a key such as :exit or :freeze can
    # never invoke an unrelated method.
    def [](field_name)
      name = field_name.to_s
      FIELD_NAMES.include?(name) ? send(name) : nil
    end

    # Hash-style writer. Returns the assigned value, or nil (leaving the object
    # untouched) when field_name is not one of FIELDS.
    def []=(field_name, value)
      name = field_name.to_s
      FIELD_NAMES.include?(name) ? send("#{name}=", value) : nil
    end

    # Full single-line form: "<line1>, <city>, <state> <zipcode>".
    # Components that are nil or empty are left out together with their separator.
    def to_s
      locality = [line1, city, state].map { |part| part.to_s }.reject { |part| part.empty? }.join(", ")
      [locality, zipcode.to_s].reject { |part| part.empty? }.join(" ")
    end

    # Street line: number, direction, street and type separated by single
    # spaces. Missing or empty components are skipped, so the result never
    # carries leading, trailing or doubled spaces.
    def line1
      [number, direction, street, type].map { |part| part.to_s }.reject { |part| part.empty? }.join(" ")
    end

    # Parses free-form address text into an Address.
    #
    # address - the text to parse. It is never modified (a stripped copy is
    #           used), so frozen strings are accepted; nil is treated as "".
    #
    # Returns an Address whose street, type and city are title-cased and whose
    # direction and state are upper-cased. A ZIP+4 keeps its hyphen
    # ("10014-1234"). For intersections ("A and B") the remaining text is
    # returned as-is in +number+ and +street+ is empty.
    def self.parse(address)
      text = address.to_s.strip
      patterns = build_patterns

      # Drop a trailing country marker before looking for the ZIP code.
      text = text.gsub(COUNTRY_PATTERN, "")

      zip_chunk = text[ZIPCODE_PATTERN]
      text = text.gsub(ZIPCODE_PATTERN, "")
      zipcode = zip_chunk && zip_chunk[/\d{5}(?:-\d{4})?/]
      # The city table is looked up by the five-digit prefix
      # (assumes ZipCityMap is keyed by 5-digit strings -- TODO confirm).
      zip5 = zipcode && zipcode[0, 5]

      state = clean_token(text[patterns[:state]])
      text = text.gsub(patterns[:state], "")
      state = StateCodes[state] || state

      known_city = zip5 && ZipCityMap[zip5]
      if known_city
        # Prefer the city implied by the ZIP code; if the text does not end
        # with it, fall back to cutting everything after the first comma.
        city = known_city
        city_pattern = Regexp.new('\W+' + Regexp.escape(known_city) + '$', Regexp::IGNORECASE)
        city_pattern = GENERIC_CITY_PATTERN unless text[city_pattern]
      else
        city_pattern = GENERIC_CITY_PATTERN
        city = clean_token(text[city_pattern])
      end

      text = text.gsub(city_pattern, "")
      text = text.gsub(UNIT_PATTERN, "")
      # A directional left dangling at the end (e.g. "Main St NW") is discarded.
      text = text.gsub(patterns[:trailing_direction], "")

      type = clean_token(text[patterns[:type]])
      text = text.gsub(patterns[:type], "")
      type = StreetTypes[type] || type if type

      if text =~ INTERSECTION_PATTERN
        # Intersections are kept verbatim; only the connector is normalised.
        parts = ["", text.gsub(INTERSECTION_PATTERN, " and "), "", ""]
      else
        parts = patterns[:address].match(text).to_a
      end

      number = parts[1] && parts[1].strip
      leading = parts[2] && parts[2].strip.downcase
      remainder = parts[4]

      if leading && (remainder.nil? || remainder.empty?)
        # Nothing follows the directional word, so it is the street name itself
        # (e.g. "871 West Street").
        street = leading
      elsif leading
        # Non-destructive gsub: the looked-up value belongs to Directional.
        direction = (Directional[leading] || leading).gsub(/\W/, "")
      end
      street ||= remainder.strip.downcase if remainder

      new(
        :number    => number,
        :direction => direction && direction.upcase,
        :street    => titlize(street),
        :type      => titlize(type),
        :city      => titlize(city),
        :state     => state && state.upcase,
        :zipcode   => zipcode
      )
    end

    # Builds the regular expressions that depend on the external lookup tables.
    # They are assembled on each call, as the tables are only referenced at parse time.
    def self.build_patterns
      # Every abbreviation may be followed by a dot ("N." / "N").
      directions = (Directional.keys + Directional.values.map { |abbr| "#{abbr}\\.?" }).join('|')
      # \b keeps a type from matching the tail of a longer word ("Broadway" is not "Broad" + "way").
      type_source = '\b(' + StreetTypes_list.join('|') + ')\W*?$'
      street_source = '((' + directions + ')\W)?\W*(.*)\W*(' + type_source + ')?'
      address_source = '^\W*(' + NUMBER_PATTERN.source + '\W)?\W*(?:' +
                       FRACTION_PATTERN.source + '\W*)?' + street_source

      {
        :state => Regexp.new('\W+(' + (StateCodes.values + StateCodes.keys).join('|') + ')$', Regexp::IGNORECASE),
        :type => Regexp.new(type_source, Regexp::IGNORECASE),
        :trailing_direction => Regexp.new('\W(' + directions + ')\W{0,3}$', Regexp::IGNORECASE),
        :address => Regexp.new(address_source, Regexp::IGNORECASE)
      }
    end

    # Strips non-word characters from both ends of text and lower-cases it.
    # Returns nil for nil. Never modifies its argument.
    def self.clean_token(text)
      text && text.gsub(/(^\W*|\W*$)/, "").downcase
    end

    private_class_method :build_patterns, :clean_token
  end
end
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for normalic 0.1.0.
#
# Optional writers are guarded with respond_to? so the file loads on RubyGems
# versions that lack them.
Gem::Specification.new do |s|
  s.name = %q{normalic}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = [%q{Eric Tang}]
  s.date = %q{2011-07-31}
  s.description = %q{Normalize U.S addresses}
  s.email = %q{eric.x.tang@gmail.com}
  s.extra_rdoc_files = [%q{README.rdoc}, %q{lib/constants.rb}, %q{lib/normalic.rb}]
  s.files = [%q{README.rdoc}, %q{Rakefile}, %q{lib/constants.rb}, %q{lib/normalic.rb}, %q{spec/normalic_spec.rb}, %q{Manifest}, %q{normalic.gemspec}]
  s.homepage = %q{http://github.com/ericxtang/normalic}
  s.rdoc_options = [%q{--line-numbers}, %q{--inline-source}, %q{--title}, %q{Normalic}, %q{--main}, %q{README.rdoc}]
  s.require_paths = [%q{lib}]
  # NOTE(review): rubyforge_project is deprecated by later RubyGems releases;
  # guarded like the other optional writers so a missing setter cannot abort loading.
  s.rubyforge_project = %q{normalic} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.8.6}
  s.summary = %q{Normalize U.S addresses}

  # The generated template also carried an empty RubyGems-version conditional
  # here; it declared no dependencies and has been dropped.
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,78 @@
1
# RSpec adds the project's lib/ directory to the load path, so the library is
# required by name rather than through a path relative to the working directory.
require 'normalic'

# Behavioural examples for Normalic::Address.parse.
#
# parse title-cases street, type and city and upper-cases direction and state,
# so the per-field expectations below use that casing (it is also the casing
# the line1 and to_s examples at the bottom rely on).
describe "Normalic test" do

  it "should parse an address with unit(floor) information" do
    addr = Normalic::Address.parse("201 Varick St. floor 12th, New York, NY 10014")
    addr[:number].should == "201"
    addr[:street].should == "Varick"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with direction information" do
    addr = Normalic::Address.parse("167 West 4th Street, New York, NY 10014")
    addr[:number].should == "167"
    addr[:street].should == "4th"
    addr[:direction].should == "W"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  # NOTE(review): the state expectation depends on the StateCodes table
  # recognising "NewYork"; that table is not visible from this file -- confirm.
  it "should parse an address with incorrect state info" do
    addr = Normalic::Address.parse("871 Washington Street, New York, NewYork 10014")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:type].should == "St"
    addr[:city].should == "New York"
    addr[:state].should == "NY"
    addr[:zipcode].should == "10014"
  end

  it "should parse an address with floor info and without city info" do
    addr = Normalic::Address.parse("201 Varick St. floor 12th")
    addr[:number].should == "201"
    addr[:street].should == "Varick"
    addr[:type].should == "St"
  end

  it "should parse an address with no city info" do
    addr = Normalic::Address.parse("871 Washington Street")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:type].should == "St"
  end

  it "should parse an address with direction info and no city info" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr[:number].should == "871"
    addr[:street].should == "Washington"
    addr[:direction].should == "W"
    addr[:type].should == "St"
  end

  it "should use dot notation" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr.number.should == "871"
  end

  it "should return nil if a bad field is passed in" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr[:bad_name].should == nil
  end

  it "should return a line1" do
    addr = Normalic::Address.parse("871 West Washington Street")
    addr.line1.should == "871 W Washington St"
  end

  it "should have a to_s method" do
    addr = Normalic::Address.parse("167 West 4th Street, New York, NY 10014")
    addr.to_s.should == "167 W 4th St, New York, NY 10014"
  end
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: normalic
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Eric Tang
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-07-31 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: Normalize U.S addresses
22
+ email: eric.x.tang@gmail.com
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README.rdoc
29
+ - lib/constants.rb
30
+ - lib/normalic.rb
31
+ files:
32
+ - README.rdoc
33
+ - Rakefile
34
+ - lib/constants.rb
35
+ - lib/normalic.rb
36
+ - spec/normalic_spec.rb
37
+ - Manifest
38
+ - normalic.gemspec
39
+ homepage: http://github.com/ericxtang/normalic
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --line-numbers
45
+ - --inline-source
46
+ - --title
47
+ - Normalic
48
+ - --main
49
+ - README.rdoc
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 11
67
+ segments:
68
+ - 1
69
+ - 2
70
+ version: "1.2"
71
+ requirements: []
72
+
73
+ rubyforge_project: normalic
74
+ rubygems_version: 1.8.10
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Normalize U.S addresses
78
+ test_files: []
79
+