normalic 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/License.txt +8 -0
- data/Manifest +2 -0
- data/Rakefile +1 -1
- data/lib/normalic/address.rb +37 -23
- data/normalic.gemspec +3 -3
- data/spec/normalic_spec.rb +47 -0
- metadata +6 -5
data/License.txt
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
Copyright (c) 2011 Eric Tang (eric.x.tang@gmail.com)
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
4
|
+
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
8
|
+
|
data/Manifest
CHANGED
data/Rakefile
CHANGED
data/lib/normalic/address.rb
CHANGED
@@ -33,10 +33,24 @@ module Normalic
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def self.parse(raw)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
clean = clean(raw)
|
37
|
+
tokens = tokenize(clean)
|
38
|
+
normd = normalize(tokens)
|
39
|
+
|
40
|
+
self.new(normd)
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.normalize_fields(fields)
|
44
|
+
clean_fields = Hash[*fields.collect do |(k, v)|
|
45
|
+
k2 = k.is_a?(Symbol) ? k : clean(k).gsub(/\W+/,'').to_sym
|
46
|
+
[k2, clean(v)]
|
47
|
+
end.flatten(1)]
|
48
|
+
if (address = clean_fields.delete(:address) ||
|
49
|
+
clean_fields.delete(:address_line1))
|
50
|
+
clean_fields.merge!(Hash[[:type, :street, :direction,
|
51
|
+
:number].zip(tokenize_street(address))])
|
52
|
+
end
|
53
|
+
normd = normalize(clean_fields)
|
40
54
|
|
41
55
|
self.new(normd)
|
42
56
|
end
|
@@ -100,8 +114,8 @@ module Normalic
|
|
100
114
|
end
|
101
115
|
end
|
102
116
|
|
103
|
-
def self.clean(
|
104
|
-
address =
|
117
|
+
def self.clean(raw)
|
118
|
+
address = raw.to_s.dup
|
105
119
|
|
106
120
|
address.downcase!
|
107
121
|
address.gsub!("\n",', ')
|
@@ -113,7 +127,7 @@ module Normalic
|
|
113
127
|
end
|
114
128
|
|
115
129
|
def self.tokenize(address)
|
116
|
-
address = address.
|
130
|
+
address = address.dup
|
117
131
|
|
118
132
|
address.detoken!(REGEXES[:country])
|
119
133
|
zipcode = address.detoken!(REGEXES[:zipcode])
|
@@ -134,15 +148,15 @@ module Normalic
|
|
134
148
|
|
135
149
|
if m = address.match(REGEXES[:intersection])
|
136
150
|
intersection = true
|
137
|
-
t1, s1, d1 =
|
138
|
-
t2, s2, d2 =
|
151
|
+
t1, s1, d1 = tokenize_street(m[1], false)
|
152
|
+
t2, s2, d2 = tokenize_street(m[3], false)
|
139
153
|
type = [t1, t2]
|
140
154
|
street = [s1, s2]
|
141
155
|
direction = [d1, d2]
|
142
156
|
number = nil
|
143
157
|
else
|
144
158
|
intersection = false
|
145
|
-
type, street, direction, number =
|
159
|
+
type, street, direction, number = tokenize_street(address)
|
146
160
|
end
|
147
161
|
|
148
162
|
{:zipcode => zipcode,
|
@@ -156,7 +170,7 @@ module Normalic
|
|
156
170
|
end
|
157
171
|
|
158
172
|
def self.tokenize_street(address, has_number=true)
|
159
|
-
address = address.
|
173
|
+
address = address.dup
|
160
174
|
|
161
175
|
number = has_number ? address.detoken_front!(REGEXES[:number]) : nil
|
162
176
|
direction = address.detoken_front!(REGEXES[:directional]) ||
|
@@ -173,18 +187,18 @@ module Normalic
|
|
173
187
|
def self.normalize(tokens)
|
174
188
|
tokens = tokens.clone
|
175
189
|
|
176
|
-
tokens[:zipcode] =
|
177
|
-
tokens[:state] =
|
178
|
-
tokens[:city] =
|
190
|
+
tokens[:zipcode] = normalize_zipcode(tokens[:zipcode])
|
191
|
+
tokens[:state] = normalize_state(tokens[:state], tokens[:zipcode])
|
192
|
+
tokens[:city] = normalize_city(tokens[:city], tokens[:zipcode])
|
179
193
|
|
180
194
|
if tokens[:intersection]
|
181
|
-
tokens[:type].collect! {|t|
|
182
|
-
tokens[:street].collect! {|s|
|
183
|
-
tokens[:direction].collect! {|d|
|
195
|
+
tokens[:type].collect! {|t| normalize_type(t)}
|
196
|
+
tokens[:street].collect! {|s| normalize_street(s)}
|
197
|
+
tokens[:direction].collect! {|d| normalize_direction(d)}
|
184
198
|
else
|
185
|
-
tokens[:type] =
|
186
|
-
tokens[:street] =
|
187
|
-
tokens[:direction] =
|
199
|
+
tokens[:type] = normalize_type(tokens[:type])
|
200
|
+
tokens[:street] = normalize_street(tokens[:street])
|
201
|
+
tokens[:direction] = normalize_direction(tokens[:direction])
|
188
202
|
end
|
189
203
|
|
190
204
|
tokens
|
@@ -208,20 +222,20 @@ module Normalic
|
|
208
222
|
|
209
223
|
def self.normalize_city(city, zipcode=nil)
|
210
224
|
city = ZIP_CITY_MAP[zipcode][:city] if zipcode && ZIP_CITY_MAP[zipcode]
|
211
|
-
city ?
|
225
|
+
city ? titlize(city) : nil
|
212
226
|
end
|
213
227
|
|
214
228
|
def self.normalize_type(type)
|
215
229
|
if type
|
216
230
|
type = STREET_TYPES[type] || type
|
217
|
-
|
231
|
+
titlize(type) + '.'
|
218
232
|
else
|
219
233
|
nil
|
220
234
|
end
|
221
235
|
end
|
222
236
|
|
223
237
|
def self.normalize_street(street)
|
224
|
-
street ?
|
238
|
+
street ? titlize(street) : nil
|
225
239
|
end
|
226
240
|
|
227
241
|
def self.normalize_direction(direction)
|
data/normalic.gemspec
CHANGED
@@ -2,15 +2,15 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "normalic"
|
5
|
-
s.version = "0.1.
|
5
|
+
s.version = "0.1.2"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Eric Tang"]
|
9
|
-
s.date = "2011-
|
9
|
+
s.date = "2011-11-02"
|
10
10
|
s.description = "Normalize U.S addresses"
|
11
11
|
s.email = "eric.x.tang@gmail.com"
|
12
12
|
s.extra_rdoc_files = ["README.rdoc", "lib/constants.rb", "lib/normalic.rb", "lib/normalic/address.rb", "lib/normalic/phone_number.rb", "lib/normalic/uri.rb"]
|
13
|
-
s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/constants.rb", "lib/normalic.rb", "lib/normalic/address.rb", "lib/normalic/phone_number.rb", "lib/normalic/uri.rb", "
|
13
|
+
s.files = ["License.txt", "Manifest", "README.rdoc", "Rakefile", "lib/constants.rb", "lib/normalic.rb", "lib/normalic/address.rb", "lib/normalic/phone_number.rb", "lib/normalic/uri.rb", "normalic.gemspec", "spec/normalic_spec.rb"]
|
14
14
|
s.homepage = "http://github.com/ericxtang/normalic"
|
15
15
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Normalic", "--main", "README.rdoc"]
|
16
16
|
s.require_paths = ["lib"]
|
data/spec/normalic_spec.rb
CHANGED
@@ -275,6 +275,53 @@ describe "Normalic::Address" do
|
|
275
275
|
addr[:intersection].should == false
|
276
276
|
end
|
277
277
|
|
278
|
+
it "should parse an address from a hash of fields" do
|
279
|
+
addr = Normalic::Address.normalize_fields(:number => 201,
|
280
|
+
:street => "Varick",
|
281
|
+
"type" => "St.",
|
282
|
+
:city => "New York",
|
283
|
+
:state => "NY",
|
284
|
+
"zipcode" => 10014)
|
285
|
+
addr[:number].should == "201"
|
286
|
+
addr[:direction].should == nil
|
287
|
+
addr[:street].should == "Varick"
|
288
|
+
addr[:type].should == "St."
|
289
|
+
addr[:city].should == "New York"
|
290
|
+
addr[:state].should == "NY"
|
291
|
+
addr[:zipcode].should == "10014"
|
292
|
+
addr[:intersection].should == false
|
293
|
+
end
|
294
|
+
|
295
|
+
it "should parse an address from a hash of fields including 'address'" do
|
296
|
+
addr = Normalic::Address.normalize_fields("address" => "201 Varick St.",
|
297
|
+
:city => "New York",
|
298
|
+
:state => "NY",
|
299
|
+
:zipcode => 10014)
|
300
|
+
addr[:number].should == "201"
|
301
|
+
addr[:direction].should == nil
|
302
|
+
addr[:street].should == "Varick"
|
303
|
+
addr[:type].should == "St."
|
304
|
+
addr[:city].should == "New York"
|
305
|
+
addr[:state].should == "NY"
|
306
|
+
addr[:zipcode].should == "10014"
|
307
|
+
addr[:intersection].should == false
|
308
|
+
end
|
309
|
+
|
310
|
+
it "should parse an address from a hash of fields including 'address_line1'" do
|
311
|
+
addr = Normalic::Address.normalize_fields("address_line1" => "201 Varick St.",
|
312
|
+
:city => "New York",
|
313
|
+
:state => "NY",
|
314
|
+
:zipcode => 10014)
|
315
|
+
addr[:number].should == "201"
|
316
|
+
addr[:direction].should == nil
|
317
|
+
addr[:street].should == "Varick"
|
318
|
+
addr[:type].should == "St."
|
319
|
+
addr[:city].should == "New York"
|
320
|
+
addr[:state].should == "NY"
|
321
|
+
addr[:zipcode].should == "10014"
|
322
|
+
addr[:intersection].should == false
|
323
|
+
end
|
324
|
+
|
278
325
|
it "should use dot notation" do
|
279
326
|
addr = Normalic::Address.parse("871 west washington street, new york, ny 10014")
|
280
327
|
addr.number.should == "871"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: normalic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
-
|
10
|
-
version: 0.1.
|
9
|
+
- 2
|
10
|
+
version: 0.1.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Eric Tang
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-11-02 00:00:00 Z
|
19
19
|
dependencies: []
|
20
20
|
|
21
21
|
description: Normalize U.S addresses
|
@@ -32,6 +32,7 @@ extra_rdoc_files:
|
|
32
32
|
- lib/normalic/phone_number.rb
|
33
33
|
- lib/normalic/uri.rb
|
34
34
|
files:
|
35
|
+
- License.txt
|
35
36
|
- Manifest
|
36
37
|
- README.rdoc
|
37
38
|
- Rakefile
|
@@ -40,8 +41,8 @@ files:
|
|
40
41
|
- lib/normalic/address.rb
|
41
42
|
- lib/normalic/phone_number.rb
|
42
43
|
- lib/normalic/uri.rb
|
43
|
-
- spec/normalic_spec.rb
|
44
44
|
- normalic.gemspec
|
45
|
+
- spec/normalic_spec.rb
|
45
46
|
homepage: http://github.com/ericxtang/normalic
|
46
47
|
licenses: []
|
47
48
|
|