Indirizzo 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.travis.yml +5 -0
- data/Gemfile +3 -0
- data/Indirizzo.gemspec +28 -0
- data/LICENSE.txt +165 -0
- data/README.md +56 -0
- data/Rakefile +31 -0
- data/VERSION +1 -0
- data/lib/indirizzo.rb +1 -0
- data/lib/indirizzo/address.rb +286 -0
- data/lib/indirizzo/constants.rb +666 -0
- data/lib/indirizzo/numbers.rb +55 -0
- data/test/test_address.rb +228 -0
- data/test/test_constants.rb +55 -0
- data/test/test_helper.rb +4 -0
- data/test/test_numbers.rb +44 -0
- metadata +102 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Indirizzo.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Gem specification for Indirizzo, the US street address parser extracted
# from Geocoder::US.
Gem::Specification.new do |s|
  s.name = %q{Indirizzo}
  s.version = "0.1.0"

  s.authors = [%q{Dave Worth}]
  s.date = %q{2011-12-14}
  s.description = %q{Indirizzo is simply an extraction of the US Street Address parsing code from Geocoder::US}
  s.email = %q{dave@highgroove.com}

  s.homepage = %q{http://github.com/daveworth/indirizzo}
  s.licenses = [%q{LGPL}]
  s.rubygems_version = %q{1.9.2}
  s.summary = %q{Indirizzo is simply an extraction of the US Street Address parsing code from Geocoder::US}

  # The file lists are read from git, so the gem has to be built from a
  # git checkout of the project.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  # require_paths is set exactly once (it used to be assigned twice with the
  # same value).
  s.require_paths = ["lib"]
  s.extra_rdoc_files = [
    "LICENSE.txt",
    "README.md"
  ]

  s.add_development_dependency('rake')
  s.add_development_dependency('cover_me')
  s.add_development_dependency('awesome_print')
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
2
|
+
Version 3, 29 June 2007
|
3
|
+
|
4
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
5
|
+
Everyone is permitted to copy and distribute verbatim copies
|
6
|
+
of this license document, but changing it is not allowed.
|
7
|
+
|
8
|
+
|
9
|
+
This version of the GNU Lesser General Public License incorporates
|
10
|
+
the terms and conditions of version 3 of the GNU General Public
|
11
|
+
License, supplemented by the additional permissions listed below.
|
12
|
+
|
13
|
+
0. Additional Definitions.
|
14
|
+
|
15
|
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
16
|
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
17
|
+
General Public License.
|
18
|
+
|
19
|
+
"The Library" refers to a covered work governed by this License,
|
20
|
+
other than an Application or a Combined Work as defined below.
|
21
|
+
|
22
|
+
An "Application" is any work that makes use of an interface provided
|
23
|
+
by the Library, but which is not otherwise based on the Library.
|
24
|
+
Defining a subclass of a class defined by the Library is deemed a mode
|
25
|
+
of using an interface provided by the Library.
|
26
|
+
|
27
|
+
A "Combined Work" is a work produced by combining or linking an
|
28
|
+
Application with the Library. The particular version of the Library
|
29
|
+
with which the Combined Work was made is also called the "Linked
|
30
|
+
Version".
|
31
|
+
|
32
|
+
The "Minimal Corresponding Source" for a Combined Work means the
|
33
|
+
Corresponding Source for the Combined Work, excluding any source code
|
34
|
+
for portions of the Combined Work that, considered in isolation, are
|
35
|
+
based on the Application, and not on the Linked Version.
|
36
|
+
|
37
|
+
The "Corresponding Application Code" for a Combined Work means the
|
38
|
+
object code and/or source code for the Application, including any data
|
39
|
+
and utility programs needed for reproducing the Combined Work from the
|
40
|
+
Application, but excluding the System Libraries of the Combined Work.
|
41
|
+
|
42
|
+
1. Exception to Section 3 of the GNU GPL.
|
43
|
+
|
44
|
+
You may convey a covered work under sections 3 and 4 of this License
|
45
|
+
without being bound by section 3 of the GNU GPL.
|
46
|
+
|
47
|
+
2. Conveying Modified Versions.
|
48
|
+
|
49
|
+
If you modify a copy of the Library, and, in your modifications, a
|
50
|
+
facility refers to a function or data to be supplied by an Application
|
51
|
+
that uses the facility (other than as an argument passed when the
|
52
|
+
facility is invoked), then you may convey a copy of the modified
|
53
|
+
version:
|
54
|
+
|
55
|
+
a) under this License, provided that you make a good faith effort to
|
56
|
+
ensure that, in the event an Application does not supply the
|
57
|
+
function or data, the facility still operates, and performs
|
58
|
+
whatever part of its purpose remains meaningful, or
|
59
|
+
|
60
|
+
b) under the GNU GPL, with none of the additional permissions of
|
61
|
+
this License applicable to that copy.
|
62
|
+
|
63
|
+
3. Object Code Incorporating Material from Library Header Files.
|
64
|
+
|
65
|
+
The object code form of an Application may incorporate material from
|
66
|
+
a header file that is part of the Library. You may convey such object
|
67
|
+
code under terms of your choice, provided that, if the incorporated
|
68
|
+
material is not limited to numerical parameters, data structure
|
69
|
+
layouts and accessors, or small macros, inline functions and templates
|
70
|
+
(ten or fewer lines in length), you do both of the following:
|
71
|
+
|
72
|
+
a) Give prominent notice with each copy of the object code that the
|
73
|
+
Library is used in it and that the Library and its use are
|
74
|
+
covered by this License.
|
75
|
+
|
76
|
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
77
|
+
document.
|
78
|
+
|
79
|
+
4. Combined Works.
|
80
|
+
|
81
|
+
You may convey a Combined Work under terms of your choice that,
|
82
|
+
taken together, effectively do not restrict modification of the
|
83
|
+
portions of the Library contained in the Combined Work and reverse
|
84
|
+
engineering for debugging such modifications, if you also do each of
|
85
|
+
the following:
|
86
|
+
|
87
|
+
a) Give prominent notice with each copy of the Combined Work that
|
88
|
+
the Library is used in it and that the Library and its use are
|
89
|
+
covered by this License.
|
90
|
+
|
91
|
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
92
|
+
document.
|
93
|
+
|
94
|
+
c) For a Combined Work that displays copyright notices during
|
95
|
+
execution, include the copyright notice for the Library among
|
96
|
+
these notices, as well as a reference directing the user to the
|
97
|
+
copies of the GNU GPL and this license document.
|
98
|
+
|
99
|
+
d) Do one of the following:
|
100
|
+
|
101
|
+
0) Convey the Minimal Corresponding Source under the terms of this
|
102
|
+
License, and the Corresponding Application Code in a form
|
103
|
+
suitable for, and under terms that permit, the user to
|
104
|
+
recombine or relink the Application with a modified version of
|
105
|
+
the Linked Version to produce a modified Combined Work, in the
|
106
|
+
manner specified by section 6 of the GNU GPL for conveying
|
107
|
+
Corresponding Source.
|
108
|
+
|
109
|
+
1) Use a suitable shared library mechanism for linking with the
|
110
|
+
Library. A suitable mechanism is one that (a) uses at run time
|
111
|
+
a copy of the Library already present on the user's computer
|
112
|
+
system, and (b) will operate properly with a modified version
|
113
|
+
of the Library that is interface-compatible with the Linked
|
114
|
+
Version.
|
115
|
+
|
116
|
+
e) Provide Installation Information, but only if you would otherwise
|
117
|
+
be required to provide such information under section 6 of the
|
118
|
+
GNU GPL, and only to the extent that such information is
|
119
|
+
necessary to install and execute a modified version of the
|
120
|
+
Combined Work produced by recombining or relinking the
|
121
|
+
Application with a modified version of the Linked Version. (If
|
122
|
+
you use option 4d0, the Installation Information must accompany
|
123
|
+
the Minimal Corresponding Source and Corresponding Application
|
124
|
+
Code. If you use option 4d1, you must provide the Installation
|
125
|
+
Information in the manner specified by section 6 of the GNU GPL
|
126
|
+
for conveying Corresponding Source.)
|
127
|
+
|
128
|
+
5. Combined Libraries.
|
129
|
+
|
130
|
+
You may place library facilities that are a work based on the
|
131
|
+
Library side by side in a single library together with other library
|
132
|
+
facilities that are not Applications and are not covered by this
|
133
|
+
License, and convey such a combined library under terms of your
|
134
|
+
choice, if you do both of the following:
|
135
|
+
|
136
|
+
a) Accompany the combined library with a copy of the same work based
|
137
|
+
on the Library, uncombined with any other library facilities,
|
138
|
+
conveyed under the terms of this License.
|
139
|
+
|
140
|
+
b) Give prominent notice with the combined library that part of it
|
141
|
+
is a work based on the Library, and explaining where to find the
|
142
|
+
accompanying uncombined form of the same work.
|
143
|
+
|
144
|
+
6. Revised Versions of the GNU Lesser General Public License.
|
145
|
+
|
146
|
+
The Free Software Foundation may publish revised and/or new versions
|
147
|
+
of the GNU Lesser General Public License from time to time. Such new
|
148
|
+
versions will be similar in spirit to the present version, but may
|
149
|
+
differ in detail to address new problems or concerns.
|
150
|
+
|
151
|
+
Each version is given a distinguishing version number. If the
|
152
|
+
Library as you received it specifies that a certain numbered version
|
153
|
+
of the GNU Lesser General Public License "or any later version"
|
154
|
+
applies to it, you have the option of following the terms and
|
155
|
+
conditions either of that published version or of any later version
|
156
|
+
published by the Free Software Foundation. If the Library as you
|
157
|
+
received it does not specify a version number of the GNU Lesser
|
158
|
+
General Public License, you may choose any version of the GNU Lesser
|
159
|
+
General Public License ever published by the Free Software Foundation.
|
160
|
+
|
161
|
+
If the Library as you received it specifies that a proxy can decide
|
162
|
+
whether future versions of the GNU Lesser General Public License shall
|
163
|
+
apply, that proxy's public statement of acceptance of any version is
|
164
|
+
permanent authorization for you to choose that version for the
|
165
|
+
Library.
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Introduction
|
2
|
+
|
3
|
+
Indirizzo is a simple extraction of the Address class (along with the numbers
|
4
|
+
and constants handling code) from [Geocommons](http://geocommons.com/)'
|
5
|
+
[Geocoder::US 2.0](https://github.com/geocommons/geocoder) gem.
|
6
|
+
|
7
|
+
[![Build Status](https://secure.travis-ci.org/daveworth/Indirizzo.png)](http://travis-ci.org/daveworth/Indirizzo)
|
8
|
+
|
9
|
+
## Background
|
10
|
+
|
11
|
+
My motivation for creating this extraction is the dearth of high-quality,
|
12
|
+
flexible, street address parsing gems available to the Ruby community. After
|
13
|
+
digging into Ruby-Toolbox looking for alternatives I came up with tools based on
|
14
|
+
the Perl
|
15
|
+
[GEO::StreetAddress::US](http://search.cpan.org/~sderle/Geo-StreetAddress-US-0.99/US.pm)
|
16
|
+
such as [street\_address](https://github.com/astevens/street_address). The
|
17
|
+
street_address gem ended up being much too restrictive for my needs and my
|
18
|
+
continued searching brought me to the Geocoder::US gem. Regrettably the
|
19
|
+
constraints of needing a SQLite3 database for proper geocoding added overhead to
|
20
|
+
my simple needs. I simply need to parse addresses that may, or may not, be
|
21
|
+
"complete" or "well-formed". Thus Indirizzo was born.
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require 'indirizzo'
|
27
|
+
Indirizzo::Address.new("some address")
|
28
|
+
```
|
29
|
+
|
30
|
+
## License
|
31
|
+
|
32
|
+
Indirizzo is a direct derivative of [Geocoder::US 2.0](https://github.com/geocommons/geocoder)
|
33
|
+
|
34
|
+
Geocoder::US 2.0 was based on earlier work by Schuyler Erle on
|
35
|
+
a Perl module of the same name. You can find it at
|
36
|
+
[http://search.cpan.org/~sderle/](http://search.cpan.org/~sderle/).
|
37
|
+
|
38
|
+
Geocoder::US 2.0 was written by Schuyler Erle, of Entropy Free LLC,
|
39
|
+
with the gracious support of FortiusOne, Inc. Please send bug reports,
|
40
|
+
patches, kudos, etc. to patches at geocoder.us.
|
41
|
+
|
42
|
+
Copyright (c) 2009 FortiusOne, Inc.
|
43
|
+
|
44
|
+
This program is free software: you can redistribute it and/or modify
|
45
|
+
it under the terms of the GNU General Public License as published by
|
46
|
+
the Free Software Foundation, either version 3 of the License, or
|
47
|
+
(at your option) any later version.
|
48
|
+
|
49
|
+
This program is distributed in the hope that it will be useful,
|
50
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
51
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
52
|
+
GNU General Public License for more details.
|
53
|
+
|
54
|
+
You should have received a copy of the GNU General Public License
|
55
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
56
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'rake/testtask'
|
2
|
+
require 'bundler/gem_tasks'
|
3
|
+
require 'bundler'
|
4
|
+
# Activate the bundled gems (default and development groups) before any
# task is defined; report missing gems and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the
# load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

task :default => :test

namespace :cover_me do
  desc "Generates and opens code coverage report."
  task :report do
    require 'cover_me'
    CoverMe.complete!
  end
end

# Re-opening :test adds the coverage report as an extra action after the
# test run itself.
task :test do
  Rake::Task['cover_me:report'].invoke
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/indirizzo.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Entry point of the gem. The path is spelled in lower case to match
# lib/indirizzo/address.rb, so that it also loads on case-sensitive
# file systems.
require 'indirizzo/address'
|
@@ -0,0 +1,286 @@
|
|
1
|
+
require 'indirizzo/constants'
|
2
|
+
|
3
|
+
module Indirizzo
|
4
|
+
# Defines the matching of parsed address tokens.
#
# Each key names an address component and maps to the Regexp used to find
# that component in a cleaned address string.
Match = {
  # FIXME: shouldn't have to anchor :number and :zip at start/end
  # Captures, in order: optional prefix, the number itself, optional
  # one-letter suffix.
  :number => /^(\d+\W|[a-z]+)?(\d+)([a-z]?)\b/io,
  :street => /(?:\b(?:\d+\w*|[a-z'-]+)\s*)+/io,
  :city => /(?:\b[a-z'-]+\s*)+/io,
  # The suffix is single-quoted on purpose: inside double quotes "\s" is a
  # literal space, so the whitespace class never reached the regexp engine.
  :state => Regexp.new(State.regexp.source + '\s*$', Regexp::IGNORECASE),
  # Only the five-digit ZIP is captured; the +4 part is matched but not captured.
  :zip => /(\d{5})(?:-\d{4})?\s*$/o,
  :at => /\s(at|@|and|&)\s/io,
  # Character classes list their members directly; a "|" inside [...] is a
  # literal pipe, not alternation, so it has been removed.
  :po_box => /\b[Pp]*(OST|ost)*\.*\s*[Oo0]*(ffice|FFICE)*\.*\s*[Bb][Oo0][Xx]\b/
}
|
15
|
+
|
16
|
+
# The Address class takes a US street address or place name and
|
17
|
+
# constructs a list of possible structured parses of the address
|
18
|
+
# string.
|
19
|
+
class Address
|
20
|
+
attr_accessor :text
|
21
|
+
attr_accessor :prenum, :number, :sufnum
|
22
|
+
attr_accessor :street
|
23
|
+
attr_accessor :city
|
24
|
+
attr_accessor :state
|
25
|
+
attr_accessor :zip, :plus4
|
26
|
+
|
27
|
+
# Builds an Address from either a free-form string or a Hash of components.
#
# text - a String holding an address or place name, which is cleaned and
#        parsed, or a Hash (or Hash subclass) that is handed to
#        assign_text_to_address.
#
# Raises ArgumentError when text is nil or empty.
def initialize(text)
  raise ArgumentError, "no text provided" unless text and !text.empty?
  # is_a? rather than an exact class comparison, so Hash subclasses take the
  # component path instead of being treated as strings.
  if text.is_a?(Hash)
    @text = ""
    assign_text_to_address text
  else
    @text = clean text
    parse
  end
end
|
38
|
+
|
39
|
+
# Normalises raw input: trims the ends, drops every character outside
# letters, digits, space, comma, apostrophe, "&", "@", "/" and "-", then
# collapses runs of whitespace into a single space.
def clean(value)
  trimmed = value.strip
  permitted = trimmed.gsub(/[^a-z0-9 ,'&@\/-]+/io, "")
  permitted.gsub(/\s+/o, " ")
end
|
45
|
+
|
46
|
+
# Fills in the address fields from a Hash of components instead of parsing
# a single free-form string.
#
# If text[:address] is present it is cleaned and run through #parse.
# Otherwise these keys are read: :prenum, :sufnum, :street, :number, :city,
# :postal_code, :plus4, and the first one present of :region, :country,
# :state (checked in that order) for the state.
def assign_text_to_address(text)
  if !text[:address].nil?
    @text = clean text[:address]
    parse
  else
    @street = []
    @prenum = text[:prenum]
    @sufnum = text[:sufnum]
    @street = text[:street].scan(Match[:street]) unless text[:street].nil?
    @number = ""
    if text[:number].nil?
      # No explicit number given: peel a leading house number off each
      # street string.
      @street.map! { |single_street|
        single_street.downcase!
        groups = single_street.scan(Match[:number])[0]
        # A street without a house number yields no match; skip it instead
        # of calling reject on nil.
        if groups
          @number = groups.reject { |n| n.nil? || n.empty? }.first.to_s
          single_street.sub!(@number, "")
        end
        single_street.sub!(/^\s*,?\s*/o, "")
        single_street
      }
    else
      @number = text[:number].to_s
    end
    @street = expand_streets(@street)
    # NOTE(review): the return value is discarded here; call kept as in the
    # original.
    street_parts

    @city = []
    if !text[:city].nil?
      @city.push(text[:city])
      @text = text[:city].to_s
    else
      @city.push("")
    end

    if !text[:region].nil?
      @state = text[:region]
      # Anything longer than two characters is looked up in State.
      @state = State[@state] if @state.length > 2
    elsif !text[:country].nil?
      @state = text[:country]
    elsif !text[:state].nil?
      @state = text[:state]
    end

    @zip = text[:postal_code]
    @plus4 = text[:plus4]
    if !@zip
      @zip = @plus4 = ""
    end
  end
end
|
99
|
+
|
100
|
+
# Expands the first number found in string into its alternative spellings.
#
# The number may be written as digits (optionally with st/nd/rd/th), as an
# ordinal word matched by Ordinals.regexp, or as a cardinal word matched by
# Cardinals.regexp. For values below 100 the result holds three variants of
# string, with the matched text replaced by the digits, the Ordinals entry
# and the Cardinals entry. Otherwise the result is just [string].
def expand_numbers(string)
  if (found = /\b\d+(?:st|nd|rd|th)?\b/o.match(string))
    match = found[0]
    num = match.to_i
  elsif (found = Ordinals.regexp.match(string))
    match = found[0]
    num = Ordinals[match]
  elsif (found = Cardinals.regexp.match(string))
    match = found[0]
    num = Cardinals[match]
  end

  return [string] unless num and num < 100

  [num.to_s, Ordinals[num], Cardinals[num]].map { |replace| string.sub(match, replace) }
end
|
124
|
+
|
125
|
+
# Removes the last occurrence of regex_match (the matched ZIP text) and any
# trailing comma or whitespace from text, in place, then splits the
# captures held in @zip into @zip and @plus4. Returns the modified text.
def parse_zip(regex_match, text)
  start = text.rindex(regex_match)
  stop = start + regex_match.length
  text[start...stop] = ""
  text.sub!(/\s*,?\s*$/o, "")
  stripped = @zip.map { |part| part.strip }
  @zip, @plus4 = stripped
  text
end
|
132
|
+
|
133
|
+
# Removes the last occurrence of regex_match (the matched state text) and
# any trailing comma or whitespace from text, in place. The state as typed
# is kept in @full_state and @state becomes its State lookup. Returns the
# modified text.
def parse_state(regex_match, text)
  start = text.rindex(regex_match)
  stop = start + regex_match.length
  text[start...stop] = ""
  text.sub!(/\s*,?\s*$/o, "")
  # special case: New York -- the typed name is kept for later comparison
  @full_state = @state[0].strip
  @state = State[@full_state]
  text
end
|
141
|
+
|
142
|
+
# Removes the first occurrence of regex_match (the matched house number)
# and any leading comma or whitespace from text, in place, then spreads the
# captures held in @number over @prenum, @number and @sufnum. Returns the
# modified text.
def parse_number(regex_match, text)
  # FIXME: What if this string appears twice?
  start = text.index(regex_match)
  stop = start + regex_match.length
  text[start...stop] = ""
  text.sub!(/^\s*,?\s*/o, "")
  parts = @number.map { |part| part && part.strip }
  @prenum, @number, @sufnum = parts
  text
end
|
150
|
+
|
151
|
+
# Parses @text into its components. Works on a lower-cased copy and peels
# off, in order, the ZIP (from the end), the state (from the end) and the
# house number (from the start); what remains is scanned for street and
# city candidates.
def parse
  text = @text.clone.downcase

  # ZIP: the last match wins; $& still holds the text of that match.
  @zip = text.scan(Match[:zip]).last
  if @zip
    text = parse_zip($&, text)
  else
    @zip = @plus4 = ""
  end

  # State: again the last match.
  @state = text.scan(Match[:state]).last
  if @state
    text = parse_state($&, text)
  else
    @full_state = ""
    @state = ""
  end

  # House number: the first match.
  @number = text.scan(Match[:number]).first
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  if @number # and not intersection?
    text = parse_number($&, text)
  else
    @prenum = @number = @sufnum = ""
  end

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  @street = expand_streets(text.scan(Match[:street]))
  # SPECIAL CASE: 1600 Pennsylvania 20050
  if @street.empty? && @state.downcase != @full_state.downcase
    @street << @full_state
  end

  # City: only the last candidate is kept, together with its Name_Abbr
  # expansion, lower-cased and de-duplicated.
  @city = text.scan(Match[:city])
  if @city.empty?
    @city = []
  else
    last_city = @city.last.strip
    expanded = last_city.gsub(Name_Abbr.regexp) { |m| Name_Abbr[m] }
    @city = ([last_city] | [expanded]).map { |s| s.downcase }
    @city.uniq!
  end

  # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
  @city << @full_state if @state.downcase != @full_state.downcase
end
|
201
|
+
|
202
|
+
# Expands a list of street strings into every spelling variant.
#
# Each string is stripped, then the list is widened with the Name_Abbr
# substitutions, then with the Std_Abbr substitutions, and finally every
# entry is passed through expand_numbers. The result is lower-cased and
# de-duplicated.
#
# street - Array of Strings; it is not modified, so frozen arrays work.
#
# Returns a new Array, empty when street is nil, empty, or starts with nil.
def expand_streets(street)
  return [] if street.nil? || street.empty? || street[0].nil?

  # Work on a copy: the previous in-place map! changed the caller's array.
  variants = street.map { |s| s.strip }
  variants |= variants.map { |item| item.gsub(Name_Abbr.regexp) { |m| Name_Abbr[m] } }
  variants |= variants.map { |item| item.gsub(Std_Abbr.regexp) { |m| Std_Abbr[m] } }
  variants.map { |item| expand_numbers(item) }.flatten.map { |s| s.downcase }.uniq
end
|
218
|
+
|
219
|
+
# Lists every contiguous run of words taken from the @street strings, with
# noise words removed by remove_noise_words. If every remaining candidate
# is an abbreviation key, @number is added as well. Returns a
# de-duplicated Array.
def street_parts
  # Get all the substrings delimited by whitespace
  substrings = @street.inject([]) do |collected, string|
    tokens = string.split(" ")
    spans = []
    tokens.each_index do |i|
      i.upto(tokens.length - 1) { |j| spans << tokens[i..j].join(" ") }
    end
    collected | spans
  end
  candidates = remove_noise_words(substrings)

  # Try a simpler case of adding the @number in case everything is an abbr.
  only_abbreviations = candidates.all? { |s| Std_Abbr.key?(s) || Name_Abbr.key?(s) }
  candidates += [@number] if only_abbreviations
  candidates.uniq
end
|
233
|
+
|
234
|
+
# Strips leading and trailing directionals and street-type words from each
# candidate string.
#
# strings - Array of candidate street strings (not modified).
#
# Returns the stripped candidates with empty results dropped, or the
# original strings when stripping leaves nothing at all.
def remove_noise_words(strings)
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on simple things like "Prairie St" or "Front St"
  #
  # The \s* pieces are single-quoted on purpose: inside double quotes "\s"
  # is a literal space, not the whitespace class.
  prefix = Regexp.new("^" + Prefix_Type.regexp.source + '\s*', Regexp::IGNORECASE)
  suffix = Regexp.new('\s*' + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)
  predxn = Regexp.new("^" + Directional.regexp.source + '\s*', Regexp::IGNORECASE)
  sufdxn = Regexp.new('\s*' + Directional.regexp.source + "$", Regexp::IGNORECASE)

  good_strings = strings.map { |s|
    s.gsub(predxn, "").gsub(sufdxn, "").gsub(prefix, "").gsub(suffix, "")
  }
  good_strings.reject! { |s| s.empty? }

  # NOTE(review): this condition used to pass a block to empty?, which Ruby
  # ignores; the effective check has always been "anything left?".
  strings = good_strings unless good_strings.empty?
  strings
end
|
254
|
+
|
255
|
+
# Lists every contiguous run of words taken from the @city strings,
# starting from the runs that begin at the last word. Entries that are
# Std_Abbr keys are dropped unless that would leave nothing. Returns a
# de-duplicated Array.
def city_parts
  strings = []
  @city.each do |string|
    tokens = string.split(" ")
    spans = []
    (tokens.length - 1).downto(0) do |i|
      (i...tokens.length).each { |j| spans << tokens[i..j].join(" ") }
    end
    strings |= spans
  end
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on "Prairie"
  without_abbr = strings.reject { |s| Std_Abbr.key?(s) }
  strings = without_abbr unless without_abbr.empty?
  strings.uniq
end
|
269
|
+
|
270
|
+
# Removes the given city names from the end of every @street candidate and
# drops candidates that become empty. @city itself is not assigned here.
#
# strings - Array of city name Strings; they are expanded with
#           expand_streets before matching.
def city=(strings)
  # NOTE: This will still fail on: 100 Broome St, 33333 (if 33333 is
  # Broome, MT or what)
  strings = expand_streets(strings) # fix for "Mountain View" -> "Mountain Vw"
  # Escape each name so regexp metacharacters in it are matched literally
  # and cannot make the pattern invalid.
  alternatives = strings.map { |s| Regexp.escape(s) }.join("|")
  match = Regexp.new('\s*\b(?:' + alternatives + ')\b\s*$', Regexp::IGNORECASE)
  @street = @street.map { |string| string.gsub(match, '') }.select { |s| !s.empty? }
end
|
277
|
+
|
278
|
+
# True when @text matches the PO box pattern in Match[:po_box].
def po_box?
  Match[:po_box].match(@text) ? true : false
end
|
281
|
+
|
282
|
+
# True when @text contains an intersection separator ("at", "@", "and" or
# "&" between whitespace), as defined by Match[:at].
def intersection?
  Match[:at].match(@text) ? true : false
end
|
285
|
+
end
|
286
|
+
end
|