Indirizzo 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.travis.yml +5 -0
- data/Gemfile +3 -0
- data/Indirizzo.gemspec +28 -0
- data/LICENSE.txt +165 -0
- data/README.md +56 -0
- data/Rakefile +31 -0
- data/VERSION +1 -0
- data/lib/indirizzo.rb +1 -0
- data/lib/indirizzo/address.rb +286 -0
- data/lib/indirizzo/constants.rb +666 -0
- data/lib/indirizzo/numbers.rb +55 -0
- data/test/test_address.rb +228 -0
- data/test/test_constants.rb +55 -0
- data/test/test_helper.rb +4 -0
- data/test/test_numbers.rb +44 -0
- metadata +102 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
module Indirizzo
  # The NumberMap class provides a means for mapping ordinal
  # and cardinal number words to digits and back.
  #
  # Every appended word is stored twice: once as
  # normalized-word => index and once as index => original-word.
  # The index is simply the position at which the word was appended,
  # starting from zero.
  class NumberMap < Hash
    # Regexp matching any key that was present when #build_match was
    # last called. It is nil until #build_match has been called.
    attr_accessor :regexp

    # Builds a NumberMap from an ordered list of number words and
    # compiles its matching regexp.
    #
    # array:: words in numeric order, starting with the word for zero.
    #
    # Returns the populated NumberMap.
    def self.[](array)
      nmap = new(array)
      nmap.build_match
      nmap
    end

    # Creates a map and appends each of the given words in order.
    #
    # array:: optional list of words to append (nil is treated as empty).
    #
    # The regexp is not compiled here; call #build_match (or use
    # NumberMap.[]) when it is needed.
    def initialize(array = [])
      super()
      @count = 0
      Array(array).each { |item| self << item }
    end

    # Compiles a case-insensitive, word-bounded alternation of all the
    # current keys (both the normalized words and the integer indexes)
    # and stores it in #regexp.
    #
    # NOTE: the regexp is a snapshot. Entries appended with #<< after
    # this call are not part of it until #build_match is called again.
    def build_match
      @regexp = Regexp.new(
        '\b(' + keys.flatten.join("|") + ')\b',
        Regexp::IGNORECASE
      )
    end

    # Normalizes a lookup key: strings are downcased and stripped of every
    # non-word character (so "Eighty-Seven", "eighty seven" and
    # "eightyseven" all become "eightyseven"); any other object is
    # returned untouched.
    def clean(key)
      key.is_a?(String) ? key.downcase.gsub(/\W/, "") : key
    end

    # Appends a word, assigning it the next index.
    #
    # Returns the map itself so that appends can be chained.
    def <<(item)
      store clean(item), @count
      store @count, item
      @count += 1
      self
    end

    # Looks up a word (after normalization via #clean) or an index.
    # Returns nil when the key is unknown.
    def [](key)
      super(clean(key))
    end
  end

  # The Cardinals constant maps digits to cardinal number words and back.
  # Its regexp covers only zero through nineteen, because it is compiled
  # before the tens and compound words below are appended.
  Cardinals = NumberMap[%w[
    zero one two three four five six seven eight nine ten
    eleven twelve thirteen fourteen fifteen sixteen seventeen
    eighteen nineteen
  ]]
  Cardinal_Tens = %w[ twenty thirty forty fifty sixty seventy eighty ninety ]
  Cardinal_Tens.each do |tens|
    Cardinals << tens
    (1..9).each { |n| Cardinals << "#{tens}-#{Cardinals[n]}" }
  end

  # The Ordinals constant maps digits to ordinal number words and back.
  # Its regexp covers only zeroth through nineteenth, for the same reason
  # as Cardinals.
  Ordinals = NumberMap[%w[
    zeroth first second third fourth fifth sixth seventh eighth ninth
    tenth eleventh twelfth thirteenth fourteenth fifteenth sixteenth
    seventeenth eighteenth nineteenth
  ]]
  Cardinal_Tens.each do |tens|
    # Only the trailing "y" becomes "ieth" (twenty -> twentieth).
    Ordinals << tens.sub(/y\z/, "ieth")
    (1..9).each { |n| Ordinals << "#{tens}-#{Ordinals[n]}" }
  end
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'set'
|
3
|
+
require 'indirizzo/address'
|
4
|
+
|
5
|
+
include Indirizzo
|
6
|
+
|
7
|
+
# Exercises Address: text cleaning, number expansion, free-text parsing
# and construction from pre-parsed hashes.
class TestAddress < Test::Unit::TestCase
  # Constructing from a string exposes the cleaned text.
  def test_new
    addr = Address.new("1600 Pennsylvania Av., Washington DC")
    assert_equal "1600 Pennsylvania Av, Washington DC", addr.text
  end

  # Each pair is [expected cleaned text, raw input].
  def test_clean
    fixtures = [
      [ "cleaned text", "cleaned: text!" ],
      [ "cleaned-text 2", "cleaned-text: #2?" ],
      [ "it's working 1/2", "~it's working 1/2~" ],
      [ "it's working, yes", "it's working, yes...?" ],
      [ "it's working & well", "it's working & well?" ]
    ]
    fixtures.each do |expected, raw|
      assert_equal expected, Address.new(raw).text
    end
  end

  # Any one representation of five must expand to all three.
  def test_expand_numbers
    num_list = ["5", "fifth", "five"]
    num_list.each do |n|
      addr = Address.new(n)
      assert_equal num_list, addr.expand_numbers(n).to_a.sort
    end
  end

  # Each row is [input, expected city, expected state, expected zip].
  def test_city_parse
    places = [
      [ "New York, NY", "New York", "NY", "" ],
      [ "NY", "", "NY", "" ],
      [ "New York", "New York", "NY", "" ],
      [ "Philadelphia", "Philadelphia", "", "" ],
      [ "Philadelphia PA", "Philadelphia", "PA", "" ],
      [ "Philadelphia, PA", "Philadelphia", "PA", "" ],
      [ "Philadelphia, Pennsylvania", "Philadelphia", "PA", "" ],
      [ "Philadelphia, Pennsylvania 19131", "Philadelphia", "PA", "19131" ],
      [ "Philadelphia 19131", "Philadelphia", "", "19131" ],
      [ "Pennsylvania 19131", "Pennsylvania", "PA", "19131" ], # kind of a misfeature
      [ "19131", "", "", "19131" ],
      [ "19131-9999", "", "", "19131" ],
    ]
    places.each do |fixture|
      addr = Address.new(fixture[0])
      [:city, :state, :zip].zip(fixture[1..3]).each do |key, val|
        result = addr.send(key)
        # Scalar results are wrapped so both shapes can be checked alike.
        result = [result.downcase] unless result.is_a?(Array)
        if result.empty?
          assert_equal val, "", "#{key} test no result #{fixture.join("/")}"
        else
          assert result.member?(val.downcase), "#{key} test #{result.inspect}#{fixture.join("/")}"
        end
      end
    end
  end

  def test_po_box
    addr_po = Address.new("PO Box 1111 Herndon VA 20171")
    assert addr_po.po_box?
  end

  # Each fixture holds the input under :text; every other key names an
  # Address reader and the value expected from it.
  def test_parse
    addrs = [
      {:text   => "1600 Pennsylvania Av., Washington DC 20050",
       :number => "1600",
       :street => "Pennsylvania Ave",
       :city   => "Washington",
       :state  => "DC",
       :zip    => "20050"},

      {:text   => "1600 Pennsylvania, Washington DC",
       :number => "1600",
       :street => "Pennsylvania",
       :city   => "Washington",
       :state  => "DC"},

      {:text   => "1600 Pennsylvania Washington DC",
       :number => "1600",
       :street => "Pennsylvania Washington",
       :city   => "Pennsylvania Washington", # FIXME
       :state  => "DC"},

      {:text   => "1600 Pennsylvania Washington",
       :number => "1600",
       :street => "Pennsylvania",
       :city   => "Washington",
       :state  => "WA"}, # FIXME

      {:text   => "1600 Pennsylvania 20050",
       :number => "1600",
       :street => "Pennsylvania", # FIXME
       :zip    => "20050"},

      {:text   => "1600 Pennsylvania Av, 20050-9999",
       :number => "1600",
       :street => "Pennsylvania Ave",
       :zip    => "20050"},

      {:text   => "1005 Gravenstein Highway North, Sebastopol CA",
       :number => "1005",
       :street => "Gravenstein Hwy N",
       :city   => "Sebastopol",
       :state  => "CA"},

      {:text   => "100 N 7th St, Brooklyn",
       :number => "100",
       :street => "N 7 St",
       :city   => "Brooklyn"},

      {:text   => "100 N Seventh St, Brooklyn",
       :number => "100",
       :street => "N 7 St",
       :city   => "Brooklyn"},

      {:text   => "100 Central Park West, New York, NY",
       :number => "100",
       :street => "Central Park W",
       :city   => "New York",
       :state  => "NY"},

      {:text   => "100 Central Park West, 10010",
       :number => "100",
       :street => "Central Park W",
       :zip    => "10010"},

      {:text   => "1400 Avenue of the Americas, New York, NY 10019",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York",
       :state  => "NY"},

      {:text   => "1400 Avenue of the Americas, New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

      {:text   => "1400 Ave of the Americas, New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

      {:text   => "1400 Av of the Americas, New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

      {:text   => "1400 Av of the Americas New York",
       :number => "1400",
       :street => "Ave of the Americas",
       :city   => "New York"},

    ]
    addrs.each do |fixture|
      # Pull the input out so only the expectations remain in the hash.
      text = fixture.delete(:text)
      addr = Address.new(text)
      fixture.each do |key, val|
        result = addr.send(key)
        if result.is_a?(Array)
          # Array results list candidates; one of them must match.
          result.map! { |str| str.downcase }
          assert result.member?(val.downcase), "#{text} (#{key}) = #{result.inspect}"
        else
          assert_equal val, result, "#{text} (#{key}) = #{result.inspect}"
        end
      end
    end
  end

  # Pre-parsed hashes are checked field by field against the readers.
  def test_skip_parse
    addresses = [
      {:street => "1233 Main St", :city => "Springfield", :region => "VA", :postal_code => "12345", :final_number => "1233", :parsed_street => "main st"},
      {:street => "somewhere Ln", :city => "Somewhere", :region => "WI", :postal_code => "22222", :number => "402", :parsed_street => "somewhere ln", :final_number => "402"},
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new(preparsed_address)
      assert_equal preparsed_address[:parsed_street], address_for_geocode.street[0]
      assert_equal preparsed_address[:final_number], address_for_geocode.number
      assert_equal preparsed_address[:city], address_for_geocode.city[0]
      assert_equal preparsed_address[:region], address_for_geocode.state
      assert_equal preparsed_address[:postal_code], address_for_geocode.zip
    end
  end

  # The :region of a pre-parsed hash must come back as its abbreviation.
  def test_states_abbreviated_in_skip_parse
    addresses = [
      {:street => "123 Main St", :city => "Springfield", :region => "Virginia", :postal_code => "12345",:state_abbrev => "VA"},
      {:street => "402 Somewhere Ln", :city => "Somewhere", :region => "WI", :postal_code => "22222", :state_abbrev => "WI"},
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new(preparsed_address)
      assert_equal preparsed_address[:state_abbrev], address_for_geocode.state
    end
  end

  # A hash carrying only :address must yield the expected city list.
  def test_address_hash
    addresses = [
      {:address => "Herndon, VA", :place_check => ["herndon"]},
      {:address => "Arlington, VA", :place_check => ["arlington"]}
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new(preparsed_address)
      assert_equal preparsed_address[:place_check], address_for_geocode.city
    end
  end

  # A hash with only street and postal code must still expose the zip.
  def test_partial_address
    addresses = [
      {:street => "2200 Wilson Blvd", :postal_code => "22201"},
    ]
    addresses.each do |preparsed_address|
      address_for_geocode = Address.new(preparsed_address)
      assert_equal preparsed_address[:postal_code], address_for_geocode.zip
    end
  end

  # The :country of a pre-parsed hash is expected back from #state.
  def test_country_parse
    addresses = [
      {:city => "Paris", :country => "FR"},
    ]

    addresses.each do |preparsed_address|
      address_for_geocode = Address.new(preparsed_address)
      assert_equal preparsed_address[:country], address_for_geocode.state
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'indirizzo/constants'
|
3
|
+
|
4
|
+
include Indirizzo
|
5
|
+
|
6
|
+
# Exercises Map lookups against a small sample map and checks that the
# library's lookup constants are Map instances.
class TestConstants < Test::Unit::TestCase
  # Builds the sample map used by the tests, then hands the arguments on
  # to the framework's constructor.
  def initialize(*args)
    @map = Map[
      "Abbreviation" => "abbr",
      "Two words" => "2words",
      "Some three words" => "3words"
    ]
    super(*args)
  end

  def test_class_constructor
    [Map, Hash].each { |klass| assert_kind_of klass, @map }
  end

  # Each row is [key, whether the map should report it as present].
  def test_key
    expectations = [
      ["Abbreviation",  true],
      ["abbreviation",  true],
      ["abbreviation?", false],
      ["abbr",          true],
      ["Two words",     true],
      ["2words",        true]
    ]
    expectations.each do |key, present|
      if present
        assert @map.key?(key)
      else
        assert !@map.key?(key)
      end
    end
  end

  # Each row is [key, expected value]; nil means the lookup must miss.
  def test_fetch
    expectations = [
      ["Abbreviation",  "abbr"],
      ["abbreviation",  "abbr"],
      ["abbreviation?", nil],
      ["abbr",          "abbr"],
      ["Two words",     "2words"],
      ["2words",        "2words"]
    ]
    expectations.each do |key, expected|
      if expected.nil?
        assert_nil @map[key]
      else
        assert_equal expected, @map[key]
      end
    end
  end

  # def test_partial
  #   assert @map.partial?( "Abbreviation" )
  #   assert @map.partial?( "Two" )
  #   assert @map.partial?( "two" )
  #   assert !(@map.partial? "words")
  #   assert @map.partial?( "Some" )
  #   assert !(@map.partial? "words")
  #   assert @map.partial?( "Some three" )
  #   assert @map.partial?( "SOME THREE WORDS" )
  # end

  # Every lookup table constant must be a Map.
  def test_constants
    [
      Directional,
      Prefix_Qualifier,
      Suffix_Qualifier,
      Prefix_Type,
      Suffix_Type,
      Unit_Type,
      Name_Abbr,
      State
    ].each { |constant| assert_kind_of Map, constant }
  end
end
|
data/test/test_helper.rb
ADDED
(4 added lines; contents not shown)
data/test/test_numbers.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'indirizzo/numbers'
|
3
|
+
|
4
|
+
include Indirizzo
|
5
|
+
|
6
|
+
# Exercises the Cardinals and Ordinals number maps in both directions
# (number => word and word => number).
#
# NOTE(review): this class shares the name TestAddress with the test case
# in test_address.rb, so loading both files reopens one class; consider
# renaming it to TestNumbers.
class TestAddress < Test::Unit::TestCase
  def test_number_to_cardinal
    assert_equal 'one', Cardinals[1]
    assert_equal 'ten', Cardinals[10]
    assert_equal 'twelve', Cardinals[12]
    assert_equal 'eighty-seven', Cardinals[87]
  end

  # Lookups ignore case, hyphens and spaces.
  def test_cardinal_to_number
    assert_equal 1, Cardinals['one']
    assert_equal 1, Cardinals['One']
    assert_equal 10, Cardinals['ten']
    assert_equal 12, Cardinals['twelve']
    assert_equal 87, Cardinals['eighty-seven']
    assert_equal 87, Cardinals['eighty seven']
    assert_equal 87, Cardinals['eightyseven']
  end

  def test_number_to_ordinal
    assert_equal 'first', Ordinals[1]
    assert_equal 'second', Ordinals[2]
    assert_equal 'tenth', Ordinals[10]
    assert_equal 'twelfth', Ordinals[12]
    assert_equal 'twentieth', Ordinals[20]
    assert_equal 'twenty-second', Ordinals[22]
    assert_equal 'eighty-seventh', Ordinals[87]
  end

  # Lookups ignore case, hyphens and spaces; unknown words give nil.
  def test_ordinal_to_number
    assert_equal 1, Ordinals['first']
    assert_equal 1, Ordinals['First']
    assert_equal 10, Ordinals['tenth']
    assert_equal 12, Ordinals['twelfth']
    assert_equal 73, Ordinals['seventy-third']
    assert_equal 74, Ordinals['seventy fourth']
    assert_equal 75, Ordinals['seventyfifth']
    assert_nil Ordinals['seventy-eleventh']
  end
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: Indirizzo
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dave Worth
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-12-14 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: &70182937832940 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70182937832940
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: cover_me
|
27
|
+
requirement: &70182937832500 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70182937832500
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: awesome_print
|
38
|
+
requirement: &70182937832080 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70182937832080
|
47
|
+
description: Indirizzo is simply an extraction of the US Street Address parsing code
|
48
|
+
from Geocoder::US
|
49
|
+
email: dave@highgroove.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files:
|
53
|
+
- LICENSE.txt
|
54
|
+
- README.md
|
55
|
+
files:
|
56
|
+
- .gitignore
|
57
|
+
- .travis.yml
|
58
|
+
- Gemfile
|
59
|
+
- Indirizzo.gemspec
|
60
|
+
- LICENSE.txt
|
61
|
+
- README.md
|
62
|
+
- Rakefile
|
63
|
+
- VERSION
|
64
|
+
- lib/indirizzo.rb
|
65
|
+
- lib/indirizzo/address.rb
|
66
|
+
- lib/indirizzo/constants.rb
|
67
|
+
- lib/indirizzo/numbers.rb
|
68
|
+
- test/test_address.rb
|
69
|
+
- test/test_constants.rb
|
70
|
+
- test/test_helper.rb
|
71
|
+
- test/test_numbers.rb
|
72
|
+
homepage: http://github.com/daveworth/indirizzo
|
73
|
+
licenses:
|
74
|
+
- LGPL
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ! '>='
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
none: false
|
87
|
+
requirements:
|
88
|
+
- - ! '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
requirements: []
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 1.8.6
|
94
|
+
signing_key:
|
95
|
+
specification_version: 3
|
96
|
+
summary: Indirizzo is simply an extraction of the US Street Address parsing code from
|
97
|
+
Geocoder::US
|
98
|
+
test_files:
|
99
|
+
- test/test_address.rb
|
100
|
+
- test/test_constants.rb
|
101
|
+
- test/test_helper.rb
|
102
|
+
- test/test_numbers.rb
|