coderifous-address_extractor 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/address_extractor.gemspec +2 -2
- data/lib/address_extractor.rb +15 -15
- data/test/test_address_extractor.rb +19 -18
- data/test/test_helper.rb +12 -14
- metadata +2 -2
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
|
|
2
2
|
require 'rake'
|
3
3
|
require 'echoe'
|
4
4
|
|
5
|
-
Echoe.new('address_extractor', '0.1.3') do |p|
|
5
|
+
Echoe.new('address_extractor', '0.1.4') do |p|
|
6
6
|
p.description = "Give it text. It finds addresses in it."
|
7
7
|
p.url = "http://github.com/coderifous/address_extractor"
|
8
8
|
p.author = "Jim Garvin"
|
data/address_extractor.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = %q{address_extractor}
|
3
|
-
s.version = "0.1.3"
|
3
|
+
s.version = "0.1.4"
|
4
4
|
|
5
5
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
6
6
|
s.authors = ["Jim Garvin"]
|
7
|
-
s.date = %q{2008-11-
|
7
|
+
s.date = %q{2008-11-21}
|
8
8
|
s.description = %q{Give it text. It finds addresses in it.}
|
9
9
|
s.email = %q{jim at thegarvin dot com}
|
10
10
|
s.extra_rdoc_files = ["lib/address_extractor.rb", "LICENSE.textile", "README.textile"]
|
data/lib/address_extractor.rb
CHANGED
@@ -12,8 +12,8 @@ class AddressExtractor
|
|
12
12
|
def find_addresses(string)
|
13
13
|
string.scan(ADDRESS_PATTERN).collect { |a| hashify_results(a) }.compact
|
14
14
|
end
|
15
|
-
|
16
|
-
# Pass it a block that recieves 2 parameters:
|
15
|
+
|
16
|
+
# Pass it a block that recieves 2 parameters:
|
17
17
|
# address hash
|
18
18
|
# matched address string ($&)
|
19
19
|
# Whatever your block returns will be used for the substition.
|
@@ -33,9 +33,9 @@ class AddressExtractor
|
|
33
33
|
useful_address?(hash) ? yield(hash, $&) : match
|
34
34
|
end
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
private
|
38
|
-
|
38
|
+
|
39
39
|
def hashify_results(matches)
|
40
40
|
return nil if matches.nil?
|
41
41
|
result = { }
|
@@ -46,16 +46,16 @@ class AddressExtractor
|
|
46
46
|
end
|
47
47
|
useful_address?(result) ? result : nil
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
def useful_address?(hash)
|
51
|
-
hash &&
|
51
|
+
hash &&
|
52
52
|
hash[:street1] && ( hash[:zip] || hash[:city] && hash[:state] )
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ]
|
58
|
-
|
58
|
+
|
59
59
|
STATES = <<-EOF
|
60
60
|
ALABAMA AL
|
61
61
|
ALASKA AK
|
@@ -117,9 +117,9 @@ class AddressExtractor
|
|
117
117
|
WISCONSIN WI
|
118
118
|
WYOMING WY
|
119
119
|
EOF
|
120
|
-
|
120
|
+
|
121
121
|
STATE_REGEX = STATES.split(/\n/).collect{ |n| n.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/) }.join("|")
|
122
|
-
|
122
|
+
|
123
123
|
SECONDARY_UNIT_DESIGNATORS = <<-EOF
|
124
124
|
APARTMENT APT
|
125
125
|
BASEMENT BSMT
|
@@ -145,14 +145,14 @@ class AddressExtractor
|
|
145
145
|
UNIT UNIT
|
146
146
|
UPPER UPPR
|
147
147
|
EOF
|
148
|
-
|
148
|
+
|
149
149
|
SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).collect{ |n| n.scan(/(\w+)\s*(\w+)\s*$/) }.join("|")
|
150
150
|
|
151
151
|
ADDRESS_PATTERN = /
|
152
152
|
(
|
153
153
|
\d+ # A few numbers
|
154
154
|
\s+
|
155
|
-
(?:[A-Za-z'.-]+\s?){
|
155
|
+
(?:[A-Za-z'.-]+\s?){1,5} # Followed by a street name
|
156
156
|
)
|
157
157
|
\s* ,? \s* # a comma, optionally
|
158
158
|
(
|
@@ -168,9 +168,9 @@ class AddressExtractor
|
|
168
168
|
\b(#{STATE_REGEX})\b # state
|
169
169
|
\s* ,? \s* # a comma, optionally
|
170
170
|
(\d{5})? # a zip code, optionally
|
171
|
-
)
|
171
|
+
)
|
172
172
|
| # or, instead of city and state
|
173
173
|
(\d{5})? # a lone zip code will do
|
174
174
|
)
|
175
175
|
/xi
|
176
|
-
end
|
176
|
+
end
|
data/test/test_address_extractor.rb
CHANGED
@@ -1,46 +1,41 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'address_extractor.rb'
|
5
|
-
require 'test_helper.rb'
|
1
|
+
require 'address_extractor'
|
2
|
+
require 'test_helper'
|
6
3
|
include TestDataHelper
|
7
4
|
|
8
5
|
class AddressExtractorTest < Test::Unit::TestCase
|
9
|
-
include Helpers
|
10
|
-
|
11
6
|
def test_first_address_extraction
|
12
7
|
each_test_data do |test_data|
|
13
8
|
address = AddressExtractor.first_address(test_data[:input])
|
14
9
|
flunk "No address found in:\n#{test_data[:input]}" if address.nil?
|
15
|
-
assert_equal_hashes
|
10
|
+
assert_equal_hashes test_data[:expected_output].first, address
|
16
11
|
end
|
17
12
|
end
|
18
|
-
|
13
|
+
|
19
14
|
def test_find_addresses
|
20
15
|
each_test_data do |test_data|
|
21
16
|
addresses = AddressExtractor.find_addresses(test_data[:input])
|
22
17
|
assert_equal addresses.size, test_data[:expected_output].size
|
23
18
|
test_data[:expected_output].each do |expected_output|
|
24
|
-
assert_equal_hashes addresses.shift
|
19
|
+
assert_equal_hashes expected_output, addresses.shift
|
25
20
|
end
|
26
21
|
end
|
27
22
|
end
|
28
|
-
|
23
|
+
|
29
24
|
def test_replace_first_address
|
30
25
|
string = AddressExtractor.replace_first_address(test_data.first[:input]) do |address_hash, address|
|
31
|
-
assert_equal_hashes
|
32
|
-
assert_match /^\s*123 Foo St., Someplace FL\s*/, address
|
26
|
+
assert_equal_hashes test_data.first[:expected_output].first, address_hash
|
27
|
+
assert_match /^\s*123 Foo St., Someplace FL\s*/, address
|
33
28
|
"skidoosh"
|
34
29
|
end
|
35
|
-
|
30
|
+
assert_match /Please send the package to skidoosh/, string
|
36
31
|
end
|
37
32
|
|
38
33
|
def test_replace_addresses
|
39
34
|
string = AddressExtractor.replace_addresses(test_data.first[:input]) do |address_hash, address|
|
40
35
|
"skidoosh"
|
41
36
|
end
|
42
|
-
|
43
|
-
|
37
|
+
assert_match /Please send the package to skidoosh/, string
|
38
|
+
assert_match /via mail at:\s+skidoosh/, string
|
44
39
|
end
|
45
40
|
|
46
41
|
def test_no_addresses_found
|
@@ -70,7 +65,7 @@ test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby NY 12345",
|
|
70
65
|
|
71
66
|
test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY 12345",
|
72
67
|
{ :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
|
73
|
-
|
68
|
+
|
74
69
|
test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY, 12345",
|
75
70
|
{ :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
|
76
71
|
|
@@ -85,4 +80,10 @@ test_input "
|
|
85
80
|
|
86
81
|
test_input "Apple Computer, Inc. 1 Infinite Loop, Cupertino, CA 95014",
|
87
82
|
{ :street1 => "1 Infinite Loop", :street2 => nil, :city => "Cupertino", :state => "CA", :zip => "95014" }
|
88
|
-
|
83
|
+
|
84
|
+
test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr NW, Leesburg, VA",
|
85
|
+
{ :street1 => "60 Ida Lee Dr NW", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
|
86
|
+
|
87
|
+
test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr N West, Leesburg, VA",
|
88
|
+
{ :street1 => "60 Ida Lee Dr N West", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
|
89
|
+
|
data/test/test_helper.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rubygems'
|
3
|
+
begin require 'redgreen' unless ENV['TM_FILENAME']; rescue LoadError; end
|
4
4
|
|
5
|
+
module TestDataHelper
|
5
6
|
def test_input(input_string, *expected_outputs)
|
6
7
|
test_data << { :input => input_string, :expected_output => expected_outputs }
|
7
8
|
end
|
@@ -9,23 +10,20 @@ module TestDataHelper
|
|
9
10
|
def each_test_data
|
10
11
|
test_data.each { |t| yield(t) }
|
11
12
|
end
|
12
|
-
|
13
|
+
|
13
14
|
def test_data
|
14
|
-
|
15
|
+
@@test_data ||= []
|
15
16
|
end
|
16
|
-
|
17
17
|
end
|
18
18
|
|
19
19
|
module Helpers
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
assert_equal a[k], b[k], "a[#{k.inspect}] = #{a[k].inspect} != b[#{k.inspect}] = #{b[k].inspect}"
|
20
|
+
def assert_equal_hashes(expected, hash)
|
21
|
+
(expected.keys + hash.keys).uniq.each do |k|
|
22
|
+
assert_equal expected[k], hash[k], "expected[#{k.inspect}] = #{expected[k].inspect} != hash[#{k.inspect}] = #{hash[k].inspect}"
|
24
23
|
end
|
25
24
|
end
|
25
|
+
end
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
end
|
30
|
-
|
27
|
+
class Test::Unit::TestCase
|
28
|
+
include Helpers
|
31
29
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coderifous-address_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Garvin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-11-
|
12
|
+
date: 2008-11-21 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|