coderifous-address_extractor 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/address_extractor.gemspec +2 -2
- data/lib/address_extractor.rb +15 -15
- data/test/test_address_extractor.rb +19 -18
- data/test/test_helper.rb +12 -14
- metadata +2 -2
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
|
|
2
2
|
require 'rake'
|
3
3
|
require 'echoe'
|
4
4
|
|
5
|
-
Echoe.new('address_extractor', '0.1.3') do |p|
|
5
|
+
Echoe.new('address_extractor', '0.1.4') do |p|
|
6
6
|
p.description = "Give it text. It finds addresses in it."
|
7
7
|
p.url = "http://github.com/coderifous/address_extractor"
|
8
8
|
p.author = "Jim Garvin"
|
data/address_extractor.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = %q{address_extractor}
|
3
|
-
s.version = "0.1.3"
|
3
|
+
s.version = "0.1.4"
|
4
4
|
|
5
5
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
6
6
|
s.authors = ["Jim Garvin"]
|
7
|
-
s.date = %q{2008-11-
|
7
|
+
s.date = %q{2008-11-21}
|
8
8
|
s.description = %q{Give it text. It finds addresses in it.}
|
9
9
|
s.email = %q{jim at thegarvin dot com}
|
10
10
|
s.extra_rdoc_files = ["lib/address_extractor.rb", "LICENSE.textile", "README.textile"]
|
data/lib/address_extractor.rb
CHANGED
@@ -12,8 +12,8 @@ class AddressExtractor
|
|
12
12
|
def find_addresses(string)
|
13
13
|
string.scan(ADDRESS_PATTERN).collect { |a| hashify_results(a) }.compact
|
14
14
|
end
|
15
|
-
|
16
|
-
# Pass it a block that recieves 2 parameters:
|
15
|
+
|
16
|
+
# Pass it a block that recieves 2 parameters:
|
17
17
|
# address hash
|
18
18
|
# matched address string ($&)
|
19
19
|
# Whatever your block returns will be used for the substition.
|
@@ -33,9 +33,9 @@ class AddressExtractor
|
|
33
33
|
useful_address?(hash) ? yield(hash, $&) : match
|
34
34
|
end
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
private
|
38
|
-
|
38
|
+
|
39
39
|
def hashify_results(matches)
|
40
40
|
return nil if matches.nil?
|
41
41
|
result = { }
|
@@ -46,16 +46,16 @@ class AddressExtractor
|
|
46
46
|
end
|
47
47
|
useful_address?(result) ? result : nil
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
def useful_address?(hash)
|
51
|
-
hash &&
|
51
|
+
hash &&
|
52
52
|
hash[:street1] && ( hash[:zip] || hash[:city] && hash[:state] )
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ]
|
58
|
-
|
58
|
+
|
59
59
|
STATES = <<-EOF
|
60
60
|
ALABAMA AL
|
61
61
|
ALASKA AK
|
@@ -117,9 +117,9 @@ class AddressExtractor
|
|
117
117
|
WISCONSIN WI
|
118
118
|
WYOMING WY
|
119
119
|
EOF
|
120
|
-
|
120
|
+
|
121
121
|
STATE_REGEX = STATES.split(/\n/).collect{ |n| n.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/) }.join("|")
|
122
|
-
|
122
|
+
|
123
123
|
SECONDARY_UNIT_DESIGNATORS = <<-EOF
|
124
124
|
APARTMENT APT
|
125
125
|
BASEMENT BSMT
|
@@ -145,14 +145,14 @@ class AddressExtractor
|
|
145
145
|
UNIT UNIT
|
146
146
|
UPPER UPPR
|
147
147
|
EOF
|
148
|
-
|
148
|
+
|
149
149
|
SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).collect{ |n| n.scan(/(\w+)\s*(\w+)\s*$/) }.join("|")
|
150
150
|
|
151
151
|
ADDRESS_PATTERN = /
|
152
152
|
(
|
153
153
|
\d+ # A few numbers
|
154
154
|
\s+
|
155
|
-
(?:[A-Za-z'.-]+\s?){
|
155
|
+
(?:[A-Za-z'.-]+\s?){1,5} # Followed by a street name
|
156
156
|
)
|
157
157
|
\s* ,? \s* # a comma, optionally
|
158
158
|
(
|
@@ -168,9 +168,9 @@ class AddressExtractor
|
|
168
168
|
\b(#{STATE_REGEX})\b # state
|
169
169
|
\s* ,? \s* # a comma, optionally
|
170
170
|
(\d{5})? # a zip code, optionally
|
171
|
-
)
|
171
|
+
)
|
172
172
|
| # or, instead of city and state
|
173
173
|
(\d{5})? # a lone zip code will do
|
174
174
|
)
|
175
175
|
/xi
|
176
|
-
end
|
176
|
+
end
|
@@ -1,46 +1,41 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'address_extractor.rb'
|
5
|
-
require 'test_helper.rb'
|
1
|
+
require 'address_extractor'
|
2
|
+
require 'test_helper'
|
6
3
|
include TestDataHelper
|
7
4
|
|
8
5
|
class AddressExtractorTest < Test::Unit::TestCase
|
9
|
-
include Helpers
|
10
|
-
|
11
6
|
def test_first_address_extraction
|
12
7
|
each_test_data do |test_data|
|
13
8
|
address = AddressExtractor.first_address(test_data[:input])
|
14
9
|
flunk "No address found in:\n#{test_data[:input]}" if address.nil?
|
15
|
-
assert_equal_hashes
|
10
|
+
assert_equal_hashes test_data[:expected_output].first, address
|
16
11
|
end
|
17
12
|
end
|
18
|
-
|
13
|
+
|
19
14
|
def test_find_addresses
|
20
15
|
each_test_data do |test_data|
|
21
16
|
addresses = AddressExtractor.find_addresses(test_data[:input])
|
22
17
|
assert_equal addresses.size, test_data[:expected_output].size
|
23
18
|
test_data[:expected_output].each do |expected_output|
|
24
|
-
assert_equal_hashes addresses.shift
|
19
|
+
assert_equal_hashes expected_output, addresses.shift
|
25
20
|
end
|
26
21
|
end
|
27
22
|
end
|
28
|
-
|
23
|
+
|
29
24
|
def test_replace_first_address
|
30
25
|
string = AddressExtractor.replace_first_address(test_data.first[:input]) do |address_hash, address|
|
31
|
-
assert_equal_hashes
|
32
|
-
assert_match /^\s*123 Foo St., Someplace FL\s*/, address
|
26
|
+
assert_equal_hashes test_data.first[:expected_output].first, address_hash
|
27
|
+
assert_match /^\s*123 Foo St., Someplace FL\s*/, address
|
33
28
|
"skidoosh"
|
34
29
|
end
|
35
|
-
|
30
|
+
assert_match /Please send the package to skidoosh/, string
|
36
31
|
end
|
37
32
|
|
38
33
|
def test_replace_addresses
|
39
34
|
string = AddressExtractor.replace_addresses(test_data.first[:input]) do |address_hash, address|
|
40
35
|
"skidoosh"
|
41
36
|
end
|
42
|
-
|
43
|
-
|
37
|
+
assert_match /Please send the package to skidoosh/, string
|
38
|
+
assert_match /via mail at:\s+skidoosh/, string
|
44
39
|
end
|
45
40
|
|
46
41
|
def test_no_addresses_found
|
@@ -70,7 +65,7 @@ test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby NY 12345",
|
|
70
65
|
|
71
66
|
test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY 12345",
|
72
67
|
{ :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
|
73
|
-
|
68
|
+
|
74
69
|
test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY, 12345",
|
75
70
|
{ :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
|
76
71
|
|
@@ -85,4 +80,10 @@ test_input "
|
|
85
80
|
|
86
81
|
test_input "Apple Computer, Inc. 1 Infinite Loop, Cupertino, CA 95014",
|
87
82
|
{ :street1 => "1 Infinite Loop", :street2 => nil, :city => "Cupertino", :state => "CA", :zip => "95014" }
|
88
|
-
|
83
|
+
|
84
|
+
test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr NW, Leesburg, VA",
|
85
|
+
{ :street1 => "60 Ida Lee Dr NW", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
|
86
|
+
|
87
|
+
test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr N West, Leesburg, VA",
|
88
|
+
{ :street1 => "60 Ida Lee Dr N West", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
|
89
|
+
|
data/test/test_helper.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
require 'test/unit'
|
2
|
+
require 'rubygems'
|
3
|
+
begin require 'redgreen' unless ENV['TM_FILENAME']; rescue LoadError; end
|
4
4
|
|
5
|
+
module TestDataHelper
|
5
6
|
def test_input(input_string, *expected_outputs)
|
6
7
|
test_data << { :input => input_string, :expected_output => expected_outputs }
|
7
8
|
end
|
@@ -9,23 +10,20 @@ module TestDataHelper
|
|
9
10
|
def each_test_data
|
10
11
|
test_data.each { |t| yield(t) }
|
11
12
|
end
|
12
|
-
|
13
|
+
|
13
14
|
def test_data
|
14
|
-
|
15
|
+
@@test_data ||= []
|
15
16
|
end
|
16
|
-
|
17
17
|
end
|
18
18
|
|
19
19
|
module Helpers
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
assert_equal a[k], b[k], "a[#{k.inspect}] = #{a[k].inspect} != b[#{k.inspect}] = #{b[k].inspect}"
|
20
|
+
def assert_equal_hashes(expected, hash)
|
21
|
+
(expected.keys + hash.keys).uniq.each do |k|
|
22
|
+
assert_equal expected[k], hash[k], "expected[#{k.inspect}] = #{expected[k].inspect} != hash[#{k.inspect}] = #{hash[k].inspect}"
|
24
23
|
end
|
25
24
|
end
|
25
|
+
end
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
end
|
30
|
-
|
27
|
+
class Test::Unit::TestCase
|
28
|
+
include Helpers
|
31
29
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coderifous-address_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Garvin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-11-
|
12
|
+
date: 2008-11-21 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|