address_extractor 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ h4. Copyright and License
2
+
3
+ The MIT License
4
+
5
+ Copyright (c) 2008 Jim Garvin
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in
15
+ all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
+ THE SOFTWARE.
@@ -0,0 +1,6 @@
1
+ lib/address_extractor.rb
2
+ LICENSE.textile
3
+ Manifest
4
+ Rakefile
5
+ README.textile
6
+ test/test_address_extractor.rb
@@ -0,0 +1,48 @@
1
+ h1. AddressExtractor
2
+
3
+ Find and/or replace mailing addresses in strings.
4
+
5
+ h2. Examples
6
+
7
+ <pre><code>
8
+ require 'rubygems'
9
+ require 'address_extractor'
10
+
11
+ string = <<EOF
12
+ Please send the package to 123 Foo St., Someplace FL
13
+
14
+ My phone number is 123-1234 and St. Marc of Israel can be reached
15
+ via mail at:
16
+ 123 Goob Avenue
17
+ Apt 123
18
+ Nice Town CA 123456
19
+ EOF
20
+
21
+ # Find first address
22
+ AddressExtractor.first_address(string) # => { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" }
23
+
24
+ # Find all addresses
25
+ AddressExtractor.find_addresses(string) # =>
26
+ # [
27
+ # { :street1 => "123 Foo St.", :city => "Someplace", :state => "FL" },
28
+ # { :street1 => "123 Goob Avenue", :street2 => "Apt 123", :city => "Nice Town", :state => "CA", :zip => "12345" }
29
+ # ]
30
+
31
+ # Do a gsub on first address
32
+ new_string = AddressExtractor.replace_first_address(string) do |address_hash, address_string|
33
+ map_link_to(address_string)
34
+ end
35
+
36
+ # Do a gsub on all addresses
37
+ new_string = AddressExtractor.replace_addresses(string) do |address_hash, address_string|
38
+ map_link_to(address_string)
39
+ end
40
+ </code></pre>
41
+
42
+ h3. About
43
+
44
+ Written by Jim Garvin ("coderifous":http://github.com/coderifous) at RubyConf '08 at the request of Chris Murphy ("chmurph2":http://github.com/chmurph2) and Ryan McGeary ("rmm5t":http://github.com/rmm5t) so they could use it in their awesome "invitation and survey app":http://yarp.com.
45
+
46
+ You can use it, too.
47
+
48
+ The address-finding regex may be a bit naive; I'll gladly accept pull requests that add to the test data and tests.
@@ -0,0 +1,14 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Declare the gem's packaging metadata through Echoe.
Echoe.new('address_extractor', '0.1.4') do |gem|
  gem.description              = "Give it text. It finds addresses in it."
  gem.url                      = "http://github.com/coderifous/address_extractor"
  gem.author                   = "Jim Garvin"
  gem.email                    = "jim at thegarvin dot com"
  gem.ignore_pattern           = ["tmp/*", "script/*"]
  gem.development_dependencies = []
end

# Load every extra rake task file found in the tasks directory next to this
# Rakefile, in sorted (deterministic) order.
task_glob = File.join(File.dirname(__FILE__), 'tasks', '*.rake')
Dir[task_glob].sort.each { |rake_file| load rake_file }
@@ -0,0 +1,30 @@
1
# Gem specification for address_extractor 0.1.4.
#
# Setters that not every RubyGems release provides are guarded with
# respond_to? so the file keeps loading regardless of the RubyGems version.
Gem::Specification.new do |s|
  s.name = %q{address_extractor}
  s.version = "0.1.4"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jim Garvin"]
  s.date = %q{2008-11-21}
  s.description = %q{Give it text. It finds addresses in it.}
  s.email = %q{jim at thegarvin dot com}
  s.extra_rdoc_files = ["lib/address_extractor.rb", "LICENSE.textile", "README.textile"]
  s.files = ["lib/address_extractor.rb", "LICENSE.textile", "Manifest", "Rakefile", "README.textile", "test/test_address_extractor.rb", "address_extractor.gemspec", "test/test_helper.rb"]
  # has_rdoc= is deprecated in newer RubyGems; only call it where it exists.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/coderifous/address_extractor}
  s.rdoc_options = ["--line-numbers", "--title", "Address_extractor", "--main", "README.textile"]
  s.require_paths = ["lib"]
  # rubyforge_project= is deprecated in newer RubyGems; only call it where it exists.
  s.rubyforge_project = %q{address_extractor} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.2.0}
  s.summary = %q{Give it text. It finds addresses in it.}
  s.test_files = ["test/test_address_extractor.rb", "test/test_helper.rb"]

  # The gem declares no dependencies, so only the specification version
  # needs to be set here.
  s.specification_version = 2 if s.respond_to? :specification_version
end
@@ -0,0 +1,176 @@
1
# Finds US-style mailing addresses in free-form text and optionally replaces
# them.
#
# Addresses are reported as hashes keyed by the symbols in CAPTURE_MAP
# (:street1, :street2, :city, :state, :zip). A key is present only when that
# part was matched. A match counts as an address only if it has a street line
# plus either a zip code or both a city and a state.
class AddressExtractor
  class << self

    # Returns hash for the first usable address found.
    # Returns nil if no usable address found.
    # Raw pattern matches that lack a zip or a city and state (for example
    # "555 Bob" in ordinary prose) are skipped instead of ending the search,
    # so the result agrees with find_addresses(string).first.
    def first_address(string)
      string.scan(ADDRESS_PATTERN) do |captures|
        address = hashify_results(captures)
        return address if address
      end
      nil
    end

    # Returns array of hashes for each address found.
    # Returns empty array if no addresses found.
    def find_addresses(string)
      string.scan(ADDRESS_PATTERN).collect { |captures| hashify_results(captures) }.compact
    end

    # Pass it a block that receives 2 parameters:
    #   address hash
    #   matched address string
    # Whatever your block returns will be used for the substitution.
    # Returns new string with substitution applied to first identified address.
    # If no address found, returns same string unaltered.
    # Pattern matches that are not usable addresses are left untouched and do
    # not count as the "first" address.
    def replace_first_address(string)
      replaced = false
      string.gsub(ADDRESS_PATTERN) do |match|
        next match if replaced
        address = hashify_results(Regexp.last_match.captures)
        next match unless address
        replaced = true
        yield(address, match)
      end
    end

    # Same as +replace_first_address+ but applies substitution to all identified addresses.
    def replace_addresses(string)
      string.gsub(ADDRESS_PATTERN) do |match|
        # Use the captures of the match in context rather than re-scanning
        # the matched substring on its own.
        address = hashify_results(Regexp.last_match.captures)
        address ? yield(address, match) : match
      end
    end

    private

    # Turns the capture groups of one ADDRESS_PATTERN match into a hash.
    # +matches+ is the array of captures, in CAPTURE_MAP order, or nil.
    # Surrounding whitespace is stripped from every captured value.
    # Returns nil when +matches+ is nil or the result is not a usable address.
    def hashify_results(matches)
      return nil if matches.nil?
      result = {}
      CAPTURE_MAP.each_with_index do |field, index|
        value = matches[index]
        result[field] = value.to_s.strip if value
      end
      useful_address?(result) ? result : nil
    end

    # True-ish when +hash+ has a street line plus either a zip code or both
    # a city and a state.
    def useful_address?(hash)
      hash &&
        hash[:street1] && (hash[:zip] || hash[:city] && hash[:state])
    end

  end

  # Field name for each capture group of ADDRESS_PATTERN, in group order.
  # :zip appears twice because the pattern has two zip groups: one after
  # city and state, and one for a lone zip code.
  CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ].freeze

  # State and territory names with their two-letter abbreviations.
  STATES = <<-EOF
    ALABAMA                         AL
    ALASKA                          AK
    AMERICAN SAMOA                  AS
    ARIZONA                         AZ
    ARKANSAS                        AR
    CALIFORNIA                      CA
    COLORADO                        CO
    CONNECTICUT                     CT
    DELAWARE                        DE
    DISTRICT OF COLUMBIA            DC
    FEDERATED STATES OF MICRONESIA  FM
    FLORIDA                         FL
    GEORGIA                         GA
    GUAM                            GU
    HAWAII                          HI
    IDAHO                           ID
    ILLINOIS                        IL
    INDIANA                         IN
    IOWA                            IA
    KANSAS                          KS
    KENTUCKY                        KY
    LOUISIANA                       LA
    MAINE                           ME
    MARSHALL ISLANDS                MH
    MARYLAND                        MD
    MASSACHUSETTS                   MA
    MICHIGAN                        MI
    MINNESOTA                       MN
    MISSISSIPPI                     MS
    MISSOURI                        MO
    MONTANA                         MT
    NEBRASKA                        NE
    NEVADA                          NV
    NEW HAMPSHIRE                   NH
    NEW JERSEY                      NJ
    NEW MEXICO                      NM
    NEW YORK                        NY
    NORTH CAROLINA                  NC
    NORTH DAKOTA                    ND
    NORTHERN MARIANA ISLANDS        MP
    OHIO                            OH
    OKLAHOMA                        OK
    OREGON                          OR
    PALAU                           PW
    PENNSYLVANIA                    PA
    PUERTO RICO                     PR
    RHODE ISLAND                    RI
    SOUTH CAROLINA                  SC
    SOUTH DAKOTA                    SD
    TENNESSEE                       TN
    TEXAS                           TX
    UTAH                            UT
    VERMONT                         VT
    VIRGIN ISLANDS                  VI
    VIRGINIA                        VA
    WASHINGTON                      WA
    WEST VIRGINIA                   WV
    WISCONSIN                       WI
    WYOMING                         WY
  EOF

  # Regex source alternating every state name and abbreviation.
  # ADDRESS_PATTERN is compiled in extended mode, where literal whitespace is
  # ignored, so the spaces inside multi-word names are written as \s+ here;
  # otherwise "NEW YORK" would only ever match "NEWYORK".
  STATE_REGEX = STATES.split(/\n/).collect { |line|
    line.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/).flatten.collect { |token| token.split.join('\s+') }
  }.flatten.join("|").freeze

  # Secondary unit designator names with their abbreviations.
  SECONDARY_UNIT_DESIGNATORS = <<-EOF
    APARTMENT   APT
    BASEMENT    BSMT
    BUILDING    BLDG
    DEPARTMENT  DEPT
    FLOOR       FL
    FRONT       FRNT
    HANGAR      HNGR
    LOBBY       LBBY
    LOT         LOT
    LOWER       LOWR
    OFFICE      OFC
    PENTHOUSE   PH
    PIER        PIER
    REAR        REAR
    ROOM        RM
    SIDE        SIDE
    SLIP        SLIP
    SPACE       SPC
    STOP        STOP
    SUITE       STE
    TRAILER     TRLR
    UNIT        UNIT
    UPPER       UPPR
  EOF

  # Regex source alternating every unit designator and abbreviation.
  SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).collect { |line|
    line.scan(/(\w+)\s*(\w+)\s*$/)
  }.join("|").freeze

  # Capture groups, in order: street line, secondary unit, city, state,
  # zip after city and state, lone zip (see CAPTURE_MAP).
  ADDRESS_PATTERN = /
    (
      \d+                           # A few numbers
      \s+
      (?:[A-Za-z'.-]+\s?){1,5}      # Followed by a street name
    )
    \s* ,? \s*                      # a comma, optionally
    (
      (?:\d+\s+)?                   # a secondary unit, optionally
      \b(?:#{SECONDARY_UNIT_DESIGNATORS_REGEX})\b   # whole words only, so "Lothian" is not read as LOT
      (?:\s+\d+)?
    )?
    \s* ,? \s*                      # a comma, optionally
    (?:
      (?:
        ( (?:[A-Za-z]+\s?){0,2} (?:[A-Za-z]+) )   # city
        \s* ,? \s*                  # a comma, optionally
        \b(#{STATE_REGEX})\b        # state
        \s* ,? \s*                  # a comma, optionally
        (\d{5})?                    # a zip code, optionally
      )
      |                             # or, instead of city and state
      (\d{5})?                      # a lone zip code will do
    )
  /xi
end
@@ -0,0 +1,89 @@
1
+ require 'address_extractor'
2
+ require 'test_helper'
3
+ include TestDataHelper
4
+
5
# Exercises AddressExtractor against the fixtures registered with +test_input+.
# Each fixture is a hash with :input (the text to search) and
# :expected_output (the address hashes expected, in order).
class AddressExtractorTest < Test::Unit::TestCase
  def test_first_address_extraction
    each_test_data do |test_data|
      address = AddressExtractor.first_address(test_data[:input])
      flunk "No address found in:\n#{test_data[:input]}" if address.nil?
      assert_equal_hashes test_data[:expected_output].first, address
    end
  end

  def test_find_addresses
    each_test_data do |test_data|
      addresses = AddressExtractor.find_addresses(test_data[:input])
      # Expected value goes first so a failure message reads the right way round.
      assert_equal test_data[:expected_output].size, addresses.size
      test_data[:expected_output].each do |expected_output|
        assert_equal_hashes expected_output, addresses.shift
      end
    end
  end

  def test_replace_first_address
    string = AddressExtractor.replace_first_address(test_data.first[:input]) do |address_hash, address|
      assert_equal_hashes test_data.first[:expected_output].first, address_hash
      assert_match(/^\s*123 Foo St., Someplace FL\s*/, address)
      "skidoosh"
    end
    assert_match(/Please send the package to skidoosh/, string)
  end

  def test_replace_addresses
    string = AddressExtractor.replace_addresses(test_data.first[:input]) do |address_hash, address|
      "skidoosh"
    end
    assert_match(/Please send the package to skidoosh/, string)
    assert_match(/via mail at:\s+skidoosh/, string)
  end

  # With no address in the text, no block is needed: nothing is yielded.
  def test_no_addresses_found
    assert_nil AddressExtractor.first_address("foo")
    assert_equal [], AddressExtractor.find_addresses("foo")
    assert_equal "foo", AddressExtractor.replace_first_address("foo")
    assert_equal "foo", AddressExtractor.replace_addresses("foo")
  end
end
48
+
49
+ # Test Input/Expected outputs defined below using test_input helper
50
+ # Expanding the tests will probably start with adding new test input
51
+
52
+ test_input "
53
+ Please send the package to 123 Foo St., Someplace FL
54
+
55
+ My phone number is 123-1234 and St. Marc of Israel can be reached
56
+ via mail at:
57
+ 123 Goob Avenue
58
+ Apt 123
59
+ Nice Town CA 12345",
60
+ { :street1 => "123 Foo St.", :street2 => nil, :city => "Someplace", :state => "FL", :zip => nil },
61
+ { :street1 => "123 Goob Avenue", :street2 => "Apt 123", :city => "Nice Town", :state => "CA", :zip => "12345" }
62
+
63
+ test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby NY 12345",
64
+ { :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
65
+
66
+ test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY 12345",
67
+ { :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
68
+
69
+ test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY, 12345",
70
+ { :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
71
+
72
+ test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, 12345",
73
+ { :street1 => "123 Foo Bar Street", :street2 => nil, :city => nil, :state => nil, :zip => "12345" }
74
+
75
+ test_input "
76
+ Apple Computer, Inc.
77
+ 1 Infinite Loop
78
+ Cupertino, CA 95014",
79
+ { :street1 => "1 Infinite Loop", :street2 => nil, :city => "Cupertino", :state => "CA", :zip => "95014" }
80
+
81
+ test_input "Apple Computer, Inc. 1 Infinite Loop, Cupertino, CA 95014",
82
+ { :street1 => "1 Infinite Loop", :street2 => nil, :city => "Cupertino", :state => "CA", :zip => "95014" }
83
+
84
+ test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr NW, Leesburg, VA",
85
+ { :street1 => "60 Ida Lee Dr NW", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
86
+
87
+ test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr N West, Leesburg, VA",
88
+ { :street1 => "60 Ida Lee Dr N West", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
89
+
@@ -0,0 +1,29 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ begin require 'redgreen' unless ENV['TM_FILENAME']; rescue LoadError; end
4
+
5
# Mixin that keeps one shared list of test fixtures.
module TestDataHelper
  # Registers a fixture: the text to search plus the outputs expected from it.
  # Returns the fixture list.
  def test_input(input_string, *expected_outputs)
    fixture = { :input => input_string, :expected_output => expected_outputs }
    test_data.push(fixture)
  end

  # Yields each registered fixture in registration order.
  def each_test_data
    test_data.each { |fixture| yield fixture }
  end

  # The fixture list, created empty on first use and shared by every caller.
  def test_data
    @@test_data = [] unless defined?(@@test_data)
    @@test_data
  end
end
18
+
19
# Extra assertions for the test cases.
module Helpers
  # Asserts that +expected+ and +hash+ hold equal values under every key that
  # appears in either of them, so a key missing on one side compares as nil.
  def assert_equal_hashes(expected, hash)
    (expected.keys | hash.keys).each do |key|
      wanted = expected[key]
      actual = hash[key]
      assert_equal wanted, actual, "expected[#{key.inspect}] = #{wanted.inspect} != hash[#{key.inspect}] = #{actual.inspect}"
    end
  end
end
26
+
27
# Make the Helpers assertions available in every test case.
Test::Unit::TestCase.class_eval do
  include Helpers
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: address_extractor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Jim Garvin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-11-21 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Give it text. It finds addresses in it.
17
+ email: jim at thegarvin dot com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - lib/address_extractor.rb
24
+ - LICENSE.textile
25
+ - README.textile
26
+ files:
27
+ - lib/address_extractor.rb
28
+ - LICENSE.textile
29
+ - Manifest
30
+ - Rakefile
31
+ - README.textile
32
+ - test/test_address_extractor.rb
33
+ - address_extractor.gemspec
34
+ - test/test_helper.rb
35
+ has_rdoc: true
36
+ homepage: http://github.com/coderifous/address_extractor
37
+ licenses: []
38
+
39
+ post_install_message:
40
+ rdoc_options:
41
+ - --line-numbers
42
+ - --title
43
+ - Address_extractor
44
+ - --main
45
+ - README.textile
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "1.2"
59
+ version:
60
+ requirements: []
61
+
62
+ rubyforge_project: address_extractor
63
+ rubygems_version: 1.3.5
64
+ signing_key:
65
+ specification_version: 2
66
+ summary: Give it text. It finds addresses in it.
67
+ test_files:
68
+ - test/test_address_extractor.rb
69
+ - test/test_helper.rb