coderifous-address_extractor 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/address_extractor.gemspec +2 -2
- data/lib/address_extractor.rb +15 -15
- data/test/test_address_extractor.rb +19 -18
- data/test/test_helper.rb +12 -14
- metadata +2 -2
    
        data/Rakefile
    CHANGED
    
    | @@ -2,7 +2,7 @@ require 'rubygems' | |
| 2 2 | 
             
            require 'rake'
         | 
| 3 3 | 
             
            require 'echoe'
         | 
| 4 4 |  | 
| 5 | 
            -
            Echoe.new('address_extractor', '0.1.3') do |p|
         | 
| 5 | 
            +
            Echoe.new('address_extractor', '0.1.4') do |p|
         | 
| 6 6 | 
             
              p.description    = "Give it text.  It finds addresses in it."
         | 
| 7 7 | 
             
              p.url            = "http://github.com/coderifous/address_extractor"
         | 
| 8 8 | 
             
              p.author         = "Jim Garvin"
         | 
    
        data/address_extractor.gemspec
    CHANGED
    
    | @@ -1,10 +1,10 @@ | |
| 1 1 | 
             
            Gem::Specification.new do |s|
         | 
| 2 2 | 
             
              s.name = %q{address_extractor}
         | 
| 3 | 
            -
              s.version = "0.1.3"
         | 
| 3 | 
            +
              s.version = "0.1.4"
         | 
| 4 4 |  | 
| 5 5 | 
             
              s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
         | 
| 6 6 | 
             
              s.authors = ["Jim Garvin"]
         | 
| 7 | 
            -
              s.date = %q{2008-11- | 
| 7 | 
            +
              s.date = %q{2008-11-21}
         | 
| 8 8 | 
             
              s.description = %q{Give it text.  It finds addresses in it.}
         | 
| 9 9 | 
             
              s.email = %q{jim at thegarvin dot com}
         | 
| 10 10 | 
             
              s.extra_rdoc_files = ["lib/address_extractor.rb", "LICENSE.textile", "README.textile"]
         | 
    
        data/lib/address_extractor.rb
    CHANGED
    
    | @@ -12,8 +12,8 @@ class AddressExtractor | |
| 12 12 | 
             
                def find_addresses(string)
         | 
| 13 13 | 
             
                  string.scan(ADDRESS_PATTERN).collect { |a| hashify_results(a) }.compact
         | 
| 14 14 | 
             
                end
         | 
| 15 | 
            -
             | 
| 16 | 
            -
                # Pass it a block that recieves 2 parameters: | 
| 15 | 
            +
             | 
| 16 | 
            +
                # Pass it a block that recieves 2 parameters:
         | 
| 17 17 | 
             
                #   address hash
         | 
| 18 18 | 
             
                #   matched address string ($&)
         | 
| 19 19 | 
             
                # Whatever your block returns will be used for the substition.
         | 
| @@ -33,9 +33,9 @@ class AddressExtractor | |
| 33 33 | 
             
                    useful_address?(hash) ? yield(hash, $&) : match
         | 
| 34 34 | 
             
                  end
         | 
| 35 35 | 
             
                end
         | 
| 36 | 
            -
             | 
| 36 | 
            +
             | 
| 37 37 | 
             
              private
         | 
| 38 | 
            -
             | 
| 38 | 
            +
             | 
| 39 39 | 
             
                def hashify_results(matches)
         | 
| 40 40 | 
             
                  return nil if matches.nil?
         | 
| 41 41 | 
             
                  result = { }
         | 
| @@ -46,16 +46,16 @@ class AddressExtractor | |
| 46 46 | 
             
                  end
         | 
| 47 47 | 
             
                  useful_address?(result) ? result : nil
         | 
| 48 48 | 
             
                end
         | 
| 49 | 
            -
             | 
| 49 | 
            +
             | 
| 50 50 | 
             
                def useful_address?(hash)
         | 
| 51 | 
            -
                  hash && | 
| 51 | 
            +
                  hash &&
         | 
| 52 52 | 
             
                  hash[:street1] && ( hash[:zip] || hash[:city] && hash[:state] )
         | 
| 53 53 | 
             
                end
         | 
| 54 | 
            -
             | 
| 54 | 
            +
             | 
| 55 55 | 
             
              end
         | 
| 56 | 
            -
             | 
| 56 | 
            +
             | 
| 57 57 | 
             
              CAPTURE_MAP = [ :street1, :street2, :city, :state, :zip, :zip ]
         | 
| 58 | 
            -
             | 
| 58 | 
            +
             | 
| 59 59 | 
             
              STATES = <<-EOF
         | 
| 60 60 | 
             
                ALABAMA  AL
         | 
| 61 61 | 
             
                ALASKA  AK
         | 
| @@ -117,9 +117,9 @@ class AddressExtractor | |
| 117 117 | 
             
                WISCONSIN  WI
         | 
| 118 118 | 
             
                WYOMING  WY
         | 
| 119 119 | 
             
              EOF
         | 
| 120 | 
            -
             | 
| 120 | 
            +
             | 
| 121 121 | 
             
              STATE_REGEX = STATES.split(/\n/).collect{ |n| n.scan(/(\w.*\w)\s*([A-Z]{2})\s*$/) }.join("|")
         | 
| 122 | 
            -
             | 
| 122 | 
            +
             | 
| 123 123 | 
             
              SECONDARY_UNIT_DESIGNATORS = <<-EOF
         | 
| 124 124 | 
             
                APARTMENT APT
         | 
| 125 125 | 
             
                BASEMENT BSMT
         | 
| @@ -145,14 +145,14 @@ class AddressExtractor | |
| 145 145 | 
             
                UNIT UNIT
         | 
| 146 146 | 
             
                UPPER UPPR
         | 
| 147 147 | 
             
              EOF
         | 
| 148 | 
            -
             | 
| 148 | 
            +
             | 
| 149 149 | 
             
              SECONDARY_UNIT_DESIGNATORS_REGEX = SECONDARY_UNIT_DESIGNATORS.split(/\n/).collect{ |n| n.scan(/(\w+)\s*(\w+)\s*$/) }.join("|")
         | 
| 150 150 |  | 
| 151 151 | 
             
              ADDRESS_PATTERN = /
         | 
| 152 152 | 
             
                (
         | 
| 153 153 | 
             
                  \d+                           # A few numbers
         | 
| 154 154 | 
             
                  \s+
         | 
| 155 | 
            -
                  (?:[A-Za-z'.-]+\s?){ | 
| 155 | 
            +
                  (?:[A-Za-z'.-]+\s?){1,5}      # Followed by a street name
         | 
| 156 156 | 
             
                )
         | 
| 157 157 | 
             
                \s* ,?  \s*                     # a comma, optionally
         | 
| 158 158 | 
             
                (
         | 
| @@ -168,9 +168,9 @@ class AddressExtractor | |
| 168 168 | 
             
                    \b(#{STATE_REGEX})\b        # state
         | 
| 169 169 | 
             
                    \s* ,? \s*                  # a comma, optionally
         | 
| 170 170 | 
             
                    (\d{5})?                    # a zip code, optionally
         | 
| 171 | 
            -
                  ) | 
| 171 | 
            +
                  )
         | 
| 172 172 | 
             
                  |                             # or, instead of city and state
         | 
| 173 173 | 
             
                  (\d{5})?                      # a lone zip code will do
         | 
| 174 174 | 
             
                )
         | 
| 175 175 | 
             
              /xi
         | 
| 176 | 
            -
            end
         | 
| 176 | 
            +
            end
         | 
| @@ -1,46 +1,41 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 3 | 
            -
            require 'test/unit'
         | 
| 4 | 
            -
            require 'address_extractor.rb'
         | 
| 5 | 
            -
            require 'test_helper.rb'
         | 
| 1 | 
            +
            require 'address_extractor'
         | 
| 2 | 
            +
            require 'test_helper'
         | 
| 6 3 | 
             
            include TestDataHelper
         | 
| 7 4 |  | 
| 8 5 | 
             
            class AddressExtractorTest < Test::Unit::TestCase
         | 
| 9 | 
            -
              include Helpers
         | 
| 10 | 
            -
             | 
| 11 6 | 
             
              def test_first_address_extraction
         | 
| 12 7 | 
             
                each_test_data do |test_data|
         | 
| 13 8 | 
             
                  address = AddressExtractor.first_address(test_data[:input])
         | 
| 14 9 | 
             
                  flunk "No address found in:\n#{test_data[:input]}" if address.nil?
         | 
| 15 | 
            -
                  assert_equal_hashes  | 
| 10 | 
            +
                  assert_equal_hashes test_data[:expected_output].first, address
         | 
| 16 11 | 
             
                end
         | 
| 17 12 | 
             
              end
         | 
| 18 | 
            -
             | 
| 13 | 
            +
             | 
| 19 14 | 
             
              def test_find_addresses
         | 
| 20 15 | 
             
                each_test_data do |test_data|
         | 
| 21 16 | 
             
                  addresses = AddressExtractor.find_addresses(test_data[:input])
         | 
| 22 17 | 
             
                  assert_equal addresses.size, test_data[:expected_output].size
         | 
| 23 18 | 
             
                  test_data[:expected_output].each do |expected_output|
         | 
| 24 | 
            -
                    assert_equal_hashes addresses.shift | 
| 19 | 
            +
                    assert_equal_hashes expected_output, addresses.shift
         | 
| 25 20 | 
             
                  end
         | 
| 26 21 | 
             
                end
         | 
| 27 22 | 
             
              end
         | 
| 28 | 
            -
             | 
| 23 | 
            +
             | 
| 29 24 | 
             
              def test_replace_first_address
         | 
| 30 25 | 
             
                string = AddressExtractor.replace_first_address(test_data.first[:input]) do |address_hash, address|
         | 
| 31 | 
            -
                  assert_equal_hashes  | 
| 32 | 
            -
                  assert_match /^\s*123 Foo St., Someplace FL\s*/, address | 
| 26 | 
            +
                  assert_equal_hashes test_data.first[:expected_output].first, address_hash
         | 
| 27 | 
            +
                  assert_match /^\s*123 Foo St., Someplace FL\s*/, address
         | 
| 33 28 | 
             
                  "skidoosh"
         | 
| 34 29 | 
             
                end
         | 
| 35 | 
            -
                 | 
| 30 | 
            +
                assert_match /Please send the package to skidoosh/, string
         | 
| 36 31 | 
             
              end
         | 
| 37 32 |  | 
| 38 33 | 
             
              def test_replace_addresses
         | 
| 39 34 | 
             
                string = AddressExtractor.replace_addresses(test_data.first[:input]) do |address_hash, address|
         | 
| 40 35 | 
             
                  "skidoosh"
         | 
| 41 36 | 
             
                end
         | 
| 42 | 
            -
                 | 
| 43 | 
            -
                 | 
| 37 | 
            +
                assert_match /Please send the package to skidoosh/, string
         | 
| 38 | 
            +
                assert_match /via mail at:\s+skidoosh/, string
         | 
| 44 39 | 
             
              end
         | 
| 45 40 |  | 
| 46 41 | 
             
              def test_no_addresses_found
         | 
| @@ -70,7 +65,7 @@ test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby NY 12345", | |
| 70 65 |  | 
| 71 66 | 
             
            test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY 12345",
         | 
| 72 67 | 
             
              { :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
         | 
| 73 | 
            -
             | 
| 68 | 
            +
             | 
| 74 69 | 
             
            test_input "Let's meet tomorrow at noon at 123 Foo Bar Street, Scooby, NY, 12345",
         | 
| 75 70 | 
             
              { :street1 => "123 Foo Bar Street", :street2 => nil, :city => "Scooby", :state => "NY", :zip => "12345" }
         | 
| 76 71 |  | 
| @@ -85,4 +80,10 @@ test_input " | |
| 85 80 |  | 
| 86 81 | 
             
            test_input "Apple Computer, Inc. 1 Infinite Loop, Cupertino, CA 95014",
         | 
| 87 82 | 
             
              { :street1 => "1 Infinite Loop", :street2 => nil, :city => "Cupertino", :state => "CA", :zip => "95014" }
         | 
| 88 | 
            -
             | 
| 83 | 
            +
             | 
| 84 | 
            +
            test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr NW, Leesburg, VA",
         | 
| 85 | 
            +
              { :street1 => "60 Ida Lee Dr NW", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
         | 
| 86 | 
            +
             | 
| 87 | 
            +
            test_input "Ida Lee Park Recreation Center 60 Ida Lee Dr N West, Leesburg, VA",
         | 
| 88 | 
            +
              { :street1 => "60 Ida Lee Dr N West", :street2 => nil, :city => "Leesburg", :state => "VA", :zip => nil }
         | 
| 89 | 
            +
             | 
    
        data/test/test_helper.rb
    CHANGED
    
    | @@ -1,7 +1,8 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 3 | 
            -
             | 
| 1 | 
            +
            require 'test/unit'
         | 
| 2 | 
            +
            require 'rubygems'
         | 
| 3 | 
            +
            begin require 'redgreen' unless ENV['TM_FILENAME']; rescue LoadError; end
         | 
| 4 4 |  | 
| 5 | 
            +
            module TestDataHelper
         | 
| 5 6 | 
             
              def test_input(input_string, *expected_outputs)
         | 
| 6 7 | 
             
                test_data << { :input => input_string, :expected_output => expected_outputs }
         | 
| 7 8 | 
             
              end
         | 
| @@ -9,23 +10,20 @@ module TestDataHelper | |
| 9 10 | 
             
              def each_test_data
         | 
| 10 11 | 
             
                test_data.each { |t| yield(t) }
         | 
| 11 12 | 
             
              end
         | 
| 12 | 
            -
             | 
| 13 | 
            +
             | 
| 13 14 | 
             
              def test_data
         | 
| 14 | 
            -
                 | 
| 15 | 
            +
                @@test_data ||= []
         | 
| 15 16 | 
             
              end
         | 
| 16 | 
            -
              
         | 
| 17 17 | 
             
            end
         | 
| 18 18 |  | 
| 19 19 | 
             
            module Helpers
         | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
                  assert_equal a[k], b[k], "a[#{k.inspect}] = #{a[k].inspect} != b[#{k.inspect}] = #{b[k].inspect}"
         | 
| 20 | 
            +
              def assert_equal_hashes(expected, hash)
         | 
| 21 | 
            +
                (expected.keys + hash.keys).uniq.each do |k|
         | 
| 22 | 
            +
                  assert_equal expected[k], hash[k], "expected[#{k.inspect}] = #{expected[k].inspect} != hash[#{k.inspect}] = #{hash[k].inspect}"
         | 
| 24 23 | 
             
                end
         | 
| 25 24 | 
             
              end
         | 
| 25 | 
            +
            end
         | 
| 26 26 |  | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
              end
         | 
| 30 | 
            -
              
         | 
| 27 | 
            +
            class Test::Unit::TestCase
         | 
| 28 | 
            +
              include Helpers
         | 
| 31 29 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: coderifous-address_extractor
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              version: 0.1.3
         | 
| 4 | 
            +
              version: 0.1.4
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors: 
         | 
| 7 7 | 
             
            - Jim Garvin
         | 
| @@ -9,7 +9,7 @@ autorequire: | |
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 11 |  | 
| 12 | 
            -
            date: 2008-11- | 
| 12 | 
            +
            date: 2008-11-21 00:00:00 -08:00
         | 
| 13 13 | 
             
            default_executable: 
         | 
| 14 14 | 
             
            dependencies: []
         | 
| 15 15 |  |