namor 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,25 @@ class Namor::Namor
7
7
  @config = opts
8
8
  end
9
9
 
10
- def extract(name)
11
- return [] if name.nil?
12
-
10
+ # clean up a single name component
11
+ # * output all converted to uppercase
12
+ # * strip a leading run of two or more Z/X characters, e.g. ZZ or XXX (frequently used as invalid-account prefixes)
13
+ # * remove any words that are in the user-provided suppression list
14
+ # * remove words from list of common suffixes (Jr, Sr etc)
15
+ # * remove anything inside parentheses
16
+ # * remove punctuation
17
+ # * squeeze whitespace & trim spaces from ends
18
+ def scrub(name, opts = {})
13
19
  suppression_list = @config[:suppress] || []
14
- suppression_re = suppression_list.join('|')
20
+ suppression_re = (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|')
21
+
22
+ name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
23
+ end
24
+
25
+ def extract(name, opts = {})
26
+ return [] if name.nil?
15
27
 
16
- detitled_name = name.upcase.gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(MD|JR|SR|I+|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/[_.'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
28
+ detitled_name = scrub(name, opts)
17
29
 
18
30
  if detitled_name =~ /,/
19
31
  # "last, first[ middle]"
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -5,7 +5,7 @@ require "spec_helper"
5
5
  describe "name extract" do
6
6
  before(:all) do
7
7
  @namor = Namor::Namor.new
8
- @namor.config(:suppress => ['MD', 'DDS'])
8
+ @namor.config(:suppress => ['MD', 'dds'])
9
9
  end
10
10
 
11
11
  it "should handle 2-part names without commas" do
@@ -69,6 +69,9 @@ describe "name extract" do
69
69
  @namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE']
70
70
  end
71
71
 
72
+ it "should treat some cases with periods as first.last" do
73
+ @namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
74
+ end
72
75
  end
73
76
 
74
77
  describe "with cluster coding" do
@@ -101,4 +104,25 @@ describe "name componentization" do
101
104
  @namor.components("john smith esk.").should == ['ESK', 'JOHN', 'SMITH']
102
105
  @namor.components("john smith esq.").should == ['JOHN', 'SMITH']
103
106
  end
107
+
108
+ it "should scrub individual name components of punctuation and titles" do
109
+ @namor.scrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
110
+ end
111
+
112
+ it "should delete strings inside parens" do
113
+ @namor.scrub("O'Hara (Morrison)").should == 'OHARA'
114
+ @namor.scrub(" Smith (Brown) ").should == 'SMITH'
115
+ end
116
+
117
+ it "should deal with periods in names" do
118
+ @namor.scrub("G. Gordon").should == 'G GORDON'
119
+ end
120
+
121
+ it "should remove junk prefixing" do
122
+ @namor.scrub("ZZJOHN SMITH").should == 'JOHN SMITH'
123
+ end
124
+
125
+ it "should allow case-specific word suppression" do
126
+ @namor.scrub("Amazing Magician", :suppress => ['magician', nil, 'conjuror']).should == 'AMAZING'
127
+ end
104
128
  end
@@ -1,35 +1,9 @@
1
- # require File.expand_path("../../config/environment", __FILE__)
2
- # require 'rspec/rails'
3
1
  require 'rspec/autorun'
4
- # require 'capybara/rspec'
5
- require "tempfile"
6
2
 
7
3
  # Requires supporting ruby files with custom matchers and macros, etc,
8
4
  # in spec/support/ and its subdirectories.
9
- # Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
10
5
 
11
6
  require File.expand_path('../../lib/namor', __FILE__)
12
7
 
13
8
  RSpec.configure do |config|
14
- # ## Mock Framework
15
- #
16
- # If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
17
- #
18
- # config.mock_with :mocha
19
- # config.mock_with :flexmock
20
- # config.mock_with :rr
21
-
22
- # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
23
- # config.fixture_path = "#{::Rails.root}/spec/fixtures"
24
-
25
- # If you're not using ActiveRecord, or you'd prefer not to run each of your
26
- # examples within a transaction, remove the following line or assign false
27
- # instead of true.
28
- # config.use_transactional_fixtures = true
29
- # config.use_instantiated_fixtures = false
30
-
31
- # If true, the base class of anonymous controllers will be inferred
32
- # automatically. This will be the default behavior in future versions of
33
- # rspec-rails.
34
- # config.infer_base_class_for_anonymous_controllers = false
35
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-30 00:00:00.000000000 Z
12
+ date: 2012-08-31 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  segments:
110
110
  - 0
111
- hash: 52459653516907226
111
+ hash: -1410144287652690285
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  none: false
114
114
  requirements:
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  version: '0'
118
118
  segments:
119
119
  - 0
120
- hash: 52459653516907226
120
+ hash: -1410144287652690285
121
121
  requirements: []
122
122
  rubyforge_project:
123
123
  rubygems_version: 1.8.24