namor 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,13 +7,25 @@ class Namor::Namor
7
7
  @config = opts
8
8
  end
9
9
 
10
- def extract(name)
11
- return [] if name.nil?
12
-
10
+ # clean up a single name component
11
+ # * output all converted to uppercase
12
+ # * strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
13
+ # * remove any words that are in the user-provided suppression list
14
+ # * remove words from list of common suffixes (Jr, Sr etc)
15
+ # * remove anything inside parentheses
16
+ # * remove punctuation
17
+ # * squeeze whitespace & trim spaces from ends
18
+ def scrub(name, opts = {})
13
19
  suppression_list = @config[:suppress] || []
14
- suppression_re = suppression_list.join('|')
20
+ suppression_re = (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|')
21
+
22
+ name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
23
+ end
24
+
25
+ def extract(name, opts = {})
26
+ return [] if name.nil?
15
27
 
16
- detitled_name = name.upcase.gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(MD|JR|SR|I+|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/[_.'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
28
+ detitled_name = scrub(name, opts)
17
29
 
18
30
  if detitled_name =~ /,/
19
31
  # "last, first[ middle]"
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -5,7 +5,7 @@ require "spec_helper"
5
5
  describe "name extract" do
6
6
  before(:all) do
7
7
  @namor = Namor::Namor.new
8
- @namor.config(:suppress => ['MD', 'DDS'])
8
+ @namor.config(:suppress => ['MD', 'dds'])
9
9
  end
10
10
 
11
11
  it "should handle 2-part names without commas" do
@@ -69,6 +69,9 @@ describe "name extract" do
69
69
  @namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE']
70
70
  end
71
71
 
72
+ it "should treat some cases with periods as first.last" do
73
+ @namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
74
+ end
72
75
  end
73
76
 
74
77
  describe "with cluster coding" do
@@ -101,4 +104,25 @@ describe "name componentization" do
101
104
  @namor.components("john smith esk.").should == ['ESK', 'JOHN', 'SMITH']
102
105
  @namor.components("john smith esq.").should == ['JOHN', 'SMITH']
103
106
  end
107
+
108
+ it "should scrub individual name components of punctuation and titles" do
109
+ @namor.scrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
110
+ end
111
+
112
+ it "should delete strings inside parens" do
113
+ @namor.scrub("O'Hara (Morrison)").should == 'OHARA'
114
+ @namor.scrub(" Smith (Brown) ").should == 'SMITH'
115
+ end
116
+
117
+ it "should deal with periods in names" do
118
+ @namor.scrub("G. Gordon").should == 'G GORDON'
119
+ end
120
+
121
+ it "should remove junk prefixing" do
122
+ @namor.scrub("ZZJOHN SMITH").should == 'JOHN SMITH'
123
+ end
124
+
125
+ it "should allow case-specific word suppression" do
126
+ @namor.scrub("Amazing Magician", :suppress => ['magician', nil, 'conjuror']).should == 'AMAZING'
127
+ end
104
128
  end
@@ -1,35 +1,9 @@
1
- # require File.expand_path("../../config/environment", __FILE__)
2
- # require 'rspec/rails'
3
1
  require 'rspec/autorun'
4
- # require 'capybara/rspec'
5
- require "tempfile"
6
2
 
7
3
  # Requires supporting ruby files with custom matchers and macros, etc,
8
4
  # in spec/support/ and its subdirectories.
9
- # Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
10
5
 
11
6
  require File.expand_path('../../lib/namor', __FILE__)
12
7
 
13
8
  RSpec.configure do |config|
14
- # ## Mock Framework
15
- #
16
- # If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
17
- #
18
- # config.mock_with :mocha
19
- # config.mock_with :flexmock
20
- # config.mock_with :rr
21
-
22
- # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
23
- # config.fixture_path = "#{::Rails.root}/spec/fixtures"
24
-
25
- # If you're not using ActiveRecord, or you'd prefer not to run each of your
26
- # examples within a transaction, remove the following line or assign false
27
- # instead of true.
28
- # config.use_transactional_fixtures = true
29
- # config.use_instantiated_fixtures = false
30
-
31
- # If true, the base class of anonymous controllers will be inferred
32
- # automatically. This will be the default behavior in future versions of
33
- # rspec-rails.
34
- # config.infer_base_class_for_anonymous_controllers = false
35
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-30 00:00:00.000000000 Z
12
+ date: 2012-08-31 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  segments:
110
110
  - 0
111
- hash: 52459653516907226
111
+ hash: -1410144287652690285
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  none: false
114
114
  requirements:
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  version: '0'
118
118
  segments:
119
119
  - 0
120
- hash: 52459653516907226
120
+ hash: -1410144287652690285
121
121
  requirements: []
122
122
  rubyforge_project:
123
123
  rubygems_version: 1.8.24