namor 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/namor/namor.rb +17 -5
- data/lib/namor/version.rb +1 -1
- data/spec/lib/namor_spec.rb +25 -1
- data/spec/spec_helper.rb +0 -26
- metadata +4 -4
data/lib/namor/namor.rb
CHANGED
@@ -7,13 +7,25 @@ class Namor::Namor
|
|
7
7
|
@config = opts
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
# clean up a single name component
|
11
|
+
# * output all converted to uppercase
|
12
|
+
# * strip a leading run of two or more Z/X characters, e.g. ZZ or XX (frequently used as invalid-account prefixes)
|
13
|
+
# * remove any words that are in the user-provided suppression list
|
14
|
+
# * remove words from list of common suffixes (Jr, Sr etc)
|
15
|
+
# * remove anything inside parentheses
|
16
|
+
# * remove punctuation
|
17
|
+
# * squeeze whitespace & trim spaces from ends
|
18
|
+
def scrub(name, opts = {})
|
13
19
|
suppression_list = @config[:suppress] || []
|
14
|
-
suppression_re = suppression_list.join('|')
|
20
|
+
suppression_re = (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|')
|
21
|
+
|
22
|
+
name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
|
23
|
+
end
|
24
|
+
|
25
|
+
def extract(name, opts = {})
|
26
|
+
return [] if name.nil?
|
15
27
|
|
16
|
-
detitled_name = name
|
28
|
+
detitled_name = scrub(name, opts)
|
17
29
|
|
18
30
|
if detitled_name =~ /,/
|
19
31
|
# "last, first[ middle]"
|
data/lib/namor/version.rb
CHANGED
data/spec/lib/namor_spec.rb
CHANGED
@@ -5,7 +5,7 @@ require "spec_helper"
|
|
5
5
|
describe "name extract" do
|
6
6
|
before(:all) do
|
7
7
|
@namor = Namor::Namor.new
|
8
|
-
@namor.config(:suppress => ['MD', '
|
8
|
+
@namor.config(:suppress => ['MD', 'dds'])
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should handle 2-part names without commas" do
|
@@ -69,6 +69,9 @@ describe "name extract" do
|
|
69
69
|
@namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE']
|
70
70
|
end
|
71
71
|
|
72
|
+
it "should treat some cases with periods as first.last" do
|
73
|
+
@namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
|
74
|
+
end
|
72
75
|
end
|
73
76
|
|
74
77
|
describe "with cluster coding" do
|
@@ -101,4 +104,25 @@ describe "name componentization" do
|
|
101
104
|
@namor.components("john smith esk.").should == ['ESK', 'JOHN', 'SMITH']
|
102
105
|
@namor.components("john smith esq.").should == ['JOHN', 'SMITH']
|
103
106
|
end
|
107
|
+
|
108
|
+
it "should scrub individual name components of punctuation and titles" do
|
109
|
+
@namor.scrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should delete strings inside parens" do
|
113
|
+
@namor.scrub("O'Hara (Morrison)").should == 'OHARA'
|
114
|
+
@namor.scrub(" Smith (Brown) ").should == 'SMITH'
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should deal with periods in names" do
|
118
|
+
@namor.scrub("G. Gordon").should == 'G GORDON'
|
119
|
+
end
|
120
|
+
|
121
|
+
it "should remove junk prefixing" do
|
122
|
+
@namor.scrub("ZZJOHN SMITH").should == 'JOHN SMITH'
|
123
|
+
end
|
124
|
+
|
125
|
+
it "should allow case-specific word suppression" do
|
126
|
+
@namor.scrub("Amazing Magician", :suppress => ['magician', nil, 'conjuror']).should == 'AMAZING'
|
127
|
+
end
|
104
128
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,35 +1,9 @@
|
|
1
|
-
# require File.expand_path("../../config/environment", __FILE__)
|
2
|
-
# require 'rspec/rails'
|
3
1
|
require 'rspec/autorun'
|
4
|
-
# require 'capybara/rspec'
|
5
|
-
require "tempfile"
|
6
2
|
|
7
3
|
# Requires supporting ruby files with custom matchers and macros, etc,
|
8
4
|
# in spec/support/ and its subdirectories.
|
9
|
-
# Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
|
10
5
|
|
11
6
|
require File.expand_path('../../lib/namor', __FILE__)
|
12
7
|
|
13
8
|
RSpec.configure do |config|
|
14
|
-
# ## Mock Framework
|
15
|
-
#
|
16
|
-
# If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
|
17
|
-
#
|
18
|
-
# config.mock_with :mocha
|
19
|
-
# config.mock_with :flexmock
|
20
|
-
# config.mock_with :rr
|
21
|
-
|
22
|
-
# Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
|
23
|
-
# config.fixture_path = "#{::Rails.root}/spec/fixtures"
|
24
|
-
|
25
|
-
# If you're not using ActiveRecord, or you'd prefer not to run each of your
|
26
|
-
# examples within a transaction, remove the following line or assign false
|
27
|
-
# instead of true.
|
28
|
-
# config.use_transactional_fixtures = true
|
29
|
-
# config.use_instantiated_fixtures = false
|
30
|
-
|
31
|
-
# If true, the base class of anonymous controllers will be inferred
|
32
|
-
# automatically. This will be the default behavior in future versions of
|
33
|
-
# rspec-rails.
|
34
|
-
# config.infer_base_class_for_anonymous_controllers = false
|
35
9
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-31 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
108
|
version: '0'
|
109
109
|
segments:
|
110
110
|
- 0
|
111
|
-
hash:
|
111
|
+
hash: -1410144287652690285
|
112
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
113
|
none: false
|
114
114
|
requirements:
|
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
segments:
|
119
119
|
- 0
|
120
|
-
hash:
|
120
|
+
hash: -1410144287652690285
|
121
121
|
requirements: []
|
122
122
|
rubyforge_project:
|
123
123
|
rubygems_version: 1.8.24
|