namor 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/namor/namor.rb +17 -5
- data/lib/namor/version.rb +1 -1
- data/spec/lib/namor_spec.rb +25 -1
- data/spec/spec_helper.rb +0 -26
- metadata +4 -4
data/lib/namor/namor.rb
CHANGED
@@ -7,13 +7,25 @@ class Namor::Namor
|
|
7
7
|
@config = opts
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
# clean up a single name component
|
11
|
+
# * output all converted to uppercase
|
12
|
+
# * strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
|
13
|
+
# * remove any words that are in the user-provided suppression list
|
14
|
+
# * remove words from list of common suffixes (Jr, Sr etc)
|
15
|
+
# * remove anything inside parenthesis
|
16
|
+
# * remove punctuation
|
17
|
+
# * squeeze whitespace & trim spaces from ends
|
18
|
+
def scrub(name, opts = {})
|
13
19
|
suppression_list = @config[:suppress] || []
|
14
|
-
suppression_re = suppression_list.join('|')
|
20
|
+
suppression_re = (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|')
|
21
|
+
|
22
|
+
name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
|
23
|
+
end
|
24
|
+
|
25
|
+
def extract(name, opts = {})
|
26
|
+
return [] if name.nil?
|
15
27
|
|
16
|
-
detitled_name = name
|
28
|
+
detitled_name = scrub(name, opts)
|
17
29
|
|
18
30
|
if detitled_name =~ /,/
|
19
31
|
# "last, first[ middle]"
|
data/lib/namor/version.rb
CHANGED
data/spec/lib/namor_spec.rb
CHANGED
@@ -5,7 +5,7 @@ require "spec_helper"
|
|
5
5
|
describe "name extract" do
|
6
6
|
before(:all) do
|
7
7
|
@namor = Namor::Namor.new
|
8
|
-
@namor.config(:suppress => ['MD', '
|
8
|
+
@namor.config(:suppress => ['MD', 'dds'])
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should handle 2-part names without commas" do
|
@@ -69,6 +69,9 @@ describe "name extract" do
|
|
69
69
|
@namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE']
|
70
70
|
end
|
71
71
|
|
72
|
+
it "should treat some cases with periods as first.last" do
|
73
|
+
@namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
|
74
|
+
end
|
72
75
|
end
|
73
76
|
|
74
77
|
describe "with cluster coding" do
|
@@ -101,4 +104,25 @@ describe "name componentization" do
|
|
101
104
|
@namor.components("john smith esk.").should == ['ESK', 'JOHN', 'SMITH']
|
102
105
|
@namor.components("john smith esq.").should == ['JOHN', 'SMITH']
|
103
106
|
end
|
107
|
+
|
108
|
+
it "should scrub individual name components of punctuation and titles" do
|
109
|
+
@namor.scrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should delete strings inside parens" do
|
113
|
+
@namor.scrub("O'Hara (Morrison)").should == 'OHARA'
|
114
|
+
@namor.scrub(" Smith (Brown) ").should == 'SMITH'
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should deal with periods in names" do
|
118
|
+
@namor.scrub("G. Gordon").should == 'G GORDON'
|
119
|
+
end
|
120
|
+
|
121
|
+
it "should remove junk prefixing" do
|
122
|
+
@namor.scrub("ZZJOHN SMITH").should == 'JOHN SMITH'
|
123
|
+
end
|
124
|
+
|
125
|
+
it "should allow case-specific word suppression" do
|
126
|
+
@namor.scrub("Amazing Magician", :suppress => ['magician', nil, 'conjuror']).should == 'AMAZING'
|
127
|
+
end
|
104
128
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,35 +1,9 @@
|
|
1
|
-
# require File.expand_path("../../config/environment", __FILE__)
|
2
|
-
# require 'rspec/rails'
|
3
1
|
require 'rspec/autorun'
|
4
|
-
# require 'capybara/rspec'
|
5
|
-
require "tempfile"
|
6
2
|
|
7
3
|
# Requires supporting ruby files with custom matchers and macros, etc,
|
8
4
|
# in spec/support/ and its subdirectories.
|
9
|
-
# Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
|
10
5
|
|
11
6
|
require File.expand_path('../../lib/namor', __FILE__)
|
12
7
|
|
13
8
|
RSpec.configure do |config|
|
14
|
-
# ## Mock Framework
|
15
|
-
#
|
16
|
-
# If you prefer to use mocha, flexmock or RR, uncomment the appropriate line:
|
17
|
-
#
|
18
|
-
# config.mock_with :mocha
|
19
|
-
# config.mock_with :flexmock
|
20
|
-
# config.mock_with :rr
|
21
|
-
|
22
|
-
# Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
|
23
|
-
# config.fixture_path = "#{::Rails.root}/spec/fixtures"
|
24
|
-
|
25
|
-
# If you're not using ActiveRecord, or you'd prefer not to run each of your
|
26
|
-
# examples within a transaction, remove the following line or assign false
|
27
|
-
# instead of true.
|
28
|
-
# config.use_transactional_fixtures = true
|
29
|
-
# config.use_instantiated_fixtures = false
|
30
|
-
|
31
|
-
# If true, the base class of anonymous controllers will be inferred
|
32
|
-
# automatically. This will be the default behavior in future versions of
|
33
|
-
# rspec-rails.
|
34
|
-
# config.infer_base_class_for_anonymous_controllers = false
|
35
9
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-31 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
108
|
version: '0'
|
109
109
|
segments:
|
110
110
|
- 0
|
111
|
-
hash:
|
111
|
+
hash: -1410144287652690285
|
112
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
113
|
none: false
|
114
114
|
requirements:
|
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
segments:
|
119
119
|
- 0
|
120
|
-
hash:
|
120
|
+
hash: -1410144287652690285
|
121
121
|
requirements: []
|
122
122
|
rubyforge_project:
|
123
123
|
rubygems_version: 1.8.24
|