icu_name 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENCE +22 -0
- data/README.rdoc +62 -0
- data/lib/icu_name/version.rb +7 -0
- data/lib/icu_name.rb +230 -0
- data/spec/icu_name_spec.rb +208 -0
- data/spec/spec_helper.rb +8 -0
- metadata +99 -0
data/LICENCE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2011 Mark Orr
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
= ICU Name
|
2
|
+
|
3
|
+
Canonicalises and matches person names with Western European characters and first and last names.
|
4
|
+
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
For ruby 1.9.2 and above.
|
8
|
+
|
9
|
+
gem install icu_name
|
10
|
+
|
11
|
+
== Names
|
12
|
+
|
13
|
+
This class exists for two main purposes:
|
14
|
+
|
15
|
+
* to normalise to a common format the different ways names are typed in practice
|
16
|
+
* to be able to match two names even if they are not exactly the same
|
17
|
+
|
18
|
+
To create a name object, supply both the first and second names separately to the constructor.
|
19
|
+
|
20
|
+
robert = ICU::Name.new(' robert j ', ' FISCHER ')
|
21
|
+
|
22
|
+
Capitalisation, white space and punctuation will all be automatically corrected:
|
23
|
+
|
24
|
+
robert.name # => 'Robert J. Fischer'
|
25
|
+
robert.rname # => 'Fischer, Robert J.' (reversed name)
|
26
|
+
|
27
|
+
To avoid ambiguity when either the first or second names consist of multiple words, it is better to
|
28
|
+
supply the two separately, if known. However, the full name can be supplied alone to the constructor
|
29
|
+
and a guess will be made as to the first and last names.
|
30
|
+
|
31
|
+
bobby = ICU::Name.new(' bobby fischer ')
|
32
|
+
|
33
|
+
bobby.first # => 'Bobby'
|
34
|
+
bobby.last # => 'Fischer'
|
35
|
+
|
36
|
+
Names will match even if one is missing middle initials or if a nickname is used for one of the first names.
|
37
|
+
|
38
|
+
bobby.match('Robert J.', 'Fischer') # => true
|
39
|
+
|
40
|
+
Note that the class is aware of only common nicknames (e.g. _Bobby_ and _Robert_, _Bill_ and _William_, etc), not all possibilities.
|
41
|
+
|
42
|
+
Supplying the _match_ method with strings is equivalent to instantiating a Name instance with the same
|
43
|
+
strings and then matching it. So, for example the following are equivalent:
|
44
|
+
|
45
|
+
robert.match('R.', 'Fischer') # => true
|
46
|
+
robert.match(ICU::Name.new('R.', 'Fischer')) # => true
|
47
|
+
|
48
|
+
The initial _R_, for example, matches the first letter of _Robert_. However, nickname matches will not
|
49
|
+
always work with initials. In the next example, the initial _R_ does not match the first letter _B_ of the
|
50
|
+
nickname _Bobby_.
|
51
|
+
|
52
|
+
bobby.match('R. J.', 'Fischer') # => false
|
53
|
+
|
54
|
+
Some of the ways last names are canonicalised are illustrated below:
|
55
|
+
|
56
|
+
ICU::Name.new('John', 'O Reilly').last # => "O'Reilly"
|
57
|
+
ICU::Name.new('dave', 'mcmanus').last # => "McManus"
|
58
|
+
ICU::Name.new('pete', 'MACMANUS').last # => "MacManus"
|
59
|
+
|
60
|
+
== Author
|
61
|
+
|
62
|
+
Mark Orr, rating officer for the Irish Chess Union (ICU[http://icu.ie]).
|
data/lib/icu_name.rb
ADDED
@@ -0,0 +1,230 @@
|
|
1
|
+
module ICU
  # A person's name, canonicalised into a first-name part and a last-name part.
  #
  # The class has two jobs:
  # * normalise capitalisation, white space and punctuation of typed names;
  # * decide whether two names refer to the same person, allowing for
  #   initials, missing middle names, common nicknames and Mc/Mac variations.
  #
  # Neither construction nor matching modifies the strings handed in by the
  # caller or the values returned by #first and #last.
  class Name
    # Groups of first names treated as equivalent when matching: nicknames
    # plus a few common misspellings. A name may appear in one group only.
    NICKNAME_GROUPS = [
      %w[Abdul Abul],
      %w[Alexander Alex],
      %w[Anandagopal Ananda],
      %w[Anne Ann],
      %w[Anthony Tony],
      %w[Benjamin Ben],
      %w[Catherine Cathy Cath],
      %w[Daniel Danial Danny Dan],
      %w[David Dave],
      %w[Deborah Debbie],
      %w[Des Desmond],
      %w[Eamonn Eamon],
      %w[Edward Eddie Ed],
      %w[Eric Erick Erik],
      %w[Frederick Frederic Fred],
      %w[Gerald Gerry],
      %w[Gerhard Gerard Ger],
      %w[James Jim],
      %w[Joanna Joan Joanne],
      %w[John Johnny],
      %w[Jonathan Jon],
      %w[Kenneth Ken Kenny],
      %w[Michael Mike Mick Micky],
      %w[Nicholas Nick Nicolas],
      %w[Nicola Nickie Nicky],
      %w[Patrick Pat Paddy],
      %w[Peter Pete],
      %w[Philippe Philip Phillippe Phillip],
      %w[Rick Ricky],
      %w[Robert Bob Bobby],
      %w[Samual Sam Samuel],
      %w[Stefanie Stef],
      %w[Stephen Steven Steve],
      %w[Terence Terry],
      %w[Thomas Tom Tommy],
      %w[William Will Willy Willie Bill]
    ].freeze

    # Build the lookup table from a first name to the (1-based) number of the
    # group it belongs to. Raises ArgumentError if a name is listed twice,
    # because such a name could not be assigned a single group.
    def self.compile_nick_names(groups)
      codes = {}
      groups.each_with_index do |nicks, index|
        nicks.each do |nick|
          raise ArgumentError, "duplicate name #{nick}" if codes.key?(nick)
          codes[nick] = index + 1
        end
      end
      codes.freeze
    end
    private_class_method :compile_nick_names

    # First name => nickname group number, compiled once when the class loads.
    NICKNAME_CODES = compile_nick_names(NICKNAME_GROUPS)

    attr_reader :first, :last

    # Construct from one or two strings or any objects that have a to_s method.
    # With two arguments they are the first and last names; with one argument
    # the full name is split, either at a comma ("Last, First") or before the
    # final word ("First Last").
    def initialize(name1='', name2='')
      # Keep private copies so later changes to the caller's strings cannot
      # affect this object.
      @name1 = name1.to_s.dup
      @name2 = name2.to_s.dup
      canonicalize
    end

    # Return a complete name, first name first, no comma.
    def name
      [@first, @last].reject { |part| part.empty? }.join(' ')
    end

    # Return a reversed complete name, first name last after a comma.
    def rname
      [@last, @first].reject { |part| part.empty? }.join(', ')
    end

    # Convert object to a string (the reversed name).
    def to_s
      rname
    end

    # Match another name to this object, returning true or false.
    # Accepts the same arguments as the constructor, including a Name instance.
    def match(name1='', name2='')
      other = Name.new(name1, name2)
      match_first(first, other.first) && match_last(last, other.last)
    end

    private

    # Canonicalise the first and last names.
    def canonicalize
      first, last = partition
      @first = finish_first(first)
      @last  = finish_last(last)
    end

    # Split the input into cleaned [first, last] parts.
    def partition
      # Two inputs, so we are given first and last.
      return [clean(@name1), clean(@name2)] unless @name2.empty?

      # Only one input, so we must split first and last ourselves.
      parts = @name1.split(/,/)
      if parts.size > 1
        # "Last, First ..." form: everything after the first comma is first name.
        last  = clean(parts.shift || '')
        first = clean(parts.join(' '))
      else
        # "First ... Last" form: the final word is taken to be the last name.
        words = clean(@name1).split(/ /)
        last  = words.pop || ''
        first = words.join(' ')
      end
      [first, last]
    end

    # Character-level tidy up that keeps the typed letter case: backticks
    # become apostrophes, anything other than letters, hyphens, dots,
    # apostrophes and white space is dropped, dots become spaces, space around
    # hyphens is removed and repeated apostrophes collapse. Returns a new string.
    def scrub(name)
      name.gsub(/`/, "'").
           gsub(/[^-a-zA-Z.'\s]/, '').
           gsub(/\./, ' ').
           gsub(/\s*-\s*/, '-').
           gsub(/'+/, "'")
    end

    # Clean up characters in any name and capitalise each word and each
    # hyphen-separated part of a word. Returns a new string; the argument is
    # left untouched.
    def clean(name)
      scrub(name).strip.downcase.split(/\s+/).map do |word|
        trimmed = word.sub(/\A-+/, '').sub(/-+\z/, '')
        # Non-bang capitalize: capitalize! returns nil when nothing changes
        # (e.g. a part starting with an apostrophe), which would lose the part.
        trimmed.split(/-/).map { |part| part.capitalize }.join('-')
      end.join(' ')
    end

    # Apply final touches to finish canonicalising a first name:
    # a capital letter standing alone is an initial and gets a full stop.
    def finish_first(names)
      names.gsub(/([A-Z])\b/, '\1.')
    end

    # Apply final touches to finish canonicalising a last name:
    # O'Reilly/D'Arcy style capitals, McX, MacX and "O Reilly" => "O'Reilly".
    def finish_last(names)
      # Case-preserving text of the second constructor argument (empty when the
      # name was built from a single string); used to respect a typed "Macd...".
      typed = scrub(@name2)

      names = names.gsub(/\b([A-Z])'([a-z])/) { "#{$1}'#{$2.upcase}" }
      names = names.gsub(/\bMc([a-z])/) { "Mc#{$1.upcase}" }
      names = names.gsub(/\bMac([a-z])/) do
        letter = $1 # saved because the match below overwrites $1
        # Keep the lower case letter only if that is how it was typed after "mac".
        "Mac#{typed =~ /[mM][aA][cC]#{letter}/ ? letter : letter.upcase}"
      end
      names.gsub(/\bO ([A-Z])/) { "O'#{$1}" }
    end

    # Match a complete first name.
    def match_first(first1, first2)
      # Is this one a walk in the park?
      return true if first1 == first2

      # No easy ride. Begin by splitting into individual first names.
      words1 = split_first(first1)
      words2 = split_first(first2)

      # Get the long list and the short list.
      long, short = words1.size >= words2.size ? [words1, words2] : [words2, words1]

      # The short one must be an in-order "subset" of the long one. As an extra
      # condition, at least one of the matches must either be on the very first
      # name of the long list or be a full (non-initial) match.
      extra   = false
      matched = 0 # how many names of the short list have been matched so far
      long.each_with_index do |word, i|
        score = match_first_name(word, short[matched])
        if score >= 0
          matched += 1
          extra = true if i == 0 || score == 0
        end
        break if matched == short.size
      end

      # There's a match if the following is true.
      matched == short.size && extra
    end

    # Match a complete last name, ignoring case, Mac/Mc and hyphen/space differences.
    def match_last(last1, last2)
      return true if last1 == last2
      comparable_last(last1) == comparable_last(last2)
    end

    # Reduce a last name to the form used for comparison, as a new string.
    def comparable_last(last)
      last.downcase.           # MacDonaugh and Macdonaugh
           gsub(/\bmac/, 'mc'). # MacDonaugh and McDonaugh
           tr('-', ' ')         # Lowry-O'Reilly and Lowry O'Reilly
    end

    # Split a complete first name for matching, without changing the argument.
    def split_first(first)
      words = first.tr('-', ' ').split(/ /) # J. K. and J.-K.
      words.empty? ? [''] : words           # in case input was empty string
    end

    # Match individual first names or initials.
    # -1 = no match
    #  0 = full match
    #  1 = match involving 1 initial
    #  2 = match involving 2 initials
    def match_first_name(first1, first2)
      initials = [first1, first2].count { |word| word =~ /\A[A-Z]\.?\z/ }
      return initials if first1 == first2
      return 0 if initials == 0 && match_nick_name(first1, first2)
      return -1 unless initials > 0
      # An initial matches any name starting with the same letter.
      first1[0] == first2[0] ? initials : -1
    end

    # Match two first names that might be equivalent nicknames.
    def match_nick_name(nick1, nick2)
      code = NICKNAME_CODES[nick1]
      !code.nil? && code == NICKNAME_CODES[nick2]
    end
  end
end
|
@@ -0,0 +1,208 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
module ICU
|
4
|
+
describe Name do
|
5
|
+
context "public methods" do
|
6
|
+
before(:each) do
|
7
|
+
@simple = Name.new('mark j l', 'orr')
|
8
|
+
end
|
9
|
+
|
10
|
+
it "#first returns the first name(s)" do
|
11
|
+
@simple.first.should == 'Mark J. L.'
|
12
|
+
end
|
13
|
+
|
14
|
+
it "#last returns the last name(s)" do
|
15
|
+
@simple.last.should == 'Orr'
|
16
|
+
end
|
17
|
+
|
18
|
+
it "#name returns the full name with first name(s) first" do
|
19
|
+
@simple.name.should == 'Mark J. L. Orr'
|
20
|
+
end
|
21
|
+
|
22
|
+
it "#rname returns the full name with last name(s) first" do
|
23
|
+
@simple.rname.should == 'Orr, Mark J. L.'
|
24
|
+
end
|
25
|
+
|
26
|
+
it "#to_s is the same as rname" do
|
27
|
+
@simple.to_s.should == 'Orr, Mark J. L.'
|
28
|
+
end
|
29
|
+
|
30
|
+
it "#match returns true if and only if two names match" do
|
31
|
+
@simple.match('mark j l orr').should be_true
|
32
|
+
@simple.match('malcolm g l orr').should be_false
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
context "rdoc expample" do
|
37
|
+
before(:each) do
|
38
|
+
@robert = Name.new(' robert j ', ' FISCHER ')
|
39
|
+
@bobby = Name.new(' bobby fischer ')
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should get Robert" do
|
43
|
+
@robert.name.should == 'Robert J. Fischer'
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should get Bobby" do
|
47
|
+
@bobby.last.should == 'Fischer'
|
48
|
+
@bobby.first.should == 'Bobby'
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should match Robert and Bobby" do
|
52
|
+
@robert.match(@bobby).should be_true
|
53
|
+
@robert.match('R. J.', 'Fischer').should be_true
|
54
|
+
@bobby.match('R. J.', 'Fischer').should be_false
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should canconicalise last names" do
|
58
|
+
Name.new('John', 'O Reilly').last.should == "O'Reilly"
|
59
|
+
Name.new('dave', 'mcmanus').last.should == "McManus"
|
60
|
+
Name.new('pete', 'MACMANUS').last.should == "MacManus"
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
context "names that are already canonical" do
|
65
|
+
it "should not be altered" do
|
66
|
+
Name.new('Mark J. L.', 'Orr').name.should == 'Mark J. L. Orr'
|
67
|
+
Name.new('Anna-Marie J.-K.', 'Liviu-Dieter').name.should == 'Anna-Marie J.-K. Liviu-Dieter'
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
context "last names beginning with a single letter followed by a quote" do
|
72
|
+
it "should be handled correctly" do
|
73
|
+
Name.new('una', "O'boyle").name.should == "Una O'Boyle"
|
74
|
+
Name.new('jonathan', 'd`arcy').name.should == "Jonathan D'Arcy"
|
75
|
+
Name.new('erwin e', "L'AMI").name.should == "Erwin E. L'Ami"
|
76
|
+
Name.new('cormac', "o brien").name.should == "Cormac O'Brien"
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
context "last beginning with Mc" do
|
81
|
+
it "should be handled correctly" do
|
82
|
+
Name.new('shane', "mccabe").name.should == "Shane McCabe"
|
83
|
+
Name.new('shawn', "macDonagh").name.should == "Shawn MacDonagh"
|
84
|
+
Name.new('shawn', "macdonagh").name.should == "Shawn Macdonagh"
|
85
|
+
Name.new('bartlomiej', "macieja").name.should == "Bartlomiej Macieja"
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
context "doubled barrelled names or initials" do
|
90
|
+
it "should be handled correctly" do
|
91
|
+
Name.new('anna-marie', 'den-otter').name.should == 'Anna-Marie Den-Otter'
|
92
|
+
Name.new('j-k', 'rowling').name.should == 'J.-K. Rowling'
|
93
|
+
Name.new("mark j. - l", 'ORR').name.should == 'Mark J.-L. Orr'
|
94
|
+
Name.new('JOHANNA', "lowry-o'REILLY").name.should == "Johanna Lowry-O'Reilly"
|
95
|
+
Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
context "extraneous white space" do
|
100
|
+
it "should be handled correctly" do
|
101
|
+
Name.new(' mark j l ', " \t\r\n orr \n").name.should == 'Mark J. L. Orr'
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
context "extraneous full stops" do
|
106
|
+
it "should be handled correctly" do
|
107
|
+
Name.new('. mark j..l', 'orr.').name.should == 'Mark J. L. Orr'
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
context "construction from a single string" do
|
112
|
+
before(:each) do
|
113
|
+
@mark1 = Name.new('ORR, mark j l')
|
114
|
+
@mark2 = Name.new('MARK J L ORR')
|
115
|
+
@oreil = Name.new("O'Reilly, j-k")
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should be possible in simple cases" do
|
119
|
+
@mark1.first.should == 'Mark J. L.'
|
120
|
+
@mark1.last.should == 'Orr'
|
121
|
+
@mark2.first.should == 'Mark J. L.'
|
122
|
+
@mark2.last.should == 'Orr'
|
123
|
+
@oreil.name.should == "J.-K. O'Reilly"
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context "construction from an instance" do
|
128
|
+
it "should be possible" do
|
129
|
+
Name.new(Name.new('ORR, mark j l')).name.should == 'Mark J. L. Orr'
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
context "constuction corner cases" do
|
134
|
+
it "should be handled correctly" do
|
135
|
+
Name.new('Orr').name.should == 'Orr'
|
136
|
+
Name.new('Orr').rname.should == 'Orr'
|
137
|
+
Name.new('').name.should == ''
|
138
|
+
Name.new('').rname.should == ''
|
139
|
+
Name.new.name.should == ''
|
140
|
+
Name.new.rname.should == ''
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
context "inputs to matching" do
|
145
|
+
before(:all) do
|
146
|
+
@mark = Name.new('Mark', 'Orr')
|
147
|
+
@kram = Name.new('Mark', 'Orr')
|
148
|
+
end
|
149
|
+
|
150
|
+
it "should be flexible" do
|
151
|
+
@mark.match('Mark', 'Orr').should be_true
|
152
|
+
@mark.match('Mark Orr').should be_true
|
153
|
+
@mark.match('Orr, Mark').should be_true
|
154
|
+
@mark.match(@kram).should be_true
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
context "first name matches" do
|
159
|
+
it "should match when first names are the same" do
|
160
|
+
Name.new('Mark', 'Orr').match('Mark', 'Orr').should be_true
|
161
|
+
end
|
162
|
+
|
163
|
+
it "should be flexible with regards to hyphens in double barrelled names" do
|
164
|
+
Name.new('J.-K.', 'Rowling').match('J. K.', 'Rowling').should be_true
|
165
|
+
Name.new('Joanne-K.', 'Rowling').match('Joanne K.', 'Rowling').should be_true
|
166
|
+
end
|
167
|
+
|
168
|
+
it "should match initials" do
|
169
|
+
Name.new('M. J. L.', 'Orr').match('Mark John Legard', 'Orr').should be_true
|
170
|
+
Name.new('M.', 'Orr').match('Mark', 'Orr').should be_true
|
171
|
+
Name.new('M. J. L.', 'Orr').match('Mark', 'Orr').should be_true
|
172
|
+
Name.new('M.', 'Orr').match('M. J.', 'Orr').should be_true
|
173
|
+
Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
|
174
|
+
end
|
175
|
+
|
176
|
+
it "should not match on full names not in first position or without an exact match" do
|
177
|
+
Name.new('J. M.', 'Orr').match('John', 'Orr').should be_true
|
178
|
+
Name.new('M. J.', 'Orr').match('John', 'Orr').should be_false
|
179
|
+
Name.new('M. John', 'Orr').match('John', 'Orr').should be_true
|
180
|
+
end
|
181
|
+
|
182
|
+
it "should handle common nicknames" do
|
183
|
+
Name.new('William', 'Orr').match('Bill', 'Orr').should be_true
|
184
|
+
Name.new('David', 'Orr').match('Dave', 'Orr').should be_true
|
185
|
+
Name.new('Mick', 'Orr').match('Mike', 'Orr').should be_true
|
186
|
+
end
|
187
|
+
|
188
|
+
it "should not mix up nick names" do
|
189
|
+
Name.new('David', 'Orr').match('Bill', 'Orr').should be_false
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
context "last name matches" do
|
194
|
+
it "should be flexible with regards to hyphens in double barrelled names" do
|
195
|
+
Name.new('Johanna', "Lowry-O'Reilly").match('Johanna', "Lowry O'Reilly").should be_true
|
196
|
+
end
|
197
|
+
|
198
|
+
it "should be case insensitive in matches involving Macsomething and MacSomething" do
|
199
|
+
Name.new('Alan', 'MacDonagh').match('Alan', 'Macdonagh').should be_true
|
200
|
+
end
|
201
|
+
|
202
|
+
it "should cater for the common mispelling of names beginning with Mc or Mac" do
|
203
|
+
Name.new('Alan', 'McDonagh').match('Alan', 'MacDonagh').should be_true
|
204
|
+
Name.new('Darko', 'Polimac').match('Darko', 'Polimc').should be_false
|
205
|
+
end
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: icu_name
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Mark Orr
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-12-31 00:00:00 +00:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: bundler
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 0
|
31
|
+
- 7
|
32
|
+
version: 1.0.7
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rspec
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
description: Canonicalises and matches person names with Latin1 characters and first and last names
|
49
|
+
email: mark.j.l.orr@googlemail.com
|
50
|
+
executables: []
|
51
|
+
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
extra_rdoc_files:
|
55
|
+
- LICENCE
|
56
|
+
- README.rdoc
|
57
|
+
files:
|
58
|
+
- lib/icu_name/version.rb
|
59
|
+
- lib/icu_name.rb
|
60
|
+
- spec/icu_name_spec.rb
|
61
|
+
- spec/spec_helper.rb
|
62
|
+
- LICENCE
|
63
|
+
- README.rdoc
|
64
|
+
has_rdoc: true
|
65
|
+
homepage: http://rubygems.org/gems/icu_name
|
66
|
+
licenses: []
|
67
|
+
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
segments:
|
87
|
+
- 1
|
88
|
+
- 3
|
89
|
+
- 6
|
90
|
+
version: 1.3.6
|
91
|
+
requirements: []
|
92
|
+
|
93
|
+
rubyforge_project: icu_name
|
94
|
+
rubygems_version: 1.3.7
|
95
|
+
signing_key:
|
96
|
+
specification_version: 3
|
97
|
+
summary: Canonicalises and matches person names
|
98
|
+
test_files: []
|
99
|
+
|