icu_name 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENCE +22 -0
- data/README.rdoc +62 -0
- data/lib/icu_name/version.rb +7 -0
- data/lib/icu_name.rb +230 -0
- data/spec/icu_name_spec.rb +208 -0
- data/spec/spec_helper.rb +8 -0
- metadata +99 -0
data/LICENCE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2011 Mark Orr
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
= ICU Name
|
2
|
+
|
3
|
+
Canonicalises and matches person names with Western European characters and first and last names.
|
4
|
+
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
For ruby 1.9.2 and above.
|
8
|
+
|
9
|
+
gem install icu_name
|
10
|
+
|
11
|
+
== Names
|
12
|
+
|
13
|
+
This class exists for two main purposes:
|
14
|
+
|
15
|
+
* to normalise to a common format the different ways names are typed in practice
|
16
|
+
* to be able to match two names even if they are not exactly the same
|
17
|
+
|
18
|
+
To create a name object, supply both the first and second names separately to the constructor.
|
19
|
+
|
20
|
+
robert = ICU::Name.new(' robert j ', ' FISCHER ')
|
21
|
+
|
22
|
+
Capitalisation, white space and punctuation will all be automatically corrected:
|
23
|
+
|
24
|
+
robert.name # => 'Robert J. Fischer'
|
25
|
+
robert.rname # => 'Fischer, Robert J.' (reversed name)
|
26
|
+
|
27
|
+
To avoid ambiguity when either the first or second names consist of multiple words, it is better to
|
28
|
+
supply the two separately, if known. However, the full name can be supplied alone to the constructor
|
29
|
+
and a guess will be made as to the first and last names.
|
30
|
+
|
31
|
+
bobby = ICU::Name.new(' bobby fischer ')
|
32
|
+
|
33
|
+
bobby.first # => 'Bobby'
|
34
|
+
bobby.last # => 'Fischer'
|
35
|
+
|
36
|
+
Names will match even if one is missing middle initials or if a nickname is used for one of the first names.
|
37
|
+
|
38
|
+
bobby.match('Robert J.', 'Fischer') # => true
|
39
|
+
|
40
|
+
Note that the class is aware of only common nicknames (e.g. _Bobby_ and _Robert_, _Bill_ and _William_, etc), not all possibilities.
|
41
|
+
|
42
|
+
Supplying the _match_ method with strings is equivalent to instantiating a Name instance with the same
|
43
|
+
strings and then matching it. So, for example the following are equivalent:
|
44
|
+
|
45
|
+
robert.match('R.', 'Fischer') # => true
|
46
|
+
robert.match(ICU::Name.new('R.', 'Fischer')) # => true
|
47
|
+
|
48
|
+
The initial _R_, for example, matches the first letter of _Robert_. However, nickname matches will not
|
49
|
+
always work with initials. In the next example, the initial _R_ does not match the first letter _B_ of the
|
50
|
+
nickname _Bobby_.
|
51
|
+
|
52
|
+
bobby.match('R. J.', 'Fischer') # => false
|
53
|
+
|
54
|
+
Some of the ways last names are canonicalised are illustrated below:
|
55
|
+
|
56
|
+
ICU::Name.new('John', 'O Reilly').last # => "O'Reilly"
|
57
|
+
ICU::Name.new('dave', 'mcmanus').last # => "McManus"
|
58
|
+
ICU::Name.new('pete', 'MACMANUS').last # => "MacManus"
|
59
|
+
|
60
|
+
== Author
|
61
|
+
|
62
|
+
Mark Orr, rating officer for the Irish Chess Union (ICU[http://icu.ie]).
|
data/lib/icu_name.rb
ADDED
@@ -0,0 +1,230 @@
|
|
1
|
+
module ICU
  # Canonicalises a person's first and last names and matches two names against
  # each other, tolerating differences in case, white space, punctuation, missing
  # middle initials, a fixed list of nicknames and Mc/Mac spelling variants.
  class Name
    attr_reader :first, :last

    # Construct from one or two strings or any objects that have a to_s method.
    #
    # With two arguments they are taken as the first and last names. With one
    # argument the full name is split: "Last, First" if it contains a comma,
    # otherwise the final word is taken as the last name.
    def initialize(name1='', name2='')
      # String#to_s returns the receiver itself, so take private copies: the
      # caller's strings must never be altered, and they may be frozen.
      @name1 = name1.to_s.dup
      @name2 = name2.to_s.dup
      canonicalize
    end

    # Return a complete name, first name first, no comma.
    # A new string is returned on every call.
    def name
      [@first, @last].reject { |part| part.empty? }.join(' ')
    end

    # Return a reversed complete name, first name last after a comma.
    # A new string is returned on every call.
    def rname
      [@last, @first].reject { |part| part.empty? }.join(', ')
    end

    # Convert object to a string (same as rname).
    def to_s
      rname
    end

    # Match another name to this object, returning true or false.
    # The arguments are interpreted exactly as for the constructor, so a single
    # Name instance, a single full-name string or separate first and last name
    # strings are all accepted. Matching does not modify this object.
    def match(name1='', name2='')
      other = Name.new(name1, name2)
      match_first(first, other.first) && match_last(last, other.last)
    end

    private

    # Canonicalise the first and last names.
    def canonicalize
      first, last = partition
      @first = finish_first(first)
      @last  = finish_last(last)
    end

    # Split the input into cleaned first and last parts, returned as [first, last].
    def partition
      if @name2.length == 0
        # Only one input so we must split first and last.
        parts = @name1.split(/,/)
        if parts.size > 1
          # "Last, First": everything after the first comma is the first name.
          last  = clean(parts.shift || '')
          first = clean(parts.join(' '))
        else
          # No comma: the final word is the last name.
          parts = clean(@name1).split(/ /)
          last  = parts.pop || ''
          first = parts.join(' ')
        end
      else
        # Two inputs, so we are given first and last.
        first = clean(@name1)
        last  = clean(@name2)
      end
      [first, last]
    end

    # Normalise the characters of a name without touching case or word order:
    # backticks become apostrophes, anything other than ASCII letters, hyphens,
    # full stops, apostrophes and white space is removed, full stops become
    # spaces, white space around hyphens is removed and runs of apostrophes are
    # squeezed. Returns a new string; the argument is left untouched.
    def scrub(name)
      name.gsub(/`/, "'").
           gsub(/[^-a-zA-Z.'\s]/, '').
           gsub(/\./, ' ').
           gsub(/\s*-\s*/, '-').
           gsub(/'+/, "'")
    end

    # Clean up characters in any name and capitalise each word and each
    # hyphen-separated part of a word. Returns a new string.
    def clean(name)
      words = scrub(name).strip.downcase.split(/\s+/).map do |word|
        trimmed = word.sub(/\A-+/, '').sub(/-+\z/, '')
        # Non-bang capitalize: capitalize! returns nil when nothing changes
        # (e.g. a part starting with an apostrophe), which would lose the part.
        trimmed.split(/-/).map { |part| part.capitalize }.join('-')
      end
      words.join(' ')
    end

    # Apply final touches to finish canonicalising a first name:
    # a capital letter standing alone is an initial and gets a full stop.
    def finish_first(names)
      names.gsub(/([A-Z])\b/, '\1.')
    end

    # Apply final touches to finish canonicalising a last name
    # (O'Reilly, McManus, MacManus, "O Reilly" => "O'Reilly").
    def finish_last(names)
      # The last name as typed (characters normalised, case preserved). It is
      # empty when the name was constructed from a single string.
      typed = scrub(@name2)
      result = names.gsub(/\b([A-Z])'([a-z])/) { "#{$1}'#{$2.upcase}" }
      result = result.gsub(/\bMc([a-z])/) { "Mc#{$1.upcase}" }
      result = result.gsub(/\bMac([a-z])/) do
        letter = $1 # save before the next regexp match resets $1
        # Keep the letter after "Mac" in lower case only if it was typed that way.
        typed =~ /[mM][aA][cC]#{letter}/ ? "Mac#{letter}" : "Mac#{letter.upcase}"
      end
      result.gsub(/\bO ([A-Z])/) { "O'#{$1}" }
    end

    # Match a complete first name.
    def match_first(first1, first2)
      # Is this one a walk in the park?
      return true if first1 == first2

      # No easy ride. Begin by splitting into individual first names.
      first1 = split_first(first1)
      first2 = split_first(first2)

      # Get the long list and the short list.
      long, short = first1.size >= first2.size ? [first1, first2] : [first2, first1]

      # The short one must be a "subset" of the long one (in order).
      # An extra condition must also be satisfied: at least one of the matches
      # is either in first position or does not involve any initials.
      extra = false
      long.each_with_index do |lword, i|
        score = match_first_name(lword, short.first)
        if score >= 0
          short.shift
          extra = true if i == 0 || score == 0
        end
        break if short.empty?
      end

      # There's a match if the following is true.
      short.empty? && extra
    end

    # Match a complete last name.
    def match_last(last1, last2)
      return true if last1 == last2
      # Compare normalised copies; the originals belong to the Name objects.
      norm1, norm2 = [last1, last2].map do |last|
        last.downcase.            # MacDonaugh and Macdonaugh
             gsub(/\bmac/, 'mc'). # MacDonaugh and McDonaugh
             tr('-', ' ')         # Lowry-O'Reilly and Lowry O'Reilly
      end
      norm1 == norm2
    end

    # Split a complete first name for matching. Returns a new array of words
    # (never empty) and leaves the argument untouched.
    def split_first(first)
      words = first.tr('-', ' ').split(/ /) # J. K. and J.-K.
      words = [''] if words.size == 0       # in case input was empty string
      words
    end

    # Match individual first names or initials.
    # -1 = no match
    #  0 = full match
    #  1 = match involving 1 initial
    #  2 = match involving 2 initials
    def match_first_name(first1, first2)
      initials = 0
      initials += 1 if first1 =~ /\A[A-Z]\.?\z/
      initials += 1 if first2 =~ /\A[A-Z]\.?\z/
      return initials if first1 == first2
      return 0 if initials == 0 && match_nick_name(first1, first2)
      return -1 unless initials > 0
      return initials if first1[0] == first2[0]
      -1
    end

    # Match two first names that might be equivalent nicknames.
    def match_nick_name(nick1, nick2)
      compile_nick_names unless @@nc
      code1 = @@nc[nick1]
      return false unless code1
      code1 == @@nc[nick2]
    end

    # Compile the nick names code hash when matching nick names is first attempted.
    # Every name in the same group gets the same integer code.
    def compile_nick_names
      codes = {}
      @@nl.each_with_index do |nicks, index|
        nicks.each do |n|
          # raise (not throw): throw without a matching catch reports an
          # unrelated "uncaught throw" error instead of this message.
          raise "duplicate name #{n}" if codes[n]
          codes[n] = index + 1
        end
      end
      # Publish only a completely built table.
      @@nc = codes
    end

    # An array of data for matching nicknames and also a few common misspellings.
    # Each line is one group of names that are treated as equivalent.
    @@nc = nil
    @@nl = <<-EOF.split(/\n/).map { |x| x.split(' ') }.reject { |x| x.empty? }
      Abdul Abul
      Alexander Alex
      Anandagopal Ananda
      Anne Ann
      Anthony Tony
      Benjamin Ben
      Catherine Cathy Cath
      Daniel Danial Danny Dan
      David Dave
      Deborah Debbie
      Des Desmond
      Eamonn Eamon
      Edward Eddie Ed
      Eric Erick Erik
      Frederick Frederic Fred
      Gerald Gerry
      Gerhard Gerard Ger
      James Jim
      Joanna Joan Joanne
      John Johnny
      Jonathan Jon
      Kenneth Ken Kenny
      Michael Mike Mick Micky
      Nicholas Nick Nicolas
      Nicola Nickie Nicky
      Patrick Pat Paddy
      Peter Pete
      Philippe Philip Phillippe Phillip
      Rick Ricky
      Robert Bob Bobby
      Samual Sam Samuel
      Stefanie Stef
      Stephen Steven Steve
      Terence Terry
      Thomas Tom Tommy
      William Will Willy Willie Bill
    EOF
  end
end
|
@@ -0,0 +1,208 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Behavioural specification for ICU::Name: canonicalisation of first and last
# names and matching of two names. Example descriptions have had spelling
# mistakes corrected; the expectations themselves are unchanged.
module ICU
  describe Name do
    context "public methods" do
      before(:each) do
        @simple = Name.new('mark j l', 'orr')
      end

      it "#first returns the first name(s)" do
        @simple.first.should == 'Mark J. L.'
      end

      it "#last returns the last name(s)" do
        @simple.last.should == 'Orr'
      end

      it "#name returns the full name with first name(s) first" do
        @simple.name.should == 'Mark J. L. Orr'
      end

      it "#rname returns the full name with last name(s) first" do
        @simple.rname.should == 'Orr, Mark J. L.'
      end

      it "#to_s is the same as rname" do
        @simple.to_s.should == 'Orr, Mark J. L.'
      end

      it "#match returns true if and only if two names match" do
        @simple.match('mark j l orr').should be_true
        @simple.match('malcolm g l orr').should be_false
      end
    end

    # Mirrors the examples given in README.rdoc.
    context "rdoc example" do
      before(:each) do
        @robert = Name.new(' robert j ', ' FISCHER ')
        @bobby = Name.new(' bobby fischer ')
      end

      it "should get Robert" do
        @robert.name.should == 'Robert J. Fischer'
      end

      it "should get Bobby" do
        @bobby.last.should == 'Fischer'
        @bobby.first.should == 'Bobby'
      end

      it "should match Robert and Bobby" do
        @robert.match(@bobby).should be_true
        @robert.match('R. J.', 'Fischer').should be_true
        @bobby.match('R. J.', 'Fischer').should be_false
      end

      it "should canonicalise last names" do
        Name.new('John', 'O Reilly').last.should == "O'Reilly"
        Name.new('dave', 'mcmanus').last.should == "McManus"
        Name.new('pete', 'MACMANUS').last.should == "MacManus"
      end
    end

    context "names that are already canonical" do
      it "should not be altered" do
        Name.new('Mark J. L.', 'Orr').name.should == 'Mark J. L. Orr'
        Name.new('Anna-Marie J.-K.', 'Liviu-Dieter').name.should == 'Anna-Marie J.-K. Liviu-Dieter'
      end
    end

    context "last names beginning with a single letter followed by a quote" do
      it "should be handled correctly" do
        Name.new('una', "O'boyle").name.should == "Una O'Boyle"
        Name.new('jonathan', 'd`arcy').name.should == "Jonathan D'Arcy"
        Name.new('erwin e', "L'AMI").name.should == "Erwin E. L'Ami"
        Name.new('cormac', "o brien").name.should == "Cormac O'Brien"
      end
    end

    context "last beginning with Mc" do
      it "should be handled correctly" do
        Name.new('shane', "mccabe").name.should == "Shane McCabe"
        # The case typed after "Mac" is preserved when first and last are given separately.
        Name.new('shawn', "macDonagh").name.should == "Shawn MacDonagh"
        Name.new('shawn', "macdonagh").name.should == "Shawn Macdonagh"
        Name.new('bartlomiej', "macieja").name.should == "Bartlomiej Macieja"
      end
    end

    context "doubled barrelled names or initials" do
      it "should be handled correctly" do
        Name.new('anna-marie', 'den-otter').name.should == 'Anna-Marie Den-Otter'
        Name.new('j-k', 'rowling').name.should == 'J.-K. Rowling'
        Name.new("mark j. - l", 'ORR').name.should == 'Mark J.-L. Orr'
        Name.new('JOHANNA', "lowry-o'REILLY").name.should == "Johanna Lowry-O'Reilly"
        Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
      end
    end

    context "extraneous white space" do
      it "should be handled correctly" do
        Name.new(' mark j l ', " \t\r\n orr \n").name.should == 'Mark J. L. Orr'
      end
    end

    context "extraneous full stops" do
      it "should be handled correctly" do
        Name.new('. mark j..l', 'orr.').name.should == 'Mark J. L. Orr'
      end
    end

    context "construction from a single string" do
      before(:each) do
        @mark1 = Name.new('ORR, mark j l')
        @mark2 = Name.new('MARK J L ORR')
        @oreil = Name.new("O'Reilly, j-k")
      end

      it "should be possible in simple cases" do
        @mark1.first.should == 'Mark J. L.'
        @mark1.last.should == 'Orr'
        @mark2.first.should == 'Mark J. L.'
        @mark2.last.should == 'Orr'
        @oreil.name.should == "J.-K. O'Reilly"
      end
    end

    context "construction from an instance" do
      it "should be possible" do
        Name.new(Name.new('ORR, mark j l')).name.should == 'Mark J. L. Orr'
      end
    end

    context "construction corner cases" do
      it "should be handled correctly" do
        Name.new('Orr').name.should == 'Orr'
        Name.new('Orr').rname.should == 'Orr'
        Name.new('').name.should == ''
        Name.new('').rname.should == ''
        Name.new.name.should == ''
        Name.new.rname.should == ''
      end
    end

    context "inputs to matching" do
      before(:all) do
        @mark = Name.new('Mark', 'Orr')
        @kram = Name.new('Mark', 'Orr')
      end

      it "should be flexible" do
        @mark.match('Mark', 'Orr').should be_true
        @mark.match('Mark Orr').should be_true
        @mark.match('Orr, Mark').should be_true
        @mark.match(@kram).should be_true
      end
    end

    context "first name matches" do
      it "should match when first names are the same" do
        Name.new('Mark', 'Orr').match('Mark', 'Orr').should be_true
      end

      it "should be flexible with regards to hyphens in double barrelled names" do
        Name.new('J.-K.', 'Rowling').match('J. K.', 'Rowling').should be_true
        Name.new('Joanne-K.', 'Rowling').match('Joanne K.', 'Rowling').should be_true
      end

      it "should match initials" do
        Name.new('M. J. L.', 'Orr').match('Mark John Legard', 'Orr').should be_true
        Name.new('M.', 'Orr').match('Mark', 'Orr').should be_true
        Name.new('M. J. L.', 'Orr').match('Mark', 'Orr').should be_true
        Name.new('M.', 'Orr').match('M. J.', 'Orr').should be_true
        Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
      end

      it "should not match on full names not in first position or without an exact match" do
        Name.new('J. M.', 'Orr').match('John', 'Orr').should be_true
        Name.new('M. J.', 'Orr').match('John', 'Orr').should be_false
        Name.new('M. John', 'Orr').match('John', 'Orr').should be_true
      end

      it "should handle common nicknames" do
        Name.new('William', 'Orr').match('Bill', 'Orr').should be_true
        Name.new('David', 'Orr').match('Dave', 'Orr').should be_true
        Name.new('Mick', 'Orr').match('Mike', 'Orr').should be_true
      end

      it "should not mix up nick names" do
        Name.new('David', 'Orr').match('Bill', 'Orr').should be_false
      end
    end

    context "last name matches" do
      it "should be flexible with regards to hyphens in double barrelled names" do
        Name.new('Johanna', "Lowry-O'Reilly").match('Johanna', "Lowry O'Reilly").should be_true
      end

      it "should be case insensitive in matches involving Macsomething and MacSomething" do
        Name.new('Alan', 'MacDonagh').match('Alan', 'Macdonagh').should be_true
      end

      it "should cater for the common misspelling of names beginning with Mc or Mac" do
        Name.new('Alan', 'McDonagh').match('Alan', 'MacDonagh').should be_true
        # "mac" is only normalised at the start of a word.
        Name.new('Darko', 'Polimac').match('Darko', 'Polimc').should be_false
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: icu_name
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Mark Orr
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-12-31 00:00:00 +00:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: bundler
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 1
|
30
|
+
- 0
|
31
|
+
- 7
|
32
|
+
version: 1.0.7
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rspec
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
description: Canonicalises and matches person names with Latin1 characters and first and last names
|
49
|
+
email: mark.j.l.orr@googlemail.com
|
50
|
+
executables: []
|
51
|
+
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
extra_rdoc_files:
|
55
|
+
- LICENCE
|
56
|
+
- README.rdoc
|
57
|
+
files:
|
58
|
+
- lib/icu_name/version.rb
|
59
|
+
- lib/icu_name.rb
|
60
|
+
- spec/icu_name_spec.rb
|
61
|
+
- spec/spec_helper.rb
|
62
|
+
- LICENCE
|
63
|
+
- README.rdoc
|
64
|
+
has_rdoc: true
|
65
|
+
homepage: http://rubygems.org/gems/icu_name
|
66
|
+
licenses: []
|
67
|
+
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
segments:
|
87
|
+
- 1
|
88
|
+
- 3
|
89
|
+
- 6
|
90
|
+
version: 1.3.6
|
91
|
+
requirements: []
|
92
|
+
|
93
|
+
rubyforge_project: icu_name
|
94
|
+
rubygems_version: 1.3.7
|
95
|
+
signing_key:
|
96
|
+
specification_version: 3
|
97
|
+
summary: Canonicalises and matches person names
|
98
|
+
test_files: []
|
99
|
+
|