icu_tournament 1.1.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,274 +0,0 @@
1
module ICU

=begin rdoc

== Names

This class exists for two main reasons:

* to normalise to a common format the different ways names are typed in practice
* to be able to match two names even if they are not exactly the same

To create a name object, supply both the first and second names separately to the constructor.

  robert = ICU::Name.new(' robert j ', ' FISCHER ')

Capitalisation, white space and punctuation will all be automatically corrected:

  robert.name   # => 'Robert J. Fischer'
  robert.rname  # => 'Fischer, Robert J.'  (reversed name)

To avoid ambiguity when either the first or second names consist of multiple words, it is better to
supply the two separately, if known. However, the full name can be supplied alone to the constructor
and a guess will be made as to the first and last names.

  bobby = ICU::Name.new(' bobby fischer ')

  bobby.first   # => 'Bobby'
  bobby.last    # => 'Fischer'

Names will match even if one is missing middle initials or if a nickname is used for one of the first names.

  bobby.match(robert)  # => true

Note that the class is aware of only common nicknames (e.g. _Bobby_ and _Robert_, _Bill_ and _William_, etc), not all possibilities.

Supplying the _match_ method with strings is equivalent to instantiating a Name instance with the same
strings and then matching it. So, for example the following are equivalent:

  robert.match('R. J.', 'Fischer')                 # => true
  robert.match(ICU::Name.new('R. J.', 'Fischer'))  # => true

In those examples, the initial _R_ matches the first letter of _Robert_. However, nickname matches will not
always work with initials. In the next example, the initial _R_ does not match the first letter _B_ of the
nickname _Bobby_.

  bobby.match('R. J.', 'Fischer')  # => false

Some of the ways last names are canonicalised are illustrated below:

  ICU::Name.new('John', 'O Reilly').last  # => "O'Reilly"
  ICU::Name.new('dave', 'mcmanus').last   # => "McManus"
  ICU::Name.new('pete', 'MACMANUS').last  # => "MacManus"

Neither construction nor matching modifies the strings that are passed in, and
matching never alters the canonical names held by either object.

=end

  class Name
    attr_reader :first, :last

    # Build a name from separate first and last names, or from one string
    # holding the full name ("First Last" or "Last, First"). Any object is
    # accepted; it is converted with +to_s+, so another Name works too.
    def initialize(name1='', name2='')
      @name1 = name1.to_s
      @name2 = name2.to_s
      canonicalize
    end

    # The full name, first name(s) first, e.g. 'Robert J. Fischer'.
    def name
      [@first, @last].reject { |part| part.empty? }.join(' ')
    end

    # The full name, last name(s) first, e.g. 'Fischer, Robert J.'.
    def rname
      [@last, @first].reject { |part| part.empty? }.join(', ')
    end

    # Same as rname.
    def to_s
      rname
    end

    # Returns true if this name matches the name described by the arguments,
    # which are interpreted exactly as by the constructor.
    def match(name1='', name2='')
      other = Name.new(name1, name2)
      match_first(first, other.first) && match_last(last, other.last)
    end

    private

    # Derive the canonical @first and @last from the raw constructor inputs.
    def canonicalize
      first, last = partition
      @first = finish_first(first)
      @last  = finish_last(last)
    end

    # Split the raw input into cleaned [first, last] strings.
    def partition
      if @name2.empty?
        # Only one input so we must split first and last.
        parts = @name1.split(/,/)
        if parts.size > 1
          # "Last, First ...": everything after the first comma is the first name.
          last  = clean(parts.shift || '')
          first = clean(parts.join(' '))
        else
          # "First ... Last": the final word is taken as the last name.
          parts = clean(@name1).split(/ /)
          last  = parts.pop || ''
          first = parts.join(' ')
        end
      else
        # Two inputs, so we are given first and last.
        first  = clean(@name1)
        # finish_last inspects the capitalisation the caller typed, so keep a
        # sanitised (but not re-capitalised) private copy of the last name.
        @name2 = sanitize(@name2)
        last   = clean(@name2)
      end
      [first, last]
    end

    # Return a copy of +name+ with punctuation normalised: back-ticks become
    # apostrophes, characters other than letters, hyphens, dots, apostrophes
    # and white space are removed, dots become spaces, white space around
    # hyphens is removed and repeated apostrophes are collapsed.
    # The argument itself is left untouched.
    def sanitize(name)
      name.gsub(/`/, "'").
           gsub(/[^-a-zA-Z.'\s]/, '').
           gsub(/\./, ' ').
           gsub(/\s*-\s*/, '-').
           gsub(/'+/, "'")
    end

    # Return a sanitised copy of +name+ with single spaces between words and
    # every word (and every hyphen-separated part of a word) capitalised.
    def clean(name)
      sanitize(name).strip.downcase.split(/\s+/).map do |word|
        trimmed = word.sub(/^-+/, '').sub(/-+$/, '')
        # capitalize, not capitalize!: the bang form returns nil when nothing
        # changes, which would silently drop parts such as "'t".
        trimmed.split(/-/).map { |part| part.capitalize }.join('-')
      end.join(' ')
    end

    # Add a full stop after each initial (a capital letter at the end of a word).
    def finish_first(names)
      names.gsub(/([A-Z])\b/, '\1.')
    end

    # Apply the capitalisation conventions for O', Mc and Mac style last names.
    def finish_last(names)
      names = names.gsub(/\b([A-Z])'([a-z])/) { "#{$1}'#{$2.upcase}" }
      names = names.gsub(/\bMc([a-z])/) { "Mc#{$1.upcase}" }
      names = names.gsub(/\bMac([a-z])/) do
        letter = $1
        # Keep the letter after "Mac" in lower case only if it was typed that
        # way in the separately supplied last name (e.g. "macieja").
        typed_lower = @name2 =~ /[mM][aA][cC]#{letter}/
        "Mac#{typed_lower ? letter : letter.upcase}"
      end
      names.gsub(/\bO ([A-Z])/) { "O'#{$1}" }
    end

    # Match a complete first name.
    def match_first(first1, first2)
      # Is this one a walk in the park?
      return true if first1 == first2

      # No easy ride. Begin by splitting into individual first names.
      first1 = split_first(first1)
      first2 = split_first(first2)

      # Get the long list and the short list.
      long, short = first1.size >= first2.size ? [first1, first2] : [first2, first1]

      # The short one must be a "subset" of the long one (in order).
      # An extra condition must also be satisfied: either the very first name
      # of the long list matched, or some name matched in full.
      extra = false
      long.each_with_index do |lword, i|
        score = match_first_name(lword, short.first)
        if score >= 0
          short.shift
          extra = true if i == 0 || score == 0
        end
        break if short.empty?
      end

      # There's a match if the following is true.
      short.empty? && extra
    end

    # Match a complete last name.
    def match_last(last1, last2)
      return true if last1 == last2
      fold_last(last1) == fold_last(last2)
    end

    # Return a copy of a last name reduced to a form used only for comparison.
    def fold_last(last)
      last.downcase.           # MacDonaugh and Macdonaugh
           gsub(/\bmac/, 'mc'). # MacDonaugh and McDonaugh
           tr('-', ' ')         # Lowry-O'Reilly and Lowry O'Reilly
    end

    # Split a complete first name for matching, without modifying it.
    def split_first(first)
      parts = first.tr('-', ' ').split(/ /)  # J. K. and J.-K.
      parts.empty? ? [''] : parts            # in case input was empty string
    end

    # Match individual first names or initials.
    # -1 = no match
    #  0 = full match
    #  1 = match involving 1 initial
    #  2 = match involving 2 initials
    def match_first_name(first1, first2)
      initials = [first1, first2].select { |f| f =~ /^[A-Z]\.?$/ }.size
      return initials if first1 == first2
      return 0 if initials == 0 && match_nick_name(first1, first2)
      return -1 unless initials > 0
      first1[0] == first2[0] ? initials : -1
    end

    # Match two first names that might be equivalent nicknames.
    def match_nick_name(nick1, nick2)
      compile_nick_names unless @@nc
      code1 = @@nc[nick1]
      return false unless code1
      code1 == @@nc[nick2]
    end

    # Compile the nick names code hash when matching nick names is first attempted.
    # Every name in the same row of the nickname list gets the same code.
    def compile_nick_names
      codes = Hash.new
      @@nl.each_with_index do |nicks, index|
        nicks.each do |n|
          raise "duplicate name #{n}" if codes[n]
          codes[n] = index + 1
        end
      end
      # Publish only a completely built table.
      @@nc = codes
    end

    # An array of data for matching nicknames and also a few common misspellings.
    # Each row lists first names that are treated as equivalent.
    @@nc = nil
    @@nl = <<-EOF.split(/\n/).map { |x| x.split(' ') }.reject { |x| x.empty? }
      Abdul Abul
      Alexander Alex
      Anandagopal Ananda
      Anne Ann
      Anthony Tony
      Benjamin Ben
      Catherine Cathy Cath
      Daniel Danial Danny Dan
      David Dave
      Deborah Debbie
      Des Desmond
      Eamonn Eamon
      Edward Eddie Ed
      Eric Erick Erik
      Frederick Frederic Fred
      Gerald Gerry
      Gerhard Gerard Ger
      James Jim
      Joanna Joan Joanne
      John Johnny
      Jonathan Jon
      Kenneth Ken Kenny
      Michael Mike Mick Micky
      Nicholas Nick Nicolas
      Nicola Nickie Nicky
      Patrick Pat Paddy
      Peter Pete
      Philippe Philip Phillippe Phillip
      Rick Ricky
      Robert Bob Bobby
      Samual Sam Samuel
      Stefanie Stef
      Stephen Steven Steve
      Terence Terry
      Thomas Tom Tommy
      William Will Willy Willie Bill
    EOF
  end
end
data/spec/name_spec.rb DELETED
@@ -1,208 +0,0 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
-
3
module ICU
  # Behavioural examples for ICU::Name: canonicalisation of first and last
  # names, construction from one or two inputs, and tolerant matching
  # (initials, nicknames, hyphens and Mc/Mac variants).
  describe Name do
    context "public methods" do
      let(:simple) { Name.new('mark j l', 'orr') }

      it "#first returns the first name(s)" do
        simple.first.should == 'Mark J. L.'
      end

      it "#last returns the last name(s)" do
        simple.last.should == 'Orr'
      end

      it "#name returns the full name with first name(s) first" do
        simple.name.should == 'Mark J. L. Orr'
      end

      it "#rname returns the full name with last name(s) first" do
        simple.rname.should == 'Orr, Mark J. L.'
      end

      it "#to_s is the same as rname" do
        simple.to_s.should == 'Orr, Mark J. L.'
      end

      it "#match returns true if and only if two names match" do
        simple.match('mark j l orr').should be_true
        simple.match('malcolm g l orr').should be_false
      end
    end

    # Mirrors the examples given in the class documentation.
    context "rdoc expample" do
      let(:robert) { Name.new(' robert j ', ' FISCHER ') }
      let(:bobby)  { Name.new(' bobby fischer ') }

      it "should get Robert" do
        robert.name.should == 'Robert J. Fischer'
      end

      it "should get Bobby" do
        bobby.last.should == 'Fischer'
        bobby.first.should == 'Bobby'
      end

      it "should match Robert and Bobby" do
        robert.match(bobby).should be_true
        robert.match('R. J.', 'Fischer').should be_true
        bobby.match('R. J.', 'Fischer').should be_false
      end

      it "should canconicalise last names" do
        [
          ['John', 'O Reilly', "O'Reilly"],
          ['dave', 'mcmanus',  "McManus"],
          ['pete', 'MACMANUS', "MacManus"],
        ].each do |given, family, expected|
          Name.new(given, family).last.should == expected
        end
      end
    end

    context "names that are already canonical" do
      it "should not be altered" do
        [
          ['Mark J. L.',       'Orr',          'Mark J. L. Orr'],
          ['Anna-Marie J.-K.', 'Liviu-Dieter', 'Anna-Marie J.-K. Liviu-Dieter'],
        ].each do |given, family, expected|
          Name.new(given, family).name.should == expected
        end
      end
    end

    context "last names beginning with a single letter followed by a quote" do
      it "should be handled correctly" do
        [
          ['una',      "O'boyle", "Una O'Boyle"],
          ['jonathan', 'd`arcy',  "Jonathan D'Arcy"],
          ['erwin e',  "L'AMI",   "Erwin E. L'Ami"],
          ['cormac',   "o brien", "Cormac O'Brien"],
        ].each do |given, family, expected|
          Name.new(given, family).name.should == expected
        end
      end
    end

    context "last beginning with Mc" do
      it "should be handled correctly" do
        [
          ['shane',      "mccabe",    "Shane McCabe"],
          ['shawn',      "macDonagh", "Shawn MacDonagh"],
          ['shawn',      "macdonagh", "Shawn Macdonagh"],
          ['bartlomiej', "macieja",   "Bartlomiej Macieja"],
        ].each do |given, family, expected|
          Name.new(given, family).name.should == expected
        end
      end
    end

    context "doubled barrelled names or initials" do
      it "should be handled correctly" do
        [
          ['anna-marie',  'den-otter',        'Anna-Marie Den-Otter'],
          ['j-k',         'rowling',          'J.-K. Rowling'],
          ["mark j. - l", 'ORR',              'Mark J.-L. Orr'],
          ['JOHANNA',     "lowry-o'REILLY",   "Johanna Lowry-O'Reilly"],
          ['hannah',      "lowry - o reilly", "Hannah Lowry-O'Reilly"],
        ].each do |given, family, expected|
          Name.new(given, family).name.should == expected
        end
      end
    end

    context "extraneous white space" do
      it "should be handled correctly" do
        padded = Name.new(' mark j l ', " \t\r\n orr \n")
        padded.name.should == 'Mark J. L. Orr'
      end
    end

    context "extraneous full stops" do
      it "should be handled correctly" do
        dotted = Name.new('. mark j..l', 'orr.')
        dotted.name.should == 'Mark J. L. Orr'
      end
    end

    context "construction from a single string" do
      let(:mark1) { Name.new('ORR, mark j l') }
      let(:mark2) { Name.new('MARK J L ORR') }
      let(:oreil) { Name.new("O'Reilly, j-k") }

      it "should be possible in simple cases" do
        mark1.first.should == 'Mark J. L.'
        mark1.last.should == 'Orr'
        mark2.first.should == 'Mark J. L.'
        mark2.last.should == 'Orr'
        oreil.name.should == "J.-K. O'Reilly"
      end
    end

    context "construction from an instance" do
      it "should be possible" do
        inner = Name.new('ORR, mark j l')
        Name.new(inner).name.should == 'Mark J. L. Orr'
      end
    end

    context "constuction corner cases" do
      it "should be handled correctly" do
        Name.new('Orr').name.should == 'Orr'
        Name.new('Orr').rname.should == 'Orr'
        # An empty string and no argument at all both give an empty name.
        Name.new('').name.should == ''
        Name.new('').rname.should == ''
        Name.new.name.should == ''
        Name.new.rname.should == ''
      end
    end

    context "inputs to matching" do
      let(:mark) { Name.new('Mark', 'Orr') }
      let(:kram) { Name.new('Mark', 'Orr') }

      it "should be flexible" do
        mark.match('Mark', 'Orr').should be_true
        mark.match('Mark Orr').should be_true
        mark.match('Orr, Mark').should be_true
        mark.match(kram).should be_true
      end
    end

    context "first name matches" do
      it "should match when first names are the same" do
        Name.new('Mark', 'Orr').match('Mark', 'Orr').should be_true
      end

      it "should be flexible with regards to hyphens in double barrelled names" do
        [
          ['J.-K.',     'J. K.'],
          ['Joanne-K.', 'Joanne K.'],
        ].each do |hyphenated, spaced|
          Name.new(hyphenated, 'Rowling').match(spaced, 'Rowling').should be_true
        end
      end

      it "should match initials" do
        [
          ['M. J. L.', 'Mark John Legard'],
          ['M.',       'Mark'],
          ['M. J. L.', 'Mark'],
          ['M.',       'M. J.'],
        ].each do |ours, theirs|
          Name.new(ours, 'Orr').match(theirs, 'Orr').should be_true
        end
        Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
      end

      it "should not match on full names not in first position or without an exact match" do
        Name.new('J. M.', 'Orr').match('John', 'Orr').should be_true
        Name.new('M. J.', 'Orr').match('John', 'Orr').should be_false
        Name.new('M. John', 'Orr').match('John', 'Orr').should be_true
      end

      it "should handle common nicknames" do
        [
          ['William', 'Bill'],
          ['David',   'Dave'],
          ['Mick',    'Mike'],
        ].each do |ours, theirs|
          Name.new(ours, 'Orr').match(theirs, 'Orr').should be_true
        end
      end

      it "should not mix up nick names" do
        Name.new('David', 'Orr').match('Bill', 'Orr').should be_false
      end
    end

    context "last name matches" do
      it "should be flexible with regards to hyphens in double barrelled names" do
        hyphenated = Name.new('Johanna', "Lowry-O'Reilly")
        hyphenated.match('Johanna', "Lowry O'Reilly").should be_true
      end

      it "should be case insensitive in matches involving Macsomething and MacSomething" do
        Name.new('Alan', 'MacDonagh').match('Alan', 'Macdonagh').should be_true
      end

      it "should cater for the common mispelling of names beginning with Mc or Mac" do
        Name.new('Alan', 'McDonagh').match('Alan', 'MacDonagh').should be_true
        # "mac" is only folded to "mc" at the start of a word.
        Name.new('Darko', 'Polimac').match('Darko', 'Polimc').should be_false
      end
    end
  end
end