icu_name 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +37 -1
- data/lib/icu_name/name.rb +50 -34
- data/lib/icu_name/version.rb +1 -1
- data/spec/name_spec.rb +154 -22
- metadata +37 -7
data/README.rdoc
CHANGED
@@ -8,6 +8,8 @@ For ruby 1.9.2 and above.
|
|
8
8
|
|
9
9
|
gem install icu_name
|
10
10
|
|
11
|
+
It depends on active_support and i18n.
|
12
|
+
|
11
13
|
== Names
|
12
14
|
|
13
15
|
This class exists for two main purposes:
|
@@ -55,8 +57,42 @@ Some of the ways last names are canonicalised are illustrated below:
|
|
55
57
|
|
56
58
|
ICU::Name.new('John', 'O Reilly').last # => "O'Reilly"
|
57
59
|
ICU::Name.new('dave', 'mcmanus').last # => "McManus"
|
58
|
-
ICU::Name.new('pete', 'MACMANUS').last # => "MacManus"
|
59
60
|
|
61
|
+
== Characters and Encoding
|
62
|
+
|
63
|
+
The class can only cope with Western European letter characters, including the accented ones in Latin-1.
|
64
|
+
Its various accessors (_first_, _last_, _name_, _rname_, _to_s_) always return strings encoded in UTF-8,
|
65
|
+
no matter what the input encoding.
|
66
|
+
|
67
|
+
eric = ICU::Name.new('éric', 'PRIÉ')
|
68
|
+
eric.rname # => "Prié, Éric"
|
69
|
+
eric.rname.encoding.name # => "UTF-8"
|
70
|
+
|
71
|
+
eric = ICU::Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
72
|
+
eric.rname # => "Prié, Éric"
|
73
|
+
eric.rname.encoding.name # => "UTF-8"
|
74
|
+
|
75
|
+
Currently, all characters outside the Latin-1 range are removed as if they weren't there.
|
76
|
+
|
77
|
+
ICU::Name.new('Józef Żabiński').name # => "Józef Abiski"
|
78
|
+
ICU::Name.new('Bǔ Xiángzhì').name # => "B. Xiángzhì"
|
79
|
+
|
80
|
+
Accented Latin-1 characters can be transliterated into their ascii counterparts by setting the
|
81
|
+
_ascii_ option to a true value.
|
82
|
+
|
83
|
+
eric.name(:ascii => true) # => "Eric Prie"
|
84
|
+
|
85
|
+
This works with all the other accessors and also with the constructor:
|
86
|
+
|
87
|
+
eric_ascii = ICU::Name.new('éric', 'PRIÉ', :ascii => true)
|
88
|
+
eric_ascii.name # => "Eric Prie"
|
89
|
+
|
90
|
+
The option also relaxes the need for accented characters to match exactly:
|
91
|
+
|
92
|
+
eric.match('Éric', 'Prié') # => true
|
93
|
+
eric.match('Eric', 'Prie') # => false
|
94
|
+
eric.match('Eric', 'Prie', :ascii => true) # => true
|
95
|
+
|
60
96
|
== Author
|
61
97
|
|
62
98
|
Mark Orr, rating officer for the Irish Chess Union (ICU[http://icu.ie]).
|
data/lib/icu_name/name.rb
CHANGED
@@ -1,41 +1,60 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/inflector/transliterate'
|
3
|
+
require 'active_support/core_ext/string/multibyte'
|
4
|
+
|
1
5
|
module ICU
|
2
6
|
class Name
|
3
|
-
attr_reader :first, :last
|
4
7
|
|
5
8
|
# Construct from one or two strings or any objects that have a to_s method.
|
6
|
-
def initialize(name1='', name2='')
|
7
|
-
@name1 = name1.to_s
|
8
|
-
@name2 = name2.to_s
|
9
|
+
def initialize(name1='', name2='', opt={})
|
10
|
+
@name1 = Util.to_utf8(name1.to_s)
|
11
|
+
@name2 = Util.to_utf8(name2.to_s)
|
9
12
|
canonicalize
|
13
|
+
if opt[:ascii]
|
14
|
+
@first = ActiveSupport::Inflector.transliterate(@first)
|
15
|
+
@last = ActiveSupport::Inflector.transliterate(@last)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# First name getter.
|
20
|
+
def first(opt={})
|
21
|
+
return ActiveSupport::Inflector.transliterate(@first) if opt[:ascii]
|
22
|
+
@first
|
23
|
+
end
|
24
|
+
|
25
|
+
# Last name getter.
|
26
|
+
def last(opt={})
|
27
|
+
return ActiveSupport::Inflector.transliterate(@last) if opt[:ascii]
|
28
|
+
@last
|
10
29
|
end
|
11
30
|
|
12
31
|
# Return a complete name, first name first, no comma.
|
13
|
-
def name
|
32
|
+
def name(opts={})
|
14
33
|
name = ''
|
15
|
-
name <<
|
34
|
+
name << first(opts)
|
16
35
|
name << ' ' if @first.length > 0 && @last.length > 0
|
17
|
-
name <<
|
36
|
+
name << last(opts)
|
18
37
|
name
|
19
38
|
end
|
20
39
|
|
21
40
|
# Return a reversed complete name, first name last after a comma.
|
22
|
-
def rname
|
41
|
+
def rname(opts={})
|
23
42
|
name = ''
|
24
|
-
name <<
|
43
|
+
name << last(opts)
|
25
44
|
name << ', ' if @first.length > 0 && @last.length > 0
|
26
|
-
name <<
|
45
|
+
name << first(opts)
|
27
46
|
name
|
28
47
|
end
|
29
48
|
|
30
49
|
# Convert object to a string.
|
31
|
-
def to_s
|
32
|
-
rname
|
50
|
+
def to_s(opts={})
|
51
|
+
rname(opts)
|
33
52
|
end
|
34
53
|
|
35
54
|
# Match another name to this object, returning true or false.
|
36
|
-
def match(name1='', name2='')
|
37
|
-
other = Name.new(name1, name2)
|
38
|
-
match_first(first, other.first) && match_last(last, other.last)
|
55
|
+
def match(name1='', name2='', opts={})
|
56
|
+
other = Name.new(name1, name2, opts)
|
57
|
+
match_first(first(opts), other.first) && match_last(last(opts), other.last)
|
39
58
|
end
|
40
59
|
|
41
60
|
private
|
@@ -58,6 +77,7 @@ module ICU
|
|
58
77
|
else
|
59
78
|
parts = clean(@name1).split(/ /)
|
60
79
|
last = parts.pop || ''
|
80
|
+
last = "#{parts.pop}'#{last}" if parts.size > 1 && parts.last == "O" && !last.match(/^O'/)
|
61
81
|
first = parts.join(' ')
|
62
82
|
end
|
63
83
|
else
|
@@ -68,36 +88,32 @@ module ICU
|
|
68
88
|
[first, last]
|
69
89
|
end
|
70
90
|
|
71
|
-
# Clean up characters in any name.
|
91
|
+
# Clean up characters in any name keeping only letters (including accented), hyphens, and single quotes.
|
72
92
|
def clean(name)
|
73
93
|
name.gsub!(/`/, "'")
|
74
|
-
name.gsub!(/[^-a-zA-Z.'\s]/, '')
|
94
|
+
name.gsub!(/[^-a-zA-Z\u{c0}-\u{d6}\u{d8}-\u{f6}\u{f8}-\u{ff}.'\s]/, '')
|
75
95
|
name.gsub!(/\./, ' ')
|
76
96
|
name.gsub!(/\s*-\s*/, '-')
|
77
97
|
name.gsub!(/'+/, "'")
|
78
|
-
name.strip.downcase.split(/\s+/).map do |n|
|
98
|
+
name.strip.mb_chars.downcase.split(/\s+/).map do |n|
|
79
99
|
n.sub!(/^-+/, '')
|
80
100
|
n.sub!(/-+$/, '')
|
81
101
|
n.split(/-/).map do |p|
|
82
102
|
p.capitalize!
|
83
103
|
end.join('-')
|
84
|
-
end.join(' ')
|
104
|
+
end.join(' ').to_s
|
85
105
|
end
|
86
106
|
|
87
|
-
# Apply final touches to finish canonicalising a first name.
|
107
|
+
# Apply final touches to finish canonicalising a first name mb_chars object, returning a normal string.
|
88
108
|
def finish_first(names)
|
89
|
-
names.gsub(/([A-Z])\b/, '\1.')
|
109
|
+
names.gsub(/([A-Z\u{c0}-\u{de}])\b/, '\1.')
|
90
110
|
end
|
91
111
|
|
92
|
-
# Apply final touches to finish canonicalising a last name.
|
112
|
+
# Apply final touches to finish canonicalising a last name mb_chars object, returning a normal string.
|
93
113
|
def finish_last(names)
|
94
|
-
names.gsub!(/\b([A-Z])
|
95
|
-
names.gsub!(/\
|
96
|
-
names.gsub!(/\
|
97
|
-
letter = $1
|
98
|
-
'Mac'.concat(@name2.match("[mM][aA][cC]#{letter}") ? letter : letter.upcase)
|
99
|
-
end
|
100
|
-
names.gsub!(/\bO ([A-Z])/) { |m| "O'" << $1 }
|
114
|
+
names.gsub!(/\b([A-Z\u{c0}-\u{de}]')([a-z\u{e0}-\u{ff}])/) { |m| $1 << $2.mb_chars.upcase.to_s }
|
115
|
+
names.gsub!(/\b(Mc)([a-z\u{e0}-\u{ff}])/) { |m| $1 << $2.mb_chars.upcase.to_s }
|
116
|
+
names.gsub!(/\bO ([A-Z\u{c0}-\u{de}])/) { |m| "O'" << $1 }
|
101
117
|
names
|
102
118
|
end
|
103
119
|
|
@@ -134,9 +150,9 @@ module ICU
|
|
134
150
|
def match_last(last1, last2)
|
135
151
|
return true if last1 == last2
|
136
152
|
[last1, last2].each do |last|
|
137
|
-
last.downcase!
|
138
|
-
last.gsub!(/\bmac/, 'mc')
|
139
|
-
last.tr!('-', ' ')
|
153
|
+
last.downcase! # case insensitive
|
154
|
+
last.gsub!(/\bmac/, 'mc') # MacDonaugh and McDonaugh
|
155
|
+
last.tr!('-', ' ') # Lowry-O'Reilly and Lowry O'Reilly
|
140
156
|
end
|
141
157
|
last1 == last2
|
142
158
|
end
|
@@ -156,8 +172,8 @@ module ICU
|
|
156
172
|
# 2 = match involving 2 initials
|
157
173
|
def match_first_name(first1, first2)
|
158
174
|
initials = 0
|
159
|
-
initials+= 1 if first1.match(/^[A-Z]\.?$/)
|
160
|
-
initials+= 1 if first2.match(/^[A-Z]\.?$/)
|
175
|
+
initials+= 1 if first1.match(/^[A-Z\u{c0}-\u{de}]\.?$/)
|
176
|
+
initials+= 1 if first2.match(/^[A-Z\u{c0}-\u{de}]\.?$/)
|
161
177
|
return initials if first1 == first2
|
162
178
|
return 0 if initials == 0 && match_nick_name(first1, first2)
|
163
179
|
return -1 unless initials > 0
|
data/lib/icu_name/version.rb
CHANGED
data/spec/name_spec.rb
CHANGED
@@ -58,7 +58,26 @@ module ICU
|
|
58
58
|
it "should canconicalise last names" do
|
59
59
|
Name.new('John', 'O Reilly').last.should == "O'Reilly"
|
60
60
|
Name.new('dave', 'mcmanus').last.should == "McManus"
|
61
|
-
Name.new('pete', 'MACMANUS').last.should == "
|
61
|
+
Name.new('pete', 'MACMANUS').last.should == "Macmanus"
|
62
|
+
end
|
63
|
+
|
64
|
+
it "characters and encoding" do
|
65
|
+
josef = ICU::Name.new('Józef', 'Żabiński')
|
66
|
+
josef.name.should == "Józef Abiski"
|
67
|
+
bu = ICU::Name.new('Bǔ Xiángzhì')
|
68
|
+
bu.name.should == "B. Xiángzhì"
|
69
|
+
eric = ICU::Name.new('éric', 'PRIÉ')
|
70
|
+
eric.rname.should == "Prié, Éric"
|
71
|
+
eric.rname.encoding.name.should == "UTF-8"
|
72
|
+
eric = ICU::Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
73
|
+
eric.rname.should == "Prié, Éric"
|
74
|
+
eric.rname.encoding.name.should == "UTF-8"
|
75
|
+
eric.name(:ascii => true).should == "Eric Prie"
|
76
|
+
eric_ascii = ICU::Name.new('éric', 'PRIÉ', :ascii => true)
|
77
|
+
eric_ascii.name.should == "Eric Prie"
|
78
|
+
eric.match('Éric', 'Prié').should be_true
|
79
|
+
eric.match('Eric', 'Prie').should be_false
|
80
|
+
eric.match('Eric', 'Prie', :ascii => true).should be_true
|
62
81
|
end
|
63
82
|
end
|
64
83
|
|
@@ -66,24 +85,35 @@ module ICU
|
|
66
85
|
it "should not be altered" do
|
67
86
|
Name.new('Mark J. L.', 'Orr').name.should == 'Mark J. L. Orr'
|
68
87
|
Name.new('Anna-Marie J.-K.', 'Liviu-Dieter').name.should == 'Anna-Marie J.-K. Liviu-Dieter'
|
88
|
+
Name.new('Èric Cantona').name.should == 'Èric Cantona'
|
69
89
|
end
|
70
90
|
end
|
71
91
|
|
72
|
-
context "last names
|
92
|
+
context "last names involving a quote" do
|
73
93
|
it "should be handled correctly" do
|
74
94
|
Name.new('una', "O'boyle").name.should == "Una O'Boyle"
|
75
95
|
Name.new('jonathan', 'd`arcy').name.should == "Jonathan D'Arcy"
|
76
96
|
Name.new('erwin e', "L'AMI").name.should == "Erwin E. L'Ami"
|
77
97
|
Name.new('cormac', "o brien").name.should == "Cormac O'Brien"
|
98
|
+
Name.new('türko', "o özgür").name.should == "Türko O'Özgür"
|
99
|
+
Name.new('türko', "l`özgür").name.should == "Türko L'Özgür"
|
78
100
|
end
|
79
101
|
end
|
80
102
|
|
81
|
-
context "last beginning with Mc" do
|
103
|
+
context "last beginning with Mc or Mac" do
|
82
104
|
it "should be handled correctly" do
|
83
105
|
Name.new('shane', "mccabe").name.should == "Shane McCabe"
|
84
|
-
Name.new('shawn', "macDonagh").name.should == "Shawn MacDonagh"
|
85
106
|
Name.new('shawn', "macdonagh").name.should == "Shawn Macdonagh"
|
86
107
|
Name.new('bartlomiej', "macieja").name.should == "Bartlomiej Macieja"
|
108
|
+
Name.new('türko', "mcözgür").name.should == "Türko McÖzgür"
|
109
|
+
Name.new('TÜRKO', "MACÖZGÜR").name.should == "Türko Macözgür"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
context "first name initials" do
|
114
|
+
it "should be handled correctly" do
|
115
|
+
Name.new('m j l', 'Orr').first.should == 'M. J. L.'
|
116
|
+
Name.new('Ö. é m', 'Panno').first.should == "Ö. É. M."
|
87
117
|
end
|
88
118
|
end
|
89
119
|
|
@@ -94,6 +124,22 @@ module ICU
|
|
94
124
|
Name.new("mark j. - l", 'ORR').name.should == 'Mark J.-L. Orr'
|
95
125
|
Name.new('JOHANNA', "lowry-o'REILLY").name.should == "Johanna Lowry-O'Reilly"
|
96
126
|
Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
|
127
|
+
Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
|
128
|
+
Name.new('ètienne', "gèrard - mcözgür").name.should == "Ètienne Gèrard-McÖzgür"
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
context "accented characters and capitalisation" do
|
133
|
+
it "should downcase upper case accented characters where appropriate" do
|
134
|
+
name = Name.new('GEARÓIDÍN', 'UÍ LAIGHLÉIS')
|
135
|
+
name.first.should == 'Gearóidín'
|
136
|
+
name.last.should == 'Uí Laighléis'
|
137
|
+
end
|
138
|
+
|
139
|
+
it "should upcase upper case accented characters where appropriate" do
|
140
|
+
name = Name.new('èric özgür')
|
141
|
+
name.first.should == 'Èric'
|
142
|
+
name.last.should == 'Özgür'
|
97
143
|
end
|
98
144
|
end
|
99
145
|
|
@@ -110,18 +156,12 @@ module ICU
|
|
110
156
|
end
|
111
157
|
|
112
158
|
context "construction from a single string" do
|
113
|
-
before(:each) do
|
114
|
-
@mark1 = Name.new('ORR, mark j l')
|
115
|
-
@mark2 = Name.new('MARK J L ORR')
|
116
|
-
@oreil = Name.new("O'Reilly, j-k")
|
117
|
-
end
|
118
|
-
|
119
159
|
it "should be possible in simple cases" do
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
160
|
+
Name.new('ORR, mark j l').rname.should == 'Orr, Mark J. L.'
|
161
|
+
Name.new('MARK J L ORR').rname.should == 'Orr, Mark J. L.'
|
162
|
+
Name.new("j-k O'Reilly").rname.should == "O'Reilly, J.-K."
|
163
|
+
Name.new("j-k O Reilly").rname.should == "O'Reilly, J.-K."
|
164
|
+
Name.new('ètienne o o özgür').name.should == "Ètienne O. O'Özgür"
|
125
165
|
end
|
126
166
|
end
|
127
167
|
|
@@ -131,10 +171,94 @@ module ICU
|
|
131
171
|
end
|
132
172
|
end
|
133
173
|
|
174
|
+
context "encoding" do
|
175
|
+
before(:each) do
|
176
|
+
@first = 'Gearóidín'
|
177
|
+
@last = 'Uí Laighléis'
|
178
|
+
end
|
179
|
+
|
180
|
+
it "should handle UTF-8" do
|
181
|
+
name = Name.new(@first, @last)
|
182
|
+
name.first.should == @first
|
183
|
+
name.last.should == @last
|
184
|
+
name.first.encoding.name.should == "UTF-8"
|
185
|
+
name.last.encoding.name.should == "UTF-8"
|
186
|
+
end
|
187
|
+
|
188
|
+
it "should handle ISO-8859-1" do
|
189
|
+
name = Name.new(@first.encode("ISO-8859-1"), @last.encode("ISO-8859-1"))
|
190
|
+
name.first.should == @first
|
191
|
+
name.last.should == @last
|
192
|
+
name.first.encoding.name.should == "UTF-8"
|
193
|
+
name.last.encoding.name.should == "UTF-8"
|
194
|
+
end
|
195
|
+
|
196
|
+
it "should handle Windows-1252" do
|
197
|
+
name = Name.new(@first.encode("Windows-1252"), @last.encode("Windows-1252"))
|
198
|
+
name.first.should == @first
|
199
|
+
name.last.should == @last
|
200
|
+
name.first.encoding.name.should == "UTF-8"
|
201
|
+
name.last.encoding.name.should == "UTF-8"
|
202
|
+
end
|
203
|
+
|
204
|
+
it "should handle ASCII-8BIT" do
|
205
|
+
name = Name.new(@first.dup.force_encoding('ASCII-8BIT'), @last.dup.force_encoding('ASCII-8BIT'))
|
206
|
+
name.first.should == @first
|
207
|
+
name.last.should == @last
|
208
|
+
name.first.encoding.name.should == "UTF-8"
|
209
|
+
name.last.encoding.name.should == "UTF-8"
|
210
|
+
end
|
211
|
+
|
212
|
+
it "should handle US-ASCII" do
|
213
|
+
@first = 'Gearoidin'
|
214
|
+
@last = 'Ui Laighleis'
|
215
|
+
name = Name.new(@first.encode("US-ASCII"), @last.encode("US-ASCII"))
|
216
|
+
name.first.should == @first
|
217
|
+
name.last.should == @last
|
218
|
+
name.first.encoding.name.should == "UTF-8"
|
219
|
+
name.last.encoding.name.should == "UTF-8"
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
context "transliteration" do
|
224
|
+
before(:all) do
|
225
|
+
@opt = { :ascii => true }
|
226
|
+
end
|
227
|
+
|
228
|
+
it "should be a no-op for names that already ASCII" do
|
229
|
+
name = Name.new('Mark J. L.', 'Orr')
|
230
|
+
name.first(@opt).should == 'Mark J. L.'
|
231
|
+
name.last(@opt).should == 'Orr'
|
232
|
+
name.name(@opt).should == 'Mark J. L. Orr'
|
233
|
+
name.rname(@opt).should == 'Orr, Mark J. L.'
|
234
|
+
name.to_s(@opt).should == 'Orr, Mark J. L.'
|
235
|
+
end
|
236
|
+
|
237
|
+
it "should remove the accents from accented characters" do
|
238
|
+
name = Name.new('Gearóidín', 'Uí Laighléis')
|
239
|
+
name.first(@opt).should == 'Gearoidin'
|
240
|
+
name.last(@opt).should == 'Ui Laighleis'
|
241
|
+
name.name(@opt).should == 'Gearoidin Ui Laighleis'
|
242
|
+
name.rname(@opt).should == 'Ui Laighleis, Gearoidin'
|
243
|
+
name.to_s(@opt).should == 'Ui Laighleis, Gearoidin'
|
244
|
+
name = Name.new('èric PRIÉ')
|
245
|
+
name.first(@opt).should == 'Eric'
|
246
|
+
name.last(@opt).should == 'Prie'
|
247
|
+
end
|
248
|
+
|
249
|
+
it "should work for the constructor as well as accessors" do
|
250
|
+
name = Name.new('Gearóidín', 'Uí Laighléis', @opt)
|
251
|
+
name.first.should == 'Gearoidin'
|
252
|
+
name.last.should == 'Ui Laighleis'
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
134
256
|
context "constuction corner cases" do
|
135
257
|
it "should be handled correctly" do
|
136
258
|
Name.new('Orr').name.should == 'Orr'
|
137
259
|
Name.new('Orr').rname.should == 'Orr'
|
260
|
+
Name.new('Uí Laighléis').rname.should == 'Laighléis, Uí'
|
261
|
+
Name.new('', 'Uí Laighléis', :ascii => true).last.should == 'Ui Laighleis'
|
138
262
|
Name.new('').name.should == ''
|
139
263
|
Name.new('').rname.should == ''
|
140
264
|
Name.new.name.should == ''
|
@@ -164,6 +288,7 @@ module ICU
|
|
164
288
|
it "should be flexible with regards to hyphens in double barrelled names" do
|
165
289
|
Name.new('J.-K.', 'Rowling').match('J. K.', 'Rowling').should be_true
|
166
290
|
Name.new('Joanne-K.', 'Rowling').match('Joanne K.', 'Rowling').should be_true
|
291
|
+
Name.new('Èric-K.', 'Cantona').match('Èric K.', 'Cantona').should be_true
|
167
292
|
end
|
168
293
|
|
169
294
|
it "should match initials" do
|
@@ -172,6 +297,8 @@ module ICU
|
|
172
297
|
Name.new('M. J. L.', 'Orr').match('Mark', 'Orr').should be_true
|
173
298
|
Name.new('M.', 'Orr').match('M. J.', 'Orr').should be_true
|
174
299
|
Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
|
300
|
+
Name.new('È', 'Cantona').match('Èric K.', 'Cantona').should be_true
|
301
|
+
Name.new('E. K.', 'Cantona').match('Èric K.', 'Cantona').should be_false
|
175
302
|
end
|
176
303
|
|
177
304
|
it "should not match on full names not in first position or without an exact match" do
|
@@ -206,15 +333,20 @@ module ICU
|
|
206
333
|
end
|
207
334
|
end
|
208
335
|
|
209
|
-
context "accented characters" do
|
210
|
-
|
211
|
-
|
212
|
-
|
336
|
+
context "matches involving accented characters" do
|
337
|
+
it "should work for identical names" do
|
338
|
+
Name.new('Gearóidín', 'Uí Laighléis').match('Gearóidín', 'Uí Laighléis').should be_true
|
339
|
+
Name.new('Gearóidín', 'Uí Laighléis').match('Gearoidin', 'Ui Laighleis').should be_false
|
213
340
|
end
|
214
341
|
|
215
|
-
it "should
|
216
|
-
|
217
|
-
|
342
|
+
it "should work for first name initials" do
|
343
|
+
Name.new('Èric-K.', 'Cantona').match('È. K.', 'Cantona').should be_true
|
344
|
+
Name.new('Èric-K.', 'Cantona').match('E. K.', 'Cantona').should be_false
|
345
|
+
end
|
346
|
+
|
347
|
+
it "the matching of accented characters can be relaxed" do
|
348
|
+
Name.new('Gearóidín', 'Uí Laighléis').match('Gearoidin', 'Ui Laíghleis', :ascii => true).should be_true
|
349
|
+
Name.new('Èric-K.', 'Cantona').match('E. K.', 'Cantona', :ascii => true).should be_true
|
218
350
|
end
|
219
351
|
end
|
220
352
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 5
|
9
|
+
version: 0.0.5
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Mark Orr
|
@@ -14,13 +14,43 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-23 00:00:00 +00:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
|
-
name:
|
21
|
+
name: activesupport
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 3
|
30
|
+
- 0
|
31
|
+
- 3
|
32
|
+
version: 3.0.3
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: i18n
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
- 5
|
46
|
+
- 0
|
47
|
+
version: 0.5.0
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: bundler
|
52
|
+
prerelease: false
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
24
54
|
none: false
|
25
55
|
requirements:
|
26
56
|
- - ">="
|
@@ -31,11 +61,11 @@ dependencies:
|
|
31
61
|
- 7
|
32
62
|
version: 1.0.7
|
33
63
|
type: :development
|
34
|
-
version_requirements: *
|
64
|
+
version_requirements: *id003
|
35
65
|
- !ruby/object:Gem::Dependency
|
36
66
|
name: rspec
|
37
67
|
prerelease: false
|
38
|
-
requirement: &
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
39
69
|
none: false
|
40
70
|
requirements:
|
41
71
|
- - ">="
|
@@ -44,7 +74,7 @@ dependencies:
|
|
44
74
|
- 0
|
45
75
|
version: "0"
|
46
76
|
type: :development
|
47
|
-
version_requirements: *
|
77
|
+
version_requirements: *id004
|
48
78
|
description: Canonicalises and matches person names with Latin1 characters and first and last names
|
49
79
|
email: mark.j.l.orr@googlemail.com
|
50
80
|
executables: []
|