icu_name 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +37 -1
- data/lib/icu_name/name.rb +50 -34
- data/lib/icu_name/version.rb +1 -1
- data/spec/name_spec.rb +154 -22
- metadata +37 -7
data/README.rdoc
CHANGED
@@ -8,6 +8,8 @@ For ruby 1.9.2 and above.
|
|
8
8
|
|
9
9
|
gem install icu_name
|
10
10
|
|
11
|
+
It depends on active_support and i18n.
|
12
|
+
|
11
13
|
== Names
|
12
14
|
|
13
15
|
This class exists for two main purposes:
|
@@ -55,8 +57,42 @@ Some of the ways last names are canonicalised are illustrated below:
|
|
55
57
|
|
56
58
|
ICU::Name.new('John', 'O Reilly').last # => "O'Reilly"
|
57
59
|
ICU::Name.new('dave', 'mcmanus').last # => "McManus"
|
58
|
-
ICU::Name.new('pete', 'MACMANUS').last # => "MacManus"
|
59
60
|
|
61
|
+
== Characters and Encoding
|
62
|
+
|
63
|
+
The class can only cope with Western European letter characters, including the accented ones in Latin-1.
|
64
|
+
Its various accessors (_first_, _last_, _name_, _rname_, _to_s_) always return strings encoded in UTF-8,
|
65
|
+
no matter what the input encoding.
|
66
|
+
|
67
|
+
eric = ICU::Name.new('éric', 'PRIÉ')
|
68
|
+
eric.rname # => "Prié, Éric"
|
69
|
+
eric.rname.encoding.name # => "UTF-8"
|
70
|
+
|
71
|
+
eric = ICU::Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
72
|
+
eric.rname # => "Prié, Éric"
|
73
|
+
eric.rname.encoding.name # => "UTF-8"
|
74
|
+
|
75
|
+
Currently, all characters outside the Latin-1 range are removed as if they weren't there.
|
76
|
+
|
77
|
+
ICU::Name.new('Józef Żabiński').name # "Józef Abiski"
|
78
|
+
ICU::Name.new('Bǔ Xiángzhì').name # "B. Xiángzhì"
|
79
|
+
|
80
|
+
Accented Latin-1 characters can be transliterated into their ascii counterparts by setting the
|
81
|
+
_ascii_ option to a true value.
|
82
|
+
|
83
|
+
eric.name(:ascii => true) # => "Eric Prie"
|
84
|
+
|
85
|
+
This works with all the other accessors and also with the constructor:
|
86
|
+
|
87
|
+
eric_ascii = ICU::Name.new('éric', 'PRIÉ', :ascii => true)
|
88
|
+
eric_ascii.name # => "Eric Prie"
|
89
|
+
|
90
|
+
The option also relaxes the need for accented characters to match exactly:
|
91
|
+
|
92
|
+
eric.match('Éric', 'Prié') # => true
|
93
|
+
eric.match('Eric', 'Prie') # => false
|
94
|
+
eric.match('Eric', 'Prie', :ascii => true) # => true
|
95
|
+
|
60
96
|
== Author
|
61
97
|
|
62
98
|
Mark Orr, rating officer for the Irish Chess Union (ICU[http://icu.ie]).
|
data/lib/icu_name/name.rb
CHANGED
@@ -1,41 +1,60 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/inflector/transliterate'
|
3
|
+
require 'active_support/core_ext/string/multibyte'
|
4
|
+
|
1
5
|
module ICU
|
2
6
|
class Name
|
3
|
-
attr_reader :first, :last
|
4
7
|
|
5
8
|
# Construct from one or two strings or any objects that have a to_s method.
|
6
|
-
def initialize(name1='', name2='')
|
7
|
-
@name1 = name1.to_s
|
8
|
-
@name2 = name2.to_s
|
9
|
+
def initialize(name1='', name2='', opt={})
|
10
|
+
@name1 = Util.to_utf8(name1.to_s)
|
11
|
+
@name2 = Util.to_utf8(name2.to_s)
|
9
12
|
canonicalize
|
13
|
+
if opt[:ascii]
|
14
|
+
@first = ActiveSupport::Inflector.transliterate(@first)
|
15
|
+
@last = ActiveSupport::Inflector.transliterate(@last)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# First name getter.
|
20
|
+
def first(opt={})
|
21
|
+
return ActiveSupport::Inflector.transliterate(@first) if opt[:ascii]
|
22
|
+
@first
|
23
|
+
end
|
24
|
+
|
25
|
+
# Last name getter.
|
26
|
+
def last(opt={})
|
27
|
+
return ActiveSupport::Inflector.transliterate(@last) if opt[:ascii]
|
28
|
+
@last
|
10
29
|
end
|
11
30
|
|
12
31
|
# Return a complete name, first name first, no comma.
|
13
|
-
def name
|
32
|
+
def name(opts={})
|
14
33
|
name = ''
|
15
|
-
name <<
|
34
|
+
name << first(opts)
|
16
35
|
name << ' ' if @first.length > 0 && @last.length > 0
|
17
|
-
name <<
|
36
|
+
name << last(opts)
|
18
37
|
name
|
19
38
|
end
|
20
39
|
|
21
40
|
# Return a reversed complete name, first name last after a comma.
|
22
|
-
def rname
|
41
|
+
def rname(opts={})
|
23
42
|
name = ''
|
24
|
-
name <<
|
43
|
+
name << last(opts)
|
25
44
|
name << ', ' if @first.length > 0 && @last.length > 0
|
26
|
-
name <<
|
45
|
+
name << first(opts)
|
27
46
|
name
|
28
47
|
end
|
29
48
|
|
30
49
|
# Convert object to a string.
|
31
|
-
def to_s
|
32
|
-
rname
|
50
|
+
def to_s(opts={})
|
51
|
+
rname(opts)
|
33
52
|
end
|
34
53
|
|
35
54
|
# Match another name to this object, returning true or false.
|
36
|
-
def match(name1='', name2='')
|
37
|
-
other = Name.new(name1, name2)
|
38
|
-
match_first(first, other.first) && match_last(last, other.last)
|
55
|
+
def match(name1='', name2='', opts={})
|
56
|
+
other = Name.new(name1, name2, opts)
|
57
|
+
match_first(first(opts), other.first) && match_last(last(opts), other.last)
|
39
58
|
end
|
40
59
|
|
41
60
|
private
|
@@ -58,6 +77,7 @@ module ICU
|
|
58
77
|
else
|
59
78
|
parts = clean(@name1).split(/ /)
|
60
79
|
last = parts.pop || ''
|
80
|
+
last = "#{parts.pop}'#{last}" if parts.size > 1 && parts.last == "O" && !last.match(/^O'/)
|
61
81
|
first = parts.join(' ')
|
62
82
|
end
|
63
83
|
else
|
@@ -68,36 +88,32 @@ module ICU
|
|
68
88
|
[first, last]
|
69
89
|
end
|
70
90
|
|
71
|
-
# Clean up characters in any name.
|
91
|
+
# Clean up characters in any name keeping only letters (including accented), hyphens, and single quotes.
|
72
92
|
def clean(name)
|
73
93
|
name.gsub!(/`/, "'")
|
74
|
-
name.gsub!(/[^-a-zA-Z.'\s]/, '')
|
94
|
+
name.gsub!(/[^-a-zA-Z\u{c0}-\u{d6}\u{d8}-\u{f6}\u{f8}-\u{ff}.'\s]/, '')
|
75
95
|
name.gsub!(/\./, ' ')
|
76
96
|
name.gsub!(/\s*-\s*/, '-')
|
77
97
|
name.gsub!(/'+/, "'")
|
78
|
-
name.strip.downcase.split(/\s+/).map do |n|
|
98
|
+
name.strip.mb_chars.downcase.split(/\s+/).map do |n|
|
79
99
|
n.sub!(/^-+/, '')
|
80
100
|
n.sub!(/-+$/, '')
|
81
101
|
n.split(/-/).map do |p|
|
82
102
|
p.capitalize!
|
83
103
|
end.join('-')
|
84
|
-
end.join(' ')
|
104
|
+
end.join(' ').to_s
|
85
105
|
end
|
86
106
|
|
87
|
-
# Apply final touches to finish canonicalising a first name.
|
107
|
+
# Apply final touches to finish canonicalising a first name mb_chars object, returning a normal string.
|
88
108
|
def finish_first(names)
|
89
|
-
names.gsub(/([A-Z])\b/, '\1.')
|
109
|
+
names.gsub(/([A-Z\u{c0}-\u{de}])\b/, '\1.')
|
90
110
|
end
|
91
111
|
|
92
|
-
# Apply final touches to finish canonicalising a last name.
|
112
|
+
# Apply final touches to finish canonicalising a last name mb_chars object, returning a normal string.
|
93
113
|
def finish_last(names)
|
94
|
-
names.gsub!(/\b([A-Z])
|
95
|
-
names.gsub!(/\
|
96
|
-
names.gsub!(/\
|
97
|
-
letter = $1
|
98
|
-
'Mac'.concat(@name2.match("[mM][aA][cC]#{letter}") ? letter : letter.upcase)
|
99
|
-
end
|
100
|
-
names.gsub!(/\bO ([A-Z])/) { |m| "O'" << $1 }
|
114
|
+
names.gsub!(/\b([A-Z\u{c0}-\u{de}]')([a-z\u{e0}-\u{ff}])/) { |m| $1 << $2.mb_chars.upcase.to_s }
|
115
|
+
names.gsub!(/\b(Mc)([a-z\u{e0}-\u{ff}])/) { |m| $1 << $2.mb_chars.upcase.to_s }
|
116
|
+
names.gsub!(/\bO ([A-Z\u{c0}-\u{de}])/) { |m| "O'" << $1 }
|
101
117
|
names
|
102
118
|
end
|
103
119
|
|
@@ -134,9 +150,9 @@ module ICU
|
|
134
150
|
def match_last(last1, last2)
|
135
151
|
return true if last1 == last2
|
136
152
|
[last1, last2].each do |last|
|
137
|
-
last.downcase!
|
138
|
-
last.gsub!(/\bmac/, 'mc')
|
139
|
-
last.tr!('-', ' ')
|
153
|
+
last.downcase! # case insensitive
|
154
|
+
last.gsub!(/\bmac/, 'mc') # MacDonaugh and McDonaugh
|
155
|
+
last.tr!('-', ' ') # Lowry-O'Reilly and Lowry O'Reilly
|
140
156
|
end
|
141
157
|
last1 == last2
|
142
158
|
end
|
@@ -156,8 +172,8 @@ module ICU
|
|
156
172
|
# 2 = match involving 2 initials
|
157
173
|
def match_first_name(first1, first2)
|
158
174
|
initials = 0
|
159
|
-
initials+= 1 if first1.match(/^[A-Z]\.?$/)
|
160
|
-
initials+= 1 if first2.match(/^[A-Z]\.?$/)
|
175
|
+
initials+= 1 if first1.match(/^[A-Z\u{c0}-\u{de}]\.?$/)
|
176
|
+
initials+= 1 if first2.match(/^[A-Z\u{c0}-\u{de}]\.?$/)
|
161
177
|
return initials if first1 == first2
|
162
178
|
return 0 if initials == 0 && match_nick_name(first1, first2)
|
163
179
|
return -1 unless initials > 0
|
data/lib/icu_name/version.rb
CHANGED
data/spec/name_spec.rb
CHANGED
@@ -58,7 +58,26 @@ module ICU
|
|
58
58
|
it "should canconicalise last names" do
|
59
59
|
Name.new('John', 'O Reilly').last.should == "O'Reilly"
|
60
60
|
Name.new('dave', 'mcmanus').last.should == "McManus"
|
61
|
-
Name.new('pete', 'MACMANUS').last.should == "
|
61
|
+
Name.new('pete', 'MACMANUS').last.should == "Macmanus"
|
62
|
+
end
|
63
|
+
|
64
|
+
it "characters and encoding" do
|
65
|
+
josef = ICU::Name.new('Józef', 'Żabiński')
|
66
|
+
josef.name.should == "Józef Abiski"
|
67
|
+
bu = ICU::Name.new('Bǔ Xiángzhì')
|
68
|
+
bu.name.should == "B. Xiángzhì"
|
69
|
+
eric = ICU::Name.new('éric', 'PRIÉ')
|
70
|
+
eric.rname.should == "Prié, Éric"
|
71
|
+
eric.rname.encoding.name.should == "UTF-8"
|
72
|
+
eric = ICU::Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
73
|
+
eric.rname.should == "Prié, Éric"
|
74
|
+
eric.rname.encoding.name.should == "UTF-8"
|
75
|
+
eric.name(:ascii => true).should == "Eric Prie"
|
76
|
+
eric_ascii = ICU::Name.new('éric', 'PRIÉ', :ascii => true)
|
77
|
+
eric_ascii.name.should == "Eric Prie"
|
78
|
+
eric.match('Éric', 'Prié').should be_true
|
79
|
+
eric.match('Eric', 'Prie').should be_false
|
80
|
+
eric.match('Eric', 'Prie', :ascii => true).should be_true
|
62
81
|
end
|
63
82
|
end
|
64
83
|
|
@@ -66,24 +85,35 @@ module ICU
|
|
66
85
|
it "should not be altered" do
|
67
86
|
Name.new('Mark J. L.', 'Orr').name.should == 'Mark J. L. Orr'
|
68
87
|
Name.new('Anna-Marie J.-K.', 'Liviu-Dieter').name.should == 'Anna-Marie J.-K. Liviu-Dieter'
|
88
|
+
Name.new('Èric Cantona').name.should == 'Èric Cantona'
|
69
89
|
end
|
70
90
|
end
|
71
91
|
|
72
|
-
context "last names
|
92
|
+
context "last names involving a quote" do
|
73
93
|
it "should be handled correctly" do
|
74
94
|
Name.new('una', "O'boyle").name.should == "Una O'Boyle"
|
75
95
|
Name.new('jonathan', 'd`arcy').name.should == "Jonathan D'Arcy"
|
76
96
|
Name.new('erwin e', "L'AMI").name.should == "Erwin E. L'Ami"
|
77
97
|
Name.new('cormac', "o brien").name.should == "Cormac O'Brien"
|
98
|
+
Name.new('türko', "o özgür").name.should == "Türko O'Özgür"
|
99
|
+
Name.new('türko', "l`özgür").name.should == "Türko L'Özgür"
|
78
100
|
end
|
79
101
|
end
|
80
102
|
|
81
|
-
context "last beginning with Mc" do
|
103
|
+
context "last beginning with Mc or Mac" do
|
82
104
|
it "should be handled correctly" do
|
83
105
|
Name.new('shane', "mccabe").name.should == "Shane McCabe"
|
84
|
-
Name.new('shawn', "macDonagh").name.should == "Shawn MacDonagh"
|
85
106
|
Name.new('shawn', "macdonagh").name.should == "Shawn Macdonagh"
|
86
107
|
Name.new('bartlomiej', "macieja").name.should == "Bartlomiej Macieja"
|
108
|
+
Name.new('türko', "mcözgür").name.should == "Türko McÖzgür"
|
109
|
+
Name.new('TÜRKO', "MACÖZGÜR").name.should == "Türko Macözgür"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
context "first name initials" do
|
114
|
+
it "should be handled correctly" do
|
115
|
+
Name.new('m j l', 'Orr').first.should == 'M. J. L.'
|
116
|
+
Name.new('Ö. é m', 'Panno').first.should == "Ö. É. M."
|
87
117
|
end
|
88
118
|
end
|
89
119
|
|
@@ -94,6 +124,22 @@ module ICU
|
|
94
124
|
Name.new("mark j. - l", 'ORR').name.should == 'Mark J.-L. Orr'
|
95
125
|
Name.new('JOHANNA', "lowry-o'REILLY").name.should == "Johanna Lowry-O'Reilly"
|
96
126
|
Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
|
127
|
+
Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
|
128
|
+
Name.new('ètienne', "gèrard - mcözgür").name.should == "Ètienne Gèrard-McÖzgür"
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
context "accented characters and capitalisation" do
|
133
|
+
it "should downcase upper case accented characters where appropriate" do
|
134
|
+
name = Name.new('GEARÓIDÍN', 'UÍ LAIGHLÉIS')
|
135
|
+
name.first.should == 'Gearóidín'
|
136
|
+
name.last.should == 'Uí Laighléis'
|
137
|
+
end
|
138
|
+
|
139
|
+
it "should upcase upper case accented characters where appropriate" do
|
140
|
+
name = Name.new('èric özgür')
|
141
|
+
name.first.should == 'Èric'
|
142
|
+
name.last.should == 'Özgür'
|
97
143
|
end
|
98
144
|
end
|
99
145
|
|
@@ -110,18 +156,12 @@ module ICU
|
|
110
156
|
end
|
111
157
|
|
112
158
|
context "construction from a single string" do
|
113
|
-
before(:each) do
|
114
|
-
@mark1 = Name.new('ORR, mark j l')
|
115
|
-
@mark2 = Name.new('MARK J L ORR')
|
116
|
-
@oreil = Name.new("O'Reilly, j-k")
|
117
|
-
end
|
118
|
-
|
119
159
|
it "should be possible in simple cases" do
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
160
|
+
Name.new('ORR, mark j l').rname.should == 'Orr, Mark J. L.'
|
161
|
+
Name.new('MARK J L ORR').rname.should == 'Orr, Mark J. L.'
|
162
|
+
Name.new("j-k O'Reilly").rname.should == "O'Reilly, J.-K."
|
163
|
+
Name.new("j-k O Reilly").rname.should == "O'Reilly, J.-K."
|
164
|
+
Name.new('ètienne o o özgür').name.should == "Ètienne O. O'Özgür"
|
125
165
|
end
|
126
166
|
end
|
127
167
|
|
@@ -131,10 +171,94 @@ module ICU
|
|
131
171
|
end
|
132
172
|
end
|
133
173
|
|
174
|
+
context "encoding" do
|
175
|
+
before(:each) do
|
176
|
+
@first = 'Gearóidín'
|
177
|
+
@last = 'Uí Laighléis'
|
178
|
+
end
|
179
|
+
|
180
|
+
it "should handle UTF-8" do
|
181
|
+
name = Name.new(@first, @last)
|
182
|
+
name.first.should == @first
|
183
|
+
name.last.should == @last
|
184
|
+
name.first.encoding.name.should == "UTF-8"
|
185
|
+
name.last.encoding.name.should == "UTF-8"
|
186
|
+
end
|
187
|
+
|
188
|
+
it "should handle ISO-8859-1" do
|
189
|
+
name = Name.new(@first.encode("ISO-8859-1"), @last.encode("ISO-8859-1"))
|
190
|
+
name.first.should == @first
|
191
|
+
name.last.should == @last
|
192
|
+
name.first.encoding.name.should == "UTF-8"
|
193
|
+
name.last.encoding.name.should == "UTF-8"
|
194
|
+
end
|
195
|
+
|
196
|
+
it "should handle Windows-1252" do
|
197
|
+
name = Name.new(@first.encode("Windows-1252"), @last.encode("Windows-1252"))
|
198
|
+
name.first.should == @first
|
199
|
+
name.last.should == @last
|
200
|
+
name.first.encoding.name.should == "UTF-8"
|
201
|
+
name.last.encoding.name.should == "UTF-8"
|
202
|
+
end
|
203
|
+
|
204
|
+
it "should handle ASCII-8BIT" do
|
205
|
+
name = Name.new(@first.dup.force_encoding('ASCII-8BIT'), @last.dup.force_encoding('ASCII-8BIT'))
|
206
|
+
name.first.should == @first
|
207
|
+
name.last.should == @last
|
208
|
+
name.first.encoding.name.should == "UTF-8"
|
209
|
+
name.last.encoding.name.should == "UTF-8"
|
210
|
+
end
|
211
|
+
|
212
|
+
it "should handle US-ASCII" do
|
213
|
+
@first = 'Gearoidin'
|
214
|
+
@last = 'Ui Laighleis'
|
215
|
+
name = Name.new(@first.encode("US-ASCII"), @last.encode("US-ASCII"))
|
216
|
+
name.first.should == @first
|
217
|
+
name.last.should == @last
|
218
|
+
name.first.encoding.name.should == "UTF-8"
|
219
|
+
name.last.encoding.name.should == "UTF-8"
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
context "transliteration" do
|
224
|
+
before(:all) do
|
225
|
+
@opt = { :ascii => true }
|
226
|
+
end
|
227
|
+
|
228
|
+
it "should be a no-op for names that already ASCII" do
|
229
|
+
name = Name.new('Mark J. L.', 'Orr')
|
230
|
+
name.first(@opt).should == 'Mark J. L.'
|
231
|
+
name.last(@opt).should == 'Orr'
|
232
|
+
name.name(@opt).should == 'Mark J. L. Orr'
|
233
|
+
name.rname(@opt).should == 'Orr, Mark J. L.'
|
234
|
+
name.to_s(@opt).should == 'Orr, Mark J. L.'
|
235
|
+
end
|
236
|
+
|
237
|
+
it "should remove the accents from accented characters" do
|
238
|
+
name = Name.new('Gearóidín', 'Uí Laighléis')
|
239
|
+
name.first(@opt).should == 'Gearoidin'
|
240
|
+
name.last(@opt).should == 'Ui Laighleis'
|
241
|
+
name.name(@opt).should == 'Gearoidin Ui Laighleis'
|
242
|
+
name.rname(@opt).should == 'Ui Laighleis, Gearoidin'
|
243
|
+
name.to_s(@opt).should == 'Ui Laighleis, Gearoidin'
|
244
|
+
name = Name.new('èric PRIÉ')
|
245
|
+
name.first(@opt).should == 'Eric'
|
246
|
+
name.last(@opt).should == 'Prie'
|
247
|
+
end
|
248
|
+
|
249
|
+
it "should work for the constructor as well as accessors" do
|
250
|
+
name = Name.new('Gearóidín', 'Uí Laighléis', @opt)
|
251
|
+
name.first.should == 'Gearoidin'
|
252
|
+
name.last.should == 'Ui Laighleis'
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
134
256
|
context "constuction corner cases" do
|
135
257
|
it "should be handled correctly" do
|
136
258
|
Name.new('Orr').name.should == 'Orr'
|
137
259
|
Name.new('Orr').rname.should == 'Orr'
|
260
|
+
Name.new('Uí Laighléis').rname.should == 'Laighléis, Uí'
|
261
|
+
Name.new('', 'Uí Laighléis', :ascii => true).last.should == 'Ui Laighleis'
|
138
262
|
Name.new('').name.should == ''
|
139
263
|
Name.new('').rname.should == ''
|
140
264
|
Name.new.name.should == ''
|
@@ -164,6 +288,7 @@ module ICU
|
|
164
288
|
it "should be flexible with regards to hyphens in double barrelled names" do
|
165
289
|
Name.new('J.-K.', 'Rowling').match('J. K.', 'Rowling').should be_true
|
166
290
|
Name.new('Joanne-K.', 'Rowling').match('Joanne K.', 'Rowling').should be_true
|
291
|
+
Name.new('Èric-K.', 'Cantona').match('Èric K.', 'Cantona').should be_true
|
167
292
|
end
|
168
293
|
|
169
294
|
it "should match initials" do
|
@@ -172,6 +297,8 @@ module ICU
|
|
172
297
|
Name.new('M. J. L.', 'Orr').match('Mark', 'Orr').should be_true
|
173
298
|
Name.new('M.', 'Orr').match('M. J.', 'Orr').should be_true
|
174
299
|
Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
|
300
|
+
Name.new('È', 'Cantona').match('Èric K.', 'Cantona').should be_true
|
301
|
+
Name.new('E. K.', 'Cantona').match('Èric K.', 'Cantona').should be_false
|
175
302
|
end
|
176
303
|
|
177
304
|
it "should not match on full names not in first position or without an exact match" do
|
@@ -206,15 +333,20 @@ module ICU
|
|
206
333
|
end
|
207
334
|
end
|
208
335
|
|
209
|
-
context "accented characters" do
|
210
|
-
|
211
|
-
|
212
|
-
|
336
|
+
context "matches involving accented characters" do
|
337
|
+
it "should work for identical names" do
|
338
|
+
Name.new('Gearóidín', 'Uí Laighléis').match('Gearóidín', 'Uí Laighléis').should be_true
|
339
|
+
Name.new('Gearóidín', 'Uí Laighléis').match('Gearoidin', 'Ui Laighleis').should be_false
|
213
340
|
end
|
214
341
|
|
215
|
-
it "should
|
216
|
-
|
217
|
-
|
342
|
+
it "should work for first name initials" do
|
343
|
+
Name.new('Èric-K.', 'Cantona').match('È. K.', 'Cantona').should be_true
|
344
|
+
Name.new('Èric-K.', 'Cantona').match('E. K.', 'Cantona').should be_false
|
345
|
+
end
|
346
|
+
|
347
|
+
it "the matching of accented characters can be relaxed" do
|
348
|
+
Name.new('Gearóidín', 'Uí Laighléis').match('Gearoidin', 'Ui Laíghleis', :ascii => true).should be_true
|
349
|
+
Name.new('Èric-K.', 'Cantona').match('E. K.', 'Cantona', :ascii => true).should be_true
|
218
350
|
end
|
219
351
|
end
|
220
352
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 5
|
9
|
+
version: 0.0.5
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Mark Orr
|
@@ -14,13 +14,43 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-23 00:00:00 +00:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
|
-
name:
|
21
|
+
name: activesupport
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 3
|
30
|
+
- 0
|
31
|
+
- 3
|
32
|
+
version: 3.0.3
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: i18n
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
- 5
|
46
|
+
- 0
|
47
|
+
version: 0.5.0
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: bundler
|
52
|
+
prerelease: false
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
24
54
|
none: false
|
25
55
|
requirements:
|
26
56
|
- - ">="
|
@@ -31,11 +61,11 @@ dependencies:
|
|
31
61
|
- 7
|
32
62
|
version: 1.0.7
|
33
63
|
type: :development
|
34
|
-
version_requirements: *
|
64
|
+
version_requirements: *id003
|
35
65
|
- !ruby/object:Gem::Dependency
|
36
66
|
name: rspec
|
37
67
|
prerelease: false
|
38
|
-
requirement: &
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
39
69
|
none: false
|
40
70
|
requirements:
|
41
71
|
- - ">="
|
@@ -44,7 +74,7 @@ dependencies:
|
|
44
74
|
- 0
|
45
75
|
version: "0"
|
46
76
|
type: :development
|
47
|
-
version_requirements: *
|
77
|
+
version_requirements: *id004
|
48
78
|
description: Canonicalises and matches person names with Latin1 characters and first and last names
|
49
79
|
email: mark.j.l.orr@googlemail.com
|
50
80
|
executables: []
|