icu_name 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +15 -39
- data/config/first_alternatives.yaml +31 -18
- data/config/last_alternatives.yaml +7 -1
- data/config/test_first_alts.yaml +2 -33
- data/config/test_last_alts.yaml +1 -0
- data/lib/icu_name/name.rb +12 -12
- data/lib/icu_name/util.rb +64 -38
- data/lib/icu_name/version.rb +1 -1
- data/spec/name_spec.rb +60 -41
- data/spec/util_spec.rb +68 -45
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -51,10 +51,11 @@ The method <tt>alternatives</tt> can be used to list alternatives to a given fir
|
|
51
51
|
|
52
52
|
Name.new('Stephen', 'Orr').alternatives(:first) # => ["Steve"]
|
53
53
|
Name.new('Michael Stephen', 'Orr').alternatives(:first) # => ["Steve", "Mike", "Mick", "Mikey"],
|
54
|
+
Name.new('Oissine', 'Murphy').alternatives(:last) # => ["Murchadha"],
|
54
55
|
Name.new('Mark', 'Orr').alternatives(:first) # => []
|
55
56
|
|
56
|
-
By default the class
|
57
|
-
|
57
|
+
By default the class uses a set of first and last name alternatives curated for the ICU.
|
58
|
+
However, this can be customized (see below).
|
58
59
|
|
59
60
|
Supplying the <tt>match</tt> method with strings is equivalent to instantiating an instance with the same
|
60
61
|
strings and then matching it. So, for example the following are equivalent:
|
@@ -109,7 +110,7 @@ The same option also relaxes the need for accented characters to match exactly:
|
|
109
110
|
We saw above how _Bobby_ and _Robert_ were able to match because, by default, the
|
110
111
|
matcher is aware of some common English nicknames. These name alternatives can be
|
111
112
|
customised to handle additional nicknames and other types of alternative names
|
112
|
-
such as common spelling
|
113
|
+
such as common spelling errors and player name changes.
|
113
114
|
|
114
115
|
The alternative names consist of two arrays, one for first names and
|
115
116
|
one for last names. Each array element is itself an array of strings
|
@@ -117,58 +118,33 @@ representing a set of equivalent names. Here, for example, are some
|
|
117
118
|
of the default first name alternatives:
|
118
119
|
|
119
120
|
["Anthony", "Tony"]
|
120
|
-
["James", "Jim", "Jimmy"]
|
121
|
-
["Michael", "Mike", "Mick", "Mikey"]
|
121
|
+
["James", "Jim", "Jimmy", "Jamie"]
|
122
122
|
["Robert", "Bob", "Bobby"]
|
123
|
-
["Stephen", "Steve"]
|
124
|
-
["Steven", "Steve"]
|
123
|
+
["Stephen", "Steve", "Steven"]
|
125
124
|
["Thomas", "Tom", "Tommy"]
|
126
|
-
["William", "Will", "Willy", "Willie", "Bill"]
|
127
125
|
|
128
126
|
The first of these means that _Anthony_ and _Tony_ are considered equivalent and can match.
|
129
127
|
|
130
|
-
Name.new("Tony", "Miles").match("Anthony", "Miles")
|
131
|
-
|
132
|
-
Note that both _Steven_ and _Stephen_ match _Steve_ but, because they don't occur in the
|
133
|
-
same group, they don't match each other.
|
134
|
-
|
135
|
-
Name.new("Steven", "Hanly").match("Steve", "Hanly") # => true
|
136
|
-
Name.new("Stephen", "Hanly").match("Steve", "Hanly") # => true
|
137
|
-
Name.new("Stephen", "Hanly").match("Steven", "Hanly") # => false
|
128
|
+
ICU::Name.new("Tony", "Miles").match("Anthony", "Miles") # => true
|
138
129
|
|
139
130
|
To change alternative name behaviour, you can replace the default alternatives
|
140
131
|
with a customized set perhaps stored in a database or a YAML file, as illustrated below:
|
141
132
|
|
133
|
+
ICU::Name.reset_alternatives
|
142
134
|
data = YAML.load(File.open("my_last_name_alternatives.yaml"))
|
143
|
-
Name.load_alternatives(:last, data)
|
135
|
+
ICU::Name.load_alternatives(:last, data)
|
144
136
|
data = YAML.load(File.open("my_first_name_alternatives.yaml"))
|
145
|
-
Name.load_alternatives(:first, data)
|
146
|
-
|
147
|
-
An example of one way in which you might want to customize the alternatives is to
|
148
|
-
cater for common spelling mistakes such as _Steven_ and _Stephen_. These two names
|
149
|
-
don't match by default, but you can make them so by replacing the two default rules:
|
150
|
-
|
151
|
-
["Stephen", "Steve"]
|
152
|
-
["Steven", "Steve"]
|
153
|
-
|
154
|
-
with the following single rule:
|
155
|
-
|
156
|
-
["Stephen", "Steven", "Steve"]
|
157
|
-
|
158
|
-
so that now:
|
159
|
-
|
160
|
-
Name.new("Stephen", "Hanly").match("Steven", "Hanly") # => true
|
137
|
+
ICU::Name.load_alternatives(:first, data)
|
161
138
|
|
162
|
-
|
163
|
-
|
164
|
-
spelling mistakes in the context of your application.
|
139
|
+
Note that without the call to <tt>reset_alternatives</tt>, the newly loaded alternatives
|
140
|
+
add to, rather than replace, the defaults.
|
165
141
|
|
166
|
-
|
167
|
-
|
142
|
+
Another use of alternatives is to cater for English and Irish versions of the same name,
|
143
|
+
for example (last names):
|
168
144
|
|
169
145
|
[Murphy, Murchadha]
|
170
146
|
|
171
|
-
or for
|
147
|
+
or for variations including spelling variations, for example (first names):
|
172
148
|
|
173
149
|
[Patrick, Pat, Paddy, Padraig, Padraic, Padhraig, Padhraic]
|
174
150
|
|
@@ -1,37 +1,50 @@
|
|
1
1
|
---
|
2
|
+
- [Abdul, Abul]
|
2
3
|
- [Alexander, Alex]
|
4
|
+
- [Anandagopal, Ananda]
|
3
5
|
- [Andrew, Andy]
|
6
|
+
- [Anne, Ann]
|
4
7
|
- [Anthony, Tony]
|
5
8
|
- [Benjamin, Ben]
|
6
|
-
- [Catherine, Cathy, Cath]
|
7
|
-
- [
|
9
|
+
- [Catherine, Cathy, Cath, Cate, Katherine, Kathy, Kath, Kate]
|
10
|
+
- [Charlie, Charles]
|
11
|
+
- [Chris, Christopher]
|
12
|
+
- [Daniel, Danial, Danny, Dan]
|
8
13
|
- [David, Dave]
|
9
14
|
- [Deborah, Debbie]
|
10
15
|
- [Des, Desmond]
|
11
|
-
- [
|
12
|
-
- [
|
13
|
-
- [
|
16
|
+
- [Douglas, Dougie]
|
17
|
+
- [Eamonn, Eamon]
|
18
|
+
- [Edward, Eddie, Eddy, Ed, Ned]
|
19
|
+
- [Eric, Erick, Erik]
|
20
|
+
- [Frederick, Frederic, Fred]
|
14
21
|
- [Gerald, Gerry]
|
15
|
-
- [Gerard, Gerry]
|
16
|
-
- [James, Jim, Jimmy]
|
22
|
+
- [Gerhard, Gerard, Ger, Gerry]
|
23
|
+
- [James, Jim, Jimmy, Jamie]
|
24
|
+
- [Joanna, Joan, Joanne]
|
25
|
+
- [Joe, Joseph]
|
17
26
|
- [John, Johnny]
|
18
27
|
- [Jonathan, Jon]
|
19
|
-
- [Kenneth, Ken, Kenny]
|
20
28
|
- [Lyubomir, Lubomir]
|
21
|
-
- [
|
22
|
-
- [
|
29
|
+
- [Kenneth, Ken, Kenny]
|
30
|
+
- [Michael, Mike, Mick, Micky, Mickie, Mikey, Micheal]
|
31
|
+
- [Muthu, Muthukumaran]
|
32
|
+
- [Nicholas, Nick, Nicolas]
|
23
33
|
- [Nicola, Nickie, Nicky]
|
24
|
-
- [Patrick, Pat]
|
25
|
-
- [Patricia, Patty, Pat]
|
34
|
+
- [Patrick, Pat, Paddy, Padraig, Padraic, Padhraig, Padhraic]
|
35
|
+
- [Patricia, Paddy, Patty, Pat]
|
26
36
|
- [Peter, Pete]
|
27
|
-
- [Philip, Phil]
|
28
|
-
- [
|
37
|
+
- [Philip, Phillip, Phil]
|
38
|
+
- [Philippe, Phillippe, Phil]
|
39
|
+
- [Raymond, Ray]
|
29
40
|
- [Rick, Ricky]
|
30
41
|
- [Robert, Bob, Bobby]
|
31
|
-
- [
|
32
|
-
- [
|
33
|
-
- [
|
34
|
-
- [
|
42
|
+
- [Rodney, Rod]
|
43
|
+
- [Samual, Sam, Samuel]
|
44
|
+
- [Stef, Stefan, Stephan, Stefen, Stephen]
|
45
|
+
- [Steffy, Stefanie, Stephanie, Stefenie, Stephenie]
|
46
|
+
- [Stephen, Steve, Steven]
|
35
47
|
- [Terence, Terry]
|
36
48
|
- [Thomas, Tom, Tommy]
|
37
49
|
- [William, Will, Willy, Willie, Bill]
|
50
|
+
- [Sean, John, !ruby/regexp /^Bradley$/]
|
@@ -1 +1,7 @@
|
|
1
|
-
---
|
1
|
+
---
|
2
|
+
- [Ffrench, French]
|
3
|
+
- [Murchadha, Murphy]
|
4
|
+
- [Quinn, Benjamin, !ruby/regexp /^(Debbie|Deborah)$/]
|
5
|
+
- [Astaneh Lopez, Lopez, !ruby/regexp /^Alex$/]
|
6
|
+
- [Gardenes Santiago, Gardenes, !ruby/regexp /^Manuel$/]
|
7
|
+
- ["O'Siochru", King, !ruby/regexp /^Mairead$/]
|
data/config/test_first_alts.yaml
CHANGED
@@ -1,42 +1,11 @@
|
|
1
1
|
---
|
2
|
-
- [Abdul, Abul]
|
3
|
-
- [Alexander, Alex]
|
4
|
-
- [Anandagopal, Ananda]
|
5
|
-
- [Andrew, Andy]
|
6
|
-
- [Anne, Ann]
|
7
|
-
- [Anthony, Tony]
|
8
|
-
- [Benjamin, Ben]
|
9
|
-
- [Catherine, Cathy, Cath]
|
10
|
-
- [Daniel, Danial, Danny, Dan]
|
11
|
-
- [David, Dave]
|
12
2
|
- [Deborah, Debbie]
|
13
|
-
- [
|
14
|
-
- [Eamonn, Eamon]
|
15
|
-
- [Edward, Eddie, Eddy, Ed]
|
16
|
-
- [Eric, Erick, Erik]
|
17
|
-
- [Frederick, Frederic, Fred]
|
18
|
-
- [Gerald, Gerry]
|
19
|
-
- [Gerhard, Gerard, Ger, Gerry]
|
20
|
-
- [James, Jim, Jimmy]
|
21
|
-
- [Joanna, Joan, Joanne]
|
3
|
+
- [Demeter, Ceres]
|
22
4
|
- [John, Johnny]
|
23
|
-
- [Jonathan, Jon]
|
24
|
-
- [Kenneth, Ken, Kenny]
|
25
5
|
- [Lyubomir, Lubomir]
|
26
|
-
- [Michael, Mike
|
27
|
-
- [Nicholas, Nick, Nicolas]
|
28
|
-
- [Nicola, Nickie, Nicky]
|
6
|
+
- [Michael, Mike]
|
29
7
|
- [Patrick, Pat, Paddy, Padraig, Padraic, Padhraig, Padhraic]
|
30
|
-
- [Patricia, Paddy, Patty, Pat]
|
31
|
-
- [Peter, Pete]
|
32
8
|
- [Philippe, Philip, Phillippe, Phillip]
|
33
|
-
- [Rick, Ricky]
|
34
|
-
- [Robert, Bob, Bobby]
|
35
|
-
- [Samual, Sam, Samuel]
|
36
|
-
- [Stef, Stefan, Stephan, Stefen, Stephen]
|
37
|
-
- [Steffy, Stefanie, Stephanie, Stefenie, Stephenie]
|
38
9
|
- [Stephen, Steve, Steven]
|
39
|
-
- [Terence, Terry]
|
40
|
-
- [Thomas, Tom, Tommy]
|
41
10
|
- [William, Will, Willy, Willie, Bill]
|
42
11
|
- [Sean, John, !ruby/regexp /^Bradley$/]
|
data/config/test_last_alts.yaml
CHANGED
data/lib/icu_name/name.rb
CHANGED
@@ -13,8 +13,8 @@ module ICU
|
|
13
13
|
|
14
14
|
# Construct a new name from one or two strings or any objects that have a to_s method.
|
15
15
|
def initialize(name1='', name2='')
|
16
|
-
@name1 = Util.to_utf8(name1.to_s)
|
17
|
-
@name2 = Util.to_utf8(name2.to_s)
|
16
|
+
@name1 = Util::String.to_utf8(name1.to_s)
|
17
|
+
@name2 = Util::String.to_utf8(name2.to_s)
|
18
18
|
originalize
|
19
19
|
canonicalize
|
20
20
|
@first.freeze
|
@@ -69,10 +69,10 @@ module ICU
|
|
69
69
|
match_first(first(opts), other.first(opts)) && match_last(last(opts), other.last(opts))
|
70
70
|
end
|
71
71
|
|
72
|
-
# Load a set of first or last name alternatives. If
|
73
|
-
#
|
74
|
-
def self.load_alternatives(type,
|
75
|
-
compile_alts(check_type(type),
|
72
|
+
# Load a set of first or last name alternatives. If data is absent, a default set will be loaded.
|
73
|
+
# <tt>type</tt> should be <tt>:first</tt> or <tt>:last</tt>.
|
74
|
+
def self.load_alternatives(type, data=nil)
|
75
|
+
compile_alts(check_type(type), data, true)
|
76
76
|
end
|
77
77
|
|
78
78
|
# Show first name or last name alternatives.
|
@@ -93,7 +93,7 @@ module ICU
|
|
93
93
|
# Transliterate characters to ASCII.
|
94
94
|
def transliterate(str, chars='US-ASCII')
|
95
95
|
if chars.match(/ASCII/i)
|
96
|
-
Util.transliterate(str)
|
96
|
+
Util::String.transliterate(str)
|
97
97
|
else
|
98
98
|
str.dup
|
99
99
|
end
|
@@ -139,12 +139,12 @@ module ICU
|
|
139
139
|
name.gsub!(/\s*-\s*/, '-')
|
140
140
|
name.gsub!(/'+/, "'")
|
141
141
|
name.strip!
|
142
|
-
name = Util.downcase(name)
|
142
|
+
name = Util::String.downcase(name)
|
143
143
|
name.split(/\s+/).map do |n|
|
144
144
|
n.sub!(/^-+/, '')
|
145
145
|
n.sub!(/-+$/, '')
|
146
146
|
n.split(/-/).map do |p|
|
147
|
-
Util.capitalize(p)
|
147
|
+
Util::String.capitalize(p)
|
148
148
|
end.join('-')
|
149
149
|
end.join(' ')
|
150
150
|
end
|
@@ -156,11 +156,11 @@ module ICU
|
|
156
156
|
|
157
157
|
# Apply final touches to finish canonicalising a last name.
|
158
158
|
def finish_last(names)
|
159
|
-
names.gsub!(/\b([A-Z\u{c0}-\u{de}]')([a-z\u{e0}-\u{ff}])/) { |m| $1 + Util.upcase($2) }
|
160
|
-
names.gsub!(/\b(Mc)([a-z\u{e0}-\u{ff}])/) { |m| $1 + Util.upcase($2) }
|
159
|
+
names.gsub!(/\b([A-Z\u{c0}-\u{de}]')([a-z\u{e0}-\u{ff}])/) { |m| $1 + Util::String.upcase($2) }
|
160
|
+
names.gsub!(/\b(Mc)([a-z\u{e0}-\u{ff}])/) { |m| $1 + Util::String.upcase($2) }
|
161
161
|
names.gsub!(/\bMac([a-z\u{e0}-\u{ff}])/) do |m|
|
162
162
|
letter = $1 # capitalize after "Mac" only if the original clearly indicates it
|
163
|
-
upper = Util.upcase(letter)
|
163
|
+
upper = Util::String.upcase(letter)
|
164
164
|
'Mac'.concat(@original.match(/\bMac#{upper}/) ? upper : letter)
|
165
165
|
end
|
166
166
|
names.gsub!(/\bO ([A-Z\u{c0}-\u{de}])/) { |m| "O'" + $1 } # O Kelly => "O'Kelly"
|
data/lib/icu_name/util.rb
CHANGED
@@ -2,51 +2,77 @@
|
|
2
2
|
|
3
3
|
module ICU
|
4
4
|
module Util
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
|
6
|
+
# For converting strings in various ways.
|
7
|
+
module String
|
8
|
+
LOWER_CHARS = "àáâãäåæçèéêëìíîïñòóôõöøùúûüýþ"
|
9
|
+
UPPER_CHARS = "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝÞ"
|
10
|
+
ACCENTED_CHARS = "ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüý"
|
11
|
+
UNACCENTED_CHARS = "AAAAAAEEEEIIIINOOOOOUUUUYaaaaaaeeeeiiiinooooouuuuy"
|
9
12
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
# Decide if a string is valid UTF-8 or not, returning true or false.
|
14
|
+
def self.is_utf8(str)
|
15
|
+
dup = str.dup
|
16
|
+
dup.force_encoding("UTF-8")
|
17
|
+
dup.valid_encoding?
|
18
|
+
end
|
16
19
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
20
|
+
# Try to convert any string to UTF-8.
|
21
|
+
def self.to_utf8(str)
|
22
|
+
utf8 = is_utf8(str)
|
23
|
+
dup = str.dup
|
24
|
+
return dup.force_encoding("UTF-8") if utf8
|
25
|
+
dup.force_encoding("Windows-1252") if dup.encoding.name.match(/^(ASCII-8BIT|UTF-8)$/)
|
26
|
+
dup.encode("UTF-8")
|
27
|
+
end
|
25
28
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
29
|
+
# Upcase a UTF-8 string that might contain accented characters.
|
30
|
+
def self.upcase(str)
|
31
|
+
str = str.upcase
|
32
|
+
return str if str.ascii_only?
|
33
|
+
str.tr(LOWER_CHARS, UPPER_CHARS)
|
34
|
+
end
|
32
35
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
# Downcase a UTF-8 string that might contain accented characters.
|
37
|
+
def self.downcase(str)
|
38
|
+
str = str.downcase
|
39
|
+
return str if str.ascii_only?
|
40
|
+
str.tr(UPPER_CHARS, LOWER_CHARS)
|
41
|
+
end
|
39
42
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
43
|
+
# Capitalize a UTF-8 string that might contain accented characters.
|
44
|
+
def self.capitalize(str)
|
45
|
+
return str.capitalize if str.ascii_only? || !str.match(/\A(.)(.*)\z/)
|
46
|
+
upcase($1) + downcase($2)
|
47
|
+
end
|
48
|
+
|
49
|
+
# Transliterate Latin-1 accented characters to ASCII.
|
50
|
+
def self.transliterate(str)
|
51
|
+
return str.dup if str.ascii_only?
|
52
|
+
str.tr(ACCENTED_CHARS, UNACCENTED_CHARS)
|
53
|
+
end
|
44
54
|
end
|
45
55
|
|
46
|
-
#
|
47
|
-
|
48
|
-
|
49
|
-
|
56
|
+
# For generating SQL queries relating to alternative first or last names.
|
57
|
+
module AlternativeNames
|
58
|
+
def last_name_like(last, first)
|
59
|
+
ICU::Name.new(first, last).alternatives(:last).push(last).map do |nam|
|
60
|
+
"last_name LIKE '%#{quote_str(nam)}%'"
|
61
|
+
end.sort.join(" OR ")
|
62
|
+
end
|
63
|
+
|
64
|
+
def first_name_like(first, last)
|
65
|
+
ICU::Name.new(first, last).alternatives(:first).push(first).map do |nam|
|
66
|
+
"first_name LIKE '%#{quote_str(nam)}%'"
|
67
|
+
end.sort.join(" OR ")
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
# Same as Rails version (ActiveRecord::ConnectionAdapters::Quoting).
|
73
|
+
def quote_str(s)
|
74
|
+
s.gsub(/\\/, '\&\&').gsub(/'/, "''")
|
75
|
+
end
|
50
76
|
end
|
51
77
|
end
|
52
78
|
end
|
data/lib/icu_name/version.rb
CHANGED
data/spec/name_spec.rb
CHANGED
@@ -3,7 +3,8 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
|
4
4
|
module ICU
|
5
5
|
describe Name do
|
6
|
-
def load_alt_test(*types)
|
6
|
+
def load_alt_test(reset, *types)
|
7
|
+
Name.reset_alternatives if reset
|
7
8
|
types.each do |type|
|
8
9
|
file = File.expand_path(File.dirname(__FILE__) + "/../config/test_#{type}_alts.yaml")
|
9
10
|
data = File.open(file) { |fd| YAML.load(fd) }
|
@@ -351,13 +352,13 @@ module ICU
|
|
351
352
|
Name.new('Gerard', 'Orr').match('Gerald', 'Orr').should be_false
|
352
353
|
end
|
353
354
|
|
354
|
-
it "should
|
355
|
-
Name.new('Steven', 'Brady').match('Stephen', 'Brady').should
|
356
|
-
Name.new('Philip', 'Short').match('Phillip', 'Short').should
|
355
|
+
it "should handle some common misspellings" do
|
356
|
+
Name.new('Steven', 'Brady').match('Stephen', 'Brady').should be_true
|
357
|
+
Name.new('Philip', 'Short').match('Phillip', 'Short').should be_true
|
357
358
|
end
|
358
359
|
|
359
|
-
it "should
|
360
|
-
Name.new('Sean', 'Bradley').match('John', 'Bradley').should
|
360
|
+
it "should have some conditional matches" do
|
361
|
+
Name.new('Sean', 'Bradley').match('John', 'Bradley').should be_true
|
361
362
|
end
|
362
363
|
|
363
364
|
it "should not mix up nick names" do
|
@@ -379,9 +380,9 @@ module ICU
|
|
379
380
|
Name.new('Darko', 'Polimac').match('Darko', 'Polimc').should be_false
|
380
381
|
end
|
381
382
|
|
382
|
-
it "should
|
383
|
-
Name.new('Debbie', 'Quinn').match('Debbie', 'Benjamin').should
|
384
|
-
Name.new('Mairead', "O'Siochru").match('Mairead', 'King').should
|
383
|
+
it "should have some conditional matches" do
|
384
|
+
Name.new('Debbie', 'Quinn').match('Debbie', 'Benjamin').should be_true
|
385
|
+
Name.new('Mairead', "O'Siochru").match('Mairead', 'King').should be_true
|
385
386
|
end
|
386
387
|
end
|
387
388
|
|
@@ -404,7 +405,11 @@ module ICU
|
|
404
405
|
|
405
406
|
context "configuring new first name alternatives" do
|
406
407
|
before(:all) do
|
407
|
-
load_alt_test(:first)
|
408
|
+
load_alt_test(true, :first)
|
409
|
+
end
|
410
|
+
|
411
|
+
after(:all) do
|
412
|
+
Name.reset_alternatives
|
408
413
|
end
|
409
414
|
|
410
415
|
it "should match some spelling errors" do
|
@@ -421,7 +426,11 @@ module ICU
|
|
421
426
|
|
422
427
|
context "configuring new last name alternatives" do
|
423
428
|
before(:all) do
|
424
|
-
load_alt_test(:last)
|
429
|
+
load_alt_test(true, :last)
|
430
|
+
end
|
431
|
+
|
432
|
+
after(:all) do
|
433
|
+
Name.reset_alternatives
|
425
434
|
end
|
426
435
|
|
427
436
|
it "should match some spelling errors" do
|
@@ -444,7 +453,11 @@ module ICU
|
|
444
453
|
|
445
454
|
context "configuring new first and new last name alternatives" do
|
446
455
|
before(:all) do
|
447
|
-
load_alt_test(:first, :last)
|
456
|
+
load_alt_test(true, :first, :last)
|
457
|
+
end
|
458
|
+
|
459
|
+
after(:all) do
|
460
|
+
Name.reset_alternatives
|
448
461
|
end
|
449
462
|
|
450
463
|
it "should allow some awesome matches" do
|
@@ -455,16 +468,20 @@ module ICU
|
|
455
468
|
|
456
469
|
context "reverting to the default configuration" do
|
457
470
|
before(:all) do
|
458
|
-
load_alt_test(:first, :last)
|
471
|
+
load_alt_test(true, :first, :last)
|
459
472
|
end
|
460
473
|
|
461
|
-
|
462
|
-
Name.
|
474
|
+
after(:all) do
|
475
|
+
Name.reset_alternatives
|
476
|
+
end
|
477
|
+
|
478
|
+
it "should not match after reverting" do
|
479
|
+
Name.new('avril, demeter').match('Ceres', 'Avril').should be_true
|
463
480
|
Name.load_alternatives(:first)
|
464
|
-
Name.new('
|
465
|
-
Name.new('Patrick', '
|
481
|
+
Name.new('avril, demeter').match('Ceres', 'Avril').should be_false
|
482
|
+
Name.new('Patrick', 'Ares').match('Patrick', 'Mars').should be_true
|
466
483
|
Name.load_alternatives(:last)
|
467
|
-
Name.new('Patrick', '
|
484
|
+
Name.new('Patrick', 'Ares').match('Patrick', 'Mars').should be_false
|
468
485
|
end
|
469
486
|
end
|
470
487
|
|
@@ -472,35 +489,39 @@ module ICU
|
|
472
489
|
it "should show common nicknames" do
|
473
490
|
Name.new('William', 'Ffrench').alternatives(:first).should =~ %w{Bill Willy Willie Will}
|
474
491
|
Name.new('Bill', 'Ffrench').alternatives(:first).should =~ %w{William Willy Will Willie}
|
475
|
-
Name.new('Steven', 'Ffrench').alternatives(:first).should =~ %w{Steve}
|
476
|
-
Name.new('Stephen', 'Ffrench').alternatives(:first).should =~ %w{Steve}
|
477
|
-
Name.new('Michael Stephen', 'Ffrench').alternatives(:first).should =~ %w{
|
478
|
-
Name.new('Stephen M.', 'Ffrench').alternatives(:first).should =~ %w{Steve}
|
492
|
+
Name.new('Steven', 'Ffrench').alternatives(:first).should =~ %w{Steve Stephen}
|
493
|
+
Name.new('Stephen', 'Ffrench').alternatives(:first).should =~ %w{Stef Stefan Stefen Stephan Steve Steven}
|
494
|
+
Name.new('Michael Stephen', 'Ffrench').alternatives(:first).should =~ %w{Micheal Mick Mickie Micky Mike Mikey Stef Stefan Stefen Stephan Steve Steven}
|
495
|
+
Name.new('Stephen M.', 'Ffrench').alternatives(:first).should =~ %w{Stef Stefan Stefen Stephan Steve Steven}
|
496
|
+
Name.new('Sean', 'Bradley').alternatives(:first).should =~ %w{John}
|
479
497
|
Name.new('S.', 'Ffrench').alternatives(:first).should =~ []
|
480
|
-
Name.new('Sean', 'Bradley').alternatives(:first).should =~ []
|
481
498
|
end
|
482
499
|
|
483
500
|
it "should have automatic last name alternatives for apostrophes to cater for FIDE's habits" do
|
484
|
-
Name.new('Mairead', "O'Siochru").alternatives(:last).should =~
|
485
|
-
Name.new('Erwin E.', "L`Ami").alternatives(:last).should =~
|
501
|
+
Name.new('Mairead', "O'Siochru").alternatives(:last).should =~ %w{King O`Siochru}
|
502
|
+
Name.new('Erwin E.', "L`Ami").alternatives(:last).should =~ %w{L`Ami}
|
486
503
|
end
|
487
504
|
|
488
|
-
it "should not have
|
489
|
-
Name.new('William', 'Ffrench').alternatives(:last).should =~
|
490
|
-
Name.new('Oissine', 'Murphy').alternatives(:last).should =~
|
491
|
-
Name.new('Debbie', 'Quinn').alternatives(:last).should =~
|
505
|
+
it "should not have some last name alternatives" do
|
506
|
+
Name.new('William', 'Ffrench').alternatives(:last).should =~ %w{French}
|
507
|
+
Name.new('Oissine', 'Murphy').alternatives(:last).should =~ %w{Murchadha}
|
508
|
+
Name.new('Debbie', 'Quinn').alternatives(:last).should =~ %w{Benjamin}
|
492
509
|
end
|
493
510
|
end
|
494
511
|
|
495
512
|
context "name alternatives with more adventurous configuration" do
|
496
513
|
before(:all) do
|
497
|
-
load_alt_test(:first, :last)
|
514
|
+
load_alt_test(true, :first, :last)
|
515
|
+
end
|
516
|
+
|
517
|
+
after(:all) do
|
518
|
+
Name.reset_alternatives
|
498
519
|
end
|
499
520
|
|
500
|
-
it "should show
|
521
|
+
it "should show different nicknames" do
|
501
522
|
Name.new('Steven', 'Ffrench').alternatives(:first).should =~ %w{Stephen Steve}
|
502
|
-
Name.new('Stephen', 'Ffrench').alternatives(:first).should =~ %w{
|
503
|
-
Name.new('Stephen Mike', 'Ffrench').alternatives(:first).should =~ %w{Michael
|
523
|
+
Name.new('Stephen', 'Ffrench').alternatives(:first).should =~ %w{Steve Steven}
|
524
|
+
Name.new('Stephen Mike', 'Ffrench').alternatives(:first).should =~ %w{Michael Steve Steven}
|
504
525
|
Name.new('Sean', 'Bradley').alternatives(:first).should =~ %w{John}
|
505
526
|
Name.new('Sean', 'McDonagh').alternatives(:first).should =~ []
|
506
527
|
Name.new('John', 'Bradley').alternatives(:first).should =~ %w{Sean Johnny}
|
@@ -521,6 +542,10 @@ module ICU
|
|
521
542
|
Name.reset_alternatives
|
522
543
|
end
|
523
544
|
|
545
|
+
after(:all) do
|
546
|
+
Name.reset_alternatives
|
547
|
+
end
|
548
|
+
|
524
549
|
it "should be no more than necessary" do
|
525
550
|
alt_compilations(:first).should == 0
|
526
551
|
alt_compilations(:last).should == 0
|
@@ -530,16 +555,10 @@ module ICU
|
|
530
555
|
Name.new('Debbie', 'Quinn').match('Deborah', 'Benjamin')
|
531
556
|
alt_compilations(:first).should == 1
|
532
557
|
alt_compilations(:last).should == 1
|
533
|
-
load_alt_test(:first)
|
558
|
+
load_alt_test(false, :first)
|
534
559
|
alt_compilations(:first).should == 2
|
535
560
|
alt_compilations(:last).should == 1
|
536
|
-
load_alt_test(:last)
|
537
|
-
alt_compilations(:first).should == 2
|
538
|
-
alt_compilations(:last).should == 2
|
539
|
-
Name.new('William', 'Ffrench').match('Bill', 'French')
|
540
|
-
Name.new('Debbie', 'Quinn').match('Deborah', 'Benjamin')
|
541
|
-
Name.new('Mark', 'Orr').alternatives(:first)
|
542
|
-
Name.new('Mark', 'Orr').alternatives(:last)
|
561
|
+
load_alt_test(false, :last)
|
543
562
|
alt_compilations(:first).should == 2
|
544
563
|
alt_compilations(:last).should == 2
|
545
564
|
end
|
data/spec/util_spec.rb
CHANGED
@@ -2,63 +2,86 @@
|
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
4
|
module ICU
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
5
|
+
module Util
|
6
|
+
describe String do
|
7
|
+
context "#is_utf8" do
|
8
|
+
it "recognises some encodings as a special case of UTF-8" do
|
9
|
+
expect(String.is_utf8("Resume".encode("US-ASCII"))).to be_true
|
10
|
+
expect(String.is_utf8("Resume".encode("ASCII-8BIT"))).to be_true
|
11
|
+
expect(String.is_utf8("Resume".encode("BINARY"))).to be_true
|
12
|
+
end
|
13
|
+
|
14
|
+
it "recognises UTF-8" do
|
15
|
+
expect(String.is_utf8("Résumé")).to be_true
|
16
|
+
expect(String.is_utf8("δog")).to be_true
|
17
|
+
end
|
12
18
|
|
13
|
-
|
14
|
-
|
15
|
-
|
19
|
+
it "should recognize other encodings as not being UTF-8" do
|
20
|
+
expect(String.is_utf8("Résumé".encode("ISO-8859-1"))).to be_false
|
21
|
+
expect(String.is_utf8("€50".encode("Windows-1252"))).to be_false
|
22
|
+
expect(String.is_utf8("ひらがな".encode("Shift_JIS"))).to be_false
|
23
|
+
expect(String.is_utf8("\xa3")).to be_false
|
24
|
+
end
|
16
25
|
end
|
17
26
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
27
|
+
context "#to_utf8" do
|
28
|
+
it "converts to UTF-8" do
|
29
|
+
expect(String.to_utf8("Resume")).to eq "Resume"
|
30
|
+
expect(String.to_utf8("Resume".force_encoding("US-ASCII")).encoding.name).to eq "UTF-8"
|
31
|
+
expect(String.to_utf8("Résumé".encode("ISO-8859-1"))).to eq "Résumé"
|
32
|
+
expect(String.to_utf8("Résumé".encode("Windows-1252"))).to eq "Résumé"
|
33
|
+
expect(String.to_utf8("€50".encode("Windows-1252"))).to eq "€50"
|
34
|
+
expect(String.to_utf8("\xa350".force_encoding("ASCII-8BIT"))).to eq "£50"
|
35
|
+
expect(String.to_utf8("\xa350")).to eq "£50"
|
36
|
+
expect(String.to_utf8("ひらがな".encode("Shift_JIS"))).to eq "ひらがな"
|
37
|
+
end
|
23
38
|
end
|
24
|
-
end
|
25
39
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
expect(Util.to_utf8("€50".encode("Windows-1252"))).to eq "€50"
|
33
|
-
expect(Util.to_utf8("\xa350".force_encoding("ASCII-8BIT"))).to eq "£50"
|
34
|
-
expect(Util.to_utf8("\xa350")).to eq "£50"
|
35
|
-
expect(Util.to_utf8("ひらがな".encode("Shift_JIS"))).to eq "ひらがな"
|
40
|
+
context "#downcase" do
|
41
|
+
it "downcases characters in the Latin-1 range" do
|
42
|
+
expect(String.downcase("Eric")).to eq "eric"
|
43
|
+
expect(String.downcase("Éric")).to eq "éric"
|
44
|
+
expect(String.downcase("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝÞ")).to eq "àáâãäåæçèéêëìíîïñòóôõöøùúûüýþ"
|
45
|
+
end
|
36
46
|
end
|
37
|
-
end
|
38
47
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
48
|
+
context "#upcase" do
|
49
|
+
it "upcases characters in the Latin-1 range" do
|
50
|
+
expect(String.upcase("Gearoidin")).to eq "GEAROIDIN"
|
51
|
+
expect(String.upcase("Gearóidín")).to eq "GEARÓIDÍN"
|
52
|
+
expect(String.upcase("àáâãäåæçèéêëìíîïñòóôõöøùúûüýþ")).to eq "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝÞ"
|
53
|
+
end
|
44
54
|
end
|
45
|
-
end
|
46
55
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
56
|
+
context "#capitalize" do
|
57
|
+
it "capitalizes strings that might contain accented characters" do
|
58
|
+
expect(String.capitalize("gearoidin")).to eq "Gearoidin"
|
59
|
+
expect(String.capitalize("GEAROIDIN")).to eq "Gearoidin"
|
60
|
+
expect(String.capitalize("gEAróiDÍn")).to eq "Gearóidín"
|
61
|
+
expect(String.capitalize("ériC")).to eq "Éric"
|
62
|
+
expect(String.capitalize("ÉRIc")).to eq "Éric"
|
63
|
+
end
|
52
64
|
end
|
53
65
|
end
|
66
|
+
|
67
|
+
describe AlternativeNames do
|
68
|
+
context "extends" do
|
69
|
+
class Dummy
|
70
|
+
extend AlternativeNames
|
71
|
+
end
|
72
|
+
|
73
|
+
it "#last_name_like" do
|
74
|
+
expect(Dummy.last_name_like("Murphy", "Oissine")).to eq "last_name LIKE '%Murchadha%' OR last_name LIKE '%Murphy%'"
|
75
|
+
expect(Dummy.last_name_like("O'Connor", "Jonathan")).to eq "last_name LIKE '%O''Connor%' OR last_name LIKE '%O`Connor%'"
|
76
|
+
expect(Dummy.last_name_like("Orr", "Mark")).to eq "last_name LIKE '%Orr%'"
|
77
|
+
expect(Dummy.last_name_like("", "Mark")).to eq "last_name LIKE '%%'"
|
78
|
+
end
|
54
79
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
expect(Util.capitalize("ériC")).to eq "Éric"
|
61
|
-
expect(Util.capitalize("ÉRIc")).to eq "Éric"
|
80
|
+
it "#first_name_like" do
|
81
|
+
expect(Dummy.first_name_like("sean", "bradley")).to eq "first_name LIKE '%John%' OR first_name LIKE '%sean%'"
|
82
|
+
expect(Dummy.first_name_like("Jonathan", "O'Connor")).to eq "first_name LIKE '%Jon%' OR first_name LIKE '%Jonathan%'"
|
83
|
+
expect(Dummy.first_name_like("", "O'Connor")).to eq "first_name LIKE '%%'"
|
84
|
+
end
|
62
85
|
end
|
63
86
|
end
|
64
87
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: icu_name
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -112,7 +112,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
112
112
|
version: '0'
|
113
113
|
segments:
|
114
114
|
- 0
|
115
|
-
hash:
|
115
|
+
hash: 1642061693049790720
|
116
116
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
117
|
none: false
|
118
118
|
requirements:
|