icu_name 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
module ICU
  # A person's name split into first and last parts and put into a canonical
  # form (capitalisation, initials followed by full stops, Mc/Mac/O' prefixes),
  # with fuzzy matching against other names (initials, nicknames, hyphens).
  class Name
    attr_reader :first, :last

    # Construct from one or two strings or any objects that have a to_s method.
    # With one argument the text is split into first and last names (either
    # "Last, First" or "First Last"); with two, they are first and last names.
    # The inputs are copied so the caller's strings are never modified.
    def initialize(name1='', name2='')
      @name1 = name1.to_s.dup
      @name2 = name2.to_s.dup
      canonicalize
    end

    # Return a complete name, first name first, no comma.
    def name
      [@first, @last].reject { |part| part.empty? }.join(' ')
    end

    # Return a reversed complete name, first name last after a comma.
    def rname
      [@last, @first].reject { |part| part.empty? }.join(', ')
    end

    # Convert object to a string (same as rname).
    def to_s
      rname
    end

    # Match another name to this object, returning true or false.
    # Accepts the same arguments as the constructor (including another Name).
    # Matching never alters this object's first or last name.
    def match(name1='', name2='')
      other = Name.new(name1, name2)
      match_first(first, other.first) && match_last(last, other.last)
    end

    private

    # Canonicalise the first and last names.
    def canonicalize
      first, last = partition
      @first = finish_first(first)
      @last  = finish_last(last)
    end

    # Split the input into cleaned first and last parts, returned as
    # [first, last].
    def partition
      if @name2.length == 0
        # Only one input so we must split first and last.
        parts = @name1.split(/,/)
        if parts.size > 1
          # "Last, First ..." form: everything after the first comma is first name.
          last  = clean(parts.shift || '')
          first = clean(parts.join(' '))
        else
          # "First ... Last" form: the final word is the last name.
          parts = clean(@name1).split(/ /)
          last  = parts.pop || ''
          first = parts.join(' ')
        end
      else
        # Two inputs, so we are given first and last.
        first = clean(@name1)
        last  = clean(@name2)
      end
      [first, last]
    end

    # Clean up characters in any name and return a new string of capitalised,
    # single-space separated words (hyphenated parts capitalised individually).
    # The character substitutions are applied to the argument in place; it is
    # always one of this object's private copies, and finish_last later reads
    # @name2 in that partially cleaned (but original-case) state.
    def clean(name)
      name.gsub!(/`/, "'")                  # back-ticks become apostrophes
      name.gsub!(/[^-a-zA-Z.'\s]/, '')      # drop anything not allowed in a name
      name.gsub!(/\./, ' ')                 # full stops become spaces
      name.gsub!(/\s*-\s*/, '-')            # no space around hyphens
      name.gsub!(/'+/, "'")                 # collapse repeated apostrophes
      name.strip.downcase.split(/\s+/).map do |word|
        trimmed = word.sub(/\A-+/, '').sub(/-+\z/, '')
        # Non-destructive capitalize: capitalize! returns nil when the part is
        # unchanged (e.g. "'t"), which used to make such parts vanish.
        trimmed.split(/-/).map { |part| part.capitalize }.join('-')
      end.join(' ')
    end

    # Apply final touches to finish canonicalising a first name:
    # a lone capital letter is an initial and gets a full stop.
    def finish_first(names)
      names.gsub(/([A-Z])\b/, '\1.')
    end

    # Apply final touches to finish canonicalising a last name.
    def finish_last(names)
      # O'boyle => O'Boyle, D'arcy => D'Arcy
      names = names.gsub(/\b([A-Z])'([a-z])/) { "#{$1}'#{$2.upcase}" }
      # Mccabe => McCabe
      names = names.gsub(/\bMc([a-z])/) { "Mc#{$1.upcase}" }
      # Macmanus => MacManus, unless the letter after "mac" was typed in lower
      # case in the given last name (e.g. "macieja" stays Macieja).
      names = names.gsub(/\bMac([a-z])/) do
        letter = $1
        'Mac' + (@name2.match("[mM][aA][cC]#{letter}") ? letter : letter.upcase)
      end
      # O Reilly => O'Reilly
      names.gsub(/\bO ([A-Z])/) { "O'#{$1}" }
    end

    # Match a complete first name.
    def match_first(first1, first2)
      # Is this one a walk in the park?
      return true if first1 == first2

      # No easy ride. Begin by splitting into individual first names.
      words1 = split_first(first1)
      words2 = split_first(first2)

      # Get the long list and the short list.
      long, short = words1.size >= words2.size ? [words1, words2] : [words2, words1]

      # The short one must be a "subset" of the long one (in order).
      # An extra condition must also be satisfied: either the very first word
      # of the long list matched, or some word matched in full (score 0).
      extra = false
      matched = 0 # number of leading words of short matched so far
      long.each_with_index do |word, i|
        score = match_first_name(word, short[matched])
        if score >= 0
          matched += 1
          extra = true if i == 0 || score == 0
        end
        break if matched == short.size
      end

      # There's a match if the following is true.
      matched == short.size && extra
    end

    # Match a complete last name. Works on normalised copies so that neither
    # argument (which may be this object's own @last) is modified.
    def match_last(last1, last2)
      return true if last1 == last2
      normalised1, normalised2 = [last1, last2].map do |last|
        last.downcase.           # MacDonaugh and Macdonaugh
          gsub(/\bmac/, 'mc').   # MacDonaugh and McDonaugh
          tr('-', ' ')           # Lowry-O'Reilly and Lowry O'Reilly
      end
      normalised1 == normalised2
    end

    # Split a complete first name for matching, without modifying the argument.
    # Always returns at least one element (an empty string for empty input).
    def split_first(first)
      words = first.tr('-', ' ').split(/ /) # J. K. and J.-K.
      words = [''] if words.size == 0       # in case input was empty string
      words
    end

    # Match individual first names or initials.
    # -1 = no match
    #  0 = full match
    #  1 = match involving 1 initial
    #  2 = match involving 2 initials
    def match_first_name(first1, first2)
      initials = 0
      initials += 1 if first1.match(/\A[A-Z]\.?\z/)
      initials += 1 if first2.match(/\A[A-Z]\.?\z/)
      return initials if first1 == first2
      return 0 if initials == 0 && match_nick_name(first1, first2)
      return -1 unless initials > 0
      return initials if first1[0] == first2[0]
      -1
    end

    # Match two first names that might be equivalent nicknames.
    def match_nick_name(nick1, nick2)
      compile_nick_names unless @@nc
      code1 = @@nc[nick1]
      return false unless code1
      code1 == @@nc[nick2]
    end

    # Compile the nick names code hash when matching nick names is first attempted.
    # Every name in the same group of @@nl gets the same integer code.
    # Raises ArgumentError if a name appears in more than one group.
    def compile_nick_names
      codes = {}
      @@nl.each_with_index do |nicks, index|
        nicks.each do |n|
          raise ArgumentError, "duplicate name #{n}" if codes[n]
          codes[n] = index + 1
        end
      end
      # Publish only once the table is complete.
      @@nc = codes
    end

    # An array of data for matching nicknames and also a few common misspellings.
    # Each line is one group of equivalent names.
    @@nc = nil
    @@nl = <<-EOF.split(/\n/).reject { |x| x.strip.length == 0 }.map { |x| x.split(' ') }
      Abdul Abul
      Alexander Alex
      Anandagopal Ananda
      Anne Ann
      Anthony Tony
      Benjamin Ben
      Catherine Cathy Cath
      Daniel Danial Danny Dan
      David Dave
      Deborah Debbie
      Des Desmond
      Eamonn Eamon
      Edward Eddie Ed
      Eric Erick Erik
      Frederick Frederic Fred
      Gerald Gerry
      Gerhard Gerard Ger
      James Jim
      Joanna Joan Joanne
      John Johnny
      Jonathan Jon
      Kenneth Ken Kenny
      Michael Mike Mick Micky
      Nicholas Nick Nicolas
      Nicola Nickie Nicky
      Patrick Pat Paddy
      Peter Pete
      Philippe Philip Phillippe Phillip
      Rick Ricky
      Robert Bob Bobby
      Samual Sam Samuel
      Stefanie Stef
      Stephen Steven Steve
      Terence Terry
      Thomas Tom Tommy
      William Will Willy Willie Bill
    EOF
  end
end
@@ -0,0 +1,19 @@
1
module ICU
  # Helpers for detecting and converting string encodings.
  class Util
    # Decide if a string is valid UTF-8 or not, returning true or false.
    # The test is on the raw bytes, regardless of the encoding the string is
    # currently tagged with (so US-ASCII text counts as UTF-8).
    # The argument is not modified.
    def self.is_utf8(str)
      str.dup.force_encoding("UTF-8").valid_encoding?
    end

    # Try to convert any string to UTF-8, returning a new string.
    # The argument is not modified.
    #
    # * Valid text in a non-ASCII-compatible encoding (e.g. UTF-16LE) is
    #   transcoded, even if its bytes happen to also be valid UTF-8.
    # * Bytes that are already valid UTF-8 are simply re-tagged as UTF-8.
    # * Untagged (ASCII-8BIT) data, or data tagged UTF-8 but invalid, is
    #   assumed to be Windows-1252.
    # * Anything else is transcoded from its declared encoding.
    #
    # May raise an Encoding error from String#encode if the bytes cannot be
    # converted from the encoding finally assumed.
    def self.to_utf8(str)
      copy = str.dup

      # In encodings such as UTF-16 plain ASCII text contains NUL bytes that
      # would pass the byte-level UTF-8 check below, so transcode those first.
      if !copy.encoding.ascii_compatible? && copy.valid_encoding?
        return copy.encode("UTF-8")
      end

      return copy.force_encoding("UTF-8") if is_utf8(copy)

      # Compare Encoding objects rather than names so this does not depend on
      # how a particular Ruby version spells the binary encoding's name.
      if [Encoding::ASCII_8BIT, Encoding::UTF_8].include?(copy.encoding)
        copy.force_encoding("Windows-1252")
      end
      copy.encode("UTF-8")
    end
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module ICU
4
4
  class Name
5
- VERSION = "0.0.3"
5
+ VERSION = "0.0.4"
6
6
  end
7
7
  end
data/lib/icu_name.rb CHANGED
@@ -1,230 +1,2 @@
1
- module ICU
2
- class Name
3
- attr_reader :first, :last
4
-
5
- # Construct from one or two strings or any objects that have a to_s method.
6
- def initialize(name1='', name2='')
7
- @name1 = name1.to_s
8
- @name2 = name2.to_s
9
- canonicalize
10
- end
11
-
12
- # Return a complete name, first name first, no comma.
13
- def name
14
- name = ''
15
- name << @first
16
- name << ' ' if @first.length > 0 && @last.length > 0
17
- name << @last
18
- name
19
- end
20
-
21
- # Return a reversed complete name, first name last after a comma.
22
- def rname
23
- name = ''
24
- name << @last
25
- name << ', ' if @first.length > 0 && @last.length > 0
26
- name << @first
27
- name
28
- end
29
-
30
- # Convert object to a string.
31
- def to_s
32
- rname
33
- end
34
-
35
- # Match another name to this object, returning true or false.
36
- def match(name1='', name2='')
37
- other = Name.new(name1, name2)
38
- match_first(first, other.first) && match_last(last, other.last)
39
- end
40
-
41
- private
42
-
43
- # Canonicalise the first and last names.
44
- def canonicalize
45
- first, last = partition
46
- @first = finish_first(first)
47
- @last = finish_last(last)
48
- end
49
-
50
- # Split one complete name into first and last parts.
51
- def partition
52
- if @name2.length == 0
53
- # Only one imput so we must split first and last.
54
- parts = @name1.split(/,/)
55
- if parts.size > 1
56
- last = clean(parts.shift || '')
57
- first = clean(parts.join(' '))
58
- else
59
- parts = clean(@name1).split(/ /)
60
- last = parts.pop || ''
61
- first = parts.join(' ')
62
- end
63
- else
64
- # Two inputs, so we are given first and last.
65
- first = clean(@name1)
66
- last = clean(@name2)
67
- end
68
- [first, last]
69
- end
70
-
71
- # Clean up characters in any name.
72
- def clean(name)
73
- name.gsub!(/`/, "'")
74
- name.gsub!(/[^-a-zA-Z.'\s]/, '')
75
- name.gsub!(/\./, ' ')
76
- name.gsub!(/\s*-\s*/, '-')
77
- name.gsub!(/'+/, "'")
78
- name.strip.downcase.split(/\s+/).map do |n|
79
- n.sub!(/^-+/, '')
80
- n.sub!(/-+$/, '')
81
- n.split(/-/).map do |p|
82
- p.capitalize!
83
- end.join('-')
84
- end.join(' ')
85
- end
86
-
87
- # Apply final touches to finish canonicalising a first name.
88
- def finish_first(names)
89
- names.gsub(/([A-Z])\b/, '\1.')
90
- end
91
-
92
- # Apply final touches to finish canonicalising a last name.
93
- def finish_last(names)
94
- names.gsub!(/\b([A-Z])'([a-z])/) { |m| $1 << "'" << $2.upcase}
95
- names.gsub!(/\bMc([a-z])/) { |m| 'Mc' << $1.upcase}
96
- names.gsub!(/\bMac([a-z])/) do |m|
97
- letter = $1
98
- 'Mac'.concat(@name2.match("[mM][aA][cC]#{letter}") ? letter : letter.upcase)
99
- end
100
- names.gsub!(/\bO ([A-Z])/) { |m| "O'" << $1 }
101
- names
102
- end
103
-
104
- # Match a complete first name.
105
- def match_first(first1, first2)
106
- # Is this one a walk in the park?
107
- return true if first1 == first2
108
-
109
- # No easy ride. Begin by splitting into individual first names.
110
- first1 = split_first(first1)
111
- first2 = split_first(first2)
112
-
113
- # Get the long list and the short list.
114
- long, short = first1.size >= first2.size ? [first1, first2] : [first2, first1]
115
-
116
- # The short one must be a "subset" of the long one.
117
- # An extra condition must also be satisfied.
118
- extra = false
119
- (0..long.size-1).each do |i|
120
- lword = long.shift
121
- score = match_first_name(lword, short.first)
122
- if score >= 0
123
- short.shift
124
- extra = true if i == 0 || score == 0
125
- end
126
- break if short.empty? || long.empty?
127
- end
128
-
129
- # There's a match if the following is true.
130
- short.empty? && extra
131
- end
132
-
133
- # Match a complete last name.
134
- def match_last(last1, last2)
135
- return true if last1 == last2
136
- [last1, last2].each do |last|
137
- last.downcase! # MacDonaugh and Macdonaugh
138
- last.gsub!(/\bmac/, 'mc') # MacDonaugh and McDonaugh
139
- last.tr!('-', ' ') # Lowry-O'Reilly and Lowry O'Reilly
140
- end
141
- last1 == last2
142
- end
143
-
144
- # Split a complete first name for matching.
145
- def split_first(first)
146
- first.tr!('-', ' ') # J. K. and J.-K.
147
- first = first.split(/ /) # split on spaces
148
- first = [''] if first.size == 0 # in case input was empty string
149
- first
150
- end
151
-
152
- # Match individual first names or initials.
153
- # -1 = no match
154
- # 0 = full match
155
- # 1 = match involving 1 initial
156
- # 2 = match involving 2 initials
157
- def match_first_name(first1, first2)
158
- initials = 0
159
- initials+= 1 if first1.match(/^[A-Z]\.?$/)
160
- initials+= 1 if first2.match(/^[A-Z]\.?$/)
161
- return initials if first1 == first2
162
- return 0 if initials == 0 && match_nick_name(first1, first2)
163
- return -1 unless initials > 0
164
- return initials if first1[0] == first2[0]
165
- -1
166
- end
167
-
168
- # Match two first names that might be equivalent nicknames.
169
- def match_nick_name(nick1, nick2)
170
- compile_nick_names unless @@nc
171
- code1 = @@nc[nick1]
172
- return false unless code1
173
- code1 == @@nc[nick2]
174
- end
175
-
176
- # Compile the nick names code hash when matching nick names is first attempted.
177
- def compile_nick_names
178
- @@nc = Hash.new
179
- code = 1
180
- @@nl.each do |nicks|
181
- nicks.each do |n|
182
- throw "duplicate name #{n}" if @@nc[n]
183
- @@nc[n] = code
184
- end
185
- code+= 1
186
- end
187
- end
188
-
189
- # A array of data for matching nicknames and also a few common misspellings.
190
- @@nc = nil
191
- @@nl = <<EOF.split(/\n/).reject{|x| x.length == 0 }.map{|x| x.split(' ')}
192
- Abdul Abul
193
- Alexander Alex
194
- Anandagopal Ananda
195
- Anne Ann
196
- Anthony Tony
197
- Benjamin Ben
198
- Catherine Cathy Cath
199
- Daniel Danial Danny Dan
200
- David Dave
201
- Deborah Debbie
202
- Des Desmond
203
- Eamonn Eamon
204
- Edward Eddie Ed
205
- Eric Erick Erik
206
- Frederick Frederic Fred
207
- Gerald Gerry
208
- Gerhard Gerard Ger
209
- James Jim
210
- Joanna Joan Joanne
211
- John Johnny
212
- Jonathan Jon
213
- Kenneth Ken Kenny
214
- Michael Mike Mick Micky
215
- Nicholas Nick Nicolas
216
- Nicola Nickie Nicky
217
- Patrick Pat Paddy
218
- Peter Pete
219
- Philippe Philip Phillippe Phillip
220
- Rick Ricky
221
- Robert Bob Bobby
222
- Samual Sam Samuel
223
- Stefanie Stef
224
- Stephen Steven Steve
225
- Terence Terry
226
- Thomas Tom Tommy
227
- William Will Willy Willie Bill
228
- EOF
229
- end
230
- end
1
+ require 'icu_name/name.rb'
2
+ require 'icu_name/util.rb'
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
3
 
3
4
  module ICU
@@ -6,68 +7,68 @@ module ICU
6
7
  before(:each) do
7
8
  @simple = Name.new('mark j l', 'orr')
8
9
  end
9
-
10
+
10
11
  it "#first returns the first name(s)" do
11
12
  @simple.first.should == 'Mark J. L.'
12
13
  end
13
-
14
+
14
15
  it "#last returns the last name(s)" do
15
16
  @simple.last.should == 'Orr'
16
17
  end
17
-
18
+
18
19
  it "#name returns the full name with first name(s) first" do
19
20
  @simple.name.should == 'Mark J. L. Orr'
20
21
  end
21
-
22
+
22
23
  it "#rname returns the full name with last name(s) first" do
23
24
  @simple.rname.should == 'Orr, Mark J. L.'
24
25
  end
25
-
26
+
26
27
  it "#to_s is the same as rname" do
27
28
  @simple.to_s.should == 'Orr, Mark J. L.'
28
29
  end
29
-
30
+
30
31
  it "#match returns true if and only if two names match" do
31
32
  @simple.match('mark j l orr').should be_true
32
33
  @simple.match('malcolm g l orr').should be_false
33
34
  end
34
35
  end
35
-
36
+
36
37
  context "rdoc expample" do
37
38
  before(:each) do
38
39
  @robert = Name.new(' robert j ', ' FISCHER ')
39
40
  @bobby = Name.new(' bobby fischer ')
40
41
  end
41
-
42
+
42
43
  it "should get Robert" do
43
44
  @robert.name.should == 'Robert J. Fischer'
44
45
  end
45
-
46
+
46
47
  it "should get Bobby" do
47
48
  @bobby.last.should == 'Fischer'
48
49
  @bobby.first.should == 'Bobby'
49
50
  end
50
-
51
+
51
52
  it "should match Robert and Bobby" do
52
53
  @robert.match(@bobby).should be_true
53
54
  @robert.match('R. J.', 'Fischer').should be_true
54
55
  @bobby.match('R. J.', 'Fischer').should be_false
55
56
  end
56
-
57
+
57
58
  it "should canconicalise last names" do
58
59
  Name.new('John', 'O Reilly').last.should == "O'Reilly"
59
60
  Name.new('dave', 'mcmanus').last.should == "McManus"
60
61
  Name.new('pete', 'MACMANUS').last.should == "MacManus"
61
62
  end
62
63
  end
63
-
64
+
64
65
  context "names that are already canonical" do
65
66
  it "should not be altered" do
66
67
  Name.new('Mark J. L.', 'Orr').name.should == 'Mark J. L. Orr'
67
68
  Name.new('Anna-Marie J.-K.', 'Liviu-Dieter').name.should == 'Anna-Marie J.-K. Liviu-Dieter'
68
69
  end
69
70
  end
70
-
71
+
71
72
  context "last names beginning with a single letter followed by a quote" do
72
73
  it "should be handled correctly" do
73
74
  Name.new('una', "O'boyle").name.should == "Una O'Boyle"
@@ -76,7 +77,7 @@ module ICU
76
77
  Name.new('cormac', "o brien").name.should == "Cormac O'Brien"
77
78
  end
78
79
  end
79
-
80
+
80
81
  context "last beginning with Mc" do
81
82
  it "should be handled correctly" do
82
83
  Name.new('shane', "mccabe").name.should == "Shane McCabe"
@@ -85,7 +86,7 @@ module ICU
85
86
  Name.new('bartlomiej', "macieja").name.should == "Bartlomiej Macieja"
86
87
  end
87
88
  end
88
-
89
+
89
90
  context "doubled barrelled names or initials" do
90
91
  it "should be handled correctly" do
91
92
  Name.new('anna-marie', 'den-otter').name.should == 'Anna-Marie Den-Otter'
@@ -95,26 +96,26 @@ module ICU
95
96
  Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
96
97
  end
97
98
  end
98
-
99
+
99
100
  context "extraneous white space" do
100
101
  it "should be handled correctly" do
101
102
  Name.new(' mark j l ', " \t\r\n orr \n").name.should == 'Mark J. L. Orr'
102
103
  end
103
104
  end
104
-
105
+
105
106
  context "extraneous full stops" do
106
107
  it "should be handled correctly" do
107
108
  Name.new('. mark j..l', 'orr.').name.should == 'Mark J. L. Orr'
108
109
  end
109
110
  end
110
-
111
+
111
112
  context "construction from a single string" do
112
113
  before(:each) do
113
114
  @mark1 = Name.new('ORR, mark j l')
114
115
  @mark2 = Name.new('MARK J L ORR')
115
116
  @oreil = Name.new("O'Reilly, j-k")
116
117
  end
117
-
118
+
118
119
  it "should be possible in simple cases" do
119
120
  @mark1.first.should == 'Mark J. L.'
120
121
  @mark1.last.should == 'Orr'
@@ -123,13 +124,13 @@ module ICU
123
124
  @oreil.name.should == "J.-K. O'Reilly"
124
125
  end
125
126
  end
126
-
127
+
127
128
  context "construction from an instance" do
128
129
  it "should be possible" do
129
130
  Name.new(Name.new('ORR, mark j l')).name.should == 'Mark J. L. Orr'
130
131
  end
131
132
  end
132
-
133
+
133
134
  context "constuction corner cases" do
134
135
  it "should be handled correctly" do
135
136
  Name.new('Orr').name.should == 'Orr'
@@ -140,13 +141,13 @@ module ICU
140
141
  Name.new.rname.should == ''
141
142
  end
142
143
  end
143
-
144
+
144
145
  context "inputs to matching" do
145
146
  before(:all) do
146
147
  @mark = Name.new('Mark', 'Orr')
147
148
  @kram = Name.new('Mark', 'Orr')
148
149
  end
149
-
150
+
150
151
  it "should be flexible" do
151
152
  @mark.match('Mark', 'Orr').should be_true
152
153
  @mark.match('Mark Orr').should be_true
@@ -159,12 +160,12 @@ module ICU
159
160
  it "should match when first names are the same" do
160
161
  Name.new('Mark', 'Orr').match('Mark', 'Orr').should be_true
161
162
  end
162
-
163
+
163
164
  it "should be flexible with regards to hyphens in double barrelled names" do
164
165
  Name.new('J.-K.', 'Rowling').match('J. K.', 'Rowling').should be_true
165
166
  Name.new('Joanne-K.', 'Rowling').match('Joanne K.', 'Rowling').should be_true
166
167
  end
167
-
168
+
168
169
  it "should match initials" do
169
170
  Name.new('M. J. L.', 'Orr').match('Mark John Legard', 'Orr').should be_true
170
171
  Name.new('M.', 'Orr').match('Mark', 'Orr').should be_true
@@ -172,37 +173,49 @@ module ICU
172
173
  Name.new('M.', 'Orr').match('M. J.', 'Orr').should be_true
173
174
  Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
174
175
  end
175
-
176
+
176
177
  it "should not match on full names not in first position or without an exact match" do
177
178
  Name.new('J. M.', 'Orr').match('John', 'Orr').should be_true
178
179
  Name.new('M. J.', 'Orr').match('John', 'Orr').should be_false
179
180
  Name.new('M. John', 'Orr').match('John', 'Orr').should be_true
180
181
  end
181
-
182
+
182
183
  it "should handle common nicknames" do
183
184
  Name.new('William', 'Orr').match('Bill', 'Orr').should be_true
184
185
  Name.new('David', 'Orr').match('Dave', 'Orr').should be_true
185
186
  Name.new('Mick', 'Orr').match('Mike', 'Orr').should be_true
186
187
  end
187
-
188
+
188
189
  it "should not mix up nick names" do
189
190
  Name.new('David', 'Orr').match('Bill', 'Orr').should be_false
190
191
  end
191
192
  end
192
-
193
+
193
194
  context "last name matches" do
194
195
  it "should be flexible with regards to hyphens in double barrelled names" do
195
196
  Name.new('Johanna', "Lowry-O'Reilly").match('Johanna', "Lowry O'Reilly").should be_true
196
197
  end
197
-
198
+
198
199
  it "should be case insensitive in matches involving Macsomething and MacSomething" do
199
200
  Name.new('Alan', 'MacDonagh').match('Alan', 'Macdonagh').should be_true
200
201
  end
201
-
202
+
202
203
  it "should cater for the common mispelling of names beginning with Mc or Mac" do
203
204
  Name.new('Alan', 'McDonagh').match('Alan', 'MacDonagh').should be_true
204
205
  Name.new('Darko', 'Polimac').match('Darko', 'Polimc').should be_false
205
206
  end
206
207
  end
208
+
209
+ context "accented characters" do
210
+ before(:each) do
211
+ @first = 'Gearóidín'
212
+ @last = 'Uí Laighléis'
213
+ end
214
+
215
+ it "should not yet deal with UTF-8" do
216
+ name = Name.new(@first, @last)
217
+ name.first.should_not == @first
218
+ end
219
+ end
207
220
  end
208
221
  end
data/spec/util_spec.rb ADDED
@@ -0,0 +1,37 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
+
4
+ module ICU
5
+ describe Util do
6
+ context "#is_utf8" do
7
+ it "should recognise US-ASCII as a special case of UTF-8" do
8
+ Util.is_utf8("Resume".encode("US-ASCII")).should be_true
9
+ end
10
+
11
+ it "should recognise UTF-8" do
12
+ Util.is_utf8("Résumé").should be_true
13
+ Util.is_utf8("δog").should be_true
14
+ end
15
+
16
+ it "should recognize other encodings as not being UTF-8" do
17
+ Util.is_utf8("Résumé".encode("ISO-8859-1")).should be_false
18
+ Util.is_utf8("€50".encode("Windows-1252")).should be_false
19
+ Util.is_utf8("ひらがな".encode("Shift_JIS")).should be_false
20
+ Util.is_utf8("\xa3").should be_false
21
+ end
22
+ end
23
+
24
+ context "#to_utf8" do
25
+ it "should convert to UTF-8" do
26
+ Util.to_utf8("Resume").should == "Resume"
27
+ Util.to_utf8("Resume".force_encoding("US-ASCII")).encoding.name.should == "UTF-8"
28
+ Util.to_utf8("Résumé".encode("ISO-8859-1")).should == "Résumé"
29
+ Util.to_utf8("Résumé".encode("Windows-1252")).should == "Résumé"
30
+ Util.to_utf8("€50".encode("Windows-1252")).should == "€50"
31
+ Util.to_utf8("\xa350".force_encoding("ASCII-8BIT")).should == "£50"
32
+ Util.to_utf8("\xa350").should == "£50"
33
+ Util.to_utf8("ひらがな".encode("Shift_JIS")).should == "ひらがな"
34
+ end
35
+ end
36
+ end
37
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Mark Orr
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-01-02 00:00:00 +00:00
17
+ date: 2011-01-21 00:00:00 +00:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -55,10 +55,13 @@ extra_rdoc_files:
55
55
  - LICENCE
56
56
  - README.rdoc
57
57
  files:
58
+ - lib/icu_name/name.rb
59
+ - lib/icu_name/util.rb
58
60
  - lib/icu_name/version.rb
59
61
  - lib/icu_name.rb
60
- - spec/icu_name_spec.rb
62
+ - spec/name_spec.rb
61
63
  - spec/spec_helper.rb
64
+ - spec/util_spec.rb
62
65
  - LICENCE
63
66
  - README.rdoc
64
67
  has_rdoc: true