icu_name 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,230 @@
1
+ module ICU
2
+ class Name
3
+ attr_reader :first, :last
4
+
5
+ # Construct from one or two strings or any objects that have a to_s method.
6
+ def initialize(name1='', name2='')
7
+ @name1 = name1.to_s.dup
8
+ @name2 = name2.to_s.dup
9
+ canonicalize
10
+ end
11
+
12
+ # Return a complete name, first name first, no comma.
13
+ def name
14
+ name = ''
15
+ name << @first
16
+ name << ' ' if @first.length > 0 && @last.length > 0
17
+ name << @last
18
+ name
19
+ end
20
+
21
+ # Return a reversed complete name, first name last after a comma.
22
+ def rname
23
+ name = ''
24
+ name << @last
25
+ name << ', ' if @first.length > 0 && @last.length > 0
26
+ name << @first
27
+ name
28
+ end
29
+
30
+ # Convert object to a string.
31
+ def to_s
32
+ rname
33
+ end
34
+
35
+ # Match another name to this object, returning true or false.
36
+ def match(name1='', name2='')
37
+ other = Name.new(name1, name2)
38
+ match_first(first, other.first) && match_last(last, other.last)
39
+ end
40
+
41
+ private
42
+
43
+ # Canonicalise the first and last names.
44
+ def canonicalize
45
+ first, last = partition
46
+ @first = finish_first(first)
47
+ @last = finish_last(last)
48
+ end
49
+
50
+ # Split one complete name into first and last parts.
51
+ def partition
52
+ if @name2.length == 0
53
+ # Only one input so we must split first and last.
54
+ parts = @name1.split(/,/)
55
+ if parts.size > 1
56
+ last = clean(parts.shift || '')
57
+ first = clean(parts.join(' '))
58
+ else
59
+ parts = clean(@name1).split(/ /)
60
+ last = parts.pop || ''
61
+ first = parts.join(' ')
62
+ end
63
+ else
64
+ # Two inputs, so we are given first and last.
65
+ first = clean(@name1)
66
+ last = clean(@name2)
67
+ end
68
+ [first, last]
69
+ end
70
+
71
+ # Clean up characters in any name.
72
+ def clean(name)
73
+ name.gsub!(/`/, "'")
74
+ name.gsub!(/[^-a-zA-Z.'\s]/, '')
75
+ name.gsub!(/\./, ' ')
76
+ name.gsub!(/\s*-\s*/, '-')
77
+ name.gsub!(/'+/, "'")
78
+ name.strip.downcase.split(/\s+/).map do |n|
79
+ n.sub!(/^-+/, '')
80
+ n.sub!(/-+$/, '')
81
+ n.split(/-/).map do |p|
82
+ p.capitalize!
83
+ end.join('-')
84
+ end.join(' ')
85
+ end
86
+
87
+ # Apply final touches to finish canonicalising a first name.
88
+ def finish_first(names)
89
+ names.gsub(/([A-Z])\b/, '\1.')
90
+ end
91
+
92
+ # Apply final touches to finish canonicalising a last name.
93
+ def finish_last(names)
94
+ names.gsub!(/\b([A-Z])'([a-z])/) { |m| $1 << "'" << $2.upcase}
95
+ names.gsub!(/\bMc([a-z])/) { |m| 'Mc' << $1.upcase}
96
+ names.gsub!(/\bMac([a-z])/) do |m|
97
+ letter = $1
98
+ 'Mac'.concat(@name2.match("[mM][aA][cC]#{letter}") ? letter : letter.upcase)
99
+ end
100
+ names.gsub!(/\bO ([A-Z])/) { |m| "O'" << $1 }
101
+ names
102
+ end
103
+
104
+ # Match a complete first name.
105
+ def match_first(first1, first2)
106
+ # Is this one a walk in the park?
107
+ return true if first1 == first2
108
+
109
+ # No easy ride. Begin by splitting into individual first names.
110
+ first1 = split_first(first1)
111
+ first2 = split_first(first2)
112
+
113
+ # Get the long list and the short list.
114
+ long, short = first1.size >= first2.size ? [first1, first2] : [first2, first1]
115
+
116
+ # The short one must be a "subset" of the long one.
117
+ # An extra condition must also be satisfied.
118
+ extra = false
119
+ (0..long.size-1).each do |i|
120
+ lword = long.shift
121
+ score = match_first_name(lword, short.first)
122
+ if score >= 0
123
+ short.shift
124
+ extra = true if i == 0 || score == 0
125
+ end
126
+ break if short.empty? || long.empty?
127
+ end
128
+
129
+ # There's a match if the following is true.
130
+ short.empty? && extra
131
+ end
132
+
133
+ # Match a complete last name.
134
+ def match_last(last1, last2)
135
+ return true if last1 == last2
136
+ [last1, last2].each do |last|
137
+ last.downcase! # MacDonaugh and Macdonaugh
138
+ last.gsub!(/\bmac/, 'mc') # MacDonaugh and McDonaugh
139
+ last.tr!('-', ' ') # Lowry-O'Reilly and Lowry O'Reilly
140
+ end
141
+ last1 == last2
142
+ end
143
+
144
+ # Split a complete first name for matching.
145
+ def split_first(first)
146
+ first.tr!('-', ' ') # J. K. and J.-K.
147
+ first = first.split(/ /) # split on spaces
148
+ first = [''] if first.size == 0 # in case input was empty string
149
+ first
150
+ end
151
+
152
+ # Match individual first names or initials.
153
+ # -1 = no match
154
+ # 0 = full match
155
+ # 1 = match involving 1 initial
156
+ # 2 = match involving 2 initials
157
+ def match_first_name(first1, first2)
158
+ initials = 0
159
+ initials+= 1 if first1.match(/^[A-Z]\.?$/)
160
+ initials+= 1 if first2.match(/^[A-Z]\.?$/)
161
+ return initials if first1 == first2
162
+ return 0 if initials == 0 && match_nick_name(first1, first2)
163
+ return -1 unless initials > 0
164
+ return initials if first1[0] == first2[0]
165
+ -1
166
+ end
167
+
168
+ # Match two first names that might be equivalent nicknames.
169
+ def match_nick_name(nick1, nick2)
170
+ compile_nick_names unless @@nc
171
+ code1 = @@nc[nick1]
172
+ return false unless code1
173
+ code1 == @@nc[nick2]
174
+ end
175
+
176
+ # Compile the nick names code hash when matching nick names is first attempted.
177
+ def compile_nick_names
178
+ @@nc = Hash.new
179
+ code = 1
180
+ @@nl.each do |nicks|
181
+ nicks.each do |n|
182
+ throw "duplicate name #{n}" if @@nc[n]
183
+ @@nc[n] = code
184
+ end
185
+ code+= 1
186
+ end
187
+ end
188
+
189
+ # An array of data for matching nicknames and also a few common misspellings.
190
+ @@nc = nil
191
+ @@nl = <<EOF.split(/\n/).reject{|x| x.length == 0 }.map{|x| x.split(' ')}
192
+ Abdul Abul
193
+ Alexander Alex
194
+ Anandagopal Ananda
195
+ Anne Ann
196
+ Anthony Tony
197
+ Benjamin Ben
198
+ Catherine Cathy Cath
199
+ Daniel Danial Danny Dan
200
+ David Dave
201
+ Deborah Debbie
202
+ Des Desmond
203
+ Eamonn Eamon
204
+ Edward Eddie Ed
205
+ Eric Erick Erik
206
+ Frederick Frederic Fred
207
+ Gerald Gerry
208
+ Gerhard Gerard Ger
209
+ James Jim
210
+ Joanna Joan Joanne
211
+ John Johnny
212
+ Jonathan Jon
213
+ Kenneth Ken Kenny
214
+ Michael Mike Mick Micky
215
+ Nicholas Nick Nicolas
216
+ Nicola Nickie Nicky
217
+ Patrick Pat Paddy
218
+ Peter Pete
219
+ Philippe Philip Phillippe Phillip
220
+ Rick Ricky
221
+ Robert Bob Bobby
222
+ Samual Sam Samuel
223
+ Stefanie Stef
224
+ Stephen Steven Steve
225
+ Terence Terry
226
+ Thomas Tom Tommy
227
+ William Will Willy Willie Bill
228
+ EOF
229
+ end
230
+ end
@@ -0,0 +1,19 @@
1
+ module ICU
2
+ class Util
3
+ # Decide if a string is valid UTF-8 or not, returning true or false.
4
+ def self.is_utf8(str)
5
+ dup = str.dup
6
+ dup.force_encoding("UTF-8")
7
+ dup.valid_encoding?
8
+ end
9
+
10
+ # Try to convert any string to UTF-8.
11
+ def self.to_utf8(str)
12
+ utf8 = is_utf8(str)
13
+ dup = str.dup
14
+ return dup.force_encoding("UTF-8") if utf8
15
+ dup.force_encoding("Windows-1252") if dup.encoding.name.match(/^(ASCII-8BIT|UTF-8)$/)
16
+ dup.encode("UTF-8")
17
+ end
18
+ end
19
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module ICU
4
4
  class Name
5
- VERSION = "0.0.3"
5
+ VERSION = "0.0.4"
6
6
  end
7
7
  end
data/lib/icu_name.rb CHANGED
@@ -1,230 +1,2 @@
1
- module ICU
2
- class Name
3
- attr_reader :first, :last
4
-
5
- # Construct from one or two strings or any objects that have a to_s method.
6
- def initialize(name1='', name2='')
7
- @name1 = name1.to_s
8
- @name2 = name2.to_s
9
- canonicalize
10
- end
11
-
12
- # Return a complete name, first name first, no comma.
13
- def name
14
- name = ''
15
- name << @first
16
- name << ' ' if @first.length > 0 && @last.length > 0
17
- name << @last
18
- name
19
- end
20
-
21
- # Return a reversed complete name, first name last after a comma.
22
- def rname
23
- name = ''
24
- name << @last
25
- name << ', ' if @first.length > 0 && @last.length > 0
26
- name << @first
27
- name
28
- end
29
-
30
- # Convert object to a string.
31
- def to_s
32
- rname
33
- end
34
-
35
- # Match another name to this object, returning true or false.
36
- def match(name1='', name2='')
37
- other = Name.new(name1, name2)
38
- match_first(first, other.first) && match_last(last, other.last)
39
- end
40
-
41
- private
42
-
43
- # Canonicalise the first and last names.
44
- def canonicalize
45
- first, last = partition
46
- @first = finish_first(first)
47
- @last = finish_last(last)
48
- end
49
-
50
- # Split one complete name into first and last parts.
51
- def partition
52
- if @name2.length == 0
53
- # Only one imput so we must split first and last.
54
- parts = @name1.split(/,/)
55
- if parts.size > 1
56
- last = clean(parts.shift || '')
57
- first = clean(parts.join(' '))
58
- else
59
- parts = clean(@name1).split(/ /)
60
- last = parts.pop || ''
61
- first = parts.join(' ')
62
- end
63
- else
64
- # Two inputs, so we are given first and last.
65
- first = clean(@name1)
66
- last = clean(@name2)
67
- end
68
- [first, last]
69
- end
70
-
71
- # Clean up characters in any name.
72
- def clean(name)
73
- name.gsub!(/`/, "'")
74
- name.gsub!(/[^-a-zA-Z.'\s]/, '')
75
- name.gsub!(/\./, ' ')
76
- name.gsub!(/\s*-\s*/, '-')
77
- name.gsub!(/'+/, "'")
78
- name.strip.downcase.split(/\s+/).map do |n|
79
- n.sub!(/^-+/, '')
80
- n.sub!(/-+$/, '')
81
- n.split(/-/).map do |p|
82
- p.capitalize!
83
- end.join('-')
84
- end.join(' ')
85
- end
86
-
87
- # Apply final touches to finish canonicalising a first name.
88
- def finish_first(names)
89
- names.gsub(/([A-Z])\b/, '\1.')
90
- end
91
-
92
- # Apply final touches to finish canonicalising a last name.
93
- def finish_last(names)
94
- names.gsub!(/\b([A-Z])'([a-z])/) { |m| $1 << "'" << $2.upcase}
95
- names.gsub!(/\bMc([a-z])/) { |m| 'Mc' << $1.upcase}
96
- names.gsub!(/\bMac([a-z])/) do |m|
97
- letter = $1
98
- 'Mac'.concat(@name2.match("[mM][aA][cC]#{letter}") ? letter : letter.upcase)
99
- end
100
- names.gsub!(/\bO ([A-Z])/) { |m| "O'" << $1 }
101
- names
102
- end
103
-
104
- # Match a complete first name.
105
- def match_first(first1, first2)
106
- # Is this one a walk in the park?
107
- return true if first1 == first2
108
-
109
- # No easy ride. Begin by splitting into individual first names.
110
- first1 = split_first(first1)
111
- first2 = split_first(first2)
112
-
113
- # Get the long list and the short list.
114
- long, short = first1.size >= first2.size ? [first1, first2] : [first2, first1]
115
-
116
- # The short one must be a "subset" of the long one.
117
- # An extra condition must also be satisfied.
118
- extra = false
119
- (0..long.size-1).each do |i|
120
- lword = long.shift
121
- score = match_first_name(lword, short.first)
122
- if score >= 0
123
- short.shift
124
- extra = true if i == 0 || score == 0
125
- end
126
- break if short.empty? || long.empty?
127
- end
128
-
129
- # There's a match if the following is true.
130
- short.empty? && extra
131
- end
132
-
133
- # Match a complete last name.
134
- def match_last(last1, last2)
135
- return true if last1 == last2
136
- [last1, last2].each do |last|
137
- last.downcase! # MacDonaugh and Macdonaugh
138
- last.gsub!(/\bmac/, 'mc') # MacDonaugh and McDonaugh
139
- last.tr!('-', ' ') # Lowry-O'Reilly and Lowry O'Reilly
140
- end
141
- last1 == last2
142
- end
143
-
144
- # Split a complete first name for matching.
145
- def split_first(first)
146
- first.tr!('-', ' ') # J. K. and J.-K.
147
- first = first.split(/ /) # split on spaces
148
- first = [''] if first.size == 0 # in case input was empty string
149
- first
150
- end
151
-
152
- # Match individual first names or initials.
153
- # -1 = no match
154
- # 0 = full match
155
- # 1 = match involving 1 initial
156
- # 2 = match involving 2 initials
157
- def match_first_name(first1, first2)
158
- initials = 0
159
- initials+= 1 if first1.match(/^[A-Z]\.?$/)
160
- initials+= 1 if first2.match(/^[A-Z]\.?$/)
161
- return initials if first1 == first2
162
- return 0 if initials == 0 && match_nick_name(first1, first2)
163
- return -1 unless initials > 0
164
- return initials if first1[0] == first2[0]
165
- -1
166
- end
167
-
168
- # Match two first names that might be equivalent nicknames.
169
- def match_nick_name(nick1, nick2)
170
- compile_nick_names unless @@nc
171
- code1 = @@nc[nick1]
172
- return false unless code1
173
- code1 == @@nc[nick2]
174
- end
175
-
176
- # Compile the nick names code hash when matching nick names is first attempted.
177
- def compile_nick_names
178
- @@nc = Hash.new
179
- code = 1
180
- @@nl.each do |nicks|
181
- nicks.each do |n|
182
- throw "duplicate name #{n}" if @@nc[n]
183
- @@nc[n] = code
184
- end
185
- code+= 1
186
- end
187
- end
188
-
189
- # A array of data for matching nicknames and also a few common misspellings.
190
- @@nc = nil
191
- @@nl = <<EOF.split(/\n/).reject{|x| x.length == 0 }.map{|x| x.split(' ')}
192
- Abdul Abul
193
- Alexander Alex
194
- Anandagopal Ananda
195
- Anne Ann
196
- Anthony Tony
197
- Benjamin Ben
198
- Catherine Cathy Cath
199
- Daniel Danial Danny Dan
200
- David Dave
201
- Deborah Debbie
202
- Des Desmond
203
- Eamonn Eamon
204
- Edward Eddie Ed
205
- Eric Erick Erik
206
- Frederick Frederic Fred
207
- Gerald Gerry
208
- Gerhard Gerard Ger
209
- James Jim
210
- Joanna Joan Joanne
211
- John Johnny
212
- Jonathan Jon
213
- Kenneth Ken Kenny
214
- Michael Mike Mick Micky
215
- Nicholas Nick Nicolas
216
- Nicola Nickie Nicky
217
- Patrick Pat Paddy
218
- Peter Pete
219
- Philippe Philip Phillippe Phillip
220
- Rick Ricky
221
- Robert Bob Bobby
222
- Samual Sam Samuel
223
- Stefanie Stef
224
- Stephen Steven Steve
225
- Terence Terry
226
- Thomas Tom Tommy
227
- William Will Willy Willie Bill
228
- EOF
229
- end
230
- end
1
+ require 'icu_name/name.rb'
2
+ require 'icu_name/util.rb'
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
3
 
3
4
  module ICU
@@ -6,68 +7,68 @@ module ICU
6
7
  before(:each) do
7
8
  @simple = Name.new('mark j l', 'orr')
8
9
  end
9
-
10
+
10
11
  it "#first returns the first name(s)" do
11
12
  @simple.first.should == 'Mark J. L.'
12
13
  end
13
-
14
+
14
15
  it "#last returns the last name(s)" do
15
16
  @simple.last.should == 'Orr'
16
17
  end
17
-
18
+
18
19
  it "#name returns the full name with first name(s) first" do
19
20
  @simple.name.should == 'Mark J. L. Orr'
20
21
  end
21
-
22
+
22
23
  it "#rname returns the full name with last name(s) first" do
23
24
  @simple.rname.should == 'Orr, Mark J. L.'
24
25
  end
25
-
26
+
26
27
  it "#to_s is the same as rname" do
27
28
  @simple.to_s.should == 'Orr, Mark J. L.'
28
29
  end
29
-
30
+
30
31
  it "#match returns true if and only if two names match" do
31
32
  @simple.match('mark j l orr').should be_true
32
33
  @simple.match('malcolm g l orr').should be_false
33
34
  end
34
35
  end
35
-
36
+
36
37
  context "rdoc expample" do
37
38
  before(:each) do
38
39
  @robert = Name.new(' robert j ', ' FISCHER ')
39
40
  @bobby = Name.new(' bobby fischer ')
40
41
  end
41
-
42
+
42
43
  it "should get Robert" do
43
44
  @robert.name.should == 'Robert J. Fischer'
44
45
  end
45
-
46
+
46
47
  it "should get Bobby" do
47
48
  @bobby.last.should == 'Fischer'
48
49
  @bobby.first.should == 'Bobby'
49
50
  end
50
-
51
+
51
52
  it "should match Robert and Bobby" do
52
53
  @robert.match(@bobby).should be_true
53
54
  @robert.match('R. J.', 'Fischer').should be_true
54
55
  @bobby.match('R. J.', 'Fischer').should be_false
55
56
  end
56
-
57
+
57
58
  it "should canconicalise last names" do
58
59
  Name.new('John', 'O Reilly').last.should == "O'Reilly"
59
60
  Name.new('dave', 'mcmanus').last.should == "McManus"
60
61
  Name.new('pete', 'MACMANUS').last.should == "MacManus"
61
62
  end
62
63
  end
63
-
64
+
64
65
  context "names that are already canonical" do
65
66
  it "should not be altered" do
66
67
  Name.new('Mark J. L.', 'Orr').name.should == 'Mark J. L. Orr'
67
68
  Name.new('Anna-Marie J.-K.', 'Liviu-Dieter').name.should == 'Anna-Marie J.-K. Liviu-Dieter'
68
69
  end
69
70
  end
70
-
71
+
71
72
  context "last names beginning with a single letter followed by a quote" do
72
73
  it "should be handled correctly" do
73
74
  Name.new('una', "O'boyle").name.should == "Una O'Boyle"
@@ -76,7 +77,7 @@ module ICU
76
77
  Name.new('cormac', "o brien").name.should == "Cormac O'Brien"
77
78
  end
78
79
  end
79
-
80
+
80
81
  context "last beginning with Mc" do
81
82
  it "should be handled correctly" do
82
83
  Name.new('shane', "mccabe").name.should == "Shane McCabe"
@@ -85,7 +86,7 @@ module ICU
85
86
  Name.new('bartlomiej', "macieja").name.should == "Bartlomiej Macieja"
86
87
  end
87
88
  end
88
-
89
+
89
90
  context "doubled barrelled names or initials" do
90
91
  it "should be handled correctly" do
91
92
  Name.new('anna-marie', 'den-otter').name.should == 'Anna-Marie Den-Otter'
@@ -95,26 +96,26 @@ module ICU
95
96
  Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
96
97
  end
97
98
  end
98
-
99
+
99
100
  context "extraneous white space" do
100
101
  it "should be handled correctly" do
101
102
  Name.new(' mark j l ', " \t\r\n orr \n").name.should == 'Mark J. L. Orr'
102
103
  end
103
104
  end
104
-
105
+
105
106
  context "extraneous full stops" do
106
107
  it "should be handled correctly" do
107
108
  Name.new('. mark j..l', 'orr.').name.should == 'Mark J. L. Orr'
108
109
  end
109
110
  end
110
-
111
+
111
112
  context "construction from a single string" do
112
113
  before(:each) do
113
114
  @mark1 = Name.new('ORR, mark j l')
114
115
  @mark2 = Name.new('MARK J L ORR')
115
116
  @oreil = Name.new("O'Reilly, j-k")
116
117
  end
117
-
118
+
118
119
  it "should be possible in simple cases" do
119
120
  @mark1.first.should == 'Mark J. L.'
120
121
  @mark1.last.should == 'Orr'
@@ -123,13 +124,13 @@ module ICU
123
124
  @oreil.name.should == "J.-K. O'Reilly"
124
125
  end
125
126
  end
126
-
127
+
127
128
  context "construction from an instance" do
128
129
  it "should be possible" do
129
130
  Name.new(Name.new('ORR, mark j l')).name.should == 'Mark J. L. Orr'
130
131
  end
131
132
  end
132
-
133
+
133
134
  context "constuction corner cases" do
134
135
  it "should be handled correctly" do
135
136
  Name.new('Orr').name.should == 'Orr'
@@ -140,13 +141,13 @@ module ICU
140
141
  Name.new.rname.should == ''
141
142
  end
142
143
  end
143
-
144
+
144
145
  context "inputs to matching" do
145
146
  before(:all) do
146
147
  @mark = Name.new('Mark', 'Orr')
147
148
  @kram = Name.new('Mark', 'Orr')
148
149
  end
149
-
150
+
150
151
  it "should be flexible" do
151
152
  @mark.match('Mark', 'Orr').should be_true
152
153
  @mark.match('Mark Orr').should be_true
@@ -159,12 +160,12 @@ module ICU
159
160
  it "should match when first names are the same" do
160
161
  Name.new('Mark', 'Orr').match('Mark', 'Orr').should be_true
161
162
  end
162
-
163
+
163
164
  it "should be flexible with regards to hyphens in double barrelled names" do
164
165
  Name.new('J.-K.', 'Rowling').match('J. K.', 'Rowling').should be_true
165
166
  Name.new('Joanne-K.', 'Rowling').match('Joanne K.', 'Rowling').should be_true
166
167
  end
167
-
168
+
168
169
  it "should match initials" do
169
170
  Name.new('M. J. L.', 'Orr').match('Mark John Legard', 'Orr').should be_true
170
171
  Name.new('M.', 'Orr').match('Mark', 'Orr').should be_true
@@ -172,37 +173,49 @@ module ICU
172
173
  Name.new('M.', 'Orr').match('M. J.', 'Orr').should be_true
173
174
  Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
174
175
  end
175
-
176
+
176
177
  it "should not match on full names not in first position or without an exact match" do
177
178
  Name.new('J. M.', 'Orr').match('John', 'Orr').should be_true
178
179
  Name.new('M. J.', 'Orr').match('John', 'Orr').should be_false
179
180
  Name.new('M. John', 'Orr').match('John', 'Orr').should be_true
180
181
  end
181
-
182
+
182
183
  it "should handle common nicknames" do
183
184
  Name.new('William', 'Orr').match('Bill', 'Orr').should be_true
184
185
  Name.new('David', 'Orr').match('Dave', 'Orr').should be_true
185
186
  Name.new('Mick', 'Orr').match('Mike', 'Orr').should be_true
186
187
  end
187
-
188
+
188
189
  it "should not mix up nick names" do
189
190
  Name.new('David', 'Orr').match('Bill', 'Orr').should be_false
190
191
  end
191
192
  end
192
-
193
+
193
194
  context "last name matches" do
194
195
  it "should be flexible with regards to hyphens in double barrelled names" do
195
196
  Name.new('Johanna', "Lowry-O'Reilly").match('Johanna', "Lowry O'Reilly").should be_true
196
197
  end
197
-
198
+
198
199
  it "should be case insensitive in matches involving Macsomething and MacSomething" do
199
200
  Name.new('Alan', 'MacDonagh').match('Alan', 'Macdonagh').should be_true
200
201
  end
201
-
202
+
202
203
  it "should cater for the common mispelling of names beginning with Mc or Mac" do
203
204
  Name.new('Alan', 'McDonagh').match('Alan', 'MacDonagh').should be_true
204
205
  Name.new('Darko', 'Polimac').match('Darko', 'Polimc').should be_false
205
206
  end
206
207
  end
208
+
209
+ context "accented characters" do
210
+ before(:each) do
211
+ @first = 'Gearóidín'
212
+ @last = 'Uí Laighléis'
213
+ end
214
+
215
+ it "should not yet deal with UTF-8" do
216
+ name = Name.new(@first, @last)
217
+ name.first.should_not == @first
218
+ end
219
+ end
207
220
  end
208
221
  end
data/spec/util_spec.rb ADDED
@@ -0,0 +1,37 @@
1
+ # encoding: UTF-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
+
4
+ module ICU
5
+ describe Util do
6
+ context "#is_utf8" do
7
+ it "should recognise US-ASCII as a special case of UTF-8" do
8
+ Util.is_utf8("Resume".encode("US-ASCII")).should be_true
9
+ end
10
+
11
+ it "should recognise UTF-8" do
12
+ Util.is_utf8("Résumé").should be_true
13
+ Util.is_utf8("δog").should be_true
14
+ end
15
+
16
+ it "should recognize other encodings as not being UTF-8" do
17
+ Util.is_utf8("Résumé".encode("ISO-8859-1")).should be_false
18
+ Util.is_utf8("€50".encode("Windows-1252")).should be_false
19
+ Util.is_utf8("ひらがな".encode("Shift_JIS")).should be_false
20
+ Util.is_utf8("\xa3").should be_false
21
+ end
22
+ end
23
+
24
+ context "#to_utf8" do
25
+ it "should convert to UTF-8" do
26
+ Util.to_utf8("Resume").should == "Resume"
27
+ Util.to_utf8("Resume".force_encoding("US-ASCII")).encoding.name.should == "UTF-8"
28
+ Util.to_utf8("Résumé".encode("ISO-8859-1")).should == "Résumé"
29
+ Util.to_utf8("Résumé".encode("Windows-1252")).should == "Résumé"
30
+ Util.to_utf8("€50".encode("Windows-1252")).should == "€50"
31
+ Util.to_utf8("\xa350".force_encoding("ASCII-8BIT")).should == "£50"
32
+ Util.to_utf8("\xa350").should == "£50"
33
+ Util.to_utf8("ひらがな".encode("Shift_JIS")).should == "ひらがな"
34
+ end
35
+ end
36
+ end
37
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Mark Orr
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-01-02 00:00:00 +00:00
17
+ date: 2011-01-21 00:00:00 +00:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -55,10 +55,13 @@ extra_rdoc_files:
55
55
  - LICENCE
56
56
  - README.rdoc
57
57
  files:
58
+ - lib/icu_name/name.rb
59
+ - lib/icu_name/util.rb
58
60
  - lib/icu_name/version.rb
59
61
  - lib/icu_name.rb
60
- - spec/icu_name_spec.rb
62
+ - spec/name_spec.rb
61
63
  - spec/spec_helper.rb
64
+ - spec/util_spec.rb
62
65
  - LICENCE
63
66
  - README.rdoc
64
67
  has_rdoc: true