name-tamer 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
+ require 'cgi'
3
+ require 'name_tamer/string'
4
+ require 'name_tamer/array'
5
+ require 'name_tamer/constants'
6
+
7
# Top-level namespace and convenience entry points.
module NameTamer
  # Name and Text are registered for lazy loading; the files are only
  # required the first time the constants are referenced.
  autoload :Name, 'name_tamer/name'
  autoload :Text, 'name_tamer/text'

  # Shorthand constructor: NameTamer[name, args] builds a NameTamer::Name
  # from the given name and options hash.
  def self.[](name, args = {})
    Name.new(name, args)
  end

  # Make a slug from a string by delegating to NameTamer::Text#parameterize
  def self.parameterize(string, args = {})
    Text.new(string, args).parameterize
  end
end
@@ -0,0 +1,7 @@
1
+ # encoding: utf-8
2
class Array
  # Every contiguous run of elements, ordered by start index and then by length.
  #
  #   [1, 2, 3].neighbours #=> [[1], [1, 2], [1, 2, 3], [2], [2, 3], [3]]
  #
  # An empty array has no runs, so the result is an empty array.
  def neighbours
    (0...length).flat_map do |first|
      (first...length).map { |final| self[first..final] }
    end
  end
end
@@ -0,0 +1,121 @@
1
+ module NameTamer
2
+ NONBREAKING_SPACE = "\u00a0".freeze
3
+ ASCII_SPACE = ' '.freeze
4
+ ADFIX_JOINERS = "[#{ASCII_SPACE}-]".freeze
5
+ SLUG_DELIMITER = '-'.freeze
6
+ ZERO_WIDTH_FILTER = /[\u180E\u200B\u200C\u200D\u2063\uFEFF]/
7
+
8
+ # Constants for parameterizing Unicode strings for IRIs
9
+ #
10
+ # Allowed characters in an IRI segment are defined by RFC 3987
11
+ # (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
12
+ #
13
+ # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
14
+ # / "@" )
15
+ # ; non-zero-length segment without any colon ":"
16
+ #
17
+ # iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
18
+ #
19
+ # pct-encoded = "%" HEXDIG HEXDIG
20
+ #
21
+ # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
22
+ # / "*" / "+" / "," / ";" / "="
23
+ #
24
+ # ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
25
+ # / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
26
+ # / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
27
+ # / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
28
+ # / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
29
+ # / %xD0000-DFFFD / %xE1000-EFFFD
30
+ #
31
+ # Note that we can't use Unicode code points above \uFFFF because of
32
+ # regex limitations, so we'll ignore ucschar above that point.
33
+ #
34
+ # We're using the most restrictive segment definition (isegment-nz-nc)
35
+ # to avoid any possible problems with the IRI that it one day might
36
+ # get placed in.
37
+ ALPHA = 'A-Za-z'.freeze
38
+ DIGIT = '0-9'.freeze
39
+ UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'.freeze
40
+ IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}".freeze
41
+ SUBDELIMS = '!$&\'\(\)\*+,;='.freeze
42
+ ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@".freeze # pct-encoded not needed
43
+ FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
44
+ FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
45
+
46
+ # These are the prefixes and suffixes we want to remove
47
+ # If you add to the list, you can use spaces and dots where appropriate
48
+ # Ensure any single letters are followed by a dot because we'll add one to the string
49
+ # during processing, e.g. "y Cia." should be "y. Cia."
50
+ ADFIXES = {
51
+ prefix: {
52
+ person: [
53
+ 'Baron', 'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame',
54
+ 'Doctor', 'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant',
55
+ 'Lord', 'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.',
56
+ 'Miss', 'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.',
57
+ 'Mr. and Mrs.', 'Mrs.', 'Msgr.', 'Ms.', 'Prof.', 'Professor', 'Rev.',
58
+ 'Reverend', 'Sir', 'Sister', 'The Hon.', 'The Lady.', 'The Lord',
59
+ 'The Rt. Hon.'
60
+ ],
61
+ organization: [
62
+ 'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
63
+ ],
64
+ before: '\\A', after: ADFIX_JOINERS
65
+ },
66
+ suffix: {
67
+ person: [
68
+ 'Chartered F.C.S.I.', 'Chartered M.C.S.I.', 'I.F.R.S. Certified', 'F.Inst.L.M.', 'C.I.S.S.P.', 'F.C.I.P.S.',
69
+ 'M.R.I.C.S.', 'T.M.I.E.T.', 'Dip. D.M.', 'A.A.M.S.', 'A.C.C.A.', 'A.C.M.A.', 'A.I.F.A.', 'A.W.M.A.', 'C.A.I.A.',
70
+ 'C.A.P.M.', 'C.C.I.M.', 'C.D.F.A.', 'C.E.P.P.', 'C.F.B.S.', 'C.G.M.A.', 'C.I.T.P.', 'C.L.T.C.', 'C.P.C.C.',
71
+ 'C.R.P.C.', 'C.R.P.S.', 'C.S.O.X.', 'C.S.S.D.', 'F.B.C.S.', 'F.C.C.A.', 'F.C.M.I.', 'F.C.S.I.', 'F.I.E.T.',
72
+ 'F.I.R.P.', 'M.I.E.T.', 'M.S.F.S.', 'M.Sc. D.', 'O.R.S.C.', 'R.I.C.P.', 'B.Tech.', 'Cantab.', 'Ch.F.C.',
73
+ 'D.Phil.', 'I.T.I.L. v3', 'M.Io.D.', 'S.C.M.P', 'A.C.A.', 'A.C.C.', 'A.E.P.', 'A.I.F.', 'A.S.A.', 'B.Eng.',
74
+ 'C.B.V.', 'C.E.M.', 'C.Eng.', 'C.F.A.', 'C.F.F.', 'C.F.P.', 'C.F.S.', 'C.G.A.', 'C.G.B.', 'C.G.P.', 'C.I.M.',
75
+ 'C.L.P.', 'C.L.U.', 'C.M.A.', 'C.M.T.', 'C.P.A.', 'C.T.A.', 'C.W.S.', 'D.B.E.', 'D.D.S.', 'D.V.M.', 'E.R.P.',
76
+ 'Eng.D.', 'F.C.A.', 'F.P.C.', 'F.R.M.', 'F.R.M.', 'G.S.P.', 'L.P.S.', 'M.B.A.', 'M.B.E.', 'M.E.P.', 'M.Eng.',
77
+ 'M.Jur.', 'M.P.A.', 'M.S.F.', 'M.S.P.', 'O.B.E.', 'P.C.C.', 'P.F.S.', 'P.H.R.', 'P.M.C.', 'P.M.P.', 'P.M.P.',
78
+ 'P.S.P.', 'R.F.C.', 'V.M.D.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'Ed.M.', 'Hons.', 'LL.B.', 'LL.D.', 'LL.M.', 'M.Ed.',
79
+ 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'C.A.', 'E.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'M.S.',
80
+ 'O.K.', 'P.A.', 'Q.C.', 'R.D.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
81
+ ],
82
+ organization: [
83
+ 'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
84
+ 'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
85
+ 'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
86
+ 'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
87
+ 'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
88
+ 'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'K.G.a.A.',
89
+ 'L.L.L.P.', 'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.', 'P.L.L.C.',
90
+ 'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.', 'S.C.R.I.',
91
+ 'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', '&. Cie.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
92
+ 'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
93
+ 'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
94
+ 'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
95
+ 'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
96
+ 'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
97
+ '股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
98
+ 'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
99
+ 'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
100
+ 'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
101
+ 'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
102
+ 'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
103
+ 'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
104
+ ],
105
+ before: ADFIX_JOINERS, after: '\\z'
106
+ }
107
+ }.freeze
108
+
109
# Compiled, case-insensitive patterns for every adfix list, keyed first by
# adfix type (:prefix / :suffix) and then by contact type (:person / :organization).
#
# For each list in ADFIXES:
# * every ASCII space becomes " *", so the spaces inside an adfix are optional
# * every "." becomes "\.*", so the dots are optional and may be repeated
# * any number of opening brackets may precede the adfix, and any run of
#   "®", "™" or closing brackets may follow it
# * the adfix type's :before and :after fragments anchor the pattern to the
#   start or end of the string and require a joiner on the inner side
#
# The result is frozen at both levels, like ADFIXES, so the constant can't
# be mutated by accident.
ADFIX_PATTERNS = %i[prefix suffix].each_with_object({}) do |adfix_type, by_type|
  adfix = ADFIXES[adfix_type]

  by_type[adfix_type] = %i[person organization].each_with_object({}) do |ct, patterns|
    with_optional_spaces = adfix[ct].map { |entry| entry.gsub(ASCII_SPACE, ' *') }
    pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
    patterns[ct] = /#{adfix[:before]}\(*(?:#{pattern_string})[®™\)]*#{adfix[:after]}/i
  end.freeze
end.freeze
121
+ end
@@ -0,0 +1,384 @@
1
+ module NameTamer
2
+ class Name
3
+ # References:
4
+ # http://www.w3.org/International/questions/qa-personal-names
5
+ # https://github.com/berkmancenter/namae
6
+ # https://github.com/mericson/people
7
+ # http://en.wikipedia.org/wiki/Types_of_business_entity
8
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
9
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
10
+ # http://en.wikipedia.org/wiki/Nobiliary_particle
11
+ # http://en.wikipedia.org/wiki/Spanish_naming_customs
12
+ # http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
13
+ attr_reader :name
14
+
15
# The received name after basic clean-up. Computed on first use and then
# memoized in @tidy_name; each step below works on @tidy_name.
def tidy_name
  return @tidy_name if @tidy_name

  @tidy_name = name.dup # work on a copy of the name we were given

  unescape             # Unescape percent-encoded characters and fix UTF-8 encoding
  remove_zero_width    # remove zero-width characters
  tidy_spacing         # " John   Smith " -> "John Smith"
  fix_encoding_errors  # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
  consolidate_initials # "I. B. M." -> "I.B.M."

  @tidy_name
end
28
+
29
# The presentable form of the name, built from the tidy name. Computed on
# first use and then memoized in @nice_name.
def nice_name
  return @nice_name if @nice_name

  @nice_name = tidy_name.dup # work on a copy of the tidied name

  remove_adfixes          # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
  fixup_last_name_first   # "Smith, John" -> "John Smith"
  fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
  remove_adfixes          # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
  name_wrangle            # proper name case and non-breaking spaces
  use_nonbreaking_spaces_in_compound_names

  @nice_name
end
43
+
44
# The simplified form of the name, built from the nice name. Computed on
# first use and then memoized in @simple_name.
def simple_name
  return @simple_name if @simple_name

  @simple_name = nice_name.dup # work on a copy of the nice name

  remove_initials              # "John Q. Doe" -> "John Doe"
  remove_middle_names          # "Philip Seymour Hoffman" -> "Philip Hoffman"
  remove_periods_from_initials # "J.P.R. Williams" -> "JPR Williams"
  standardize_words            # "B&Q Intl" -> "B and Q International"

  # Finally convert whitespace to plain ASCII spaces
  @simple_name.whitespace_to!(ASCII_SPACE)

  @simple_name
end
58
+
59
+ def slug
60
+ @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
61
+ end
62
+
63
+ def array
64
+ @array ||= slug.split(SLUG_DELIMITER)
65
+ end
66
+
67
+ def contact_type
68
+ nice_name # make sure we've done the bit which infers contact_type
69
+ contact_type_best_effort
70
+ end
71
+
72
# Set the contact type, stored as a symbol.
# If a different contact type was already set, the change is reported on
# standard output before the new value is stored.
def contact_type=(new_contact_type)
  ct_as_sym = new_contact_type.to_sym
  changing = !@contact_type.nil? && @contact_type != ct_as_sym

  puts "Changing contact type of #{@name} from #{@contact_type} to #{new_contact_type}" if changing

  @contact_type = ct_as_sym
end
81
+
82
+ # These lines aren't used and aren't covered by specs
83
+ # def name=(new_name)
84
+ # initialize new_name, :contact_type => @contact_type
85
+ # end
86
+ #
87
+ # def to_hash
88
+ # {
89
+ # name: name,
90
+ # nice_name: nice_name,
91
+ # simple_name: simple_name,
92
+ # slug: slug,
93
+ # contact_type: contact_type,
94
+ # last_name: last_name,
95
+ # remainder: remainder,
96
+ # adfix_found: adfix_found
97
+ # }
98
+ # end
99
+
100
+ private
101
+
102
+ #--------------------------------------------------------
103
+ # Tidy up the name we've received
104
+ #--------------------------------------------------------
105
+
106
+ def unescape
107
+ @tidy_name.ensure_safe!.safe_unescape!.unescape_html!
108
+ end
109
+
110
+ def remove_zero_width
111
+ @tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
112
+ end
113
+
114
+ def tidy_spacing
115
+ @tidy_name
116
+ .space_around_comma!
117
+ .strip_or_self!
118
+ .whitespace_to!(ASCII_SPACE)
119
+ end
120
+
121
+ def fix_encoding_errors
122
+ @tidy_name.fix_encoding_errors!
123
+ end
124
+
125
+ # Remove spaces from groups of initials
126
+ def consolidate_initials
127
+ @tidy_name
128
+ .remove_spaces_from_initials!
129
+ .ensure_space_after_initials!
130
+ end
131
+
132
+ # An adfix is either a prefix or a suffix
133
# Strip every suffix and then every prefix.
# While the name is still in one piece both passes work on @nice_name; once
# it has been split, suffixes come off @last_name and prefixes off @remainder.
def remove_adfixes
  if @last_name.nil?
    # Our name is still in one part, not two
    @nice_name = strip_adfixes(:suffix, @nice_name)
    @nice_name = strip_adfixes(:prefix, @nice_name)
  else
    # Our name is currently in two halves
    @last_name = strip_adfixes(:suffix, @last_name)
    @remainder = strip_adfixes(:prefix, @remainder)
  end
end

# Keep removing the outermost adfix of the given type until a pass finds none
def strip_adfixes(adfix_type, name_part)
  loop do
    name_part = remove_outermost_adfix(adfix_type, name_part)
    return name_part unless @adfix_found
  end
end
158
+
159
+ # Names in the form "Smith, John" need to be turned around to "John Smith"
160
# Split "Last, First" style names into @last_name and @remainder.
# Nothing happens for organizations, or unless splitting on ", " gives
# exactly two pieces.
def fixup_last_name_first
  return if @contact_type == :organization

  surname, rest, *extra = @nice_name.split(', ')

  return unless rest && extra.empty?

  @last_name = surname # Sometimes the last name alone is all caps and we can name-case it
  @remainder = rest
end
170
+
171
+ # Sometimes we end up with mismatched braces after adfix stripping
172
+ # e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
173
# Balance the round brackets in @nice_name by padding whichever side is
# short: missing closing brackets are appended, missing opening brackets are
# prepended. Every missing bracket is added, not just one, so an imbalance
# of two or more is fully repaired ("Ceres ((AZ" -> "Ceres ((AZ))").
# Only the counts are compared; the position of the brackets is not checked.
def fixup_mismatched_braces
  surplus_left = @nice_name.count('(') - @nice_name.count(')')

  if surplus_left > 0
    @nice_name += ')' * surplus_left
  elsif surplus_left < 0
    @nice_name = ('(' * -surplus_left) + @nice_name
  end
end
183
+
184
# Fix case if all caps or all lowercase.
# Dispatches on whether the name has been split into @last_name and @remainder.
def name_wrangle
  @last_name.nil? ? name_wrangle_single_name : name_wrangle_split_name
end
192
+
193
# Re-case an unsplit name when it is written in a single case.
# Organizations are only re-cased when entirely upper case and longer than
# four characters; anything else is re-cased when entirely upper case or
# entirely lower case.
def name_wrangle_single_name
  downcased = @nice_name.downcase
  all_caps = @nice_name == @nice_name.upcase

  needs_fix =
    if @contact_type == :organization
      all_caps && @nice_name.length > 4
    else
      all_caps || @nice_name == downcased
    end

  @nice_name = name_case(downcased) if needs_fix
end
206
+
207
# Re-case the last name if it is written in a single case, then rebuild
# @nice_name as "<remainder> <last name>".
# It's a person if we've split the name, so no organization logic here.
def name_wrangle_split_name
  downcased = @last_name.downcase
  single_case = @last_name == @last_name.upcase || @last_name == downcased

  @last_name = name_case(downcased) if single_case
  @nice_name = "#{@remainder} #{@last_name}"
end
214
+
215
+ # Conjoin compound names with non-breaking spaces
216
+ def use_nonbreaking_spaces_in_compound_names
217
+ @nice_name
218
+ .nbsp_in_compound_name!
219
+ .nbsp_in_name_modifier!
220
+ end
221
+
222
+ #--------------------------------------------------------
223
+ # Make search name from nice name
224
+ #--------------------------------------------------------
225
+
226
+ # Remove initials from personal names unless they are the only identifier.
227
+ # i.e. only remove initials if there's also a proper name there
228
# Drop single-letter initials from a person's simple name.
# The stripped version is only kept if it still contains an ASCII space;
# otherwise @simple_name is left untouched.
def remove_initials
  return unless @contact_type == :person

  without_initials = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')

  # No space left means the initials were part of the only identifier
  return unless without_initials.include?(ASCII_SPACE)

  @simple_name = without_initials
end
236
+
237
# Reduce a person's simple name to its first and last usable words.
# If only one of the two is found the name becomes that word alone, and if
# neither is found @simple_name is left as it was.
def remove_middle_names
  return unless @contact_type == :person

  first_name, rest = find_first_usable_name(@simple_name.split)
  last_name, = find_last_usable_name(rest)

  kept = [first_name, last_name].compact
  return if kept.empty?

  @simple_name = kept.join(' ')
end
248
+
249
# Find the first part that still has content once FILTER_COMPAT has removed
# the characters it matches.
#
# Returns a two-element array: the usable part and the parts that follow it.
# When no part is usable the result is [nil, parts], so callers can tell
# "nothing usable" apart from a real name. (Previously the last unusable
# part was returned in that case.)
#
# The parts array passed in is never modified.
def find_first_usable_name(parts)
  index = parts.index { |part| !part.gsub(FILTER_COMPAT, '').empty? }

  return [nil, parts] unless index

  [parts[index], parts.drop(index + 1)]
end
261
+
262
# Working backwards, return the first part that still has content once
# FILTER_COMPAT has removed the characters it matches, or nil if no part does.
def find_last_usable_name(parts)
  parts.reverse_each.find { |candidate| !candidate.gsub(FILTER_COMPAT, '').empty? }
end
273
+
274
+ def remove_periods_from_initials
275
+ @simple_name.remove_periods_from_initials!
276
+ end
277
+
278
+ def standardize_words
279
+ @simple_name.gsub!(/ *& */, ' and ') # replace ampersand characters with ' and '
280
+ @simple_name.gsub!(/ *\+ */, ' plus ') # replace plus signs with ' plus '
281
+ @simple_name.gsub!(/\bintl\b/i, 'International') # replace 'intl' with 'International'
282
+ @simple_name.gsub!(/[־‐‑‒–—―−﹘﹣-]/, SLUG_DELIMITER) # Replace Unicode dashes with ASCII hyphen
283
+ @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
284
+ end
285
+
286
+ #--------------------------------------------------------
287
+ # Initialization and utilities
288
+ #--------------------------------------------------------
289
+
290
# Store the raw name (nil is treated as an empty string) and any contact
# type supplied in args, and reset every derived value so it is computed
# lazily on first use.
def initialize(new_name, args = {})
  @name = new_name || ''
  @contact_type = contact_type_from args

  # Memoized results, filled in on demand
  @tidy_name = @nice_name = @simple_name = @slug = nil

  # The two halves of a "Last, First" style name
  @last_name = @remainder = nil

  @adfix_found = false
end
304
+
305
# Extract a recognised contact type from the options hash.
#
# The :contact_type value may be a string, a symbol or anything else that
# responds to to_s. It is compared case-insensitively, and symbols are
# normalised exactly like strings, so :Person and "Person" both give :person.
#
# Returns :person or :organization, or nil when the option is missing,
# falsy or not one of those two values.
def contact_type_from(args)
  requested = args[:contact_type]
  return unless requested

  candidate = requested.to_s.downcase.to_sym
  candidate if %i[person organization].include?(candidate)
end
317
+
318
+ # If we don't know the contact type, what's our best guess?
319
# The explicit contact type if there is one, otherwise a guess from the raw
# name: a name containing an ASCII space is assumed to be a person and a
# single word is assumed to be an organization.
def contact_type_best_effort
  @contact_type || (@name.include?(ASCII_SPACE) ? :person : :organization)
end
328
+
329
+ # We pass to this routine either prefixes or suffixes
330
# Remove one adfix of the given type (:prefix or :suffix) from name_part.
# Returns name_part itself when no adfix is found; otherwise sets the contact
# type implied by the adfix and returns the name without it.
def remove_outermost_adfix(adfix_type, name_part)
  ct, (before, _adfix, after) = find_contact_type_and_parts(ADFIX_PATTERNS[adfix_type], name_part)

  return name_part unless @adfix_found

  # If we've found a diagnostic adfix then set the contact type
  self.contact_type = ct

  # Whichever side the adfix was on is empty, so joining the two sides gives
  # the rest of the name. Any trailing comma we've exposed is removed too.
  (before + after).gsub(/\s*,\s*$/, '')
end
343
+
344
# Partition name_part around the first adfix match, recording in
# @adfix_found whether anything matched.
#
# The best-effort contact type is tried first. If the contact type is
# indeterminate and that didn't find a diagnostic adfix, the organization
# pattern is tried as well.
#
# Returns [contact_type, parts] where parts is the [before, match, after]
# result of String#partition for the last pattern tried.
def find_contact_type_and_parts(adfixes, name_part)
  candidates = [contact_type_best_effort]
  candidates << :organization unless @contact_type

  ct = nil
  parts = nil

  candidates.each do |candidate|
    ct = candidate
    parts = name_part.partition adfixes[ct]
    @adfix_found = !parts[1].empty?
    break if @adfix_found
  end

  [ct, parts]
end
359
+
360
+ # Original Version of NameCase:
361
+ # Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
362
+ # This module may be used/distributed/modified under the same terms as Perl itself
363
+ # http://dev.perl.org/licenses/ (GPL)
364
+ #
365
+ # Ruby Version:
366
+ # Copyright (c) Aaron Patterson 2006
367
+ # NameCase is distributed under the GPL license.
368
+ #
369
+ # Substantially modified for Xendata
370
+ # Improved in several areas, also now adds non-breaking spaces for
371
+ # compound names like "van der Pump"
372
+ def name_case(lowercase)
373
+ n = lowercase.dup # We assume the name is passed already downcased
374
+
375
+ n
376
+ .upcase_first_letter!
377
+ .downcase_after_apostrophe!
378
+ .fix_mac!
379
+ .fix_ff!
380
+ .fix_name_modifiers!
381
+ .upcase_initials!
382
+ end
383
+ end
384
+ end