name_tamer 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/name-tamer.rb ADDED
@@ -0,0 +1 @@
1
+ require 'name_tamer'
data/lib/name_tamer.rb ADDED
@@ -0,0 +1,22 @@
1
+ # encoding: utf-8
2
+
3
+ require 'cgi'
4
+ require 'name_tamer/string'
5
+ require 'name_tamer/array'
6
+ require 'name_tamer/constants'
7
+
8
# Public entry points of the gem. The heavy classes are registered for
# autoloading, so they are only required the first time they are referenced.
module NameTamer
  autoload :Name, 'name_tamer/name'
  autoload :Text, 'name_tamer/text'

  # Shorthand constructor, e.g. NameTamer['Mr. John Smith'].
  #
  # @param name [String] the raw name
  # @param args [Hash] options passed straight through to Name.new
  # @return [NameTamer::Name]
  def self.[](name, args = {})
    Name.new(name, args)
  end

  # Make a slug from a string.
  #
  # @param string [String] the text to turn into a slug
  # @param args [Hash] options passed straight through to Text.new
  # @return whatever Text#parameterize returns
  def self.parameterize(string, args = {})
    Text.new(string, args).parameterize
  end
end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+
3
class Array
  # Returns every contiguous, non-empty run of elements, grouped by start
  # index and ordered by increasing length within each group.
  #
  #   [1, 2, 3].neighbours
  #   # => [[1], [1, 2], [1, 2, 3], [2], [2, 3], [3]]
  #
  # An empty array yields an empty array.
  def neighbours
    each_index.flat_map do |start|
      (start...length).map { |finish| self[start..finish] }
    end
  end
end
@@ -0,0 +1,121 @@
1
module NameTamer
  # U+00A0 NO-BREAK SPACE
  NONBREAKING_SPACE = "\u00a0".freeze
  ASCII_SPACE = ' '.freeze
  # Regexp character-class source (space or hyphen) used as the boundary
  # between an adfix and the rest of the name; see ADFIXES below.
  ADFIX_JOINERS = "[#{ASCII_SPACE}-]".freeze
  SLUG_DELIMITER = '-'.freeze
  # Matches the zero-width / invisible code points listed in the class.
  ZERO_WIDTH_FILTER = /[\u180E\u200B\u200C\u200D\u2063\uFEFF]/

  # Constants for parameterizing Unicode strings for IRIs
  #
  # Allowed characters in an IRI segment are defined by RFC 3987
  # (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
  #
  # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
  #                      / "@" )
  #                ; non-zero-length segment without any colon ":"
  #
  # iunreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
  #
  # pct-encoded    = "%" HEXDIG HEXDIG
  #
  # sub-delims     = "!" / "$" / "&" / "'" / "(" / ")"
  #                  / "*" / "+" / "," / ";" / "="
  #
  # ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
  #                  / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
  #                  / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
  #                  / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
  #                  / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
  #                  / %xD0000-DFFFD / %xE1000-EFFFD
  #
  # Note that we can't use Unicode code points above \uFFFF because of
  # regex limitations, so we'll ignore ucschar above that point.
  #
  # We're using the most restrictive segment definition (isegment-nz-nc)
  # to avoid any possible problems with the IRI that it one day might
  # get placed in.
  #
  # Each string below is a fragment of a regexp character class; the \u
  # escapes are deliberately left for the regexp engine to interpret.
  ALPHA = 'A-Za-z'.freeze
  DIGIT = '0-9'.freeze
  UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'.freeze
  IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}".freeze
  SUBDELIMS = '!$&\'\(\)\*+,;='.freeze
  ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@".freeze # pct-encoded not needed
  # Match any character that is NOT allowed by the respective definition.
  FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
  FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/

  # These are the prefixes and suffixes we want to remove
  # If you add to the list, you can use spaces and dots where appropriate
  # Ensure any single letters are followed by a dot because we'll add one to the string
  # during processing, e.g. "y Cia." should be "y. Cia."
  #
  # :before and :after are regexp source fragments that ADFIX_PATTERNS places
  # around the alternation built from each list.
  ADFIXES = {
    prefix: {
      person: [
        'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame', 'Doctor',
        'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant', 'Lord',
        'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.', 'Miss',
        'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.', 'Mr. and Mrs.',
        'Mrs.', 'Ms.', 'Msgr.', 'Prof.', 'Professor', 'Rev.', 'Reverend', 'Sir',
        'Sister', 'The Hon.', 'The Lady.', 'The Lord', 'The Rt. Hon.', 'Doktor',
        'Herr', 'Frau'
      ],
      organization: [
        'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
      ],
      before: '\\A', after: ADFIX_JOINERS
    },
    suffix: {
      person: [
        'Chartered F.C.S.I.', 'Chartered M.C.S.I.', 'I.F.R.S. Certified', 'F.Inst.L.M.', 'C.I.S.S.P.', 'F.C.I.P.S.',
        'M.R.I.C.S.', 'T.M.I.E.T.', 'Dip. D.M.', 'A.A.M.S.', 'A.C.C.A.', 'A.C.M.A.', 'A.I.F.A.', 'A.W.M.A.', 'C.A.I.A.',
        'C.A.P.M.', 'C.C.I.M.', 'C.D.F.A.', 'C.E.P.P.', 'C.F.B.S.', 'C.G.M.A.', 'C.I.T.P.', 'C.L.T.C.', 'C.P.C.C.',
        'C.R.P.C.', 'C.R.P.S.', 'C.S.O.X.', 'C.S.S.D.', 'F.B.C.S.', 'F.C.C.A.', 'F.C.M.I.', 'F.C.S.I.', 'F.I.E.T.',
        'F.I.R.P.', 'M.I.E.T.', 'M.S.F.S.', 'M.Sc. D.', 'O.R.S.C.', 'R.I.C.P.', 'B.Tech.', 'Cantab.', 'Ch.F.C.',
        # 'S.C.M.P.' needs its trailing dot like every other entry, otherwise
        # the generated pattern cannot consume a final "." before the end anchor.
        'D.Phil.', 'I.T.I.L. v3', 'M.Io.D.', 'S.C.M.P.', 'A.C.A.', 'A.C.C.', 'A.E.P.', 'A.I.F.', 'A.S.A.', 'B.Eng.',
        'C.B.V.', 'C.E.M.', 'C.Eng.', 'C.F.A.', 'C.F.F.', 'C.F.P.', 'C.F.S.', 'C.G.A.', 'C.G.B.', 'C.G.P.', 'C.I.M.',
        'C.L.P.', 'C.L.U.', 'C.M.A.', 'C.M.T.', 'C.P.A.', 'C.T.A.', 'C.W.S.', 'D.B.E.', 'D.D.S.', 'D.V.M.', 'E.R.P.',
        'Eng.D.', 'F.C.A.', 'F.P.C.', 'F.R.M.', 'G.S.P.', 'L.P.S.', 'M.B.A.', 'M.B.E.', 'M.E.P.', 'M.Eng.',
        'M.Jur.', 'M.P.A.', 'M.S.F.', 'M.S.P.', 'O.B.E.', 'P.C.C.', 'P.F.S.', 'P.H.R.', 'P.M.C.', 'P.M.P.',
        'P.S.P.', 'R.F.C.', 'V.M.D.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'Ed.M.', 'Hons.', 'LL.B.', 'LL.D.', 'LL.M.', 'M.Ed.',
        'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'C.A.', 'E.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'M.S.',
        'O.K.', 'P.A.', 'Q.C.', 'R.D.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
      ],
      organization: [
        'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
        'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
        'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
        'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
        'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
        'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'K.G.a.A.',
        'L.L.L.P.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.',
        'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.', 'S.C.R.I.',
        'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', '&. Cie.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
        'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
        'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
        'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
        'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
        'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
        '股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
        'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
        'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
        'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
        'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
        'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
        'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
      ],
      before: ADFIX_JOINERS, after: '\\z'
    }
  }.freeze

  # One case-insensitive regexp per adfix type (:prefix / :suffix) and contact
  # type (:person / :organization), built from ADFIXES:
  #
  # * every ASCII space in an entry becomes " *" (the space is optional);
  # * every "." becomes "\.*" (zero or more literal dots), so "Ph.D.", "PhD"
  #   and "Ph.D" all match the same entry;
  # * the alternation is wrapped in optional opening brackets and optional
  #   trailing "®", "™" or ")" characters, then anchored with the :before and
  #   :after fragments of the adfix type.
  #
  # NOTE: entries are interpolated without Regexp.escape, so parentheses inside
  # an entry (e.g. "(Pty.) Ltd.") act as regexp groups, not literal brackets.
  ADFIX_PATTERNS = %i[prefix suffix].each_with_object({}) do |adfix_type, by_type|
    adfix = ADFIXES[adfix_type]

    patterns = %i[person organization].each_with_object({}) do |ct, by_contact_type|
      with_optional_spaces = adfix[ct].map { |entry| entry.gsub(ASCII_SPACE, ' *') }
      pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
      by_contact_type[ct] = /#{adfix[:before]}\(*(?:#{pattern_string})[®™\)]*#{adfix[:after]}/i
    end

    by_type[adfix_type] = patterns.freeze
  end.freeze
end
@@ -0,0 +1,390 @@
1
+ module NameTamer
2
+ class Name
3
+ # References:
4
+ # http://www.w3.org/International/questions/qa-personal-names
5
+ # https://github.com/berkmancenter/namae
6
+ # https://github.com/mericson/people
7
+ # http://en.wikipedia.org/wiki/Types_of_business_entity
8
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
9
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
10
+ # http://en.wikipedia.org/wiki/Nobiliary_particle
11
+ # http://en.wikipedia.org/wiki/Spanish_naming_customs
12
+ # http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
13
+ attr_reader :name
14
+
15
# The received name after basic cleanup. Computed on first call and cached
# in @tidy_name; the cleanup steps run in this fixed order on a copy of +name+.
def tidy_name
  return @tidy_name if @tidy_name

  @tidy_name = name.dup # work on a copy of the name we've received

  unescape              # unescape percent-encoded characters and fix UTF-8 encoding
  remove_zero_width     # remove zero-width characters
  tidy_spacing          # " John Smith " -> "John Smith"
  fix_encoding_errors   # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
  consolidate_initials  # "I. B. M." -> "I.B.M."

  @tidy_name
end
28
+
29
# The presentable form of the name, derived from #tidy_name. Computed on
# first call and cached in @nice_name.
def nice_name
  return @nice_name if @nice_name

  @nice_name = tidy_name.dup # work on a copy of the tidied name

  remove_adfixes          # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
  fixup_last_name_first   # "Smith, John" -> "John Smith"
  fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
  remove_adfixes          # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
  name_wrangle            # proper name case and non-breaking spaces
  use_nonbreaking_spaces_in_compound_names

  @nice_name
end
43
+
44
# The simplified form of the name, derived from #nice_name. Computed on
# first call and cached in @simple_name.
def simple_name
  return @simple_name if @simple_name

  @simple_name = nice_name.dup # work on a copy of the nice name

  remove_initials              # "John Q. Doe" -> "John Doe"
  remove_middle_names          # "Philip Seymour Hoffman" -> "Philip Hoffman"
  remove_periods_from_initials # "J.P.R. Williams" -> "JPR Williams"
  standardize_words            # "B&Q Intl" -> "B and Q International"

  @simple_name.whitespace_to!(ASCII_SPACE)

  @simple_name
end
58
+
59
# The slug for the simple name, e.g. "John Doe" -> "john-doe".
# Built via NameTamer.parameterize from a copy of #simple_name and cached.
def slug
  return @slug if @slug

  @slug = NameTamer.parameterize(simple_name.dup)
end
62
+
63
# The slug split on SLUG_DELIMITER into its component words (cached).
def array
  return @array if @array

  @array = slug.split(SLUG_DELIMITER)
end
66
+
67
# The contact type of this name: the stored one when known, otherwise the
# best guess from #contact_type_best_effort.
def contact_type
  # Building the nice name is the step that may infer the contact type from
  # an adfix, so force it before answering.
  nice_name
  contact_type_best_effort
end
71
+
72
# Stores the contact type as a symbol. When a different contact type was
# already set, the change is announced on standard output first.
#
# @param new_contact_type [#to_sym] the new contact type
def contact_type=(new_contact_type)
  ct_as_sym = new_contact_type.to_sym
  changing = !@contact_type.nil? && @contact_type != ct_as_sym

  puts "Changing contact type of #{@name} from #{@contact_type} to #{new_contact_type}" if changing

  @contact_type = ct_as_sym
end
81
+
82
# Iterates through the words in the name, i.e. the slug split on
# SLUG_DELIMITER. The word list is cached in @words after the first call.
def each_word(&block)
  @words = slug.split(SLUG_DELIMITER) unless @words
  @words.each(&block)
end
87
+
88
+ # These lines aren't used and aren't covered by specs
89
+ # def name=(new_name)
90
+ # initialize new_name, :contact_type => @contact_type
91
+ # end
92
+ #
93
+ # def to_hash
94
+ # {
95
+ # name: name,
96
+ # nice_name: nice_name,
97
+ # simple_name: simple_name,
98
+ # slug: slug,
99
+ # contact_type: contact_type,
100
+ # last_name: last_name,
101
+ # remainder: remainder,
102
+ # adfix_found: adfix_found
103
+ # }
104
+ # end
105
+
106
+ private
107
+
108
+ #--------------------------------------------------------
109
+ # Tidy up the name we've received
110
+ #--------------------------------------------------------
111
+
112
# Runs the three String unescaping helpers on @tidy_name, in order.
def unescape
  @tidy_name
    .ensure_safe!
    .safe_unescape!
    .unescape_html!
end
115
+
116
# Strips everything matching ZERO_WIDTH_FILTER from @tidy_name.
def remove_zero_width
  unwanted = ZERO_WIDTH_FILTER
  @tidy_name.strip_unwanted!(unwanted)
end
119
+
120
# Normalises the spacing of @tidy_name: spacing around commas, then outer
# whitespace, then all remaining whitespace turned into ASCII spaces.
def tidy_spacing
  @tidy_name.space_around_comma!.strip_or_self!.whitespace_to!(ASCII_SPACE)
end
126
+
127
# Delegates to String#fix_encoding_errors! on @tidy_name.
def fix_encoding_errors
  tidy = @tidy_name
  tidy.fix_encoding_errors!
end
130
+
131
# Remove spaces from groups of initials in @tidy_name, then make sure the
# initials are followed by a space.
def consolidate_initials
  @tidy_name.remove_spaces_from_initials!.ensure_space_after_initials!
end
137
+
138
# An adfix is either a prefix or a suffix.
#
# While the name is still in one piece, suffixes and then prefixes are
# stripped from @nice_name. Once it has been split into @last_name and
# @remainder, suffixes come off @last_name and prefixes off @remainder.
def remove_adfixes
  if @last_name.nil?
    # Our name is still in one part, not two
    @nice_name = strip_every_adfix(:suffix, @nice_name)
    @nice_name = strip_every_adfix(:prefix, @nice_name)
  else
    # Our name is currently in two halves
    @last_name = strip_every_adfix(:suffix, @last_name)
    @remainder = strip_every_adfix(:prefix, @remainder)
  end

  nil
end

# Keeps removing the outermost adfix of the given type until
# remove_outermost_adfix reports (via @adfix_found) that none was found.
def strip_every_adfix(adfix_type, name_part)
  loop do
    name_part = remove_outermost_adfix(adfix_type, name_part)
    return name_part unless @adfix_found
  end
end
164
+
165
# Names in the form "Smith, John" need to be turned around to "John Smith".
#
# This step only records the two halves in @last_name and @remainder. It
# does nothing for organizations, or when the name does not split into
# exactly two parts on ", ".
def fixup_last_name_first
  return if @contact_type == :organization

  surname, rest, *extra = @nice_name.split(', ')
  return if rest.nil? || !extra.empty?

  @last_name = surname # Sometimes the last name alone is all caps and we can name-case it
  @remainder = rest
end
176
+
177
# Sometimes we end up with mismatched braces after adfix stripping
# e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
#
# Balances the round brackets in @nice_name by count: missing closing
# brackets are appended, missing opening brackets are prepended. Every
# missing bracket is added, so the counts are equal afterwards even when
# more than one is missing.
def fixup_mismatched_braces
  unclosed = @nice_name.count('(') - @nice_name.count(')')

  if unclosed > 0
    @nice_name += ')' * unclosed
  elsif unclosed < 0
    @nice_name = ('(' * -unclosed) + @nice_name
  end
end
189
+
190
# Fix case if all caps or all lowercase, using the single-name or the
# split-name routine depending on whether @last_name has been set.
def name_wrangle
  return name_wrangle_single_name if @last_name.nil?

  name_wrangle_split_name
end
198
+
199
# Name-cases @nice_name when its case carries no information:
# * organizations: only when entirely upper case and longer than 4 characters;
# * everything else: when entirely upper case or entirely lower case.
def name_wrangle_single_name
  downcased = @nice_name.downcase
  upcased = @nice_name.upcase

  needs_fix =
    if @contact_type == :organization
      @nice_name == upcased && @nice_name.length > 4
    else
      [upcased, downcased].include?(@nice_name)
    end

  @nice_name = name_case(downcased) if needs_fix
end
212
+
213
# Rebuilds @nice_name as "<remainder> <last name>", name-casing the last
# name first when it is entirely upper case or entirely lower case.
# It's a person if we've split the name, so no organization logic here.
def name_wrangle_split_name
  downcased = @last_name.downcase
  single_case = @last_name == @last_name.upcase || @last_name == downcased

  @last_name = name_case(downcased) if single_case
  @nice_name = "#{@remainder} #{@last_name}"
end
220
+
221
# Conjoin compound names with non-breaking spaces, by running the two
# String helpers on @nice_name in order.
def use_nonbreaking_spaces_in_compound_names
  @nice_name.nbsp_in_compound_name!.nbsp_in_name_modifier!
end
227
+
228
+ #--------------------------------------------------------
229
+ # Make search name from nice name
230
+ #--------------------------------------------------------
231
+
232
# Remove initials from personal names unless they are the only identifier,
# i.e. only remove initials if there's also a proper name there.
#
# The stripped version is kept only if it still contains an ASCII space.
def remove_initials
  return if @contact_type != :person

  without_initials = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')

  # If the name still has at least one space we're OK
  @simple_name = without_initials if without_initials.include?(ASCII_SPACE)
end
242
+
243
# For people, reduces @simple_name to its first and last usable words,
# e.g. "Philip Seymour Hoffman" -> "Philip Hoffman". The name is left alone
# when neither a first nor a last usable word is found.
def remove_middle_names
  return unless @contact_type == :person

  first_name, remaining = find_first_usable_name(@simple_name.split)
  last_name = find_last_usable_name(remaining)

  return unless first_name || last_name

  @simple_name = [first_name, last_name].compact.join(' ')
end
254
+
255
# Finds the first word that still has content once every character matched
# by FILTER_COMPAT is removed.
#
# @param parts [Array<String>] the words of the name, in order
# @return [Array] a pair: the first usable word and the words that follow
#   it. When no word is usable the pair is [nil, parts], which matches how
#   find_last_usable_name reports "nothing found" (previously the last,
#   unusable, word was returned as if it were a name).
def find_first_usable_name(parts)
  index = parts.index { |word| !word.gsub(FILTER_COMPAT, '').empty? }
  return [nil, parts] if index.nil?

  # drop returns a new array, so the caller's array is not modified
  [parts[index], parts.drop(index + 1)]
end
267
+
268
# Finds the last word that still has content once every character matched
# by FILTER_COMPAT is removed.
#
# @param parts [Array<String>] the candidate words, in order
# @return [String, nil] the last usable word, or nil when there is none
def find_last_usable_name(parts)
  parts.reverse_each.find { |word| !word.gsub(FILTER_COMPAT, '').empty? }
end
279
+
280
# Delegates to String#remove_periods_from_initials! on @simple_name,
# e.g. "J.P.R. Williams" -> "JPR Williams".
def remove_periods_from_initials
  simple = @simple_name
  simple.remove_periods_from_initials!
end
283
+
284
# Rewrites symbols and abbreviations in @simple_name (in place) into a
# standard spelling, then strips quotes and commercial decoration.
def standardize_words
  [
    [/ *& */, ' and '],                       # ampersand characters -> ' and '
    [/ *\+ */, ' plus '],                     # plus signs -> ' plus '
    [/\bintl\b/i, 'International'],           # 'intl' -> 'International'
    [/[־‐‑‒–—―−﹘﹣-]/, SLUG_DELIMITER] # Unicode dashes -> ASCII hyphen
  ].each do |pattern, replacement|
    @simple_name.gsub!(pattern, replacement)
  end

  @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
end
291
+
292
+ #--------------------------------------------------------
293
+ # Initialization and utilities
294
+ #--------------------------------------------------------
295
+
296
# @param new_name [String, nil] the raw name; nil is stored as an empty string
# @param args [Hash] options; only :contact_type is read (via contact_type_from)
def initialize(new_name, args = {})
  # What we were given
  @name = new_name || ''
  @contact_type = contact_type_from(args)

  # Derived forms, each computed on first request
  @tidy_name = @nice_name = @simple_name = @slug = nil

  # The two halves of a "Last, First" name, once it has been split
  @last_name = @remainder = nil

  # Whether the most recent adfix search found something
  @adfix_found = false
end
310
+
311
# Extracts a valid contact type from the options hash.
#
# The value is converted to a string, downcased and symbolised, so strings
# and symbols are both accepted case-insensitively. Nothing is mutated:
# the previous implementation called downcase! on the result of to_s, which
# raises FrozenError when to_s returns a frozen string (e.g. for true).
#
# @param args [Hash] options; only :contact_type is read
# @return [Symbol, nil] :person or :organization, or nil when the option is
#   missing, falsy or anything else
def contact_type_from(args)
  requested = args[:contact_type]
  return unless requested

  candidate = requested.to_s.downcase.to_sym
  candidate if %i[person organization].include?(candidate)
end
323
+
324
# If we don't know the contact type, what's our best guess?
#
# A known contact type wins. Otherwise a name containing an ASCII space
# (more than one word) is assumed to be a person, and a single word is
# assumed to be an organization.
def contact_type_best_effort
  @contact_type || (@name.include?(ASCII_SPACE) ? :person : :organization)
end
334
+
335
# We pass to this routine either prefixes or suffixes.
#
# @param adfix_type [Symbol] a key of ADFIX_PATTERNS (:prefix or :suffix)
# @param name_part [String] the text to strip
# @return [String] name_part itself when no adfix was found, otherwise the
#   text on either side of the adfix, minus any trailing comma it exposed
def remove_outermost_adfix(adfix_type, name_part)
  ct, (before, _adfix, after) = find_contact_type_and_parts(ADFIX_PATTERNS[adfix_type], name_part)

  return name_part unless @adfix_found

  # If we've found a diagnostic adfix then set the contact type
  self.contact_type = ct

  # The remainder of the name is before or after the adfix depending on
  # whether this is a suffix or a prefix; the other side is empty.
  remaining = before + after
  remaining.gsub(/\s*,\s*$/, '')
end
349
+
350
# Looks for an adfix in name_part and records the outcome in @adfix_found.
#
# The pattern for the best-effort contact type is tried first. If the
# contact type is indeterminate and nothing was found, the organization
# pattern is tried as well.
#
# @param adfixes [Hash] contact type => Regexp
# @param name_part [String] the text to search
# @return [Array] the contact type last tried and the three-element result
#   of String#partition for its pattern
def find_contact_type_and_parts(adfixes, name_part)
  candidates = [contact_type_best_effort]
  candidates << :organization unless @contact_type

  ct = parts = nil

  candidates.each do |candidate|
    ct = candidate
    parts = name_part.partition(adfixes[ct])
    @adfix_found = !parts[1].empty?
    break if @adfix_found
  end

  [ct, parts]
end
365
+
366
+ # Original Version of NameCase:
367
+ # Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
368
+ # This module may be used/distributed/modified under the same terms as Perl itself
369
+ # http://dev.perl.org/licenses/ (GPL)
370
+ #
371
+ # Ruby Version:
372
+ # Copyright (c) Aaron Patterson 2006
373
+ # NameCase is distributed under the GPL license.
374
+ #
375
+ # Substantially modified for Xendata
376
+ # Improved in several areas, also now adds non-breaking spaces for
377
+ # compound names like "van der Pump"
378
# Runs the String name-casing helpers, in order, on a copy of the argument.
#
# @param lowercase [String] the name, assumed to be already downcased
# @return the result of the last helper in the chain
def name_case(lowercase)
  lowercase
    .dup # leave the caller's string untouched
    .upcase_first_letter!
    .downcase_after_apostrophe!
    .fix_mac!
    .fix_ff!
    .fix_name_modifiers!
    .upcase_initials!
end
389
+ end
390
+ end