name-tamer 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d27bffbd8c84ab7c9494017659166c56d1c686b0
4
- data.tar.gz: 419038fdd3ef60f3b19138ec670cc06176acb737
3
+ metadata.gz: fb6ad36c6ae8e6e70a0dc780b7c16a21c044a50e
4
+ data.tar.gz: 39e85453fb141d296944dfc2541a19a8308c62d5
5
5
  SHA512:
6
- metadata.gz: f6982d1d027a2774c4fc01f30a14c1b5bdb1f9795cb9a789ed270ab141c7b2c6650f34a62d283d281cd627ef4b6fbb880a2d75045d2ede4c668cc8650d011c45
7
- data.tar.gz: 0985d15ac64ed550bb4584b588864f216fe89e02b60cec65ff4b6746f8bc9397a01c670e2dc3946642c00a5fd789cf4e8c79b77a19021f267f0c246230793347
6
+ metadata.gz: 50eedb83bbdef219b9ce12309e89faa53f2ca31aad1b20af54068777f7a4792abcca854479409f7259cfa23a376912a558b8c8845a892f7df4807c283a9afd0e
7
+ data.tar.gz: cef63c1ce63b49618c49f5f07eaaac89bc81839901581a18662c90cb1f652c72b63463fab27263005da32461db9bc2a0e5ac17dd6986f3b1f7395a623077cac0
data/.gitignore CHANGED
@@ -1,3 +1,5 @@
1
+ *.gem
2
+
1
3
  *.rbc
2
4
  capybara-*.html
3
5
  .rspec
@@ -1 +1 @@
1
- 2.3.0
1
+ 2.3.1
@@ -46,7 +46,7 @@ task :check_existing do
46
46
  'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.',
47
47
  'O.K.', 'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
48
48
  ].each do |suffix|
49
- fail suffix unless NameTamer::ADFIXES[:suffix][:person].include? suffix
49
+ raise suffix unless NameTamer::ADFIXES[:suffix][:person].include? suffix
50
50
  end
51
51
 
52
52
  [
@@ -71,6 +71,6 @@ task :check_existing do
71
71
  'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.', 'ПУП.', 'С.Д.', 'בע"מ', '任意組合',
72
72
  '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社', 'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
73
73
  ].each do |suffix|
74
- fail suffix unless NameTamer::ADFIXES[:suffix][:organization].include? suffix
74
+ raise suffix unless NameTamer::ADFIXES[:suffix][:organization].include? suffix
75
75
  end
76
76
  end
@@ -1,537 +1 @@
1
- # encoding: utf-8
2
- require 'cgi'
3
- require 'string_extras'
4
-
5
- # References:
6
- # http://www.w3.org/International/questions/qa-personal-names
7
- # https://github.com/berkmancenter/namae
8
- # https://github.com/mericson/people
9
- # http://en.wikipedia.org/wiki/Types_of_business_entity
10
- # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
11
- # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
12
- # http://en.wikipedia.org/wiki/Nobiliary_particle
13
- # http://en.wikipedia.org/wiki/Spanish_naming_customs
14
- # http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
15
-
16
- class NameTamer
17
- attr_reader :name
18
-
19
- class << self
20
- def [](name, args = {})
21
- new name, args
22
- end
23
-
24
- # Make a slug from a string
25
- def parameterize(string, args = {})
26
- sep = args[:sep] || SLUG_DELIMITER
27
- rfc3987 = args[:rfc3987] || false
28
- filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
29
-
30
- new_string = string.dup
31
-
32
- new_string
33
- .whitespace_to!(sep)
34
- .invalid_chars_to!(sep)
35
- .strip_unwanted!(filter)
36
- .fix_separators!(sep)
37
- .approximate_latin_chars!
38
-
39
- # Have we got anything left?
40
- new_string = '_' if new_string.empty?
41
-
42
- # downcase any latin characters
43
- new_string.downcase
44
- end
45
- end
46
-
47
- def tidy_name
48
- unless @tidy_name
49
- @tidy_name = name.dup # Start with the name we've received
50
-
51
- unescape # Unescape percent-encoded characters and fix UTF-8 encoding
52
- remove_zero_width # remove zero-width characters
53
- tidy_spacing # " John Smith " -> "John Smith"
54
- fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
55
- consolidate_initials # "I. B. M." -> "I.B.M."
56
- end
57
-
58
- @tidy_name
59
- end
60
-
61
- def nice_name
62
- unless @nice_name
63
- @nice_name = tidy_name.dup # Start with the tidied name
64
-
65
- remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
66
- fixup_last_name_first # "Smith, John" -> "John Smith"
67
- fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
68
- remove_adfixes # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
69
- name_wrangle # proper name case and non-breaking spaces
70
- use_nonbreaking_spaces_in_compound_names
71
- end
72
-
73
- @nice_name
74
- end
75
-
76
- def simple_name
77
- unless @simple_name
78
- @simple_name = nice_name.dup # Start with nice name
79
-
80
- remove_initials # "John Q. Doe" -> "John Doe"
81
- remove_middle_names # "Philip Seymour Hoffman" -> "Philip Hoffman"
82
- remove_periods_from_initials # "J.P.R. Williams" -> "JPR Williams"
83
- standardize_words # "B&Q Intl" -> "B and Q International"
84
-
85
- @simple_name.whitespace_to!(ASCII_SPACE)
86
- end
87
-
88
- @simple_name
89
- end
90
-
91
- def slug
92
- @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
93
- end
94
-
95
- def contact_type
96
- nice_name # make sure we've done the bit which infers contact_type
97
- contact_type_best_effort
98
- end
99
-
100
- def contact_type=(new_contact_type)
101
- ct_as_sym = new_contact_type.to_sym
102
-
103
- unless @contact_type.nil? || @contact_type == ct_as_sym
104
- puts "Changing contact type of #{@name} from #{@contact_type} to #{new_contact_type}"
105
- end
106
-
107
- @contact_type = ct_as_sym
108
- end
109
-
110
- # These lines aren't used and aren't covered by specs
111
- # def name=(new_name)
112
- # initialize new_name, :contact_type => @contact_type
113
- # end
114
- #
115
- # def to_hash
116
- # {
117
- # name: name,
118
- # nice_name: nice_name,
119
- # simple_name: simple_name,
120
- # slug: slug,
121
- # contact_type: contact_type,
122
- # last_name: last_name,
123
- # remainder: remainder,
124
- # adfix_found: adfix_found
125
- # }
126
- # end
127
-
128
- private
129
-
130
- #--------------------------------------------------------
131
- # Tidy up the name we've received
132
- #--------------------------------------------------------
133
-
134
- def unescape
135
- @tidy_name.ensure_safe!.safe_unescape!.unescape_html!
136
- end
137
-
138
- def remove_zero_width
139
- @tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
140
- end
141
-
142
- def tidy_spacing
143
- @tidy_name
144
- .space_around_comma!
145
- .strip_or_self!
146
- .whitespace_to!(ASCII_SPACE)
147
- end
148
-
149
- def fix_encoding_errors
150
- @tidy_name.fix_encoding_errors!
151
- end
152
-
153
- # Remove spaces from groups of initials
154
- def consolidate_initials
155
- @tidy_name
156
- .remove_spaces_from_initials!
157
- .ensure_space_after_initials!
158
- end
159
-
160
- # An adfix is either a prefix or a suffix
161
- def remove_adfixes
162
- if @last_name.nil?
163
- # Our name is still in one part, not two
164
- loop do
165
- @nice_name = remove_outermost_adfix(:suffix, @nice_name)
166
- break unless @adfix_found
167
- end
168
-
169
- loop do
170
- @nice_name = remove_outermost_adfix(:prefix, @nice_name)
171
- break unless @adfix_found
172
- end
173
- else
174
- # Our name is currently in two halves
175
- loop do
176
- @last_name = remove_outermost_adfix(:suffix, @last_name)
177
- break unless @adfix_found
178
- end
179
-
180
- loop do
181
- @remainder = remove_outermost_adfix(:prefix, @remainder)
182
- break unless @adfix_found
183
- end
184
- end
185
- end
186
-
187
- # Names in the form "Smith, John" need to be turned around to "John Smith"
188
- def fixup_last_name_first
189
- return if @contact_type == :organization
190
-
191
- parts = @nice_name.split ', '
192
-
193
- return unless parts.count == 2
194
-
195
- @last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it
196
- @remainder = parts[1]
197
- end
198
-
199
- # Sometimes we end up with mismatched braces after adfix stripping
200
- # e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
201
- def fixup_mismatched_braces
202
- left_brace_count = @nice_name.count '('
203
- right_brace_count = @nice_name.count ')'
204
-
205
- if left_brace_count > right_brace_count
206
- @nice_name += ')'
207
- elsif left_brace_count < right_brace_count
208
- @nice_name = '(' + @nice_name
209
- end
210
- end
211
-
212
- def name_wrangle
213
- # Fix case if all caps or all lowercase
214
- if @last_name.nil?
215
- name_wrangle_single_name
216
- else
217
- name_wrangle_split_name
218
- end
219
- end
220
-
221
- def name_wrangle_single_name
222
- lowercase = @nice_name.downcase
223
- uppercase = @nice_name.upcase
224
- fix_case = false
225
-
226
- if @contact_type == :organization
227
- fix_case = true if @nice_name == uppercase && @nice_name.length > 4
228
- else
229
- fix_case = true if [uppercase, lowercase].include?(@nice_name)
230
- end
231
-
232
- @nice_name = name_case(lowercase) if fix_case
233
- end
234
-
235
- def name_wrangle_split_name
236
- # It's a person if we've split the name, so no organization logic here
237
- lowercase = @last_name.downcase
238
- uppercase = @last_name.upcase
239
- @last_name = name_case(lowercase) if [uppercase, lowercase].include?(@last_name)
240
- @nice_name = "#{@remainder} #{@last_name}"
241
- end
242
-
243
- # Conjoin compound names with non-breaking spaces
244
- def use_nonbreaking_spaces_in_compound_names
245
- @nice_name
246
- .nbsp_in_compound_name!
247
- .nbsp_in_name_modifier!
248
- end
249
-
250
- #--------------------------------------------------------
251
- # Make search name from nice name
252
- #--------------------------------------------------------
253
-
254
- # Remove initials from personal names unless they are the only identifier.
255
- # i.e. only remove initials if there's also a proper name there
256
- def remove_initials
257
- return unless @contact_type == :person
258
-
259
- temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
260
-
261
- # If the name still has at least one space we're OK
262
- @simple_name = temp_name if temp_name.include?(ASCII_SPACE)
263
- end
264
-
265
- def remove_middle_names
266
- return unless @contact_type == :person
267
-
268
- first_name, parts = find_first_usable_name(@simple_name.split)
269
- last_name, = find_last_usable_name(parts)
270
-
271
- return unless first_name || last_name
272
-
273
- separator = first_name && last_name ? ' ' : ''
274
- @simple_name = "#{first_name}#{separator}#{last_name}"
275
- end
276
-
277
- def find_first_usable_name(parts)
278
- part = nil
279
-
280
- parts.each_index do |i|
281
- part = parts[i]
282
- next if part.gsub(FILTER_COMPAT, '').empty?
283
- parts = parts.slice(i + 1, parts.length) # don't use "slice!"
284
- break
285
- end
286
-
287
- [part, parts]
288
- end
289
-
290
- def find_last_usable_name(parts)
291
- part = nil
292
-
293
- parts.reverse_each do |p|
294
- next if p.gsub(FILTER_COMPAT, '').empty?
295
- part = p
296
- break
297
- end
298
-
299
- part
300
- end
301
-
302
- def remove_periods_from_initials
303
- @simple_name.remove_periods_from_initials!
304
- end
305
-
306
- def standardize_words
307
- @simple_name.gsub!(/ *& */, ' and ') # replace ampersand characters with ' and '
308
- @simple_name.gsub!(/ *\+ */, ' plus ') # replace plus signs with ' plus '
309
- @simple_name.gsub!(/\bintl\b/i, 'International') # replace 'intl' with 'International'
310
- @simple_name.gsub!(/[־‐‑‒–—―−﹘﹣-]/, SLUG_DELIMITER) # Replace Unicode dashes with ASCII hyphen
311
- @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
312
- end
313
-
314
- #--------------------------------------------------------
315
- # Initialization and utilities
316
- #--------------------------------------------------------
317
-
318
- def initialize(new_name, args = {})
319
- @name = new_name || ''
320
- @contact_type = contact_type_from args
321
-
322
- @tidy_name = nil
323
- @nice_name = nil
324
- @simple_name = nil
325
- @slug = nil
326
-
327
- @last_name = nil
328
- @remainder = nil
329
-
330
- @adfix_found = false
331
- end
332
-
333
- def contact_type_from(args)
334
- args_ct = args[:contact_type]
335
- return unless args_ct
336
-
337
- ct = args_ct.is_a?(Symbol) ? args_ct : args_ct.dup
338
- ct = ct.to_s unless [String, Symbol].include? ct.class
339
- ct.downcase! if ct.class == String
340
- ct = ct.to_sym
341
- ct = nil unless [:person, :organization].include? ct
342
-
343
- ct
344
- end
345
-
346
- # If we don't know the contact type, what's our best guess?
347
- def contact_type_best_effort
348
- if @contact_type
349
- @contact_type
350
- else
351
- # If it's just one word we'll assume organization.
352
- # If more then we'll assume a person
353
- @name.include?(ASCII_SPACE) ? :person : :organization
354
- end
355
- end
356
-
357
- # We pass to this routine either prefixes or suffixes
358
- def remove_outermost_adfix(adfix_type, name_part)
359
- ct, parts = find_contact_type_and_parts(ADFIX_PATTERNS[adfix_type], name_part)
360
-
361
- return name_part unless @adfix_found
362
-
363
- # If we've found a diagnostic adfix then set the contact type
364
- self.contact_type = ct
365
-
366
- # The remainder of the name will be in parts[0] or parts[2] depending
367
- # on whether this is a prefix or a suffix.
368
- # We'll also remove any trailing commas we've exposed.
369
- (parts[0] + parts[2]).gsub(/\s*,\s*$/, '')
370
- end
371
-
372
- def find_contact_type_and_parts(adfixes, name_part)
373
- ct = contact_type_best_effort
374
- parts = name_part.partition adfixes[ct]
375
- @adfix_found = !parts[1].empty?
376
-
377
- return [ct, parts] if @contact_type || @adfix_found
378
-
379
- # If the contact type is indeterminate and we didn't find a diagnostic adfix
380
- # for a person then try again for an organization
381
- ct = :organization
382
- parts = name_part.partition adfixes[ct]
383
- @adfix_found = !parts[1].empty?
384
-
385
- [ct, parts]
386
- end
387
-
388
- # Original Version of NameCase:
389
- # Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
390
- # This module may be used/distributed/modified under the same terms as Perl itself
391
- # http://dev.perl.org/licenses/ (GPL)
392
- #
393
- # Ruby Version:
394
- # Copyright (c) Aaron Patterson 2006
395
- # NameCase is distributed under the GPL license.
396
- #
397
- # Substantially modified for Xendata
398
- # Improved in several areas, also now adds non-breaking spaces for
399
- # compound names like "van der Pump"
400
- def name_case(lowercase)
401
- n = lowercase.dup # We assume the name is passed already downcased
402
-
403
- n
404
- .upcase_first_letter!
405
- .downcase_after_apostrophe!
406
- .fix_mac!
407
- .fix_ff!
408
- .fix_name_modifiers!
409
- .upcase_initials!
410
- end
411
-
412
- #--------------------------------------------------------
413
- # Constants
414
- #--------------------------------------------------------
415
-
416
- NONBREAKING_SPACE = "\u00a0"
417
- ASCII_SPACE = "\u0020"
418
- ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
419
- SLUG_DELIMITER = '-'
420
- ZERO_WIDTH_FILTER = /[\u180E\u200B\u200C\u200D\u2063\uFEFF]/
421
-
422
- # Constants for parameterizing Unicode strings for IRIs
423
- #
424
- # Allowed characters in an IRI segment are defined by RFC 3987
425
- # (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
426
- #
427
- # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
428
- # / "@" )
429
- # ; non-zero-length segment without any colon ":"
430
- #
431
- # iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
432
- #
433
- # pct-encoded = "%" HEXDIG HEXDIG
434
- #
435
- # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
436
- # / "*" / "+" / "," / ";" / "="
437
- #
438
- # ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
439
- # / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
440
- # / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
441
- # / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
442
- # / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
443
- # / %xD0000-DFFFD / %xE1000-EFFFD
444
- #
445
- # Note that we can't use Unicode code points above \uFFFF because of
446
- # regex limitations, so we'll ignore ucschar above that point.
447
- #
448
- # We're using the most restrictive segment definition (isegment-nz-nc)
449
- # to avoid any possible problems with the IRI that it one day might
450
- # get placed in.
451
- ALPHA = 'A-Za-z'
452
- DIGIT = '0-9'
453
- UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
454
- IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}"
455
- SUBDELIMS = '!$&\'\(\)\*+,;='
456
- ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@" # pct-encoded not needed
457
- FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
458
- FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
459
-
460
- # These are the prefixes and suffixes we want to remove
461
- # If you add to the list, you can use spaces and dots where appropriate
462
- # Ensure any single letters are followed by a dot because we'll add one to the string
463
- # during processing, e.g. "y Cia." should be "y. Cia."
464
- ADFIXES = {
465
- prefix: {
466
- person: [
467
- 'Baron', 'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame',
468
- 'Doctor', 'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant',
469
- 'Lord', 'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.',
470
- 'Miss', 'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.',
471
- 'Mr. and Mrs.', 'Mrs.', 'Msgr.', 'Ms.', 'Prof.', 'Professor', 'Rev.',
472
- 'Reverend', 'Sir', 'Sister', 'The Hon.', 'The Lady.', 'The Lord',
473
- 'The Rt. Hon.'
474
- ],
475
- organization: [
476
- 'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
477
- ],
478
- before: '\\A', after: ADFIX_JOINERS
479
- },
480
- suffix: {
481
- person: [
482
- 'Chartered F.C.S.I.', 'Chartered M.C.S.I.', 'I.F.R.S. Certified', 'F.Inst.L.M.', 'C.I.S.S.P.', 'F.C.I.P.S.',
483
- 'M.R.I.C.S.', 'T.M.I.E.T.', 'Dip. D.M.', 'A.A.M.S.', 'A.C.C.A.', 'A.C.M.A.', 'A.I.F.A.', 'A.W.M.A.', 'C.A.I.A.',
484
- 'C.A.P.M.', 'C.C.I.M.', 'C.D.F.A.', 'C.E.P.P.', 'C.F.B.S.', 'C.G.M.A.', 'C.I.T.P.', 'C.L.T.C.', 'C.P.C.C.',
485
- 'C.R.P.C.', 'C.R.P.S.', 'C.S.O.X.', 'C.S.S.D.', 'F.B.C.S.', 'F.C.C.A.', 'F.C.M.I.', 'F.C.S.I.', 'F.I.E.T.',
486
- 'F.I.R.P.', 'M.I.E.T.', 'M.S.F.S.', 'M.Sc. D.', 'O.R.S.C.', 'R.I.C.P.', 'B.Tech.', 'Cantab.', 'Ch.F.C.',
487
- 'D.Phil.', 'I.T.I.L. v3', 'M.Io.D.', 'S.C.M.P', 'A.C.A.', 'A.C.C.', 'A.E.P.', 'A.I.F.', 'A.S.A.', 'B.Eng.',
488
- 'C.B.V.', 'C.E.M.', 'C.Eng.', 'C.F.A.', 'C.F.F.', 'C.F.P.', 'C.F.S.', 'C.G.A.', 'C.G.B.', 'C.G.P.', 'C.I.M.',
489
- 'C.L.P.', 'C.L.U.', 'C.M.A.', 'C.M.T.', 'C.P.A.', 'C.T.A.', 'C.W.S.', 'D.B.E.', 'D.D.S.', 'D.V.M.', 'E.R.P.',
490
- 'Eng.D.', 'F.C.A.', 'F.P.C.', 'F.R.M.', 'F.R.M.', 'G.S.P.', 'L.P.S.', 'M.B.A.', 'M.B.E.', 'M.E.P.', 'M.Eng.',
491
- 'M.Jur.', 'M.P.A.', 'M.S.F.', 'M.S.P.', 'O.B.E.', 'P.C.C.', 'P.F.S.', 'P.H.R.', 'P.M.C.', 'P.M.P.', 'P.M.P.',
492
- 'P.S.P.', 'R.F.C.', 'V.M.D.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'Ed.M.', 'Hons.', 'LL.B.', 'LL.D.', 'LL.M.', 'M.Ed.',
493
- 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'C.A.', 'E.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'M.S.',
494
- 'O.K.', 'P.A.', 'Q.C.', 'R.D.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
495
- ],
496
- organization: [
497
- 'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
498
- 'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
499
- 'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
500
- 'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
501
- 'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
502
- 'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'K.G.a.A.',
503
- 'L.L.L.P.', 'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.', 'P.L.L.C.',
504
- 'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.', 'S.C.R.I.',
505
- 'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', '&. Cie.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
506
- 'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
507
- 'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
508
- 'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
509
- 'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
510
- 'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
511
- '股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
512
- 'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
513
- 'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
514
- 'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
515
- 'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
516
- 'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
517
- 'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
518
- ],
519
- before: ADFIX_JOINERS, after: '\\z'
520
- }
521
- }
522
-
523
- ADFIX_PATTERNS = {}
524
-
525
- [:prefix, :suffix].each do |adfix_type|
526
- patterns = {}
527
- adfix = ADFIXES[adfix_type]
528
-
529
- [:person, :organization].each do |ct|
530
- with_optional_spaces = adfix[ct].map { |p| p.gsub(ASCII_SPACE, ' *') }
531
- pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
532
- patterns[ct] = /#{adfix[:before]}\(*(?:#{pattern_string})[®™\)]*#{adfix[:after]}/i
533
- end
534
-
535
- ADFIX_PATTERNS[adfix_type] = patterns
536
- end
537
- end
1
+ require 'name_tamer'