name-tamer 0.4.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d27bffbd8c84ab7c9494017659166c56d1c686b0
4
- data.tar.gz: 419038fdd3ef60f3b19138ec670cc06176acb737
3
+ metadata.gz: fb6ad36c6ae8e6e70a0dc780b7c16a21c044a50e
4
+ data.tar.gz: 39e85453fb141d296944dfc2541a19a8308c62d5
5
5
  SHA512:
6
- metadata.gz: f6982d1d027a2774c4fc01f30a14c1b5bdb1f9795cb9a789ed270ab141c7b2c6650f34a62d283d281cd627ef4b6fbb880a2d75045d2ede4c668cc8650d011c45
7
- data.tar.gz: 0985d15ac64ed550bb4584b588864f216fe89e02b60cec65ff4b6746f8bc9397a01c670e2dc3946642c00a5fd789cf4e8c79b77a19021f267f0c246230793347
6
+ metadata.gz: 50eedb83bbdef219b9ce12309e89faa53f2ca31aad1b20af54068777f7a4792abcca854479409f7259cfa23a376912a558b8c8845a892f7df4807c283a9afd0e
7
+ data.tar.gz: cef63c1ce63b49618c49f5f07eaaac89bc81839901581a18662c90cb1f652c72b63463fab27263005da32461db9bc2a0e5ac17dd6986f3b1f7395a623077cac0
data/.gitignore CHANGED
@@ -1,3 +1,5 @@
1
+ *.gem
2
+
1
3
  *.rbc
2
4
  capybara-*.html
3
5
  .rspec
@@ -1 +1 @@
1
- 2.3.0
1
+ 2.3.1
@@ -46,7 +46,7 @@ task :check_existing do
46
46
  'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.',
47
47
  'O.K.', 'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
48
48
  ].each do |suffix|
49
- fail suffix unless NameTamer::ADFIXES[:suffix][:person].include? suffix
49
+ raise suffix unless NameTamer::ADFIXES[:suffix][:person].include? suffix
50
50
  end
51
51
 
52
52
  [
@@ -71,6 +71,6 @@ task :check_existing do
71
71
  'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.', 'ПУП.', 'С.Д.', 'בע"מ', '任意組合',
72
72
  '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社', 'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
73
73
  ].each do |suffix|
74
- fail suffix unless NameTamer::ADFIXES[:suffix][:organization].include? suffix
74
+ raise suffix unless NameTamer::ADFIXES[:suffix][:organization].include? suffix
75
75
  end
76
76
  end
@@ -1,537 +1 @@
1
- # encoding: utf-8
2
- require 'cgi'
3
- require 'string_extras'
4
-
5
- # References:
6
- # http://www.w3.org/International/questions/qa-personal-names
7
- # https://github.com/berkmancenter/namae
8
- # https://github.com/mericson/people
9
- # http://en.wikipedia.org/wiki/Types_of_business_entity
10
- # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
11
- # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
12
- # http://en.wikipedia.org/wiki/Nobiliary_particle
13
- # http://en.wikipedia.org/wiki/Spanish_naming_customs
14
- # http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
15
-
16
- class NameTamer
17
- attr_reader :name
18
-
19
- class << self
20
- def [](name, args = {})
21
- new name, args
22
- end
23
-
24
- # Make a slug from a string
25
- def parameterize(string, args = {})
26
- sep = args[:sep] || SLUG_DELIMITER
27
- rfc3987 = args[:rfc3987] || false
28
- filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
29
-
30
- new_string = string.dup
31
-
32
- new_string
33
- .whitespace_to!(sep)
34
- .invalid_chars_to!(sep)
35
- .strip_unwanted!(filter)
36
- .fix_separators!(sep)
37
- .approximate_latin_chars!
38
-
39
- # Have we got anything left?
40
- new_string = '_' if new_string.empty?
41
-
42
- # downcase any latin characters
43
- new_string.downcase
44
- end
45
- end
46
-
47
- def tidy_name
48
- unless @tidy_name
49
- @tidy_name = name.dup # Start with the name we've received
50
-
51
- unescape # Unescape percent-encoded characters and fix UTF-8 encoding
52
- remove_zero_width # remove zero-width characters
53
- tidy_spacing # " John Smith " -> "John Smith"
54
- fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
55
- consolidate_initials # "I. B. M." -> "I.B.M."
56
- end
57
-
58
- @tidy_name
59
- end
60
-
61
- def nice_name
62
- unless @nice_name
63
- @nice_name = tidy_name.dup # Start with the tidied name
64
-
65
- remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
66
- fixup_last_name_first # "Smith, John" -> "John Smith"
67
- fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
68
- remove_adfixes # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
69
- name_wrangle # proper name case and non-breaking spaces
70
- use_nonbreaking_spaces_in_compound_names
71
- end
72
-
73
- @nice_name
74
- end
75
-
76
- def simple_name
77
- unless @simple_name
78
- @simple_name = nice_name.dup # Start with nice name
79
-
80
- remove_initials # "John Q. Doe" -> "John Doe"
81
- remove_middle_names # "Philip Seymour Hoffman" -> "Philip Hoffman"
82
- remove_periods_from_initials # "J.P.R. Williams" -> "JPR Williams"
83
- standardize_words # "B&Q Intl" -> "B and Q International"
84
-
85
- @simple_name.whitespace_to!(ASCII_SPACE)
86
- end
87
-
88
- @simple_name
89
- end
90
-
91
- def slug
92
- @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
93
- end
94
-
95
- def contact_type
96
- nice_name # make sure we've done the bit which infers contact_type
97
- contact_type_best_effort
98
- end
99
-
100
- def contact_type=(new_contact_type)
101
- ct_as_sym = new_contact_type.to_sym
102
-
103
- unless @contact_type.nil? || @contact_type == ct_as_sym
104
- puts "Changing contact type of #{@name} from #{@contact_type} to #{new_contact_type}"
105
- end
106
-
107
- @contact_type = ct_as_sym
108
- end
109
-
110
- # These lines aren't used and aren't covered by specs
111
- # def name=(new_name)
112
- # initialize new_name, :contact_type => @contact_type
113
- # end
114
- #
115
- # def to_hash
116
- # {
117
- # name: name,
118
- # nice_name: nice_name,
119
- # simple_name: simple_name,
120
- # slug: slug,
121
- # contact_type: contact_type,
122
- # last_name: last_name,
123
- # remainder: remainder,
124
- # adfix_found: adfix_found
125
- # }
126
- # end
127
-
128
- private
129
-
130
- #--------------------------------------------------------
131
- # Tidy up the name we've received
132
- #--------------------------------------------------------
133
-
134
- def unescape
135
- @tidy_name.ensure_safe!.safe_unescape!.unescape_html!
136
- end
137
-
138
- def remove_zero_width
139
- @tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
140
- end
141
-
142
- def tidy_spacing
143
- @tidy_name
144
- .space_around_comma!
145
- .strip_or_self!
146
- .whitespace_to!(ASCII_SPACE)
147
- end
148
-
149
- def fix_encoding_errors
150
- @tidy_name.fix_encoding_errors!
151
- end
152
-
153
- # Remove spaces from groups of initials
154
- def consolidate_initials
155
- @tidy_name
156
- .remove_spaces_from_initials!
157
- .ensure_space_after_initials!
158
- end
159
-
160
- # An adfix is either a prefix or a suffix
161
- def remove_adfixes
162
- if @last_name.nil?
163
- # Our name is still in one part, not two
164
- loop do
165
- @nice_name = remove_outermost_adfix(:suffix, @nice_name)
166
- break unless @adfix_found
167
- end
168
-
169
- loop do
170
- @nice_name = remove_outermost_adfix(:prefix, @nice_name)
171
- break unless @adfix_found
172
- end
173
- else
174
- # Our name is currently in two halves
175
- loop do
176
- @last_name = remove_outermost_adfix(:suffix, @last_name)
177
- break unless @adfix_found
178
- end
179
-
180
- loop do
181
- @remainder = remove_outermost_adfix(:prefix, @remainder)
182
- break unless @adfix_found
183
- end
184
- end
185
- end
186
-
187
- # Names in the form "Smith, John" need to be turned around to "John Smith"
188
- def fixup_last_name_first
189
- return if @contact_type == :organization
190
-
191
- parts = @nice_name.split ', '
192
-
193
- return unless parts.count == 2
194
-
195
- @last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it
196
- @remainder = parts[1]
197
- end
198
-
199
- # Sometimes we end up with mismatched braces after adfix stripping
200
- # e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
201
- def fixup_mismatched_braces
202
- left_brace_count = @nice_name.count '('
203
- right_brace_count = @nice_name.count ')'
204
-
205
- if left_brace_count > right_brace_count
206
- @nice_name += ')'
207
- elsif left_brace_count < right_brace_count
208
- @nice_name = '(' + @nice_name
209
- end
210
- end
211
-
212
- def name_wrangle
213
- # Fix case if all caps or all lowercase
214
- if @last_name.nil?
215
- name_wrangle_single_name
216
- else
217
- name_wrangle_split_name
218
- end
219
- end
220
-
221
- def name_wrangle_single_name
222
- lowercase = @nice_name.downcase
223
- uppercase = @nice_name.upcase
224
- fix_case = false
225
-
226
- if @contact_type == :organization
227
- fix_case = true if @nice_name == uppercase && @nice_name.length > 4
228
- else
229
- fix_case = true if [uppercase, lowercase].include?(@nice_name)
230
- end
231
-
232
- @nice_name = name_case(lowercase) if fix_case
233
- end
234
-
235
- def name_wrangle_split_name
236
- # It's a person if we've split the name, so no organization logic here
237
- lowercase = @last_name.downcase
238
- uppercase = @last_name.upcase
239
- @last_name = name_case(lowercase) if [uppercase, lowercase].include?(@last_name)
240
- @nice_name = "#{@remainder} #{@last_name}"
241
- end
242
-
243
- # Conjoin compound names with non-breaking spaces
244
- def use_nonbreaking_spaces_in_compound_names
245
- @nice_name
246
- .nbsp_in_compound_name!
247
- .nbsp_in_name_modifier!
248
- end
249
-
250
- #--------------------------------------------------------
251
- # Make search name from nice name
252
- #--------------------------------------------------------
253
-
254
- # Remove initials from personal names unless they are the only identifier.
255
- # i.e. only remove initials if there's also a proper name there
256
- def remove_initials
257
- return unless @contact_type == :person
258
-
259
- temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
260
-
261
- # If the name still has at least one space we're OK
262
- @simple_name = temp_name if temp_name.include?(ASCII_SPACE)
263
- end
264
-
265
- def remove_middle_names
266
- return unless @contact_type == :person
267
-
268
- first_name, parts = find_first_usable_name(@simple_name.split)
269
- last_name, = find_last_usable_name(parts)
270
-
271
- return unless first_name || last_name
272
-
273
- separator = first_name && last_name ? ' ' : ''
274
- @simple_name = "#{first_name}#{separator}#{last_name}"
275
- end
276
-
277
- def find_first_usable_name(parts)
278
- part = nil
279
-
280
- parts.each_index do |i|
281
- part = parts[i]
282
- next if part.gsub(FILTER_COMPAT, '').empty?
283
- parts = parts.slice(i + 1, parts.length) # don't use "slice!"
284
- break
285
- end
286
-
287
- [part, parts]
288
- end
289
-
290
- def find_last_usable_name(parts)
291
- part = nil
292
-
293
- parts.reverse_each do |p|
294
- next if p.gsub(FILTER_COMPAT, '').empty?
295
- part = p
296
- break
297
- end
298
-
299
- part
300
- end
301
-
302
- def remove_periods_from_initials
303
- @simple_name.remove_periods_from_initials!
304
- end
305
-
306
- def standardize_words
307
- @simple_name.gsub!(/ *& */, ' and ') # replace ampersand characters with ' and '
308
- @simple_name.gsub!(/ *\+ */, ' plus ') # replace plus signs with ' plus '
309
- @simple_name.gsub!(/\bintl\b/i, 'International') # replace 'intl' with 'International'
310
- @simple_name.gsub!(/[־‐‑‒–—―−﹘﹣-]/, SLUG_DELIMITER) # Replace Unicode dashes with ASCII hyphen
311
- @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
312
- end
313
-
314
- #--------------------------------------------------------
315
- # Initialization and utilities
316
- #--------------------------------------------------------
317
-
318
- def initialize(new_name, args = {})
319
- @name = new_name || ''
320
- @contact_type = contact_type_from args
321
-
322
- @tidy_name = nil
323
- @nice_name = nil
324
- @simple_name = nil
325
- @slug = nil
326
-
327
- @last_name = nil
328
- @remainder = nil
329
-
330
- @adfix_found = false
331
- end
332
-
333
- def contact_type_from(args)
334
- args_ct = args[:contact_type]
335
- return unless args_ct
336
-
337
- ct = args_ct.is_a?(Symbol) ? args_ct : args_ct.dup
338
- ct = ct.to_s unless [String, Symbol].include? ct.class
339
- ct.downcase! if ct.class == String
340
- ct = ct.to_sym
341
- ct = nil unless [:person, :organization].include? ct
342
-
343
- ct
344
- end
345
-
346
- # If we don't know the contact type, what's our best guess?
347
- def contact_type_best_effort
348
- if @contact_type
349
- @contact_type
350
- else
351
- # If it's just one word we'll assume organization.
352
- # If more then we'll assume a person
353
- @name.include?(ASCII_SPACE) ? :person : :organization
354
- end
355
- end
356
-
357
- # We pass to this routine either prefixes or suffixes
358
- def remove_outermost_adfix(adfix_type, name_part)
359
- ct, parts = find_contact_type_and_parts(ADFIX_PATTERNS[adfix_type], name_part)
360
-
361
- return name_part unless @adfix_found
362
-
363
- # If we've found a diagnostic adfix then set the contact type
364
- self.contact_type = ct
365
-
366
- # The remainder of the name will be in parts[0] or parts[2] depending
367
- # on whether this is a prefix or a suffix.
368
- # We'll also remove any trailing commas we've exposed.
369
- (parts[0] + parts[2]).gsub(/\s*,\s*$/, '')
370
- end
371
-
372
- def find_contact_type_and_parts(adfixes, name_part)
373
- ct = contact_type_best_effort
374
- parts = name_part.partition adfixes[ct]
375
- @adfix_found = !parts[1].empty?
376
-
377
- return [ct, parts] if @contact_type || @adfix_found
378
-
379
- # If the contact type is indeterminate and we didn't find a diagnostic adfix
380
- # for a person then try again for an organization
381
- ct = :organization
382
- parts = name_part.partition adfixes[ct]
383
- @adfix_found = !parts[1].empty?
384
-
385
- [ct, parts]
386
- end
387
-
388
- # Original Version of NameCase:
389
- # Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
390
- # This module may be used/distributed/modified under the same terms as Perl itself
391
- # http://dev.perl.org/licenses/ (GPL)
392
- #
393
- # Ruby Version:
394
- # Copyright (c) Aaron Patterson 2006
395
- # NameCase is distributed under the GPL license.
396
- #
397
- # Substantially modified for Xendata
398
- # Improved in several areas, also now adds non-breaking spaces for
399
- # compound names like "van der Pump"
400
- def name_case(lowercase)
401
- n = lowercase.dup # We assume the name is passed already downcased
402
-
403
- n
404
- .upcase_first_letter!
405
- .downcase_after_apostrophe!
406
- .fix_mac!
407
- .fix_ff!
408
- .fix_name_modifiers!
409
- .upcase_initials!
410
- end
411
-
412
- #--------------------------------------------------------
413
- # Constants
414
- #--------------------------------------------------------
415
-
416
- NONBREAKING_SPACE = "\u00a0"
417
- ASCII_SPACE = "\u0020"
418
- ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
419
- SLUG_DELIMITER = '-'
420
- ZERO_WIDTH_FILTER = /[\u180E\u200B\u200C\u200D\u2063\uFEFF]/
421
-
422
- # Constants for parameterizing Unicode strings for IRIs
423
- #
424
- # Allowed characters in an IRI segment are defined by RFC 3987
425
- # (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
426
- #
427
- # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
428
- # / "@" )
429
- # ; non-zero-length segment without any colon ":"
430
- #
431
- # iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
432
- #
433
- # pct-encoded = "%" HEXDIG HEXDIG
434
- #
435
- # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
436
- # / "*" / "+" / "," / ";" / "="
437
- #
438
- # ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
439
- # / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
440
- # / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
441
- # / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
442
- # / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
443
- # / %xD0000-DFFFD / %xE1000-EFFFD
444
- #
445
- # Note that we can't use Unicode code points above \uFFFF because of
446
- # regex limitations, so we'll ignore ucschar above that point.
447
- #
448
- # We're using the most restrictive segment definition (isegment-nz-nc)
449
- # to avoid any possible problems with the IRI that it one day might
450
- # get placed in.
451
- ALPHA = 'A-Za-z'
452
- DIGIT = '0-9'
453
- UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
454
- IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}"
455
- SUBDELIMS = '!$&\'\(\)\*+,;='
456
- ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@" # pct-encoded not needed
457
- FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
458
- FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
459
-
460
- # These are the prefixes and suffixes we want to remove
461
- # If you add to the list, you can use spaces and dots where appropriate
462
- # Ensure any single letters are followed by a dot because we'll add one to the string
463
- # during processing, e.g. "y Cia." should be "y. Cia."
464
- ADFIXES = {
465
- prefix: {
466
- person: [
467
- 'Baron', 'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame',
468
- 'Doctor', 'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant',
469
- 'Lord', 'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.',
470
- 'Miss', 'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.',
471
- 'Mr. and Mrs.', 'Mrs.', 'Msgr.', 'Ms.', 'Prof.', 'Professor', 'Rev.',
472
- 'Reverend', 'Sir', 'Sister', 'The Hon.', 'The Lady.', 'The Lord',
473
- 'The Rt. Hon.'
474
- ],
475
- organization: [
476
- 'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
477
- ],
478
- before: '\\A', after: ADFIX_JOINERS
479
- },
480
- suffix: {
481
- person: [
482
- 'Chartered F.C.S.I.', 'Chartered M.C.S.I.', 'I.F.R.S. Certified', 'F.Inst.L.M.', 'C.I.S.S.P.', 'F.C.I.P.S.',
483
- 'M.R.I.C.S.', 'T.M.I.E.T.', 'Dip. D.M.', 'A.A.M.S.', 'A.C.C.A.', 'A.C.M.A.', 'A.I.F.A.', 'A.W.M.A.', 'C.A.I.A.',
484
- 'C.A.P.M.', 'C.C.I.M.', 'C.D.F.A.', 'C.E.P.P.', 'C.F.B.S.', 'C.G.M.A.', 'C.I.T.P.', 'C.L.T.C.', 'C.P.C.C.',
485
- 'C.R.P.C.', 'C.R.P.S.', 'C.S.O.X.', 'C.S.S.D.', 'F.B.C.S.', 'F.C.C.A.', 'F.C.M.I.', 'F.C.S.I.', 'F.I.E.T.',
486
- 'F.I.R.P.', 'M.I.E.T.', 'M.S.F.S.', 'M.Sc. D.', 'O.R.S.C.', 'R.I.C.P.', 'B.Tech.', 'Cantab.', 'Ch.F.C.',
487
- 'D.Phil.', 'I.T.I.L. v3', 'M.Io.D.', 'S.C.M.P', 'A.C.A.', 'A.C.C.', 'A.E.P.', 'A.I.F.', 'A.S.A.', 'B.Eng.',
488
- 'C.B.V.', 'C.E.M.', 'C.Eng.', 'C.F.A.', 'C.F.F.', 'C.F.P.', 'C.F.S.', 'C.G.A.', 'C.G.B.', 'C.G.P.', 'C.I.M.',
489
- 'C.L.P.', 'C.L.U.', 'C.M.A.', 'C.M.T.', 'C.P.A.', 'C.T.A.', 'C.W.S.', 'D.B.E.', 'D.D.S.', 'D.V.M.', 'E.R.P.',
490
- 'Eng.D.', 'F.C.A.', 'F.P.C.', 'F.R.M.', 'F.R.M.', 'G.S.P.', 'L.P.S.', 'M.B.A.', 'M.B.E.', 'M.E.P.', 'M.Eng.',
491
- 'M.Jur.', 'M.P.A.', 'M.S.F.', 'M.S.P.', 'O.B.E.', 'P.C.C.', 'P.F.S.', 'P.H.R.', 'P.M.C.', 'P.M.P.', 'P.M.P.',
492
- 'P.S.P.', 'R.F.C.', 'V.M.D.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'Ed.M.', 'Hons.', 'LL.B.', 'LL.D.', 'LL.M.', 'M.Ed.',
493
- 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'C.A.', 'E.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'M.S.',
494
- 'O.K.', 'P.A.', 'Q.C.', 'R.D.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
495
- ],
496
- organization: [
497
- 'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
498
- 'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
499
- 'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
500
- 'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
501
- 'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
502
- 'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'K.G.a.A.',
503
- 'L.L.L.P.', 'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.', 'P.L.L.C.',
504
- 'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.', 'S.C.R.I.',
505
- 'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', '&. Cie.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
506
- 'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
507
- 'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
508
- 'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
509
- 'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
510
- 'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
511
- '股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
512
- 'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
513
- 'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
514
- 'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
515
- 'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
516
- 'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
517
- 'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
518
- ],
519
- before: ADFIX_JOINERS, after: '\\z'
520
- }
521
- }
522
-
523
- ADFIX_PATTERNS = {}
524
-
525
- [:prefix, :suffix].each do |adfix_type|
526
- patterns = {}
527
- adfix = ADFIXES[adfix_type]
528
-
529
- [:person, :organization].each do |ct|
530
- with_optional_spaces = adfix[ct].map { |p| p.gsub(ASCII_SPACE, ' *') }
531
- pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
532
- patterns[ct] = /#{adfix[:before]}\(*(?:#{pattern_string})[®™\)]*#{adfix[:after]}/i
533
- end
534
-
535
- ADFIX_PATTERNS[adfix_type] = patterns
536
- end
537
- end
1
+ require 'name_tamer'