name-tamer 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef4402ebc8c35fce9d01108561f09a5af085c445
4
- data.tar.gz: 5ac1d2d43bac66079d8089a508bef15f22aa03bf
3
+ metadata.gz: efc79a2d297ca97447620a9f2cfa839667108a1d
4
+ data.tar.gz: 98022db00b0fccf4e7d2090d7e2883b8ae6239bd
5
5
  SHA512:
6
- metadata.gz: 1ed1bcaeaf186d62442600930ebc6c30baeeb1df065b16abb7c781a41e15ab7fd101591877a1f4ac0562d48e1ed717b8a9330a5cc67b7a30a0f69f7375c03d0e
7
- data.tar.gz: 4929c9a7ff6742df2a4e160ddaeb0b0a209af80996d83cef6d66a814cba8751003895354fe80ac98af7cad5f5d4e9a3f99a8a1f260b4b5c0ae9c117d27422d99
6
+ metadata.gz: b100a7a8944c5ab4beade888f8d17be2d7547c84857301bba3ecf78862df3445844e9228fe1dbbedf15305f47c4c849d880e871f661452b39de2ff94885e2dfe
7
+ data.tar.gz: aedb38fce8a533cea1c3d5d615c66bf985b57a106c91a30363b1c678d99a8e631aaa8fdae1477bd6ef78fca33b03025ea499c2ee67175e8130554e0f93b71945
data/.hound.yml ADDED
@@ -0,0 +1,17 @@
1
+ LineLength:
2
+ Description: 'Limit lines to 120 characters.'
3
+ Max: 120
4
+ Enabled: true
5
+
6
+ MethodLength:
7
+ Description: 'Avoid methods longer than 23 lines of code.'
8
+ Max: 23
9
+ Enabled: true
10
+
11
+ Documentation:
12
+ Description: 'Document classes and non-namespace modules.'
13
+ Enabled: false
14
+
15
+ FileName:
16
+ Description: 'Use snake_case for source file names.'
17
+ Enabled: false
data/.rubocop.yml ADDED
@@ -0,0 +1,10 @@
1
+ inherit_from: .hound.yml
2
+
3
+ CyclomaticComplexity:
4
+ Description: 'Avoid complex methods.'
5
+ Max: 8
6
+
7
+ ClassLength:
8
+ Description: 'Avoid classes longer than 301 lines of code.'
9
+ CountComments: false # count full line comments?
10
+ Max: 301
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- name-tamer (0.1.2)
4
+ name-tamer (0.1.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # NameTamer
2
2
 
3
- ![Gem Version](http://img.shields.io/gem/v/name-tamer.svg?style=flat) [![Coverage Status](https://img.shields.io/coveralls/Xenapto/name-tamer.svg?style=flat)](https://coveralls.io/r/Xenapto/name-tamer?branch=master)
3
+ ![Gem Version](http://img.shields.io/gem/v/name-tamer.svg?style=flat) [![Code Climate](http://img.shields.io/codeclimate/github/Xenapto/name-tamer.svg?style=flat)](https://codeclimate.com/github/Xenapto/name-tamer) [![Coverage Status](https://img.shields.io/coveralls/Xenapto/name-tamer.svg?style=flat)](https://coveralls.io/r/Xenapto/name-tamer?branch=master)
4
4
  [![Developer status](http://img.shields.io/badge/developer-awesome-brightgreen.svg?style=flat)](http://xenapto.com)
5
5
  ![build status](https://circleci.com/gh/Xenapto/name-tamer.png?circle-token=2293f2a1d8463a948c2a2ce4bb3bd99786958c59)
6
6
 
data/Rakefile CHANGED
@@ -1 +1 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
data/lib/name-tamer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ require 'string_extras'
2
3
 
3
4
  # References:
4
5
  # http://www.w3.org/International/questions/qa-personal-names
@@ -46,7 +47,7 @@ class NameTamer
46
47
  remove_dots_from_abbreviations # "J.P.R. Williams" -> "JPR Williams"
47
48
  standardize_words # "B&Q Intl" -> "B and Q International"
48
49
 
49
- @simple_name = ensure_whitespace_is_ascii_space @simple_name
50
+ @simple_name.whitespace_to!(ASCII_SPACE)
50
51
  end
51
52
 
52
53
  @simple_name
@@ -66,7 +67,7 @@ class NameTamer
66
67
  contact_type_best_effort
67
68
  end
68
69
 
69
- def contact_type= new_contact_type
70
+ def contact_type=(new_contact_type)
70
71
  ct_as_sym = new_contact_type.to_sym
71
72
 
72
73
  unless @contact_type.nil? || @contact_type == ct_as_sym
@@ -76,24 +77,23 @@ class NameTamer
76
77
  @contact_type = ct_as_sym
77
78
  end
78
79
 
79
- =begin These lines aren't used and aren't covered by specs
80
- def name=(new_name)
81
- initialize new_name, :contact_type => @contact_type
82
- end
83
-
84
- def to_hash
85
- {
86
- name: name,
87
- nice_name: nice_name,
88
- simple_name: simple_name,
89
- slug: slug,
90
- contact_type: contact_type,
91
- last_name: last_name,
92
- remainder: remainder,
93
- adfix_found: adfix_found
94
- }
95
- end
96
- =end
80
+ # These lines aren't used and aren't covered by specs
81
+ # def name=(new_name)
82
+ # initialize new_name, :contact_type => @contact_type
83
+ # end
84
+ #
85
+ # def to_hash
86
+ # {
87
+ # name: name,
88
+ # nice_name: nice_name,
89
+ # simple_name: simple_name,
90
+ # slug: slug,
91
+ # contact_type: contact_type,
92
+ # last_name: last_name,
93
+ # remainder: remainder,
94
+ # adfix_found: adfix_found
95
+ # }
96
+ # end
97
97
 
98
98
  private
99
99
 
@@ -102,50 +102,56 @@ class NameTamer
102
102
  #--------------------------------------------------------
103
103
 
104
104
  def tidy_spacing
105
- @nice_name.gsub!(/,\s*/, ', ') # Ensure commas have exactly one space after them
106
- @nice_name.strip! # remove leading & trailing whitespace
107
- @nice_name = ensure_whitespace_is_ascii_space @nice_name
105
+ @nice_name
106
+ .space_after_comma!
107
+ .strip_or_self!
108
+ .whitespace_to!(ASCII_SPACE)
108
109
  end
109
110
 
110
111
  # Remove spaces from groups of initials
111
112
  def consolidate_initials
112
- @nice_name.gsub!(/\b([a-z])\.* (?=[a-z][\. ])/i) { |match| "#{$1}." } # Remove spaces from initial groups
113
- @nice_name.gsub!(/\b([a-z](?:\.[a-z])+)\.?(?= )/i) { |match| "#{$1}." } # Ensure each group ends with a dot
113
+ @nice_name
114
+ .remove_spaces_from_initials!
115
+ .ensure_space_after_initials!
114
116
  end
115
117
 
116
118
  # An adfix is either a prefix or a suffix
117
119
  def remove_adfixes
118
120
  if @last_name.nil?
119
121
  # Our name is still in one part, not two
120
- begin
122
+ loop do
121
123
  @nice_name = remove_outermost_adfix(:suffix, @nice_name)
122
- end while @adfix_found
124
+ break unless @adfix_found
125
+ end
123
126
 
124
- begin
127
+ loop do
125
128
  @nice_name = remove_outermost_adfix(:prefix, @nice_name)
126
- end while @adfix_found
129
+ break unless @adfix_found
130
+ end
127
131
  else
128
132
  # Our name is currently in two halves
129
- begin
133
+ loop do
130
134
  @last_name = remove_outermost_adfix(:suffix, @last_name)
131
- end while @adfix_found
135
+ break unless @adfix_found
136
+ end
132
137
 
133
- begin
138
+ loop do
134
139
  @remainder = remove_outermost_adfix(:prefix, @remainder)
135
- end while @adfix_found
140
+ break unless @adfix_found
141
+ end
136
142
  end
137
143
  end
138
144
 
139
145
  # Names in the form "Smith, John" need to be turned around to "John Smith"
140
146
  def fixup_last_name_first
141
- unless @contact_type == :organization
142
- parts = @nice_name.split ', '
147
+ return if @contact_type == :organization
143
148
 
144
- if parts.count == 2
145
- @last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it
146
- @remainder = parts[1]
147
- end
148
- end
149
+ parts = @nice_name.split ', '
150
+
151
+ return unless parts.count == 2
152
+
153
+ @last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it
154
+ @remainder = parts[1]
149
155
  end
150
156
 
151
157
  # Sometimes we end up with mismatched braces after adfix stripping
@@ -168,7 +174,8 @@ class NameTamer
168
174
  uppercase = @nice_name.upcase
169
175
 
170
176
  # Some companies like to be all lowercase so don't mess with them
171
- @nice_name = name_case(lowercase) if @nice_name == uppercase || ( @nice_name == lowercase && @contact_type != :organization )
177
+ @nice_name = name_case(lowercase) if @nice_name == uppercase ||
178
+ ( @nice_name == lowercase && @contact_type != :organization)
172
179
  else
173
180
  lowercase = @last_name.downcase
174
181
  uppercase = @last_name.upcase
@@ -180,14 +187,9 @@ class NameTamer
180
187
 
181
188
  # Conjoin compound names with non-breaking spaces
182
189
  def use_nonbreaking_spaces_in_compound_names
183
- # Fix known last names that have spaces (not hyphens!)
184
- COMPOUND_NAMES.each do |compound_name|
185
- @nice_name.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
186
- end
187
-
188
- NAME_MODIFIERS.each do |modifier|
189
- @nice_name.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |match| "#{$1}#{NONBREAKING_SPACE}" }
190
- end
190
+ @nice_name
191
+ .nbsp_in_compound_name!
192
+ .nbsp_in_name_modifier!
191
193
  end
192
194
 
193
195
  #--------------------------------------------------------
@@ -197,48 +199,45 @@ class NameTamer
197
199
  # Remove initials from personal names unless they are the only identifier.
198
200
  # i.e. only remove initials if there's also a proper name there
199
201
  def remove_initials
200
- if @contact_type == :person
201
- temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
202
+ return unless @contact_type == :person
202
203
 
203
- # If the name still has at least one space we're OK
204
- @simple_name = temp_name if temp_name.include?(ASCII_SPACE)
205
- end
204
+ temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
205
+
206
+ # If the name still has at least one space we're OK
207
+ @simple_name = temp_name if temp_name.include?(ASCII_SPACE)
206
208
  end
207
209
 
208
210
  def remove_middle_names
209
- if @contact_type == :person
210
- parts = @simple_name.split
211
- first_name = nil
212
- last_name = nil
213
-
214
- # Find first usable name
215
- parts.each_index do |i|
216
- part = parts[i]
217
-
218
- unless part.gsub(FILTER_COMPAT, '').empty?
219
- first_name = part
220
- parts = parts.slice(i + 1, parts.length) # don't use "slice!"
221
- break
222
- end
223
- end
224
-
225
- # Find last usable name
226
- parts.reverse_each do |part|
227
- unless part.gsub(FILTER_COMPAT, '').empty?
228
- last_name = part
229
- break
230
- end
231
- end
211
+ return unless @contact_type == :person
212
+
213
+ parts = @simple_name.split
214
+ first_name = nil
215
+ last_name = nil
216
+
217
+ # Find first usable name
218
+ parts.each_index do |i|
219
+ part = parts[i]
220
+ next if part.gsub(FILTER_COMPAT, '').empty?
221
+ first_name = part
222
+ parts = parts.slice(i + 1, parts.length) # don't use "slice!"
223
+ break
224
+ end
232
225
 
233
- if first_name || last_name
234
- separator = first_name && last_name ? ' ' : ''
235
- @simple_name = "#{first_name}#{separator}#{last_name}"
236
- end
226
+ # Find last usable name
227
+ parts.reverse_each do |part|
228
+ next if part.gsub(FILTER_COMPAT, '').empty?
229
+ last_name = part
230
+ break
237
231
  end
232
+
233
+ return unless first_name || last_name
234
+
235
+ separator = first_name && last_name ? ' ' : ''
236
+ @simple_name = "#{first_name}#{separator}#{last_name}"
238
237
  end
239
238
 
240
239
  def remove_dots_from_abbreviations
241
- @simple_name.gsub!(/\b([a-z])\./i) { |match| $1 }
240
+ @simple_name.gsub!(/\b([a-z])\./i) { |_match| Regexp.last_match[1] }
242
241
  end
243
242
 
244
243
  def standardize_words
@@ -253,7 +252,7 @@ class NameTamer
253
252
 
254
253
  def slugify
255
254
  # Inflector::parameterize just gives up with non-latin characters so...
256
- #@slug = @slug.parameterize # Can't use this
255
+ # @slug = @slug.parameterize # Can't use this
257
256
 
258
257
  # Instead we'll do it ourselves
259
258
  @slug = parameterize @slug
@@ -296,12 +295,8 @@ class NameTamer
296
295
  end
297
296
  end
298
297
 
299
- def ensure_whitespace_is_ascii_space string
300
- string.gsub(/[[:space:]]+/, ASCII_SPACE) # /\s/ doesn't match Unicode whitespace in Ruby 1.9.3
301
- end
302
-
303
298
  # We pass to this routine either prefixes or suffixes
304
- def remove_outermost_adfix adfix_type, name_part
299
+ def remove_outermost_adfix(adfix_type, name_part)
305
300
  adfixes = ADFIX_PATTERNS[adfix_type]
306
301
  ct = contact_type_best_effort
307
302
  parts = name_part.partition adfixes[ct]
@@ -344,48 +339,19 @@ class NameTamer
344
339
  # Substantially modified for Xendata
345
340
  # Improved in several areas, also now adds non-breaking spaces for
346
341
  # compound names like "van der Pump"
347
- def name_case lowercase
348
- n = lowercase # We assume the name is passed already downcased
349
- n.gsub!(/\b\w/) { |first| first.upcase }
350
- n.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
351
-
352
- # Our list of terminal characters that indicate a non-celtic name used
353
- # to include o but we removed it because of MacMurdo.
354
- if n =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ or n =~ /\bMc/
355
- n.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |match| $1 + $2.capitalize }
356
-
357
- # Fix Mac exceptions
358
- [
359
- 'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
360
- 'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
361
- ].each { |mac_name| n.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
362
- end
363
-
364
- # Fix ff wierdybonks
365
- [
366
- 'Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes'
367
- ].each { |ff_name| n.gsub!(ff_name,ff_name.downcase) }
368
-
369
- # Fixes for name modifiers followed by space
370
- # Also replaces spaces with non-breaking spaces
371
- NAME_MODIFIERS.each do |modifier|
372
- n.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) { |match| "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}" }
373
- end
374
-
375
- # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
376
- ['Dell', 'D'].each do |modifier|
377
- n.gsub!(/(.#{modifier}')(\w)/) { |match| "#{$1.rstrip.downcase}#{$2}" }
378
- end
379
-
380
- # Upcase words with no vowels, e.g JPR Williams
381
- n.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |match| $1.upcase }
382
- # Except Ng
383
- n.gsub!(/\b(NG)\b/i) { |match| $1.capitalize } # http://en.wikipedia.org/wiki/Ng
342
+ def name_case(lowercase)
343
+ n = lowercase.dup # We assume the name is passed already downcased
384
344
 
385
345
  n
346
+ .upcase_first_letter!
347
+ .downcase_after_apostrophe!
348
+ .fix_mac!
349
+ .fix_ff!
350
+ .fix_name_modifiers!
351
+ .upcase_initials!
386
352
  end
387
353
 
388
- def parameterize string, args = {}
354
+ def parameterize(string, args = {})
389
355
  sep = args[:sep] || SLUG_DELIMITER
390
356
  rfc3987 = args[:rfc3987] || false
391
357
  filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
@@ -394,29 +360,12 @@ class NameTamer
394
360
  # things we want to alter for the slug, like whitespace (e.g. %20)
395
361
  new_string = URI.unescape(string)
396
362
 
397
- # Then we change any whitespace into our separator character
398
- new_string.gsub!(/\s+/, sep)
399
-
400
- # Change some characters embedded in words to our separator character
401
- # e.g. example.com -> example-com
402
- new_string.gsub!(/(?<!\s)[\.\/](?!\s)/, sep)
403
-
404
- # Then we strip any other illegal characters out completely
405
- new_string.gsub!(filter, '')
406
-
407
- # Make sure separators are not where they shouldn't be
408
- unless sep.nil? || sep.empty?
409
- re_sep = Regexp.escape(sep)
410
- # No more than one of the separator in a row.
411
- new_string.gsub!(/#{re_sep}{2,}/, sep)
412
- # Remove leading/trailing separator.
413
- new_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '')
414
- end
415
-
416
- # Any characters that resemble latin characters might usefully be
417
- # transliterated into ones that are easy to type on an anglophone
418
- # keyboard.
419
- new_string.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
363
+ new_string
364
+ .whitespace_to!(sep)
365
+ .invalid_chars_to!(sep)
366
+ .strip_invalid!(filter)
367
+ .fix_separators!(sep)
368
+ .approximate_latin_chars!
420
369
 
421
370
  # Have we got anything left?
422
371
  new_string = '_' if new_string.empty?
@@ -434,39 +383,6 @@ class NameTamer
434
383
  ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
435
384
  SLUG_DELIMITER = '-'
436
385
 
437
- # Transliterations (like the i18n defaults)
438
- # see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
439
- APPROXIMATIONS = {
440
- "À"=>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
441
- "Ç"=>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
442
- "Î"=>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
443
- "Õ"=>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
444
- "Ü"=>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
445
- "ã"=>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
446
- "ê"=>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
447
- "ñ"=>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
448
- "ù"=>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
449
- "Ā"=>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
450
- "ć"=>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
451
- "Ď"=>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
452
- "ĕ"=>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
453
- "Ĝ"=>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
454
- "ģ"=>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
455
- "Ī"=>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
456
- "ı"=>"i", "IJ"=>"IJ", "ij"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
457
- "ĸ"=>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
458
- "Ŀ"=>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
459
- "ņ"=>"n", "Ň"=>"N", "ň"=>"n", "ʼn"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
460
- "Ō"=>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
461
- "œ"=>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
462
- "Ś"=>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
463
- "š"=>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
464
- "Ũ"=>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
465
- "ů"=>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
466
- "Ŷ"=>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
467
- "Ž"=>"Z", "ž"=>"z"
468
- }
469
-
470
386
  # Constants for parameterizing Unicode strings for IRIs
471
387
  #
472
388
  # Allowed characters in an IRI segment are defined by RFC 3987
@@ -505,21 +421,10 @@ class NameTamer
505
421
  FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
506
422
  FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
507
423
 
508
- NAME_MODIFIERS = [
509
- 'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San', 'St[\.]?',
510
- 'Zur'
511
- ]
512
-
513
- COMPOUND_NAMES = [
514
- 'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
515
- 'Baron Cohen', 'Strang Steel',
516
- 'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
517
- ]
518
-
519
424
  # These are the prefixes and suffixes we want to remove
520
425
  # If you add to the list, you can use spaces and dots where appropriate
521
426
  # Ensure any single letters are followed by a dot because we'll add one to the string
522
- # during processing, e.g. "y Cía." should be "y. Cía."
427
+ # during processing, e.g. "y Cia." should be "y. Cia."
523
428
  ADFIXES = {
524
429
  prefix: {
525
430
  person: [
@@ -534,7 +439,7 @@ class NameTamer
534
439
  organization: [
535
440
  'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
536
441
  ],
537
- before:'\\A', after:ADFIX_JOINERS
442
+ before: '\\A', after: ADFIX_JOINERS
538
443
  },
539
444
  suffix: {
540
445
  person: [
@@ -543,10 +448,10 @@ class NameTamer
543
448
  'M.I.E.T.', 'B.Tech.',
544
449
  'Cantab.', 'D.Phil.', 'I.T.I.L. v3', 'B.Eng.', 'C.Eng.', 'M.Jur.', 'C.F.A.', 'D.B.E.',
545
450
  'D.D.S.', 'D.V.M.', 'Eng.D.', 'A.C.A.', 'C.T.A.', 'E.R.P.', 'F.C.A', 'F.P.C.', 'F.R.M.', 'M.B.A.', 'M.B.E.',
546
- 'M.E.P.', 'M.Eng.', 'M.Jur.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'V.M.D.', 'B.Ed.', 'B.Sc.', 'Ed.D.',
547
- 'Hons.', 'LL.B.',
548
- 'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'O.K.',
549
- 'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'I', 'V'
451
+ 'M.E.P.', 'M.Eng.', 'M.Jur.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'V.M.D.', 'B.Ed.', 'B.Sc.',
452
+ 'Ed.D.', 'Hons.', 'LL.B.',
453
+ 'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.',
454
+ 'O.K.', 'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'I', 'V'
550
455
  ],
551
456
  organization: [
552
457
  'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
@@ -572,7 +477,7 @@ class NameTamer
572
477
  'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
573
478
  'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
574
479
  ],
575
- before:ADFIX_JOINERS, after:'\\z'
480
+ before: ADFIX_JOINERS, after: '\\z'
576
481
  }
577
482
  }
578
483
 
@@ -583,7 +488,7 @@ class NameTamer
583
488
  adfix = ADFIXES[adfix_type]
584
489
 
585
490
  [:person, :organization].each do |ct|
586
- with_optional_spaces = adfix[ct].map { |p| p.gsub(ASCII_SPACE,' *') }
491
+ with_optional_spaces = adfix[ct].map { |p| p.gsub(ASCII_SPACE, ' *') }
587
492
  pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
588
493
  patterns[ct] = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
589
494
  end
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = "0.1.3"
2
+ VERSION = '0.1.4'
3
3
  end
@@ -0,0 +1,188 @@
1
+ # encoding: utf-8
2
+ class String
3
+ # Strip illegal characters out completely
4
+ def strip_invalid!(filter)
5
+ self.gsub!(filter, '')
6
+ self # Allows chaining
7
+ end
8
+
9
+ def strip_or_self!
10
+ self.strip!
11
+ self # Allows chaining
12
+ end
13
+
14
+ # Change any whitespace into our separator character
15
+ def whitespace_to!(separator)
16
+ self.gsub!(/[[:space:]]+/, separator)
17
+ self # Allows chaining
18
+ end
19
+
20
+ # Ensure commas have exactly one space after them
21
+ def space_after_comma!
22
+ self.gsub!(/,[[:space:]]*/, ', ')
23
+ self # Allows chaining
24
+ end
25
+
26
+ # Change some characters embedded in words to our separator character
27
+ # e.g. example.com -> example-com
28
+ def invalid_chars_to!(separator)
29
+ self.gsub!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
30
+ self # Allows chaining
31
+ end
32
+
33
+ # Make sure separators are not where they shouldn't be
34
+ def fix_separators!(separator)
35
+ unless separator.nil? || separator.empty?
36
+ r = Regexp.escape(separator)
37
+ # No more than one of the separator in a row.
38
+ self.gsub!(/#{r}{2,}/, separator)
39
+ # Remove leading/trailing separator.
40
+ self.gsub!(/^#{r}|#{r}$/i, '')
41
+ end
42
+
43
+ self # Allows chaining
44
+ end
45
+
46
+ # Any characters that resemble latin characters might usefully be
47
+ # transliterated into ones that are easy to type on an anglophone
48
+ # keyboard.
49
+ def approximate_latin_chars!
50
+ self.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
51
+ self # Allows chaining
52
+ end
53
+
54
+ def upcase_first_letter!
55
+ self.gsub!(/\b\w/) { |first| first.upcase }
56
+ self # Allows chaining
57
+ end
58
+
59
+ def downcase_after_apostrophe!
60
+ self.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
61
+ self # Allows chaining
62
+ end
63
+
64
+ # Our list of terminal characters that indicate a non-celtic name used
65
+ # to include o but we removed it because of MacMurdo.
66
+ def fix_mac!
67
+ if self =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || self =~ /\bMc/
68
+ self.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |_| Regexp.last_match[1] + Regexp.last_match[2].capitalize }
69
+
70
+ # Fix Mac exceptions
71
+ %w(
72
+ MacEdo MacEvicius MacHado MacHar MacHin MacHlin MacIas MacIulis MacKie
73
+ MacKle MacKlin MacKmin MacKmurdo MacQuarie MacLise MacKenzie
74
+ ).each { |mac_name| self.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
75
+ end
76
+
77
+ self # Allows chaining
78
+ end
79
+
80
+ # Fix "ff" weirdybonks: known surnames that keep a lowercase double f (e.g. ffoulkes)
81
+ def fix_ff!
82
+ %w(
83
+ Fforbes Fforde Ffinch Ffrench Ffoulkes
84
+ ).each { |ff_name| self.gsub!(ff_name, ff_name.downcase) }
85
+
86
+ self # Allows chaining
87
+ end
88
+
89
+ # Fixes for name modifiers followed by space
90
+ # Also replaces spaces with non-breaking spaces
91
+ # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
92
+ def fix_name_modifiers!
93
+ NAME_MODIFIERS.each do |modifier|
94
+ self.gsub!(/((?:[[:space:]]|^)#{modifier})([[:space:]]+|-)/) do |_|
95
+ "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2].tr(ASCII_SPACE, NONBREAKING_SPACE)}"
96
+ end
97
+ end
98
+
99
+ %w(Dell D).each do |modifier|
100
+ self.gsub!(/(.#{modifier}')(\w)/) { |_| "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2]}" }
101
+ end
102
+
103
+ self # Allows chaining
104
+ end
105
+
106
+ # Upcase words with no vowels, e.g. JPR Williams
107
+ # Except Ng
108
+ def upcase_initials!
109
+ self.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |_| Regexp.last_match[1].upcase }
110
+ self.gsub!(/\b(NG)\b/i) { |_| Regexp.last_match[1].capitalize } # http://en.wikipedia.org/wiki/Ng
111
+
112
+ self # Allows chaining
113
+ end
114
+
115
+ # Fix known last names that have spaces (not hyphens!)
116
+ def nbsp_in_compound_name!
117
+ COMPOUND_NAMES.each do |compound_name|
118
+ self.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
119
+ end
120
+
121
+ self # Allows chaining
122
+ end
123
+
124
+ def nbsp_in_name_modifier!
125
+ NAME_MODIFIERS.each do |modifier|
126
+ self.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |_| "#{Regexp.last_match[1]}#{NONBREAKING_SPACE}" }
127
+ end
128
+
129
+ self # Allows chaining
130
+ end
131
+
132
+ def remove_spaces_from_initials!
133
+ self.gsub!(/\b([a-z])(\.)* \b(?![a-z0-9']{2,})/i) { |_| "#{Regexp.last_match[1]}#{Regexp.last_match[2]}" }
134
+ self # Allows chaining
135
+ end
136
+
137
+ def ensure_space_after_initials!
138
+ self.gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " }
139
+ self # Allows chaining
140
+ end
141
+
142
+ NONBREAKING_SPACE = "\u00a0"
143
+ ASCII_SPACE = "\u0020"
144
+
145
+ COMPOUND_NAMES = [
146
+ 'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
147
+ 'Baron Cohen', 'Strang Steel',
148
+ 'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
149
+ ]
150
+
151
+ NAME_MODIFIERS = [
152
+ 'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
153
+ 'St[\.]?', 'Zur'
154
+ ]
155
+
156
+ # Transliterations (like the i18n defaults)
157
+ # see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
158
+ APPROXIMATIONS = {
159
+ 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE',
160
+ 'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I',
161
+ 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O',
162
+ 'Õ' => 'O', 'Ö' => 'O', '×' => 'x', 'Ø' => 'O', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
163
+ 'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'Th', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a',
164
+ 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
165
+ 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd',
166
+ 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
167
+ 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y',
168
+ 'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a', 'Ć' => 'C',
169
+ 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c', 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
170
+ 'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd', 'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E',
171
+ 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
172
+ 'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G',
173
+ 'ģ' => 'g', 'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h', 'Ĩ' => 'I', 'ĩ' => 'i',
174
+ 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I',
175
+ 'ı' => 'i', 'IJ' => 'IJ', 'ij' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k',
176
+ 'ĸ' => 'k', 'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l',
177
+ 'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L', 'ł' => 'l', 'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N',
178
+ 'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n', 'ʼn' => "'n", 'Ŋ' => 'NG', 'ŋ' => 'ng',
179
+ 'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o', 'Œ' => 'OE',
180
+ 'œ' => 'oe', 'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
181
+ 'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's', 'Š' => 'S',
182
+ 'š' => 's', 'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
183
+ 'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U',
184
+ 'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w',
185
+ 'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
186
+ 'Ž' => 'Z', 'ž' => 'z'
187
+ }
188
+ end
data/name-tamer.gemspec CHANGED
@@ -7,14 +7,14 @@ Gem::Specification.new do |spec|
7
7
  spec.version = NameTamer::VERSION
8
8
  spec.authors = ['Xenapto']
9
9
  spec.email = ['developers@xenapto.com']
10
- spec.description = %q{Useful methods for taming names}
11
- spec.summary = %q{Example: NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith}
10
+ spec.description = %q(Useful methods for taming names)
11
+ spec.summary = %q(Example: NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith)
12
12
  spec.homepage = 'https://github.com/Xenapto/name-tamer'
13
13
  spec.license = 'MIT'
14
14
 
15
- spec.files = `git ls-files`.split($/)
16
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
- spec.test_files = spec.files.grep(%r{^(test|spec|features|coverage)/})
15
+ spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
16
+ spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(/^(test|spec|features|coverage)\//)
18
18
  spec.require_paths = ['lib']
19
19
 
20
20
  spec.add_development_dependency 'bundler', '~> 1'
@@ -5,157 +5,191 @@ require 'name-tamer'
5
5
  describe NameTamer do
6
6
  let(:names) do
7
7
  [
8
- { n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
9
- { n:'JOHN SMITH', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
10
- { n:'john smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
11
- { n:'Smith, John', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
12
- { n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
13
- { n:'Smith, John', nn:'John Smith', sn:'John Smith', s:'john-smith' },
14
- { n:'John J Smith', t: :person, nn:'John J Smith', sn:'John Smith', s:'john-smith' },
15
- { n:'John J. Smith', t: :person, nn:'John J. Smith', sn:'John Smith', s:'john-smith' },
16
- { n:'SMITH, Mr John J.R.', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
17
- { n:' SMITH, Mr John J. R. ', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
18
- { n:'SMITH, Mr John J.R.', nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
19
- { n:'Mr John J.R. SMITH JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
20
- { n:'Mr John J.R. SMITH III,JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
21
- { n:'Mr John J.R. SMITH JD', nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
22
- { n:'Mr Jean-Michel SMITH JD', t: :person, nn:'Jean-Michel SMITH', sn:'Jean-Michel SMITH', s:'jean-michel-smith' },
23
- { n:'Mr Jean Michel-SMITH JD', nn:'Jean Michel-SMITH', sn:'Jean Michel-SMITH', s:'jean-michel-smith' },
24
- { n:'Dr Martha Lane Fox Ph.D', nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
25
- { n:'Lane Fox Ph.D, Dr Martha', t: :person, nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
26
- { n:'Baroness Lane-Fox of Lewisham', t: :person, nn:'Lane-Fox of Lewisham', sn:'Lane-Fox of Lewisham', s:'lane-fox-of-lewisham' },
27
- { n:'MACDONALDS LLC', nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
28
- { n:'MACDONALDS LLC', t: :organization, nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
29
- { n:'macdonalds', t: :organization, nn:'macdonalds', sn:'macdonalds', s:'macdonalds' },
30
- { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization, nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
31
- { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
32
- { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
33
- { n:'K.V.A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
34
- { n:'K. V. A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
35
- { n:'J.P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
36
- { n:'J. P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
37
- { n:'J P Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
38
- { n:'JP Rangaswami', nn:'JP Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
39
- { n:'Audrey fforbes', nn:'Audrey fforbes', sn:'Audrey fforbes', s:'audrey-fforbes' },
40
- { n:'J. Arthur Rank', t: :person, nn:'J. Arthur Rank', sn:'Arthur Rank', s:'arthur-rank' },
41
- { n:'PHILIP NG', t: :person, nn:'Philip Ng', sn:'Philip Ng', s:'philip-ng' },
42
- { n:'Super R&D', nn:'Super R&D', sn:'Super R and D', s:'super-r-and-d' },
43
- { n:'Harry Dean Stanton', t: :person, nn:'Harry Dean Stanton', sn:'Harry Stanton', s:'harry-stanton' },
44
- { n:'Union Square Ventures', t: :organization, nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
45
- { n:'J Arthur Rank Inc.', t: :organization, nn:'J Arthur Rank', sn:'J Arthur Rank', s:'j-arthur-rank' },
46
- { n:'Jean VAN DER VELDE', t: :person, nn:'Jean VAN DER VELDE', sn:'Jean VAN DER VELDE', s:'jean-van-der-velde' },
47
- { n:'Al Capone', t: :person, nn:'Al Capone', sn:'Al Capone', s:'al-capone' },
48
- { n:'Fahd al-Saud', t: :person, nn:'Fahd al-Saud', sn:'Fahd al-Saud', s:'fahd-al-saud' },
49
- { n:'Mehmet al Auouiby', t: :person, nn:'Mehmet al Auouiby', sn:'Mehmet al Auouiby', s:'mehmet-al-auouiby' },
50
- { n:'Macquarie Bank', t: :organization, nn:'Macquarie Bank', sn:'Macquarie Bank', s:'macquarie-bank' },
51
- { n:"COMMEDIA DELL'ARTE", t: :organization, nn:"Commedia dell'Arte", sn:"Commedia dell'Arte", s:'commedia-dellarte' },
52
- { n:'Della Smith', t: :person, nn:'Della Smith', sn:'Della Smith', s:'della-smith' },
53
- { n:'Antonio DELLA MONTEVERDE', nn:'Antonio DELLA MONTEVERDE', sn:'Antonio DELLA MONTEVERDE', s:'antonio-della-monteverde' },
54
- { n:'Tony St Clair', t: :person, nn:'Tony St Clair', sn:'Tony St Clair', s:'tony-st-clair' },
55
- { n:'Seamus O\'Malley', t: :person, nn:'Seamus O\'Malley', sn:'Seamus O\'Malley', s:'seamus-omalley' },
56
- { n:'SeedCamp', t: :organization, nn:'SeedCamp', sn:'SeedCamp', s:'seedcamp' },
57
- { n:'Peter Van Der Auwera', t: :person, nn:'Peter Van Der Auwera', sn:'Peter Van Der Auwera', s:'peter-van-der-auwera' },
58
- { n:'VAN DER AUWERA, Peter', t: :person, nn:'Peter van der Auwera', sn:'Peter van der Auwera', s:'peter-van-der-auwera' },
59
- { n:'Li Fan', t: :person, nn:'Li Fan', sn:'Li Fan', s:'li-fan' },
60
- { n:'Fan Li', t: :person, nn:'Fan Li', sn:'Fan Li', s:'fan-li' },
61
- { n:'Levi Strauss & Co.', nn:'Levi Strauss', sn:'Levi Strauss', s:'levi-strauss' },
62
- { n:'Standard & Poor\'s', t: :organization, nn:'Standard & Poor\'s', sn:'Standard and Poor\'s', s:'standard-and-poors' },
63
- { n:'I B M Services', t: :organization, nn:'I.B.M. Services', sn:'IBM Services', s:'ibm-services' },
64
- { n:'Sean Park DDS', t: :person, nn:'Sean Park', sn:'Sean Park', s:'sean-park' },
65
- { n:'SEAN MACLISE PARK', t: :person, nn:'Sean Maclise Park', sn:'Sean Park', s:'sean-park' },
66
- { n:'AJ Hanna', t: :person, nn:'AJ Hanna', sn:'AJ Hanna', s:'aj-hanna' },
67
- { n:'Free & Clear', t: :organization, nn:'Free & Clear', sn:'Free and Clear', s:'free-and-clear' },
68
- { n:'Adam D\'ANGELO', t: :person, nn:'Adam D\'ANGELO', sn:'Adam D\'ANGELO', s:'adam-dangelo' },
69
- { n:'MACKENZIE, Doug', t: :person, nn:'Doug Mackenzie', sn:'Doug Mackenzie', s:'doug-mackenzie' },
70
- { n:'Up + Down', t: :organization, nn:'Up + Down', sn:'Up plus Down', s:'up-plus-down' },
71
- { n:'San Francisco Ltd', t: :organization, nn:'San Francisco', sn:'San Francisco', s:'san-francisco' },
72
- { n:'AT&T', t: :organization, nn:'At&T', sn:'At and T', s:'at-and-t' },
73
- { n:'SMITH, John, Jr.', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
74
- { n:'I Heart Movies', t: :organization, nn:'I Heart Movies', sn:'I Heart Movies', s:'i-heart-movies' },
75
- { n:'Y Combinator', t: :organization, nn:'Y Combinator', sn:'Y Combinator', s:'y-combinator' },
76
- { n:'Ben\'s 10 Hens', t: :organization, nn:'Ben\'s 10 Hens', sn:'Ben\'s 10 Hens', s:'bens-10-hens' },
77
- { n:'Elazer Edelman, MD , PhD', t: :person, nn:'Elazer Edelman', sn:'Elazer Edelman', s:'elazer-edelman' },
78
- { n:'Judith M. O\'Brien', t: :person, nn:'Judith M. O\'Brien', sn:'Judith O\'Brien', s:'judith-obrien' },
79
- { n:'MORRISON, Van', t: :person, nn:'Van Morrison', sn:'Van Morrison', s:'van-morrison' },
80
- { n:'i/o Ventures', t: :organization, nn:'i/o Ventures', sn:'i/o Ventures', s:'i-o-ventures' },
81
- { n:'C T Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
82
- { n:'C.T. Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
83
- { n:'CT Corporation System', t: :person, nn:'CT Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
84
- { n:'Corporation Service Company', t: :person, nn:'Corporation Service Company', sn:'Corporation Service Company', s:'corporation-service-company'},
85
- { n:'Kurshuni,Inc.', t: :organization, nn:'Kurshuni', sn:'Kurshuni', s:'kurshuni' },
86
- { n:'Cellular Inc-LLC', t: :organization, nn:'Cellular', sn:'Cellular', s:'cellular' },
87
- { n:'Emtec (AZ) Limited', t: :organization, nn:'Emtec (AZ)', sn:'Emtec (AZ)', s:'emtec-az' },
88
- { n:'Emtec (LLC) Limited', t: :organization, nn:'Emtec', sn:'Emtec', s:'emtec' },
89
- { n:'Emtec (XYZ LLC) Limited', t: :organization, nn:'Emtec (XYZ)', sn:'Emtec (XYZ)', s:'emtec-xyz' },
90
- { n:'Tao Ma', t: :person, nn:'Tao', sn:'Tao', s:'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
91
- { n:'(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn:'Courtney J. Miller', sn:'Courtney Miller', s:'courtney-miller' },
92
- { n:'(Mr Woo) The Window Cleaner', t: :person, nn:'(Woo) The Window Cleaner', sn:'(Woo) Cleaner', s:'woo-cleaner'},
93
- { n:'DOMINIC MACMURDO', t: :person, nn:'Dominic MacMurdo', sn:'Dominic MacMurdo', s:'dominic-macmurdo' },
94
- { n:'DOMINIC MACEDO', t: :person, nn:'Dominic Macedo', sn:'Dominic Macedo', s:'dominic-macedo' },
95
- { n:'DOMINIC MACDONALD', t: :person, nn:'Dominic MacDonald', sn:'Dominic MacDonald', s:'dominic-macdonald' },
96
- { n:'AGUSTA DO ROMEIRO', t: :person, nn:'Agusta do Romeiro', sn:'Agusta do Romeiro', s:'agusta-do-romeiro' },
97
- { n:'CARLOS DOS SANTOS', t: :person, nn:'Carlos dos Santos', sn:'Carlos dos Santos', s:'carlos-dos-santos' },
98
- { n:'유정 신', t: :organization, nn:'유정 신', sn:'유정 신', s:'유정-신' },
99
- { n:'xxx%52zzz', t: :organization, nn:'xxx%52zzz', sn:'xxx%52zzz', s:'xxxrzzz' },
100
- { n:'Евгений Болотнов', t: :organization, nn:'Евгений Болотнов', sn:'Евгений Болотнов', s:'Евгений-Болотнов' },
101
- { n:'김태성', t: :organization, nn:'김태성', sn:'김태성', s:'김태성' },
102
- { n:'ゴルフスタジアム', t: :organization, nn:'ゴルフスタジアム', sn:'ゴルフスタジアム', s:'ゴルフスタジアム' },
103
- { n:'我摘', t: :organization, nn:'我摘', sn:'我摘', s:'我摘' },
104
- { n:'Καρατζάς Στέφανος', t: :organization, nn:'Καρατζάς Στέφανος', sn:'Καρατζάς Στέφανος', s:'Καρατζάς-Στέφανος' },
105
- { n:'โชติวัน วัฒนลาภ', t: :organization, nn:'โชติวัน วัฒนลาภ', sn:'โชติวัน วัฒนลาภ', s:'โชติวัน-วัฒนลาภ' },
106
- { n:'張 續寶', t: :organization, nn:'張 續寶', sn:'張 續寶', s:'張-續寶' },
107
- { n:'Юрий Гайдук', t: :organization, nn:'Юрий Гайдук', sn:'Юрий Гайдук', s:'Юрий-Гайдук' },
108
- { n:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', s:'☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
109
- { n:'♠ KlasikB0i ♠', t: :organization, nn:'♠ KlasikB0i ♠', sn:'♠ KlasikB0i ♠', s:'♠-klasikb0i-♠' },
110
- { n:'* Shorusan *', t: :organization, nn:'* Shorusan *', sn:'* Shorusan *', s:'shorusan' },
111
- { n:'项目谷', t: :organization, nn:'项目谷', sn:'项目谷', s:'项目谷' },
112
- { n:'ООО "Инновационные полимерные адгезивы"', t: :organization, nn:'ООО "Инновационные полимерные адгезивы"', sn:'ООО "Инновационные полимерные адгезивы"', s:'ООО-Инновационные-полимерные-адгезивы' },
113
- { n:'عبدالله ...', t: :organization, nn:'عبدالله ...', sn:'عبدالله ...', s:'عبدالله' },
114
- { n:'กมลชนก ทิศไธสง', t: :organization, nn:'กมลชนก ทิศไธสง', sn:'กมลชนก ทิศไธสง', s:'กมลชนก-ทิศไธสง' },
115
- { n:'יוֹ אָב', t: :organization, nn:'יוֹ אָב', sn:'יוֹ אָב', s:'יוֹ-אָב' },
116
- { n:'יגאל נימני', t: :organization, nn:'יגאל נימני', sn:'יגאל נימני', s:'יגאל-נימני' },
117
- { n:'ניסים דניאלי', t: :organization, nn:'ניסים דניאלי', sn:'ניסים דניאלי', s:'ניסים-דניאלי' },
118
- { n:'مساء الخير', t: :organization, nn:'مساء الخير', sn:'مساء الخير', s:'مساء-الخير' },
119
- { n:'محمود ياسر', t: :organization, nn:'محمود ياسر', sn:'محمود ياسر', s:'محمود-ياسر' },
120
- { n:'קובי ביטר', t: :organization, nn:'קובי ביטר', sn:'קובי ביטר', s:'קובי-ביטר' },
121
- { n:'الملاك الحارس', t: :organization, nn:'الملاك الحارس', sn:'الملاك الحارس', s:'الملاك-الحارس' },
122
- { n:'কবির হাসান', t: :organization, nn:'কবির হাসান', sn:'কবির হাসান', s:'কবির-হাসান' },
8
+ { n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
9
+ { n: 'JOHN SMITH', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
10
+ { n: 'john smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
11
+ { n: 'Smith, John', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
12
+ { n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
13
+ { n: 'Smith, John', nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
14
+ { n: 'John J. Smith', t: :person, nn: 'John J. Smith', sn: 'John Smith', s: 'john-smith' },
15
+ { n: 'John J. Smith', t: :person, nn: 'John J. Smith', sn: 'John Smith', s: 'john-smith' },
16
+ { n: 'SMITH, Mr John J.R.', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
17
+ { n: ' SMITH, Mr John J. R. ', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
18
+ { n: 'SMITH, Mr John J.R.', nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
19
+ { n: 'Mr John J.R. SMITH JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
20
+ { n: 'Mr John J.R. SMITH III,JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
21
+ { n: 'Mr John J.R. SMITH JD', nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
22
+ { n: 'Mr Jean-Michel SMITH JD', t: :person, nn: 'Jean-Michel SMITH', sn: 'Jean-Michel SMITH',
23
+ s: 'jean-michel-smith' },
24
+ { n: 'Mr Jean Michel-SMITH JD', nn: 'Jean Michel-SMITH', sn: 'Jean Michel-SMITH', s: 'jean-michel-smith' },
25
+ { n: 'Dr Martha Lane Fox Ph.D', nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
26
+ { n: 'Lane Fox Ph.D, Dr Martha', t: :person, nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
27
+ { n: 'Baroness Lane-Fox of Lewisham', t: :person, nn: 'Lane-Fox of Lewisham', sn: 'Lane-Fox of Lewisham',
28
+ s: 'lane-fox-of-lewisham' },
29
+ { n: 'MACDONALDS LLC', nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
30
+ { n: 'MACDONALDS LLC', t: :organization, nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
31
+ { n: 'macdonalds', t: :organization, nn: 'macdonalds', sn: 'macdonalds', s: 'macdonalds' },
32
+ { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization,
33
+ nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
34
+ sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
35
+ s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
36
+ { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP',
37
+ nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
38
+ sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
39
+ s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
40
+ { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP',
41
+ nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
42
+ sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
43
+ s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
44
+ { n: 'K.V.A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments', s: 'kva-instruments' },
45
+ { n: 'K. V. A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments',
46
+ s: 'kva-instruments' },
47
+ { n: 'J.P.R. Williams', nn: 'J.P.R. Williams', sn: 'JPR Williams', s: 'jpr-williams' },
48
+ { n: 'J. P. R. Williams', nn: 'J.P.R. Williams', sn: 'JPR Williams', s: 'jpr-williams' },
49
+ { n: 'J P R Williams', nn: 'JPR Williams', sn: 'JPR Williams', s: 'jpr-williams' },
50
+ { n: 'JPR Williams', nn: 'JPR Williams', sn: 'JPR Williams', s: 'jpr-williams' },
51
+ { n: 'Audrey fforbes', nn: 'Audrey fforbes', sn: 'Audrey fforbes', s: 'audrey-fforbes' },
52
+ { n: 'J. Arthur Rank', t: :person, nn: 'J. Arthur Rank', sn: 'Arthur Rank', s: 'arthur-rank' },
53
+ { n: 'PHILIP NG', t: :person, nn: 'Philip Ng', sn: 'Philip Ng', s: 'philip-ng' },
54
+ { n: 'Super R&D', nn: 'Super R&D', sn: 'Super R and D', s: 'super-r-and-d' },
55
+ { n: 'Harry Dean Stanton', t: :person, nn: 'Harry Dean Stanton', sn: 'Harry Stanton', s: 'harry-stanton' },
56
+ { n: 'Union Square Ventures', t: :organization, nn: 'Union Square Ventures', sn: 'Union Square Ventures',
57
+ s: 'union-square-ventures' },
58
+ { n: 'J Arthur Rank Inc.', t: :organization, nn: 'J Arthur Rank', sn: 'J Arthur Rank', s: 'j-arthur-rank' },
59
+ { n: 'Jean VAN DER VELDE', t: :person, nn: 'Jean VAN DER VELDE', sn: 'Jean VAN DER VELDE',
60
+ s: 'jean-van-der-velde' },
61
+ { n: 'Al Capone', t: :person, nn: 'Al Capone', sn: 'Al Capone', s: 'al-capone' },
62
+ { n: 'Fahd al-Saud', t: :person, nn: 'Fahd al-Saud', sn: 'Fahd al-Saud', s: 'fahd-al-saud' },
63
+ { n: 'Mehmet al Auouiby', t: :person, nn: 'Mehmet al Auouiby', sn: 'Mehmet al Auouiby', s: 'mehmet-al-auouiby' },
64
+ { n: 'Macquarie Bank', t: :organization, nn: 'Macquarie Bank', sn: 'Macquarie Bank', s: 'macquarie-bank' },
65
+ { n: "COMMEDIA DELL'ARTE", t: :organization, nn: "Commedia dell'Arte", sn: "Commedia dell'Arte",
66
+ s: 'commedia-dellarte' },
67
+ { n: 'Della Smith', t: :person, nn: 'Della Smith', sn: 'Della Smith', s: 'della-smith' },
68
+ { n: 'Antonio DELLA MONTEVERDE', nn: 'Antonio DELLA MONTEVERDE', sn: 'Antonio DELLA MONTEVERDE',
69
+ s: 'antonio-della-monteverde' },
70
+ { n: 'Tony St Clair', t: :person, nn: 'Tony St Clair', sn: 'Tony St Clair', s: 'tony-st-clair' },
71
+ { n: 'Seamus O\'Malley', t: :person, nn: 'Seamus O\'Malley', sn: 'Seamus O\'Malley', s: 'seamus-omalley' },
72
+ { n: 'SeedCamp', t: :organization, nn: 'SeedCamp', sn: 'SeedCamp', s: 'seedcamp' },
73
+ { n: 'Peter Van Der Auwera', t: :person, nn: 'Peter Van Der Auwera', sn: 'Peter Van Der Auwera',
74
+ s: 'peter-van-der-auwera' },
75
+ { n: 'VAN DER AUWERA, Peter', t: :person, nn: 'Peter van der Auwera', sn: 'Peter van der Auwera',
76
+ s: 'peter-van-der-auwera' },
77
+ { n: 'Li Fan', t: :person, nn: 'Li Fan', sn: 'Li Fan', s: 'li-fan' },
78
+ { n: 'Fan Li', t: :person, nn: 'Fan Li', sn: 'Fan Li', s: 'fan-li' },
79
+ { n: 'Levi Strauss & Co.', nn: 'Levi Strauss', sn: 'Levi Strauss', s: 'levi-strauss' },
80
+ { n: 'Standard & Poor\'s', t: :organization, nn: 'Standard & Poor\'s', sn: 'Standard and Poor\'s',
81
+ s: 'standard-and-poors' },
82
+ { n: 'I B M Services', t: :organization, nn: 'IBM Services', sn: 'IBM Services', s: 'ibm-services' },
83
+ { n: 'Sean Park DDS', t: :person, nn: 'Sean Park', sn: 'Sean Park', s: 'sean-park' },
84
+ { n: 'SEAN MACLISE PARK', t: :person, nn: 'Sean Maclise Park', sn: 'Sean Park', s: 'sean-park' },
85
+ { n: 'AJ Hanna', t: :person, nn: 'AJ Hanna', sn: 'AJ Hanna', s: 'aj-hanna' },
86
+ { n: 'Free & Clear', t: :organization, nn: 'Free & Clear', sn: 'Free and Clear', s: 'free-and-clear' },
87
+ { n: 'Adam D\'ANGELO', t: :person, nn: 'Adam D\'ANGELO', sn: 'Adam D\'ANGELO', s: 'adam-dangelo' },
88
+ { n: 'MACKENZIE, Doug', t: :person, nn: 'Doug Mackenzie', sn: 'Doug Mackenzie', s: 'doug-mackenzie' },
89
+ { n: 'Up + Down', t: :organization, nn: 'Up + Down', sn: 'Up plus Down', s: 'up-plus-down' },
90
+ { n: 'San Francisco Ltd', t: :organization, nn: 'San Francisco', sn: 'San Francisco', s: 'san-francisco' },
91
+ { n: 'AT&T', t: :organization, nn: 'At&T', sn: 'At and T', s: 'at-and-t' },
92
+ { n: 'SMITH, John, Jr.', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
93
+ { n: 'I Heart Movies', t: :organization, nn: 'I Heart Movies', sn: 'I Heart Movies', s: 'i-heart-movies' },
94
+ { n: 'Y Combinator', t: :organization, nn: 'Y Combinator', sn: 'Y Combinator', s: 'y-combinator' },
95
+ { n: 'Ben\'s 10 Hens', t: :organization, nn: 'Ben\'s 10 Hens', sn: 'Ben\'s 10 Hens', s: 'bens-10-hens' },
96
+ { n: 'Elazer Edelman, MD , PhD', t: :person, nn: 'Elazer Edelman', sn: 'Elazer Edelman', s: 'elazer-edelman' },
97
+ { n: 'Judith M. O\'Brien', t: :person, nn: 'Judith M. O\'Brien', sn: 'Judith O\'Brien', s: 'judith-obrien' },
98
+ { n: 'MORRISON, Van', t: :person, nn: 'Van Morrison', sn: 'Van Morrison', s: 'van-morrison' },
99
+ { n: 'i/o Ventures', t: :organization, nn: 'i/o Ventures', sn: 'i/o Ventures', s: 'i-o-ventures' },
100
+ { n: 'C T Corporation System', t: :person, nn: 'CT Corporation System', sn: 'CT Corporation System',
101
+ s: 'ct-corporation-system' },
102
+ { n: 'C.T. Corporation System', t: :person, nn: 'C.T. Corporation System', sn: 'CT Corporation System',
103
+ s: 'ct-corporation-system' },
104
+ { n: 'CT Corporation System', t: :person, nn: 'CT Corporation System', sn: 'CT Corporation System',
105
+ s: 'ct-corporation-system' },
106
+ { n: 'Corporation Service Company', t: :person, nn: 'Corporation Service Company',
107
+ sn: 'Corporation Service Company', s: 'corporation-service-company' },
108
+ { n: 'Kurshuni,Inc.', t: :organization, nn: 'Kurshuni', sn: 'Kurshuni', s: 'kurshuni' },
109
+ { n: 'Cellular Inc-LLC', t: :organization, nn: 'Cellular', sn: 'Cellular', s: 'cellular' },
110
+ { n: 'Emtec (AZ) Limited', t: :organization, nn: 'Emtec (AZ)', sn: 'Emtec (AZ)', s: 'emtec-az' },
111
+ { n: 'Emtec (LLC) Limited', t: :organization, nn: 'Emtec', sn: 'Emtec', s: 'emtec' },
112
+ { n: 'Emtec (XYZ LLC) Limited', t: :organization, nn: 'Emtec (XYZ)', sn: 'Emtec (XYZ)', s: 'emtec-xyz' },
113
+ { n: 'Tao Ma', t: :person, nn: 'Tao', sn: 'Tao',
114
+ s: 'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
115
+ { n: '(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn: 'Courtney J. Miller', sn: 'Courtney Miller',
116
+ s: 'courtney-miller' },
117
+ { n: '(Mr Woo) The Window Cleaner', t: :person, nn: '(Woo) The Window Cleaner', sn: '(Woo) Cleaner',
118
+ s: 'woo-cleaner' },
119
+ { n: 'DOMINIC MACMURDO', t: :person, nn: 'Dominic MacMurdo', sn: 'Dominic MacMurdo', s: 'dominic-macmurdo' },
120
+ { n: 'DOMINIC MACEDO', t: :person, nn: 'Dominic Macedo', sn: 'Dominic Macedo', s: 'dominic-macedo' },
121
+ { n: 'DOMINIC MACDONALD', t: :person, nn: 'Dominic MacDonald', sn: 'Dominic MacDonald', s: 'dominic-macdonald' },
122
+ { n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
123
+ { n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
124
+ { n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
125
+ { n: 'xxx%52zzz', t: :organization, nn: 'xxx%52zzz', sn: 'xxx%52zzz', s: 'xxxrzzz' },
126
+ { n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
127
+ s: 'Евгений-Болотнов' },
128
+ { n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
129
+ { n: 'ゴルフスタジアム', t: :organization, nn: 'ゴルフスタジアム', sn: 'ゴルフスタジアム', s: 'ゴルフスタジアム' },
130
+ { n: '我摘', t: :organization, nn: '我摘', sn: '我摘', s: '我摘' },
131
+ { n: 'Καρατζάς Στέφανος', t: :organization, nn: 'Καρατζάς Στέφανος', sn: 'Καρατζάς Στέφανος',
132
+ s: 'Καρατζάς-Στέφανος' },
133
+ { n: 'โชติวัน วัฒนลาภ', t: :organization, nn: 'โชติวัน วัฒนลาภ', sn: 'โชติวัน วัฒนลาภ', s: 'โชติวัน-วัฒนลาภ' },
134
+ { n: '張 續寶', t: :organization, nn: '張 續寶', sn: '張 續寶', s: '張-續寶' },
135
+ { n: 'Юрий Гайдук', t: :organization, nn: 'Юрий Гайдук', sn: 'Юрий Гайдук', s: 'Юрий-Гайдук' },
136
+ { n: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣',
137
+ s: '☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
138
+ { n: '♠ KlasikB0i ♠', t: :organization, nn: '♠ KlasikB0i ♠', sn: '♠ KlasikB0i ♠', s: '♠-klasikb0i-♠' },
139
+ { n: '* Shorusan *', t: :organization, nn: '* Shorusan *', sn: '* Shorusan *', s: 'shorusan' },
140
+ { n: '项目谷', t: :organization, nn: '项目谷', sn: '项目谷', s: '项目谷' },
141
+ { n: 'ООО "Инновационные полимерные адгезивы"', t: :organization, nn: 'ООО "Инновационные полимерные адгезивы"',
142
+ sn: 'ООО "Инновационные полимерные адгезивы"', s: 'ООО-Инновационные-полимерные-адгезивы' },
143
+ { n: 'عبدالله ...', t: :organization, nn: 'عبدالله ...', sn: 'عبدالله ...', s: 'عبدالله' },
144
+ { n: 'กมลชนก ทิศไธสง', t: :organization, nn: 'กมลชนก ทิศไธสง', sn: 'กมลชนก ทิศไธสง', s: 'กมลชนก-ทิศไธสง' },
145
+ { n: 'יוֹ אָב', t: :organization, nn: 'יוֹ אָב', sn: 'יוֹ אָב', s: 'יוֹ-אָב' },
146
+ { n: 'יגאל נימני', t: :organization, nn: 'יגאל נימני', sn: 'יגאל נימני', s: 'יגאל-נימני' },
147
+ { n: 'ניסים דניאלי', t: :organization, nn: 'ניסים דניאלי', sn: 'ניסים דניאלי', s: 'ניסים-דניאלי' },
148
+ { n: 'مساء الخير', t: :organization, nn: 'مساء الخير', sn: 'مساء الخير', s: 'مساء-الخير' },
149
+ { n: 'محمود ياسر', t: :organization, nn: 'محمود ياسر', sn: 'محمود ياسر', s: 'محمود-ياسر' },
150
+ { n: 'קובי ביטר', t: :organization, nn: 'קובי ביטר', sn: 'קובי ביטר', s: 'קובי-ביטר' },
151
+ { n: 'الملاك الحارس', t: :organization, nn: 'الملاك الحارس', sn: 'الملاك الحارس', s: 'الملاك-الحارس' },
152
+ { n: 'কবির হাসান', t: :organization, nn: 'কবির হাসান', sn: 'কবির হাসান', s: 'কবির-হাসান' },
123
153
  { nn: '', sn: '', s: '_' },
124
- { n:'Union Square Ventures', t: 'Organization', nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
125
- { n:'John Smith', t: 'Person', nn:'John Smith', sn:'John Smith', s:'john-smith' },
126
- { n:'John Smith', t: :nonsense, nn:'John Smith', sn:'John Smith', s:'john-smith' },
127
- { n:'John Smith', t: Kernel, nn:'John Smith', sn:'John Smith', s:'john-smith' },
128
- { n:'Ms Jane Smith', t: :person, nn:'Jane Smith', sn:'Jane Smith', s:'jane-smith' },
129
- { n:'example.com', t: :organization, nn:'example.com', sn:'example.com', s:'example-com' },
130
- { n:'Hermann Müller', t: :person, nn: 'Hermann Müller', sn: 'Hermann Müller', s:'hermann-muller'},
131
- { n:'b-to-v Partners AG', t: :organization, nn:'b-to-v Partners', sn:'b-to-v Partners', s:'b-to-v-partners' },
132
- { n:'*', t: :person, nn: '*', sn: '*', s:'_'},
133
- { n:'* *', t: :person, nn: '* *', sn: '* *', s:'_'},
134
- { n:'* Olga *', t: :person, nn: '* Olga *', sn: 'Olga', s:'olga'},
135
- { n:'* Olga Bedia García *', t: :person, nn: '* Olga Bedia García *', sn: 'Olga García', s:'olga-garcia'},
136
- { n:'John Smith M.A. (Oxon)', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith'}
154
+ { n: 'Union Square Ventures', t: 'Organization', nn: 'Union Square Ventures', sn: 'Union Square Ventures',
155
+ s: 'union-square-ventures' },
156
+ { n: 'John Smith', t: 'Person', nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
157
+ { n: 'John Smith', t: :nonsense, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
158
+ { n: 'John Smith', t: Kernel, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
159
+ { n: 'Ms Jane Smith', t: :person, nn: 'Jane Smith', sn: 'Jane Smith', s: 'jane-smith' },
160
+ { n: 'example.com', t: :organization, nn: 'example.com', sn: 'example.com', s: 'example-com' },
161
+ { n: 'Hermann Müller', t: :person, nn: 'Hermann Müller', sn: 'Hermann Müller', s: 'hermann-muller' },
162
+ { n: 'b-to-v Partners AG', t: :organization, nn: 'b-to-v Partners', sn: 'b-to-v Partners', s: 'b-to-v-partners' },
163
+ { n: '*', t: :person, nn: '*', sn: '*', s: '_' },
164
+ { n: '* *', t: :person, nn: '* *', sn: '* *', s: '_' },
165
+ { n: '* Olga *', t: :person, nn: '* Olga *', sn: 'Olga', s: 'olga' },
166
+ { n: '* Olga Bedia García *', t: :person, nn: '* Olga Bedia García *', sn: 'Olga García', s: 'olga-garcia' },
167
+ { n: 'John Smith M.A. (Oxon)', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
168
+ { n: 'I B M', t: :organization, nn: 'Ibm', sn: 'Ibm', s: 'ibm' },
169
+ { n: 'I-B-M', t: :organization, nn: 'I-B-M', sn: 'I-B-M', s: 'i-b-m' },
170
+ { n: 'I.B.M.', t: :organization, nn: 'I.B.M.', sn: 'IBM', s: 'ibm' }
137
171
  ]
138
172
  end
139
173
 
140
- it "makes a slug" do
174
+ it 'makes a slug' do
141
175
  names.each do |name_data|
142
176
  name = name_data[:n]
143
- NameTamer[name, contact_type:name_data[:t]].slug.should == name_data[:s]
177
+ NameTamer[name, contact_type: name_data[:t]].slug.should == name_data[:s]
144
178
  end
145
179
  end
146
180
 
147
- it "makes a nice name" do
181
+ it 'makes a nice name' do
148
182
  names.each do |name_data|
149
183
  name = name_data[:n]
150
- nice_name = NameTamer[name, contact_type:name_data[:t]].nice_name
184
+ nice_name = NameTamer[name, contact_type: name_data[:t]].nice_name
151
185
  nice_name.should == name_data[:nn]
152
186
  end
153
187
  end
154
188
 
155
- it "makes a searchable name" do
189
+ it 'makes a searchable name' do
156
190
  names.each do |name_data|
157
191
  name = name_data[:n]
158
- NameTamer[name, contact_type:name_data[:t]].simple_name.should == name_data[:sn]
192
+ NameTamer[name, contact_type: name_data[:t]].simple_name.should == name_data[:sn]
159
193
  end
160
194
  end
161
195
  end
data/spec/spec_helper.rb CHANGED
@@ -5,11 +5,11 @@ Coveralls.wear!
5
5
  SimpleCov.start
6
6
 
7
7
  RSpec.configure do |config|
8
- # Run specs in random order to surface order dependencies. If you find an
8
+ # Run specs in random order to surface order dependencies. If you find an
9
9
  # order dependency and want to debug it, you can fix the order by providing
10
10
  # the seed, which is printed after each run.
11
11
  # --seed 1234
12
- config.order = "random"
12
+ config.order = 'random'
13
13
 
14
14
  # Manually-added
15
15
  config.color_enabled = true
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-02 00:00:00.000000000 Z
11
+ date: 2014-06-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -109,6 +109,8 @@ extra_rdoc_files: []
109
109
  files:
110
110
  - ".env"
111
111
  - ".gitignore"
112
+ - ".hound.yml"
113
+ - ".rubocop.yml"
112
114
  - ".ruby-version"
113
115
  - Gemfile
114
116
  - Gemfile.lock
@@ -120,6 +122,7 @@ files:
120
122
  - doc/suffixes.csv
121
123
  - lib/name-tamer.rb
122
124
  - lib/name-tamer/version.rb
125
+ - lib/string_extras.rb
123
126
  - name-tamer.gemspec
124
127
  - spec/name_tamer_spec.rb
125
128
  - spec/spec_helper.rb