name-tamer 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +17 -0
- data/.rubocop.yml +10 -0
- data/Gemfile.lock +1 -1
- data/README.md +1 -1
- data/Rakefile +1 -1
- data/lib/name-tamer.rb +106 -201
- data/lib/name-tamer/version.rb +1 -1
- data/lib/string_extras.rb +188 -0
- data/name-tamer.gemspec +5 -5
- data/spec/name_tamer_spec.rb +168 -134
- data/spec/spec_helper.rb +2 -2
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: efc79a2d297ca97447620a9f2cfa839667108a1d
|
4
|
+
data.tar.gz: 98022db00b0fccf4e7d2090d7e2883b8ae6239bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b100a7a8944c5ab4beade888f8d17be2d7547c84857301bba3ecf78862df3445844e9228fe1dbbedf15305f47c4c849d880e871f661452b39de2ff94885e2dfe
|
7
|
+
data.tar.gz: aedb38fce8a533cea1c3d5d615c66bf985b57a106c91a30363b1c678d99a8e631aaa8fdae1477bd6ef78fca33b03025ea499c2ee67175e8130554e0f93b71945
|
data/.hound.yml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
LineLength:
|
2
|
+
Description: 'Limit lines to 120 characters.'
|
3
|
+
Max: 120
|
4
|
+
Enabled: true
|
5
|
+
|
6
|
+
MethodLength:
|
7
|
+
Description: 'Avoid methods longer than 23 lines of code.'
|
8
|
+
Max: 23
|
9
|
+
Enabled: true
|
10
|
+
|
11
|
+
Documentation:
|
12
|
+
Description: 'Document classes and non-namespace modules.'
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
FileName:
|
16
|
+
Description: 'Use snake_case for source file names.'
|
17
|
+
Enabled: false
|
data/.rubocop.yml
ADDED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# NameTamer
|
2
2
|
|
3
|
-
![Gem Version](http://img.shields.io/gem/v/name-tamer.svg?style=flat) [![Coverage Status](https://img.shields.io/coveralls/Xenapto/name-tamer.svg?style=flat)](https://coveralls.io/r/Xenapto/name-tamer?branch=master)
|
3
|
+
![Gem Version](http://img.shields.io/gem/v/name-tamer.svg?style=flat) [![Code Climate](http://img.shields.io/codeclimate/github/Xenapto/name-tamer.svg?style=flat)](https://codeclimate.com/github/Xenapto/name-tamer) [![Coverage Status](https://img.shields.io/coveralls/Xenapto/name-tamer.svg?style=flat)](https://coveralls.io/r/Xenapto/name-tamer?branch=master)
|
4
4
|
[![Developer status](http://img.shields.io/badge/developer-awesome-brightgreen.svg?style=flat)](http://xenapto.com)
|
5
5
|
![build status](https://circleci.com/gh/Xenapto/name-tamer.png?circle-token=2293f2a1d8463a948c2a2ce4bb3bd99786958c59)
|
6
6
|
|
data/Rakefile
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
data/lib/name-tamer.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
+
require 'string_extras'
|
2
3
|
|
3
4
|
# References:
|
4
5
|
# http://www.w3.org/International/questions/qa-personal-names
|
@@ -46,7 +47,7 @@ class NameTamer
|
|
46
47
|
remove_dots_from_abbreviations # "J.P.R. Williams" -> "JPR Williams"
|
47
48
|
standardize_words # "B&Q Intl" -> "B and Q International"
|
48
49
|
|
49
|
-
@simple_name
|
50
|
+
@simple_name.whitespace_to!(ASCII_SPACE)
|
50
51
|
end
|
51
52
|
|
52
53
|
@simple_name
|
@@ -66,7 +67,7 @@ class NameTamer
|
|
66
67
|
contact_type_best_effort
|
67
68
|
end
|
68
69
|
|
69
|
-
def contact_type=
|
70
|
+
def contact_type=(new_contact_type)
|
70
71
|
ct_as_sym = new_contact_type.to_sym
|
71
72
|
|
72
73
|
unless @contact_type.nil? || @contact_type == ct_as_sym
|
@@ -76,24 +77,23 @@ class NameTamer
|
|
76
77
|
@contact_type = ct_as_sym
|
77
78
|
end
|
78
79
|
|
79
|
-
|
80
|
-
def name=(new_name)
|
81
|
-
|
82
|
-
end
|
83
|
-
|
84
|
-
def to_hash
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
end
|
96
|
-
=end
|
80
|
+
# These lines aren't used and aren't covered by specs
|
81
|
+
# def name=(new_name)
|
82
|
+
# initialize new_name, :contact_type => @contact_type
|
83
|
+
# end
|
84
|
+
#
|
85
|
+
# def to_hash
|
86
|
+
# {
|
87
|
+
# name: name,
|
88
|
+
# nice_name: nice_name,
|
89
|
+
# simple_name: simple_name,
|
90
|
+
# slug: slug,
|
91
|
+
# contact_type: contact_type,
|
92
|
+
# last_name: last_name,
|
93
|
+
# remainder: remainder,
|
94
|
+
# adfix_found: adfix_found
|
95
|
+
# }
|
96
|
+
# end
|
97
97
|
|
98
98
|
private
|
99
99
|
|
@@ -102,50 +102,56 @@ class NameTamer
|
|
102
102
|
#--------------------------------------------------------
|
103
103
|
|
104
104
|
def tidy_spacing
|
105
|
-
@nice_name
|
106
|
-
|
107
|
-
|
105
|
+
@nice_name
|
106
|
+
.space_after_comma!
|
107
|
+
.strip_or_self!
|
108
|
+
.whitespace_to!(ASCII_SPACE)
|
108
109
|
end
|
109
110
|
|
110
111
|
# Remove spaces from groups of initials
|
111
112
|
def consolidate_initials
|
112
|
-
@nice_name
|
113
|
-
|
113
|
+
@nice_name
|
114
|
+
.remove_spaces_from_initials!
|
115
|
+
.ensure_space_after_initials!
|
114
116
|
end
|
115
117
|
|
116
118
|
# An adfix is either a prefix or a suffix
|
117
119
|
def remove_adfixes
|
118
120
|
if @last_name.nil?
|
119
121
|
# Our name is still in one part, not two
|
120
|
-
|
122
|
+
loop do
|
121
123
|
@nice_name = remove_outermost_adfix(:suffix, @nice_name)
|
122
|
-
|
124
|
+
break unless @adfix_found
|
125
|
+
end
|
123
126
|
|
124
|
-
|
127
|
+
loop do
|
125
128
|
@nice_name = remove_outermost_adfix(:prefix, @nice_name)
|
126
|
-
|
129
|
+
break unless @adfix_found
|
130
|
+
end
|
127
131
|
else
|
128
132
|
# Our name is currently in two halves
|
129
|
-
|
133
|
+
loop do
|
130
134
|
@last_name = remove_outermost_adfix(:suffix, @last_name)
|
131
|
-
|
135
|
+
break unless @adfix_found
|
136
|
+
end
|
132
137
|
|
133
|
-
|
138
|
+
loop do
|
134
139
|
@remainder = remove_outermost_adfix(:prefix, @remainder)
|
135
|
-
|
140
|
+
break unless @adfix_found
|
141
|
+
end
|
136
142
|
end
|
137
143
|
end
|
138
144
|
|
139
145
|
# Names in the form "Smith, John" need to be turned around to "John Smith"
|
140
146
|
def fixup_last_name_first
|
141
|
-
|
142
|
-
parts = @nice_name.split ', '
|
147
|
+
return if @contact_type == :organization
|
143
148
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
+
parts = @nice_name.split ', '
|
150
|
+
|
151
|
+
return unless parts.count == 2
|
152
|
+
|
153
|
+
@last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it
|
154
|
+
@remainder = parts[1]
|
149
155
|
end
|
150
156
|
|
151
157
|
# Sometimes we end up with mismatched braces after adfix stripping
|
@@ -168,7 +174,8 @@ class NameTamer
|
|
168
174
|
uppercase = @nice_name.upcase
|
169
175
|
|
170
176
|
# Some companies like to be all lowercase so don't mess with them
|
171
|
-
@nice_name = name_case(lowercase)
|
177
|
+
@nice_name = name_case(lowercase) if @nice_name == uppercase ||
|
178
|
+
( @nice_name == lowercase && @contact_type != :organization)
|
172
179
|
else
|
173
180
|
lowercase = @last_name.downcase
|
174
181
|
uppercase = @last_name.upcase
|
@@ -180,14 +187,9 @@ class NameTamer
|
|
180
187
|
|
181
188
|
# Conjoin compound names with non-breaking spaces
|
182
189
|
def use_nonbreaking_spaces_in_compound_names
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
end
|
187
|
-
|
188
|
-
NAME_MODIFIERS.each do |modifier|
|
189
|
-
@nice_name.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |match| "#{$1}#{NONBREAKING_SPACE}" }
|
190
|
-
end
|
190
|
+
@nice_name
|
191
|
+
.nbsp_in_compound_name!
|
192
|
+
.nbsp_in_name_modifier!
|
191
193
|
end
|
192
194
|
|
193
195
|
#--------------------------------------------------------
|
@@ -197,48 +199,45 @@ class NameTamer
|
|
197
199
|
# Remove initials from personal names unless they are the only identifier.
|
198
200
|
# i.e. only remove initials if there's also a proper name there
|
199
201
|
def remove_initials
|
200
|
-
|
201
|
-
temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
|
202
|
+
return unless @contact_type == :person
|
202
203
|
|
203
|
-
|
204
|
-
|
205
|
-
|
204
|
+
temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
|
205
|
+
|
206
|
+
# If the name still has at least one space we're OK
|
207
|
+
@simple_name = temp_name if temp_name.include?(ASCII_SPACE)
|
206
208
|
end
|
207
209
|
|
208
210
|
def remove_middle_names
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
end
|
224
|
-
|
225
|
-
# Find last usable name
|
226
|
-
parts.reverse_each do |part|
|
227
|
-
unless part.gsub(FILTER_COMPAT, '').empty?
|
228
|
-
last_name = part
|
229
|
-
break
|
230
|
-
end
|
231
|
-
end
|
211
|
+
return unless @contact_type == :person
|
212
|
+
|
213
|
+
parts = @simple_name.split
|
214
|
+
first_name = nil
|
215
|
+
last_name = nil
|
216
|
+
|
217
|
+
# Find first usable name
|
218
|
+
parts.each_index do |i|
|
219
|
+
part = parts[i]
|
220
|
+
next if part.gsub(FILTER_COMPAT, '').empty?
|
221
|
+
first_name = part
|
222
|
+
parts = parts.slice(i + 1, parts.length) # don't use "slice!"
|
223
|
+
break
|
224
|
+
end
|
232
225
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
226
|
+
# Find last usable name
|
227
|
+
parts.reverse_each do |part|
|
228
|
+
next if part.gsub(FILTER_COMPAT, '').empty?
|
229
|
+
last_name = part
|
230
|
+
break
|
237
231
|
end
|
232
|
+
|
233
|
+
return unless first_name || last_name
|
234
|
+
|
235
|
+
separator = first_name && last_name ? ' ' : ''
|
236
|
+
@simple_name = "#{first_name}#{separator}#{last_name}"
|
238
237
|
end
|
239
238
|
|
240
239
|
def remove_dots_from_abbreviations
|
241
|
-
@simple_name.gsub!(/\b([a-z])\./i) { |
|
240
|
+
@simple_name.gsub!(/\b([a-z])\./i) { |_match| Regexp.last_match[1] }
|
242
241
|
end
|
243
242
|
|
244
243
|
def standardize_words
|
@@ -253,7 +252,7 @@ class NameTamer
|
|
253
252
|
|
254
253
|
def slugify
|
255
254
|
# Inflector::parameterize just gives up with non-latin characters so...
|
256
|
-
|
255
|
+
# @slug = @slug.parameterize # Can't use this
|
257
256
|
|
258
257
|
# Instead we'll do it ourselves
|
259
258
|
@slug = parameterize @slug
|
@@ -296,12 +295,8 @@ class NameTamer
|
|
296
295
|
end
|
297
296
|
end
|
298
297
|
|
299
|
-
def ensure_whitespace_is_ascii_space string
|
300
|
-
string.gsub(/[[:space:]]+/, ASCII_SPACE) # /\s/ doesn't match Unicode whitespace in Ruby 1.9.3
|
301
|
-
end
|
302
|
-
|
303
298
|
# We pass to this routine either prefixes or suffixes
|
304
|
-
def remove_outermost_adfix
|
299
|
+
def remove_outermost_adfix(adfix_type, name_part)
|
305
300
|
adfixes = ADFIX_PATTERNS[adfix_type]
|
306
301
|
ct = contact_type_best_effort
|
307
302
|
parts = name_part.partition adfixes[ct]
|
@@ -344,48 +339,19 @@ class NameTamer
|
|
344
339
|
# Substantially modified for Xendata
|
345
340
|
# Improved in several areas, also now adds non-breaking spaces for
|
346
341
|
# compound names like "van der Pump"
|
347
|
-
def name_case
|
348
|
-
n = lowercase # We assume the name is passed already downcased
|
349
|
-
n.gsub!(/\b\w/) { |first| first.upcase }
|
350
|
-
n.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
|
351
|
-
|
352
|
-
# Our list of terminal characters that indicate a non-celtic name used
|
353
|
-
# to include o but we removed it because of MacMurdo.
|
354
|
-
if n =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ or n =~ /\bMc/
|
355
|
-
n.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |match| $1 + $2.capitalize }
|
356
|
-
|
357
|
-
# Fix Mac exceptions
|
358
|
-
[
|
359
|
-
'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
|
360
|
-
'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
|
361
|
-
].each { |mac_name| n.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
|
362
|
-
end
|
363
|
-
|
364
|
-
# Fix ff wierdybonks
|
365
|
-
[
|
366
|
-
'Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes'
|
367
|
-
].each { |ff_name| n.gsub!(ff_name,ff_name.downcase) }
|
368
|
-
|
369
|
-
# Fixes for name modifiers followed by space
|
370
|
-
# Also replaces spaces with non-breaking spaces
|
371
|
-
NAME_MODIFIERS.each do |modifier|
|
372
|
-
n.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) { |match| "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}" }
|
373
|
-
end
|
374
|
-
|
375
|
-
# Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
|
376
|
-
['Dell', 'D'].each do |modifier|
|
377
|
-
n.gsub!(/(.#{modifier}')(\w)/) { |match| "#{$1.rstrip.downcase}#{$2}" }
|
378
|
-
end
|
379
|
-
|
380
|
-
# Upcase words with no vowels, e.g JPR Williams
|
381
|
-
n.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |match| $1.upcase }
|
382
|
-
# Except Ng
|
383
|
-
n.gsub!(/\b(NG)\b/i) { |match| $1.capitalize } # http://en.wikipedia.org/wiki/Ng
|
342
|
+
def name_case(lowercase)
|
343
|
+
n = lowercase.dup # We assume the name is passed already downcased
|
384
344
|
|
385
345
|
n
|
346
|
+
.upcase_first_letter!
|
347
|
+
.downcase_after_apostrophe!
|
348
|
+
.fix_mac!
|
349
|
+
.fix_ff!
|
350
|
+
.fix_name_modifiers!
|
351
|
+
.upcase_initials!
|
386
352
|
end
|
387
353
|
|
388
|
-
def parameterize
|
354
|
+
def parameterize(string, args = {})
|
389
355
|
sep = args[:sep] || SLUG_DELIMITER
|
390
356
|
rfc3987 = args[:rfc3987] || false
|
391
357
|
filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
|
@@ -394,29 +360,12 @@ class NameTamer
|
|
394
360
|
# things we want to alter for the slug, like whitespace (e.g. %20)
|
395
361
|
new_string = URI.unescape(string)
|
396
362
|
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
# Then we strip any other illegal characters out completely
|
405
|
-
new_string.gsub!(filter, '')
|
406
|
-
|
407
|
-
# Make sure separators are not where they shouldn't be
|
408
|
-
unless sep.nil? || sep.empty?
|
409
|
-
re_sep = Regexp.escape(sep)
|
410
|
-
# No more than one of the separator in a row.
|
411
|
-
new_string.gsub!(/#{re_sep}{2,}/, sep)
|
412
|
-
# Remove leading/trailing separator.
|
413
|
-
new_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '')
|
414
|
-
end
|
415
|
-
|
416
|
-
# Any characters that resemble latin characters might usefully be
|
417
|
-
# transliterated into ones that are easy to type on an anglophone
|
418
|
-
# keyboard.
|
419
|
-
new_string.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
|
363
|
+
new_string
|
364
|
+
.whitespace_to!(sep)
|
365
|
+
.invalid_chars_to!(sep)
|
366
|
+
.strip_invalid!(filter)
|
367
|
+
.fix_separators!(sep)
|
368
|
+
.approximate_latin_chars!
|
420
369
|
|
421
370
|
# Have we got anything left?
|
422
371
|
new_string = '_' if new_string.empty?
|
@@ -434,39 +383,6 @@ class NameTamer
|
|
434
383
|
ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
|
435
384
|
SLUG_DELIMITER = '-'
|
436
385
|
|
437
|
-
# Transliterations (like the i18n defaults)
|
438
|
-
# see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
|
439
|
-
APPROXIMATIONS = {
|
440
|
-
"À"=>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
|
441
|
-
"Ç"=>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
|
442
|
-
"Î"=>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
|
443
|
-
"Õ"=>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
|
444
|
-
"Ü"=>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
|
445
|
-
"ã"=>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
|
446
|
-
"ê"=>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
|
447
|
-
"ñ"=>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
|
448
|
-
"ù"=>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
|
449
|
-
"Ā"=>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
|
450
|
-
"ć"=>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
|
451
|
-
"Ď"=>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
|
452
|
-
"ĕ"=>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
|
453
|
-
"Ĝ"=>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
|
454
|
-
"ģ"=>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
|
455
|
-
"Ī"=>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
|
456
|
-
"ı"=>"i", "IJ"=>"IJ", "ij"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
|
457
|
-
"ĸ"=>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
|
458
|
-
"Ŀ"=>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
|
459
|
-
"ņ"=>"n", "Ň"=>"N", "ň"=>"n", "ʼn"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
|
460
|
-
"Ō"=>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
|
461
|
-
"œ"=>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
|
462
|
-
"Ś"=>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
|
463
|
-
"š"=>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
|
464
|
-
"Ũ"=>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
|
465
|
-
"ů"=>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
|
466
|
-
"Ŷ"=>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
|
467
|
-
"Ž"=>"Z", "ž"=>"z"
|
468
|
-
}
|
469
|
-
|
470
386
|
# Constants for parameterizing Unicode strings for IRIs
|
471
387
|
#
|
472
388
|
# Allowed characters in an IRI segment are defined by RFC 3987
|
@@ -505,21 +421,10 @@ class NameTamer
|
|
505
421
|
FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
|
506
422
|
FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
|
507
423
|
|
508
|
-
NAME_MODIFIERS = [
|
509
|
-
'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San', 'St[\.]?',
|
510
|
-
'Zur'
|
511
|
-
]
|
512
|
-
|
513
|
-
COMPOUND_NAMES = [
|
514
|
-
'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
|
515
|
-
'Baron Cohen', 'Strang Steel',
|
516
|
-
'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
|
517
|
-
]
|
518
|
-
|
519
424
|
# These are the prefixes and suffixes we want to remove
|
520
425
|
# If you add to the list, you can use spaces and dots where appropriate
|
521
426
|
# Ensure any single letters are followed by a dot because we'll add one to the string
|
522
|
-
# during processing, e.g. "y
|
427
|
+
# during processing, e.g. "y Cia." should be "y. Cia."
|
523
428
|
ADFIXES = {
|
524
429
|
prefix: {
|
525
430
|
person: [
|
@@ -534,7 +439,7 @@ class NameTamer
|
|
534
439
|
organization: [
|
535
440
|
'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
|
536
441
|
],
|
537
|
-
before:'\\A', after:ADFIX_JOINERS
|
442
|
+
before: '\\A', after: ADFIX_JOINERS
|
538
443
|
},
|
539
444
|
suffix: {
|
540
445
|
person: [
|
@@ -543,10 +448,10 @@ class NameTamer
|
|
543
448
|
'M.I.E.T.', 'B.Tech.',
|
544
449
|
'Cantab.', 'D.Phil.', 'I.T.I.L. v3', 'B.Eng.', 'C.Eng.', 'M.Jur.', 'C.F.A.', 'D.B.E.',
|
545
450
|
'D.D.S.', 'D.V.M.', 'Eng.D.', 'A.C.A.', 'C.T.A.', 'E.R.P.', 'F.C.A', 'F.P.C.', 'F.R.M.', 'M.B.A.', 'M.B.E.',
|
546
|
-
'M.E.P.', 'M.Eng.', 'M.Jur.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'V.M.D.', 'B.Ed.', 'B.Sc.',
|
547
|
-
'Hons.', 'LL.B.',
|
548
|
-
'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.',
|
549
|
-
'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'I', 'V'
|
451
|
+
'M.E.P.', 'M.Eng.', 'M.Jur.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'V.M.D.', 'B.Ed.', 'B.Sc.',
|
452
|
+
'Ed.D.', 'Hons.', 'LL.B.',
|
453
|
+
'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.',
|
454
|
+
'O.K.', 'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'I', 'V'
|
550
455
|
],
|
551
456
|
organization: [
|
552
457
|
'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
|
@@ -572,7 +477,7 @@ class NameTamer
|
|
572
477
|
'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
|
573
478
|
'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
|
574
479
|
],
|
575
|
-
before:ADFIX_JOINERS, after:'\\z'
|
480
|
+
before: ADFIX_JOINERS, after: '\\z'
|
576
481
|
}
|
577
482
|
}
|
578
483
|
|
@@ -583,7 +488,7 @@ class NameTamer
|
|
583
488
|
adfix = ADFIXES[adfix_type]
|
584
489
|
|
585
490
|
[:person, :organization].each do |ct|
|
586
|
-
with_optional_spaces = adfix[ct].map { |p| p.gsub(ASCII_SPACE,' *') }
|
491
|
+
with_optional_spaces = adfix[ct].map { |p| p.gsub(ASCII_SPACE, ' *') }
|
587
492
|
pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
|
588
493
|
patterns[ct] = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
|
589
494
|
end
|
data/lib/name-tamer/version.rb
CHANGED
data/lib/string_extras.rb
ADDED
@@ -0,0 +1,188 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
class String
|
3
|
+
# Strip illegal characters out completely
|
4
|
+
def strip_invalid!(filter)
|
5
|
+
self.gsub!(filter, '')
|
6
|
+
self # Allows chaining
|
7
|
+
end
|
8
|
+
|
9
|
+
def strip_or_self!
|
10
|
+
self.strip!
|
11
|
+
self # Allows chaining
|
12
|
+
end
|
13
|
+
|
14
|
+
# Change any whitespace into our separator character
|
15
|
+
def whitespace_to!(separator)
|
16
|
+
self.gsub!(/[[:space:]]+/, separator)
|
17
|
+
self # Allows chaining
|
18
|
+
end
|
19
|
+
|
20
|
+
# Ensure commas have exactly one space after them
|
21
|
+
def space_after_comma!
|
22
|
+
self.gsub!(/,[[:space:]]*/, ', ')
|
23
|
+
self # Allows chaining
|
24
|
+
end
|
25
|
+
|
26
|
+
# Change some characters embedded in words to our separator character
|
27
|
+
# e.g. example.com -> example-com
|
28
|
+
def invalid_chars_to!(separator)
|
29
|
+
self.gsub!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
|
30
|
+
self # Allows chaining
|
31
|
+
end
|
32
|
+
|
33
|
+
# Make sure separators are not where they shouldn't be
|
34
|
+
def fix_separators!(separator)
|
35
|
+
unless separator.nil? || separator.empty?
|
36
|
+
r = Regexp.escape(separator)
|
37
|
+
# No more than one of the separator in a row.
|
38
|
+
self.gsub!(/#{r}{2,}/, separator)
|
39
|
+
# Remove leading/trailing separator.
|
40
|
+
self.gsub!(/^#{r}|#{r}$/i, '')
|
41
|
+
end
|
42
|
+
|
43
|
+
self # Allows chaining
|
44
|
+
end
|
45
|
+
|
46
|
+
# Any characters that resemble latin characters might usefully be
|
47
|
+
# transliterated into ones that are easy to type on an anglophone
|
48
|
+
# keyboard.
|
49
|
+
def approximate_latin_chars!
|
50
|
+
self.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
|
51
|
+
self # Allows chaining
|
52
|
+
end
|
53
|
+
|
54
|
+
def upcase_first_letter!
|
55
|
+
self.gsub!(/\b\w/) { |first| first.upcase }
|
56
|
+
self # Allows chaining
|
57
|
+
end
|
58
|
+
|
59
|
+
def downcase_after_apostrophe!
|
60
|
+
self.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
|
61
|
+
self # Allows chaining
|
62
|
+
end
|
63
|
+
|
64
|
+
# Our list of terminal characters that indicate a non-celtic name used
|
65
|
+
# to include o but we removed it because of MacMurdo.
|
66
|
+
def fix_mac!
|
67
|
+
if self =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || self =~ /\bMc/
|
68
|
+
self.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |_| Regexp.last_match[1] + Regexp.last_match[2].capitalize }
|
69
|
+
|
70
|
+
# Fix Mac exceptions
|
71
|
+
%w(
|
72
|
+
MacEdo MacEvicius MacHado MacHar MacHin MacHlin MacIas MacIulis MacKie
|
73
|
+
MacKle MacKlin MacKmin MacKmurdo MacQuarie MacLise MacKenzie
|
74
|
+
).each { |mac_name| self.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
|
75
|
+
end
|
76
|
+
|
77
|
+
self # Allows chaining
|
78
|
+
end
|
79
|
+
|
80
|
+
# Fix ff wierdybonks
|
81
|
+
def fix_ff!
|
82
|
+
%w(
|
83
|
+
Fforbes Fforde Ffinch Ffrench Ffoulkes
|
84
|
+
).each { |ff_name| self.gsub!(ff_name, ff_name.downcase) }
|
85
|
+
|
86
|
+
self # Allows chaining
|
87
|
+
end
|
88
|
+
|
89
|
+
# Fixes for name modifiers followed by space
|
90
|
+
# Also replaces spaces with non-breaking spaces
|
91
|
+
# Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
|
92
|
+
def fix_name_modifiers!
|
93
|
+
NAME_MODIFIERS.each do |modifier|
|
94
|
+
self.gsub!(/((?:[[:space:]]|^)#{modifier})([[:space:]]+|-)/) do |_|
|
95
|
+
"#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2].tr(ASCII_SPACE, NONBREAKING_SPACE)}"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
%w(Dell D).each do |modifier|
|
100
|
+
self.gsub!(/(.#{modifier}')(\w)/) { |_| "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2]}" }
|
101
|
+
end
|
102
|
+
|
103
|
+
self # Allows chaining
|
104
|
+
end
|
105
|
+
|
106
|
+
# Upcase words with no vowels, e.g JPR Williams
|
107
|
+
# Except Ng
|
108
|
+
def upcase_initials!
|
109
|
+
self.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |_| Regexp.last_match[1].upcase }
|
110
|
+
self.gsub!(/\b(NG)\b/i) { |_| Regexp.last_match[1].capitalize } # http://en.wikipedia.org/wiki/Ng
|
111
|
+
|
112
|
+
self # Allows chaining
|
113
|
+
end
|
114
|
+
|
115
|
+
# Fix known last names that have spaces (not hyphens!)
|
116
|
+
def nbsp_in_compound_name!
|
117
|
+
COMPOUND_NAMES.each do |compound_name|
|
118
|
+
self.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
|
119
|
+
end
|
120
|
+
|
121
|
+
self # Allows chaining
|
122
|
+
end
|
123
|
+
|
124
|
+
def nbsp_in_name_modifier!
|
125
|
+
NAME_MODIFIERS.each do |modifier|
|
126
|
+
self.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |_| "#{Regexp.last_match[1]}#{NONBREAKING_SPACE}" }
|
127
|
+
end
|
128
|
+
|
129
|
+
self # Allows chaining
|
130
|
+
end
|
131
|
+
|
132
|
+
def remove_spaces_from_initials!
|
133
|
+
self.gsub!(/\b([a-z])(\.)* \b(?![a-z0-9']{2,})/i) { |_| "#{Regexp.last_match[1]}#{Regexp.last_match[2]}" }
|
134
|
+
self # Allows chaining
|
135
|
+
end
|
136
|
+
|
137
|
+
def ensure_space_after_initials!
|
138
|
+
self.gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " }
|
139
|
+
self # Allows chaining
|
140
|
+
end
|
141
|
+
|
142
|
+
NONBREAKING_SPACE = "\u00a0"
|
143
|
+
ASCII_SPACE = "\u0020"
|
144
|
+
|
145
|
+
COMPOUND_NAMES = [
|
146
|
+
'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
|
147
|
+
'Baron Cohen', 'Strang Steel',
|
148
|
+
'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
|
149
|
+
]
|
150
|
+
|
151
|
+
NAME_MODIFIERS = [
|
152
|
+
'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
|
153
|
+
'St[\.]?', 'Zur'
|
154
|
+
]
|
155
|
+
|
156
|
+
# Transliterations (like the i18n defaults)
|
157
|
+
# see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
|
158
|
+
APPROXIMATIONS = {
|
159
|
+
'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE',
|
160
|
+
'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I',
|
161
|
+
'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O',
|
162
|
+
'Õ' => 'O', 'Ö' => 'O', '×' => 'x', 'Ø' => 'O', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
|
163
|
+
'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'Th', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a',
|
164
|
+
'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
|
165
|
+
'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd',
|
166
|
+
'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
|
167
|
+
'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y',
|
168
|
+
'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a', 'Ć' => 'C',
|
169
|
+
'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c', 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
|
170
|
+
'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd', 'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E',
|
171
|
+
'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
|
172
|
+
'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G',
|
173
|
+
'ģ' => 'g', 'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h', 'Ĩ' => 'I', 'ĩ' => 'i',
|
174
|
+
'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I',
|
175
|
+
'ı' => 'i', 'IJ' => 'IJ', 'ij' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k',
|
176
|
+
'ĸ' => 'k', 'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l',
|
177
|
+
'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L', 'ł' => 'l', 'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N',
|
178
|
+
'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n', 'ʼn' => "'n", 'Ŋ' => 'NG', 'ŋ' => 'ng',
|
179
|
+
'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o', 'Œ' => 'OE',
|
180
|
+
'œ' => 'oe', 'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
|
181
|
+
'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's', 'Š' => 'S',
|
182
|
+
'š' => 's', 'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
|
183
|
+
'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U',
|
184
|
+
'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w',
|
185
|
+
'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
|
186
|
+
'Ž' => 'Z', 'ž' => 'z'
|
187
|
+
}
|
188
|
+
end
|
data/name-tamer.gemspec
CHANGED
@@ -7,14 +7,14 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.version = NameTamer::VERSION
|
8
8
|
spec.authors = ['Xenapto']
|
9
9
|
spec.email = ['developers@xenapto.com']
|
10
|
-
spec.description = %q
|
11
|
-
spec.summary = %q
|
10
|
+
spec.description = %q(Useful methods for taming names)
|
11
|
+
spec.summary = %q(Example: NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith)
|
12
12
|
spec.homepage = 'https://github.com/Xenapto/name-tamer'
|
13
13
|
spec.license = 'MIT'
|
14
14
|
|
15
|
-
spec.files = `git ls-files`.split(
|
16
|
-
spec.executables = spec.files.grep(
|
17
|
-
spec.test_files = spec.files.grep(
|
15
|
+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
16
|
+
spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
|
17
|
+
spec.test_files = spec.files.grep(/^(test|spec|features|coverage)\//)
|
18
18
|
spec.require_paths = ['lib']
|
19
19
|
|
20
20
|
spec.add_development_dependency 'bundler', '~> 1'
|
data/spec/name_tamer_spec.rb
CHANGED
@@ -5,157 +5,191 @@ require 'name-tamer'
|
|
5
5
|
describe NameTamer do
|
6
6
|
let(:names) do
|
7
7
|
[
|
8
|
-
{ n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
9
|
-
{ n:'JOHN SMITH', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
10
|
-
{ n:'john smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
11
|
-
{ n:'Smith, John', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
12
|
-
{ n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
13
|
-
{ n:'Smith, John', nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
14
|
-
{ n:'John J Smith', t: :person, nn:'John J Smith', sn:'John Smith', s:'john-smith' },
|
15
|
-
{ n:'John J. Smith', t: :person, nn:'John J. Smith', sn:'John Smith', s:'john-smith' },
|
16
|
-
{ n:'SMITH, Mr John J.R.', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
17
|
-
{ n:' SMITH, Mr John J. R. ', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
18
|
-
{ n:'SMITH, Mr John J.R.', nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
19
|
-
{ n:'Mr John J.R. SMITH JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
20
|
-
{ n:'Mr John J.R. SMITH III,JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
21
|
-
{ n:'Mr John J.R. SMITH JD', nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
22
|
-
{ n:'Mr Jean-Michel SMITH JD', t: :person, nn:'Jean-Michel SMITH', sn:'Jean-Michel SMITH',
|
23
|
-
|
24
|
-
{ n:'
|
25
|
-
{ n:'Lane Fox Ph.D
|
26
|
-
{ n:'
|
27
|
-
{ n:'
|
28
|
-
|
29
|
-
{ n:'
|
30
|
-
{ n:'
|
31
|
-
{ n:'
|
32
|
-
{ n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP',
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
{ n:
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
{ n:
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
{ n:'
|
45
|
-
{ n:'
|
46
|
-
|
47
|
-
{ n:'
|
48
|
-
{ n:
|
49
|
-
{ n:'
|
50
|
-
{ n:'
|
51
|
-
{ n:
|
52
|
-
{ n:'
|
53
|
-
{ n:'
|
54
|
-
{ n:'
|
55
|
-
{ n:'
|
56
|
-
{ n:'
|
57
|
-
|
58
|
-
{ n:'
|
59
|
-
{ n:'
|
60
|
-
|
61
|
-
{ n:'
|
62
|
-
{ n:'
|
63
|
-
{ n:'
|
64
|
-
{ n:'
|
65
|
-
{ n:
|
66
|
-
|
67
|
-
{ n:'
|
68
|
-
{ n:
|
69
|
-
|
70
|
-
{ n:'
|
71
|
-
{ n:'
|
72
|
-
{ n:'
|
73
|
-
{ n:'
|
74
|
-
|
75
|
-
{ n:'
|
76
|
-
|
77
|
-
{ n:'
|
78
|
-
{ n:'
|
79
|
-
{ n:
|
80
|
-
{ n:'
|
81
|
-
|
82
|
-
{ n:'
|
83
|
-
{ n:'
|
84
|
-
{ n:'
|
85
|
-
{ n:'
|
86
|
-
{ n:'
|
87
|
-
{ n:'
|
88
|
-
{ n:'
|
89
|
-
{ n:'
|
90
|
-
{ n:'
|
91
|
-
{ n:
|
92
|
-
{ n:'
|
93
|
-
{ n:'
|
94
|
-
{ n:'
|
95
|
-
{ n:'
|
96
|
-
{ n:'
|
97
|
-
{ n:'
|
98
|
-
{ n:'
|
99
|
-
{ n:'
|
100
|
-
{ n:'
|
101
|
-
|
102
|
-
{ n:'
|
103
|
-
|
104
|
-
{ n:'
|
105
|
-
|
106
|
-
{ n:'
|
107
|
-
|
108
|
-
{ n:
|
109
|
-
{ n:'
|
110
|
-
{ n:'
|
111
|
-
{ n:'
|
112
|
-
{ n:'
|
113
|
-
{ n:'
|
114
|
-
|
115
|
-
{ n:'
|
116
|
-
|
117
|
-
{ n:'
|
118
|
-
|
119
|
-
{ n:'
|
120
|
-
{ n:'
|
121
|
-
{ n:'
|
122
|
-
{ n:'
|
8
|
+
{ n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
9
|
+
{ n: 'JOHN SMITH', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
10
|
+
{ n: 'john smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
11
|
+
{ n: 'Smith, John', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
12
|
+
{ n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
13
|
+
{ n: 'Smith, John', nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
14
|
+
{ n: 'John J. Smith', t: :person, nn: 'John J. Smith', sn: 'John Smith', s: 'john-smith' },
|
15
|
+
{ n: 'John J. Smith', t: :person, nn: 'John J. Smith', sn: 'John Smith', s: 'john-smith' },
|
16
|
+
{ n: 'SMITH, Mr John J.R.', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
|
17
|
+
{ n: ' SMITH, Mr John J. R. ', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
|
18
|
+
{ n: 'SMITH, Mr John J.R.', nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
|
19
|
+
{ n: 'Mr John J.R. SMITH JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
|
20
|
+
{ n: 'Mr John J.R. SMITH III,JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
|
21
|
+
{ n: 'Mr John J.R. SMITH JD', nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
|
22
|
+
{ n: 'Mr Jean-Michel SMITH JD', t: :person, nn: 'Jean-Michel SMITH', sn: 'Jean-Michel SMITH',
|
23
|
+
s: 'jean-michel-smith' },
|
24
|
+
{ n: 'Mr Jean Michel-SMITH JD', nn: 'Jean Michel-SMITH', sn: 'Jean Michel-SMITH', s: 'jean-michel-smith' },
|
25
|
+
{ n: 'Dr Martha Lane Fox Ph.D', nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
|
26
|
+
{ n: 'Lane Fox Ph.D, Dr Martha', t: :person, nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
|
27
|
+
{ n: 'Baroness Lane-Fox of Lewisham', t: :person, nn: 'Lane-Fox of Lewisham', sn: 'Lane-Fox of Lewisham',
|
28
|
+
s: 'lane-fox-of-lewisham' },
|
29
|
+
{ n: 'MACDONALDS LLC', nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
|
30
|
+
{ n: 'MACDONALDS LLC', t: :organization, nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
|
31
|
+
{ n: 'macdonalds', t: :organization, nn: 'macdonalds', sn: 'macdonalds', s: 'macdonalds' },
|
32
|
+
{ n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization,
|
33
|
+
nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
|
34
|
+
sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
|
35
|
+
s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
36
|
+
{ n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP',
|
37
|
+
nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
|
38
|
+
sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
|
39
|
+
s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
40
|
+
{ n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP',
|
41
|
+
nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
|
42
|
+
sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
|
43
|
+
s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
44
|
+
{ n: 'K.V.A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments', s: 'kva-instruments' },
|
45
|
+
{ n: 'K. V. A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments',
|
46
|
+
s: 'kva-instruments' },
|
47
|
+
{ n: 'J.P.R. Williams', nn: 'J.P.R. Williams', sn: 'JPR Williams', s: 'jpr-williams' },
|
48
|
+
{ n: 'J. P. R. Williams', nn: 'J.P.R. Williams', sn: 'JPR Williams', s: 'jpr-williams' },
|
49
|
+
{ n: 'J P R Williams', nn: 'JPR Williams', sn: 'JPR Williams', s: 'jpr-williams' },
|
50
|
+
{ n: 'JPR Williams', nn: 'JPR Williams', sn: 'JPR Williams', s: 'jpr-williams' },
|
51
|
+
{ n: 'Audrey fforbes', nn: 'Audrey fforbes', sn: 'Audrey fforbes', s: 'audrey-fforbes' },
|
52
|
+
{ n: 'J. Arthur Rank', t: :person, nn: 'J. Arthur Rank', sn: 'Arthur Rank', s: 'arthur-rank' },
|
53
|
+
{ n: 'PHILIP NG', t: :person, nn: 'Philip Ng', sn: 'Philip Ng', s: 'philip-ng' },
|
54
|
+
{ n: 'Super R&D', nn: 'Super R&D', sn: 'Super R and D', s: 'super-r-and-d' },
|
55
|
+
{ n: 'Harry Dean Stanton', t: :person, nn: 'Harry Dean Stanton', sn: 'Harry Stanton', s: 'harry-stanton' },
|
56
|
+
{ n: 'Union Square Ventures', t: :organization, nn: 'Union Square Ventures', sn: 'Union Square Ventures',
|
57
|
+
s: 'union-square-ventures' },
|
58
|
+
{ n: 'J Arthur Rank Inc.', t: :organization, nn: 'J Arthur Rank', sn: 'J Arthur Rank', s: 'j-arthur-rank' },
|
59
|
+
{ n: 'Jean VAN DER VELDE', t: :person, nn: 'Jean VAN DER VELDE', sn: 'Jean VAN DER VELDE',
|
60
|
+
s: 'jean-van-der-velde' },
|
61
|
+
{ n: 'Al Capone', t: :person, nn: 'Al Capone', sn: 'Al Capone', s: 'al-capone' },
|
62
|
+
{ n: 'Fahd al-Saud', t: :person, nn: 'Fahd al-Saud', sn: 'Fahd al-Saud', s: 'fahd-al-saud' },
|
63
|
+
{ n: 'Mehmet al Auouiby', t: :person, nn: 'Mehmet al Auouiby', sn: 'Mehmet al Auouiby', s: 'mehmet-al-auouiby' },
|
64
|
+
{ n: 'Macquarie Bank', t: :organization, nn: 'Macquarie Bank', sn: 'Macquarie Bank', s: 'macquarie-bank' },
|
65
|
+
{ n: "COMMEDIA DELL'ARTE", t: :organization, nn: "Commedia dell'Arte", sn: "Commedia dell'Arte",
|
66
|
+
s: 'commedia-dellarte' },
|
67
|
+
{ n: 'Della Smith', t: :person, nn: 'Della Smith', sn: 'Della Smith', s: 'della-smith' },
|
68
|
+
{ n: 'Antonio DELLA MONTEVERDE', nn: 'Antonio DELLA MONTEVERDE', sn: 'Antonio DELLA MONTEVERDE',
|
69
|
+
s: 'antonio-della-monteverde' },
|
70
|
+
{ n: 'Tony St Clair', t: :person, nn: 'Tony St Clair', sn: 'Tony St Clair', s: 'tony-st-clair' },
|
71
|
+
{ n: 'Seamus O\'Malley', t: :person, nn: 'Seamus O\'Malley', sn: 'Seamus O\'Malley', s: 'seamus-omalley' },
|
72
|
+
{ n: 'SeedCamp', t: :organization, nn: 'SeedCamp', sn: 'SeedCamp', s: 'seedcamp' },
|
73
|
+
{ n: 'Peter Van Der Auwera', t: :person, nn: 'Peter Van Der Auwera', sn: 'Peter Van Der Auwera',
|
74
|
+
s: 'peter-van-der-auwera' },
|
75
|
+
{ n: 'VAN DER AUWERA, Peter', t: :person, nn: 'Peter van der Auwera', sn: 'Peter van der Auwera',
|
76
|
+
s: 'peter-van-der-auwera' },
|
77
|
+
{ n: 'Li Fan', t: :person, nn: 'Li Fan', sn: 'Li Fan', s: 'li-fan' },
|
78
|
+
{ n: 'Fan Li', t: :person, nn: 'Fan Li', sn: 'Fan Li', s: 'fan-li' },
|
79
|
+
{ n: 'Levi Strauss & Co.', nn: 'Levi Strauss', sn: 'Levi Strauss', s: 'levi-strauss' },
|
80
|
+
{ n: 'Standard & Poor\'s', t: :organization, nn: 'Standard & Poor\'s', sn: 'Standard and Poor\'s',
|
81
|
+
s: 'standard-and-poors' },
|
82
|
+
{ n: 'I B M Services', t: :organization, nn: 'IBM Services', sn: 'IBM Services', s: 'ibm-services' },
|
83
|
+
{ n: 'Sean Park DDS', t: :person, nn: 'Sean Park', sn: 'Sean Park', s: 'sean-park' },
|
84
|
+
{ n: 'SEAN MACLISE PARK', t: :person, nn: 'Sean Maclise Park', sn: 'Sean Park', s: 'sean-park' },
|
85
|
+
{ n: 'AJ Hanna', t: :person, nn: 'AJ Hanna', sn: 'AJ Hanna', s: 'aj-hanna' },
|
86
|
+
{ n: 'Free & Clear', t: :organization, nn: 'Free & Clear', sn: 'Free and Clear', s: 'free-and-clear' },
|
87
|
+
{ n: 'Adam D\'ANGELO', t: :person, nn: 'Adam D\'ANGELO', sn: 'Adam D\'ANGELO', s: 'adam-dangelo' },
|
88
|
+
{ n: 'MACKENZIE, Doug', t: :person, nn: 'Doug Mackenzie', sn: 'Doug Mackenzie', s: 'doug-mackenzie' },
|
89
|
+
{ n: 'Up + Down', t: :organization, nn: 'Up + Down', sn: 'Up plus Down', s: 'up-plus-down' },
|
90
|
+
{ n: 'San Francisco Ltd', t: :organization, nn: 'San Francisco', sn: 'San Francisco', s: 'san-francisco' },
|
91
|
+
{ n: 'AT&T', t: :organization, nn: 'At&T', sn: 'At and T', s: 'at-and-t' },
|
92
|
+
{ n: 'SMITH, John, Jr.', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
93
|
+
{ n: 'I Heart Movies', t: :organization, nn: 'I Heart Movies', sn: 'I Heart Movies', s: 'i-heart-movies' },
|
94
|
+
{ n: 'Y Combinator', t: :organization, nn: 'Y Combinator', sn: 'Y Combinator', s: 'y-combinator' },
|
95
|
+
{ n: 'Ben\'s 10 Hens', t: :organization, nn: 'Ben\'s 10 Hens', sn: 'Ben\'s 10 Hens', s: 'bens-10-hens' },
|
96
|
+
{ n: 'Elazer Edelman, MD , PhD', t: :person, nn: 'Elazer Edelman', sn: 'Elazer Edelman', s: 'elazer-edelman' },
|
97
|
+
{ n: 'Judith M. O\'Brien', t: :person, nn: 'Judith M. O\'Brien', sn: 'Judith O\'Brien', s: 'judith-obrien' },
|
98
|
+
{ n: 'MORRISON, Van', t: :person, nn: 'Van Morrison', sn: 'Van Morrison', s: 'van-morrison' },
|
99
|
+
{ n: 'i/o Ventures', t: :organization, nn: 'i/o Ventures', sn: 'i/o Ventures', s: 'i-o-ventures' },
|
100
|
+
{ n: 'C T Corporation System', t: :person, nn: 'CT Corporation System', sn: 'CT Corporation System',
|
101
|
+
s: 'ct-corporation-system' },
|
102
|
+
{ n: 'C.T. Corporation System', t: :person, nn: 'C.T. Corporation System', sn: 'CT Corporation System',
|
103
|
+
s: 'ct-corporation-system' },
|
104
|
+
{ n: 'CT Corporation System', t: :person, nn: 'CT Corporation System', sn: 'CT Corporation System',
|
105
|
+
s: 'ct-corporation-system' },
|
106
|
+
{ n: 'Corporation Service Company', t: :person, nn: 'Corporation Service Company',
|
107
|
+
sn: 'Corporation Service Company', s: 'corporation-service-company' },
|
108
|
+
{ n: 'Kurshuni,Inc.', t: :organization, nn: 'Kurshuni', sn: 'Kurshuni', s: 'kurshuni' },
|
109
|
+
{ n: 'Cellular Inc-LLC', t: :organization, nn: 'Cellular', sn: 'Cellular', s: 'cellular' },
|
110
|
+
{ n: 'Emtec (AZ) Limited', t: :organization, nn: 'Emtec (AZ)', sn: 'Emtec (AZ)', s: 'emtec-az' },
|
111
|
+
{ n: 'Emtec (LLC) Limited', t: :organization, nn: 'Emtec', sn: 'Emtec', s: 'emtec' },
|
112
|
+
{ n: 'Emtec (XYZ LLC) Limited', t: :organization, nn: 'Emtec (XYZ)', sn: 'Emtec (XYZ)', s: 'emtec-xyz' },
|
113
|
+
{ n: 'Tao Ma', t: :person, nn: 'Tao', sn: 'Tao',
|
114
|
+
s: 'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
|
115
|
+
{ n: '(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn: 'Courtney J. Miller', sn: 'Courtney Miller',
|
116
|
+
s: 'courtney-miller' },
|
117
|
+
{ n: '(Mr Woo) The Window Cleaner', t: :person, nn: '(Woo) The Window Cleaner', sn: '(Woo) Cleaner',
|
118
|
+
s: 'woo-cleaner' },
|
119
|
+
{ n: 'DOMINIC MACMURDO', t: :person, nn: 'Dominic MacMurdo', sn: 'Dominic MacMurdo', s: 'dominic-macmurdo' },
|
120
|
+
{ n: 'DOMINIC MACEDO', t: :person, nn: 'Dominic Macedo', sn: 'Dominic Macedo', s: 'dominic-macedo' },
|
121
|
+
{ n: 'DOMINIC MACDONALD', t: :person, nn: 'Dominic MacDonald', sn: 'Dominic MacDonald', s: 'dominic-macdonald' },
|
122
|
+
{ n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
|
123
|
+
{ n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
|
124
|
+
{ n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
|
125
|
+
{ n: 'xxx%52zzz', t: :organization, nn: 'xxx%52zzz', sn: 'xxx%52zzz', s: 'xxxrzzz' },
|
126
|
+
{ n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
|
127
|
+
s: 'Евгений-Болотнов' },
|
128
|
+
{ n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
|
129
|
+
{ n: 'ゴルフスタジアム', t: :organization, nn: 'ゴルフスタジアム', sn: 'ゴルフスタジアム', s: 'ゴルフスタジアム' },
|
130
|
+
{ n: '我摘', t: :organization, nn: '我摘', sn: '我摘', s: '我摘' },
|
131
|
+
{ n: 'Καρατζάς Στέφανος', t: :organization, nn: 'Καρατζάς Στέφανος', sn: 'Καρατζάς Στέφανος',
|
132
|
+
s: 'Καρατζάς-Στέφανος' },
|
133
|
+
{ n: 'โชติวัน วัฒนลาภ', t: :organization, nn: 'โชติวัน วัฒนลาภ', sn: 'โชติวัน วัฒนลาภ', s: 'โชติวัน-วัฒนลาภ' },
|
134
|
+
{ n: '張 續寶', t: :organization, nn: '張 續寶', sn: '張 續寶', s: '張-續寶' },
|
135
|
+
{ n: 'Юрий Гайдук', t: :organization, nn: 'Юрий Гайдук', sn: 'Юрий Гайдук', s: 'Юрий-Гайдук' },
|
136
|
+
{ n: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣',
|
137
|
+
s: '☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
|
138
|
+
{ n: '♠ KlasikB0i ♠', t: :organization, nn: '♠ KlasikB0i ♠', sn: '♠ KlasikB0i ♠', s: '♠-klasikb0i-♠' },
|
139
|
+
{ n: '* Shorusan *', t: :organization, nn: '* Shorusan *', sn: '* Shorusan *', s: 'shorusan' },
|
140
|
+
{ n: '项目谷', t: :organization, nn: '项目谷', sn: '项目谷', s: '项目谷' },
|
141
|
+
{ n: 'ООО "Инновационные полимерные адгезивы"', t: :organization, nn: 'ООО "Инновационные полимерные адгезивы"',
|
142
|
+
sn: 'ООО "Инновационные полимерные адгезивы"', s: 'ООО-Инновационные-полимерные-адгезивы' },
|
143
|
+
{ n: 'عبدالله ...', t: :organization, nn: 'عبدالله ...', sn: 'عبدالله ...', s: 'عبدالله' },
|
144
|
+
{ n: 'กมลชนก ทิศไธสง', t: :organization, nn: 'กมลชนก ทิศไธสง', sn: 'กมลชนก ทิศไธสง', s: 'กมลชนก-ทิศไธสง' },
|
145
|
+
{ n: 'יוֹ אָב', t: :organization, nn: 'יוֹ אָב', sn: 'יוֹ אָב', s: 'יוֹ-אָב' },
|
146
|
+
{ n: 'יגאל נימני', t: :organization, nn: 'יגאל נימני', sn: 'יגאל נימני', s: 'יגאל-נימני' },
|
147
|
+
{ n: 'ניסים דניאלי', t: :organization, nn: 'ניסים דניאלי', sn: 'ניסים דניאלי', s: 'ניסים-דניאלי' },
|
148
|
+
{ n: 'مساء الخير', t: :organization, nn: 'مساء الخير', sn: 'مساء الخير', s: 'مساء-الخير' },
|
149
|
+
{ n: 'محمود ياسر', t: :organization, nn: 'محمود ياسر', sn: 'محمود ياسر', s: 'محمود-ياسر' },
|
150
|
+
{ n: 'קובי ביטר', t: :organization, nn: 'קובי ביטר', sn: 'קובי ביטר', s: 'קובי-ביטר' },
|
151
|
+
{ n: 'الملاك الحارس', t: :organization, nn: 'الملاك الحارس', sn: 'الملاك الحارس', s: 'الملاك-الحارس' },
|
152
|
+
{ n: 'কবির হাসান', t: :organization, nn: 'কবির হাসান', sn: 'কবির হাসান', s: 'কবির-হাসান' },
|
123
153
|
{ nn: '', sn: '', s: '_' },
|
124
|
-
{ n:'Union Square Ventures', t: 'Organization', nn:'Union Square Ventures', sn:'Union Square Ventures',
|
125
|
-
|
126
|
-
{ n:'John Smith', t:
|
127
|
-
{ n:'John Smith', t:
|
128
|
-
{ n:'
|
129
|
-
{ n:'
|
130
|
-
{ n:
|
131
|
-
{ n:'
|
132
|
-
{ n:'
|
133
|
-
{ n:'*
|
134
|
-
{ n:'*
|
135
|
-
{ n:'* Olga
|
136
|
-
{ n:'
|
154
|
+
{ n: 'Union Square Ventures', t: 'Organization', nn: 'Union Square Ventures', sn: 'Union Square Ventures',
|
155
|
+
s: 'union-square-ventures' },
|
156
|
+
{ n: 'John Smith', t: 'Person', nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
157
|
+
{ n: 'John Smith', t: :nonsense, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
158
|
+
{ n: 'John Smith', t: Kernel, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
159
|
+
{ n: 'Ms Jane Smith', t: :person, nn: 'Jane Smith', sn: 'Jane Smith', s: 'jane-smith' },
|
160
|
+
{ n: 'example.com', t: :organization, nn: 'example.com', sn: 'example.com', s: 'example-com' },
|
161
|
+
{ n: 'Hermann Müller', t: :person, nn: 'Hermann Müller', sn: 'Hermann Müller', s: 'hermann-muller' },
|
162
|
+
{ n: 'b-to-v Partners AG', t: :organization, nn: 'b-to-v Partners', sn: 'b-to-v Partners', s: 'b-to-v-partners' },
|
163
|
+
{ n: '*', t: :person, nn: '*', sn: '*', s: '_' },
|
164
|
+
{ n: '* *', t: :person, nn: '* *', sn: '* *', s: '_' },
|
165
|
+
{ n: '* Olga *', t: :person, nn: '* Olga *', sn: 'Olga', s: 'olga' },
|
166
|
+
{ n: '* Olga Bedia García *', t: :person, nn: '* Olga Bedia García *', sn: 'Olga García', s: 'olga-garcia' },
|
167
|
+
{ n: 'John Smith M.A. (Oxon)', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
168
|
+
{ n: 'I B M', t: :organization, nn: 'Ibm', sn: 'Ibm', s: 'ibm' },
|
169
|
+
{ n: 'I-B-M', t: :organization, nn: 'I-B-M', sn: 'I-B-M', s: 'i-b-m' },
|
170
|
+
{ n: 'I.B.M.', t: :organization, nn: 'I.B.M.', sn: 'IBM', s: 'ibm' }
|
137
171
|
]
|
138
172
|
end
|
139
173
|
|
140
|
-
it
|
174
|
+
it 'makes a slug' do
|
141
175
|
names.each do |name_data|
|
142
176
|
name = name_data[:n]
|
143
|
-
NameTamer[name, contact_type:name_data[:t]].slug.should == name_data[:s]
|
177
|
+
NameTamer[name, contact_type: name_data[:t]].slug.should == name_data[:s]
|
144
178
|
end
|
145
179
|
end
|
146
180
|
|
147
|
-
it
|
181
|
+
it 'makes a nice name' do
|
148
182
|
names.each do |name_data|
|
149
183
|
name = name_data[:n]
|
150
|
-
nice_name = NameTamer[name, contact_type:name_data[:t]].nice_name
|
184
|
+
nice_name = NameTamer[name, contact_type: name_data[:t]].nice_name
|
151
185
|
nice_name.should == name_data[:nn]
|
152
186
|
end
|
153
187
|
end
|
154
188
|
|
155
|
-
it
|
189
|
+
it 'makes a searchable name' do
|
156
190
|
names.each do |name_data|
|
157
191
|
name = name_data[:n]
|
158
|
-
NameTamer[name, contact_type:name_data[:t]].simple_name.should == name_data[:sn]
|
192
|
+
NameTamer[name, contact_type: name_data[:t]].simple_name.should == name_data[:sn]
|
159
193
|
end
|
160
194
|
end
|
161
195
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -5,11 +5,11 @@ Coveralls.wear!
|
|
5
5
|
SimpleCov.start
|
6
6
|
|
7
7
|
RSpec.configure do |config|
|
8
|
-
|
8
|
+
# Run specs in random order to surface order dependencies. If you find an
|
9
9
|
# order dependency and want to debug it, you can fix the order by providing
|
10
10
|
# the seed, which is printed after each run.
|
11
11
|
# --seed 1234
|
12
|
-
config.order =
|
12
|
+
config.order = 'random'
|
13
13
|
|
14
14
|
# Manually-added
|
15
15
|
config.color_enabled = true
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-tamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xenapto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -109,6 +109,8 @@ extra_rdoc_files: []
|
|
109
109
|
files:
|
110
110
|
- ".env"
|
111
111
|
- ".gitignore"
|
112
|
+
- ".hound.yml"
|
113
|
+
- ".rubocop.yml"
|
112
114
|
- ".ruby-version"
|
113
115
|
- Gemfile
|
114
116
|
- Gemfile.lock
|
@@ -120,6 +122,7 @@ files:
|
|
120
122
|
- doc/suffixes.csv
|
121
123
|
- lib/name-tamer.rb
|
122
124
|
- lib/name-tamer/version.rb
|
125
|
+
- lib/string_extras.rb
|
123
126
|
- name-tamer.gemspec
|
124
127
|
- spec/name_tamer_spec.rb
|
125
128
|
- spec/spec_helper.rb
|