name-tamer 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YzRiZjg0NGY3ODliMzVjZWM0ZDBhMjExYjNjODMyMmJmZTExNDRiMA==
5
+ data.tar.gz: !binary |-
6
+ ZmZmODdkOGIwYzBlN2I3MjM3OTVjNzBhZGM0ZjQ2MTk0MmQ3MmIyMg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YzhhNzUxMTY5OWQ4YjkxYzUxNTMxNTBhYjNkZGMyMDE1MDFmNDNkMmY2Y2Uy
10
+ ZWVmODhiMGM3OTBjNWE3ZmI0NGI0Yjk4MTRiZWYyMWRkZTY0NjBhNzI5NWNm
11
+ Nzk4ODliZjU3OTc3YzY4MzdkYWYyYzU4ODE4ZGJhNjkwNmE1MDU=
12
+ data.tar.gz: !binary |-
13
+ YWNiMjZhODI5NDk2MmI1Y2E4MWUyYTg4NTk0M2ExMGFkNzhhZjdlZDRiYzJm
14
+ ZDI0ZjNiYmJlMWNiYjRmOTYxMGU5MTdlMTZjYzA0YTZjNjdkYTliOTc1YWRj
15
+ ZmM1ZWYwZTA5ZjQ5NDc2OGQ4NzA1NTgwYzM0MTNlYTFjMzgzMDc=
data/.env ADDED
@@ -0,0 +1 @@
1
+ PATH=/home/build/.rvm/gems/ruby-2.1.1/bin:/home/build/.rvm/gems/ruby-2.1.1@global/bin:/home/build/.rvm/rubies/ruby-2.1.1/bin:/home/build/.rvm/bin:/usr/local/heroku/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/build/.rvm/gems/ruby-2.1.1@global/bin/bundle
@@ -0,0 +1,24 @@
1
+ *.rbc
2
+ capybara-*.html
3
+ .rspec
4
+ /log
5
+ /tmp
6
+ /db/*.sqlite3
7
+ /public/system
8
+ /coverage/
9
+ /spec/tmp
10
+ **.orig
11
+ rerun.txt
12
+ pickle-email-*.html
13
+ config/initializers/secret_token.rb
14
+ config/secrets.yml
15
+
16
+ ## Environment normalisation:
17
+ /.bundle
18
+ /vendor/bundle
19
+
20
+ # these should all be checked in to normalise the environment:
21
+ # Gemfile.lock, .ruby-version, .ruby-gemset
22
+
23
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
24
+ .rvmrc
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in name-tamer.gemspec
4
+ gemspec
@@ -0,0 +1,42 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ name-tamer (0.0.0)
5
+ activesupport (~> 3)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activesupport (3.2.13)
11
+ i18n (= 0.6.1)
12
+ multi_json (~> 1.0)
13
+ diff-lcs (1.2.5)
14
+ docile (1.1.3)
15
+ gem-release (0.7.3)
16
+ i18n (0.6.1)
17
+ multi_json (1.10.1)
18
+ rake (10.3.2)
19
+ rspec (2.14.1)
20
+ rspec-core (~> 2.14.0)
21
+ rspec-expectations (~> 2.14.0)
22
+ rspec-mocks (~> 2.14.0)
23
+ rspec-core (2.14.8)
24
+ rspec-expectations (2.14.5)
25
+ diff-lcs (>= 1.1.3, < 2.0)
26
+ rspec-mocks (2.14.6)
27
+ simplecov (0.8.2)
28
+ docile (~> 1.1.0)
29
+ multi_json
30
+ simplecov-html (~> 0.8.0)
31
+ simplecov-html (0.8.0)
32
+
33
+ PLATFORMS
34
+ ruby
35
+
36
+ DEPENDENCIES
37
+ bundler (~> 1)
38
+ gem-release (~> 0)
39
+ name-tamer!
40
+ rake (~> 10)
41
+ rspec (~> 2)
42
+ simplecov (~> 0)
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Xenapto
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,48 @@
1
+ # NameTamer
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/name-tamer.png)](http://badge.fury.io/rb/name-tamer)
4
+ [![Code Climate](https://codeclimate.com/github/Xenapto/name-tamer.png)](https://codeclimate.com/github/Xenapto/name-tamer)
5
+ [![Dependency Status](https://gemnasium.com/Xenapto/name-tamer.png)](https://gemnasium.com/Xenapto/name-tamer)
6
+ ![build status](https://circleci.com/gh/Xenapto/name-tamer.png?circle-token=dd3a51864d33f6506b18a355bc901b90c0df3b3b)
7
+
8
+ NameTamer: useful methods for taming names
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'name-tamer'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install name-tamer
23
+
24
+ ## Usage
25
+
26
+ Examples:
27
+
28
+ ```ruby
29
+ NameTamer['Mr. John Q. Smith III, MD'].search_name # => John Smith
30
+ ```
31
+
32
+ ```ruby
33
+ name_tamer = NameTamer['Mr. John Q. Smith III, MD']
34
+ name_tamer.slug # => john-smith
35
+ name_tamer.nice_name # => John Q. Smith
36
+ ```
37
+
38
+ ## Contributing
39
+
40
+ 1. Fork it
41
+ 1. Create your feature branch (`git checkout -b my-new-feature`)
42
+ 1. Commit your changes (`git commit -am 'Add some feature'`)
43
+ 1. Push to the branch (`git push origin my-new-feature`)
44
+ 1. Create new Pull Request
45
+
46
+ ## Acknowledgements
47
+
48
+ 1. Thanks to Ryan Bigg for the guide to making your first gem https://github.com/radar/guides/blob/master/gem-development.md
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require 'name_tamer/cli'
3
+ NameTamer::CLI.start
@@ -0,0 +1,509 @@
1
+ # encoding: utf-8
2
+
3
+ # References:
4
+ # http://www.w3.org/International/questions/qa-personal-names
5
+ # https://github.com/berkmancenter/namae
6
+ # https://github.com/mericson
7
+ # http://en.wikipedia.org/wiki/Types_of_business_entity
8
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
9
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
10
+ # http://en.wikipedia.org/wiki/Nobiliary_particle
11
+ # http://en.wikipedia.org/wiki/Spanish_naming_customs
12
+ # http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
13
+ require 'active_support/core_ext/object'
14
+
15
+ class NameTamer
16
# The raw name exactly as it was handed to the constructor ('' when nil was given).
#
# No reader is generated for contact_type here: the class defines an explicit
# #contact_type method, and generating both only produces a
# "method redefined" warning while the generated reader is discarded.
attr_reader :name

class << self
  # Shorthand constructor, e.g. NameTamer['John Smith', contact_type: :person]
  #
  # name - the name to tame (String or nil)
  # args - options Hash passed straight through to #initialize
  #
  # Returns a new NameTamer instance.
  def [](name, args = {})
    new(name, args)
  end
end
23
+
24
# Returns the tidied, presentable form of the name. The value is computed on
# first use and cached in @nice_name.
#
# The clean-up steps run in a fixed order. Adfix removal runs twice: once
# while the name is still a single string and once after a "Last, First"
# name may have been split into @last_name / @remainder.
def nice_name
  return @nice_name unless @nice_name.nil?

  @nice_name = @name.dup # work on a copy; @name keeps the original input

  tidy_spacing                             # " John   Smith " -> "John Smith"
  consolidate_initials                     # "I. B. M." -> "I.B.M."
  remove_adfixes                           # "Smith, John, Jr." -> "Smith, John"
  fixup_last_name_first                    # "Smith, John" -> "John Smith"
  fixup_mismatched_braces                  # "Ceres (AZ" -> "Ceres (AZ)"
  remove_adfixes                           # "Mr John Smith Jr." -> "John Smith"
  name_wrangle                             # proper name case
  use_nonbreaking_spaces_in_compound_names # keep compound names together

  @nice_name
end
40
+
41
# Returns a simplified form of the name intended for searching/matching.
# It is derived from #nice_name on first use and cached in @search_name.
def search_name
  return @search_name unless @search_name.nil?

  @search_name = nice_name.dup # start from a copy of the nice name

  remove_initials                # "John Q. Doe" -> "John Doe"
  remove_middle_names            # "Philip Seymour Hoffman" -> "Philip Hoffman"
  remove_dots_from_abbreviations # "J.P.R. Williams" -> "JPR Williams"
  standardize_words              # "B&Q Intl" -> "B and Q International"

  # The nice name may contain non-breaking spaces; search names use plain ones.
  @search_name = ensure_whitespace_is_ascii_space(@search_name)
end
55
+
56
# Returns a URL-friendly slug, e.g. "John Doe" -> "john-doe". It is derived
# from #search_name on first use and cached in @slug.
def slug
  return @slug unless @slug.nil?

  @slug = search_name.dup # start from a copy of the search name
  slugify

  @slug
end
64
+
65
# Returns the contact type: the one supplied or inferred while building the
# nice name, otherwise the best-effort guess made by contact_type_best_effort.
def contact_type
  # Building the nice name is what may infer @contact_type (from an adfix),
  # so it has to happen before we ask for the best guess.
  nice_name
  contact_type_best_effort
end
69
+
70
+ =begin These lines aren't used and aren't covered by specs
71
+ def name=(new_name)
72
+ initialize new_name, :contact_type => @contact_type
73
+ end
74
+
75
+ def contact_type=(new_contact_type)
76
+ initialize @name, :contact_type => new_contact_type
77
+ end
78
+
79
+ def to_hash
80
+ {
81
+ name: @name,
82
+ nice_name: @nice_name,
83
+ search_name: @search_name,
84
+ slug: @slug,
85
+ contact_type: @contact_type,
86
+ last_name: @last_name,
87
+ remainder: @remainder,
88
+ adfix_found: @adfix_found
89
+ }
90
+ end
91
+ =end
92
+
93
+ private
94
+
95
+ #--------------------------------------------------------
96
+ # Tidy up the name we've received
97
+ #--------------------------------------------------------
98
+
99
# Normalises the spacing of @nice_name:
#   * every run of whitespace (including Unicode whitespace such as U+00A0)
#     becomes a single ASCII space,
#   * every comma is followed by exactly one space,
#   * leading and trailing whitespace is removed.
#
# Whitespace is normalised *first* because String#strip and /\s/ do not
# recognise Unicode whitespace; stripping before normalising would leave a
# stray space where a leading/trailing non-breaking space used to be.
def tidy_spacing
  @nice_name = ensure_whitespace_is_ascii_space(@nice_name)
               .gsub(/,\s*/, ', ') # exactly one space after each comma
               .strip              # drop leading & trailing whitespace
end
105
+
106
+ # Remove spaces from groups of initials
107
# Closes up groups of initials in @nice_name, e.g. "I. B. M." -> "I.B.M."
def consolidate_initials
  # An initial followed by a space and another initial: drop the space and
  # make sure the first initial carries exactly one dot ("J R" -> "J.R").
  @nice_name.gsub!(/\b([a-z])\.* (?=[a-z][\. ])/i, '\1.')

  # A dotted group that is followed by a space must end with a dot
  # ("J.R Smith" -> "J.R. Smith").
  @nice_name.gsub!(/\b([a-z](?:\.[a-z])+)\.?(?= )/i, '\1.')
end
111
+
112
+ # An adfix is either a prefix or a suffix
113
# Strips every prefix and suffix ("adfix") from the name.
#
# While the name is still one string (@last_name is nil) both ends of
# @nice_name are stripped. Once it has been split, suffixes are taken from
# @last_name and prefixes from @remainder.
#
# Each loop runs at least once and repeats for as long as the previous call
# to remove_outermost_adfix reported (via @adfix_found) that it removed one.
def remove_adfixes
  if @last_name.nil?
    # Our name is still in one part, not two
    loop do
      @nice_name = remove_outermost_adfix(:suffix, @nice_name)
      break unless @adfix_found
    end

    loop do
      @nice_name = remove_outermost_adfix(:prefix, @nice_name)
      break unless @adfix_found
    end
  else
    # Our name is currently in two halves
    loop do
      @last_name = remove_outermost_adfix(:suffix, @last_name)
      break unless @adfix_found
    end

    loop do
      @remainder = remove_outermost_adfix(:prefix, @remainder)
      break unless @adfix_found
    end
  end
end
134
+
135
+ # Names in the form "Smith, John" need to be turned around to "John Smith"
136
# Detects names written as "Smith, John" and records the two halves in
# @last_name and @remainder so they can be reassembled as "John Smith".
# Organizations are left alone, as are names that do not split into exactly
# two parts on ", ".
def fixup_last_name_first
  return if @contact_type == :organization

  last_name, remainder, *extra = @nice_name.split(', ')
  return unless remainder && extra.empty? # exactly two parts

  @last_name = last_name # Sometimes the last name alone is all caps and we can name-case it
  @remainder = remainder
end
146
+
147
+ # Sometimes we end up with mismatched braces after adfix stripping
148
+ # e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
149
# Balances the round brackets in @nice_name.
#
# Missing closing brackets are appended and missing opening brackets are
# prepended. As many brackets are added as are needed to make the counts
# equal, so "((Ceres" becomes "((Ceres))" rather than staying unbalanced.
def fixup_mismatched_braces
  surplus_left = @nice_name.count('(') - @nice_name.count(')')

  if surplus_left > 0
    @nice_name += ')' * surplus_left
  elsif surplus_left < 0
    @nice_name = ('(' * -surplus_left) + @nice_name
  end
end
159
+
160
# Applies proper name casing where the input was written in a single case,
# and reassembles a split "Last, First" name into @nice_name.
def name_wrangle
  if @last_name
    # Name is in two halves: only the last name is re-cased, then the
    # halves are joined as "<remainder> <last name>".
    downcased = @last_name.downcase
    single_case = [downcased, @last_name.upcase].include?(@last_name)
    @last_name = name_case(downcased) if single_case

    @nice_name = "#{@remainder} #{@last_name}"
  else
    downcased = @nice_name.downcase
    all_caps = @nice_name == @nice_name.upcase
    all_lower = @nice_name == downcased

    # Some companies like to be all lowercase so don't mess with them
    @nice_name = name_case(downcased) if all_caps || (all_lower && @contact_type != :organization)
  end
end
176
+
177
+ # Conjoin compound names with non-breaking spaces
178
# Joins the words of compound names in @nice_name with non-breaking spaces
# so that later steps treat them as a single unit.
def use_nonbreaking_spaces_in_compound_names
  # Known last names (and company-name phrases) that contain spaces, not hyphens
  compound_names = [
    'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore',
    'Holmes à Court', 'Holmes a Court', 'Baron Cohen',
    'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
  ]

  compound_names.each do |compound_name|
    joined = compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE)
    @nice_name.gsub!(compound_name, joined)
  end

  # A name modifier in the middle of a name ("van", "de", ...) is glued to
  # the word that follows it: the whitespace after it becomes non-breaking.
  NAME_MODIFIERS.each do |modifier|
    @nice_name.gsub!(/([[:space:]]#{modifier})([[:space:]])/i, "\\1#{NONBREAKING_SPACE}")
  end
end
192
+
193
+ #--------------------------------------------------------
194
+ # Make search name from nice name
195
+ #--------------------------------------------------------
196
+
197
+ # Remove initials from personal names unless they are the only identifier.
198
+ # i.e. only remove initials if there's also a proper name there
199
# Drops initials from a person's search name ("John Q. Doe" -> "John Doe").
# The change is only kept when what remains still contains a space, i.e. the
# initials were not the only thing identifying the person.
def remove_initials
  return unless @contact_type == :person

  without_initials = @search_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')

  # If the name still has at least one space we're OK
  @search_name = without_initials if without_initials.include?(ASCII_SPACE)
end
207
+
208
# Reduces a person's search name to its first and last words,
# e.g. "Philip Seymour Hoffman" -> "Philip Hoffman".
# Names of one or two words, and non-person contacts, are left untouched.
def remove_middle_names
  return unless @contact_type == :person

  words = @search_name.split
  @search_name = "#{words.first} #{words.last}" if words.size > 2
end
214
+
215
# Removes the dot that follows a single letter at a word boundary in
# @search_name, e.g. "J.P.R. Williams" -> "JPR Williams".
def remove_dots_from_abbreviations
  @search_name.gsub!(/\b([a-z])\./i, '\1')
end
218
+
219
# Spells out symbols and abbreviations in @search_name so that equivalent
# names produce the same search name, e.g. "B&Q Intl" -> "B and Q International".
def standardize_words
  [
    [/ *& */, ' and '],               # ampersand (with any surrounding spaces) -> ' and '
    [/ *\+ */, ' plus '],             # plus sign (with any surrounding spaces) -> ' plus '
    [/\bintl\b/i, 'International']    # 'intl' in any case -> 'International'
  ].each { |pattern, replacement| @search_name.gsub!(pattern, replacement) }
end
224
+
225
+ #--------------------------------------------------------
226
+ # Make slug from search name
227
+ #--------------------------------------------------------
228
+
229
# Turns @slug (at this point a copy of the search name) into its slug form.
#
# ActiveSupport's Inflector#parameterize is deliberately not used because it
# gives up on non-latin characters; this class's own #parameterize is used
# instead.
def slugify
  @slug = parameterize(@slug)
end
236
+
237
+ #--------------------------------------------------------
238
+ # Initialization and utilities
239
+ #--------------------------------------------------------
240
+
241
# Sets up a tamer for the given name.
#
# name - the name to tame; nil is treated as an empty string
# args - options Hash:
#        :contact_type - anything responding to #to_sym (e.g. :person,
#                        'organization'); when absent or nil, @contact_type
#                        is not assigned here
#
# All derived values are reset so they are recomputed lazily on demand.
def initialize(name, args = {})
  requested_type = args[:contact_type]

  @name = name || ''
  @contact_type = requested_type.to_sym unless requested_type.nil?

  # Memoized results of #nice_name, #search_name and #slug
  @nice_name = @search_name = @slug = nil

  # The two halves of a name given as "Last, First"
  @last_name = @remainder = nil

  # Set by remove_outermost_adfix to report whether it removed anything
  @adfix_found = false
end
254
+
255
# Records the contact type, converting it to a Symbol.
#
# If a *different* contact type was already set, a diagnostic is written to
# standard error before it is overwritten. The diagnostic is plain text:
# the previous implementation called String#red, which is not provided by
# Ruby or by anything this file requires, so reaching that branch would
# have raised NoMethodError.
#
# contact_type - anything responding to #to_sym (e.g. :person, 'organization')
def set_contact_type(contact_type)
  contact_type_sym = contact_type.to_sym

  if @contact_type && @contact_type != contact_type_sym
    warn "Changing contact type of #{@name} from #{@contact_type} to #{contact_type}"
  end

  @contact_type = contact_type_sym
end
260
+
261
+ # If we don't know the contact type, what's our best guess?
262
# Returns the known contact type, or a guess when none is known yet:
# a name containing a space is assumed to be a person, a single word an
# organization. The guess is returned only; @contact_type is not changed.
def contact_type_best_effort
  return @contact_type if @contact_type

  @name.include?(ASCII_SPACE) ? :person : :organization
end
271
+
272
# Returns a copy of +string+ in which every run of whitespace is replaced by
# a single ASCII space.
#
# The POSIX bracket expression [[:space:]] is used because /\s/ doesn't match
# Unicode whitespace (e.g. the non-breaking space) in Ruby 1.9.3.
def ensure_whitespace_is_ascii_space(string)
  string.gsub(/[[:space:]]+/) { ASCII_SPACE }
end
275
+
276
+ # We pass to this routine either prefixes or suffixes
277
# Removes at most one adfix from one end of a name.
#
# adfix_type - :prefix or :suffix (selects the pattern set in ADFIX_PATTERNS)
# name_part  - the String to strip
#
# Sets @adfix_found to say whether anything was removed, and, when an adfix
# was found, records the contact type it implies via set_contact_type.
#
# Returns the name with the adfix (and any trailing comma it exposed)
# removed, or name_part itself when no adfix matched.
def remove_outermost_adfix(adfix_type, name_part)
  patterns = ADFIX_PATTERNS[adfix_type]
  guessed_type = contact_type_best_effort
  before, matched, after = name_part.partition(patterns[guessed_type])
  @adfix_found = matched.present?

  # If the contact type is indeterminate and the first attempt found nothing,
  # try again with the organization patterns.
  if @contact_type.nil? && !@adfix_found
    guessed_type = :organization
    before, matched, after = name_part.partition(patterns[guessed_type])
    @adfix_found = matched.present?
  end

  return name_part unless @adfix_found

  # A matching adfix is diagnostic: it tells us what kind of contact this is
  set_contact_type(guessed_type)

  # What is left of the name is on one side of the match or the other,
  # depending on whether this was a prefix or a suffix.
  # We'll also remove any trailing commas we've exposed.
  (before + after).gsub(/\s*,\s*$/, '')
end
307
+
308
+ # Original Version of NameCase:
309
+ # Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
310
+ # This module may be used/distributed/modified under the same terms as Perl itself
311
+ # http://dev.perl.org/licenses/ (GPL)
312
+ #
313
+ # Ruby Version:
314
+ # Copyright (c) Aaron Patterson 2006
315
+ # NameCase is distributed under the GPL license.
316
+ #
317
+ # Substantially modified for Xendata
318
+ # Improved in several areas, also now adds non-breaking spaces for
319
+ # compound names like "van der Pump"
320
# Converts an all-lowercase name to conventional name case, handling Mac/Mc
# names, "ff" names, name modifiers such as "van" and "de", and words without
# vowels.
#
# lowercase - the name, already downcased by the caller
#
# Returns a new String; the argument itself is left untouched (all the
# in-place substitutions below are applied to a private copy).
def name_case(lowercase)
  name = lowercase.dup # never mutate the caller's string
  name.gsub!(/\b\w/, &:upcase)               # Capitalise the first letter of every word
  name.gsub!(/\'\w\b/) { |c| c.downcase }    # Lowercase 's

  # Our list of terminal characters that indicate a non-celtic name used
  # to include o but we removed it because of MacMurdo.
  if name =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || name =~ /\bMc/
    name.gsub!(/\b(Ma?c)([A-Za-z]+)/) { $1 + $2.capitalize }

    # Fix Mac exceptions
    [
      'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
      'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
    ].each { |mac_name| name.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
  end

  # Fix names that are conventionally written with a lowercase "ff"
  [
    'Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes'
  ].each { |ff_name| name.gsub!(ff_name, ff_name.downcase) }

  # Fixes for name modifiers followed by space
  # Also replaces spaces with non-breaking spaces
  NAME_MODIFIERS.each do |modifier|
    name.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) do
      "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}"
    end
  end

  # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
  ['Dell', 'D'].each do |modifier|
    name.gsub!(/(.#{modifier}')(\w)/) { "#{$1.rstrip.downcase}#{$2}" }
  end

  # Upcase words with no vowels, e.g JPR Williams
  name.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { $1.upcase }
  # Except Ng
  name.gsub!(/\b(NG)\b/i) { $1.capitalize } # http://en.wikipedia.org/wiki/Ng

  name
end
360
+
361
# Builds a slug from a string while keeping non-latin characters.
#
# string - the String to convert (not modified)
# args   - options Hash:
#          :sep     - separator that replaces whitespace (default SLUG_DELIMITER)
#          :rfc3987 - when true, use the permissive RFC 3987 character filter
#                     instead of the compatible one (default false)
#          :filter  - Regexp matching the characters to delete; overrides
#                     the filter chosen by :rfc3987
#
# Returns the slug as a new, downcased String.
def parameterize(string, args = {})
  sep = args[:sep] || SLUG_DELIMITER
  rfc3987 = args[:rfc3987] || false
  filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)

  # First we unescape any pct-encoded characters. These might turn into
  # things we want to alter for the slug, like whitespace (e.g. %20).
  # This is done by hand, byte by byte, because URI.unescape is obsolete and
  # no longer exists in Ruby 3.0 and later.
  encoding = string.encoding
  parameterized_string = string.gsub(/%\h{2}/) do |escaped|
    [escaped[1, 2]].pack('H2').force_encoding(encoding)
  end

  # Then we change any whitespace into our separator character.
  # [[:space:]] also covers Unicode whitespace, which /\s/ does not match.
  parameterized_string.gsub!(/[[:space:]]+/, sep)

  # Then we strip any illegal characters out completely
  parameterized_string.gsub!(filter, '')

  # Make sure separators are not where they shouldn't be
  unless sep.empty?
    re_sep = Regexp.escape(sep)
    # No more than one of the separator in a row.
    parameterized_string.gsub!(/#{re_sep}{2,}/, sep)
    # Remove leading/trailing separator (anchored to the whole string).
    parameterized_string.gsub!(/\A#{re_sep}|#{re_sep}\z/, '')
  end

  # downcase if it's all latin
  parameterized_string.downcase
end
388
+
389
+ #--------------------------------------------------------
390
+ # Constants
391
+ #--------------------------------------------------------
392
+
393
# Whitespace characters and delimiters shared by the methods above.
# All are frozen so that no caller can alter them by accident.
NONBREAKING_SPACE = "\u00a0".freeze          # U+00A0, used to glue compound names together
ASCII_SPACE = "\u0020".freeze                # the ordinary space character
ADFIX_JOINERS = "[#{ASCII_SPACE}-]".freeze   # regex character class: what may separate an adfix from the name
SLUG_DELIMITER = '-'.freeze                  # default word separator in slugs
397
+
398
+ # Constants for parameterizing Unicode strings for IRIs
399
+ #
400
+ # Allowed characters in an IRI segment are defined by RFC 3987
401
+ # (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
402
+ #
403
+ # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
404
+ # / "@" )
405
+ # ; non-zero-length segment without any colon ":"
406
+ #
407
+ # iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
408
+ #
409
+ # pct-encoded = "%" HEXDIG HEXDIG
410
+ #
411
+ # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
412
+ # / "*" / "+" / "," / ";" / "="
413
+ #
414
+ # ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
415
+ # / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
416
+ # / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
417
+ # / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
418
+ # / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
419
+ # / %xD0000-DFFFD / %xE1000-EFFFD
420
+ #
421
+ # Note that we can't use Unicode code points above \uFFFF because of
422
+ # regex limitations, so we'll ignore ucschar above that point.
423
+ #
424
+ # We're using the most restrictive segment definition (isegment-nz-nc)
425
+ # to avoid any possible problems with the IRI that it one day might
426
+ # get placed in.
427
# Building blocks for the character filters. Each string is a fragment of a
# regex character class; single quotes keep sequences such as \uD7FF literal
# so that they are interpreted by the regex engine. Frozen because they are
# shared constants.
ALPHA = 'A-Za-z'.freeze
DIGIT = '0-9'.freeze
UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'.freeze
IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}".freeze
SUBDELIMS = '!$&\'\(\)\*+,;='.freeze
ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@".freeze # pct-encoded not needed

# Both filters match the characters that must be REMOVED from a slug.
FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/                # everything isegment-nz-nc does not allow
FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/     # stricter: letters, digits, '-', '_' and ucschar only
435
+
436
# Words that modify the name that follows them ("van", "de", "St." ...).
# Each entry is a regex fragment (it is interpolated into patterns by the
# methods that use it), which is why some contain character classes.
# Frozen because it is a shared constant.
NAME_MODIFIERS = [
  'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lr]', 'D[ao]s', 'El', 'La', 'L[eo]',
  'V[ao]n', 'Of', 'St[\.]?'
].freeze
440
+
441
+ # These are the prefixes and suffixes we want to remove
442
+ # If you add to the list, you can use spaces and dots where appropriate
443
+ # Ensure any single letters are followed by a dot because we'll add one to the string
444
+ # during processing, e.g. "y Cía." should be "y. Cía."
445
# The prefixes and suffixes to remove, grouped by position and contact type.
#
# For each position, :before and :after are the regex fragments that must
# surround the adfix: prefixes are anchored to the start of the string and
# followed by a joiner; suffixes are preceded by a joiner and anchored to the
# end of the string.
#
# Entries that were listed twice ('Ltd. Co.' and 'P.L.L.C.') appear once;
# the resulting patterns match exactly the same names.
ADFIXES = {
  prefix: {
    person: [
      'Baron', 'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame',
      'Doctor', 'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant',
      'Lord', 'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.',
      'Miss', 'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.',
      'Mr. and Mrs.', 'Mrs.', 'Msgr.', 'Prof.', 'Professor', 'Rev.',
      'Reverend', 'Sir', 'Sister', 'The Hon.', 'The Lady.', 'The Lord',
      'The Rt. Hon.'
    ],
    organization: [
      'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
    ],
    before: '\\A', after: ADFIX_JOINERS
  },
  suffix: {
    person: [
      'C.I.S.S.P.', 'B.Tech.', 'D.Phil.', 'B.Eng.', 'C.F.A.', 'D.B.E.', 'D.D.S.', 'Eng.D.', 'M.B.A.', 'M.B.E.',
      'M.E.P.', 'M.Eng.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'LL.B.',
      'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'O.K.',
      'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
    ],
    organization: [
      'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
      'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
      'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
      'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
      'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
      'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'I.L.L.C.',
      'K.G.a.A.', 'L.L.L.P.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.',
      'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.',
      'S.C.R.I.', 'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
      'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
      'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
      'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
      'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
      'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
      '股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
      'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
      'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
      'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
      'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
      'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
      'A/S', 'G/S', 'I/S', 'K/S', 'P/S'
    ],
    before: ADFIX_JOINERS, after: '\\z'
  }
}.freeze
494
+
495
# Compiled, case-insensitive patterns built from ADFIXES, indexed as
# ADFIX_PATTERNS[:prefix | :suffix][:person | :organization].
#
# For every entry a space becomes ' *' (optional spaces) and a dot becomes
# '\.*' (optional dots); the alternatives may be wrapped in any number of
# round brackets and must sit between the position's :before and :after
# fragments.
#
# NOTE(review): entries are interpolated without Regexp.escape, so brackets
# inside an entry (e.g. '(Pty.) Ltd.') act as regex groups rather than
# literal characters - confirm that this is intended.
ADFIX_PATTERNS = [:prefix, :suffix].each_with_object({}) do |adfix_type, all_patterns|
  adfix = ADFIXES[adfix_type]

  all_patterns[adfix_type] = [:person, :organization].each_with_object({}) do |contact_type, patterns|
    alternatives = adfix[contact_type]
                   .map { |entry| entry.gsub(ASCII_SPACE, ' *') }
                   .join('|')
                   .gsub('.', '\.*')

    patterns[contact_type] = /#{adfix[:before]}\(*(?:#{alternatives})\)*#{adfix[:after]}/i
  end
end
509
+ end
@@ -0,0 +1,3 @@
1
class NameTamer
  # The gem's version number; frozen so it cannot be altered at runtime.
  VERSION = '0.0.0'.freeze
end
@@ -0,0 +1,27 @@
1
# Gem specification for name-tamer.
# Make lib/ loadable so the version constant can be read from the source tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'name_tamer/version'

Gem::Specification.new do |spec|
  spec.name          = 'name-tamer'
  spec.version       = NameTamer::VERSION
  spec.authors       = ['Xenapto']
  spec.email         = ['developers@xenapto.com']
  spec.description   = 'Useful methods for taming names'
  # The example uses #search_name: the class has no #simple_name method.
  spec.summary       = "Example: NameTamer['Mr. John Q. Smith III, MD'].search_name # => John Smith"
  spec.homepage      = 'https://github.com/Xenapto/name-tamer'
  spec.license       = 'MIT'

  # Package exactly the files tracked by git
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features|coverage)/})
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'activesupport', '~> 3'

  spec.add_development_dependency 'bundler', '~> 1'
  spec.add_development_dependency 'rake', '~> 10'
  spec.add_development_dependency 'rspec', '~> 2'
  spec.add_development_dependency 'gem-release', '~> 0'
  spec.add_development_dependency 'simplecov', '~> 0'
end
@@ -0,0 +1,166 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+ require 'name_tamer'
4
+
5
describe NameTamer do
  # Fixture table. Each row holds:
  #   n:  the raw input name
  #   t:  the contact type passed to NameTamer (rows without it pass nil)
  #   nn: the expected nice_name
  #   sn: the expected search_name
  #   s:  the expected slug
  #
  # This is a plain local rather than a `let` so the rows are available while
  # the examples are being defined, which lets every row get its own examples.
  names = [
    { n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'JOHN SMITH', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'john smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'Smith, John', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'Smith, John', nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'John J Smith', t: :person, nn: 'John J Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'John J. Smith', t: :person, nn: 'John J. Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'SMITH, Mr John J.R.', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
    { n: ' SMITH, Mr John J. R. ', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'SMITH, Mr John J.R.', nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'Mr John J.R. SMITH JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
    { n: 'Mr John J.R. SMITH III,JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
    { n: 'Mr John J.R. SMITH JD', nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
    { n: 'Mr Jean-Michel SMITH JD', t: :person, nn: 'Jean-Michel SMITH', sn: 'Jean-Michel SMITH', s: 'jean-michel-smith' },
    { n: 'Mr Jean Michel-SMITH JD', nn: 'Jean Michel-SMITH', sn: 'Jean Michel-SMITH', s: 'jean-michel-smith' },
    { n: 'Dr Martha Lane Fox Ph.D', nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
    { n: 'Lane Fox Ph.D, Dr Martha', t: :person, nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
    { n: 'Baroness Lane-Fox of Lewisham', t: :person, nn: 'Lane-Fox of Lewisham', sn: 'Lane-Fox of Lewisham', s: 'lane-fox-of-lewisham' },
    { n: 'MACDONALDS LLC', nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
    { n: 'MACDONALDS LLC', t: :organization, nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
    { n: 'macdonalds', t: :organization, nn: 'macdonalds', sn: 'macdonalds', s: 'macdonalds' },
    { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization, nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
    { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
    { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
    { n: 'K.V.A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments', s: 'kva-instruments' },
    { n: 'K. V. A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments', s: 'kva-instruments' },
    { n: 'J.P. Rangaswami', nn: 'J.P. Rangaswami', sn: 'JP Rangaswami', s: 'jp-rangaswami' },
    { n: 'J. P. Rangaswami', nn: 'J.P. Rangaswami', sn: 'JP Rangaswami', s: 'jp-rangaswami' },
    { n: 'J P Rangaswami', nn: 'J.P. Rangaswami', sn: 'JP Rangaswami', s: 'jp-rangaswami' },
    { n: 'JP Rangaswami', nn: 'JP Rangaswami', sn: 'JP Rangaswami', s: 'jp-rangaswami' },
    { n: 'Audrey fforbes', nn: 'Audrey fforbes', sn: 'Audrey fforbes', s: 'audrey-fforbes' },
    { n: 'J. Arthur Rank', t: :person, nn: 'J. Arthur Rank', sn: 'Arthur Rank', s: 'arthur-rank' },
    { n: 'PHILIP NG', t: :person, nn: 'Philip Ng', sn: 'Philip Ng', s: 'philip-ng' },
    { n: 'Super R&D', nn: 'Super R&D', sn: 'Super R and D', s: 'super-r-and-d' },
    { n: 'Harry Dean Stanton', t: :person, nn: 'Harry Dean Stanton', sn: 'Harry Stanton', s: 'harry-stanton' },
    { n: 'Union Square Ventures', t: :organization, nn: 'Union Square Ventures', sn: 'Union Square Ventures', s: 'union-square-ventures' },
    { n: 'J Arthur Rank Inc.', t: :organization, nn: 'J Arthur Rank', sn: 'J Arthur Rank', s: 'j-arthur-rank' },
    { n: 'Jean VAN DER VELDE', t: :person, nn: 'Jean VAN DER VELDE', sn: 'Jean VAN DER VELDE', s: 'jean-van-der-velde' },
    { n: 'Al Capone', t: :person, nn: 'Al Capone', sn: 'Al Capone', s: 'al-capone' },
    { n: 'Fahd al-Saud', t: :person, nn: 'Fahd al-Saud', sn: 'Fahd al-Saud', s: 'fahd-al-saud' },
    { n: 'Mehmet al Auouiby', t: :person, nn: 'Mehmet al Auouiby', sn: 'Mehmet al Auouiby', s: 'mehmet-al-auouiby' },
    { n: 'Macquarie Bank', t: :organization, nn: 'Macquarie Bank', sn: 'Macquarie Bank', s: 'macquarie-bank' },
    { n: "COMMEDIA DELL'ARTE", t: :organization, nn: "Commedia dell'Arte", sn: "Commedia dell'Arte", s: 'commedia-dellarte' },
    { n: 'Della Smith', t: :person, nn: 'Della Smith', sn: 'Della Smith', s: 'della-smith' },
    { n: 'Antonio DELLA MONTEVERDE', nn: 'Antonio DELLA MONTEVERDE', sn: 'Antonio DELLA MONTEVERDE', s: 'antonio-della-monteverde' },
    { n: 'Tony St Clair', t: :person, nn: 'Tony St Clair', sn: 'Tony St Clair', s: 'tony-st-clair' },
    { n: 'Seamus O\'Malley', t: :person, nn: 'Seamus O\'Malley', sn: 'Seamus O\'Malley', s: 'seamus-omalley' },
    { n: 'SeedCamp', t: :organization, nn: 'SeedCamp', sn: 'SeedCamp', s: 'seedcamp' },
    { n: 'Peter Van Der Auwera', t: :person, nn: 'Peter Van Der Auwera', sn: 'Peter Van Der Auwera', s: 'peter-van-der-auwera' },
    { n: 'VAN DER AUWERA, Peter', t: :person, nn: 'Peter van der Auwera', sn: 'Peter van der Auwera', s: 'peter-van-der-auwera' },
    { n: 'Li Fan', t: :person, nn: 'Li Fan', sn: 'Li Fan', s: 'li-fan' },
    { n: 'Fan Li', t: :person, nn: 'Fan Li', sn: 'Fan Li', s: 'fan-li' },
    { n: 'Levi Strauss & Co.', nn: 'Levi Strauss', sn: 'Levi Strauss', s: 'levi-strauss' },
    { n: 'Standard & Poor\'s', t: :organization, nn: 'Standard & Poor\'s', sn: 'Standard and Poor\'s', s: 'standard-and-poors' },
    { n: 'I B M Services', t: :organization, nn: 'I.B.M. Services', sn: 'IBM Services', s: 'ibm-services' },
    { n: 'Sean Park DDS', t: :person, nn: 'Sean Park', sn: 'Sean Park', s: 'sean-park' },
    { n: 'SEAN MACLISE PARK', t: :person, nn: 'Sean Maclise Park', sn: 'Sean Park', s: 'sean-park' },
    { n: 'AJ Hanna', t: :person, nn: 'AJ Hanna', sn: 'AJ Hanna', s: 'aj-hanna' },
    { n: 'Free & Clear', t: :organization, nn: 'Free & Clear', sn: 'Free and Clear', s: 'free-and-clear' },
    { n: 'Adam D\'ANGELO', t: :person, nn: 'Adam D\'ANGELO', sn: 'Adam D\'ANGELO', s: 'adam-dangelo' },
    { n: 'MACKENZIE, Doug', t: :person, nn: 'Doug Mackenzie', sn: 'Doug Mackenzie', s: 'doug-mackenzie' },
    { n: 'Up + Down', t: :organization, nn: 'Up + Down', sn: 'Up plus Down', s: 'up-plus-down' },
    { n: 'San Francisco Ltd', t: :organization, nn: 'San Francisco', sn: 'San Francisco', s: 'san-francisco' },
    { n: 'AT&T', t: :organization, nn: 'At&T', sn: 'At and T', s: 'at-and-t' },
    { n: 'SMITH, John, Jr.', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
    { n: 'I Heart Movies', t: :organization, nn: 'I Heart Movies', sn: 'I Heart Movies', s: 'i-heart-movies' },
    { n: 'Y Combinator', t: :organization, nn: 'Y Combinator', sn: 'Y Combinator', s: 'y-combinator' },
    { n: 'Ben\'s 10 Hens', t: :organization, nn: 'Ben\'s 10 Hens', sn: 'Ben\'s 10 Hens', s: 'bens-10-hens' },
    { n: 'Elazer Edelman, MD , PhD', t: :person, nn: 'Elazer Edelman', sn: 'Elazer Edelman', s: 'elazer-edelman' },
    { n: 'Judith M. O\'Brien', t: :person, nn: 'Judith M. O\'Brien', sn: 'Judith O\'Brien', s: 'judith-obrien' },
    { n: 'MORRISON, Van', t: :person, nn: 'Van Morrison', sn: 'Van Morrison', s: 'van-morrison' },
    { n: 'i/o Ventures', t: :organization, nn: 'i/o Ventures', sn: 'i/o Ventures', s: 'io-ventures' },
    { n: 'C T Corporation System', t: :person, nn: 'C.T. Corporation System', sn: 'CT Corporation System', s: 'ct-corporation-system' },
    { n: 'C.T. Corporation System', t: :person, nn: 'C.T. Corporation System', sn: 'CT Corporation System', s: 'ct-corporation-system' },
    { n: 'CT Corporation System', t: :person, nn: 'CT Corporation System', sn: 'CT Corporation System', s: 'ct-corporation-system' },
    { n: 'Corporation Service Company', t: :person, nn: 'Corporation Service Company', sn: 'Corporation Service Company', s: 'corporation-service-company' },
    { n: 'Kurshuni,Inc.', t: :organization, nn: 'Kurshuni', sn: 'Kurshuni', s: 'kurshuni' },
    { n: 'Cellular Inc-LLC', t: :organization, nn: 'Cellular', sn: 'Cellular', s: 'cellular' },
    { n: 'Emtec (AZ) Limited', t: :organization, nn: 'Emtec (AZ)', sn: 'Emtec (AZ)', s: 'emtec-az' },
    { n: 'Emtec (LLC) Limited', t: :organization, nn: 'Emtec', sn: 'Emtec', s: 'emtec' },
    { n: 'Emtec (XYZ LLC) Limited', t: :organization, nn: 'Emtec (XYZ)', sn: 'Emtec (XYZ)', s: 'emtec-xyz' },
    { n: 'Tao Ma', t: :person, nn: 'Tao', sn: 'Tao', s: 'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
    { n: '(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn: 'Courtney J. Miller', sn: 'Courtney Miller', s: 'courtney-miller' },
    { n: '(Mr Woo) The Window Cleaner', t: :person, nn: '(Woo) The Window Cleaner', sn: '(Woo) Cleaner', s: 'woo-cleaner' },
    { n: 'DOMINIC MACMURDO', t: :person, nn: 'Dominic MacMurdo', sn: 'Dominic MacMurdo', s: 'dominic-macmurdo' },
    { n: 'DOMINIC MACEDO', t: :person, nn: 'Dominic Macedo', sn: 'Dominic Macedo', s: 'dominic-macedo' },
    { n: 'DOMINIC MACDONALD', t: :person, nn: 'Dominic MacDonald', sn: 'Dominic MacDonald', s: 'dominic-macdonald' },
    { n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
    { n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
    { n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
    { n: 'xxx%52zzz', t: :organization, nn: 'xxx%52zzz', sn: 'xxx%52zzz', s: 'xxxrzzz' },
    { n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов', s: 'Евгений-Болотнов' },
    { n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
    { n: 'ゴルフスタジアム', t: :organization, nn: 'ゴルフスタジアム', sn: 'ゴルフスタジアム', s: 'ゴルフスタジアム' },
    { n: '我摘', t: :organization, nn: '我摘', sn: '我摘', s: '我摘' },
    { n: 'Καρατζάς Στέφανος', t: :organization, nn: 'Καρατζάς Στέφανος', sn: 'Καρατζάς Στέφανος', s: 'Καρατζάς-Στέφανος' },
    { n: 'โชติวัน วัฒนลาภ', t: :organization, nn: 'โชติวัน วัฒนลาภ', sn: 'โชติวัน วัฒนลาภ', s: 'โชติวัน-วัฒนลาภ' },
    { n: '張 續寶', t: :organization, nn: '張 續寶', sn: '張 續寶', s: '張-續寶' },
    { n: 'Юрий Гайдук', t: :organization, nn: 'Юрий Гайдук', sn: 'Юрий Гайдук', s: 'Юрий-Гайдук' },
    { n: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', s: '☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
    { n: '♠ KlasikB0i ♠', t: :organization, nn: '♠ KlasikB0i ♠', sn: '♠ KlasikB0i ♠', s: '♠-klasikb0i-♠' },
    { n: '* Shorusan *', t: :organization, nn: '* Shorusan *', sn: '* Shorusan *', s: 'shorusan' },
    { n: '项目谷', t: :organization, nn: '项目谷', sn: '项目谷', s: '项目谷' },
    { n: 'ООО "Инновационные полимерные адгезивы"', t: :organization, nn: 'ООО "Инновационные полимерные адгезивы"', sn: 'ООО "Инновационные полимерные адгезивы"', s: 'ООО-Инновационные-полимерные-адгезивы' },
    { n: 'عبدالله ...', t: :organization, nn: 'عبدالله ...', sn: 'عبدالله ...', s: 'عبدالله' },
    { n: 'กมลชนก ทิศไธสง', t: :organization, nn: 'กมลชนก ทิศไธสง', sn: 'กมลชนก ทิศไธสง', s: 'กมลชนก-ทิศไธสง' },
    { n: 'יוֹ אָב', t: :organization, nn: 'יוֹ אָב', sn: 'יוֹ אָב', s: 'יוֹ-אָב' },
    { n: 'יגאל נימני', t: :organization, nn: 'יגאל נימני', sn: 'יגאל נימני', s: 'יגאל-נימני' },
    { n: 'ניסים דניאלי', t: :organization, nn: 'ניסים דניאלי', sn: 'ניסים דניאלי', s: 'ניסים-דניאלי' },
    { n: 'مساء الخير', t: :organization, nn: 'مساء الخير', sn: 'مساء الخير', s: 'مساء-الخير' },
    { n: 'محمود ياسر', t: :organization, nn: 'محمود ياسر', sn: 'محمود ياسر', s: 'محمود-ياسر' },
    { n: 'קובי ביטר', t: :organization, nn: 'קובי ביטר', sn: 'קובי ביטר', s: 'קובי-ביטר' },
    { n: 'الملاك الحارس', t: :organization, nn: 'الملاك الحارس', sn: 'الملاك الحارس', s: 'الملاك-الحارس' },
    { n: 'কবির হাসান', t: :organization, nn: 'কবির হাসান', sn: 'কবির হাসান', s: 'কবির-হাসান' }
  ]

  # One example per fixture row and per expectation. The description is built
  # from the row itself; a bare `name` at this scope would resolve to the
  # example group's class name instead of the fixture. Separate examples also
  # mean a failing row no longer hides the rows after it.
  names.each do |name_data|
    name = name_data[:n]
    contact_type = name_data[:t]
    label = "#{name.inspect} (contact type: #{contact_type.inspect})"

    it "makes a slug from #{label}" do
      NameTamer[name, contact_type: contact_type].slug.should == name_data[:s]
    end

    it "makes a nice name from #{label}" do
      NameTamer[name, contact_type: contact_type].nice_name.should == name_data[:nn]
    end

    it "makes a searchable name from #{label}" do
      NameTamer[name, contact_type: contact_type].search_name.should == name_data[:sn]
    end
  end
end
149
+
150
# Checks the contact type NameTamer reports when none is passed in.
describe 'contact type inference' do
  # Raw name => contact type NameTamer is expected to report for it.
  inferred_types = {
    'Mr. John Smith'    => :person,
    'Di Doo Doo d.o.o.' => :organization,
    'DiDooDoo'          => :organization,
    'John Smith'        => :person
  }

  inferred_types.each do |raw_name, expected_type|
    # "an organization" but "a person".
    article = expected_type == :organization ? 'an' : 'a'

    it %(infers that "#{raw_name}" is #{article} #{expected_type}) do
      NameTamer[raw_name].contact_type.should eq(expected_type)
    end
  end
end
@@ -0,0 +1,16 @@
1
# Spec bootstrap: start SimpleCov for the coverage report, then configure RSpec.
require 'simplecov'
SimpleCov.start

RSpec.configure do |rspec|
  # Randomise the order in which specs run so that order dependencies show up.
  # To debug one, pin the order by passing the seed printed after each run:
  #     --seed 1234
  rspec.order = "random"

  # Settings added by hand
  rspec.color_enabled = true
  rspec.tty = true
  rspec.formatter = :documentation
end
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: name-tamer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Xenapto
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '10'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '10'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: gem-release
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ~>
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Useful methods for taming names
98
+ email:
99
+ - developers@xenapto.com
100
+ executables:
101
+ - name-tamer
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - .env
106
+ - .gitignore
107
+ - .ruby-version
108
+ - Gemfile
109
+ - Gemfile.lock
110
+ - LICENSE
111
+ - README.md
112
+ - Rakefile
113
+ - bin/name-tamer
114
+ - lib/name_tamer.rb
115
+ - lib/name_tamer/version.rb
116
+ - name-tamer.gemspec
117
+ - spec/name_tamer_spec.rb
118
+ - spec/spec_helper.rb
119
+ homepage: https://github.com/Xenapto/name-tamer
120
+ licenses:
121
+ - MIT
122
+ metadata: {}
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 2.2.2
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: ! 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => John
143
+ Smith'
144
+ test_files:
145
+ - spec/name_tamer_spec.rb
146
+ - spec/spec_helper.rb