name-tamer 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YzRiZjg0NGY3ODliMzVjZWM0ZDBhMjExYjNjODMyMmJmZTExNDRiMA==
5
+ data.tar.gz: !binary |-
6
+ ZmZmODdkOGIwYzBlN2I3MjM3OTVjNzBhZGM0ZjQ2MTk0MmQ3MmIyMg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YzhhNzUxMTY5OWQ4YjkxYzUxNTMxNTBhYjNkZGMyMDE1MDFmNDNkMmY2Y2Uy
10
+ ZWVmODhiMGM3OTBjNWE3ZmI0NGI0Yjk4MTRiZWYyMWRkZTY0NjBhNzI5NWNm
11
+ Nzk4ODliZjU3OTc3YzY4MzdkYWYyYzU4ODE4ZGJhNjkwNmE1MDU=
12
+ data.tar.gz: !binary |-
13
+ YWNiMjZhODI5NDk2MmI1Y2E4MWUyYTg4NTk0M2ExMGFkNzhhZjdlZDRiYzJm
14
+ ZDI0ZjNiYmJlMWNiYjRmOTYxMGU5MTdlMTZjYzA0YTZjNjdkYTliOTc1YWRj
15
+ ZmM1ZWYwZTA5ZjQ5NDc2OGQ4NzA1NTgwYzM0MTNlYTFjMzgzMDc=
data/.env ADDED
@@ -0,0 +1 @@
1
+ PATH=/home/build/.rvm/gems/ruby-2.1.1/bin:/home/build/.rvm/gems/ruby-2.1.1@global/bin:/home/build/.rvm/rubies/ruby-2.1.1/bin:/home/build/.rvm/bin:/usr/local/heroku/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/build/.rvm/gems/ruby-2.1.1@global/bin/bundle
@@ -0,0 +1,24 @@
1
+ *.rbc
2
+ capybara-*.html
3
+ .rspec
4
+ /log
5
+ /tmp
6
+ /db/*.sqlite3
7
+ /public/system
8
+ /coverage/
9
+ /spec/tmp
10
+ **.orig
11
+ rerun.txt
12
+ pickle-email-*.html
13
+ config/initializers/secret_token.rb
14
+ config/secrets.yml
15
+
16
+ ## Environment normalisation:
17
+ /.bundle
18
+ /vendor/bundle
19
+
20
+ # these should all be checked in to normalise the environment:
21
+ # Gemfile.lock, .ruby-version, .ruby-gemset
22
+
23
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
24
+ .rvmrc
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in name-tamer.gemspec
4
+ gemspec
@@ -0,0 +1,42 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ name-tamer (0.0.0)
5
+ activesupport (~> 3)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ activesupport (3.2.13)
11
+ i18n (= 0.6.1)
12
+ multi_json (~> 1.0)
13
+ diff-lcs (1.2.5)
14
+ docile (1.1.3)
15
+ gem-release (0.7.3)
16
+ i18n (0.6.1)
17
+ multi_json (1.10.1)
18
+ rake (10.3.2)
19
+ rspec (2.14.1)
20
+ rspec-core (~> 2.14.0)
21
+ rspec-expectations (~> 2.14.0)
22
+ rspec-mocks (~> 2.14.0)
23
+ rspec-core (2.14.8)
24
+ rspec-expectations (2.14.5)
25
+ diff-lcs (>= 1.1.3, < 2.0)
26
+ rspec-mocks (2.14.6)
27
+ simplecov (0.8.2)
28
+ docile (~> 1.1.0)
29
+ multi_json
30
+ simplecov-html (~> 0.8.0)
31
+ simplecov-html (0.8.0)
32
+
33
+ PLATFORMS
34
+ ruby
35
+
36
+ DEPENDENCIES
37
+ bundler (~> 1)
38
+ gem-release (~> 0)
39
+ name-tamer!
40
+ rake (~> 10)
41
+ rspec (~> 2)
42
+ simplecov (~> 0)
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Xenapto
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,48 @@
1
+ # NameTamer
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/name-tamer.png)](http://badge.fury.io/rb/name-tamer)
4
+ [![Code Climate](https://codeclimate.com/github/Xenapto/name-tamer.png)](https://codeclimate.com/github/Xenapto/name-tamer)
5
+ [![Dependency Status](https://gemnasium.com/Xenapto/name-tamer.png)](https://gemnasium.com/Xenapto/name-tamer)
6
+ ![build status](https://circleci.com/gh/Xenapto/name-tamer.png?circle-token=dd3a51864d33f6506b18a355bc901b90c0df3b3b)
7
+
8
+ NameTamer: useful methods for taming names
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'name-tamer'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install name-tamer
23
+
24
+ ## Usage
25
+
26
+ Examples:
27
+
28
+ ```ruby
29
+ NameTamer['Mr. John Q. Smith III, MD'].search_name # => John Smith
30
+ ```
31
+
32
+ ```ruby
33
+ name_tamer = NameTamer['Mr. John Q. Smith III, MD']
34
+ name_tamer.slug # => john-smith
35
+ name_tamer.nice_name # => John Q. Smith
36
+ ```
37
+
38
+ ## Contributing
39
+
40
+ 1. Fork it
41
+ 1. Create your feature branch (`git checkout -b my-new-feature`)
42
+ 1. Commit your changes (`git commit -am 'Add some feature'`)
43
+ 1. Push to the branch (`git push origin my-new-feature`)
44
+ 1. Create new Pull Request
45
+
46
+ ## Acknowledgements
47
+
48
+ 1. Thanks to Ryan Bigg for the guide to making your first gem https://github.com/radar/guides/blob/master/gem-development.md
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require 'name_tamer/cli'
3
+ NameTamer::CLI.start
@@ -0,0 +1,509 @@
1
+ # encoding: utf-8
2
+
3
+ # References:
4
+ # http://www.w3.org/International/questions/qa-personal-names
5
+ # https://github.com/berkmancenter/namae
6
+ # https://github.com/mericson
7
+ # http://en.wikipedia.org/wiki/Types_of_business_entity
8
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
9
+ # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
10
+ # http://en.wikipedia.org/wiki/Nobiliary_particle
11
+ # http://en.wikipedia.org/wiki/Spanish_naming_customs
12
+ # http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
13
+ require 'active_support/core_ext/object'
14
+
15
+ class NameTamer
16
+ attr_reader :name, :contact_type
17
+
18
# Shorthand constructor: NameTamer['John Smith'] builds the same object as
# NameTamer.new('John Smith'). Both arguments are forwarded to +new+ untouched.
def self.[](name, args = {})
  new(name, args)
end
23
+
24
# Returns the presentable form of the name. The value is built on the first
# call by running the clean-up steps below in order, then cached in @nice_name.
def nice_name
  return @nice_name unless @nice_name.nil?

  @nice_name = @name.dup # work on a copy of the name we were given

  tidy_spacing                             # " John   Smith " -> "John Smith"
  consolidate_initials                     # "I. B. M." -> "I.B.M."
  remove_adfixes                           # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
  fixup_last_name_first                    # "Smith, John" -> "John Smith"
  fixup_mismatched_braces                  # "Ceres (AZ" -> "Ceres (AZ)"
  remove_adfixes                           # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
  name_wrangle                             # proper name case and non-breaking spaces
  use_nonbreaking_spaces_in_compound_names

  @nice_name
end
40
+
41
# Returns the simplified form of the name intended for searching. It is derived
# from nice_name on the first call and cached in @search_name.
def search_name
  return @search_name unless @search_name.nil?

  @search_name = nice_name.dup # start from a copy of the nice name

  remove_initials                # "John Q. Doe" -> "John Doe"
  remove_middle_names            # "Philip Seymour Hoffman" -> "Philip Hoffman"
  remove_dots_from_abbreviations # "J.P.R. Williams" -> "JPR Williams"
  standardize_words              # "B&Q Intl" -> "B and Q International"

  @search_name = ensure_whitespace_is_ascii_space(@search_name)
end
55
+
56
# Returns the slug form of the name ("John Doe" -> "john-doe"). It is derived
# from search_name on the first call and cached in @slug.
def slug
  return @slug unless @slug.nil?

  @slug = search_name.dup # start from a copy of the search name
  slugify
  @slug
end
64
+
65
# Returns the contact type, falling back to a best guess when none is known.
def contact_type
  # Building the nice name is what infers the contact type from any adfixes,
  # so make sure that has happened (it is cached once built).
  nice_name if @nice_name.nil?
  contact_type_best_effort
end
69
+
70
+ =begin These lines aren't used and aren't covered by specs
71
+ def name=(new_name)
72
+ initialize new_name, :contact_type => @contact_type
73
+ end
74
+
75
+ def contact_type=(new_contact_type)
76
+ initialize @name, :contact_type => new_contact_type
77
+ end
78
+
79
+ def to_hash
80
+ {
81
+ name: @name,
82
+ nice_name: @nice_name,
83
+ search_name: @search_name,
84
+ slug: @slug,
85
+ contact_type: @contact_type,
86
+ last_name: @last_name,
87
+ remainder: @remainder,
88
+ adfix_found: @adfix_found
89
+ }
90
+ end
91
+ =end
92
+
93
+ private
94
+
95
+ #--------------------------------------------------------
96
+ # Tidy up the name we've received
97
+ #--------------------------------------------------------
98
+
99
# Normalises the spacing of @nice_name: exactly one space after each comma,
# no leading or trailing whitespace, and every run of whitespace turned into
# a single ASCII space.
def tidy_spacing
  comma_spaced = @nice_name.gsub(/,\s*/, ', ')
  @nice_name = ensure_whitespace_is_ascii_space(comma_spaced.strip)
end
105
+
106
# Closes up groups of initials in @nice_name, e.g. "I. B. M." -> "I.B.M."
def consolidate_initials
  # An initial followed by a space and another initial: drop the space and
  # make sure the initial carries exactly one dot.
  @nice_name.gsub!(/\b([a-z])\.* (?=[a-z][\. ])/i, '\1.')
  # A dotted group that is followed by a space must end with a dot.
  @nice_name.gsub!(/\b([a-z](?:\.[a-z])+)\.?(?= )/i, '\1.')
end
111
+
112
# Strips every prefix and suffix (collectively "adfixes") from the name.
# Each loop keeps peeling the outermost adfix until remove_outermost_adfix
# reports, through @adfix_found, that nothing more was removed.
def remove_adfixes
  if @last_name.nil?
    # The name is still a single string: suffixes first, then prefixes.
    loop do
      @nice_name = remove_outermost_adfix(:suffix, @nice_name)
      break unless @adfix_found
    end

    loop do
      @nice_name = remove_outermost_adfix(:prefix, @nice_name)
      break unless @adfix_found
    end
  else
    # The name has been split in two: suffixes come off the last name and
    # prefixes come off the remainder.
    loop do
      @last_name = remove_outermost_adfix(:suffix, @last_name)
      break unless @adfix_found
    end

    loop do
      @remainder = remove_outermost_adfix(:prefix, @remainder)
      break unless @adfix_found
    end
  end
end
134
+
135
# Detects names written "Smith, John" and records the two halves in @last_name
# and @remainder so they can be reassembled later as "John Smith".
# Organizations are left alone, as is any name without exactly one ", ".
def fixup_last_name_first
  return if @contact_type == :organization

  halves = @nice_name.split(', ')
  return unless halves.size == 2

  # Sometimes the last name alone is all caps and we can name-case it
  @last_name, @remainder = halves
end
146
+
147
# Sometimes we end up with mismatched braces after adfix stripping
# e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
# Adds one closing brace at the end, or one opening brace at the start,
# depending on which kind @nice_name has fewer of.
def fixup_mismatched_braces
  case @nice_name.count('(') <=> @nice_name.count(')')
  when 1  then @nice_name = "#{@nice_name})"
  when -1 then @nice_name = "(#{@nice_name}"
  end
end
159
+
160
# Fixes the capitalisation of names that arrive entirely in upper or lower
# case, and reassembles a name that was split into @remainder and @last_name.
def name_wrangle
  if @last_name.nil?
    all_upper = @nice_name == @nice_name.upcase
    all_lower = @nice_name == @nice_name.downcase

    # Some companies like to be all lowercase so don't mess with them
    recase = all_upper || (all_lower && @contact_type != :organization)
    @nice_name = name_case(@nice_name.downcase) if recase
  else
    single_case = [@last_name.upcase, @last_name.downcase].include?(@last_name)
    @last_name = name_case(@last_name.downcase) if single_case

    @nice_name = "#{@remainder} #{@last_name}"
  end
end
176
+
177
# Conjoin compound names with non-breaking spaces so that they stay together
# as one unit in @nice_name.
def use_nonbreaking_spaces_in_compound_names
  # Fix known last names that have spaces (not hyphens!)
  compound_names = [
    'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore',
    'Holmes à Court', 'Holmes a Court', 'Baron Cohen',
    'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
  ]

  compound_names.each do |compound_name|
    conjoined = compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE)
    @nice_name.gsub!(compound_name, conjoined)
  end

  # A name modifier in the middle of the name is tied to the word after it.
  NAME_MODIFIERS.each do |modifier|
    @nice_name.gsub!(/([[:space:]]#{modifier})([[:space:]])/i, "\\1#{NONBREAKING_SPACE}")
  end
end
192
+
193
+ #--------------------------------------------------------
194
+ # Make search name from nice name
195
+ #--------------------------------------------------------
196
+
197
# Remove initials from personal names unless they are the only identifier.
# i.e. only remove initials if there's also a proper name there
def remove_initials
  return unless @contact_type == :person

  without_initials = @search_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')

  # Keep the stripped version only if it still has at least one space
  @search_name = without_initials if without_initials.include?(ASCII_SPACE)
end
207
+
208
# For people only: reduces @search_name to its first and last words,
# e.g. "Philip Seymour Hoffman" -> "Philip Hoffman".
def remove_middle_names
  return unless @contact_type == :person

  words = @search_name.split
  @search_name = "#{words.first} #{words.last}" if words.size > 2
end
214
+
215
# Drops the dot after any single letter that starts a word in @search_name,
# e.g. "J.P.R. Williams" -> "JPR Williams".
def remove_dots_from_abbreviations
  @search_name.gsub!(/\b([a-z])\./i, '\1')
end
218
+
219
# Spells out symbols and abbreviations in @search_name,
# e.g. "B&Q Intl" -> "B and Q International".
def standardize_words
  substitutions = [
    [/ *& */, ' and '],             # replace ampersand characters with ' and '
    [/ *\+ */, ' plus '],           # replace plus signs with ' plus '
    [/\bintl\b/i, 'International']  # replace 'intl' with 'International'
  ]

  substitutions.each { |pattern, replacement| @search_name.gsub!(pattern, replacement) }
end
224
+
225
+ #--------------------------------------------------------
226
+ # Make slug from search name
227
+ #--------------------------------------------------------
228
+
229
# Turns @slug into its URL-friendly form.
def slugify
  # ActiveSupport's Inflector::parameterize just gives up with non-latin
  # characters, so this class's own parameterize method is used instead.
  @slug = parameterize(@slug)
end
236
+
237
+ #--------------------------------------------------------
238
+ # Initialization and utilities
239
+ #--------------------------------------------------------
240
+
241
# Stores the raw name and resets all derived state.
#
# name - the name to tame; nil is treated as an empty string.
# args - options hash; :contact_type (anything responding to to_sym) is
#        recorded when given and otherwise left for later inference.
def initialize(name, args = {})
  @name = name || ''

  requested_type = args[:contact_type]
  @contact_type = requested_type.to_sym unless requested_type.nil?

  # Derived forms are computed lazily by nice_name, search_name and slug
  @nice_name = @search_name = @slug = nil

  # The two halves of a "Last, First" name, when one has been detected
  @last_name = @remainder = nil

  @adfix_found = false
end
254
+
255
# Records the contact type, printing a notice when that overrides a
# different type that was already set.
#
# contact_type - anything responding to to_sym (e.g. :person, 'organization').
#
# Returns the new contact type as a Symbol.
def set_contact_type(contact_type)
  contact_type_sym = contact_type.to_sym

  unless @contact_type.nil? || @contact_type == contact_type_sym
    message = "Changing contact type of #{@name} from #{@contact_type} to #{contact_type}"
    # String#red is not part of Ruby or ActiveSupport; only colour the notice
    # when something else has defined it, rather than raising NoMethodError.
    message = message.red if message.respond_to?(:red)
    puts message
  end

  @contact_type = contact_type_sym
end
260
+
261
# If we don't know the contact type, what's our best guess?
# A known @contact_type always wins. Otherwise a name of just one word is
# assumed to be an organization and anything longer is assumed to be a person.
def contact_type_best_effort
  @contact_type || (@name.include?(ASCII_SPACE) ? :person : :organization)
end
271
+
272
# Returns a copy of +string+ in which every run of whitespace is replaced by
# a single ASCII space.
def ensure_whitespace_is_ascii_space(string)
  # [[:space:]] is used because /\s/ doesn't match Unicode whitespace in Ruby 1.9.3
  whitespace_run = /[[:space:]]+/
  string.gsub(whitespace_run, ASCII_SPACE)
end
275
+
276
# Removes one prefix or suffix from +name_part+ and returns what is left.
#
# adfix_type - :prefix or :suffix, selecting the set of patterns to use.
# name_part  - the string to strip.
#
# Sets @adfix_found to say whether anything was removed. When something was,
# the contact type the matching pattern belongs to is recorded as well.
def remove_outermost_adfix(adfix_type, name_part)
  patterns = ADFIX_PATTERNS[adfix_type]
  guessed_type = contact_type_best_effort
  before, found, after = name_part.partition(patterns[guessed_type])
  @adfix_found = found.present?

  # If the contact type is indeterminate and we didn't find a diagnostic adfix
  # for our first guess then try again for an organization
  if @contact_type.nil? && !@adfix_found
    guessed_type = :organization
    before, found, after = name_part.partition(patterns[guessed_type])
    @adfix_found = found.present?
  end

  return name_part unless @adfix_found

  # We've found a diagnostic adfix, so it settles the contact type
  set_contact_type(guessed_type)

  # What remains is whatever sat on either side of the match (one side is
  # empty, depending on whether this was a prefix or a suffix).
  # We also remove any trailing comma we've exposed.
  (before + after).gsub(/\s*,\s*$/, '')
end
307
+
308
+ # Original Version of NameCase:
309
+ # Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
310
+ # This module may be used/distributed/modified under the same terms as Perl itself
311
+ # http://dev.perl.org/licenses/ (GPL)
312
+ #
313
+ # Ruby Version:
314
+ # Copyright (c) Aaron Patterson 2006
315
+ # NameCase is distributed under the GPL license.
316
+ #
317
+ # Substantially modified for Xendata
318
+ # Improved in several areas, also now adds non-breaking spaces for
319
+ # compound names like "van der Pump"
320
# Applies name-style capitalisation to an already-downcased name and returns
# it. The string passed in is modified in place.
def name_case(lowercase)
  name = lowercase # same object as the argument; the gsub! calls below change it
  name.gsub!(/\b\w/, &:upcase)      # capitalise the first letter of each word
  name.gsub!(/\'\w\b/, &:downcase)  # Lowercase 's

  # Our list of terminal characters that indicate a non-celtic name used
  # to include o but we removed it because of MacMurdo.
  if name =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || name =~ /\bMc/
    name.gsub!(/\b(Ma?c)([A-Za-z]+)/) { Regexp.last_match(1) + Regexp.last_match(2).capitalize }

    # Fix Mac exceptions
    mac_exceptions = [
      'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
      'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
    ]
    mac_exceptions.each { |mac_name| name.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
  end

  # Fix the ff oddities: these surnames are written entirely in lowercase
  ff_names = ['Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes']
  ff_names.each { |ff_name| name.gsub!(ff_name, ff_name.downcase) }

  # Fixes for name modifiers followed by space
  # Also replaces spaces with non-breaking spaces
  NAME_MODIFIERS.each do |modifier|
    name.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) do
      particle = Regexp.last_match(1).rstrip.downcase
      joiner = Regexp.last_match(2).tr(ASCII_SPACE, NONBREAKING_SPACE)
      "#{particle}#{joiner}"
    end
  end

  # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
  %w[Dell D].each do |modifier|
    name.gsub!(/(.#{modifier}')(\w)/) do
      "#{Regexp.last_match(1).rstrip.downcase}#{Regexp.last_match(2)}"
    end
  end

  # Upcase words with no vowels, e.g JPR Williams
  name.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i, &:upcase)
  # Except Ng
  name.gsub!(/\b(NG)\b/i, &:capitalize) # http://en.wikipedia.org/wiki/Ng

  name
end
360
+
361
# Builds a slug from +string+.
#
# string - the text to convert.
# args   - options hash:
#          :sep     - separator that replaces whitespace (default SLUG_DELIMITER)
#          :rfc3987 - when true, keep every character FILTER_RFC3987 allows
#                     instead of the narrower FILTER_COMPAT set
#          :filter  - Regexp matching the characters to strip; overrides :rfc3987
#
# Returns a new, downcased String; +string+ itself is not modified.
def parameterize(string, args = {})
  sep = args[:sep] || SLUG_DELIMITER
  rfc3987 = args[:rfc3987] || false
  filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)

  # First we unescape any pct-encoded characters. These might turn into
  # things we want to alter for the slug, like whitespace (e.g. %20).
  # URI.unescape was removed in Ruby 3.0, so the decoding is done here:
  # each run of %XX escapes is converted to its bytes as a whole, which keeps
  # multi-byte characters such as %C3%A9 intact.
  encoding = string.encoding
  parameterized_string = string.gsub(/(?:%\h\h)+/) do |escaped|
    [escaped.delete('%')].pack('H*').force_encoding(encoding)
  end

  # Then we change any whitespace into our separator character
  parameterized_string.gsub!(/\s+/, sep)

  # Then we strip any illegal characters out completely
  parameterized_string.gsub!(filter, '')

  # Make sure separators are not where they shouldn't be
  unless sep.nil? || sep.empty?
    re_sep = Regexp.escape(sep)
    # No more than one of the separator in a row.
    parameterized_string.gsub!(/#{re_sep}{2,}/, sep)
    # Remove leading/trailing separator.
    parameterized_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '')
  end

  parameterized_string.downcase
end
388
+
389
+ #--------------------------------------------------------
390
+ # Constants
391
+ #--------------------------------------------------------
392
+
393
# Basic building blocks shared by the rest of the class. They are frozen so
# that no caller can alter them in place.
NONBREAKING_SPACE = "\u00a0".freeze           # U+00A0 NO-BREAK SPACE
ASCII_SPACE = "\u0020".freeze                 # the ordinary space character
ADFIX_JOINERS = "[#{ASCII_SPACE}-]".freeze    # regexp character class: space or hyphen
SLUG_DELIMITER = '-'.freeze                   # default separator used in slugs
397
+
398
+ # Constants for parameterizing Unicode strings for IRIs
399
+ #
400
+ # Allowed characters in an IRI segment are defined by RFC 3987
401
+ # (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
402
+ #
403
+ # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
404
+ # / "@" )
405
+ # ; non-zero-length segment without any colon ":"
406
+ #
407
+ # iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
408
+ #
409
+ # pct-encoded = "%" HEXDIG HEXDIG
410
+ #
411
+ # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
412
+ # / "*" / "+" / "," / ";" / "="
413
+ #
414
+ # ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
415
+ # / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
416
+ # / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
417
+ # / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
418
+ # / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
419
+ # / %xD0000-DFFFD / %xE1000-EFFFD
420
+ #
421
+ # Note that we can't use Unicode code points above \uFFFF because of
422
+ # regex limitations, so we'll ignore ucschar above that point.
423
+ #
424
+ # We're using the most restrictive segment definition (isegment-nz-nc)
425
+ # to avoid any possible problems with the IRI that it one day might
426
+ # get placed in.
427
# Character-class fragments (frozen so they cannot be altered in place) and
# the two filters built from them. Each filter matches the characters that
# are NOT allowed, so gsub(filter, '') strips them out.
ALPHA = 'A-Za-z'.freeze
DIGIT = '0-9'.freeze
UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'.freeze
IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}".freeze
SUBDELIMS = '!$&\'\(\)\*+,;='.freeze
ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@".freeze # pct-encoded not needed
FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
# Narrower than FILTER_RFC3987: letters, digits, "-", "_" and ucschar only
FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
435
+
436
# Words such as "van", "de" or "St." that attach to the name that follows
# them. Each entry is a regular-expression fragment, not a plain string, and
# is interpolated into patterns elsewhere in the class. The list is frozen so
# it cannot be altered in place.
NAME_MODIFIERS = [
  'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lr]', 'D[ao]s', 'El', 'La', 'L[eo]',
  'V[ao]n', 'Of', 'St[\.]?'
].freeze
440
+
441
# These are the prefixes and suffixes we want to remove, grouped first by
# position (:prefix or :suffix) and then by contact type (:person or
# :organization). The :before and :after entries are the regexp fragments
# placed around each group when the patterns are compiled.
#
# If you add to the list, you can use spaces and dots where appropriate.
# Ensure any single letters are followed by a dot because we'll add one to the string
# during processing, e.g. "y Cía." should be "y. Cía."
#
# The table is frozen because it is reference data that is only ever read.
ADFIXES = {
  prefix: {
    person: [
      'Baron', 'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame',
      'Doctor', 'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant',
      'Lord', 'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.',
      'Miss', 'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.',
      'Mr. and Mrs.', 'Mrs.', 'Msgr.', 'Prof.', 'Professor', 'Rev.',
      'Reverend', 'Sir', 'Sister', 'The Hon.', 'The Lady.', 'The Lord',
      'The Rt. Hon.'
    ],
    organization: [
      'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
    ],
    before:'\\A', after:ADFIX_JOINERS
  },
  suffix: {
    person: [
      'C.I.S.S.P.', 'B.Tech.', 'D.Phil.', 'B.Eng.', 'C.F.A.', 'D.B.E.', 'D.D.S.', 'Eng.D.', 'M.B.A.', 'M.B.E.',
      'M.E.P.', 'M.Eng.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'LL.B.',
      'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'O.K.',
      'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
    ],
    organization: [
      'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
      'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
      'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
      'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
      'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
      'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'I.L.L.C.',
      'K.G.a.A.', 'L.L.L.P.', 'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.',
      'P.L.L.C.', 'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.',
      'S.C.R.I.', 'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
      'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
      'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
      'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
      'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
      'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
      '股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
      'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
      'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
      'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
      'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
      'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
      'A/S', 'G/S', 'I/S', 'K/S', 'P/S'
    ],
    before:ADFIX_JOINERS, after:'\\z'
  }
}.freeze
494
+
495
# Compiles ADFIXES into one case-insensitive Regexp per adfix type and contact
# type, so that ADFIX_PATTERNS[:suffix][:person] is the pattern for personal
# suffixes, and so on.
ADFIX_PATTERNS = [:prefix, :suffix].each_with_object({}) do |adfix_type, patterns_by_type|
  adfix = ADFIXES[adfix_type]

  patterns_by_type[adfix_type] = [:person, :organization].each_with_object({}) do |contact_type, patterns|
    # Spaces in an adfix may be absent or repeated, and every dot is optional
    alternatives = adfix[contact_type].map { |text| text.gsub(ASCII_SPACE, ' *') }
    pattern_string = alternatives.join('|').gsub('.', '\.*')

    # The adfix may also be wrapped in any number of brackets
    patterns[contact_type] = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
  end
end
509
+ end
@@ -0,0 +1,3 @@
1
class NameTamer
  # Release number of the gem; the gemspec reads it as the package version.
  VERSION = '0.0.0'
end
@@ -0,0 +1,27 @@
1
# Put this gem's lib directory on the load path so the version file can be
# required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'name_tamer/version'

Gem::Specification.new do |gem|
  gem.name          = 'name-tamer'
  gem.version       = NameTamer::VERSION
  gem.authors       = ['Xenapto']
  gem.email         = ['developers@xenapto.com']
  gem.description   = 'Useful methods for taming names'
  gem.summary       = "Example: NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith"
  gem.homepage      = 'https://github.com/Xenapto/name-tamer'
  gem.license       = 'MIT'

  # Package every file tracked by git
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features|coverage)/})
  gem.require_paths = ['lib']

  gem.add_runtime_dependency 'activesupport', '~> 3'

  gem.add_development_dependency 'bundler', '~> 1'
  gem.add_development_dependency 'rake', '~> 10'
  gem.add_development_dependency 'rspec', '~> 2'
  gem.add_development_dependency 'gem-release', '~> 0'
  gem.add_development_dependency 'simplecov', '~> 0'
end
@@ -0,0 +1,166 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+ require 'name_tamer'
4
+
5
+ describe NameTamer do
6
+ let(:names) do
7
+ [
8
+ { n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
9
+ { n:'JOHN SMITH', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
10
+ { n:'john smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
11
+ { n:'Smith, John', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
12
+ { n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
13
+ { n:'Smith, John', nn:'John Smith', sn:'John Smith', s:'john-smith' },
14
+ { n:'John J Smith', t: :person, nn:'John J Smith', sn:'John Smith', s:'john-smith' },
15
+ { n:'John J. Smith', t: :person, nn:'John J. Smith', sn:'John Smith', s:'john-smith' },
16
+ { n:'SMITH, Mr John J.R.', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
17
+ { n:' SMITH, Mr John J. R. ', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
18
+ { n:'SMITH, Mr John J.R.', nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
19
+ { n:'Mr John J.R. SMITH JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
20
+ { n:'Mr John J.R. SMITH III,JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
21
+ { n:'Mr John J.R. SMITH JD', nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
22
+ { n:'Mr Jean-Michel SMITH JD', t: :person, nn:'Jean-Michel SMITH', sn:'Jean-Michel SMITH', s:'jean-michel-smith' },
23
+ { n:'Mr Jean Michel-SMITH JD', nn:'Jean Michel-SMITH', sn:'Jean Michel-SMITH', s:'jean-michel-smith' },
24
+ { n:'Dr Martha Lane Fox Ph.D', nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
25
+ { n:'Lane Fox Ph.D, Dr Martha', t: :person, nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
26
+ { n:'Baroness Lane-Fox of Lewisham', t: :person, nn:'Lane-Fox of Lewisham', sn:'Lane-Fox of Lewisham', s:'lane-fox-of-lewisham' },
27
+ { n:'MACDONALDS LLC', nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
28
+ { n:'MACDONALDS LLC', t: :organization, nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
29
+ { n:'macdonalds', t: :organization, nn:'macdonalds', sn:'macdonalds', s:'macdonalds' },
30
+ { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization, nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
31
+ { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
32
+ { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
33
+ { n:'K.V.A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
34
+ { n:'K. V. A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
35
+ { n:'J.P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
36
+ { n:'J. P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
37
+ { n:'J P Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
38
+ { n:'JP Rangaswami', nn:'JP Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
39
+ { n:'Audrey fforbes', nn:'Audrey fforbes', sn:'Audrey fforbes', s:'audrey-fforbes' },
40
+ { n:'J. Arthur Rank', t: :person, nn:'J. Arthur Rank', sn:'Arthur Rank', s:'arthur-rank' },
41
+ { n:'PHILIP NG', t: :person, nn:'Philip Ng', sn:'Philip Ng', s:'philip-ng' },
42
+ { n:'Super R&D', nn:'Super R&D', sn:'Super R and D', s:'super-r-and-d' },
43
+ { n:'Harry Dean Stanton', t: :person, nn:'Harry Dean Stanton', sn:'Harry Stanton', s:'harry-stanton' },
44
+ { n:'Union Square Ventures', t: :organization, nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
45
+ { n:'J Arthur Rank Inc.', t: :organization, nn:'J Arthur Rank', sn:'J Arthur Rank', s:'j-arthur-rank' },
46
+ { n:'Jean VAN DER VELDE', t: :person, nn:'Jean VAN DER VELDE', sn:'Jean VAN DER VELDE', s:'jean-van-der-velde' },
47
+ { n:'Al Capone', t: :person, nn:'Al Capone', sn:'Al Capone', s:'al-capone' },
48
+ { n:'Fahd al-Saud', t: :person, nn:'Fahd al-Saud', sn:'Fahd al-Saud', s:'fahd-al-saud' },
49
+ { n:'Mehmet al Auouiby', t: :person, nn:'Mehmet al Auouiby', sn:'Mehmet al Auouiby', s:'mehmet-al-auouiby' },
50
+ { n:'Macquarie Bank', t: :organization, nn:'Macquarie Bank', sn:'Macquarie Bank', s:'macquarie-bank' },
51
+ { n:"COMMEDIA DELL'ARTE", t: :organization, nn:"Commedia dell'Arte", sn:"Commedia dell'Arte", s:'commedia-dellarte' },
52
+ { n:'Della Smith', t: :person, nn:'Della Smith', sn:'Della Smith', s:'della-smith' },
53
+ { n:'Antonio DELLA MONTEVERDE', nn:'Antonio DELLA MONTEVERDE', sn:'Antonio DELLA MONTEVERDE', s:'antonio-della-monteverde' },
54
+ { n:'Tony St Clair', t: :person, nn:'Tony St Clair', sn:'Tony St Clair', s:'tony-st-clair' },
55
+ { n:'Seamus O\'Malley', t: :person, nn:'Seamus O\'Malley', sn:'Seamus O\'Malley', s:'seamus-omalley' },
56
+ { n:'SeedCamp', t: :organization, nn:'SeedCamp', sn:'SeedCamp', s:'seedcamp' },
57
+ { n:'Peter Van Der Auwera', t: :person, nn:'Peter Van Der Auwera', sn:'Peter Van Der Auwera', s:'peter-van-der-auwera' },
58
+ { n:'VAN DER AUWERA, Peter', t: :person, nn:'Peter van der Auwera', sn:'Peter van der Auwera', s:'peter-van-der-auwera' },
59
+ { n:'Li Fan', t: :person, nn:'Li Fan', sn:'Li Fan', s:'li-fan' },
60
+ { n:'Fan Li', t: :person, nn:'Fan Li', sn:'Fan Li', s:'fan-li' },
61
+ { n:'Levi Strauss & Co.', nn:'Levi Strauss', sn:'Levi Strauss', s:'levi-strauss' },
62
+ { n:'Standard & Poor\'s', t: :organization, nn:'Standard & Poor\'s', sn:'Standard and Poor\'s', s:'standard-and-poors' },
63
+ { n:'I B M Services', t: :organization, nn:'I.B.M. Services', sn:'IBM Services', s:'ibm-services' },
64
+ { n:'Sean Park DDS', t: :person, nn:'Sean Park', sn:'Sean Park', s:'sean-park' },
65
+ { n:'SEAN MACLISE PARK', t: :person, nn:'Sean Maclise Park', sn:'Sean Park', s:'sean-park' },
66
+ { n:'AJ Hanna', t: :person, nn:'AJ Hanna', sn:'AJ Hanna', s:'aj-hanna' },
67
+ { n:'Free & Clear', t: :organization, nn:'Free & Clear', sn:'Free and Clear', s:'free-and-clear' },
68
+ { n:'Adam D\'ANGELO', t: :person, nn:'Adam D\'ANGELO', sn:'Adam D\'ANGELO', s:'adam-dangelo' },
69
+ { n:'MACKENZIE, Doug', t: :person, nn:'Doug Mackenzie', sn:'Doug Mackenzie', s:'doug-mackenzie' },
70
+ { n:'Up + Down', t: :organization, nn:'Up + Down', sn:'Up plus Down', s:'up-plus-down' },
71
+ { n:'San Francisco Ltd', t: :organization, nn:'San Francisco', sn:'San Francisco', s:'san-francisco' },
72
+ { n:'AT&T', t: :organization, nn:'At&T', sn:'At and T', s:'at-and-t' },
73
+ { n:'SMITH, John, Jr.', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
74
+ { n:'I Heart Movies', t: :organization, nn:'I Heart Movies', sn:'I Heart Movies', s:'i-heart-movies' },
75
+ { n:'Y Combinator', t: :organization, nn:'Y Combinator', sn:'Y Combinator', s:'y-combinator' },
76
+ { n:'Ben\'s 10 Hens', t: :organization, nn:'Ben\'s 10 Hens', sn:'Ben\'s 10 Hens', s:'bens-10-hens' },
77
+ { n:'Elazer Edelman, MD , PhD', t: :person, nn:'Elazer Edelman', sn:'Elazer Edelman', s:'elazer-edelman' },
78
+ { n:'Judith M. O\'Brien', t: :person, nn:'Judith M. O\'Brien', sn:'Judith O\'Brien', s:'judith-obrien' },
79
+ { n:'MORRISON, Van', t: :person, nn:'Van Morrison', sn:'Van Morrison', s:'van-morrison' },
80
+ { n:'i/o Ventures', t: :organization, nn:'i/o Ventures', sn:'i/o Ventures', s:'io-ventures' },
81
+ { n:'C T Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
82
+ { n:'C.T. Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
83
+ { n:'CT Corporation System', t: :person, nn:'CT Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
84
+ { n:'Corporation Service Company', t: :person, nn:'Corporation Service Company', sn:'Corporation Service Company', s:'corporation-service-company'},
85
+ { n:'Kurshuni,Inc.', t: :organization, nn:'Kurshuni', sn:'Kurshuni', s:'kurshuni' },
86
+ { n:'Cellular Inc-LLC', t: :organization, nn:'Cellular', sn:'Cellular', s:'cellular' },
87
+ { n:'Emtec (AZ) Limited', t: :organization, nn:'Emtec (AZ)', sn:'Emtec (AZ)', s:'emtec-az' },
88
+ { n:'Emtec (LLC) Limited', t: :organization, nn:'Emtec', sn:'Emtec', s:'emtec' },
89
+ { n:'Emtec (XYZ LLC) Limited', t: :organization, nn:'Emtec (XYZ)', sn:'Emtec (XYZ)', s:'emtec-xyz' },
90
+ { n:'Tao Ma', t: :person, nn:'Tao', sn:'Tao', s:'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
91
+ { n:'(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn:'Courtney J. Miller', sn:'Courtney Miller', s:'courtney-miller' },
92
+ { n:'(Mr Woo) The Window Cleaner', t: :person, nn:'(Woo) The Window Cleaner', sn:'(Woo) Cleaner', s:'woo-cleaner'},
93
+ { n:'DOMINIC MACMURDO', t: :person, nn:'Dominic MacMurdo', sn:'Dominic MacMurdo', s:'dominic-macmurdo' },
94
+ { n:'DOMINIC MACEDO', t: :person, nn:'Dominic Macedo', sn:'Dominic Macedo', s:'dominic-macedo' },
95
+ { n:'DOMINIC MACDONALD', t: :person, nn:'Dominic MacDonald', sn:'Dominic MacDonald', s:'dominic-macdonald' },
96
+ { n:'AGUSTA DO ROMEIRO', t: :person, nn:'Agusta do Romeiro', sn:'Agusta do Romeiro', s:'agusta-do-romeiro' },
97
+ { n:'CARLOS DOS SANTOS', t: :person, nn:'Carlos dos Santos', sn:'Carlos dos Santos', s:'carlos-dos-santos' },
98
+ { n:'유정 신', t: :organization, nn:'유정 신', sn:'유정 신', s:'유정-신' },
99
+ { n:'xxx%52zzz', t: :organization, nn:'xxx%52zzz', sn:'xxx%52zzz', s:'xxxrzzz' },
100
+ { n:'Евгений Болотнов', t: :organization, nn:'Евгений Болотнов', sn:'Евгений Болотнов', s:'Евгений-Болотнов' },
101
+ { n:'김태성', t: :organization, nn:'김태성', sn:'김태성', s:'김태성' },
102
+ { n:'ゴルフスタジアム', t: :organization, nn:'ゴルフスタジアム', sn:'ゴルフスタジアム', s:'ゴルフスタジアム' },
103
+ { n:'我摘', t: :organization, nn:'我摘', sn:'我摘', s:'我摘' },
104
+ { n:'Καρατζάς Στέφανος', t: :organization, nn:'Καρατζάς Στέφανος', sn:'Καρατζάς Στέφανος', s:'Καρατζάς-Στέφανος' },
105
+ { n:'โชติวัน วัฒนลาภ', t: :organization, nn:'โชติวัน วัฒนลาภ', sn:'โชติวัน วัฒนลาภ', s:'โชติวัน-วัฒนลาภ' },
106
+ { n:'張 續寶', t: :organization, nn:'張 續寶', sn:'張 續寶', s:'張-續寶' },
107
+ { n:'Юрий Гайдук', t: :organization, nn:'Юрий Гайдук', sn:'Юрий Гайдук', s:'Юрий-Гайдук' },
108
+ { n:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', s:'☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
109
+ { n:'♠ KlasikB0i ♠', t: :organization, nn:'♠ KlasikB0i ♠', sn:'♠ KlasikB0i ♠', s:'♠-klasikb0i-♠' },
110
+ { n:'* Shorusan *', t: :organization, nn:'* Shorusan *', sn:'* Shorusan *', s:'shorusan' },
111
+ { n:'项目谷', t: :organization, nn:'项目谷', sn:'项目谷', s:'项目谷' },
112
+ { n:'ООО "Инновационные полимерные адгезивы"', t: :organization, nn:'ООО "Инновационные полимерные адгезивы"', sn:'ООО "Инновационные полимерные адгезивы"', s:'ООО-Инновационные-полимерные-адгезивы' },
113
+ { n:'عبدالله ...', t: :organization, nn:'عبدالله ...', sn:'عبدالله ...', s:'عبدالله' },
114
+ { n:'กมลชนก ทิศไธสง', t: :organization, nn:'กมลชนก ทิศไธสง', sn:'กมลชนก ทิศไธสง', s:'กมลชนก-ทิศไธสง' },
115
+ { n:'יוֹ אָב', t: :organization, nn:'יוֹ אָב', sn:'יוֹ אָב', s:'יוֹ-אָב' },
116
+ { n:'יגאל נימני', t: :organization, nn:'יגאל נימני', sn:'יגאל נימני', s:'יגאל-נימני' },
117
+ { n:'ניסים דניאלי', t: :organization, nn:'ניסים דניאלי', sn:'ניסים דניאלי', s:'ניסים-דניאלי' },
118
+ { n:'مساء الخير', t: :organization, nn:'مساء الخير', sn:'مساء الخير', s:'مساء-الخير' },
119
+ { n:'محمود ياسر', t: :organization, nn:'محمود ياسر', sn:'محمود ياسر', s:'محمود-ياسر' },
120
+ { n:'קובי ביטר', t: :organization, nn:'קובי ביטר', sn:'קובי ביטר', s:'קובי-ביטר' },
121
+ { n:'الملاك الحارس', t: :organization, nn:'الملاك الحارس', sn:'الملاك الحارس', s:'الملاك-الحارس' },
122
+ { n:'কবির হাসান', t: :organization, nn:'কবির হাসান', sn:'কবির হাসান', s:'কবির-হাসান' }
123
+ ]
124
+ end
125
+
126
+ it "makes a slug from #{name}" do
127
+ names.each do |name_data|
128
+ name = name_data[:n]
129
+ NameTamer[name, contact_type:name_data[:t]].slug.should == name_data[:s]
130
+ end
131
+ end
132
+
133
+ it "makes a nice name from #{name}" do
134
+ names.each do |name_data|
135
+ name = name_data[:n]
136
+ nice_name = NameTamer[name, contact_type:name_data[:t]].nice_name
137
+
138
+ nice_name.should == name_data[:nn]
139
+ end
140
+ end
141
+
142
+ it "makes a searchable name from #{name}" do
143
+ names.each do |name_data|
144
+ name = name_data[:n]
145
+ NameTamer[name, contact_type:name_data[:t]].search_name.should == name_data[:sn]
146
+ end
147
+ end
148
+ end
149
+
150
+ describe 'contact type inference' do
151
+ it 'infers that "Mr. John Smith" is a person' do
152
+ NameTamer['Mr. John Smith'].contact_type.should eq(:person)
153
+ end
154
+
155
+ it 'infers that "Di Doo Doo d.o.o." is an organization' do
156
+ NameTamer['Di Doo Doo d.o.o.'].contact_type.should eq(:organization)
157
+ end
158
+
159
+ it 'infers that "DiDooDoo" is an organization' do
160
+ NameTamer['DiDooDoo'].contact_type.should eq(:organization)
161
+ end
162
+
163
+ it 'infers that "John Smith" is a person' do
164
+ NameTamer['John Smith'].contact_type.should eq(:person)
165
+ end
166
+ end
@@ -0,0 +1,16 @@
1
+ # Initialize simplecov for coverage report.
2
+ require 'simplecov'
3
+ SimpleCov.start
4
+
5
+ RSpec.configure do |config|
6
+ # Run specs in random order to surface order dependencies. If you find an
7
+ # order dependency and want to debug it, you can fix the order by providing
8
+ # the seed, which is printed after each run.
9
+ # --seed 1234
10
+ config.order = "random"
11
+
12
+ # Manually-added
13
+ config.color_enabled = true
14
+ config.tty = true
15
+ config.formatter = :documentation
16
+ end
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: name-tamer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Xenapto
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '10'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '10'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '2'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: gem-release
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ~>
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Useful methods for taming names
98
+ email:
99
+ - developers@xenapto.com
100
+ executables:
101
+ - name-tamer
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - .env
106
+ - .gitignore
107
+ - .ruby-version
108
+ - Gemfile
109
+ - Gemfile.lock
110
+ - LICENSE
111
+ - README.md
112
+ - Rakefile
113
+ - bin/name-tamer
114
+ - lib/name_tamer.rb
115
+ - lib/name_tamer/version.rb
116
+ - name-tamer.gemspec
117
+ - spec/name_tamer_spec.rb
118
+ - spec/spec_helper.rb
119
+ homepage: https://github.com/Xenapto/name-tamer
120
+ licenses:
121
+ - MIT
122
+ metadata: {}
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 2.2.2
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: ! 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => John
143
+ Smith'
144
+ test_files:
145
+ - spec/name_tamer_spec.rb
146
+ - spec/spec_helper.rb