name-tamer 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.env +1 -0
- data/.gitignore +24 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +42 -0
- data/LICENSE +21 -0
- data/README.md +48 -0
- data/Rakefile +1 -0
- data/bin/name-tamer +3 -0
- data/lib/name_tamer.rb +509 -0
- data/lib/name_tamer/version.rb +3 -0
- data/name-tamer.gemspec +27 -0
- data/spec/name_tamer_spec.rb +166 -0
- data/spec/spec_helper.rb +16 -0
- metadata +146 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YzRiZjg0NGY3ODliMzVjZWM0ZDBhMjExYjNjODMyMmJmZTExNDRiMA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZmZmODdkOGIwYzBlN2I3MjM3OTVjNzBhZGM0ZjQ2MTk0MmQ3MmIyMg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YzhhNzUxMTY5OWQ4YjkxYzUxNTMxNTBhYjNkZGMyMDE1MDFmNDNkMmY2Y2Uy
|
10
|
+
ZWVmODhiMGM3OTBjNWE3ZmI0NGI0Yjk4MTRiZWYyMWRkZTY0NjBhNzI5NWNm
|
11
|
+
Nzk4ODliZjU3OTc3YzY4MzdkYWYyYzU4ODE4ZGJhNjkwNmE1MDU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YWNiMjZhODI5NDk2MmI1Y2E4MWUyYTg4NTk0M2ExMGFkNzhhZjdlZDRiYzJm
|
14
|
+
ZDI0ZjNiYmJlMWNiYjRmOTYxMGU5MTdlMTZjYzA0YTZjNjdkYTliOTc1YWRj
|
15
|
+
ZmM1ZWYwZTA5ZjQ5NDc2OGQ4NzA1NTgwYzM0MTNlYTFjMzgzMDc=
|
data/.env
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
PATH=/home/build/.rvm/gems/ruby-2.1.1/bin:/home/build/.rvm/gems/ruby-2.1.1@global/bin:/home/build/.rvm/rubies/ruby-2.1.1/bin:/home/build/.rvm/bin:/usr/local/heroku/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/build/.rvm/gems/ruby-2.1.1@global/bin/bundle
|
data/.gitignore
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
*.rbc
|
2
|
+
capybara-*.html
|
3
|
+
.rspec
|
4
|
+
/log
|
5
|
+
/tmp
|
6
|
+
/db/*.sqlite3
|
7
|
+
/public/system
|
8
|
+
/coverage/
|
9
|
+
/spec/tmp
|
10
|
+
**.orig
|
11
|
+
rerun.txt
|
12
|
+
pickle-email-*.html
|
13
|
+
config/initializers/secret_token.rb
|
14
|
+
config/secrets.yml
|
15
|
+
|
16
|
+
## Environment normalisation:
|
17
|
+
/.bundle
|
18
|
+
/vendor/bundle
|
19
|
+
|
20
|
+
# these should all be checked in to normalise the environment:
|
21
|
+
# Gemfile.lock, .ruby-version, .ruby-gemset
|
22
|
+
|
23
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
24
|
+
.rvmrc
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.2
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
name-tamer (0.0.0)
|
5
|
+
activesupport (~> 3)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activesupport (3.2.13)
|
11
|
+
i18n (= 0.6.1)
|
12
|
+
multi_json (~> 1.0)
|
13
|
+
diff-lcs (1.2.5)
|
14
|
+
docile (1.1.3)
|
15
|
+
gem-release (0.7.3)
|
16
|
+
i18n (0.6.1)
|
17
|
+
multi_json (1.10.1)
|
18
|
+
rake (10.3.2)
|
19
|
+
rspec (2.14.1)
|
20
|
+
rspec-core (~> 2.14.0)
|
21
|
+
rspec-expectations (~> 2.14.0)
|
22
|
+
rspec-mocks (~> 2.14.0)
|
23
|
+
rspec-core (2.14.8)
|
24
|
+
rspec-expectations (2.14.5)
|
25
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
26
|
+
rspec-mocks (2.14.6)
|
27
|
+
simplecov (0.8.2)
|
28
|
+
docile (~> 1.1.0)
|
29
|
+
multi_json
|
30
|
+
simplecov-html (~> 0.8.0)
|
31
|
+
simplecov-html (0.8.0)
|
32
|
+
|
33
|
+
PLATFORMS
|
34
|
+
ruby
|
35
|
+
|
36
|
+
DEPENDENCIES
|
37
|
+
bundler (~> 1)
|
38
|
+
gem-release (~> 0)
|
39
|
+
name-tamer!
|
40
|
+
rake (~> 10)
|
41
|
+
rspec (~> 2)
|
42
|
+
simplecov (~> 0)
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Xenapto
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# NameTamer
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/name-tamer.png)](http://badge.fury.io/rb/name-tamer)
|
4
|
+
[![Code Climate](https://codeclimate.com/github/Xenapto/name-tamer.png)](https://codeclimate.com/github/Xenapto/name-tamer)
|
5
|
+
[![Dependency Status](https://gemnasium.com/Xenapto/name-tamer.png)](https://gemnasium.com/Xenapto/name-tamer)
|
6
|
+
![build status](https://circleci.com/gh/Xenapto/name-tamer.png?circle-token=dd3a51864d33f6506b18a355bc901b90c0df3b3b)
|
7
|
+
|
8
|
+
NameTamer: useful methods for taming names
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'name-tamer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install name-tamer
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
Examples:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
NameTamer['Mr. John Q. Smith III, MD'].search_name # => John Smith
|
30
|
+
```
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
name_tamer = NameTamer['Mr. John Q. Smith III, MD']
|
34
|
+
name_tamer.slug # => john-smith
|
35
|
+
name_tamer.nice_name # => John Q. Smith
|
36
|
+
```
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
1. Fork it
|
41
|
+
1. Create your feature branch (`git checkout -b my-new-feature`)
|
42
|
+
1. Commit your changes (`git commit -am 'Add some feature'`)
|
43
|
+
1. Push to the branch (`git push origin my-new-feature`)
|
44
|
+
1. Create new Pull Request
|
45
|
+
|
46
|
+
## Acknowledgements
|
47
|
+
|
48
|
+
1. Thanks to Ryan Bigg for the guide to making your first gem https://github.com/radar/guides/blob/master/gem-development.md
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/name-tamer
ADDED
data/lib/name_tamer.rb
ADDED
@@ -0,0 +1,509 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# References:
|
4
|
+
# http://www.w3.org/International/questions/qa-personal-names
|
5
|
+
# https://github.com/berkmancenter/namae
|
6
|
+
# https://github.com/mericson
|
7
|
+
# http://en.wikipedia.org/wiki/Types_of_business_entity
|
8
|
+
# http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
|
9
|
+
# http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
|
10
|
+
# http://en.wikipedia.org/wiki/Nobiliary_particle
|
11
|
+
# http://en.wikipedia.org/wiki/Spanish_naming_customs
|
12
|
+
# http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
|
13
|
+
require 'active_support/core_ext/object'
|
14
|
+
|
15
|
+
class NameTamer
|
16
|
+
attr_reader :name, :contact_type
|
17
|
+
|
18
|
+
class << self
|
19
|
+
def [](name, args = {})
|
20
|
+
new name, args
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def nice_name
|
25
|
+
if @nice_name.nil?
|
26
|
+
@nice_name = @name.dup # Start with the name we've received
|
27
|
+
|
28
|
+
tidy_spacing # " John Smith " -> "John Smith"
|
29
|
+
consolidate_initials # "I. B. M." -> "I.B.M."
|
30
|
+
remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
|
31
|
+
fixup_last_name_first # "Smith, John" -> "John Smith"
|
32
|
+
fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
|
33
|
+
remove_adfixes # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
|
34
|
+
name_wrangle # proper name case and non-breaking spaces
|
35
|
+
use_nonbreaking_spaces_in_compound_names
|
36
|
+
end
|
37
|
+
|
38
|
+
@nice_name
|
39
|
+
end
|
40
|
+
|
41
|
+
def search_name
|
42
|
+
if @search_name.nil?
|
43
|
+
@search_name = nice_name.dup # Start with nice name
|
44
|
+
|
45
|
+
remove_initials # "John Q. Doe" -> "John Doe"
|
46
|
+
remove_middle_names # "Philip Seymour Hoffman" -> "Philip Hoffman"
|
47
|
+
remove_dots_from_abbreviations # "J.P.R. Williams" -> "JPR Williams"
|
48
|
+
standardize_words # "B&Q Intl" -> "B and Q International"
|
49
|
+
|
50
|
+
@search_name = ensure_whitespace_is_ascii_space @search_name
|
51
|
+
end
|
52
|
+
|
53
|
+
@search_name
|
54
|
+
end
|
55
|
+
|
56
|
+
def slug
|
57
|
+
if @slug.nil?
|
58
|
+
@slug = search_name.dup # Start with search name
|
59
|
+
slugify # "John Doe" -> "john-doe"
|
60
|
+
end
|
61
|
+
|
62
|
+
@slug
|
63
|
+
end
|
64
|
+
|
65
|
+
def contact_type
|
66
|
+
nice_name # make sure we've done the bit which infers contact_type
|
67
|
+
contact_type_best_effort
|
68
|
+
end
|
69
|
+
|
70
|
+
=begin These lines aren't used and aren't covered by specs
|
71
|
+
def name=(new_name)
|
72
|
+
initialize new_name, :contact_type => @contact_type
|
73
|
+
end
|
74
|
+
|
75
|
+
def contact_type=(new_contact_type)
|
76
|
+
initialize @name, :contact_type => new_contact_type
|
77
|
+
end
|
78
|
+
|
79
|
+
def to_hash
|
80
|
+
{
|
81
|
+
name: @name,
|
82
|
+
nice_name: @nice_name,
|
83
|
+
search_name: @search_name,
|
84
|
+
slug: @slug,
|
85
|
+
contact_type: @contact_type,
|
86
|
+
last_name: @last_name,
|
87
|
+
remainder: @remainder,
|
88
|
+
adfix_found: @adfix_found
|
89
|
+
}
|
90
|
+
end
|
91
|
+
=end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
#--------------------------------------------------------
|
96
|
+
# Tidy up the name we've received
|
97
|
+
#--------------------------------------------------------
|
98
|
+
|
99
|
+
# Normalise the spacing of @nice_name in place:
#   * every run of whitespace (including Unicode whitespace such as a
#     non-breaking space) becomes a single ASCII space
#   * every comma is followed by exactly one space
#   * leading and trailing whitespace is removed
#
# Whitespace is normalised *before* stripping because String#strip! only
# removes ASCII whitespace; doing it afterwards left a stray leading or
# trailing space behind whenever the input was padded with Unicode whitespace.
def tidy_spacing
  @nice_name = ensure_whitespace_is_ascii_space @nice_name
  @nice_name.gsub!(/,\s*/, ', ') # Ensure commas have exactly one space after them
  @nice_name.strip!              # remove leading & trailing whitespace
  @nice_name
end
|
105
|
+
|
106
|
+
# Remove spaces from groups of initials
|
107
|
+
def consolidate_initials
|
108
|
+
@nice_name.gsub!(/\b([a-z])\.* (?=[a-z][\. ])/i) { |match| "#{$1}." } # Remove spaces from initial groups
|
109
|
+
@nice_name.gsub!(/\b([a-z](?:\.[a-z])+)\.?(?= )/i) { |match| "#{$1}." } # Ensure each group ends with a dot
|
110
|
+
end
|
111
|
+
|
112
|
+
# An adfix is either a prefix or a suffix
|
113
|
+
# Strip every recognised prefix and suffix (an "adfix") from the name.
#
# While the name is still a single string (@last_name is nil) both ends of
# @nice_name are cleaned. Once the name has been split into two halves,
# suffixes are taken off @last_name and prefixes off @remainder.
def remove_adfixes
  if @last_name.nil?
    # Our name is still in one part, not two
    @nice_name = strip_all_adfixes(:suffix, @nice_name)
    @nice_name = strip_all_adfixes(:prefix, @nice_name)
  else
    # Our name is currently in two halves
    @last_name = strip_all_adfixes(:suffix, @last_name)
    @remainder = strip_all_adfixes(:prefix, @remainder)
  end
end

# Repeatedly remove the outermost adfix of the given type from name_part until
# remove_outermost_adfix reports (via @adfix_found) that nothing was removed.
# Always makes at least one attempt. Returns the cleaned string.
def strip_all_adfixes adfix_type, name_part
  loop do
    name_part = remove_outermost_adfix(adfix_type, name_part)
    break unless @adfix_found
  end

  name_part
end
|
134
|
+
|
135
|
+
# Names in the form "Smith, John" need to be turned around to "John Smith"
|
136
|
+
# Detect names written as "Last, First" and remember the two halves in
# @last_name and @remainder so later steps can put them back in natural order.
# Organizations are left alone, as is any name that does not split into
# exactly two parts on ", ".
def fixup_last_name_first
  return if @contact_type == :organization

  halves = @nice_name.split ', '
  return unless halves.count == 2

  @last_name = halves.first # Sometimes the last name alone is all caps and we can name-case it
  @remainder = halves.last
end
|
146
|
+
|
147
|
+
# Sometimes we end up with mismatched braces after adfix stripping
|
148
|
+
# e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
|
149
|
+
# Balance round brackets in @nice_name after adfix stripping has possibly
# removed one of a pair: a single ')' is appended when there are more opening
# than closing brackets, a single '(' is prepended in the opposite case.
def fixup_mismatched_braces
  opening = @nice_name.count '('
  closing = @nice_name.count ')'

  case opening <=> closing
  when 1 then @nice_name = "#{@nice_name})"
  when -1 then @nice_name = "(#{@nice_name}"
  end
end
|
159
|
+
|
160
|
+
def name_wrangle
|
161
|
+
# Fix case if all caps or all lowercase
|
162
|
+
if @last_name.nil?
|
163
|
+
lowercase = @nice_name.downcase
|
164
|
+
uppercase = @nice_name.upcase
|
165
|
+
|
166
|
+
# Some companies like to be all lowercase so don't mess with them
|
167
|
+
@nice_name = name_case(lowercase) if @nice_name == uppercase || ( @nice_name == lowercase && @contact_type != :organization )
|
168
|
+
else
|
169
|
+
lowercase = @last_name.downcase
|
170
|
+
uppercase = @last_name.upcase
|
171
|
+
@last_name = name_case(lowercase) if @last_name == uppercase || @last_name == lowercase
|
172
|
+
|
173
|
+
@nice_name = "#{@remainder} #{@last_name}"
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# Conjoin compound names with non-breaking spaces
|
178
|
+
def use_nonbreaking_spaces_in_compound_names
|
179
|
+
# Fix known last names that have spaces (not hyphens!)
|
180
|
+
[
|
181
|
+
'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore',
|
182
|
+
'Holmes à Court', 'Holmes a Court', 'Baron Cohen',
|
183
|
+
'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
|
184
|
+
].each do |compound_name|
|
185
|
+
@nice_name.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
|
186
|
+
end
|
187
|
+
|
188
|
+
NAME_MODIFIERS.each do |modifier|
|
189
|
+
@nice_name.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |match| "#{$1}#{NONBREAKING_SPACE}" }
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
#--------------------------------------------------------
|
194
|
+
# Make search name from nice name
|
195
|
+
#--------------------------------------------------------
|
196
|
+
|
197
|
+
# Remove initials from personal names unless they are the only identifier.
|
198
|
+
# i.e. only remove initials if there's also a proper name there
|
199
|
+
# For people only: drop single-letter initials from @search_name, but keep the
# original text when doing so would leave no space in the name (i.e. the
# initials were the only thing identifying the person besides one word).
def remove_initials
  return unless @contact_type == :person

  without_initials = @search_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')

  # If the name still has at least one space we're OK
  @search_name = without_initials if without_initials.include?(ASCII_SPACE)
end
|
207
|
+
|
208
|
+
# For people only: reduce @search_name to its first and last
# whitespace-separated words when it has more than two of them.
def remove_middle_names
  return unless @contact_type == :person

  words = @search_name.split
  return if words.count <= 2

  @search_name = "#{words.first} #{words.last}"
end
|
214
|
+
|
215
|
+
def remove_dots_from_abbreviations
|
216
|
+
@search_name.gsub!(/\b([a-z])\./i) { |match| $1 }
|
217
|
+
end
|
218
|
+
|
219
|
+
def standardize_words
|
220
|
+
@search_name.gsub!(/ *& */, ' and ') # replace ampersand characters with ' and '
|
221
|
+
@search_name.gsub!(/ *\+ */, ' plus ') # replace plus signs with ' plus '
|
222
|
+
@search_name.gsub!(/\bintl\b/i, 'International') # replace 'intl' with 'International'
|
223
|
+
end
|
224
|
+
|
225
|
+
#--------------------------------------------------------
|
226
|
+
# Make slug from search name
|
227
|
+
#--------------------------------------------------------
|
228
|
+
|
229
|
+
def slugify
|
230
|
+
# Inflector::parameterize just gives up with non-latin characters so...
|
231
|
+
#@slug = @slug.parameterize # Can't use this
|
232
|
+
|
233
|
+
# Instead we'll do it ourselves
|
234
|
+
@slug = parameterize @slug
|
235
|
+
end
|
236
|
+
|
237
|
+
#--------------------------------------------------------
|
238
|
+
# Initialization and utilities
|
239
|
+
#--------------------------------------------------------
|
240
|
+
|
241
|
+
def initialize(name, args = {})
|
242
|
+
@name = name || ''
|
243
|
+
@contact_type = args[:contact_type].to_sym unless args[:contact_type].nil?
|
244
|
+
|
245
|
+
@nice_name = nil
|
246
|
+
@search_name = nil
|
247
|
+
@slug = nil
|
248
|
+
|
249
|
+
@last_name = nil
|
250
|
+
@remainder = nil
|
251
|
+
|
252
|
+
@adfix_found = false
|
253
|
+
end
|
254
|
+
|
255
|
+
# Record the contact type inferred from a diagnostic adfix.
#
# contact_type - anything that responds to #to_sym.
#
# Writes a warning to stderr when a contact type that is already known is
# about to be replaced by a different one. Returns the new type as a Symbol.
def set_contact_type contact_type
  contact_type_sym = contact_type.to_sym

  # Use Kernel#warn with a plain string: String#red is not defined by Ruby or
  # by anything this file requires, so the former `puts "...".red` raised
  # NoMethodError at exactly the moment the diagnostic was needed.
  unless @contact_type.nil? || @contact_type == contact_type_sym
    warn "Changing contact type of #{@name} from #{@contact_type} to #{contact_type}"
  end

  @contact_type = contact_type_sym
end
|
260
|
+
|
261
|
+
# If we don't know the contact type, what's our best guess?
|
262
|
+
# Return the known contact type, or guess one when it has not been set:
# a name made of a single word is taken to be an organization, anything
# containing an ASCII space is taken to be a person.
def contact_type_best_effort
  return @contact_type if @contact_type

  @name.include?(ASCII_SPACE) ? :person : :organization
end
|
271
|
+
|
272
|
+
def ensure_whitespace_is_ascii_space string
|
273
|
+
string.gsub(/[[:space:]]+/, ASCII_SPACE) # /\s/ doesn't match Unicode whitespace in Ruby 1.9.3
|
274
|
+
end
|
275
|
+
|
276
|
+
# We pass to this routine either prefixes or suffixes
|
277
|
+
def remove_outermost_adfix adfix_type, name_part
|
278
|
+
adfixes = ADFIX_PATTERNS[adfix_type]
|
279
|
+
contact_type = contact_type_best_effort
|
280
|
+
parts = name_part.partition adfixes[contact_type]
|
281
|
+
@adfix_found = parts[1].present?
|
282
|
+
|
283
|
+
# If the contact type is indeterminate and we didn't find a diagnostic adfix
|
284
|
+
# for a person then try again for an organization
|
285
|
+
if @contact_type.nil?
|
286
|
+
unless @adfix_found
|
287
|
+
contact_type = :organization
|
288
|
+
parts = name_part.partition adfixes[contact_type]
|
289
|
+
@adfix_found = parts[1].present?
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
if @adfix_found
|
294
|
+
# If we've found a diagnostic adfix then set the contact type
|
295
|
+
set_contact_type contact_type
|
296
|
+
|
297
|
+
# The remainder of the name will be in parts[0] or parts[2] depending
|
298
|
+
# on whether this is a prefix or a suffix.
|
299
|
+
# We'll also remove any trailing commas we've exposed.
|
300
|
+
result = (parts[0] + parts[2]).gsub(/\s*,\s*$/, '')
|
301
|
+
else
|
302
|
+
result = name_part
|
303
|
+
end
|
304
|
+
|
305
|
+
result
|
306
|
+
end
|
307
|
+
|
308
|
+
# Original Version of NameCase:
|
309
|
+
# Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
|
310
|
+
# This module may be used/distributed/modified under the same terms as Perl itself
|
311
|
+
# http://dev.perl.org/licenses/ (GPL)
|
312
|
+
#
|
313
|
+
# Ruby Version:
|
314
|
+
# Copyright (c) Aaron Patterson 2006
|
315
|
+
# NameCase is distributed under the GPL license.
|
316
|
+
#
|
317
|
+
# Substantially modified for Xendata
|
318
|
+
# Improved in several areas, also now adds non-breaking spaces for
|
319
|
+
# compound names like "van der Pump"
|
320
|
+
# Convert an already-downcased name to proper name case.
#
# lowercase - the name as a String, assumed to be entirely lower case.
#
# Returns a new String. The argument is left untouched: the previous version
# edited it in place with gsub!, which altered the caller's string and raised
# on frozen input.
def name_case lowercase
  name = lowercase.dup # We assume the name is passed already downcased
  name.gsub!(/\b\w/) { |first| first.upcase }
  name.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's

  # Our list of terminal characters that indicate a non-celtic name used
  # to include o but we removed it because of MacMurdo.
  if name =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || name =~ /\bMc/
    name.gsub!(/\b(Ma?c)([A-Za-z]+)/) { $1 + $2.capitalize }

    # Fix Mac exceptions
    [
      'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
      'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
    ].each { |mac_name| name.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
  end

  # Fix names that start with a lower-case double f
  [
    'Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes'
  ].each { |ff_name| name.gsub!(ff_name, ff_name.downcase) }

  # Fixes for name modifiers followed by space
  # Also replaces spaces with non-breaking spaces
  NAME_MODIFIERS.each do |modifier|
    name.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) do
      "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}"
    end
  end

  # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
  ['Dell', 'D'].each do |modifier|
    name.gsub!(/(.#{modifier}')(\w)/) { "#{$1.rstrip.downcase}#{$2}" }
  end

  # Upcase words with no vowels, e.g JPR Williams
  name.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { $1.upcase }
  # Except Ng
  name.gsub!(/\b(NG)\b/i) { $1.capitalize } # http://en.wikipedia.org/wiki/Ng

  name
end
|
360
|
+
|
361
|
+
# Turn a string into a slug.
#
# string - the String to convert; it is not modified.
# args   - optional Hash:
#          :sep     - separator that replaces whitespace (default SLUG_DELIMITER)
#          :rfc3987 - when truthy, filter with FILTER_RFC3987 instead of
#                     FILTER_COMPAT
#          :filter  - Regexp of characters to delete; overrides :rfc3987
#
# Returns a new, downcased String.
def parameterize string, args = {}
  sep = args[:sep] || SLUG_DELIMITER
  rfc3987 = args[:rfc3987] || false
  filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)

  # First we unescape any pct-encoded characters. These might turn into
  # things we want to alter for the slug, like whitespace (e.g. %20).
  # This is done by hand because URI.unescape is obsolete, no longer exists in
  # Ruby 3.0 and later, and 'uri' was never required by this file anyway.
  encoding = string.encoding
  encoding = Encoding::UTF_8 if encoding == Encoding::US_ASCII
  parameterized_string = string.gsub(/%[0-9a-fA-F]{2}/) do |escaped|
    [escaped[1, 2]].pack('H2').force_encoding(encoding)
  end

  # Then we change any whitespace into our separator character
  parameterized_string.gsub!(/\s+/, sep)

  # Then we strip any illegal characters out completely
  parameterized_string.gsub!(filter, '')

  # Make sure separators are not where they shouldn't be
  unless sep.nil? || sep.empty?
    re_sep = Regexp.escape(sep)
    # No more than one of the separator in a row.
    parameterized_string.gsub!(/#{re_sep}{2,}/, sep)
    # Remove leading/trailing separator (anchored to the whole string).
    parameterized_string.gsub!(/\A#{re_sep}|#{re_sep}\z/, '')
  end

  # Lower-case the result
  parameterized_string.downcase
end
|
388
|
+
|
389
|
+
#--------------------------------------------------------
|
390
|
+
# Constants
|
391
|
+
#--------------------------------------------------------
|
392
|
+
|
393
|
+
NONBREAKING_SPACE = "\u00a0"
|
394
|
+
ASCII_SPACE = "\u0020"
|
395
|
+
ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
|
396
|
+
SLUG_DELIMITER = '-'
|
397
|
+
|
398
|
+
# Constants for parameterizing Unicode strings for IRIs
|
399
|
+
#
|
400
|
+
# Allowed characters in an IRI segment are defined by RFC 3987
|
401
|
+
# (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
|
402
|
+
#
|
403
|
+
# isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
|
404
|
+
# / "@" )
|
405
|
+
# ; non-zero-length segment without any colon ":"
|
406
|
+
#
|
407
|
+
# iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
|
408
|
+
#
|
409
|
+
# pct-encoded = "%" HEXDIG HEXDIG
|
410
|
+
#
|
411
|
+
# sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
412
|
+
# / "*" / "+" / "," / ";" / "="
|
413
|
+
#
|
414
|
+
# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
|
415
|
+
# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
|
416
|
+
# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
|
417
|
+
# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
|
418
|
+
# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
|
419
|
+
# / %xD0000-DFFFD / %xE1000-EFFFD
|
420
|
+
#
|
421
|
+
# Note that we can't use Unicode code points above \uFFFF because of
|
422
|
+
# regex limitations, so we'll ignore ucschar above that point.
|
423
|
+
#
|
424
|
+
# We're using the most restrictive segment definition (isegment-nz-nc)
|
425
|
+
# to avoid any possible problems with the IRI that it one day might
|
426
|
+
# get placed in.
|
427
|
+
ALPHA = 'A-Za-z'
|
428
|
+
DIGIT = '0-9'
|
429
|
+
UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
|
430
|
+
IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}"
|
431
|
+
SUBDELIMS = '!$&\'\(\)\*+,;='
|
432
|
+
ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@" # pct-encoded not needed
|
433
|
+
FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
|
434
|
+
FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
|
435
|
+
|
436
|
+
NAME_MODIFIERS = [
|
437
|
+
'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lr]', 'D[ao]s', 'El', 'La', 'L[eo]',
|
438
|
+
'V[ao]n', 'Of', 'St[\.]?'
|
439
|
+
]
|
440
|
+
|
441
|
+
# These are the prefixes and suffixes we want to remove
|
442
|
+
# If you add to the list, you can use spaces and dots where appropriate
|
443
|
+
# Ensure any single letters are followed by a dot because we'll add one to the string
|
444
|
+
# during processing, e.g. "y Cía." should be "y. Cía."
|
445
|
+
ADFIXES = {
|
446
|
+
prefix: {
|
447
|
+
person: [
|
448
|
+
'Baron', 'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame',
|
449
|
+
'Doctor', 'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant',
|
450
|
+
'Lord', 'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.',
|
451
|
+
'Miss', 'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.',
|
452
|
+
'Mr. and Mrs.', 'Mrs.', 'Msgr.', 'Prof.', 'Professor', 'Rev.',
|
453
|
+
'Reverend', 'Sir', 'Sister', 'The Hon.', 'The Lady.', 'The Lord',
|
454
|
+
'The Rt. Hon.'
|
455
|
+
],
|
456
|
+
organization: [
|
457
|
+
'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
|
458
|
+
],
|
459
|
+
before:'\\A', after:ADFIX_JOINERS
|
460
|
+
},
|
461
|
+
suffix: {
|
462
|
+
person: [
|
463
|
+
'C.I.S.S.P.', 'B.Tech.', 'D.Phil.', 'B.Eng.', 'C.F.A.', 'D.B.E.', 'D.D.S.', 'Eng.D.', 'M.B.A.', 'M.B.E.',
|
464
|
+
'M.E.P.', 'M.Eng.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'LL.B.',
|
465
|
+
'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'O.K.',
|
466
|
+
'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
|
467
|
+
],
|
468
|
+
organization: [
|
469
|
+
'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
|
470
|
+
'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
|
471
|
+
'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
|
472
|
+
'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
|
473
|
+
'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
|
474
|
+
'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'I.L.L.C.',
|
475
|
+
'K.G.a.A.', 'L.L.L.P.', 'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.',
|
476
|
+
'P.L.L.C.', 'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.',
|
477
|
+
'S.C.R.I.', 'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
|
478
|
+
'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
|
479
|
+
'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
|
480
|
+
'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
|
481
|
+
'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
|
482
|
+
'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
|
483
|
+
'股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
|
484
|
+
'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
|
485
|
+
'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
|
486
|
+
'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
|
487
|
+
'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
|
488
|
+
'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
|
489
|
+
'A/S', 'G/S', 'I/S', 'K/S', 'P/S'
|
490
|
+
],
|
491
|
+
before:ADFIX_JOINERS, after:'\\z'
|
492
|
+
}
|
493
|
+
}
|
494
|
+
|
495
|
+
ADFIX_PATTERNS = {}
|
496
|
+
|
497
|
+
[:prefix, :suffix].each do |adfix_type|
|
498
|
+
patterns = {}
|
499
|
+
adfix = ADFIXES[adfix_type]
|
500
|
+
|
501
|
+
[:person, :organization].each do |contact_type|
|
502
|
+
with_optional_spaces = adfix[contact_type].map { |p| p.gsub(ASCII_SPACE,' *') }
|
503
|
+
pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
|
504
|
+
patterns[contact_type] = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
|
505
|
+
end
|
506
|
+
|
507
|
+
ADFIX_PATTERNS[adfix_type] = patterns
|
508
|
+
end
|
509
|
+
end
|
data/name-tamer.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Gem specification for name-tamer.
# Make lib/ loadable so the version constant can be read before installation.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'name_tamer/version'

Gem::Specification.new do |spec|
  spec.name = 'name-tamer'
  spec.version = NameTamer::VERSION
  spec.authors = ['Xenapto']
  spec.email = ['developers@xenapto.com']
  spec.description = %q{Useful methods for taming names}
  # The example uses search_name: NameTamer defines nice_name, search_name and
  # slug, but no simple_name method.
  spec.summary = %q{Example: NameTamer['Mr. John Q. Smith III, MD'].search_name # => John Smith}
  spec.homepage = 'https://github.com/Xenapto/name-tamer'
  spec.license = 'MIT'

  # Package every file tracked by git
  spec.files = `git ls-files`.split($/)
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features|coverage)/})
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'activesupport', '~> 3'

  spec.add_development_dependency 'bundler', '~> 1'
  spec.add_development_dependency 'rake', '~> 10'
  spec.add_development_dependency 'rspec', '~> 2'
  spec.add_development_dependency 'gem-release', '~> 0'
  spec.add_development_dependency 'simplecov', '~> 0'
end
|
data/spec/name_tamer_spec.rb
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'name_tamer'
|
4
|
+
|
5
|
+
describe NameTamer do
|
6
|
+
let(:names) do
|
7
|
+
[
|
8
|
+
{ n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
9
|
+
{ n:'JOHN SMITH', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
10
|
+
{ n:'john smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
11
|
+
{ n:'Smith, John', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
12
|
+
{ n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
13
|
+
{ n:'Smith, John', nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
14
|
+
{ n:'John J Smith', t: :person, nn:'John J Smith', sn:'John Smith', s:'john-smith' },
|
15
|
+
{ n:'John J. Smith', t: :person, nn:'John J. Smith', sn:'John Smith', s:'john-smith' },
|
16
|
+
{ n:'SMITH, Mr John J.R.', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
17
|
+
{ n:' SMITH, Mr John J. R. ', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
18
|
+
{ n:'SMITH, Mr John J.R.', nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
19
|
+
{ n:'Mr John J.R. SMITH JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
20
|
+
{ n:'Mr John J.R. SMITH III,JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
21
|
+
{ n:'Mr John J.R. SMITH JD', nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
22
|
+
{ n:'Mr Jean-Michel SMITH JD', t: :person, nn:'Jean-Michel SMITH', sn:'Jean-Michel SMITH', s:'jean-michel-smith' },
|
23
|
+
{ n:'Mr Jean Michel-SMITH JD', nn:'Jean Michel-SMITH', sn:'Jean Michel-SMITH', s:'jean-michel-smith' },
|
24
|
+
{ n:'Dr Martha Lane Fox Ph.D', nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
|
25
|
+
{ n:'Lane Fox Ph.D, Dr Martha', t: :person, nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
|
26
|
+
{ n:'Baroness Lane-Fox of Lewisham', t: :person, nn:'Lane-Fox of Lewisham', sn:'Lane-Fox of Lewisham', s:'lane-fox-of-lewisham' },
|
27
|
+
{ n:'MACDONALDS LLC', nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
|
28
|
+
{ n:'MACDONALDS LLC', t: :organization, nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
|
29
|
+
{ n:'macdonalds', t: :organization, nn:'macdonalds', sn:'macdonalds', s:'macdonalds' },
|
30
|
+
{ n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization, nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
31
|
+
{ n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
32
|
+
{ n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
33
|
+
{ n:'K.V.A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
|
34
|
+
{ n:'K. V. A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
|
35
|
+
{ n:'J.P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
36
|
+
{ n:'J. P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
37
|
+
{ n:'J P Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
38
|
+
{ n:'JP Rangaswami', nn:'JP Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
39
|
+
{ n:'Audrey fforbes', nn:'Audrey fforbes', sn:'Audrey fforbes', s:'audrey-fforbes' },
|
40
|
+
{ n:'J. Arthur Rank', t: :person, nn:'J. Arthur Rank', sn:'Arthur Rank', s:'arthur-rank' },
|
41
|
+
{ n:'PHILIP NG', t: :person, nn:'Philip Ng', sn:'Philip Ng', s:'philip-ng' },
|
42
|
+
{ n:'Super R&D', nn:'Super R&D', sn:'Super R and D', s:'super-r-and-d' },
|
43
|
+
{ n:'Harry Dean Stanton', t: :person, nn:'Harry Dean Stanton', sn:'Harry Stanton', s:'harry-stanton' },
|
44
|
+
{ n:'Union Square Ventures', t: :organization, nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
|
45
|
+
{ n:'J Arthur Rank Inc.', t: :organization, nn:'J Arthur Rank', sn:'J Arthur Rank', s:'j-arthur-rank' },
|
46
|
+
{ n:'Jean VAN DER VELDE', t: :person, nn:'Jean VAN DER VELDE', sn:'Jean VAN DER VELDE', s:'jean-van-der-velde' },
|
47
|
+
{ n:'Al Capone', t: :person, nn:'Al Capone', sn:'Al Capone', s:'al-capone' },
|
48
|
+
{ n:'Fahd al-Saud', t: :person, nn:'Fahd al-Saud', sn:'Fahd al-Saud', s:'fahd-al-saud' },
|
49
|
+
{ n:'Mehmet al Auouiby', t: :person, nn:'Mehmet al Auouiby', sn:'Mehmet al Auouiby', s:'mehmet-al-auouiby' },
|
50
|
+
{ n:'Macquarie Bank', t: :organization, nn:'Macquarie Bank', sn:'Macquarie Bank', s:'macquarie-bank' },
|
51
|
+
{ n:"COMMEDIA DELL'ARTE", t: :organization, nn:"Commedia dell'Arte", sn:"Commedia dell'Arte", s:'commedia-dellarte' },
|
52
|
+
{ n:'Della Smith', t: :person, nn:'Della Smith', sn:'Della Smith', s:'della-smith' },
|
53
|
+
{ n:'Antonio DELLA MONTEVERDE', nn:'Antonio DELLA MONTEVERDE', sn:'Antonio DELLA MONTEVERDE', s:'antonio-della-monteverde' },
|
54
|
+
{ n:'Tony St Clair', t: :person, nn:'Tony St Clair', sn:'Tony St Clair', s:'tony-st-clair' },
|
55
|
+
{ n:'Seamus O\'Malley', t: :person, nn:'Seamus O\'Malley', sn:'Seamus O\'Malley', s:'seamus-omalley' },
|
56
|
+
{ n:'SeedCamp', t: :organization, nn:'SeedCamp', sn:'SeedCamp', s:'seedcamp' },
|
57
|
+
{ n:'Peter Van Der Auwera', t: :person, nn:'Peter Van Der Auwera', sn:'Peter Van Der Auwera', s:'peter-van-der-auwera' },
|
58
|
+
{ n:'VAN DER AUWERA, Peter', t: :person, nn:'Peter van der Auwera', sn:'Peter van der Auwera', s:'peter-van-der-auwera' },
|
59
|
+
{ n:'Li Fan', t: :person, nn:'Li Fan', sn:'Li Fan', s:'li-fan' },
|
60
|
+
{ n:'Fan Li', t: :person, nn:'Fan Li', sn:'Fan Li', s:'fan-li' },
|
61
|
+
{ n:'Levi Strauss & Co.', nn:'Levi Strauss', sn:'Levi Strauss', s:'levi-strauss' },
|
62
|
+
{ n:'Standard & Poor\'s', t: :organization, nn:'Standard & Poor\'s', sn:'Standard and Poor\'s', s:'standard-and-poors' },
|
63
|
+
{ n:'I B M Services', t: :organization, nn:'I.B.M. Services', sn:'IBM Services', s:'ibm-services' },
|
64
|
+
{ n:'Sean Park DDS', t: :person, nn:'Sean Park', sn:'Sean Park', s:'sean-park' },
|
65
|
+
{ n:'SEAN MACLISE PARK', t: :person, nn:'Sean Maclise Park', sn:'Sean Park', s:'sean-park' },
|
66
|
+
{ n:'AJ Hanna', t: :person, nn:'AJ Hanna', sn:'AJ Hanna', s:'aj-hanna' },
|
67
|
+
{ n:'Free & Clear', t: :organization, nn:'Free & Clear', sn:'Free and Clear', s:'free-and-clear' },
|
68
|
+
{ n:'Adam D\'ANGELO', t: :person, nn:'Adam D\'ANGELO', sn:'Adam D\'ANGELO', s:'adam-dangelo' },
|
69
|
+
{ n:'MACKENZIE, Doug', t: :person, nn:'Doug Mackenzie', sn:'Doug Mackenzie', s:'doug-mackenzie' },
|
70
|
+
{ n:'Up + Down', t: :organization, nn:'Up + Down', sn:'Up plus Down', s:'up-plus-down' },
|
71
|
+
{ n:'San Francisco Ltd', t: :organization, nn:'San Francisco', sn:'San Francisco', s:'san-francisco' },
|
72
|
+
{ n:'AT&T', t: :organization, nn:'At&T', sn:'At and T', s:'at-and-t' },
|
73
|
+
{ n:'SMITH, John, Jr.', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
74
|
+
{ n:'I Heart Movies', t: :organization, nn:'I Heart Movies', sn:'I Heart Movies', s:'i-heart-movies' },
|
75
|
+
{ n:'Y Combinator', t: :organization, nn:'Y Combinator', sn:'Y Combinator', s:'y-combinator' },
|
76
|
+
{ n:'Ben\'s 10 Hens', t: :organization, nn:'Ben\'s 10 Hens', sn:'Ben\'s 10 Hens', s:'bens-10-hens' },
|
77
|
+
{ n:'Elazer Edelman, MD , PhD', t: :person, nn:'Elazer Edelman', sn:'Elazer Edelman', s:'elazer-edelman' },
|
78
|
+
{ n:'Judith M. O\'Brien', t: :person, nn:'Judith M. O\'Brien', sn:'Judith O\'Brien', s:'judith-obrien' },
|
79
|
+
{ n:'MORRISON, Van', t: :person, nn:'Van Morrison', sn:'Van Morrison', s:'van-morrison' },
|
80
|
+
{ n:'i/o Ventures', t: :organization, nn:'i/o Ventures', sn:'i/o Ventures', s:'io-ventures' },
|
81
|
+
{ n:'C T Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
|
82
|
+
{ n:'C.T. Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
|
83
|
+
{ n:'CT Corporation System', t: :person, nn:'CT Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
|
84
|
+
{ n:'Corporation Service Company', t: :person, nn:'Corporation Service Company', sn:'Corporation Service Company', s:'corporation-service-company'},
|
85
|
+
{ n:'Kurshuni,Inc.', t: :organization, nn:'Kurshuni', sn:'Kurshuni', s:'kurshuni' },
|
86
|
+
{ n:'Cellular Inc-LLC', t: :organization, nn:'Cellular', sn:'Cellular', s:'cellular' },
|
87
|
+
{ n:'Emtec (AZ) Limited', t: :organization, nn:'Emtec (AZ)', sn:'Emtec (AZ)', s:'emtec-az' },
|
88
|
+
{ n:'Emtec (LLC) Limited', t: :organization, nn:'Emtec', sn:'Emtec', s:'emtec' },
|
89
|
+
{ n:'Emtec (XYZ LLC) Limited', t: :organization, nn:'Emtec (XYZ)', sn:'Emtec (XYZ)', s:'emtec-xyz' },
|
90
|
+
{ n:'Tao Ma', t: :person, nn:'Tao', sn:'Tao', s:'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
|
91
|
+
{ n:'(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn:'Courtney J. Miller', sn:'Courtney Miller', s:'courtney-miller' },
|
92
|
+
{ n:'(Mr Woo) The Window Cleaner', t: :person, nn:'(Woo) The Window Cleaner', sn:'(Woo) Cleaner', s:'woo-cleaner'},
|
93
|
+
{ n:'DOMINIC MACMURDO', t: :person, nn:'Dominic MacMurdo', sn:'Dominic MacMurdo', s:'dominic-macmurdo' },
|
94
|
+
{ n:'DOMINIC MACEDO', t: :person, nn:'Dominic Macedo', sn:'Dominic Macedo', s:'dominic-macedo' },
|
95
|
+
{ n:'DOMINIC MACDONALD', t: :person, nn:'Dominic MacDonald', sn:'Dominic MacDonald', s:'dominic-macdonald' },
|
96
|
+
{ n:'AGUSTA DO ROMEIRO', t: :person, nn:'Agusta do Romeiro', sn:'Agusta do Romeiro', s:'agusta-do-romeiro' },
|
97
|
+
{ n:'CARLOS DOS SANTOS', t: :person, nn:'Carlos dos Santos', sn:'Carlos dos Santos', s:'carlos-dos-santos' },
|
98
|
+
{ n:'유정 신', t: :organization, nn:'유정 신', sn:'유정 신', s:'유정-신' },
|
99
|
+
{ n:'xxx%52zzz', t: :organization, nn:'xxx%52zzz', sn:'xxx%52zzz', s:'xxxrzzz' },
|
100
|
+
{ n:'Евгений Болотнов', t: :organization, nn:'Евгений Болотнов', sn:'Евгений Болотнов', s:'Евгений-Болотнов' },
|
101
|
+
{ n:'김태성', t: :organization, nn:'김태성', sn:'김태성', s:'김태성' },
|
102
|
+
{ n:'ゴルフスタジアム', t: :organization, nn:'ゴルフスタジアム', sn:'ゴルフスタジアム', s:'ゴルフスタジアム' },
|
103
|
+
{ n:'我摘', t: :organization, nn:'我摘', sn:'我摘', s:'我摘' },
|
104
|
+
{ n:'Καρατζάς Στέφανος', t: :organization, nn:'Καρατζάς Στέφανος', sn:'Καρατζάς Στέφανος', s:'Καρατζάς-Στέφανος' },
|
105
|
+
{ n:'โชติวัน วัฒนลาภ', t: :organization, nn:'โชติวัน วัฒนลาภ', sn:'โชติวัน วัฒนลาภ', s:'โชติวัน-วัฒนลาภ' },
|
106
|
+
{ n:'張 續寶', t: :organization, nn:'張 續寶', sn:'張 續寶', s:'張-續寶' },
|
107
|
+
{ n:'Юрий Гайдук', t: :organization, nn:'Юрий Гайдук', sn:'Юрий Гайдук', s:'Юрий-Гайдук' },
|
108
|
+
{ n:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', s:'☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
|
109
|
+
{ n:'♠ KlasikB0i ♠', t: :organization, nn:'♠ KlasikB0i ♠', sn:'♠ KlasikB0i ♠', s:'♠-klasikb0i-♠' },
|
110
|
+
{ n:'* Shorusan *', t: :organization, nn:'* Shorusan *', sn:'* Shorusan *', s:'shorusan' },
|
111
|
+
{ n:'项目谷', t: :organization, nn:'项目谷', sn:'项目谷', s:'项目谷' },
|
112
|
+
{ n:'ООО "Инновационные полимерные адгезивы"', t: :organization, nn:'ООО "Инновационные полимерные адгезивы"', sn:'ООО "Инновационные полимерные адгезивы"', s:'ООО-Инновационные-полимерные-адгезивы' },
|
113
|
+
{ n:'عبدالله ...', t: :organization, nn:'عبدالله ...', sn:'عبدالله ...', s:'عبدالله' },
|
114
|
+
{ n:'กมลชนก ทิศไธสง', t: :organization, nn:'กมลชนก ทิศไธสง', sn:'กมลชนก ทิศไธสง', s:'กมลชนก-ทิศไธสง' },
|
115
|
+
{ n:'יוֹ אָב', t: :organization, nn:'יוֹ אָב', sn:'יוֹ אָב', s:'יוֹ-אָב' },
|
116
|
+
{ n:'יגאל נימני', t: :organization, nn:'יגאל נימני', sn:'יגאל נימני', s:'יגאל-נימני' },
|
117
|
+
{ n:'ניסים דניאלי', t: :organization, nn:'ניסים דניאלי', sn:'ניסים דניאלי', s:'ניסים-דניאלי' },
|
118
|
+
{ n:'مساء الخير', t: :organization, nn:'مساء الخير', sn:'مساء الخير', s:'مساء-الخير' },
|
119
|
+
{ n:'محمود ياسر', t: :organization, nn:'محمود ياسر', sn:'محمود ياسر', s:'محمود-ياسر' },
|
120
|
+
{ n:'קובי ביטר', t: :organization, nn:'קובי ביטר', sn:'קובי ביטר', s:'קובי-ביטר' },
|
121
|
+
{ n:'الملاك الحارس', t: :organization, nn:'الملاك الحارس', sn:'الملاك الحارس', s:'الملاك-الحارس' },
|
122
|
+
{ n:'কবির হাসান', t: :organization, nn:'কবির হাসান', sn:'কবির হাসান', s:'কবির-হাসান' }
|
123
|
+
]
|
124
|
+
end
|
125
|
+
|
126
|
+
# Checks every fixture in `names`: the slug built from :n (with the optional
# contact-type hint :t) must equal the expected :s value.
#
# The description is a fixed string: the fixtures are only reachable inside
# the example (through `names`), so no individual name can be interpolated
# here. `name` at example-group scope is the group class's own name.
it 'makes a slug from each fixture name' do
  names.each do |name_data|
    name     = name_data[:n]
    expected = name_data[:s]
    slug     = NameTamer[name, contact_type: name_data[:t]].slug

    # All fixtures share one example, so say which one failed.
    slug.should eq(expected), "slug for #{name.inspect}: expected #{expected.inspect}, got #{slug.inspect}"
  end
end
|
132
|
+
|
133
|
+
# Checks every fixture in `names`: the nice name built from :n (with the
# optional contact-type hint :t) must equal the expected :nn value.
#
# The description is a fixed string: the fixtures are only reachable inside
# the example (through `names`), so no individual name can be interpolated
# here. `name` at example-group scope is the group class's own name.
it 'makes a nice name from each fixture name' do
  names.each do |name_data|
    name      = name_data[:n]
    expected  = name_data[:nn]
    nice_name = NameTamer[name, contact_type: name_data[:t]].nice_name

    # All fixtures share one example, so say which one failed.
    nice_name.should eq(expected), "nice name for #{name.inspect}: expected #{expected.inspect}, got #{nice_name.inspect}"
  end
end
|
141
|
+
|
142
|
+
# Checks every fixture in `names`: the search name built from :n (with the
# optional contact-type hint :t) must equal the expected :sn value.
#
# The description is a fixed string: the fixtures are only reachable inside
# the example (through `names`), so no individual name can be interpolated
# here. `name` at example-group scope is the group class's own name.
it 'makes a searchable name from each fixture name' do
  names.each do |name_data|
    name        = name_data[:n]
    expected    = name_data[:sn]
    search_name = NameTamer[name, contact_type: name_data[:t]].search_name

    # All fixtures share one example, so say which one failed.
    search_name.should eq(expected), "search name for #{name.inspect}: expected #{expected.inspect}, got #{search_name.inspect}"
  end
end
|
148
|
+
end
|
149
|
+
|
150
|
+
# Contact-type inference when no explicit contact_type is passed to NameTamer.
describe 'contact type inference' do
  # Each row is [input name, contact type NameTamer is expected to infer].
  [
    ['Mr. John Smith',    :person],
    ['Di Doo Doo d.o.o.', :organization],
    ['DiDooDoo',          :organization],
    ['John Smith',        :person]
  ].each do |raw_name, expected_type|
    article = expected_type == :organization ? 'an' : 'a'

    it %(infers that "#{raw_name}" is #{article} #{expected_type}) do
      NameTamer[raw_name].contact_type.should eq(expected_type)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Coverage reporting: load SimpleCov and start it ahead of the RSpec
# configuration below.
require 'simplecov'
SimpleCov.start

RSpec.configure do |rspec|
  # Randomise the run order so that order dependencies show up. To replay a
  # particular ordering while debugging, pass the seed printed after the run:
  #     --seed 1234
  rspec.order = 'random'

  # Settings added by hand.
  rspec.color_enabled = true
  rspec.tty           = true
  rspec.formatter     = :documentation
end
|
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: name-tamer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Xenapto
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: gem-release
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: simplecov
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ~>
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Useful methods for taming names
|
98
|
+
email:
|
99
|
+
- developers@xenapto.com
|
100
|
+
executables:
|
101
|
+
- name-tamer
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- .env
|
106
|
+
- .gitignore
|
107
|
+
- .ruby-version
|
108
|
+
- Gemfile
|
109
|
+
- Gemfile.lock
|
110
|
+
- LICENSE
|
111
|
+
- README.md
|
112
|
+
- Rakefile
|
113
|
+
- bin/name-tamer
|
114
|
+
- lib/name_tamer.rb
|
115
|
+
- lib/name_tamer/version.rb
|
116
|
+
- name-tamer.gemspec
|
117
|
+
- spec/name_tamer_spec.rb
|
118
|
+
- spec/spec_helper.rb
|
119
|
+
homepage: https://github.com/Xenapto/name-tamer
|
120
|
+
licenses:
|
121
|
+
- MIT
|
122
|
+
metadata: {}
|
123
|
+
post_install_message:
|
124
|
+
rdoc_options: []
|
125
|
+
require_paths:
|
126
|
+
- lib
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ! '>='
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
requirements: []
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 2.2.2
|
140
|
+
signing_key:
|
141
|
+
specification_version: 4
|
142
|
+
summary: ! 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => John
|
143
|
+
Smith'
|
144
|
+
test_files:
|
145
|
+
- spec/name_tamer_spec.rb
|
146
|
+
- spec/spec_helper.rb
|