name-tamer 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.env +1 -0
- data/.gitignore +24 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +42 -0
- data/LICENSE +21 -0
- data/README.md +48 -0
- data/Rakefile +1 -0
- data/bin/name-tamer +3 -0
- data/lib/name_tamer.rb +509 -0
- data/lib/name_tamer/version.rb +3 -0
- data/name-tamer.gemspec +27 -0
- data/spec/name_tamer_spec.rb +166 -0
- data/spec/spec_helper.rb +16 -0
- metadata +146 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YzRiZjg0NGY3ODliMzVjZWM0ZDBhMjExYjNjODMyMmJmZTExNDRiMA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZmZmODdkOGIwYzBlN2I3MjM3OTVjNzBhZGM0ZjQ2MTk0MmQ3MmIyMg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YzhhNzUxMTY5OWQ4YjkxYzUxNTMxNTBhYjNkZGMyMDE1MDFmNDNkMmY2Y2Uy
|
10
|
+
ZWVmODhiMGM3OTBjNWE3ZmI0NGI0Yjk4MTRiZWYyMWRkZTY0NjBhNzI5NWNm
|
11
|
+
Nzk4ODliZjU3OTc3YzY4MzdkYWYyYzU4ODE4ZGJhNjkwNmE1MDU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YWNiMjZhODI5NDk2MmI1Y2E4MWUyYTg4NTk0M2ExMGFkNzhhZjdlZDRiYzJm
|
14
|
+
ZDI0ZjNiYmJlMWNiYjRmOTYxMGU5MTdlMTZjYzA0YTZjNjdkYTliOTc1YWRj
|
15
|
+
ZmM1ZWYwZTA5ZjQ5NDc2OGQ4NzA1NTgwYzM0MTNlYTFjMzgzMDc=
|
data/.env
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
PATH=/home/build/.rvm/gems/ruby-2.1.1/bin:/home/build/.rvm/gems/ruby-2.1.1@global/bin:/home/build/.rvm/rubies/ruby-2.1.1/bin:/home/build/.rvm/bin:/usr/local/heroku/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/build/.rvm/gems/ruby-2.1.1@global/bin/bundle
|
data/.gitignore
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
*.rbc
|
2
|
+
capybara-*.html
|
3
|
+
.rspec
|
4
|
+
/log
|
5
|
+
/tmp
|
6
|
+
/db/*.sqlite3
|
7
|
+
/public/system
|
8
|
+
/coverage/
|
9
|
+
/spec/tmp
|
10
|
+
**.orig
|
11
|
+
rerun.txt
|
12
|
+
pickle-email-*.html
|
13
|
+
config/initializers/secret_token.rb
|
14
|
+
config/secrets.yml
|
15
|
+
|
16
|
+
## Environment normalisation:
|
17
|
+
/.bundle
|
18
|
+
/vendor/bundle
|
19
|
+
|
20
|
+
# these should all be checked in to normalise the environment:
|
21
|
+
# Gemfile.lock, .ruby-version, .ruby-gemset
|
22
|
+
|
23
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
24
|
+
.rvmrc
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.2
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
name-tamer (0.0.0)
|
5
|
+
activesupport (~> 3)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activesupport (3.2.13)
|
11
|
+
i18n (= 0.6.1)
|
12
|
+
multi_json (~> 1.0)
|
13
|
+
diff-lcs (1.2.5)
|
14
|
+
docile (1.1.3)
|
15
|
+
gem-release (0.7.3)
|
16
|
+
i18n (0.6.1)
|
17
|
+
multi_json (1.10.1)
|
18
|
+
rake (10.3.2)
|
19
|
+
rspec (2.14.1)
|
20
|
+
rspec-core (~> 2.14.0)
|
21
|
+
rspec-expectations (~> 2.14.0)
|
22
|
+
rspec-mocks (~> 2.14.0)
|
23
|
+
rspec-core (2.14.8)
|
24
|
+
rspec-expectations (2.14.5)
|
25
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
26
|
+
rspec-mocks (2.14.6)
|
27
|
+
simplecov (0.8.2)
|
28
|
+
docile (~> 1.1.0)
|
29
|
+
multi_json
|
30
|
+
simplecov-html (~> 0.8.0)
|
31
|
+
simplecov-html (0.8.0)
|
32
|
+
|
33
|
+
PLATFORMS
|
34
|
+
ruby
|
35
|
+
|
36
|
+
DEPENDENCIES
|
37
|
+
bundler (~> 1)
|
38
|
+
gem-release (~> 0)
|
39
|
+
name-tamer!
|
40
|
+
rake (~> 10)
|
41
|
+
rspec (~> 2)
|
42
|
+
simplecov (~> 0)
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Xenapto
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# NameTamer
|
2
|
+
|
3
|
+
[Gem Version](http://badge.fury.io/rb/name-tamer)
|
4
|
+
[Code Climate](https://codeclimate.com/github/Xenapto/name-tamer)
|
5
|
+
[Dependency Status](https://gemnasium.com/Xenapto/name-tamer)
|
6
|
+

|
7
|
+
|
8
|
+
NameTamer: useful methods for taming names
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
Add this line to your application's Gemfile:
|
13
|
+
|
14
|
+
gem 'name-tamer'
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install name-tamer
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
Examples:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
NameTamer['Mr. John Q. Smith III, MD'].search_name # => John Smith
|
30
|
+
```
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
name_tamer = NameTamer['Mr. John Q. Smith III, MD']
|
34
|
+
name_tamer.slug # => john-smith
|
35
|
+
name_tamer.nice_name # => John Q. Smith
|
36
|
+
```
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
1. Fork it
|
41
|
+
1. Create your feature branch (`git checkout -b my-new-feature`)
|
42
|
+
1. Commit your changes (`git commit -am 'Add some feature'`)
|
43
|
+
1. Push to the branch (`git push origin my-new-feature`)
|
44
|
+
1. Create new Pull Request
|
45
|
+
|
46
|
+
## Acknowledgements
|
47
|
+
|
48
|
+
1. Thanks to Ryan Bigg for the guide to making your first gem https://github.com/radar/guides/blob/master/gem-development.md
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/name-tamer
ADDED
data/lib/name_tamer.rb
ADDED
@@ -0,0 +1,509 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# References:
|
4
|
+
# http://www.w3.org/International/questions/qa-personal-names
|
5
|
+
# https://github.com/berkmancenter/namae
|
6
|
+
# https://github.com/mericson
|
7
|
+
# http://en.wikipedia.org/wiki/Types_of_business_entity
|
8
|
+
# http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
|
9
|
+
# http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
|
10
|
+
# http://en.wikipedia.org/wiki/Nobiliary_particle
|
11
|
+
# http://en.wikipedia.org/wiki/Spanish_naming_customs
|
12
|
+
# http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
|
13
|
+
require 'active_support/core_ext/object'
|
14
|
+
|
15
|
+
class NameTamer
|
16
|
+
attr_reader :name, :contact_type
|
17
|
+
|
18
|
+
class << self
|
19
|
+
def [](name, args = {})
|
20
|
+
new name, args
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def nice_name
|
25
|
+
if @nice_name.nil?
|
26
|
+
@nice_name = @name.dup # Start with the name we've received
|
27
|
+
|
28
|
+
tidy_spacing # " John Smith " -> "John Smith"
|
29
|
+
consolidate_initials # "I. B. M." -> "I.B.M."
|
30
|
+
remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
|
31
|
+
fixup_last_name_first # "Smith, John" -> "John Smith"
|
32
|
+
fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
|
33
|
+
remove_adfixes # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
|
34
|
+
name_wrangle # proper name case and non-breaking spaces
|
35
|
+
use_nonbreaking_spaces_in_compound_names
|
36
|
+
end
|
37
|
+
|
38
|
+
@nice_name
|
39
|
+
end
|
40
|
+
|
41
|
+
def search_name
|
42
|
+
if @search_name.nil?
|
43
|
+
@search_name = nice_name.dup # Start with nice name
|
44
|
+
|
45
|
+
remove_initials # "John Q. Doe" -> "John Doe"
|
46
|
+
remove_middle_names # "Philip Seymour Hoffman" -> "Philip Hoffman"
|
47
|
+
remove_dots_from_abbreviations # "J.P.R. Williams" -> "JPR Williams"
|
48
|
+
standardize_words # "B&Q Intl" -> "B and Q International"
|
49
|
+
|
50
|
+
@search_name = ensure_whitespace_is_ascii_space @search_name
|
51
|
+
end
|
52
|
+
|
53
|
+
@search_name
|
54
|
+
end
|
55
|
+
|
56
|
+
def slug
|
57
|
+
if @slug.nil?
|
58
|
+
@slug = search_name.dup # Start with search name
|
59
|
+
slugify # "John Doe" -> "john-doe"
|
60
|
+
end
|
61
|
+
|
62
|
+
@slug
|
63
|
+
end
|
64
|
+
|
65
|
+
def contact_type
|
66
|
+
nice_name # make sure we've done the bit which infers contact_type
|
67
|
+
contact_type_best_effort
|
68
|
+
end
|
69
|
+
|
70
|
+
=begin These lines aren't used and aren't covered by specs
|
71
|
+
def name=(new_name)
|
72
|
+
initialize new_name, :contact_type => @contact_type
|
73
|
+
end
|
74
|
+
|
75
|
+
def contact_type=(new_contact_type)
|
76
|
+
initialize @name, :contact_type => new_contact_type
|
77
|
+
end
|
78
|
+
|
79
|
+
def to_hash
|
80
|
+
{
|
81
|
+
name: @name,
|
82
|
+
nice_name: @nice_name,
|
83
|
+
search_name: @search_name,
|
84
|
+
slug: @slug,
|
85
|
+
contact_type: @contact_type,
|
86
|
+
last_name: @last_name,
|
87
|
+
remainder: @remainder,
|
88
|
+
adfix_found: @adfix_found
|
89
|
+
}
|
90
|
+
end
|
91
|
+
=end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
#--------------------------------------------------------
|
96
|
+
# Tidy up the name we've received
|
97
|
+
#--------------------------------------------------------
|
98
|
+
|
99
|
+
# Normalise the spacing of @nice_name in place.
#
# Unicode whitespace is converted to single ASCII spaces *first*, because
# String#strip! only removes ASCII whitespace: stripping before normalising
# would leave a stray space where a leading/trailing non-breaking space was.
def tidy_spacing
  # Collapse every run of (Unicode) whitespace into one ASCII space
  @nice_name = ensure_whitespace_is_ascii_space(@nice_name)

  @nice_name.gsub!(/,\s*/, ', ') # Ensure commas have exactly one space after them
  @nice_name.strip!              # " John Smith " -> "John Smith"
end
|
105
|
+
|
106
|
+
# Remove spaces from groups of initials
|
107
|
+
def consolidate_initials
|
108
|
+
@nice_name.gsub!(/\b([a-z])\.* (?=[a-z][\. ])/i) { |match| "#{$1}." } # Remove spaces from initial groups
|
109
|
+
@nice_name.gsub!(/\b([a-z](?:\.[a-z])+)\.?(?= )/i) { |match| "#{$1}." } # Ensure each group ends with a dot
|
110
|
+
end
|
111
|
+
|
112
|
+
# An adfix is either a prefix or a suffix
|
113
|
+
# Strip every removable prefix and suffix from the name.
#
# While the name is still a single string (@last_name is nil) both ends of
# @nice_name are cleaned. Once the name has been split into two halves,
# suffixes are taken off @last_name and prefixes off @remainder.
def remove_adfixes
  if @last_name.nil?
    # Our name is still in one part, not two
    @nice_name = remove_all_adfixes(:suffix, @nice_name)
    @nice_name = remove_all_adfixes(:prefix, @nice_name)
  else
    # Our name is currently in two halves
    @last_name = remove_all_adfixes(:suffix, @last_name)
    @remainder = remove_all_adfixes(:prefix, @remainder)
  end
end

# Keep removing the outermost adfix of the given type until none is found.
#
# adfix_type - :prefix or :suffix
# name_part  - the String to strip
#
# Returns the stripped String. Always makes at least one attempt, so
# @adfix_found reflects the final (unsuccessful) attempt on return.
def remove_all_adfixes(adfix_type, name_part)
  loop do
    name_part = remove_outermost_adfix(adfix_type, name_part)
    break unless @adfix_found
  end

  name_part
end
|
134
|
+
|
135
|
+
# Names in the form "Smith, John" need to be turned around to "John Smith"
|
136
|
+
def fixup_last_name_first
|
137
|
+
unless @contact_type == :organization
|
138
|
+
parts = @nice_name.split ', '
|
139
|
+
|
140
|
+
if parts.count == 2
|
141
|
+
@last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it
|
142
|
+
@remainder = parts[1]
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
# Sometimes we end up with mismatched braces after adfix stripping
|
148
|
+
# e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
|
149
|
+
def fixup_mismatched_braces
|
150
|
+
left_brace_count = @nice_name.count '('
|
151
|
+
right_brace_count = @nice_name.count ')'
|
152
|
+
|
153
|
+
if left_brace_count > right_brace_count
|
154
|
+
@nice_name += ')'
|
155
|
+
elsif left_brace_count < right_brace_count
|
156
|
+
@nice_name = '(' + @nice_name
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
def name_wrangle
|
161
|
+
# Fix case if all caps or all lowercase
|
162
|
+
if @last_name.nil?
|
163
|
+
lowercase = @nice_name.downcase
|
164
|
+
uppercase = @nice_name.upcase
|
165
|
+
|
166
|
+
# Some companies like to be all lowercase so don't mess with them
|
167
|
+
@nice_name = name_case(lowercase) if @nice_name == uppercase || ( @nice_name == lowercase && @contact_type != :organization )
|
168
|
+
else
|
169
|
+
lowercase = @last_name.downcase
|
170
|
+
uppercase = @last_name.upcase
|
171
|
+
@last_name = name_case(lowercase) if @last_name == uppercase || @last_name == lowercase
|
172
|
+
|
173
|
+
@nice_name = "#{@remainder} #{@last_name}"
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# Conjoin compound names with non-breaking spaces
|
178
|
+
def use_nonbreaking_spaces_in_compound_names
|
179
|
+
# Fix known last names that have spaces (not hyphens!)
|
180
|
+
[
|
181
|
+
'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore',
|
182
|
+
'Holmes à Court', 'Holmes a Court', 'Baron Cohen',
|
183
|
+
'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
|
184
|
+
].each do |compound_name|
|
185
|
+
@nice_name.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
|
186
|
+
end
|
187
|
+
|
188
|
+
NAME_MODIFIERS.each do |modifier|
|
189
|
+
@nice_name.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |match| "#{$1}#{NONBREAKING_SPACE}" }
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
#--------------------------------------------------------
|
194
|
+
# Make search name from nice name
|
195
|
+
#--------------------------------------------------------
|
196
|
+
|
197
|
+
# Remove initials from personal names unless they are the only identifier.
|
198
|
+
# i.e. only remove initials if there's also a proper name there
|
199
|
+
def remove_initials
|
200
|
+
if @contact_type == :person
|
201
|
+
name = @search_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
|
202
|
+
|
203
|
+
# If the name still has at least one space we're OK
|
204
|
+
@search_name = name if name.include?(ASCII_SPACE)
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
def remove_middle_names
|
209
|
+
if @contact_type == :person
|
210
|
+
parts = @search_name.split
|
211
|
+
@search_name = "#{parts[0]} #{parts[-1]}" if parts.count > 2
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
def remove_dots_from_abbreviations
|
216
|
+
@search_name.gsub!(/\b([a-z])\./i) { |match| $1 }
|
217
|
+
end
|
218
|
+
|
219
|
+
def standardize_words
|
220
|
+
@search_name.gsub!(/ *& */, ' and ') # replace ampersand characters with ' and '
|
221
|
+
@search_name.gsub!(/ *\+ */, ' plus ') # replace plus signs with ' plus '
|
222
|
+
@search_name.gsub!(/\bintl\b/i, 'International') # replace 'intl' with 'International'
|
223
|
+
end
|
224
|
+
|
225
|
+
#--------------------------------------------------------
|
226
|
+
# Make slug from search name
|
227
|
+
#--------------------------------------------------------
|
228
|
+
|
229
|
+
def slugify
|
230
|
+
# Inflector::parameterize just gives up with non-latin characters so...
|
231
|
+
#@slug = @slug.parameterize # Can't use this
|
232
|
+
|
233
|
+
# Instead we'll do it ourselves
|
234
|
+
@slug = parameterize @slug
|
235
|
+
end
|
236
|
+
|
237
|
+
#--------------------------------------------------------
|
238
|
+
# Initialization and utilities
|
239
|
+
#--------------------------------------------------------
|
240
|
+
|
241
|
+
def initialize(name, args = {})
|
242
|
+
@name = name || ''
|
243
|
+
@contact_type = args[:contact_type].to_sym unless args[:contact_type].nil?
|
244
|
+
|
245
|
+
@nice_name = nil
|
246
|
+
@search_name = nil
|
247
|
+
@slug = nil
|
248
|
+
|
249
|
+
@last_name = nil
|
250
|
+
@remainder = nil
|
251
|
+
|
252
|
+
@adfix_found = false
|
253
|
+
end
|
254
|
+
|
255
|
+
# Record the contact type implied by an adfix that was just found.
#
# contact_type - anything responding to #to_sym (e.g. :person, 'organization')
#
# If a different contact type was already set, a notice is printed to
# standard output before it is overwritten. The notice is plain text:
# String#red is not part of core Ruby and nothing visible in this gem
# provides it, so calling it would raise NoMethodError here.
def set_contact_type(contact_type)
  contact_type_sym = contact_type.to_sym

  unless @contact_type.nil? || @contact_type == contact_type_sym
    puts "Changing contact type of #{@name} from #{@contact_type} to #{contact_type}"
  end

  @contact_type = contact_type_sym
end
|
260
|
+
|
261
|
+
# If we don't know the contact type, what's our best guess?
|
262
|
+
def contact_type_best_effort
|
263
|
+
if @contact_type
|
264
|
+
@contact_type
|
265
|
+
else
|
266
|
+
# If it's just one word we'll assume organization.
|
267
|
+
# If more then we'll assume a person
|
268
|
+
@name.include?(ASCII_SPACE) ? :person : :organization
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
def ensure_whitespace_is_ascii_space string
|
273
|
+
string.gsub(/[[:space:]]+/, ASCII_SPACE) # /\s/ doesn't match Unicode whitespace in Ruby 1.9.3
|
274
|
+
end
|
275
|
+
|
276
|
+
# We pass to this routine either prefixes or suffixes
|
277
|
+
def remove_outermost_adfix adfix_type, name_part
|
278
|
+
adfixes = ADFIX_PATTERNS[adfix_type]
|
279
|
+
contact_type = contact_type_best_effort
|
280
|
+
parts = name_part.partition adfixes[contact_type]
|
281
|
+
@adfix_found = parts[1].present?
|
282
|
+
|
283
|
+
# If the contact type is indeterminate and we didn't find a diagnostic adfix
|
284
|
+
# for a person then try again for an organization
|
285
|
+
if @contact_type.nil?
|
286
|
+
unless @adfix_found
|
287
|
+
contact_type = :organization
|
288
|
+
parts = name_part.partition adfixes[contact_type]
|
289
|
+
@adfix_found = parts[1].present?
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
if @adfix_found
|
294
|
+
# If we've found a diagnostic adfix then set the contact type
|
295
|
+
set_contact_type contact_type
|
296
|
+
|
297
|
+
# The remainder of the name will be in parts[0] or parts[2] depending
|
298
|
+
# on whether this is a prefix or a suffix.
|
299
|
+
# We'll also remove any trailing commas we've exposed.
|
300
|
+
result = (parts[0] + parts[2]).gsub(/\s*,\s*$/, '')
|
301
|
+
else
|
302
|
+
result = name_part
|
303
|
+
end
|
304
|
+
|
305
|
+
result
|
306
|
+
end
|
307
|
+
|
308
|
+
# Original Version of NameCase:
|
309
|
+
# Copyright (c) Mark Summerfield 1998-2008. All Rights Reserved
|
310
|
+
# This module may be used/distributed/modified under the same terms as Perl itself
|
311
|
+
# http://dev.perl.org/licenses/ (GPL)
|
312
|
+
#
|
313
|
+
# Ruby Version:
|
314
|
+
# Copyright (c) Aaron Patterson 2006
|
315
|
+
# NameCase is distributed under the GPL license.
|
316
|
+
#
|
317
|
+
# Substantially modified for Xendata
|
318
|
+
# Improved in several areas, also now adds non-breaking spaces for
|
319
|
+
# compound names like "van der Pump"
|
320
|
+
# Convert an already-downcased name into proper name case.
#
# lowercase - the name to fix, assumed to be downcased by the caller
#
# Returns a new String. The argument itself is never modified, so callers
# may safely pass a string they still need (or a frozen one).
def name_case(lowercase)
  name = lowercase.dup # every step below mutates in place, so work on a copy
  name.gsub!(/\b\w/) { |first| first.upcase }
  name.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's

  # Our list of terminal characters that indicate a non-celtic name used
  # to include o but we removed it because of MacMurdo.
  if name =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || name =~ /\bMc/
    name.gsub!(/\b(Ma?c)([A-Za-z]+)/) { $1 + $2.capitalize }

    # Fix Mac exceptions
    [
      'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
      'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
    ].each { |mac_name| name.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
  end

  # Fix names that conventionally start with a lowercase double f
  [
    'Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes'
  ].each { |ff_name| name.gsub!(ff_name, ff_name.downcase) }

  # Fixes for name modifiers followed by space
  # Also replaces spaces with non-breaking spaces
  NAME_MODIFIERS.each do |modifier|
    name.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) do
      "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}"
    end
  end

  # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
  ['Dell', 'D'].each do |modifier|
    name.gsub!(/(.#{modifier}')(\w)/) { "#{$1.rstrip.downcase}#{$2}" }
  end

  # Upcase words with no vowels, e.g JPR Williams
  name.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { $1.upcase }
  # Except Ng
  name.gsub!(/\b(NG)\b/i) { $1.capitalize } # http://en.wikipedia.org/wiki/Ng

  name
end
|
360
|
+
|
361
|
+
# Turn a string into a slug.
#
# string - the String to convert
# args   - optional Hash:
#          :sep     - separator that replaces whitespace (default SLUG_DELIMITER)
#          :rfc3987 - when truthy, filter with FILTER_RFC3987 instead of
#                     FILTER_COMPAT (default false)
#          :filter  - a Regexp matching the characters to delete; overrides
#                     the choice made by :rfc3987
#
# Returns a new, downcased String; the argument is not modified.
def parameterize(string, args = {})
  sep = args[:sep] || SLUG_DELIMITER
  rfc3987 = args[:rfc3987] || false
  filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)

  # First we unescape any pct-encoded characters. These might turn into
  # things we want to alter for the slug, like whitespace (e.g. %20).
  # Decoding is done by hand: URI.unescape was never required by this file,
  # is obsolete, and no longer exists in Ruby 3.0+. Consecutive escapes are
  # decoded together so multi-byte characters come out intact.
  encoding = string.encoding
  encoding = Encoding::UTF_8 if encoding == Encoding::US_ASCII
  parameterized_string = string.gsub(/(?:%[0-9a-fA-F]{2})+/) do |escaped|
    [escaped.delete('%')].pack('H*').force_encoding(encoding)
  end

  # Then we change any whitespace into our separator character.
  # [[:space:]] is used because \s does not match Unicode whitespace.
  parameterized_string.gsub!(/[[:space:]]+/, sep)

  # Then we strip any illegal characters out completely
  parameterized_string.gsub!(filter, '')

  # Make sure separators are not where they shouldn't be
  unless sep.nil? || sep.empty?
    re_sep = Regexp.escape(sep)
    # No more than one of the separator in a row.
    parameterized_string.gsub!(/#{re_sep}{2,}/, sep)
    # Remove leading/trailing separator.
    parameterized_string.gsub!(/\A#{re_sep}|#{re_sep}\z/i, '')
  end

  # downcase if it's all latin
  parameterized_string.downcase
end
|
388
|
+
|
389
|
+
#--------------------------------------------------------
|
390
|
+
# Constants
|
391
|
+
#--------------------------------------------------------
|
392
|
+
|
393
|
+
NONBREAKING_SPACE = "\u00a0"
|
394
|
+
ASCII_SPACE = "\u0020"
|
395
|
+
ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
|
396
|
+
SLUG_DELIMITER = '-'
|
397
|
+
|
398
|
+
# Constants for parameterizing Unicode strings for IRIs
|
399
|
+
#
|
400
|
+
# Allowed characters in an IRI segment are defined by RFC 3987
|
401
|
+
# (https://tools.ietf.org/html/rfc3987#section-2.2) as follows:
|
402
|
+
#
|
403
|
+
# isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
|
404
|
+
# / "@" )
|
405
|
+
# ; non-zero-length segment without any colon ":"
|
406
|
+
#
|
407
|
+
# iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
|
408
|
+
#
|
409
|
+
# pct-encoded = "%" HEXDIG HEXDIG
|
410
|
+
#
|
411
|
+
# sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
412
|
+
# / "*" / "+" / "," / ";" / "="
|
413
|
+
#
|
414
|
+
# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
|
415
|
+
# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
|
416
|
+
# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
|
417
|
+
# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
|
418
|
+
# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
|
419
|
+
# / %xD0000-DFFFD / %xE1000-EFFFD
|
420
|
+
#
|
421
|
+
# Note that we can't use Unicode code points above \uFFFF because of
|
422
|
+
# regex limitations, so we'll ignore ucschar above that point.
|
423
|
+
#
|
424
|
+
# We're using the most restrictive segment definition (isegment-nz-nc)
|
425
|
+
# to avoid any possible problems with the IRI that it one day might
|
426
|
+
# get placed in.
|
427
|
+
ALPHA = 'A-Za-z'
|
428
|
+
DIGIT = '0-9'
|
429
|
+
UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
|
430
|
+
IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}"
|
431
|
+
SUBDELIMS = '!$&\'\(\)\*+,;='
|
432
|
+
ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@" # pct-encoded not needed
|
433
|
+
FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
|
434
|
+
FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
|
435
|
+
|
436
|
+
NAME_MODIFIERS = [
|
437
|
+
'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lr]', 'D[ao]s', 'El', 'La', 'L[eo]',
|
438
|
+
'V[ao]n', 'Of', 'St[\.]?'
|
439
|
+
]
|
440
|
+
|
441
|
+
# These are the prefixes and suffixes we want to remove
|
442
|
+
# If you add to the list, you can use spaces and dots where appropriate
|
443
|
+
# Ensure any single letters are followed by a dot because we'll add one to the string
|
444
|
+
# during processing, e.g. "y Cía." should be "y. Cía."
|
445
|
+
ADFIXES = {
|
446
|
+
prefix: {
|
447
|
+
person: [
|
448
|
+
'Baron', 'Baroness', 'Capt.', 'Captain', 'Col.', 'Colonel', 'Dame',
|
449
|
+
'Doctor', 'Dr.', 'Judge', 'Justice', 'Lady', 'Lieut.', 'Lieutenant',
|
450
|
+
'Lord', 'Madame', 'Major', 'Master', 'Matron', 'Messrs.', 'Mgr.',
|
451
|
+
'Miss', 'Mister', 'Mlle.', 'Mme.', 'Mons.', 'Mr.', 'Mr. & Mrs.',
|
452
|
+
'Mr. and Mrs.', 'Mrs.', 'Msgr.', 'Prof.', 'Professor', 'Rev.',
|
453
|
+
'Reverend', 'Sir', 'Sister', 'The Hon.', 'The Lady.', 'The Lord',
|
454
|
+
'The Rt. Hon.'
|
455
|
+
],
|
456
|
+
organization: [
|
457
|
+
'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
|
458
|
+
],
|
459
|
+
before:'\\A', after:ADFIX_JOINERS
|
460
|
+
},
|
461
|
+
suffix: {
|
462
|
+
person: [
|
463
|
+
'C.I.S.S.P.', 'B.Tech.', 'D.Phil.', 'B.Eng.', 'C.F.A.', 'D.B.E.', 'D.D.S.', 'Eng.D.', 'M.B.A.', 'M.B.E.',
|
464
|
+
'M.E.P.', 'M.Eng.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'LL.B.',
|
465
|
+
'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'O.K.',
|
466
|
+
'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
|
467
|
+
],
|
468
|
+
organization: [
|
469
|
+
'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
|
470
|
+
'Cía. S. C. A.', 'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.',
|
471
|
+
'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
|
472
|
+
'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
|
473
|
+
'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
|
474
|
+
'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'I.L.L.C.',
|
475
|
+
'K.G.a.A.', 'L.L.L.P.', 'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.',
|
476
|
+
'P.L.L.C.', 'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.',
|
477
|
+
'S.C.R.I.', 'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
|
478
|
+
'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
|
479
|
+
'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
|
480
|
+
'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
|
481
|
+
'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
|
482
|
+
'V.O.F.', 'V.o.G.', 'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.',
|
483
|
+
'股份有限公司', 'Ap.S.', 'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.',
|
484
|
+
'A.Ş.', 'A.y.', 'B.M.', 'b.t.', 'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.',
|
485
|
+
'E.U.', 'e.v.', 'G.K.', 'G.P.', 'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.',
|
486
|
+
'Ltd.', 'N.K.', 'N.L.', 'N.V.', 'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.',
|
487
|
+
'S.A.', 'S.D.', 'S.E.', 's.f.', 'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.',
|
488
|
+
'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
|
489
|
+
'A/S', 'G/S', 'I/S', 'K/S', 'P/S'
|
490
|
+
],
|
491
|
+
before:ADFIX_JOINERS, after:'\\z'
|
492
|
+
}
|
493
|
+
}
|
494
|
+
|
495
|
+
ADFIX_PATTERNS = {}
|
496
|
+
|
497
|
+
[:prefix, :suffix].each do |adfix_type|
|
498
|
+
patterns = {}
|
499
|
+
adfix = ADFIXES[adfix_type]
|
500
|
+
|
501
|
+
[:person, :organization].each do |contact_type|
|
502
|
+
with_optional_spaces = adfix[contact_type].map { |p| p.gsub(ASCII_SPACE,' *') }
|
503
|
+
pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
|
504
|
+
patterns[contact_type] = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
|
505
|
+
end
|
506
|
+
|
507
|
+
ADFIX_PATTERNS[adfix_type] = patterns
|
508
|
+
end
|
509
|
+
end
|
data/name-tamer.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
lib = File.expand_path('../lib', __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require 'name_tamer/version'
|
4
|
+
|
5
|
+
# Gem metadata for name-tamer.
Gem::Specification.new do |spec|
  spec.name          = 'name-tamer'
  spec.version       = NameTamer::VERSION
  spec.authors       = ['Xenapto']
  spec.email         = ['developers@xenapto.com']
  spec.description   = %q{Useful methods for taming names}
  # The example uses search_name: NameTamer exposes nice_name, search_name
  # and slug, and has no simple_name method.
  spec.summary       = %q{Example: NameTamer['Mr. John Q. Smith III, MD'].search_name # => John Smith}
  spec.homepage      = 'https://github.com/Xenapto/name-tamer'
  spec.license       = 'MIT'

  # Package exactly the files tracked by git
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features|coverage)/})
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'activesupport', '~> 3'

  spec.add_development_dependency 'bundler', '~> 1'
  spec.add_development_dependency 'rake', '~> 10'
  spec.add_development_dependency 'rspec', '~> 2'
  spec.add_development_dependency 'gem-release', '~> 0'
  spec.add_development_dependency 'simplecov', '~> 0'
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'name_tamer'
|
4
|
+
|
5
|
+
describe NameTamer do
|
6
|
+
let(:names) do
|
7
|
+
[
|
8
|
+
{ n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
9
|
+
{ n:'JOHN SMITH', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
10
|
+
{ n:'john smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
11
|
+
{ n:'Smith, John', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
12
|
+
{ n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
13
|
+
{ n:'Smith, John', nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
14
|
+
{ n:'John J Smith', t: :person, nn:'John J Smith', sn:'John Smith', s:'john-smith' },
|
15
|
+
{ n:'John J. Smith', t: :person, nn:'John J. Smith', sn:'John Smith', s:'john-smith' },
|
16
|
+
{ n:'SMITH, Mr John J.R.', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
17
|
+
{ n:' SMITH, Mr John J. R. ', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
18
|
+
{ n:'SMITH, Mr John J.R.', nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
|
19
|
+
{ n:'Mr John J.R. SMITH JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
20
|
+
{ n:'Mr John J.R. SMITH III,JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
21
|
+
{ n:'Mr John J.R. SMITH JD', nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
|
22
|
+
{ n:'Mr Jean-Michel SMITH JD', t: :person, nn:'Jean-Michel SMITH', sn:'Jean-Michel SMITH', s:'jean-michel-smith' },
|
23
|
+
{ n:'Mr Jean Michel-SMITH JD', nn:'Jean Michel-SMITH', sn:'Jean Michel-SMITH', s:'jean-michel-smith' },
|
24
|
+
{ n:'Dr Martha Lane Fox Ph.D', nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
|
25
|
+
{ n:'Lane Fox Ph.D, Dr Martha', t: :person, nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
|
26
|
+
{ n:'Baroness Lane-Fox of Lewisham', t: :person, nn:'Lane-Fox of Lewisham', sn:'Lane-Fox of Lewisham', s:'lane-fox-of-lewisham' },
|
27
|
+
{ n:'MACDONALDS LLC', nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
|
28
|
+
{ n:'MACDONALDS LLC', t: :organization, nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
|
29
|
+
{ n:'macdonalds', t: :organization, nn:'macdonalds', sn:'macdonalds', s:'macdonalds' },
|
30
|
+
{ n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization, nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
31
|
+
{ n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
32
|
+
{ n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
|
33
|
+
{ n:'K.V.A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
|
34
|
+
{ n:'K. V. A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
|
35
|
+
{ n:'J.P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
36
|
+
{ n:'J. P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
37
|
+
{ n:'J P Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
38
|
+
{ n:'JP Rangaswami', nn:'JP Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
|
39
|
+
{ n:'Audrey fforbes', nn:'Audrey fforbes', sn:'Audrey fforbes', s:'audrey-fforbes' },
|
40
|
+
{ n:'J. Arthur Rank', t: :person, nn:'J. Arthur Rank', sn:'Arthur Rank', s:'arthur-rank' },
|
41
|
+
{ n:'PHILIP NG', t: :person, nn:'Philip Ng', sn:'Philip Ng', s:'philip-ng' },
|
42
|
+
{ n:'Super R&D', nn:'Super R&D', sn:'Super R and D', s:'super-r-and-d' },
|
43
|
+
{ n:'Harry Dean Stanton', t: :person, nn:'Harry Dean Stanton', sn:'Harry Stanton', s:'harry-stanton' },
|
44
|
+
{ n:'Union Square Ventures', t: :organization, nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
|
45
|
+
{ n:'J Arthur Rank Inc.', t: :organization, nn:'J Arthur Rank', sn:'J Arthur Rank', s:'j-arthur-rank' },
|
46
|
+
{ n:'Jean VAN DER VELDE', t: :person, nn:'Jean VAN DER VELDE', sn:'Jean VAN DER VELDE', s:'jean-van-der-velde' },
|
47
|
+
{ n:'Al Capone', t: :person, nn:'Al Capone', sn:'Al Capone', s:'al-capone' },
|
48
|
+
{ n:'Fahd al-Saud', t: :person, nn:'Fahd al-Saud', sn:'Fahd al-Saud', s:'fahd-al-saud' },
|
49
|
+
{ n:'Mehmet al Auouiby', t: :person, nn:'Mehmet al Auouiby', sn:'Mehmet al Auouiby', s:'mehmet-al-auouiby' },
|
50
|
+
{ n:'Macquarie Bank', t: :organization, nn:'Macquarie Bank', sn:'Macquarie Bank', s:'macquarie-bank' },
|
51
|
+
{ n:"COMMEDIA DELL'ARTE", t: :organization, nn:"Commedia dell'Arte", sn:"Commedia dell'Arte", s:'commedia-dellarte' },
|
52
|
+
{ n:'Della Smith', t: :person, nn:'Della Smith', sn:'Della Smith', s:'della-smith' },
|
53
|
+
{ n:'Antonio DELLA MONTEVERDE', nn:'Antonio DELLA MONTEVERDE', sn:'Antonio DELLA MONTEVERDE', s:'antonio-della-monteverde' },
|
54
|
+
{ n:'Tony St Clair', t: :person, nn:'Tony St Clair', sn:'Tony St Clair', s:'tony-st-clair' },
|
55
|
+
{ n:'Seamus O\'Malley', t: :person, nn:'Seamus O\'Malley', sn:'Seamus O\'Malley', s:'seamus-omalley' },
|
56
|
+
{ n:'SeedCamp', t: :organization, nn:'SeedCamp', sn:'SeedCamp', s:'seedcamp' },
|
57
|
+
{ n:'Peter Van Der Auwera', t: :person, nn:'Peter Van Der Auwera', sn:'Peter Van Der Auwera', s:'peter-van-der-auwera' },
|
58
|
+
{ n:'VAN DER AUWERA, Peter', t: :person, nn:'Peter van der Auwera', sn:'Peter van der Auwera', s:'peter-van-der-auwera' },
|
59
|
+
{ n:'Li Fan', t: :person, nn:'Li Fan', sn:'Li Fan', s:'li-fan' },
|
60
|
+
{ n:'Fan Li', t: :person, nn:'Fan Li', sn:'Fan Li', s:'fan-li' },
|
61
|
+
{ n:'Levi Strauss & Co.', nn:'Levi Strauss', sn:'Levi Strauss', s:'levi-strauss' },
|
62
|
+
{ n:'Standard & Poor\'s', t: :organization, nn:'Standard & Poor\'s', sn:'Standard and Poor\'s', s:'standard-and-poors' },
|
63
|
+
{ n:'I B M Services', t: :organization, nn:'I.B.M. Services', sn:'IBM Services', s:'ibm-services' },
|
64
|
+
{ n:'Sean Park DDS', t: :person, nn:'Sean Park', sn:'Sean Park', s:'sean-park' },
|
65
|
+
{ n:'SEAN MACLISE PARK', t: :person, nn:'Sean Maclise Park', sn:'Sean Park', s:'sean-park' },
|
66
|
+
{ n:'AJ Hanna', t: :person, nn:'AJ Hanna', sn:'AJ Hanna', s:'aj-hanna' },
|
67
|
+
{ n:'Free & Clear', t: :organization, nn:'Free & Clear', sn:'Free and Clear', s:'free-and-clear' },
|
68
|
+
{ n:'Adam D\'ANGELO', t: :person, nn:'Adam D\'ANGELO', sn:'Adam D\'ANGELO', s:'adam-dangelo' },
|
69
|
+
{ n:'MACKENZIE, Doug', t: :person, nn:'Doug Mackenzie', sn:'Doug Mackenzie', s:'doug-mackenzie' },
|
70
|
+
{ n:'Up + Down', t: :organization, nn:'Up + Down', sn:'Up plus Down', s:'up-plus-down' },
|
71
|
+
{ n:'San Francisco Ltd', t: :organization, nn:'San Francisco', sn:'San Francisco', s:'san-francisco' },
|
72
|
+
{ n:'AT&T', t: :organization, nn:'At&T', sn:'At and T', s:'at-and-t' },
|
73
|
+
{ n:'SMITH, John, Jr.', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
74
|
+
{ n:'I Heart Movies', t: :organization, nn:'I Heart Movies', sn:'I Heart Movies', s:'i-heart-movies' },
|
75
|
+
{ n:'Y Combinator', t: :organization, nn:'Y Combinator', sn:'Y Combinator', s:'y-combinator' },
|
76
|
+
{ n:'Ben\'s 10 Hens', t: :organization, nn:'Ben\'s 10 Hens', sn:'Ben\'s 10 Hens', s:'bens-10-hens' },
|
77
|
+
{ n:'Elazer Edelman, MD , PhD', t: :person, nn:'Elazer Edelman', sn:'Elazer Edelman', s:'elazer-edelman' },
|
78
|
+
{ n:'Judith M. O\'Brien', t: :person, nn:'Judith M. O\'Brien', sn:'Judith O\'Brien', s:'judith-obrien' },
|
79
|
+
{ n:'MORRISON, Van', t: :person, nn:'Van Morrison', sn:'Van Morrison', s:'van-morrison' },
|
80
|
+
{ n:'i/o Ventures', t: :organization, nn:'i/o Ventures', sn:'i/o Ventures', s:'io-ventures' },
|
81
|
+
{ n:'C T Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
|
82
|
+
{ n:'C.T. Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
|
83
|
+
{ n:'CT Corporation System', t: :person, nn:'CT Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
|
84
|
+
{ n:'Corporation Service Company', t: :person, nn:'Corporation Service Company', sn:'Corporation Service Company', s:'corporation-service-company'},
|
85
|
+
{ n:'Kurshuni,Inc.', t: :organization, nn:'Kurshuni', sn:'Kurshuni', s:'kurshuni' },
|
86
|
+
{ n:'Cellular Inc-LLC', t: :organization, nn:'Cellular', sn:'Cellular', s:'cellular' },
|
87
|
+
{ n:'Emtec (AZ) Limited', t: :organization, nn:'Emtec (AZ)', sn:'Emtec (AZ)', s:'emtec-az' },
|
88
|
+
{ n:'Emtec (LLC) Limited', t: :organization, nn:'Emtec', sn:'Emtec', s:'emtec' },
|
89
|
+
{ n:'Emtec (XYZ LLC) Limited', t: :organization, nn:'Emtec (XYZ)', sn:'Emtec (XYZ)', s:'emtec-xyz' },
|
90
|
+
{ n:'Tao Ma', t: :person, nn:'Tao', sn:'Tao', s:'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
|
91
|
+
{ n:'(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn:'Courtney J. Miller', sn:'Courtney Miller', s:'courtney-miller' },
|
92
|
+
{ n:'(Mr Woo) The Window Cleaner', t: :person, nn:'(Woo) The Window Cleaner', sn:'(Woo) Cleaner', s:'woo-cleaner'},
|
93
|
+
{ n:'DOMINIC MACMURDO', t: :person, nn:'Dominic MacMurdo', sn:'Dominic MacMurdo', s:'dominic-macmurdo' },
|
94
|
+
{ n:'DOMINIC MACEDO', t: :person, nn:'Dominic Macedo', sn:'Dominic Macedo', s:'dominic-macedo' },
|
95
|
+
{ n:'DOMINIC MACDONALD', t: :person, nn:'Dominic MacDonald', sn:'Dominic MacDonald', s:'dominic-macdonald' },
|
96
|
+
{ n:'AGUSTA DO ROMEIRO', t: :person, nn:'Agusta do Romeiro', sn:'Agusta do Romeiro', s:'agusta-do-romeiro' },
|
97
|
+
{ n:'CARLOS DOS SANTOS', t: :person, nn:'Carlos dos Santos', sn:'Carlos dos Santos', s:'carlos-dos-santos' },
|
98
|
+
{ n:'유정 신', t: :organization, nn:'유정 신', sn:'유정 신', s:'유정-신' },
|
99
|
+
{ n:'xxx%52zzz', t: :organization, nn:'xxx%52zzz', sn:'xxx%52zzz', s:'xxxrzzz' },
|
100
|
+
{ n:'Евгений Болотнов', t: :organization, nn:'Евгений Болотнов', sn:'Евгений Болотнов', s:'Евгений-Болотнов' },
|
101
|
+
{ n:'김태성', t: :organization, nn:'김태성', sn:'김태성', s:'김태성' },
|
102
|
+
{ n:'ゴルフスタジアム', t: :organization, nn:'ゴルフスタジアム', sn:'ゴルフスタジアム', s:'ゴルフスタジアム' },
|
103
|
+
{ n:'我摘', t: :organization, nn:'我摘', sn:'我摘', s:'我摘' },
|
104
|
+
{ n:'Καρατζάς Στέφανος', t: :organization, nn:'Καρατζάς Στέφανος', sn:'Καρατζάς Στέφανος', s:'Καρατζάς-Στέφανος' },
|
105
|
+
{ n:'โชติวัน วัฒนลาภ', t: :organization, nn:'โชติวัน วัฒนลาภ', sn:'โชติวัน วัฒนลาภ', s:'โชติวัน-วัฒนลาภ' },
|
106
|
+
{ n:'張 續寶', t: :organization, nn:'張 續寶', sn:'張 續寶', s:'張-續寶' },
|
107
|
+
{ n:'Юрий Гайдук', t: :organization, nn:'Юрий Гайдук', sn:'Юрий Гайдук', s:'Юрий-Гайдук' },
|
108
|
+
{ n:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', s:'☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
|
109
|
+
{ n:'♠ KlasikB0i ♠', t: :organization, nn:'♠ KlasikB0i ♠', sn:'♠ KlasikB0i ♠', s:'♠-klasikb0i-♠' },
|
110
|
+
{ n:'* Shorusan *', t: :organization, nn:'* Shorusan *', sn:'* Shorusan *', s:'shorusan' },
|
111
|
+
{ n:'项目谷', t: :organization, nn:'项目谷', sn:'项目谷', s:'项目谷' },
|
112
|
+
{ n:'ООО "Инновационные полимерные адгезивы"', t: :organization, nn:'ООО "Инновационные полимерные адгезивы"', sn:'ООО "Инновационные полимерные адгезивы"', s:'ООО-Инновационные-полимерные-адгезивы' },
|
113
|
+
{ n:'عبدالله ...', t: :organization, nn:'عبدالله ...', sn:'عبدالله ...', s:'عبدالله' },
|
114
|
+
{ n:'กมลชนก ทิศไธสง', t: :organization, nn:'กมลชนก ทิศไธสง', sn:'กมลชนก ทิศไธสง', s:'กมลชนก-ทิศไธสง' },
|
115
|
+
{ n:'יוֹ אָב', t: :organization, nn:'יוֹ אָב', sn:'יוֹ אָב', s:'יוֹ-אָב' },
|
116
|
+
{ n:'יגאל נימני', t: :organization, nn:'יגאל נימני', sn:'יגאל נימני', s:'יגאל-נימני' },
|
117
|
+
{ n:'ניסים דניאלי', t: :organization, nn:'ניסים דניאלי', sn:'ניסים דניאלי', s:'ניסים-דניאלי' },
|
118
|
+
{ n:'مساء الخير', t: :organization, nn:'مساء الخير', sn:'مساء الخير', s:'مساء-الخير' },
|
119
|
+
{ n:'محمود ياسر', t: :organization, nn:'محمود ياسر', sn:'محمود ياسر', s:'محمود-ياسر' },
|
120
|
+
{ n:'קובי ביטר', t: :organization, nn:'קובי ביטר', sn:'קובי ביטר', s:'קובי-ביטר' },
|
121
|
+
{ n:'الملاك الحارس', t: :organization, nn:'الملاك الحارس', sn:'الملاك الحارس', s:'الملاك-الحارس' },
|
122
|
+
{ n:'কবির হাসান', t: :organization, nn:'কবির হাসান', sn:'কবির হাসান', s:'কবির-হাসান' }
|
123
|
+
]
|
124
|
+
end
|
125
|
+
|
126
|
+
# Verifies the slug for every entry of the `names` fixture table.
#
# The description is a fixed string on purpose: interpolating `name` here
# would be evaluated while the example group is being defined, where `name`
# is the example group's own Module#name, not a fixture value.
it 'makes a slug from each name' do
  names.each do |name_data|
    name = name_data[:n]
    NameTamer[name, contact_type: name_data[:t]].slug.should == name_data[:s]
  end
end
|
132
|
+
|
133
|
+
# Verifies the nice name for every entry of the `names` fixture table.
#
# The description is a fixed string on purpose: interpolating `name` here
# would be evaluated while the example group is being defined, where `name`
# is the example group's own Module#name, not a fixture value.
it 'makes a nice name from each name' do
  names.each do |name_data|
    name = name_data[:n]
    nice_name = NameTamer[name, contact_type: name_data[:t]].nice_name

    nice_name.should == name_data[:nn]
  end
end
|
141
|
+
|
142
|
+
# Verifies the search name for every entry of the `names` fixture table.
#
# The description is a fixed string on purpose: interpolating `name` here
# would be evaluated while the example group is being defined, where `name`
# is the example group's own Module#name, not a fixture value.
it 'makes a searchable name from each name' do
  names.each do |name_data|
    name = name_data[:n]
    NameTamer[name, contact_type: name_data[:t]].search_name.should == name_data[:sn]
  end
end
|
148
|
+
end
|
149
|
+
|
150
|
+
# Checks the contact type NameTamer infers when none is supplied.
# One example is generated per table row; the generated descriptions read
# 'infers that "<name>" is a person' / '... is an organization'.
describe 'contact type inference' do
  {
    'Mr. John Smith'    => :person,
    'Di Doo Doo d.o.o.' => :organization,
    'DiDooDoo'          => :organization,
    'John Smith'        => :person
  }.each do |raw_name, expected_type|
    article = expected_type == :organization ? 'an' : 'a'

    it "infers that \"#{raw_name}\" is #{article} #{expected_type}" do
      NameTamer[raw_name].contact_type.should eq(expected_type)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Start SimpleCov so that a coverage report is produced for the spec run.
require 'simplecov'
SimpleCov.start

RSpec.configure do |rspec|
  # Specs run in random order so that order dependencies show up. To
  # reproduce a particular ordering while debugging, pass the seed that is
  # printed after each run:
  #     --seed 1234
  rspec.order = 'random'

  # Manually-added output settings
  rspec.color_enabled = true
  rspec.tty = true
  rspec.formatter = :documentation
end
|
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: name-tamer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Xenapto
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: gem-release
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: simplecov
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ~>
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Useful methods for taming names
|
98
|
+
email:
|
99
|
+
- developers@xenapto.com
|
100
|
+
executables:
|
101
|
+
- name-tamer
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- .env
|
106
|
+
- .gitignore
|
107
|
+
- .ruby-version
|
108
|
+
- Gemfile
|
109
|
+
- Gemfile.lock
|
110
|
+
- LICENSE
|
111
|
+
- README.md
|
112
|
+
- Rakefile
|
113
|
+
- bin/name-tamer
|
114
|
+
- lib/name_tamer.rb
|
115
|
+
- lib/name_tamer/version.rb
|
116
|
+
- name-tamer.gemspec
|
117
|
+
- spec/name_tamer_spec.rb
|
118
|
+
- spec/spec_helper.rb
|
119
|
+
homepage: https://github.com/Xenapto/name-tamer
|
120
|
+
licenses:
|
121
|
+
- MIT
|
122
|
+
metadata: {}
|
123
|
+
post_install_message:
|
124
|
+
rdoc_options: []
|
125
|
+
require_paths:
|
126
|
+
- lib
|
127
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ! '>='
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
requirements: []
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 2.2.2
|
140
|
+
signing_key:
|
141
|
+
specification_version: 4
|
142
|
+
summary: ! 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => John
|
143
|
+
Smith'
|
144
|
+
test_files:
|
145
|
+
- spec/name_tamer_spec.rb
|
146
|
+
- spec/spec_helper.rb
|