name-tamer 0.2.15 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -1
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +94 -32
- data/Guardfile +16 -0
- data/doc/maintenance.rake +45 -0
- data/doc/suffixes.csv +92 -28
- data/lib/name-tamer.rb +80 -77
- data/lib/name-tamer/version.rb +1 -1
- data/lib/string_extras.rb +4 -4
- data/name-tamer.gemspec +20 -17
- data/spec/name_tamer_spec.rb +20 -9
- metadata +53 -28
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cbf8a73410b3e5575cf42adc919e7b42f23b080f
|
|
4
|
+
data.tar.gz: c27236356141b3023dc14aa5e590383f0662d8d2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 117f98cf5609b0811dc806d74383f94787cbd74293e51edbe2f9e78f26a0d877142119a77c93e3ffb4a74afbc04053110e66acf5032e26d4712e7862da72f331
|
|
7
|
+
data.tar.gz: da4f7a8598c5b05d60e947f1c94c93c55b838b91a8fc2f00c7e5774d288f613864bd8e440c279eef0ec9e134394e07774f8e9f3407a0d1a33b0a9573bcca32bb
|
data/.rubocop.yml
CHANGED
data/.ruby-gemset
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
name-tamer
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.2.
|
|
1
|
+
2.2.2
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
name-tamer (0.
|
|
4
|
+
name-tamer (0.3.0)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
@@ -9,60 +9,122 @@ GEM
|
|
|
9
9
|
ast (2.0.0)
|
|
10
10
|
astrolabe (1.3.0)
|
|
11
11
|
parser (>= 2.2.0.pre.3, < 3.0)
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
celluloid (0.16.0)
|
|
13
|
+
timers (~> 4.0.0)
|
|
14
|
+
coderay (1.1.0)
|
|
15
|
+
coveralls (0.8.2)
|
|
16
|
+
json (~> 1.8)
|
|
14
17
|
rest-client (>= 1.6.8, < 2)
|
|
15
|
-
simplecov (~> 0.
|
|
18
|
+
simplecov (~> 0.10.0)
|
|
16
19
|
term-ansicolor (~> 1.3)
|
|
17
20
|
thor (~> 0.19.1)
|
|
18
21
|
diff-lcs (1.2.5)
|
|
19
22
|
docile (1.1.5)
|
|
23
|
+
domain_name (0.5.24)
|
|
24
|
+
unf (>= 0.0.5, < 1.0.0)
|
|
25
|
+
ffi (1.9.8)
|
|
26
|
+
formatador (0.2.5)
|
|
20
27
|
gem-release (0.7.3)
|
|
21
|
-
|
|
22
|
-
|
|
28
|
+
guard (2.12.6)
|
|
29
|
+
formatador (>= 0.2.4)
|
|
30
|
+
listen (~> 2.7)
|
|
31
|
+
lumberjack (~> 1.0)
|
|
32
|
+
nenv (~> 0.1)
|
|
33
|
+
notiffany (~> 0.0)
|
|
34
|
+
pry (>= 0.9.12)
|
|
35
|
+
shellany (~> 0.0)
|
|
36
|
+
thor (>= 0.18.1)
|
|
37
|
+
guard-compat (1.2.1)
|
|
38
|
+
guard-rspec (4.5.2)
|
|
39
|
+
guard (~> 2.1)
|
|
40
|
+
guard-compat (~> 1.1)
|
|
41
|
+
rspec (>= 2.99.0, < 4.0)
|
|
42
|
+
guard-rubocop (1.2.0)
|
|
43
|
+
guard (~> 2.0)
|
|
44
|
+
rubocop (~> 0.20)
|
|
45
|
+
hitimes (1.2.2)
|
|
46
|
+
http-cookie (1.0.2)
|
|
47
|
+
domain_name (~> 0.5)
|
|
48
|
+
json (1.8.3)
|
|
49
|
+
listen (2.10.1)
|
|
50
|
+
celluloid (~> 0.16.0)
|
|
51
|
+
rb-fsevent (>= 0.9.3)
|
|
52
|
+
rb-inotify (>= 0.9)
|
|
53
|
+
lumberjack (1.0.9)
|
|
54
|
+
method_source (0.8.2)
|
|
55
|
+
mime-types (2.6.1)
|
|
56
|
+
nenv (0.2.0)
|
|
23
57
|
netrc (0.10.3)
|
|
24
|
-
|
|
58
|
+
notiffany (0.0.6)
|
|
59
|
+
nenv (~> 0.1)
|
|
60
|
+
shellany (~> 0.0)
|
|
61
|
+
parser (2.2.2.5)
|
|
25
62
|
ast (>= 1.1, < 3.0)
|
|
26
|
-
powerpack (0.1.
|
|
63
|
+
powerpack (0.1.1)
|
|
64
|
+
pry (0.10.1)
|
|
65
|
+
coderay (~> 1.1.0)
|
|
66
|
+
method_source (~> 0.8.1)
|
|
67
|
+
slop (~> 3.4)
|
|
27
68
|
rainbow (2.0.0)
|
|
28
69
|
rake (10.4.2)
|
|
29
|
-
|
|
70
|
+
rb-fsevent (0.9.5)
|
|
71
|
+
rb-inotify (0.9.5)
|
|
72
|
+
ffi (>= 0.5.0)
|
|
73
|
+
rest-client (1.8.0)
|
|
74
|
+
http-cookie (>= 1.0.2, < 2.0)
|
|
30
75
|
mime-types (>= 1.16, < 3.0)
|
|
31
76
|
netrc (~> 0.7)
|
|
32
|
-
rspec (
|
|
33
|
-
rspec-core (~>
|
|
34
|
-
rspec-expectations (~>
|
|
35
|
-
rspec-mocks (~>
|
|
36
|
-
rspec-core (
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
77
|
+
rspec (3.3.0)
|
|
78
|
+
rspec-core (~> 3.3.0)
|
|
79
|
+
rspec-expectations (~> 3.3.0)
|
|
80
|
+
rspec-mocks (~> 3.3.0)
|
|
81
|
+
rspec-core (3.3.1)
|
|
82
|
+
rspec-support (~> 3.3.0)
|
|
83
|
+
rspec-expectations (3.3.0)
|
|
84
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
85
|
+
rspec-support (~> 3.3.0)
|
|
86
|
+
rspec-mocks (3.3.1)
|
|
87
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
88
|
+
rspec-support (~> 3.3.0)
|
|
89
|
+
rspec-support (3.3.0)
|
|
90
|
+
rubocop (0.32.1)
|
|
41
91
|
astrolabe (~> 1.3)
|
|
42
|
-
parser (>= 2.2.
|
|
92
|
+
parser (>= 2.2.2.5, < 3.0)
|
|
43
93
|
powerpack (~> 0.1)
|
|
44
94
|
rainbow (>= 1.99.1, < 3.0)
|
|
45
95
|
ruby-progressbar (~> 1.4)
|
|
46
|
-
ruby-progressbar (1.7.
|
|
47
|
-
|
|
96
|
+
ruby-progressbar (1.7.5)
|
|
97
|
+
shellany (0.0.1)
|
|
98
|
+
simplecov (0.10.0)
|
|
48
99
|
docile (~> 1.1.0)
|
|
49
|
-
|
|
50
|
-
simplecov-html (~> 0.
|
|
51
|
-
simplecov-html (0.
|
|
52
|
-
|
|
100
|
+
json (~> 1.8)
|
|
101
|
+
simplecov-html (~> 0.10.0)
|
|
102
|
+
simplecov-html (0.10.0)
|
|
103
|
+
slop (3.6.0)
|
|
104
|
+
term-ansicolor (1.3.2)
|
|
53
105
|
tins (~> 1.0)
|
|
54
106
|
thor (0.19.1)
|
|
55
|
-
|
|
107
|
+
timers (4.0.1)
|
|
108
|
+
hitimes
|
|
109
|
+
tins (1.5.4)
|
|
110
|
+
unf (0.1.4)
|
|
111
|
+
unf_ext
|
|
112
|
+
unf_ext (0.0.7.1)
|
|
56
113
|
|
|
57
114
|
PLATFORMS
|
|
58
115
|
ruby
|
|
59
116
|
|
|
60
117
|
DEPENDENCIES
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
118
|
+
coveralls (~> 0.8)
|
|
119
|
+
gem-release (~> 0.7)
|
|
120
|
+
guard (~> 2.12)
|
|
121
|
+
guard-rspec (~> 4.5)
|
|
122
|
+
guard-rubocop (~> 1.2)
|
|
64
123
|
name-tamer!
|
|
65
124
|
rake (~> 10)
|
|
66
|
-
rspec (~>
|
|
67
|
-
rubocop (~> 0)
|
|
68
|
-
simplecov (~> 0.
|
|
125
|
+
rspec (~> 3.3)
|
|
126
|
+
rubocop (~> 0.32)
|
|
127
|
+
simplecov (~> 0.10)
|
|
128
|
+
|
|
129
|
+
BUNDLED WITH
|
|
130
|
+
1.10.4
|
data/Guardfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
guard :rubocop do
|
|
2
|
+
watch(/.+\.rb$/)
|
|
3
|
+
watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
|
|
4
|
+
end
|
|
5
|
+
|
|
6
|
+
guard(
|
|
7
|
+
:rspec,
|
|
8
|
+
all_after_pass: true,
|
|
9
|
+
all_on_start: true,
|
|
10
|
+
cmd: 'bundle exec rspec --fail-fast --format documentation'
|
|
11
|
+
) do
|
|
12
|
+
watch(%r{^spec/.+_spec\.rb$})
|
|
13
|
+
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
|
14
|
+
watch('spec/spec_helper.rb') { 'spec' }
|
|
15
|
+
watch(%r{^spec/support/.+\.rb$}) { 'spec' }
|
|
16
|
+
end
|
data/doc/maintenance.rake
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# encoding: utf-8
|
|
2
|
+
lib = File.expand_path('../../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
|
|
2
5
|
require 'csv'
|
|
6
|
+
require 'name-tamer'
|
|
3
7
|
|
|
4
8
|
desc 'Build prefixes and suffixes'
|
|
5
9
|
task :adfixes do
|
|
@@ -29,3 +33,44 @@ task :adfixes do
|
|
|
29
33
|
puts "'" + sp.join("', '") + "'"
|
|
30
34
|
puts "'" + so.join("', '") + "'"
|
|
31
35
|
end
|
|
36
|
+
|
|
37
|
+
task :check_existing do
|
|
38
|
+
[
|
|
39
|
+
'Chartered F.C.S.I.',
|
|
40
|
+
'C.I.S.S.P.', 'T.M.I.E.T.', 'A.C.C.A.', 'C.I.T.P.', 'F.B.C.S.', 'F.C.C.A.', 'F.C.M.I.', 'F.I.E.T.', 'F.I.R.P.',
|
|
41
|
+
'M.I.E.T.', 'B.Tech.',
|
|
42
|
+
'Cantab.', 'D.Phil.', 'I.T.I.L. v3', 'B.Eng.', 'C.Eng.', 'M.Jur.', 'C.F.A.', 'D.B.E.', 'C.L.P.',
|
|
43
|
+
'D.D.S.', 'D.V.M.', 'Eng.D.', 'A.C.A.', 'C.T.A.', 'E.R.P.', 'F.C.A.', 'F.P.C.', 'F.R.M.', 'M.B.A.', 'M.B.E.',
|
|
44
|
+
'M.E.P.', 'M.Eng.', 'M.Jur.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'V.M.D.', 'B.Ed.', 'B.Sc.',
|
|
45
|
+
'Ed.D.', 'Hons.', 'LL.B.',
|
|
46
|
+
'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.',
|
|
47
|
+
'O.K.', 'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
|
|
48
|
+
].each do |suffix|
|
|
49
|
+
fail suffix unless NameTamer::ADFIXES[:suffix][:person].include? suffix
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
[
|
|
53
|
+
'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.', 'Cía. S. C. A.',
|
|
54
|
+
'y. Cía. S. C.', 'S.A. de C.V.', 'spol. s.r.o.', '(Pty.) Ltd.', '(Pvt.) Ltd.', 'A.D.S.I.Tz.', 'S.p. z.o.o.',
|
|
55
|
+
'(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.', 'S.P.R.L.U.',
|
|
56
|
+
'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.', 'Pty. Ltd.',
|
|
57
|
+
'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.', 'C.V.O.A.',
|
|
58
|
+
'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'K.G.a.A.', 'L.L.L.P.',
|
|
59
|
+
'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.', 'P.L.L.C.', 'S. en C.',
|
|
60
|
+
'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.', 'S.C.R.I.', 'S.C.R.L.',
|
|
61
|
+
'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', '&. Cie.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.', 'Part.G.', 'Sh.p.k.',
|
|
62
|
+
'&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.', 'F.I.E.', 'G.b.R.',
|
|
63
|
+
'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.', 'L.L.P.', 'o.h.f.',
|
|
64
|
+
'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.', 'S.C.S.', 'S.E.M.',
|
|
65
|
+
'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.', 'V.O.F.', 'V.o.G.',
|
|
66
|
+
'v.o.s.', 'V.Z.W.', 'z.r.t.', 'А.А.Т.', 'Е.А.Д.', 'З.А.Т.', 'К.Д.А.', 'О.О.Д.', 'Т.А.А.', '股份有限公司', 'Ap.S.',
|
|
67
|
+
'Corp.', 'ltda.', 'Sh.A.', 'st.G.', 'Ultd.', 'a.b.', 'A.D.', 'A.E.', 'A.G.', 'A.S.', 'A.Ş.', 'A.y.', 'B.M.', 'b.t.',
|
|
68
|
+
'B.V.', 'C.A.', 'C.V.', 'd.d.', 'e.c.', 'E.E.', 'e.G.', 'E.I.', 'E.P.', 'E.T.', 'E.U.', 'e.v.', 'G.K.', 'G.P.',
|
|
69
|
+
'h.f.', 'Inc.', 'K.D.', 'K.G.', 'K.K.', 'k.s.', 'k.v.', 'K.y.', 'L.C.', 'L.P.', 'Ltd.', 'N.K.', 'N.L.', 'N.V.',
|
|
70
|
+
'O.E.', 'O.G.', 'O.Ü.', 'O.y.', 'P.C.', 'p.l.', 'Pty.', 'PUP.', 'Pvt.', 'r.t.', 'S.A.', 'S.D.', 'S.E.', 's.f.',
|
|
71
|
+
'S.L.', 'S.P.', 'S.s.', 'T.K.', 'T.Ü.', 'U.Ü.', 'Y.K.', 'А.Д.', 'І.П.', 'К.Д.', 'ПУП.', 'С.Д.', 'בע"מ', '任意組合',
|
|
72
|
+
'匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社', 'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
|
|
73
|
+
].each do |suffix|
|
|
74
|
+
fail suffix unless NameTamer::ADFIXES[:suffix][:organization].include? suffix
|
|
75
|
+
end
|
|
76
|
+
end
|
data/doc/suffixes.csv
CHANGED
|
@@ -67,6 +67,7 @@ S.C.R.L.,,organization,100,,8
|
|
|
67
67
|
S.M.B.A.,Selskab med begrænset ansvar,organization,100,Denmark,8
|
|
68
68
|
S.P.R.L.,société privée à responsabilité limitée,organization,100,Belgium,8
|
|
69
69
|
Е.О.О.Д.,ednolichno druzhestvo s ogranichena otgovornost,organization,100,Bulgaria,8
|
|
70
|
+
&. Cie.,,organization,100,,7
|
|
70
71
|
and Co.,,organization,100,,7
|
|
71
72
|
Comm.V.,gewone commanditaire vennootschap,organization,100,Belgium,7
|
|
72
73
|
Limited,Limited,organization,100,,7
|
|
@@ -143,7 +144,7 @@ A.y.,avoin yhtiö,organization,80,Finland,4
|
|
|
143
144
|
B.M.,Be'eravon Mugbal,organization,100,Israel,4
|
|
144
145
|
b.t.,betéti társaság,organization,100,Hungary,4
|
|
145
146
|
B.V.,Besloten vennootschap,organization,100,Netherlands,4
|
|
146
|
-
C.A.,Compañía Anónima,organization,
|
|
147
|
+
C.A.,Compañía Anónima,organization,50,Ecuador,4
|
|
147
148
|
C.V.,Commanditaire vennootschap,organization,100,Netherlands,4
|
|
148
149
|
d.d.,dioničko društvo,organization,100,Bosnia & Herzegovina,4
|
|
149
150
|
e.c.,egyéni cég,organization,100,Hungary,4
|
|
@@ -210,72 +211,135 @@ G/S,Gensidigt selskab,organization,100,Denmark,3
|
|
|
210
211
|
I/S,Interessentskab,organization,100,Denmark,3
|
|
211
212
|
K/S,Kommanditselskab,organization,100,Denmark,3
|
|
212
213
|
P/S,Partnerselskab ,organization,100,Denmark,3
|
|
214
|
+
S/A,,organization,100,,3
|
|
215
|
+
Chartered F.C.S.I.,,person,100,,18
|
|
216
|
+
Chartered M.C.S.I.,,person,100,,18
|
|
217
|
+
I.F.R.S. Certified,,person,100,,18
|
|
218
|
+
F.Inst.L.M.,,person,100,,11
|
|
213
219
|
C.I.S.S.P.,,person,100,,10
|
|
220
|
+
F.C.I.P.S.,,person,100,,10
|
|
221
|
+
M.R.I.C.S.,,person,100,,10
|
|
222
|
+
T.M.I.E.T.,Member of the Institution of Engineering and Technology,person,100,,10
|
|
223
|
+
Dip. D.M.,,person,100,,9
|
|
224
|
+
A.A.M.S.,,person,100,,8
|
|
225
|
+
A.C.C.A.,Association of Chartered Certified Accountants,person,100,,8
|
|
226
|
+
A.C.M.A.,,person,100,,8
|
|
227
|
+
A.I.F.A.,,person,100,,8
|
|
228
|
+
A.W.M.A.,,person,100,,8
|
|
229
|
+
C.A.I.A.,,person,100,,8
|
|
230
|
+
C.A.P.M.,,person,100,,8
|
|
231
|
+
C.C.I.M.,,person,100,,8
|
|
232
|
+
C.D.F.A.,,person,100,,8
|
|
233
|
+
C.E.P.P.,,person,100,,8
|
|
234
|
+
C.F.B.S.,,person,100,,8
|
|
235
|
+
C.G.M.A.,,person,100,,8
|
|
236
|
+
C.I.T.P.,,person,100,,8
|
|
237
|
+
C.L.T.C.,,person,100,,8
|
|
238
|
+
C.P.C.C.,,person,100,,8
|
|
239
|
+
C.R.P.C.,,person,100,,8
|
|
240
|
+
C.R.P.S.,,person,100,,8
|
|
241
|
+
C.S.O.X.,,person,100,,8
|
|
242
|
+
C.S.S.D.,,person,100,,8
|
|
243
|
+
F.B.C.S.,,person,100,,8
|
|
244
|
+
F.C.C.A.,Fellow of the Association of Chartered Certified Accountants,person,100,,8
|
|
245
|
+
F.C.M.I.,Fellow of the Chartered Management Institute,person,100,United Kingdom,8
|
|
246
|
+
F.C.S.I.,,person,100,,8
|
|
247
|
+
F.I.E.T.,Fellow of the Institution of Engineering and Technology,person,100,,8
|
|
248
|
+
F.I.R.P.,,person,100,,8
|
|
249
|
+
M.I.E.T.,Member of the Institution of Engineering and Technology,person,100,,8
|
|
250
|
+
M.S.F.S.,,person,100,,8
|
|
251
|
+
M.Sc. D.,,person,100,,8
|
|
252
|
+
O.R.S.C.,,person,100,,8
|
|
253
|
+
R.I.C.P.,,person,100,,8
|
|
214
254
|
B.Tech.,Bachelor of Technology,person,100,,7
|
|
255
|
+
Cantab.,,person,100,,7
|
|
256
|
+
Ch.F.C.,,person,100,,7
|
|
215
257
|
D.Phil.,Doctor of Philosophy,person,100,,7
|
|
258
|
+
I.T.I.L. v3,,person,100,,7
|
|
259
|
+
M.Io.D.,,person,100,,7
|
|
260
|
+
S.C.M.P,,person,100,,7
|
|
261
|
+
A.C.A.,Associate of the Institute of Chartered Accountants in England and Wales,person,100,United Kingdom,6
|
|
262
|
+
A.C.C.,,person,100,,6
|
|
263
|
+
A.E.P.,,person,100,,6
|
|
264
|
+
A.I.F.,,person,100,,6
|
|
265
|
+
A.S.A.,,person,100,,6
|
|
216
266
|
B.Eng.,Bachelor of Engineering,person,100,,6
|
|
267
|
+
C.B.V.,,person,100,,6
|
|
268
|
+
C.E.M.,,person,100,,6
|
|
269
|
+
C.Eng.,,person,100,,6
|
|
217
270
|
C.F.A.,,person,100,,6
|
|
271
|
+
C.F.F.,,person,100,,6
|
|
272
|
+
C.F.P.,,person,100,,6
|
|
273
|
+
C.F.S.,,person,100,,6
|
|
274
|
+
C.G.A.,,person,100,,6
|
|
275
|
+
C.G.B.,,person,100,,6
|
|
276
|
+
C.G.P.,,person,100,,6
|
|
277
|
+
C.I.M.,,person,100,,6
|
|
218
278
|
C.L.P.,,person,100,,6
|
|
279
|
+
C.L.U.,,person,100,,6
|
|
280
|
+
C.M.A.,,person,100,,6
|
|
281
|
+
C.M.T.,,person,100,,6
|
|
282
|
+
C.P.A.,,person,100,,6
|
|
283
|
+
C.T.A.,Chartered Tax Adviser,person,100,,6
|
|
284
|
+
C.W.S.,,person,100,,6
|
|
219
285
|
D.B.E.,Dame of the British Empire,person,100,,6
|
|
220
286
|
D.D.S.,Doctor of Dental Surgery,person,100,,6
|
|
287
|
+
D.V.M.,Doctor of Veterinary Medecine,person,100,,6
|
|
288
|
+
E.R.P.,Energy Risk Professional,person,100,,6
|
|
221
289
|
Eng.D.,Doctor of Engineering,person,100,,6
|
|
290
|
+
F.C.A.,Fellow of the Institute of Chartered Accountants in England and Wales,person,100,United Kingdom,6
|
|
291
|
+
F.P.C.,,person,100,,6
|
|
292
|
+
F.R.M.,,person,100,,6
|
|
293
|
+
F.R.M.,,person,100,,6
|
|
294
|
+
G.S.P.,,person,100,,6
|
|
295
|
+
L.P.S.,,person,100,,6
|
|
222
296
|
M.B.A.,Master of Business Administration,person,100,,6
|
|
223
297
|
M.B.E.,Member of the Order of the British Empire,person,100,,6
|
|
224
298
|
M.E.P.,Member of the European Parliament,person,100,,6
|
|
225
299
|
M.Eng.,Master of Engineering,person,100,,6
|
|
300
|
+
M.Jur.,Master of Jurisprudence,person,100,United Kingdom,6
|
|
301
|
+
M.P.A.,,person,100,,6
|
|
302
|
+
M.S.F.,,person,100,,6
|
|
226
303
|
M.S.P.,Member of the Scottish Parliament,person,100,,6
|
|
227
304
|
O.B.E.,Order of the British Empire,person,100,,6
|
|
305
|
+
P.C.C.,,person,100,,6
|
|
306
|
+
P.F.S.,,person,100,,6
|
|
307
|
+
P.H.R.,,person,100,,6
|
|
228
308
|
P.M.C.,,person,100,,6
|
|
229
309
|
P.M.P.,,person,100,,6
|
|
310
|
+
P.M.P.,,person,100,,6
|
|
230
311
|
P.S.P.,,person,100,,6
|
|
312
|
+
R.F.C.,,person,100,,6
|
|
313
|
+
V.M.D.,Doctor of Veterinary Medecine,person,100,,6
|
|
231
314
|
B.Ed.,Bachelor of Education,person,20,,5
|
|
232
315
|
B.Sc.,Bachelor of Science,person,100,,5
|
|
233
316
|
Ed.D.,Doctor of Education,person,100,,5
|
|
317
|
+
Ed.M.,,person,100,,5
|
|
318
|
+
Hons.,,person,100,,5
|
|
234
319
|
LL.B.,Bachelor of Laws,person,100,,5
|
|
235
320
|
LL.D.,Doctor of Laws,person,100,,5
|
|
321
|
+
LL.M.,Master of Laws,person,100,,5
|
|
236
322
|
M.Ed.,Master of Education,person,100,,5
|
|
237
323
|
M.Sc.,Master of Science,person,100,,5
|
|
324
|
+
Oxon.,,person,100,,5
|
|
238
325
|
Ph.D.,Doctor of Philosophy,person,100,,5
|
|
239
326
|
B.A.,Bachelor of Arts,person,100,,4
|
|
327
|
+
C.A.,,person,50,,4
|
|
328
|
+
E.A.,,person,100,,4
|
|
240
329
|
Esq.,Esquire,person,100,,4
|
|
241
330
|
J.D.,Juris Doctor,person,100,,4
|
|
242
331
|
K.C.,King's Counsel,person,100,,4
|
|
243
332
|
M.A.,Master of Arts,person,100,,4
|
|
244
333
|
M.D.,Doctor of Medicine,person,100,,4
|
|
245
334
|
M.P.,Member of Parliament,person,100,,4
|
|
335
|
+
M.S.,,person,100,,4
|
|
246
336
|
O.K.,"(unknown personal suffix, poss. Oklahoma-related)",person,20,,4
|
|
247
337
|
P.A.,Physician Assistant,person,100,,4
|
|
248
338
|
Q.C.,Queen's Counsel,person,100,,4
|
|
339
|
+
R.D.,,person,100,,4
|
|
249
340
|
III,The Third,person,100,,3
|
|
250
341
|
Jr.,Junior,person,100,,3
|
|
251
342
|
Sr.,Senior,person,100,,3
|
|
252
343
|
II,The Second,person,100,,2
|
|
253
344
|
IV,The Fourth,person,100,,2
|
|
254
345
|
V,The Fifth,person,20,,1
|
|
255
|
-
D.V.M.,Doctor of Veterinary Medecine,person,100,,6
|
|
256
|
-
V.M.D.,Doctor of Veterinary Medecine,person,100,,6
|
|
257
|
-
M.I.E.T.,Member of the Institution of Engineering and Technology,person,100,,8
|
|
258
|
-
T.M.I.E.T.,Member of the Institution of Engineering and Technology,person,100,,10
|
|
259
|
-
F.I.E.T.,Fellow of the Institution of Engineering and Technology,person,100,,8
|
|
260
|
-
E.R.P.,Energy Risk Professional,person,100,,6
|
|
261
|
-
A.C.A.,Associate of the Institute of Chartered Accountants in England and Wales,person,100,United Kingdom,6
|
|
262
|
-
C.T.A.,Chartered Tax Adviser,person,100,,6
|
|
263
|
-
F.C.A.,Fellow of the Institute of Chartered Accountants in England and Wales,person,100,United Kingdom,6
|
|
264
|
-
M.Jur.,Master of Jurisprudence,person,100,United Kingdom,6
|
|
265
|
-
F.C.M.I.,Fellow of the Chartered Management Institute,person,100,United Kingdom,8
|
|
266
|
-
F.C.C.A.,Fellow of the Association of Chartered Certified Accountants,person,100,,8
|
|
267
|
-
A.C.C.A.,Association of Chartered Certified Accountants,person,100,,8
|
|
268
|
-
&. Cie.,,organization,100,,7
|
|
269
|
-
I.T.I.L. v3,,person,100,,7
|
|
270
|
-
P.M.P.,,person,100,,6
|
|
271
|
-
F.I.R.P.,,person,100,,8
|
|
272
|
-
C.Eng.,,person,100,,6
|
|
273
|
-
S/A,,organization,100,,3
|
|
274
|
-
Chartered F.C.S.I.,,person,100,,18
|
|
275
|
-
F.P.C.,,person,100,,6
|
|
276
|
-
F.R.M.,,person,100,,6
|
|
277
|
-
Oxon.,,person,100,,5
|
|
278
|
-
Cantab.,,person,100,,7
|
|
279
|
-
C.I.T.P.,,person,100,,8
|
|
280
|
-
F.B.C.S.,,person,100,,8
|
|
281
|
-
Hons.,,person,100,5
|
data/lib/name-tamer.rb
CHANGED
|
@@ -22,9 +22,9 @@ class NameTamer
|
|
|
22
22
|
|
|
23
23
|
# Make a slug from a string
|
|
24
24
|
def parameterize(string, args = {})
|
|
25
|
-
sep
|
|
26
|
-
rfc3987 = args[:rfc3987]
|
|
27
|
-
filter
|
|
25
|
+
sep = args[:sep] || SLUG_DELIMITER
|
|
26
|
+
rfc3987 = args[:rfc3987] || false
|
|
27
|
+
filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
|
|
28
28
|
|
|
29
29
|
new_string = string.dup
|
|
30
30
|
|
|
@@ -47,11 +47,11 @@ class NameTamer
|
|
|
47
47
|
unless @tidy_name
|
|
48
48
|
@tidy_name = name.dup # Start with the name we've received
|
|
49
49
|
|
|
50
|
-
unescape
|
|
51
|
-
remove_zero_width
|
|
52
|
-
tidy_spacing
|
|
53
|
-
fix_encoding_errors
|
|
54
|
-
consolidate_initials
|
|
50
|
+
unescape # Unescape percent-encoded characters and fix UTF-8 encoding
|
|
51
|
+
remove_zero_width # remove zero-width characters
|
|
52
|
+
tidy_spacing # " John Smith " -> "John Smith"
|
|
53
|
+
fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
|
|
54
|
+
consolidate_initials # "I. B. M." -> "I.B.M."
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
@tidy_name
|
|
@@ -59,13 +59,13 @@ class NameTamer
|
|
|
59
59
|
|
|
60
60
|
def nice_name
|
|
61
61
|
unless @nice_name
|
|
62
|
-
@nice_name = tidy_name.dup
|
|
62
|
+
@nice_name = tidy_name.dup # Start with the tidied name
|
|
63
63
|
|
|
64
|
-
remove_adfixes
|
|
65
|
-
fixup_last_name_first
|
|
66
|
-
fixup_mismatched_braces
|
|
67
|
-
remove_adfixes
|
|
68
|
-
name_wrangle
|
|
64
|
+
remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
|
|
65
|
+
fixup_last_name_first # "Smith, John" -> "John Smith"
|
|
66
|
+
fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
|
|
67
|
+
remove_adfixes # prefixes and suffixes: "Mr John Smith Jr." -> "John Smith"
|
|
68
|
+
name_wrangle # proper name case and non-breaking spaces
|
|
69
69
|
use_nonbreaking_spaces_in_compound_names
|
|
70
70
|
end
|
|
71
71
|
|
|
@@ -74,12 +74,12 @@ class NameTamer
|
|
|
74
74
|
|
|
75
75
|
def simple_name
|
|
76
76
|
unless @simple_name
|
|
77
|
-
@simple_name = nice_name.dup
|
|
77
|
+
@simple_name = nice_name.dup # Start with nice name
|
|
78
78
|
|
|
79
|
-
remove_initials
|
|
80
|
-
remove_middle_names
|
|
81
|
-
remove_periods_from_initials
|
|
82
|
-
standardize_words
|
|
79
|
+
remove_initials # "John Q. Doe" -> "John Doe"
|
|
80
|
+
remove_middle_names # "Philip Seymour Hoffman" -> "Philip Hoffman"
|
|
81
|
+
remove_periods_from_initials # "J.P.R. Williams" -> "JPR Williams"
|
|
82
|
+
standardize_words # "B&Q Intl" -> "B and Q International"
|
|
83
83
|
|
|
84
84
|
@simple_name.whitespace_to!(ASCII_SPACE)
|
|
85
85
|
end
|
|
@@ -191,14 +191,14 @@ class NameTamer
|
|
|
191
191
|
|
|
192
192
|
return unless parts.count == 2
|
|
193
193
|
|
|
194
|
-
@last_name
|
|
195
|
-
@remainder
|
|
194
|
+
@last_name = parts[0] # Sometimes the last name alone is all caps and we can name-case it
|
|
195
|
+
@remainder = parts[1]
|
|
196
196
|
end
|
|
197
197
|
|
|
198
198
|
# Sometimes we end up with mismatched braces after adfix stripping
|
|
199
199
|
# e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings"
|
|
200
200
|
def fixup_mismatched_braces
|
|
201
|
-
left_brace_count
|
|
201
|
+
left_brace_count = @nice_name.count '('
|
|
202
202
|
right_brace_count = @nice_name.count ')'
|
|
203
203
|
|
|
204
204
|
if left_brace_count > right_brace_count
|
|
@@ -228,15 +228,15 @@ class NameTamer
|
|
|
228
228
|
fix_case = true if [uppercase, lowercase].include?(@nice_name)
|
|
229
229
|
end
|
|
230
230
|
|
|
231
|
-
@nice_name
|
|
231
|
+
@nice_name = name_case(lowercase) if fix_case
|
|
232
232
|
end
|
|
233
233
|
|
|
234
234
|
def name_wrangle_split_name
|
|
235
235
|
# It's a person if we've split the name, so no organization logic here
|
|
236
236
|
lowercase = @last_name.downcase
|
|
237
237
|
uppercase = @last_name.upcase
|
|
238
|
-
@last_name
|
|
239
|
-
@nice_name
|
|
238
|
+
@last_name = name_case(lowercase) if [uppercase, lowercase].include?(@last_name)
|
|
239
|
+
@nice_name = "#{@remainder} #{@last_name}"
|
|
240
240
|
end
|
|
241
241
|
|
|
242
242
|
# Conjoin compound names with non-breaking spaces
|
|
@@ -265,12 +265,12 @@ class NameTamer
|
|
|
265
265
|
return unless @contact_type == :person
|
|
266
266
|
|
|
267
267
|
first_name, parts = find_first_usable_name(@simple_name.split)
|
|
268
|
-
last_name,
|
|
268
|
+
last_name, = find_last_usable_name(parts)
|
|
269
269
|
|
|
270
270
|
return unless first_name || last_name
|
|
271
271
|
|
|
272
|
-
separator
|
|
273
|
-
@simple_name
|
|
272
|
+
separator = first_name && last_name ? ' ' : ''
|
|
273
|
+
@simple_name = "#{first_name}#{separator}#{last_name}"
|
|
274
274
|
end
|
|
275
275
|
|
|
276
276
|
def find_first_usable_name(parts)
|
|
@@ -303,11 +303,11 @@ class NameTamer
|
|
|
303
303
|
end
|
|
304
304
|
|
|
305
305
|
def standardize_words
|
|
306
|
-
@simple_name.gsub!(/ *& */, ' and ')
|
|
307
|
-
@simple_name.gsub!(/ *\+ */, ' plus ')
|
|
308
|
-
@simple_name.gsub!(/\bintl\b/i, 'International')
|
|
306
|
+
@simple_name.gsub!(/ *& */, ' and ') # replace ampersand characters with ' and '
|
|
307
|
+
@simple_name.gsub!(/ *\+ */, ' plus ') # replace plus signs with ' plus '
|
|
308
|
+
@simple_name.gsub!(/\bintl\b/i, 'International') # replace 'intl' with 'International'
|
|
309
309
|
@simple_name.gsub!(/[־‐‑‒–—―−﹘﹣-]/, SLUG_DELIMITER) # Replace Unicode dashes with ASCII hyphen
|
|
310
|
-
@simple_name.strip_unwanted!(/["“”™℠®©℗]/)
|
|
310
|
+
@simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
|
|
311
311
|
end
|
|
312
312
|
|
|
313
313
|
#--------------------------------------------------------
|
|
@@ -315,18 +315,18 @@ class NameTamer
|
|
|
315
315
|
#--------------------------------------------------------
|
|
316
316
|
|
|
317
317
|
def initialize(new_name, args = {})
|
|
318
|
-
@name
|
|
318
|
+
@name = new_name || ''
|
|
319
319
|
@contact_type = contact_type_from args
|
|
320
320
|
|
|
321
|
-
@tidy_name
|
|
322
|
-
@nice_name
|
|
323
|
-
@simple_name
|
|
324
|
-
@slug
|
|
321
|
+
@tidy_name = nil
|
|
322
|
+
@nice_name = nil
|
|
323
|
+
@simple_name = nil
|
|
324
|
+
@slug = nil
|
|
325
325
|
|
|
326
|
-
@last_name
|
|
327
|
-
@remainder
|
|
326
|
+
@last_name = nil
|
|
327
|
+
@remainder = nil
|
|
328
328
|
|
|
329
|
-
@adfix_found
|
|
329
|
+
@adfix_found = false
|
|
330
330
|
end
|
|
331
331
|
|
|
332
332
|
def contact_type_from(args)
|
|
@@ -369,17 +369,17 @@ class NameTamer
|
|
|
369
369
|
end
|
|
370
370
|
|
|
371
371
|
def find_contact_type_and_parts(adfixes, name_part)
|
|
372
|
-
ct
|
|
373
|
-
parts
|
|
374
|
-
@adfix_found
|
|
372
|
+
ct = contact_type_best_effort
|
|
373
|
+
parts = name_part.partition adfixes[ct]
|
|
374
|
+
@adfix_found = !parts[1].empty?
|
|
375
375
|
|
|
376
376
|
return [ct, parts] if @contact_type || @adfix_found
|
|
377
377
|
|
|
378
378
|
# If the contact type is indeterminate and we didn't find a diagnostic adfix
|
|
379
379
|
# for a person then try again for an organization
|
|
380
|
-
ct
|
|
381
|
-
parts
|
|
382
|
-
@adfix_found
|
|
380
|
+
ct = :organization
|
|
381
|
+
parts = name_part.partition adfixes[ct]
|
|
382
|
+
@adfix_found = !parts[1].empty?
|
|
383
383
|
|
|
384
384
|
[ct, parts]
|
|
385
385
|
end
|
|
@@ -413,9 +413,9 @@ class NameTamer
|
|
|
413
413
|
#--------------------------------------------------------
|
|
414
414
|
|
|
415
415
|
NONBREAKING_SPACE = "\u00a0"
|
|
416
|
-
ASCII_SPACE
|
|
417
|
-
ADFIX_JOINERS
|
|
418
|
-
SLUG_DELIMITER
|
|
416
|
+
ASCII_SPACE = "\u0020"
|
|
417
|
+
ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
|
|
418
|
+
SLUG_DELIMITER = '-'
|
|
419
419
|
ZERO_WIDTH_FILTER = /[\u180E\u200B\u200C\u200D\u2063\uFEFF]/
|
|
420
420
|
|
|
421
421
|
# Constants for parameterizing Unicode strings for IRIs
|
|
@@ -447,14 +447,14 @@ class NameTamer
|
|
|
447
447
|
# We're using the most restrictive segment definition (isegment-nz-nc)
|
|
448
448
|
# to avoid any possible problems with the IRI that it one day might
|
|
449
449
|
# get placed in.
|
|
450
|
-
ALPHA
|
|
451
|
-
DIGIT
|
|
452
|
-
UCSCHAR
|
|
453
|
-
IUNRESERVED
|
|
454
|
-
SUBDELIMS
|
|
455
|
-
ISEGMENT_NZ_NC
|
|
456
|
-
FILTER_RFC3987
|
|
457
|
-
FILTER_COMPAT
|
|
450
|
+
ALPHA = 'A-Za-z'
|
|
451
|
+
DIGIT = '0-9'
|
|
452
|
+
UCSCHAR = '\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
|
|
453
|
+
IUNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~#{UCSCHAR}"
|
|
454
|
+
SUBDELIMS = '!$&\'\(\)\*+,;='
|
|
455
|
+
ISEGMENT_NZ_NC = "#{IUNRESERVED}#{SUBDELIMS}@" # pct-encoded not needed
|
|
456
|
+
FILTER_RFC3987 = /[^#{ISEGMENT_NZ_NC}]/
|
|
457
|
+
FILTER_COMPAT = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
|
|
458
458
|
|
|
459
459
|
# These are the prefixes and suffixes we want to remove
|
|
460
460
|
# If you add to the list, you can use spaces and dots where appropriate
|
|
@@ -478,15 +478,19 @@ class NameTamer
|
|
|
478
478
|
},
|
|
479
479
|
suffix: {
|
|
480
480
|
person: [
|
|
481
|
-
'Chartered F.C.S.I.',
|
|
482
|
-
'
|
|
483
|
-
'M.I.E.
|
|
484
|
-
'
|
|
485
|
-
'
|
|
486
|
-
'
|
|
487
|
-
'
|
|
488
|
-
'
|
|
489
|
-
'
|
|
481
|
+
'Chartered F.C.S.I.', 'Chartered M.C.S.I.', 'I.F.R.S. Certified', 'F.Inst.L.M.', 'C.I.S.S.P.', 'F.C.I.P.S.',
|
|
482
|
+
'M.R.I.C.S.', 'T.M.I.E.T.', 'Dip. D.M.', 'A.A.M.S.', 'A.C.C.A.', 'A.C.M.A.', 'A.I.F.A.', 'A.W.M.A.', 'C.A.I.A.',
|
|
483
|
+
'C.A.P.M.', 'C.C.I.M.', 'C.D.F.A.', 'C.E.P.P.', 'C.F.B.S.', 'C.G.M.A.', 'C.I.T.P.', 'C.L.T.C.', 'C.P.C.C.',
|
|
484
|
+
'C.R.P.C.', 'C.R.P.S.', 'C.S.O.X.', 'C.S.S.D.', 'F.B.C.S.', 'F.C.C.A.', 'F.C.M.I.', 'F.C.S.I.', 'F.I.E.T.',
|
|
485
|
+
'F.I.R.P.', 'M.I.E.T.', 'M.S.F.S.', 'M.Sc. D.', 'O.R.S.C.', 'R.I.C.P.', 'B.Tech.', 'Cantab.', 'Ch.F.C.',
|
|
486
|
+
'D.Phil.', 'I.T.I.L. v3', 'M.Io.D.', 'S.C.M.P', 'A.C.A.', 'A.C.C.', 'A.E.P.', 'A.I.F.', 'A.S.A.', 'B.Eng.',
|
|
487
|
+
'C.B.V.', 'C.E.M.', 'C.Eng.', 'C.F.A.', 'C.F.F.', 'C.F.P.', 'C.F.S.', 'C.G.A.', 'C.G.B.', 'C.G.P.', 'C.I.M.',
|
|
488
|
+
'C.L.P.', 'C.L.U.', 'C.M.A.', 'C.M.T.', 'C.P.A.', 'C.T.A.', 'C.W.S.', 'D.B.E.', 'D.D.S.', 'D.V.M.', 'E.R.P.',
|
|
489
|
+
'Eng.D.', 'F.C.A.', 'F.P.C.', 'F.R.M.', 'F.R.M.', 'G.S.P.', 'L.P.S.', 'M.B.A.', 'M.B.E.', 'M.E.P.', 'M.Eng.',
|
|
490
|
+
'M.Jur.', 'M.P.A.', 'M.S.F.', 'M.S.P.', 'O.B.E.', 'P.C.C.', 'P.F.S.', 'P.H.R.', 'P.M.C.', 'P.M.P.', 'P.M.P.',
|
|
491
|
+
'P.S.P.', 'R.F.C.', 'V.M.D.', 'B.Ed.', 'B.Sc.', 'Ed.D.', 'Ed.M.', 'Hons.', 'LL.B.', 'LL.D.', 'LL.M.', 'M.Ed.',
|
|
492
|
+
'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'C.A.', 'E.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'M.S.',
|
|
493
|
+
'O.K.', 'P.A.', 'Q.C.', 'R.D.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'V'
|
|
490
494
|
],
|
|
491
495
|
organization: [
|
|
492
496
|
'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
|
|
@@ -494,12 +498,11 @@ class NameTamer
|
|
|
494
498
|
'S.p. z.o.o.', '(Pvt.)Ltd.', 'akc. spol.', 'Cía. Ltda.', 'E.B.V.B.A.', 'P. Limited', 'S. de R.L.', 'S.I.C.A.V.',
|
|
495
499
|
'S.P.R.L.U.', 'А.Д.С.И.Ц.', '(P.) Ltd.', 'C. por A.', 'Comm.V.A.', 'Ltd. Şti.', 'Plc. Ltd.', 'Pte. Ltd.',
|
|
496
500
|
'Pty. Ltd.', 'Pvt. Ltd.', 'Soc. Col.', 'A.M.B.A.', 'A.S.B.L.', 'A.V.E.E.', 'B.V.B.A.', 'B.V.I.O.', 'C.V.B.A.',
|
|
497
|
-
'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', '
|
|
498
|
-
'
|
|
499
|
-
'
|
|
500
|
-
'S.C.R.
|
|
501
|
-
'Part.G.', 'Sh.p.k.', '&. Co.', '
|
|
502
|
-
'F.C.P.',
|
|
501
|
+
'C.V.O.A.', 'E.E.I.G.', 'E.I.R.L.', 'E.O.O.D.', 'E.U.R.L.', 'F.M.B.A.', 'G.m.b.H.', 'Ges.b.R.', 'K.G.a.A.',
|
|
502
|
+
'L.L.L.P.', 'Ltd. Co.', 'Ltd. Co.', 'M.E.P.E.', 'n.y.r.t.', 'O.V.E.E.', 'P.E.E.C.', 'P.L.L.C.', 'P.L.L.C.',
|
|
503
|
+
'S. en C.', 'S.a.p.a.', 'S.A.R.L.', 'S.à.R.L.', 'S.A.S.U.', 'S.C.e.I.', 'S.C.O.P.', 'S.C.p.A.', 'S.C.R.I.',
|
|
504
|
+
'S.C.R.L.', 'S.M.B.A.', 'S.P.R.L.', 'Е.О.О.Д.', '&. Cie.', 'and Co.', 'Comm.V.', 'Limited', 'P. Ltd.',
|
|
505
|
+
'Part.G.', 'Sh.p.k.', '&. Co.', 'C.X.A.', 'd.n.o.', 'd.o.o.', 'E.A.D.', 'e.h.f.', 'E.P.E.', 'E.S.V.', 'F.C.P.',
|
|
503
506
|
'F.I.E.', 'G.b.R.', 'G.I.E.', 'G.M.K.', 'G.S.K.', 'H.U.F.', 'K.D.A.', 'k.f.t.', 'k.h.t.', 'k.k.t.', 'L.L.C.',
|
|
504
507
|
'L.L.P.', 'o.h.f.', 'O.H.G.', 'O.O.D.', 'O.y.j.', 'p.l.c.', 'P.S.U.', 'S.A.E.', 'S.A.S.', 'S.C.A.', 'S.C.E.',
|
|
505
508
|
'S.C.S.', 'S.E.M.', 'S.E.P.', 's.e.s.', 'S.G.R.', 'S.N.C.', 'S.p.A.', 'S.P.E.', 'S.R.L.', 's.r.o.', 'Unltd.',
|
|
@@ -519,13 +522,13 @@ class NameTamer
|
|
|
519
522
|
ADFIX_PATTERNS = {}
|
|
520
523
|
|
|
521
524
|
[:prefix, :suffix].each do |adfix_type|
|
|
522
|
-
patterns
|
|
523
|
-
adfix
|
|
525
|
+
patterns = {}
|
|
526
|
+
adfix = ADFIXES[adfix_type]
|
|
524
527
|
|
|
525
528
|
[:person, :organization].each do |ct|
|
|
526
|
-
with_optional_spaces
|
|
527
|
-
pattern_string
|
|
528
|
-
patterns[ct]
|
|
529
|
+
with_optional_spaces = adfix[ct].map { |p| p.gsub(ASCII_SPACE, ' *') }
|
|
530
|
+
pattern_string = with_optional_spaces.join('|').gsub('.', '\.*')
|
|
531
|
+
patterns[ct] = /#{adfix[:before]}\(*(?:#{pattern_string})[®™\)]*#{adfix[:after]}/i
|
|
529
532
|
end
|
|
530
533
|
|
|
531
534
|
ADFIX_PATTERNS[adfix_type] = patterns
|
data/lib/name-tamer/version.rb
CHANGED
data/lib/string_extras.rb
CHANGED
|
@@ -22,7 +22,7 @@ class String
|
|
|
22
22
|
# Change some characters embedded in words to our separator character
|
|
23
23
|
# e.g. example.com -> example-com
|
|
24
24
|
def invalid_chars_to!(separator)
|
|
25
|
-
substitute!(
|
|
25
|
+
substitute!(%r{(?<![[:space:]])[\.\/](?![[:space:]])}, separator)
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
# Unescape percent-encoded characters
|
|
@@ -166,15 +166,15 @@ class String
|
|
|
166
166
|
end
|
|
167
167
|
|
|
168
168
|
NONBREAKING_SPACE = "\u00a0"
|
|
169
|
-
ASCII_SPACE
|
|
169
|
+
ASCII_SPACE = "\u0020"
|
|
170
170
|
|
|
171
|
-
COMPOUND_NAMES
|
|
171
|
+
COMPOUND_NAMES = [
|
|
172
172
|
'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
|
|
173
173
|
'Baron Cohen', 'Strang Steel',
|
|
174
174
|
'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
|
|
175
175
|
]
|
|
176
176
|
|
|
177
|
-
NAME_MODIFIERS
|
|
177
|
+
NAME_MODIFIERS = [
|
|
178
178
|
'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
|
|
179
179
|
'St[\.]?', 'Zur'
|
|
180
180
|
]
|
data/name-tamer.gemspec
CHANGED
|
@@ -3,25 +3,28 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
|
3
3
|
require 'name-tamer/version'
|
|
4
4
|
|
|
5
5
|
Gem::Specification.new do |spec|
|
|
6
|
-
spec.name
|
|
7
|
-
spec.version
|
|
8
|
-
spec.authors
|
|
9
|
-
spec.email
|
|
10
|
-
spec.description
|
|
11
|
-
spec.summary
|
|
12
|
-
spec.homepage
|
|
13
|
-
spec.license
|
|
6
|
+
spec.name = 'name-tamer'
|
|
7
|
+
spec.version = NameTamer::VERSION
|
|
8
|
+
spec.authors = ['Xenapto']
|
|
9
|
+
spec.email = ['developers@xenapto.com']
|
|
10
|
+
spec.description = 'Useful methods for taming names'
|
|
11
|
+
spec.summary = "Example: NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith"
|
|
12
|
+
spec.homepage = 'https://github.com/Xenapto/name-tamer'
|
|
13
|
+
spec.license = 'MIT'
|
|
14
14
|
|
|
15
|
-
spec.files
|
|
16
|
-
spec.executables
|
|
17
|
-
spec.test_files
|
|
15
|
+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
|
16
|
+
spec.executables = spec.files.grep(%r{^bin\/}) { |f| File.basename(f) }
|
|
17
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features|coverage)\/})
|
|
18
18
|
spec.require_paths = ['lib']
|
|
19
19
|
|
|
20
|
-
spec.add_development_dependency 'bundler', '~> 1'
|
|
20
|
+
#- spec.add_development_dependency 'bundler', '~> 1.10'
|
|
21
21
|
spec.add_development_dependency 'rake', '~> 10'
|
|
22
|
-
spec.add_development_dependency 'rspec', '~>
|
|
23
|
-
spec.add_development_dependency 'gem-release', '~> 0'
|
|
24
|
-
spec.add_development_dependency 'simplecov', '~> 0.
|
|
25
|
-
spec.add_development_dependency 'coveralls', '~> 0'
|
|
26
|
-
spec.add_development_dependency 'rubocop', '~> 0'
|
|
22
|
+
spec.add_development_dependency 'rspec', '~> 3.3'
|
|
23
|
+
spec.add_development_dependency 'gem-release', '~> 0.7'
|
|
24
|
+
spec.add_development_dependency 'simplecov', '~> 0.10'
|
|
25
|
+
spec.add_development_dependency 'coveralls', '~> 0.8'
|
|
26
|
+
spec.add_development_dependency 'rubocop', '~> 0.32'
|
|
27
|
+
spec.add_development_dependency 'guard', '~> 2.12'
|
|
28
|
+
spec.add_development_dependency 'guard-rspec', '~> 4.5'
|
|
29
|
+
spec.add_development_dependency 'guard-rubocop', '~> 1.2'
|
|
27
30
|
end
|
data/spec/name_tamer_spec.rb
CHANGED
|
@@ -191,48 +191,59 @@ describe NameTamer do
|
|
|
191
191
|
{ n: "John Smith\u{FEFF}\u{200B}\u{200C}\u{200D}\u{2063}", t: :person,
|
|
192
192
|
nn: 'John Smith', sn: 'John Smith', s: 'john-smith' }, # Zero-width characters
|
|
193
193
|
{ n: 'Herman Melville ,CLP', t: :person, nn: 'Herman Melville', sn: 'Herman Melville', s: 'herman-melville' },
|
|
194
|
+
{ n: 'Herman Melville, CLP®', t: :person, nn: 'Herman Melville', sn: 'Herman Melville', s: 'herman-melville' },
|
|
195
|
+
{ n: 'Herman Melville, CLP™', t: :person, nn: 'Herman Melville', sn: 'Herman Melville', s: 'herman-melville' },
|
|
196
|
+
{ n: 'Herman Melville, CLP®™', t: :person, nn: 'Herman Melville', sn: 'Herman Melville', s: 'herman-melville' },
|
|
197
|
+
{ n: 'Herman Melville, CLP™®', t: :person, nn: 'Herman Melville', sn: 'Herman Melville', s: 'herman-melville' },
|
|
194
198
|
{ n: 'Melville ,Herman', t: :person, nn: 'Herman Melville', sn: 'Herman Melville', s: 'herman-melville' },
|
|
195
|
-
{ n: "John\x00 Smith", t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' }
|
|
199
|
+
{ n: "John\x00 Smith", t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
|
200
|
+
{
|
|
201
|
+
n: 'Janen Moyer-Pesso, AWMA, CDFA, LPS',
|
|
202
|
+
t: :person,
|
|
203
|
+
nn: 'Janen Moyer-Pesso',
|
|
204
|
+
sn: 'Janen Moyer-Pesso',
|
|
205
|
+
s: 'janen-moyer-pesso'
|
|
206
|
+
}
|
|
196
207
|
]
|
|
197
208
|
end
|
|
198
209
|
|
|
199
210
|
it 'makes a slug' do
|
|
200
211
|
names.each do |name_data|
|
|
201
212
|
name = name_data[:n]
|
|
202
|
-
NameTamer[name, contact_type: name_data[:t]].slug.
|
|
213
|
+
expect(NameTamer[name, contact_type: name_data[:t]].slug).to eq(name_data[:s])
|
|
203
214
|
end
|
|
204
215
|
end
|
|
205
216
|
|
|
206
217
|
it 'makes a nice name' do
|
|
207
218
|
names.each do |name_data|
|
|
208
|
-
name
|
|
219
|
+
name = name_data[:n]
|
|
209
220
|
nice_name = NameTamer[name, contact_type: name_data[:t]].nice_name
|
|
210
|
-
nice_name.
|
|
221
|
+
expect(nice_name).to eq(name_data[:nn])
|
|
211
222
|
end
|
|
212
223
|
end
|
|
213
224
|
|
|
214
225
|
it 'makes a searchable name' do
|
|
215
226
|
names.each do |name_data|
|
|
216
227
|
name = name_data[:n]
|
|
217
|
-
NameTamer[name, contact_type: name_data[:t]].simple_name.
|
|
228
|
+
expect(NameTamer[name, contact_type: name_data[:t]].simple_name).to eq(name_data[:sn])
|
|
218
229
|
end
|
|
219
230
|
end
|
|
220
231
|
end
|
|
221
232
|
|
|
222
233
|
describe 'contact type inference' do
|
|
223
234
|
it 'infers that "Mr. John Smith" is a person' do
|
|
224
|
-
NameTamer['Mr. John Smith'].contact_type.
|
|
235
|
+
expect(NameTamer['Mr. John Smith'].contact_type).to eq(:person)
|
|
225
236
|
end
|
|
226
237
|
|
|
227
238
|
it 'infers that "Di Doo Doo d.o.o." is an organization' do
|
|
228
|
-
NameTamer['Di Doo Doo d.o.o.'].contact_type.
|
|
239
|
+
expect(NameTamer['Di Doo Doo d.o.o.'].contact_type).to eq(:organization)
|
|
229
240
|
end
|
|
230
241
|
|
|
231
242
|
it 'infers that "DiDooDoo" is an organization' do
|
|
232
|
-
NameTamer['DiDooDoo'].contact_type.
|
|
243
|
+
expect(NameTamer['DiDooDoo'].contact_type).to eq(:organization)
|
|
233
244
|
end
|
|
234
245
|
|
|
235
246
|
it 'infers that "John Smith" is a person' do
|
|
236
|
-
NameTamer['John Smith'].contact_type.
|
|
247
|
+
expect(NameTamer['John Smith'].contact_type).to eq(:person)
|
|
237
248
|
end
|
|
238
249
|
end
|
metadata
CHANGED
|
@@ -1,119 +1,141 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: name-tamer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Xenapto
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-06-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name:
|
|
14
|
+
name: rake
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
17
|
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
19
|
+
version: '10'
|
|
20
20
|
type: :development
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '
|
|
26
|
+
version: '10'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name:
|
|
28
|
+
name: rspec
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - "~>"
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: '
|
|
33
|
+
version: '3.3'
|
|
34
34
|
type: :development
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - "~>"
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '
|
|
40
|
+
version: '3.3'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
|
-
name:
|
|
42
|
+
name: gem-release
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
45
|
- - "~>"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '
|
|
47
|
+
version: '0.7'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '
|
|
54
|
+
version: '0.7'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
|
-
name:
|
|
56
|
+
name: simplecov
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
58
58
|
requirements:
|
|
59
59
|
- - "~>"
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '0'
|
|
61
|
+
version: '0.10'
|
|
62
62
|
type: :development
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '0'
|
|
68
|
+
version: '0.10'
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
|
-
name:
|
|
70
|
+
name: coveralls
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
72
72
|
requirements:
|
|
73
73
|
- - "~>"
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: '0.
|
|
76
|
-
|
|
75
|
+
version: '0.8'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0.8'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: rubocop
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
77
88
|
- !ruby/object:Gem::Version
|
|
78
|
-
version: 0.
|
|
89
|
+
version: '0.32'
|
|
79
90
|
type: :development
|
|
80
91
|
prerelease: false
|
|
81
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
82
93
|
requirements:
|
|
83
94
|
- - "~>"
|
|
84
95
|
- !ruby/object:Gem::Version
|
|
85
|
-
version: '0.
|
|
86
|
-
|
|
96
|
+
version: '0.32'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: guard
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '2.12'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
87
109
|
- !ruby/object:Gem::Version
|
|
88
|
-
version:
|
|
110
|
+
version: '2.12'
|
|
89
111
|
- !ruby/object:Gem::Dependency
|
|
90
|
-
name:
|
|
112
|
+
name: guard-rspec
|
|
91
113
|
requirement: !ruby/object:Gem::Requirement
|
|
92
114
|
requirements:
|
|
93
115
|
- - "~>"
|
|
94
116
|
- !ruby/object:Gem::Version
|
|
95
|
-
version: '
|
|
117
|
+
version: '4.5'
|
|
96
118
|
type: :development
|
|
97
119
|
prerelease: false
|
|
98
120
|
version_requirements: !ruby/object:Gem::Requirement
|
|
99
121
|
requirements:
|
|
100
122
|
- - "~>"
|
|
101
123
|
- !ruby/object:Gem::Version
|
|
102
|
-
version: '
|
|
124
|
+
version: '4.5'
|
|
103
125
|
- !ruby/object:Gem::Dependency
|
|
104
|
-
name: rubocop
|
|
126
|
+
name: guard-rubocop
|
|
105
127
|
requirement: !ruby/object:Gem::Requirement
|
|
106
128
|
requirements:
|
|
107
129
|
- - "~>"
|
|
108
130
|
- !ruby/object:Gem::Version
|
|
109
|
-
version: '
|
|
131
|
+
version: '1.2'
|
|
110
132
|
type: :development
|
|
111
133
|
prerelease: false
|
|
112
134
|
version_requirements: !ruby/object:Gem::Requirement
|
|
113
135
|
requirements:
|
|
114
136
|
- - "~>"
|
|
115
137
|
- !ruby/object:Gem::Version
|
|
116
|
-
version: '
|
|
138
|
+
version: '1.2'
|
|
117
139
|
description: Useful methods for taming names
|
|
118
140
|
email:
|
|
119
141
|
- developers@xenapto.com
|
|
@@ -125,9 +147,11 @@ files:
|
|
|
125
147
|
- ".gitignore"
|
|
126
148
|
- ".hound.yml"
|
|
127
149
|
- ".rubocop.yml"
|
|
150
|
+
- ".ruby-gemset"
|
|
128
151
|
- ".ruby-version"
|
|
129
152
|
- Gemfile
|
|
130
153
|
- Gemfile.lock
|
|
154
|
+
- Guardfile
|
|
131
155
|
- LICENSE
|
|
132
156
|
- README.md
|
|
133
157
|
- Rakefile
|
|
@@ -160,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
160
184
|
version: '0'
|
|
161
185
|
requirements: []
|
|
162
186
|
rubyforge_project:
|
|
163
|
-
rubygems_version: 2.4.
|
|
187
|
+
rubygems_version: 2.4.8
|
|
164
188
|
signing_key:
|
|
165
189
|
specification_version: 4
|
|
166
190
|
summary: 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => John
|
|
@@ -168,3 +192,4 @@ summary: 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => Joh
|
|
|
168
192
|
test_files:
|
|
169
193
|
- spec/name_tamer_spec.rb
|
|
170
194
|
- spec/spec_helper.rb
|
|
195
|
+
has_rdoc:
|