name-tamer 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/doc/maintenance.rake +2 -2
- data/lib/name-tamer.rb +1 -537
- data/lib/name_tamer.rb +21 -0
- data/lib/name_tamer/array.rb +7 -0
- data/lib/name_tamer/constants.rb +121 -0
- data/lib/name_tamer/name.rb +384 -0
- data/lib/{string_extras.rb → name_tamer/string.rb} +14 -8
- data/lib/name_tamer/text.rb +53 -0
- data/lib/name_tamer/version.rb +3 -0
- data/name-tamer.gemspec +10 -10
- data/spec/{name_tamer_spec.rb → name_tamer/name_spec.rb} +2 -2
- data/spec/name_tamer/text_spec.rb +42 -0
- metadata +31 -24
- data/lib/name-tamer/version.rb +0 -3
@@ -1,5 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
class String
|
3
|
+
unless respond_to? :presence
|
4
|
+
def presence
|
5
|
+
self unless empty?
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
3
9
|
# Strip illegal characters out completely
|
4
10
|
def strip_unwanted!(filter)
|
5
11
|
substitute!(filter, '')
|
@@ -70,11 +76,11 @@ class String
|
|
70
76
|
end
|
71
77
|
|
72
78
|
def upcase_first_letter!
|
73
|
-
gsub!(/\b\w
|
79
|
+
gsub!(/\b\w/, &:upcase) || self
|
74
80
|
end
|
75
81
|
|
76
82
|
def downcase_after_apostrophe!
|
77
|
-
gsub!(/\'\w\b
|
83
|
+
gsub!(/\'\w\b/, &:downcase) || self # Lowercase 's
|
78
84
|
end
|
79
85
|
|
80
86
|
# Our list of terminal characters that indicate a non-celtic name used
|
@@ -170,19 +176,19 @@ class String
|
|
170
176
|
gsub!(pattern, replacement) || self
|
171
177
|
end
|
172
178
|
|
173
|
-
NONBREAKING_SPACE = "\u00a0"
|
174
|
-
ASCII_SPACE =
|
179
|
+
NONBREAKING_SPACE = "\u00a0".freeze
|
180
|
+
ASCII_SPACE = ' '.freeze
|
175
181
|
|
176
182
|
COMPOUND_NAMES = [
|
177
183
|
'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
|
178
184
|
'Baron Cohen', 'Strang Steel',
|
179
185
|
'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
|
180
|
-
]
|
186
|
+
].freeze
|
181
187
|
|
182
188
|
NAME_MODIFIERS = [
|
183
189
|
'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
|
184
190
|
'St[\.]?', 'Zur'
|
185
|
-
]
|
191
|
+
].freeze
|
186
192
|
|
187
193
|
# Transliterations (like the i18n defaults)
|
188
194
|
# see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
|
@@ -215,7 +221,7 @@ class String
|
|
215
221
|
'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w',
|
216
222
|
'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
|
217
223
|
'Ž' => 'Z', 'ž' => 'z'
|
218
|
-
}
|
224
|
+
}.freeze
|
219
225
|
|
220
226
|
# When strings are mistakenly encoded as single-byte character sets, instead
|
221
227
|
# of UTF-8, there are some distinctive character combinations that we can spot
|
@@ -251,7 +257,7 @@ class String
|
|
251
257
|
'ø' => 'ø', 'ù' => 'ù', 'ú' => 'ú', 'û' => 'û', 'ü' => 'ü',
|
252
258
|
'ý' => 'ý', 'þ' => 'þ', 'ÿ' => 'ÿ',
|
253
259
|
"\x00" => '' # Manually added to avoid Bad Argument exception
|
254
|
-
}
|
260
|
+
}.freeze
|
255
261
|
|
256
262
|
BAD_ENCODING_PATTERNS = /(#{BAD_ENCODING.keys.join('|')})/
|
257
263
|
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module NameTamer
|
2
|
+
class Text
|
3
|
+
# All the potential slugs from the string
|
4
|
+
# e.g. 'lorem ipsum dolor' -> ['lorem', 'ipsum' ,'dolor', 'lorem-ipsum', 'ipsum-dolor', 'lorem-ipsum-dolor']
|
5
|
+
def slugs
|
6
|
+
@slugs ||= segments.flat_map { |s| self.class.new(s).neighbours }.uniq
|
7
|
+
end
|
8
|
+
|
9
|
+
# Split the string into segments (e.g. sentences)
|
10
|
+
def segments
|
11
|
+
string.split(%r{(?:[\.\?,:;!]|[[:space:]][/-])[[:space:]]})
|
12
|
+
end
|
13
|
+
|
14
|
+
# The string as a slug
|
15
|
+
def parameterize
|
16
|
+
@parameterize ||= (
|
17
|
+
string
|
18
|
+
.dup
|
19
|
+
.whitespace_to!(separator)
|
20
|
+
.invalid_chars_to!(separator)
|
21
|
+
.strip_unwanted!(filter)
|
22
|
+
.fix_separators!(separator)
|
23
|
+
.approximate_latin_chars!
|
24
|
+
.presence || '_'
|
25
|
+
).downcase
|
26
|
+
end
|
27
|
+
|
28
|
+
def neighbours
|
29
|
+
@neighbours ||= NameTamer[string].array.neighbours.map { |a| a.join('-') }
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
attr_reader :string, :args
|
35
|
+
|
36
|
+
def initialize(string, args = {})
|
37
|
+
@string = string
|
38
|
+
@args = args
|
39
|
+
end
|
40
|
+
|
41
|
+
def separator
|
42
|
+
@seperator ||= args[:sep] || SLUG_DELIMITER
|
43
|
+
end
|
44
|
+
|
45
|
+
def rfc3987
|
46
|
+
@rfc3987 ||= args[:rfc3987] || false
|
47
|
+
end
|
48
|
+
|
49
|
+
def filter
|
50
|
+
@filter ||= args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/name-tamer.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
lib = File.expand_path('../lib', __FILE__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
-
require '
|
3
|
+
require 'name_tamer/version'
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = 'name-tamer'
|
@@ -17,13 +17,13 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features|coverage)\/})
|
18
18
|
spec.require_paths = ['lib']
|
19
19
|
|
20
|
-
spec.add_development_dependency 'rake', '~>
|
21
|
-
spec.add_development_dependency 'rspec', '~> 3
|
22
|
-
spec.add_development_dependency 'gem-release', '~> 0
|
23
|
-
spec.add_development_dependency 'simplecov', '~> 0
|
24
|
-
spec.add_development_dependency 'coveralls', '~> 0
|
25
|
-
spec.add_development_dependency 'rubocop', '~> 0
|
26
|
-
spec.add_development_dependency 'guard', '~> 2
|
27
|
-
spec.add_development_dependency 'guard-rspec', '~> 4
|
28
|
-
spec.add_development_dependency 'guard-rubocop', '~> 1
|
20
|
+
spec.add_development_dependency 'rake', '~> 11'
|
21
|
+
spec.add_development_dependency 'rspec', '~> 3'
|
22
|
+
spec.add_development_dependency 'gem-release', '~> 0'
|
23
|
+
spec.add_development_dependency 'simplecov', '~> 0'
|
24
|
+
spec.add_development_dependency 'coveralls', '~> 0'
|
25
|
+
spec.add_development_dependency 'rubocop', '~> 0'
|
26
|
+
spec.add_development_dependency 'guard', '~> 2'
|
27
|
+
spec.add_development_dependency 'guard-rspec', '~> 4'
|
28
|
+
spec.add_development_dependency 'guard-rubocop', '~> 1'
|
29
29
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
require 'spec_helper'
|
3
3
|
require 'name-tamer'
|
4
4
|
|
5
|
-
describe NameTamer do
|
5
|
+
describe NameTamer::Name do
|
6
6
|
let(:names) do
|
7
7
|
[
|
8
8
|
{ n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
@@ -207,7 +207,7 @@ describe NameTamer do
|
|
207
207
|
{ n: 'Dibble & Grub LLP', t: :organization,
|
208
208
|
nn: 'Dibble & Grub',
|
209
209
|
sn: 'Dibble and Grub',
|
210
|
-
s: 'dibble-and-grub' }
|
210
|
+
s: 'dibble-and-grub' }
|
211
211
|
]
|
212
212
|
end
|
213
213
|
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'name-tamer'
|
4
|
+
|
5
|
+
describe NameTamer::Text do
|
6
|
+
context '#segments' do
|
7
|
+
it 'splits a string into segments at appropriate boundaries' do
|
8
|
+
string = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. '\
|
9
|
+
'Nullam venenatis? Risus eu: auctor feugiat; libero nisl congue '\
|
10
|
+
'arcu - eget molestie metus / erat eu diam'
|
11
|
+
|
12
|
+
text = NameTamer::Text.new string
|
13
|
+
|
14
|
+
expect(text.segments).to include(
|
15
|
+
'Lorem ipsum dolor sit amet',
|
16
|
+
'consectetur adipiscing elit',
|
17
|
+
'Nullam venenatis',
|
18
|
+
'Risus eu',
|
19
|
+
'auctor feugiat',
|
20
|
+
'libero nisl congue arcu',
|
21
|
+
'eget molestie metus',
|
22
|
+
'erat eu diam'
|
23
|
+
)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
context '#slugs' do
|
28
|
+
it 'compiles all the potential slugs into an array' do
|
29
|
+
string = 'Lorem Ipsum Limited, lorem ipsum dolor. Dolor Mr Sit Amet.'
|
30
|
+
text = NameTamer::Text.new string
|
31
|
+
slugs = text.slugs
|
32
|
+
|
33
|
+
expect(slugs).to include(
|
34
|
+
'lorem', 'lorem-ipsum', 'ipsum', 'lorem-ipsum-dolor', 'ipsum-dolor',
|
35
|
+
'dolor', 'dolor-mr', 'dolor-mr-sit', 'dolor-mr-sit-amet', 'mr',
|
36
|
+
'mr-sit', 'mr-sit-amet', 'sit', 'sit-amet', 'amet'
|
37
|
+
)
|
38
|
+
|
39
|
+
expect(slugs.length).to eq 15
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-tamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xenapto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -16,126 +16,126 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '11'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '11'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '3
|
33
|
+
version: '3'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '3
|
40
|
+
version: '3'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: gem-release
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0
|
47
|
+
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: simplecov
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0
|
61
|
+
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0
|
68
|
+
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: coveralls
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '0
|
75
|
+
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '0
|
82
|
+
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rubocop
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '0
|
89
|
+
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '0
|
96
|
+
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: guard
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '2
|
103
|
+
version: '2'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '2
|
110
|
+
version: '2'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: guard-rspec
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '4
|
117
|
+
version: '4'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '4
|
124
|
+
version: '4'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: guard-rubocop
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: '1
|
131
|
+
version: '1'
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: '1
|
138
|
+
version: '1'
|
139
139
|
description: Useful methods for taming names
|
140
140
|
email:
|
141
141
|
- developers@xenapto.com
|
@@ -158,10 +158,16 @@ files:
|
|
158
158
|
- doc/prefixes.csv
|
159
159
|
- doc/suffixes.csv
|
160
160
|
- lib/name-tamer.rb
|
161
|
-
- lib/
|
162
|
-
- lib/
|
161
|
+
- lib/name_tamer.rb
|
162
|
+
- lib/name_tamer/array.rb
|
163
|
+
- lib/name_tamer/constants.rb
|
164
|
+
- lib/name_tamer/name.rb
|
165
|
+
- lib/name_tamer/string.rb
|
166
|
+
- lib/name_tamer/text.rb
|
167
|
+
- lib/name_tamer/version.rb
|
163
168
|
- name-tamer.gemspec
|
164
|
-
- spec/
|
169
|
+
- spec/name_tamer/name_spec.rb
|
170
|
+
- spec/name_tamer/text_spec.rb
|
165
171
|
- spec/spec_helper.rb
|
166
172
|
homepage: https://github.com/Xenapto/name-tamer
|
167
173
|
licenses:
|
@@ -189,5 +195,6 @@ specification_version: 4
|
|
189
195
|
summary: 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => John
|
190
196
|
Smith'
|
191
197
|
test_files:
|
192
|
-
- spec/
|
198
|
+
- spec/name_tamer/name_spec.rb
|
199
|
+
- spec/name_tamer/text_spec.rb
|
193
200
|
- spec/spec_helper.rb
|
data/lib/name-tamer/version.rb
DELETED