name-tamer 0.4.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/doc/maintenance.rake +2 -2
- data/lib/name-tamer.rb +1 -537
- data/lib/name_tamer.rb +21 -0
- data/lib/name_tamer/array.rb +7 -0
- data/lib/name_tamer/constants.rb +121 -0
- data/lib/name_tamer/name.rb +384 -0
- data/lib/{string_extras.rb → name_tamer/string.rb} +14 -8
- data/lib/name_tamer/text.rb +53 -0
- data/lib/name_tamer/version.rb +3 -0
- data/name-tamer.gemspec +10 -10
- data/spec/{name_tamer_spec.rb → name_tamer/name_spec.rb} +2 -2
- data/spec/name_tamer/text_spec.rb +42 -0
- metadata +31 -24
- data/lib/name-tamer/version.rb +0 -3
@@ -1,5 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
class String
|
3
|
+
unless respond_to? :presence
|
4
|
+
def presence
|
5
|
+
self unless empty?
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
3
9
|
# Strip illegal characters out completely
|
4
10
|
def strip_unwanted!(filter)
|
5
11
|
substitute!(filter, '')
|
@@ -70,11 +76,11 @@ class String
|
|
70
76
|
end
|
71
77
|
|
72
78
|
def upcase_first_letter!
|
73
|
-
gsub!(/\b\w
|
79
|
+
gsub!(/\b\w/, &:upcase) || self
|
74
80
|
end
|
75
81
|
|
76
82
|
def downcase_after_apostrophe!
|
77
|
-
gsub!(/\'\w\b
|
83
|
+
gsub!(/\'\w\b/, &:downcase) || self # Lowercase 's
|
78
84
|
end
|
79
85
|
|
80
86
|
# Our list of terminal characters that indicate a non-celtic name used
|
@@ -170,19 +176,19 @@ class String
|
|
170
176
|
gsub!(pattern, replacement) || self
|
171
177
|
end
|
172
178
|
|
173
|
-
NONBREAKING_SPACE = "\u00a0"
|
174
|
-
ASCII_SPACE =
|
179
|
+
NONBREAKING_SPACE = "\u00a0".freeze
|
180
|
+
ASCII_SPACE = ' '.freeze
|
175
181
|
|
176
182
|
COMPOUND_NAMES = [
|
177
183
|
'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
|
178
184
|
'Baron Cohen', 'Strang Steel',
|
179
185
|
'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
|
180
|
-
]
|
186
|
+
].freeze
|
181
187
|
|
182
188
|
NAME_MODIFIERS = [
|
183
189
|
'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
|
184
190
|
'St[\.]?', 'Zur'
|
185
|
-
]
|
191
|
+
].freeze
|
186
192
|
|
187
193
|
# Transliterations (like the i18n defaults)
|
188
194
|
# see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
|
@@ -215,7 +221,7 @@ class String
|
|
215
221
|
'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w',
|
216
222
|
'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
|
217
223
|
'Ž' => 'Z', 'ž' => 'z'
|
218
|
-
}
|
224
|
+
}.freeze
|
219
225
|
|
220
226
|
# When strings are mistakenly encoded as single-byte character sets, instead
|
221
227
|
# of UTF-8, there are some distinctive character combinations that we can spot
|
@@ -251,7 +257,7 @@ class String
|
|
251
257
|
'ø' => 'ø', 'ù' => 'ù', 'ú' => 'ú', 'û' => 'û', 'ü' => 'ü',
|
252
258
|
'ý' => 'ý', 'þ' => 'þ', 'ÿ' => 'ÿ',
|
253
259
|
"\x00" => '' # Manually added to avoid Bad Argument exception
|
254
|
-
}
|
260
|
+
}.freeze
|
255
261
|
|
256
262
|
BAD_ENCODING_PATTERNS = /(#{BAD_ENCODING.keys.join('|')})/
|
257
263
|
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module NameTamer
  # Wraps a piece of free text and derives slug-style values from it.
  #
  # Options are supplied through the +args+ hash given to the constructor and
  # are read lazily by the private helpers below:
  #   :sep     - separator used by #parameterize (defaults to SLUG_DELIMITER)
  #   :rfc3987 - truthy to select FILTER_RFC3987 instead of FILTER_COMPAT
  #   :filter  - explicit filter, overrides the :rfc3987 choice
  class Text
    # @param string [String] the text to work on
    # @param args [Hash] optional settings (:sep, :rfc3987, :filter)
    def initialize(string, args = {})
      @string = string
      @args = args
    end

    # All the potential slugs from the string
    # e.g. 'lorem ipsum dolor' -> ['lorem', 'ipsum', 'dolor', 'lorem-ipsum', 'ipsum-dolor', 'lorem-ipsum-dolor']
    #
    # Each segment is wrapped in a fresh Text (with default options) and its
    # neighbours are collected; duplicates across segments are removed.
    def slugs
      @slugs ||= segments.flat_map { |segment| self.class.new(segment).neighbours }.uniq
    end

    # Split the string into segments (e.g. sentences)
    #
    # A boundary is one of . ? , : ; ! followed by whitespace, or a whitespace
    # character followed by / or - and then more whitespace. The boundary
    # characters themselves are dropped from the result.
    def segments
      string.split(%r{(?:[\.\?,:;!]|[[:space:]][/-])[[:space:]]})
    end

    # The string as a slug
    #
    # Works on a copy of the string, so the original is left untouched. The
    # chained bang methods are String extensions defined elsewhere in the gem
    # (NOTE(review): the chain relies on each of them returning the string
    # even when nothing changed - confirm against name_tamer/string.rb).
    # Falls back to '_' when nothing is left, and the result is lower-cased.
    def parameterize
      @parameterize ||= (
        string
          .dup
          .whitespace_to!(separator)
          .invalid_chars_to!(separator)
          .strip_unwanted!(filter)
          .fix_separators!(separator)
          .approximate_latin_chars!
          .presence || '_'
      ).downcase
    end

    # Hyphen-joined strings built from NameTamer[string].array.neighbours.
    # NOTE(review): presumably every run of adjacent words, as in the #slugs
    # example above - confirm against the Array extension in the gem.
    def neighbours
      @neighbours ||= NameTamer[string].array.neighbours.map { |words| words.join('-') }
    end

    private

    attr_reader :string, :args

    # Separator placed between words by #parameterize.
    def separator
      @separator ||= args[:sep] || SLUG_DELIMITER
    end

    # Whether the RFC 3987 filter was requested. Memoized with defined?
    # because ||= can never cache a false value.
    def rfc3987
      return @rfc3987 if defined?(@rfc3987)

      @rfc3987 = args[:rfc3987] || false
    end

    # Filter handed to String#strip_unwanted!; an explicit :filter option wins
    # over the :rfc3987 switch.
    def filter
      @filter ||= args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
    end
  end
end
|
data/name-tamer.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
lib = File.expand_path('../lib', __FILE__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
-
require '
|
3
|
+
require 'name_tamer/version'
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = 'name-tamer'
|
@@ -17,13 +17,13 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features|coverage)\/})
|
18
18
|
spec.require_paths = ['lib']
|
19
19
|
|
20
|
-
spec.add_development_dependency 'rake', '~>
|
21
|
-
spec.add_development_dependency 'rspec', '~> 3
|
22
|
-
spec.add_development_dependency 'gem-release', '~> 0
|
23
|
-
spec.add_development_dependency 'simplecov', '~> 0
|
24
|
-
spec.add_development_dependency 'coveralls', '~> 0
|
25
|
-
spec.add_development_dependency 'rubocop', '~> 0
|
26
|
-
spec.add_development_dependency 'guard', '~> 2
|
27
|
-
spec.add_development_dependency 'guard-rspec', '~> 4
|
28
|
-
spec.add_development_dependency 'guard-rubocop', '~> 1
|
20
|
+
spec.add_development_dependency 'rake', '~> 11'
|
21
|
+
spec.add_development_dependency 'rspec', '~> 3'
|
22
|
+
spec.add_development_dependency 'gem-release', '~> 0'
|
23
|
+
spec.add_development_dependency 'simplecov', '~> 0'
|
24
|
+
spec.add_development_dependency 'coveralls', '~> 0'
|
25
|
+
spec.add_development_dependency 'rubocop', '~> 0'
|
26
|
+
spec.add_development_dependency 'guard', '~> 2'
|
27
|
+
spec.add_development_dependency 'guard-rspec', '~> 4'
|
28
|
+
spec.add_development_dependency 'guard-rubocop', '~> 1'
|
29
29
|
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
require 'spec_helper'
|
3
3
|
require 'name-tamer'
|
4
4
|
|
5
|
-
describe NameTamer do
|
5
|
+
describe NameTamer::Name do
|
6
6
|
let(:names) do
|
7
7
|
[
|
8
8
|
{ n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
|
@@ -207,7 +207,7 @@ describe NameTamer do
|
|
207
207
|
{ n: 'Dibble & Grub LLP', t: :organization,
|
208
208
|
nn: 'Dibble & Grub',
|
209
209
|
sn: 'Dibble and Grub',
|
210
|
-
s: 'dibble-and-grub' }
|
210
|
+
s: 'dibble-and-grub' }
|
211
211
|
]
|
212
212
|
end
|
213
213
|
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'name-tamer'
|
4
|
+
|
5
|
+
# Specs for NameTamer::Text: segment splitting and slug compilation.
describe NameTamer::Text do
  context '#segments' do
    # Sample text containing every boundary the splitter is expected to
    # recognise: ", " ". " "? " ": " "; " " - " and " / ".
    let(:sample) do
      'Lorem ipsum dolor sit amet, consectetur adipiscing elit. '\
      'Nullam venenatis? Risus eu: auctor feugiat; libero nisl congue '\
      'arcu - eget molestie metus / erat eu diam'
    end

    let(:expected_segments) do
      [
        'Lorem ipsum dolor sit amet',
        'consectetur adipiscing elit',
        'Nullam venenatis',
        'Risus eu',
        'auctor feugiat',
        'libero nisl congue arcu',
        'eget molestie metus',
        'erat eu diam'
      ]
    end

    it 'splits a string into segments at appropriate boundaries' do
      found = described_class.new(sample).segments

      expect(found).to include(*expected_segments)
    end
  end

  context '#slugs' do
    let(:sample) { 'Lorem Ipsum Limited, lorem ipsum dolor. Dolor Mr Sit Amet.' }

    # Every slug the sample should yield; the count is checked separately so
    # that unexpected extras are caught as well.
    let(:expected_slugs) do
      [
        'lorem', 'lorem-ipsum', 'ipsum', 'lorem-ipsum-dolor', 'ipsum-dolor',
        'dolor', 'dolor-mr', 'dolor-mr-sit', 'dolor-mr-sit-amet', 'mr',
        'mr-sit', 'mr-sit-amet', 'sit', 'sit-amet', 'amet'
      ]
    end

    it 'compiles all the potential slugs into an array' do
      found = described_class.new(sample).slugs

      expect(found).to include(*expected_slugs)
      expect(found.length).to eq 15
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-tamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xenapto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -16,126 +16,126 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '11'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '11'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '3
|
33
|
+
version: '3'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '3
|
40
|
+
version: '3'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: gem-release
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0
|
47
|
+
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0
|
54
|
+
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: simplecov
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0
|
61
|
+
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0
|
68
|
+
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: coveralls
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '0
|
75
|
+
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '0
|
82
|
+
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rubocop
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '0
|
89
|
+
version: '0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '0
|
96
|
+
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: guard
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '2
|
103
|
+
version: '2'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '2
|
110
|
+
version: '2'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: guard-rspec
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '4
|
117
|
+
version: '4'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '4
|
124
|
+
version: '4'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: guard-rubocop
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: '1
|
131
|
+
version: '1'
|
132
132
|
type: :development
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: '1
|
138
|
+
version: '1'
|
139
139
|
description: Useful methods for taming names
|
140
140
|
email:
|
141
141
|
- developers@xenapto.com
|
@@ -158,10 +158,16 @@ files:
|
|
158
158
|
- doc/prefixes.csv
|
159
159
|
- doc/suffixes.csv
|
160
160
|
- lib/name-tamer.rb
|
161
|
-
- lib/
|
162
|
-
- lib/
|
161
|
+
- lib/name_tamer.rb
|
162
|
+
- lib/name_tamer/array.rb
|
163
|
+
- lib/name_tamer/constants.rb
|
164
|
+
- lib/name_tamer/name.rb
|
165
|
+
- lib/name_tamer/string.rb
|
166
|
+
- lib/name_tamer/text.rb
|
167
|
+
- lib/name_tamer/version.rb
|
163
168
|
- name-tamer.gemspec
|
164
|
-
- spec/
|
169
|
+
- spec/name_tamer/name_spec.rb
|
170
|
+
- spec/name_tamer/text_spec.rb
|
165
171
|
- spec/spec_helper.rb
|
166
172
|
homepage: https://github.com/Xenapto/name-tamer
|
167
173
|
licenses:
|
@@ -189,5 +195,6 @@ specification_version: 4
|
|
189
195
|
summary: 'Example: NameTamer[''Mr. John Q. Smith III, MD''].simple_name # => John
|
190
196
|
Smith'
|
191
197
|
test_files:
|
192
|
-
- spec/
|
198
|
+
- spec/name_tamer/name_spec.rb
|
199
|
+
- spec/name_tamer/text_spec.rb
|
193
200
|
- spec/spec_helper.rb
|
data/lib/name-tamer/version.rb
DELETED