name-tamer 0.2.0 → 0.2.1
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/name-tamer.rb +26 -44
- data/lib/name-tamer/version.rb +1 -1
- data/lib/string_extras.rb +11 -2
- data/spec/name_tamer_spec.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a8134129faabc9dc73dec3bd107873adf6a8f21
|
4
|
+
data.tar.gz: 6c2c5f68c2e3e9a41c6ccc0ddba78a896952c725
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 199a1fb93b68757fd0a9bc6657c1595c655da6ad5ad0ba87ae6cbf43dea88a15e3ecc9cb7317ed3edff415cac3288e26f012653e2432e222a4497aed41d21534
|
7
|
+
data.tar.gz: ae987673dfb5a693706a470765a3d7ac300dd8ff14c76d796659e97c5db79085fa735d1c2b1e218baf2da829fba047690cddbe640b02758b63a6fe49e503d01b
|
data/Gemfile.lock
CHANGED
data/lib/name-tamer.rb
CHANGED
@@ -19,13 +19,35 @@ class NameTamer
|
|
19
19
|
def [](name, args = {})
|
20
20
|
new name, args
|
21
21
|
end
|
22
|
+
|
23
|
+
# Make a slug from a string
|
24
|
+
def parameterize(string, args = {})
|
25
|
+
sep = args[:sep] || SLUG_DELIMITER
|
26
|
+
rfc3987 = args[:rfc3987] || false
|
27
|
+
filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
|
28
|
+
|
29
|
+
new_string = string.dup
|
30
|
+
|
31
|
+
new_string
|
32
|
+
.whitespace_to!(sep)
|
33
|
+
.invalid_chars_to!(sep)
|
34
|
+
.strip_unwanted!(filter)
|
35
|
+
.fix_separators!(sep)
|
36
|
+
.approximate_latin_chars!
|
37
|
+
|
38
|
+
# Have we got anything left?
|
39
|
+
new_string = '_' if new_string.empty?
|
40
|
+
|
41
|
+
# downcase any latin characters
|
42
|
+
new_string.downcase
|
43
|
+
end
|
22
44
|
end
|
23
45
|
|
24
46
|
def tidy_name
|
25
47
|
unless @tidy_name
|
26
48
|
@tidy_name = name.dup # Start with the name we've received
|
27
49
|
|
28
|
-
|
50
|
+
unescape # Unescape percent-encoded characters and fix UTF-8 encoding
|
29
51
|
tidy_spacing # " John Smith " -> "John Smith"
|
30
52
|
fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
|
31
53
|
consolidate_initials # "I. B. M." -> "I.B.M."
|
@@ -65,12 +87,7 @@ class NameTamer
|
|
65
87
|
end
|
66
88
|
|
67
89
|
def slug
|
68
|
-
|
69
|
-
@slug = simple_name.dup # Start with search name
|
70
|
-
slugify # "John Doe" -> "john-doe"
|
71
|
-
end
|
72
|
-
|
73
|
-
@slug
|
90
|
+
@slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
|
74
91
|
end
|
75
92
|
|
76
93
|
def contact_type
|
@@ -112,8 +129,8 @@ class NameTamer
|
|
112
129
|
# Tidy up the name we've received
|
113
130
|
#--------------------------------------------------------
|
114
131
|
|
115
|
-
def
|
116
|
-
@tidy_name.ensure_safe
|
132
|
+
def unescape
|
133
|
+
@tidy_name.ensure_safe!.safe_unescape!
|
117
134
|
end
|
118
135
|
|
119
136
|
def tidy_spacing
|
@@ -272,18 +289,6 @@ class NameTamer
|
|
272
289
|
@simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
|
273
290
|
end
|
274
291
|
|
275
|
-
#--------------------------------------------------------
|
276
|
-
# Make slug from search name
|
277
|
-
#--------------------------------------------------------
|
278
|
-
|
279
|
-
def slugify
|
280
|
-
# Inflector::parameterize just gives up with non-latin characters so...
|
281
|
-
# @slug = @slug.parameterize # Can't use this
|
282
|
-
|
283
|
-
# Instead we'll do it ourselves
|
284
|
-
@slug = parameterize @slug
|
285
|
-
end
|
286
|
-
|
287
292
|
#--------------------------------------------------------
|
288
293
|
# Initialization and utilities
|
289
294
|
#--------------------------------------------------------
|
@@ -378,29 +383,6 @@ class NameTamer
|
|
378
383
|
.upcase_initials!
|
379
384
|
end
|
380
385
|
|
381
|
-
def parameterize(string, args = {})
|
382
|
-
sep = args[:sep] || SLUG_DELIMITER
|
383
|
-
rfc3987 = args[:rfc3987] || false
|
384
|
-
filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
|
385
|
-
|
386
|
-
# First we unescape any pct-encoded characters. These might turn into
|
387
|
-
# things we want to alter for the slug, like whitespace (e.g. %20)
|
388
|
-
new_string = URI.unescape(string)
|
389
|
-
|
390
|
-
new_string
|
391
|
-
.whitespace_to!(sep)
|
392
|
-
.invalid_chars_to!(sep)
|
393
|
-
.strip_unwanted!(filter)
|
394
|
-
.fix_separators!(sep)
|
395
|
-
.approximate_latin_chars!
|
396
|
-
|
397
|
-
# Have we got anything left?
|
398
|
-
new_string = '_' if new_string.empty?
|
399
|
-
|
400
|
-
# downcase any latin characters
|
401
|
-
new_string.downcase
|
402
|
-
end
|
403
|
-
|
404
386
|
#--------------------------------------------------------
|
405
387
|
# Constants
|
406
388
|
#--------------------------------------------------------
|
data/lib/name-tamer/version.rb
CHANGED
data/lib/string_extras.rb
CHANGED
@@ -25,6 +25,16 @@ class String
|
|
25
25
|
substitute!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
|
26
26
|
end
|
27
27
|
|
28
|
+
# Unescape percent-encoded characters
|
29
|
+
# This might introduce UTF-8 invalid byte sequence
|
30
|
+
# so we take precautions
|
31
|
+
def safe_unescape!
|
32
|
+
string = URI.unescape(self)
|
33
|
+
return self if self == string
|
34
|
+
replace string
|
35
|
+
ensure_safe!
|
36
|
+
end
|
37
|
+
|
28
38
|
# Make sure separators are not where they shouldn't be
|
29
39
|
def fix_separators!(separator)
|
30
40
|
return self if separator.nil? || separator.empty?
|
@@ -139,8 +149,7 @@ class String
|
|
139
149
|
gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " } || self
|
140
150
|
end
|
141
151
|
|
142
|
-
def ensure_safe
|
143
|
-
return if valid_encoding?
|
152
|
+
def ensure_safe!
|
144
153
|
encode!('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
145
154
|
end
|
146
155
|
|
data/spec/name_tamer_spec.rb
CHANGED
@@ -122,7 +122,7 @@ describe NameTamer do
|
|
122
122
|
{ n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
|
123
123
|
{ n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
|
124
124
|
{ n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
|
125
|
-
{ n: 'xxx%52zzz', t: :organization, nn: '
|
125
|
+
{ n: 'xxx%52zzz', t: :organization, nn: 'xxxRzzz', sn: 'xxxRzzz', s: 'xxxrzzz' },
|
126
126
|
{ n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
|
127
127
|
s: 'Евгений-Болотнов' },
|
128
128
|
{ n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-tamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xenapto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|