name-tamer 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/name-tamer.rb +26 -44
- data/lib/name-tamer/version.rb +1 -1
- data/lib/string_extras.rb +11 -2
- data/spec/name_tamer_spec.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a8134129faabc9dc73dec3bd107873adf6a8f21
|
4
|
+
data.tar.gz: 6c2c5f68c2e3e9a41c6ccc0ddba78a896952c725
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 199a1fb93b68757fd0a9bc6657c1595c655da6ad5ad0ba87ae6cbf43dea88a15e3ecc9cb7317ed3edff415cac3288e26f012653e2432e222a4497aed41d21534
|
7
|
+
data.tar.gz: ae987673dfb5a693706a470765a3d7ac300dd8ff14c76d796659e97c5db79085fa735d1c2b1e218baf2da829fba047690cddbe640b02758b63a6fe49e503d01b
|
data/Gemfile.lock
CHANGED
data/lib/name-tamer.rb
CHANGED
@@ -19,13 +19,35 @@ class NameTamer
|
|
19
19
|
def [](name, args = {})
|
20
20
|
new name, args
|
21
21
|
end
|
22
|
+
|
23
|
+
# Make a slug from a string
|
24
|
+
def parameterize(string, args = {})
|
25
|
+
sep = args[:sep] || SLUG_DELIMITER
|
26
|
+
rfc3987 = args[:rfc3987] || false
|
27
|
+
filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
|
28
|
+
|
29
|
+
new_string = string.dup
|
30
|
+
|
31
|
+
new_string
|
32
|
+
.whitespace_to!(sep)
|
33
|
+
.invalid_chars_to!(sep)
|
34
|
+
.strip_unwanted!(filter)
|
35
|
+
.fix_separators!(sep)
|
36
|
+
.approximate_latin_chars!
|
37
|
+
|
38
|
+
# Have we got anything left?
|
39
|
+
new_string = '_' if new_string.empty?
|
40
|
+
|
41
|
+
# downcase any latin characters
|
42
|
+
new_string.downcase
|
43
|
+
end
|
22
44
|
end
|
23
45
|
|
24
46
|
def tidy_name
|
25
47
|
unless @tidy_name
|
26
48
|
@tidy_name = name.dup # Start with the name we've received
|
27
49
|
|
28
|
-
|
50
|
+
unescape # Unescape percent-encoded characters and fix UTF-8 encoding
|
29
51
|
tidy_spacing # " John Smith " -> "John Smith"
|
30
52
|
fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
|
31
53
|
consolidate_initials # "I. B. M." -> "I.B.M."
|
@@ -65,12 +87,7 @@ class NameTamer
|
|
65
87
|
end
|
66
88
|
|
67
89
|
def slug
|
68
|
-
|
69
|
-
@slug = simple_name.dup # Start with search name
|
70
|
-
slugify # "John Doe" -> "john-doe"
|
71
|
-
end
|
72
|
-
|
73
|
-
@slug
|
90
|
+
@slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
|
74
91
|
end
|
75
92
|
|
76
93
|
def contact_type
|
@@ -112,8 +129,8 @@ class NameTamer
|
|
112
129
|
# Tidy up the name we've received
|
113
130
|
#--------------------------------------------------------
|
114
131
|
|
115
|
-
def
|
116
|
-
@tidy_name.ensure_safe
|
132
|
+
def unescape
|
133
|
+
@tidy_name.ensure_safe!.safe_unescape!
|
117
134
|
end
|
118
135
|
|
119
136
|
def tidy_spacing
|
@@ -272,18 +289,6 @@ class NameTamer
|
|
272
289
|
@simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
|
273
290
|
end
|
274
291
|
|
275
|
-
#--------------------------------------------------------
|
276
|
-
# Make slug from search name
|
277
|
-
#--------------------------------------------------------
|
278
|
-
|
279
|
-
def slugify
|
280
|
-
# Inflector::parameterize just gives up with non-latin characters so...
|
281
|
-
# @slug = @slug.parameterize # Can't use this
|
282
|
-
|
283
|
-
# Instead we'll do it ourselves
|
284
|
-
@slug = parameterize @slug
|
285
|
-
end
|
286
|
-
|
287
292
|
#--------------------------------------------------------
|
288
293
|
# Initialization and utilities
|
289
294
|
#--------------------------------------------------------
|
@@ -378,29 +383,6 @@ class NameTamer
|
|
378
383
|
.upcase_initials!
|
379
384
|
end
|
380
385
|
|
381
|
-
def parameterize(string, args = {})
|
382
|
-
sep = args[:sep] || SLUG_DELIMITER
|
383
|
-
rfc3987 = args[:rfc3987] || false
|
384
|
-
filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
|
385
|
-
|
386
|
-
# First we unescape any pct-encoded characters. These might turn into
|
387
|
-
# things we want to alter for the slug, like whitespace (e.g. %20)
|
388
|
-
new_string = URI.unescape(string)
|
389
|
-
|
390
|
-
new_string
|
391
|
-
.whitespace_to!(sep)
|
392
|
-
.invalid_chars_to!(sep)
|
393
|
-
.strip_unwanted!(filter)
|
394
|
-
.fix_separators!(sep)
|
395
|
-
.approximate_latin_chars!
|
396
|
-
|
397
|
-
# Have we got anything left?
|
398
|
-
new_string = '_' if new_string.empty?
|
399
|
-
|
400
|
-
# downcase any latin characters
|
401
|
-
new_string.downcase
|
402
|
-
end
|
403
|
-
|
404
386
|
#--------------------------------------------------------
|
405
387
|
# Constants
|
406
388
|
#--------------------------------------------------------
|
data/lib/name-tamer/version.rb
CHANGED
data/lib/string_extras.rb
CHANGED
@@ -25,6 +25,16 @@ class String
|
|
25
25
|
substitute!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
|
26
26
|
end
|
27
27
|
|
28
|
+
# Unescape percent-encoded characters
|
29
|
+
# This might introduce UTF-8 invalid byte sequence
|
30
|
+
# so we take precautions
|
31
|
+
def safe_unescape!
|
32
|
+
string = URI.unescape(self)
|
33
|
+
return self if self == string
|
34
|
+
replace string
|
35
|
+
ensure_safe!
|
36
|
+
end
|
37
|
+
|
28
38
|
# Make sure separators are not where they shouldn't be
|
29
39
|
def fix_separators!(separator)
|
30
40
|
return self if separator.nil? || separator.empty?
|
@@ -139,8 +149,7 @@ class String
|
|
139
149
|
gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " } || self
|
140
150
|
end
|
141
151
|
|
142
|
-
def ensure_safe
|
143
|
-
return if valid_encoding?
|
152
|
+
def ensure_safe!
|
144
153
|
encode!('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
145
154
|
end
|
146
155
|
|
data/spec/name_tamer_spec.rb
CHANGED
@@ -122,7 +122,7 @@ describe NameTamer do
|
|
122
122
|
{ n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
|
123
123
|
{ n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
|
124
124
|
{ n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
|
125
|
-
{ n: 'xxx%52zzz', t: :organization, nn: '
|
125
|
+
{ n: 'xxx%52zzz', t: :organization, nn: 'xxxRzzz', sn: 'xxxRzzz', s: 'xxxrzzz' },
|
126
126
|
{ n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
|
127
127
|
s: 'Евгений-Болотнов' },
|
128
128
|
{ n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-tamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xenapto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|