name-tamer 0.2.8 → 0.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/name-tamer.rb +8 -1
- data/lib/name-tamer/version.rb +1 -1
- data/spec/name_tamer_spec.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7bc57d8799f60a21ac3da86db62f28cc7decbdf
|
4
|
+
data.tar.gz: 05a8bcf92f7765e60df2b437d4526c028ea41916
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b86e1afe2021fff0ec57e1dc985097d48715e20cee4e0b00ee06243e3e8086b55d34ee8354fc77f6d608b721f0ab149ebc0247987b19aaeca83353b37a64ef3a
|
7
|
+
data.tar.gz: 79dfd420738cbfadd96b2382d703cd9bdf7f688b55bb97535560679cc8ee8580087bd12c84c358eedba1e7da57180edbe9759644e35d256949bbf5e2d4ee8b0c
|
data/lib/name-tamer.rb
CHANGED
@@ -48,6 +48,7 @@ class NameTamer
|
|
48
48
|
@tidy_name = name.dup # Start with the name we've received
|
49
49
|
|
50
50
|
unescape # Unescape percent-encoded characters and fix UTF-8 encoding
|
51
|
+
remove_zero_width # remove zero-width characters
|
51
52
|
tidy_spacing # " John Smith " -> "John Smith"
|
52
53
|
fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
|
53
54
|
consolidate_initials # "I. B. M." -> "I.B.M."
|
@@ -133,6 +134,10 @@ class NameTamer
|
|
133
134
|
@tidy_name.ensure_safe!.safe_unescape!
|
134
135
|
end
|
135
136
|
|
137
|
+
def remove_zero_width
|
138
|
+
@tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
|
139
|
+
end
|
140
|
+
|
136
141
|
def tidy_spacing
|
137
142
|
@tidy_name
|
138
143
|
.space_after_comma!
|
@@ -390,7 +395,9 @@ class NameTamer
|
|
390
395
|
NONBREAKING_SPACE = "\u00a0"
|
391
396
|
ASCII_SPACE = "\u0020"
|
392
397
|
ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
|
393
|
-
SLUG_DELIMITER =
|
398
|
+
SLUG_DELIMITER = '-'
|
399
|
+
|
400
|
+
ZERO_WIDTH_FILTER = /[\u200B\u200C\u200D\u2063\uFEFF]/
|
394
401
|
|
395
402
|
# Constants for parameterizing Unicode strings for IRIs
|
396
403
|
#
|
data/lib/name-tamer/version.rb
CHANGED
data/spec/name_tamer_spec.rb
CHANGED
@@ -187,7 +187,9 @@ describe NameTamer do
|
|
187
187
|
{ n: 'John “Jonno†Johnson', t: :person, nn: 'John “Jonno” Johnson', sn: 'John Johnson', s: 'john-johnson' },
|
188
188
|
{ n: 'Pablo M Sánchez', t: :person, nn: 'Pablo M Sánchez', sn: 'Pablo Sánchez', s: 'pablo-sanchez' },
|
189
189
|
{ n: "\xc3\x28", t: :person, nn: '()', sn: '()', s: '_' }, # Invalid byte sequence in UTF-8
|
190
|
-
{ n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' } # Encoding::CompatibilityError
|
190
|
+
{ n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' }, # Encoding::CompatibilityError
|
191
|
+
{ n: "John Smith\u{FEFF}\u{200B}\u{200C}\u{200D}\u{2063}", t: :person,
|
192
|
+
nn: 'John Smith', sn: 'John Smith', s: 'john-smith' } # Zero-width characters
|
191
193
|
]
|
192
194
|
end
|
193
195
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-tamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xenapto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|