name-tamer 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/name-tamer.rb +8 -1
- data/lib/name-tamer/version.rb +1 -1
- data/spec/name_tamer_spec.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7bc57d8799f60a21ac3da86db62f28cc7decbdf
|
4
|
+
data.tar.gz: 05a8bcf92f7765e60df2b437d4526c028ea41916
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b86e1afe2021fff0ec57e1dc985097d48715e20cee4e0b00ee06243e3e8086b55d34ee8354fc77f6d608b721f0ab149ebc0247987b19aaeca83353b37a64ef3a
|
7
|
+
data.tar.gz: 79dfd420738cbfadd96b2382d703cd9bdf7f688b55bb97535560679cc8ee8580087bd12c84c358eedba1e7da57180edbe9759644e35d256949bbf5e2d4ee8b0c
|
data/lib/name-tamer.rb
CHANGED
@@ -48,6 +48,7 @@ class NameTamer
|
|
48
48
|
@tidy_name = name.dup # Start with the name we've received
|
49
49
|
|
50
50
|
unescape # Unescape percent-encoded characters and fix UTF-8 encoding
|
51
|
+
remove_zero_width # remove zero-width characters
|
51
52
|
tidy_spacing # " John Smith " -> "John Smith"
|
52
53
|
fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
|
53
54
|
consolidate_initials # "I. B. M." -> "I.B.M."
|
@@ -133,6 +134,10 @@ class NameTamer
|
|
133
134
|
@tidy_name.ensure_safe!.safe_unescape!
|
134
135
|
end
|
135
136
|
|
137
|
+
def remove_zero_width
|
138
|
+
@tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
|
139
|
+
end
|
140
|
+
|
136
141
|
def tidy_spacing
|
137
142
|
@tidy_name
|
138
143
|
.space_after_comma!
|
@@ -390,7 +395,9 @@ class NameTamer
|
|
390
395
|
NONBREAKING_SPACE = "\u00a0"
|
391
396
|
ASCII_SPACE = "\u0020"
|
392
397
|
ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
|
393
|
-
SLUG_DELIMITER =
|
398
|
+
SLUG_DELIMITER = '-'
|
399
|
+
|
400
|
+
ZERO_WIDTH_FILTER = /[\u200B\u200C\u200D\u2063\uFEFF]/
|
394
401
|
|
395
402
|
# Constants for parameterizing Unicode strings for IRIs
|
396
403
|
#
|
data/lib/name-tamer/version.rb
CHANGED
data/spec/name_tamer_spec.rb
CHANGED
@@ -187,7 +187,9 @@ describe NameTamer do
|
|
187
187
|
{ n: 'John “Jonno†Johnson', t: :person, nn: 'John “Jonno” Johnson', sn: 'John Johnson', s: 'john-johnson' },
|
188
188
|
{ n: 'Pablo M Sánchez', t: :person, nn: 'Pablo M Sánchez', sn: 'Pablo Sánchez', s: 'pablo-sanchez' },
|
189
189
|
{ n: "\xc3\x28", t: :person, nn: '()', sn: '()', s: '_' }, # Invalid byte sequence in UTF-8
|
190
|
-
{ n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' } # Encoding::CompatibilityError
|
190
|
+
{ n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' }, # Encoding::CompatibilityError
|
191
|
+
{ n: "John Smith\u{FEFF}\u{200B}\u{200C}\u{200D}\u{2063}", t: :person,
|
192
|
+
nn: 'John Smith', sn: 'John Smith', s: 'john-smith' } # Zero-width characters
|
191
193
|
]
|
192
194
|
end
|
193
195
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-tamer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Xenapto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|