name-tamer 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 584408a915c3b55c01d600ae7cee7dad1585b7d9
4
- data.tar.gz: aeec9318693df40e29d3444e78d57262aa749711
3
+ metadata.gz: a7bc57d8799f60a21ac3da86db62f28cc7decbdf
4
+ data.tar.gz: 05a8bcf92f7765e60df2b437d4526c028ea41916
5
5
  SHA512:
6
- metadata.gz: cb502da77de553dd0e451f8829770851de8bf7a6f180be99361c05f50278cd773fc0d347fbfa65969131b5f5e9374defacee2677d39a03372f8a18ec8bc4b589
7
- data.tar.gz: 4381197a9a181b4c4a135e53c01576b8edd4068c7e9a4517796124814ba0ff6e3adf2d3fdc17d20eab6a010f8d332a5de07fbd91af5abcf8fed0b3fe9ad78aa2
6
+ metadata.gz: b86e1afe2021fff0ec57e1dc985097d48715e20cee4e0b00ee06243e3e8086b55d34ee8354fc77f6d608b721f0ab149ebc0247987b19aaeca83353b37a64ef3a
7
+ data.tar.gz: 79dfd420738cbfadd96b2382d703cd9bdf7f688b55bb97535560679cc8ee8580087bd12c84c358eedba1e7da57180edbe9759644e35d256949bbf5e2d4ee8b0c
data/lib/name-tamer.rb CHANGED
@@ -48,6 +48,7 @@ class NameTamer
48
48
  @tidy_name = name.dup # Start with the name we've received
49
49
 
50
50
  unescape # Unescape percent-encoded characters and fix UTF-8 encoding
51
+ remove_zero_width # remove zero-width characters
51
52
  tidy_spacing # " John Smith " -> "John Smith"
52
53
  fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
53
54
  consolidate_initials # "I. B. M." -> "I.B.M."
@@ -133,6 +134,10 @@ class NameTamer
133
134
  @tidy_name.ensure_safe!.safe_unescape!
134
135
  end
135
136
 
137
+ def remove_zero_width
138
+ @tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
139
+ end
140
+
136
141
  def tidy_spacing
137
142
  @tidy_name
138
143
  .space_after_comma!
@@ -390,7 +395,9 @@ class NameTamer
390
395
  NONBREAKING_SPACE = "\u00a0"
391
396
  ASCII_SPACE = "\u0020"
392
397
  ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
393
- SLUG_DELIMITER = '-'
398
+ SLUG_DELIMITER = '-'
399
+
400
+ ZERO_WIDTH_FILTER = /[\u200B\u200C\u200D\u2063\uFEFF]/
394
401
 
395
402
  # Constants for parameterizing Unicode strings for IRIs
396
403
  #
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = '0.2.8'
2
+ VERSION = '0.2.9'
3
3
  end
@@ -187,7 +187,9 @@ describe NameTamer do
187
187
  { n: 'John “Jonno” Johnson', t: :person, nn: 'John “Jonno” Johnson', sn: 'John Johnson', s: 'john-johnson' },
188
188
  { n: 'Pablo M Sánchez', t: :person, nn: 'Pablo M Sánchez', sn: 'Pablo Sánchez', s: 'pablo-sanchez' },
189
189
  { n: "\xc3\x28", t: :person, nn: '()', sn: '()', s: '_' }, # Invalid byte sequence in UTF-8
190
- { n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' } # Encoding::CompatibilityError
190
+ { n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' }, # Encoding::CompatibilityError
191
+ { n: "John Smith\u{FEFF}\u{200B}\u{200C}\u{200D}\u{2063}", t: :person,
192
+ nn: 'John Smith', sn: 'John Smith', s: 'john-smith' } # Zero-width characters
191
193
  ]
192
194
  end
193
195
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-12 00:00:00.000000000 Z
11
+ date: 2015-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler