name-tamer 0.2.8 → 0.2.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 584408a915c3b55c01d600ae7cee7dad1585b7d9
4
- data.tar.gz: aeec9318693df40e29d3444e78d57262aa749711
3
+ metadata.gz: a7bc57d8799f60a21ac3da86db62f28cc7decbdf
4
+ data.tar.gz: 05a8bcf92f7765e60df2b437d4526c028ea41916
5
5
  SHA512:
6
- metadata.gz: cb502da77de553dd0e451f8829770851de8bf7a6f180be99361c05f50278cd773fc0d347fbfa65969131b5f5e9374defacee2677d39a03372f8a18ec8bc4b589
7
- data.tar.gz: 4381197a9a181b4c4a135e53c01576b8edd4068c7e9a4517796124814ba0ff6e3adf2d3fdc17d20eab6a010f8d332a5de07fbd91af5abcf8fed0b3fe9ad78aa2
6
+ metadata.gz: b86e1afe2021fff0ec57e1dc985097d48715e20cee4e0b00ee06243e3e8086b55d34ee8354fc77f6d608b721f0ab149ebc0247987b19aaeca83353b37a64ef3a
7
+ data.tar.gz: 79dfd420738cbfadd96b2382d703cd9bdf7f688b55bb97535560679cc8ee8580087bd12c84c358eedba1e7da57180edbe9759644e35d256949bbf5e2d4ee8b0c
data/lib/name-tamer.rb CHANGED
@@ -48,6 +48,7 @@ class NameTamer
48
48
  @tidy_name = name.dup # Start with the name we've received
49
49
 
50
50
  unescape # Unescape percent-encoded characters and fix UTF-8 encoding
51
+ remove_zero_width # remove zero-width characters
51
52
  tidy_spacing # " John Smith " -> "John Smith"
52
53
  fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
53
54
  consolidate_initials # "I. B. M." -> "I.B.M."
@@ -133,6 +134,10 @@ class NameTamer
133
134
  @tidy_name.ensure_safe!.safe_unescape!
134
135
  end
135
136
 
137
+ def remove_zero_width
138
+ @tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
139
+ end
140
+
136
141
  def tidy_spacing
137
142
  @tidy_name
138
143
  .space_after_comma!
@@ -390,7 +395,9 @@ class NameTamer
390
395
  NONBREAKING_SPACE = "\u00a0"
391
396
  ASCII_SPACE = "\u0020"
392
397
  ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
393
- SLUG_DELIMITER = '-'
398
+ SLUG_DELIMITER = '-'
399
+
400
+ ZERO_WIDTH_FILTER = /[\u200B\u200C\u200D\u2063\uFEFF]/
394
401
 
395
402
  # Constants for parameterizing Unicode strings for IRIs
396
403
  #
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = '0.2.8'
2
+ VERSION = '0.2.9'
3
3
  end
@@ -187,7 +187,9 @@ describe NameTamer do
187
187
  { n: 'John “Jonno” Johnson', t: :person, nn: 'John “Jonno” Johnson', sn: 'John Johnson', s: 'john-johnson' },
188
188
  { n: 'Pablo M Sánchez', t: :person, nn: 'Pablo M Sánchez', sn: 'Pablo Sánchez', s: 'pablo-sanchez' },
189
189
  { n: "\xc3\x28", t: :person, nn: '()', sn: '()', s: '_' }, # Invalid byte sequence in UTF-8
190
- { n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' } # Encoding::CompatibilityError
190
+ { n: '’%80', t: :person, nn: '’%80', sn: '’%80', s: '’80' }, # Encoding::CompatibilityError
191
+ { n: "John Smith\u{FEFF}\u{200B}\u{200C}\u{200D}\u{2063}", t: :person,
192
+ nn: 'John Smith', sn: 'John Smith', s: 'john-smith' } # Zero-width characters
191
193
  ]
192
194
  end
193
195
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-12 00:00:00.000000000 Z
11
+ date: 2015-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler