name-tamer 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2706fda214e230fdf9bda63cc4424842183672e3
4
- data.tar.gz: 89f191da4268cfbca81674335e3dfe5abec626a0
3
+ metadata.gz: f8e5bd6a935818948438315fc5f7a1a2ff90e173
4
+ data.tar.gz: c6b2f73fa732939faa8e0eef6c680d852374b7c2
5
5
  SHA512:
6
- metadata.gz: cc8d7e0474059fe5e3680cc720655c303628fbb1ea25fe638e2e630582374720dd867b1b149c18990503d2c7e3d68ead42b896f41331c2fc650f13287146e438
7
- data.tar.gz: 16e081e040d86117620e5a1760e8cf49328264adf9488ab29f3f8649d4e732247f30b4ed430005bd5ba51063ce03d196728e9323e0b1b300912dd32993282de8
6
+ metadata.gz: dca80914cdbb4d1e6d254e51c2c9104ea18410424cd75663800564ac9c597cfe19c16651467cebc20883f4359b9e394ee23fbc0291cd6dad293c801db862ec84
7
+ data.tar.gz: 4cfadfe895919caf36468c5dc2c243054db0be686f8a6bb8500c69f4f7b39aeb00e9c32e6d358eb088e6e39c36ce1f9eb2d3d2ebce5806f5618aa8ee41832b75
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- name-tamer (0.1.5)
4
+ name-tamer (0.1.6)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -26,6 +26,7 @@ class NameTamer
26
26
  @nice_name = name.dup # Start with the name we've received
27
27
 
28
28
  tidy_spacing # " John Smith " -> "John Smith"
29
+ fix_encoding_errors # "RenÃ© Descartes" -> "René Descartes"
29
30
  consolidate_initials # "I. B. M." -> "I.B.M."
30
31
  remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
31
32
  fixup_last_name_first # "Smith, John" -> "John Smith"
@@ -108,6 +109,10 @@ class NameTamer
108
109
  .whitespace_to!(ASCII_SPACE)
109
110
  end
110
111
 
112
+ def fix_encoding_errors # Pipeline step: repair mis-decoded character sequences in @nice_name
113
+ @nice_name.fix_encoding_errors! # in-place; delegates to the fix_encoding_errors! method added to String
114
+ end
115
+
111
116
  # Remove spaces from groups of initials
112
117
  def consolidate_initials
113
118
  @nice_name
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = '0.1.6'
2
+ VERSION = '0.1.7'
3
3
  end
@@ -51,6 +51,13 @@ class String
51
51
  self # Allows chaining
52
52
  end
53
53
 
54
+ # UTF-8 strings that were wrongly decoded as a single-byte encoding often
55
+ # contain tell-tale substrings that we can map back to the intended characters
56
+ def fix_encoding_errors! # Replaces, in place, every substring matching BAD_ENCODING_PATTERNS with its BAD_ENCODING value
57
+ self.gsub!(BAD_ENCODING_PATTERNS) { |substring| BAD_ENCODING[substring] || substring } # a match with no table entry is left unchanged
58
+ self # Allows chaining; gsub! itself returns nil when no substitution was made
59
+ end
60
+
54
61
  def upcase_first_letter!
55
62
  self.gsub!(/\b\w/) { |first| first.upcase }
56
63
  self # Allows chaining
@@ -190,4 +197,37 @@ class String
190
197
  'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
191
198
  'Ž' => 'Z', 'ž' => 'z'
192
199
  }
200
+
201
+ # When UTF-8 text is mistakenly decoded as a single-byte character set,
202
+ # multi-byte characters turn into distinctive character combinations that we
203
+ # can spot and map back to the intended character
204
+ BAD_ENCODING = {
205
+ '€' => '€', '‚' => '‚', 'Æ’' => 'ƒ', '„' => '„', '…' => '…',
206
+ 'â€' => '†', '‡' => '‡', 'ˆ' => 'ˆ', '‰' => '‰', 'Å ' => 'Š',
207
+ '‹' => '‹', 'Å’' => 'Œ', 'Ž' => 'Ž', '‘' => '‘', '’' => '’',
208
+ '“' => '“', 'â€' => '”', '•' => '•', '–' => '–', '—' => '—',
209
+ 'Ëœ' => '˜', 'â„¢' => '™', 'Å¡' => 'š', '›' => '›', 'Å“' => 'œ',
210
+ 'ž' => 'ž', 'Ÿ' => 'Ÿ', ' ' => ' ', '¡' => '¡', '¢' => '¢',
211
+ '£' => '£', '¤' => '¤', 'Â¥' => '¥', '¦' => '¦', '§' => '§',
212
+ '¨' => '¨', '©' => '©', 'ª' => 'ª', '«' => '«', '¬' => '¬',
213
+ '­' => '­', '®' => '®', '¯' => '¯', '°' => '°', '±' => '±',
214
+ '²' => '²', '³' => '³', '´' => '´', 'µ' => 'µ', '¶' => '¶',
215
+ '·' => '·', '¸' => '¸', '¹' => '¹', 'º' => 'º', '»' => '»',
216
+ '¼' => '¼', '½' => '½', '¾' => '¾', '¿' => '¿', 'À' => 'À',
217
+ 'Ã�' => 'Á', 'Â' => 'Â', 'Ã' => 'Ã', 'Ä' => 'Ä', 'Ã…' => 'Å',
218
+ 'Æ' => 'Æ', 'Ç' => 'Ç', 'È' => 'È', 'É' => 'É', 'Ê' => 'Ê',
219
+ 'Ë' => 'Ë', 'ÃŒ' => 'Ì', 'Ã�' => 'Í', 'ÃŽ' => 'Î', 'Ã�' => 'Ï',
220
+ 'Ã�' => 'Ð', 'Ñ' => 'Ñ', 'Ã’' => 'Ò', 'Ó' => 'Ó', 'Ô' => 'Ô',
221
+ 'Õ' => 'Õ', 'Ö' => 'Ö', '×' => '×', 'Ø' => 'Ø', 'Ù' => 'Ù',
222
+ 'Ú' => 'Ú', 'Û' => 'Û', 'Ü' => 'Ü', 'Ã�' => 'Ý', 'Þ' => 'Þ',
223
+ 'ß' => 'ß', 'à' => 'à', 'á' => 'á', 'â' => 'â', 'ã' => 'ã',
224
+ 'ä' => 'ä', 'Ã¥' => 'å', 'æ' => 'æ', 'ç' => 'ç', 'è' => 'è',
225
+ 'é' => 'é', 'ê' => 'ê', 'ë' => 'ë', 'ì' => 'ì', 'í' => 'í',
226
+ 'î' => 'î', 'ï' => 'ï', 'ð' => 'ð', 'ñ' => 'ñ', 'ò' => 'ò',
227
+ 'ó' => 'ó', 'ô' => 'ô', 'õ' => 'õ', 'ö' => 'ö', '÷' => '÷',
228
+ 'ø' => 'ø', 'ù' => 'ù', 'ú' => 'ú', 'û' => 'û', 'ü' => 'ü',
229
+ 'ý' => 'ý', 'þ' => 'þ', 'ÿ' => 'ÿ'
230
+ }
231
+
232
+ BAD_ENCODING_PATTERNS = /(#{BAD_ENCODING.keys.join('|')})/
193
233
  end
@@ -181,6 +181,12 @@ describe NameTamer do
181
181
  nn: 'Scout® Loyalty Optimizer',
182
182
  sn: 'Scout Loyalty Optimizer',
183
183
  s: 'scout-loyalty-optimizer'
184
+ },
185
+ { n: 'René Descartes',
186
+ t: :person,
187
+ nn: 'René Descartes',
188
+ sn: 'René Descartes',
189
+ s:'rene-descartes'
184
190
  }
185
191
  ]
186
192
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto