name-tamer 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2706fda214e230fdf9bda63cc4424842183672e3
4
- data.tar.gz: 89f191da4268cfbca81674335e3dfe5abec626a0
3
+ metadata.gz: f8e5bd6a935818948438315fc5f7a1a2ff90e173
4
+ data.tar.gz: c6b2f73fa732939faa8e0eef6c680d852374b7c2
5
5
  SHA512:
6
- metadata.gz: cc8d7e0474059fe5e3680cc720655c303628fbb1ea25fe638e2e630582374720dd867b1b149c18990503d2c7e3d68ead42b896f41331c2fc650f13287146e438
7
- data.tar.gz: 16e081e040d86117620e5a1760e8cf49328264adf9488ab29f3f8649d4e732247f30b4ed430005bd5ba51063ce03d196728e9323e0b1b300912dd32993282de8
6
+ metadata.gz: dca80914cdbb4d1e6d254e51c2c9104ea18410424cd75663800564ac9c597cfe19c16651467cebc20883f4359b9e394ee23fbc0291cd6dad293c801db862ec84
7
+ data.tar.gz: 4cfadfe895919caf36468c5dc2c243054db0be686f8a6bb8500c69f4f7b39aeb00e9c32e6d358eb088e6e39c36ce1f9eb2d3d2ebce5806f5618aa8ee41832b75
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- name-tamer (0.1.5)
4
+ name-tamer (0.1.6)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -26,6 +26,7 @@ class NameTamer
26
26
  @nice_name = name.dup # Start with the name we've received
27
27
 
28
28
  tidy_spacing # " John Smith " -> "John Smith"
29
+ fix_encoding_errors # "RenÃ© Descartes" -> "René Descartes"
29
30
  consolidate_initials # "I. B. M." -> "I.B.M."
30
31
  remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
31
32
  fixup_last_name_first # "Smith, John" -> "John Smith"
@@ -108,6 +109,10 @@ class NameTamer
108
109
  .whitespace_to!(ASCII_SPACE)
109
110
  end
110
111
 
112
+ def fix_encoding_errors
113
+ @nice_name.fix_encoding_errors!
114
+ end
115
+
111
116
  # Remove spaces from groups of initials
112
117
  def consolidate_initials
113
118
  @nice_name
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = '0.1.6'
2
+ VERSION = '0.1.7'
3
3
  end
@@ -51,6 +51,13 @@ class String
51
51
  self # Allows chaining
52
52
  end
53
53
 
54
+ # Strings that were wrongly encoded with single-byte encodings sometimes have
55
+ # tell-tale substrings that we can put back into the correct UTF-8 character
56
+ def fix_encoding_errors!
57
+ self.gsub!(BAD_ENCODING_PATTERNS) { |substring| BAD_ENCODING[substring] || substring }
58
+ self # Allows chaining
59
+ end
60
+
54
61
  def upcase_first_letter!
55
62
  self.gsub!(/\b\w/) { |first| first.upcase }
56
63
  self # Allows chaining
@@ -190,4 +197,37 @@ class String
190
197
  'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
191
198
  'Ž' => 'Z', 'ž' => 'z'
192
199
  }
200
+
201
+ # When strings are mistakenly encoded as single-byte character sets, instead
202
+ # of UTF-8, there are some distinctive character combinations that we can spot
203
+ # and fix
204
+ BAD_ENCODING = {
205
+ 'â‚¬' => '€', 'â€š' => '‚', 'Æ’' => 'ƒ', 'â€ž' => '„', 'â€¦' => '…',
206
+ 'â€ ' => '†', 'â€¡' => '‡', 'Ë†' => 'ˆ', 'â€°' => '‰', 'Å ' => 'Š',
207
+ 'â€¹' => '‹', 'Å’' => 'Œ', 'Å½' => 'Ž', 'â€˜' => '‘', 'â€™' => '’',
208
+ 'â€œ' => '“', 'â€' => '”', 'â€¢' => '•', 'â€“' => '–', 'â€”' => '—',
209
+ 'Ëœ' => '˜', 'â„¢' => '™', 'Å¡' => 'š', 'â€º' => '›', 'Å“' => 'œ',
210
+ 'Å¾' => 'ž', 'Å¸' => 'Ÿ', 'Â ' => ' ', 'Â¡' => '¡', 'Â¢' => '¢',
211
+ 'Â£' => '£', 'Â¤' => '¤', 'Â¥' => '¥', 'Â¦' => '¦', 'Â§' => '§',
212
+ 'Â¨' => '¨', 'Â©' => '©', 'Âª' => 'ª', 'Â«' => '«', 'Â¬' => '¬',
213
+ 'Â­' => '­', 'Â®' => '®', 'Â¯' => '¯', 'Â°' => '°', 'Â±' => '±',
214
+ 'Â²' => '²', 'Â³' => '³', 'Â´' => '´', 'Âµ' => 'µ', 'Â¶' => '¶',
215
+ 'Â·' => '·', 'Â¸' => '¸', 'Â¹' => '¹', 'Âº' => 'º', 'Â»' => '»',
216
+ 'Â¼' => '¼', 'Â½' => '½', 'Â¾' => '¾', 'Â¿' => '¿', 'Ã€' => 'À',
217
+ 'Ã�' => 'Á', 'Ã‚' => 'Â', 'Ãƒ' => 'Ã', 'Ã„' => 'Ä', 'Ã…' => 'Å',
218
+ 'Ã†' => 'Æ', 'Ã‡' => 'Ç', 'Ãˆ' => 'È', 'Ã‰' => 'É', 'ÃŠ' => 'Ê',
219
+ 'Ã‹' => 'Ë', 'ÃŒ' => 'Ì', 'Ã�' => 'Í', 'ÃŽ' => 'Î', 'Ã�' => 'Ï',
220
+ 'Ã�' => 'Ð', 'Ã‘' => 'Ñ', 'Ã’' => 'Ò', 'Ã“' => 'Ó', 'Ã”' => 'Ô',
221
+ 'Ã•' => 'Õ', 'Ã–' => 'Ö', 'Ã—' => '×', 'Ã˜' => 'Ø', 'Ã™' => 'Ù',
222
+ 'Ãš' => 'Ú', 'Ã›' => 'Û', 'Ãœ' => 'Ü', 'Ã�' => 'Ý', 'Ãž' => 'Þ',
223
+ 'ÃŸ' => 'ß', 'Ã ' => 'à', 'Ã¡' => 'á', 'Ã¢' => 'â', 'Ã£' => 'ã',
224
+ 'Ã¤' => 'ä', 'Ã¥' => 'å', 'Ã¦' => 'æ', 'Ã§' => 'ç', 'Ã¨' => 'è',
225
+ 'Ã©' => 'é', 'Ãª' => 'ê', 'Ã«' => 'ë', 'Ã¬' => 'ì', 'Ã­' => 'í',
226
+ 'Ã®' => 'î', 'Ã¯' => 'ï', 'Ã°' => 'ð', 'Ã±' => 'ñ', 'Ã²' => 'ò',
227
+ 'Ã³' => 'ó', 'Ã´' => 'ô', 'Ãµ' => 'õ', 'Ã¶' => 'ö', 'Ã·' => '÷',
228
+ 'Ã¸' => 'ø', 'Ã¹' => 'ù', 'Ãº' => 'ú', 'Ã»' => 'û', 'Ã¼' => 'ü',
229
+ 'Ã½' => 'ý', 'Ã¾' => 'þ', 'Ã¿' => 'ÿ'
230
+ }
231
+
232
+ BAD_ENCODING_PATTERNS = /(#{BAD_ENCODING.keys.join('|')})/
193
233
  end
@@ -181,6 +181,12 @@ describe NameTamer do
181
181
  nn: 'Scout® Loyalty Optimizer',
182
182
  sn: 'Scout Loyalty Optimizer',
183
183
  s: 'scout-loyalty-optimizer'
184
+ },
185
+ { n: 'RenÃ© Descartes',
186
+ t: :person,
187
+ nn: 'René Descartes',
188
+ sn: 'René Descartes',
189
+ s:'rene-descartes'
184
190
  }
185
191
  ]
186
192
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto