name_tamer 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module NameTamer
2
4
  class Name
3
5
  # References:
@@ -215,7 +217,7 @@ module NameTamer
215
217
  lowercase = @last_name.downcase
216
218
  uppercase = @last_name.upcase
217
219
  @last_name = name_case(lowercase) if [uppercase, lowercase].include?(@last_name)
218
- @nice_name = "#{@remainder} #{@last_name}"
220
+ @nice_name = +"#{@remainder} #{@last_name}"
219
221
  end
220
222
 
221
223
  # Conjoin compound names with non-breaking spaces
@@ -249,7 +251,7 @@ module NameTamer
249
251
  return unless first_name || last_name
250
252
 
251
253
  separator = first_name && last_name ? ' ' : ''
252
- @simple_name = "#{first_name}#{separator}#{last_name}"
254
+ @simple_name = +"#{first_name}#{separator}#{last_name}"
253
255
  end
254
256
 
255
257
  def find_first_usable_name(parts)
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  class String
4
4
  unless respond_to? :presence
@@ -23,7 +23,8 @@ class String
23
23
 
24
24
  # Ensure commas have exactly one space after them
25
25
  def space_around_comma!
26
- substitute!(/[[:space:]]*,[[:space:]]*/, ', ')
26
+ substitute!(/[[:space:]]*,[[:space:]]*/, ',
27
+ ')
27
28
  end
28
29
 
29
30
  # Change some characters embedded in words to our separator character
@@ -36,10 +37,7 @@ class String
36
37
  # This might introduce UTF-8 invalid byte sequence
37
38
  # so we take precautions
38
39
  def safe_unescape!
39
- string = URI.unescape(self)
40
- rescue Encoding::CompatibilityError # e.g. "\u2019%80"
41
- return self
42
- else
40
+ string = CGI.unescape(gsub('+', '%2B'))
43
41
  return self if self == string
44
42
  replace string
45
43
  ensure_safe!
@@ -177,51 +175,240 @@ class String
177
175
  gsub!(pattern, replacement) || self
178
176
  end
179
177
 
180
- NONBREAKING_SPACE = "\u00a0".freeze
181
- ASCII_SPACE = ' '.freeze
178
+ NONBREAKING_SPACE = "\u00a0"
179
+ ASCII_SPACE = ' '
182
180
 
183
181
  COMPOUND_NAMES = [
184
- 'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
185
- 'Baron Cohen', 'Strang Steel', 'Wingfield Digby',
186
- 'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
182
+ # Known families with a space in their surname
183
+ 'Baron Cohen',
184
+ 'Bonham Carter',
185
+ 'Holmes a Court',
186
+ 'Holmes à Court',
187
+ 'Lane Fox',
188
+ 'Lloyd Webber',
189
+ 'Pitt Rivers',
190
+ 'Sebag Montefiore',
191
+ 'Strang Steel',
192
+ 'Wedgwood Benn',
193
+ 'Wingfield Digby',
194
+ # Sometimes companies appear as people
195
+ 'Corporation Company',
196
+ 'Corporation System',
197
+ 'Incorporations Limited',
198
+ 'Service Company',
187
199
  ].freeze
188
200
 
189
201
  NAME_MODIFIERS = [
190
- 'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
191
- 'St[\.]?', 'Zur'
202
+ 'Al',
203
+ 'Ap',
204
+ 'Ben',
205
+ 'D[aeiou]',
206
+ 'D[ao]s',
207
+ 'De[lrn]',
208
+ 'Dell[ae]',
209
+ 'El',
210
+ 'L[eo]',
211
+ 'La',
212
+ 'Of',
213
+ 'San',
214
+ 'St[\.]?',
215
+ 'V[ao]n',
216
+ 'Zur',
192
217
  ].freeze
193
218
 
194
219
  # Transliterations (like the i18n defaults)
195
220
  # see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
196
221
  APPROXIMATIONS = {
197
- 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE',
198
- 'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I',
199
- 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O',
200
- 'Õ' => 'O', 'Ö' => 'O', '×' => 'x', 'Ø' => 'O', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
201
- 'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'Th', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a',
202
- 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
203
- 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd',
204
- 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
205
- 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y',
206
- 'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a', 'Ć' => 'C',
207
- 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c', 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
208
- 'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd', 'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E',
209
- 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
210
- 'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G',
211
- 'ģ' => 'g', 'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h', 'Ĩ' => 'I', 'ĩ' => 'i',
212
- 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I',
213
- 'ı' => 'i', 'IJ' => 'IJ', 'ij' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k',
214
- 'ĸ' => 'k', 'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l',
215
- 'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L', 'ł' => 'l', 'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N',
216
- 'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n', 'ʼn' => "'n", 'Ŋ' => 'NG', 'ŋ' => 'ng',
217
- 'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o', 'Œ' => 'OE',
218
- 'œ' => 'oe', 'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
219
- 'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's', 'Š' => 'S',
220
- 'š' => 's', 'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
221
- 'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U',
222
- 'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w',
223
- 'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
224
- 'Ž' => 'Z', 'ž' => 'z'
222
+ 'İ' => 'I',
223
+ '×' => 'x',
224
+ 'ß' => 'ss',
225
+ 'À' => 'A',
226
+ 'à' => 'a',
227
+ 'Á' => 'A',
228
+ 'á' => 'a',
229
+ 'Â' => 'A',
230
+ 'â' => 'a',
231
+ 'Ã' => 'A',
232
+ 'ã' => 'a',
233
+ 'Ä' => 'A',
234
+ 'ä' => 'a',
235
+ 'Å' => 'A',
236
+ 'å' => 'a',
237
+ 'Æ' => 'AE',
238
+ 'æ' => 'ae',
239
+ 'Ç' => 'C',
240
+ 'ç' => 'c',
241
+ 'È' => 'E',
242
+ 'è' => 'e',
243
+ 'É' => 'E',
244
+ 'é' => 'e',
245
+ 'Ê' => 'E',
246
+ 'ê' => 'e',
247
+ 'Ë' => 'E',
248
+ 'ë' => 'e',
249
+ 'Ì' => 'I',
250
+ 'ì' => 'i',
251
+ 'Í' => 'I',
252
+ 'í' => 'i',
253
+ 'Î' => 'I',
254
+ 'î' => 'i',
255
+ 'Ï' => 'I',
256
+ 'ï' => 'i',
257
+ 'Ð' => 'D',
258
+ 'ð' => 'd',
259
+ 'Ñ' => 'N',
260
+ 'ñ' => 'n',
261
+ 'Ò' => 'O',
262
+ 'ò' => 'o',
263
+ 'Ó' => 'O',
264
+ 'ó' => 'o',
265
+ 'Ô' => 'O',
266
+ 'ô' => 'o',
267
+ 'Õ' => 'O',
268
+ 'õ' => 'o',
269
+ 'Ö' => 'O',
270
+ 'ö' => 'o',
271
+ 'Ø' => 'O',
272
+ 'ø' => 'o',
273
+ 'Ù' => 'U',
274
+ 'ù' => 'u',
275
+ 'Ú' => 'U',
276
+ 'ú' => 'u',
277
+ 'Û' => 'U',
278
+ 'û' => 'u',
279
+ 'Ü' => 'U',
280
+ 'ü' => 'u',
281
+ 'Ý' => 'Y',
282
+ 'ý' => 'y',
283
+ 'Þ' => 'Th',
284
+ 'þ' => 'th',
285
+ 'ÿ' => 'y',
286
+ 'Ÿ' => 'Y',
287
+ 'Ā' => 'A',
288
+ 'ā' => 'a',
289
+ 'Ă' => 'A',
290
+ 'ă' => 'a',
291
+ 'Ą' => 'A',
292
+ 'ą' => 'a',
293
+ 'Ć' => 'C',
294
+ 'ć' => 'c',
295
+ 'Ĉ' => 'C',
296
+ 'ĉ' => 'c',
297
+ 'Ċ' => 'C',
298
+ 'ċ' => 'c',
299
+ 'Č' => 'C',
300
+ 'č' => 'c',
301
+ 'Ď' => 'D',
302
+ 'ď' => 'd',
303
+ 'Đ' => 'D',
304
+ 'đ' => 'd',
305
+ 'Ē' => 'E',
306
+ 'ē' => 'e',
307
+ 'Ĕ' => 'E',
308
+ 'ĕ' => 'e',
309
+ 'Ė' => 'E',
310
+ 'ė' => 'e',
311
+ 'Ę' => 'E',
312
+ 'ę' => 'e',
313
+ 'Ě' => 'E',
314
+ 'ě' => 'e',
315
+ 'Ĝ' => 'G',
316
+ 'ĝ' => 'g',
317
+ 'Ğ' => 'G',
318
+ 'ğ' => 'g',
319
+ 'Ġ' => 'G',
320
+ 'ġ' => 'g',
321
+ 'Ģ' => 'G',
322
+ 'ģ' => 'g',
323
+ 'Ĥ' => 'H',
324
+ 'ĥ' => 'h',
325
+ 'Ħ' => 'H',
326
+ 'ħ' => 'h',
327
+ 'Ĩ' => 'I',
328
+ 'ĩ' => 'i',
329
+ 'Ī' => 'I',
330
+ 'ī' => 'i',
331
+ 'Ĭ' => 'I',
332
+ 'ĭ' => 'i',
333
+ 'Į' => 'I',
334
+ 'į' => 'i',
335
+ 'ı' => 'i',
336
+ 'IJ' => 'IJ',
337
+ 'ij' => 'ij',
338
+ 'Ĵ' => 'J',
339
+ 'ĵ' => 'j',
340
+ 'Ķ' => 'K',
341
+ 'ķ' => 'k',
342
+ 'ĸ' => 'k',
343
+ 'Ĺ' => 'L',
344
+ 'ĺ' => 'l',
345
+ 'Ļ' => 'L',
346
+ 'ļ' => 'l',
347
+ 'Ľ' => 'L',
348
+ 'ľ' => 'l',
349
+ 'Ŀ' => 'L',
350
+ 'ŀ' => 'l',
351
+ 'Ł' => 'L',
352
+ 'ł' => 'l',
353
+ 'Ń' => 'N',
354
+ 'ń' => 'n',
355
+ 'Ņ' => 'N',
356
+ 'ņ' => 'n',
357
+ 'Ň' => 'N',
358
+ 'ň' => 'n',
359
+ 'ʼn' => "'n",
360
+ 'Ŋ' => 'NG',
361
+ 'ŋ' => 'ng',
362
+ 'Ō' => 'O',
363
+ 'ō' => 'o',
364
+ 'Ŏ' => 'O',
365
+ 'ŏ' => 'o',
366
+ 'Ő' => 'O',
367
+ 'ő' => 'o',
368
+ 'Œ' => 'OE',
369
+ 'œ' => 'oe',
370
+ 'Ŕ' => 'R',
371
+ 'ŕ' => 'r',
372
+ 'Ŗ' => 'R',
373
+ 'ŗ' => 'r',
374
+ 'Ř' => 'R',
375
+ 'ř' => 'r',
376
+ 'Ś' => 'S',
377
+ 'ś' => 's',
378
+ 'Ŝ' => 'S',
379
+ 'ŝ' => 's',
380
+ 'Ş' => 'S',
381
+ 'ş' => 's',
382
+ 'Š' => 'S',
383
+ 'š' => 's',
384
+ 'Ţ' => 'T',
385
+ 'ţ' => 't',
386
+ 'Ť' => 'T',
387
+ 'ť' => 't',
388
+ 'Ŧ' => 'T',
389
+ 'ŧ' => 't',
390
+ 'Ũ' => 'U',
391
+ 'ũ' => 'u',
392
+ 'Ū' => 'U',
393
+ 'ū' => 'u',
394
+ 'Ŭ' => 'U',
395
+ 'ŭ' => 'u',
396
+ 'Ů' => 'U',
397
+ 'ů' => 'u',
398
+ 'Ű' => 'U',
399
+ 'ű' => 'u',
400
+ 'Ų' => 'U',
401
+ 'ų' => 'u',
402
+ 'Ŵ' => 'W',
403
+ 'ŵ' => 'w',
404
+ 'Ŷ' => 'Y',
405
+ 'ŷ' => 'y',
406
+ 'Ź' => 'Z',
407
+ 'ź' => 'z',
408
+ 'Ż' => 'Z',
409
+ 'ż' => 'z',
410
+ 'ž' => 'z',
411
+ 'Ž' => 'Z',
225
412
  }.freeze
226
413
 
227
414
  # When strings are mistakenly encoded as single-byte character sets, instead
@@ -229,34 +416,130 @@ class String
229
416
  # and fix
230
417
  # Useful table here http://www.i18nqa.com/debug/utf8-debug.html
231
418
  BAD_ENCODING = {
232
- '€' => '', '‚' => '‚', 'Æ’' => 'ƒ', '„' => '„', '…' => '…',
233
- '†' => '', '‡' => '‡', 'ˆ' => 'ˆ', '‰' => '‰', 'Å ' => 'Š',
234
- '‹' => '', 'Å’' => 'Œ', 'Ž' => 'Ž', '‘' => '‘', '’' => '’',
235
- '“' => '',
419
+ "\xC3\x8D" => 'Í',
420
+ "\xC3\x8F" => 'Ï',
421
+ "\xC3\x90" => 'Ð',
422
+ "\xC3\x9D" => 'Ý',
423
+ ' ' => ' ',
424
+ '¡' => '¡',
425
+ '¢' => '¢',
426
+ '£' => '£',
427
+ '¤' => '¤',
428
+ 'Â¥' => '¥',
429
+ '¦' => '¦',
430
+ '§' => '§',
431
+ '¨' => '¨',
432
+ '©' => '©',
433
+ 'ª' => 'ª',
434
+ '«' => '«',
435
+ '¬' => '¬',
436
+ '­' => '­',
437
+ '®' => '®',
438
+ '¯' => '¯',
439
+ '°' => '°',
440
+ '±' => '±',
441
+ '²' => '²',
442
+ '³' => '³',
443
+ '´' => '´',
444
+ 'µ' => 'µ',
445
+ '¶' => '¶',
446
+ '·' => '·',
447
+ '¸' => '¸',
448
+ '¹' => '¹',
449
+ 'º' => 'º',
450
+ '»' => '»',
451
+ '¼' => '¼',
452
+ '½' => '½',
453
+ '¾' => '¾',
454
+ '¿' => '¿',
455
+ '€' => '€',
456
+ 'â„¢' => '™',
236
457
  '”' => '”', # Note the invisible Ux009D in the key
458
+ '†' => '†',
459
+ '‡' => '‡',
460
+ '•' => '•',
461
+ '…' => '…',
462
+ '‰' => '‰',
237
463
  '′' => '′', # Manually added. Some seem to use this instead of Ux2019
238
- '•' => '', '–' => '–', '—' => '—',
239
- 'Ëœ' => '˜', 'â„¢' => '™', 'Å¡' => 'š', '›' => '›', 'Å“' => 'œ',
240
- 'ž' => 'ž', 'Ÿ' => 'Ÿ', ' ' => ' ', '¡' => '¡', '¢' => '¢',
241
- '£' => '£', '¤' => '¤', 'Â¥' => '¥', '¦' => '¦', '§' => '§',
242
- '¨' => '¨', '©' => '©', 'ª' => 'ª', '«' => '«', '¬' => '¬',
243
- '­' => '­', '®' => '®', '¯' => '¯', '°' => '°', '±' => '±',
244
- '²' => '²', '³' => '³', '´' => '´', 'µ' => 'µ', '¶' => '¶',
245
- '·' => '·', '¸' => '¸', '¹' => '¹', 'º' => 'º', '»' => '»',
246
- '¼' => '¼', '½' => '½', '¾' => '¾', '¿' => '¿', 'À' => 'À',
247
- 'Ã�' => 'Á', 'Â' => 'Â', 'Ã' => 'Ã', 'Ä' => 'Ä', 'Ã…' => 'Å',
248
- 'Æ' => 'Æ', 'Ç' => 'Ç', 'È' => 'È', 'É' => 'É', 'Ê' => 'Ê',
249
- 'Ë' => 'Ë', 'ÃŒ' => 'Ì', "\xC3\x8D" => 'Í', 'ÃŽ' => 'Î', "\xC3\x8F" => 'Ï',
250
- "\xC3\x90" => 'Ð', 'Ñ' => 'Ñ', 'Ã’' => 'Ò', 'Ó' => 'Ó', 'Ô' => 'Ô',
251
- 'Õ' => 'Õ', 'Ö' => 'Ö', '×' => '×', 'Ø' => 'Ø', 'Ù' => 'Ù',
252
- 'Ú' => 'Ú', 'Û' => 'Û', 'Ãœ' => 'Ü', "\xC3\x9D" => 'Ý', 'Þ' => 'Þ',
253
- 'ß' => 'ß', 'à' => 'à', 'á' => 'á', 'â' => 'â', 'ã' => 'ã',
254
- 'ä' => 'ä', 'Ã¥' => 'å', 'æ' => 'æ', 'ç' => 'ç', 'è' => 'è',
255
- 'é' => 'é', 'ê' => 'ê', 'ë' => 'ë', 'ì' => 'ì', 'í' => 'í',
256
- 'î' => 'î', 'ï' => 'ï', 'ð' => 'ð', 'ñ' => 'ñ', 'ò' => 'ò',
257
- 'ó' => 'ó', 'ô' => 'ô', 'õ' => 'õ', 'ö' => 'ö', '÷' => '÷',
258
- 'ø' => 'ø', 'ù' => 'ù', 'ú' => 'ú', 'û' => 'û', 'ü' => 'ü',
259
- 'ý' => 'ý', 'þ' => 'þ', 'ÿ' => 'ÿ',
464
+ '‹' => '',
465
+ '›' => '›',
466
+ '“' => '',
467
+ '‚' => '',
468
+ '„' => '',
469
+ '‘' => '',
470
+ '–' => '',
471
+ '—' => '',
472
+ '’' => '',
473
+  ' => 'à',
474
+ 'á' => 'á',
475
+ 'â' => 'â',
476
+ 'ã' => 'ã',
477
+ 'ä' => 'ä',
478
+ 'Ã¥' => 'å',
479
+ 'æ' => 'æ',
480
+ 'ç' => 'ç',
481
+ 'è' => 'è',
482
+ 'é' => 'é',
483
+ 'ê' => 'ê',
484
+ 'ë' => 'ë',
485
+ 'ì' => 'ì',
486
+ 'í' => 'í',
487
+ 'î' => 'î',
488
+ 'ï' => 'ï',
489
+ 'ð' => 'ð',
490
+ 'ñ' => 'ñ',
491
+ 'ò' => 'ò',
492
+ 'ó' => 'ó',
493
+ 'ô' => 'ô',
494
+ 'õ' => 'õ',
495
+ 'ö' => 'ö',
496
+ '÷' => '÷',
497
+ 'ø' => 'ø',
498
+ 'ù' => 'ù',
499
+ 'ú' => 'ú',
500
+ 'û' => 'û',
501
+ 'ü' => 'ü',
502
+ 'ý' => 'ý',
503
+ 'þ' => 'þ',
504
+ 'ÿ' => 'ÿ',
505
+ 'ß' => 'ß',
506
+ 'ÃŒ' => 'Ì',
507
+ 'Ãœ' => 'Ü',
508
+ 'Ê' => 'Ê',
509
+ 'Ú' => 'Ú',
510
+ 'ÃŽ' => 'Î',
511
+ 'Þ' => 'Þ',
512
+ 'Ã' => 'Ã',
513
+ 'È' => 'È',
514
+ 'Ø' => 'Ø',
515
+ 'Ö' => 'Ö',
516
+ '×' => '×',
517
+ 'Ñ' => 'Ñ',
518
+ 'Ã’' => 'Ò',
519
+ 'Â' => 'Â',
520
+ 'Ó' => 'Ó',
521
+ 'Ô' => 'Ô',
522
+ 'Ä' => 'Ä',
523
+ 'Æ' => 'Æ',
524
+ 'Ç' => 'Ç',
525
+ 'Õ' => 'Õ',
526
+ 'Ã…' => 'Å',
527
+ 'É' => 'É',
528
+ 'Ë' => 'Ë',
529
+ 'Û' => 'Û',
530
+ 'À' => 'À',
531
+ 'Ù' => 'Ù',
532
+ 'Ã�' => 'Á',
533
+ 'Å ' => 'Š',
534
+ 'Å¡' => 'š',
535
+ 'Ÿ' => 'Ÿ',
536
+ 'Ž' => 'Ž',
537
+ 'ž' => 'ž',
538
+ 'Å’' => 'Œ',
539
+ 'Å“' => 'œ',
540
+ 'Æ’' => 'ƒ',
541
+ 'Ëœ' => '˜',
542
+ 'ˆ' => 'ˆ',
260
543
  "\x00" => '' # Manually added to avoid Bad Argument exception
261
544
  }.freeze
262
545