name_tamer 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module NameTamer
2
4
  class Name
3
5
  # References:
@@ -215,7 +217,7 @@ module NameTamer
215
217
  lowercase = @last_name.downcase
216
218
  uppercase = @last_name.upcase
217
219
  @last_name = name_case(lowercase) if [uppercase, lowercase].include?(@last_name)
218
- @nice_name = "#{@remainder} #{@last_name}"
220
+ @nice_name = +"#{@remainder} #{@last_name}"
219
221
  end
220
222
 
221
223
  # Conjoin compound names with non-breaking spaces
@@ -249,7 +251,7 @@ module NameTamer
249
251
  return unless first_name || last_name
250
252
 
251
253
  separator = first_name && last_name ? ' ' : ''
252
- @simple_name = "#{first_name}#{separator}#{last_name}"
254
+ @simple_name = +"#{first_name}#{separator}#{last_name}"
253
255
  end
254
256
 
255
257
  def find_first_usable_name(parts)
@@ -1,4 +1,4 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  class String
4
4
  unless respond_to? :presence
@@ -23,7 +23,8 @@ class String
23
23
 
24
24
  # Ensure commas have exactly one space after them
25
25
  def space_around_comma!
26
- substitute!(/[[:space:]]*,[[:space:]]*/, ', ')
26
+ substitute!(/[[:space:]]*,[[:space:]]*/, ',
27
+ ')
27
28
  end
28
29
 
29
30
  # Change some characters embedded in words to our separator character
@@ -36,10 +37,7 @@ class String
36
37
  # This might introduce UTF-8 invalid byte sequence
37
38
  # so we take precautions
38
39
  def safe_unescape!
39
- string = URI.unescape(self)
40
- rescue Encoding::CompatibilityError # e.g. "\u2019%80"
41
- return self
42
- else
40
+ string = CGI.unescape(gsub('+', '%2B'))
43
41
  return self if self == string
44
42
  replace string
45
43
  ensure_safe!
@@ -177,51 +175,240 @@ class String
177
175
  gsub!(pattern, replacement) || self
178
176
  end
179
177
 
180
- NONBREAKING_SPACE = "\u00a0".freeze
181
- ASCII_SPACE = ' '.freeze
178
+ NONBREAKING_SPACE = "\u00a0"
179
+ ASCII_SPACE = ' '
182
180
 
183
181
  COMPOUND_NAMES = [
184
- 'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
185
- 'Baron Cohen', 'Strang Steel', 'Wingfield Digby',
186
- 'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
182
+ # Known families with a space in their surname
183
+ 'Baron Cohen',
184
+ 'Bonham Carter',
185
+ 'Holmes a Court',
186
+ 'Holmes à Court',
187
+ 'Lane Fox',
188
+ 'Lloyd Webber',
189
+ 'Pitt Rivers',
190
+ 'Sebag Montefiore',
191
+ 'Strang Steel',
192
+ 'Wedgwood Benn',
193
+ 'Wingfield Digby',
194
+ # Sometimes companies appear as people
195
+ 'Corporation Company',
196
+ 'Corporation System',
197
+ 'Incorporations Limited',
198
+ 'Service Company',
187
199
  ].freeze
188
200
 
189
201
  NAME_MODIFIERS = [
190
- 'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
191
- 'St[\.]?', 'Zur'
202
+ 'Al',
203
+ 'Ap',
204
+ 'Ben',
205
+ 'D[aeiou]',
206
+ 'D[ao]s',
207
+ 'De[lrn]',
208
+ 'Dell[ae]',
209
+ 'El',
210
+ 'L[eo]',
211
+ 'La',
212
+ 'Of',
213
+ 'San',
214
+ 'St[\.]?',
215
+ 'V[ao]n',
216
+ 'Zur',
192
217
  ].freeze
193
218
 
194
219
  # Transliterations (like the i18n defaults)
195
220
  # see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
196
221
  APPROXIMATIONS = {
197
- 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE',
198
- 'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I',
199
- 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O',
200
- 'Õ' => 'O', 'Ö' => 'O', '×' => 'x', 'Ø' => 'O', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
201
- 'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'Th', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a',
202
- 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
203
- 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd',
204
- 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
205
- 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y',
206
- 'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a', 'Ć' => 'C',
207
- 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c', 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
208
- 'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd', 'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E',
209
- 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
210
- 'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G',
211
- 'ģ' => 'g', 'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h', 'Ĩ' => 'I', 'ĩ' => 'i',
212
- 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I',
213
- 'ı' => 'i', 'IJ' => 'IJ', 'ij' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k',
214
- 'ĸ' => 'k', 'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l',
215
- 'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L', 'ł' => 'l', 'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N',
216
- 'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n', 'ʼn' => "'n", 'Ŋ' => 'NG', 'ŋ' => 'ng',
217
- 'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o', 'Œ' => 'OE',
218
- 'œ' => 'oe', 'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
219
- 'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's', 'Š' => 'S',
220
- 'š' => 's', 'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
221
- 'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U',
222
- 'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w',
223
- 'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
224
- 'Ž' => 'Z', 'ž' => 'z'
222
+ 'İ' => 'I',
223
+ '×' => 'x',
224
+ 'ß' => 'ss',
225
+ 'À' => 'A',
226
+ 'à' => 'a',
227
+ 'Á' => 'A',
228
+ 'á' => 'a',
229
+ 'Â' => 'A',
230
+ 'â' => 'a',
231
+ 'Ã' => 'A',
232
+ 'ã' => 'a',
233
+ 'Ä' => 'A',
234
+ 'ä' => 'a',
235
+ 'Å' => 'A',
236
+ 'å' => 'a',
237
+ 'Æ' => 'AE',
238
+ 'æ' => 'ae',
239
+ 'Ç' => 'C',
240
+ 'ç' => 'c',
241
+ 'È' => 'E',
242
+ 'è' => 'e',
243
+ 'É' => 'E',
244
+ 'é' => 'e',
245
+ 'Ê' => 'E',
246
+ 'ê' => 'e',
247
+ 'Ë' => 'E',
248
+ 'ë' => 'e',
249
+ 'Ì' => 'I',
250
+ 'ì' => 'i',
251
+ 'Í' => 'I',
252
+ 'í' => 'i',
253
+ 'Î' => 'I',
254
+ 'î' => 'i',
255
+ 'Ï' => 'I',
256
+ 'ï' => 'i',
257
+ 'Ð' => 'D',
258
+ 'ð' => 'd',
259
+ 'Ñ' => 'N',
260
+ 'ñ' => 'n',
261
+ 'Ò' => 'O',
262
+ 'ò' => 'o',
263
+ 'Ó' => 'O',
264
+ 'ó' => 'o',
265
+ 'Ô' => 'O',
266
+ 'ô' => 'o',
267
+ 'Õ' => 'O',
268
+ 'õ' => 'o',
269
+ 'Ö' => 'O',
270
+ 'ö' => 'o',
271
+ 'Ø' => 'O',
272
+ 'ø' => 'o',
273
+ 'Ù' => 'U',
274
+ 'ù' => 'u',
275
+ 'Ú' => 'U',
276
+ 'ú' => 'u',
277
+ 'Û' => 'U',
278
+ 'û' => 'u',
279
+ 'Ü' => 'U',
280
+ 'ü' => 'u',
281
+ 'Ý' => 'Y',
282
+ 'ý' => 'y',
283
+ 'Þ' => 'Th',
284
+ 'þ' => 'th',
285
+ 'ÿ' => 'y',
286
+ 'Ÿ' => 'Y',
287
+ 'Ā' => 'A',
288
+ 'ā' => 'a',
289
+ 'Ă' => 'A',
290
+ 'ă' => 'a',
291
+ 'Ą' => 'A',
292
+ 'ą' => 'a',
293
+ 'Ć' => 'C',
294
+ 'ć' => 'c',
295
+ 'Ĉ' => 'C',
296
+ 'ĉ' => 'c',
297
+ 'Ċ' => 'C',
298
+ 'ċ' => 'c',
299
+ 'Č' => 'C',
300
+ 'č' => 'c',
301
+ 'Ď' => 'D',
302
+ 'ď' => 'd',
303
+ 'Đ' => 'D',
304
+ 'đ' => 'd',
305
+ 'Ē' => 'E',
306
+ 'ē' => 'e',
307
+ 'Ĕ' => 'E',
308
+ 'ĕ' => 'e',
309
+ 'Ė' => 'E',
310
+ 'ė' => 'e',
311
+ 'Ę' => 'E',
312
+ 'ę' => 'e',
313
+ 'Ě' => 'E',
314
+ 'ě' => 'e',
315
+ 'Ĝ' => 'G',
316
+ 'ĝ' => 'g',
317
+ 'Ğ' => 'G',
318
+ 'ğ' => 'g',
319
+ 'Ġ' => 'G',
320
+ 'ġ' => 'g',
321
+ 'Ģ' => 'G',
322
+ 'ģ' => 'g',
323
+ 'Ĥ' => 'H',
324
+ 'ĥ' => 'h',
325
+ 'Ħ' => 'H',
326
+ 'ħ' => 'h',
327
+ 'Ĩ' => 'I',
328
+ 'ĩ' => 'i',
329
+ 'Ī' => 'I',
330
+ 'ī' => 'i',
331
+ 'Ĭ' => 'I',
332
+ 'ĭ' => 'i',
333
+ 'Į' => 'I',
334
+ 'į' => 'i',
335
+ 'ı' => 'i',
336
+ 'IJ' => 'IJ',
337
+ 'ij' => 'ij',
338
+ 'Ĵ' => 'J',
339
+ 'ĵ' => 'j',
340
+ 'Ķ' => 'K',
341
+ 'ķ' => 'k',
342
+ 'ĸ' => 'k',
343
+ 'Ĺ' => 'L',
344
+ 'ĺ' => 'l',
345
+ 'Ļ' => 'L',
346
+ 'ļ' => 'l',
347
+ 'Ľ' => 'L',
348
+ 'ľ' => 'l',
349
+ 'Ŀ' => 'L',
350
+ 'ŀ' => 'l',
351
+ 'Ł' => 'L',
352
+ 'ł' => 'l',
353
+ 'Ń' => 'N',
354
+ 'ń' => 'n',
355
+ 'Ņ' => 'N',
356
+ 'ņ' => 'n',
357
+ 'Ň' => 'N',
358
+ 'ň' => 'n',
359
+ 'ʼn' => "'n",
360
+ 'Ŋ' => 'NG',
361
+ 'ŋ' => 'ng',
362
+ 'Ō' => 'O',
363
+ 'ō' => 'o',
364
+ 'Ŏ' => 'O',
365
+ 'ŏ' => 'o',
366
+ 'Ő' => 'O',
367
+ 'ő' => 'o',
368
+ 'Œ' => 'OE',
369
+ 'œ' => 'oe',
370
+ 'Ŕ' => 'R',
371
+ 'ŕ' => 'r',
372
+ 'Ŗ' => 'R',
373
+ 'ŗ' => 'r',
374
+ 'Ř' => 'R',
375
+ 'ř' => 'r',
376
+ 'Ś' => 'S',
377
+ 'ś' => 's',
378
+ 'Ŝ' => 'S',
379
+ 'ŝ' => 's',
380
+ 'Ş' => 'S',
381
+ 'ş' => 's',
382
+ 'Š' => 'S',
383
+ 'š' => 's',
384
+ 'Ţ' => 'T',
385
+ 'ţ' => 't',
386
+ 'Ť' => 'T',
387
+ 'ť' => 't',
388
+ 'Ŧ' => 'T',
389
+ 'ŧ' => 't',
390
+ 'Ũ' => 'U',
391
+ 'ũ' => 'u',
392
+ 'Ū' => 'U',
393
+ 'ū' => 'u',
394
+ 'Ŭ' => 'U',
395
+ 'ŭ' => 'u',
396
+ 'Ů' => 'U',
397
+ 'ů' => 'u',
398
+ 'Ű' => 'U',
399
+ 'ű' => 'u',
400
+ 'Ų' => 'U',
401
+ 'ų' => 'u',
402
+ 'Ŵ' => 'W',
403
+ 'ŵ' => 'w',
404
+ 'Ŷ' => 'Y',
405
+ 'ŷ' => 'y',
406
+ 'Ź' => 'Z',
407
+ 'ź' => 'z',
408
+ 'Ż' => 'Z',
409
+ 'ż' => 'z',
410
+ 'ž' => 'z',
411
+ 'Ž' => 'Z',
225
412
  }.freeze
226
413
 
227
414
  # When strings are mistakenly encoded as single-byte character sets, instead
@@ -229,34 +416,130 @@ class String
229
416
  # and fix
230
417
  # Useful table here http://www.i18nqa.com/debug/utf8-debug.html
231
418
  BAD_ENCODING = {
232
- '€' => '', '‚' => '‚', 'Æ’' => 'ƒ', '„' => '„', '…' => '…',
233
- '†' => '', '‡' => '‡', 'ˆ' => 'ˆ', '‰' => '‰', 'Å ' => 'Š',
234
- '‹' => '', 'Å’' => 'Œ', 'Ž' => 'Ž', '‘' => '‘', '’' => '’',
235
- '“' => '',
419
+ "\xC3\x8D" => 'Í',
420
+ "\xC3\x8F" => 'Ï',
421
+ "\xC3\x90" => 'Ð',
422
+ "\xC3\x9D" => 'Ý',
423
+ ' ' => ' ',
424
+ '¡' => '¡',
425
+ '¢' => '¢',
426
+ '£' => '£',
427
+ '¤' => '¤',
428
+ 'Â¥' => '¥',
429
+ '¦' => '¦',
430
+ '§' => '§',
431
+ '¨' => '¨',
432
+ '©' => '©',
433
+ 'ª' => 'ª',
434
+ '«' => '«',
435
+ '¬' => '¬',
436
+ '­' => '­',
437
+ '®' => '®',
438
+ '¯' => '¯',
439
+ '°' => '°',
440
+ '±' => '±',
441
+ '²' => '²',
442
+ '³' => '³',
443
+ '´' => '´',
444
+ 'µ' => 'µ',
445
+ '¶' => '¶',
446
+ '·' => '·',
447
+ '¸' => '¸',
448
+ '¹' => '¹',
449
+ 'º' => 'º',
450
+ '»' => '»',
451
+ '¼' => '¼',
452
+ '½' => '½',
453
+ '¾' => '¾',
454
+ '¿' => '¿',
455
+ '€' => '€',
456
+ 'â„¢' => '™',
236
457
  '”' => '”', # Note the invisible Ux009D in the key
458
+ '†' => '†',
459
+ '‡' => '‡',
460
+ '•' => '•',
461
+ '…' => '…',
462
+ '‰' => '‰',
237
463
  '′' => '′', # Manually added. Some seem to use this instead of Ux2019
238
- '•' => '', '–' => '–', '—' => '—',
239
- 'Ëœ' => '˜', 'â„¢' => '™', 'Å¡' => 'š', '›' => '›', 'Å“' => 'œ',
240
- 'ž' => 'ž', 'Ÿ' => 'Ÿ', ' ' => ' ', '¡' => '¡', '¢' => '¢',
241
- '£' => '£', '¤' => '¤', 'Â¥' => '¥', '¦' => '¦', '§' => '§',
242
- '¨' => '¨', '©' => '©', 'ª' => 'ª', '«' => '«', '¬' => '¬',
243
- '­' => '­', '®' => '®', '¯' => '¯', '°' => '°', '±' => '±',
244
- '²' => '²', '³' => '³', '´' => '´', 'µ' => 'µ', '¶' => '¶',
245
- '·' => '·', '¸' => '¸', '¹' => '¹', 'º' => 'º', '»' => '»',
246
- '¼' => '¼', '½' => '½', '¾' => '¾', '¿' => '¿', 'À' => 'À',
247
- 'Ã�' => 'Á', 'Â' => 'Â', 'Ã' => 'Ã', 'Ä' => 'Ä', 'Ã…' => 'Å',
248
- 'Æ' => 'Æ', 'Ç' => 'Ç', 'È' => 'È', 'É' => 'É', 'Ê' => 'Ê',
249
- 'Ë' => 'Ë', 'ÃŒ' => 'Ì', "\xC3\x8D" => 'Í', 'ÃŽ' => 'Î', "\xC3\x8F" => 'Ï',
250
- "\xC3\x90" => 'Ð', 'Ñ' => 'Ñ', 'Ã’' => 'Ò', 'Ó' => 'Ó', 'Ô' => 'Ô',
251
- 'Õ' => 'Õ', 'Ö' => 'Ö', '×' => '×', 'Ø' => 'Ø', 'Ù' => 'Ù',
252
- 'Ú' => 'Ú', 'Û' => 'Û', 'Ü' => 'Ü', "\xC3\x9D" => 'Ý', 'Þ' => 'Þ',
253
- 'ß' => 'ß', 'à' => 'à', 'á' => 'á', 'â' => 'â', 'ã' => 'ã',
254
- 'ä' => 'ä', 'Ã¥' => 'å', 'æ' => 'æ', 'ç' => 'ç', 'è' => 'è',
255
- 'é' => 'é', 'ê' => 'ê', 'ë' => 'ë', 'ì' => 'ì', 'í' => 'í',
256
- 'î' => 'î', 'ï' => 'ï', 'ð' => 'ð', 'ñ' => 'ñ', 'ò' => 'ò',
257
- 'ó' => 'ó', 'ô' => 'ô', 'õ' => 'õ', 'ö' => 'ö', '÷' => '÷',
258
- 'ø' => 'ø', 'ù' => 'ù', 'ú' => 'ú', 'û' => 'û', 'ü' => 'ü',
259
- 'ý' => 'ý', 'þ' => 'þ', 'ÿ' => 'ÿ',
464
+ '‹' => '',
465
+ '›' => '›',
466
+ '“' => '',
467
+ '‚' => '',
468
+ '„' => '',
469
+ '‘' => '',
470
+ '–' => '',
471
+ '—' => '',
472
+ '’' => '',
473
+  ' => 'à',
474
+ 'á' => 'á',
475
+ 'â' => 'â',
476
+ 'ã' => 'ã',
477
+ 'ä' => 'ä',
478
+ 'Ã¥' => 'å',
479
+ 'æ' => 'æ',
480
+ 'ç' => 'ç',
481
+ 'è' => 'è',
482
+ 'é' => 'é',
483
+ 'ê' => 'ê',
484
+ 'ë' => 'ë',
485
+ 'ì' => 'ì',
486
+ 'í' => 'í',
487
+ 'î' => 'î',
488
+ 'ï' => 'ï',
489
+ 'ð' => 'ð',
490
+ 'ñ' => 'ñ',
491
+ 'ò' => 'ò',
492
+ 'ó' => 'ó',
493
+ 'ô' => 'ô',
494
+ 'õ' => 'õ',
495
+ 'ö' => 'ö',
496
+ '÷' => '÷',
497
+ 'ø' => 'ø',
498
+ 'ù' => 'ù',
499
+ 'ú' => 'ú',
500
+ 'û' => 'û',
501
+ 'ü' => 'ü',
502
+ 'ý' => 'ý',
503
+ 'þ' => 'þ',
504
+ 'ÿ' => 'ÿ',
505
+ 'ß' => 'ß',
506
+ 'ÃŒ' => 'Ì',
507
+ 'Ü' => 'Ü',
508
+ 'Ê' => 'Ê',
509
+ 'Ú' => 'Ú',
510
+ 'ÃŽ' => 'Î',
511
+ 'Þ' => 'Þ',
512
+ 'Ã' => 'Ã',
513
+ 'È' => 'È',
514
+ 'Ø' => 'Ø',
515
+ 'Ö' => 'Ö',
516
+ '×' => '×',
517
+ 'Ñ' => 'Ñ',
518
+ 'Ã’' => 'Ò',
519
+ 'Â' => 'Â',
520
+ 'Ó' => 'Ó',
521
+ 'Ô' => 'Ô',
522
+ 'Ä' => 'Ä',
523
+ 'Æ' => 'Æ',
524
+ 'Ç' => 'Ç',
525
+ 'Õ' => 'Õ',
526
+ 'Ã…' => 'Å',
527
+ 'É' => 'É',
528
+ 'Ë' => 'Ë',
529
+ 'Û' => 'Û',
530
+ 'À' => 'À',
531
+ 'Ù' => 'Ù',
532
+ 'Ã�' => 'Á',
533
+ 'Å ' => 'Š',
534
+ 'Å¡' => 'š',
535
+ 'Ÿ' => 'Ÿ',
536
+ 'Ž' => 'Ž',
537
+ 'ž' => 'ž',
538
+ 'Å’' => 'Œ',
539
+ 'Å“' => 'œ',
540
+ 'Æ’' => 'ƒ',
541
+ 'Ëœ' => '˜',
542
+ 'ˆ' => 'ˆ',
260
543
  "\x00" => '' # Manually added to avoid Bad Argument exception
261
544
  }.freeze
262
545