name-tamer 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/name-tamer.rb +50 -8
- data/lib/name-tamer/version.rb +1 -1
- data/spec/name_tamer_spec.rb +3 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93cfcd5765b0b8ea84c41b3648bb45929a1cee52
|
4
|
+
data.tar.gz: 0a43481b5c7d7f20d85c13ce2f654e37f860fe19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e24644fd3e9723045ada62d2a29eebfa4aaecd26f6b04c2f0eb90e2030a3a97f6ac5ff46e9f24cc9bb485178d1e12fb0eae8cd5e103cf841fe95742b3f05090b
|
7
|
+
data.tar.gz: 6812d69683174b11c3173b1a371ad3543e65ced985984a3711e41a2c852533f687777f14b97fe6e343d48fb437d0d2839ac3b5515ce28042348eae4f4187bf9b
|
data/Gemfile.lock
CHANGED
data/lib/name-tamer.rb
CHANGED
@@ -367,25 +367,34 @@ class NameTamer
|
|
367
367
|
|
368
368
|
# First we unescape any pct-encoded characters. These might turn into
|
369
369
|
# things we want to alter for the slug, like whitespace (e.g. %20)
|
370
|
-
|
370
|
+
new_string = URI.unescape(string)
|
371
371
|
|
372
372
|
# Then we change any whitespace into our separator character
|
373
|
-
|
373
|
+
new_string.gsub!(/\s+/, sep)
|
374
374
|
|
375
|
-
#
|
376
|
-
|
375
|
+
# Change any dots embedded in words to our separator character
|
376
|
+
# e.g. example.com -> example-com
|
377
|
+
new_string.gsub!(/(?<!\s)\.(?!\s)/, sep)
|
378
|
+
|
379
|
+
# Then we strip any other illegal characters out completely
|
380
|
+
new_string.gsub!(filter, '')
|
377
381
|
|
378
382
|
# Make sure separators are not where they shouldn't be
|
379
383
|
unless sep.nil? || sep.empty?
|
380
384
|
re_sep = Regexp.escape(sep)
|
381
385
|
# No more than one of the separator in a row.
|
382
|
-
|
386
|
+
new_string.gsub!(/#{re_sep}{2,}/, sep)
|
383
387
|
# Remove leading/trailing separator.
|
384
|
-
|
388
|
+
new_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '')
|
385
389
|
end
|
386
390
|
|
387
|
-
#
|
388
|
-
|
391
|
+
# Any characters that resemble latin characters might usefully be
|
392
|
+
# transliterated into ones that are easy to type on an anglophone
|
393
|
+
# keyboard.
|
394
|
+
new_string.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
|
395
|
+
|
396
|
+
# downcase any latin characters
|
397
|
+
new_string.downcase
|
389
398
|
end
|
390
399
|
|
391
400
|
#--------------------------------------------------------
|
@@ -397,6 +406,39 @@ class NameTamer
|
|
397
406
|
ADFIX_JOINERS = "[#{ASCII_SPACE}-]"
|
398
407
|
SLUG_DELIMITER = '-'
|
399
408
|
|
409
|
+
# Transliterations (like the i18n defaults)
|
410
|
+
# see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
|
411
|
+
APPROXIMATIONS = {
|
412
|
+
"À"=>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
|
413
|
+
"Ç"=>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
|
414
|
+
"Î"=>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
|
415
|
+
"Õ"=>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
|
416
|
+
"Ü"=>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
|
417
|
+
"ã"=>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
|
418
|
+
"ê"=>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
|
419
|
+
"ñ"=>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
|
420
|
+
"ù"=>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
|
421
|
+
"Ā"=>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
|
422
|
+
"ć"=>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
|
423
|
+
"Ď"=>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
|
424
|
+
"ĕ"=>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
|
425
|
+
"Ĝ"=>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
|
426
|
+
"ģ"=>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
|
427
|
+
"Ī"=>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
|
428
|
+
"ı"=>"i", "Ĳ"=>"IJ", "ĳ"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
|
429
|
+
"ĸ"=>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
|
430
|
+
"Ŀ"=>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
|
431
|
+
"ņ"=>"n", "Ň"=>"N", "ň"=>"n", "ŉ"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
|
432
|
+
"Ō"=>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
|
433
|
+
"œ"=>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
|
434
|
+
"Ś"=>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
|
435
|
+
"š"=>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
|
436
|
+
"Ũ"=>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
|
437
|
+
"ů"=>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
|
438
|
+
"Ŷ"=>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
|
439
|
+
"Ž"=>"Z", "ž"=>"z"
|
440
|
+
}
|
441
|
+
|
400
442
|
# Constants for parameterizing Unicode strings for IRIs
|
401
443
|
#
|
402
444
|
# Allowed characters in an IRI segment are defined by RFC 3987
|
data/lib/name-tamer/version.rb
CHANGED
data/spec/name_tamer_spec.rb
CHANGED
@@ -126,12 +126,15 @@ describe NameTamer do
|
|
126
126
|
{ n:'John Smith', t: :nonsense, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
127
127
|
{ n:'John Smith', t: Kernel, nn:'John Smith', sn:'John Smith', s:'john-smith' },
|
128
128
|
{ n:'Ms Jane Smith', t: :person, nn:'Jane Smith', sn:'Jane Smith', s:'jane-smith' },
|
129
|
+
{ n:'example.com', t: :organization, nn:'example.com', sn:'example.com', s:'example-com' },
|
130
|
+
{ n:'Hermann Müller', t: :person, nn: 'Hermann Müller', sn: 'Hermann Müller', s:'hermann-muller'}
|
129
131
|
]
|
130
132
|
end
|
131
133
|
|
132
134
|
it "makes a slug from #{name}" do
|
133
135
|
names.each do |name_data|
|
134
136
|
name = name_data[:n]
|
137
|
+
#-puts NameTamer[name, contact_type:name_data[:t]].simple_name # debug
|
135
138
|
NameTamer[name, contact_type:name_data[:t]].slug.should == name_data[:s]
|
136
139
|
end
|
137
140
|
end
|