string_utils 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/Gemfile.lock +1 -1
- data/lib/string_utils.rb +17 -1
- data/lib/string_utils/transliteration.rb +89 -0
- data/lib/string_utils/version.rb +1 -1
- data/test/urlify_spec.rb +19 -0
- metadata +5 -3
data/Gemfile.lock
CHANGED
data/lib/string_utils.rb
CHANGED
@@ -4,11 +4,14 @@ if RUBY_VERSION < '1.9'
|
|
4
4
|
require 'active_support/core_ext/string/multibyte'
|
5
5
|
end
|
6
6
|
|
7
|
+
require 'string_utils/transliteration'
|
7
8
|
|
8
|
-
|
9
|
+
|
10
|
+
# StringUtils is a library that provides various handy string manipulation methods
|
9
11
|
# Example usage:
|
10
12
|
# * StringUtils.truncate("hello world", 10, "...") #=> "hello..."
|
11
13
|
# * StringUtils.normalize_name "\302\240 Gran Via/Avda.de Asturias " #=> "Gran Via / Avda. de Asturias"
|
14
|
+
# * StringUtils.urlify("waßer") #=> "wasser"
|
12
15
|
module StringUtils
|
13
16
|
extend self
|
14
17
|
|
@@ -19,6 +22,19 @@ module StringUtils
|
|
19
22
|
WHITESPACES = /#{WHITESPACE_MATCHER}+/
|
20
23
|
|
21
24
|
|
25
|
+
# Converts a string to a nicely readable URL
|
26
|
+
# opts:
|
27
|
+
# :default_replacement -- string to use for unknown characters (Default: "")
|
28
|
+
# :whitespace_replacement -- string to use to replace whitespace+ (Default: "-")
|
29
|
+
def urlify(string, opts = {})
|
30
|
+
opts = {:whitespace_replacement => '-', :default_replacement => ""}.merge(opts)
|
31
|
+
string = string.gsub(WHITESPACES, opts[:whitespace_replacement])
|
32
|
+
string.strip!
|
33
|
+
string.gsub!(/[^\x00-\x7f]/u) { |char| TRANSLITERATIONS[char] || opts[:default_replacement] }
|
34
|
+
string.gsub!(/[^a-z0-9\-+_]/, opts[:default_replacement])
|
35
|
+
string
|
36
|
+
end
|
37
|
+
|
22
38
|
# Normalizes whitespace
|
23
39
|
# "a , a" => "a, a"
|
24
40
|
# "a ,a" => "a, a"
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module StringUtils
|
2
|
+
# Based on transliteration table from i18n v0.5.0
|
3
|
+
TRANSLITERATIONS = {
|
4
|
+
# Latin
|
5
|
+
"À" =>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
|
6
|
+
"Ç" =>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
|
7
|
+
"Î" =>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
|
8
|
+
"Õ" =>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
|
9
|
+
"Ü" =>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
|
10
|
+
"ã" =>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
|
11
|
+
"ê" =>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
|
12
|
+
"ñ" =>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
|
13
|
+
"ù" =>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
|
14
|
+
"Ā" =>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
|
15
|
+
"ć" =>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
|
16
|
+
"Ď" =>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
|
17
|
+
"ĕ" =>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
|
18
|
+
"Ĝ" =>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
|
19
|
+
"ģ" =>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
|
20
|
+
"Ī" =>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
|
21
|
+
"ı" =>"i", "Ĳ"=>"IJ", "ĳ"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
|
22
|
+
"ĸ" =>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
|
23
|
+
"Ŀ" =>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
|
24
|
+
"ņ" =>"n", "Ň"=>"N", "ň"=>"n", "ŉ"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
|
25
|
+
"Ō" =>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
|
26
|
+
"œ" =>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
|
27
|
+
"Ś" =>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
|
28
|
+
"š" =>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
|
29
|
+
"Ũ" =>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
|
30
|
+
"ů" =>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
|
31
|
+
"Ŷ" =>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
|
32
|
+
"Ž" =>"Z", "ž"=>"z",
|
33
|
+
|
34
|
+
# Cyrillic
|
35
|
+
"Ґ" =>"G", "Ё"=>"YO", "Є"=>"E", "Ї"=>"YI", "І"=>"I",
|
36
|
+
"А" =>"A", "Б"=>"B", "В"=>"V", "Г"=>"G",
|
37
|
+
"Д" =>"D", "Е"=>"E", "Ж"=>"ZH", "З"=>"Z", "И"=>"I",
|
38
|
+
"Й" =>"Y", "К"=>"K", "Л"=>"L", "М"=>"M", "Н"=>"N",
|
39
|
+
"О" =>"O", "П"=>"P", "Р"=>"R", "С"=>"S", "Т"=>"T",
|
40
|
+
"У" =>"U", "Ф"=>"F", "Х"=>"H", "Ц"=>"TS", "Ч"=>"CH",
|
41
|
+
"Ш" =>"SH", "Щ"=>"SCH", "Ъ"=>"'", "Ы"=>"Y", "Ь"=>"",
|
42
|
+
"Э" =>"E", "Ю"=>"YU", "Я"=>"YA", "і"=>"i",
|
43
|
+
"ґ" =>"g", "ё"=>"yo", "№"=>"#", "є"=>"e",
|
44
|
+
"ї" =>"yi", "а"=>"a", "б"=>"b",
|
45
|
+
"в" =>"v", "г"=>"g", "д"=>"d", "е"=>"e", "ж"=>"zh",
|
46
|
+
"з" =>"z", "и"=>"i", "й"=>"y", "к"=>"k", "л"=>"l",
|
47
|
+
"м" =>"m", "н"=>"n", "о"=>"o", "п"=>"p", "р"=>"r",
|
48
|
+
"с" =>"s", "т"=>"t", "у"=>"u", "ф"=>"f", "х"=>"h",
|
49
|
+
"ц" =>"ts", "ч"=>"ch", "ш"=>"sh", "щ"=>"sch", "ъ"=>"'",
|
50
|
+
"ы" =>"y", "ь"=>"", "э"=>"e", "ю"=>"yu", "я"=>"ya",
|
51
|
+
|
52
|
+
# Greek
|
53
|
+
'α' => 'a',
|
54
|
+
'η' => 'h',
|
55
|
+
'ν' => 'n',
|
56
|
+
'τ' => 't',
|
57
|
+
'β' => 'b',
|
58
|
+
'θ' => 'th',
|
59
|
+
'ξ' => 'x',
|
60
|
+
'υ' => 'y',
|
61
|
+
'γ' => 'g',
|
62
|
+
'ι' => 'i',
|
63
|
+
'ο' => 'o',
|
64
|
+
'φ' => 'f',
|
65
|
+
'δ' => 'd',
|
66
|
+
'κ' => 'k',
|
67
|
+
'π' => 'p',
|
68
|
+
'χ' => 'ch',
|
69
|
+
'ε' => 'e',
|
70
|
+
'λ' => 'l',
|
71
|
+
'ρ' => 'r',
|
72
|
+
'ψ' => 'ps',
|
73
|
+
'ζ' => 'z',
|
74
|
+
'μ' => 'm',
|
75
|
+
'σ' => 's',
|
76
|
+
'ω' => 'w',
|
77
|
+
'Θ' => 'Th',
|
78
|
+
'Ξ' => 'X',
|
79
|
+
'Γ' => 'G',
|
80
|
+
'Φ' => 'F',
|
81
|
+
'Δ' => 'D',
|
82
|
+
'Π' => 'P',
|
83
|
+
'Λ' => 'L',
|
84
|
+
'Ρ' => 'R',
|
85
|
+
'Ψ' => 'Ps',
|
86
|
+
'Σ' => 'S',
|
87
|
+
'Ω' => 'W'
|
88
|
+
}
|
89
|
+
end
|
data/lib/string_utils/version.rb
CHANGED
data/test/urlify_spec.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "string_utils"
|
3
|
+
|
4
|
+
describe "StringUtils" do
|
5
|
+
describe "#urlify" do
|
6
|
+
|
7
|
+
it 'replaces known entities' do
|
8
|
+
StringUtils.urlify("tschuß").should == "tschuss"
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'uses :whitespace_replacement' do
|
12
|
+
StringUtils.urlify("a b", :whitespace_replacement => "-x-").should == "a-x-b"
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'uses :default_replacement' do
|
16
|
+
StringUtils.urlify("%", :default_replacement => "A").should == "A"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 1.0.3
|
9
|
+
- 4
|
10
|
+
version: 1.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gleb Mazovetskiy
|
@@ -88,10 +88,12 @@ files:
|
|
88
88
|
- README
|
89
89
|
- Rakefile
|
90
90
|
- lib/string_utils.rb
|
91
|
+
- lib/string_utils/transliteration.rb
|
91
92
|
- lib/string_utils/version.rb
|
92
93
|
- string_utils.gemspec
|
93
94
|
- test/normalize_name_spec.rb
|
94
95
|
- test/truncate_spec.rb
|
96
|
+
- test/urlify_spec.rb
|
95
97
|
has_rdoc: true
|
96
98
|
homepage: http://github.com/glebm/string_utils
|
97
99
|
licenses: []
|