string_utils 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile.lock +1 -1
- data/lib/string_utils.rb +17 -1
- data/lib/string_utils/transliteration.rb +89 -0
- data/lib/string_utils/version.rb +1 -1
- data/test/urlify_spec.rb +19 -0
- metadata +5 -3
data/Gemfile.lock
CHANGED
data/lib/string_utils.rb
CHANGED
@@ -4,11 +4,14 @@ if RUBY_VERSION < '1.9'
|
|
4
4
|
require 'active_support/core_ext/string/multibyte'
|
5
5
|
end
|
6
6
|
|
7
|
+
require 'string_utils/transliteration'
|
7
8
|
|
8
|
-
|
9
|
+
|
10
|
+
# StringUtils is a library that provides various handy string manipulation methods
|
9
11
|
# Example usage:
|
10
12
|
# * StringUtils.truncate("hello world", 10, "...") #=> "hello..."
|
11
13
|
# * StringUtils.normalize_name "\302\240 Gran Via/Avda.de Asturias " #=> "Gran Via / Avda. de Asturias"
|
14
|
+
# * StringUtils.urlify("waßer") #=> "wasser"
|
12
15
|
module StringUtils
|
13
16
|
extend self
|
14
17
|
|
@@ -19,6 +22,19 @@ module StringUtils
|
|
19
22
|
WHITESPACES = /#{WHITESPACE_MATCHER}+/
|
20
23
|
|
21
24
|
|
25
|
+
# Converts a string to a nicely readable, lowercase URL fragment.
#
# Processing order:
#   1. strip leading/trailing whitespace and lowercase the input
#   2. replace every whitespace run (WHITESPACES) with :whitespace_replacement
#   3. replace each non-ASCII character with its TRANSLITERATIONS entry
#      (lowercased), or with :default_replacement when there is no entry
#   4. replace every character outside [a-z0-9\-+_] with :default_replacement
#
# string - the String to convert; it is left unmodified
# opts:
#   :default_replacement -- string to use for unknown characters (Default: "")
#   :whitespace_replacement -- string to use to replace whitespace+ (Default: "-")
#
# Returns a new String.
def urlify(string, opts = {})
  opts = {:whitespace_replacement => '-', :default_replacement => ""}.merge(opts)
  # Strip BEFORE substituting whitespace: once the surrounding whitespace has
  # been turned into the replacement there is nothing left to strip, and the
  # result would start/end with the replacement string.
  # Lowercase up front so that plain ASCII capitals survive the final
  # whitelist instead of being silently dropped.
  string = string.strip.downcase
  string.gsub!(WHITESPACES, opts[:whitespace_replacement])
  string.gsub!(/[^\x00-\x7f]/u) do |char|
    ascii = TRANSLITERATIONS[char]
    # Table values may be uppercase ("A", "ZH", ...); lowercase them so they
    # pass the whitelist below. The caller's :default_replacement is used
    # verbatim.
    ascii ? ascii.downcase : opts[:default_replacement]
  end
  string.gsub!(/[^a-z0-9\-+_]/, opts[:default_replacement])
  string
end
|
37
|
+
|
22
38
|
# Normalizes whitespace
|
23
39
|
# "a , a" => "a, a"
|
24
40
|
# "a ,a" => "a, a"
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
module StringUtils
  # Maps a single non-ASCII character to its ASCII replacement string.
  # Based on transliteration table from i18n v0.5.0
  #
  # Every key must be exactly one character: StringUtils.urlify looks up one
  # matched non-ASCII character at a time, so a multi-character key can never
  # be hit. A value may be empty (e.g. the Cyrillic soft sign), which removes
  # the character.
  TRANSLITERATIONS = {
    # Latin
    "À" =>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
    "Ç" =>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
    "Î" =>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
    "Õ" =>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
    "Ü" =>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
    "ã" =>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
    "ê" =>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
    "ñ" =>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
    "ù" =>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
    "Ā" =>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
    "ć" =>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
    "Ď" =>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
    "ĕ" =>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
    "Ĝ" =>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
    "ģ" =>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
    "Ī" =>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
    # The IJ ligatures are the single code points U+0132 / U+0133, not the
    # two ASCII letters.
    "ı" =>"i", "Ĳ"=>"IJ", "ĳ"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
    "ĸ" =>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
    "Ŀ" =>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
    # "ŉ" is the single code point U+0149, not an apostrophe followed by "n".
    "ņ" =>"n", "Ň"=>"N", "ň"=>"n", "ŉ"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
    "Ō" =>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
    "œ" =>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
    "Ś" =>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
    "š" =>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
    "Ũ" =>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
    "ů" =>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
    "Ŷ" =>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
    "Ž" =>"Z", "ž"=>"z",

    # Cyrillic
    "Ґ" =>"G", "Ё"=>"YO", "Є"=>"E", "Ї"=>"YI", "І"=>"I",
    "А" =>"A", "Б"=>"B", "В"=>"V", "Г"=>"G",
    "Д" =>"D", "Е"=>"E", "Ж"=>"ZH", "З"=>"Z", "И"=>"I",
    "Й" =>"Y", "К"=>"K", "Л"=>"L", "М"=>"M", "Н"=>"N",
    "О" =>"O", "П"=>"P", "Р"=>"R", "С"=>"S", "Т"=>"T",
    "У" =>"U", "Ф"=>"F", "Х"=>"H", "Ц"=>"TS", "Ч"=>"CH",
    "Ш" =>"SH", "Щ"=>"SCH", "Ъ"=>"'", "Ы"=>"Y", "Ь"=>"",
    "Э" =>"E", "Ю"=>"YU", "Я"=>"YA", "і"=>"i",
    "ґ" =>"g", "ё"=>"yo", "№"=>"#", "є"=>"e",
    "ї" =>"yi", "а"=>"a", "б"=>"b",
    "в" =>"v", "г"=>"g", "д"=>"d", "е"=>"e", "ж"=>"zh",
    "з" =>"z", "и"=>"i", "й"=>"y", "к"=>"k", "л"=>"l",
    "м" =>"m", "н"=>"n", "о"=>"o", "п"=>"p", "р"=>"r",
    "с" =>"s", "т"=>"t", "у"=>"u", "ф"=>"f", "х"=>"h",
    "ц" =>"ts", "ч"=>"ch", "ш"=>"sh", "щ"=>"sch", "ъ"=>"'",
    "ы" =>"y", "ь"=>"", "э"=>"e", "ю"=>"yu", "я"=>"ya",

    # Greek
    'α' => 'a',
    'η' => 'h',
    'ν' => 'n',
    'τ' => 't',
    'β' => 'b',
    'θ' => 'th',
    'ξ' => 'x',
    'υ' => 'y',
    'γ' => 'g',
    'ι' => 'i',
    'ο' => 'o',
    'φ' => 'f',
    'δ' => 'd',
    'κ' => 'k',
    'π' => 'p',
    'χ' => 'ch',
    'ε' => 'e',
    'λ' => 'l',
    'ρ' => 'r',
    'ψ' => 'ps',
    'ζ' => 'z',
    'μ' => 'm',
    'σ' => 's',
    'ω' => 'w',
    'Θ' => 'Th',
    'Ξ' => 'X',
    'Γ' => 'G',
    'Φ' => 'F',
    'Δ' => 'D',
    'Π' => 'P',
    'Λ' => 'L',
    'Ρ' => 'R',
    'Ψ' => 'Ps',
    'Σ' => 'S',
    'Ω' => 'W'
  }
end
|
data/lib/string_utils/version.rb
CHANGED
data/test/urlify_spec.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "string_utils"
|
3
|
+
|
4
|
+
# Specs for StringUtils.urlify: transliteration of a known character and the
# two replacement options.
describe "StringUtils" do
  describe "#urlify" do

    it 'replaces known entities' do
      slug = StringUtils.urlify("tschuß")
      slug.should == "tschuss"
    end

    it 'uses :whitespace_replacement' do
      options = { :whitespace_replacement => "-x-" }
      StringUtils.urlify("a b", options).should == "a-x-b"
    end

    it 'uses :default_replacement' do
      options = { :default_replacement => "A" }
      StringUtils.urlify("%", options).should == "A"
    end
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 1.0.3
|
9
|
+
- 4
|
10
|
+
version: 1.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Gleb Mazovetskiy
|
@@ -88,10 +88,12 @@ files:
|
|
88
88
|
- README
|
89
89
|
- Rakefile
|
90
90
|
- lib/string_utils.rb
|
91
|
+
- lib/string_utils/transliteration.rb
|
91
92
|
- lib/string_utils/version.rb
|
92
93
|
- string_utils.gemspec
|
93
94
|
- test/normalize_name_spec.rb
|
94
95
|
- test/truncate_spec.rb
|
96
|
+
- test/urlify_spec.rb
|
95
97
|
has_rdoc: true
|
96
98
|
homepage: http://github.com/glebm/string_utils
|
97
99
|
licenses: []
|