babosa 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/README.md +59 -28
- data/Rakefile +14 -8
- data/lib/babosa.rb +11 -1
- data/lib/babosa/identifier.rb +26 -16
- data/lib/babosa/transliterator/base.rb +89 -0
- data/lib/babosa/transliterator/bulgarian.rb +27 -0
- data/lib/babosa/transliterator/cyrillic.rb +111 -0
- data/lib/babosa/transliterator/danish.rb +15 -0
- data/lib/babosa/transliterator/german.rb +15 -0
- data/lib/babosa/transliterator/latin.rb +199 -0
- data/lib/babosa/transliterator/russian.rb +22 -0
- data/lib/babosa/transliterator/serbian.rb +34 -0
- data/lib/babosa/transliterator/spanish.rb +9 -0
- data/lib/babosa/transliterator/ukranian.rb +11 -0
- data/lib/babosa/utf8/dumb_proxy.rb +1 -0
- data/lib/babosa/version.rb +1 -1
- data/spec/babosa_spec.rb +131 -0
- data/spec/spec_helper.rb +33 -0
- data/spec/transliterators/base_spec.rb +16 -0
- data/spec/transliterators/bulgarian_spec.rb +20 -0
- data/spec/transliterators/danish_spec.rb +17 -0
- data/spec/transliterators/german_spec.rb +17 -0
- data/spec/transliterators/russian_spec.rb +9 -0
- data/spec/transliterators/serbian_spec.rb +25 -0
- data/spec/transliterators/spanish_spec.rb +13 -0
- data/spec/transliterators/ukranian_spec.rb +9 -0
- data/spec/utf8_proxy_spec.rb +48 -0
- metadata +63 -19
- data/lib/babosa/characters.rb +0 -80
- data/test/babosa_test.rb +0 -198
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
$LOAD_PATH << File.expand_path("../lib", __FILE__)
|
3
|
+
$LOAD_PATH.uniq!
|
4
|
+
|
5
|
+
if ENV["COV"]
|
6
|
+
require "simplecov"
|
7
|
+
SimpleCov.start
|
8
|
+
end
|
9
|
+
|
10
|
+
# encoding: utf-8
|
11
|
+
$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
|
12
|
+
|
13
|
+
require "rubygems"
|
14
|
+
require "babosa"
|
15
|
+
require "active_support"
|
16
|
+
|
17
|
+
shared_examples_for "a latin transliterator" do
|
18
|
+
let(:t) { described_class.instance }
|
19
|
+
|
20
|
+
it "should transliterate latin characters" do
|
21
|
+
string = (0xC0..0x17E).to_a.pack("U*")
|
22
|
+
t.transliterate(string).should match(/[\x0-\x7f]/)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
shared_examples_for "a cyrillic transliterator" do
|
27
|
+
let(:t) { described_class.instance }
|
28
|
+
|
29
|
+
it "should transliterate cyrillic characters" do
|
30
|
+
string = "Славься, Отечество наше свободное"
|
31
|
+
t.transliterate(string).should match(/[\x0-\x7f]/)
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::Base do
|
5
|
+
|
6
|
+
before { @t = Babosa::Transliterator::Base.instance }
|
7
|
+
|
8
|
+
it "should transliterate 'smart' quotes" do
|
9
|
+
@t.transliterate("’").should eql("'")
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should transliterate non-breaking spaces" do
|
13
|
+
@t.transliterate("\xc2\xa0").should eql(" ")
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::Bulgarian do
|
5
|
+
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a cyrillic transliterator"
|
8
|
+
|
9
|
+
it "should transliterate Cyrillic characters" do
|
10
|
+
examples = {
|
11
|
+
"Ютия" => "Iutiia",
|
12
|
+
"Чушка" => "Chushka",
|
13
|
+
"кьорав" => "kiorav",
|
14
|
+
"Щъркел" => "Shturkel",
|
15
|
+
"полицай" => "policai"
|
16
|
+
}
|
17
|
+
examples.each {|k, v| t.transliterate(k).should eql(v)}
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::Danish do
|
5
|
+
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate various characters" do
|
10
|
+
examples = {
|
11
|
+
"Ærøskøbing" => "Aeroeskoebing",
|
12
|
+
"Årslev" => "Aarslev"
|
13
|
+
}
|
14
|
+
examples.each {|k, v| t.transliterate(k).should eql(v)}
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::German do
|
5
|
+
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate Eszett" do
|
10
|
+
t.transliterate("ß").should eql("ss")
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should transliterate vowels with umlauts" do
|
14
|
+
t.transliterate("üöä").should eql("ueoeae")
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::Serbian do
|
5
|
+
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
it_behaves_like "a cyrillic transliterator"
|
9
|
+
|
10
|
+
it "should transliterate Latin characters" do
|
11
|
+
examples = {
|
12
|
+
"Ðorđe" => "Djordje",
|
13
|
+
"Inđija" => "Indjija",
|
14
|
+
"Četiri" => "Chetiri",
|
15
|
+
"četiri" => "chetiri",
|
16
|
+
"Škola" => "Shkola",
|
17
|
+
"škola" => "shkola",
|
18
|
+
"Ђорђе" => "Djordje",
|
19
|
+
"Инђија" => "Indjija",
|
20
|
+
"Школа" => "Shkola",
|
21
|
+
}
|
22
|
+
examples.each {|k, v| t.transliterate(k).should eql(v)}
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
describe Babosa::Transliterator::Spanish do
|
5
|
+
|
6
|
+
let(:t) { described_class.instance }
|
7
|
+
it_behaves_like "a latin transliterator"
|
8
|
+
|
9
|
+
it "should transliterate ñ" do
|
10
|
+
t.transliterate("ñ").should eql("ni")
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path("../spec_helper", __FILE__)
|
3
|
+
|
4
|
+
PROXIES = [Babosa::UTF8::DumbProxy]
|
5
|
+
|
6
|
+
PROXIES << Babosa::UTF8::JavaProxy if Babosa.jruby15?
|
7
|
+
|
8
|
+
begin
|
9
|
+
require "unicode"
|
10
|
+
PROXIES << Babosa::UTF8::UnicodeProxy
|
11
|
+
rescue LoadError
|
12
|
+
end
|
13
|
+
|
14
|
+
begin
|
15
|
+
require "activesupport"
|
16
|
+
PROXIES << Babosa::UTF8::ActiveSupportProxy
|
17
|
+
rescue LoadError
|
18
|
+
end
|
19
|
+
|
20
|
+
PROXIES.each do |proxy|
|
21
|
+
|
22
|
+
describe proxy do
|
23
|
+
|
24
|
+
describe "#normalize_utf8" do
|
25
|
+
it "should normalize to canonical composed" do
|
26
|
+
# ÅÉÎØÜ
|
27
|
+
uncomposed_bytes = [65, 204, 138, 69, 204, 129, 73, 204, 130, 195, 152, 85, 204, 136]
|
28
|
+
composed_bytes = [195, 133, 195, 137, 195, 142, 195, 152, 195, 156]
|
29
|
+
uncomposed_string = uncomposed_bytes.pack("C*").unpack("U*").pack("U*")
|
30
|
+
composed_string = composed_bytes.pack("C*").unpack("U*").pack("U*")
|
31
|
+
proxy.normalize_utf8(uncomposed_string).unpack("C*").should eql(composed_bytes)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
describe "#upcase" do
|
36
|
+
it "should upcase the string" do
|
37
|
+
proxy.upcase("åéîøü").should eql("ÅÉÎØÜ")
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "#downcase" do
|
42
|
+
it "should downcase the string" do
|
43
|
+
proxy.downcase("ÅÉÎØÜ").should eql("åéîøü")
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
end
|
metadata
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: babosa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 2
|
8
|
-
- 2
|
9
|
-
version: 0.2.2
|
4
|
+
prerelease:
|
5
|
+
version: 0.3.0
|
10
6
|
platform: ruby
|
11
7
|
authors:
|
12
8
|
- Norman Clarke
|
@@ -14,10 +10,42 @@ autorequire:
|
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
12
|
|
17
|
-
date: 2011-
|
13
|
+
date: 2011-03-23 00:00:00 -03:00
|
18
14
|
default_executable:
|
19
|
-
dependencies:
|
20
|
-
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: activesupport
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 2.3.0
|
25
|
+
type: :development
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 2.5.0
|
36
|
+
type: :development
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: simplecov
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id003
|
21
49
|
description: " A library for creating slugs. Babosa an extraction and improvement of the\n string code from FriendlyId, intended to help developers create similar\n libraries or plugins.\n"
|
22
50
|
email: norman@njclarke.com
|
23
51
|
executables: []
|
@@ -27,8 +55,17 @@ extensions: []
|
|
27
55
|
extra_rdoc_files: []
|
28
56
|
|
29
57
|
files:
|
30
|
-
- lib/babosa/characters.rb
|
31
58
|
- lib/babosa/identifier.rb
|
59
|
+
- lib/babosa/transliterator/base.rb
|
60
|
+
- lib/babosa/transliterator/bulgarian.rb
|
61
|
+
- lib/babosa/transliterator/cyrillic.rb
|
62
|
+
- lib/babosa/transliterator/danish.rb
|
63
|
+
- lib/babosa/transliterator/german.rb
|
64
|
+
- lib/babosa/transliterator/latin.rb
|
65
|
+
- lib/babosa/transliterator/russian.rb
|
66
|
+
- lib/babosa/transliterator/serbian.rb
|
67
|
+
- lib/babosa/transliterator/spanish.rb
|
68
|
+
- lib/babosa/transliterator/ukranian.rb
|
32
69
|
- lib/babosa/utf8/active_support_proxy.rb
|
33
70
|
- lib/babosa/utf8/dumb_proxy.rb
|
34
71
|
- lib/babosa/utf8/java_proxy.rb
|
@@ -41,8 +78,19 @@ files:
|
|
41
78
|
- MIT-LICENSE
|
42
79
|
- Rakefile
|
43
80
|
- init.rb
|
44
|
-
-
|
45
|
-
|
81
|
+
- spec/babosa_spec.rb
|
82
|
+
- spec/spec_helper.rb
|
83
|
+
- spec/transliterators/base_spec.rb
|
84
|
+
- spec/transliterators/bulgarian_spec.rb
|
85
|
+
- spec/transliterators/danish_spec.rb
|
86
|
+
- spec/transliterators/german_spec.rb
|
87
|
+
- spec/transliterators/russian_spec.rb
|
88
|
+
- spec/transliterators/serbian_spec.rb
|
89
|
+
- spec/transliterators/spanish_spec.rb
|
90
|
+
- spec/transliterators/ukranian_spec.rb
|
91
|
+
- spec/utf8_proxy_spec.rb
|
92
|
+
- .gemtest
|
93
|
+
has_rdoc: true
|
46
94
|
homepage: http://norman.github.com/babosa
|
47
95
|
licenses: []
|
48
96
|
|
@@ -56,23 +104,19 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
56
104
|
requirements:
|
57
105
|
- - ">="
|
58
106
|
- !ruby/object:Gem::Version
|
59
|
-
segments:
|
60
|
-
- 0
|
61
107
|
version: "0"
|
62
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
109
|
none: false
|
64
110
|
requirements:
|
65
111
|
- - ">="
|
66
112
|
- !ruby/object:Gem::Version
|
67
|
-
segments:
|
68
|
-
- 0
|
69
113
|
version: "0"
|
70
114
|
requirements: []
|
71
115
|
|
72
116
|
rubyforge_project: "[none]"
|
73
|
-
rubygems_version: 1.
|
117
|
+
rubygems_version: 1.6.1
|
74
118
|
signing_key:
|
75
119
|
specification_version: 3
|
76
120
|
summary: A library for creating slugs.
|
77
|
-
test_files:
|
78
|
-
|
121
|
+
test_files: []
|
122
|
+
|
data/lib/babosa/characters.rb
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
module Babosa
|
3
|
-
|
4
|
-
# This module provides sets of characters needed for various UTF-8 aware
|
5
|
-
# string operations.
|
6
|
-
module Characters
|
7
|
-
extend self
|
8
|
-
|
9
|
-
# Hash of UTF-8 - ASCII approximations.
|
10
|
-
attr_reader :approximations
|
11
|
-
# Punctuation and control characters to remove from slug strings.
|
12
|
-
attr_reader :strippable
|
13
|
-
|
14
|
-
@strippable = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 19,
|
15
|
-
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39,
|
16
|
-
40, 41, 42, 43, 44, 45, 46, 47, 58, 59, 60, 61, 62, 63, 64, 91, 92, 93, 94,
|
17
|
-
95, 96, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135,
|
18
|
-
136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
|
19
|
-
151, 152, 153, 154, 155, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166,
|
20
|
-
167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 182, 183,
|
21
|
-
184, 185, 187, 188, 189, 190, 191, 215, 247]
|
22
|
-
|
23
|
-
# Adds a hash of approximations.
|
24
|
-
# @example
|
25
|
-
# add_approximations :spanish, "ñ" => "ni"
|
26
|
-
# @param [#to_sym] name The name of the approximations to add.
|
27
|
-
# @param Hash hash The approximations to add.
|
28
|
-
def add_approximations(name, hash)
|
29
|
-
approximations = @approximations ? @approximations.dup : {}
|
30
|
-
approximations[name.to_sym] = hash.inject({}) do |memo, object|
|
31
|
-
key = object[0].unpack("U").shift
|
32
|
-
value = object[1].unpack("C*")
|
33
|
-
memo[key] = value.length == 1 ? value[0] : value
|
34
|
-
memo
|
35
|
-
end.freeze
|
36
|
-
@approximations = approximations.freeze
|
37
|
-
end
|
38
|
-
|
39
|
-
add_approximations :danish, "æ" => "ae", "ø" => "oe", "å" => "aa", "Ø" => "Oe", "Å" => "Aa"
|
40
|
-
add_approximations :german, "ä" => "ae", "ö" => "oe", "ü" => "ue", "Ä" => "Ae", "Ö" => "Oe", "Ü" => "Ue"
|
41
|
-
add_approximations :serbian, "Ð" => "Dj", "đ" => "dj" ,"Č" => "Ch", "č" => "ch", "Š" => "Sh", "š" => "sh"
|
42
|
-
add_approximations :spanish, "ñ" => "ni", "Ñ" => "Ni"
|
43
|
-
add_approximations :latin, {
|
44
|
-
"À" => "A", "Á" => "A", "Â" => "A", "Ã" => "A", "Ä" => "A", "Å" => "A",
|
45
|
-
"Æ" => "Ae", "Ç" => "C", "È" => "E", "É" => "E", "Ê" => "E", "Ë" => "E",
|
46
|
-
"Ì" => "I", "Í" => "I", "Î" => "I", "Ï" => "I", "Ð" => "D", "Ñ" => "N",
|
47
|
-
"Ò" => "O", "Ó" => "O", "Ô" => "O", "Õ" => "O", "Ö" => "O", "Ø" => "O",
|
48
|
-
"Ù" => "U", "Ú" => "U", "Û" => "U", "Ü" => "U", "Ý" => "Y", "Þ" => "Th",
|
49
|
-
"ß" => "ss", "à" => "a" , "á" => "a", "â" => "a", "ã" => "a", "ä" => "a",
|
50
|
-
"å" => "a", "æ" => "ae", "ç" => "c" , "è" => "e", "é" => "e", "ê" => "e",
|
51
|
-
"ë" => "e", "ì" => "i", "í" => "i", "î" => "i", "ï" => "i", "ð" => "d",
|
52
|
-
"ñ" => "n", "ò" => "o", "ó" => "o", "ô" => "o", "õ" => "o", "ö" => "o",
|
53
|
-
"ø" => "o", "ù" => "u", "ú" => "u", "û" => "u", "ü" => "u", "ý" => "y",
|
54
|
-
"þ" => "th", "ÿ" => "y", "Ā" => "A", "ā" => "a", "Ă" => "A", "ă" => "a",
|
55
|
-
"Ą" => "A", "ą" => "a", "Ć" => "C", "ć" => "c", "Ĉ" => "C", "ĉ" => "c",
|
56
|
-
"Ċ" => "C", "ċ" => "c", "Č" => "C", "č" => "c", "Ď" => "D", "ď" => "d",
|
57
|
-
"Đ" => "D", "đ" => "d", "Ē" => "E", "ē" => "e", "Ĕ" => "E", "ĕ" => "e",
|
58
|
-
"Ė" => "E", "ė" => "e", "Ę" => "E", "ę" => "e", "Ě" => "E", "ě" => "e",
|
59
|
-
"Ĝ" => "G", "ĝ" => "g", "Ğ" => "G", "ğ" => "g", "Ġ" => "G", "ġ" => "g",
|
60
|
-
"Ģ" => "G", "ģ" => "g", "Ĥ" => "H", "ĥ" => "h", "Ħ" => "H", "ħ" => "h",
|
61
|
-
"Ĩ" => "I", "ĩ" => "i", "Ī" => "I", "ī" => "i", "Ĭ" => "I", "ĭ" => "i",
|
62
|
-
"Į" => "I", "į" => "i", "İ" => "I", "ı" => "i", "Ĳ" => "Ij", "ĳ" => "ij",
|
63
|
-
"Ĵ" => "J", "ĵ" => "j", "Ķ" => "K", "ķ" => "k", "ĸ" => "k", "Ĺ" => "L",
|
64
|
-
"ĺ" => "l", "Ļ" => "L", "ļ" => "l", "Ľ" => "L", "ľ" => "l", "Ŀ" => "L",
|
65
|
-
"ŀ" => "l", "Ł" => "L", "ł" => "l", "Ń" => "N", "ń" => "n", "Ņ" => "N",
|
66
|
-
"ņ" => "n", "Ň" => "N", "ň" => "n", "ŉ" => "n", "Ŋ" => "Ng", "ŋ" => "ng",
|
67
|
-
"Ō" => "O", "ō" => "o", "Ŏ" => "O", "ŏ" => "o", "Ő" => "O", "ő" => "o",
|
68
|
-
"Œ" => "OE", "œ" => "oe", "Ŕ" => "R", "ŕ" => "r", "Ŗ" => "R", "ŗ" => "r",
|
69
|
-
"Ř" => "R", "ř" => "r", "Ś" => "S", "ś" => "s", "Ŝ" => "S", "ŝ" => "s",
|
70
|
-
"Ş" => "S", "ş" => "s", "Š" => "S", "š" => "s", "Ţ" => "T", "ţ" => "t",
|
71
|
-
"Ť" => "T", "ť" => "t", "Ŧ" => "T", "ŧ" => "t", "Ũ" => "U", "ũ" => "u",
|
72
|
-
"Ū" => "U", "ū" => "u", "Ŭ" => "U", "ŭ" => "u", "Ů" => "U", "ů" => "u",
|
73
|
-
"Ű" => "U", "ű" => "u", "Ų" => "U", "ų" => "u", "Ŵ" => "W", "ŵ" => "w",
|
74
|
-
"Ŷ" => "Y", "ŷ" => "y", "Ÿ" => "Y", "Ź" => "Z", "ź" => "z", "Ż" => "Z",
|
75
|
-
"ż" => "z", "Ž" => "Z", "ž" => "z", "×" => "x", "÷" => "/", "‘" => "'",
|
76
|
-
'’' => "'", "‛" => "'", "“" => '"', "”" => '"', "„" => '"', "‟" => '"',
|
77
|
-
"‐" => "-", "‑" => "-", "‒" => "-", "–" => "-", "—" => "-", "―" => "-"
|
78
|
-
}
|
79
|
-
end
|
80
|
-
end
|
data/test/babosa_test.rb
DELETED
@@ -1,198 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
|
3
|
-
|
4
|
-
require "rubygems"
|
5
|
-
require "bundler/setup"
|
6
|
-
require "test/unit"
|
7
|
-
require "babosa"
|
8
|
-
|
9
|
-
Module.send :include, Module.new {
|
10
|
-
def test(name, &block)
|
11
|
-
define_method("test_#{name.gsub(/[^a-z0-9_]/i, "_")}".to_sym, &block)
|
12
|
-
end
|
13
|
-
}
|
14
|
-
|
15
|
-
module UTF8ProxyTest
|
16
|
-
test "should downcase strings" do
|
17
|
-
assert_equal "åéîøü", proxy.downcase("ÅÉÎØÜ")
|
18
|
-
end
|
19
|
-
|
20
|
-
test "should upcase strings" do
|
21
|
-
assert_equal "ÅÉÎØÜ", proxy.upcase("åéîøü")
|
22
|
-
end
|
23
|
-
|
24
|
-
test "should compose UTF-8" do
|
25
|
-
# ÅÉÎØÜ
|
26
|
-
uncomposed_bytes = [65, 204, 138, 69, 204, 129, 73, 204, 130, 195, 152, 85, 204, 136]
|
27
|
-
composed_bytes = [195, 133, 195, 137, 195, 142, 195, 152, 195, 156]
|
28
|
-
uncomposed_string = uncomposed_bytes.pack("C*").unpack("U*").pack("U*")
|
29
|
-
composed_string = composed_bytes.pack("C*").unpack("U*").pack("U*")
|
30
|
-
assert_equal composed_bytes, proxy.normalize_utf8(uncomposed_string).unpack("C*")
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
if Babosa.jruby15?
|
35
|
-
class JavaProxyTest < Test::Unit::TestCase
|
36
|
-
include UTF8ProxyTest
|
37
|
-
def proxy
|
38
|
-
Babosa::UTF8::JavaProxy
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
class DumbProxyTest < Test::Unit::TestCase
|
44
|
-
include UTF8ProxyTest
|
45
|
-
def proxy
|
46
|
-
Babosa::UTF8::DumbProxy
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
class BabosaTest < Test::Unit::TestCase
|
51
|
-
|
52
|
-
test "should respond_to? :empty?" do
|
53
|
-
assert "".to_slug.respond_to? :empty?
|
54
|
-
end
|
55
|
-
|
56
|
-
test "word_chars! should leave only letters and spaces" do
|
57
|
-
string = "a*$%^$@!@b$%^&*()*!c"
|
58
|
-
assert_match /[a-z ]*/i, string.to_slug.word_chars!
|
59
|
-
end
|
60
|
-
|
61
|
-
test "approximate_ascii should transliterate to ascii" do
|
62
|
-
slug = (0xC0..0x17E).to_a.each do |codepoint|
|
63
|
-
ss = [codepoint].pack("U*").to_slug
|
64
|
-
approx = ss.approximate_ascii
|
65
|
-
assert_match /[\x0-\x7f]/, approx.to_s
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
test "should lowercase strings" do
|
70
|
-
assert_equal "feliz año", "FELIZ AÑO".to_slug.downcase!
|
71
|
-
end
|
72
|
-
|
73
|
-
test "should uppercase strings" do
|
74
|
-
assert_equal "FELIZ AÑO", "feliz año".to_slug.upcase!
|
75
|
-
end
|
76
|
-
|
77
|
-
test "should replace whitespace with dashes" do
|
78
|
-
assert_equal "a-b", "a b".to_slug.clean.normalize!
|
79
|
-
end
|
80
|
-
|
81
|
-
test "should replace multiple spaces with 1 dash" do
|
82
|
-
assert_equal "a-b", "a b".to_slug.clean.normalize!
|
83
|
-
end
|
84
|
-
|
85
|
-
test "should replace multiple dashes with 1 dash" do
|
86
|
-
assert_equal "male-female", "male - female".to_slug.normalize!
|
87
|
-
end
|
88
|
-
|
89
|
-
test "should strip trailing space" do
|
90
|
-
assert_equal "ab", "ab ".to_slug.normalize!
|
91
|
-
end
|
92
|
-
|
93
|
-
test "should strip leading space" do
|
94
|
-
assert_equal "ab", " ab".to_slug.normalize!
|
95
|
-
end
|
96
|
-
|
97
|
-
test "should strip trailing slashes" do
|
98
|
-
assert_equal "ab", "ab-".to_slug.normalize!
|
99
|
-
end
|
100
|
-
|
101
|
-
test "should strip leading slashes" do
|
102
|
-
assert_equal "ab", "-ab".to_slug.normalize!
|
103
|
-
end
|
104
|
-
|
105
|
-
test "should not modify valid name strings" do
|
106
|
-
assert_equal "a-b-c-d", "a-b-c-d".to_slug.normalize!
|
107
|
-
end
|
108
|
-
|
109
|
-
test "should do special approximations for German" do
|
110
|
-
{
|
111
|
-
"Jürgen" => "Juergen",
|
112
|
-
"böse" => "boese",
|
113
|
-
"Männer" => "Maenner"
|
114
|
-
}.each {|given, expected| assert_equal expected, given.to_slug.approximate_ascii!(:german)}
|
115
|
-
end
|
116
|
-
|
117
|
-
test "should do special approximations for Spanish" do
|
118
|
-
assert_equal "anio", "año".to_slug.approximate_ascii!(:spanish)
|
119
|
-
end
|
120
|
-
|
121
|
-
test "should do special approximations for Serbian" do
|
122
|
-
{
|
123
|
-
"Ðorđe" => "Djordje",
|
124
|
-
"Inđija" => "Indjija",
|
125
|
-
"Četiri" => "Chetiri",
|
126
|
-
"četiri" => "chetiri",
|
127
|
-
"Škola" => "Shkola",
|
128
|
-
"škola" => "shkola"
|
129
|
-
}.each {|given, expected| assert_equal expected, given.to_slug.approximate_ascii!(:serbian)}
|
130
|
-
end
|
131
|
-
|
132
|
-
test "should do special approximations for Danish" do
|
133
|
-
{
|
134
|
-
"Ærøskøbing" => "Aeroeskoebing",
|
135
|
-
"Årslev" => "Aarslev"
|
136
|
-
}.each {|given, expected| assert_equal expected, given.to_slug.approximate_ascii!(:danish)}
|
137
|
-
end
|
138
|
-
|
139
|
-
test "should work with non roman chars" do
|
140
|
-
assert_equal "検-索", "検 索".to_slug.normalize!
|
141
|
-
end
|
142
|
-
|
143
|
-
test "should work with invalid UTF-8 strings" do
|
144
|
-
%w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
|
145
|
-
string = "\x93abc".to_slug
|
146
|
-
assert_nothing_raised do
|
147
|
-
method == "truncate" ? string.send(method, 32) : string.send(method)
|
148
|
-
end
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
test "should truncate string by byte length" do
|
153
|
-
assert_equal "ü", "üa".to_slug.truncate_bytes!(2)
|
154
|
-
assert_equal "", "üa".to_slug.truncate_bytes!(1)
|
155
|
-
assert_equal "üa", "üa".to_slug.truncate_bytes!(100)
|
156
|
-
assert_equal "ü", "üéøá".to_slug.truncate_bytes!(3)
|
157
|
-
end
|
158
|
-
|
159
|
-
test "should truncate string by char length" do
|
160
|
-
assert_equal "üa", "üa".to_slug.truncate!(2)
|
161
|
-
assert_equal "ü", "üa".to_slug.truncate!(1)
|
162
|
-
assert_equal "üa", "üa".to_slug.truncate!(100)
|
163
|
-
end
|
164
|
-
|
165
|
-
test "should transliterate uncomposed utf8" do
|
166
|
-
string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
|
167
|
-
assert_equal "u", string.to_slug.approximate_ascii!
|
168
|
-
end
|
169
|
-
|
170
|
-
test "with_dashes should not change byte size when replacing spaces" do
|
171
|
-
assert_equal "".bytesize, "".to_slug.with_dashes.bytesize
|
172
|
-
assert_equal " ".bytesize, " ".to_slug.with_dashes.bytesize
|
173
|
-
assert_equal "-abc-".bytesize, "-abc-".to_slug.with_dashes.bytesize
|
174
|
-
assert_equal " abc ".bytesize, " abc ".to_slug.with_dashes.bytesize
|
175
|
-
assert_equal " a bc ".bytesize, " a bc ".to_slug.with_dashes.bytesize
|
176
|
-
end
|
177
|
-
|
178
|
-
test "normalize! with ascii option should approximate and strip non ascii" do
|
179
|
-
ss = "カタカナ: katakana is über cool".to_slug
|
180
|
-
assert_equal "katakana-is-uber-cool", ss.normalize!(:to_ascii => true)
|
181
|
-
end
|
182
|
-
|
183
|
-
test "normalize should use transliterations" do
|
184
|
-
assert_equal "juergen", "Jürgen".to_slug.normalize(:transliterations => :german).to_s
|
185
|
-
end
|
186
|
-
|
187
|
-
test "should get a string suitable for use as a ruby method" do
|
188
|
-
assert_equal "hello_world?", "¿¿¿hello... world???".to_slug.to_ruby_method!
|
189
|
-
assert_equal "katakana_is_uber_cool", "カタカナ: katakana is über cool".to_slug.to_ruby_method!
|
190
|
-
assert_equal "katakana_is_uber_cool!", "カタカナ: katakana is über cool!".to_slug.to_ruby_method!
|
191
|
-
assert_equal "katakana_is_uber_cool", "カタカナ: katakana is über cool".to_slug.to_ruby_method!(false)
|
192
|
-
end
|
193
|
-
|
194
|
-
test "should approximate 'smart' quotes" do
|
195
|
-
assert_equal "john's", "john’s".to_slug.approximate_ascii.to_s
|
196
|
-
assert_equal "johns", "john’s".to_slug.normalize.to_s
|
197
|
-
end
|
198
|
-
end
|