transliterator 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.rvmrc +48 -0
- data/.travis.yml +5 -0
- data/.yardopts +3 -0
- data/Gemfile +26 -0
- data/Guardfile +14 -0
- data/LICENSE +22 -0
- data/README.md +36 -0
- data/Rakefile +19 -0
- data/lib/transliterator.rb +18 -0
- data/lib/transliterator/base.rb +82 -0
- data/lib/transliterator/cyrillic.rb +99 -0
- data/lib/transliterator/greek.rb +71 -0
- data/lib/transliterator/latin.rb +196 -0
- data/lib/transliterator/text.rb +18 -0
- data/lib/transliterator/version.rb +3 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/support/shared/base_transliterator.rb +31 -0
- data/spec/support/shared/cyrillic_transilterator.rb +12 -0
- data/spec/support/shared/greek_translitterator.rb +12 -0
- data/spec/support/shared/latin_transliterator.rb +12 -0
- data/spec/transliterator/base_spec.rb +9 -0
- data/spec/transliterator/cyrillic_spec.rb +8 -0
- data/spec/transliterator/greek_spec.rb +8 -0
- data/spec/transliterator/latin_spec.rb +8 -0
- data/spec/transliterator/text_spec.rb +46 -0
- data/spec/transliterator_spec.rb +23 -0
- data/transliterator.gemspec +22 -0
- metadata +88 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env bash

# This is an RVM Project .rvmrc file, used to automatically load the ruby
# development environment upon cd'ing into the directory

# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
# Only full ruby name is supported here, for short names use:
#     echo "rvm use 1.9.3" > .rvmrc
environment_id="ruby-1.9.3-p125@transliterator"

# Uncomment the following lines if you want to verify rvm version per project
# rvmrc_rvm_version="1.10.3" # 1.10.1 seems like a safe start
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
#   echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
#   return 1
# }

# First we attempt to load the desired environment directly from the environment
# file. This is very fast and efficient compared to running through the entire
# CLI and selector. If you want feedback on which environment was used then
# insert the word 'use' after --create as this triggers verbose mode.
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
then
  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
  [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
    \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
else
  # If the environment file has not yet been created, use the RVM CLI to select.
  rvm --create "$environment_id" || {
    echo "Failed to create RVM environment '${environment_id}'."
    return 1
  }
fi

# If you use bundler, this might be useful to you:
# Install bundler when it is missing, or when the only `bundle` found is the
# wrapper under RVM's own bin directory. The path is quoted, matched literally
# (-F) and defaulted the same way as everywhere else in this file, so an unset
# $rvm_path or a path containing spaces no longer changes what grep matches.
if [[ -s Gemfile ]] && {
  ! builtin command -v bundle >/dev/null ||
  builtin command -v bundle | grep -F "${rvm_path:-$HOME/.rvm}/bin/bundle" >/dev/null
}
then
  printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
  gem install bundler
fi
if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
then
  bundle install | grep -vE '^Using|Your bundle is complete'
fi
|
data/.travis.yml
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Use the explicit HTTPS URL: the :rubygems symbol source is deprecated by
# Bundler and resolves to an insecure HTTP endpoint.
source 'https://rubygems.org'

# Specify the project's dependencies in the gemspec:
gemspec

# Specify the development dependencies here:
group :development do
  gem 'rake'

  # Documentation:
  gem 'yard'

  # Testing libraries:
  gem 'rspec'

  # Colorful messages:
  gem 'rainbow'

  # Support for guard:
  gem 'guard'
  gem 'guard-bundler'
  gem 'guard-rspec'
  gem 'rb-fsevent'
  gem 'rb-readline'
  gem 'fuubar'
end
|
data/Guardfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'rb-readline'
|
2
|
+
|
3
|
+
# Re-run `bundle install` whenever the dependency declarations change.
guard 'bundler' do
  watch('Gemfile')
  watch('transliterator.gemspec')
end

# Run specs on change. Every pattern is anchored at the start of the path so
# that only files under the project's own lib/ and spec/ directories match.
guard 'rspec', version: 2, cli: '--format Fuubar --colour' do
  watch(%r{^spec/.+_spec\.rb})
  watch(%r{^lib/(.+)\.rb})          { |m| "spec/#{m[1]}_spec.rb" }
  watch(%r{^lib/.+\.rb})            { "spec" }
  watch('spec/spec_helper.rb')      { "spec" }
  watch(%r{^spec/support/(.+)\.rb}) { "spec" }
end
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Tom-Eric Gerritsen
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Transliterator [![Build Status](https://secure.travis-ci.org/eet-nu/transliterator.png)][Travis CI] [![Dependency Status](https://gemnasium.com/eet-nu/transliterator.png)][Gemnasium]
|
2
|
+
|
3
|
+
|
4
|
+
A library for translating UTF-8 characters to their ASCII equivalents. This
|
5
|
+
library is based on code from the [babosa gem][Babosa] and meant for projects
|
6
|
+
that only need the transliterating functionality.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
gem 'transliterator'
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install transliterator
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
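Transliterate a string to its closest ASCII representation:

    require 'transliterator'

    Transliterator.transliterate("cuī dōnō lĕpĭdūm nŏvūm lĭbēllŭm")
    # => "cui dono lepidum novum libellum"

Use `asciify` to additionally strip any characters that have no ASCII
equivalent:

    Transliterator.asciify("Snowman ☃")
    # => "Snowman "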
|
25
|
+
|
26
|
+
## Contributing
|
27
|
+
|
28
|
+
1. Fork it
|
29
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
30
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
31
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
32
|
+
5. Create new Pull Request
|
33
|
+
|
34
|
+
[Travis CI]: http://travis-ci.org/eet-nu/transliterator
|
35
|
+
[Gemnasium]: https://gemnasium.com/eet-nu/transliterator
|
36
|
+
[Babosa]: http://norman.github.com/babosa/
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
desc "Start a console with the Transliterator library loaded"
task(:console) { exec "irb -r ./lib/transliterator" }

# RSpec tasks (`rake spec` is also the default task):
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task default: :spec

# YARD tasks (API documentation for everything under lib/):
require 'yard'
YARD::Rake::YardocTask.new do |yardoc|
  yardoc.files   = %w[lib/**/*.rb]
  yardoc.options = %w[--readme README.md --charset utf-8]
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Namespace and convenience entry points for the transliterator library.
# All constants are autoloaded on first reference.
module Transliterator
  autoload :VERSION,  'transliterator/version'

  autoload :Base,     'transliterator/base'
  autoload :Cyrillic, 'transliterator/cyrillic'
  autoload :Greek,    'transliterator/greek'
  autoload :Latin,    'transliterator/latin'

  autoload :Text,     'transliterator/text'

  class << self
    # Transliterates +input+ by wrapping it in a +Transliterator::Text+ and
    # calling +#transliterate+ on it without explicit transliterators.
    #
    # @param input [String] the text to transliterate
    # @return [String] the transliterated text
    def transliterate(input)
      Text.new(input).transliterate
    end

    # Transliterates +input+ and then removes every character that is still
    # outside the 7-bit ASCII range.
    #
    # @param input [String] the text to convert
    # @return [String] the transliterated text with non-ASCII characters removed
    def asciify(input)
      transliterated = transliterate(input)
      transliterated.gsub(/[^\x00-\x7f]/u, '')
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Transliterator
  # Transliterator for punctuation, math symbols and whitespace variants.
  #
  # Subclasses define their own +APPROXIMATIONS+ constant; at instantiation
  # they start from a copy of their superclass's table and overlay their own
  # entries on top of it.
  class Base
    include Singleton

    # Maps a single UTF-8 character to its replacement string.
    APPROXIMATIONS = {
      "×" => "x",
      "÷" => "/",
      "‐" => "-",
      "‑" => "-",
      "‒" => "-",
      "–" => "-",
      "—" => "-",
      "―" => "-",
      "‘" => "'",
      "‛" => "'",
      "“" => '"',
      "”" => '"',
      "„" => '"',
      "‟" => '"',
      '’' => "'",
      # various kinds of space characters
      "\xc2\xa0"     => " ",
      "\xe2\x80\x80" => " ",
      "\xe2\x80\x81" => " ",
      "\xe2\x80\x82" => " ",
      "\xe2\x80\x83" => " ",
      "\xe2\x80\x84" => " ",
      "\xe2\x80\x85" => " ",
      "\xe2\x80\x86" => " ",
      "\xe2\x80\x87" => " ",
      "\xe2\x80\x88" => " ",
      "\xe2\x80\x89" => " ",
      "\xe2\x80\x8a" => " ",
      "\xe2\x81\x9f" => " ",
      "\xe3\x80\x80" => " ",
    }.freeze

    # @return [Hash{Integer => Integer, Array<Integer>}] lookup table keyed by
    #   source code point; the value is a single replacement code point, or an
    #   array of code points when the replacement is not exactly one character
    attr_reader :approximations

    # Builds the code point lookup table for this transliterator.
    #
    # Because +Transliterator::Base+ is a singleton, you can only get an
    # instance of it by calling the +#instance+ class method on it:
    #
    #     Transliterator::Base.new      # => NoMethodError: private method `new' called for Transliterator::Base:Class
    #     Transliterator::Base.instance # => #<Transliterator::Base:0x007f9b8c086e78>
    def initialize
      # Subclasses inherit (a copy of) everything their parent already maps.
      @approximations =
        if self.class < Base
          self.class.superclass.instance.approximations.dup
        else
          {}
        end

      self.class::APPROXIMATIONS.each do |character, replacement|
        # Decode the replacement as code points ("U*"), not bytes, so that a
        # non-ASCII replacement survives the later pack("U*") intact.
        codepoints = replacement.unpack("U*")
        @approximations[character.unpack("U").first] =
          codepoints.length == 1 ? codepoints.first : codepoints
      end
    end

    # Transliterate a given string's UTF-8 characters to their ASCII equivalents.
    #
    #     transliterator = Transliterator::Base.instance
    #     transliterator.transliterate "5 × 10 ÷ 2 ‐ 5 = 20" # => "5 x 10 / 2 - 5 = 20"
    #
    # Characters without an entry in +approximations+ are passed through
    # unchanged.
    #
    # @param string [String] the UTF-8 text to transliterate
    # @return [String] The transliterated string
    def transliterate(string)
      string.unpack("U*")
            .flat_map { |codepoint| approximations.fetch(codepoint, codepoint) }
            .pack("U*")
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Transliterator
  # Transliterator for Cyrillic characters. Extends +Base+ with the
  # character table below.
  class Cyrillic < Base
    # Maps a single Cyrillic character to its ASCII replacement. An empty
    # replacement drops the character from the output. Frozen, like
    # +Base::APPROXIMATIONS+, so the shared table cannot be mutated.
    APPROXIMATIONS = {
      "Ё" => "Yo",
      "Ѓ" => "G",
      "Є" => "Ye",
      "Ї" => "Yi",
      "Љ" => "L",
      "Њ" => "N",
      "Ќ" => "K",
      "Ў" => "U",
      "Џ" => "Dh",
      "А" => "A",
      "Б" => "B",
      "В" => "V",
      "Г" => "G",
      "Д" => "D",
      "Е" => "E",
      "Ж" => "Zh",
      "З" => "Z",
      "И" => "I",
      "Й" => "J",
      "К" => "K",
      "Л" => "L",
      "М" => "M",
      "Н" => "N",
      "О" => "O",
      "П" => "P",
      "Р" => "R",
      "С" => "S",
      "Т" => "T",
      "У" => "U",
      "Ф" => "F",
      "Х" => "X",
      "Ц" => "Cz",
      "Ч" => "Ch",
      "Ш" => "Sh",
      "Щ" => "Shh",
      "Ъ" => "",
      "Ы" => "Y",
      "Ь" => "",
      "Э" => "E",
      "Ю" => "Yu",
      "Я" => "Ya",
      "а" => "a",
      "б" => "b",
      "в" => "v",
      "г" => "g",
      "д" => "d",
      "е" => "e",
      "ж" => "zh",
      "з" => "z",
      "и" => "i",
      "й" => "j",
      "к" => "k",
      "л" => "l",
      "м" => "m",
      "н" => "n",
      "о" => "o",
      "п" => "p",
      "р" => "r",
      "с" => "s",
      "т" => "t",
      "у" => "u",
      "ф" => "f",
      "х" => "x",
      "ц" => "cz",
      "ч" => "ch",
      "ш" => "sh",
      "щ" => "shh",
      "ъ" => "",
      "ы" => "y",
      "ь" => "",
      "э" => "e",
      "ю" => "yu",
      "я" => "ya",
      "ё" => "yo",
      "ѓ" => "g",
      "є" => "ye",
      "ї" => "yi",
      "љ" => "l",
      "њ" => "n",
      "ќ" => "k",
      "ў" => "u",
      "џ" => "dh",
      "Ѣ" => "Ye",
      "ѣ" => "ye",
      "Ѫ" => "O",
      "ѫ" => "o",
      "Ѳ" => "Fh",
      "ѳ" => "fh",
      "Ѵ" => "Yh",
      "ѵ" => "yh",
      "Ґ" => "G",
      "ґ" => "g"
    }.freeze
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Transliterator
  # Transliterator for Greek characters. Extends +Base+ with the character
  # table below.
  class Greek < Base
    # Maps a single Greek character to its ASCII replacement. Frozen, like
    # +Base::APPROXIMATIONS+, so the shared table cannot be mutated.
    APPROXIMATIONS = {
      "Α" => "A",
      "Ά" => "A",
      "α" => "a",
      "ά" => "a",
      "Β" => "B",
      "β" => "b",
      "Γ" => "G",
      "γ" => "g",
      "Δ" => "D",
      "δ" => "d",
      "Ε" => "E",
      "Έ" => "E",
      "ε" => "e",
      "έ" => "e",
      "Ζ" => "Z",
      "ζ" => "z",
      "Η" => "E",
      # Accented eta keeps the case of its source character (the upper- and
      # lower-case replacements were previously swapped).
      "Ή" => "E",
      "η" => "e",
      "ή" => "e",
      "Θ" => "Th",
      "θ" => "th",
      "Ι" => "I",
      # The replacement must be the ASCII letter "I", not the look-alike
      # Greek capital iota.
      "Ί" => "I",
      # NOTE(review): this key is a Latin letter, not a Greek one; it is kept
      # so existing behavior for that character is unchanged.
      "Î" => "I",
      "ι" => "i",
      "ί" => "i",
      "Κ" => "K",
      "κ" => "k",
      "Λ" => "L",
      "λ" => "l",
      "Μ" => "M",
      "μ" => "m",
      "Ν" => "N",
      "ν" => "n",
      "Ξ" => "X",
      "ξ" => "x",
      "Ο" => "O",
      "Ό" => "O",
      "ο" => "o",
      "ό" => "o",
      "Π" => "P",
      "π" => "p",
      "Ρ" => "R",
      "ρ" => "r",
      "Σ" => "S",
      "σ" => "s",
      "ς" => "s",
      "Τ" => "T",
      "τ" => "t",
      "Υ" => "I",
      "Ύ" => "I",
      "υ" => "i",
      "ύ" => "i",
      "Φ" => "F",
      "φ" => "f",
      "Χ" => "Ch",
      "χ" => "ch",
      "Ψ" => "Ps",
      "ψ" => "ps",
      "Ω" => "O",
      "Ώ" => "O",
      "ω" => "o",
      "ώ" => "o"
    }.freeze
  end
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Transliterator
  # Transliterator for accented and extended Latin characters. Extends +Base+
  # with the character table below.
  class Latin < Base
    # Maps a single Latin character to its ASCII replacement. Every key must
    # be exactly ONE code point: only the first code point of a key is used
    # when the lookup table is built. Frozen, like +Base::APPROXIMATIONS+.
    APPROXIMATIONS = {
      "À" => "A",
      "Á" => "A",
      "Â" => "A",
      "Ã" => "A",
      "Ä" => "A",
      "Å" => "A",
      "Æ" => "Ae",
      "Ç" => "C",
      "È" => "E",
      "É" => "E",
      "Ê" => "E",
      "Ë" => "E",
      "Ì" => "I",
      "Í" => "I",
      "Î" => "I",
      "Ï" => "I",
      "Ð" => "D",
      "Ñ" => "N",
      "Ò" => "O",
      "Ó" => "O",
      "Ô" => "O",
      "Õ" => "O",
      "Ö" => "O",
      "Ø" => "O",
      "Ù" => "U",
      "Ú" => "U",
      "Û" => "U",
      "Ü" => "U",
      "Ý" => "Y",
      "Þ" => "Th",
      "ß" => "ss",
      "à" => "a",
      "á" => "a",
      "â" => "a",
      "ã" => "a",
      "ä" => "a",
      "å" => "a",
      "æ" => "ae",
      "ç" => "c",
      "è" => "e",
      "é" => "e",
      "ê" => "e",
      "ë" => "e",
      "ì" => "i",
      "í" => "i",
      "î" => "i",
      "ï" => "i",
      "ð" => "d",
      "ñ" => "n",
      "ò" => "o",
      "ó" => "o",
      "ô" => "o",
      "õ" => "o",
      "ö" => "o",
      "ø" => "o",
      "ù" => "u",
      "ú" => "u",
      "û" => "u",
      "ü" => "u",
      "ý" => "y",
      "þ" => "th",
      "ÿ" => "y",
      "Ā" => "A",
      "Ă" => "A",
      "Ą" => "A",
      "Ć" => "C",
      "Ĉ" => "C",
      "Ċ" => "C",
      "Č" => "C",
      "Ď" => "D",
      "Đ" => "D",
      "Ē" => "E",
      "Ĕ" => "E",
      "Ė" => "E",
      "Ę" => "E",
      "Ě" => "E",
      "Ĝ" => "G",
      "Ğ" => "G",
      "Ġ" => "G",
      "Ģ" => "G",
      "Ĥ" => "H",
      "Ħ" => "H",
      "Ĩ" => "I",
      "Ī" => "I",
      "Ĭ" => "I",
      "Į" => "I",
      "İ" => "I",
      # U+0132 LATIN CAPITAL LIGATURE IJ. Written as an escape so the key is
      # unambiguously one code point and can never shadow a plain "I".
      "\u0132" => "Ij",
      "Ĵ" => "J",
      "Ķ" => "K",
      "Ĺ" => "L",
      "Ļ" => "L",
      "Ľ" => "L",
      "Ŀ" => "L",
      "Ł" => "L",
      "Ń" => "N",
      "Ņ" => "N",
      "Ň" => "N",
      "Ŋ" => "Ng",
      "Ō" => "O",
      "Ŏ" => "O",
      "Ő" => "O",
      "Œ" => "OE",
      "Ŕ" => "R",
      "Ŗ" => "R",
      "Ř" => "R",
      "Ś" => "S",
      "Ŝ" => "S",
      "Ş" => "S",
      "Š" => "S",
      "Ţ" => "T",
      "Ť" => "T",
      "Ŧ" => "T",
      "Ũ" => "U",
      "Ū" => "U",
      "Ŭ" => "U",
      "Ů" => "U",
      "Ű" => "U",
      "Ų" => "U",
      "Ŵ" => "W",
      "Ŷ" => "Y",
      "Ÿ" => "Y",
      "Ź" => "Z",
      "Ż" => "Z",
      "Ž" => "Z",
      "ā" => "a",
      "ă" => "a",
      "ą" => "a",
      "ć" => "c",
      "ĉ" => "c",
      "ċ" => "c",
      "č" => "c",
      "ď" => "d",
      "đ" => "d",
      "ē" => "e",
      "ĕ" => "e",
      "ė" => "e",
      "ę" => "e",
      "ě" => "e",
      "ĝ" => "g",
      "ğ" => "g",
      "ġ" => "g",
      "ģ" => "g",
      "ĥ" => "h",
      "ħ" => "h",
      "ĩ" => "i",
      "ī" => "i",
      "ĭ" => "i",
      "į" => "i",
      "ı" => "i",
      # U+0133 LATIN SMALL LIGATURE IJ (single code point, see U+0132 above).
      "\u0133" => "ij",
      "ĵ" => "j",
      "ķ" => "k",
      "ĸ" => "k",
      "ĺ" => "l",
      "ļ" => "l",
      "ľ" => "l",
      "ŀ" => "l",
      "ł" => "l",
      "ń" => "n",
      "ņ" => "n",
      "ň" => "n",
      # U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE (single code point).
      "\u0149" => "n",
      "ŋ" => "ng",
      "ō" => "o",
      "ŏ" => "o",
      "ő" => "o",
      "œ" => "oe",
      "ŕ" => "r",
      "ŗ" => "r",
      "ř" => "r",
      "ś" => "s",
      "ŝ" => "s",
      "ş" => "s",
      "š" => "s",
      "ţ" => "t",
      "ť" => "t",
      "ŧ" => "t",
      "ũ" => "u",
      "ū" => "u",
      "ŭ" => "u",
      "ů" => "u",
      "ű" => "u",
      "ų" => "u",
      "ŵ" => "w",
      "ŷ" => "y",
      "ž" => "z",
      "ź" => "z",
      "ż" => "z"
    }.freeze
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Transliterator
  # Wraps a string so that one or more transliterators can be applied to it.
  class Text
    # @return [String] the string this instance was created with
    attr_reader :string

    # @param string [String] the text to wrap
    def initialize(string)
      @string = string
    end

    # Runs a copy of the wrapped string through each transliterator in turn,
    # feeding the result of one into the next. When no transliterator is
    # given, +Latin.instance+ is used.
    #
    # @param transliterators [Array<#transliterate>] objects responding to
    #   +transliterate(string)+
    # @return [String] the result returned by the last transliterator
    def transliterate(*transliterators)
      pipeline = transliterators.empty? ? [Latin.instance] : transliterators

      pipeline.inject(string.dup) do |text, transliterator|
        transliterator.transliterate(text)
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
|
4
|
+
require File.expand_path('../../lib/transliterator', __FILE__)
|
5
|
+
|
6
|
+
# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories. The list is sorted so that the load
# order does not depend on the order the file system returns entries in.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].sort.each { |f| require f }

RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  # Run only examples tagged :focus, or everything when nothing is tagged.
  config.run_all_when_everything_filtered = true
  config.filter_run :focus
  config.mock_with :rspec
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Behavior every transliterator derived from Transliterator::Base must have.
shared_examples_for 'a base transliterator' do
  subject { described_class.instance }

  it 'is a singleton' do
    subject.should be_a Transliterator::Base
    expect { described_class.new }.to raise_error(NoMethodError)
  end

  it 'transliterates math related characters' do
    transliterated = subject.transliterate('5 × 10 ÷ 2 ‐ 5 = 20')

    transliterated.should == '5 x 10 / 2 - 5 = 20'
  end

  it 'transliterates "smart" quotes' do
    transliterated = subject.transliterate('‛This‘ „is a quote”')

    transliterated.should == '\'This\' "is a quote"'
  end

  it 'transliterates non-breaking spaces' do
    transliterated = subject.transliterate("\xc2\xa0")

    transliterated.should == ' '
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Behavior expected from any transliterator that handles Cyrillic text.
shared_examples_for 'a cyrillic transliterator' do
  subject { described_class.instance }

  it 'transliterates cyrillic characters' do
    transliterated = subject.transliterate("Славься, Отечество наше свободное")

    transliterated.should == "Slavsya, Otechestvo nashe svobodnoe"
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Behavior expected from any transliterator that handles Greek text.
shared_examples_for 'a greek transliterator' do
  subject { described_class.instance }

  it 'transliterates greek characters' do
    transliterated = subject.transliterate("Γερμανία")

    transliterated.should == "Germania"
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Behavior expected from any transliterator that handles accented Latin text.
shared_examples_for 'a latin transliterator' do
  subject { described_class.instance }

  it 'transliterates latin characters' do
    transliterated = subject.transliterate('cuī dōnō lĕpĭdūm nŏvūm lĭbēllŭm')

    transliterated.should == 'cui dono lepidum novum libellum'
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Transliterator::Text do
  let(:string) { "String" }

  subject { Transliterator::Text.new(string) }

  describe '#transliterate' do
    it 'translates with the given transliterator' do
      reverser = stub
      reverser.should_receive(:transliterate).with(string)

      subject.transliterate(reverser)
    end

    it 'returns the transliterated string' do
      reverser = mock(transliterate: 'gnirtS')
      result   = subject.transliterate(reverser)

      result.should == "gnirtS"
    end

    it 'transliterates with Transliterator::Latin if no transliterator is given' do
      latin = Transliterator::Latin.instance
      latin.should_receive(:transliterate).with(string)

      subject.transliterate
    end

    it 'can apply multiple transliterators' do
      # Two hand-rolled transliterators: the first reverses its input, the
      # second capitalizes whatever it is given.
      reverser = Object.new
      def reverser.transliterate(string)
        string.reverse
      end

      capitalizer = Object.new
      def capitalizer.transliterate(string)
        string.capitalize
      end

      result = subject.transliterate(reverser, capitalizer)

      result.should == 'Gnirts'
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe Transliterator do
  describe '#asciify' do
    it 'transliterates the given text and strips remaining UTF-8 characters' do
      asciified = Transliterator.asciify(
        "cuī dōnō lĕpĭdūm nŏvūm lĭbēllŭm\nSnowman ☃\n5 × 10 ÷ 2 ‐ 5 = 20"
      )

      asciified.should == "cui dono lepidum novum libellum\nSnowman \n5 x 10 / 2 - 5 = 20"
    end
  end

  describe '#transliterate' do
    it 'transliterates the given text' do
      transliterated = Transliterator.transliterate("cuī dōnō lĕpĭdūm nŏvūm lĭbēllŭm")

      transliterated.should == "cui dono lepidum novum libellum"
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/transliterator/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for the transliterator gem. The file lists are taken from
# git, so this must be evaluated inside a git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Tom-Eric Gerritsen"]
  gem.email         = ["tomeric@eet.nu"]
  gem.description   = <<-EOD
    A library for translating UTF-8 characters to their ASCII equivalents. This
    library is based on code from the babosa gem and meant for projects that
    only need the transliterator functionality.
  EOD

  gem.summary       = %q{A ruby library for translating UTF-8 characters to their ASCII equivalents.}
  gem.homepage      = "https://github.com/eet-nu/transliterator"
  # The package ships an MIT LICENSE file; declare it so it shows up in the
  # gem metadata.
  gem.licenses      = ["MIT"]

  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.name          = "transliterator"
  gem.require_paths = ["lib"]
  gem.version       = Transliterator::VERSION
end
|
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: transliterator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tom-Eric Gerritsen
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-04-06 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! " A library for translating UTF-8 characters to their ASCII equivalents.
|
15
|
+
This\n library is based on code from the babosa gem and meant for projects that\n
|
16
|
+
\ only need the transliterator functionality.\n"
|
17
|
+
email:
|
18
|
+
- tomeric@eet.nu
|
19
|
+
executables: []
|
20
|
+
extensions: []
|
21
|
+
extra_rdoc_files: []
|
22
|
+
files:
|
23
|
+
- .gitignore
|
24
|
+
- .rspec
|
25
|
+
- .rvmrc
|
26
|
+
- .travis.yml
|
27
|
+
- .yardopts
|
28
|
+
- Gemfile
|
29
|
+
- Guardfile
|
30
|
+
- LICENSE
|
31
|
+
- README.md
|
32
|
+
- Rakefile
|
33
|
+
- lib/transliterator.rb
|
34
|
+
- lib/transliterator/base.rb
|
35
|
+
- lib/transliterator/cyrillic.rb
|
36
|
+
- lib/transliterator/greek.rb
|
37
|
+
- lib/transliterator/latin.rb
|
38
|
+
- lib/transliterator/text.rb
|
39
|
+
- lib/transliterator/version.rb
|
40
|
+
- spec/spec_helper.rb
|
41
|
+
- spec/support/shared/base_transliterator.rb
|
42
|
+
- spec/support/shared/cyrillic_transilterator.rb
|
43
|
+
- spec/support/shared/greek_translitterator.rb
|
44
|
+
- spec/support/shared/latin_transliterator.rb
|
45
|
+
- spec/transliterator/base_spec.rb
|
46
|
+
- spec/transliterator/cyrillic_spec.rb
|
47
|
+
- spec/transliterator/greek_spec.rb
|
48
|
+
- spec/transliterator/latin_spec.rb
|
49
|
+
- spec/transliterator/text_spec.rb
|
50
|
+
- spec/transliterator_spec.rb
|
51
|
+
- transliterator.gemspec
|
52
|
+
homepage: https://github.com/eet-nu/transliterator
|
53
|
+
licenses: []
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.8.17
|
73
|
+
signing_key:
|
74
|
+
specification_version: 3
|
75
|
+
summary: A ruby library for translating UTF-8 characters to their ASCII equivalents.
|
76
|
+
test_files:
|
77
|
+
- spec/spec_helper.rb
|
78
|
+
- spec/support/shared/base_transliterator.rb
|
79
|
+
- spec/support/shared/cyrillic_transilterator.rb
|
80
|
+
- spec/support/shared/greek_translitterator.rb
|
81
|
+
- spec/support/shared/latin_transliterator.rb
|
82
|
+
- spec/transliterator/base_spec.rb
|
83
|
+
- spec/transliterator/cyrillic_spec.rb
|
84
|
+
- spec/transliterator/greek_spec.rb
|
85
|
+
- spec/transliterator/latin_spec.rb
|
86
|
+
- spec/transliterator/text_spec.rb
|
87
|
+
- spec/transliterator_spec.rb
|
88
|
+
has_rdoc:
|