transliterator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/.rvmrc ADDED
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
+ # development environment upon cd'ing into the directory
5
+
6
+ # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
+ # Only full ruby name is supported here, for short names use:
8
+ # echo "rvm use 1.9.3" > .rvmrc
9
+ environment_id="ruby-1.9.3-p125@transliterator"
10
+
11
+ # Uncomment the following lines if you want to verify rvm version per project
12
+ # rvmrc_rvm_version="1.10.3" # 1.10.1 seems to be a safe start
13
+ # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
+ # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
+ # return 1
16
+ # }
17
+
18
+ # First we attempt to load the desired environment directly from the environment
19
+ # file. This is very fast and efficient compared to running through the entire
20
+ # CLI and selector. If you want feedback on which environment was used then
21
+ # insert the word 'use' after --create as this triggers verbose mode.
22
+ if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
+ then
25
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
+ [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
+ \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
+ else
29
+ # If the environment file has not yet been created, use the RVM CLI to select.
30
+ rvm --create "$environment_id" || {
31
+ echo "Failed to create RVM environment '${environment_id}'."
32
+ return 1
33
+ }
34
+ fi
35
+
36
+ # If you use bundler, this might be useful to you:
37
+ if [[ -s Gemfile ]] && {
38
+ ! builtin command -v bundle >/dev/null ||
39
+ builtin command -v bundle | grep $rvm_path/bin/bundle >/dev/null
40
+ }
41
+ then
42
+ printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43
+ gem install bundler
44
+ fi
45
+ if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46
+ then
47
+ bundle install | grep -vE '^Using|Your bundle is complete'
48
+ fi
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - jruby-19mode
5
+ - rbx-19mode
@@ -0,0 +1,3 @@
1
+ --readme README.md
2
+ --charset utf-8
3
+ 'lib/**/*.rb' - '*.md'
data/Gemfile ADDED
@@ -0,0 +1,26 @@
1
# Fetch gems over HTTPS from the official index. The `:rubygems` symbol
# shortcut is deprecated by Bundler and resolves to an insecure HTTP source.
source 'https://rubygems.org'

# Specify the project's dependencies in the gemspec:
gemspec

# Specify the development dependencies here:
group :development do
  gem 'rake'

  # Documentation:
  gem 'yard'

  # Testing libraries:
  gem 'rspec'

  # Colorful messages:
  gem 'rainbow'

  # Support for guard:
  gem 'guard'
  gem 'guard-bundler'
  gem 'guard-rspec'
  gem 'rb-fsevent'
  gem 'rb-readline'
  gem 'fuubar'
end
@@ -0,0 +1,14 @@
1
+ require 'rb-readline'
2
+
3
+ guard 'bundler' do
4
+ watch('Gemfile')
5
+ watch('transliterator.gemspec')
6
+ end
7
+
8
+ guard 'rspec', version: 2, cli: '--format Fuubar --colour' do
9
+ watch(%r{^spec/.+_spec\.rb})
10
+ watch(%r{^lib/(.+)\.rb}) { |m| "spec/#{m[1]}_spec.rb" }
11
+ watch(%r{lib/.+\.rb}) { "spec" }
12
+ watch('spec/spec_helper.rb') { "spec" }
13
+ watch(%r{^spec/support/(.+)\.rb}) { "spec" }
14
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Tom-Eric Gerritsen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,36 @@
1
+ # Transliterator [![Build Status](https://secure.travis-ci.org/eet-nu/transliterator.png)][Travis CI] [![Dependency Status](https://gemnasium.com/eet-nu/transliterator.png)][Gemnasium]
2
+
3
+
4
+ A library for translating UTF-8 characters to their ASCII equivalents. This
5
+ library is based on code from the [babosa gem][Babosa] and meant for projects
6
+ that only need the transliterating functionality.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'transliterator'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install transliterator
21
+
22
+ ## Usage
23
+
24
+ TODO: Write usage instructions here
25
+
26
+ ## Contributing
27
+
28
+ 1. Fork it
29
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
30
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
31
+ 4. Push to the branch (`git push origin my-new-feature`)
32
+ 5. Create new Pull Request
33
+
34
+ [Travis CI]: http://travis-ci.org/eet-nu/transliterator
35
+ [Gemnasium]: https://gemnasium.com/eet-nu/transliterator
36
+ [Babosa]: http://norman.github.com/babosa/
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ desc "Start a console with the Transliterator library loaded"
5
+ task :console do
6
+ exec "irb -r ./lib/transliterator"
7
+ end
8
+
9
+ # RSpec tasks:
10
+ require 'rspec/core/rake_task'
11
+ RSpec::Core::RakeTask.new(:spec)
12
+ task :default => :spec
13
+
14
+ # YARD tasks:
15
+ require 'yard'
16
+ YARD::Rake::YardocTask.new do |t|
17
+ t.files = ['lib/**/*.rb']
18
+ t.options = ['--readme', 'README.md', '--charset', 'utf-8']
19
+ end
@@ -0,0 +1,18 @@
1
# Namespace for the transliteration library. Constants are registered with
# +autoload+, so each file is only required the first time its constant is
# referenced.
module Transliterator
  autoload :VERSION,  'transliterator/version'

  autoload :Base,     'transliterator/base'
  autoload :Cyrillic, 'transliterator/cyrillic'
  autoload :Greek,    'transliterator/greek'
  autoload :Latin,    'transliterator/latin'

  autoload :Text,     'transliterator/text'

  class << self
    # Wraps +input+ in a +Transliterator::Text+ and transliterates it with
    # that class's default transliterator.
    #
    # @param input [String] the text to transliterate
    # @return [String] the transliterated text
    def transliterate(input)
      text = Text.new(input)
      text.transliterate
    end

    # Transliterates +input+ and then removes every character that is still
    # outside the 7-bit ASCII range.
    #
    # @param input [String] the text to convert
    # @return [String] the transliterated text with non-ASCII characters removed
    def asciify(input)
      transliterated = transliterate(input)
      transliterated.gsub(/[^\x00-\x7f]/u, '')
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # encoding: utf-8
2
+
3
+ require 'singleton'
4
+
5
module Transliterator
  # Transliterator for punctuation, math symbols and whitespace variants.
  #
  # Subclasses declare their own +APPROXIMATIONS+ constant; on first use their
  # table is layered on top of a copy of the superclass instance's table.
  class Base
    include Singleton

    # Maps a single character to its ASCII replacement string. Only the first
    # codepoint of each key is used when the lookup table is built.
    APPROXIMATIONS = {
      "×" => "x",
      "÷" => "/",
      "‐" => "-",
      "‑" => "-",
      "‒" => "-",
      "–" => "-",
      "—" => "-",
      "―" => "-",
      "‘" => "'",
      "‛" => "'",
      "“" => '"',
      "”" => '"',
      "„" => '"',
      "‟" => '"',
      '’' => "'",
      # various kinds of space characters
      "\xc2\xa0"     => " ",
      "\xe2\x80\x80" => " ",
      "\xe2\x80\x81" => " ",
      "\xe2\x80\x82" => " ",
      "\xe2\x80\x83" => " ",
      "\xe2\x80\x84" => " ",
      "\xe2\x80\x85" => " ",
      "\xe2\x80\x86" => " ",
      "\xe2\x80\x87" => " ",
      "\xe2\x80\x88" => " ",
      "\xe2\x80\x89" => " ",
      "\xe2\x80\x8a" => " ",
      "\xe2\x81\x9f" => " ",
      "\xe3\x80\x80" => " ",
    }.freeze

    # @return [Hash] lookup table keyed by source codepoint (Integer); each
    #   value is a single replacement codepoint or an Array of codepoints
    attr_reader :approximations

    # Initializes a new +Transliterator::Base+ instance.
    #
    # Because +Transliterator::Base+ is a singleton, you can only get an
    # instance of it by calling the +#instance+ class method on it:
    #
    #   Transliterator::Base.new      # => NoMethodError: private method `new' called for Transliterator::Base:Class
    #   Transliterator::Base.instance # => #<Transliterator::Base:0x007f9b8c086e78>
    #
    # @return [Transliterator::Base] The +Transliterator::Base+ instance
    def initialize
      # Subclasses start from a copy of their parent's table so that the
      # base punctuation/space mappings stay available.
      @approximations =
        if self.class < Base
          self.class.superclass.instance.approximations.dup
        else
          {}
        end

      self.class::APPROXIMATIONS.each do |character, replacement|
        # Decode the replacement as codepoints (not raw bytes): #transliterate
        # packs the result with "U*", so the units have to match.
        codepoints = replacement.unpack("U*")
        @approximations[character.unpack("U").first] =
          codepoints.length == 1 ? codepoints.first : codepoints
      end
    end

    # Transliterate a given string's UTF-8 characters to their ASCII equivalents.
    #
    #   transliterator = Transliterator::Base.instance
    #   transliterator.transliterate "5 × 10 ÷ 2 ‐ 5 = 20" # => "5 x 10 / 2 - 5 = 20"
    #
    # Characters without an entry in +approximations+ are passed through
    # unchanged; characters mapped to an empty replacement are dropped.
    #
    # @param string [String] the UTF-8 text to transliterate
    # @return [String] The transliterated string
    def transliterate(string)
      string.unpack("U*")
            .map { |codepoint| approximations[codepoint] || codepoint }
            .flatten
            .pack("U*")
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # encoding: utf-8
2
module Transliterator
  # Transliterator for Cyrillic letters. The table below is layered on top
  # of the mappings inherited from +Transliterator::Base+.
  class Cyrillic < Base
    # Maps a Cyrillic character to its ASCII replacement. An empty string
    # means the character is dropped from the output. Frozen, like
    # +Base::APPROXIMATIONS+, so the shared table cannot be mutated.
    APPROXIMATIONS = {
      "Ё" => "Yo",
      "Ѓ" => "G",
      "Є" => "Ye",
      "Ї" => "Yi",
      "Љ" => "L",
      "Њ" => "N",
      "Ќ" => "K",
      "Ў" => "U",
      "Џ" => "Dh",
      "А" => "A",
      "Б" => "B",
      "В" => "V",
      "Г" => "G",
      "Д" => "D",
      "Е" => "E",
      "Ж" => "Zh",
      "З" => "Z",
      "И" => "I",
      "Й" => "J",
      "К" => "K",
      "Л" => "L",
      "М" => "M",
      "Н" => "N",
      "О" => "O",
      "П" => "P",
      "Р" => "R",
      "С" => "S",
      "Т" => "T",
      "У" => "U",
      "Ф" => "F",
      "Х" => "X",
      "Ц" => "Cz",
      "Ч" => "Ch",
      "Ш" => "Sh",
      "Щ" => "Shh",
      "Ъ" => "",
      "Ы" => "Y",
      "Ь" => "",
      "Э" => "E",
      "Ю" => "Yu",
      "Я" => "Ya",
      "а" => "a",
      "б" => "b",
      "в" => "v",
      "г" => "g",
      "д" => "d",
      "е" => "e",
      "ж" => "zh",
      "з" => "z",
      "и" => "i",
      "й" => "j",
      "к" => "k",
      "л" => "l",
      "м" => "m",
      "н" => "n",
      "о" => "o",
      "п" => "p",
      "р" => "r",
      "с" => "s",
      "т" => "t",
      "у" => "u",
      "ф" => "f",
      "х" => "x",
      "ц" => "cz",
      "ч" => "ch",
      "ш" => "sh",
      "щ" => "shh",
      "ъ" => "",
      "ы" => "y",
      "ь" => "",
      "э" => "e",
      "ю" => "yu",
      "я" => "ya",
      "ё" => "yo",
      "ѓ" => "g",
      "є" => "ye",
      "ї" => "yi",
      "љ" => "l",
      "њ" => "n",
      "ќ" => "k",
      "ў" => "u",
      "џ" => "dh",
      "Ѣ" => "Ye",
      "ѣ" => "ye",
      "Ѫ" => "O",
      "ѫ" => "o",
      "Ѳ" => "Fh",
      "ѳ" => "fh",
      "Ѵ" => "Yh",
      "ѵ" => "yh",
      "Ґ" => "G",
      "ґ" => "g"
    }.freeze
  end
end
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
module Transliterator
  # Transliterator for Greek letters. The table below is layered on top of
  # the mappings inherited from +Transliterator::Base+.
  class Greek < Base
    # Maps a Greek character to its ASCII replacement. Every replacement is
    # plain ASCII and keeps the case of the source letter. Frozen, like
    # +Base::APPROXIMATIONS+, so the shared table cannot be mutated.
    APPROXIMATIONS = {
      "Α" => "A",
      "Ά" => "A",
      "α" => "a",
      "ά" => "a",
      "Β" => "B",
      "β" => "b",
      "Γ" => "G",
      "γ" => "g",
      "Δ" => "D",
      "δ" => "d",
      "Ε" => "E",
      "Έ" => "E",
      "ε" => "e",
      "έ" => "e",
      "Ζ" => "Z",
      "ζ" => "z",
      "Η" => "E",
      "Ή" => "E", # was "e": upper-case eta must stay upper-case
      "η" => "e",
      "ή" => "e", # was "E": lower-case eta must stay lower-case
      "Θ" => "Th",
      "θ" => "th",
      "Ι" => "I",
      "Ί" => "I", # replacement is the ASCII letter I, not a Greek iota
      # NOTE(review): the next key is a Latin I with circumflex; presumably a
      # Greek iota with dialytika was intended - confirm before changing.
      "Î" => "I",
      "ι" => "i",
      "ί" => "i",
      "Κ" => "K",
      "κ" => "k",
      "Λ" => "L",
      "λ" => "l",
      "Μ" => "M",
      "μ" => "m",
      "Ν" => "N",
      "ν" => "n",
      "Ξ" => "X",
      "ξ" => "x",
      "Ο" => "O",
      "Ό" => "O",
      "ο" => "o",
      "ό" => "o",
      "Π" => "P",
      "π" => "p",
      "Ρ" => "R",
      "ρ" => "r",
      "Σ" => "S",
      "σ" => "s",
      "ς" => "s",
      "Τ" => "T",
      "τ" => "t",
      "Υ" => "I",
      "Ύ" => "I",
      "υ" => "i",
      "ύ" => "i",
      "Φ" => "F",
      "φ" => "f",
      "Χ" => "Ch",
      "χ" => "ch",
      "Ψ" => "Ps",
      "ψ" => "ps",
      "Ω" => "O",
      "Ώ" => "O",
      "ω" => "o",
      "ώ" => "o"
    }.freeze
  end
end
@@ -0,0 +1,196 @@
1
+ # encoding: utf-8
2
module Transliterator
  # Transliterator for accented and extended Latin letters. The table below
  # is layered on top of the mappings inherited from +Transliterator::Base+.
  class Latin < Base
    # Maps a Latin character to its ASCII replacement. Frozen, like
    # +Base::APPROXIMATIONS+, so the shared table cannot be mutated.
    #
    # Every key must be exactly ONE codepoint, because only the first
    # codepoint of a key is used for the lookup. Characters that text
    # normalization tends to split into several codepoints (the IJ ligatures,
    # L with middle dot, n preceded by apostrophe) are therefore written as
    # \u escapes; a two-letter key such as "ij" would silently remap the
    # plain letter "i".
    APPROXIMATIONS = {
      "À" => "A",
      "Á" => "A",
      "Â" => "A",
      "Ã" => "A",
      "Ä" => "A",
      "Å" => "A",
      "Æ" => "Ae",
      "Ç" => "C",
      "È" => "E",
      "É" => "E",
      "Ê" => "E",
      "Ë" => "E",
      "Ì" => "I",
      "Í" => "I",
      "Î" => "I",
      "Ï" => "I",
      "Ð" => "D",
      "Ñ" => "N",
      "Ò" => "O",
      "Ó" => "O",
      "Ô" => "O",
      "Õ" => "O",
      "Ö" => "O",
      "Ø" => "O",
      "Ù" => "U",
      "Ú" => "U",
      "Û" => "U",
      "Ü" => "U",
      "Ý" => "Y",
      "Þ" => "Th",
      "ß" => "ss",
      "à" => "a",
      "á" => "a",
      "â" => "a",
      "ã" => "a",
      "ä" => "a",
      "å" => "a",
      "æ" => "ae",
      "ç" => "c",
      "è" => "e",
      "é" => "e",
      "ê" => "e",
      "ë" => "e",
      "ì" => "i",
      "í" => "i",
      "î" => "i",
      "ï" => "i",
      "ð" => "d",
      "ñ" => "n",
      "ò" => "o",
      "ó" => "o",
      "ô" => "o",
      "õ" => "o",
      "ö" => "o",
      "ø" => "o",
      "ù" => "u",
      "ú" => "u",
      "û" => "u",
      "ü" => "u",
      "ý" => "y",
      "þ" => "th",
      "ÿ" => "y",
      "Ā" => "A",
      "Ă" => "A",
      "Ą" => "A",
      "Ć" => "C",
      "Ĉ" => "C",
      "Ċ" => "C",
      "Č" => "C",
      "Ď" => "D",
      "Đ" => "D",
      "Ē" => "E",
      "Ĕ" => "E",
      "Ė" => "E",
      "Ę" => "E",
      "Ě" => "E",
      "Ĝ" => "G",
      "Ğ" => "G",
      "Ġ" => "G",
      "Ģ" => "G",
      "Ĥ" => "H",
      "Ħ" => "H",
      "Ĩ" => "I",
      "Ī" => "I",
      "Ĭ" => "I",
      "Į" => "I",
      "İ" => "I",
      "\u0132" => "Ij", # capital IJ ligature
      "Ĵ" => "J",
      "Ķ" => "K",
      "Ĺ" => "L",
      "Ļ" => "L",
      "Ľ" => "L",
      "\u013f" => "L",  # capital L with middle dot
      "Ł" => "L",
      "Ń" => "N",
      "Ņ" => "N",
      "Ň" => "N",
      "Ŋ" => "Ng",
      "Ō" => "O",
      "Ŏ" => "O",
      "Ő" => "O",
      "Œ" => "OE",
      "Ŕ" => "R",
      "Ŗ" => "R",
      "Ř" => "R",
      "Ś" => "S",
      "Ŝ" => "S",
      "Ş" => "S",
      "Š" => "S",
      "Ţ" => "T",
      "Ť" => "T",
      "Ŧ" => "T",
      "Ũ" => "U",
      "Ū" => "U",
      "Ŭ" => "U",
      "Ů" => "U",
      "Ű" => "U",
      "Ų" => "U",
      "Ŵ" => "W",
      "Ŷ" => "Y",
      "Ÿ" => "Y",
      "Ź" => "Z",
      "Ż" => "Z",
      "Ž" => "Z",
      "ā" => "a",
      "ă" => "a",
      "ą" => "a",
      "ć" => "c",
      "ĉ" => "c",
      "ċ" => "c",
      "č" => "c",
      "ď" => "d",
      "đ" => "d",
      "ē" => "e",
      "ĕ" => "e",
      "ė" => "e",
      "ę" => "e",
      "ě" => "e",
      "ĝ" => "g",
      "ğ" => "g",
      "ġ" => "g",
      "ģ" => "g",
      "ĥ" => "h",
      "ħ" => "h",
      "ĩ" => "i",
      "ī" => "i",
      "ĭ" => "i",
      "į" => "i",
      "ı" => "i",
      "\u0133" => "ij", # small ij ligature
      "ĵ" => "j",
      "ķ" => "k",
      "ĸ" => "k",
      "ĺ" => "l",
      "ļ" => "l",
      "ľ" => "l",
      "\u0140" => "l",  # small l with middle dot
      "ł" => "l",
      "ń" => "n",
      "ņ" => "n",
      "ň" => "n",
      "\u0149" => "n",  # small n preceded by apostrophe
      "ŋ" => "ng",
      "ō" => "o",
      "ŏ" => "o",
      "ő" => "o",
      "œ" => "oe",
      "ŕ" => "r",
      "ŗ" => "r",
      "ř" => "r",
      "ś" => "s",
      "ŝ" => "s",
      "ş" => "s",
      "š" => "s",
      "ţ" => "t",
      "ť" => "t",
      "ŧ" => "t",
      "ũ" => "u",
      "ū" => "u",
      "ŭ" => "u",
      "ů" => "u",
      "ű" => "u",
      "ų" => "u",
      "ŵ" => "w",
      "ŷ" => "y",
      "ž" => "z",
      "ź" => "z",
      "ż" => "z"
    }.freeze
  end
end
@@ -0,0 +1,18 @@
1
module Transliterator
  # Wraps a string and runs it through one or more transliterators.
  class Text
    # @return [String] the original, untransliterated string
    attr_reader :string

    # @param string [String] the text to be transliterated
    def initialize(string)
      @string = string
    end

    # Applies the given transliterators in order, feeding the output of each
    # one into the next. When none are given, +Latin.instance+ is used.
    #
    #   Transliterator::Text.new("String").transliterate(reverser, capitalizer) # => "Gnirts"
    #
    # @param transliterators [Array<#transliterate>] objects that respond to
    #   +transliterate(string)+ and return the converted string
    # @return [String] the result of the last transliterator
    def transliterate(*transliterators)
      pipeline = transliterators.empty? ? [Latin.instance] : transliterators

      # Start from a copy so a transliterator that mutates its argument
      # cannot alter the wrapped string.
      pipeline.inject(string.dup) do |text, transliterator|
        transliterator.transliterate(text)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Transliterator
  # Current gem version. Frozen so the shared constant cannot be mutated
  # in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,15 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require File.expand_path('../../lib/transliterator', __FILE__)
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }
9
+
10
+ RSpec.configure do |config|
11
+ config.treat_symbols_as_metadata_keys_with_true_values = true
12
+ config.run_all_when_everything_filtered = true
13
+ config.filter_run :focus
14
+ config.mock_with :rspec
15
+ end
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+
3
+ shared_examples_for 'a base transliterator' do
4
+ subject { described_class.instance }
5
+
6
+ it 'is a singleton' do
7
+ subject.should be_a Transliterator::Base
8
+ expect { described_class.new }.to raise_error(NoMethodError)
9
+ end
10
+
11
+ it 'transliterates math related characters' do
12
+ input = '5 × 10 ÷ 2 ‐ 5 = 20'
13
+ output = '5 x 10 / 2 - 5 = 20'
14
+
15
+ subject.transliterate(input).should == output
16
+ end
17
+
18
+ it 'transliterates "smart" quotes' do
19
+ input = '‛This‘ „is a quote”'
20
+ output = '\'This\' "is a quote"'
21
+
22
+ subject.transliterate(input).should == output
23
+ end
24
+
25
+ it 'transliterates non-breaking spaces' do
26
+ input = "\xc2\xa0"
27
+ output = ' '
28
+
29
+ subject.transliterate(input).should == output
30
+ end
31
+ end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ shared_examples_for 'a cyrillic transliterator' do
4
+ subject { described_class.instance }
5
+
6
+ it 'transliterates cyrillic characters' do
7
+ input = "Славься, Отечество наше свободное"
8
+ output = "Slavsya, Otechestvo nashe svobodnoe"
9
+
10
+ subject.transliterate(input).should == output
11
+ end
12
+ end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ shared_examples_for 'a greek transliterator' do
4
+ subject { described_class.instance }
5
+
6
+ it 'transliterates greek characters' do
7
+ input = "Γερμανία"
8
+ output = "Germania"
9
+
10
+ subject.transliterate(input).should == output
11
+ end
12
+ end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ shared_examples_for 'a latin transliterator' do
4
+ subject { described_class.instance }
5
+
6
+ it 'transliterates latin characters' do
7
+ input = 'cuī dōnō lĕpĭdūm nŏvūm lĭbēllŭm'
8
+ output = 'cui dono lepidum novum libellum'
9
+
10
+ subject.transliterate(input).should == output
11
+ end
12
+ end
@@ -0,0 +1,9 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe Transliterator::Base do
6
+ subject { Transliterator::Base.instance }
7
+
8
+ it_behaves_like 'a base transliterator'
9
+ end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe Transliterator::Cyrillic do
6
+ it_behaves_like 'a base transliterator'
7
+ it_behaves_like 'a cyrillic transliterator'
8
+ end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe Transliterator::Greek do
6
+ it_behaves_like 'a base transliterator'
7
+ it_behaves_like 'a greek transliterator'
8
+ end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe Transliterator::Latin do
6
+ it_behaves_like 'a base transliterator'
7
+ it_behaves_like 'a latin transliterator'
8
+ end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+
3
+ describe Transliterator::Text do
4
+ let(:string) { "String" }
5
+
6
+ subject { Transliterator::Text.new(string) }
7
+
8
+ describe '#transliterate' do
9
+ it 'translates with the given transliterator' do
10
+ reverser = stub
11
+ reverser.should_receive(:transliterate)
12
+ .with(string)
13
+
14
+ subject.transliterate(reverser)
15
+ end
16
+
17
+ it 'returns the transliterated string' do
18
+ reverser = mock(transliterate: 'gnirtS')
19
+
20
+ subject.transliterate(reverser).should == "gnirtS"
21
+ end
22
+
23
+ it 'transliterates with Transliterator::Latin if no transliterator is given' do
24
+ Transliterator::Latin.instance
25
+ .should_receive(:transliterate)
26
+ .with(string)
27
+
28
+ subject.transliterate
29
+ end
30
+
31
+ it 'can apply multiple transliterators' do
32
+ reverser = Object.new.tap do |instance|
33
+ def instance.transliterate(string)
34
+ string.reverse
35
+ end
36
+ end
37
+ capitalizer = Object.new.tap do |instance|
38
+ def instance.transliterate(string)
39
+ string.capitalize
40
+ end
41
+ end
42
+
43
+ subject.transliterate(reverser, capitalizer).should == 'Gnirts'
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe Transliterator do
6
+ describe '#asciify' do
7
+ it 'transliterates the given text and strips remaining UTF-8 characters' do
8
+ input = "cuī dōnō lĕpĭdūm nŏvūm lĭbēllŭm\nSnowman ☃\n5 × 10 ÷ 2 ‐ 5 = 20"
9
+ output = "cui dono lepidum novum libellum\nSnowman \n5 x 10 / 2 - 5 = 20"
10
+
11
+ Transliterator.asciify(input).should == output
12
+ end
13
+ end
14
+
15
+ describe '#transliterate' do
16
+ it 'transliterates the given text' do
17
+ input = "cuī dōnō lĕpĭdūm nŏvūm lĭbēllŭm"
18
+ output = "cui dono lepidum novum libellum"
19
+
20
+ Transliterator.transliterate(input).should == output
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/transliterator/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Tom-Eric Gerritsen"]
6
+ gem.email = ["tomeric@eet.nu"]
7
+ gem.description = <<-EOD
8
+ A library for translating UTF-8 characters to their ASCII equivalents. This
9
+ library is based on code from the babosa gem and meant for projects that
10
+ only need the transliterator functionality.
11
+ EOD
12
+
13
+ gem.summary = %q{A ruby library for translating UTF-8 characters to their ASCII equivalents.}
14
+ gem.homepage = "https://github.com/eet-nu/transliterator"
15
+
16
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
+ gem.files = `git ls-files`.split("\n")
18
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
+ gem.name = "transliterator"
20
+ gem.require_paths = ["lib"]
21
+ gem.version = Transliterator::VERSION
22
+ end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: transliterator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tom-Eric Gerritsen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-04-06 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! " A library for translating UTF-8 characters to their ASCII equivalents.
15
+ This\n library is based on code from the babosa gem and meant for projects that\n
16
+ \ only need the transliterator functionality.\n"
17
+ email:
18
+ - tomeric@eet.nu
19
+ executables: []
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - .gitignore
24
+ - .rspec
25
+ - .rvmrc
26
+ - .travis.yml
27
+ - .yardopts
28
+ - Gemfile
29
+ - Guardfile
30
+ - LICENSE
31
+ - README.md
32
+ - Rakefile
33
+ - lib/transliterator.rb
34
+ - lib/transliterator/base.rb
35
+ - lib/transliterator/cyrillic.rb
36
+ - lib/transliterator/greek.rb
37
+ - lib/transliterator/latin.rb
38
+ - lib/transliterator/text.rb
39
+ - lib/transliterator/version.rb
40
+ - spec/spec_helper.rb
41
+ - spec/support/shared/base_transliterator.rb
42
+ - spec/support/shared/cyrillic_transilterator.rb
43
+ - spec/support/shared/greek_translitterator.rb
44
+ - spec/support/shared/latin_transliterator.rb
45
+ - spec/transliterator/base_spec.rb
46
+ - spec/transliterator/cyrillic_spec.rb
47
+ - spec/transliterator/greek_spec.rb
48
+ - spec/transliterator/latin_spec.rb
49
+ - spec/transliterator/text_spec.rb
50
+ - spec/transliterator_spec.rb
51
+ - transliterator.gemspec
52
+ homepage: https://github.com/eet-nu/transliterator
53
+ licenses: []
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 1.8.17
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: A ruby library for translating UTF-8 characters to their ASCII equivalents.
76
+ test_files:
77
+ - spec/spec_helper.rb
78
+ - spec/support/shared/base_transliterator.rb
79
+ - spec/support/shared/cyrillic_transilterator.rb
80
+ - spec/support/shared/greek_translitterator.rb
81
+ - spec/support/shared/latin_transliterator.rb
82
+ - spec/transliterator/base_spec.rb
83
+ - spec/transliterator/cyrillic_spec.rb
84
+ - spec/transliterator/greek_spec.rb
85
+ - spec/transliterator/latin_spec.rb
86
+ - spec/transliterator/text_spec.rb
87
+ - spec/transliterator_spec.rb
88
+ has_rdoc: