smart_translitter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/smart_translitter.rb +113 -0
- metadata +56 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# Transliterates Cyrillic (Russian) text into Latin script. Several letters
# have more than one accepted Latin spelling, so the result is the list of
# every combination of those spellings for the given string.
class SmartTranslitter
  # Latin spellings for each lower-case Cyrillic letter, in emission order.
  LOWER_CASE = {
    "а" => ["a"],
    "б" => ["b"],
    "в" => ["v"],
    "г" => ["g"],
    "д" => ["d"],
    "е" => ["e"],
    "ё" => ["yo", "jo"],
    "ж" => ["zh", "j"],
    "з" => ["z"],
    "и" => ["i"],
    "й" => ["j", "jj", "y"],
    "к" => ["k"],
    "л" => ["l"],
    "м" => ["m"],
    "н" => ["n"],
    "о" => ["o"],
    "п" => ["p"],
    "р" => ["r"],
    "с" => ["s"],
    "т" => ["t"],
    "у" => ["u"],
    "ф" => ["f"],
    "х" => ["ch", "h", "kh"],
    "ц" => ["c", "ts", "cz"],
    "ч" => ["ch"],
    "ш" => ["sh"],
    "щ" => ["shh", "shch", "sc"],
    "ъ" => ["\""],
    "ы" => ["y"],
    "ь" => ["'"],
    "э" => ["e", "eh"],
    "ю" => ["ju", "yu", "iu"],
    "я" => ["ja", "ya", "ia"]
  }

  # Latin spellings for each upper-case Cyrillic letter, in emission order.
  # NOTE(review): "Е" lists "Je"/"Ye" while lower-case "е" lists only "e" --
  # confirm whether this asymmetry is intentional before aligning the tables.
  UPPER_CASE = {
    "А" => ["A"],
    "Б" => ["B"],
    "В" => ["V"],
    "Г" => ["G"],
    "Д" => ["D"],
    "Е" => ["E", "Je", "Ye"],
    "Ё" => ["Yo", "Jo"],
    "Ж" => ["Zh", "J"],
    "З" => ["Z"],
    "И" => ["I"],
    "Й" => ["J", "Jj", "Y"],
    "К" => ["K"],
    "Л" => ["L"],
    "М" => ["M"],
    "Н" => ["N"],
    "О" => ["O"],
    "П" => ["P"],
    "Р" => ["R"],
    "С" => ["S"],
    "Т" => ["T"],
    "У" => ["U"],
    "Ф" => ["F"],
    "Х" => ["Ch", "H", "Kh"],
    "Ц" => ["C", "Ts", "Cz"],
    "Ч" => ["Ch"],
    "Ш" => ["Sh"],
    "Щ" => ["Shh", "Shch", "Sc"],
    "Ъ" => ["\""],
    "Ы" => ["Y"],
    "Ь" => ["'"],
    "Э" => ["E", "Eh"],
    "Ю" => ["Ju", "Yu", "Iu"],
    "Я" => ["Ja", "Ya", "Ia"]
  }

  # Builds every transliteration of +word+.
  #
  # Each character found in UPPER_CASE or LOWER_CASE contributes all of its
  # listed spellings; any other character is copied through unchanged.
  #
  # @param word [String] text to transliterate
  # @return [Array<String>] all spelling combinations; [""] for an empty string
  def self.t(word)
    word.each_char.reduce([""]) do |results, ch|
      # Characters absent from both tables pass through as their only variant.
      variants = UPPER_CASE[ch] || LOWER_CASE[ch] || [ch]

      # Full Cartesian product, variant-major: every accumulated prefix is
      # paired with every variant. Pairing by slices of duplicated prefixes
      # drops and repeats combinations once a 3-variant letter follows
      # more than one accumulated prefix.
      variants.flat_map do |variant|
        results.map { |prefix| prefix + variant }
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: smart_translitter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Stanislav Mekhonoshin
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-10 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &2151909640 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2151909640
|
25
|
+
description: Translits doubly translitting russian chars
|
26
|
+
email: ejabberd@gmail.com
|
27
|
+
executables: []
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files: []
|
30
|
+
files:
|
31
|
+
- lib/smart_translitter.rb
|
32
|
+
homepage: http://rubygems.org/gems/smart_translitter
|
33
|
+
licenses: []
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
require_paths:
|
37
|
+
- lib
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
45
|
+
none: false
|
46
|
+
requirements:
|
47
|
+
- - ! '>='
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubyforge_project:
|
52
|
+
rubygems_version: 1.8.17
|
53
|
+
signing_key:
|
54
|
+
specification_version: 3
|
55
|
+
summary: Smart cyrillic2latin translitter!
|
56
|
+
test_files: []
|