gost_translit 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/gost_translit.rb +112 -0
- data/spec/lib/gost_translit_spec.rb +131 -0
- metadata +66 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 48f17d4cc768844b03c5769e9e0c8921faf2ca6f70c3928c576ad12811cec8c3
|
4
|
+
data.tar.gz: 43ea36c1fe5485616d08c35b0c769d3b703a4d2ede713e8ea96af8ec0e91f1fd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e55e9989a6eee8192fc1a550748982ef7b38b2e09fc101ce7bafde406f1acf294bab6ace6f2e55a096307f52ff0cab699f6605eec986833fb5ecbbe8277583f6
|
7
|
+
data.tar.gz: b9b5210af713249fb3dd984df0aa9536eca742bc98043e92f8c265b539dd77e7afb55711993f77f4ac5409b96bb4e52b84ecb69998bfd8f82d92b5e922f18fdf
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Transliterates text between Russian Cyrillic and the Latin notation of
# GOST 7.79-2000 (system B), in both directions.
#
# Input is handled word by word: it is split on whitespace and the converted
# words are joined back with single spaces, so any run of whitespace
# (including newlines) collapses to one space in the result.
module GostTranslit
  UPPER_REGEXP = /[[:upper:]]/
  LOWER_REGEXP = /[[:lower:]]/
  # Any letter; used to find the first letter of a word behind punctuation.
  LETTER_REGEXP = /[[:alpha:]]/
  # ASCII Latin letters only. Deliberately not \w: \w also matches digits and
  # "_", which would make Cyrillic text containing a number look Latin.
  LATIN_LETTER_REGEXP = /[A-Za-z]/
  # "cz" is written as plain "c" when followed by i, e, j or y.
  SHORT_C_REGEXP = /cz(?=[iejy])/

  # Lower-case Cyrillic letter (symbol key) => Latin notation.
  RU_MAPPING = {
    'а': 'a',
    'б': 'b',
    'в': 'v',
    'г': 'g',
    'д': 'd',
    'е': 'e',
    'ё': 'yo',
    'ж': 'zh',
    'з': 'z',
    'и': 'i',
    'й': 'j',
    'к': 'k',
    'л': 'l',
    'м': 'm',
    'н': 'n',
    'о': 'o',
    'п': 'p',
    'р': 'r',
    'с': 's',
    'т': 't',
    'у': 'u',
    'ф': 'f',
    'х': 'x',
    'ц': 'cz',
    'ч': 'ch',
    'ш': 'sh',
    'щ': 'shh',
    'ъ': '``',
    'ы': 'y`',
    'ь': '`',
    'э': 'e`',
    'ю': 'yu',
    'я': 'ya'
  }.freeze

  # Multi-character Latin sequences => Cyrillic letter. These are replaced
  # before single letters, in insertion order, so the order matters
  # ("shh" has to be tried before "sh").
  LATIN_REPLACING_MAPPING = {
    'shh' => 'щ',
    'sh' => 'ш',
    'yu' => 'ю',
    'ya' => 'я',
    '``' => 'ъ',
    'y`' => 'ы',
    'e`' => 'э',
    'ch' => 'ч',
    'cz' => 'ц',
    'zh' => 'ж',
    'yo' => 'ё'
  }.freeze

  # Latin notation => Cyrillic letter (both strings): RU_MAPPING inverted,
  # plus the short form "c" for "ц".
  LATIN_MAPPING = RU_MAPPING
                  .map { |cyrillic, latin| [latin, cyrillic.to_s] }
                  .to_h
                  .merge('c' => 'ц')
                  .freeze

  class << self
    # Converts Cyrillic text to its Latin notation. Characters without a
    # mapping (Latin letters, digits, punctuation) are passed through.
    #
    # @param string [String] text to transliterate
    # @return [String] transliterated text, words separated by single spaces
    def to_latin(string)
      string.split(' ').map do |word|
        latin = word.downcase
                    .each_char
                    .map { |char| RU_MAPPING.fetch(char.to_sym, char) }
                    .join

        apply_capitalize_rules(word, latin.gsub(SHORT_C_REGEXP, 'c'))
      end.join(' ')
    end

    # Converts Latin notation back to Cyrillic. Multi-character sequences are
    # resolved first, then the remaining single letters; characters without a
    # mapping are passed through.
    #
    # @param string [String] text to transliterate
    # @return [String] transliterated text, words separated by single spaces
    def to_cyrillic(string)
      string.split(' ').map do |word|
        replaced = LATIN_REPLACING_MAPPING.reduce(word.downcase) do |text, (latin, cyrillic)|
          text.gsub(latin, cyrillic)
        end

        cyrillic_word = replaced.each_char
                                .map { |char| LATIN_MAPPING.fetch(char, char) }
                                .join

        apply_capitalize_rules(word, cyrillic_word)
      end.join(' ')
    end

    # Detects the direction from the input and transliterates accordingly:
    # text containing an ASCII Latin letter goes to Cyrillic, anything else
    # goes to Latin.
    #
    # @param string [String] text to transliterate
    # @return [String] transliterated text
    def convert(string)
      language(string) == :rus ? to_latin(string) : to_cyrillic(string)
    end

    # Same as {convert}, available under a second name.
    alias translit convert

    private

    # @return [Symbol] :eng when the string contains an ASCII Latin letter,
    #   :rus otherwise
    def language(string)
      string.match?(LATIN_LETTER_REGEXP) ? :eng : :rus
    end

    # Re-applies the casing of the source word to its lower-case
    # transliteration.
    #
    # @param word [String] the original word
    # @param translit_word [String] its lower-case transliteration
    # @return [String] upper-cased when +word+ has no lower-case letter,
    #   with its first letter upper-cased when the first letter of +word+ is
    #   upper-case, unchanged otherwise
    def apply_capitalize_rules(word, translit_word)
      return translit_word.upcase unless word.match?(LOWER_REGEXP)

      # Look at the first *letter*, not the first character, so that a
      # capitalised word behind a quote or bracket keeps its capital.
      first_letter = word[LETTER_REGEXP]
      return translit_word unless first_letter && UPPER_REGEXP.match?(first_letter)

      translit_word.sub(LETTER_REGEXP, &:upcase)
    end
  end
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rspec'
|
4
|
+
require 'gost_translit'
|
5
|
+
|
6
|
+
# Specs for GostTranslit: both explicit directions, the "c"/"cz" rule for the
# letter "ц", and direction auto-detection through .convert.
RSpec.describe GostTranslit do
  subject { GostTranslit }

  let(:latin_str) { 'Xot` tyazhelo podchas v nej bremya' }
  let(:cyrillic_str) { 'Хоть тяжело подчас в ней бремя' }

  # The library joins words with single spaces, so both fixtures are squashed
  # to one line before comparison.
  let(:cyrillic_text) do
    <<~HEREDOC.gsub(/\s+/, ' ').strip
      Хоть тяжело подчас в ней бремя,
      Телега на ходу легка;
      Ямщик лихой, седое время,
      Везет, не слезет с облучка.

      С утра садимся мы в телегу;
      Мы рады голову сломать
      И, презирая лень и негу,
      Кричим: пошел! Ебёна мать!

      Но в полдень нет уж той отваги;
      Порастрясло нас; нам страшней
      И косогоры и овраги;
      Кричим: полегче, дуралей!

      Катит по-прежнему телега;
      Под вечер мы привыкли к ней
      И, дремля, едем до ночлега —
      А время гонит лошадей.
    HEREDOC
  end

  let(:latin_text) do
    <<~HEREDOC.gsub(/\s+/, ' ').strip
      Xot` tyazhelo podchas v nej bremya,
      Telega na xodu legka;
      Yamshhik lixoj, sedoe vremya,
      Vezet, ne slezet s obluchka.

      S utra sadimsya my` v telegu;
      My` rady` golovu slomat`
      I, preziraya len` i negu,
      Krichim: poshel! Ebyona mat`!

      No v polden` net uzh toj otvagi;
      Porastryaslo nas; nam strashnej
      I kosogory` i ovragi;
      Krichim: polegche, duralej!

      Katit po-prezhnemu telega;
      Pod vecher my` privy`kli k nej
      I, dremlya, edem do nochlega —
      A vremya gonit loshadej.
    HEREDOC
  end

  describe '.to_cyrillic' do
    it 'translit text to cyrillic' do
      expect(subject.to_cyrillic(latin_str)).to eq(cyrillic_str)
    end

    context '"c" letter' do
      it 'changed to "ц" when next letter "e"' do
        expect(subject.to_cyrillic('celoe')).to eq('целое')
      end

      it 'changed to "ц" when next letter "i"' do
        expect(subject.to_cyrillic('citadel`')).to eq('цитадель')
      end

      it 'changed to "ц" when next letter "y"' do
        expect(subject.to_cyrillic('cyurix')).to eq('цюрих')
      end

      it 'changed to "ц" when next letter "j"' do
        expect(subject.to_cyrillic('cj')).to eq('цй')
      end

      it 'changed to "ц" from "cz"' do
        expect(subject.to_cyrillic('czaplya')).to eq('цапля')
      end
    end
  end

  describe '.to_latin' do
    it 'translit text to latin' do
      expect(subject.to_latin(cyrillic_str)).to eq(latin_str)
    end

    context '"ц" letter' do
      it 'changed to "c" when next letter "e"' do
        expect(subject.to_latin('целое')).to eq('celoe')
      end

      it 'changed to "c" when next letter "i"' do
        expect(subject.to_latin('цитадель')).to eq('citadel`')
      end

      it 'changed to "c" when next letter "y"' do
        expect(subject.to_latin('цюрих')).to eq('cyurix')
      end

      it 'changed to "c" when next letter "j"' do
        expect(subject.to_latin('цй')).to eq('cj')
      end

      it 'changed to "cz"' do
        expect(subject.to_latin('цапля')).to eq('czaplya')
      end
    end
  end

  # The library's auto-detecting entry point is GostTranslit.convert.
  describe '.convert' do
    it 'translit latin text to cyrillic' do
      expect(subject.convert(latin_text)).to eq(cyrillic_text)
    end

    it 'translit cyrillic text to latin' do
      expect(subject.convert(cyrillic_text)).to eq(latin_text)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gost_translit
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Viacheslav Soldatov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-02-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.7'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.7.0
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '2.7'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.7.0
|
33
|
+
description:
|
34
|
+
email:
|
35
|
+
- syntaxys0dll@gmail.com
|
36
|
+
executables: []
|
37
|
+
extensions: []
|
38
|
+
extra_rdoc_files: []
|
39
|
+
files:
|
40
|
+
- lib/gost_translit.rb
|
41
|
+
- spec/lib/gost_translit_spec.rb
|
42
|
+
homepage: https://github.com/Syntaxys-dll/gost_7_79_2000_b_translit
|
43
|
+
licenses:
|
44
|
+
- MIT
|
45
|
+
metadata: {}
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
requirements: []
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 2.7.6.2
|
63
|
+
signing_key:
|
64
|
+
specification_version: 4
|
65
|
+
summary: GOST 7.79-2000 type b transliteration
|
66
|
+
test_files: []
|