gost_translit 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/gost_translit.rb +112 -0
- data/spec/lib/gost_translit_spec.rb +131 -0
- metadata +66 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 48f17d4cc768844b03c5769e9e0c8921faf2ca6f70c3928c576ad12811cec8c3
|
4
|
+
data.tar.gz: 43ea36c1fe5485616d08c35b0c769d3b703a4d2ede713e8ea96af8ec0e91f1fd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e55e9989a6eee8192fc1a550748982ef7b38b2e09fc101ce7bafde406f1acf294bab6ace6f2e55a096307f52ff0cab699f6605eec986833fb5ecbbe8277583f6
|
7
|
+
data.tar.gz: b9b5210af713249fb3dd984df0aa9536eca742bc98043e92f8c265b539dd77e7afb55711993f77f4ac5409b96bb4e52b84ecb69998bfd8f82d92b5e922f18fdf
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Transliterates Russian text to Latin script and back using the
# GOST 7.79-2000 (system B) letter table held in RU_MAPPING.
#
# All public methods split the input on whitespace and re-join the words with
# a single space, so runs of whitespace (including newlines) are collapsed and
# leading/trailing whitespace is dropped.
module GostTranslit
  UPPER_REGEXP = /[[:upper:]]/
  LOWER_REGEXP = /[[:lower:]]/

  # A maximal run of letters, combining marks and backticks. Transliteration
  # and capitalisation are decided per run, so punctuation, digits and hyphens
  # around or inside a word never influence the casing of its parts.
  LETTER_RUN_REGEXP = /[[:alpha:]\p{M}`]+/

  # "cz" is shortened to "c" when followed by i, e, j or y.
  CZ_SHORTENING_REGEXP = /cz(?=[iejy])/

  CYRILLIC_REGEXP = /\p{Cyrillic}/
  ASCII_LETTER_REGEXP = /[A-Za-z]/

  # Cyrillic letter (Symbol) => Latin representation (String).
  RU_MAPPING = {
    'а': 'a',
    'б': 'b',
    'в': 'v',
    'г': 'g',
    'д': 'd',
    'е': 'e',
    'ё': 'yo',
    'ж': 'zh',
    'з': 'z',
    'и': 'i',
    'й': 'j',
    'к': 'k',
    'л': 'l',
    'м': 'm',
    'н': 'n',
    'о': 'o',
    'п': 'p',
    'р': 'r',
    'с': 's',
    'т': 't',
    'у': 'u',
    'ф': 'f',
    'х': 'x',
    'ц': 'cz',
    'ч': 'ch',
    'ш': 'sh',
    'щ': 'shh',
    'ъ': '``',
    'ы': 'y`',
    'ь': '`',
    'э': 'e`',
    'ю': 'yu',
    'я': 'ya'
  }.freeze

  # Multi-character Latin sequences, replaced one after another in this order
  # before the single-letter pass ('shh' has to be tried before 'sh').
  LATIN_REPLACING_MAPPING = {
    'shh' => 'щ',
    'sh' => 'ш',
    'yu' => 'ю',
    'ya' => 'я',
    '``' => 'ъ',
    'y`' => 'ы',
    'e`' => 'э',
    'ch' => 'ч',
    'cz' => 'ц',
    'zh' => 'ж',
    'yo' => 'ё'
  }.freeze

  # Latin representation => Cyrillic letter (both Strings), plus the short
  # form 'c' for 'ц'.
  LATIN_MAPPING = RU_MAPPING
                  .map { |cyrillic, latin| [latin, cyrillic.to_s] }
                  .to_h
                  .merge('c' => 'ц')
                  .freeze

  # String-keyed view of RU_MAPPING so that input characters do not have to be
  # converted to symbols for the lookup.
  CYRILLIC_TO_LATIN = RU_MAPPING
                      .map { |cyrillic, latin| [cyrillic.to_s, latin] }
                      .to_h
                      .freeze

  class << self
    # Transliterates Cyrillic letters in +string+ to Latin.
    #
    # @param string [String] text to transliterate
    # @return [String] transliterated text, words joined by single spaces
    def to_latin(string)
      words = string.split(' ').map do |word|
        word.gsub(LETTER_RUN_REGEXP) { |run| latinize(run) }
      end
      words.join(' ')
    end

    # Transliterates Latin letters and backticks in +string+ to Cyrillic.
    #
    # @param string [String] text to transliterate
    # @return [String] transliterated text, words joined by single spaces
    def to_cyrillic(string)
      words = string.split(' ').map do |word|
        word.gsub(LETTER_RUN_REGEXP) { |run| cyrillize(run) }
      end
      words.join(' ')
    end

    # Detects the script of +string+ and transliterates it to the other one.
    #
    # @param string [String] text to transliterate
    # @return [String] result of #to_latin or #to_cyrillic
    def convert(string)
      language(string) == :rus ? to_latin(string) : to_cyrillic(string)
    end

    # The spec shipped with the gem calls this method under the name
    # +translit+, so both names are provided.
    alias translit convert

    private

    # Guesses the source script.
    #
    # "\w" is ASCII-only, so text without any ASCII word character is treated
    # as Russian. Text that does contain one is still Russian when the only
    # letters in it are Cyrillic (e.g. "Дом 5"); digits and underscores alone
    # must not flip the direction.
    def language(string)
      return :rus unless string.match?(/\w/)
      return :rus if string.match?(CYRILLIC_REGEXP) && !string.match?(ASCII_LETTER_REGEXP)

      :eng
    end

    # Transliterates a single letter run to Latin and restores its casing.
    def latinize(run)
      latin = run.downcase.each_char.map { |char| CYRILLIC_TO_LATIN[char] || char }.join
      apply_capitalize_rules(run, latin.gsub(CZ_SHORTENING_REGEXP, 'c'))
    end

    # Transliterates a single letter run to Cyrillic and restores its casing.
    def cyrillize(run)
      # Multi-character sequences first, in the order of the table ...
      partially = LATIN_REPLACING_MAPPING.reduce(run.downcase) do |text, (latin, cyrillic)|
        text.gsub(latin, cyrillic)
      end
      # ... then whatever single Latin letters are left.
      cyrillic = partially.each_char.map { |char| LATIN_MAPPING[char] || char }.join
      apply_capitalize_rules(run, cyrillic)
    end

    # Re-applies the casing of +word+ to the lower-case +translit_word+:
    # no lower-case letter in the source => all upper-case; leading capital
    # => capitalised; anything else stays lower-case (mixed casing inside a
    # run is not preserved).
    def apply_capitalize_rules(word, translit_word)
      return translit_word.upcase unless word.match?(LOWER_REGEXP)
      return translit_word.capitalize if UPPER_REGEXP.match?(word[0])

      translit_word
    end
  end
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rspec'
|
4
|
+
require 'gost_translit'
|
5
|
+
|
6
|
+
# Specs for GostTranslit: single-direction transliteration, the special
# handling of "ц" / "c", and automatic direction detection via .convert.
RSpec.describe GostTranslit do
  subject { GostTranslit }

  let(:latin_str) { 'Xot` tyazhelo podchas v nej bremya' }
  let(:cyrillic_str) { 'Хоть тяжело подчас в ней бремя' }

  # The library joins words with single spaces, so both fixtures are
  # normalised to one line with single spaces before comparison.
  let(:cyrillic_text) do
    text = <<~HEREDOC
      Хоть тяжело подчас в ней бремя,
      Телега на ходу легка;
      Ямщик лихой, седое время,
      Везет, не слезет с облучка.

      С утра садимся мы в телегу;
      Мы рады голову сломать
      И, презирая лень и негу,
      Кричим: пошел! Ебёна мать!

      Но в полдень нет уж той отваги;
      Порастрясло нас; нам страшней
      И косогоры и овраги;
      Кричим: полегче, дуралей!

      Катит по-прежнему телега;
      Под вечер мы привыкли к ней
      И, дремля, едем до ночлега —
      А время гонит лошадей.
    HEREDOC

    text.gsub(/\s+/, ' ').strip
  end

  let(:latin_text) do
    text = <<~HEREDOC
      Xot` tyazhelo podchas v nej bremya,
      Telega na xodu legka;
      Yamshhik lixoj, sedoe vremya,
      Vezet, ne slezet s obluchka.

      S utra sadimsya my` v telegu;
      My` rady` golovu slomat`
      I, preziraya len` i negu,
      Krichim: poshel! Ebyona mat`!

      No v polden` net uzh toj otvagi;
      Porastryaslo nas; nam strashnej
      I kosogory` i ovragi;
      Krichim: polegche, duralej!

      Katit po-prezhnemu telega;
      Pod vecher my` privy`kli k nej
      I, dremlya, edem do nochlega —
      A vremya gonit loshadej.
    HEREDOC

    text.gsub(/\s+/, ' ').strip
  end

  describe '.to_cyrillic' do
    it 'translit text to cyrillic' do
      expect(subject.to_cyrillic(latin_str)).to eq(cyrillic_str)
    end

    context '"c" letter' do
      it 'changed to "ц" when next letter "e"' do
        expect(subject.to_cyrillic('celoe')).to eq('целое')
      end

      it 'changed to "ц" when next letter "i"' do
        expect(subject.to_cyrillic('citadel`')).to eq('цитадель')
      end

      it 'changed to "ц" when next letter "y"' do
        expect(subject.to_cyrillic('cyurix')).to eq('цюрих')
      end

      it 'changed to "ц" when next letter "j"' do
        expect(subject.to_cyrillic('cj')).to eq('цй')
      end

      it 'changed to "ц" from "cz"' do
        expect(subject.to_cyrillic('czaplya')).to eq('цапля')
      end
    end
  end

  describe '.to_latin' do
    it 'translit text to latin' do
      expect(subject.to_latin(cyrillic_str)).to eq(latin_str)
    end

    context '"ц" letter' do
      it 'changed to "c" when next letter "e"' do
        expect(subject.to_latin('целое')).to eq('celoe')
      end

      it 'changed to "c" when next letter "i"' do
        expect(subject.to_latin('цитадель')).to eq('citadel`')
      end

      it 'changed to "c" when next letter "y"' do
        expect(subject.to_latin('цюрих')).to eq('cyurix')
      end

      it 'changed to "c" when next letter "j"' do
        expect(subject.to_latin('цй')).to eq('cj')
      end

      it 'changed to "cz"' do
        expect(subject.to_latin('цапля')).to eq('czaplya')
      end
    end
  end

  # The auto-detecting entry point defined by the library is `convert`.
  describe '.convert' do
    it 'translit latin text to cyrillic' do
      expect(subject.convert(latin_text)).to eq(cyrillic_text)
    end

    it 'translit cyrillic text to latin' do
      expect(subject.convert(cyrillic_text)).to eq(latin_text)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gost_translit
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Viacheslav Soldatov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-02-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.7'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.7.0
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '2.7'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.7.0
|
33
|
+
description:
|
34
|
+
email:
|
35
|
+
- syntaxys0dll@gmail.com
|
36
|
+
executables: []
|
37
|
+
extensions: []
|
38
|
+
extra_rdoc_files: []
|
39
|
+
files:
|
40
|
+
- lib/gost_translit.rb
|
41
|
+
- spec/lib/gost_translit_spec.rb
|
42
|
+
homepage: https://github.com/Syntaxys-dll/gost_7_79_2000_b_translit
|
43
|
+
licenses:
|
44
|
+
- MIT
|
45
|
+
metadata: {}
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
requirements: []
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 2.7.6.2
|
63
|
+
signing_key:
|
64
|
+
specification_version: 4
|
65
|
+
summary: GOST 7.79-2000 type b transliteration
|
66
|
+
test_files: []
|