byk 0.4.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +22 -2
- data/README.md +96 -45
- data/exe/byk +51 -0
- data/ext/byk/byk.c +312 -246
- data/lib/byk/core_ext/string.rb +8 -0
- data/lib/byk/safe.rb +14 -0
- data/lib/byk/version.rb +1 -1
- data/lib/byk.rb +2 -14
- data/spec/byk_spec.rb +186 -72
- metadata +48 -17
data/spec/byk_spec.rb
CHANGED
@@ -1,126 +1,240 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
2
|
require "spec_helper"
|
4
3
|
|
5
4
|
describe Byk do
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
it "has a version number" do
|
7
|
+
expect(Byk::VERSION).not_to be nil
|
8
|
+
end
|
9
|
+
|
10
|
+
shared_examples :base do |method|
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
# See http://sr.wikipedia.org/wiki/Панграм
|
13
|
+
let(:pangram) { "фијуче ветар у шибљу, леди пасаже и куће иза њих и гунђа у оџацима." }
|
14
|
+
let(:pangram_latin) { "fijuče vetar u šiblju, ledi pasaže i kuće iza njih i gunđa u odžacima." }
|
15
|
+
let(:pangram_ascii_latin) { "fijuce vetar u siblju, ledi pasaze i kuce iza njih i gundja u odzacima." }
|
15
16
|
|
16
|
-
|
17
|
-
|
17
|
+
let(:pangram_caps) { "ФИЈУЧЕ ВЕТАР У ШИБЉУ, ЛЕДИ ПАСАЖЕ И КУЋЕ ИЗА ЊИХ И ГУНЂА У ОЏАЦИМА." }
|
18
|
+
let(:pangram_latin_caps) { "FIJUČE VETAR U ŠIBLJU, LEDI PASAŽE I KUĆE IZA NJIH I GUNĐA U ODŽACIMA." }
|
19
|
+
let(:pangram_ascii_latin_caps) { "FIJUCE VETAR U SIBLJU, LEDI PASAZE I KUCE IZA NJIH I GUNDJA U ODZACIMA." }
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
21
|
+
let(:full_cyrillic_coderange) { (0x400..0x4ff).map { |i| i.chr(Encoding::UTF_8) } }
|
22
|
+
let(:non_serbian_cyrillic_coderange) { full_cyrillic_coderange - Byk::AZBUKA - Byk::AZBUKA_CAPS }
|
23
|
+
let(:non_serbian_cyrillic) { non_serbian_cyrillic_coderange.join }
|
22
24
|
|
23
|
-
|
24
|
-
|
25
|
+
let(:ascii) { "The quick brown fox jumps over the lazy dog." }
|
26
|
+
let(:other) { "संस्कृतम्" }
|
27
|
+
|
28
|
+
let(:mixed) { "संस्कृतम् илити Sanskrit, obrati ПАЖЊУ." }
|
29
|
+
let(:mixed_cyrillic) { "संस्कृतम् илити Санскрит, обрати ПАЖЊУ." }
|
30
|
+
let(:mixed_latin) { "संस्कृतम् iliti Sanskrit, obrati PAŽNJU." }
|
31
|
+
let(:mixed_ascii_latin) { "संस्कृतम् iliti Sanskrit, obrati PAZNJU." }
|
32
|
+
|
33
|
+
it "doesn't translate an empty string" do
|
34
|
+
expect(Byk.send(method, "")).to eq ""
|
35
|
+
end
|
36
|
+
|
37
|
+
it "doesn't translate foreign coderanges" do
|
38
|
+
expect(Byk.send(method, other)).to eq other
|
39
|
+
end
|
25
40
|
end
|
26
41
|
|
27
|
-
|
42
|
+
shared_examples :cyrillization_method do |method|
|
43
|
+
include_examples :base, method
|
28
44
|
|
29
|
-
|
30
|
-
|
45
|
+
let(:edge_cases) do
|
46
|
+
[
|
47
|
+
["lJ", "љ"],
|
48
|
+
["nJ", "њ"],
|
49
|
+
["dŽ", "џ"]
|
50
|
+
]
|
31
51
|
end
|
32
52
|
|
33
|
-
it "doesn't
|
34
|
-
expect(
|
53
|
+
it "doesn't translate Cyrillic" do
|
54
|
+
expect(Byk.send(method, pangram)).to eq pangram
|
35
55
|
end
|
36
56
|
|
37
|
-
it "doesn't
|
38
|
-
expect(
|
57
|
+
it "doesn't translate non-Serbian Cyrillic" do
|
58
|
+
expect(Byk.send(method, non_serbian_cyrillic)).to eq non_serbian_cyrillic
|
39
59
|
end
|
40
60
|
|
41
|
-
it "
|
42
|
-
expect(
|
61
|
+
it "translates Latin to Cyrillic" do
|
62
|
+
expect(Byk.send(method, pangram_latin)).to eq pangram
|
43
63
|
end
|
44
64
|
|
45
|
-
it "
|
46
|
-
expect(
|
65
|
+
it "translates Latin caps to Cyrillic caps" do
|
66
|
+
expect(Byk.send(method, pangram_latin_caps)).to eq pangram_caps
|
47
67
|
end
|
48
68
|
|
49
|
-
it "
|
50
|
-
expect(
|
69
|
+
it "translates mixed text properly" do
|
70
|
+
expect(Byk.send(method, mixed)).to eq mixed_cyrillic
|
51
71
|
end
|
52
72
|
|
53
|
-
it "
|
54
|
-
|
73
|
+
it "translates edge cases properly" do
|
74
|
+
edge_cases.each do |input, output|
|
75
|
+
expect(Byk.send(method, input)).to eq output
|
76
|
+
end
|
55
77
|
end
|
56
78
|
|
57
|
-
it "
|
58
|
-
expect(Byk::
|
79
|
+
it "translates ABECEDA to AZBUKA" do
|
80
|
+
expect(Byk::ABECEDA.map { |l| l.dup.send(:to_cyrillic) }).to match_array(Byk::AZBUKA)
|
59
81
|
end
|
60
82
|
|
61
|
-
it "
|
62
|
-
expect(Byk::
|
83
|
+
it "translates ABECEDA_CAPS to AZBUKA_CAPS" do
|
84
|
+
expect(Byk::ABECEDA_CAPS.map { |l| l.dup.send(:to_cyrillic) }).to match_array(Byk::AZBUKA_CAPS)
|
63
85
|
end
|
64
86
|
end
|
65
87
|
|
66
|
-
|
88
|
+
shared_examples :latinization_method do |method|
|
89
|
+
include_examples :base, method
|
67
90
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
"
|
72
|
-
"
|
73
|
-
|
74
|
-
|
75
|
-
"ЊЊ" => "NJNJ",
|
76
|
-
"ЉЉ" => "LJLJ",
|
77
|
-
"ЂЂ" => "DJDJ",
|
78
|
-
"ĐĐ" => "DJDJ",
|
79
|
-
"ГУЊ" => "GUNJ",
|
80
|
-
"ПАСУЉ" => "PASULJ",
|
81
|
-
"ЂУРАЂ" => "DJURADJ",
|
82
|
-
"ĐURAĐ" => "DJURADJ",
|
83
|
-
"ĐURAĐ Đorđević" => "DJURADJ Djordjevic",
|
84
|
-
"ĐURAĐ. Đorđević" => "DJURADJ. Djordjevic"
|
85
|
-
}
|
86
|
-
}
|
91
|
+
let(:edge_cases) do
|
92
|
+
[
|
93
|
+
["ЉЉ", "LJLJ"],
|
94
|
+
["ЊЊ", "NJNJ"],
|
95
|
+
["ЏЏ", "DŽDŽ"]
|
96
|
+
]
|
97
|
+
end
|
87
98
|
|
88
|
-
it "doesn't
|
89
|
-
expect(
|
99
|
+
it "doesn't translate ASCII" do
|
100
|
+
expect(Byk.send(method, ascii)).to eq ascii
|
90
101
|
end
|
91
102
|
|
92
|
-
it "doesn't
|
93
|
-
expect(
|
103
|
+
it "doesn't translate Latin" do
|
104
|
+
expect(Byk.send(method, pangram_latin)).to eq pangram_latin
|
94
105
|
end
|
95
106
|
|
96
|
-
it "doesn't
|
97
|
-
expect(
|
107
|
+
it "doesn't translate non-Serbian Cyrillic" do
|
108
|
+
expect(Byk.send(method, non_serbian_cyrillic)).to eq non_serbian_cyrillic
|
98
109
|
end
|
99
110
|
|
100
|
-
it "
|
101
|
-
expect(pangram
|
111
|
+
it "translates Cyrillic to Latin" do
|
112
|
+
expect(Byk.send(method, pangram)).to eq pangram_latin
|
102
113
|
end
|
103
114
|
|
104
|
-
it "
|
105
|
-
expect(pangram_caps
|
115
|
+
it "translates Cyrillic caps to Latin caps" do
|
116
|
+
expect(Byk.send(method, pangram_caps)).to eq pangram_latin_caps
|
117
|
+
end
|
118
|
+
|
119
|
+
it "translates mixed text properly" do
|
120
|
+
expect(Byk.send(method, mixed)).to eq mixed_latin
|
121
|
+
end
|
122
|
+
|
123
|
+
it "translates edge cases properly" do
|
124
|
+
edge_cases.each do |input, output|
|
125
|
+
expect(Byk.send(method, input)).to eq output
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
it "translates AZBUKA to ABECEDA" do
|
130
|
+
expect(Byk::AZBUKA.map { |l| l.dup.send(method) }).to match_array(Byk::ABECEDA)
|
131
|
+
end
|
132
|
+
|
133
|
+
it "translates AZBUKA_CAPS to ABECEDA_CAPS" do
|
134
|
+
expect(Byk::AZBUKA_CAPS.map { |l| l.dup.send(method) }).to match_array(Byk::ABECEDA_CAPS)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
shared_examples :ascii_latinization_method do |method|
|
139
|
+
include_examples :base, method
|
140
|
+
|
141
|
+
let(:edge_cases) do
|
142
|
+
[
|
143
|
+
["Њ", "Nj"],
|
144
|
+
["Љ", "Lj"],
|
145
|
+
["Џ", "Dz"],
|
146
|
+
["Ђ", "Dj"],
|
147
|
+
["Đ", "Dj"],
|
148
|
+
["ЊЊ", "NJNJ"],
|
149
|
+
["ЉЉ", "LJLJ"],
|
150
|
+
["ЏЏ", "DZDZ"],
|
151
|
+
["ЂЂ", "DJDJ"],
|
152
|
+
["ĐĐ", "DJDJ"],
|
153
|
+
["ЂУРАЂ Ђурђевић", "DJURADJ Djurdjevic"],
|
154
|
+
["ĐURAĐ Đurđević", "DJURADJ Djurdjevic"]
|
155
|
+
]
|
106
156
|
end
|
107
157
|
|
108
|
-
it "
|
109
|
-
expect(
|
158
|
+
it "doesn't translate ASCII" do
|
159
|
+
expect(Byk.send(method, ascii)).to eq ascii
|
110
160
|
end
|
111
161
|
|
112
|
-
it "
|
113
|
-
expect(
|
162
|
+
it "translates Latin to ASCII Latin" do
|
163
|
+
expect(Byk.send(method, pangram_latin)).to eq pangram_ascii_latin
|
114
164
|
end
|
115
165
|
|
116
|
-
it "
|
117
|
-
expect(
|
166
|
+
it "translates Latin caps to ASCII Latin caps" do
|
167
|
+
expect(Byk.send(method, pangram_latin_caps)).to eq pangram_ascii_latin_caps
|
118
168
|
end
|
119
169
|
|
120
|
-
it "
|
170
|
+
it "translates Cyrillic to ASCII Latin" do
|
171
|
+
expect(Byk.send(method, pangram)).to eq pangram_ascii_latin
|
172
|
+
end
|
173
|
+
|
174
|
+
it "translates Cyrillic caps to ASCII Latin caps" do
|
175
|
+
expect(Byk.send(method, pangram_caps)).to eq pangram_ascii_latin_caps
|
176
|
+
end
|
177
|
+
|
178
|
+
it "translates mixed text properly" do
|
179
|
+
expect(Byk.send(method, mixed)).to eq mixed_ascii_latin
|
180
|
+
end
|
181
|
+
|
182
|
+
it "translates edge cases properly" do
|
121
183
|
edge_cases.each do |input, output|
|
122
|
-
expect(input
|
184
|
+
expect(Byk.send(method, input)).to eq output
|
123
185
|
end
|
124
186
|
end
|
125
187
|
end
|
188
|
+
|
189
|
+
shared_examples :non_destructive_method do |method|
|
190
|
+
it "doesn't modify the arg" do
|
191
|
+
str = "ЖŽ"
|
192
|
+
expect { Byk.send(method, str) }.to_not change { str }
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
shared_examples :destructive_method do |method|
|
197
|
+
it "modifies the arg" do
|
198
|
+
str = "ЖŽ"
|
199
|
+
expect { Byk.send(method, str) }.to change { str }
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
describe ".to_cyrillic" do
|
204
|
+
it_behaves_like :cyrillization_method, :to_cyrillic
|
205
|
+
it_behaves_like :non_destructive_method, :to_cyrillic
|
206
|
+
end
|
207
|
+
|
208
|
+
describe ".to_cyrillic!" do
|
209
|
+
it_behaves_like :cyrillization_method, :to_cyrillic!
|
210
|
+
it_behaves_like :destructive_method, :to_cyrillic!
|
211
|
+
end
|
212
|
+
|
213
|
+
describe ".to_latin" do
|
214
|
+
it_behaves_like :latinization_method, :to_latin
|
215
|
+
it_behaves_like :non_destructive_method, :to_latin
|
216
|
+
end
|
217
|
+
|
218
|
+
describe ".to_latin!" do
|
219
|
+
it_behaves_like :latinization_method, :to_latin!
|
220
|
+
it_behaves_like :destructive_method, :to_latin!
|
221
|
+
end
|
222
|
+
|
223
|
+
describe ".to_ascii_latin" do
|
224
|
+
it_behaves_like :ascii_latinization_method, :to_ascii_latin
|
225
|
+
it_behaves_like :non_destructive_method, :to_ascii_latin
|
226
|
+
end
|
227
|
+
|
228
|
+
describe ".to_ascii_latin!" do
|
229
|
+
it_behaves_like :ascii_latinization_method, :to_ascii_latin!
|
230
|
+
it_behaves_like :destructive_method, :to_ascii_latin!
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
describe String do
|
235
|
+
it "responds to Byk methods" do
|
236
|
+
Byk.singleton_methods.each do |method|
|
237
|
+
expect("").to respond_to(method)
|
238
|
+
end
|
239
|
+
end
|
126
240
|
end
|
metadata
CHANGED
@@ -1,47 +1,76 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: byk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikola Topalović
|
8
|
-
autorequire:
|
9
|
-
bindir:
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '13.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '13.0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rake-compiler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
31
|
- - "~>"
|
18
32
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
33
|
+
version: '1.1'
|
20
34
|
type: :development
|
21
35
|
prerelease: false
|
22
36
|
version_requirements: !ruby/object:Gem::Requirement
|
23
37
|
requirements:
|
24
38
|
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
40
|
+
version: '1.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rspec
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '3.
|
47
|
+
version: '3.10'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: benchmark-ips
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.9'
|
34
62
|
type: :development
|
35
63
|
prerelease: false
|
36
64
|
version_requirements: !ruby/object:Gem::Requirement
|
37
65
|
requirements:
|
38
66
|
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
41
|
-
description:
|
42
|
-
|
68
|
+
version: '2.9'
|
69
|
+
description: Fast transliteration of Serbian Cyrillic to Latin and back. Brzo preslovljavanje
|
70
|
+
ćirilice u latinicu i obratno.
|
43
71
|
email: nikola.topalovic@gmail.com
|
44
|
-
executables:
|
72
|
+
executables:
|
73
|
+
- byk
|
45
74
|
extensions:
|
46
75
|
- ext/byk/extconf.rb
|
47
76
|
extra_rdoc_files: []
|
@@ -49,16 +78,19 @@ files:
|
|
49
78
|
- CHANGELOG.md
|
50
79
|
- LICENSE
|
51
80
|
- README.md
|
81
|
+
- exe/byk
|
52
82
|
- ext/byk/byk.c
|
53
83
|
- ext/byk/extconf.rb
|
54
84
|
- lib/byk.rb
|
85
|
+
- lib/byk/core_ext/string.rb
|
86
|
+
- lib/byk/safe.rb
|
55
87
|
- lib/byk/version.rb
|
56
88
|
- spec/byk_spec.rb
|
57
89
|
homepage: https://github.com/topalovic/byk
|
58
90
|
licenses:
|
59
91
|
- MIT
|
60
92
|
metadata: {}
|
61
|
-
post_install_message:
|
93
|
+
post_install_message:
|
62
94
|
rdoc_options: []
|
63
95
|
require_paths:
|
64
96
|
- lib
|
@@ -66,17 +98,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
98
|
requirements:
|
67
99
|
- - ">="
|
68
100
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
101
|
+
version: 2.2.0
|
70
102
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
103
|
requirements:
|
72
104
|
- - ">="
|
73
105
|
- !ruby/object:Gem::Version
|
74
106
|
version: '0'
|
75
107
|
requirements: []
|
76
|
-
|
77
|
-
|
78
|
-
signing_key:
|
108
|
+
rubygems_version: 3.3.3
|
109
|
+
signing_key:
|
79
110
|
specification_version: 4
|
80
|
-
summary:
|
111
|
+
summary: Transliteration of Serbian Cyrillic <-> Latin
|
81
112
|
test_files:
|
82
113
|
- spec/byk_spec.rb
|