byk 0.4.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +22 -2
- data/README.md +96 -45
- data/exe/byk +51 -0
- data/ext/byk/byk.c +312 -246
- data/lib/byk/core_ext/string.rb +8 -0
- data/lib/byk/safe.rb +14 -0
- data/lib/byk/version.rb +1 -1
- data/lib/byk.rb +2 -14
- data/spec/byk_spec.rb +186 -72
- metadata +48 -17
data/spec/byk_spec.rb
CHANGED
@@ -1,126 +1,240 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
2
|
require "spec_helper"
|
4
3
|
|
5
4
|
describe Byk do
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
it "has a version number" do
|
7
|
+
expect(Byk::VERSION).not_to be nil
|
8
|
+
end
|
9
|
+
|
10
|
+
shared_examples :base do |method|
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
# See http://sr.wikipedia.org/wiki/Панграм
|
13
|
+
let(:pangram) { "фијуче ветар у шибљу, леди пасаже и куће иза њих и гунђа у оџацима." }
|
14
|
+
let(:pangram_latin) { "fijuče vetar u šiblju, ledi pasaže i kuće iza njih i gunđa u odžacima." }
|
15
|
+
let(:pangram_ascii_latin) { "fijuce vetar u siblju, ledi pasaze i kuce iza njih i gundja u odzacima." }
|
15
16
|
|
16
|
-
|
17
|
-
|
17
|
+
let(:pangram_caps) { "ФИЈУЧЕ ВЕТАР У ШИБЉУ, ЛЕДИ ПАСАЖЕ И КУЋЕ ИЗА ЊИХ И ГУНЂА У ОЏАЦИМА." }
|
18
|
+
let(:pangram_latin_caps) { "FIJUČE VETAR U ŠIBLJU, LEDI PASAŽE I KUĆE IZA NJIH I GUNĐA U ODŽACIMA." }
|
19
|
+
let(:pangram_ascii_latin_caps) { "FIJUCE VETAR U SIBLJU, LEDI PASAZE I KUCE IZA NJIH I GUNDJA U ODZACIMA." }
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
21
|
+
let(:full_cyrillic_coderange) { (0x400..0x4ff).map { |i| i.chr(Encoding::UTF_8) } }
|
22
|
+
let(:non_serbian_cyrillic_coderange) { full_cyrillic_coderange - Byk::AZBUKA - Byk::AZBUKA_CAPS }
|
23
|
+
let(:non_serbian_cyrillic) { non_serbian_cyrillic_coderange.join }
|
22
24
|
|
23
|
-
|
24
|
-
|
25
|
+
let(:ascii) { "The quick brown fox jumps over the lazy dog." }
|
26
|
+
let(:other) { "संस्कृतम्" }
|
27
|
+
|
28
|
+
let(:mixed) { "संस्कृतम् илити Sanskrit, obrati ПАЖЊУ." }
|
29
|
+
let(:mixed_cyrillic) { "संस्कृतम् илити Санскрит, обрати ПАЖЊУ." }
|
30
|
+
let(:mixed_latin) { "संस्कृतम् iliti Sanskrit, obrati PAŽNJU." }
|
31
|
+
let(:mixed_ascii_latin) { "संस्कृतम् iliti Sanskrit, obrati PAZNJU." }
|
32
|
+
|
33
|
+
it "doesn't translate an empty string" do
|
34
|
+
expect(Byk.send(method, "")).to eq ""
|
35
|
+
end
|
36
|
+
|
37
|
+
it "doesn't translate foreign coderanges" do
|
38
|
+
expect(Byk.send(method, other)).to eq other
|
39
|
+
end
|
25
40
|
end
|
26
41
|
|
27
|
-
|
42
|
+
shared_examples :cyrillization_method do |method|
|
43
|
+
include_examples :base, method
|
28
44
|
|
29
|
-
|
30
|
-
|
45
|
+
let(:edge_cases) do
|
46
|
+
[
|
47
|
+
["lJ", "љ"],
|
48
|
+
["nJ", "њ"],
|
49
|
+
["dŽ", "џ"]
|
50
|
+
]
|
31
51
|
end
|
32
52
|
|
33
|
-
it "doesn't
|
34
|
-
expect(
|
53
|
+
it "doesn't translate Cyrillic" do
|
54
|
+
expect(Byk.send(method, pangram)).to eq pangram
|
35
55
|
end
|
36
56
|
|
37
|
-
it "doesn't
|
38
|
-
expect(
|
57
|
+
it "doesn't translate non-Serbian Cyrillic" do
|
58
|
+
expect(Byk.send(method, non_serbian_cyrillic)).to eq non_serbian_cyrillic
|
39
59
|
end
|
40
60
|
|
41
|
-
it "
|
42
|
-
expect(
|
61
|
+
it "translates Latin to Cyrillic" do
|
62
|
+
expect(Byk.send(method, pangram_latin)).to eq pangram
|
43
63
|
end
|
44
64
|
|
45
|
-
it "
|
46
|
-
expect(
|
65
|
+
it "translates Latin caps to Cyrillic caps" do
|
66
|
+
expect(Byk.send(method, pangram_latin_caps)).to eq pangram_caps
|
47
67
|
end
|
48
68
|
|
49
|
-
it "
|
50
|
-
expect(
|
69
|
+
it "translates mixed text properly" do
|
70
|
+
expect(Byk.send(method, mixed)).to eq mixed_cyrillic
|
51
71
|
end
|
52
72
|
|
53
|
-
it "
|
54
|
-
|
73
|
+
it "translates edge cases properly" do
|
74
|
+
edge_cases.each do |input, output|
|
75
|
+
expect(Byk.send(method, input)).to eq output
|
76
|
+
end
|
55
77
|
end
|
56
78
|
|
57
|
-
it "
|
58
|
-
expect(Byk::
|
79
|
+
it "translates ABECEDA to AZBUKA" do
|
80
|
+
expect(Byk::ABECEDA.map { |l| l.dup.send(:to_cyrillic) }).to match_array(Byk::AZBUKA)
|
59
81
|
end
|
60
82
|
|
61
|
-
it "
|
62
|
-
expect(Byk::
|
83
|
+
it "translates ABECEDA_CAPS to AZBUKA_CAPS" do
|
84
|
+
expect(Byk::ABECEDA_CAPS.map { |l| l.dup.send(:to_cyrillic) }).to match_array(Byk::AZBUKA_CAPS)
|
63
85
|
end
|
64
86
|
end
|
65
87
|
|
66
|
-
|
88
|
+
shared_examples :latinization_method do |method|
|
89
|
+
include_examples :base, method
|
67
90
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
"
|
72
|
-
"
|
73
|
-
|
74
|
-
|
75
|
-
"ЊЊ" => "NJNJ",
|
76
|
-
"ЉЉ" => "LJLJ",
|
77
|
-
"ЂЂ" => "DJDJ",
|
78
|
-
"ĐĐ" => "DJDJ",
|
79
|
-
"ГУЊ" => "GUNJ",
|
80
|
-
"ПАСУЉ" => "PASULJ",
|
81
|
-
"ЂУРАЂ" => "DJURADJ",
|
82
|
-
"ĐURAĐ" => "DJURADJ",
|
83
|
-
"ĐURAĐ Đorđević" => "DJURADJ Djordjevic",
|
84
|
-
"ĐURAĐ. Đorđević" => "DJURADJ. Djordjevic"
|
85
|
-
}
|
86
|
-
}
|
91
|
+
let(:edge_cases) do
|
92
|
+
[
|
93
|
+
["ЉЉ", "LJLJ"],
|
94
|
+
["ЊЊ", "NJNJ"],
|
95
|
+
["ЏЏ", "DŽDŽ"]
|
96
|
+
]
|
97
|
+
end
|
87
98
|
|
88
|
-
it "doesn't
|
89
|
-
expect(
|
99
|
+
it "doesn't translate ASCII" do
|
100
|
+
expect(Byk.send(method, ascii)).to eq ascii
|
90
101
|
end
|
91
102
|
|
92
|
-
it "doesn't
|
93
|
-
expect(
|
103
|
+
it "doesn't translate Latin" do
|
104
|
+
expect(Byk.send(method, pangram_latin)).to eq pangram_latin
|
94
105
|
end
|
95
106
|
|
96
|
-
it "doesn't
|
97
|
-
expect(
|
107
|
+
it "doesn't translate non-Serbian Cyrillic" do
|
108
|
+
expect(Byk.send(method, non_serbian_cyrillic)).to eq non_serbian_cyrillic
|
98
109
|
end
|
99
110
|
|
100
|
-
it "
|
101
|
-
expect(pangram
|
111
|
+
it "translates Cyrillic to Latin" do
|
112
|
+
expect(Byk.send(method, pangram)).to eq pangram_latin
|
102
113
|
end
|
103
114
|
|
104
|
-
it "
|
105
|
-
expect(pangram_caps
|
115
|
+
it "translates Cyrillic caps to Latin caps" do
|
116
|
+
expect(Byk.send(method, pangram_caps)).to eq pangram_latin_caps
|
117
|
+
end
|
118
|
+
|
119
|
+
it "translates mixed text properly" do
|
120
|
+
expect(Byk.send(method, mixed)).to eq mixed_latin
|
121
|
+
end
|
122
|
+
|
123
|
+
it "translates edge cases properly" do
|
124
|
+
edge_cases.each do |input, output|
|
125
|
+
expect(Byk.send(method, input)).to eq output
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
it "translates AZBUKA to ABECEDA" do
|
130
|
+
expect(Byk::AZBUKA.map { |l| l.dup.send(method) }).to match_array(Byk::ABECEDA)
|
131
|
+
end
|
132
|
+
|
133
|
+
it "translates AZBUKA_CAPS to ABECEDA_CAPS" do
|
134
|
+
expect(Byk::AZBUKA_CAPS.map { |l| l.dup.send(method) }).to match_array(Byk::ABECEDA_CAPS)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
shared_examples :ascii_latinization_method do |method|
|
139
|
+
include_examples :base, method
|
140
|
+
|
141
|
+
let(:edge_cases) do
|
142
|
+
[
|
143
|
+
["Њ", "Nj"],
|
144
|
+
["Љ", "Lj"],
|
145
|
+
["Џ", "Dz"],
|
146
|
+
["Ђ", "Dj"],
|
147
|
+
["Đ", "Dj"],
|
148
|
+
["ЊЊ", "NJNJ"],
|
149
|
+
["ЉЉ", "LJLJ"],
|
150
|
+
["ЏЏ", "DZDZ"],
|
151
|
+
["ЂЂ", "DJDJ"],
|
152
|
+
["ĐĐ", "DJDJ"],
|
153
|
+
["ЂУРАЂ Ђурђевић", "DJURADJ Djurdjevic"],
|
154
|
+
["ĐURAĐ Đurđević", "DJURADJ Djurdjevic"]
|
155
|
+
]
|
106
156
|
end
|
107
157
|
|
108
|
-
it "
|
109
|
-
expect(
|
158
|
+
it "doesn't translate ASCII" do
|
159
|
+
expect(Byk.send(method, ascii)).to eq ascii
|
110
160
|
end
|
111
161
|
|
112
|
-
it "
|
113
|
-
expect(
|
162
|
+
it "translates Latin to ASCII Latin" do
|
163
|
+
expect(Byk.send(method, pangram_latin)).to eq pangram_ascii_latin
|
114
164
|
end
|
115
165
|
|
116
|
-
it "
|
117
|
-
expect(
|
166
|
+
it "translates Latin caps to ASCII Latin caps" do
|
167
|
+
expect(Byk.send(method, pangram_latin_caps)).to eq pangram_ascii_latin_caps
|
118
168
|
end
|
119
169
|
|
120
|
-
it "
|
170
|
+
it "translates Cyrillic to ASCII Latin" do
|
171
|
+
expect(Byk.send(method, pangram)).to eq pangram_ascii_latin
|
172
|
+
end
|
173
|
+
|
174
|
+
it "translates Cyrillic caps to ASCII Latin caps" do
|
175
|
+
expect(Byk.send(method, pangram_caps)).to eq pangram_ascii_latin_caps
|
176
|
+
end
|
177
|
+
|
178
|
+
it "translates mixed text properly" do
|
179
|
+
expect(Byk.send(method, mixed)).to eq mixed_ascii_latin
|
180
|
+
end
|
181
|
+
|
182
|
+
it "translates edge cases properly" do
|
121
183
|
edge_cases.each do |input, output|
|
122
|
-
expect(input
|
184
|
+
expect(Byk.send(method, input)).to eq output
|
123
185
|
end
|
124
186
|
end
|
125
187
|
end
|
188
|
+
|
189
|
+
shared_examples :non_destructive_method do |method|
|
190
|
+
it "doesn't modify the arg" do
|
191
|
+
str = "ЖŽ"
|
192
|
+
expect { Byk.send(method, str) }.to_not change { str }
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
shared_examples :destructive_method do |method|
|
197
|
+
it "modifies the arg" do
|
198
|
+
str = "ЖŽ"
|
199
|
+
expect { Byk.send(method, str) }.to change { str }
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
describe ".to_cyrillic" do
|
204
|
+
it_behaves_like :cyrillization_method, :to_cyrillic
|
205
|
+
it_behaves_like :non_destructive_method, :to_cyrillic
|
206
|
+
end
|
207
|
+
|
208
|
+
describe ".to_cyrillic!" do
|
209
|
+
it_behaves_like :cyrillization_method, :to_cyrillic!
|
210
|
+
it_behaves_like :destructive_method, :to_cyrillic!
|
211
|
+
end
|
212
|
+
|
213
|
+
describe ".to_latin" do
|
214
|
+
it_behaves_like :latinization_method, :to_latin
|
215
|
+
it_behaves_like :non_destructive_method, :to_latin
|
216
|
+
end
|
217
|
+
|
218
|
+
describe ".to_latin!" do
|
219
|
+
it_behaves_like :latinization_method, :to_latin!
|
220
|
+
it_behaves_like :destructive_method, :to_latin!
|
221
|
+
end
|
222
|
+
|
223
|
+
describe ".to_ascii_latin" do
|
224
|
+
it_behaves_like :ascii_latinization_method, :to_ascii_latin
|
225
|
+
it_behaves_like :non_destructive_method, :to_ascii_latin
|
226
|
+
end
|
227
|
+
|
228
|
+
describe ".to_ascii_latin!" do
|
229
|
+
it_behaves_like :ascii_latinization_method, :to_ascii_latin!
|
230
|
+
it_behaves_like :destructive_method, :to_ascii_latin!
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
describe String do
|
235
|
+
it "responds to Byk methods" do
|
236
|
+
Byk.singleton_methods.each do |method|
|
237
|
+
expect("").to respond_to(method)
|
238
|
+
end
|
239
|
+
end
|
126
240
|
end
|
metadata
CHANGED
@@ -1,47 +1,76 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: byk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikola Topalović
|
8
|
-
autorequire:
|
9
|
-
bindir:
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '13.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '13.0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rake-compiler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
31
|
- - "~>"
|
18
32
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
33
|
+
version: '1.1'
|
20
34
|
type: :development
|
21
35
|
prerelease: false
|
22
36
|
version_requirements: !ruby/object:Gem::Requirement
|
23
37
|
requirements:
|
24
38
|
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
40
|
+
version: '1.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rspec
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '3.
|
47
|
+
version: '3.10'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: benchmark-ips
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.9'
|
34
62
|
type: :development
|
35
63
|
prerelease: false
|
36
64
|
version_requirements: !ruby/object:Gem::Requirement
|
37
65
|
requirements:
|
38
66
|
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
41
|
-
description:
|
42
|
-
|
68
|
+
version: '2.9'
|
69
|
+
description: Fast transliteration of Serbian Cyrillic to Latin and back. Brzo preslovljavanje
|
70
|
+
ćirilice u latinicu i obratno.
|
43
71
|
email: nikola.topalovic@gmail.com
|
44
|
-
executables:
|
72
|
+
executables:
|
73
|
+
- byk
|
45
74
|
extensions:
|
46
75
|
- ext/byk/extconf.rb
|
47
76
|
extra_rdoc_files: []
|
@@ -49,16 +78,19 @@ files:
|
|
49
78
|
- CHANGELOG.md
|
50
79
|
- LICENSE
|
51
80
|
- README.md
|
81
|
+
- exe/byk
|
52
82
|
- ext/byk/byk.c
|
53
83
|
- ext/byk/extconf.rb
|
54
84
|
- lib/byk.rb
|
85
|
+
- lib/byk/core_ext/string.rb
|
86
|
+
- lib/byk/safe.rb
|
55
87
|
- lib/byk/version.rb
|
56
88
|
- spec/byk_spec.rb
|
57
89
|
homepage: https://github.com/topalovic/byk
|
58
90
|
licenses:
|
59
91
|
- MIT
|
60
92
|
metadata: {}
|
61
|
-
post_install_message:
|
93
|
+
post_install_message:
|
62
94
|
rdoc_options: []
|
63
95
|
require_paths:
|
64
96
|
- lib
|
@@ -66,17 +98,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
98
|
requirements:
|
67
99
|
- - ">="
|
68
100
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
101
|
+
version: 2.2.0
|
70
102
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
103
|
requirements:
|
72
104
|
- - ">="
|
73
105
|
- !ruby/object:Gem::Version
|
74
106
|
version: '0'
|
75
107
|
requirements: []
|
76
|
-
|
77
|
-
|
78
|
-
signing_key:
|
108
|
+
rubygems_version: 3.3.3
|
109
|
+
signing_key:
|
79
110
|
specification_version: 4
|
80
|
-
summary:
|
111
|
+
summary: Transliteration of Serbian Cyrillic <-> Latin
|
81
112
|
test_files:
|
82
113
|
- spec/byk_spec.rb
|