interscript-maps 2.1.0a9 → 2.1.0b4
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69e1d269e2f63895ec1cdeb7f11f7b9144a3124a3e419e0712d8aada527f78fa
|
4
|
+
data.tar.gz: 216967e0955d3cd194b3cee6b6e11822c5bce004844913ca1b87e66fd72bfc35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eabcf9c7e79c863094e24308325d5f97737eafb77ca76116c6cfc0359f3100385d9441451aaa86eac45305dc38260a7106dc60560c0827e70cffc41216f0e89d
|
7
|
+
data.tar.gz: 500b3dbb2d912a93de57874920cd77ff268d6d92b5cec1d7d0057c85fd824ffb119b442e6cefa3d49fa1c738afa46837cef049858f18e4988fb4c502d2f14488
|
data/interscript-maps.gemspec
CHANGED
@@ -7,15 +7,16 @@ metadata {
|
|
7
7
|
name: Hong Kong Government Cantonese Romanisation
|
8
8
|
url: http://caes.hku.hk/hkjalonline/issues/download_the_file.php?f=2008_v11_1_kataoka__n__lee.pdf
|
9
9
|
creation_date: 2020-01
|
10
|
-
description:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
10
|
+
description: |
|
11
|
+
This system is commonly used for the transliteration of place names or
|
12
|
+
personal names in Hong Kong, as pronounced in Cantonese. There will be more
|
13
|
+
than one legitimate transliteration for the same syllable, or sometimes even
|
14
|
+
for the same character. For example, the character 仔 can be transcribed as
|
15
|
+
Chai or Tsai in this system. Some of the choices are context-dependent (e.g.
|
16
|
+
the same character in the place name 灣仔 is almost always Chai, but more
|
17
|
+
likely to be Tsai elsewhere). There will be more variations and
|
18
|
+
unpredictable conversions in personal names, and these conventions need to
|
19
|
+
be hard-coded.
|
19
20
|
|
20
21
|
notes:
|
21
22
|
- Tone is not represented in this system.
|
@@ -27,10 +28,9 @@ metadata {
|
|
27
28
|
distinctions were represented by ’ or diacritics (e.g. á vs. a), but were
|
28
29
|
removed in printed / typeset documents.
|
29
30
|
|
30
|
-
|
31
|
-
#A longer test list will be generated from GeoNames database
|
32
31
|
}
|
33
32
|
|
33
|
+
# TODO: A longer test list will be generated from GeoNames database
|
34
34
|
tests {
|
35
35
|
test "煎魚灣", "Tsin Yue Wan"
|
36
36
|
test "分流廟灣", "Fan Lau Miu Wan"
|
@@ -38,7 +38,7 @@ metadata {
|
|
38
38
|
غ is ġ instead of gh
|
39
39
|
ة is ẗ instead of h/t
|
40
40
|
ى is ỳ
|
41
|
-
ـِي is iy instead of
|
41
|
+
ـِي is iy instead of ī
|
42
42
|
ـُو is uw instead of ū
|
43
43
|
ـَا is a’ instead of ā
|
44
44
|
ـَى is aỳ instead of á
|
@@ -55,7 +55,6 @@ tests {
|
|
55
55
|
|
56
56
|
stage {
|
57
57
|
|
58
|
-
|
59
58
|
# CHARACTERS
|
60
59
|
parallel {
|
61
60
|
|
@@ -105,7 +104,6 @@ stage {
|
|
105
104
|
|
106
105
|
# Shadda
|
107
106
|
|
108
|
-
|
109
107
|
sub "\u0628\u0651", "bb" # ب
|
110
108
|
sub "\u062a\u0651", "tt" # ت
|
111
109
|
sub "\u062b\u0651", "ṯṯ" # ث
|
@@ -133,7 +131,6 @@ stage {
|
|
133
131
|
sub "\u0648\u0651", "ww" # و
|
134
132
|
sub "\u064a\u0651", "yy" # ي
|
135
133
|
|
136
|
-
|
137
134
|
sub "\u0622", "’â" # آ
|
138
135
|
|
139
136
|
sub "\u0627", "â" # ا
|
@@ -142,7 +139,6 @@ stage {
|
|
142
139
|
|
143
140
|
sub "\u0626", "'" # ئ
|
144
141
|
|
145
|
-
|
146
142
|
sub "\u0621", maybe("’") # ء# see note A
|
147
143
|
|
148
144
|
sub "\u0623", "a" # أ
|
@@ -0,0 +1,348 @@
|
|
1
|
+
metadata {
|
2
|
+
authority_id: iso
|
3
|
+
id: 233-2-1993
|
4
|
+
language: iso-639-2:ara
|
5
|
+
source_script: Arab
|
6
|
+
destination_script: Latn
|
7
|
+
name: ISO 233-2:1993 Documentation — Transliteration of Arabic characters into Latin characters
|
8
|
+
url:
|
9
|
+
- https://www.iso.org/standard/4118.html
|
10
|
+
- https://cdn.standards.iteh.ai/samples/4118/2f03c828842c4055a5619c1bded39381/ISO-233-2-1993.pdf
|
11
|
+
creation_date: 1993-08
|
12
|
+
confirmation_date: 2018-06
|
13
|
+
description: |
|
14
|
+
Establishes a simplified system for the transliteration. This
|
15
|
+
simplification of the stringent rules established by ISO 233:1984
|
16
|
+
is especially intended to facilitate the processing of bibliographic
|
17
|
+
information (e.g. catalogues, indices, citations, etc.). Annex A
|
18
|
+
gives the diacritical signs used (taken from the code table of
|
19
|
+
ISO 5436:1983).
|
20
|
+
notes:
|
21
|
+
- |
|
22
|
+
4.1.1 In order to render a transliterated text more legible, the vowels
|
23
|
+
are supplied [method 2.1 c) of ISO 233:1984]. However, sukūn (`\u0652`) is
|
24
|
+
omitted, as well as the vowels and diphthongs which have only a flexional
|
25
|
+
function in nominal forms.
|
26
|
+
|
27
|
+
- |
|
28
|
+
4.1.2 The initial alif (`\u0627`) is not represented: the presence of an
|
29
|
+
initial vowel in the transliterated word is enough to indicate an alif in
|
30
|
+
the original script.
|
31
|
+
|
32
|
+
- |
|
33
|
+
4.1.3 A character bearing a hamzat (`\u0621`), which depends on the
|
34
|
+
vocalic context, is not represented.
|
35
|
+
|
36
|
+
- |
|
37
|
+
4.1.4 The definite article (`\u0627\u0644`), is always represented by the
|
38
|
+
characters “al-”, whatever its vocalization.
|
39
|
+
|
40
|
+
- |
|
41
|
+
4.1.5 The prepositions (li, bi, ka) as well as the conjunction wa, which
|
42
|
+
in Arabic are joined to the word, are separated by a hyphen in the
|
43
|
+
transliterated script.
|
44
|
+
|
45
|
+
}
|
46
|
+
|
47
|
+
tests {
|
48
|
+
test "مِصر", "Miṣr"
|
49
|
+
test "قَطَر", "Qaṭar"
|
50
|
+
test "الرِيَاض", "al-Riyāḍ"
|
51
|
+
test "الشارِقة", "al-Šâriqaẗ"
|
52
|
+
test "فِي نُورِ الْقَمَرِ", "Fī Nūr al-Qamar"
|
53
|
+
test "بِئْر", "Bi’r"
|
54
|
+
test "سَأَلَ", "Sa’al"
|
55
|
+
test "أَخْبَار", "Aẖbār"
|
56
|
+
test "قُرْآن", "Qur’ān"
|
57
|
+
test "آدَاب", "Ādāb"
|
58
|
+
test "الشَمْسُ", "al-Šams"
|
59
|
+
test "بِاللَيلِ", "bi-al-Layl"
|
60
|
+
test "لِلوَلَدِ", "li-l-Walad"
|
61
|
+
}
|
62
|
+
|
63
|
+
# Although this system inherits from iso-ara-Arab-Latn-233-1984,
|
64
|
+
# it utilizes a set of simplified rules.
|
65
|
+
# It is therefore easier to implement it as a separate map instead
|
66
|
+
# of using the inherit flag.
|
67
|
+
# inherit: iso-ara-Arab-Latn-233-1984
|
68
|
+
|
69
|
+
stage {
|
70
|
+
|
71
|
+
# CHARACTERS
|
72
|
+
parallel {
|
73
|
+
|
74
|
+
# pointing
|
75
|
+
|
76
|
+
# Table 2 No. 30
|
77
|
+
sub "\u064e", "a" # َ fatha
|
78
|
+
sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
|
79
|
+
|
80
|
+
# Table 2, No. 31
|
81
|
+
sub "\u064f", "u" # ُ damma
|
82
|
+
|
83
|
+
# Table 2, No. 32
|
84
|
+
sub "\u0650", "i" # ِ kasra
|
85
|
+
|
86
|
+
# Table 3, No. 33
|
87
|
+
sub "\u0652", "" # ْ sokoon, see 4.1.1
|
88
|
+
|
89
|
+
# pointing omitted in the end of words
|
90
|
+
sub "\u064e" + boundary, "" # َ fatha
|
91
|
+
sub "\u064f" + boundary, "" # ُ damma
|
92
|
+
sub "\u0650" + boundary, "" # ِ kasra
|
93
|
+
|
94
|
+
# special pointed letters
|
95
|
+
sub "\u0639\u064e", "‘a" # عَ
|
96
|
+
sub "\u0639\u0650", "‘i" # عِ
|
97
|
+
sub "\u0639\u064f", "‘ū" # عُ
|
98
|
+
# handle MacOS regex difference
|
99
|
+
sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و
|
100
|
+
|
101
|
+
sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
|
102
|
+
sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
|
103
|
+
sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
|
104
|
+
sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
|
105
|
+
sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
|
106
|
+
sub "\u064f\u0648", "ū" # ـُو damma followed by و
|
107
|
+
|
108
|
+
# Table 2 No. 31, column 4
|
109
|
+
sub "\u064e\u0648\u0652", "aw" # ـَوْ
|
110
|
+
|
111
|
+
# Table 2 No. 32, column 4
|
112
|
+
sub "\u064e\u064a\u0652", "ay" # ـَيْ
|
113
|
+
|
114
|
+
# Table 1 No. 27a
|
115
|
+
# ta' marboota in iso-233-2-1993 is all the same `aẗ`
|
116
|
+
sub "\u0629", "aẗ" # ة in the middle of the sentence
|
117
|
+
# sub "\u0629", "ẗ" # ة
|
118
|
+
|
119
|
+
# Table 3, No. 34
|
120
|
+
# Shadda
|
121
|
+
sub "\u0628\u0651", "bb" # ب
|
122
|
+
sub "\u062a\u0651", "tt" # ت
|
123
|
+
sub "\u062b\u0651", "ṯṯ" # ث
|
124
|
+
sub "\u062c\u0651", "ǧǧ" # ج
|
125
|
+
sub "\u062d\u0651", "ḥḥ" # ح
|
126
|
+
sub "\u062e\u0651", "ẖẖ" # خ
|
127
|
+
sub "\u062f\u0651", "dd" # د
|
128
|
+
sub "\u0630\u0651", "ḏḏ" # ذ
|
129
|
+
sub "\u0631\u0651", "rr" # ر
|
130
|
+
sub "\u0632\u0651", "zz" # ز
|
131
|
+
sub "\u0633\u0651", "ss" # س
|
132
|
+
sub "\u0634\u0651", "šš" # ش
|
133
|
+
sub "\u0635\u0651", "ṣṣ" # ص
|
134
|
+
sub "\u0636\u0651", "ḍḍ" # ض
|
135
|
+
sub "\u0637\u0651", "ṭṭ" # ط
|
136
|
+
sub "\u0638\u0651", "ẓẓ" # ظ
|
137
|
+
sub "\u063a\u0651", "ġġ" # غ
|
138
|
+
sub "\u0641\u0651", "ff" # ف
|
139
|
+
sub "\u0642\u0651", "qq" # ق
|
140
|
+
sub "\u0643\u0651", "kk" # ك
|
141
|
+
sub "\u0644\u0651", "ll" # ل
|
142
|
+
sub "\u0645\u0651", "mm" # م
|
143
|
+
sub "\u0646\u0651", "nn" # ن
|
144
|
+
sub "\u0647\u0651", "hh" # ه
|
145
|
+
sub "\u0648\u0651", "ww" # و
|
146
|
+
sub "\u064a\u0651", "yy" # ي
|
147
|
+
|
148
|
+
sub "\u0627", "â" # ا
|
149
|
+
|
150
|
+
sub "\u0649", "ỳ" # ى
|
151
|
+
|
152
|
+
sub "\u0623", "’" # أ
|
153
|
+
sub boundary + "\u0623", "" # أ
|
154
|
+
|
155
|
+
# See 4.1.4
|
156
|
+
# '\uFE8E' : '' # ﺎ
|
157
|
+
|
158
|
+
# Table 1 No. 3
|
159
|
+
sub "\u0628", "b" # ب
|
160
|
+
sub "\uFE91", "b" # ﺑ
|
161
|
+
sub "\uFE92", "b" # ﺒ
|
162
|
+
sub "\uFE90", "b" # ﺐ
|
163
|
+
|
164
|
+
# See note C
|
165
|
+
# Table 1 No. 4
|
166
|
+
sub "\u062a", "t" # ت
|
167
|
+
sub "\ufe97", "t" # ﺗ
|
168
|
+
sub "\ufe98", "t" # ﺘ
|
169
|
+
sub "\ufe96", "t" # ﺖ
|
170
|
+
|
171
|
+
# Table 1 No. 5
|
172
|
+
sub "\u062b", "ṯ" # ث
|
173
|
+
sub "\ufe9b", "ṯ" # ﺛ
|
174
|
+
sub "\ufe9c", "ṯ" # ﺜ
|
175
|
+
sub "\ufe9a", "ṯ" # ﺚ
|
176
|
+
|
177
|
+
# Table 1 No. 6
|
178
|
+
sub "\u062c", "ǧ" # ج
|
179
|
+
sub "\ufe9f", "ǧ" # ﺟ
|
180
|
+
sub "\ufea0", "ǧ" # ﺠ
|
181
|
+
sub "\ufe9e", "ǧ" # ﺞ
|
182
|
+
|
183
|
+
# Table 1 No. 7
|
184
|
+
sub "\u062d", "ḥ" # ح
|
185
|
+
sub "\ufea3", "ḥ" # ﺣ
|
186
|
+
sub "\ufea4", "ḥ" # ﺤ
|
187
|
+
sub "\ufea2", "ḥ" # ﺢ
|
188
|
+
|
189
|
+
# Table 1 No. 8
|
190
|
+
sub "\u062e", "ẖ" # خ
|
191
|
+
sub "\ufea7", "ẖ" # ﺧ
|
192
|
+
sub "\ufea8", "ẖ" # ﺨ
|
193
|
+
sub "\ufea6", "ẖ" # ﺦ
|
194
|
+
|
195
|
+
# Table 1 No. 9
|
196
|
+
sub "\u062f", "d" # د
|
197
|
+
sub "\ufeaa", "d" # ﺪ
|
198
|
+
|
199
|
+
# Table 1 No. 10
|
200
|
+
sub "\u0630", "ḏ" # ذ
|
201
|
+
sub "\ufeac", "ḏ" # ﺬ
|
202
|
+
|
203
|
+
# Table 1 No. 11
|
204
|
+
sub "\u0631", "r" # ر
|
205
|
+
sub "\ufeae", "r" # ﺮ
|
206
|
+
|
207
|
+
# Table 1 No. 12
|
208
|
+
sub "\u0632", "z" # ز
|
209
|
+
sub "\ufeb0", "z" # ﺰ
|
210
|
+
|
211
|
+
# Table 1 No. 13
|
212
|
+
sub "\u0633", "s" # س
|
213
|
+
sub "\ufeb3", "s" # ﺳ
|
214
|
+
sub "\ufeb4", "s" # ﺴ
|
215
|
+
sub "\ufeb2", "s" # ﺲ
|
216
|
+
|
217
|
+
# Table 1 No. 14
|
218
|
+
sub "\u0634", "š" # ش
|
219
|
+
sub "\ufeb7", "š" # ﺷ
|
220
|
+
sub "\ufeb8", "š" # ﺸ
|
221
|
+
sub "\ufeb6", "š" # ﺶ
|
222
|
+
|
223
|
+
# Table 1 No. 15
|
224
|
+
sub "\u0635", "ṣ" # ص
|
225
|
+
sub "\ufebb", "ṣ" # ﺻ
|
226
|
+
sub "\ufebc", "ṣ" # ﺼ
|
227
|
+
sub "\ufeba", "ṣ" # ﺺ
|
228
|
+
|
229
|
+
# Table 1 No. 16
|
230
|
+
sub "\u0636", "ḍ" # ض
|
231
|
+
sub "\ufebf", "ḍ" # ﺿ
|
232
|
+
sub "\ufec0", "ḍ" # ﻀ
|
233
|
+
sub "\ufebe", "ḍ" # ﺾ
|
234
|
+
|
235
|
+
# Table 1 No. 17
|
236
|
+
sub "\u0637", "ṭ" # ط
|
237
|
+
sub "\ufec3", "ṭ" # ﻃ
|
238
|
+
sub "\ufec4", "ṭ" # ﻄ
|
239
|
+
sub "\ufec2", "ṭ" # ﻂ
|
240
|
+
|
241
|
+
# Table 1 No. 18
|
242
|
+
sub "\u0638", "ẓ" # ظ
|
243
|
+
sub "\ufec7", "ẓ" # ﻇ
|
244
|
+
sub "\ufec8", "ẓ" # ﻈ
|
245
|
+
sub "\ufec6", "ẓ" # ﻆ
|
246
|
+
|
247
|
+
# Table 1 No. 19
|
248
|
+
sub "\u0639", "‘" # ع
|
249
|
+
sub "\ufecb", "‘" # ﻋ
|
250
|
+
sub "\ufecc", "‘" # ﻌ
|
251
|
+
sub "\ufeca", "‘" # ﻊ
|
252
|
+
|
253
|
+
# Table 1 No. 20
|
254
|
+
sub "\u063a", "ġ" # غ
|
255
|
+
sub "\ufecf", "ġ" # ﻏ
|
256
|
+
sub "\ufed0", "ġ" # ﻐ
|
257
|
+
sub "\ufece", "ġ" # ﻎ
|
258
|
+
|
259
|
+
# Table 1 No. 21
|
260
|
+
sub "\u0641", "f" # ف
|
261
|
+
sub "\ufed3", "f" # ﻓ
|
262
|
+
sub "\ufed4", "f" # ﻔ
|
263
|
+
sub "\ufed2", "f" # ﻒ
|
264
|
+
sub "\u06a2", "f" # ڢ Maghrebi form
|
265
|
+
|
266
|
+
# Table 1 No. 22
|
267
|
+
sub "\u0642", "q" # ق
|
268
|
+
sub "\ufed7", "q" # ﻗ
|
269
|
+
sub "\ufed8", "q" # ﻘ
|
270
|
+
sub "\ufed6", "q" # ﻖ
|
271
|
+
sub "\u06a8", "q" # ڧ Maghrebi form
|
272
|
+
|
273
|
+
# Table 1 No. 23
|
274
|
+
sub "\u0643", "k" # ك
|
275
|
+
sub "\ufedb", "k" # ﻛ
|
276
|
+
sub "\ufedc", "k" # ﻜ
|
277
|
+
sub "\ufeda", "k" # ﻚ
|
278
|
+
|
279
|
+
# Table 1 No. 24
|
280
|
+
sub "\u0644", "l" # ل
|
281
|
+
sub "\ufedf", "l" # ﻟ
|
282
|
+
sub "\ufee0", "l" # ﻠ
|
283
|
+
sub "\ufede", "l" # ﻞ
|
284
|
+
|
285
|
+
# Table 1 No. 25
|
286
|
+
sub "\u0645", "m" # م
|
287
|
+
sub "\ufee3", "m" # ﻣ
|
288
|
+
sub "\ufee4", "m" # ﻤ
|
289
|
+
sub "\ufee2", "m" # ﻢ
|
290
|
+
|
291
|
+
# Table 1 No. 26
|
292
|
+
sub "\u0646", "n" # ن
|
293
|
+
sub "\ufee7", "n" # ﻧ
|
294
|
+
sub "\ufee8", "n" # ﻨ
|
295
|
+
sub "\ufee6", "n" # ﻦ
|
296
|
+
|
297
|
+
# Table 1 No. 27
|
298
|
+
sub "\u0647", "h" # ه
|
299
|
+
sub "\ufeeb", "h" # ﻫ
|
300
|
+
sub "\ufeec", "h" # ﻬ
|
301
|
+
sub "\ufeea", "h" # ﻪ
|
302
|
+
|
303
|
+
# Table 1 No. 28
|
304
|
+
sub "\u0648", "w" # و
|
305
|
+
sub "\ufeee", "w" # ﻮ
|
306
|
+
|
307
|
+
# Table 1 No. 29
|
308
|
+
sub "\u064a", "y" # ي
|
309
|
+
sub "\ufef3", "y" # ﻳ
|
310
|
+
sub "\ufef4", "y" # ﻴ
|
311
|
+
sub "\ufef1", "y" # ﻱ
|
312
|
+
|
313
|
+
# Table 4 row 1
|
314
|
+
sub "\u060c", "," # ،
|
315
|
+
|
316
|
+
# Table 4 row 2
|
317
|
+
sub "\u061b", ";" # ؛
|
318
|
+
|
319
|
+
# Table 4 row 3
|
320
|
+
sub "\u061f", "?" # ؟
|
321
|
+
|
322
|
+
# 4.3 Notes to Tables
|
323
|
+
sub "\u0626", "’" # ئ
|
324
|
+
|
325
|
+
sub "\u0622", "’ā" # آ
|
326
|
+
|
327
|
+
sub boundary + "\u0622", "ā" # آ
|
328
|
+
|
329
|
+
# definite article
|
330
|
+
|
331
|
+
sub boundary + "\u0627\u0644", "al-" # ال
|
332
|
+
|
333
|
+
sub "\u0627\u0644", "al-", before: "\u0628\u0650" # بِال
|
334
|
+
|
335
|
+
sub boundary + "\u0628\u0650", "bi-", after: "\u0627\u0644" # بـِ
|
336
|
+
|
337
|
+
sub boundary + "\u0644\u0650\u0644", "li-l-" # لِل
|
338
|
+
|
339
|
+
}
|
340
|
+
|
341
|
+
|
342
|
+
# POSTRULES
|
343
|
+
sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
|
344
|
+
# don't capitalize the definite article in the middle of a sentence
|
345
|
+
sub "Al-", "al-" # ال
|
346
|
+
sub "Bi-", "bi-" # بِ
|
347
|
+
sub "Li-L-", "li-l-" # لِل
|
348
|
+
}
|
@@ -86,14 +86,14 @@ tests {
|
|
86
86
|
stage {
|
87
87
|
|
88
88
|
# RULES
|
89
|
-
sub
|
90
|
-
sub
|
91
|
-
sub
|
92
|
-
sub
|
93
|
-
sub
|
94
|
-
sub
|
95
|
-
sub
|
96
|
-
sub
|
89
|
+
sub "\u2019\u0415", "Je" # Е
|
90
|
+
sub "\u2019\u0435", "je" # е
|
91
|
+
sub "\u2019\u0401", "Jo" # Ё
|
92
|
+
sub "\u2019\u0451", "jo" # ё
|
93
|
+
sub "\u2019\u042E", "Ju" # Ю
|
94
|
+
sub "\u2019\u044E", "ju" # ю
|
95
|
+
sub "\u2019\u042F", "Ja" # Я
|
96
|
+
sub "\u2019\u044F", "ja" # я
|
97
97
|
|
98
98
|
sub "\u0415", "Je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # Е after vowels
|
99
99
|
sub "\u0435", "je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # е after vowels
|
@@ -43,8 +43,9 @@ stage {
|
|
43
43
|
|
44
44
|
# RULES
|
45
45
|
# note[5]
|
46
|
-
|
47
|
-
sub "\
|
46
|
+
# Those two are nonsense and harmful for reversibility
|
47
|
+
# sub "\u044C", ref( 1 ), before: any("ЗзЛлНнСсЦц") # ь after consonants
|
48
|
+
# sub "\u02B9", ref( 1 ), before: any("ЗзЛлНнСсЦц") # Ь after consonants
|
48
49
|
# Й at end
|
49
50
|
sub "\u0419" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # Я after vowels
|
50
51
|
sub "\u0439" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # я after vowels
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: interscript-maps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.0b4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
@@ -184,6 +184,7 @@ files:
|
|
184
184
|
- maps/icao-srp-Cyrl-Latn-9303.imp
|
185
185
|
- maps/icao-ukr-Cyrl-Latn-9303.imp
|
186
186
|
- maps/iso-ara-Arab-Latn-233-1984.imp
|
187
|
+
- maps/iso-ara-Arab-Latn-233-2-1993.imp
|
187
188
|
- maps/iso-asm-Beng-Latn-15919-2001.imp
|
188
189
|
- maps/iso-ben-Beng-Latn-15919-2001.imp
|
189
190
|
- maps/iso-ell-Grek-Latn-843-1997-t1.imp
|