interscript-maps 2.1.0a7 → 2.1.0b2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +45 -0
- data/interscript-maps.gemspec +1 -1
- data/maps/hk-yue-Hani-Latn-1888.imp +11 -11
- data/maps/iso-ara-Arab-Latn-233-1984.imp +1 -5
- data/maps/iso-ara-Arab-Latn-233-2-1993.imp +348 -0
- data/maps/mvd-bel-Cyrl-Latn-2008.imp +8 -8
- data/maps/mvd-bel-Cyrl-Latn-2010.imp +3 -2
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c84ee5825c4035b4e7424cdd8c006f9fb79a207ccc4a1315e68dc6f21c75f73e
|
4
|
+
data.tar.gz: de8ad4a2c0f19b8524a2e8680c7c39d8b86f03f7c267dc2182b02f1efb816ac3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b65735d569726d1d3fb07303446c9fbc4ac9153272233a3a60332675c21d30aa3db258e1920efc9a41bad43cbfdb1b04c7c8077b1179ed7a77e2a70ea6ace249
|
7
|
+
data.tar.gz: c284db676b0a1183638867c86c2ae379c1c716e0edad559529dce73f03c36638d0782746ff010add8837ba22de020420dc50b0b2be26c08aa332bb47f15253b3
|
@@ -0,0 +1,45 @@
|
|
1
|
+
name: test
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
|
8
|
+
jobs:
|
9
|
+
test:
|
10
|
+
runs-on: ubuntu-latest
|
11
|
+
|
12
|
+
steps:
|
13
|
+
- uses: actions/checkout@v2
|
14
|
+
with:
|
15
|
+
repository: interscript/interscript
|
16
|
+
submodules: true
|
17
|
+
|
18
|
+
- run: |
|
19
|
+
rm -rf maps
|
20
|
+
mkdir maps
|
21
|
+
|
22
|
+
- uses: actions/checkout@v2
|
23
|
+
with:
|
24
|
+
path: maps
|
25
|
+
|
26
|
+
- name: Set up Ruby
|
27
|
+
uses: ruby/setup-ruby@v1
|
28
|
+
with:
|
29
|
+
ruby-version: 2.7
|
30
|
+
|
31
|
+
- name: cache ruby gems
|
32
|
+
uses: actions/cache@v2
|
33
|
+
env:
|
34
|
+
cache-name: cache-ruby-modules
|
35
|
+
with:
|
36
|
+
path: vendor/bundle
|
37
|
+
key: ${{ runner.os }}-ruby-2.7-gems-${{ hashFiles('**/interscript.gemspec') }}
|
38
|
+
restore-keys: |
|
39
|
+
${{ runner.os }}-ruby-2.7-gems-
|
40
|
+
|
41
|
+
- run: |
|
42
|
+
bundle config path vendor/bundle
|
43
|
+
pushd ruby && bundle install --jobs 4 --retry 3 --with jsexec --without secryst && popd
|
44
|
+
|
45
|
+
- run: pushd ruby && bundle exec rspec spec/interscript_spec.rb && popd
|
data/interscript-maps.gemspec
CHANGED
@@ -7,15 +7,16 @@ metadata {
|
|
7
7
|
name: Hong Kong Government Cantonese Romanisation
|
8
8
|
url: http://caes.hku.hk/hkjalonline/issues/download_the_file.php?f=2008_v11_1_kataoka__n__lee.pdf
|
9
9
|
creation_date: 2020-01
|
10
|
-
description:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
10
|
+
description: |
|
11
|
+
This system is commonly used for the transliteration of place names or
|
12
|
+
personal names in Hong Kong, as pronounced in Cantonese. There will be more
|
13
|
+
than one legitimate transliteration for the same syllable, or sometimes even
|
14
|
+
for the same character. For example, the character 仔 can be transcribed as
|
15
|
+
Chai or Tsai in this system. Some of the choice is context-dependent (e.g.
|
16
|
+
the same character in the place name 灣仔 is almost always Chai, but more
|
17
|
+
likely to be Tsai elsewhere). There will be more variations and
|
18
|
+
unpredictable conversions in personal names, and these conventions need to
|
19
|
+
be hard-coded.
|
19
20
|
|
20
21
|
notes:
|
21
22
|
- Tone is not represented in this system.
|
@@ -27,10 +28,9 @@ metadata {
|
|
27
28
|
distinctions were represented by ’ or diacritics (e.g. á vs. a), but were
|
28
29
|
removed in printed / typeset documents.
|
29
30
|
|
30
|
-
|
31
|
-
#A longer test list will be generated from GeoNames database
|
32
31
|
}
|
33
32
|
|
33
|
+
# TODO: A longer test list will be generated from GeoNames database
|
34
34
|
tests {
|
35
35
|
test "煎魚灣", "Tsin Yue Wan"
|
36
36
|
test "分流廟灣", "Fan Lau Miu Wan"
|
@@ -38,7 +38,7 @@ metadata {
|
|
38
38
|
غ is ġ instead of gh
|
39
39
|
ة is ẗ instead of h/t
|
40
40
|
ى is ỳ
|
41
|
-
ـِي is iy instead of
|
41
|
+
ـِي is iy instead of ī
|
42
42
|
ـُو is uw instead of ū
|
43
43
|
ـَا is a’ instead of ā
|
44
44
|
ـَى is aỳ instead of á
|
@@ -55,7 +55,6 @@ tests {
|
|
55
55
|
|
56
56
|
stage {
|
57
57
|
|
58
|
-
|
59
58
|
# CHARACTERS
|
60
59
|
parallel {
|
61
60
|
|
@@ -105,7 +104,6 @@ stage {
|
|
105
104
|
|
106
105
|
# Shadda
|
107
106
|
|
108
|
-
|
109
107
|
sub "\u0628\u0651", "bb" # ب
|
110
108
|
sub "\u062a\u0651", "tt" # ت
|
111
109
|
sub "\u062b\u0651", "ṯṯ" # ث
|
@@ -133,7 +131,6 @@ stage {
|
|
133
131
|
sub "\u0648\u0651", "ww" # و
|
134
132
|
sub "\u064a\u0651", "yy" # ي
|
135
133
|
|
136
|
-
|
137
134
|
sub "\u0622", "’â" # آ
|
138
135
|
|
139
136
|
sub "\u0627", "â" # ا
|
@@ -142,7 +139,6 @@ stage {
|
|
142
139
|
|
143
140
|
sub "\u0626", "'" # ئ
|
144
141
|
|
145
|
-
|
146
142
|
sub "\u0621", maybe("’") # ء# see note A
|
147
143
|
|
148
144
|
sub "\u0623", "a" # أ
|
@@ -0,0 +1,348 @@
|
|
1
|
+
metadata {
|
2
|
+
authority_id: iso
|
3
|
+
id: 233-2-1993
|
4
|
+
language: iso-639-2:ara
|
5
|
+
source_script: Arab
|
6
|
+
destination_script: Latn
|
7
|
+
name: ISO 233-2:1993 Documentation — Transliteration of Arabic characters into Latin characters
|
8
|
+
url:
|
9
|
+
- https://www.iso.org/standard/4118.html
|
10
|
+
- https://cdn.standards.iteh.ai/samples/4118/2f03c828842c4055a5619c1bded39381/ISO-233-2-1993.pdf
|
11
|
+
creation_date: 1993-08
|
12
|
+
confirmation_date: 2018-06
|
13
|
+
description: |
|
14
|
+
Establishes a simplified system for the transliteration. This
|
15
|
+
simplification of the stringent rules established by ISO 233:1984
|
16
|
+
is especially intended to facilitate the processing of bibliographic
|
17
|
+
information (e.g. catalogues, indices, citations, etc.). Annex A
|
18
|
+
gives the diacritical signs used (taken from the code table of
|
19
|
+
ISO 5436:1983).
|
20
|
+
notes:
|
21
|
+
- |
|
22
|
+
4.1.1 In order to render a transliterated text more legible, the vowels
|
23
|
+
are supplied [method 2.1 c) of ISO 233:1984]. However sukūn (`\u0652`) is
|
24
|
+
omitted, as well as the vowels and diphthongs which have only a flexional
|
25
|
+
function in nominal forms.
|
26
|
+
|
27
|
+
- |
|
28
|
+
4.1.2 The initial alif (`\u0627`) is not represented: the presence of an
|
29
|
+
initial vowel in the transliterated word is enough to indicate an alif in
|
30
|
+
the original script.
|
31
|
+
|
32
|
+
- |
|
33
|
+
4.1.3 A character bearing a hamzat (`\u0621`), which depends on the
|
34
|
+
vocalic context, is not represented.
|
35
|
+
|
36
|
+
- |
|
37
|
+
4.1.4 The definite article (`\u0627\u0644`), is always represented by the
|
38
|
+
characters “al-”, whatever its vocalization.
|
39
|
+
|
40
|
+
- |
|
41
|
+
4.1.5 The prepositions (li, bi, ka) as well as the conjunction wa, which
|
42
|
+
in Arabic are joined to the word, are separated by a hyphen in the
|
43
|
+
transliterated script.
|
44
|
+
|
45
|
+
}
|
46
|
+
|
47
|
+
tests {
|
48
|
+
test "مِصر", "Miṣr"
|
49
|
+
test "قَطَر", "Qaṭar"
|
50
|
+
test "الرِيَاض", "al-Riyāḍ"
|
51
|
+
test "الشارِقة", "al-Šâriqaẗ"
|
52
|
+
test "فِي نُورِ الْقَمَرِ", "Fī Nūr al-Qamar"
|
53
|
+
test "بِئْر", "Bi’r"
|
54
|
+
test "سَأَلَ", "Sa’al"
|
55
|
+
test "أَخْبَار", "Aẖbār"
|
56
|
+
test "قُرْآن", "Qur’ān"
|
57
|
+
test "آدَاب", "Ādāb"
|
58
|
+
test "الشَمْسُ", "al-Šams"
|
59
|
+
test "بِاللَيلِ", "bi-al-Layl"
|
60
|
+
test "لِلوَلَدِ", "li-l-Walad"
|
61
|
+
}
|
62
|
+
|
63
|
+
# Although this system inherits from iso-ara-Arab-Latn-233-1984,
|
64
|
+
# it utilizes a set of simplified rules.
|
65
|
+
# It is therefore easier to implement it as a separate map instead
|
66
|
+
# of using the inherit flag.
|
67
|
+
# inherit: iso-ara-Arab-Latn-233-1984
|
68
|
+
|
69
|
+
stage {
|
70
|
+
|
71
|
+
# CHARACTERS
|
72
|
+
parallel {
|
73
|
+
|
74
|
+
# pointing
|
75
|
+
|
76
|
+
# Table 2 No. 30
|
77
|
+
sub "\u064e", "a" # َ fatha
|
78
|
+
sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
|
79
|
+
|
80
|
+
# Table 2, No. 31
|
81
|
+
sub "\u064f", "u" # ُ damma
|
82
|
+
|
83
|
+
# Table 2, No. 32
|
84
|
+
sub "\u0650", "i" # ِ kasra
|
85
|
+
|
86
|
+
# Table 3, No. 33
|
87
|
+
sub "\u0652", "" # ْ sokoon, see 4.1.1
|
88
|
+
|
89
|
+
# pointing omitted in the end of words
|
90
|
+
sub "\u064e" + boundary, "" # َ fatha
|
91
|
+
sub "\u064f" + boundary, "" # ُ damma
|
92
|
+
sub "\u0650" + boundary, "" # ِ kasra
|
93
|
+
|
94
|
+
# special pointed letters
|
95
|
+
sub "\u0639\u064e", "‘a" # عَ
|
96
|
+
sub "\u0639\u0650", "‘i" # عِ
|
97
|
+
sub "\u0639\u064f", "‘ū" # عُ
|
98
|
+
# handle MacOS regex difference
|
99
|
+
sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و
|
100
|
+
|
101
|
+
sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
|
102
|
+
sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
|
103
|
+
sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
|
104
|
+
sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
|
105
|
+
sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
|
106
|
+
sub "\u064f\u0648", "ū" # ـُو damma followed by و
|
107
|
+
|
108
|
+
# Table 2 No. 31, column 4
|
109
|
+
sub "\u064e\u0648\u0652", "aw" # ـَوْ
|
110
|
+
|
111
|
+
# Table 2 No. 32, column 4
|
112
|
+
sub "\u064e\u064a\u0652", "ay" # ـَيْ
|
113
|
+
|
114
|
+
# Table 1 No. 27a
|
115
|
+
# ta' marboota in iso-233-2-1993 is all the same `aẗ`
|
116
|
+
sub "\u0629", "aẗ" # ة in the middle of the sentence
|
117
|
+
# sub "\u0629", "ẗ" # ة
|
118
|
+
|
119
|
+
# Table 3, No. 34
|
120
|
+
# Shadda
|
121
|
+
sub "\u0628\u0651", "bb" # ب
|
122
|
+
sub "\u062a\u0651", "tt" # ت
|
123
|
+
sub "\u062b\u0651", "ṯṯ" # ث
|
124
|
+
sub "\u062c\u0651", "ǧǧ" # ج
|
125
|
+
sub "\u062d\u0651", "ḥḥ" # ح
|
126
|
+
sub "\u062e\u0651", "ẖẖ" # خ
|
127
|
+
sub "\u062f\u0651", "dd" # د
|
128
|
+
sub "\u0630\u0651", "ḏḏ" # ذ
|
129
|
+
sub "\u0631\u0651", "rr" # ر
|
130
|
+
sub "\u0632\u0651", "zz" # ز
|
131
|
+
sub "\u0633\u0651", "ss" # س
|
132
|
+
sub "\u0634\u0651", "šš" # ش
|
133
|
+
sub "\u0635\u0651", "ṣṣ" # ص
|
134
|
+
sub "\u0636\u0651", "ḍḍ" # ض
|
135
|
+
sub "\u0637\u0651", "ṭṭ" # ط
|
136
|
+
sub "\u0638\u0651", "ẓẓ" # ظ
|
137
|
+
sub "\u063a\u0651", "ġġ" # غ
|
138
|
+
sub "\u0641\u0651", "ff" # ف
|
139
|
+
sub "\u0642\u0651", "qq" # ق
|
140
|
+
sub "\u0643\u0651", "kk" # ك
|
141
|
+
sub "\u0644\u0651", "ll" # ل
|
142
|
+
sub "\u0645\u0651", "mm" # م
|
143
|
+
sub "\u0646\u0651", "nn" # ن
|
144
|
+
sub "\u0647\u0651", "hh" # ه
|
145
|
+
sub "\u0648\u0651", "ww" # و
|
146
|
+
sub "\u064a\u0651", "yy" # ي
|
147
|
+
|
148
|
+
sub "\u0627", "â" # ا
|
149
|
+
|
150
|
+
sub "\u0649", "ỳ" # ى
|
151
|
+
|
152
|
+
sub "\u0623", "’" # أ
|
153
|
+
sub boundary + "\u0623", "" # أ
|
154
|
+
|
155
|
+
# See 4.1.4
|
156
|
+
# '\uFE8E' : '' # ﺎ
|
157
|
+
|
158
|
+
# Table 1 No. 3
|
159
|
+
sub "\u0628", "b" # ب
|
160
|
+
sub "\uFE91", "b" # ﺑ
|
161
|
+
sub "\uFE92", "b" # ﺒ
|
162
|
+
sub "\uFE90", "b" # ﺐ
|
163
|
+
|
164
|
+
# See note C
|
165
|
+
# Table 1 No. 4
|
166
|
+
sub "\u062a", "t" # ت
|
167
|
+
sub "\ufe97", "t" # ﺗ
|
168
|
+
sub "\ufe98", "t" # ﺘ
|
169
|
+
sub "\ufe96", "t" # ﺖ
|
170
|
+
|
171
|
+
# Table 1 No. 5
|
172
|
+
sub "\u062b", "ṯ" # ث
|
173
|
+
sub "\ufe9b", "ṯ" # ﺛ
|
174
|
+
sub "\ufe9c", "ṯ" # ﺜ
|
175
|
+
sub "\ufe9a", "ṯ" # ﺚ
|
176
|
+
|
177
|
+
# Table 1 No. 6
|
178
|
+
sub "\u062c", "ǧ" # ج
|
179
|
+
sub "\ufe9f", "ǧ" # ﺟ
|
180
|
+
sub "\ufea0", "ǧ" # ﺠ
|
181
|
+
sub "\ufe9e", "ǧ" # ﺞ
|
182
|
+
|
183
|
+
# Table 1 No. 7
|
184
|
+
sub "\u062d", "ḥ" # ح
|
185
|
+
sub "\ufea3", "ḥ" # ﺣ
|
186
|
+
sub "\ufea4", "ḥ" # ﺤ
|
187
|
+
sub "\ufea2", "ḥ" # ﺢ
|
188
|
+
|
189
|
+
# Table 1 No. 8
|
190
|
+
sub "\u062e", "ẖ" # خ
|
191
|
+
sub "\ufea7", "ẖ" # ﺧ
|
192
|
+
sub "\ufea8", "ẖ" # ﺨ
|
193
|
+
sub "\ufea6", "ẖ" # ﺦ
|
194
|
+
|
195
|
+
# Table 1 No. 9
|
196
|
+
sub "\u062f", "d" # د
|
197
|
+
sub "\ufeaa", "d" # ﺪ
|
198
|
+
|
199
|
+
# Table 1 No. 10
|
200
|
+
sub "\u0630", "ḏ" # ذ
|
201
|
+
sub "\ufeac", "ḏ" # ﺬ
|
202
|
+
|
203
|
+
# Table 1 No. 11
|
204
|
+
sub "\u0631", "r" # ر
|
205
|
+
sub "\ufeae", "r" # ﺮ
|
206
|
+
|
207
|
+
# Table 1 No. 12
|
208
|
+
sub "\u0632", "z" # ز
|
209
|
+
sub "\ufeb0", "z" # ﺰ
|
210
|
+
|
211
|
+
# Table 1 No. 13
|
212
|
+
sub "\u0633", "s" # س
|
213
|
+
sub "\ufeb3", "s" # ﺳ
|
214
|
+
sub "\ufeb4", "s" # ﺴ
|
215
|
+
sub "\ufeb2", "s" # ﺲ
|
216
|
+
|
217
|
+
# Table 1 No. 14
|
218
|
+
sub "\u0634", "š" # ش
|
219
|
+
sub "\ufeb7", "š" # ﺷ
|
220
|
+
sub "\ufeb8", "š" # ﺸ
|
221
|
+
sub "\ufeb6", "š" # ﺶ
|
222
|
+
|
223
|
+
# Table 1 No. 15
|
224
|
+
sub "\u0635", "ṣ" # ص
|
225
|
+
sub "\ufebb", "ṣ" # ﺻ
|
226
|
+
sub "\ufebc", "ṣ" # ﺼ
|
227
|
+
sub "\ufeba", "ṣ" # ﺺ
|
228
|
+
|
229
|
+
# Table 1 No. 16
|
230
|
+
sub "\u0636", "ḍ" # ض
|
231
|
+
sub "\ufebf", "ḍ" # ﺿ
|
232
|
+
sub "\ufec0", "ḍ" # ﻀ
|
233
|
+
sub "\ufebe", "ḍ" # ﺾ
|
234
|
+
|
235
|
+
# Table 1 No. 17
|
236
|
+
sub "\u0637", "ṭ" # ط
|
237
|
+
sub "\ufec3", "ṭ" # ﻃ
|
238
|
+
sub "\ufec4", "ṭ" # ﻄ
|
239
|
+
sub "\ufec2", "ṭ" # ﻂ
|
240
|
+
|
241
|
+
# Table 1 No. 18
|
242
|
+
sub "\u0638", "ẓ" # ظ
|
243
|
+
sub "\ufec7", "ẓ" # ﻇ
|
244
|
+
sub "\ufec8", "ẓ" # ﻈ
|
245
|
+
sub "\ufec6", "ẓ" # ﻆ
|
246
|
+
|
247
|
+
# Table 1 No. 19
|
248
|
+
sub "\u0639", "‘" # ع
|
249
|
+
sub "\ufecb", "‘" # ﻋ
|
250
|
+
sub "\ufecc", "‘" # ﻌ
|
251
|
+
sub "\ufeca", "‘" # ﻊ
|
252
|
+
|
253
|
+
# Table 1 No. 20
|
254
|
+
sub "\u063a", "ġ" # غ
|
255
|
+
sub "\ufecf", "ġ" # ﻏ
|
256
|
+
sub "\ufed0", "ġ" # ﻐ
|
257
|
+
sub "\ufece", "ġ" # ﻎ
|
258
|
+
|
259
|
+
# Table 1 No. 21
|
260
|
+
sub "\u0641", "f" # ف
|
261
|
+
sub "\ufed3", "f" # ﻓ
|
262
|
+
sub "\ufed4", "f" # ﻔ
|
263
|
+
sub "\ufed2", "f" # ﻒ
|
264
|
+
sub "\u06a2", "f" # ڢ Maghrebi form
|
265
|
+
|
266
|
+
# Table 1 No. 22
|
267
|
+
sub "\u0642", "q" # ق
|
268
|
+
sub "\ufed7", "q" # ﻗ
|
269
|
+
sub "\ufed8", "q" # ﻘ
|
270
|
+
sub "\ufed6", "q" # ﻖ
|
271
|
+
sub "\u06a8", "q" # ڧ Maghrebi form
|
272
|
+
|
273
|
+
# Table 1 No. 23
|
274
|
+
sub "\u0643", "k" # ك
|
275
|
+
sub "\ufedb", "k" # ﻛ
|
276
|
+
sub "\ufedc", "k" # ﻜ
|
277
|
+
sub "\ufeda", "k" # ﻚ
|
278
|
+
|
279
|
+
# Table 1 No. 24
|
280
|
+
sub "\u0644", "l" # ل
|
281
|
+
sub "\ufedf", "l" # ﻟ
|
282
|
+
sub "\ufee0", "l" # ﻠ
|
283
|
+
sub "\ufede", "l" # ﻞ
|
284
|
+
|
285
|
+
# Table 1 No. 25
|
286
|
+
sub "\u0645", "m" # م
|
287
|
+
sub "\ufee3", "m" # ﻣ
|
288
|
+
sub "\ufee4", "m" # ﻤ
|
289
|
+
sub "\ufee2", "m" # ﻢ
|
290
|
+
|
291
|
+
# Table 1 No. 26
|
292
|
+
sub "\u0646", "n" # ن
|
293
|
+
sub "\ufee7", "n" # ﻧ
|
294
|
+
sub "\ufee8", "n" # ﻨ
|
295
|
+
sub "\ufee6", "n" # ﻦ
|
296
|
+
|
297
|
+
# Table 1 No. 27
|
298
|
+
sub "\u0647", "h" # ه
|
299
|
+
sub "\ufeeb", "h" # ﻫ
|
300
|
+
sub "\ufeec", "h" # ﻬ
|
301
|
+
sub "\ufeea", "h" # ﻪ
|
302
|
+
|
303
|
+
# Table 1 No. 28
|
304
|
+
sub "\u0648", "w" # و
|
305
|
+
sub "\ufeee", "w" # ﻮ
|
306
|
+
|
307
|
+
# Table 1 No. 29
|
308
|
+
sub "\u064a", "y" # ي
|
309
|
+
sub "\ufef3", "y" # ﻳ
|
310
|
+
sub "\ufef4", "y" # ﻴ
|
311
|
+
sub "\ufef1", "y" # ﻱ
|
312
|
+
|
313
|
+
# Table 4 row 1
|
314
|
+
sub "\u060c", "," # ،
|
315
|
+
|
316
|
+
# Table 4 row 2
|
317
|
+
sub "\u061b", ";" # ؛
|
318
|
+
|
319
|
+
# Table 4 row 3
|
320
|
+
sub "\u061f", "?" # ؟
|
321
|
+
|
322
|
+
# 4.3 Notes to Tables
|
323
|
+
sub "\u0626", "’" # ئ
|
324
|
+
|
325
|
+
sub "\u0622", "’ā" # آ
|
326
|
+
|
327
|
+
sub boundary + "\u0622", "ā" # آ
|
328
|
+
|
329
|
+
# definite article
|
330
|
+
|
331
|
+
sub boundary + "\u0627\u0644", "al-" # ال
|
332
|
+
|
333
|
+
sub "\u0627\u0644", "al-", before: "\u0628\u0650" # بِال
|
334
|
+
|
335
|
+
sub boundary + "\u0628\u0650", "bi-", after: "\u0627\u0644" # بـِ
|
336
|
+
|
337
|
+
sub boundary + "\u0644\u0650\u0644", "li-l-" # لِل
|
338
|
+
|
339
|
+
}
|
340
|
+
|
341
|
+
|
342
|
+
# POSTRULES
|
343
|
+
sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
|
344
|
+
# don't capitalize the definite article in the middle of a sentence
|
345
|
+
sub "Al-", "al-" # ال
|
346
|
+
sub "Bi-", "bi-" # بِ
|
347
|
+
sub "Li-L-", "li-l-" # لِل
|
348
|
+
}
|
@@ -86,14 +86,14 @@ tests {
|
|
86
86
|
stage {
|
87
87
|
|
88
88
|
# RULES
|
89
|
-
sub
|
90
|
-
sub
|
91
|
-
sub
|
92
|
-
sub
|
93
|
-
sub
|
94
|
-
sub
|
95
|
-
sub
|
96
|
-
sub
|
89
|
+
sub "\u2019\u0415", "Je" # Е
|
90
|
+
sub "\u2019\u0435", "je" # е
|
91
|
+
sub "\u2019\u0401", "Jo" # Ë
|
92
|
+
sub "\u2019\u0451", "jo" # ё
|
93
|
+
sub "\u2019\u042E", "Ju" # Ю
|
94
|
+
sub "\u2019\u044E", "ju" # ю
|
95
|
+
sub "\u2019\u042F", "Ja" # Я
|
96
|
+
sub "\u2019\u044F", "ja" # я
|
97
97
|
|
98
98
|
sub "\u0415", "Je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # Е after vowels
|
99
99
|
sub "\u0435", "je", before: any("АаЕеЁёИиОоУуЭэЮюЯяЬьЎў") # е after vowels
|
@@ -43,8 +43,9 @@ stage {
|
|
43
43
|
|
44
44
|
# RULES
|
45
45
|
# note[5]
|
46
|
-
|
47
|
-
sub "\
|
46
|
+
# Those two are nonsense and harmful for reversibility
|
47
|
+
# sub "\u044C", ref( 1 ), before: any("ЗзЛлНнСсЦц") # ь after consonants
|
48
|
+
# sub "\u02B9", ref( 1 ), before: any("ЗзЛлНнСсЦц") # Ь after consonants
|
48
49
|
# Й at end
|
49
50
|
sub "\u0419" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # Й after vowels
|
50
51
|
sub "\u0439" + line_end, "", before: any("ЕеЁёЫыЮюЯя") # й after vowels
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: interscript-maps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.0b2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
@@ -17,6 +17,7 @@ executables: []
|
|
17
17
|
extensions: []
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
|
+
- ".github/workflows/test.yml"
|
20
21
|
- README.adoc
|
21
22
|
- interscript-maps.gemspec
|
22
23
|
- interscript-maps.yaml
|
@@ -183,6 +184,7 @@ files:
|
|
183
184
|
- maps/icao-srp-Cyrl-Latn-9303.imp
|
184
185
|
- maps/icao-ukr-Cyrl-Latn-9303.imp
|
185
186
|
- maps/iso-ara-Arab-Latn-233-1984.imp
|
187
|
+
- maps/iso-ara-Arab-Latn-233-2-1993.imp
|
186
188
|
- maps/iso-asm-Beng-Latn-15919-2001.imp
|
187
189
|
- maps/iso-ben-Beng-Latn-15919-2001.imp
|
188
190
|
- maps/iso-ell-Grek-Latn-843-1997-t1.imp
|