interscript-maps 2.1.0b1 → 2.1.0b5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/interscript-maps.gemspec +1 -1
- data/maps/iso-ara-Arab-Latn-233-2-1993.imp +348 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 958e31d63f5bf124b93b29cecda03843560454c93f9cfda88fccdc78ea445bd8
|
4
|
+
data.tar.gz: b8a27044933cc628df8e8be2d7996b7cbbe715daa213d8a64a19f1493a9e6c51
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: babfb87ac3c32a698004acdb5e91f42fe431d5977609893f7f3c336aca22fd2d295303798eab42db796a58e2d6bdf87702f5dd229dffb1fefcd759a270bf45d6
|
7
|
+
data.tar.gz: a75cb96b252caec6eeb55a7127f703ac2ae0fc1f7b404b9d35181284a7a132e2da7617fc84a13c75d1bdced0679788df9d39d0a3478ae7db3e6e4b1567d592f9
|
data/maps/iso-ara-Arab-Latn-233-2-1993.imp
CHANGED
@@ -0,0 +1,348 @@
|
|
1
|
+
metadata {
|
2
|
+
authority_id: iso
|
3
|
+
id: 233-2-1993
|
4
|
+
language: iso-639-2:ara
|
5
|
+
source_script: Arab
|
6
|
+
destination_script: Latn
|
7
|
+
name: ISO 233-2:1993 Documentation — Transliteration of Arabic characters into Latin characters
|
8
|
+
url:
|
9
|
+
- https://www.iso.org/standard/4118.html
|
10
|
+
- https://cdn.standards.iteh.ai/samples/4118/2f03c828842c4055a5619c1bded39381/ISO-233-2-1993.pdf
|
11
|
+
creation_date: 1993-08
|
12
|
+
confirmation_date: 2018-06
|
13
|
+
description: |
|
14
|
+
Establishes a simplified system for the transliteration. This
|
15
|
+
simplification of the stringent rules established by ISO 233:1984
|
16
|
+
is especially intended to facilitate the processing of bibliographic
|
17
|
+
information (e.g. catalogues, indices, citations, etc.). Annex A
|
18
|
+
gives the diacritical signs used (taken from the code table of
|
19
|
+
ISO 5436:1983).
|
20
|
+
notes:
|
21
|
+
- |
|
22
|
+
4.1.1 In order to render a transliterated text more legible, the vowels
|
23
|
+
are supplied [method 2.1 c) of ISO 233:1984]. However sukūn (`\u0652`) is
|
24
|
+
omitted, as well as the vowels and diphthongs which have only a flexional
|
25
|
+
function in nominal forms.
|
26
|
+
|
27
|
+
- |
|
28
|
+
4.1.2 The initial alif (`\u0627`) is not represented: the presence of an
|
29
|
+
initial vowel in the transliterated word is enough to indicate an alif in
|
30
|
+
the original script.
|
31
|
+
|
32
|
+
- |
|
33
|
+
4.1.3 A character bearing a hamzat (`\u0621`), which depends on the
|
34
|
+
vocalic context, is not represented.
|
35
|
+
|
36
|
+
- |
|
37
|
+
4.1.4 The definite article (`\u0627\u0644`), is always represented by the
|
38
|
+
characters “al-”, whatever its vocalization.
|
39
|
+
|
40
|
+
- |
|
41
|
+
4.1.5 The prepositions (li, bi, ka) as well as the conjunction wa, which
|
42
|
+
in Arabic are joined to the word, are separated by a hyphen in the
|
43
|
+
transliterated script.
|
44
|
+
|
45
|
+
}
|
46
|
+
|
47
|
+
tests {
|
48
|
+
test "مِصر", "Miṣr"
|
49
|
+
test "قَطَر", "Qaṭar"
|
50
|
+
test "الرِيَاض", "al-Riyāḍ"
|
51
|
+
test "الشارِقة", "al-Šâriqaẗ"
|
52
|
+
test "فِي نُورِ الْقَمَرِ", "Fī Nūr al-Qamar"
|
53
|
+
test "بِئْر", "Bi’r"
|
54
|
+
test "سَأَلَ", "Sa’al"
|
55
|
+
test "أَخْبَار", "Aẖbār"
|
56
|
+
test "قُرْآن", "Qur’ān"
|
57
|
+
test "آدَاب", "Ādāb"
|
58
|
+
test "الشَمْسُ", "al-Šams"
|
59
|
+
test "بِاللَيلِ", "bi-al-Layl"
|
60
|
+
test "لِلوَلَدِ", "li-l-Walad"
|
61
|
+
}
|
62
|
+
|
63
|
+
# Although this system inherits from iso-ara-Arab-Latn-233-1984,
|
64
|
+
# it utilizes a set of simplified rules.
|
65
|
+
# It is therefore easier to implement it as a separate map instead
|
66
|
+
# of using the inherit flag.
|
67
|
+
# inherit: iso-ara-Arab-Latn-233-1984
|
68
|
+
|
69
|
+
stage {
|
70
|
+
|
71
|
+
# CHARACTERS
|
72
|
+
parallel {
|
73
|
+
|
74
|
+
# pointing
|
75
|
+
|
76
|
+
# Table 2 No. 30
|
77
|
+
sub "\u064e", "a" # َ fatha
|
78
|
+
sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
|
79
|
+
|
80
|
+
# Table 2, No. 31
|
81
|
+
sub "\u064f", "u" # ُ damma
|
82
|
+
|
83
|
+
# Table 2, No. 32
|
84
|
+
sub "\u0650", "i" # ِ kasra
|
85
|
+
|
86
|
+
# Table 3, No. 33
|
87
|
+
sub "\u0652", "" # ْ sokoon, see 4.1.1
|
88
|
+
|
89
|
+
# pointing omitted in the end of words
|
90
|
+
sub "\u064e" + boundary, "" # َ fatha
|
91
|
+
sub "\u064f" + boundary, "" # ُ damma
|
92
|
+
sub "\u0650" + boundary, "" # ِ kasra
|
93
|
+
|
94
|
+
# special pointed letters
|
95
|
+
sub "\u0639\u064e", "‘a" # عَ
|
96
|
+
sub "\u0639\u0650", "‘i" # عِ
|
97
|
+
sub "\u0639\u064f", "‘ū" # عُ
|
98
|
+
# handle MacOS regex difference
|
99
|
+
sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و
|
100
|
+
|
101
|
+
sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
|
102
|
+
sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
|
103
|
+
sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي, when that ي is itself followed by fatha or damma
|
104
|
+
sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
|
105
|
+
sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
|
106
|
+
sub "\u064f\u0648", "ū" # ـُو damma followed by و
|
107
|
+
|
108
|
+
# Table 2 No. 31, column 4
|
109
|
+
sub "\u064e\u0648\u0652", "aw" # ـَوْ
|
110
|
+
|
111
|
+
# Table 2 No. 32, column 4
|
112
|
+
sub "\u064e\u064a\u0652", "ay" # ـَيْ
|
113
|
+
|
114
|
+
# Table 1 No. 27a
|
115
|
+
# ta' marboota in iso-233-2-1993 is all the same `aẗ`
|
116
|
+
sub "\u0629", "aẗ" # ة (rendered the same in every position)
|
117
|
+
# sub "\u0629", "ẗ" # ة
|
118
|
+
|
119
|
+
# Table 3, No. 34
|
120
|
+
# Shadda
|
121
|
+
sub "\u0628\u0651", "bb" # ب
|
122
|
+
sub "\u062a\u0651", "tt" # ت
|
123
|
+
sub "\u062b\u0651", "ṯṯ" # ث
|
124
|
+
sub "\u062c\u0651", "ǧǧ" # ج
|
125
|
+
sub "\u062d\u0651", "ḥḥ" # ح
|
126
|
+
sub "\u062e\u0651", "ẖẖ" # خ
|
127
|
+
sub "\u062f\u0651", "dd" # د
|
128
|
+
sub "\u0630\u0651", "ḏḏ" # ذ
|
129
|
+
sub "\u0631\u0651", "rr" # ر
|
130
|
+
sub "\u0632\u0651", "zz" # ز
|
131
|
+
sub "\u0633\u0651", "ss" # س
|
132
|
+
sub "\u0634\u0651", "šš" # ش
|
133
|
+
sub "\u0635\u0651", "ṣṣ" # ص
|
134
|
+
sub "\u0636\u0651", "ḍḍ" # ض
|
135
|
+
sub "\u0637\u0651", "ṭṭ" # ط
|
136
|
+
sub "\u0638\u0651", "ẓẓ" # ظ
|
137
|
+
sub "\u063a\u0651", "ġġ" # غ
|
138
|
+
sub "\u0641\u0651", "ff" # ف
|
139
|
+
sub "\u0642\u0651", "qq" # ق
|
140
|
+
sub "\u0643\u0651", "kk" # ك
|
141
|
+
sub "\u0644\u0651", "ll" # ل
|
142
|
+
sub "\u0645\u0651", "mm" # م
|
143
|
+
sub "\u0646\u0651", "nn" # ن
|
144
|
+
sub "\u0647\u0651", "hh" # ه
|
145
|
+
sub "\u0648\u0651", "ww" # و
|
146
|
+
sub "\u064a\u0651", "yy" # ي
|
147
|
+
|
148
|
+
sub "\u0627", "â" # ا
|
149
|
+
|
150
|
+
sub "\u0649", "ỳ" # ى
|
151
|
+
|
152
|
+
sub "\u0623", "’" # أ
|
153
|
+
sub boundary + "\u0623", "" # أ
|
154
|
+
|
155
|
+
# See 4.1.4
|
156
|
+
# '\uFE8E' : '' # ﺎ
|
157
|
+
|
158
|
+
# Table 1 No. 3
|
159
|
+
sub "\u0628", "b" # ب
|
160
|
+
sub "\uFE91", "b" # ﺑ
|
161
|
+
sub "\uFE92", "b" # ﺒ
|
162
|
+
sub "\uFE90", "b" # ﺐ
|
163
|
+
|
164
|
+
# See note C
|
165
|
+
# Table 1 No. 4
|
166
|
+
sub "\u062a", "t" # ت
|
167
|
+
sub "\ufe97", "t" # ﺗ
|
168
|
+
sub "\ufe98", "t" # ﺘ
|
169
|
+
sub "\ufe96", "t" # ﺖ
|
170
|
+
|
171
|
+
# Table 1 No. 5
|
172
|
+
sub "\u062b", "ṯ" # ث
|
173
|
+
sub "\ufe9b", "ṯ" # ﺛ
|
174
|
+
sub "\ufe9c", "ṯ" # ﺜ
|
175
|
+
sub "\ufe9a", "ṯ" # ﺚ
|
176
|
+
|
177
|
+
# Table 1 No. 6
|
178
|
+
sub "\u062c", "ǧ" # ج
|
179
|
+
sub "\ufe9f", "ǧ" # ﺟ
|
180
|
+
sub "\ufea0", "ǧ" # ﺠ
|
181
|
+
sub "\ufe9e", "ǧ" # ﺞ
|
182
|
+
|
183
|
+
# Table 1 No. 7
|
184
|
+
sub "\u062d", "ḥ" # ح
|
185
|
+
sub "\ufea3", "ḥ" # ﺣ
|
186
|
+
sub "\ufea4", "ḥ" # ﺤ
|
187
|
+
sub "\ufea2", "ḥ" # ﺢ
|
188
|
+
|
189
|
+
# Table 1 No. 8
|
190
|
+
sub "\u062e", "ẖ" # خ
|
191
|
+
sub "\ufea7", "ẖ" # ﺧ
|
192
|
+
sub "\ufea8", "ẖ" # ﺨ
|
193
|
+
sub "\ufea6", "ẖ" # ﺦ
|
194
|
+
|
195
|
+
# Table 1 No. 9
|
196
|
+
sub "\u062f", "d" # د
|
197
|
+
sub "\ufeaa", "d" # ﺪ
|
198
|
+
|
199
|
+
# Table 1 No. 10
|
200
|
+
sub "\u0630", "ḏ" # ذ
|
201
|
+
sub "\ufeac", "ḏ" # ﺬ
|
202
|
+
|
203
|
+
# Table 1 No. 11
|
204
|
+
sub "\u0631", "r" # ر
|
205
|
+
sub "\ufeae", "r" # ﺮ
|
206
|
+
|
207
|
+
# Table 1 No. 12
|
208
|
+
sub "\u0632", "z" # ز
|
209
|
+
sub "\ufeb0", "z" # ﺰ
|
210
|
+
|
211
|
+
# Table 1 No. 13
|
212
|
+
sub "\u0633", "s" # س
|
213
|
+
sub "\ufeb3", "s" # ﺳ
|
214
|
+
sub "\ufeb4", "s" # ﺴ
|
215
|
+
sub "\ufeb2", "s" # ﺲ
|
216
|
+
|
217
|
+
# Table 1 No. 14
|
218
|
+
sub "\u0634", "š" # ش
|
219
|
+
sub "\ufeb7", "š" # ﺷ
|
220
|
+
sub "\ufeb8", "š" # ﺸ
|
221
|
+
sub "\ufeb6", "š" # ﺶ
|
222
|
+
|
223
|
+
# Table 1 No. 15
|
224
|
+
sub "\u0635", "ṣ" # ص
|
225
|
+
sub "\ufebb", "ṣ" # ﺻ
|
226
|
+
sub "\ufebc", "ṣ" # ﺼ
|
227
|
+
sub "\ufeba", "ṣ" # ﺺ
|
228
|
+
|
229
|
+
# Table 1 No. 16
|
230
|
+
sub "\u0636", "ḍ" # ض
|
231
|
+
sub "\ufebf", "ḍ" # ﺿ
|
232
|
+
sub "\ufec0", "ḍ" # ﻀ
|
233
|
+
sub "\ufebe", "ḍ" # ﺾ
|
234
|
+
|
235
|
+
# Table 1 No. 17
|
236
|
+
sub "\u0637", "ṭ" # ط
|
237
|
+
sub "\ufec3", "ṭ" # ﻃ
|
238
|
+
sub "\ufec4", "ṭ" # ﻄ
|
239
|
+
sub "\ufec2", "ṭ" # ﻂ
|
240
|
+
|
241
|
+
# Table 1 No. 18
|
242
|
+
sub "\u0638", "ẓ" # ظ
|
243
|
+
sub "\ufec7", "ẓ" # ﻇ
|
244
|
+
sub "\ufec8", "ẓ" # ﻈ
|
245
|
+
sub "\ufec6", "ẓ" # ﻆ
|
246
|
+
|
247
|
+
# Table 1 No. 19
|
248
|
+
sub "\u0639", "‘" # ع
|
249
|
+
sub "\ufecb", "‘" # ﻋ
|
250
|
+
sub "\ufecc", "‘" # ﻌ
|
251
|
+
sub "\ufeca", "‘" # ﻊ
|
252
|
+
|
253
|
+
# Table 1 No. 20
|
254
|
+
sub "\u063a", "ġ" # غ
|
255
|
+
sub "\ufecf", "ġ" # ﻏ
|
256
|
+
sub "\ufed0", "ġ" # ﻐ
|
257
|
+
sub "\ufece", "ġ" # ﻎ
|
258
|
+
|
259
|
+
# Table 1 No. 21
|
260
|
+
sub "\u0641", "f" # ف
|
261
|
+
sub "\ufed3", "f" # ﻓ
|
262
|
+
sub "\ufed4", "f" # ﻔ
|
263
|
+
sub "\ufed2", "f" # ﻒ
|
264
|
+
sub "\u06a2", "f" # ڢ Maghrebi form
|
265
|
+
|
266
|
+
# Table 1 No. 22
|
267
|
+
sub "\u0642", "q" # ق
|
268
|
+
sub "\ufed7", "q" # ﻗ
|
269
|
+
sub "\ufed8", "q" # ﻘ
|
270
|
+
sub "\ufed6", "q" # ﻖ
|
271
|
+
sub "\u06a8", "q" # ڧ Maghrebi form
|
272
|
+
|
273
|
+
# Table 1 No. 23
|
274
|
+
sub "\u0643", "k" # ك
|
275
|
+
sub "\ufedb", "k" # ﻛ
|
276
|
+
sub "\ufedc", "k" # ﻜ
|
277
|
+
sub "\ufeda", "k" # ﻚ
|
278
|
+
|
279
|
+
# Table 1 No. 24
|
280
|
+
sub "\u0644", "l" # ل
|
281
|
+
sub "\ufedf", "l" # ﻟ
|
282
|
+
sub "\ufee0", "l" # ﻠ
|
283
|
+
sub "\ufede", "l" # ﻞ
|
284
|
+
|
285
|
+
# Table 1 No. 25
|
286
|
+
sub "\u0645", "m" # م
|
287
|
+
sub "\ufee3", "m" # ﻣ
|
288
|
+
sub "\ufee4", "m" # ﻤ
|
289
|
+
sub "\ufee2", "m" # ﻢ
|
290
|
+
|
291
|
+
# Table 1 No. 26
|
292
|
+
sub "\u0646", "n" # ن
|
293
|
+
sub "\ufee7", "n" # ﻧ
|
294
|
+
sub "\ufee8", "n" # ﻨ
|
295
|
+
sub "\ufee6", "n" # ﻦ
|
296
|
+
|
297
|
+
# Table 1 No. 27
|
298
|
+
sub "\u0647", "h" # ه
|
299
|
+
sub "\ufeeb", "h" # ﻫ
|
300
|
+
sub "\ufeec", "h" # ﻬ
|
301
|
+
sub "\ufeea", "h" # ﻪ
|
302
|
+
|
303
|
+
# Table 1 No. 28
|
304
|
+
sub "\u0648", "w" # و
|
305
|
+
sub "\ufeee", "w" # ﻮ
|
306
|
+
|
307
|
+
# Table 1 No. 29
|
308
|
+
sub "\u064a", "y" # ي
|
309
|
+
sub "\ufef3", "y" # ﻳ
|
310
|
+
sub "\ufef4", "y" # ﻴ
|
311
|
+
sub "\ufef1", "y" # ﻱ
|
312
|
+
|
313
|
+
# Table 4 row 1
|
314
|
+
sub "\u060c", "," # ،
|
315
|
+
|
316
|
+
# Table 4 row 2
|
317
|
+
sub "\u061b", ";" # ؛
|
318
|
+
|
319
|
+
# Table 4 row 3
|
320
|
+
sub "\u061f", "?" # ؟
|
321
|
+
|
322
|
+
# 4.3 Notes to Tables
|
323
|
+
sub "\u0626", "’" # ئ
|
324
|
+
|
325
|
+
sub "\u0622", "’ā" # آ
|
326
|
+
|
327
|
+
sub boundary + "\u0622", "ā" # آ
|
328
|
+
|
329
|
+
# definite article
|
330
|
+
|
331
|
+
sub boundary + "\u0627\u0644", "al-" # ال
|
332
|
+
|
333
|
+
sub "\u0627\u0644", "al-", before: "\u0628\u0650" # بِال
|
334
|
+
|
335
|
+
sub boundary + "\u0628\u0650", "bi-", after: "\u0627\u0644" # بـِ
|
336
|
+
|
337
|
+
sub boundary + "\u0644\u0650\u0644", "li-l-" # لِل
|
338
|
+
|
339
|
+
}
|
340
|
+
|
341
|
+
|
342
|
+
# POSTRULES
|
343
|
+
sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
|
344
|
+
# don't capitalize the definite article in the middle of a sentence
|
345
|
+
sub "Al-", "al-" # ال
|
346
|
+
sub "Bi-", "bi-" # بِ
|
347
|
+
sub "Li-L-", "li-l-" # لِل
|
348
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: interscript-maps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.0b1
|
4
|
+
version: 2.1.0b5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
@@ -184,6 +184,7 @@ files:
|
|
184
184
|
- maps/icao-srp-Cyrl-Latn-9303.imp
|
185
185
|
- maps/icao-ukr-Cyrl-Latn-9303.imp
|
186
186
|
- maps/iso-ara-Arab-Latn-233-1984.imp
|
187
|
+
- maps/iso-ara-Arab-Latn-233-2-1993.imp
|
187
188
|
- maps/iso-asm-Beng-Latn-15919-2001.imp
|
188
189
|
- maps/iso-ben-Beng-Latn-15919-2001.imp
|
189
190
|
- maps/iso-ell-Grek-Latn-843-1997-t1.imp
|