interscript-maps 2.1.0b1 → 2.1.0b2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b43dc37aa94446eb83bfececb00291e1fe7ebf17c11e7f0a11969196d33f179
4
- data.tar.gz: c919d5084e1ac1e97985e0357bacdf282611d8717ae72175e18ed7f2e74d3582
3
+ metadata.gz: c84ee5825c4035b4e7424cdd8c006f9fb79a207ccc4a1315e68dc6f21c75f73e
4
+ data.tar.gz: de8ad4a2c0f19b8524a2e8680c7c39d8b86f03f7c267dc2182b02f1efb816ac3
5
5
  SHA512:
6
- metadata.gz: c4df63fb4c1c30c63cf11c5ea61b727001d14840f856073de1c65eaaa59153e5ba9c3918ec9ba234a3dcc4e945331b3247d8667545b0fe75c9854b75472b9f62
7
- data.tar.gz: db7750b60fbab7dcef42d4e05fd3320a41f06c2c2ed8639aae6ddb52be547e73d0e8c7e6e59b2f7134c3e2274c695226bea148bff71989db47fd31a7c1f32da2
6
+ metadata.gz: b65735d569726d1d3fb07303446c9fbc4ac9153272233a3a60332675c21d30aa3db258e1920efc9a41bad43cbfdb1b04c7c8077b1179ed7a77e2a70ea6ace249
7
+ data.tar.gz: c284db676b0a1183638867c86c2ae379c1c716e0edad559529dce73f03c36638d0782746ff010add8837ba22de020420dc50b0b2be26c08aa332bb47f15253b3
@@ -1,4 +1,4 @@
1
- INTERSCRIPT_MAPS_VERSION="2.1.0b1"
1
+ INTERSCRIPT_MAPS_VERSION="2.1.0b2"
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "interscript-maps"
@@ -0,0 +1,348 @@
1
+ metadata {
2
+ authority_id: iso
3
+ id: 233-2-1993
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ISO 233-2:1993 Documentation — Transliteration of Arabic characters into Latin characters
8
+ url:
9
+ - https://www.iso.org/standard/4118.html
10
+ - https://cdn.standards.iteh.ai/samples/4118/2f03c828842c4055a5619c1bded39381/ISO-233-2-1993.pdf
11
+ creation_date: 1993-08
12
+ confirmation_date: 2018-06
13
+ description: |
14
+ Establishes a simplified system for the transliteration. This
15
+ simplification of the stringent rules established by ISO 233:1984
16
+ is especially intended to facilitate the processing of bibliographic
17
+ information (e.g. catalogues, indices, citations, etc.). Annex A
18
+ gives the diacritical signs used (taken from the code table of
19
+ ISO 5436:1983).
20
+ notes:
21
+ - |
22
+ 4.1.1 In order to render a transliterated text more legible, the vowels
23
+ are supplied [method 2.1 c) of ISO 233:1984]. However sukūn (`\u0652`) is
24
+ omitted, as well as the vowels and diphthongs which have only a flexional
25
+ function in nominal forms.
26
+
27
+ - |
28
+ 4.1.2 The initial alif (`\u0627`) is not represented: the presence of an
29
+ initial vowel in the transliterated word is enough to indicate an alif in
30
+ the original script.
31
+
32
+ - |
33
+ 4.1.3 A character bearing a hamzat (`\u0621`), which depends on the
34
+ vocalic context, is not represented.
35
+
36
+ - |
37
+ 4.1.4 The definite article (`\u0627\u0644`), is always represented by the
38
+ characters “al-”, whatever its vocalization.
39
+
40
+ - |
41
+ 4.1.5 The prepositions (li, bi, ka) as well as the conjunction wa, which
42
+ in Arabic are joined to the word, are separated by a hyphen in the
43
+ transliterated script.
44
+
45
+ }
46
+
47
+ tests {
48
+ test "مِصر", "Miṣr"
49
+ test "قَطَر", "Qaṭar"
50
+ test "الرِيَاض", "al-Riyāḍ"
51
+ test "الشارِقة", "al-Šâriqaẗ"
52
+ test "فِي نُورِ الْقَمَرِ", "Fī Nūr al-Qamar"
53
+ test "بِئْر", "Bi’r"
54
+ test "سَأَلَ", "Sa’al"
55
+ test "أَخْبَار", "Aẖbār"
56
+ test "قُرْآن", "Qur’ān"
57
+ test "آدَاب", "Ādāb"
58
+ test "الشَمْسُ", "al-Šams"
59
+ test "بِاللَيلِ", "bi-al-Layl"
60
+ test "لِلوَلَدِ", "li-l-Walad"
61
+ }
62
+
63
+ # Although this system inherits from iso-ara-Arab-Latn-233-1984,
64
+ # it utilizes a set of simplified rules.
65
+ # It is therefore easier to implement it as a separate map instead
66
+ # of using the inherit flag.
67
+ # inherit: iso-ara-Arab-Latn-233-1984
68
+
69
+ stage {
70
+
71
+ # CHARACTERS
72
+ parallel {
73
+
74
+ # pointing
75
+
76
+ # Table 2 No. 30
77
+ sub "\u064e", "a" # َ fatha
78
+ sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
79
+
80
+ # Table 2, No. 31
81
+ sub "\u064f", "u" # ُ damma
82
+
83
+ # Table 2, No. 32
84
+ sub "\u0650", "i" # ِ kasra
85
+
86
+ # Table 3, No. 33
87
+ sub "\u0652", "" # ْ sokoon, see 4.1.1
88
+
89
+ # pointing omitted in the end of words
90
+ sub "\u064e" + boundary, "" # َ fatha
91
+ sub "\u064f" + boundary, "" # ُ damma
92
+ sub "\u0650" + boundary, "" # ِ kasra
93
+
94
+ # special pointed letters
95
+ sub "\u0639\u064e", "‘a" # عَ
96
+ sub "\u0639\u0650", "‘i" # عِ
97
+ sub "\u0639\u064f", "‘ū" # عُ
98
+ # handle MacOS regex difference
99
+ sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و
100
+
101
+ sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
102
+ sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
103
+ sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
104
+ sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
105
+ sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
106
+ sub "\u064f\u0648", "ū" # ـُو damma followed by و
107
+
108
+ # Table 2 No. 31, column 4
109
+ sub "\u064e\u0648\u0652", "aw" # ـَوْ
110
+
111
+ # Table 2 No. 32, column 4
112
+ sub "\u064e\u064a\u0652", "ay" # ـَيْ
113
+
114
+ # Table 1 No. 27a
115
+ # ta' marboota in iso-233-2-1993 is all the same `aẗ`
116
+ sub "\u0629", "aẗ" # ة in the middle of the sentence
117
+ # sub "\u0629", "ẗ" # ة
118
+
119
+ # Table 3, No. 34
120
+ # Shadda
121
+ sub "\u0628\u0651", "bb" # ب
122
+ sub "\u062a\u0651", "tt" # ت
123
+ sub "\u062b\u0651", "ṯṯ" # ث
124
+ sub "\u062c\u0651", "ǧǧ" # ج
125
+ sub "\u062d\u0651", "ḥḥ" # ح
126
+ sub "\u062e\u0651", "ẖẖ" # خ
127
+ sub "\u062f\u0651", "dd" # د
128
+ sub "\u0630\u0651", "ḏḏ" # ذ
129
+ sub "\u0631\u0651", "rr" # ر
130
+ sub "\u0632\u0651", "zz" # ز
131
+ sub "\u0633\u0651", "ss" # س
132
+ sub "\u0634\u0651", "šš" # ش
133
+ sub "\u0635\u0651", "ṣṣ" # ص
134
+ sub "\u0636\u0651", "ḍḍ" # ض
135
+ sub "\u0637\u0651", "ṭṭ" # ط
136
+ sub "\u0638\u0651", "ẓẓ" # ظ
137
+ sub "\u063a\u0651", "ġġ" # غ
138
+ sub "\u0641\u0651", "ff" # ف
139
+ sub "\u0642\u0651", "qq" # ق
140
+ sub "\u0643\u0651", "kk" # ك
141
+ sub "\u0644\u0651", "ll" # ل
142
+ sub "\u0645\u0651", "mm" # م
143
+ sub "\u0646\u0651", "nn" # ن
144
+ sub "\u0647\u0651", "hh" # ه
145
+ sub "\u0648\u0651", "ww" # و
146
+ sub "\u064a\u0651", "yy" # ي
147
+
148
+ sub "\u0627", "â" # ا
149
+
150
+ sub "\u0649", "ỳ" # ى
151
+
152
+ sub "\u0623", "’" # أ
153
+ sub boundary + "\u0623", "" # أ
154
+
155
+ # See 4.1.4
156
+ # '\uFE8E' : '' # ﺎ
157
+
158
+ # Table 1 No. 3
159
+ sub "\u0628", "b" # ب
160
+ sub "\uFE91", "b" # ﺑ
161
+ sub "\uFE92", "b" # ﺒ
162
+ sub "\uFE90", "b" # ﺐ
163
+
164
+ # See note C
165
+ # Table 1 No. 4
166
+ sub "\u062a", "t" # ت
167
+ sub "\ufe97", "t" # ﺗ
168
+ sub "\ufe98", "t" # ﺘ
169
+ sub "\ufe96", "t" # ﺖ
170
+
171
+ # Table 1 No. 5
172
+ sub "\u062b", "ṯ" # ث
173
+ sub "\ufe9b", "ṯ" # ﺛ
174
+ sub "\ufe9c", "ṯ" # ﺜ
175
+ sub "\ufe9a", "ṯ" # ﺚ
176
+
177
+ # Table 1 No. 6
178
+ sub "\u062c", "ǧ" # ج
179
+ sub "\ufe9f", "ǧ" # ﺟ
180
+ sub "\ufea0", "ǧ" # ﺠ
181
+ sub "\ufe9e", "ǧ" # ﺞ
182
+
183
+ # Table 1 No. 7
184
+ sub "\u062d", "ḥ" # ح
185
+ sub "\ufea3", "ḥ" # ﺣ
186
+ sub "\ufea4", "ḥ" # ﺤ
187
+ sub "\ufea2", "ḥ" # ﺢ
188
+
189
+ # Table 1 No. 8
190
+ sub "\u062e", "ẖ" # خ
191
+ sub "\ufea7", "ẖ" # ﺧ
192
+ sub "\ufea8", "ẖ" # ﺨ
193
+ sub "\ufea6", "ẖ" # ﺦ
194
+
195
+ # Table 1 No. 9
196
+ sub "\u062f", "d" # د
197
+ sub "\ufeaa", "d" # ﺪ
198
+
199
+ # Table 1 No. 10
200
+ sub "\u0630", "ḏ" # ذ
201
+ sub "\ufeac", "ḏ" # ﺬ
202
+
203
+ # Table 1 No. 11
204
+ sub "\u0631", "r" # ر
205
+ sub "\ufeae", "r" # ﺮ
206
+
207
+ # Table 1 No. 12
208
+ sub "\u0632", "z" # ز
209
+ sub "\ufeb0", "z" # ﺰ
210
+
211
+ # Table 1 No. 13
212
+ sub "\u0633", "s" # س
213
+ sub "\ufeb3", "s" # ﺳ
214
+ sub "\ufeb4", "s" # ﺴ
215
+ sub "\ufeb2", "s" # ﺲ
216
+
217
+ # Table 1 No. 14
218
+ sub "\u0634", "š" # ش
219
+ sub "\ufeb7", "š" # ﺷ
220
+ sub "\ufeb8", "š" # ﺸ
221
+ sub "\ufeb6", "š" # ﺶ
222
+
223
+ # Table 1 No. 15
224
+ sub "\u0635", "ṣ" # ص
225
+ sub "\ufebb", "ṣ" # ﺻ
226
+ sub "\ufebc", "ṣ" # ﺼ
227
+ sub "\ufeba", "ṣ" # ﺺ
228
+
229
+ # Table 1 No. 16
230
+ sub "\u0636", "ḍ" # ض
231
+ sub "\ufebf", "ḍ" # ﺿ
232
+ sub "\ufec0", "ḍ" # ﻀ
233
+ sub "\ufebe", "ḍ" # ﺾ
234
+
235
+ # Table 1 No. 17
236
+ sub "\u0637", "ṭ" # ط
237
+ sub "\ufec3", "ṭ" # ﻃ
238
+ sub "\ufec4", "ṭ" # ﻄ
239
+ sub "\ufec2", "ṭ" # ﻂ
240
+
241
+ # Table 1 No. 18
242
+ sub "\u0638", "ẓ" # ظ
243
+ sub "\ufec7", "ẓ" # ﻇ
244
+ sub "\ufec8", "ẓ" # ﻈ
245
+ sub "\ufec6", "ẓ" # ﻆ
246
+
247
+ # Table 1 No. 19
248
+ sub "\u0639", "‘" # ع
249
+ sub "\ufecb", "‘" # ﻋ
250
+ sub "\ufecc", "‘" # ﻌ
251
+ sub "\ufeca", "‘" # ﻊ
252
+
253
+ # Table 1 No. 20
254
+ sub "\u063a", "ġ" # غ
255
+ sub "\ufecf", "ġ" # ﻏ
256
+ sub "\ufed0", "ġ" # ﻐ
257
+ sub "\ufece", "ġ" # ﻎ
258
+
259
+ # Table 1 No. 21
260
+ sub "\u0641", "f" # ف
261
+ sub "\ufed3", "f" # ﻓ
262
+ sub "\ufed4", "f" # ﻔ
263
+ sub "\ufed2", "f" # ﻒ
264
+ sub "\u06a2", "f" # ڢ Maghrebi form
265
+
266
+ # Table 1 No. 22
267
+ sub "\u0642", "q" # ق
268
+ sub "\ufed7", "q" # ﻗ
269
+ sub "\ufed8", "q" # ﻘ
270
+ sub "\ufed6", "q" # ﻖ
271
+ sub "\u06a8", "q" # ڧ Maghrebi form
272
+
273
+ # Table 1 No. 23
274
+ sub "\u0643", "k" # ك
275
+ sub "\ufedb", "k" # ﻛ
276
+ sub "\ufedc", "k" # ﻜ
277
+ sub "\ufeda", "k" # ﻚ
278
+
279
+ # Table 1 No. 24
280
+ sub "\u0644", "l" # ل
281
+ sub "\ufedf", "l" # ﻟ
282
+ sub "\ufee0", "l" # ﻠ
283
+ sub "\ufede", "l" # ﻞ
284
+
285
+ # Table 1 No. 25
286
+ sub "\u0645", "m" # م
287
+ sub "\ufee3", "m" # ﻣ
288
+ sub "\ufee4", "m" # ﻤ
289
+ sub "\ufee2", "m" # ﻢ
290
+
291
+ # Table 1 No. 26
292
+ sub "\u0646", "n" # ن
293
+ sub "\ufee7", "n" # ﻧ
294
+ sub "\ufee8", "n" # ﻨ
295
+ sub "\ufee6", "n" # ﻦ
296
+
297
+ # Table 1 No. 27
298
+ sub "\u0647", "h" # ه
299
+ sub "\ufeeb", "h" # ﻫ
300
+ sub "\ufeec", "h" # ﻬ
301
+ sub "\ufeea", "h" # ﻪ
302
+
303
+ # Table 1 No. 28
304
+ sub "\u0648", "w" # و
305
+ sub "\ufeee", "w" # ﻮ
306
+
307
+ # Table 1 No. 29
308
+ sub "\u064a", "y" # ي
309
+ sub "\ufef3", "y" # ﻳ
310
+ sub "\ufef4", "y" # ﻴ
311
+ sub "\ufef1", "y" # ﻱ
312
+
313
+ # Table 4 row 1
314
+ sub "\u060c", "," # ،
315
+
316
+ # Table 4 row 2
317
+ sub "\u061b", ";" # ؛
318
+
319
+ # Table 4 row 3
320
+ sub "\u061f", "?" # ؟
321
+
322
+ # 4.3 Notes to Tables
323
+ sub "\u0626", "’" # ئ
324
+
325
+ sub "\u0622", "’ā" # آ
326
+
327
+ sub boundary + "\u0622", "ā" # آ
328
+
329
+ # definite article
330
+
331
+ sub boundary + "\u0627\u0644", "al-" # ال
332
+
333
+ sub "\u0627\u0644", "al-", before: "\u0628\u0650" # بِال
334
+
335
+ sub boundary + "\u0628\u0650", "bi-", after: "\u0627\u0644" # بـِ
336
+
337
+ sub boundary + "\u0644\u0650\u0644", "li-l-" # لِل
338
+
339
+ }
340
+
341
+
342
+ # POSTRULES
343
+ sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
344
+ # don't capitalize the definite article in the middle of a sentence
345
+ sub "Al-", "al-" # ال
346
+ sub "Bi-", "bi-" # بِ
347
+ sub "Li-L-", "li-l-" # لِل
348
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: interscript-maps
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0b1
4
+ version: 2.1.0b2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -184,6 +184,7 @@ files:
184
184
  - maps/icao-srp-Cyrl-Latn-9303.imp
185
185
  - maps/icao-ukr-Cyrl-Latn-9303.imp
186
186
  - maps/iso-ara-Arab-Latn-233-1984.imp
187
+ - maps/iso-ara-Arab-Latn-233-2-1993.imp
187
188
  - maps/iso-asm-Beng-Latn-15919-2001.imp
188
189
  - maps/iso-ben-Beng-Latn-15919-2001.imp
189
190
  - maps/iso-ell-Grek-Latn-843-1997-t1.imp