interscript-maps 2.1.0b1 → 2.1.0b5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b43dc37aa94446eb83bfececb00291e1fe7ebf17c11e7f0a11969196d33f179
4
- data.tar.gz: c919d5084e1ac1e97985e0357bacdf282611d8717ae72175e18ed7f2e74d3582
3
+ metadata.gz: 958e31d63f5bf124b93b29cecda03843560454c93f9cfda88fccdc78ea445bd8
4
+ data.tar.gz: b8a27044933cc628df8e8be2d7996b7cbbe715daa213d8a64a19f1493a9e6c51
5
5
  SHA512:
6
- metadata.gz: c4df63fb4c1c30c63cf11c5ea61b727001d14840f856073de1c65eaaa59153e5ba9c3918ec9ba234a3dcc4e945331b3247d8667545b0fe75c9854b75472b9f62
7
- data.tar.gz: db7750b60fbab7dcef42d4e05fd3320a41f06c2c2ed8639aae6ddb52be547e73d0e8c7e6e59b2f7134c3e2274c695226bea148bff71989db47fd31a7c1f32da2
6
+ metadata.gz: babfb87ac3c32a698004acdb5e91f42fe431d5977609893f7f3c336aca22fd2d295303798eab42db796a58e2d6bdf87702f5dd229dffb1fefcd759a270bf45d6
7
+ data.tar.gz: a75cb96b252caec6eeb55a7127f703ac2ae0fc1f7b404b9d35181284a7a132e2da7617fc84a13c75d1bdced0679788df9d39d0a3478ae7db3e6e4b1567d592f9
@@ -1,4 +1,4 @@
1
- INTERSCRIPT_MAPS_VERSION="2.1.0b1"
1
+ INTERSCRIPT_MAPS_VERSION="2.1.0b5"
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "interscript-maps"
@@ -0,0 +1,348 @@
1
+ metadata {
2
+ authority_id: iso
3
+ id: 233-2-1993
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ISO 233-2:1993 Documentation — Transliteration of Arabic characters into Latin characters
8
+ url:
9
+ - https://www.iso.org/standard/4118.html
10
+ - https://cdn.standards.iteh.ai/samples/4118/2f03c828842c4055a5619c1bded39381/ISO-233-2-1993.pdf
11
+ creation_date: 1993-08
12
+ confirmation_date: 2018-06
13
+ description: |
14
+ Establishes a simplified system for the transliteration. This
15
+ simplification of the stringent rules established by ISO 233:1984
16
+ is especially intended to facilitate the processing of bibliographic
17
+ information (e.g. catalogues, indices, citations, etc.). Annex A
18
+ gives the diacritical signs used (taken from the code table of
19
+ ISO 5436:1983).
20
+ notes:
21
+ - |
22
+ 4.1.1 In order to render a transliterated text more legible, the vowels
23
+ are supplied [method 2.1 c) of ISO 233:1984]. However sukūn (`\u0652`) is
24
+ omitted, as well as the vowels and diphthongs which have only a flexional
25
+ function in nominal forms.
26
+
27
+ - |
28
+ 4.1.2 The initial alif (`\u0627`) is not represented: the presence of an
29
+ initial vowel in the transliterated word is enough to indicate an alif in
30
+ the original script.
31
+
32
+ - |
33
+ 4.1.3 A character bearing a hamzat (`\u0621`), which depends on the
34
+ vocalic context, is not represented.
35
+
36
+ - |
37
+ 4.1.4 The definite article (`\u0627\u0644`), is always represented by the
38
+ characters “al-”, whatever its vocalization.
39
+
40
+ - |
41
+ 4.1.5 The prepositions (li, bi, ka) as well as the conjunction wa, which
42
+ in Arabic are joined to the word, are separated by a hyphen in the
43
+ transliterated script.
44
+
45
+ }
46
+
47
+ tests {
48
+ test "مِصر", "Miṣr"
49
+ test "قَطَر", "Qaṭar"
50
+ test "الرِيَاض", "al-Riyāḍ"
51
+ test "الشارِقة", "al-Šâriqaẗ"
52
+ test "فِي نُورِ الْقَمَرِ", "Fī Nūr al-Qamar"
53
+ test "بِئْر", "Bi’r"
54
+ test "سَأَلَ", "Sa’al"
55
+ test "أَخْبَار", "Aẖbār"
56
+ test "قُرْآن", "Qur’ān"
57
+ test "آدَاب", "Ādāb"
58
+ test "الشَمْسُ", "al-Šams"
59
+ test "بِاللَيلِ", "bi-al-Layl"
60
+ test "لِلوَلَدِ", "li-l-Walad"
61
+ }
62
+
63
+ # Although this system inherits from iso-ara-Arab-Latn-233-1984,
64
+ # it utilizes a set of simplified rules.
65
+ # It is therefore easier to implement it as a separate map instead
66
+ # of using the inherit flag.
67
+ # inherit: iso-ara-Arab-Latn-233-1984
68
+
69
+ stage {
70
+
71
+ # CHARACTERS
72
+ parallel {
73
+
74
+ # pointing
75
+
76
+ # Table 2 No. 30
77
+ sub "\u064e", "a" # َ fatha
78
+ sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
79
+
80
+ # Table 2, No. 31
81
+ sub "\u064f", "u" # ُ damma
82
+
83
+ # Table 2, No. 32
84
+ sub "\u0650", "i" # ِ kasra
85
+
86
+ # Table 3, No. 33
87
+ sub "\u0652", "" # ْ sokoon, see 4.1.1
88
+
89
+ # pointing omitted at the end of words
90
+ sub "\u064e" + boundary, "" # َ fatha
91
+ sub "\u064f" + boundary, "" # ُ damma
92
+ sub "\u0650" + boundary, "" # ِ kasra
93
+
94
+ # special pointed letters
95
+ sub "\u0639\u064e", "‘a" # عَ
96
+ sub "\u0639\u0650", "‘i" # عِ
97
+ sub "\u0639\u064f", "‘ū" # عُ
98
+ # handle MacOS regex difference
99
+ sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و
100
+
101
+ sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
102
+ sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
103
+ sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
104
+ sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
105
+ sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
106
+ sub "\u064f\u0648", "ū" # ـُو damma followed by و
107
+
108
+ # Table 2 No. 31, column 4
109
+ sub "\u064e\u0648\u0652", "aw" # ـَوْ
110
+
111
+ # Table 2 No. 32, column 4
112
+ sub "\u064e\u064a\u0652", "ay" # ـَيْ
113
+
114
+ # Table 1 No. 27a
115
+ # ta' marboota in iso-233-2-1993 is always rendered as `aẗ`
116
+ sub "\u0629", "aẗ" # ة in the middle of the sentence
117
+ # sub "\u0629", "ẗ" # ة
118
+
119
+ # Table 3, No. 34
120
+ # Shadda
121
+ sub "\u0628\u0651", "bb" # ب
122
+ sub "\u062a\u0651", "tt" # ت
123
+ sub "\u062b\u0651", "ṯṯ" # ث
124
+ sub "\u062c\u0651", "ǧǧ" # ج
125
+ sub "\u062d\u0651", "ḥḥ" # ح
126
+ sub "\u062e\u0651", "ẖẖ" # خ
127
+ sub "\u062f\u0651", "dd" # د
128
+ sub "\u0630\u0651", "ḏḏ" # ذ
129
+ sub "\u0631\u0651", "rr" # ر
130
+ sub "\u0632\u0651", "zz" # ز
131
+ sub "\u0633\u0651", "ss" # س
132
+ sub "\u0634\u0651", "šš" # ش
133
+ sub "\u0635\u0651", "ṣṣ" # ص
134
+ sub "\u0636\u0651", "ḍḍ" # ض
135
+ sub "\u0637\u0651", "ṭṭ" # ط
136
+ sub "\u0638\u0651", "ẓẓ" # ظ
137
+ sub "\u063a\u0651", "ġġ" # غ
138
+ sub "\u0641\u0651", "ff" # ف
139
+ sub "\u0642\u0651", "qq" # ق
140
+ sub "\u0643\u0651", "kk" # ك
141
+ sub "\u0644\u0651", "ll" # ل
142
+ sub "\u0645\u0651", "mm" # م
143
+ sub "\u0646\u0651", "nn" # ن
144
+ sub "\u0647\u0651", "hh" # ه
145
+ sub "\u0648\u0651", "ww" # و
146
+ sub "\u064a\u0651", "yy" # ي
147
+
148
+ sub "\u0627", "â" # ا
149
+
150
+ sub "\u0649", "ỳ" # ى
151
+
152
+ sub "\u0623", "’" # أ
153
+ sub boundary + "\u0623", "" # أ
154
+
155
+ # See 4.1.4
156
+ # '\uFE8E' : '' # ﺎ
157
+
158
+ # Table 1 No. 3
159
+ sub "\u0628", "b" # ب
160
+ sub "\uFE91", "b" # ﺑ
161
+ sub "\uFE92", "b" # ﺒ
162
+ sub "\uFE90", "b" # ﺐ
163
+
164
+ # See note C
165
+ # Table 1 No. 4
166
+ sub "\u062a", "t" # ت
167
+ sub "\ufe97", "t" # ﺗ
168
+ sub "\ufe98", "t" # ﺘ
169
+ sub "\ufe96", "t" # ﺖ
170
+
171
+ # Table 1 No. 5
172
+ sub "\u062b", "ṯ" # ث
173
+ sub "\ufe9b", "ṯ" # ﺛ
174
+ sub "\ufe9c", "ṯ" # ﺜ
175
+ sub "\ufe9a", "ṯ" # ﺚ
176
+
177
+ # Table 1 No. 6
178
+ sub "\u062c", "ǧ" # ج
179
+ sub "\ufe9f", "ǧ" # ﺟ
180
+ sub "\ufea0", "ǧ" # ﺠ
181
+ sub "\ufe9e", "ǧ" # ﺞ
182
+
183
+ # Table 1 No. 7
184
+ sub "\u062d", "ḥ" # ح
185
+ sub "\ufea3", "ḥ" # ﺣ
186
+ sub "\ufea4", "ḥ" # ﺤ
187
+ sub "\ufea2", "ḥ" # ﺢ
188
+
189
+ # Table 1 No. 8
190
+ sub "\u062e", "ẖ" # خ
191
+ sub "\ufea7", "ẖ" # ﺧ
192
+ sub "\ufea8", "ẖ" # ﺨ
193
+ sub "\ufea6", "ẖ" # ﺦ
194
+
195
+ # Table 1 No. 9
196
+ sub "\u062f", "d" # د
197
+ sub "\ufeaa", "d" # ﺪ
198
+
199
+ # Table 1 No. 10
200
+ sub "\u0630", "ḏ" # ذ
201
+ sub "\ufeac", "ḏ" # ﺬ
202
+
203
+ # Table 1 No. 11
204
+ sub "\u0631", "r" # ر
205
+ sub "\ufeae", "r" # ﺮ
206
+
207
+ # Table 1 No. 12
208
+ sub "\u0632", "z" # ز
209
+ sub "\ufeb0", "z" # ﺰ
210
+
211
+ # Table 1 No. 13
212
+ sub "\u0633", "s" # س
213
+ sub "\ufeb3", "s" # ﺳ
214
+ sub "\ufeb4", "s" # ﺴ
215
+ sub "\ufeb2", "s" # ﺲ
216
+
217
+ # Table 1 No. 14
218
+ sub "\u0634", "š" # ش
219
+ sub "\ufeb7", "š" # ﺷ
220
+ sub "\ufeb8", "š" # ﺸ
221
+ sub "\ufeb6", "š" # ﺶ
222
+
223
+ # Table 1 No. 15
224
+ sub "\u0635", "ṣ" # ص
225
+ sub "\ufebb", "ṣ" # ﺻ
226
+ sub "\ufebc", "ṣ" # ﺼ
227
+ sub "\ufeba", "ṣ" # ﺺ
228
+
229
+ # Table 1 No. 16
230
+ sub "\u0636", "ḍ" # ض
231
+ sub "\ufebf", "ḍ" # ﺿ
232
+ sub "\ufec0", "ḍ" # ﻀ
233
+ sub "\ufebe", "ḍ" # ﺾ
234
+
235
+ # Table 1 No. 17
236
+ sub "\u0637", "ṭ" # ط
237
+ sub "\ufec3", "ṭ" # ﻃ
238
+ sub "\ufec4", "ṭ" # ﻄ
239
+ sub "\ufec2", "ṭ" # ﻂ
240
+
241
+ # Table 1 No. 18
242
+ sub "\u0638", "ẓ" # ظ
243
+ sub "\ufec7", "ẓ" # ﻇ
244
+ sub "\ufec8", "ẓ" # ﻈ
245
+ sub "\ufec6", "ẓ" # ﻆ
246
+
247
+ # Table 1 No. 19
248
+ sub "\u0639", "‘" # ع
249
+ sub "\ufecb", "‘" # ﻋ
250
+ sub "\ufecc", "‘" # ﻌ
251
+ sub "\ufeca", "‘" # ﻊ
252
+
253
+ # Table 1 No. 20
254
+ sub "\u063a", "ġ" # غ
255
+ sub "\ufecf", "ġ" # ﻏ
256
+ sub "\ufed0", "ġ" # ﻐ
257
+ sub "\ufece", "ġ" # ﻎ
258
+
259
+ # Table 1 No. 21
260
+ sub "\u0641", "f" # ف
261
+ sub "\ufed3", "f" # ﻓ
262
+ sub "\ufed4", "f" # ﻔ
263
+ sub "\ufed2", "f" # ﻒ
264
+ sub "\u06a2", "f" # ڢ Maghrebi form
265
+
266
+ # Table 1 No. 22
267
+ sub "\u0642", "q" # ق
268
+ sub "\ufed7", "q" # ﻗ
269
+ sub "\ufed8", "q" # ﻘ
270
+ sub "\ufed6", "q" # ﻖ
271
+ sub "\u06a8", "q" # ڧ Maghrebi form
272
+
273
+ # Table 1 No. 23
274
+ sub "\u0643", "k" # ك
275
+ sub "\ufedb", "k" # ﻛ
276
+ sub "\ufedc", "k" # ﻜ
277
+ sub "\ufeda", "k" # ﻚ
278
+
279
+ # Table 1 No. 24
280
+ sub "\u0644", "l" # ل
281
+ sub "\ufedf", "l" # ﻟ
282
+ sub "\ufee0", "l" # ﻠ
283
+ sub "\ufede", "l" # ﻞ
284
+
285
+ # Table 1 No. 25
286
+ sub "\u0645", "m" # م
287
+ sub "\ufee3", "m" # ﻣ
288
+ sub "\ufee4", "m" # ﻤ
289
+ sub "\ufee2", "m" # ﻢ
290
+
291
+ # Table 1 No. 26
292
+ sub "\u0646", "n" # ن
293
+ sub "\ufee7", "n" # ﻧ
294
+ sub "\ufee8", "n" # ﻨ
295
+ sub "\ufee6", "n" # ﻦ
296
+
297
+ # Table 1 No. 27
298
+ sub "\u0647", "h" # ه
299
+ sub "\ufeeb", "h" # ﻫ
300
+ sub "\ufeec", "h" # ﻬ
301
+ sub "\ufeea", "h" # ﻪ
302
+
303
+ # Table 1 No. 28
304
+ sub "\u0648", "w" # و
305
+ sub "\ufeee", "w" # ﻮ
306
+
307
+ # Table 1 No. 29
308
+ sub "\u064a", "y" # ي
309
+ sub "\ufef3", "y" # ﻳ
310
+ sub "\ufef4", "y" # ﻴ
311
+ sub "\ufef1", "y" # ﻱ
312
+
313
+ # Table 4 row 1
314
+ sub "\u060c", "," # ،
315
+
316
+ # Table 4 row 2
317
+ sub "\u061b", ";" # ؛
318
+
319
+ # Table 4 row 3
320
+ sub "\u061f", "?" # ؟
321
+
322
+ # 4.3 Notes to Tables
323
+ sub "\u0626", "’" # ئ
324
+
325
+ sub "\u0622", "’ā" # آ
326
+
327
+ sub boundary + "\u0622", "ā" # آ
328
+
329
+ # definite article
330
+
331
+ sub boundary + "\u0627\u0644", "al-" # ال
332
+
333
+ sub "\u0627\u0644", "al-", before: "\u0628\u0650" # بِال
334
+
335
+ sub boundary + "\u0628\u0650", "bi-", after: "\u0627\u0644" # بـِ
336
+
337
+ sub boundary + "\u0644\u0650\u0644", "li-l-" # لِل
338
+
339
+ }
340
+
341
+
342
+ # POSTRULES
343
+ sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
344
+ # don't capitalize the definite article in the middle of a sentence
345
+ sub "Al-", "al-" # ال
346
+ sub "Bi-", "bi-" # بِ
347
+ sub "Li-L-", "li-l-" # لِل
348
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: interscript-maps
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0b1
4
+ version: 2.1.0b5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -184,6 +184,7 @@ files:
184
184
  - maps/icao-srp-Cyrl-Latn-9303.imp
185
185
  - maps/icao-ukr-Cyrl-Latn-9303.imp
186
186
  - maps/iso-ara-Arab-Latn-233-1984.imp
187
+ - maps/iso-ara-Arab-Latn-233-2-1993.imp
187
188
  - maps/iso-asm-Beng-Latn-15919-2001.imp
188
189
  - maps/iso-ben-Beng-Latn-15919-2001.imp
189
190
  - maps/iso-ell-Grek-Latn-843-1997-t1.imp