interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,405 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 1972
4
+ language: iso-639-2:urd
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF URDU -- UNGEGN 1972 System
8
+ url: https://www.eki.ee/wgrs/rom1_ur.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2018-06
11
+ description: |
12
+ The United Nations recommended system was approved in 1972
13
+ (II/11) and amended in 1977 (III/12), based on a report
14
+ prepared by D. N. Sharma. The tables and their corrections
15
+ were published in volume II of the conference reports [1, 2].
16
+
17
+ There is no evidence of the use of the system either in
18
+ Pakistan, India or in international cartographic products.
19
+ Instead, in Pakistan the Hunterian system is officially
20
+ used [3]. The resolutions III/12 (1977) and IV/17 (1982)
21
+ recommended association, inter alia, with Pakistan, in
22
+ carrying out further studies on the system.
23
+
24
+ Urdu (Urdū) uses the Perso-Arabic script which is written
25
+ from right to left. In the script vowel points are usually
26
+ omitted which makes it difficult to obtain uniform
27
+ romanizations. Some of the Arabic consonants are
28
+ undifferentiated in romanization which means that the
29
+ system is not fully reversible.
30
+ notes:
31
+ - A If preceded by short a, it is romanized ‘ā, e.g. مَعمُل M‘āmul.
32
+ - B When و is imperceptible, e.g. in a few words of Persian origin when preceded by خ (ḳh).
33
+ - C Word-finally after a short vowel.
34
+ - D Marks aspiration of consonants.
35
+ - E The character ے is used only word-finally.
36
+ tests:
37
+ - source: بوغدِی
38
+ expected: Bvghdī
39
+
40
+ - source: مَعمُل
41
+ expected: M‘āmul
42
+
43
+ - source: پَالِير
44
+ expected: Pālīr
45
+
46
+ - source: بیزوت كَلے
47
+ expected: Byzvt Kale
48
+
49
+ - source: عَمَل كوٹ
50
+ expected: ‘Amal Kvṭ
51
+
52
+ - source: ثَابِر
53
+ expected: Sābir
54
+
55
+ - source: شَاه نَثَار ميلة
56
+ expected: Shāh Nasār Mylah
57
+
58
+ - source: چَپرِی
59
+ expected: Chaprī
60
+
61
+ - source: أَحمَد خَان كَلے
62
+ expected: Ahmad Ḳhān Kale
63
+
64
+ - source: دُرَانِي
65
+ expected: Durānī
66
+
67
+ - source: ڈَنگِیلا
68
+ expected: Ḍangīlā
69
+
70
+ - source: ذَرَانِی
71
+ expected: Zarānī
72
+
73
+ - source: بُركِي
74
+ expected: Burkī
75
+
76
+ - source: گِیدَڑَه
77
+ expected: Gīdaṙah
78
+
79
+ - source: عَلِي زَائِي
80
+ expected: ‘Alī Zā-ī
81
+
82
+ - source: ژوب
83
+ expected: Ỵvb
84
+
85
+ - source: بِسَاتُو
86
+ expected: Bisātū
87
+
88
+ - source: أَحمَدِي شَامَا
89
+ expected: Ahmadī Shāmā
90
+
91
+ - source: اَصَالَت كَلے
92
+ expected: Asālat Kale
93
+
94
+ - source: خَضَر خَان
95
+ expected: Ḳhazar Ḳhān
96
+
97
+ - source: سُلْطَان
98
+ expected: Sultān
99
+
100
+ - source: عَزَم سَيِّد نُور كَلے
101
+ expected: ‘Azam Sayyid Nūr Kale
102
+
103
+ - source: بغَاكِي
104
+ expected: Bghākī
105
+
106
+ - source: حَقدَرَه
107
+ expected: Haqdarah
108
+
109
+ - source: کَچکِینَہ
110
+ expected: Kachkīnaḥ
111
+
112
+ - source: بَاگَن
113
+ expected: Bāgan
114
+
115
+ - source: بُلبَلَک
116
+ expected: Bulbalak
117
+
118
+ - source: بِلیَامِین
119
+ expected: Bilyāmīn
120
+
121
+ - source: نَہر
122
+ expected: Nahr
123
+
124
+ - source: اَرَوْالِی
125
+ expected: Arawālī
126
+
127
+ - source: مَہردِی
128
+ expected: Mahrdī
129
+
130
+ - source: بَڑھ
131
+ expected: Baṙh
132
+
133
+ - source: یَاردَا کَلے
134
+ expected: Yārdā Kale
135
+
136
+ - source: بهَائِي خَان
137
+ expected: Bhā-ī Ḳhān
138
+
139
+ - source: پھاشک
140
+ expected: Phāshk
141
+
142
+ - source: تھَلّ
143
+ expected: Thall
144
+
145
+ - source: پَٹھان ريَا
146
+ expected: Paṭhān Ryā
147
+
148
+ - source: جھِیل
149
+ expected: Jhīl
150
+
151
+ - source: غَزْنِي سْپِين
152
+ expected: Ghaznī Spīn
153
+
154
+ - source: بَادشَاه چھُم
155
+ expected: Bādshāh Chhum
156
+
157
+ - source: سِندھ
158
+ expected: Sindh
159
+
160
+ - source: ڈھَنڈ
161
+ expected: Ḍhanḍ
162
+
163
+ - source: خَان گھَڑِی
164
+ expected: Ḳhān Ghaṙī
165
+
166
+ - source: غُلَامَک كَلے
167
+ expected: Ghulāmak Kale
168
+
169
+ - source: خَپیَنگا
170
+ expected: Ḳhapyangā
171
+
172
+ - source: گَندَه كَلے
173
+ expected: Gandah Kale
174
+
175
+ - source: مَورپِتھِی
176
+ expected: Maurpithī
177
+
178
+ - source: درے پلارِی
179
+ expected: Dre Plārī
180
+
181
+ - source: آگرَہ
182
+ expected: Āgraḥ
183
+
184
+ - source: ڈَنڈَر
185
+ expected: Ḍanḍar
186
+
187
+ - source: گُبازانَہ
188
+ expected: Gubāzānaḥ
189
+
190
+ - source: حَےدَر عَلِی كَلے
191
+ expected: Haidar ‘Alī Kale
192
+
193
+ - source: تَودَہ چِینَہ
194
+ expected: Taudaḥ Chīnaḥ
195
+
196
+ - source: مُوسى خَان كَلے
197
+ expected: Mūsá Ḳhān Kale
198
+
199
+ - source: مُلَّا بَاغ
200
+ expected: Mullā Bāgh
201
+
202
+ map:
203
+ postrules:
204
+ - pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
205
+ result: "upcase"
206
+ # don't capitalize the definite article in the middle of a sentence
207
+ - pattern : ' At T' # الت
208
+ result: ' at T'
209
+ - pattern : ' As̄ S̄' # الث
210
+ result: ' as̄ S̄'
211
+ - pattern : ' Ad D' # الد
212
+ result: ' ad D'
213
+ - pattern : ' Az Z' # الذ
214
+ result: ' az Z'
215
+ - pattern : ' Ar R' # الر
216
+ result: ' ar R'
217
+ - pattern : ' Az Z' # الز
218
+ result: ' az Z'
219
+ - pattern : ' As S' # الس
220
+ result: ' as S'
221
+ - pattern : ' Ash Sh' # الش
222
+ result: ' ash Sh'
223
+ - pattern : ' As S' # الص
224
+ result: ' as S'
225
+ - pattern : ' Az Z' # الض
226
+ result: ' az Z'
227
+ - pattern : ' At T' # الط
228
+ result: ' at T'
229
+ - pattern : ' Az Z' # الظ
230
+ result: ' az Z'
231
+ - pattern : ' Al L' # الل
232
+ result: ' al L'
233
+ - pattern : ' An N' # الن
234
+ result: ' an N'
235
+ - pattern: " Al " # ال
236
+ result: " al "
237
+ characters:
238
+ # special rules
239
+
240
+ '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
241
+ '\ufdf2': 'Allāh' # See note 5
242
+
243
+ # Vowels, Diphthongs, and Diacritical Marks
244
+ '\u064e' : 'a' # َ fatha
245
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
246
+ '\u0627' : 'ā' # ا
247
+ '\u0649\u0670' : 'ā' # ىٰ
248
+ '\u06D2\u0670' : 'ā' # ےٰ
249
+ '\u0622' : 'ā' # آ
250
+ '\b\u0627' : '' # ا
251
+ '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
252
+ '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
253
+
254
+ '\u0652' : '' # ْ sokoon
255
+ '\u0659': 'ê'
256
+
257
+ '\u0650' : 'i' # ِ kasra
258
+ '\u0650[\u064a|\u06cc]' : 'ī' # ـِي kasra followed by ي
259
+ '\u0650\u06d2\u0652' : 'e' # ـے
260
+ '\u0650\u06d2' : 'e' # ـے
261
+ '\u06d2' : 'e' # ـے
262
+
263
+ '\u064f' : 'u' # ُ damma
264
+ '\u064f\u0648' : 'ū' # ـُو damma followed by و
265
+ '\u064f\u0648\u0652' : 'o' # ـَوْ
266
+
267
+
268
+ '\u064e\u06d2' : 'ai' # ـے
269
+ '\u064e\u0648' : 'au' # ـَو
270
+ '\u0670': 'á' # ىٰ
271
+ '\u0649': 'á' # ىٰ
272
+
273
+ # shadda
274
+ '\u0628\u0651' : 'bb' # ب
275
+ '\u062a\u0651' : 'tt' # ت
276
+ '\u062b\u0651' : 'ss' # ث
277
+ '\u062c\u0651' : 'jj' # ج
278
+ '\u062d\u0651' : 'hh' # ح
279
+ '\u062e\u0651' : 'ḳhḳh' # خ
280
+ '\u062f\u0651' : 'dd' # د
281
+ '\u0630\u0651' : 'zz' # ذ
282
+ '\u0631\u0651' : 'rr' # ر
283
+ '\u0632\u0651' : 'zz' # ز
284
+ '\u0633\u0651' : 'ss' # س
285
+ '\u0634\u0651' : 'sh' # ش
286
+ '\u0635\u0651' : 'ss' # ص
287
+ '\u0636\u0651' : 'ḏḏ' # ض
288
+ '\u0637\u0651' : 'tt' # ط
289
+ '\u0638\u0651' : 'zz' # ظ
290
+ '\u063a\u0651' : 'ghgh' # غ
291
+ '\u0641\u0651' : 'ff' # ف
292
+ '\u0642\u0651' : 'qq' # ق
293
+ '\u0643\u0651' : 'kk' # ك
294
+ '\u0644\u0651' : 'll' # ل
295
+ '\u0645\u0651' : 'mm' # م
296
+ '\u0646\u0651' : 'nn' # ن
297
+ '\u0647\u0651' : 'hh' # ه
298
+ '\u0648\u0651' : 'vv' # و
299
+ '[\u064a|\u06cc]\u0651' : 'yy' # ي
300
+
301
+ # NOTE 1
302
+ '\u0650\b' : '-e' # ِ kasra
303
+ '\u0674' : '-e' # ٴ
304
+ '\u0654' : '-e' # ٔ
305
+
306
+ '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
307
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
308
+ '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
309
+ '\u064e\u0648\u0652' : 'aw' # ـَوْ
310
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
311
+ '\u0650\u06cc\u0651\u064e' : 'īy' # ـِيَّ
312
+ '\u064e\u064a' : 'aī' # ـَي
313
+ '\u064e\u06cc' : 'aī' # ـَي
314
+ # - '-ye'
315
+
316
+
317
+ # ta' marboota
318
+ '\u0629' : 'at' # ة in the middle of the sentence
319
+ '\u0629$' : 'ah'
320
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
321
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
322
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
323
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
324
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
325
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
326
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
327
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
328
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
329
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
330
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
331
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
332
+
333
+
334
+
335
+ '\u0621' : '-' # ء
336
+ '\u0624' : '-' # ؤ
337
+ '\u0626' : '-' # ئ
338
+
339
+ '\u0623' : '' # أ
340
+ '\u0625' : '' # إ
341
+ # See note B
342
+ '\b\u0627\u0644' : 'al ' # ال
343
+ # '\uFE8E' : '' # ﺎ
344
+
345
+ # Sun letters
346
+ '\b\u0627\u0644\u062a' : 'at t' # الت
347
+ '\b\u0627\u0644\u062b' : 'as s' # الث
348
+ '\b\u0627\u0644\u062f' : 'ad d' # الد
349
+ '\b\u0627\u0644\u0630' : 'az z' # الذ
350
+ '\b\u0627\u0644\u0631' : 'ar r' # الر
351
+ '\b\u0627\u0644\u0632' : 'az z' # الز
352
+ '\b\u0627\u0644\u0633' : 'as s' # الس
353
+ '\b\u0627\u0644\u0634' : 'ash sh' # الش
354
+ '\b\u0627\u0644\u0635' : 'as s' # الص
355
+ '\b\u0627\u0644\u0636' : 'az z' # الض
356
+ '\b\u0627\u0644\u0637' : 'at t' # الط
357
+ '\b\u0627\u0644\u0638' : 'az z' # الظ
358
+ '\b\u0627\u0644\u0644' : 'al l' # الل
359
+ '\b\u0627\u0644\u0646' : 'an n' # الن
360
+
361
+
362
+ # consonant characters
363
+
364
+ '\u0628' : 'b' # ب
365
+ '\u067E' : 'p' # پ
366
+ '\u062a' : 't' # ت
367
+ '\u0679' : 'ṭ' # ٹ
368
+ '\u062B' : 's' # ث
369
+ '\u062c' : 'j' # ج
370
+ '\u0686' : 'ch' # ‫چ‬
371
+ '\u062d' : 'h' # ح
372
+ '\u062e' : 'ḳh' # خ
373
+ '\u062f' : 'd' # د
374
+ '\u0688' : 'ḍ' # ‫ڈ
375
+ '\u0630' : 'z' # ذ
376
+ '\u0631' : 'r' # ر
377
+ '\u0691' : 'ṙ' # ڑ
378
+ '\u0632' : 'z' # ز
379
+ '\u0698' : 'ỵ' # ‫ژ‬
380
+ '\u0633' : 's' # س
381
+ '\u0634' : 'sh' # ش
382
+ '\u0635' : 's' # ص
383
+ '\u0636' : 'z' # ض
384
+ '\u0637' : 't' # ط
385
+ '\u0638' : 'z' # ظ
386
+ '\u0639' : '‘' # ع
387
+ '\u064e\u0639' : '‘ā' # ع NOTE A
388
+ '\u063a' : 'gh' # غ
389
+ '\u0641' : 'f' # ف
390
+ '\u0642' : 'q' # ق
391
+ '\u0643' : 'k' # ك
392
+ '\u06A9' : 'k' # ک
393
+ '\u06AF' : 'g' # ‫گ‬
394
+ '\u0644' : 'l' # ل
395
+ '\u0645' : 'm' # م
396
+ '[\u06BA|\u0646]' : 'n' # ن, ں
397
+ '[\ufba9|\u06c1]' : 'h' # ہ , ﮩ
398
+ '(?<=[\u064e|\u0650|\u064f])[\ufba9|\u06c1]\b' : 'ḥ' # ہ , ﮩ NOTE C
399
+ '[\u0647|\u06be]' : 'h' # ه, ھ
400
+ '\u0648' : 'v' # و
401
+ '(?<=\u062e)\u0648' : 'ẉ' # و NOTE B
402
+ '[\u064a|\u06cc]' : 'y' # ي
403
+ # '\u0649' : 'y' # ي
404
+ '\u06D0' : 'ē' # ې
405
+ '\u06CD' : 'êy' # ‫ۍ‬
@@ -0,0 +1,466 @@
1
+ ---
2
+ authority_id: var
3
+ id: 2003
4
+ language: amh
5
+ source_script: Ethi
6
+ destination_script: Latn
7
+ name: Encyclopaedia Aethiopica Amharic transliteration system (2003)
8
+ url: https://brill.com/view/book/edcoll/9789004419582/front-11.xml?language=en
9
+ creation_date: 2003
10
+ description: |
11
+ The Encyclopaedia Aethiopica (EAe) is a basic encyclopedia for Ethiopian and Eritrean studies. It employs an in-house form of romanization of Geez, Amharic, and other languages, which varies greatly from standard formats, such as BGN/PCGN: the emperor Menelek II's name, for example, is written as "Mənilək II".
12
+
13
+ tests:
14
+ - source: የዜግነት ክብር በ ኢትዮጵያችን ጸንቶ
15
+ expected: yäzegənätə kəbərə bä ʾitəyoṗəyačənə ṣänəto
16
+ - source: ታየ ሕዝባዊነት ዳር እስከዳር በርቶ
17
+ expected: tayä ḥəzəbawinätə darə ʾəsəkädarə bärəto
18
+ - source: ለሰላም ለፍትህ ለሕዝቦች ነጻነት
19
+ expected: läsälamə läfətəhə läḥəzəbočə näṣanätə
20
+ - source: በእኩልነት በፍቅር ቆመናል ባንድነት
21
+ expected: bäʾəkulənätə bäfəqərə qomänalə banədənätə
22
+ - source: መሠረተ ፅኑ ሰብዕናን ያልሻርን
23
+ expected: mäśärätä ṣ̓ənu säbəʿənanə yaləšarənə
24
+ - source: ሕዝቦች ነን ለሥራ በሥራ የኖርን
25
+ expected: ḥəzəbočə nänə läśəra bäśəra yänorənə
26
+ - source: ድንቅ የባህል መድረክ ያኩሪ ቅርስ ባለቤት
27
+ expected: dənəqə yäbahələ mädəräkə yakuri qərəsə baläbetə
28
+ - source: የተፈጥሮ ጸጋ የጀግና ሕዝብ እናት
29
+ expected: yätäfäṭəro ṣäga yäǧägəna ḥəzəbə ʾənatə
30
+ - source: እንጠብቅሻለን አለብን አደራ
31
+ expected: ʾənəṭäbəqəšalänə ʾaläbənə ʾadära
32
+ - source: ኢትዮጵያችን ኑሪ እኛም ባንቺ እንኩራ
33
+ expected: ʾitəyoṗəyačənə nuri ʾəñamə banəči ʾənəkura
34
+ - source: ቋንቋ የድምጽ፣ የምልክት ወይም የምስል ቅንብር ሆኖ
35
+ expected: qʷanəqʷa yädəməṣə፣ yämələkətə wäyəmə yäməsələ qənəbərə hono
36
+ - source: ለማሰብ ወይም የታሰበን ሃሳብ ለሌላ ለማስተላለፍ የሚረዳ መሳሪያ ነው
37
+ expected: lämasäbə wäyəmə yätasäbänə hasabə lälela lämasətälaläfə yämiräda mäsariya näwə
38
+ - source: በአጭሩ ቋንቋ የምልክቶች ስርዓትና እኒህን ምልክቶች ለማቀናበር
39
+ expected: bäʾač̣əru qʷanəqʷa yämələkətočə sərəʿatəna ʾənihənə mələkətočə lämaqänabärə
40
+ - source: የሚያስፈልጉ ህጎች ጥንቅር ነው። ቋንቋወችን ለመፈረጅ እንዲሁም
41
+ expected: yämiyasəfäləgu həጎčə ṭənəqərə näwə። qʷanəqʷawäčənə lämäfäräǧə ʾənədihumə
42
+ - source: ለምክፈል የሚያስችሉ መስፈርቶችን ለማስቀመጥ ባለው ችግር
43
+ expected: läməkəfälə yämiyasəčəlu mäsəfärətočənə lämasəqämäṭə baläwə čəgərə
44
+ - source: ምክንያት በአሁኑ ሰዓት በርግጠኝነት ስንት ቋንቋ በዓለም ላይ
45
+ expected: məkənəyatə bäʾahunu säʿatə bärəgəṭäñənätə sənətə qʷanəqʷa bäʿalämə layə
46
+ - source: እንዳለ ማወቅ አስቸጋሪ ነው
47
+ expected: ʾənədalä mawäqə ʾasəčägari näwə
48
+ - source: አሰላ
49
+ expected: ʾasäla
50
+ - source: አሶሳ
51
+ expected: ʾasosa
52
+ - source: አንኮበር
53
+ expected: ʾanəkobärə
54
+ - source: አክሱም
55
+ expected: ʾakəsumə
56
+ - source: አዋሳ
57
+ expected: ʾawasa
58
+ - source: አዲስ ዘመን (ከተማ)
59
+ expected: ʾadisə zämänə (kätäma)
60
+ - source: አዲግራት
61
+ expected: ʾadigəratə
62
+ - source: አዳማ
63
+ expected: ʾadama
64
+ - source: ደምበጫ
65
+ expected: däməbäč̣a
66
+ - source: ደርባ
67
+ expected: därəba
68
+ - source: ደብረ ማርቆስ
69
+ expected: däbərä marəqosə
70
+ - source: ደብረ ብርሃን
71
+ expected: däbərä bərəhanə
72
+ - source: ደብረ ታቦር (ከተማ)
73
+ expected: däbərä taborə (kätäma)
74
+ - source: ደብረ ዘይት
75
+ expected: däbərä zäyətə
76
+ - source: ደገሃቡር
77
+ expected: dägähaburə
78
+ - source: ወልቂጤ
79
+ expected: wäləqiṭe
80
+ - source: ወልወል
81
+ expected: wäləwälə
82
+ - source: ወልደያ
83
+ expected: wälədäya
84
+ - source: ናይሎ ሳህራን
85
+ expected: nayəlo sahəranə
86
+ - source: አኙዋክኛ
87
+ expected: ʾañuwakəña
88
+ - source: ኡዱክኛ
89
+ expected: ʾudukəña
90
+ - source: ኦፓኛ
91
+ expected: ʾopaña
92
+ - source: ጉምዝኛ
93
+ expected: guməzəña
94
+ - source: አፋርኛ
95
+ expected: ʾafarəña
96
+ - source: አላባኛ
97
+ expected: ʾalabaña
98
+ - source: አርቦርኛ
99
+ expected: ʾarəborəña
100
+ - source: ባይሶኛ
101
+ expected: bayəsoña
102
+ - source: ቡሳኛ
103
+ expected: busaña
104
+ - source: ሁለተኛ ጥፋት ከገበያ ማንቀላፋት
105
+ expected: hulätäña ṭəfatə kägäbäya manəqälafatə
106
+ - source: ሁሉም ከልኩ አያልፍም
107
+ expected: hulumə käləku ʾayaləfəmə
108
+ - source: አልሞት ባይ ተጋዳይ
109
+ expected: ʾaləmotə bayə tägadayə
110
+ - source: ውርድ ከራሴ
111
+ expected: wərədə kärase
112
+ - source: ፀጉር መሰንጠቅ
113
+ expected: ṣ̓ägurə mäsänəṭäqə
114
+ - source: ግንትር ፀሐይ
115
+ expected: gənətərə ṣ̓äḥayə
116
+ - source: በሬ ወለደ
117
+ expected: bäre wälädä
118
+ - source: ራስ ሳይጠና ጉተና
119
+ expected: rasə sayəṭäna gutäna
120
+ - source: ለሆዴ ጠግቤ በልብሴ አንግቤ
121
+ expected: lähode ṭägəbe bäləbəse ʾanəgəbe
122
+ - source: ለልጅ ከሳቁለት ለውሻ ከሮጡለት
123
+ expected: läləǧə käsaqulätə läwəša käroṭulätə
124
+ - source: መልካም ባል መጥፎ ሴት ይገራል
125
+ expected: mäləkamə balə mäṭəfo setə yəgäralə
126
+ - source: ሆድና ግንባር አይሸሸግም
127
+ expected: hodəna gənəbarə ʾayəšäšägəmə
128
+ - source: ቀሊል አማት ሲሶ በትር አላት
129
+ expected: qälilə ʾamatə siso bätərə ʾalatə
130
+ - source: ጨው ለራስህ ብለህ ጣፍጥ አለበለዚያ ድንጋይ ነው ብለው ይወረውሩሀል
131
+ expected: č̣äwə lärasəhə bəlähə ṭafəṭə ʾaläbäläziya dənəgayə näwə bəläwə yəwäräwəruhalə
132
+ - source: ጀምሮ ይጨርሳል አልሞ ይተኩሳል
133
+ expected: ǧäməro yəč̣ärəsalə ʾaləmo yətäkusalə
134
+
135
+ map:
136
+ characters:
137
+ '\u1200': "ha" # ሀ
138
+ '\u1201': "hu" # ሁ
139
+ '\u1202': "hi" # ሂ
140
+ '\u1203': "ha" # ሃ
141
+ '\u1204': "he" # ሄ
142
+ '\u1205': "hə" # ህ
143
+ '\u1206': "ho" # ሆ
144
+
145
+ '\u1208': "lä" # ለ
146
+ '\u1209': "lu" # ሉ
147
+ '\u120A': "li" # ሊ
148
+ '\u120B': "la" # ላ
149
+ '\u120C': "le" # ሌ
150
+ '\u120D': "lə" # ል
151
+ '\u120E': "lo" # ሎ
152
+
153
+ '\u1210': "ḥa" # ሐ
154
+ '\u1211': "ḥu" # ሑ
155
+ '\u1212': "ḥi" # ሒ
156
+ '\u1213': "ḥa" # ሓ
157
+ '\u1214': "ḥe" # ሔ
158
+ '\u1215': "ḥə" # ሕ
159
+ '\u1216': "ḥo" # ሖ
160
+
161
+ '\u1218': "mä" # መ
162
+ '\u1219': "mu" # ሙ
163
+ '\u121A': "mi" # ሚ
164
+ '\u121B': "ma" # ማ
165
+ '\u121C': "me" # ሜ
166
+ '\u121D': "mə" # ም
167
+ '\u121E': "mo" # ሞ
168
+
169
+ '\u1220': "śä" # ሠ
170
+ '\u1221': "śu" # ሡ
171
+ '\u1222': "śi" # ሢ
172
+ '\u1223': "śa" # ሣ
173
+ '\u1224': "śe" # ሤ
174
+ '\u1225': "śə" # ሥ
175
+ '\u1226': "śo" # ሦ
176
+
177
+ '\u1228': "rä" # ረ
178
+ '\u1229': "ru" # ሩ
179
+ '\u122A': "ri" # ሪ
180
+ '\u122B': "ra" # ራ
181
+ '\u122C': "re" # ሬ
182
+ '\u122D': "rə" # ር
183
+ '\u122E': "ro" # ሮ
184
+
185
+ '\u1230': "sä" # ሰ
186
+ '\u1231': "su" # ሱ
187
+ '\u1232': "si" # ሲ
188
+ '\u1233': "sa" # ሳ
189
+ '\u1234': "se" # ሴ
190
+ '\u1235': "sə" # ስ
191
+ '\u1236': "so" # ሶ
192
+
193
+ '\u1238': "šä" # ሸ
194
+ '\u1239': "šu" # ሹ
195
+ '\u123A': "ši" # ሺ
196
+ '\u123B': "ša" # ሻ
197
+ '\u123C': "še" # ሼ
198
+ '\u123D': "šə" # ሽ
199
+ '\u123E': "šo" # ሾ
200
+
201
+ '\u1240': "qä" # ቀ
202
+ '\u1241': "qu" # ቁ
203
+ '\u1242': "qi" # ቂ
204
+ '\u1243': "qa" # ቃ
205
+ '\u1244': "qe" # ቄ
206
+ '\u1245': "qə" # ቅ
207
+ '\u1246': "qo" # ቆ
208
+
209
+ '\u1260': "bä" # በ
210
+ '\u1261': "bu" # ቡ
211
+ '\u1262': "bi" # ቢ
212
+ '\u1263': "ba" # ባ
213
+ '\u1264': "be" # ቤ
214
+ '\u1265': "bə" # ብ
215
+ '\u1266': "bo" # ቦ
216
+
217
+ '\u1270': "tä" # ተ
218
+ '\u1271': "tu" # ቱ
219
+ '\u1272': "ti" # ቲ
220
+ '\u1273': "ta" # ታ
221
+ '\u1274': "te" # ቴ
222
+ '\u1275': "tə" # ት
223
+ '\u1276': "to" # ቶ
224
+
225
+ '\u1278': "čä" # ቸ
226
+ '\u1279': "ču" # ቹ
227
+ '\u127A': "či" # ቺ
228
+ '\u127B': "ča" # ቻ
229
+ '\u127C': "če" # ቼ
230
+ '\u127D': "čə" # ች
231
+ '\u127E': "čo" # ቾ
232
+
233
+ '\u1280': "ḫa" # ኀ
234
+ '\u1281': "ḫu" # ኁ
235
+ '\u1282': "ḫi" # ኂ
236
+ '\u1283': "ḫa" # ኃ
237
+ '\u1284': "ḫe" # ኄ
238
+ '\u1285': "ḫə" # ኅ
239
+ '\u1286': "ḫo" # ኆ
240
+
241
+ '\u1290': "nä" # ነ
242
+ '\u1291': "nu" # ኑ
243
+ '\u1292': "ni" # ኒ
244
+ '\u1293': "na" # ና
245
+ '\u1294': "ne" # ኔ
246
+ '\u1295': "nə" # ን
247
+ '\u1296': "no" # ኖ
248
+
249
+ '\u1298': "ñä" # ኘ
250
+ '\u1299': "ñu" # ኙ
251
+ '\u129A': "ñi" # ኚ
252
+ '\u129B': "ña" # ኛ
253
+ '\u129C': "ñe" # ኜ
254
+ '\u129D': "ñə" # ኝ
255
+ '\u129E': "ño" # ኞ
256
+
257
+ '\u12A0': "ʾa" # አ
258
+ '\u12A1': "ʾu" # ኡ
259
+ '\u12A2': "ʾi" # ኢ
260
+ '\u12A3': "ʾa" # ኣ
261
+ '\u12A4': "ʾe" # ኤ
262
+ '\u12A5': "ʾə" # እ
263
+ '\u12A6': "ʾo" # ኦ
264
+
265
+ '\u12A8': "kä" # ከ
266
+ '\u12A9': "ku" # ኩ
267
+ '\u12AA': "ki" # ኪ
268
+ '\u12AB': "ka" # ካ
269
+ '\u12AC': "ke" # ኬ
270
+ '\u12AD': "kə" # ክ
271
+ '\u12AE': "ko" # ኮ
272
+
273
+ '\u12B8': "ḵä" # ኸ
274
+ '\u12B9': "ḵu" # ኹ
275
+ '\u12BA': "ḵi" # ኺ
276
+ '\u12BB': "ḵa" # ኻ
277
+ '\u12BC': "ḵe" # ኼ
278
+ '\u12BD': "ḵə" # ኽ
279
+ '\u12BE': "ḵo" # ኾ
280
+
281
+ '\u12C8': "wä" # ወ
282
+ '\u12C9': "wu" # ዉ
283
+ '\u12CA': "wi" # ዊ
284
+ '\u12CB': "wa" # ዋ
285
+ '\u12CC': "we" # ዌ
286
+ '\u12CD': "wə" # ው
287
+ '\u12CE': "wo" # ዎ
288
+
289
+ '\u12D0': "ʿa" # ዐ
290
+ '\u12D1': "ʿu" # ዑ
291
+ '\u12D2': "ʿi" # ዒ
292
+ '\u12D3': "ʿa" # ዓ
293
+ '\u12D4': "ʿe" # ዔ
294
+ '\u12D5': "ʿə" # ዕ
295
+ '\u12D6': "ʿo" # ዖ
296
+
297
+ '\u12D8': "zä" # ዘ
298
+ '\u12D9': "zu" # ዙ
299
+ '\u12DA': "zi" # ዚ
300
+ '\u12DB': "za" # ዛ
301
+ '\u12DC': "ze" # ዜ
302
+ '\u12DD': "zə" # ዝ
303
+ '\u12DE': "zo" # ዞ
304
+
305
+ '\u12E0': "žä" # ዠ
306
+ '\u12E1': "žu" # ዡ
307
+ '\u12E2': "ži" # ዢ
308
+ '\u12E3': "ža" # ዣ
309
+ '\u12E4': "že" # ዤ
310
+ '\u12E5': "žə" # ዥ
311
+ '\u12E6': "žo" # ዦ
312
+
313
+ '\u12E8': "yä" # የ
314
+ '\u12E9': "yu" # ዩ
315
+ '\u12EA': "yi" # ዪ
316
+ '\u12EB': "ya" # ያ
317
+ '\u12EC': "ye" # ዬ
318
+ '\u12ED': "yə" # ይ
319
+ '\u12EE': "yo" # ዮ
320
+
321
+ '\u12F0': "dä" # ደ
322
+ '\u12F1': "du" # ዱ
323
+ '\u12F2': "di" # ዲ
324
+ '\u12F3': "da" # ዳ
325
+ '\u12F4': "de" # ዴ
326
+ '\u12F5': "də" # ድ
327
+ '\u12F6': "do" # ዶ
328
+
329
+ '\u1300': "ǧä" # ጀ
330
+ '\u1301': "ǧu" # ጁ
331
+ '\u1302': "ǧi" # ጂ
332
+ '\u1303': "ǧa" # ጃ
333
+ '\u1304': "ǧe" # ጄ
334
+ '\u1305': "ǧə" # ጅ
335
+ '\u1306': "ǧo" # ጆ
336
+
337
+ '\u1308': "gä" # ገ
338
+ '\u1309': "gu" # ጉ
339
+ '\u130A': "gi" # ጊ
340
+ '\u130B': "ga" # ጋ
341
+ '\u130C': "ge" # ጌ
342
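+ '\u130D': "gə" # ግ
+ # NOTE(review): '\u130E' (ጎ, presumably "go") has no entry in this series, so it passes through untransliterated (cf. the "həጎčə" test expectation) -- confirm and add it together with a test update.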
343
+
344
+ '\u1320': "ṭä" # ጠ
345
+ '\u1321': "ṭu" # ጡ
346
+ '\u1322': "ṭi" # ጢ
347
+ '\u1323': "ṭa" # ጣ
348
+ '\u1324': "ṭe" # ጤ
349
+ '\u1325': "ṭə" # ጥ
350
+ '\u1326': "ṭo" # ጦ
351
+
352
+ '\u1328': "č̣ä" # ጨ
353
+ '\u1329': "č̣u" # ጩ
354
+ '\u132A': "č̣i" # ጪ
355
+ '\u132B': "č̣a" # ጫ
356
+ '\u132C': "č̣e" # ጬ
357
+ '\u132D': "č̣ə" # ጭ
358
+ '\u132E': "č̣o" # ጮ
359
+
360
+ '\u1330': "ṗä" # ጰ
361
+ '\u1331': "ṗu" # ጱ
362
+ '\u1332': "ṗi" # ጲ
363
+ '\u1333': "ṗa" # ጳ
364
+ '\u1334': "ṗe" # ጴ
365
+ '\u1335': "ṗə" # ጵ
366
+ '\u1336': "ṗo" # ጶ
367
+
368
+ '\u1338': "ṣä" # ጸ
369
+ '\u1339': "ṣu" # ጹ
370
+ '\u133A': "ṣi" # ጺ
371
+ '\u133B': "ṣa" # ጻ
372
+ '\u133C': "ṣe" # ጼ
373
+ '\u133D': "ṣə" # ጽ
374
+ '\u133E': "ṣo" # ጾ
375
+
376
+ '\u1340': "ṣ̓ä" # ፀ
377
+ '\u1341': "ṣ̓u" # ፁ
378
+ '\u1342': "ṣ̓i" # ፂ
379
+ '\u1343': "ṣ̓a" # ፃ
380
+ '\u1344': "ṣ̓e" # ፄ
381
+ '\u1345': "ṣ̓ə" # ፅ
382
+ '\u1346': "ṣ̓o" # ፆ
383
+
384
+ '\u1348': "fä" # ፈ
385
+ '\u1349': "fu" # ፉ
386
+ '\u134A': "fi" # ፊ
387
+ '\u134B': "fa" # ፋ
388
+ '\u134C': "fe" # ፌ
389
+ '\u134D': "fə" # ፍ
390
+ '\u134E': "fo" # ፎ
391
+
392
+ '\u1350': "pä" # ፐ
393
+ '\u1351': "pu" # ፑ
394
+ '\u1352': "pi" # ፒ
395
+ '\u1353': "pa" # ፓ
396
+ '\u1354': "pe" # ፔ
397
+ '\u1355': "pə" # ፕ
398
+ '\u1356': "po" # ፖ
399
+
400
+ '\u1268': "vä" # ቨ
401
+ '\u1269': "vu" # ቩ
402
+ '\u126A': "vi" # ቪ
403
+ '\u126B': "va" # ቫ
404
+ '\u126C': "ve" # ቬ
405
+ '\u126D': "və" # ቭ
406
+ '\u126E': "vo" # ቮ
407
+
408
+ # Labiovelars
409
+ '\u1248': "qʷä" # ቈ
410
+ '\u124a': "qʷi" # ቊ
411
+ '\u124b': "qʷa" # ቋ
412
+ '\u124c': "qʷe" # ቌ
413
+ '\u124d': "qʷə" # ቍ
414
+
415
+ '\u1288': "ḫʷä" # ኈ
416
+ '\u128a': "ḫʷi" # ኊ
417
+ '\u128b': "ḫʷa" # ኋ
418
+ '\u128c': "ḫʷe" # ኌ
419
+ '\u128d': "ḫʷə" # ኍ
420
+
421
+ '\u12b0': "kʷä" # ኰ
422
+ '\u12b2': "kʷi" # ኲ
423
+ '\u12b3': "kʷa" # ኳ
424
+ '\u12b4': "kʷe" # ኴ
425
+ '\u12b5': "kʷə" # ኵ
426
+
427
+ '\u1310': "gʷä" # ጐ
428
+ '\u1312': "gʷi" # ጒ
429
+ '\u1313': "gʷa" # ጓ
430
+ '\u1314': "gʷe" # ጔ
431
+ '\u1315': "gʷə" # ጕ
432
+
433
+ '\u12c0': "ḵʷä" # ዀ
434
+ '\u12c2': "ḵʷi" # ዂ
435
+ '\u12c3': "ḵʷa" # ዃ
436
+ '\u12c4': "ḵʷe" # ዄ
437
+ '\u12c5': "ḵʷə" # ዅ
438
+
439
+ '\u1258': "q̱ʷä" # ቘ
440
+ '\u125a': "q̱ʷi" # ቚ
441
+ '\u125b': "q̱ʷa" # ቛ
442
+ '\u125c': "q̱ʷe" # ቜ
443
+ '\u125d': "q̱ʷə" # ቝ
444
+
445
+ # Numbers
446
+
447
+ '\u1369': "1" # ፩
448
+ '\u136A': "2" # ፪
449
+ '\u136B': "3" # ፫
450
+ '\u136C': "4" # ፬
451
+ '\u136D': "5" # ፭
452
+ '\u136E': "6" # ፮
453
+ '\u136F': "7" # ፯
454
+ '\u1370': "8" # ፰
455
+ '\u1371': "9" # ፱
456
+ '\u1372': "10" # ፲
457
+ '\u1373': "20" # ፳
458
+ '\u1374': "30" # ፴
459
+ '\u1375': "40" # ፵
460
+ '\u1376': "50" # ፶
461
+ '\u1377': "60" # ፷
+ '\u1378': "70" # ፸
462
+ '\u1379': "80" # ፹
463
+ '\u137A': "90" # ፺
464
+ '\u137B': "100" # ፻
465
+ '\u1372\u137B': "1000" # ፲፻
466
+ '\u137B\u137B': "10000" # ፻፻