interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,405 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 2017
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF URDU -- UNGEGN 1972 System
8
+ url: https://www.eki.ee/wgrs/rom1_ur.htm
9
+ creation_date: 2017
10
+ confirmation date: 2018-06
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (
13
+ II/11) and amended in 1977 (III/12), based on a report
14
+ prepared by D. N. Sharma. The tables and their corrections
15
+ were published in volume II of the conference reports [1, 2].
16
+
17
+ There is no evidence of the use of the system either in
18
+ Pakistan, India or in international cartographic products.
19
+ Instead, in Pakistan the Hunterian system is officially
20
+ used [3]. The resolutions III/12 (1977) and IV/17 (1982)
21
+ recommended association, inter alia, with Pakistan, in
22
+ carrying out further studies on the system.
23
+
24
+ Urdu (Urdū) uses the Perso-Arabic script which is written
25
+ from right to left. In the script vowel points are usually
26
+ omitted which makes it difficult to obtain uniform
27
+ romanizations. Some of the Arabic consonants are
28
+ undifferentiated in romanization which means that the
29
+ system is not fully reversible.
30
+ notes:
31
+ - A If preceded by short a, it is romanized ‘ā, e.g. مَعمُل M‘āmul.
32
+ - B When و is imperceptible, e.g. in a few words of Persian origin when preceded by خ (ḳh).
33
+ - C Word-finally after a short vowel.
34
+ - D Marks aspiration of consonants.
35
+ - E The character ے is used only word-finally.
36
+ tests:
37
+ - source: بوغدِی
38
+ expected: Bvghdī
39
+
40
+ - source: مَعمُل
41
+ expected: M‘āmul
42
+
43
+ - source: پَالِير
44
+ expected: Pālīr
45
+
46
+ - source: بیزوت كَلے
47
+ expected: Byzvt Kale
48
+
49
+ - source: عَمَل كوٹ
50
+ expected: ‘Amal Kvṭ
51
+
52
+ - source: ثَابِر
53
+ expected: Sābir
54
+
55
+ - source: شَاه نَثَار ميلة
56
+ expected: Shāh Nasār Mylah
57
+
58
+ - source: چَپرِی
59
+ expected: Chaprī
60
+
61
+ - source: أَحمَد خَان كَلے
62
+ expected: Ahmad Ḳhān Kale
63
+
64
+ - source: دُرَانِي
65
+ expected: Durānī
66
+
67
+ - source: ڈَنگِیلا
68
+ expected: Ḍangīlā
69
+
70
+ - source: ذَرَانِی
71
+ expected: Zarānī
72
+
73
+ - source: بُركِي
74
+ expected: Burkī
75
+
76
+ - source: گِیدَڑَه
77
+ expected: Gīdaṙah
78
+
79
+ - source: عَلِي زَائِي
80
+ expected: ‘Alī Zā-ī
81
+
82
+ - source: ژوب
83
+ expected: Ỵvb
84
+
85
+ - source: بِسَاتُو
86
+ expected: Bisātū
87
+
88
+ - source: أَحمَدِي شَامَا
89
+ expected: Ahmadī Shāmā
90
+
91
+ - source: اَصَالَت كَلے
92
+ expected: Asālat Kale
93
+
94
+ - source: خَضَر خَان
95
+ expected: Ḳhazar Ḳhān
96
+
97
+ - source: سُلْطَان
98
+ expected: Sultān
99
+
100
+ - source: عَزَم سَيِّد نُور كَلے
101
+ expected: ‘Azam Sayyid Nūr Kale
102
+
103
+ - source: بغَاكِي
104
+ expected: Bghākī
105
+
106
+ - source: حَقدَرَه
107
+ expected: Haqdarah
108
+
109
+ - source: کَچکِینَہ
110
+ expected: Kachkīnaḥ
111
+
112
+ - source: بَاگَن
113
+ expected: Bāgan
114
+
115
+ - source: بُلبَلَک
116
+ expected: Bulbalak
117
+
118
+ - source: بِلیَامِین
119
+ expected: Bilyāmīn
120
+
121
+ - source: نَہر
122
+ expected: Nahr
123
+
124
+ - source: اَرَوْالِی
125
+ expected: Arawālī
126
+
127
+ - source: مَہردِی
128
+ expected: Mahrdī
129
+
130
+ - source: بَڑھ
131
+ expected: Baṙh
132
+
133
+ - source: یَاردَا کَلے
134
+ expected: Yārdā Kale
135
+
136
+ - source: بهَائِي خَان
137
+ expected: Bhā-ī Ḳhān
138
+
139
+ - source: پھاشک
140
+ expected: Phāshk
141
+
142
+ - source: تھَلّ
143
+ expected: Thall
144
+
145
+ - source: پَٹھان ريَا
146
+ expected: Paṭhān Ryā
147
+
148
+ - source: جھِیل
149
+ expected: Jhīl
150
+
151
+ - source: غَزْنِي سْپِين
152
+ expected: Ghaznī Spīn
153
+
154
+ - source: بَادشَاه چھُم
155
+ expected: Bādshāh Chhum
156
+
157
+ - source: سِندھ
158
+ expected: Sindh
159
+
160
+ - source: ڈھَنڈ
161
+ expected: Ḍhanḍ
162
+
163
+ - source: خَان گھَڑِی
164
+ expected: Ḳhān Ghaṙī
165
+
166
+ - source: غُلَامَک كَلے
167
+ expected: Ghulāmak Kale
168
+
169
+ - source: خَپیَنگا
170
+ expected: Ḳhapyangā
171
+
172
+ - source: گَندَه كَلے
173
+ expected: Gandah Kale
174
+
175
+ - source: مَورپِتھِی
176
+ expected: Maurpithī
177
+
178
+ - source: درے پلارِی
179
+ expected: Dre Plārī
180
+
181
+ - source: آگرَہ
182
+ expected: Āgraḥ
183
+
184
+ - source: ڈَنڈَر
185
+ expected: Ḍanḍar
186
+
187
+ - source: گُبازانَہ
188
+ expected: Gubāzānaḥ
189
+
190
+ - source: حَےدَر عَلِی كَلے
191
+ expected: Haidar ‘Alī Kale
192
+
193
+ - source: تَودَہ چِینَہ
194
+ expected: Taudaḥ Chīnaḥ
195
+
196
+ - source: مُوسى خَان كَلے
197
+ expected: Mūsá Ḳhān Kale
198
+
199
+ - source: مُلَّا بَاغ
200
+ expected: Mullā Bāgh
201
+
202
+ map:
203
+ postrules:
204
+ - pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
205
+ result: "upcase"
206
+ # don't capitalize the definite article in the middle of a sentence
207
+ - pattern : ' At T' # الت
208
+ result: ' at T'
209
+ - pattern : ' As̄ S̄' # الث
210
+ result: ' as̄ S̄'
211
+ - pattern : ' Ad D' # الد
212
+ result: ' ad D'
213
+ - pattern : ' Az Z' # الذ
214
+ result: ' az Z'
215
+ - pattern : ' Ar R' # الر
216
+ result: ' ar R'
217
+ - pattern : ' Az Z' # الز
218
+ result: ' az Z'
219
+ - pattern : ' As S' # الس
220
+ result: ' as S'
221
+ - pattern : ' Ash Sh' # الش
222
+ result: ' ash Sh'
223
+ - pattern : ' As S' # الص
224
+ result: ' as S'
225
+ - pattern : ' Az Z' # الض
226
+ result: ' az Z'
227
+ - pattern : ' At T' # الط
228
+ result: ' at T'
229
+ - pattern : ' Az Z' # الظ
230
+ result: ' az Z'
231
+ - pattern : ' Al L' # الل
232
+ result: ' al L'
233
+ - pattern : ' An N' # الن
234
+ result: ' an N'
235
+ - pattern: " Al " # ال
236
+ result: " al "
237
+ characters:
238
+ # special rules
239
+
240
+ '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
241
+ '\ufdf2': 'Allāh' # See note 5
242
+
243
+ # Vowels, Diphthongs, and Diacritical Marks
244
+ '\u064e' : 'a' # َ fatha
245
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
246
+ '\u0627' : 'ā' # ا
247
+ '\u0649\u0670' : 'ā' # ىٰ
248
+ '\u06D2\u0670' : 'ā' # ےٰ
249
+ '\u0622' : 'ā' # آ
250
+ '\b\u0627' : '' # ا
251
+ '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
252
+ '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
253
+
254
+ '\u0652' : '' # ْ sokoon
255
+ '\u0659': 'ê'
256
+
257
+ '\u0650' : 'i' # kasra
258
+ '\u0650[\u064a|\u06cc]' : 'ī' # ـِي kasra followed by ي
259
+ '\u0650\u06d2\u0652' : 'e' # ـے
260
+ '\u0650\u06d2' : 'e' # ـے
261
+ '\u06d2' : 'e' # ـے
262
+
263
+ '\u064f' : 'u' # ُ damma
264
+ '\u064f\u0648' : 'ū' # ـُو damma followed by و
265
+ '\u064f\u0648\u0652' : 'o' # ـَوْ
266
+
267
+
268
+ '\u064e\u06d2' : 'ai' # ـے
269
+ '\u064e\u0648' : 'au' # ـَو
270
+ '\u0670': 'á' # ىٰ
271
+ '\u0649': 'á' # ىٰ
272
+
273
+ # shadda
274
+ '\u0628\u0651' : 'bb' # ب
275
+ '\u062a\u0651' : 'tt' # ت
276
+ '\u062b\u0651' : 'ss' # ث
277
+ '\u062c\u0651' : 'jj' # ج
278
+ '\u062d\u0651' : 'hh' # ح
279
+ '\u062e\u0651' : 'ḳhḳh' # خ
280
+ '\u062f\u0651' : 'dd' # د
281
+ '\u0630\u0651' : 'zz' # ذ
282
+ '\u0631\u0651' : 'rr' # ر
283
+ '\u0632\u0651' : 'zz' # ز
284
+ '\u0633\u0651' : 'ss' # س
285
+ '\u0634\u0651' : 'sh' # ش
286
+ '\u0635\u0651' : 'ss' # ص
287
+ '\u0636\u0651' : 'ḏḏ' # ض
288
+ '\u0637\u0651' : 'tt' # ط
289
+ '\u0638\u0651' : 'zz' # ظ
290
+ '\u063a\u0651' : 'ghgh' # غ
291
+ '\u0641\u0651' : 'ff' # ف
292
+ '\u0642\u0651' : 'qq' # ق
293
+ '\u0643\u0651' : 'kk' # ك
294
+ '\u0644\u0651' : 'll' # ل
295
+ '\u0645\u0651' : 'mm' # م
296
+ '\u0646\u0651' : 'nn' # ن
297
+ '\u0647\u0651' : 'hh' # ه
298
+ '\u0648\u0651' : 'vv' # و
299
+ '[\u064a|\u06cc]\u0651' : 'yy' # ي
300
+
301
+ # NOTE 1
302
+ '\u0650\b' : '-e' # ِ kasra
303
+ '\u0674' : '-e' # ٴ
304
+ '\u0654' : '-e' # ٔ
305
+
306
+ '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
307
+ '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي
308
+ '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
309
+ '\u064e\u0648\u0652' : 'aw' # ـَوْ
310
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
311
+ '\u0650\u06cc\u0651\u064e' : 'īy' # ـِيَّ
312
+ '\u064e\u064a' : 'aī' # ـَي
313
+ '\u064e\u06cc' : 'aī' # ـَي
314
+ # - '-ye'
315
+
316
+
317
+ # ta' marboota
318
+ '\u0629' : 'at' # ة in the middle of the sentence
319
+ '\u0629$' : 'ah'
320
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
321
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
322
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
323
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
324
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
325
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
326
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
327
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
328
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
329
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
330
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
331
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
332
+
333
+
334
+
335
+ '\u0621' : '-' # ء
336
+ '\u0624' : '-' # ؤ
337
+ '\u0626' : '-' # ئ
338
+
339
+ '\u0623' : '' # أ
340
+ '\u0625' : '' # إ
341
+ # See note B
342
+ '\b\u0627\u0644' : 'al ' # ال
343
+ # '\uFE8E' : '' # ﺎ
344
+
345
+ # Sun letters
346
+ '\b\u0627\u0644\u062a' : 'at t' # الت
347
+ '\b\u0627\u0644\u062b' : 'as s' # الث
348
+ '\b\u0627\u0644\u062f' : 'ad d' # الد
349
+ '\b\u0627\u0644\u0630' : 'az z' # الذ
350
+ '\b\u0627\u0644\u0631' : 'ar r' # الر
351
+ '\b\u0627\u0644\u0632' : 'az z' # الز
352
+ '\b\u0627\u0644\u0633' : 'as s' # الس
353
+ '\b\u0627\u0644\u0634' : 'ash sh' # الش
354
+ '\b\u0627\u0644\u0635' : 'as s' # الص
355
+ '\b\u0627\u0644\u0636' : 'az z' # الض
356
+ '\b\u0627\u0644\u0637' : 'at t' # الط
357
+ '\b\u0627\u0644\u0638' : 'az z' # الظ
358
+ '\b\u0627\u0644\u0644' : 'al l' # الل
359
+ '\b\u0627\u0644\u0646' : 'an n' # الن
360
+
361
+
362
+ # consonant characters
363
+
364
+ '\u0628' : 'b' # ب
365
+ '\u067E' : 'p' # پ
366
+ '\u062a' : 't' # ت
367
+ '\u0679' : 'ṭ' # ٹ
368
+ '\u062B' : 's' # ث
369
+ '\u062c' : 'j' # ج
370
+ '\u0686' : 'ch' # ‫چ‬
371
+ '\u062d' : 'h' # ح
372
+ '\u062e' : 'ḳh' # خ
373
+ '\u062f' : 'd' # د
374
+ '\u0688' : 'ḍ' # ‫ڈ
375
+ '\u0630' : 'z' # ذ
376
+ '\u0631' : 'r' # ر
377
+ '\u0691' : 'ṙ' # ڑ
378
+ '\u0632' : 'z' # ز
379
+ '\u0698' : 'ỵ' # ‫ژ‬
380
+ '\u0633' : 's' # س
381
+ '\u0634' : 'sh' # ش
382
+ '\u0635' : 's' # ص
383
+ '\u0636' : 'z' # ض
384
+ '\u0637' : 't' # ط
385
+ '\u0638' : 'z' # ظ
386
+ '\u0639' : '‘' # ع
387
+ '\u064e\u0639' : '‘ā' # ع NOTE A
388
+ '\u063a' : 'gh' # غ
389
+ '\u0641' : 'f' # ف
390
+ '\u0642' : 'q' # ق
391
+ '\u0643' : 'k' # ك
392
+ '\u06A9' : 'k' # ک
393
+ '\u06AF' : 'g' # ‫گ‬
394
+ '\u0644' : 'l' # ل
395
+ '\u0645' : 'm' # م
396
+ '[\u06BA|\u0646]' : 'n' # ن, ں
397
+ '[\ufba9|\u06c1]' : 'h' # ہ , ﮩ
398
+ '(?<=[\u064e|\u0650|\u064f])[\ufba9|\u06c1]\b' : 'ḥ' # ہ , ﮩ NOTE C
399
+ '[\u0647|\u06be]' : 'h' # ه, ھ
400
+ '\u0648' : 'v' # و
401
+ '(?<=\u062e)\u0648' : 'ẉ' # و NOTE B
402
+ '[\u064a|\u06cc]' : 'y' # ي
403
+ # '\u0649' : 'y' # ي
404
+ '\u06D0' : 'ē' # ې
405
+ '\u06CD' : 'êy' # ‫ۍ‬
@@ -0,0 +1,466 @@
1
+ ---
2
+ authority_id: var
3
+ id: 2003
4
+ language: amh
5
+ source_script: Ethi
6
+ destination_script: Latn
7
+ name: Encyclopaedia Aethiopica Amharic transliteration system (2003)
8
+ url: https://brill.com/view/book/edcoll/9789004419582/front-11.xml?language=en
9
+ creation_date: 2003
10
+ description: |
11
+ The Encyclopaedia Aethiopica (EAe) is a basic encyclopedia for Ethiopian and Eritrean studies. It employs an in-house form of romanization of Geez, Amharic, and other languages, which varies greatly from standard formats, such as BGN/PCGN: the emperor Menelek II's name, for example, is written as "Mənilək II".
12
+
13
+ tests:
14
+ - source: የዜግነት ክብር በ ኢትዮጵያችን ጸንቶ
15
+ expected: yäzegənätə kəbərə bä ʾitəyoṗəyačənə ṣänəto
16
+ - source: ታየ ሕዝባዊነት ዳር እስከዳር በርቶ
17
+ expected: tayä ḥəzəbawinätə darə ʾəsəkädarə bärəto
18
+ - source: ለሰላም ለፍትህ ለሕዝቦች ነጻነት
19
+ expected: läsälamə läfətəhə läḥəzəbočə näṣanätə
20
+ - source: በእኩልነት በፍቅር ቆመናል ባንድነት
21
+ expected: bäʾəkulənätə bäfəqərə qomänalə banədənätə
22
+ - source: መሠረተ ፅኑ ሰብዕናን ያልሻርን
23
+ expected: mäśärätä ṣ̓ənu säbəʿənanə yaləšarənə
24
+ - source: ሕዝቦች ነን ለሥራ በሥራ የኖርን
25
+ expected: ḥəzəbočə nänə läśəra bäśəra yänorənə
26
+ - source: ድንቅ የባህል መድረክ ያኩሪ ቅርስ ባለቤት
27
+ expected: dənəqə yäbahələ mädəräkə yakuri qərəsə baläbetə
28
+ - source: የተፈጥሮ ጸጋ የጀግና ሕዝብ እናት
29
+ expected: yätäfäṭəro ṣäga yäǧägəna ḥəzəbə ʾənatə
30
+ - source: እንጠብቅሻለን አለብን አደራ
31
+ expected: ʾənəṭäbəqəšalänə ʾaläbənə ʾadära
32
+ - source: ኢትዮጵያችን ኑሪ እኛም ባንቺ እንኩራ
33
+ expected: ʾitəyoṗəyačənə nuri ʾəñamə banəči ʾənəkura
34
+ - source: ቋንቋ የድምጽ፣ የምልክት ወይም የምስል ቅንብር ሆኖ
35
+ expected: qʷanəqʷa yädəməṣə፣ yämələkətə wäyəmə yäməsələ qənəbərə hono
36
+ - source: ለማሰብ ወይም የታሰበን ሃሳብ ለሌላ ለማስተላለፍ የሚረዳ መሳሪያ ነው
37
+ expected: lämasäbə wäyəmə yätasäbänə hasabə lälela lämasətälaläfə yämiräda mäsariya näwə
38
+ - source: በአጭሩ ቋንቋ የምልክቶች ስርዓትና እኒህን ምልክቶች ለማቀናበር
39
+ expected: bäʾač̣əru qʷanəqʷa yämələkətočə sərəʿatəna ʾənihənə mələkətočə lämaqänabärə
40
+ - source: የሚያስፈልጉ ህጎች ጥንቅር ነው። ቋንቋወችን ለመፈረጅ እንዲሁም
41
+ expected: yämiyasəfäləgu həጎčə ṭənəqərə näwə። qʷanəqʷawäčənə lämäfäräǧə ʾənədihumə
42
+ - source: ለምክፈል የሚያስችሉ መስፈርቶችን ለማስቀመጥ ባለው ችግር
43
+ expected: läməkəfälə yämiyasəčəlu mäsəfärətočənə lämasəqämäṭə baläwə čəgərə
44
+ - source: ምክንያት በአሁኑ ሰዓት በርግጠኝነት ስንት ቋንቋ በዓለም ላይ
45
+ expected: məkənəyatə bäʾahunu säʿatə bärəgəṭäñənätə sənətə qʷanəqʷa bäʿalämə layə
46
+ - source: እንዳለ ማወቅ አስቸጋሪ ነው
47
+ expected: ʾənədalä mawäqə ʾasəčägari näwə
48
+ - source: አሰላ
49
+ expected: ʾasäla
50
+ - source: አሶሳ
51
+ expected: ʾasosa
52
+ - source: አንኮበር
53
+ expected: ʾanəkobärə
54
+ - source: አክሱም
55
+ expected: ʾakəsumə
56
+ - source: አዋሳ
57
+ expected: ʾawasa
58
+ - source: አዲስ ዘመን (ከተማ)
59
+ expected: ʾadisə zämänə (kätäma)
60
+ - source: አዲግራት
61
+ expected: ʾadigəratə
62
+ - source: አዳማ
63
+ expected: ʾadama
64
+ - source: ደምበጫ
65
+ expected: däməbäč̣a
66
+ - source: ደርባ
67
+ expected: därəba
68
+ - source: ደብረ ማርቆስ
69
+ expected: däbərä marəqosə
70
+ - source: ደብረ ብርሃን
71
+ expected: däbərä bərəhanə
72
+ - source: ደብረ ታቦር (ከተማ)
73
+ expected: däbərä taborə (kätäma)
74
+ - source: ደብረ ዘይት
75
+ expected: däbərä zäyətə
76
+ - source: ደገሃቡር
77
+ expected: dägähaburə
78
+ - source: ወልቂጤ
79
+ expected: wäləqiṭe
80
+ - source: ወልወል
81
+ expected: wäləwälə
82
+ - source: ወልደያ
83
+ expected: wälədäya
84
+ - source: ናይሎ ሳህራን
85
+ expected: nayəlo sahəranə
86
+ - source: አኙዋክኛ
87
+ expected: ʾañuwakəña
88
+ - source: ኡዱክኛ
89
+ expected: ʾudukəña
90
+ - source: ኦፓኛ
91
+ expected: ʾopaña
92
+ - source: ጉምዝኛ
93
+ expected: guməzəña
94
+ - source: አፋርኛ
95
+ expected: ʾafarəña
96
+ - source: አላባኛ
97
+ expected: ʾalabaña
98
+ - source: አርቦርኛ
99
+ expected: ʾarəborəña
100
+ - source: ባይሶኛ
101
+ expected: bayəsoña
102
+ - source: ቡሳኛ
103
+ expected: busaña
104
+ - source: ሁለተኛ ጥፋት ከገበያ ማንቀላፋት
105
+ expected: hulätäña ṭəfatə kägäbäya manəqälafatə
106
+ - source: ሁሉም ከልኩ አያልፍም
107
+ expected: hulumə käləku ʾayaləfəmə
108
+ - source: አልሞት ባይ ተጋዳይ
109
+ expected: ʾaləmotə bayə tägadayə
110
+ - source: ውርድ ከራሴ
111
+ expected: wərədə kärase
112
+ - source: ፀጉር መሰንጠቅ
113
+ expected: ṣ̓ägurə mäsänəṭäqə
114
+ - source: ግንትር ፀሐይ
115
+ expected: gənətərə ṣ̓äḥayə
116
+ - source: በሬ ወለደ
117
+ expected: bäre wälädä
118
+ - source: ራስ ሳይጠና ጉተና
119
+ expected: rasə sayəṭäna gutäna
120
+ - source: ለሆዴ ጠግቤ በልብሴ አንግቤ
121
+ expected: lähode ṭägəbe bäləbəse ʾanəgəbe
122
+ - source: ለልጅ ከሳቁለት ለውሻ ከሮጡለት
123
+ expected: läləǧə käsaqulätə läwəša käroṭulätə
124
+ - source: መልካም ባል መጥፎ ሴት ይገራል
125
+ expected: mäləkamə balə mäṭəfo setə yəgäralə
126
+ - source: ሆድና ግንባር አይሸሸግም
127
+ expected: hodəna gənəbarə ʾayəšäšägəmə
128
+ - source: ቀሊል አማት ሲሶ በትር አላት
129
+ expected: qälilə ʾamatə siso bätərə ʾalatə
130
+ - source: ጨው ለራስህ ብለህ ጣፍጥ አለበለዚያ ድንጋይ ነው ብለው ይወረውሩሀል
131
+ expected: č̣äwə lärasəhə bəlähə ṭafəṭə ʾaläbäläziya dənəgayə näwə bəläwə yəwäräwəruhalə
132
+ - source: ጀምሮ ይጨርሳል አልሞ ይተኩሳል
133
+ expected: ǧäməro yəč̣ärəsalə ʾaləmo yətäkusalə
134
+
135
+ map:
136
+ characters:
137
+ '\u1200': "ha" # ሀ
138
+ '\u1201': "hu" # ሁ
139
+ '\u1202': "hi" # ሂ
140
+ '\u1203': "ha" # ሃ
141
+ '\u1204': "he" # ሄ
142
+ '\u1205': "hə" # ህ
143
+ '\u1206': "ho" # ሆ
144
+
145
+ '\u1208': "lä" # ለ
146
+ '\u1209': "lu" # ሉ
147
+ '\u120A': "li" # ሊ
148
+ '\u120B': "la" # ላ
149
+ '\u120C': "le" # ሌ
150
+ '\u120D': "lə" # ል
151
+ '\u120E': "lo" # ሎ
152
+
153
+ '\u1210': "ḥa" # ሐ
154
+ '\u1211': "ḥu" # ሑ
155
+ '\u1212': "ḥi" # ሒ
156
+ '\u1213': "ḥa" # ሓ
157
+ '\u1214': "ḥe" # ሔ
158
+ '\u1215': "ḥə" # ሕ
159
+ '\u1216': "ḥo" # ሖ
160
+
161
+ '\u1218': "mä" # መ
162
+ '\u1219': "mu" # ሙ
163
+ '\u121A': "mi" # ሚ
164
+ '\u121B': "ma" # ማ
165
+ '\u121C': "me" # ሜ
166
+ '\u121D': "mə" # ም
167
+ '\u121E': "mo" # ሞ
168
+
169
+ '\u1220': "śä" # ሠ
170
+ '\u1221': "śu" # ሡ
171
+ '\u1222': "śi" # ሢ
172
+ '\u1223': "śa" # ሣ
173
+ '\u1224': "śe" # ሤ
174
+ '\u1225': "śə" # ሥ
175
+ '\u1226': "śo" # ሦ
176
+
177
+ '\u1228': "rä" # ረ
178
+ '\u1229': "ru" # ሩ
179
+ '\u122A': "ri" # ሪ
180
+ '\u122B': "ra" # ራ
181
+ '\u122C': "re" # ሬ
182
+ '\u122D': "rə" # ር
183
+ '\u122E': "ro" # ሮ
184
+
185
+ '\u1230': "sä" # ሰ
186
+ '\u1231': "su" # ሱ
187
+ '\u1232': "si" # ሲ
188
+ '\u1233': "sa" # ሳ
189
+ '\u1234': "se" # ሴ
190
+ '\u1235': "sə" # ስ
191
+ '\u1236': "so" # ሶ
192
+
193
+ '\u1238': "šä" # ሸ
194
+ '\u1239': "šu" # ሹ
195
+ '\u123A': "ši" # ሺ
196
+ '\u123B': "ša" # ሻ
197
+ '\u123C': "še" # ሼ
198
+ '\u123D': "šə" # ሽ
199
+ '\u123E': "šo" # ሾ
200
+
201
+ '\u1240': "qä" # ቀ
202
+ '\u1241': "qu" # ቁ
203
+ '\u1242': "qi" # ቂ
204
+ '\u1243': "qa" # ቃ
205
+ '\u1244': "qe" # ቄ
206
+ '\u1245': "qə" # ቅ
207
+ '\u1246': "qo" # ቆ
208
+
209
+ '\u1260': "bä" # በ
210
+ '\u1261': "bu" # ቡ
211
+ '\u1262': "bi" # ቢ
212
+ '\u1263': "ba" # ባ
213
+ '\u1264': "be" # ቤ
214
+ '\u1265': "bə" # ብ
215
+ '\u1266': "bo" # ቦ
216
+
217
+ '\u1270': "tä" # ተ
218
+ '\u1271': "tu" # ቱ
219
+ '\u1272': "ti" # ቲ
220
+ '\u1273': "ta" # ታ
221
+ '\u1274': "te" # ቴ
222
+ '\u1275': "tə" # ት
223
+ '\u1276': "to" # ቶ
224
+
225
+ '\u1278': "čä" # ቸ
226
+ '\u1279': "ču" # ቹ
227
+ '\u127A': "či" # ቺ
228
+ '\u127B': "ča" # ቻ
229
+ '\u127C': "če" # ቼ
230
+ '\u127D': "čə" # ች
231
+ '\u127E': "čo" # ቾ
232
+
233
+ '\u1280': "ḫa" # ኀ
234
+ '\u1281': "ḫu" # ኁ
235
+ '\u1282': "ḫi" # ኂ
236
+ '\u1283': "ḫa" # ኃ
237
+ '\u1284': "ḫe" # ኄ
238
+ '\u1285': "ḫə" # ኅ
239
+ '\u1286': "ḫo" # ኆ
240
+
241
+ '\u1290': "nä" # ነ
242
+ '\u1291': "nu" # ኑ
243
+ '\u1292': "ni" # ኒ
244
+ '\u1293': "na" # ና
245
+ '\u1294': "ne" # ኔ
246
+ '\u1295': "nə" # ን
247
+ '\u1296': "no" # ኖ
248
+
249
+ '\u1298': "ñä" # ኘ
250
+ '\u1299': "ñu" # ኙ
251
+ '\u129A': "ñi" # ኚ
252
+ '\u129B': "ña" # ኛ
253
+ '\u129C': "ñe" # ኜ
254
+ '\u129D': "ñə" # ኝ
255
+ '\u129E': "ño" # ኞ
256
+
257
+ '\u12A0': "ʾa" # አ
258
+ '\u12A1': "ʾu" # ኡ
259
+ '\u12A2': "ʾi" # ኢ
260
+ '\u12A3': "ʾa" # ኣ
261
+ '\u12A4': "ʾe" # ኤ
262
+ '\u12A5': "ʾə" # እ
263
+ '\u12A6': "ʾo" # ኦ
264
+
265
+ '\u12A8': "kä" # ከ
266
+ '\u12A9': "ku" # ኩ
267
+ '\u12AA': "ki" # ኪ
268
+ '\u12AB': "ka" # ካ
269
+ '\u12AC': "ke" # ኬ
270
+ '\u12AD': "kə" # ክ
271
+ '\u12AE': "ko" # ኮ
272
+
273
+ '\u12B8': "ḵä" # ኸ
274
+ '\u12B9': "ḵu" # ኹ
275
+ '\u12BA': "ḵi" # ኺ
276
+ '\u12BB': "ḵa" # ኻ
277
+ '\u12BC': "ḵe" # ኼ
278
+ '\u12BD': "ḵə" # ኽ
279
+ '\u12BE': "ḵo" # ኾ
280
+
281
+ '\u12C8': "wä" # ወ
282
+ '\u12C9': "wu" # ዉ
283
+ '\u12CA': "wi" # ዊ
284
+ '\u12CB': "wa" # ዋ
285
+ '\u12CC': "we" # ዌ
286
+ '\u12CD': "wə" # ው
287
+ '\u12CE': "wo" # ዎ
288
+
289
+ '\u12D0': "ʿa" # ዐ
290
+ '\u12D1': "ʿu" # ዑ
291
+ '\u12D2': "ʿi" # ዒ
292
+ '\u12D3': "ʿa" # ዓ
293
+ '\u12D4': "ʿe" # ዔ
294
+ '\u12D5': "ʿə" # ዕ
295
+ '\u12D6': "ʿo" # ዖ
296
+
297
+ '\u12D8': "zä" # ዘ
298
+ '\u12D9': "zu" # ዙ
299
+ '\u12DA': "zi" # ዚ
300
+ '\u12DB': "za" # ዛ
301
+ '\u12DC': "ze" # ዜ
302
+ '\u12DD': "zə" # ዝ
303
+ '\u12DE': "zo" # ዞ
304
+
305
+ '\u12E0': "žä" # ዠ
306
+ '\u12E1': "žu" # ዡ
307
+ '\u12E2': "ži" # ዢ
308
+ '\u12E3': "ža" # ዣ
309
+ '\u12E4': "že" # ዤ
310
+ '\u12E5': "žə" # ዥ
311
+ '\u12E6': "žo" # ዦ
312
+
313
+ '\u12E8': "yä" # የ
314
+ '\u12E9': "yu" # ዩ
315
+ '\u12EA': "yi" # ዪ
316
+ '\u12EB': "ya" # ያ
317
+ '\u12EC': "ye" # ዬ
318
+ '\u12ED': "yə" # ይ
319
+ '\u12EE': "yo" # ዮ
320
+
321
+ '\u12F0': "dä" # ደ
322
+ '\u12F1': "du" # ዱ
323
+ '\u12F2': "di" # ዲ
324
+ '\u12F3': "da" # ዳ
325
+ '\u12F4': "de" # ዴ
326
+ '\u12F5': "də" # ድ
327
+ '\u12F6': "do" # ዶ
328
+
329
+ '\u1300': "ǧä" # ጀ
330
+ '\u1301': "ǧu" # ጁ
331
+ '\u1302': "ǧi" # ጂ
332
+ '\u1303': "ǧa" # ጃ
333
+ '\u1304': "ǧe" # ጄ
334
+ '\u1305': "ǧə" # ጅ
335
+ '\u1306': "ǧo" # ጆ
336
+
337
+ '\u1308': "gä" # ገ
338
+ '\u1309': "gu" # ጉ
339
+ '\u130A': "gi" # ጊ
340
+ '\u130B': "ga" # ጋ
341
+ '\u130C': "ge" # ጌ
342
+ '\u130D': "gə" # ግ
343
+
344
+ '\u1320': "ṭä" # ጠ
345
+ '\u1321': "ṭu" # ጡ
346
+ '\u1322': "ṭi" # ጢ
347
+ '\u1323': "ṭa" # ጣ
348
+ '\u1324': "ṭe" # ጤ
349
+ '\u1325': "ṭə" # ጥ
350
+ '\u1326': "ṭo" # ጦ
351
+
352
+ '\u1328': "č̣ä" # ጨ
353
+ '\u1329': "č̣u" # ጩ
354
+ '\u132A': "č̣i" # ጪ
355
+ '\u132B': "č̣a" # ጫ
356
+ '\u132C': "č̣e" # ጬ
357
+ '\u132D': "č̣ə" # ጭ
358
+ '\u132E': "č̣o" # ጮ
359
+
360
+ '\u1330': "ṗä" # ጰ
361
+ '\u1331': "ṗu" # ጱ
362
+ '\u1332': "ṗi" # ጲ
363
+ '\u1333': "ṗa" # ጳ
364
+ '\u1334': "ṗe" # ጴ
365
+ '\u1335': "ṗə" # ጵ
366
+ '\u1336': "ṗo" # ጶ
367
+
368
+ '\u1338': "ṣä" # ጸ
369
+ '\u1339': "ṣu" # ጹ
370
+ '\u133A': "ṣi" # ጺ
371
+ '\u133B': "ṣa" # ጻ
372
+ '\u133C': "ṣe" # ጼ
373
+ '\u133D': "ṣə" # ጽ
374
+ '\u133E': "ṣo" # ጾ
375
+
376
+ '\u1340': "ṣ̓ä" # ፀ
377
+ '\u1341': "ṣ̓u" # ፁ
378
+ '\u1342': "ṣ̓i" # ፂ
379
+ '\u1343': "ṣ̓a" # ፃ
380
+ '\u1344': "ṣ̓e" # ፄ
381
+ '\u1345': "ṣ̓ə" # ፅ
382
+ '\u1346': "ṣ̓o" # ፆ
383
+
384
+ '\u1348': "fä" # ፈ
385
+ '\u1349': "fu" # ፉ
386
+ '\u134A': "fi" # ፊ
387
+ '\u134B': "fa" # ፋ
388
+ '\u134C': "fe" # ፌ
389
+ '\u134D': "fə" # ፍ
390
+ '\u134E': "fo" # ፎ
391
+
392
+ '\u1350': "pä" # ፐ
393
+ '\u1351': "pu" # ፑ
394
+ '\u1352': "pi" # ፒ
395
+ '\u1353': "pa" # ፓ
396
+ '\u1354': "pe" # ፔ
397
+ '\u1355': "pə" # ፕ
398
+ '\u1356': "po" # ፖ
399
+
400
+ '\u1268': "vä" # ቨ
401
+ '\u1269': "vu" # ቩ
402
+ '\u126A': "vi" # ቪ
403
+ '\u126B': "va" # ቫ
404
+ '\u126C': "ve" # ቬ
405
+ '\u126D': "və" # ቭ
406
+ '\u126E': "vo" # ቮ
407
+
408
+ # Labiovelars
409
+ '\u1248': "qʷä" # ቈ
410
+ '\u124a': "qʷi" # ቊ
411
+ '\u124b': "qʷa" # ቋ
412
+ '\u124c': "qʷe" # ቌ
413
+ '\u124d': "qʷə" # ቍ
414
+
415
+ '\u1288': "ḫʷä" # ኈ
416
+ '\u128a': "ḫʷi" # ኊ
417
+ '\u128b': "ḫʷa" # ኋ
418
+ '\u128c': "ḫʷe" # ኌ
419
+ '\u128d': "ḫʷə" # ኍ
420
+
421
+ '\u12b0': "kʷä" # ኰ
422
+ '\u12b2': "kʷi" # ኲ
423
+ '\u12b3': "kʷa" # ኳ
424
+ '\u12b4': "kʷe" # ኴ
425
+ '\u12b5': "kʷə" # ኵ
426
+
427
+ '\u1310': "gʷä" # ጐ
428
+ '\u1312': "gʷi" # ጒ
429
+ '\u1313': "gʷa" # ጓ
430
+ '\u1314': "gʷe" # ጔ
431
+ '\u1315': "gʷə" # ጕ
432
+
433
+ '\u12c0': "ḵʷä" # ዀ
434
+ '\u12c2': "ḵʷi" # ዂ
435
+ '\u12c3': "ḵʷa" # ዃ
436
+ '\u12c4': "ḵʷe" # ዄ
437
+ '\u12c5': "ḵʷə" # ዅ
438
+
439
+ '\u1258': "q̱ʷä" # ቘ
440
+ '\u125a': "q̱ʷi" # ቚ
441
+ '\u125b': "q̱ʷa" # ቛ
442
+ '\u125c': "q̱ʷe" # ቜ
443
+ '\u125d': "q̱ʷə" # ቝ
444
+
445
+ # Numbers
446
+
447
+ '\u1369': "1" # ፩
448
+ '\u136A': "2" # ፪
449
+ '\u136B': "3" # ፫
450
+ '\u136C': "4" # ፬
451
+ '\u136D': "5" # ፭
452
+ '\u136E': "6" # ፮
453
+ '\u136F': "7" # ፯
454
+ '\u1370': "8" # ፰
455
+ '\u1371': "9" # ፱
456
+ '\u1372': "10" # ፲
457
+ '\u1373': "20" # ፳
458
+ '\u1374': "30" # ፴
459
+ '\u1375': "40" # ፵
460
+ '\u1376': "50" # ፶
461
+ '\u1377': "60" # ፷
462
+ '\u1379': "80" # ፹
463
+ '\u137A': "90" # ፺
464
+ '\u137B': "100" # ፻
465
+ '\u1372\u137B': "1000" # ፲፻
466
+ '\u137B\u137B': "10000" # ፻፻