interscript 0.1.1 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  6. data/lib/g2pwrapper.py +34 -0
  7. data/lib/interscript-opal.rb +2 -0
  8. data/lib/interscript.rb +138 -20
  9. data/lib/interscript/command.rb +28 -0
  10. data/lib/interscript/fs.rb +71 -0
  11. data/lib/interscript/mapping.rb +142 -0
  12. data/lib/interscript/opal.rb +27 -0
  13. data/lib/interscript/opal/maps.js.erb +10 -0
  14. data/lib/interscript/opal_map_translate.rb +12 -0
  15. data/lib/interscript/version.rb +1 -1
  16. data/lib/model-7 +0 -0
  17. data/lib/tha-pt-b-7 +0 -0
  18. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  19. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  20. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  21. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  22. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  23. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  24. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +125 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  27. data/maps/alalc-ell-Grek-Latn-1997.yaml +624 -0
  28. data/maps/alalc-ell-Grek-Latn-2010.yaml +627 -0
  29. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  30. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  31. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  32. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  33. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  34. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  35. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  36. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  37. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +221 -0
  38. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  39. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  40. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  41. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  42. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  43. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  44. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +174 -0
  45. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  46. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  47. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  48. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  49. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  50. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  51. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  52. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  53. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  54. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  55. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  56. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +285 -0
  57. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  58. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  59. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +701 -0
  60. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +19 -0
  61. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  62. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  63. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  64. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  65. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  66. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  67. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  68. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  69. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  70. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +92 -0
  71. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  72. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  73. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +162 -0
  74. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  75. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  76. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  77. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  78. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  79. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  80. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  81. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  82. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  83. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  84. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  85. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  86. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  87. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  88. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  89. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  90. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  91. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  92. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  93. data/maps/ggg-kat-Geor-Latn-2002.yaml +88 -0
  94. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  95. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  96. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +186 -0
  97. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  98. data/maps/icao-bel-Cyrl-Latn-9303.yaml +136 -0
  99. data/maps/icao-bul-Cyrl-Latn-9303.yaml +118 -0
  100. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  101. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  102. data/maps/icao-per-Arab-Latn-9303.yaml +103 -0
  103. data/maps/icao-rus-Cyrl-Latn-9303.yaml +117 -0
  104. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  105. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +119 -0
  106. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  107. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +609 -0
  108. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +40 -0
  109. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  110. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +271 -0
  111. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  112. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  113. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  114. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  115. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  116. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  117. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  118. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  119. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  120. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  121. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  122. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  123. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  124. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  125. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  126. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  127. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  128. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  129. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  130. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  131. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  132. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  133. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  134. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  135. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  136. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  137. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  138. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  139. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  140. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  141. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  142. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  143. data/maps/ses-ara-Arab-Latn-1930.yaml +279 -0
  144. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  145. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  146. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  147. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  148. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  149. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  150. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  151. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  152. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  153. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  154. data/maps/un-mon-Mong-Latn-2013.yaml +99 -0
  155. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  156. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  157. data/maps/un-ukr-Cyrl-Latn-1998.yaml +30 -0
  158. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  159. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  160. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  161. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  162. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  163. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  164. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  165. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  166. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  167. data/spec/interscript/mapping_spec.rb +42 -0
  168. data/spec/interscript_spec.rb +26 -0
  169. data/spec/spec_helper.rb +3 -0
  170. metadata +298 -18
@@ -0,0 +1,420 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 2017
4
+ language: ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
8
+ url: http://www.eki.ee/wgrs/rom1_ar.pdf
9
+ creation_date: 2017
10
+ confirmation_date: 2018-06
11
+ description: |
12
+ The current United Nations recommended romanization
13
+ system was approved in 2017 (resolution XI/3), based on
14
+ the system adopted by Arabic experts at the conference
15
+ held in Beirut in 2007, the Unified Arabic
16
+ Transliteration System, taking into account the
17
+ practical amendments and corrections carried out and
18
+ agreed upon by the representatives of the Arabic-
19
+ speaking countries at the Fourth Arab Conference on
20
+ Geographical Names, held in Beirut in 2008, and some
21
+ clarifications and amendments agreed in Riyadh in 2017.
22
+ Previously, the United Nations had approved a
23
+ romanization system in 1972 (resolution II/8), based on the
24
+ system adopted by Arabic experts at the conference
25
+ held at Beirut in 1971 with the practical amendments carried out
26
+ and agreed upon by the representatives of the Arabic-speaking
27
+ countries at their conference. The table was published in volume
28
+ II of the conference report.
29
+ In UN resolution XI/3 it is specifically stated that the
30
+ system was recommended for the “romanization of the
31
+ geographical names within those Arabic-speaking countries
32
+ where this system is officially adopted”. There is
33
+ evidence of its partial implementation in Jordan, Oman and
34
+ Saudi Arabia. The UNGEGN Working Group on Romanization
35
+ Systems intends to continue monitoring the UN system’s
36
+ implementation across Arabic-speaking countries.
37
+ In some countries there exist local romanization schemes
38
+ or practices. The geographical names of Algeria, Djibouti,
39
+ Mauritania, Morocco and Tunisia are generally rendered in
40
+ the traditional manner which conforms to the principles of
41
+ the French orthography.
42
+ The previous UN-approved system is still found in
43
+ considerable international usage.
44
+ Arabic is written from right to left. The Arabic script
45
+ usually omits vowel points and diacritical marks from
46
+ writing which makes it difficult to obtain uniform results
47
+ in the romanization of Arabic. It is essential to identify
48
+ correctly the words which appear in any particular name
49
+ and to know the standard Arabic-script spelling including
50
+ the relevant vowels. One must also take into account
51
+ dialectal and idiosyncratic deviations. The romanization
52
+ is generally reversible though there may be some ambiguous
53
+ letter sequences (dh, kh, sh, th) which may also point to
54
+ combinations of Arabic characters in addition to the
55
+ respective single characters.
56
+ notes:
57
+ - |
58
+ When the definite article al precedes a word beginning with
59
+ one of the "sun letters" (t, th, d, dh, r, z, s, sh, s̱, ḏ, ṯ,
60
+ d͟h, l, n) the l of the definite article is assimilated with
61
+ the first consonant of the word: الشارقة Ash Shāriqah.
62
+ - |
63
+ The definite article is always written with a capital
64
+ initial: الزيتون Az Zaytūn, البلد Al Balad, منية الضنية Minyat Aḏ
65
+ Ḏinniyyah.
66
+ - |
67
+ Nunation is unlikely to be found in geographical names and
68
+ the last letter remains silent: جبل = جبلٌ Jabal (not Jabalun).
69
+ - |
70
+ In order to disambiguate certain character sequences a
71
+ middle dot (·) may be used: سهيلة S·haylah (cf. شيلة Shaylah), دهيب
72
+ D·hayb (cf. ذيب Dhayb), أدهم Ad·ham (cf. أذم Adham).
73
+ - |
74
+ ta' marboota should be transliterated to 'ah' if it's in
75
+ a word with the definite article, or at the end of the sentence;
76
+ otherwise it should be transliterated to 'at'.
77
+ Words starting with AL and ending with ta' marboota, which is
78
+ pronounced as "ah" not "at", are handled by multiple regexes
79
+ because lookbehind in Ruby doesn't support variable length.
80
+ - |
81
+ مَكّة should be transliterated to makkah, shadda above ك
82
+ is to double the consonant, same applies to all arabic letters
83
+
84
+ tests:
85
+
86
+ # Examples taken from:
87
+ # https://unstats.un.org/unsd/geoinfo/geonames/
88
+
89
+ - source: مِصر
90
+ expected: Mis̱r
91
+
92
+ - source: قَطَر
93
+ expected: Qaṯar
94
+
95
+ - source: المَغرِب
96
+ expected: Al Maghrib
97
+
98
+ - source: الجُمهُورِيَّة العِراقِيَّة
99
+ expected: Al Jumhūrīyah al ‘Irāqīyah
100
+
101
+ - source: جُمهُورِيَّة العِراق
102
+ expected: Jumhūrīyat al ‘Irāq
103
+
104
+ - source: جُمهُورِيَّة مِصر العَرَبِيَّة
105
+ expected: Jumhūrīyat Mis̱r al ‘Arabīyah
106
+
107
+ - source: بَغداد
108
+ expected: Baghdād
109
+
110
+ - source: تُونِس
111
+ expected: Tūnis
112
+
113
+ - source: السُعُودِيَّة
114
+ expected: As Su‘ūdīyah
115
+
116
+ - source: اليَمَن
117
+ expected: Al Yaman
118
+
119
+ - source: السُودان
120
+ expected: As Sūdān
121
+
122
+ - source: الجَزائِر
123
+ expected: Al Jazā'ir
124
+
125
+ - source: الجُمهُورِيَّة اللُبنانِيَّة
126
+ expected: Al Jumhūrīyah al Lubnānīyah
127
+
128
+ - source: أسمَرة
129
+ expected: Asmarah
130
+
131
+ - source: جِدَّة
132
+ expected: Jiddah
133
+
134
+ - source: مَكَّة
135
+ expected: Makkah
136
+
137
+ - source: الرِيَاض
138
+ expected: Ar Riyāḏ
139
+
140
+ map:
141
+ postrules:
142
+ - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
143
+ result: "upcase"
144
+ # don't capitalize defined article in the middle of a sentence
145
+ - pattern : ' At T' # الت
146
+ result: ' at T'
147
+ - pattern : ' Ath Th' # الث
148
+ result: ' ath Th'
149
+ - pattern : ' Ad D' # الد
150
+ result: ' ad D'
151
+ - pattern : ' Adh Dh' # الذ
152
+ result: ' adh Dh'
153
+ - pattern : ' Ar R' # الر
154
+ result: ' ar R'
155
+ - pattern : ' Az Z' # الز
156
+ result: ' az Z'
157
+ - pattern : ' As S' # الس
158
+ result: ' as S'
159
+ - pattern : ' Ash Sh' # الش
160
+ result: ' ash Sh'
161
+ - pattern : ' As̱ S̱' # الص
162
+ result: ' as̱ S̱'
163
+ - pattern : ' Aḏ Ḏ' # الض
164
+ result: ' aḏ Ḏ'
165
+ - pattern : ' Aṯ Ṯ' # الط
166
+ result: ' aṯ Ṯ'
167
+ - pattern : ' Ad͟h D͟h' # الظ
168
+ result: ' ad͟h D͟h'
169
+ - pattern : ' Al L' # الل
170
+ result: ' al L'
171
+ - pattern : ' An N' # الن
172
+ result: ' an N'
173
+ - pattern: " Al " # ال
174
+ result: " al "
175
+
176
+ characters:
177
+
178
+ # Tool used for Unicode finding:
179
+ # https://www.branah.com/unicode-converter
180
+
181
+ # pointing
182
+ '\u064e' : 'a' # َ fatha
183
+ '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
184
+ '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
185
+ '\u0650' : 'i' # ِ kasra
186
+ '\u064f' : 'u' # ُ damma
187
+ '\u0652' : '' # ْ sokoon, see note A below
188
+
189
+
190
+ # special pointed letters
191
+ '\u0639\u064e' : '‘a' # عَ
192
+ '\u0639\u0650' : '‘i' # عِ
193
+ '\u0639\u064f' : '‘ū' # عُ
194
+ # handle MacOS regex difference
195
+ '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
196
+
197
+ '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
198
+ '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
199
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
200
+ '\u064f\u0648' : 'ū' # ـُو damma followed by و
201
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
202
+ '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
203
+ '\u064e\u0648\u0652' : 'aw' # ـَوْ
204
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
205
+ '\u0622' : 'ā' # آ
206
+
207
+ # (A) Marks absence of the vowel.
208
+ # (B) Marks doubling of the consonant.
209
+
210
+ # Sun letters
211
+ '\b\u0627\u0644\u062a' : 'at t' # الت
212
+ '\b\u0627\u0644\u062b' : 'ath th' # الث
213
+ '\b\u0627\u0644\u062f' : 'ad d' # الد
214
+ '\b\u0627\u0644\u0630' : 'adh dh' # الذ
215
+ '\b\u0627\u0644\u0631' : 'ar r' # الر
216
+ '\b\u0627\u0644\u0632' : 'az z' # الز
217
+ '\b\u0627\u0644\u0633' : 'as s' # الس
218
+ '\b\u0627\u0644\u0634' : 'ash sh' # الش
219
+ '\b\u0627\u0644\u0635' : 'as̱ s̱' # الص
220
+ '\b\u0627\u0644\u0636' : 'aḏ ḏ' # الض
221
+ '\b\u0627\u0644\u0637' : 'aṯ ṯ' # الط
222
+ '\b\u0627\u0644\u0638' : 'ad͟h d͟h' # الظ
223
+ '\b\u0627\u0644\u0644' : 'al l' # الل
224
+ '\b\u0627\u0644\u0646' : 'an n' # الن
225
+
226
+ # ta' marboota
227
+ '\u0629' : 'at' # ة in the middle of the sentence
228
+ '\u0629$' : 'ah'
229
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
230
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
231
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
232
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
233
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
234
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
235
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
236
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
237
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
238
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
239
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
240
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
241
+
242
+ # shadda
243
+
244
+ '\u0628\u0651' : 'bb' # ب
245
+ '\u062a\u0651' : 'tt' # ت
246
+ '\u062b\u0651' : 'thth' # ث
247
+ '\u062c\u0651' : 'jj' # ج
248
+ '\u062d\u0651' : 'ẖẖ' # ح
249
+ '\u062e\u0651' : 'khkh' # خ
250
+ '\u062f\u0651' : 'dd' # د
251
+ '\u0630\u0651' : 'dhdh' # ذ
252
+ '\u0631\u0651' : 'rr' # ر
253
+ '\u0632\u0651' : 'zz' # ز
254
+ '\u0633\u0651' : 'ss' # س
255
+ '\u0634\u0651' : 'shsh' # ش
256
+ '\u0635\u0651' : 's̱s̱' # ص
257
+ '\u0636\u0651' : 'ḏḏ' # ض
258
+ '\u0637\u0651' : 'ṯṯ' # ط
259
+ '\u0638\u0651' : 'd͟hd͟h' # ظ
260
+ '\u063a\u0651' : 'ghgh' # غ
261
+ '\u0641\u0651' : 'ff' # ف
262
+ '\u0642\u0651' : 'qq' # ق
263
+ '\u0643\u0651' : 'kk' # ك
264
+ '\u0644\u0651' : 'll' # ل
265
+ '\u0645\u0651' : 'mm' # م
266
+ '\u0646\u0651' : 'nn' # ن
267
+ '\u0647\u0651' : 'hh' # ه
268
+ '\u0648\u0651' : 'ww' # و
269
+ '\u064a\u0651' : 'yy' # ي
270
+
271
+ '\u0626' : "'" # ئ
272
+
273
+
274
+ '\u0621' : # ء
275
+ - '’'
276
+ - '' # see note A
277
+
278
+ '\u0623' : 'a' # أ
279
+ '\u0627' : 'ā' # ا
280
+
281
+ # See note B
282
+ '\b\u0627\u0644' : 'al ' # ال
283
+ # '\uFE8E' : '' # ﺎ
284
+
285
+ '\u0628' : 'b' # ب
286
+ '\uFE91' : 'b' # ﺑ
287
+ '\uFE92' : 'b' # ﺒ
288
+ '\uFE90' : 'b' # ﺐ
289
+
290
+ # See note C
291
+ '\u062a' : 't' # ت
292
+ '\ufe97' : 't' # ﺗ
293
+ '\ufe98' : 't' # ﺘ
294
+ '\ufe96' : 't' # ﺖ
295
+
296
+ '\u062b' : 'th' # ث
297
+ '\ufe9b' : 'th' # ﺛ
298
+ '\ufe9c' : 'th' # ﺜ
299
+ '\ufe9a' : 'th' # ﺚ
300
+
301
+ '\u062c' : 'j' # ج
302
+ '\ufe9f' : 'j' # ﺟ
303
+ '\ufea0' : 'j' # ﺠ
304
+ '\ufe9e' : 'j' # ﺞ
305
+
306
+ '\u062d' : 'ẖ' # ح
307
+ '\ufea3' : 'ẖ' # ﺣ
308
+ '\ufea4' : 'ẖ' # ﺤ
309
+ '\ufea2' : 'ẖ' # ﺢ
310
+
311
+ '\u062e' : 'kh' # خ
312
+ '\ufea7' : 'kh' # ﺧ
313
+ '\ufea8' : 'kh' # ﺨ
314
+ '\ufea6' : 'kh' # ﺦ
315
+
316
+ '\u062f' : 'd' # د
317
+ '\ufeaa' : 'd' # ﺪ
318
+
319
+ '\u0630' : 'dh' # ذ
320
+ '\ufeac' : 'dh' # ﺬ
321
+
322
+ '\u0631' : 'r' # ر
323
+ '\ufeae' : 'r' # ﺮ
324
+
325
+ '\u0632' : 'z' # ز
326
+ '\ufeb0' : 'z' # ﺰ
327
+
328
+ '\u0633' : 's' # س
329
+ '\ufeb3' : 's' # ﺳ
330
+ '\ufeb4' : 's' # ﺴ
331
+ '\ufeb2' : 's' # ﺲ
332
+
333
+ '\u0634' : 'sh' # ش
334
+ '\ufeb7' : 'sh' # ﺷ
335
+ '\ufeb8' : 'sh' # ﺸ
336
+ '\ufeb6' : 'sh' # ﺶ
337
+
338
+ '\u0635' : 's̱' # ص
339
+ '\ufebb' : 's̱' # ﺻ
340
+ '\ufebc' : 's̱' # ﺼ
341
+ '\ufeba' : 's̱' # ﺺ
342
+
343
+ '\u0636' : 'ḏ' # ض
344
+ '\ufebf' : 'ḏ' # ﺿ
345
+ '\ufec0' : 'ḏ' # ﻀ
346
+ '\ufebe' : 'ḏ' # ﺾ
347
+
348
+ '\u0637' : 'ṯ' # ط
349
+ '\ufec3' : 'ṯ' # ﻃ
350
+ '\ufec4' : 'ṯ' # ﻄ
351
+ '\ufec2' : 'ṯ' # ﻂ
352
+
353
+ '\u0638' : 'd͟h' # ظ
354
+ '\ufec7' : 'd͟h' # ﻇ
355
+ '\ufec8' : 'd͟h' # ﻈ
356
+ '\ufec6' : 'd͟h' # ﻆ
357
+
358
+ '\u0639' : '‘' # ع
359
+ '\ufecb' : '‘' # ﻋ
360
+ '\ufecc' : '‘' # ﻌ
361
+ '\ufeca' : '‘' # ﻊ
362
+
363
+ '\u063a' : 'gh' # غ
364
+ '\ufecf' : 'gh' # ﻏ
365
+ '\ufed0' : 'gh' # ﻐ
366
+ '\ufece' : 'gh' # ﻎ
367
+
368
+ '\u0641' : 'f' # ف
369
+ '\ufed3' : 'f' # ﻓ
370
+ '\ufed4' : 'f' # ﻔ
371
+ '\ufed2' : 'f' # ﻒ
372
+
373
+ '\u0642' : 'q' # ق
374
+ '\ufed7' : 'q' # ﻗ
375
+ '\ufed8' : 'q' # ﻘ
376
+ '\ufed6' : 'q' # ﻖ
377
+
378
+ '\u0643' : 'k' # ك
379
+ '\ufedb' : 'k' # ﻛ
380
+ '\ufedc' : 'k' # ﻜ
381
+ '\ufeda' : 'k' # ﻚ
382
+
383
+ '\u0644' : 'l' # ل
384
+ '\ufedf' : 'l' # ﻟ
385
+ '\ufee0' : 'l' # ﻠ
386
+ '\ufede' : 'l' # ﻞ
387
+
388
+ '\u0645' : 'm' # م
389
+ '\ufee3' : 'm' # ﻣ
390
+ '\ufee4' : 'm' # ﻤ
391
+ '\ufee2' : 'm' # ﻢ
392
+
393
+ '\u0646' : 'n' # ن
394
+ '\ufee7' : 'n' # ﻧ
395
+ '\ufee8' : 'n' # ﻨ
396
+ '\ufee6' : 'n' # ﻦ
397
+
398
+ # See note C
399
+ '\u0647' : 'h' # ه
400
+ '\ufeeb' : 'h' # ﻫ
401
+ '\ufeec' : 'h' # ﻬ
402
+ '\ufeea' : 'h' # ﻪ
403
+
404
+ '\u0648' : 'w' # و
405
+ '\ufeee' : 'w' # ﻮ
406
+
407
+ '\u064a' : 'y' # ي
408
+ '\ufef3' : 'y' # ﻳ
409
+ '\ufef4' : 'y' # ﻴ
410
+ '\ufef1' : 'y' # ﻱ
411
+
412
+ # (A) Not romanized word-initially.
413
+
414
+ # (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
415
+
416
+ # (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṯah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
417
+
418
+
419
+ # Vowels, diphthongs and diacritical marks
420
+ # (ـ stands for any consonant)
@@ -0,0 +1,114 @@
1
+ ---
2
+ authority_id: un
3
+ id: 2007
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: National System of Geographic Names Transmission into Roman Alphabet in Belarus
8
+ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/9th-uncsgn-docs/crp/9th_UNCSGN_e-conf-98-crp-21.pdf
9
+ creation_date: 2007
10
+ description: |
11
+ RESOLUTION OF THE STATE COMMITTEE
12
+ ON PROPERTY OF THE REPUBLIC OF BELARUS June 11, 2007 No. 38
13
+
14
+ 8/16668 (06/18/2007) On amendments and additions to the Instructions
15
+ for the transliteration of geographical names of the
16
+ Republic of Belarus in letters of the Latin alphabet
17
+
18
+ Based on the Regulation on the State Property Committee of the Republic of Belarus,
19
+ approved by the Decree of the Council of Ministers of the Republic of Belarus dated July 29, 2006
20
+ No. 958 "Issues of the State Committee on Property of the Republic of Belarus"
21
+ tests: # the same as "by-bel-Cyrl-Latn-2007"
22
+ - source: Аршанскi
23
+ expected: Aršanski
24
+ - source: Бешанковічы
25
+ expected: Biešankovičy
26
+ - source: Віцебск
27
+ expected: Viciebsk
28
+ - source: Гомель
29
+ expected: Homieĺ
30
+ - source: Гаўя
31
+ expected: Haŭja
32
+ - source: Добруш
33
+ expected: Dobruš
34
+ - source: Ельск
35
+ expected: Jeĺsk
36
+ - source: Бабаедава
37
+ expected: Babajedava
38
+ - source: Венцавічы
39
+ expected: Viencavičy
40
+ - source: Ёды
41
+ expected: Jody
42
+ - source: Вераб'ёвічы
43
+ expected: Vierabjovičy
44
+ - source: Мёры
45
+ expected: Miory
46
+ - source: Зэльва
47
+ expected: Zeĺva
48
+ - source: Iванава
49
+ expected: Ivanava
50
+ - source: Iўе
51
+ expected: Iŭje
52
+ - source: Лагойск
53
+ expected: Lahojsk
54
+ - source: Круглае
55
+ expected: Kruhlaje
56
+ - source: Лошыца
57
+ expected: Lošyca
58
+ - source: Любань
59
+ expected: Liubań
60
+ - source: Магілёў
61
+ expected: Mahilioŭ
62
+ - source: Нясвіж
63
+ expected: Niasviž
64
+ - source: Орша
65
+ expected: Orša
66
+ - source: Паставы
67
+ expected: Pastavy
68
+ - source: Рагачоў
69
+ expected: Rahačoŭ
70
+ - source: Смаргонь
71
+ expected: Smarhoń
72
+ - source: Талачын
73
+ expected: Talačyn
74
+ - source: Узда
75
+ expected: Uzda
76
+ - source: Шаркаўшчына
77
+ expected: Šarkaŭščyna
78
+ - source: Фаніпаль
79
+ expected: Fanipaĺ
80
+ - source: Хоцімск
81
+ expected: Chocimsk
82
+ - source: Цёмны Лес
83
+ expected: Ciomny Lies
84
+ - source: Чавусы
85
+ expected: Čavusy
86
+ - source: Шумілiна
87
+ expected: Šumilina
88
+ - source: Чыгірынка
89
+ expected: Čyhirynka
90
+ - source: Чэрвень
91
+ expected: Červień
92
+ - source: Друць
93
+ expected: Druć
94
+ - source: Чачэрск
95
+ expected: Čačersk
96
+ - source: Юхнаўка
97
+ expected: Juchnaŭka
98
+ - source: Гаюціна
99
+ expected: Hajucina
100
+ - source: Цюрлi
101
+ expected: Ciurli
102
+ - source: Любонічы
103
+ expected: Liuboničy
104
+ - source: Ямнае
105
+ expected: Jamnaje
106
+ - source: Баяры
107
+ expected: Bajary
108
+ - source: Валяр'яны
109
+ expected: Valiarjany
110
+ - source: Вязынка
111
+ expected: Viazynka
112
+
113
+ map:
114
+ inherit: "by-bel-Cyrl-Latn-2007"