interscript 0.1.2 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/lib/g2pwrapper.py +34 -0
  4. data/lib/interscript.rb +142 -20
  5. data/lib/interscript/command.rb +28 -0
  6. data/lib/interscript/fs.rb +69 -0
  7. data/lib/interscript/mapping.rb +142 -0
  8. data/lib/interscript/opal.rb +57 -0
  9. data/lib/interscript/opal/entrypoint.rb +12 -0
  10. data/lib/interscript/opal/map_translate.rb +7 -0
  11. data/lib/interscript/opal/maps.js.erb +10 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/lib/model-7 +0 -0
  14. data/lib/tha-pt-b-7 +0 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  21. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  22. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
  25. data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
  26. data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
  27. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  28. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  29. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  30. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  31. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  32. data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
  33. data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
  34. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  35. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  36. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  37. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  38. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  39. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  40. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  41. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  42. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  43. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  44. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  45. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  46. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  47. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  48. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  49. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  50. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  51. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
  52. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  53. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  54. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  55. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
  56. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  57. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  58. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
  59. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
  60. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  61. data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
  62. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  63. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  64. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  65. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  66. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
  67. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
  68. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  69. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  70. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
  71. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
  72. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
  73. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  74. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
  75. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
  76. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  77. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  78. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
  79. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  80. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  81. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  82. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  83. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
  84. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  85. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  86. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  87. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  88. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  89. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  90. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  91. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
  92. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
  93. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  94. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  95. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
  96. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
  97. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  98. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  99. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  100. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  101. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  102. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  103. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  104. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  105. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  106. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  107. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  108. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  109. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  110. data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
  111. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  112. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  113. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  114. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  115. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  116. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  117. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  118. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  119. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  120. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  121. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  122. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  123. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  124. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  125. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  126. data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
  127. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  128. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  129. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
  130. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  131. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  132. data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
  133. data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
  134. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
  135. data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
  136. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
  137. data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
  138. data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
  139. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
  140. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  141. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  142. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  143. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
  144. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
  145. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  146. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  147. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  148. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
  149. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  150. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  151. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  152. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  153. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  154. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  155. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  156. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  157. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  158. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  159. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  160. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  161. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  162. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  163. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  164. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
  165. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  166. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  167. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  172. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  173. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  174. data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
  175. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  176. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  177. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  178. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  179. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  180. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  181. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  182. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  183. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  184. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  185. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  186. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  187. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  188. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  189. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  190. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  191. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  192. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  193. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  194. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  195. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  196. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  197. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  198. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  199. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  200. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  201. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  202. data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
  203. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  204. data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
  205. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
  206. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  207. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  208. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  209. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  210. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
  211. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  212. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  213. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  214. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  215. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  216. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  217. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  218. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  219. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
  220. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  221. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  222. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  223. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  224. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  225. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  226. data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
  227. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  228. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  229. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  230. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  231. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  232. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  233. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  234. data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
  235. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  236. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  237. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  238. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  239. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  240. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  241. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  242. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  243. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  244. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  245. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  246. data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
  247. data/spec/interscript/filenames_spec.rb +384 -0
  248. data/spec/interscript/mapping_spec.rb +42 -0
  249. data/spec/interscript_spec.rb +23 -5
  250. data/spec/spec_helper.rb +3 -1
  251. metadata +364 -34
  252. data/bin/interscript +0 -20
  253. data/bin/rspec +0 -29
  254. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  255. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  256. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  257. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  258. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  259. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  260. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
  261. data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -0,0 +1,225 @@
1
+ ---
2
+ authority_id: mvd
3
+ id: 2008
4
+ language: iso-639-2:bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: About approval of the Instructions for transliteration of surnames and proper names of citizens of the Republic of Belarus when their personal data is included in the population register
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2008
10
+
11
+ description: |
12
+ RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
13
+ October 9, 2008, No. 288
14
+ 8/19678 (10.23.2008)
15
+ About approval of the Instructions for transliteration of surnames and proper names of citizens of
16
+ the Republic of Belarus when their personal data is included in the population register
17
+
18
+ notes:
19
+ # Original notes
20
+ # - |
21
+ # Инструкция по транслитерации фамилий и собственных имен граждан Республики Беларусь при включении
22
+ # их персональных данных в регистр населения устанавливает правила передачи с белорусской либо русской формы
23
+ # написания на латиницу при включении фамилий и собственных имен граждан Республики Беларусь в регистр населения.
24
+ # - |
25
+ # Передача фамилий и собственных имен граждан Республики Беларусь на латиницу осуществляется с их правильного
26
+ # написания на белорусском либо русском языке, засвидетельствованного документами, удостоверяющими личность.
27
+ # - Передача фамилий и собственных имен граждан Республики Беларусь осуществляется путем транслитерации
28
+ # литер (букв, знаков) белорусского либо русского написания соответствующими литерами латиницы.
29
+ # - Транслитерацией достигается общность и унифицированность системы латинизированного написания,
30
+ # позволяющей пользоваться ею во всех латинопишущих государствах.
31
+ # - Смягчение согласной буквы, обозначенное мягким знаком, в белорусской латинице следует показывать
32
+ # диакритическим знаком (́), который располагается над соответствующей буквой: дзь – dź, зь – ź, ль – ĺ,
33
+ # нь – ń, сь – ś, ць – ć.
34
+ # - Правила транслитерации букв белорусского и русского алфавитов соответствующими буквами латиницы
35
+ # приведены в таблице транслитерации букв белорусского и русского алфавитов буквами латиницы соглас
36
+ # но приложению к настоящей Инструкции.
37
+ # - Сложные и составные фамилии и собственные имена, пишущиеся слитно, раздельно или через дефис,
38
+ # сохраняют слитное, раздельное или дефисное написание и в латинице.
39
+ - | # 1
40
+ Instructions for transliterating the names and first names of citizens of the Republic of Belarus
41
+ when including their personal data in the population register sets the rules for transferring
42
+ from the Belarusian or Russian form of writing in Latin when including the names and first names
43
+ of citizens of the Republic of Belarus in the population register.
44
+ - | # 2
45
+ Transfer of surnames and proper names of citizens of the Republic of Belarus to the Latin alphabet
46
+ is carried out from their correct spelling in Belarusian or Russian, as attested by identity documents.
47
+ - | # 3
48
+ The transfer of surnames and proper names of citizens of the Republic of Belarus is carried out by
49
+ transliteration of the letters (letters, signs) of the Belarusian or Russian spelling in the corresponding
50
+ Latin letters.
51
+ - | # 4
52
+ Transliteration achieves the generality and unification of the system of Latinized writing,
53
+ which allows it to be used in all Latin-writing countries.
54
+ - | # 5
55
+ The softening of the consonant, indicated by a soft sign, in the Belarusian Latin should be shown
56
+ with a diacritic mark (́), which is located above the corresponding letter:
57
+ дзь - dź,
58
+ зь - ź,
59
+ ль - ĺ,
60
+ нь - ń,
61
+ сь - ś,
62
+ ць - ć.
63
+ # 6
64
+ - The rules for transliterating letters of the Belarusian and Russian alphabets with the corresponding
65
+ letters of the Latin alphabet are given in the table of transliteration of letters of the Belarusian
66
+ and Russian alphabets with the Latin letters according to the appendix to this Instruction.
67
+ # 7
68
+ - Complex and compound surnames and proper names, spelled together, separately or through a hyphen,
69
+ keep a single, separate or hyphen spelling in Latin.
70
+
71
+ tests:
72
+ - source: Ева
73
+ expected: Jeva
74
+ - source: Васiльева
75
+ expected: Vasiĺjeva
76
+ - source: Васiлёнак
77
+ expected: Vasilionak
78
+ - source: Ёрш
79
+ expected: Jorsh
80
+ - source: Вераб’ёў
81
+ expected: Vierabjow
82
+ - source: Салаўёва
83
+ expected: Salawjova
84
+ - source: Любоў
85
+ expected: Liubow
86
+ - source: В’юноў
87
+ expected: Vjunow
88
+ - source: Чарняк
89
+ expected: Charniak
90
+ - source: Лябецкая
91
+ expected: Liabietskaja # in reference doc it's Liabetskaja CAMOBAP waiting confirmation from officials
92
+ - source: Дар’я
93
+ expected: Darja
94
+
95
+ map:
96
+ rules:
97
+ - pattern: (\u2019\u0415) # Е
98
+ result: Je
99
+ - pattern: (\u2019\u0435) # е
100
+ result: je
101
+ - pattern: (\u2019\u0401) # Ё
102
+ result: Jo
103
+ - pattern: (\u2019\u0451) # ё
104
+ result: jo
105
+ - pattern: (\u2019\u042E) # Ю
106
+ result: Ju
107
+ - pattern: (\u2019\u044E) # ю
108
+ result: ju
109
+ - pattern: (\u2019\u042F) # Я
110
+ result: Ja
111
+ - pattern: (\u2019\u044F) # я
112
+ result: ja
113
+
114
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0415 # Е after vowels
115
+ result: Je
116
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0435 # е after vowels
117
+ result: je
118
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0401 # Ё after vowels
119
+ result: Jo
120
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u0451 # ё after vowels
121
+ result: jo
122
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u042E # Ю after vowels
123
+ result: Ju
124
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u044E # ю after vowels
125
+ result: ju
126
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u042F # Я after vowels
127
+ result: Ja
128
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЬьЎў])\u044F # я after vowels
129
+ result: ja
130
+
131
+ # note[5]
132
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
133
+ result: "\\1\u0301"
134
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u02B9 # Ь after consonants
135
+ result: "\\1\u0301"
136
+
137
+ # vowels initially
138
+ - pattern: \b\u0415 # Е
139
+ result: Je
140
+ - pattern: \b\u0435 # е
141
+ result: je
142
+ - pattern: \b\u0401 # Ё
143
+ result: Jo
144
+ - pattern: \b\u0451 # ё
145
+ result: jo
146
+ - pattern: \b\u042E # Ю
147
+ result: Ju
148
+ - pattern: \b\u044E # ю
149
+ result: ju
150
+ - pattern: \b\u042F # Я
151
+ result: Ja
152
+ - pattern: \b\u044F # я
153
+ result: ja
154
+
155
+ postrules:
156
+ - pattern: \u2019
157
+ result: j
158
+
159
+ characters:
160
+
161
+ '\u0410' : 'A' # А
162
+ '\u0411' : 'B' # Б
163
+ '\u0412' : 'V' # B
164
+ '\u0413' : 'G' # Г
165
+ '\u0414' : 'D' # Д
166
+ '\u0415' : 'Ie' # Е or JE TODO add rule
167
+ '\u0401' : 'Io' # Ё or JO TODO add rule
168
+ '\u0416' : 'Zh' # Ж
169
+ '\u0417' : 'Z' # З
170
+ '\u0406' : 'I' # І
171
+ '\u0419' : "J" # Й
172
+ '\u041A' : 'K' # К
173
+ '\u041B' : 'L' # Л
174
+ '\u041C' : 'M' # М
175
+ '\u041D' : 'N' # Н
176
+ '\u041E' : 'O' # О
177
+ '\u041F' : 'P' # П
178
+ '\u0420' : 'R' # Р
179
+ '\u0421' : 'S' # С
180
+ '\u0422' : 'T' # Т
181
+ '\u0423' : 'U' # У
182
+ '\U040E' : 'W' # Ў
183
+ '\u0424' : 'F' # Ф
184
+ '\u0425' : 'Kh' # Х
185
+ '\u0426' : 'Ts' # Ц
186
+ '\u0427' : 'Ch' # Ч
187
+ '\u0428' : 'Sh' # Ш
188
+ '\u0429' : 'Shch' # Щ
189
+ '\u042B' : 'Y' # Ы
190
+ '\u042D' : 'E' # Э
191
+ '\u042E' : "Iu" # Ю
192
+ '\u042F' : "Ia" # Я
193
+
194
+ '\u0430' : 'a' # а
195
+ '\u0431' : 'b' # б
196
+ '\u0432' : 'v' # в
197
+ '\u0433' : 'g' # г
198
+ '\u0434' : 'd' # д
199
+ '\u0435' : 'ie' # е
200
+ '\u0451' : 'io' # ё
201
+ '\u0436' : 'zh' # ж
202
+ '\u0437' : 'z' # з
203
+ '\u0456' : 'i' # і
204
+ '\u0439' : 'j' # й
205
+ '\u043A' : 'k' # к
206
+ '\u043B' : 'l' # л
207
+ '\u043C' : 'm' # м
208
+ '\u043D' : 'n' # н
209
+ '\u043E' : 'o' # о
210
+ '\u043F' : 'p' # п
211
+ '\u0440' : 'r' # р
212
+ '\u0441' : 's' # с
213
+ '\u0442' : 't' # т
214
+ '\u0443' : 'u' # у
215
+ '\u045E' : 'w' # ў
216
+ '\u0444' : 'f' # ф
217
+ '\u0445' : 'kh' # х
218
+ '\u0446' : 'ts' # ц
219
+ '\u0447' : 'ch' # ч
220
+ '\u0448' : 'sh' # ш
221
+ '\u0449' : 'shch' # щ
222
+ '\u044B' : 'y' # ы
223
+ '\u044D' : 'e' # э
224
+ '\u044E' : "iu" # ю
225
+ '\u044F' : "ia" # я
@@ -0,0 +1,63 @@
1
+ ---
2
+ authority_id: mvd
3
+ id: 2010
4
+ language: iso-639-2:bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: |
8
+ 8/22721 About approval of the Instructions on the organization of work of units of citizenship
9
+ and migration of internal affairs bodies on the issuance, registration, exchange,
10
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
11
+ url: https://pravo.by/document/?guid=3871&p0=W21022721
12
+ creation_date: 2010
13
+
14
+ description: |
15
+ RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
16
+ June 28, 2010 No. 200
17
+ On approval of the Instructions on the organization of work of units of citizenship
18
+ and migration of internal affairs bodies on the issuance, registration, exchange,
19
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
20
+
21
+ notes:
22
+ - |
23
+ Superscript marks and punctuation may not be used in the spelling of the surname or given name, except
24
+ for the use of the apostrophe in Belarusian and Latin spelling.
25
+ - The Belarusian letter "Г" is written as the Latin "H"
26
+
27
+ tests:
28
+ - source: Бабрыковіч Аляксандр
29
+ expected: Babrykovich Aliaksandr
30
+ - source: Міховіч Марыя
31
+ expected: Mikhovich Maryia
32
+ - source: Максім
33
+ expected: Maksim
34
+ - source: Іван
35
+ expected: Ivan
36
+ - source: СВЯТЛАНА
37
+ expected: SVIATLANA
38
+ - source: Ігар
39
+ expected: Ihar
40
+ - source: МІХАІЛ
41
+ expected: MIKHAIL
42
+
43
+ map:
44
+ inherit: "mvd-bel-Cyrl-Latn-2008"
45
+
46
+ rules:
47
+ # note[5]
48
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
49
+ result: "\\1"
50
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u02B9 # Ь after consonants
51
+ result: "\\1"
52
+ # Й at end
53
+ - pattern: (?<=[ЕеЁёЫыЮюЯя])\u0419$ # Й at end, after vowels
54
+ result: ""
55
+ - pattern: (?<=[ЕеЁёЫыЮюЯя])\u0439$ # й at end, after vowels
56
+ result: ""
57
+
58
+ characters:
59
+ '\u0413' : 'H' # Г
60
+ '\u0433' : 'h' # г
61
+
62
+ '\u042C' : '' # Ь
63
+ '\u044C' : '' # ь
@@ -0,0 +1,109 @@
1
+ ---
2
+ authority_id: mvd
3
+ id: 2008
4
+ language: iso-639-2:rus
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: 8/19678 On approval of the Instructions for transliteration of surnames and proper names of citizens of the Republic of Belarus when their personal data is included in the population register
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2008
10
+
11
+ notes:
12
+ - check notes from mvd-bel-Cyrl-Latn-2008
13
+
14
+ tests:
15
+ - source: Ева
16
+ expected: Eva
17
+ - source: Васiльева
18
+ expected: Vasiĺeva
19
+ - source: Адъютантов
20
+ expected: Adjutantov
21
+
22
+ map:
23
+ rules:
24
+ # note[5]
25
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u044C # ь after consonants
26
+ result: "\\1\u0301"
27
+ - pattern: (?<=[ЗзЛлНнСсЦц])\u02B9 # Ь after consonants
28
+ result: "\\1\u0301"
29
+ - pattern: ([’Ъъ]\u042E)
30
+ result: Ju
31
+ - pattern: ([’Ъъ]\u044E)
32
+ result: ju
33
+ - pattern: ([’Ъъ]\u042F)
34
+ result: Ja
35
+ - pattern: ([’Ъъ]\u044F)
36
+ result: ja
37
+
38
+ characters:
39
+ '’' : 'j'
40
+
41
+ '\u0410' : 'A' # А
42
+ '\u0411' : 'B' # Б
43
+ '\u0412' : 'V' # B
44
+ '\u0413' : 'G' # Г
45
+ '\u0414' : 'D' # Д
46
+ '\u0415' : 'E' # Е
47
+ '\u0401' : 'E' # Ё
48
+ '\u0416' : 'Zh' # Ж
49
+ '\u0417' : 'Z' # З
50
+ '\u0406' : 'I' # І
51
+ '\u0419' : "J" # Й
52
+ '\u041A' : 'K' # К
53
+ '\u041B' : 'L' # Л
54
+ '\u041C' : 'M' # М
55
+ '\u041D' : 'N' # Н
56
+ '\u041E' : 'O' # О
57
+ '\u041F' : 'P' # П
58
+ '\u0420' : 'R' # Р
59
+ '\u0421' : 'S' # С
60
+ '\u0422' : 'T' # Т
61
+ '\u0423' : 'U' # У
62
+ '\U040E' : 'W' # Ў
63
+ '\u0424' : 'F' # Ф
64
+ '\u0425' : 'Kh' # Х
65
+ '\u0426' : 'Ts' # Ц
66
+ '\u0427' : 'Ch' # Ч
67
+ '\u0428' : 'Sh' # Ш
68
+ '\u0429' : 'Shch' # Щ
69
+ '\u042A' : 'J' # Ъ
70
+ '\u042B' : 'Y' # Ы
71
+ '\u042C' : '' # Ь
72
+ '\u042D' : 'E' # Э
73
+ '\u042E' : 'Iu' # Ю
74
+ '\u042F' : 'Ia' # Я
75
+
76
+ '\u0430' : 'a' # а
77
+ '\u0431' : 'b' # б
78
+ '\u0432' : 'v' # в
79
+ '\u0433' : 'g' # г
80
+ '\u0434' : 'd' # д
81
+ '\u0435' : 'e' # е
82
+ '\u0451' : 'e' # ё
83
+ '\u0436' : 'zh' # ж
84
+ '\u0437' : 'z' # з
85
+ '\u0456' : 'i' # і
86
+ '\u0439' : 'j' # й
87
+ '\u043A' : 'k' # к
88
+ '\u043B' : 'l' # л
89
+ '\u043C' : 'm' # м
90
+ '\u043D' : 'n' # н
91
+ '\u043E' : 'o' # о
92
+ '\u043F' : 'p' # п
93
+ '\u0440' : 'r' # р
94
+ '\u0441' : 's' # с
95
+ '\u0442' : 't' # т
96
+ '\u0443' : 'u' # у
97
+ '\u045E' : 'w' # ў
98
+ '\u0444' : 'f' # ф
99
+ '\u0445' : 'kh' # х
100
+ '\u0446' : 'ts' # ц
101
+ '\u0447' : 'ch' # ч
102
+ '\u0448' : 'sh' # ш
103
+ '\u0449' : 'shch' # щ
104
+ '\u044A' : 'j' # ъ
105
+ '\u044B' : 'y' # ы
106
+ '\u044C' : '' # ь
107
+ '\u044D' : 'e' # э
108
+ '\u044E' : 'iu' # ю
109
+ '\u044F' : 'ia' # я
@@ -0,0 +1,37 @@
1
+ ---
2
+ authority_id: mvd
3
+ id: 2010
4
+ language: iso-639-2:bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: |
8
+ 8/22721 On approval of the Instructions on the organization of work of units of citizenship
9
+ and migration of internal affairs bodies on the issuance, registration, exchange,
10
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
11
+ url: https://pravo.by/document/?guid=3871&p0=W21022721
12
+ creation_date: 2010
13
+
14
+ description: |
15
+ RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
16
+ June 28, 2010 No. 200
17
+ On approval of the Instructions on the organization of work of units of citizenship
18
+ and migration of internal affairs bodies on the issuance, registration, exchange,
19
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
20
+
21
+ notes:
22
+ - check notes from mvd-rus-Cyrl-Latn-2008
23
+
24
+ tests:
25
+ - source: Ева
26
+ expected: Eva
27
+ - source: Васiльева
28
+ expected: Vasileva
29
+ - source: Адъютантов
30
+ expected: Adjutantov
31
+
32
+ map:
33
+ inherit: "mvd-rus-Cyrl-Latn-2008"
34
+
35
+ postrules:
36
+ - pattern: \u0301 # remove diacritics
37
+ result: ""
@@ -0,0 +1,425 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 2017
4
+ language: ics-630-01:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
8
+ url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20A%20-%20Arabic_Personal_Names_FLTS%20(U).pdf
9
+ creation_date: 2017
10
+ confirmation_date: 2018-06
11
+ description: |
12
+ This system, adapted from the Board on Geographic Names, is
13
+ the Intelligence Community (IC) standard for the
14
+ transliteration of Arabic names that will be applied to all
15
+ final written reports and products for IC consumers. It is
16
+ not intended to eliminate variations of a name that can
17
+ contribute forensic information. Rather, it is to provide
18
+ an IC standard Romanized (English) transliteration from
19
+ modern standard Arabic that can then be linked to forensic
20
+ information in ways that will help identify the referent of
21
+ the name. Ambiguities can result from the Romanization of
22
+ Arabic names because the Arabic source generally omits
23
+ short vowel markings, double consonant marks, and other
24
+ diacritics that would clearly distinguish the name.
25
+ Linguists use their experience with the language and aids
26
+ such as on-line tools and name dictionaries to determine
27
+ the exact Arabic and the appropriate transliteration into
28
+ the Roman alphabet. In cases where an individual's name has
29
+ already been transliterated, that is to be indicated -- as
30
+ found -- in parentheses immediately following its rendition
31
+ in the transliteration standard (e.g., Muhammad Khulud (
32
+ Mohamed Khulood)). In addition, if the original Arabic-
33
+ script spelling is known, that spelling should also appear
34
+ in parentheses following the name, if possible, following
35
+ best practices of the issuing organization and taking into
36
+ consideration information system capabilities. This
37
+ convention is designed to ensure that vital forensic
38
+ information is not lost. For names of persons who are known
39
+ to not be part of the Arabic-speaking community, use the
40
+ relevant IC transliteration standard for names from that
41
+ language (e.g., Mikhail, Yitzhak). A translator’s note may
42
+ be used to clarify the known origin of the person. Spell
43
+ names of individuals from languages that are written in
44
+ Roman letters as they are spelled in those languages (e.g.,
45
+ George Clooney, Jorge Garcia, Georges Pompidou). In the
46
+ case of active senior government officials in the on-line
47
+ CIA World Factbook and the online directory of Chiefs of
48
+ State and Cabinet Members of Foreign Governments, the
49
+ spellings given in these on-line reference works should be
50
+ used in place of the IC Standard. For any individual who
51
+ has at one time been listed in the Factbook or Chiefs of
52
+ State directory but who no longer appears in those
53
+ resources (i.e. is no longer a government official), the IC
54
+ Standard spelling should appear first, with the spelling,
55
+ if known, as it previously appeared in those resources
56
+ listed within parentheses at the first usage. The primary
57
+ goal of this system is to produce a consistent Romanized
58
+ transcription of the name that is readable to the non-
59
+ specialist. The system uses the 26 letters of the standard (
60
+ English) Roman alphabet plus the apostrophe. Some
61
+ ambiguities in the Romanized form will occur without the
62
+ use of diacritics. However, within the context of a report,
63
+ where additional information about the individual is
64
+ provided, the referent will be clearly identified. This
65
+ system will be used in conjunction with on-line tools, name
66
+ dictionaries, and lists containing conventional spellings
67
+ of names of well-known individuals.
68
+ notes: |
69
+ - Long/Short Vowels: Long and short vowels are not
70
+ distinguished in this system (e.g., Samir could be Saamir or
71
+ Samiir in Arabic).
72
+
73
+ - Double consonants: Double consonants represented by the
74
+ Arabic shaddah are shown in most cases (e.g., Hassan,
75
+ Muhammad). Exceptions: ’ayn and consonants represented by
76
+ digraphs are not doubled (e.g., al-Qadhafi [not
77
+ al-Qadhdhafi], Mubashir [not Mubashshir]).
78
+
79
+ - Hamzah (glottal stop): The hamzah is represented by an
80
+ apostrophe (’). Note that this is the same symbol used to
81
+ represent another consonant, the ’ayn.
82
+
83
+ - Ta’ marbutah (feminine ending marker): In the construct
84
+ form or when pronounced “t”, it is represented with a roman
85
+ t. In all other cases, it is represented with an h.
86
+
87
+ - Digraphs: No distinction is made between digraphs such as
88
+ sh and single contiguous letters (e.g., s followed by h).
89
+
90
+ - Definite article “al” (‘the’): Follows Arabic spelling
91
+ rather than pronunciation. That is, sun letter assimilation
92
+ is not shown in the Romanized form (e.g., ’Abd-al-Rahman,
93
+ not ’Abd-ar-Rahman).
94
+
95
+ - Diphthongs: the second element of the diphthong is
96
+ represented by a y or a w (rather than an i or a u):
97
+ Haytham, Faysal, Tawfiq, Rawdah.
98
+
99
+ - Hyphens: Hyphens (-) are used to connect name elements
100
+ within a name: ’Abd-al-Rahman, Abu-al-Bashar, Bin-Ladin.
101
+ Exceptions: Names that incorporate “Allah” as part of the
102
+ name (e.g., ’Abdallah, Nasrallah), names marked by the
103
+ lineage/family marker “Al” (e.g., Al Thani) are not
104
+ hyphenated.
105
+
106
+ - The definite article, “al”, within name phrases, is
107
+ Romanized as al and not as ul: Nur-al-Din (not Nur-ul-Din).
108
+ It is not capitalized when name-initial.
109
+
110
+ - Names that incorporate Allah as part of the name retain the
111
+ a of Allah rather than a grammatical marker u: ’Abdallah (
112
+ not ’Abdullah).
113
+
114
+ - Foreign names borrowed or appearing in Arabic are spelled
115
+ according to the standard Western tradition: Georges,
116
+ Michel. However, names of non-Arabic origin no longer
117
+ considered foreign by Arabic speakers follow the IC
118
+ conventions: Butrus (not Peter).
119
+
120
+ - Prefix ‫بن‬ (bin ‘son of’) is Romanized Bin unless written
121
+ with an alif, in which case it is Romanized as Ibn. The
122
+ colloquial form Bu (‘father’) should not be standardized as
123
+ Abu. These prefixes are capitalized.
124
+
125
+ - In general, Romanization follows the Modern Standard
126
+ Arabic (MSA) form rather than local pronunciation
127
+ standards. For example, the letter ‫ج‬ (jim) is represented
128
+ as a j even when pronounced as a “g” (e.g., Egyptian Gamal
129
+ is Romanized as Jamal).
130
+
131
+ tests:
132
+
133
+ - source: مِصر
134
+ expected: Misr
135
+
136
+ - source: قَطَر
137
+ expected: Qatar
138
+
139
+ - source: المَغرِب
140
+ expected: Al Maghrib
141
+
142
+ - source: الجُمهُورِيَّة العِراقِيَّة
143
+ expected: Al Jumhuriyah al ’Iraqiyah
144
+
145
+ - source: جُمهُورِيَّة العِراق
146
+ expected: Jumhuriyat al ’Iraq
147
+
148
+ - source: جُمهُورِيَّة مِصر العَرَبِيَّة
149
+ expected: Jumhuriyat Misr al ’Arabiyah
150
+
151
+ - source: بَغداد
152
+ expected: Baghdad
153
+
154
+ - source: تُونِس
155
+ expected: Tunis
156
+
157
+ - source: حَسّان
158
+ expected: Hassan
159
+
160
+ - source: مُحَمَّد
161
+ expected: Muhammad
162
+
163
+ - source: القَذَّافِي
164
+ expected: Al Qadhafi
165
+
166
+ - source: مُبَشِّر
167
+ expected: Mubashir
168
+
169
+ - source: الجَزائِر
170
+ expected: Al Jaza’ir
171
+
172
+ - source: عَبدالرَحمَن
173
+ expected: ’Abd-al-Rahman
174
+
175
+ - source: هَيْثَم
176
+ expected: Haytham
177
+
178
+ - source: فَيْصَل
179
+ expected: Faysal
180
+
181
+ - source: تَوْفِيق
182
+ expected: Tawfiq
183
+
184
+ - source: رَوْضَة
185
+ expected: Rawdah
186
+
187
+ - source: نُورُالدِين
188
+ expected: Nur-al-Din
189
+
190
+ - source: عَبدُاللَّه
191
+ expected: ’Abdallah
192
+ map:
193
+ postrules:
194
+ - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
195
+ result: "upcase"
196
+ - pattern: " Al " # ال
197
+ result: " al "
198
+ - pattern: "-Al-" # ال
199
+ result: "-al-"
200
+
201
+ # don't capitalize the definite article in the middle of a sentence
202
+
203
+ characters:
204
+
205
+ # Tool used for Unicode finding:
206
+ # https://www.branah.com/unicode-converter
207
+
208
+ # pointing
209
+ '\u064e' : 'a' # َ fatha
210
+ '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
211
+ '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
212
+ '\u0650' : 'i' # ِ kasra
213
+ '\u064f' : 'u' # ُ damma
214
+ '\u0652' : '' # ْ sokoon, see note A below
215
+
216
+
217
+ '\u0650\u064a' : 'i' # ـِي kasra followed by ي
218
+ '\u0650\u064a\u0651\u064e' : 'iy' # ـِيَّ
219
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
220
+ '\u064f\u0648' : 'u' # ـُو damma followed by و
221
+ '\u064e\u0627' : 'a' # ـَا fatha followed by ا
222
+ '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
223
+ '\u064e\u0648\u0652' : 'aw' # ـَوْ
224
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
225
+ '\u0622' : 'a' # آ
226
+
227
+ # ta' marboota
228
+ '\u0629' : 'at' # ة in the middle of the sentence
229
+ '\u0629$' : 'ah'
230
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
231
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
232
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
233
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
234
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
235
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
236
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
237
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
238
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
239
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
240
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
241
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
242
+
243
+ # shadda
244
+
245
+ '\u0628\u0651' : 'bb' # ب
246
+ '\u062a\u0651' : 'tt' # ت
247
+ '\u062b\u0651' : 'th' # ث
248
+ '\u062c\u0651' : 'jj' # ج
249
+ '\u062d\u0651' : 'hh' # ح
250
+ '\u062e\u0651' : 'kh' # خ
251
+ '\u062f\u0651' : 'dd' # د
252
+ '\u0630\u0651' : 'dh' # ذ
253
+ '\u0631\u0651' : 'rr' # ر
254
+ '\u0632\u0651' : 'zz' # ز
255
+ '\u0633\u0651' : 'ss' # س
256
+ '\u0634\u0651' : 'sh' # ش
257
+ '\u0635\u0651' : 'ss' # ص
258
+ '\u0636\u0651' : 'dd' # ض
259
+ '\u0637\u0651' : 'tt' # ط
260
+ '\u0638\u0651' : 'zz' # ظ
261
+ '\u063a\u0651' : 'gh' # غ
262
+ '\u0641\u0651' : 'ff' # ف
263
+ '\u0642\u0651' : 'qq' # ق
264
+ '\u0643\u0651' : 'kk' # ك
265
+ '\u0644\u0651' : 'll' # ل
266
+ '\u0645\u0651' : 'mm' # م
267
+ '\u0646\u0651' : 'nn' # ن
268
+ '\u0647\u0651' : 'hh' # ه
269
+ '\u0648\u0651' : 'ww' # و
270
+ '\u064a\u0651' : 'yy' # ي
271
+
272
+ '\u0626' : "’" # ئ
273
+
274
+ '\b\u0627\u0644\u0644\u0651\u064e\u0647': 'Allah'
275
+
276
+ '\B\u064f?\u0627\u0644\u0644\u0651\u064e\u0647': 'allah'
277
+
278
+ '\u0621' : # ء
279
+ - '’'
280
+ - ''
281
+
282
+ '\b\u0627\u0644' : 'al ' # ال
283
+ '\B\u064f?\u0627\u0644' : '-al-' # ال in middle of composite name
284
+ # '\uFE8E' : '' # ﺎ
285
+
286
+
287
+ '\u0623' : '' # أ
288
+ '\b\u0627' : '' # ا
289
+ '\u0627' : 'a' # ا
290
+
291
+ '\u0628' : 'b' # ب
292
+ '\uFE91' : 'b' # ﺑ
293
+ '\uFE92' : 'b' # ﺒ
294
+ '\uFE90' : 'b' # ﺐ
295
+
296
+ '\u062a' : 't' # ت
297
+ '\ufe97' : 't' # ﺗ
298
+ '\ufe98' : 't' # ﺘ
299
+ '\ufe96' : 't' # ﺖ
300
+
301
+ '\u062b' : 'th' # ث
302
+ '\ufe9b' : 'th' # ﺛ
303
+ '\ufe9c' : 'th' # ﺜ
304
+ '\ufe9a' : 'th' # ﺚ
305
+
306
+ '\u062c' : 'j' # ج
307
+ '\ufe9f' : 'j' # ﺟ
308
+ '\ufea0' : 'j' # ﺠ
309
+ '\ufe9e' : 'j' # ﺞ
310
+
311
+ '\u062d' : 'h' # ح
312
+ '\ufea3' : 'h' # ﺣ
313
+ '\ufea4' : 'h' # ﺤ
314
+ '\ufea2' : 'h' # ﺢ
315
+
316
+ '\u062e' : 'kh' # خ
317
+ '\ufea7' : 'kh' # ﺧ
318
+ '\ufea8' : 'kh' # ﺨ
319
+ '\ufea6' : 'kh' # ﺦ
320
+
321
+ '\u062f' : 'd' # د
322
+ '\ufeaa' : 'd' # ﺪ
323
+
324
+ '\u0630' : 'dh' # ذ
325
+ '\ufeac' : 'dh' # ﺬ
326
+
327
+ '\u0631' : 'r' # ر
328
+ '\ufeae' : 'r' # ﺮ
329
+
330
+ '\u0632' : 'z' # ز
331
+ '\ufeb0' : 'z' # ﺰ
332
+
333
+ '\u0633' : 's' # س
334
+ '\ufeb3' : 's' # ﺳ
335
+ '\ufeb4' : 's' # ﺴ
336
+ '\ufeb2' : 's' # ﺲ
337
+
338
+ '\u0634' : 'sh' # ش
339
+ '\ufeb7' : 'sh' # ﺷ
340
+ '\ufeb8' : 'sh' # ﺸ
341
+ '\ufeb6' : 'sh' # ﺶ
342
+
343
+ '\u0635' : 's' # ص
344
+ '\ufebb' : 's' # ﺻ
345
+ '\ufebc' : 's' # ﺼ
346
+ '\ufeba' : 's' # ﺺ
347
+
348
+ '\u0636' : 'd' # ض
349
+ '\ufebf' : 'd' # ﺿ
350
+ '\ufec0' : 'd' # ﻀ
351
+ '\ufebe' : 'd' # ﺾ
352
+
353
+ '\u0637' : 't' # ط
354
+ '\ufec3' : 't' # ﻃ
355
+ '\ufec4' : 't' # ﻄ
356
+ '\ufec2' : 't' # ﻂ
357
+
358
+ '\u0638' : 'z' # ظ
359
+ '\ufec7' : 'z' # ﻇ
360
+ '\ufec8' : 'z' # ﻈ
361
+ '\ufec6' : 'z' # ﻆ
362
+
363
+ '\u0639' : '’' # ع
364
+ '\ufecb' : '’' # ﻋ
365
+ '\ufecc' : '’' # ﻌ
366
+ '\ufeca' : '’' # ﻊ
367
+
368
+ '\u063a' : 'gh' # غ
369
+ '\ufecf' : 'gh' # ﻏ
370
+ '\ufed0' : 'gh' # ﻐ
371
+ '\ufece' : 'gh' # ﻎ
372
+
373
+ '\u0641' : 'f' # ف
374
+ '\ufed3' : 'f' # ﻓ
375
+ '\ufed4' : 'f' # ﻔ
376
+ '\ufed2' : 'f' # ﻒ
377
+
378
+ '\u0642' : 'q' # ق
379
+ '\ufed7' : 'q' # ﻗ
380
+ '\ufed8' : 'q' # ﻘ
381
+ '\ufed6' : 'q' # ﻖ
382
+
383
+ '\u0643' : 'k' # ك
384
+ '\ufedb' : 'k' # ﻛ
385
+ '\ufedc' : 'k' # ﻜ
386
+ '\ufeda' : 'k' # ﻚ
387
+
388
+ '\u0644' : 'l' # ل
389
+ '\ufedf' : 'l' # ﻟ
390
+ '\ufee0' : 'l' # ﻠ
391
+ '\ufede' : 'l' # ﻞ
392
+
393
+ '\u0645' : 'm' # م
394
+ '\ufee3' : 'm' # ﻣ
395
+ '\ufee4' : 'm' # ﻤ
396
+ '\ufee2' : 'm' # ﻢ
397
+
398
+ '\u0646' : 'n' # ن
399
+ '\ufee7' : 'n' # ﻧ
400
+ '\ufee8' : 'n' # ﻨ
401
+ '\ufee6' : 'n' # ﻦ
402
+
403
+ # See note C
404
+ '\u0647' : 'h' # ه
405
+ '\ufeeb' : 'h' # ﻫ
406
+ '\ufeec' : 'h' # ﻬ
407
+ '\ufeea' : 'h' # ﻪ
408
+
409
+ '\u0648' : 'w' # و
410
+ '\ufeee' : 'w' # ﻮ
411
+
412
+ '\u064a' : 'y' # ي
413
+ '\ufef3' : 'y' # ﻳ
414
+ '\ufef4' : 'y' # ﻴ
415
+ '\ufef1' : 'y' # ﻱ
416
+
417
+ # (A) Not romanized word-initially.
418
+
419
+ # (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
420
+
421
+ # (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṯah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
422
+
423
+
424
+ # Vowels, diphthongs and diacritical marks
425
+ # (ـ stands for any consonant)