interscript 0.1.2 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/lib/g2pwrapper.py +34 -0
  4. data/lib/interscript.rb +142 -20
  5. data/lib/interscript/command.rb +28 -0
  6. data/lib/interscript/fs.rb +69 -0
  7. data/lib/interscript/mapping.rb +142 -0
  8. data/lib/interscript/opal.rb +57 -0
  9. data/lib/interscript/opal/entrypoint.rb +12 -0
  10. data/lib/interscript/opal/map_translate.rb +7 -0
  11. data/lib/interscript/opal/maps.js.erb +10 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/lib/model-7 +0 -0
  14. data/lib/tha-pt-b-7 +0 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  21. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  22. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
  25. data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
  26. data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
  27. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  28. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  29. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  30. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  31. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  32. data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
  33. data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
  34. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  35. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  36. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  37. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  38. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  39. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  40. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  41. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  42. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  43. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  44. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  45. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  46. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  47. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  48. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  49. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  50. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  51. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
  52. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  53. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  54. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  55. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
  56. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  57. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  58. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
  59. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
  60. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  61. data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
  62. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  63. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  64. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  65. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  66. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
  67. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
  68. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  69. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  70. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
  71. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
  72. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
  73. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  74. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
  75. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
  76. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  77. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  78. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
  79. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  80. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  81. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  82. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  83. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
  84. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  85. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  86. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  87. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  88. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  89. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  90. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  91. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
  92. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
  93. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  94. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  95. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
  96. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
  97. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  98. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  99. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  100. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  101. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  102. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  103. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  104. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  105. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  106. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  107. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  108. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  109. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  110. data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
  111. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  112. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  113. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  114. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  115. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  116. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  117. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  118. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  119. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  120. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  121. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  122. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  123. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  124. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  125. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  126. data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
  127. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  128. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  129. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
  130. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  131. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  132. data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
  133. data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
  134. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
  135. data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
  136. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
  137. data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
  138. data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
  139. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
  140. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  141. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  142. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  143. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
  144. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
  145. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  146. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  147. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  148. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
  149. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  150. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  151. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  152. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  153. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  154. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  155. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  156. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  157. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  158. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  159. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  160. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  161. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  162. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  163. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  164. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
  165. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  166. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  167. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  172. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  173. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  174. data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
  175. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  176. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  177. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  178. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  179. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  180. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  181. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  182. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  183. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  184. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  185. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  186. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  187. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  188. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  189. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  190. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  191. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  192. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  193. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  194. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  195. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  196. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  197. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  198. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  199. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  200. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  201. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  202. data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
  203. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  204. data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
  205. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
  206. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  207. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  208. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  209. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  210. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
  211. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  212. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  213. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  214. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  215. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  216. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  217. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  218. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  219. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
  220. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  221. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  222. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  223. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  224. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  225. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  226. data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
  227. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  228. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  229. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  230. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  231. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  232. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  233. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  234. data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
  235. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  236. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  237. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  238. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  239. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  240. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  241. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  242. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  243. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  244. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  245. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  246. data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
  247. data/spec/interscript/filenames_spec.rb +384 -0
  248. data/spec/interscript/mapping_spec.rb +42 -0
  249. data/spec/interscript_spec.rb +23 -5
  250. data/spec/spec_helper.rb +3 -1
  251. metadata +364 -34
  252. data/bin/interscript +0 -20
  253. data/bin/rspec +0 -29
  254. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  255. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  256. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  257. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  258. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  259. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  260. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
  261. data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -0,0 +1,283 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 1930
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ alias:
8
+ ogc11122:
9
+ code: ara_Arab2Latn_SES_1930
10
+ description: Arabic Survey of Egypt System
11
+ name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
12
+ url: http://www.eki.ee/wgrs/rom1_ar.pdf
13
+ creation_date: 1930
14
+ confirmation_date: 2018-06
15
+ description: |
16
+ The current United Nations recommended romanization
17
+ system was approved in 2017 (resolution XI/3), based on
18
+ the system adopted by Arabic experts at the conference
19
+ held in Beirut in 2007, the Unified Arabic
20
+ Transliteration System, taking into account the
21
+ practical amendments and corrections carried out and
22
+ agreed upon by the representatives of the Arabic-
23
+ speaking countries at the Fourth Arab Conference on
24
+ Geographical Names, held in Beirut in 2008, and some
25
+ clarifications and amendments agreed in Riyadh in 2017.
26
+ Previously, the United Nations had approved a
27
+ romanization system in 1972 (resolution II/8), based on the
28
+ system adopted by Arabic experts at the conference
29
+ held at Beirut in 1971 with the practical amendments carried out
30
+ and agreed upon by the representatives of the Arabic-speaking
31
+ countries at their conference. The table was published in volume
32
+ II of the conference report.
33
+ In UN resolution XI/3 it is specifically stated that the
34
+ system was recommended for the “romanization of the
35
+ geographical names within those Arabic-speaking countries
36
+ where this system is officially adopted”. There is
37
+ evidence of its partial implementation in Jordan, Oman and
38
+ Saudi Arabia. The UNGEGN Working Group on Romanization
39
+ Systems intends to continue monitoring the UN system’s
40
+ implementation across Arabic-speaking countries.
41
+ In some countries there exist local romanization schemes
42
+ or practices. The geographical names of Algeria, Djibouti,
43
+ Mauritania, Morocco and Tunisia are generally rendered in
44
+ the traditional manner which conforms to the principles of
45
+ the French orthography.
46
+ The previous UN-approved system is still found in
47
+ considerable international usage.
48
+ Arabic is written from right to left. The Arabic script
49
+ usually omits vowel points and diacritical marks from
50
+ writing which makes it difficult to obtain uniform results
51
+ in the romanization of Arabic. It is essential to identify
52
+ correctly the words which appear in any particular name
53
+ and to know the standard Arabic-script spelling including
54
+ the relevant vowels. One must also take into account
55
+ dialectal and idiosyncratic deviations. The romanization
56
+ is generally reversible though there may be some ambiguous
57
+ letter sequences (dh, kh, sh, th) which may also point to
58
+ combinations of Arabic characters in addition to the
59
+ respective single characters.
60
+ notes:
61
+ - |
62
+ The Survey of Egypt System (SES) of romanization has the following correspondences with
63
+ the UN system:
64
+ á = a # ـَى fatha followed by ى which is ا not ي
65
+ ā = â (a) # ـَا fatha followed by alef // آ
66
+ -ah (ة-) = a # ة ta' marboota at the end of a sentence
67
+ aw = ô (au) # ـَوْ
68
+ ay = ei (ai) # ـَيْ
69
+ ḏ = ḍ # ض
70
+ dh = dh (z) # ذ
71
+ d͟h = ẓ (d) # ظ
72
+ ẖ = ḥ # ح
73
+ ī = î
74
+ j = g (j)
75
+ q = q (k)
76
+ s = s (c)
77
+ s̱ = ṣ
78
+ ṯ = ṭ
79
+ th = th (t)
80
+ ū = û
81
+ ‘ = ‛
82
+ - |
83
+ The variants in parentheses are used depending on pronunciation and tradition. Not all the
84
+ variations have been given above. The article is always written el- (El-Kafr el-Qadîm, Sharm
85
+ el-Sheikh).
86
+ tests:
87
+
88
+ # Examples taken from:
89
+ # https://unstats.un.org/unsd/geoinfo/geonames/
90
+
91
+ - source: شَرم الشَيْخ
92
+ expected: Sharm el-Sheikh
93
+
94
+ - source: الكَفر القَدِيم
95
+ expected: El-Kafr el-Qadîm
96
+ map:
97
+ inherit: "un-ara-Arab-Latn-2017"
98
+ postrules:
99
+ - pattern : ' El-' # article preceded by a space is written in lower case
100
+ result: ' el-'
101
+
102
+ characters:
103
+
104
+
105
+ # special pointed letters
106
+ '\u0639\u064e' : '‛a' # عَ
107
+ '\u0639\u0650' : '‛i' # عِ
108
+ '\u0639\u064f' : '‛û' # عُ
109
+ # handle MacOS regex difference
110
+ '\u0639\u064f\u0648' : '‛û' # عُو damma followed by و
111
+ '\u0650\u064a' : 'î' # ـِي kasra followed by ي
112
+ '\u0650\u064a\u0651\u064e' : 'îy' # ـِيَّ
113
+ '\u064f\u0648' : 'û' # ـُو damma followed by و
114
+ '\u064e\u0627' : # ـَا fatha followed by ا
115
+ - 'â'
116
+ - 'a'
117
+ '\u064e\u0649' : 'a' # ـَى fatha followed by ى which is ا not ي
118
+ '\u064e\u0648\u0652' : # ـَوْ
119
+ - 'ô'
120
+ - 'au'
121
+ '\u064e\u064a\u0652' : # ـَيْ
122
+ - 'ei'
123
+ - 'ai'
124
+ '\u0622' : # آ
125
+ - 'â'
126
+ - 'a'
127
+
128
+ # ta' marboota in iso-233-1984 is all the same `a`
129
+ '\u0629$' : 'a'
130
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'a'
131
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'a'
132
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'a'
133
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'a'
134
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'a'
135
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'a'
136
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'a'
137
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'a'
138
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'a'
139
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'a'
140
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'a'
141
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'a'
142
+
143
+
144
+ # Sun letters
145
+ '\b\u0627\u0644\u062a' : 'el-t' # الت
146
+ '\b\u0627\u0644\u062b' : # الث
147
+ - 'el-th'
148
+ - 'el-t'
149
+ '\b\u0627\u0644\u062f' : 'el-d' # الد
150
+ '\b\u0627\u0644\u0630' : # الذ
151
+ - 'el-dh'
152
+ - 'el-z'
153
+ '\b\u0627\u0644\u0631' : 'el-r' # الر
154
+ '\b\u0627\u0644\u0632' : 'el-z' # الز
155
+ '\b\u0627\u0644\u0633' : # الس
156
+ - 'el-s'
157
+ - 'el-c'
158
+ '\b\u0627\u0644\u0634' : 'el-sh' # الش
159
+ '\b\u0627\u0644\u0635' : 'el-ṣ' # الص
160
+ '\b\u0627\u0644\u0636' : 'el-ḍ' # الض
161
+ '\b\u0627\u0644\u0637' : 'el-ṭ' # الط
162
+ '\b\u0627\u0644\u0638' : # الظ
163
+ - 'el-ẓ'
164
+ - 'el-d'
165
+ '\b\u0627\u0644\u0644' : 'el-l' # الل
166
+ '\b\u0627\u0644\u0646' : 'el-n' # الن
167
+
168
+
169
+ # shadda
170
+ '\u062b\u0651' : # ث
171
+ - 'thth'
172
+ - 'tt'
173
+ '\u062c\u0651' : # ج
174
+ - 'gg'
175
+ - 'jj'
176
+ '\u062d\u0651' : 'ḥḥ' # ح
177
+ '\u062e\u0651' : 'khkh' # خ
178
+
179
+ '\u0633\u0651' : # س
180
+ - 'ss'
181
+ - 'cc'
182
+ '\u0635\u0651' : 'ṣṣ' # ص
183
+ '\u0636\u0651' : 'ḍḍ' # ض
184
+ '\u0637\u0651' : 'ṭṭ' # ط
185
+ '\u0638\u0651' : # ظ
186
+ - 'ẓẓ'
187
+ - 'dd'
188
+ '\u0642\u0651' : # ق
189
+ - 'qq'
190
+ - 'kk'
191
+
192
+ '\b\u0627\u0644' : 'el-' # ال
193
+
194
+ # normal letters
195
+ '\u062c' : # ج
196
+ - 'g'
197
+ - 'j'
198
+ '\ufe9f' : # ﺟ
199
+ - 'g'
200
+ - 'j'
201
+ '\ufea0' : # ﺠ
202
+ - 'g'
203
+ - 'j'
204
+ '\ufe9e' : # ﺞ
205
+ - 'g'
206
+ - 'j'
207
+
208
+ '\u062d' : 'ḥ' # ح
209
+ '\ufea3' : 'ḥ' # ﺣ
210
+ '\ufea4' : 'ḥ' # ﺤ
211
+ '\ufea2' : 'ḥ' # ﺢ
212
+
213
+ '\u062e' : 'kh' # خ
214
+ '\ufea7' : 'kh' # ﺧ
215
+ '\ufea8' : 'kh' # ﺨ
216
+ '\ufea6' : 'kh' # ﺦ
217
+
218
+ '\u0630' : # ذ
219
+ - 'dh'
220
+ - 'z'
221
+ '\ufeac' : # ﺬ
222
+ - 'dh'
223
+ - 'z'
224
+
225
+
226
+ '\u0633' : # س
227
+ - 's'
228
+ - 'c'
229
+ '\ufeb3' : # ﺳ
230
+ - 's'
231
+ - 'c'
232
+ '\ufeb4' : # ﺴ
233
+ - 's'
234
+ - 'c'
235
+ '\ufeb2' : # ﺲ
236
+ - 's'
237
+ - 'c'
238
+
239
+ '\u0635' : 'ṣ' # ص
240
+ '\ufebb' : 'ṣ' # ﺻ
241
+ '\ufebc' : 'ṣ' # ﺼ
242
+ '\ufeba' : 'ṣ' # ﺺ
243
+
244
+ '\u0636' : 'ḍ' # ض
245
+ '\ufebf' : 'ḍ' # ﺿ
246
+ '\ufec0' : 'ḍ' # ﻀ
247
+ '\ufebe' : 'ḍ' # ﺾ
248
+
249
+ '\u0637' : 'ṭ' # ط
250
+ '\ufec3' : 'ṭ' # ﻃ
251
+ '\ufec4' : 'ṭ' # ﻄ
252
+ '\ufec2' : 'ṭ' # ﻂ
253
+
254
+ '\u0639' : '‛' # ع
255
+ '\ufecb' : '‛' # ﻋ
256
+ '\ufecc' : '‛' # ﻌ
257
+ '\ufeca' : '‛' # ﻊ
258
+
259
+ '\u0638' : # ظ
260
+ - 'ẓ'
261
+ - 'd'
262
+ '\ufec7' : # ﻇ
263
+ - 'ẓ'
264
+ - 'd'
265
+ '\ufec8' : # ﻈ
266
+ - 'ẓ'
267
+ - 'd'
268
+ '\ufec6' : # ﻆ
269
+ - 'ẓ'
270
+ - 'd'
271
+
272
+ '\u0642' : # ق
273
+ - 'q'
274
+ - 'k'
275
+ '\ufed7' : # ﻗ
276
+ - 'q'
277
+ - 'k'
278
+ '\ufed8' : # ﻘ
279
+ - 'q'
280
+ - 'k'
281
+ '\ufed6' : # ﻖ
282
+ - 'q'
283
+ - 'k'
@@ -0,0 +1,222 @@
1
+ ---
2
+ authority_id: stategeocadastre
3
+ id: 1993
4
+ language: iso-639-2:ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: PROVISIONAL RULES OF REPRODUCING LETTERS OF THE UKRAINIAN ALPHABET WITH LATIN (ENGLISH) CHARACTERS
8
+ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/17th-gegn-docs/17th_gegn_WP73.pdf
9
+ creation_date: 1993
10
+ description: |
11
+ These Rules are intended for Romanized transliteration of Ukrainian
12
+ geographic names in international cartographic editions.
13
+
14
+ Geographic names of Russia, Byelorussia, Bulgaria and other states
15
+ using the Cyrillic alphabet are transliterated according to rules
16
+ accepted in those states.
17
+
18
+ These Rules come into effect from the moment of their approval by
19
+ the Main Administration of Geodesy, Cartography and Cadastre and
20
+ will be effective until the introduction of a State standard of
21
+ Ukraine regulating the Romanized transliteration of the Ukrainian
22
+ alphabet.
23
+
24
+ notes:
25
+ - No apostrophe (’) is used in transliteration, the combination "ьо" is transliterated as "io"
26
+ - Use of capitals in Latin version of Ukrainian geographic names corresponds to the Ukrainian spelling
27
+ - Generic geographical terms standing before or after a name in full or abbreviated form are transliterated
28
+ - Romanized versions of complex and compound Ukrainian toponyms (one word, hyphenated or separate words) will follow the Ukrainian spelling
29
+ - In indexes of Romanized geographical names entries must be arranged in the order of the Latin (English) alphabet
30
+ - Geographic names of Russia, Byelorussia, Bulgaria and other states using the Cyrillic alphabet are transliterated according to rules accepted in those states.
31
+ - "Ed: There seems to be a mistake in the source document. 'ц' should be replaced with 'ts' instead of 'tz'."
32
+
33
+ tests:
34
+ - source: Кам’янка # note[1]
35
+ expected: Kamianka
36
+ - source: Сьомаки # note[1]
37
+ expected: Siomaky
38
+ - source: Усть-Чорна # note[2]
39
+ expected: Ust’-Chorna
40
+ - source: Чорне море # note[2]
41
+ expected: Chorne more
42
+ - source: оз. Сиваш # note[3]
43
+ expected: oz. Syvash
44
+ - source: Кримський канал # note[3]
45
+ expected: Kryms’kyi kanal # ! Example had typo in original document "Krums’kyi kanal"
46
+ - source: Гола Пристань
47
+ expected: Hola Prystan’
48
+ - source: Корсунь Шевченківський
49
+ expected: Korsun’ Shevchenkivs’kyi
50
+ - source: Верхньодніпровськ
51
+ expected: Verkhniodniprovs’k
52
+ - source: Варва
53
+ expected: Varva
54
+ - source: Броди
55
+ expected: Brody
56
+ - source: Верховина
57
+ expected: Verkhovyna
58
+ - source: Глухів
59
+ expected: Hlukhiv
60
+ - source: Великий
61
+ expected: Velykyi
62
+ - source: Ґрунь(гора)
63
+ expected: Grun’(hora)
64
+ - source: Димер
65
+ expected: Dymer
66
+ - source: Срібне
67
+ expected: Sribne
68
+ - source: Євпаторія
69
+ expected: Yevpatoriia
70
+ - source: Єнакієве
71
+ expected: Yenakiieve
72
+ - source: Жолква
73
+ expected: Zholkva
74
+ - source: Затока
75
+ expected: Zatoka
76
+ - source: Житомир
77
+ expected: Zhytomyr
78
+ - source: Інгул
79
+ expected: Inhul
80
+ - source: Зміїв
81
+ expected: Zmiïv
82
+ - source: Йосипівка
83
+ expected: Yosypivka
84
+ - source: Стрий
85
+ expected: Stryi
86
+ - source: Калуш
87
+ expected: Kalush
88
+ - source: Лубни
89
+ expected: Lubny
90
+ - source: Миколаїв
91
+ expected: Mykolaïv
92
+ - source: Ніжин
93
+ expected: Nizhyn
94
+ - source: Острог
95
+ expected: Ostroh
96
+ - source: Печеніги
97
+ expected: Pechenihy
98
+ - source: Рівне
99
+ expected: Rivne
100
+ - source: Сарата
101
+ expected: Sarata
102
+ - source: Тячів
103
+ expected: Tiachiv
104
+ - source: Узин
105
+ expected: Uzyn
106
+ - source: Форос
107
+ expected: Foros
108
+ - source: Харків
109
+ expected: Kharkiv
110
+ - source: Цюрупінськ
111
+ expected: Tsiurupins’k
112
+ - source: Черемош
113
+ expected: Cheremosh
114
+ - source: Шацьк
115
+ expected: Shats’k
116
+ - source: Щорс
117
+ expected: Shchors
118
+ - source: Хмельницький
119
+ expected: Khmel’nyts’kyi # ! Example had typo in original document "Khmel’nyts’ky"
120
+ - source: Юрівка
121
+ expected: Yurivka
122
+ - source: Любеч
123
+ expected: Liubech
124
+ - source: Ялта
125
+ expected: Yalta
126
+ - source: Ясіня
127
+ expected: Yasinia
128
+
129
+
130
+ map:
131
+ rules:
132
+ - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
133
+ result: Ye
134
+ - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
135
+ result: ye
136
+ - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
137
+ result: "Y"
138
+ - pattern: (?<!\b\u2019)\b\u0439 # й in initial position -> y
139
+ result: "y"
140
+ - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
141
+ result: Yu
142
+ - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
143
+ result: yu
144
+ - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
145
+ result: Ya
146
+ - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
147
+ result: ya
148
+ # note[1]
149
+ - pattern: \b\u2019\b # remove ’
150
+ result: ""
151
+ - pattern: \u042c\u041e
152
+ result: "IO"
153
+ - pattern: \u044c\u043e
154
+ result: "io"
155
+
156
+ characters:
157
+ "\u0410": "A" # А
158
+ "\u0411": "B" # Б
159
+ "\u0412": "V" # В
160
+ "\u0413": "H" # Г
161
+ "\u0490": "G" # Ґ
162
+ "\u0414": "D" # Д
163
+ "\u0415": "E" # Е
164
+ "\u0404": "Ie" # Є
165
+ "\u0416": "Zh" # Ж
166
+ "\u0417": "Z" # З
167
+ "\u0418": "Y" # И
168
+ "\u0406": "I" # І
169
+ "\u0407": "I\u0308" # Ї
170
+ "\u0419": "I" # Й
171
+ "\u041a": "K" # К
172
+ "\u041b": "L" # Л
173
+ "\u041c": "M" # М
174
+ "\u041d": "N" # Н
175
+ "\u041e": "O" # О
176
+ "\u041f": "P" # П
177
+ "\u0420": "R" # Р
178
+ "\u0421": "S" # С
179
+ "\u0422": "T" # Т
180
+ "\u0423": "U" # У
181
+ "\u0424": "F" # Ф
182
+ "\u0425": "Kh" # Х
183
+ "\u0426": "Ts" # Ц note[7]
184
+ "\u0427": "Ch" # Ч
185
+ "\u0428": "Sh" # Ш
186
+ "\u0429": "Shch" # Щ
187
+ "\u042c": "\u2019" # Ь
188
+ "\u042e": "Iu" # Ю
189
+ "\u042f": "Ia" # Я
190
+ "\u0430": "a" # а
191
+ "\u0431": "b" # б
192
+ "\u0432": "v" # в
193
+ "\u0433": "h" # г
194
+ "\u0491": "g" # ґ
195
+ "\u0434": "d" # д
196
+ "\u0435": "e" # е
197
+ "\u0454": "ie" # є
198
+ "\u0436": "zh" # ж
199
+ "\u0437": "z" # з
200
+ "\u0438": "y" # и
201
+ "\u0456": "i" # і
202
+ "\u0457": "i\u0308" # ї
203
+ "\u0439": "i" # й
204
+ "\u043a": "k" # к
205
+ "\u043b": "l" # л
206
+ "\u043c": "m" # м
207
+ "\u043d": "n" # н
208
+ "\u043e": "o" # о
209
+ "\u043f": "p" # п
210
+ "\u0440": "r" # р
211
+ "\u0441": "s" # с
212
+ "\u0442": "t" # т
213
+ "\u0443": "u" # у
214
+ "\u0444": "f" # ф
215
+ "\u0445": "kh" # х
216
+ "\u0446": "ts" # ц note[7]
217
+ "\u0447": "ch" # ч
218
+ "\u0448": "sh" # ш
219
+ "\u0449": "shch" # щ
220
+ "\u044e": "iu" # ю
221
+ "\u044f": "ia" # я
222
+ "\u044c": "\u2019" # ь