interscript 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (251) hide show
  1. checksums.yaml +4 -4
  2. data/lib/interscript.rb +10 -6
  3. data/lib/interscript/fs.rb +0 -2
  4. data/lib/interscript/mapping.rb +1 -1
  5. data/lib/interscript/opal.rb +38 -8
  6. data/lib/interscript/opal/entrypoint.rb +12 -0
  7. data/lib/interscript/opal/map_translate.rb +7 -0
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +5 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +5 -1
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -1
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +9 -3
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  15. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +5 -1
  16. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +5 -1
  17. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  18. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  19. data/maps/alalc-ell-Grek-Latn-1997.yaml +5 -1
  20. data/maps/alalc-ell-Grek-Latn-2010.yaml +1 -2
  21. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  22. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  23. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  24. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  25. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -1
  26. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  27. data/maps/alalc-kor-Hang-Latn-1997.yaml +5 -1
  28. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  29. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  30. data/maps/alalc-mar-Deva-Latn-1997.yaml +21 -2
  31. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  32. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +1 -1
  33. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  34. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  35. data/maps/{alalc-pan-Deva-Latn-1997.yaml → alalc-pan-Guru-Latn-1997.yaml} +23 -4
  36. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  37. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  38. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  39. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  40. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +5 -1
  41. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +1 -1
  42. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  43. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  44. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  45. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  46. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +1 -1
  47. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  48. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  49. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  50. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  51. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  52. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -1
  53. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  54. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -1
  55. data/maps/bgn-kor-Hang-Latn-1943.yaml +7 -3
  56. data/maps/bgn-kor-Kore-Latn-1943.yaml +3 -3
  57. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  58. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  59. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +5 -1
  60. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +5 -1
  61. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  62. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +2 -2
  63. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +6 -2
  64. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +5 -1
  65. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +5 -1
  66. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  67. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  68. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +5 -1
  69. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -1
  70. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -1
  71. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  72. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  73. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +1 -1
  75. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +1 -1
  76. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +1 -1
  77. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  78. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  79. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  80. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +31 -1
  81. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  82. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  83. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  84. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  85. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  86. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  87. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  88. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  89. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -1
  90. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  91. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  92. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  93. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +1 -1
  94. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +1 -1
  95. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +1 -1
  96. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +1 -1
  97. data/maps/{bis-gjr-Gujr-Latn-13194-1991.yaml → bis-guj-Gujr-Latn-13194-1991.yaml} +17 -2
  98. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +1 -1
  99. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +1 -1
  100. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +1 -1
  101. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +1 -1
  102. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +1 -1
  103. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +1 -1
  104. data/maps/by-bel-Cyrl-Latn-1998.yaml +5 -1
  105. data/maps/by-bel-Cyrl-Latn-2007.yaml +1 -1
  106. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  107. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  108. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  109. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  110. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  111. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  112. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  113. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  114. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  115. data/maps/dos-nep-Deva-Latn-1997.yaml +15 -1
  116. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +3 -3
  117. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +2 -2
  118. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -2
  119. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -2
  120. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -1
  121. data/maps/gki-bel-Cyrl-Latn-1992.yaml +1 -1
  122. data/maps/gki-bel-Cyrl-Latn-2000.yaml +1 -1
  123. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +7 -3
  124. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  125. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  126. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -1
  127. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -1
  128. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -1
  129. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  130. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  131. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -1
  132. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  133. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -1
  134. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  135. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  136. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  137. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +8 -4
  138. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -4
  139. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  140. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  141. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  142. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  143. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  144. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  145. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  146. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  147. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  148. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  149. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  150. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  151. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  152. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  153. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  154. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  155. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  156. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  157. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  158. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -3
  159. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  160. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  161. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  162. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  163. data/maps/kp-kor-Hang-Latn-2002.yaml +25 -17
  164. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +1 -1
  165. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  166. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  167. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  168. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  169. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  170. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +2 -2
  171. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +3 -3
  172. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +2 -2
  173. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +2 -2
  174. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  175. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  176. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  177. data/maps/odni-bul-Cyrl-Latn-2015.yaml +2 -2
  178. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  179. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  180. data/maps/odni-hin-Deva-Latn-2015.yaml +1 -1
  181. data/maps/odni-kat-Geor-Latn-2015.yaml +1 -1
  182. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +1 -1
  183. data/maps/odni-kir-Cyrl-Latn-2015.yaml +1 -1
  184. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  185. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +1 -1
  186. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +1 -1
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +1 -1
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +1 -1
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +1 -1
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +1 -1
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +1 -1
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -1
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +1 -1
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +5 -1
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +1 -1
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +5 -1
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  200. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  201. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  202. data/maps/ses-ara-Arab-Latn-1930.yaml +5 -1
  203. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  204. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  205. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  206. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  207. data/maps/{ungegn-amh-Ethi-Latn-2016.yaml → un-amh-Ethi-Latn-2016.yaml} +51 -24
  208. data/maps/un-ara-Arab-Latn-1971.yaml +1 -1
  209. data/maps/un-ara-Arab-Latn-1972.yaml +1 -1
  210. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  211. data/maps/un-bel-Cyrl-Latn-2007.yaml +1 -1
  212. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  213. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +2 -2
  214. data/maps/un-ell-Grek-Latn-1987-tl.yaml +2 -2
  215. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -3
  216. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  217. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  218. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  219. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  220. data/maps/un-nep-Deva-Latn-1972.yaml +204 -17
  221. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  222. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  223. data/maps/un-ukr-Cyrl-Latn-1998.yaml +35 -12
  224. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  225. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  226. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  227. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  228. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +1 -1
  229. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  230. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  231. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -1
  232. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  233. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  234. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  235. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  236. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  237. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  238. data/maps/var-tha-Thai-Thai-phonemic.yaml +1 -1
  239. data/maps/var-tha-Thai-Zsym-ipa.yaml +1 -1
  240. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +6 -2
  241. data/spec/interscript/filenames_spec.rb +384 -0
  242. data/spec/interscript_spec.rb +7 -4
  243. metadata +105 -26
  244. data/bin/interscript +0 -41
  245. data/bin/rspec +0 -29
  246. data/bin/setup +0 -8
  247. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  248. data/lib/interscript-opal.rb +0 -2
  249. data/lib/interscript/opal_map_translate.rb +0 -12
  250. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  251. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
@@ -0,0 +1,192 @@
1
+ ---
2
+ authority_id: ua
3
+ id: 2010
4
+ language: iso-639-2:ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: The Cabinet of Ministers of Ukraine adopted Resolution No 55 "On Normalization of Transliteration of the Ukrainian Alphabet by Means of the Latin Alphabet"
8
+ url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/26th-gegn-docs/WP/WP21_Roma_system_Ukraine%20_engl._.pdf
9
+ creation_date: 2010
10
+
11
+ description: |
12
+ The Cabinet of Ministers of Ukraine adopted Resolution No 55 "On
13
+ Normalization of Transliteration of the Ukrainian Alphabet by Means of the
14
+ Latin Alphabet" on January 27, 2010. The Resolution approved The Table of
15
+ transliteration of the Ukrainian Alphabet by Means of the Latin Alphabet.
16
+ As a result of the adoption of this Resolution Ukrainian proper names
17
+ (geographical names, names and surnames) are rendered by means of the Latin
18
+ alphabet in accordance with unified rules of transliteration in official documents,
19
+ in cartographic editions, on signs of populated places, streets, metro stations,
20
+ stops, etc.
21
+
22
+ notes:
23
+ - Combination of letters "зг" is transliterated as "zgh" as opposed to "zh" – the equivalent of the Ukrainian letter "ж".
24
+ - Soft sign and the apostrophe are not reproduced in Latin.
25
+ - Transliteration of first and last names of individuals and geographical names is carried out by way of letter-for-letter representation in Latin.
26
+
27
+ tests:
28
+ - source: Алушта
29
+ expected: Alushta
30
+ - source: Андрій
31
+ expected: Andrii
32
+ - source: Борщагівка
33
+ expected: Borshchahivka
34
+ - source: Борисенко
35
+ expected: Borysenko
36
+ - source: Вінниця
37
+ expected: Vinnytsia
38
+ - source: Володимир
39
+ expected: Volodymyr
40
+ - source: Гадяч
41
+ expected: Hadiach
42
+ - source: Богдан
43
+ expected: Bohdan
44
+ - source: Згурський
45
+ expected: Zghurskyi
46
+ - source: Ґалаґан
47
+ expected: Galagan
48
+ - source: Ґорґани
49
+ expected: Gorgany
50
+ - source: Донецьк
51
+ expected: Donetsk
52
+ - source: Дмитро
53
+ expected: Dmytro
54
+ - source: Рівне
55
+ expected: Rivne
56
+ - source: Олег
57
+ expected: Oleh
58
+ - source: Есмань
59
+ expected: Esman
60
+ - source: Єнакієве
61
+ expected: Yenakiieve
62
+ - source: Гаєвич
63
+ expected: Haievych
64
+ - source: Короп'є
65
+ expected: Koropie
66
+ - source: Житомир
67
+ expected: Zhytomyr
68
+ - source: Жанна
69
+ expected: Zhanna
70
+ - source: Жежелів
71
+ expected: Zhezheliv
72
+ - source: Закарпаття
73
+ expected: Zakarpattia
74
+ - source: Казимирчук
75
+ expected: Kazymyrchuk
76
+ - source: Медвин
77
+ expected: Medvyn
78
+ - source: Михайленко
79
+ expected: Mykhailenko
80
+ - source: Іванків
81
+ expected: Ivankiv
82
+ - source: Іващенко
83
+ expected: Ivashchenko
84
+ - source: Їжакевич
85
+ expected: Yizhakevych
86
+ - source: Кадиївка
87
+ expected: Kadyivka
88
+ - source: Мар'їне
89
+ expected: Marine
90
+ - source: Йосипівка
91
+ expected: Yosypivka
92
+ - source: Стрий
93
+ expected: Stryi
94
+ - source: Олексій
95
+ expected: Oleksii
96
+ - source: Київ
97
+ expected: Kyiv
98
+ - source: Коваленко
99
+ expected: Kovalenko
100
+ - source: Лебедин
101
+ expected: Lebedyn
102
+ - source: Леонід
103
+ expected: Leonid
104
+ - source: Миколаїв
105
+ expected: Mykolaiv
106
+ - source: Маринич
107
+ expected: Marynych
108
+ - source: Ніжин
109
+ expected: Nizhyn
110
+ - source: Наталія
111
+ expected: Nataliia
112
+ - source: Одеса
113
+ expected: Odesa
114
+ - source: Онищенко
115
+ expected: Onyshchenko
116
+ - source: Полтава
117
+ expected: Poltava
118
+ - source: Петро
119
+ expected: Petro
120
+ - source: Решетилівка
121
+ expected: Reshetylivka
122
+ - source: Рибчинський
123
+ expected: Rybchynskyi
124
+ - source: Суми
125
+ expected: Sumy
126
+ - source: Соломія
127
+ expected: Solomiia
128
+ - source: Тернопіль
129
+ expected: Ternopil
130
+ - source: Троць
131
+ expected: Trots
132
+ - source: Ужгород
133
+ expected: Uzhhorod
134
+ - source: Уляна
135
+ expected: Uliana
136
+ - source: Фастів
137
+ expected: Fastiv
138
+ - source: Філіпчук
139
+ expected: Filipchuk
140
+ - source: Харків
141
+ expected: Kharkiv
142
+ - source: Христина
143
+ expected: Khrystyna
144
+ - source: Біла Церква
145
+ expected: Bila Tserkva
146
+ - source: Стеценко
147
+ expected: Stetsenko
148
+ - source: Чернівці
149
+ expected: Chernivtsi
150
+ - source: Шевченко
151
+ expected: Shevchenko
152
+ - source: Шостка
153
+ expected: Shostka
154
+ - source: Кишеньки
155
+ expected: Kyshenky
156
+ - source: Щербухи
157
+ expected: Shcherbukhy
158
+ - source: Гоща
159
+ expected: Hoshcha
160
+ - source: Гаращенко
161
+ expected: Harashchenko
162
+ - source: Яготин
163
+ expected: Yahotyn
164
+ - source: Ярошенко
165
+ expected: Yaroshenko
166
+ - source: Костянтин
167
+ expected: Kostiantyn
168
+ - source: Знам'янка
169
+ expected: Znamianka
170
+ - source: Феодосія
171
+ expected: Feodosiia
172
+ - source: Згорани
173
+ expected: Zghorany
174
+ - source: Розгон
175
+ expected: Rozghon
176
+
177
+ map:
178
+ inherit: ua-ukr-Cyrl-Latn-1996
179
+
180
+ rules:
181
+ - pattern: \u0027
182
+ result: ""
183
+ - pattern: \u2019
184
+ result: ""
185
+
186
+ characters:
187
+ "\u0429": 'Shch' # Щ
188
+ "\u0449": 'shch' # щ
189
+ "\u044c": '' # ь ->
190
+ "\u042c": '' # Ь ->
191
+ "\u0027": '' # ' ->
192
+ "\u2019": '' # ’ ->
@@ -1,7 +1,7 @@
1
1
  ---
2
- authority_id: ungegn
2
+ authority_id: un
3
3
  id: 2016
4
- language: amh
4
+ language: iso-639-2:amh
5
5
  source_script: Ethi
6
6
  destination_script: Latn
7
7
  name: ROMANIZATION OF AMHARIC -- UNGEGN 2016 System
@@ -9,42 +9,68 @@ url: http://www.eki.ee/wgrs/rom1_am.pdf
9
9
  creation_date: 1967
10
10
  confirmation date: 2016-03
11
11
  description: |
12
- The United Nations recommended romanization system was approved in 1967 (resolution I/17), based on the Amharic to English Transliteration System (2nd revised edition, August 1962) by the Imperial Ethiopian Mapping and Geography Institute. The table has been published in volume II of the conference report (see Ref. in Notes section).
13
-
14
- The UN-approved system is not being used in Ethiopia, instead a simple version without diacritical marks is used. In international cartographic products mostly the BGN/PCGN 1967 system is used which differs from the former mainly by using diacritical marks differently. Often the same spellings are used without any diacritical marks.
15
-
16
- Amharic uses the Ethiopic syllabic script whereby each syllable denotes a combination of a consonant and a vowel. It has 34 basic characters, each character having several different forms, usually called orders, according to the vowel with which the basic character is combined. The system is mainly reversible with the exception of some syllables of the 1st order which are pronounced in the same manner as the syllables of the 4th order, and some consonants.
12
+ The United Nations recommended romanization system was approved in 1967
13
+ (resolution I/17), based on the Amharic to English Transliteration System
14
+ (2nd revised edition, August 1962) by the Imperial Ethiopian Mapping and
15
+ Geography Institute. The table has been published in volume II of the
16
+ conference report (see Ref. in Notes section).
17
+
18
+ The UN-approved system is not being used in Ethiopia, instead a simple
19
+ version without diacritical marks is used. In international cartographic
20
+ products mostly the BGN/PCGN 1967 system is used which differs from the
21
+ former mainly by using diacritical marks differently. Often the same
22
+ spellings are used without any diacritical marks.
23
+
24
+ Amharic uses the Ethiopic syllabic script whereby each syllable denotes a
25
+ combination of a consonant and a vowel. It has 34 basic characters, each
26
+ character having several different forms, usually called orders, according to
27
+ the vowel with which the basic character is combined. The system is mainly
28
+ reversible with the exception of some syllables of the 1st order which are
29
+ pronounced in the same manner as the syllables of the 4th order, and some
30
+ consonants.
17
31
 
18
32
  notes:
19
- - (A) The vowel of the sixth order (i̠) is eliminated in spelling except when the actual pronunciation requires it (e.g. not Me̠ni̠gi̠si̠ti̠ but Me̠ngi̠st).
33
+ - (A) The vowel of the sixth order (i̠) is eliminated in spelling except when
34
+ the actual pronunciation requires it (e.g. not Me̠ni̠gi̠si̠ti̠ but Me̠ngi̠st).
20
35
 
21
36
  - (B) Sounds identical to fourth-order form.
22
37
 
23
- - (C) Row 34 and most combinations with W and Y are not present in the original table. These Amharic syllable characters have been taken from the BGN/PCGN 1967 table, preserving however the indication of vowels as it is done elsewhere in the table.
38
+ - (C) Row 34 and most combinations with W and Y are not present in the
39
+ original table. These Amharic syllable characters have been taken from the
40
+ BGN/PCGN 1967 table, preserving however the indication of vowels as it is
41
+ done elsewhere in the table.
24
42
 
25
43
  - |
26
- Other systems of romanization.
27
- The BGN/PCGN 1967 System is practically very close to the system above, except for the diacritical marks used with vowels:
44
+ Other systems of romanization.
45
+ The BGN/PCGN 1967 System is practically very close to the system above,
46
+ except for the diacritical marks used with vowels:
28
47
 
29
- UN BGN/PCGN
30
- 1st order e̠ e
31
- 1st order a ā (ሀ hā, ሐ hā, ኀ hā, አ ā, ’ā, ዐ ‘ā)
32
- 3rd order i ī
33
- 5th order e ē
34
- 6th order i̠ i
48
+ UN BGN/PCGN
49
+ 1st order e̠ e
50
+ 1st order a ā (ሀ hā, ሐ hā, ኀ hā, አ ā, ’ā, ዐ ‘ā)
51
+ 3rd order i ī
52
+ 5th order e ē
53
+ 6th order i̠ i
35
54
 
36
- Other differences:
55
+ Other differences:
37
56
 
38
- 1. The vowel characters in row 16 should be romanized in the BGN/PCGN System ā, u, ī, a, ē, i, and o initially and ’ā, ’u, ’ī, ’a, ’ē, ’i, and ’o in all other positions.
57
+ 1. The vowel characters in row 16 should be romanized in the BGN/PCGN
58
+ System ā, u, ī, a, ē, i, and o initially and ’ā, ’u, ’ī, ’a, ’ē, ’i, and ’o
59
+ in all other positions.
39
60
 
40
- 2. The characters in row 20 should be romanized in the BGN/PCGN System ‘ā, ‘u, ‘ī, ‘a, ‘ē, ‘ or ‘i, and ‘o.
61
+ 2. The characters in row 20 should be romanized in the BGN/PCGN System ‘ā,
62
+ ‘u, ‘ī, ‘a, ‘ē, ‘ or ‘i, and ‘o.
41
63
 
42
- 3. The syllables gwe̠, hwe̠, kwe̠, k’we̠ of the UN system are treated as variations of the syllables go, ho, ko, k’o in the BGN/PCGN system.
64
+ 3. The syllables gwe̠, hwe̠, kwe̠, k’we̠ of the UN system are treated as
65
+ variations of the syllables go, ho, ko, k’o in the BGN/PCGN system.
43
66
 
44
- 4. For documentation purposes the following consonants may be romanized with diacritical marks in the BGN/PCGN system: ḥ (row 3), š (row 5), ḫ (row 13), ẖ (row 18), t͟s’ (row 31).
67
+ 4. For documentation purposes the following consonants may be romanized
68
+ with diacritical marks in the BGN/PCGN system: ḥ (row 3), š (row 5), ḫ (row
69
+ 13), ẖ (row 18), t͟s’ (row 31).
45
70
 
46
-
47
- - (Ref.) Second United Nations Conference on the Standardization of Geographical Names. London, 10–31 May 1972. Vol. II. Technical papers, pp. 165–168 (Table 2).
71
+ - (Ref.) Second United Nations Conference on the Standardization of
72
+ Geographical Names. London, 10–31 May 1972. Vol. II. Technical papers, pp.
73
+ 165–168 (Table 2).
48
74
 
49
75
  tests:
50
76
  - source: የዜግነት ክብር በ ኢትዮጵያችን ጸንቶ
@@ -573,3 +599,4 @@ map:
573
599
  - 'vi̠'
574
600
  - 'v'
575
601
  '\u126E' : 'vo' # ቮ
602
+
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: ungegn
3
3
  id: 1971
4
- language: ara
4
+ language: iso-639-2:ara
5
5
  source_script: Arab
6
6
  destination_script: Latn
7
7
  name: 1971 "Beirut system"
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: ungegn
3
3
  id: 1972
4
- language: ara
4
+ language: iso-639-2:ara
5
5
  source_script: Arab
6
6
  destination_script: Latn
7
7
  name: ROMANIZATION OF ARABIC -- UNGEGN 1972 System
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: ungegn
3
3
  id: 2017
4
- language: ara
4
+ language: iso-639-2:ara
5
5
  source_script: Arab
6
6
  destination_script: Latn
7
7
  name: ROMANIZATION OF ARABIC -- UNGEGN 2017 System
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: un
3
3
  id: 2007
4
- language: bel
4
+ language: iso-639-2:bel
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: National System of Geographic Names Transmission into Roman Alphabet in Belarus
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: un
3
3
  id: 2016
4
- language: ben
4
+ language: iso-639-2:ben
5
5
  source_script: Beng
6
6
  destination_script: Latn
7
7
  name: Bengali Romanization, Version 4.0
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: un
3
- id: 1987
4
- language: ell
3
+ id: 1987-phonetic
4
+ language: iso-639-2:ell
5
5
  source_script: Grek
6
6
  destination_script: Latn
7
7
  name: UNGEGN Greek v1.0, Fifth United Nations Conference on the Standardization of Geographical Names. Writing Systems and Guides to Pronunciation Romanization > Conversion of the Greek alphabet into Latin characters. Paper submitted by Greece and Cyprus.
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: un
3
- id: 1987
4
- language: ell
3
+ id: 1987-tl
4
+ language: iso-639-2:ell
5
5
  source_script: Grek
6
6
  destination_script: Latn
7
7
  name: UNGEGN Greek v4.0
@@ -1,12 +1,12 @@
1
1
  ---
2
2
  authority_id: un
3
- id: 2016
4
- language: ell
3
+ id: 1987-tl
4
+ language: iso-639-2:ell
5
5
  source_script: Grek
6
6
  destination_script: Latn
7
7
  name: UNGEGN Greek v4.0
8
8
  url: http://www.eki.ee/wgrs/rom1_el.htm
9
- creation_date: 2016
9
+ creation_date: 1987
10
10
  description: |
11
11
  UNGEGN Romanization table for Greek
12
12
 
@@ -0,0 +1,222 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 2016
4
+ language: iso-639-2:hin
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Hindi Romanization, 1972
8
+ url: https://www.eki.ee/wgrs/rom1_hi.pdf
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977
13
+ (III/12), based on a report prepared by D. N. Sharma. The tables and their corrections were
14
+ published in volume II of the conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic
17
+ products. It was stated in 1987 that the appropriate resolution had not been implemented in
18
+ India and the Hunterian system was still in use in large-scale mapping.
19
+
20
+ Hindi uses the alphasyllabic script Devanāgarī whereby each character represents a syllable
21
+ rather than one sound. Vowels and diphthongs are marked in two ways: as independent
22
+ characters (used syllable-initially) and in an abbreviated form, to denote vowels after
23
+ consonants. The romanization table is unambiguous but the user would have to recognize
24
+ many ligatures not given in the original table (only three are given). The system is mostly
25
+ reversible but there may exist some ambiguities in the romanization of vowels (independent
26
+ vs. abbreviated characters) and consonants.
27
+
28
+ notes:
29
+ - |
30
+ It is recommended that the vowel अ (a) should always be romanized except when it ends a
31
+ name. If a name ends with a consonant, the consonant should carry a sub-macron. Such
32
+ cases, however, will be very rare. For example, कानपुर Kānapur (not Kānapura), जगत्
33
+ Jagaṯ.
34
+ - |
35
+ If each letter of a digraph or any two parts of a trigraph has a distinct independent sound
36
+ then it should be indicated by a hyphen, thus d-h.
37
+
38
+ tests:
39
+ - source: "परिपक्क"
40
+ expected: "paraipakka"
41
+ - source: "जगत्"
42
+ expected: "jagat"
43
+ - source: "संख्या"
44
+ expected: "saṁkhyaā"
45
+ - source: "गंभीर मरीजों के मामले में भारत दूसरे नंबर पर"
46
+ expected: "gaṁbhaīra maraījaoṁ kae maāmalae maeṁ bhaārata daūsarae naṁbara para"
47
+ - source: "कोरोना अपडेट्स"
48
+ expected: "kaoraonaā apaḍaeṭsa"
49
+ - source: "सीडीसी चीफ का बयान अहम"
50
+ expected: "saīḍaīsaī chaīpha kaā bayaāna ahama"
51
+ - source: "गूगल प्ले स्टोर पर पेटीएम की वापसी"
52
+ expected: "gaūgala plae sṭaora para paeṭaīema kaī vaāpasaī"
53
+ - source: "भारत में गैंबलिंग की इजाजत नहीं"
54
+ expected: "bhaārata maeṁ gaaiṁbalaiṁga kaī ijaājata nahaīṁ"
55
+ - source: "कोरोना वैक्सीन मुद्दे पर घिरे राष्ट्रपति; जो बाइडेन बोले- मुझे और देश को वैज्ञानिकों पर भरोसा है, डोनाल्ड ट्रम्प पर नहीं"
56
+ expected: "kaoraonaā vaaiksaīna mauddae para ghairae raāṣhṭrapatai; jao baāiḍaena baolae- maujhae aura daesha kao vaaijñaānaikaoṁ para bharaosaā haai, ḍaonaālḍa ṭrampa para nahaīṁ"
57
+ - source: "गूगल की कार्रवाई पर पेटीएम ने कहा था कि ऐप को अस्थायी तौर पर प्ले-स्टोर से हटाया गया है, आपके पैसे सुरक्षित हैं"
58
+ expected: "gaūgala kaī kaārravaāī para paeṭaīema nae kahaā thaā kai aipa kao asthaāyaī taaura para plae-sṭaora sae haṭaāyaā gayaā haai, āpakae paaisae saurakṣhaita haaiṁ"
59
+ map:
60
+
61
+ characters:
62
+
63
+ # I. Independent vowel characters
64
+ 'अ': 'a'
65
+ 'आ': 'ā'
66
+ 'इ': 'i'
67
+ 'ई': 'ī'
68
+ 'उ': 'u'
69
+ 'ऊ': 'ū'
70
+ 'ऋ': 'ṛ'
71
+ 'ॠ': 'ṝ'
72
+ 'ऌ': 'l̤'
73
+ 'ए': 'e'
74
+ 'ऐ': 'ai'
75
+ 'ओ': 'o'
76
+ 'औ': 'au'
77
+
78
+ # II. Abbreviated vowel characters
79
+
80
+ 'ा': "ā" # का
81
+ 'ॉ': "ā̆ " # additional mark: कॉ
82
+ 'ि': "i" # कि i
83
+ 'ी': "ī" # की
84
+ 'ु': "u" # कु
85
+ 'ू': "ū" # कू
86
+ 'ृ': "ṛ" # कृ
87
+ 'े': "e" # के
88
+ 'ै': "ai" # कै
89
+ 'ो': "o" # को
90
+ 'ौ': "au" # कौ
91
+
92
+
93
+ # Consonants (see Note 1)
94
+
95
+ # Gutturals
96
+ 'क': 'ka'
97
+ 'ख': 'kha'
98
+ 'ग': 'ga'
99
+ 'घ': 'gha'
100
+ 'ङ': 'ṅa'
101
+
102
+ # Palatals
103
+ 'च': 'cha'
104
+ 'छ': 'chha'
105
+ 'ज': 'ja'
106
+ 'झ': 'jha'
107
+ 'ञ': 'ña'
108
+
109
+ # Cerebrals
110
+ 'ट': 'ṭa'
111
+ 'ठ': 'ṭha'
112
+ 'ड': 'ḍa'
113
+ 'ढ': 'ḍha'
114
+ 'ण': 'ṇa'
115
+
116
+ # Dentals
117
+ 'त': 'ta'
118
+ 'थ': 'tha'
119
+ 'द': 'da'
120
+ 'ध': 'dha'
121
+ 'न': 'na'
122
+
123
+ # Labials
124
+ 'प': 'pa'
125
+ 'फ': 'pha'
126
+ 'ब': 'ba'
127
+ 'भ': 'bha'
128
+ 'म': 'ma'
129
+
130
+ # Semivowels
131
+ 'य': 'ya'
132
+ 'र': 'ra'
133
+ 'ल': 'la'
134
+ 'व': 'va'
135
+
136
+ # Sibilants
137
+ 'श': 'sha'
138
+ 'ष': 'ṣha'
139
+ 'स': 'sa'
140
+
141
+ # Dotted variants
142
+ 'क़': 'qa'
143
+ 'ख़': 'ḳha'
144
+ 'ग़': 'ġa'
145
+ 'ज़': 'za'
146
+ 'ड़': 'ṙa'
147
+ 'ढ़': 'ṙha'
148
+ 'फ़': 'fa'
149
+
150
+
151
+ # Aspirate
152
+ 'ह': 'ha'
153
+
154
+ # Anusvāra
155
+ 'ं': 'ṁ'
156
+
157
+ # Anunāsika
158
+ 'ँ': 'm̐'
159
+
160
+ # halanta
161
+ '्': ''
162
+
163
+ # bisharga
164
+ 'ः' : 'ḥ'
165
+
166
+ # V. Ligatures (to cover all ligatures in Unicode)
167
+ # Implementing pronunciation without a vowel: क् k.
168
+
169
+ # Gutturals
170
+ 'क्': 'k'
171
+ 'ख्': 'kh'
172
+ 'ग्': 'g'
173
+ 'घ्': 'gh'
174
+ 'ङ्': 'ṅ'
175
+
176
+ # Palatals
177
+ 'च्': 'ch'
178
+ 'छ्': 'chh'
179
+ 'ज्': 'j'
180
+ 'झ्': 'jh'
181
+ 'ञ्': 'ñ'
182
+
183
+ # Cerebrals
184
+ 'ट्': 'ṭ'
185
+ 'ठ्': 'ṭh'
186
+ 'ड्': 'ḍ'
187
+ 'ढ्': 'ḍh'
188
+ 'ण्': 'ṇ'
189
+
190
+ # Dentals
191
+ 'त्': 't'
192
+ 'थ्': 'th'
193
+ 'द्': 'd'
194
+ 'ध्': 'dh'
195
+ 'न्': 'n'
196
+
197
+ # Labials
198
+ 'प्': 'p'
199
+ 'फ्': 'ph'
200
+ 'ब्': 'b'
201
+ 'भ्': 'bh'
202
+ 'म्': 'm'
203
+
204
+ # Semivowels
205
+ 'य्': 'y'
206
+ 'र्': 'r'
207
+ 'ल्': 'l'
208
+ 'व्': 'v'
209
+
210
+ # Sibilants
211
+ 'श्': 'sh'
212
+ 'ष्': 'ṣh'
213
+ 'स्': 's'
214
+
215
+ # Dotted variants
216
+ 'क़्': 'q'
217
+ 'ख़्': 'ḳh'
218
+ 'ग़्': 'ġ'
219
+ 'ज़्': 'z'
220
+ 'ड़्': 'ṙ'
221
+ 'ढ़्': 'ṙh'
222
+ 'फ़्': 'f'