interscript 0.1.2 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/lib/g2pwrapper.py +34 -0
  4. data/lib/interscript.rb +142 -20
  5. data/lib/interscript/command.rb +28 -0
  6. data/lib/interscript/fs.rb +69 -0
  7. data/lib/interscript/mapping.rb +142 -0
  8. data/lib/interscript/opal.rb +57 -0
  9. data/lib/interscript/opal/entrypoint.rb +12 -0
  10. data/lib/interscript/opal/map_translate.rb +7 -0
  11. data/lib/interscript/opal/maps.js.erb +10 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/lib/model-7 +0 -0
  14. data/lib/tha-pt-b-7 +0 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  21. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  22. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
  25. data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
  26. data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
  27. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  28. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  29. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  30. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  31. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  32. data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
  33. data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
  34. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  35. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  36. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  37. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  38. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  39. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  40. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  41. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  42. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  43. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  44. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  45. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  46. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  47. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  48. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  49. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  50. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  51. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
  52. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  53. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  54. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  55. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
  56. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  57. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  58. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
  59. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
  60. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  61. data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
  62. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  63. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  64. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  65. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  66. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
  67. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
  68. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  69. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  70. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
  71. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
  72. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
  73. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  74. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
  75. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
  76. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  77. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  78. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
  79. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  80. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  81. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  82. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  83. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
  84. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  85. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  86. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  87. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  88. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  89. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  90. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  91. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
  92. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
  93. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  94. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  95. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
  96. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
  97. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  98. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  99. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  100. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  101. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  102. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  103. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  104. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  105. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  106. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  107. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  108. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  109. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  110. data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
  111. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  112. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  113. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  114. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  115. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  116. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  117. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  118. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  119. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  120. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  121. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  122. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  123. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  124. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  125. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  126. data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
  127. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  128. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  129. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
  130. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  131. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  132. data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
  133. data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
  134. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
  135. data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
  136. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
  137. data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
  138. data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
  139. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
  140. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  141. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  142. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  143. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
  144. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
  145. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  146. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  147. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  148. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
  149. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  150. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  151. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  152. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  153. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  154. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  155. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  156. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  157. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  158. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  159. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  160. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  161. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  162. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  163. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  164. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
  165. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  166. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  167. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  172. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  173. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  174. data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
  175. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  176. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  177. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  178. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  179. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  180. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  181. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  182. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  183. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  184. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  185. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  186. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  187. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  188. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  189. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  190. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  191. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  192. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  193. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  194. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  195. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  196. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  197. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  198. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  199. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  200. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  201. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  202. data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
  203. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  204. data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
  205. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
  206. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  207. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  208. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  209. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  210. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
  211. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  212. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  213. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  214. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  215. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  216. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  217. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  218. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  219. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
  220. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  221. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  222. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  223. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  224. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  225. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  226. data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
  227. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  228. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  229. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  230. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  231. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  232. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  233. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  234. data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
  235. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  236. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  237. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  238. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  239. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  240. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  241. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  242. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  243. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  244. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  245. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  246. data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
  247. data/spec/interscript/filenames_spec.rb +384 -0
  248. data/spec/interscript/mapping_spec.rb +42 -0
  249. data/spec/interscript_spec.rb +23 -5
  250. data/spec/spec_helper.rb +3 -1
  251. metadata +364 -34
  252. data/bin/interscript +0 -20
  253. data/bin/rspec +0 -29
  254. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  255. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  256. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  257. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  258. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  259. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  260. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
  261. data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -0,0 +1,119 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1952
4
+ language: iso-639-2:bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ROMANIZATION SYSTEM FOR BULGARIAN BGN/PCGN of 1952
8
+ alias:
9
+ ogc11122:
10
+ code: bul_Cyrl2Latn_BGN_1952
11
+ description: Bulgarian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) Bulgarian 1952 System
12
+ url: https://libraries.ucsd.edu/bib/fed/USBGN_romanization.pdf
13
+ creation_date: 1945
14
+ confirmation_date: 1952
15
+ description: |
16
+ This system was adopted by the BGN in 1949 and by the PCGN in 1952. It
17
+ reflects the much simplified Bulgarian orthography as officially revised in
18
+ February 1945. The Bulgarian alphabet contains all of the characters present
19
+ in the Russian alphabet with the exception of ё, ы, and э. Notes 1, 2, and 3
20
+ are applicable to sources predating the orthographic reform of 1945.
21
+
22
+ notes:
23
+ - In modern Bulgarian orthography, the character ъ does not occur in word-final position. It should be omitted in romanization when found in word-final position in older sources
24
+ - The obsolete character ѫ, which was replaced by ъ in 1945, should be romanized ŭ
25
+ - The obsolete character ѣ, replaced in 1945 by е or я according to local pronunciation, should be romanized as e or ya, accordingly, if the pronunciation is known; otherwise, as ye
26
+ - The character sequence тс may be romanized t·s in order to differentiate that romanization from the regularly occurring digraph ts, which represents the character ц
27
+
28
+ tests:
29
+ - source: София
30
+ expected: Sofiya
31
+ - source: София-Град
32
+ expected: Sofiya-Grad
33
+ - source: България
34
+ expected: "Bu\u0306lgariya"
35
+
36
+ map:
37
+ rules:
38
+ # note[1]
39
+ - pattern: (?<=)\u042a(?=\b)
40
+ result: ""
41
+ - pattern: (?<=)\u044a(?=\b)
42
+ result: ""
43
+ # note[4]
44
+ - pattern: "\u0422\u0421"
45
+ result: T·S
46
+ - pattern: "\u0422\u0441"
47
+ result: T·s
48
+ - pattern: "\u0442\u0441"
49
+ result: t·s
50
+
51
+ characters:
52
+ '\u0410': 'A'
53
+ '\u0411': 'B'
54
+ '\u0412': 'V'
55
+ '\u0413': 'G'
56
+ '\u0414': 'D'
57
+ '\u0415': 'E'
58
+ '\u0416': 'Zh'
59
+ '\u0417': 'Z'
60
+ '\u0418': 'I'
61
+ '\u0419': 'Y'
62
+ '\u041a': 'K'
63
+ '\u041b': 'L'
64
+ '\u041c': 'M'
65
+ '\u041d': 'N'
66
+ '\u041e': 'O'
67
+ '\u041f': 'P'
68
+ '\u0420': 'R'
69
+ '\u0421': 'S'
70
+ '\u0422': 'T'
71
+ '\u0423': 'U'
72
+ '\u0424': 'F'
73
+ '\u0425': 'Kh'
74
+ '\u0426': 'Ts'
75
+ '\u0427': 'Ch'
76
+ '\u0428': 'Sh'
77
+ '\u0429': 'St'
78
+ '\u042a': "U\u0306"
79
+ '\u042c': "\\'"
80
+ '\u042e': 'Yu'
81
+ '\u042f': 'Ya'
82
+ '\u0430': 'a'
83
+ '\u0431': 'b'
84
+ '\u0432': 'v'
85
+ '\u0433': 'g'
86
+ '\u0434': 'd'
87
+ '\u0435': 'e'
88
+ '\u0436': 'zh'
89
+ '\u0437': 'z'
90
+ '\u0438': 'i'
91
+ '\u0439': 'y'
92
+ '\u043a': 'k'
93
+ '\u043b': 'l'
94
+ '\u043c': 'm'
95
+ '\u043d': 'n'
96
+ '\u043e': 'o'
97
+ '\u043f': 'p'
98
+ '\u0440': 'r'
99
+ '\u0441': 's'
100
+ '\u0442': 't'
101
+ '\u0443': 'u'
102
+ '\u0444': 'f'
103
+ '\u0445': 'kh'
104
+ '\u0446': 'ts'
105
+ '\u0447': 'ch'
106
+ '\u0448': 'sh'
107
+ '\u0449': 'sht'
108
+ '\u044a': "u\u0306"
109
+ '\u044c': "\\'"
110
+ '\u044e': 'yu'
111
+ '\u044f': 'ya'
112
+
113
+ # note 2
114
+ '\u046A': "U\u0306" # Ѫ
115
+ '\u046B': "u\u0306" # ѫ
116
+
117
+ # note[3]
118
+ '\u0462': "Ye" # Ѣ
119
+ '\u0463': "ye" # ѣ
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: 2013
4
- language: bul
4
+ language: iso-639-2:bul
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: BGN/PCGN 2013 Agreement
8
+ alias:
9
+ ogc11122:
10
+ code: bul_Cyrl2Latn_BGN_2013
11
+ description: Bulgarian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names(PCGN) Bulgarian 2013 System
8
12
  url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811509/ROMANIZATION_OF_BULGARIAN.pdf
9
13
  creation_date: 2013
10
14
  confirmation date: 2019-06
@@ -15,9 +19,9 @@ description: |
15
19
 
16
20
  notes:
17
21
  - When in final position, “ия” is romanized as “ia” (e.g., София = Sofia; София-Град= Sofia-Grad).
18
- - An exception to the romanization system is allowed for the name of the state. Thus, България is roman-
19
- ized as Bulgaria.
22
+ - An exception to the romanization system is allowed for the name of the state. Thus, България is romanized as Bulgaria.
20
23
  - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
24
+
21
25
  tests:
22
26
  - source: София
23
27
  expected: Sofia
@@ -27,66 +31,12 @@ tests:
27
31
  expected: Bulgaria
28
32
 
29
33
  map:
30
- characters:
31
- 'България': 'Bulgaria'
32
- '\u0410': 'A'
33
- '\u0411': 'B'
34
- '\u0412': 'V'
35
- '\u0413': 'G'
36
- '\u0414': 'D'
37
- '\u0415': 'E'
38
- '\u0416': 'ZH'
39
- '\u0417': 'Z'
40
- '\u0418': 'I'
41
- '\u0419': 'Y'
42
- '\u041a': 'K'
43
- '\u041b': 'L'
44
- '\u041c': 'M'
45
- '\u041d': 'N'
46
- '\u041e': 'O'
47
- '\u041f': 'P'
48
- '\u0420': 'R'
49
- '\u0421': 'S'
50
- '\u0422': 'T'
51
- '\u0423': 'U'
52
- '\u0424': 'F'
53
- '\u0425': 'KH'
54
- '\u0426': 'TS'
55
- '\u0427': 'CH'
56
- '\u0428': 'SH'
57
- '\u0429': 'SHT'
58
- '\u042a': '\u016c'
59
- '\u042c': "\'"
60
- '\u042e': 'YU'
61
- '\u042f': 'YA'
62
- '\u0430': 'a'
63
- '\u0431': 'b'
64
- '\u0432': 'v'
65
- '\u0433': 'g'
66
- '\u0434': 'd'
67
- '\u0435': 'e'
68
- '\u0436': 'zh'
69
- '\u0437': 'z'
70
- '\u0438': 'i'
71
- '\u0439': 'y'
72
- '\u043a': 'k'
73
- '\u043b': 'l'
74
- '\u043c': 'm'
75
- '\u043d': 'n'
76
- '\u043e': 'o'
77
- '\u043f': 'p'
78
- '\u0440': 'r'
79
- '\u0441': 's'
80
- '\u0442': 't'
81
- '\u0443': 'u'
82
- '\u0444': 'f'
83
- '\u0445': 'kh'
84
- '\u0446': 'ts'
85
- '\u0447': 'ch'
86
- '\u0448': 'sh'
87
- '\u0449': 'sht'
88
- '\u044a': '\u016d'
89
- '\u044c': "\'"
90
- '\u044e': 'yu'
91
- '\u044f': 'ya'
34
+ inherit: bgnpcgn-bul-Cyrl-Latn-1952
92
35
 
36
+ rules:
37
+ - pattern: България
38
+ result: Bulgaria
39
+ - pattern: (?<=\u0418)\u042f(?=\b) # final position, “ИЯ” is romanized as “IA”
40
+ result: A
41
+ - pattern: (?<=\u0438)\u044f(?=\b) # final position, “ия” is romanized as “ia”
42
+ result: a
@@ -0,0 +1,184 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2008
4
+ language: iso-639-2:che
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/835782/TABLE_OF_CORRESPONDENCES_FOR_CHECHEN.pdf
8
+ creation_date: 2008
9
+ confirmation_date: 2019-07
10
+ description: |
11
+ Chechen is an official language within Chechnya, one of the republics of the Russian Federation.
12
+ It will normally be encountered in Cyrillic script, in which case it should be romanized by means of
13
+ the Cyrillic-Roman table of correspondences given below.
14
+
15
+ notes:
16
+ - The letter ə is used to represent short a; a is used for long a.
17
+ - Ye is used word- and syllable-initially. This Cyrillic letter may be encountered as the ligature ie in Chechen-Roman sources.
18
+ - This character occurs only in loan words.
19
+ - q in the combination ккх (qq) and q̇ in the combination ккъ (q̇q̇)
20
+ - ŋ when preceding vowel is nasalized.
21
+ - This Cyrillic letter may be encountered as the ligature ꭣ [U+AB63] or the diphthong oa in Chechen Roman sources.
22
+ - Unless in the combinations гӀ, кӀ, пӀ, тӀ, хӀ, цӀ and чӀ.
23
+ - |
24
+ An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
25
+ unmodified letters of the basic Roman script is:
26
+
27
+ All apostrophes appearing in romanization are U+2019
28
+
29
+ Ə (U+018F) ə (U+0259)
30
+ Ä (U+00C4) ä (U+00E4)
31
+ Ġ (U+0047+0307) ġ (U+0067+0307)
32
+ Z̵ (U+005A+0335) z̵ (U+007A+0335)
33
+ Q̇ (U+0051+0307) q̇ (U+0071+0307)
34
+ Ŋ (U+014A) ŋ (U+014B)
35
+ Ö (U+00D6) ö (U+00F6)
36
+ Ü (U+00DC) ü (U+00FC)
37
+ Ẋ (U+0058+0307) ẋ (U+0078+0307)
38
+ Ċ (U+0043+0307) ċ (U+0063+0307)
39
+ Ç (U+00C7) ç (U+00E7)
40
+ Ç̇ (U+00C7+0307) ç̇ (U+00E7+0307)
41
+ Ş (U+015E) ş (U+015F)
42
+
43
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used
44
+
45
+ tests:
46
+ - source: кӏант
47
+ expected: khant
48
+ - source: зуда
49
+ expected: zuda
50
+ - source: пхьагал
51
+ expected: pẋagal
52
+ - source: наж
53
+ expected: naz̵
54
+ - source: мангал
55
+ expected: mangal
56
+ - source: Ӏаж
57
+ expected: Jaz̵
58
+ - source: Нохчийн Википеди
59
+ expected: Noxçiyn Vikipedi
60
+ - source: сагӏадаккхар
61
+ expected: saġadaqqar
62
+ - source: йеза
63
+ expected: yeza
64
+ - source: еара
65
+ expected: yeara
66
+ - source: елха
67
+ expected: yelxa
68
+
69
+ map:
70
+ rules:
71
+ # note[2]
72
+ - pattern: (?<!\b\u2019)\b\u0415 # Е in initial position -> Ye
73
+ result: Ye
74
+ - pattern: (?<!\b\u2019)\b\u0435 # е in initial position -> ye
75
+ result: ye
76
+ # note[4]
77
+ - pattern: \u043A\u043A\u0445
78
+ result: qq
79
+ - pattern: \u043A\u043A\u042A
80
+ result: q̇q̇
81
+ - pattern: \u041A\u041A\u0445
82
+ result: QQ
83
+ - pattern: \u041A\u041A\u042A
84
+ result: Q̇Q̇
85
+
86
+ characters:
87
+ '\u0410' : ['A', 'Ə'] # А note[1]
88
+ '\u0410\u044C' : "A\u0308" # Аь -> Ä
89
+ '\u0411' : 'B' # Б
90
+ '\u0412' : 'V' # В
91
+ '\u0413' : 'G' # Г
92
+ '\u0413\u04C0' : "G\u0307" # ГӀ -> Ġ
93
+ '\u0414' : 'D' # Д
94
+ '\u0415' : 'E' # Е note[2]
95
+ '\u0401' : 'Yo' # Ё note[3]
96
+ '\u0416' : "Z\u0335" # Ж -> Ƶ
97
+ '\u0417' : 'Z' # З
98
+ '\u0418' : 'I' # И
99
+ '\u042B' : 'Y' # Й
100
+ '\u041A' : 'K' # К note[4]
101
+ '\u041A\u0445' : 'Q' # Кх note[4]
102
+ '\u041A\u044A' : "Q\u0307" # Къ -> Q̇ note[4]
103
+ '\u041A\u04C0' : 'Kh' # КӀ note[4]
104
+ '\u041B' : 'L' # Л
105
+ '\u041C' : 'M' # М
106
+ '\u041D' : ['N', 'Ŋ'] # Н note[5]
107
+ '\u041E' : 'O' # О note[6]
108
+ '\u041E\u044C' : "O\u0308" # Оь -> Ö
109
+ '\u041F' : 'P' # П
110
+ '\u041F\u04C0' : 'Ph' # ПӀ
111
+ '\u0420' : 'R' # Р
112
+ '\u0421' : 'S' # С
113
+ '\u0422' : 'T' # Т
114
+ '\u0422\u04C0' : 'Th' # TӀ
115
+ '\u0423' : 'U' # У
116
+ '\u0423\u044C' : "U\u0308" # Уь -> Ü
117
+ '\u0424' : 'F' # Ф
118
+ '\u0425' : 'X' # Х
119
+ '\u0425\u044C' : "X\u0307" # Хь -> Ẋ
120
+ '\u0425\u04C0' : "H" # ХӀ
121
+ '\u04B8' : 'C' # Ц
122
+ '\u04B8\u04C0' : "C\u0307" # ЦӀ -> Ċ
123
+ '\u0427' : "C\u0327" # Ч -> Ç
124
+ '\u0427\u04C0' : "\u00C7\u0307" # ЧӀ -> Ç̇
125
+ '\u0428' : "S\u0327" # Ш -> Ş
126
+ '\u0429' : "S\u0327C\u0327" # Щ -> ŞÇ note[3]
127
+ '\u042A' : "’" # Ъ note[3]
128
+ '\u042B' : "Y" # Ы
129
+ '\u042C' : "" # Ь note[3]
130
+ '\u042D' : "E" # Э
131
+ '\u042E' : "Yu" # Ю
132
+ '\u042E\u044C' : "Yu\u0308" # Юь -> Yü
133
+ '\u042F' : "Ya" # Я
134
+ '\u042F\u044C' : "Ya\u0308" # Яь -> Yä
135
+ '\u04C0' : "J" # Ӏ note[7]
136
+
137
+ '\u0430' : ['a', 'ə'] # а note[1]
138
+ '\u0430\u044C' : "a\u0308" # аь -> ä
139
+ '\u0431' : 'b' # б
140
+ '\u0432' : 'v' # в
141
+ '\u0433' : 'g' # г
142
+ '\u0433\u04CF' : "g\u0307" # гӏ -> ġ
143
+ '\u0434' : 'd' # д
144
+ '\u0435' : 'e' # е note[2]
145
+ '\u0451' : 'yo' # ё note[3]
146
+ '\u0436' : "z\u0335" # ж -> ƶ
147
+ '\u0437' : 'z' # з
148
+ '\u0438' : 'i' # и
149
+ '\u0439' : 'y' # й
150
+ '\u043A' : 'k' # к note[4]
151
+ '\u043A\u0445' : 'q' # кх note[4]
152
+ '\u043A\u044A' : "q\u0307" # къ -> q̇ note[4]
153
+ '\u043A\u04CF' : 'kh' # кӏ note[4]
154
+ '\u043B' : 'l' # л
155
+ '\u043C' : 'm' # м
156
+ '\u043D' : ['n', 'ŋ'] # н note[5]
157
+ '\u043E' : 'o' # о note[6]
158
+ '\u043E\u044C' : "o\u0308" # оь -> ö
159
+ '\u043F' : 'p' # п
160
+ '\u0440' : 'r' # р
161
+ '\u0441' : 's' # с
162
+ '\u0442' : 't' # т
163
+ '\u0442\u04CF' : 'th' # тӏ
164
+ '\u0443' : 'u' # у
165
+ '\u0443\u044C' : "u\u0308" # уь -> ü
166
+ '\u0444' : 'f' # ф
167
+ '\u0445' : 'x' # х
168
+ '\u0445\u044C' : "x\u0307" # хь -> ẋ
169
+ '\u0445\u04CF' : "h" # хӏ
170
+ '\u04B9' : 'c' # ц
171
+ '\u04B9\u04CF' : "с\u0307" # цӏ -> ċ
172
+ '\u0447' : "c\u0327" # ч -> ç
173
+ '\u0447\u04CF' : "c\u00E7\u0307" # чӏ -> ç̇
174
+ '\u0448' : "s\u0327" # ш -> ş
175
+ '\u0449' : "s\u0327c\u0327" # щ -> şç note[3]
176
+ '\u044A' : "’" # ъ note[3]
177
+ '\u044B' : "y" # ы
178
+ '\u044C' : '' # ь note[3]
179
+ '\u044D' : "e" # э
180
+ '\u044E' : "yu" # ю
181
+ '\u044E' : "yu\u0308" # юь -> yü
182
+ '\u044F' : "ya" # я
183
+ '\u044F' : "ya\u0308" # яь -> yä
184
+ '\u04CF' : "j" # ӏ note[7]
@@ -0,0 +1,705 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1962
4
+ language: iso-639-2:ell
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1962 System
8
+ alias:
9
+ ogc11122:
10
+ code: ell_Grek2Latn_BGN_1962
11
+ description: US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) Greek 1962 System (out of date)
12
+ url: https://github.com/riboseinc/interscript/files/4225556/BGN_Romanization_Guide_1962_greek.pdf
13
+ creation_date: 1962
14
+ description: |
15
+ BGN/PCGN Romanization table for Greek
16
+
17
+ note:
18
+ - Original specification is for polytonic Greek; this has been adapted for monotonic Greek
19
+ - The treatment of έι in https://transliteration.eki.ee/pdf/Greek.pdf is incorrect; although
20
+ not explicitly discussed in the original specification, έι is phonetically equivalent to
21
+ έϊ, and is to be transliterated as eï, not í (like εί). The same applies to other diphthongs
22
+ accented on the first syllable.
23
+
24
+ tests:
25
+
26
+ - source: |
27
+ Ένα πράμα μόνον με παρακίνησε κι εμένα να γράψω ότι τούτην την πατρίδα την έχομεν όλοι μαζί, και σοφοί κι αμαθείς και πλούσιοι και φτωχοί και πολιτικοί και στρατιωτικοί και οι πλέον μικρότεροι άνθρωποι· όσοι αγωνιστήκαμεν, αναλόγως ο καθείς, έχομεν να ζήσομεν εδώ. Το λοιπόν δουλέψαμεν όλοι μαζί, να την φυλάμεν κι όλοι μαζί και να μην λέγει ούτε ο δυνατός «εγώ» ούτε ο αδύνατος. Ξέρετε πότε να λέγει ο καθείς «εγώ»; Όταν αγωνιστεί μόνος του και φκιάσει ή χαλάσει, να λέγει «εγώ»· όταν όμως αγωνίζονται πολλοί και φκιάνουν, τότε να λένε «εμείς». Είμαστε εις το «εμείς» κι όχι εις το «εγώ». Και εις το εξής να μάθομεν γνώση, αν θέλομεν να φκιάσομεν χωριόν, να ζήσομεν όλοι μαζί.
28
+
29
+ Γιάννης Μακρυγιάννης.
30
+
31
+ expected: |
32
+ Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrídha tin ékhomen óloi mazí, kai sofoí ki amathís kai ploúsioi kai ftokhoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o kathís, ékhomen na zísomen edhó. To loipón dhoulépsamen óloi mazí, na tin filámen ki óloi mazí kai na min léyi oúte o dhinatós «egó» oúte o adhínatos. Xérete póte na léyi o kathís «egó»? Ótan agonistí mónos tou kai fkiási í khalási, na léyi «egó»; ótan ómos agonízondai polloí kai fkiánoun, tóte na léne «emís». Ímaste is to «emís» ki ókhi is to «egó». Kai is to exís na máthomen gnósi, an thélomen na fkiásomen khorión, na zísomen óloi mazí.
33
+
34
+ Yiánnis Makriyiánnis.
35
+
36
+
37
+ - source: ΑΘΗΝΑ
38
+ expected: ATHINA
39
+ - source: μπαμπάκι
40
+ expected: bambáki
41
+ - source: νταντά
42
+ expected: dandá
43
+ - source: γκέγκε
44
+ expected: génge
45
+ - source: Γκαμπόν
46
+ expected: Gambón
47
+ - source: Μάγχη
48
+ expected: Mánkhi
49
+ - source: κογξ
50
+ expected: konx
51
+ - source: υιός
52
+ expected: iós
53
+ - source: Υιός
54
+ expected: Iós
55
+ - source: νεράντζι
56
+ expected: nerántzi
57
+ - source: Γοίθιος
58
+ expected: Goíthios
59
+ - source: μπέικον
60
+ expected: béïkon
61
+ - source: μπέϊκον
62
+ expected: béïkon
63
+ - source: βόλεϊ
64
+ expected: vóleï
65
+ - source: αθεΐα
66
+ expected: atheḯa
67
+ - source: Εϊγιαφιάτλαγιοκουτλ
68
+ expected: Eïyiafiátlayiokoutl
69
+ - source: Εΐτζι
70
+ expected: Eḯtzi
71
+ - source: Μυρτώο
72
+ expected: Mirtóö
73
+ - source: αέρας
74
+ expected: aë́ras
75
+ - source: γαυ γαυ
76
+ expected: gav gav
77
+ - source: Ταΰγετος
78
+ expected: Taḯyetos
79
+ - source: σπρέυ
80
+ expected: spréi
81
+
82
+ - source: Αθήνα
83
+ expected: Athína
84
+ - source: Άγιον Όρος
85
+ expected: Áyion Óros
86
+ - source: Άγραφα
87
+ expected: Ágrafa
88
+ - source: Αγρίνιο
89
+ expected: Agrínio
90
+ - source: Αίγινα
91
+ expected: Aíyina
92
+ - source: Αίγιο
93
+ expected: Aíyio
94
+ - source: Αλεξανδρούπολη
95
+ expected: Alexandroúpoli
96
+ - source: Αλεποχώρι
97
+ expected: Alepokhóri
98
+ - source: Αμοργός
99
+ expected: Amorgós
100
+ - source: Άμφισσα
101
+ expected: Ámfissa
102
+ - source: Αράχωβα
103
+ expected: Arákhova
104
+ - source: Άργος
105
+ expected: Árgos
106
+ - source: Αρκαδία
107
+ expected: Arkadhía
108
+ - source: Άρτα
109
+ expected: Árta
110
+ - source: Βελούχι
111
+ expected: Veloúkhi
112
+ - source: Βέροια
113
+ expected: Véroia
114
+ - source: Βοιωτία
115
+ expected: Voiotía
116
+ - source: Βόλος
117
+ expected: Vólos
118
+ - source: Βόνιτσα
119
+ expected: Vónitsa
120
+ - source: Γαλαξίδι
121
+ expected: Galaxídhi
122
+ - source: Γαλάτσι
123
+ expected: Galátsi
124
+ - source: Γιαννιτσά
125
+ expected: Yiannitsá
126
+ - source: Γλυφάδα
127
+ expected: Glifádha
128
+ - source: Γρανίτσα
129
+ expected: Granítsa
130
+ - source: Γρεβενά
131
+ expected: Grevená
132
+ - source: Γύθειο
133
+ expected: Yíthio
134
+ - source: Διόνυσος
135
+ expected: Dhiónisos
136
+ - source: Δίστομο
137
+ expected: Dhístomo
138
+ - source: Δολιανά
139
+ expected: Dholianá
140
+ - source: Δράμα
141
+ expected: Dhráma
142
+ - source: Δωδεκάνησα
143
+ expected: Dhodhekánisa
144
+ - source: Έδεσσα
145
+ expected: Édhessa
146
+ - source: Ελευσίνα
147
+ expected: Elevsína
148
+ - source: Επίδαυρος
149
+ expected: Epídhavros
150
+ - source: Επτάνησα
151
+ expected: Eptánisa
152
+ - source: Ερμούπολη
153
+ expected: Ermoúpoli
154
+ - source: Εύβοια
155
+ expected: Évvoia
156
+ - source: Ζάκυνθος
157
+ expected: Zákinthos
158
+ - source: Ήπειρος
159
+ expected: Ípiros
160
+ - source: Ηράκλειο
161
+ expected: Iráklio
162
+ - source: Θάσος
163
+ expected: Thásos
164
+ - source: Θεσσαλονίκη
165
+ expected: Thessaloníki
166
+ - source: Θεσσαλία
167
+ expected: Thessalía
168
+ - source: Θεσπρωτία
169
+ expected: Thesprotía
170
+ - source: Θήβα
171
+ expected: Thíva
172
+ - source: Θράκη
173
+ expected: Thráki
174
+ - source: Ιθάκη
175
+ expected: Itháki
176
+ - source: Ίος
177
+ expected: Íos
178
+ - source: Ιωάννινα
179
+ expected: Ioánnina
180
+ - source: Καβάλα
181
+ expected: Kavála
182
+ - source: Καλάβρυτα
183
+ expected: Kalávrita
184
+ - source: Καλαμάτα
185
+ expected: Kalamáta
186
+ - source: Καλαμπάκα
187
+ expected: Kalambáka
188
+ - source: Καλύβια
189
+ expected: Kalívia
190
+ - source: Κάλυμνος
191
+ expected: Kálimnos
192
+ - source: Καρδίτσα
193
+ expected: Kardhítsa
194
+ - source: Καρπενήσι
195
+ expected: Karpenísi
196
+ - source: Κάρυστος
197
+ expected: Káristos
198
+ - source: Καστελλόριζο
199
+ expected: Kastellórizo
200
+ - source: Καστοριά
201
+ expected: Kastoriá
202
+ - source: Κατερίνη
203
+ expected: Kateríni
204
+ - source: Κάτω Αχαΐα
205
+ expected: Káto Akhaḯa
206
+ - source: Κερατέα
207
+ expected: Keratéa
208
+ - source: Κέρκυρα
209
+ expected: Kérkira
210
+ - source: Κεφαλλονιά
211
+ expected: Kefalloniá
212
+ - source: Κηφισιά
213
+ expected: Kifisiá
214
+ - source: Κιλκίς
215
+ expected: Kilkís
216
+ - source: Κοζάνη
217
+ expected: Kozáni
218
+ - source: Κολωνός
219
+ expected: Kolonós
220
+ - source: Κομοτηνή
221
+ expected: Komotiní
222
+ - source: Κόρινθος
223
+ expected: Kórinthos
224
+ - source: Κορώνη
225
+ expected: Koróni
226
+ - source: Κρανίδι
227
+ expected: Kranídhi
228
+ - source: Κρέστενα
229
+ expected: Kréstena
230
+ - source: Κρήτη
231
+ expected: Kríti
232
+ - source: Κύθηρα
233
+ expected: Kíthira
234
+ - source: Κυκλάδες
235
+ expected: Kikládhes
236
+ - source: Κύμη
237
+ expected: Kími
238
+ - source: Κυψέλη
239
+ expected: Kipséli
240
+ - source: Κως
241
+ expected: Kos
242
+ - source: Λαγκαδάς
243
+ expected: Langadhás
244
+ - source: Λαμία
245
+ expected: Lamía
246
+ - source: Λάρισα
247
+ expected: Lárisa
248
+ - source: Λαύριο
249
+ expected: Lávrio
250
+ - source: Λέρος
251
+ expected: Léros
252
+ - source: Λέσβος
253
+ expected: Lésvos
254
+ - source: Λευκάδα
255
+ expected: Levkádha
256
+ - source: Λήμνος
257
+ expected: Límnos
258
+ - source: Λιβαδειά
259
+ expected: Livadhiá
260
+ - source: Μακεδονία
261
+ expected: Makedhonía
262
+ - source: Μάνη
263
+ expected: Máni
264
+ - source: Μαραθώνας
265
+ expected: Marathónas
266
+ - source: Μαρκόπουλο
267
+ expected: Markópoulo
268
+ - source: Μαρούσι
269
+ expected: Maroúsi
270
+ - source: Μέγαρα
271
+ expected: Mégara
272
+ - source: Μεσολόγγι
273
+ expected: Mesolóngi
274
+ - source: Μεταξουργείο
275
+ expected: Metaxouryío
276
+ - source: Μέτσοβο
277
+ expected: Métsovo
278
+ - source: Μήλος
279
+ expected: Mílos
280
+ - source: Μύκονος
281
+ expected: Míkonos
282
+ - source: Μυστράς
283
+ expected: Mistrás
284
+ - source: Μυτιλήνη
285
+ expected: Mitilíni
286
+ - source: Νάξος
287
+ expected: Náxos
288
+ - source: Νάουσα
289
+ expected: Náousa
290
+ - source: Ναύπακτος
291
+ expected: Návpaktos
292
+ - source: Ναύπλιο
293
+ expected: Návplio
294
+ - source: Νέα Σμύρνη
295
+ expected: Néa Smírni
296
+ - source: Νίσυρος
297
+ expected: Nísiros
298
+ - source: Ξάνθη
299
+ expected: Xánthi
300
+ - source: Όλυμπος
301
+ expected: Ólimbos
302
+ - source: Παγκράτι
303
+ expected: Pangráti
304
+ - source: Παπάγου
305
+ expected: Papágou
306
+ - source: Πάρος
307
+ expected: Páros
308
+ - source: Πασαλιμάνι
309
+ expected: Pasalimáni
310
+ - source: Πατήσια
311
+ expected: Patísia
312
+ - source: Πάτμος
313
+ expected: Pátmos
314
+ - source: Πάτρα
315
+ expected: Pátra
316
+ - source: Πειραιάς
317
+ expected: Piraiás
318
+ - source: Πελοπόννησος
319
+ expected: Pelopónnisos
320
+ - source: Περιστέρι
321
+ expected: Peristéri
322
+ - source: Πεύκη
323
+ expected: Pévki
324
+ - source: Πήλιο
325
+ expected: Pílio
326
+ - source: Πολύγυρος
327
+ expected: Políyiros
328
+ - source: Πόρος
329
+ expected: Póros
330
+ - source: Πρέβεζα
331
+ expected: Préveza
332
+ - source: Πτολεμαΐδα
333
+ expected: Ptolemaḯdha
334
+ - source: Πύλος
335
+ expected: Pílos
336
+ - source: Πύργος
337
+ expected: Pírgos
338
+ - source: Ρέθυμνο
339
+ expected: Réthimno
340
+ - source: Ρόδος
341
+ expected: Ródhos
342
+ - source: Ρούμελη
343
+ expected: Roúmeli
344
+ - source: Σαλαμίνα
345
+ expected: Salamína
346
+ - source: Σαμοθράκη
347
+ expected: Samothráki
348
+ - source: Σάμος
349
+ expected: Sámos
350
+ - source: Σαντορίνη
351
+ expected: Sandoríni
352
+ - source: Σέρρες
353
+ expected: Sérres
354
+ - source: Σίκινος
355
+ expected: Síkinos
356
+ - source: Σίφνος
357
+ expected: Sífnos
358
+ - source: Σκιάθος
359
+ expected: Skiáthos
360
+ - source: Σκόπελος
361
+ expected: Skópelos
362
+ - source: Σούλι
363
+ expected: Soúli
364
+ - source: Σπάρτη
365
+ expected: Spárti
366
+ - source: Στερεά Ελλάδα
367
+ expected: Stereá Elládha
368
+ - source: Στύρα
369
+ expected: Stíra
370
+ - source: Σύμη
371
+ expected: Sími
372
+ - source: Σύρος
373
+ expected: Síros
374
+ - source: Σφακιά
375
+ expected: Sfakiá
376
+ - source: Τήλος
377
+ expected: Tílos
378
+ - source: Τήνος
379
+ expected: Tínos
380
+ - source: Τρίκαλα
381
+ expected: Tríkala
382
+ - source: Τρίπολη
383
+ expected: Trípoli
384
+ - source: Τσακωνιά
385
+ expected: Tsakoniá
386
+ - source: Ύδρα
387
+ expected: Ídhra
388
+ - source: Φάληρο
389
+ expected: Fáliro
390
+ - source: Φλώρινα
391
+ expected: Flórina
392
+ - source: Φολέγανδρος
393
+ expected: Folégandros
394
+ - source: Χάλκη
395
+ expected: Khálki
396
+ - source: Χαλκίδα
397
+ expected: Khalkídha
398
+ - source: Χαλάνδρι
399
+ expected: Khalándri
400
+ - source: Χαλκιδική
401
+ expected: Khalkidhikí
402
+ - source: Χανιά
403
+ expected: Khaniá
404
+ - source: Χίος
405
+ expected: Khíos
406
+ - source: Ψαρά
407
+ expected: Psará
408
+ - source: Αβάνα
409
+ expected: Avána
410
+ - source: Αγγλία
411
+ expected: Anglía
412
+ - source: Αϊβαλί
413
+ expected: Aïvalí
414
+ - source: Αλεξάνδρεια
415
+ expected: Alexándria
416
+ - source: Άμστερνταμ
417
+ expected: Ámsterndam
418
+ - source: Βαυαρία
419
+ expected: Vavaría
420
+ - source: Βενετία
421
+ expected: Venetía
422
+ - source: Βερολίνο
423
+ expected: Verolíno
424
+ - source: Βερόνα
425
+ expected: Veróna
426
+ - source: Βιέννη
427
+ expected: Viénni
428
+ - source: Γένοβα
429
+ expected: Yénova
430
+ - source: Δουβλίνο
431
+ expected: Dhouvlíno
432
+ - source: Καλαβρία
433
+ expected: Kalavría
434
+ - source: Καλιφόρνια
435
+ expected: Kalifórnia
436
+ - source: Καύκασος
437
+ expected: Kávkasos
438
+ - source: Κονγκό
439
+ expected: Konngó
440
+ - source: Κορσική
441
+ expected: Korsikí
442
+ - source: Κουρδιστάν
443
+ expected: Kourdhistán
444
+ - source: Κωνσταντινούπολη
445
+ expected: Konstandinoúpoli
446
+ - source: Κατεχόμενη Κύπρος
447
+ expected: Katekhómeni Kípros
448
+ - source: Λαπωνία
449
+ expected: Laponía
450
+ - source: Λευκωσία
451
+ expected: Levkosía
452
+ - source: Λιβόρνο
453
+ expected: Livórno
454
+ - source: Λονδίνο
455
+ expected: Londhíno
456
+ - source: Λυών
457
+ expected: Lión
458
+ - source: Μάλαγα
459
+ expected: Málaga
460
+ - source: Μασσαλία
461
+ expected: Massalía
462
+ - source: Μικρονησία
463
+ expected: Mikronisía
464
+ - source: Μιλάνο
465
+ expected: Miláno
466
+ - source: Μόσχα
467
+ expected: Móskha
468
+ - source: Μπολόνια
469
+ expected: Bolónia
470
+ - source: Νάπολη
471
+ expected: Nápoli
472
+ - source: Νταγκεστάν
473
+ expected: Dangestán
474
+ - source: Νέα Υόρκη
475
+ expected: Néa Iórki
476
+ - source: Οξφόρδη
477
+ expected: Oxfórdhi
478
+ - source: Ουαλία
479
+ expected: Oualía
480
+ - source: Παρίσι
481
+ expected: Parísi
482
+ - source: Πάφος
483
+ expected: Páfos
484
+ - source: Πολυνησία
485
+ expected: Polinisía
486
+ - source: Ρώμη
487
+ expected: Rómi
488
+ - source: Σαμάρεια
489
+ expected: Samária
490
+ - source: Σικελία
491
+ expected: Sikelía
492
+ - source: Σκανδιναβία
493
+ expected: Skandhinavía
494
+ - source: Σκόπια
495
+ expected: Skópia
496
+ - source: Σκωτία
497
+ expected: Skotía
498
+ - source: Σμύρνη
499
+ expected: Smírni
500
+ - source: Ταϊτή
501
+ expected: Taïtí
502
+ - source: Ταταρστάν
503
+ expected: Tatarstán
504
+ - source: Τζαμάικα
505
+ expected: Tzamáika
506
+ - source: Τηλλυρία
507
+ expected: Tilliría
508
+ - source: Τιρόλο
509
+ expected: Tirólo
510
+ - source: Τορίνο
511
+ expected: Toríno
512
+ - source: Φανάρι
513
+ expected: Fanári
514
+ - source: Φλωρεντία
515
+ expected: Florendía
516
+ - source: Χαβάη
517
+ expected: Khaváï
518
+ - source: Χονγκ Κονγκ
519
+ expected: Khonng Konng
520
+
521
+ map:
522
+ # https://en.wikipedia.org/wiki/Romanization_of_Greek
523
+ rules:
524
+ - pattern: (?<=[ΑαΕεΗη])\u03A5 # Υ (after Α, Ε, Η)
525
+ result: V
526
+ - pattern: (?<=[ΑαΕεΗη])\u03C5 # υ (after Α, Ε, Η)
527
+ result: v
528
+ - pattern: (?<=[Οο])\u03A5 # Υ (after Ο)
529
+ result: U
530
+ - pattern: (?<=[Οο])\u03C5 # υ (after Ο)
531
+ result: u
532
+ - pattern: (?<=[Οο])\u03CD # ύ (after Ο)
533
+ result: ú
534
+ - pattern: \u03A5[Ιι] # ΥΙ (unaccented; accented Υί is handled by its own rule)
535
+ result: I
536
+ - pattern: \u03C5[Ιι] # υι (unaccented; accented υί is handled by its own rule)
537
+ result: i
538
+ - pattern: \u03A5[ί] # Υί
539
+ result: Í
540
+ - pattern: \u03C5[ί] # υί
541
+ result: í
542
+ - pattern: \u0393(?=[ξΞχΧ]) # Γ (before Ξ, Χ)
543
+ result: N
544
+ - pattern: \u03B3(?=[ξΞχΧ]) # γ (before Ξ, Χ)
545
+ result: n
546
+ - pattern: \u0393[Γγ] # ΓΓ (digraph)
547
+ result: Ng
548
+ - pattern: \u03B3\u03B3 # γγ (digraph)
549
+ result: ng
550
+ - pattern: (?<=\b)\u0393[Κκ] # Γ (before Κ initially)
551
+ result: G
552
+ - pattern: (?<=\b)\u03B3[Κκ] # γ (before Κ initially)
553
+ result: g
554
+ - pattern: (?<!\b)\u0393[Κκ] # Γ (before Κ medially)
555
+ result: Ng
556
+ - pattern: (?<!\b)\u03B3[Κκ] # γ (before Κ medially)
557
+ result: ng
558
+ - pattern: \u0393(?=[ΕεέΗηήΙιίΥυύ]) # Γ (before front vowels)
559
+ result: Y
560
+ - pattern: \u03B3(?=[ΕεέΗηήΙιίΥυύ]) # γ (before front vowels)
561
+ result: y
562
+ - pattern: \u0393(?=[Oo][Ιιί]) # Γ (before front vowels)
563
+ result: Y
564
+ - pattern: \u03B3(?=[Oo][Ιιί]) # γ (before front vowels)
565
+ result: y
566
+ - pattern: (?<=\b)\u039D[τΤ] # ΝΤ (initially)
567
+ result: D
568
+ - pattern: (?<=\b)\u03BD[τΤ] # ντ (initially)
569
+ result: d
570
+ - pattern: \u039D[τΤ][ζΖ] # ΝΤΖ
571
+ result: NTZ
572
+ - pattern: \u03BD[τΤ][ζΖ] # ντζ
573
+ result: ntz
574
+ - pattern: (?<!\b)\u039D[τΤ] # ΝΤ (medially)
575
+ result: Nd
576
+ - pattern: (?<!\b)\u03BD[τΤ] # ντ (medially)
577
+ result: nd
578
+ - pattern: (?<=\b)\u039C[πΠ] # ΜΠ (initially)
579
+ result: B
580
+ - pattern: (?<=\b)\u03BC[πΠ] # μπ (initially)
581
+ result: b
582
+ - pattern: \u039C[πΠ] # ΜΠ (medially)
583
+ result: Mb
584
+ - pattern: \u03BC[πΠ] # μπ (medially)
585
+ result: mb
586
+ - pattern: (?<=[νΝ])\u0394(?=[ρΡ]) # ΝΔΡ
587
+ result: D
588
+ - pattern: (?<=[νΝ])\u03B4(?=[ρΡ]) # νδρ
589
+ result: d
590
+ - pattern: (?<=[ΑΆαά])\u0395 # ΑΕ
591
+ result: Ë
592
+ - pattern: (?<=[ΑΆαά])\u03B5 # αε
593
+ result: ë
594
+ - pattern: (?<=[ΑΆαά])\u03AD # αέ
595
+ result: ë́
596
+ - pattern: (?<=[ΩΏωώ])\u039F # ΩΟ
597
+ result: Ö
598
+ - pattern: (?<=[ΩΏωώ])\u03BF # ωο
599
+ result: ö
600
+ - pattern: (?<=[ΩΏωώ])\u03CC # ωό
601
+ result: ö́
602
+ - pattern: (?<=[ΑΆαάΟΌοό])\u0397 # ΑΗ, ΟΗ
603
+ result: Ï
604
+ - pattern: (?<=[ΑΆαάΟΌοό])\u03B7 # αη, οη
605
+ result: ï
606
+ - pattern: (?<=[ΑΆαάΟΌοό])\u03AE # αή, οή
607
+ result: ḯ
608
+ - pattern: \u037E # ;
609
+ result: "?"
610
+ - pattern: \u003B # ;
611
+ result: "?"
612
+
613
+ characters:
614
+ "\u0027": ""
615
+ "\u0386": "Á" # Ά
616
+ "\u0391": "A" # Α
617
+ "\u0392": "V" # Β
618
+ "\u0393": "G" # Γ
619
+ "\u0394": "Dh" # Δ
620
+ "\u0395": "E" # Ε
621
+ "\u0395\u0399": "I" # ΕΙ
622
+ "\u0395\u03B9": "I" # Ει
623
+ "\u0395\u03AF": "Í" # Εί
624
+ "\u0395\u03AA": "Εï" # ΕΪ
625
+ "\u0395\u03CA": "Εï" # Εϊ
626
+ "\u0388\u03CA": "Éï" # Έϊ
627
+ "\u0388\u03B9": "Éï" # Έι
628
+ "\u0391\u03CD": "Áv" # Αύ
629
+ "\u0395\u03CD": "Év" # Εύ
630
+ "\u0397\u03CD": "Ív" # Ηύ
631
+ "\u0396": "Z" # Ζ
632
+ "\u0397": "I" # Η
633
+ "\u0398": "Th" # Θ
634
+ "\u0399": "I" # Ι
635
+ "\u039A": "K" # Κ
636
+ "\u039B": "L" # Λ
637
+ "\u039C": "M" # Μ
638
+ "\u039D": "N" # Ν
639
+ "\u039E": "X" # Ξ
640
+ "\u039F": "O" # Ο
641
+ "\u03A0": "P" # Π
642
+ "\u03A1": "R" # Ρ
643
+ "\u03A3": "S" # Σ
644
+ "\u03A4": "T" # Τ
645
+ "\u03A5": "I" # Υ
646
+ "\u03A6": "F" # Φ
647
+ "\u03A7": "Kh" # Χ
648
+ "\u03A8": "Ps" # Ψ
649
+ "\u03A9": "O" # Ω
650
+ "\u0388": "É" # Έ
651
+ "\u0389": "Í" # Ή
652
+ "\u038A": "Í" # Ί
653
+ "\u038C": "Ó" # Ό
654
+ "\u038E": "Í" # Ύ
655
+ "\u038F": "Ó" # Ώ
656
+ "\u03AA": "Ï" # Ϊ
657
+ "\u03AB": "Ï" # Ϋ
658
+
659
+ "\u03AC": "á" # ά
660
+ "\u03B1": "a" # α
661
+ "\u03B2": "v" # β
662
+ "\u03B3": "g" # γ
663
+ "\u03B4": "dh" # δ
664
+ "\u03B5": "e" # ε
665
+ "\u03B5\u03B9": "i" # ει
666
+ "\u03B5\u03AF": "í" # εί
667
+ "\u03B5\u03CA": "eï" # εϊ
668
+ "\u03AD\u03CA": "éï" # έϊ
669
+ "\u03AD\u03B9": "éï" # έι
670
+ "\u03B1\u03CD": "áv" # αύ
671
+ "\u03B5\u03CD": "év" # εύ
672
+ "\u03B7\u03CD": "ív" # ηύ
673
+ "\u03B6": "z" # ζ
674
+ "\u03B7": "i" # η
675
+ "\u03B8": "th" # θ
676
+ "\u03B9": "i" # ι
677
+ "\u03BA": "k" # κ
678
+ "\u03BB": "l" # λ
679
+ "\u03BC": "m" # μ
680
+ "\u03BD": "n" # ν
681
+ "\u03BE": "x" # ξ
682
+ "\u03BF": "o" # ο
683
+ "\u03C0": "p" # π
684
+ "\u03C1": "r" # ρ
685
+ "\u03C3": "s" # σ
686
+ "\u03C2": "s" # ς
687
+ "\u03C4": "t" # τ
688
+ "\u03C5": "i" # υ
689
+ "\u03C6": "f" # φ
690
+ "\u03C7": "kh" # χ
691
+ "\u03C8": "ps" # ψ
692
+ "\u03C9": "o" # ω
693
+ "\u03AD": "é" # έ
694
+ "\u03AE": "í" # ή
695
+ "\u03AF": "í" # ί
696
+ "\u03CC": "ó" # ό
697
+ "\u03CD": "í" # ύ
698
+ "\u03CE": "ó" # ώ
699
+ "\u03CA": "ï" # ϊ
700
+ "\u03CB": "ï" # ϋ
701
+ "\u0390": "ḯ" # ΐ
702
+ "\u03B0": "ḯ" # ΰ
703
+
704
+ "\u0387": ";" # ·
705
+ "\u00B7": ";" # ·