interscript 0.1.2 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/lib/g2pwrapper.py +34 -0
  4. data/lib/interscript.rb +142 -20
  5. data/lib/interscript/command.rb +28 -0
  6. data/lib/interscript/fs.rb +69 -0
  7. data/lib/interscript/mapping.rb +142 -0
  8. data/lib/interscript/opal.rb +57 -0
  9. data/lib/interscript/opal/entrypoint.rb +12 -0
  10. data/lib/interscript/opal/map_translate.rb +7 -0
  11. data/lib/interscript/opal/maps.js.erb +10 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/lib/model-7 +0 -0
  14. data/lib/tha-pt-b-7 +0 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  21. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  22. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
  25. data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
  26. data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
  27. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  28. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  29. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  30. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  31. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  32. data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
  33. data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
  34. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  35. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  36. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  37. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  38. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  39. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  40. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  41. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  42. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  43. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  44. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  45. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  46. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  47. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  48. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  49. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  50. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  51. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
  52. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  53. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  54. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  55. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
  56. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  57. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  58. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
  59. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
  60. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  61. data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
  62. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  63. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  64. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  65. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  66. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
  67. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
  68. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  69. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  70. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
  71. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
  72. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
  73. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  74. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
  75. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
  76. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  77. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  78. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
  79. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  80. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  81. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  82. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  83. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
  84. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  85. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  86. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  87. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  88. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  89. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  90. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  91. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
  92. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
  93. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  94. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  95. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
  96. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
  97. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  98. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  99. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  100. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  101. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  102. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  103. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  104. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  105. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  106. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  107. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  108. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  109. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  110. data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
  111. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  112. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  113. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  114. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  115. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  116. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  117. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  118. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  119. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  120. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  121. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  122. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  123. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  124. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  125. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  126. data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
  127. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  128. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  129. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
  130. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  131. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  132. data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
  133. data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
  134. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
  135. data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
  136. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
  137. data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
  138. data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
  139. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
  140. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  141. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  142. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  143. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
  144. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
  145. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  146. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  147. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  148. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
  149. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  150. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  151. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  152. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  153. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  154. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  155. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  156. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  157. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  158. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  159. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  160. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  161. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  162. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  163. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  164. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
  165. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  166. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  167. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  172. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  173. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  174. data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
  175. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  176. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  177. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  178. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  179. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  180. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  181. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  182. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  183. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  184. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  185. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  186. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  187. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  188. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  189. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  190. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  191. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  192. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  193. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  194. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  195. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  196. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  197. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  198. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  199. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  200. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  201. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  202. data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
  203. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  204. data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
  205. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
  206. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  207. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  208. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  209. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  210. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
  211. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  212. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  213. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  214. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  215. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  216. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  217. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  218. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  219. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
  220. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  221. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  222. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  223. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  224. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  225. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  226. data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
  227. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  228. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  229. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  230. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  231. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  232. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  233. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  234. data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
  235. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  236. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  237. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  238. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  239. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  240. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  241. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  242. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  243. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  244. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  245. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  246. data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
  247. data/spec/interscript/filenames_spec.rb +384 -0
  248. data/spec/interscript/mapping_spec.rb +42 -0
  249. data/spec/interscript_spec.rb +23 -5
  250. data/spec/spec_helper.rb +3 -1
  251. metadata +364 -34
  252. data/bin/interscript +0 -20
  253. data/bin/rspec +0 -29
  254. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  255. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  256. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  257. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  258. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  259. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  260. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
  261. data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: 1981
4
- language: arm
4
+ language: iso-639-2:arm
5
5
  source_script: Armn
6
6
  destination_script: Latn
7
7
  name: BGN/PCGN 1981 System
@@ -25,8 +25,7 @@ notes:
25
25
  - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
26
26
 
27
27
  tests:
28
- - source:
29
- expected:
28
+
30
29
  map:
31
30
  characters:
32
31
  '\u0531' : 'A'
@@ -0,0 +1,104 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1993
4
+ language: iso-639-2:aze
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
9
+ creation_date: 1993
10
+ confirmation_date: 2019-06
11
+ description: |
12
+ Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
13
+
14
+ notes:
15
+
16
+ - The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
17
+
18
+ - The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
19
+
20
+ - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
21
+
22
+ - |
23
+ An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
24
+ Ğ (U+011E), ğ (U+011F)
25
+ Ə (U+018F), ə (U+0259)
26
+ İ (U+0130), ı (U+0131)
27
+ Ö (U+00D6), ö (U+00F6)
28
+ Ü (U+00DC), ü (U+00FC)
29
+ Ç (U+00C7), ç (U+00E7)
30
+ Ş (U+015E), ş (U+015F)
31
+
32
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
33
+
34
+ tests:
35
+ - source:
36
+ expected:
37
+
38
+ map:
39
+ characters:
40
+ '\u0410' : 'A'
41
+ '\u0411' : 'B'
42
+ '\u0412' : 'V'
43
+ '\u0413' : 'G'
44
+ '\u0492' : 'Ğ'
45
+ '\u0414' : 'D'
46
+ '\u0415' : 'E'
47
+ '\u04D8' : 'Ә'
48
+ '\u0416' : 'J'
49
+ '\u0417' : 'Z'
50
+ '\u0418' : 'I'
51
+ '\u042B' : 'İ'
52
+ '\u0408' : 'Y'
53
+ '\u041A' : 'K'
54
+ '\u049C' : 'G'
55
+ '\u041B' : 'L'
56
+ '\u041C' : 'M'
57
+ '\u041D' : 'N'
58
+ '\u041E' : 'O'
59
+ '\u04E8' : 'Ö'
60
+ '\u041F' : 'P'
61
+ '\u0420' : 'R'
62
+ '\u0421' : 'S'
63
+ '\u0422' : 'T'
64
+ '\u0423' : 'U'
65
+ '\u04AE' : 'Ü'
66
+ '\u0424' : 'F'
67
+ '\u0425' : 'X'
68
+ '\u04BA' : 'H'
69
+ '\u0427' : 'Ç'
70
+ '\u04B8' : 'C'
71
+ '\u0428' : 'Ş'
72
+
73
+ '\u0430' : 'a'
74
+ '\u0431' : 'b'
75
+ '\u0432' : 'v'
76
+ '\u0433' : 'g'
77
+ '\u0493' : 'ğ'
78
+ '\u0434' : 'd'
79
+ '\u0435' : 'e'
80
+ '\u04D9' : 'ә'
81
+ '\u0436' : 'j'
82
+ '\u0437' : 'z'
83
+ '\u0438' : 'i'
84
+ '\u044B' : 'ı'
85
+ '\u0458' : 'y'
86
+ '\u043A' : 'k'
87
+ '\u049D' : 'g'
88
+ '\u043B' : 'l'
89
+ '\u043C' : 'm'
90
+ '\u043D' : 'n'
91
+ '\u043E' : 'o'
92
+ '\u04E9' : 'ö'
93
+ '\u043F' : 'p'
94
+ '\u0440' : 'r'
95
+ '\u0441' : 's'
96
+ '\u0442' : 't'
97
+ '\u0443' : 'u'
98
+ '\u04AF' : 'ü'
99
+ '\u0444' : 'f'
100
+ '\u0445' : 'x'
101
+ '\u04BB' : 'h'
102
+ '\u0447' : 'ç'
103
+ '\u04B9' : 'c'
104
+ '\u0448' : 'ş'
@@ -0,0 +1,188 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2007
4
+ language: iso-639-2:bak
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
8
+ alias:
9
+ ogc11122:
10
+ code: bak_Cyrl2Latn_BGN_2007
11
+ description: Bashkir 2007 BGN/PCGN Cyrillic-Latin Table of Correspondences
12
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
13
+ creation_date: 2007
14
+ confirmation_date: 2019
15
+ description: |
16
+ Bashkir is an official language within Respublika Bashkortostan, one of the
17
+ republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
18
+ which case it should be romanized by means of the Cyrillic-Roman table of
19
+ correspondences given below.
20
+
21
+ notes:
22
+ - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
23
+ - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
24
+ - The letter w is used between or after vowels.
25
+ - The letter w is used after e, u, ö and ə.
26
+ - |
27
+ An inventory of letter-diacritic combinations, with their Unicode encoding,
28
+ in addition to the unmodified letters of the basic Roman script is:
29
+ Ğ (U+011E) ğ (U+011F)
30
+ Ź (U+0179) ź (U+017A)
31
+ Ë (U+00CB) ë (U+00EB)
32
+ Ñ (U+00D1) ñ (U+00F1)
33
+ Ö (U+00D6) ö (U+00F6)
34
+ Ś (U+015A) ś (U+015B)
35
+ Ü (U+00DC) ü (U+00FC)
36
+ Ç (U+00C7) ç (U+00E7)
37
+ Ş (U+015E) ş (U+015F)
38
+ Ə (U+018F) ə (U+0259)
39
+ - |
40
+ The Roman-script columns show only lowercase forms but, when applying the table,
41
+ uppercase and lowercase Roman letters as appropriate should be used.
42
+
43
+ tests:
44
+ # adopted http://www.eki.ee/knab/lat/kblba.pdf
45
+ - source: Васйылға
46
+ expected: Wasyılğa
47
+ - source: Еҙем
48
+ expected: Yeźem
49
+ - source: Раевка
50
+ expected: Raevka
51
+ - source: Сәйетҡол
52
+ expected: Səyetqol
53
+ - source: Ауырғазы
54
+ expected: Awırğazı
55
+ - source: Бурһыҡтау
56
+ expected: Burhıqtaw
57
+ - source: Мәләүез
58
+ expected: Mələwez
59
+ - source: Ҡыҙылъяр
60
+ expected: Qıźılyar
61
+ # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
62
+ - source: кемдең
63
+ expected: kemdeñ
64
+ - source: кем
65
+ expected: kem
66
+ - source: был
67
+ expected: bıl
68
+ - source: ошо
69
+ expected: oşo
70
+ - source: быларҙың
71
+ expected: bılarźıñ
72
+ - source: һеҙҙән
73
+ expected: heźźən
74
+ - source: һин
75
+ expected: hin
76
+ - source: һеҙҙең
77
+ expected: heźźeñ
78
+
79
+ map:
80
+ rules:
81
+ # note[1]
82
+ - pattern: \b\u0412(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
83
+ result: "W"
84
+ - pattern: \b\u0432(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
85
+ result: "w"
86
+ # note[2]
87
+ - pattern: \b\u0415
88
+ result: "Ye"
89
+ - pattern: \b\u0435
90
+ result: "ye"
91
+ - pattern: (?=\b)\u0415(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
92
+ result: "Ye"
93
+ - pattern: (?=\b)\u0435(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
94
+ result: "ye"
95
+
96
+ # note[3] # note[4]
97
+ - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0423\u04AE]
98
+ result: W
99
+ - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0443\u04AF]
100
+ result: w
101
+
102
+
103
+ characters:
104
+ '\u0410': 'A' # А
105
+ '\u0411': 'B' # Б note[1]
106
+ '\u0412': 'V' # В
107
+ '\u0413': 'G' # Г
108
+ '\u0492': "\u011E" # Ғ
109
+ '\u0414': 'D' # Д
110
+ '\u0498': "\u0179" # Ҙ
111
+ '\u0415': 'E' # Е note[2]
112
+ '\u0401': 'Ë' # Ё
113
+ '\u0416': 'J' # Ж
114
+ '\u0417': 'Z' # З
115
+ '\u0418': 'I' # И
116
+ '\u0419': 'Y' # Й
117
+ '\u041A': 'K' # К
118
+ '\u04A0': 'Q' # Ҡ
119
+ '\u041B': 'L' # Л
120
+ '\u041C': 'M' # М
121
+ '\u041D': 'N' # Н
122
+ '\u04A2': 'Ñ' # Ң
123
+ '\u041E': 'O' # О
124
+ '\u04E8': "Ö" # Ө
125
+ '\u041F': 'P' # П
126
+ '\u0420': 'R' # Р
127
+ '\u0421': 'S' # С
128
+ '\u04AA': 'Ś' # Ҫ
129
+ '\u0422': 'T' # Т
130
+ '\u0423': 'U' # У
131
+ '\u04AE': 'Ü' # Ү note[3]
132
+ '\u0424': 'F' # Ф
133
+ '\u0425': 'X' # Х
134
+ '\u04BA': 'H' # Һ
135
+ '\u0426': 'Ts' # Ц
136
+ '\u0427': 'Ç' # Ч
137
+ '\u0428': 'Ş' # Ш
138
+ '\u0429': 'ŞÇ' # Щ
139
+ '\u042A': '' # Ъ
140
+ '\u042B': 'I' # Ы
141
+ '\u042C': '' # Ь
142
+ '\u042D': 'E' # Э
143
+ '\u04D8': "\u018F" # Ә
144
+ '\u042E': 'Yu' # Ю
145
+ '\u042F': 'Ya' # Я
146
+
147
+ '\u0430': 'a' # а
148
+ '\u0431': 'b' # б
149
+ '\u0432': 'v' # в note[1]
150
+ '\u0433': 'g' # г
151
+ '\u0493': "\u011F" # ғ
152
+ '\u0434': 'd' # д
153
+ '\u0499': 'ź' # ҙ
154
+ '\u0435': 'e' # e note[2]
155
+ '\u0451': 'ë' # ё
156
+ '\u0436': 'j' # ж
157
+ '\u0437': 'z' # з
158
+ '\u0438': 'i' # и
159
+ '\u0439': 'y' # й
160
+ '\u043A': 'k' # к
161
+ '\u04A1': 'q' # ҡ
162
+ '\u043B': 'l' # л
163
+ '\u043C': 'm' # м
164
+ '\u043D': 'n' # н
165
+ '\u04A3': 'ñ' # ң
166
+ '\u043E': 'o' # о
167
+ '\u04E9': "\u00F6" # ө
168
+ '\u043F': 'p' # п
169
+ '\u0440': 'r' # р
170
+ '\u0441': 's' # с
171
+ '\u04AB': 'ś' # ҫ
172
+ '\u0442': 't' # т
173
+ '\u0443': 'u' # у
174
+ "\u04AF": 'ü' # ү note[3]
175
+ '\u0444': 'f' # ф
176
+ '\u0445': 'x' # х
177
+ '\u04BB': 'h' # һ
178
+ '\u0446': 'ts' # ц
179
+ '\u0447': 'ç' # ч
180
+ '\u0448': 'ş' # ш
181
+ '\u0449': 'şç' # щ
182
+ '\u044A': '' # ъ
183
+ '\u044B': "\u0131" # ы
184
+ '\u044C': '' # ь
185
+ '\u044D': 'e' # э
186
+ '\u04D9': "\u0259" # ә
187
+ '\u044E': 'yu' # ю
188
+ '\u044F': 'ya' # я
@@ -0,0 +1,289 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1979
4
+ language: iso-639-2:bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: United States Board on Geographic Names Foreign Names Committee Staff, 1994. Romanization Systems and Roman-Script Spelling Conventions, p. 23.
8
+ alias:
9
+ ogc11122:
10
+ code: bel_Cyrl2Latn_BGN_1979
11
+ description: Byelorussian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1979 System
12
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811510/ROMANIZATION_OF_BELARUSIAN.pdf
13
+ creation_date: 1979
14
+ description: |
15
+ The BGN/PCGN system for Belarusian (formerly referred to as Byelorussian) was designed for use in
16
+ romanizing names written in the Belarusian Cyrillic alphabet. The Belarusian alphabet contains three
17
+ characters not present in the Russian alphabet: і, ў, and ’.
18
+
19
+ notes:
20
+ - The character sequences зг, кг, сг, тс, and цг may be romanized z·h, k·h, s·h, t·s, and ts·h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
21
+ - All apostrophes appearing in romanization are Unicode encoding 2019.
22
+
23
+ tests:
24
+ - source: Антон
25
+ expected: Anton
26
+ - source: Вілейка
27
+ expected: Vilyeyka
28
+ - source: Брэст
29
+ expected: Brest
30
+ - source: Дубна
31
+ expected: Dubna
32
+ - source: Віцебск
33
+ expected: Vitsyebsk
34
+ - source: Асіповічы
35
+ expected: Asipovichy
36
+ - source: Гродна
37
+ expected: Hrodna
38
+ - source: Брагін
39
+ expected: Brahin
40
+ - source: Добруш
41
+ expected: Dobrush
42
+ - source: Ліда
43
+ expected: Lida
44
+ - source: Гомель
45
+ expected: Homyel’
46
+ - source: Беліца
47
+ expected: Byelitsa
48
+ - source: Ёдкавічы
49
+ expected: Yodkavichy
50
+ - source: Нёман
51
+ expected: Nyoman
52
+ - source: Жлобін
53
+ expected: Zhlobin
54
+ - source: Ружаны
55
+ expected: Ruzhany
56
+ - source: Зоя
57
+ expected: Zoya
58
+ - source: князь
59
+ expected: knyaz’
60
+ - source: Ігнат
61
+ expected: Ihnat
62
+ - source: Мінск
63
+ expected: Minsk
64
+ - source: Йосель
65
+ expected: Yosyel’
66
+ - source: Койданава
67
+ expected: Koydanava
68
+ - source: Крапіўна
69
+ expected: Krapiwna
70
+ - source: Менск
71
+ expected: Myensk
72
+ - source: Лаўна
73
+ expected: Lawna
74
+ - source: Лёсік
75
+ expected: Lyosik
76
+ - source: Купала
77
+ expected: Kupala
78
+ - source: Вілейка
79
+ expected: Vilyeyka
80
+ - source: Міхал
81
+ expected: Mikhal
82
+ - source: Вільня
83
+ expected: Vil’nya
84
+ - source: Лепель
85
+ expected: Lyepyel’
86
+ - source: Магілёў
87
+ expected: Mahilyow
88
+ - source: Няміга
89
+ expected: Nyamiha
90
+ - source: Наваградак
91
+ expected: Navahradak
92
+ - source: Баранавічы
93
+ expected: Baranavichy
94
+ - source: Орша
95
+ expected: Orsha
96
+ - source: Востраў
97
+ expected: Vostraw
98
+ - source: Пінск
99
+ expected: Pinsk
100
+ - source: Дняпро
101
+ expected: Dnyapro
102
+ - source: Рагачоў
103
+ expected: Rahachow
104
+ - source: Сураж
105
+ expected: Surazh
106
+ - source: Смаляны
107
+ expected: Smalyany
108
+ - source: Арэса
109
+ expected: Aresa
110
+ - source: Рось
111
+ expected: Ros’
112
+ - source: Талочын
113
+ expected: Talochyn
114
+ - source: Масты
115
+ expected: Masty
116
+ - source: Уладзімір
117
+ expected: Uladzimir
118
+ - source: Бабруйск
119
+ expected: Babruysk
120
+ - source: Быхаў
121
+ expected: Bykhaw
122
+ - source: Воўпа
123
+ expected: Vowpa
124
+ - source: Іўе
125
+ expected: Iwye
126
+ - source: Фолюш
127
+ expected: Folyush
128
+ - source: фортка
129
+ expected: fortka
130
+ - source: Хатынь
131
+ expected: Khatyn’
132
+ - source: Быхаў
133
+ expected: Bykhaw
134
+ - source: Ганцавічы
135
+ expected: Hantsavichy
136
+ - source: Стоўбцы
137
+ expected: Stowbtsy
138
+ - source: цьмяны
139
+ expected: ts’myany
140
+ - source: мясцовы
141
+ expected: myastsovy
142
+ - source: Астравец
143
+ expected: Astravyets
144
+ - source: Прыпяць
145
+ expected: Prypyats’
146
+ - source: Чэрыкаў
147
+ expected: Cherykaw
148
+ - source: Шчара
149
+ expected: Shchara
150
+ - source: Нарач
151
+ expected: Narach
152
+ - source: Шклоў
153
+ expected: Shklow
154
+ - source: Ашмяны
155
+ expected: Ashmyany
156
+ - source: Ыттык-Кёль
157
+ expected: Yttyk-Kyol’
158
+ - source: Кобрын
159
+ expected: Kobryn
160
+ - source: Солы
161
+ expected: Soly
162
+ - source: Копысь
163
+ expected: Kopys’
164
+ - source: рунь
165
+ expected: run’
166
+ - source: Эйсманты
167
+ expected: Eysmanty
168
+ - source: Крэва
169
+ expected: Kreva
170
+ - source: Юры
171
+ expected: Yury
172
+ - source: уюн
173
+ expected: uyun
174
+ - source: Язэп
175
+ expected: Yazep
176
+ - source: Івянец
177
+ expected: Ivyanyets
178
+ - source: з’езд
179
+ expected: z”yezd
180
+ - source: Вялiкiя Вераб’евічы
181
+ expected: Vyalikiya Vyerab”yevichy
182
+ - source: Дзям’янаўцы
183
+ expected: Dzyam”yanawtsy
184
+ - source: Задвор’е
185
+ expected: Zadvor”ye
186
+ - source: Гезгалы
187
+ expected: Hyez·haly
188
+ - source: Вадасховішча Гезгальскае
189
+ expected: Vadaskhovishcha Hyez·hal’skaye
190
+
191
+ map:
192
+ postrules:
193
+ - pattern: '\u042C' # Ь
194
+ result: "\u2019"
195
+ - pattern: '\u044C' # ь
196
+ result: "\u2019"
197
+ # Per documentation those rules are optional
198
+ rules:
199
+ - pattern: \u0417\u0413 # ЗГ
200
+ result: "Z\u00B7H" # Z·H
201
+ - pattern: \u0437\u0433 # зг
202
+ result: "z\u00B7h" # z·h
203
+ - pattern: \u041A\u0413 # КГ
204
+ result: "K\u00B7H" # K·H
205
+ - pattern: \u043A\u0433 # кг
206
+ result: "k\u00B7h" # k·h
207
+ - pattern: \u0421\u0413 # СГ
208
+ result: "S\u00B7H" # S·H
209
+ - pattern: \u0441\u0433 # сг
210
+ result: "s\u00B7h" # s·h
211
+ - pattern: \u0422\u0421 # ТС
212
+ result: "T\u00B7S" # T·S
213
+ - pattern: \u0442\u0441 # тс
214
+ result: "t\u00B7s" # t·s
215
+ - pattern: \u0426\u0413 # ЦГ
216
+ result: "TS\u00B7H" # TS·H
217
+ - pattern: \u0446\u0433 # цг
218
+ result: "ts\u00B7h" # ts·h
219
+
220
+ characters:
221
+ '\u00B4' : "\u201D" # apostrophe according to spec
222
+ '\u02BC' : "\u201D" # apostrophe according to spec
223
+ '\u2019' : "\u201D" # apostrophe in actual examples
224
+
225
+ '\u0410' : 'A' # A
226
+ '\u0411' : 'B' # Б
227
+ '\u0412' : 'V' # B
228
+ '\u0413' : 'H' # Г
229
+ '\u0414' : 'D' # Д
230
+ '\u0415' : 'Ye' # Е
231
+ '\u0401' : 'Yo' # Ё
232
+ '\u0416' : 'Zh' # Ж
233
+ '\u0417' : 'Z' # З
234
+ '\u0406' : 'I' # І
235
+ '\u0419' : 'Y' # Й
236
+ '\u041A' : 'K' # К
237
+ '\u041B' : 'L' # Л
238
+ '\u041C' : 'M' # М
239
+ '\u041D' : 'N' # Н
240
+ '\u041E' : 'O' # О
241
+ '\u041F' : 'P' # П
242
+ '\u0420' : 'R' # Р
243
+ '\u0421' : 'S' # С
244
+ '\u0422' : 'T' # Т
245
+ '\u0423' : 'U' # У
246
+ '\u040E' : 'W' # Ў
247
+ '\u0424' : 'F' # Ф
248
+ '\u0425' : 'Kh' # Х
249
+ '\u0426' : 'Ts' # Ц
250
+ '\u0427' : 'Ch' # Ч
251
+ '\u0428' : 'Sh' # Ш
252
+ '\u042B' : 'Y' # Ы
253
+ '\u042D' : 'E' # Э
254
+ '\u042E' : 'Yu' # Ю
255
+ '\u042F' : 'Ya' # Я
256
+ '\u0490' : 'G' # Ґ
257
+
258
+ '\u0430' : 'a' # а
259
+ '\u0431' : 'b' # б
260
+ '\u0432' : 'v' # в
261
+ '\u0433' : 'h' # г
262
+ '\u0434' : 'd' # д
263
+ '\u0435' : 'ye' # е
264
+ '\u0451' : 'yo' # ё
265
+ '\u0436' : 'zh' # ж
266
+ '\u0437' : 'z' # з
267
+ '\u0456' : 'i' # і
268
+ '\u0439' : 'y' # й
269
+ '\u043A' : 'k' # к
270
+ '\u043B' : 'l' # л
271
+ '\u043C' : 'm' # м
272
+ '\u043D' : 'n' # н
273
+ '\u043E' : 'o' # о
274
+ '\u043F' : 'p' # п
275
+ '\u0440' : 'r' # р
276
+ '\u0441' : 's' # с
277
+ '\u0442' : 't' # т
278
+ '\u0443' : 'u' # у
279
+ '\u045E' : 'w' # ў
280
+ '\u0444' : 'f' # ф
281
+ '\u0445' : 'kh' # х
282
+ '\u0446' : 'ts' # ц
283
+ '\u0447' : 'ch' # ч
284
+ '\u0448' : 'sh' # ш
285
+ '\u044B' : 'y' # ы
286
+ '\u044D' : 'e' # э
287
+ '\u044E' : 'yu' # ю
288
+ '\u044F' : 'ya' # я
289
+ '\u0491' : 'g' # ґ