interscript 0.1.2 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/lib/g2pwrapper.py +34 -0
  4. data/lib/interscript.rb +142 -20
  5. data/lib/interscript/command.rb +28 -0
  6. data/lib/interscript/fs.rb +69 -0
  7. data/lib/interscript/mapping.rb +142 -0
  8. data/lib/interscript/opal.rb +57 -0
  9. data/lib/interscript/opal/entrypoint.rb +12 -0
  10. data/lib/interscript/opal/map_translate.rb +7 -0
  11. data/lib/interscript/opal/maps.js.erb +10 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/lib/model-7 +0 -0
  14. data/lib/tha-pt-b-7 +0 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  21. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  22. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
  25. data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
  26. data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
  27. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  28. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  29. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  30. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  31. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  32. data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
  33. data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
  34. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  35. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  36. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  37. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  38. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  39. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  40. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  41. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  42. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  43. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  44. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  45. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  46. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  47. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  48. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  49. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  50. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  51. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
  52. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  53. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  54. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  55. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
  56. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  57. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  58. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
  59. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
  60. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  61. data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
  62. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  63. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  64. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  65. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  66. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
  67. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
  68. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  69. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  70. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
  71. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
  72. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
  73. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  74. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
  75. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
  76. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  77. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  78. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
  79. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  80. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  81. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  82. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  83. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
  84. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  85. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  86. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  87. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  88. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  89. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  90. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  91. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
  92. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
  93. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  94. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  95. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
  96. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
  97. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  98. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  99. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  100. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  101. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  102. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  103. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  104. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  105. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  106. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  107. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  108. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  109. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  110. data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
  111. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  112. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  113. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  114. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  115. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  116. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  117. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  118. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  119. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  120. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  121. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  122. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  123. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  124. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  125. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  126. data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
  127. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  128. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  129. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
  130. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  131. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  132. data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
  133. data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
  134. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
  135. data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
  136. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
  137. data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
  138. data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
  139. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
  140. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  141. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  142. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  143. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
  144. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
  145. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  146. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  147. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  148. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
  149. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  150. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  151. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  152. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  153. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  154. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  155. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  156. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  157. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  158. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  159. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  160. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  161. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  162. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  163. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  164. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
  165. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  166. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  167. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  172. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  173. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  174. data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
  175. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  176. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  177. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  178. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  179. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  180. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  181. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  182. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  183. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  184. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  185. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  186. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  187. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  188. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  189. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  190. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  191. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  192. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  193. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  194. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  195. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  196. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  197. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  198. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  199. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  200. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  201. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  202. data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
  203. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  204. data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
  205. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
  206. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  207. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  208. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  209. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  210. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
  211. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  212. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  213. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  214. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  215. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  216. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  217. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  218. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  219. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
  220. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  221. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  222. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  223. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  224. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  225. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  226. data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
  227. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  228. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  229. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  230. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  231. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  232. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  233. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  234. data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
  235. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  236. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  237. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  238. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  239. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  240. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  241. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  242. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  243. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  244. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  245. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  246. data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
  247. data/spec/interscript/filenames_spec.rb +384 -0
  248. data/spec/interscript/mapping_spec.rb +42 -0
  249. data/spec/interscript_spec.rb +23 -5
  250. data/spec/spec_helper.rb +3 -1
  251. metadata +364 -34
  252. data/bin/interscript +0 -20
  253. data/bin/rspec +0 -29
  254. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  255. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  256. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  257. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  258. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  259. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  260. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
  261. data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -0,0 +1,220 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2007
4
+ language: iso-639-2:tat
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: TATAR TABLE OF CORRESPONDENCES CYRILLIC - ROMAN BGN/PCGN 2007 Agreement
8
+ alias:
9
+ ogc11122:
10
+ code: tat_Cyrl2Latn_BGN_2005
11
+ description: Tatar is an official language within Respublika Tatarstan, one of the republics of the Russian Federation.
12
+ url: https://geonames.nga.mil/gns/html/Romanization/TABLE%20OF%20CORRESPONDENCES%20FOR%20TATAR.pdf
13
+ creation_date: 2007
14
+ confirmation_date: 2017-11
15
+ description: |
16
+ Tatar is an official language within Respublika Tatarstan, one of the republics of the Russian
17
+ Federation. It will normally be encountered in Cyrillic script, in which case it should be romanized by means
18
+ of the Cyrillic-Roman table of correspondences given below.
19
+
20
+ notes:
21
+ - |
22
+ The alphabet portrayed in the above table is referred to as yaꞑalif-2.
23
+ A set of simpler characters is also encountered; this is known as zamanalif.
24
+ In this latter set, the alternative characters ä, ñ and ö are used for letters
25
+ 2, 17 and 19 respectively where the user has difficulty reproducing ə, ꞑ, and ө.
26
+ Please note that all three alternatives must be used as a set, and the letters should not be intermingled.
27
+ - Used only in borrowed words.
28
+ - The first option is used in words with back vowels, the second in words with front vowels (though this does not apply to borrowed words).
29
+ - yı/ye is used after a vowel (except и, ю), ъ and ь, also word-initially.
30
+ - w is used after a vowel.
31
+ - After ğ or q, ый is represented i.
32
+ - Э is represented ’ after a vowel in words of Arabic origin.
33
+ - Ю and Я are represented ü and a/ä respectively after и.
34
+ - |
35
+ An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters
36
+ of the basic Roman script is:
37
+ Ə (U+018F) ə (U+0259)
38
+ Ğ (U+011E) ğ (U+011F)
39
+ İ (U+0130) ı (U+0131)
40
+ Ü (U+00DC) ü (U+00FC)
41
+ Ꞑ (U+A790) ꞑ (U+A791)
42
+ Ɵ (U+019F) ɵ (U+0275)
43
+ Ç (U+00C7) ç (U+00E7)
44
+ Ş (U+015E) ş (U+015F)
45
+ Ä (U+00C4) ä (U+00E4)
46
+ ’ (U+2019)
47
+ - |
48
+ The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase
49
+ Roman letters as appropriate should be used.
50
+
51
+ tests:
52
+ - source: Гыйльмиев #
53
+ expected: Ğil’miew # note[6]: the source note gives Ğilmiev, which is incorrect according to the rules
54
+ # https://en.wikipedia.org/wiki/Tatar_alphabet
55
+ - source: баеды
56
+ expected: bayıdı # note[4]
57
+ - source: кардәш
58
+ expected: qardəş
59
+ - source: калынлык һәм аеру билгесе
60
+ expected: qalınlıq həm ayıru bilgese # note[4]
61
+ - source: |
62
+ Барлык кешеләр дә азат һәм үз абруйлары һәм хокуклары ягыннан тиң булып туалар.
63
+ Аларга акыл һәм вөҗдан бирелгән һәм бер-берсенә карата туганнарча мөнасәбәттә булырга тиешләр.
64
+ expected: |
65
+ Barlıq keşelər də azat həm üz abruyları həm xoquqları yağınnan tiꞑ bulıp tualar.
66
+ Alarğa aqıl həm wocdan birelgən həm ber-bersenə qarata tuğannarça monasəbəttə bulırğa tieşlər.
67
+ # https://www.azatliq.org/a/30820571.html
68
+ - source: Әлдермештән Әлмәндәр
69
+ expected: Əldermeştən Əlməndər
70
+ - source: Әссәламү галәйкүм
71
+ expected: Əssəlamü ğaləyküm
72
+ - source: Танымаган кешегә
73
+ expected: Tanımağan keşegə # note[3]
74
+ - source: Иң әүвәл кул бирешеп күрешик
75
+ expected: İꞑ əwwəl qul bireşep küreşiq # note[5]
76
+ - source: Ялгышмыйсың
77
+ expected: Yalğışmıysıꞑ
78
+ - source: Нәкъ үзе
79
+ expected: Nəq üze
80
+ - source: Кирәгеннән артыгын
81
+ expected: Kirəgennən artığın # note[3]
82
+ - source: мәңгелеккә килмәгән
83
+ expected: məꞑgeleqkə kilməgən
84
+ - source: кулыңны куй
85
+ expected: qulıꞑnı quy
86
+ - source: Өммия # note[8]
87
+ expected: Ommiä
88
+ - source: Җиһангир # note[3]
89
+ expected: Cihangir
90
+
91
+ map:
92
+ rules:
93
+ # note[3] http://www.hintfox.com/article/sistema-glasnih-zvykov-na-tatarskom-i-anglijskom-jazikah.html
94
+ # back vowels: у, а, ы, о
95
+ # front vowels: е, ә, и, ө, ү, э
96
+ - pattern: Г(?=[ЕеƏәИиӨөҮүЭэ])
97
+ result: G
98
+ - pattern: г(?=[ЕеƏәИиӨөҮүЭэ])
99
+ result: g
100
+ - pattern: К(?=[ЕеƏәИиӨөҮүЭэ])
101
+ result: K
102
+ - pattern: к(?=[ЕеƏәИиӨөҮүЭэ])
103
+ result: k
104
+ - pattern: Ю(?=[ЕеƏәИиӨөҮүЭэ])
105
+ result: "Y\u00FC"
106
+ - pattern: ю(?=[ЕеƏәИиӨөҮүЭэ])
107
+ result: "y\u00FC"
108
+ - pattern: Я(?=[ЕеƏәИиӨөҮүЭэ])
109
+ result: "Y\u00E4"
110
+ - pattern: я(?=[ЕеƏәИиӨөҮүЭэ])
111
+ result: "y\u00E4"
112
+
113
+ # note[4]
114
+ - pattern: (?<=[АаЕеƏәОоӨөҮүУуЫыЭэЯяЪъЬь])\u0415
115
+ result: "Y\u0131"
116
+ - pattern: (?<=[АаЕеƏәОоӨөҮүУуЫыЭэЯяЪъЬь])\u0435
117
+ result: "y\u0131"
118
+
119
+ # note[5]
120
+ - pattern: (?<=[АаЕеƏәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u0423
121
+ result: "W"
122
+ - pattern: (?<=[АаЕеƏәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u0443
123
+ result: "w"
124
+ - pattern: (?<=[АаЕеƏәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u04AE
125
+ result: "W"
126
+ - pattern: (?<=[АаЕеƏәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u04AF
127
+ result: "w"
128
+
129
+ # note[6]
130
+ - pattern: (?<=[Гг])ый
131
+ result: i
132
+
133
+ # note[8]
134
+ - pattern: (?<=[Ии])\u042E
135
+ result: "\u00DC"
136
+ - pattern: (?<=[Ии])\u044E
137
+ result: "\u00FC"
138
+ - pattern: (?<=[Ии])\u042F
139
+ result: "\u00C4"
140
+ - pattern: (?<=[Ии])\u044F
141
+ result: "\u00E4"
142
+
143
+ characters:
144
+ "\u0410": 'A' # А
145
+ "\u04D8": ["\u018F", "\u00C4"] # Ә => [Ə, Ä] note[1]
146
+ "\u0411": 'B' # Б
147
+ "\u0412": ['W', 'V'] # В note[2]
148
+ "\u0413": "\u011E" # Г => Ğ note[3]
149
+ "\u0414": 'D' # Д
150
+ "\u0415": 'E' # Е note[3] note[4]
151
+ "\u0416": 'J' # Ж
152
+ "\u0496": 'C' # Җ
153
+ "\u0417": 'Z' # З
154
+ "\u0418": "\u0130" # И => İ
155
+ "\u0419": 'Y' # Й
156
+ "\u041A": 'Q' # К note[3]
157
+ "\u041B": 'L' # Л
158
+ "\u041C": 'M' # М
159
+ "\u041D": 'N' # Н
160
+ "\u04A2": ["\uA790", "\u00D1"] # Ң => [Ꞑ, Ñ] note[1]
161
+ "\u041E": 'O' # О
162
+ "\u04E8": ['O', "\u00D6"] # Ө => [O, Ö] note[1]
163
+ "\u041F": 'P' # П
164
+ "\u0420": 'R' # Р
165
+ "\u0421": 'S' # С
166
+ "\u0422": 'T' # Т
167
+ "\u0423": 'U' # У note[5]
168
+ "\u04AE": "\u00DC" # Ү => Ü note[5]
169
+ "\u0424": 'F' # Ф
170
+ "\u0425": 'X' # Х => X
171
+ "\u04BA": 'H' # Һ
172
+ "\u0426": 'Ts' # Ц
173
+ "\u0427": "\u00C7" # Ч => Ç
174
+ "\u0428": "\u015E" # Ш => Ş
175
+ "\u0429": "\u015E\u00C7" # Щ => ŞÇ
176
+ "\u042A": '' # Ъ
177
+ "\u042B": 'I' # Ы => I note[2] note[6]
178
+ "\u042C": "\u2019" # Ь => ’
179
+ "\u042D": 'E' # Э note[7]
180
+ "\u042E": 'Yu' # Ю note[3] note[8]
181
+ "\u042F": 'Ya' # Я note[3] note[8]
182
+
183
+ '\u0430': 'a' # а
184
+ '\u04D9': ["\u0259", "\u00E4"] # ә => [ə, ä] note[1]
185
+ '\u0431': 'b' # б
186
+ '\u0432': ['w', 'v'] # в note[2]
187
+ '\u0433': "\u011F" # г => ğ note[3]
188
+ '\u0434': 'd' # д
189
+ '\u0435': 'e' # e note[3] note[4]
190
+ '\u0436': 'j' # ж
191
+ '\u0497': 'c' # җ
192
+ '\u0437': 'z' # з
193
+ '\u0438': 'i' # и
194
+ '\u0439': 'y' # й
195
+ '\u043A': 'q' # к note[3]
196
+ '\u043B': 'l' # л
197
+ '\u043C': 'm' # м
198
+ '\u043D': 'n' # н
199
+ '\u04A3': ["\uA791", "\u00F1"] # ң => [ꞑ, ñ] note[1]
200
+ '\u043E': 'o' # о
201
+ '\u04E9': ['o', "\u00F6"] # ө => [o, ö] note[1]
202
+ '\u043F': 'p' # п
203
+ '\u0440': 'r' # р
204
+ '\u0441': 's' # с
205
+ '\u0442': 't' # т
206
+ '\u0443': 'u' # у note[5]
207
+ '\u04AF': "\u00FC" # ү => ü note[5]
208
+ '\u0444': 'f' # ф
209
+ '\u0445': 'x' # х
210
+ '\u04BB': 'h' # һ
211
+ '\u0446': 'ts' # ц
212
+ '\u0447': "\u00E7" # ч => ç
213
+ '\u0448': "\u015F" # ш => ş
214
+ '\u0449': "\u015F\u00E7" # щ => şç
215
+ "\u044a": '' # ъ
216
+ '\u044B': "\u0131" # ы => ı note[2] note[6]
217
+ "\u044C": "\u2019" # ь => ’
218
+ '\u044D': 'e' # э note[7]
219
+ '\u044E': 'yu' # ю note[3] note[8]
220
+ '\u044F': 'ya' # я note[3] note[8]
@@ -0,0 +1,240 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1994
4
+ language: iso-639-2:tgk
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BGN/PCGN Romanization System -- Tajik (1994)
8
+ alias:
9
+ ogc11122:
10
+ code: tgk_Cyrl2Latn_BGN_1994
11
+ description: The BGN/PCGN system for Tajik was designed for use in romanizing names written in the Tajik Cyrillic alphabet.
12
+ url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20TAJIK.pdf
13
+ creation_date: 1994
14
+ confirmation_date: 2017-11
15
+ description: |
16
+ The BGN/PCGN system for Tajik was designed for use in romanizing names written in the Tajik Cyrillic alphabet.
17
+ The Tajik Cyrillic alphabet contains six characters not present in the Russian alphabet: ғ, ӣ, қ, ӯ, ҳ and ҷ. An
18
+ orthographic reform of the Tajik Cyrillic alphabet was implemented under the auspices of the Academy of Sciences of
19
+ Tajikistan. This reform was promulgated in a decree of 3 September 1998 by the government of the Republic of
20
+ Tajikistan. The reform abolished the characters ц, щ, ь and ы (see notes 2 through 5).
21
+
22
+ notes:
23
+ - |
24
+ The character sequences гҳ , зҳ , кҳ , and сҳ may be romanized g·h, z·h, k·h, and s·h in order
25
+ to differentiate those romanizations from the digraphs gh, zh, kh, and sh which are used to
26
+ render the characters ғ, ж, x, and ш.
27
+ - |
28
+ The obsolete character ц, abolished in 1998, should be romanized s (before a vowel and/or
29
+ after a consonant within a word) or ts intervocalically.
30
+ - The obsolete character щ, replaced by ш in 1998, should be romanized sh.
31
+ - The obsolete character ь, abolished in 1998, should not be romanized.
32
+ - The obsolete character ы, replaced by и in 1998, should be romanized i.
33
+ - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character.
34
+ - |
35
+ An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
36
+ unmodified letters of the basic Roman script is:
37
+ All apostrophes appearing in romanization are U+2019
38
+ Í (U+00CD) í (U+00ED)
39
+ Ŭ (U+016C) ŭ (U+016D)
40
+ Ė (U+0116) ė (U+0117)
41
+ - |
42
+ The Romanization column shows only lowercase forms but, when romanizing, uppercase and
43
+ lowercase Roman letters as appropriate should be used.
44
+
45
+ tests:
46
+ # https://ru.wikipedia.org/wiki/Таджикская_письменность#Образцы_записи
47
+ - source: |
48
+ Тамоми одамон озод ба дунё меоянд ва аз лиҳози манзилату ҳуқуқ бо ҳам баробаранд.
49
+ Ҳама соҳиби ақлу виҷдонанд, бояд нисбат ба якдигар бародарвор муносабат намоянд.
50
+ expected: |
51
+ Tamomi odamon ozod ba dunyo meoyand va az lihozi manzilatu huquq bo ham barobarand.
52
+ Hama sohibi aqlu vijdonand, boyad nisbat ba yakdigar barodarvor munosabat namoyand.
53
+ - source: Баниодам аъзои як пайкаранд, ки дар офариниш зи як гавҳаранд. Чу узве ба дард оварад рӯзгор, дигар узвҳоро намонад қарор.
54
+ expected: Baniodam a’zoi yak paykarand, ki dar ofarinish zi yak gavharand. Chu uzve ba dard ovarad rŭzgor, digar uzvhoro namonad qaror.
55
+ - source: Саъдӣ
56
+ expected: Sa’dí
57
+ - source: Мурда будам, зинда шудам; гиря будам, xанда шудам. Давлати ишқ омаду ман давлати поянда шудам.
58
+ expected: Murda budam, zinda shudam; girya budam, xanda shudam. Davlati ishq omadu man davlati poyanda shudam.
59
+ - source: Мавлавӣ
60
+ expected: Mavlaví
61
+ - source: санг
62
+ expected: sang
63
+ - source: барг
64
+ expected: barg
65
+ - source: номвар
66
+ expected: nomvar
67
+ - source: Бағдод
68
+ expected: Baghdod
69
+ - source: ғор
70
+ expected: ghor
71
+ - source: модар
72
+ expected: modar
73
+ - source: меравам
74
+ expected: meravam
75
+ - source: дарё
76
+ expected: daryo
77
+ - source: осиёб
78
+ expected: osiyob
79
+ - source: жола
80
+ expected: zhola
81
+ - source: каждум
82
+ expected: kazhdum
83
+ - source: баъз
84
+ expected: ba’z
85
+ - source: назар
86
+ expected: nazar
87
+ - source: заҳоб
88
+ expected: zahob
89
+ - source: ихтиёр
90
+ expected: ikhtiyor
91
+ - source: зебоӣ
92
+ expected: zeboí
93
+ - source: май
94
+ expected: may
95
+ - source: кадом
96
+ expected: kadom
97
+ - source: қадам
98
+ expected: qadam
99
+ - source: лола
100
+ expected: lola
101
+ - source: мурдагӣ
102
+ expected: murdagí
103
+ - source: нон
104
+ expected: non
105
+ - source: орзу
106
+ expected: orzu
107
+ - source: панҷ
108
+ expected: panj
109
+ - source: ранг
110
+ expected: rang
111
+ - source: сар
112
+ expected: sar
113
+ - source: субҳ
114
+ expected: subh
115
+ - source: сурайё
116
+ expected: surayyo
117
+ - source: тоҷик
118
+ expected: tojik
119
+ - source: талаб
120
+ expected: talab
121
+ - source: дуд
122
+ expected: dud
123
+ - source: хӯрдан
124
+ expected: khŭrdan
125
+ - source: фурӯғ
126
+ expected: furŭgh
127
+ - source: хондан
128
+ expected: khondan
129
+ - source: ҳофиз
130
+ expected: hofiz
131
+ - source: чӣ
132
+ expected: chí
133
+ - source: ҷанг
134
+ expected: jang
135
+ - source: шаб
136
+ expected: shab
137
+ - source: таъриф
138
+ expected: ta’rif
139
+ - source: эй
140
+ expected: ėy
141
+ - source: июн
142
+ expected: iyun
143
+ - source: ягонагӣ
144
+ expected: yagonagí
145
+ - source: РАМЗҲО
146
+ expected: RAMZ·HO
147
+
148
+ map:
149
+ rules:
150
+ # note[1]
151
+ - pattern: ([ГгЗзКкСс])\u04B3
152
+ result: "\\1·h"
153
+ - pattern: ([ГгЗзКкСс])\u04B2
154
+ result: "\\1·H"
155
+ # note[2]
156
+ - pattern: \u0426(?=[АаЕеЁёИиОоУуЫыЭэЮюЯя])
157
+ result: S
158
+ - pattern: \u0446(?=[АаЕеЁёИиОоУуЫыЭэЮюЯя])
159
+ result: s
160
+
161
+ characters:
162
+ "\u0410": 'A' # А
163
+ "\u0411": 'B' # Б
164
+ "\u0412": 'V' # В
165
+ "\u0413": 'G' # Г
166
+ "\u0492": 'Gh' # Ғ
167
+ "\u0414": 'D' # Д
168
+ "\u0415": 'E' # Е
169
+ "\u0401": 'Yo' # Ё
170
+ "\u0416": 'Zh' # Ж
171
+ "\u0417": 'Z' # З
172
+ "\u0418": 'I' # И
173
+ "\u04E2": "\u00CD" # Ӣ => Í
174
+ "\u0419": 'Y' # Й
175
+ "\u041A": 'K' # К
176
+ "\u049A": 'Q' # Қ
177
+ "\u041B": 'L' # Л
178
+ "\u041C": 'M' # М
179
+ "\u041D": 'N' # Н
180
+ "\u041E": 'O' # О
181
+ "\u041F": 'P' # П
182
+ "\u0420": 'R' # Р
183
+ "\u0421": 'S' # С
184
+ "\u0422": 'T' # Т
185
+ "\u0423": 'U' # У
186
+ "\u04EE": "\u016C" # Ӯ => Ŭ
187
+ "\u0424": 'F' # Ф
188
+ "\u0425": 'Kh' # Х
189
+ "\u04B2": 'H' # Ҳ
190
+ "\u0427": 'Ch' # Ч
191
+ "\u04B6": 'J' # Ҷ
192
+ "\u0426": 'Ts' # Ц note[2]
193
+ "\u0428": 'Sh' # Ш
194
+ "\u0429": 'Sh' # Щ note[3]
195
+ "\u042A": "\u2019" # Ъ
196
+ "\u042B": 'I' # Ы note[5]
197
+ "\u042C": '' # Ь note[4]
198
+ "\u042D": "\u0116" # Э => Ė
199
+ "\u042E": 'Yu' # Ю
200
+ "\u042F": 'Ya' # Я
201
+
202
+ "\u0430": 'a' # а
203
+ "\u0431": 'b' # б
204
+ "\u0432": 'v' # в
205
+ "\u0433": 'g' # г
206
+ "\u0493": 'gh' # ғ
207
+ "\u0434": 'd' # д
208
+ "\u0435": 'e' # e
209
+ "\u0451": 'yo' # ё
210
+ "\u0436": 'zh' # ж
211
+ "\u0437": 'z' # з
212
+ "\u0438": 'i' # и
213
+ "\u04E3": "\u00ED" # ӣ => í
214
+ "\u0439": 'y' # й
215
+ "\u043A": 'k' # к
216
+ "\u049B": 'q' # қ
217
+ "\u043B": 'l' # л
218
+ "\u043C": 'm' # м
219
+ "\u043D": 'n' # н
220
+ "\u043E": 'o' # о
221
+ "\u043F": 'p' # п
222
+ "\u0440": 'r' # р
223
+ "\u0441": 's' # с
224
+ "\u0442": 't' # т
225
+ "\u0443": 'u' # у
226
+ "\u04EF": "\u016D" # ӯ => ŭ
227
+ "\u0444": 'f' # ф
228
+ "\u0445": 'kh' # х
229
+ "\u04B3": 'h' # ҳ
230
+ "\u0447": 'ch' # ч
231
+ "\u04B7": 'j' # ҷ
232
+ "\u0446": 'ts' # ц note[2]
233
+ "\u0448": 'sh' # ш
234
+ "\u0449": 'sh' # щ note[3]
235
+ "\u044a": "\u2019" # ъ
236
+ "\u044B": 'i' # ы note[5]
237
+ "\u044C": '' # ь note[4]
238
+ "\u044D": "\u0117" # э => ė
239
+ "\u044E": 'yu' # ю
240
+ "\u044F": 'ya' # я