interscript 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251)
  1. checksums.yaml +4 -4
  2. data/lib/interscript.rb +10 -6
  3. data/lib/interscript/fs.rb +0 -2
  4. data/lib/interscript/mapping.rb +1 -1
  5. data/lib/interscript/opal.rb +38 -8
  6. data/lib/interscript/opal/entrypoint.rb +12 -0
  7. data/lib/interscript/opal/map_translate.rb +7 -0
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +5 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +5 -1
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -1
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +9 -3
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  15. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +5 -1
  16. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +5 -1
  17. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  18. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  19. data/maps/alalc-ell-Grek-Latn-1997.yaml +5 -1
  20. data/maps/alalc-ell-Grek-Latn-2010.yaml +1 -2
  21. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  22. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  23. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  24. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  25. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -1
  26. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  27. data/maps/alalc-kor-Hang-Latn-1997.yaml +5 -1
  28. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  29. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  30. data/maps/alalc-mar-Deva-Latn-1997.yaml +21 -2
  31. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  32. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +1 -1
  33. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  34. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  35. data/maps/{alalc-pan-Deva-Latn-1997.yaml → alalc-pan-Guru-Latn-1997.yaml} +23 -4
  36. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  37. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  38. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  39. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  40. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +5 -1
  41. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +1 -1
  42. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  43. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  44. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  45. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  46. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +1 -1
  47. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  48. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  49. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  50. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  51. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  52. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -1
  53. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  54. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -1
  55. data/maps/bgn-kor-Hang-Latn-1943.yaml +7 -3
  56. data/maps/bgn-kor-Kore-Latn-1943.yaml +3 -3
  57. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  58. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  59. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +5 -1
  60. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +5 -1
  61. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  62. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +2 -2
  63. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +6 -2
  64. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +5 -1
  65. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +5 -1
  66. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  67. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  68. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +5 -1
  69. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -1
  70. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -1
  71. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  72. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  73. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +1 -1
  75. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +1 -1
  76. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +1 -1
  77. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  78. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  79. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  80. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +31 -1
  81. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  82. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  83. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  84. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  85. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  86. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  87. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  88. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  89. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -1
  90. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  91. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  92. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  93. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +1 -1
  94. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +1 -1
  95. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +1 -1
  96. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +1 -1
  97. data/maps/{bis-gjr-Gujr-Latn-13194-1991.yaml → bis-guj-Gujr-Latn-13194-1991.yaml} +17 -2
  98. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +1 -1
  99. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +1 -1
  100. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +1 -1
  101. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +1 -1
  102. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +1 -1
  103. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +1 -1
  104. data/maps/by-bel-Cyrl-Latn-1998.yaml +5 -1
  105. data/maps/by-bel-Cyrl-Latn-2007.yaml +1 -1
  106. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  107. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  108. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  109. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  110. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  111. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  112. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  113. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  114. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  115. data/maps/dos-nep-Deva-Latn-1997.yaml +15 -1
  116. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +3 -3
  117. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +2 -2
  118. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -2
  119. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -2
  120. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -1
  121. data/maps/gki-bel-Cyrl-Latn-1992.yaml +1 -1
  122. data/maps/gki-bel-Cyrl-Latn-2000.yaml +1 -1
  123. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +7 -3
  124. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  125. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  126. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -1
  127. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -1
  128. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -1
  129. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  130. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  131. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -1
  132. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  133. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -1
  134. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  135. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  136. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  137. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +8 -4
  138. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -4
  139. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  140. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  141. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  142. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  143. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  144. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  145. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  146. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  147. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  148. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  149. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  150. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  151. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  152. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  153. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  154. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  155. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  156. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  157. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  158. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -3
  159. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  160. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  161. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  162. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  163. data/maps/kp-kor-Hang-Latn-2002.yaml +25 -17
  164. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +1 -1
  165. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  166. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  167. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  168. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  169. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  170. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +2 -2
  171. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +3 -3
  172. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +2 -2
  173. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +2 -2
  174. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  175. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  176. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  177. data/maps/odni-bul-Cyrl-Latn-2015.yaml +2 -2
  178. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  179. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  180. data/maps/odni-hin-Deva-Latn-2015.yaml +1 -1
  181. data/maps/odni-kat-Geor-Latn-2015.yaml +1 -1
  182. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +1 -1
  183. data/maps/odni-kir-Cyrl-Latn-2015.yaml +1 -1
  184. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  185. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +1 -1
  186. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +1 -1
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +1 -1
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +1 -1
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +1 -1
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +1 -1
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +1 -1
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -1
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +1 -1
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +5 -1
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +1 -1
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +5 -1
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  200. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  201. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  202. data/maps/ses-ara-Arab-Latn-1930.yaml +5 -1
  203. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  204. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  205. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  206. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  207. data/maps/{ungegn-amh-Ethi-Latn-2016.yaml → un-amh-Ethi-Latn-2016.yaml} +51 -24
  208. data/maps/un-ara-Arab-Latn-1971.yaml +1 -1
  209. data/maps/un-ara-Arab-Latn-1972.yaml +1 -1
  210. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  211. data/maps/un-bel-Cyrl-Latn-2007.yaml +1 -1
  212. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  213. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +2 -2
  214. data/maps/un-ell-Grek-Latn-1987-tl.yaml +2 -2
  215. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -3
  216. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  217. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  218. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  219. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  220. data/maps/un-nep-Deva-Latn-1972.yaml +204 -17
  221. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  222. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  223. data/maps/un-ukr-Cyrl-Latn-1998.yaml +35 -12
  224. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  225. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  226. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  227. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  228. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +1 -1
  229. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  230. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  231. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -1
  232. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  233. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  234. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  235. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  236. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  237. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  238. data/maps/var-tha-Thai-Thai-phonemic.yaml +1 -1
  239. data/maps/var-tha-Thai-Zsym-ipa.yaml +1 -1
  240. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +6 -2
  241. data/spec/interscript/filenames_spec.rb +384 -0
  242. data/spec/interscript_spec.rb +7 -4
  243. metadata +105 -26
  244. data/bin/interscript +0 -41
  245. data/bin/rspec +0 -29
  246. data/bin/setup +0 -8
  247. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  248. data/lib/interscript-opal.rb +0 -2
  249. data/lib/interscript/opal_map_translate.rb +0 -12
  250. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  251. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: 1965
4
- language: ukr
4
+ language: iso-639-2:ukr
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: BGN/PCGN 1965 System
8
+ alias:
9
+ ogc11122:
10
+ code: ukr_Cyrl2Latn_BGN_1965
11
+ description: Ukrainian Board on Geographic Names/Permanent Committee on Geographical Names for British Official Use(PCGN) 1965 System
8
12
  url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
9
13
  creation_date: 1947
10
14
  confirmation_date: 2019-06
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: 2019
4
- language: ukr
4
+ language: iso-639-2:ukr
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: BGN/PCGN 2019 Agreement
@@ -113,96 +113,7 @@ tests:
113
113
  expected: Znamianka
114
114
 
115
115
  map:
116
- rules:
117
- - pattern: (?<=З|з)(Г|г)
118
- result: gh
119
- - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
120
- result: Ye
121
- - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
122
- result: ye
123
- - pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
124
- result: Yi
125
- - pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
126
- result: yi
127
- - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
128
- result: "Y"
129
- - pattern: (?<!\b\u2019)\b\u0419 # й in initial position -> y
130
- result: "y"
131
- - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
132
- result: Yu
133
- - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
134
- result: yu
135
- - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
136
- result: Ya
137
- - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
138
- result: ya
139
- - pattern: \b\u2019\b # remove ’
140
- result: ""
116
+ inherit: un-ukr-Cyrl-Latn-2012
141
117
 
142
118
  characters:
143
- "\u0410": "A" # А
144
- "\u0411": "B" # Б
145
- "\u0412": "V" # В
146
- "\u0413": "H" # Г
147
- "\u0490": "G" # Ґ
148
- "\u0414": "D" # Д
149
- "\u0415": "E" # Е
150
- "\u0404": "Ie" # Є
151
- "\u0416": "Zh" # Ж
152
- "\u0417": "Z" # З
153
- "\u0418": "Y" # И
154
- "\u0406": "I" # І
155
- "\u0407": "I" # Ї
156
- "\u0419": "I" # Й
157
- "\u041a": "K" # К
158
- "\u041b": "L" # Л
159
- "\u041c": "M" # М
160
- "\u041d": "N" # Н
161
- "\u041e": "O" # О
162
- "\u041f": "P" # П
163
- "\u0420": "R" # Р
164
- "\u0421": "S" # С
165
- "\u0422": "T" # Т
166
- "\u0423": "U" # У
167
- "\u0424": "F" # Ф
168
- "\u0425": "Kh" # Х
169
- "\u0426": "Ts" # Ц
170
- "\u0427": "Ch" # Ч
171
- "\u0428": "Sh" # Ш
172
- "\u0429": "Shch" # Щ
173
- "\u042e": "Iu" # Ю
174
- "\u042f": "Ia" # Я
175
- "\u042c": "" # Ь
176
- "\u0430": "a" # а
177
- "\u0431": "b" # б
178
- "\u0432": "v" # в
179
- "\u0433": "h" # г
180
- "\u0491": "g" # ґ
181
- "\u0434": "d" # д
182
- "\u0435": "e" # е
183
- "\u0454": "ie" # є
184
- "\u0436": "zh" # ж
185
- "\u0437": "z" # з
186
- "\u0438": "y" # и
187
- "\u0456": "i" # і
188
- "\u0457": "i" # ї
189
- "\u0439": "i" # й
190
- "\u043a": "k" # к
191
- "\u043b": "l" # л
192
- "\u043c": "m" # м
193
- "\u043d": "n" # н
194
- "\u043e": "o" # о
195
- "\u043f": "p" # п
196
- "\u0440": "r" # р
197
- "\u0441": "s" # с
198
- "\u0442": "t" # т
199
- "\u0443": "u" # у
200
- "\u0444": "f" # ф
201
- "\u0445": "kh" # х
202
- "\u0446": "ts" # ц
203
- "\u0447": "ch" # ч
204
- "\u0448": "sh" # ш
205
- "\u0449": "shch" # щ
206
- "\u044e": "iu" # ю
207
- "\u044f": "ia" # я
208
- "\u044c": "" # Ь
119
+ "\u0027": '' # ' ->
@@ -0,0 +1,127 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1979
4
+ language: iso-639-2:uzb
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BGN/PCGN Romanization System -- Uzbek Cyrillic (1979)
8
+ url: http://transliteration.eki.ee/pdf/Uzbek.pdf
9
+ creation_date: 1979
10
+
11
+ notes:
12
+ - At the beginning of a syllable, after a vowel, ъ or ь.
13
+
14
+ tests:
15
+ # https://ru.wikipedia.org/wiki/Узбекский_язык
16
+ - source: Ўзбек ёзуви
17
+ expected: Ŭzbek yozuwi
18
+ - source: Ўзбек тили
19
+ expected: Ŭzbek tili
20
+ - source: катта
21
+ expected: katta
22
+ - source: куп
23
+ expected: kup
24
+ - source: кальта
25
+ expected: kalʼta
26
+ - source: Бори элға яхшилик қилғилки, мундин яхши йўқ Ким, дегайлар даҳр аро қолди фалондин яхшилик
27
+ expected: Bori elgha yakhshilik qilghilki, mundin yakhshi yŭq Kim, degaylar dahr aro qoldi falondin yakhshilik
28
+ - source: Бахр ул-худо
29
+ expected: Bakhr ul-khudo
30
+ - source: Рисале-йи маариф-и Шейбани
31
+ expected: Risale-yi maarif-i Sheybani
32
+ - source: Карами Хакка нихоят йукдур
33
+ expected: Karami Khakka nikhoyat yukdur
34
+ - source: Йахши
35
+ expected: Yakhshi
36
+ - source: Тутук белгись
37
+ expected: Tutuk belgisʼ
38
+ - source: |
39
+ Барча одамлар эркин, қадр-қиммат ва ҳуқуқларда тенг бўлиб туғиладилар.
40
+ Улар ақл ва виждон соҳибидирлар ва бир-бирлари ила биродарларча муомала қилишлари зарур.
41
+ expected: |
42
+ Barcha odamlar erkin, qadr-qimmat wa huquqlarda teng bŭlib tughiladilar.
43
+ Ular aql wa wizhdon sohibidirlar wa bir-birlari ila birodarlarcha muomala qilishlari zarur.
44
+ - source: ПАПАПАЧУКА Респект!
45
+ expected: PAPAPACHUKA Respekt!
46
+
47
+ map:
48
+ rules:
49
+ # note[1]
50
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЪъЬь])\u0415
51
+ result: Ye
52
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЪъЬь])\u0435
53
+ result: ye
54
+
55
+ characters:
56
+ '\u0410': 'A' # А
57
+ '\u0411': 'B' # Б
58
+ '\u0412': 'W' # В
59
+ '\u0413': 'G' # Г
60
+ '\u0492': 'Gh' # Ғ
61
+ '\u0414': 'D' # Д
62
+ '\u0415': 'E' # Е
63
+ '\u0401': 'Yo' # Ё
64
+ '\u0416': 'Zh' # Ж
65
+ '\u0417': 'Z' # З
66
+ '\u0418': 'I' # И
67
+ '\u0419': 'Y' # Й
68
+ '\u041A': 'K' # К
69
+ '\u049A': 'Q' # Қ
70
+ '\u041B': 'L' # Л
71
+ '\u041C': 'M' # М
72
+ '\u041D': 'N' # Н
73
+ '\u041E': 'O' # О
74
+ '\u041F': 'P' # П
75
+ '\u0420': 'R' # Р
76
+ '\u0421': 'S' # С
77
+ '\u0422': 'T' # Т
78
+ '\u0423': 'U' # У
79
+ '\u040E': 'Ŭ' # Ў
80
+ '\u0424': 'F' # Ф
81
+ '\u0425': 'Kh' # Х
82
+ '\u04B2': 'H' # Ҳ
83
+ '\u0426': 'Ts' # Ц
84
+ '\u0427': 'Ch' # Ч
85
+ '\u0428': 'Sh' # Ш
86
+ '\u042a': "\u02BC" # Ъ
87
+ '\u042c': "\u02BC" # Ь
88
+ '\u042D': 'E' # Э
89
+ '\u042E': 'Yu' # Ю
90
+ '\u042F': 'Ya' # Я
91
+
92
+ '\u0430': 'a' # а
93
+ '\u0431': 'b' # б
94
+ '\u0432': 'w' # в
95
+ '\u0433': 'g' # г
96
+ '\u0493': 'gh' # ғ
97
+ '\u0434': 'd' # д
98
+ '\u0435': 'e' # e
99
+ '\u0451': 'yo' # ё
100
+ '\u0436': 'zh' # ж
101
+ '\u0437': 'z' # з
102
+ '\u0438': 'i' # и
103
+ '\u0439': 'y' # й
104
+ '\u043A': 'k' # к
105
+ '\u049B': 'q' # қ
106
+ '\u043B': 'l' # л
107
+ '\u043C': 'm' # м
108
+ '\u043D': 'n' # н
109
+ '\u043E': 'o' # о
110
+ '\u043F': 'p' # п
111
+ '\u0440': 'r' # р
112
+ '\u0441': 's' # с
113
+ '\u0442': 't' # т
114
+ '\u0443': 'u' # у
115
+ '\u045E': 'ŭ' # ў
116
+ '\u0444': 'f' # ф
117
+ '\u0445': 'kh' # х
118
+ '\u04B3': 'h' # ҳ
119
+ '\u0446': 'ts' # ц
120
+ '\u0447': 'ch' # ч
121
+ '\u0448': 'sh' # ш
122
+ '\u044a': "\u02BC" # ъ
123
+ '\u044c': "\u02BC" # ь
124
+ '\u044D': 'e' # э
125
+ '\u044F': 'ya' # я
126
+ '\u044E': 'yu' # ю
127
+
@@ -0,0 +1,82 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2000
4
+ language: iso-639-2:uzb
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: TABLE OF CORRESPONDENCES CYRILLIC - ROMAN BGN/PCGN 2000 Agreement
8
+ description: |
9
+ In 1995, the Uzbek government adopted the Roman alphabet to replace the existing Cyrillic alphabet.
10
+ The presentation below provides a table of correspondences between the former Cyrillic alphabet and the
11
+ current Roman alphabet. When Uzbek Roman-alphabet spellings are not available, this table can be used to
12
+ convert Uzbek Cyrillic spellings. This table of correspondences supersedes the BGN/PCGN 1979 romanization
13
+ system for Uzbek.
14
+ url: http://transliteration.eki.ee/pdf/Uzbek.pdf
15
+ creation_date: 2000
16
+ confirmation_date: 2017-11
17
+
18
+ notes:
19
+ - The letter sequence ye is used initially, after the vowel characters 1, 6, 7, 10, 16, 21, 29, 30, 31, and 32, and after characters 11 and 28.
20
+ - The Unicode encoding of the apostrophe appearing in rows 27 and 28 is U+2019. The inverted apostrophe appearing in rows 32 (o‘) and 34 (g‘) is U+2018.
21
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
22
+
23
+ tests:
24
+ # https://ru.wikipedia.org/wiki/Узбекский_язык
25
+ - source: Ўзбек ёзуви
26
+ expected: O‘zbek yozuwi
27
+ - source: Ўзбек тили
28
+ expected: O‘zbek tili
29
+ - source: катта
30
+ expected: katta
31
+ - source: куп
32
+ expected: kup
33
+ - source: кальта
34
+ expected: kal’ta
35
+ - source: Бори элға яхшилик қилғилки, мундин яхши йўқ Ким, дегайлар даҳр аро қолди фалондин яхшилик
36
+ expected: Bori elg‘a yaxshilik qilg‘ilki, mundin yaxshi yo‘q Kim, degaylar dahr aro qoldi falondin yaxshilik
37
+ - source: Бахр ул-худо
38
+ expected: Baxr ul-xudo
39
+ - source: Рисале-йи маариф-и Шейбани
40
+ expected: Risale-yi maarif-i Sheybani
41
+ - source: Карами Хакка нихоят йукдур
42
+ expected: Karami Xakka nixoyat yukdur
43
+ - source: Йахши
44
+ expected: Yaxshi
45
+ - source: Тутук белгись
46
+ expected: Tutuk belgis’
47
+ - source: |
48
+ Барча одамлар эркин, қадр-қиммат ва ҳуқуқларда тенг бўлиб туғиладилар.
49
+ Улар ақл ва виждон соҳибидирлар ва бир-бирлари ила биродарларча муомала қилишлари зарур.
50
+ expected: |
51
+ Barcha odamlar erkin, qadr-qimmat wa huquqlarda teng bo‘lib tug‘iladilar.
52
+ Ular aql wa wijdon sohibidirlar wa bir-birlari ila birodarlarcha muomala qilishlari zarur.
53
+ - source: ПАПАПАЧУКА Респект!
54
+ expected: PAPAPACHUKA Respekt!
55
+
56
+ map:
57
+ inherit: bgnpcgn-uzb-Cyrl-Latn-1979
58
+
59
+ rules:
60
+ # note[1]
61
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЙйЬь])\u0415
62
+ result: Ye
63
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЙйЬь])\u0435
64
+ result: ye
65
+
66
+ characters:
67
+ '\u0412': 'V' # В
68
+ '\u0492': "G\u2018" # Ғ
69
+ '\u0416': 'J' # Ж
70
+ '\u040E': "O\u2018" # Ў
71
+ '\u0425': 'X' # Х
72
+ '\u042a': "\u2019" # Ъ note[2]
73
+ '\u042c': "\u2019" # Ь note[2]
74
+
75
+ '\u0432': 'w' # в
76
+ '\u0493': "g\u2018" # ғ
77
+ '\u0436': 'j' # ж
78
+ '\u045E': "o\u2018" # ў
79
+ '\u0445': 'x' # х
80
+ '\u044a': "\u2019" # ъ note[2]
81
+ '\u044c': "\u2019" # ь note[2]
82
+
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: pinyin
4
- language: zho
4
+ language: iso-639-2:zho
5
5
  source_script: Hans
6
6
  destination_script: Latn
7
7
  name: ROMANIZATION OF CHINESE -- BGN/PCGN 1979 AGREEMENT
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: asm
4
+ language: iso-639-2:asm
5
5
  source_script: Beng
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Assamese Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: ben
4
+ language: iso-639-2:ben
5
5
  source_script: Beng
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Bengali Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: dev
4
+ language: iso-639-2:dev
5
5
  source_script: Deva
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Devanagri Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: gjr
4
+ language: iso-639-2:guj
5
5
  source_script: Gujr
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Gujrati Romanization
@@ -43,6 +43,8 @@ tests:
43
43
  expected: "srdār pṭēlē nkkī kryuṃ htuṃ kē kāśmīr bhārtnŏ hissŏ bnśē; 91 vrṣ phēlāṃ lāhŏr jēlmāṃ bhūkhhḍtāḷ drmiyān śhīd thyā htā jtīn dās"
44
44
  - source: "કોરોના પ્રોટોકોલ વચ્ચે આજે મેડિકલ પ્રવેશ પરીક્ષા લેવાશેઃ એન્ટ્રી ટચ ફ્રી રહેશે, એડમિટ કાર્ડ બાર કોડથી ચેક થશે"
45
45
  expected: "kŏrŏnā prŏṭŏkŏl vccē ājē mēḍikl prvēś prīkṣā lēvāśēḥ ēnṭrī ṭc phrī rhēśē, ēḍmiṭ kārḍ bār kŏḍthī cēk thśē"
46
+ - source: "૮૪૬૬૫૪૧૬૪૬૫૧"
47
+ expected: "846654164651"
46
48
 
47
49
 
48
50
 
@@ -163,4 +165,17 @@ map:
163
165
  '્': ''
164
166
  '઼': ''
165
167
  '।': '.'
166
- "‍": ''# Used for joining
168
+ "‍": ''# Used for joining
169
+
170
+ # digits
171
+
172
+ '૦': '0'
173
+ '૧': '1'
174
+ '૨': '2'
175
+ '૩': '3'
176
+ '૪': '4'
177
+ '૫': '5'
178
+ '૬': '6'
179
+ '૭': '7'
180
+ '૮': '8'
181
+ '૯': '9'
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: knd
4
+ language: iso-639-2:knd
5
5
  source_script: Knda
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Kannada Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: mlm
4
+ language: iso-639-2:mlm
5
5
  source_script: Mlym
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Malayalam Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: ori
4
+ language: iso-639-2:ori
5
5
  source_script: Orya
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Oriya Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: pnj
4
+ language: iso-639-2:pnj
5
5
  source_script: Guru
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Panjabi(Gurmukhi) Romanization