interscript 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (251) hide show
  1. checksums.yaml +4 -4
  2. data/lib/interscript.rb +10 -6
  3. data/lib/interscript/fs.rb +0 -2
  4. data/lib/interscript/mapping.rb +1 -1
  5. data/lib/interscript/opal.rb +38 -8
  6. data/lib/interscript/opal/entrypoint.rb +12 -0
  7. data/lib/interscript/opal/map_translate.rb +7 -0
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +5 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +5 -1
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -1
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +9 -3
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  15. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +5 -1
  16. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +5 -1
  17. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  18. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  19. data/maps/alalc-ell-Grek-Latn-1997.yaml +5 -1
  20. data/maps/alalc-ell-Grek-Latn-2010.yaml +1 -2
  21. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  22. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  23. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  24. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  25. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -1
  26. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  27. data/maps/alalc-kor-Hang-Latn-1997.yaml +5 -1
  28. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  29. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  30. data/maps/alalc-mar-Deva-Latn-1997.yaml +21 -2
  31. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  32. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +1 -1
  33. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  34. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  35. data/maps/{alalc-pan-Deva-Latn-1997.yaml → alalc-pan-Guru-Latn-1997.yaml} +23 -4
  36. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  37. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  38. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  39. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  40. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +5 -1
  41. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +1 -1
  42. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  43. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  44. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  45. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  46. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +1 -1
  47. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  48. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  49. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  50. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  51. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  52. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -1
  53. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  54. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -1
  55. data/maps/bgn-kor-Hang-Latn-1943.yaml +7 -3
  56. data/maps/bgn-kor-Kore-Latn-1943.yaml +3 -3
  57. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  58. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  59. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +5 -1
  60. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +5 -1
  61. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  62. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +2 -2
  63. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +6 -2
  64. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +5 -1
  65. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +5 -1
  66. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  67. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  68. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +5 -1
  69. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -1
  70. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -1
  71. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  72. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  73. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +1 -1
  75. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +1 -1
  76. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +1 -1
  77. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  78. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  79. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  80. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +31 -1
  81. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  82. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  83. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  84. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  85. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  86. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  87. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  88. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  89. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -1
  90. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  91. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  92. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  93. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +1 -1
  94. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +1 -1
  95. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +1 -1
  96. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +1 -1
  97. data/maps/{bis-gjr-Gujr-Latn-13194-1991.yaml → bis-guj-Gujr-Latn-13194-1991.yaml} +17 -2
  98. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +1 -1
  99. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +1 -1
  100. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +1 -1
  101. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +1 -1
  102. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +1 -1
  103. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +1 -1
  104. data/maps/by-bel-Cyrl-Latn-1998.yaml +5 -1
  105. data/maps/by-bel-Cyrl-Latn-2007.yaml +1 -1
  106. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  107. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  108. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  109. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  110. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  111. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  112. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  113. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  114. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  115. data/maps/dos-nep-Deva-Latn-1997.yaml +15 -1
  116. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +3 -3
  117. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +2 -2
  118. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -2
  119. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -2
  120. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -1
  121. data/maps/gki-bel-Cyrl-Latn-1992.yaml +1 -1
  122. data/maps/gki-bel-Cyrl-Latn-2000.yaml +1 -1
  123. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +7 -3
  124. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  125. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  126. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -1
  127. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -1
  128. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -1
  129. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  130. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  131. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -1
  132. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  133. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -1
  134. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  135. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  136. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  137. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +8 -4
  138. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -4
  139. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  140. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  141. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  142. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  143. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  144. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  145. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  146. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  147. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  148. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  149. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  150. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  151. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  152. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  153. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  154. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  155. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  156. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  157. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  158. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -3
  159. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  160. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  161. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  162. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  163. data/maps/kp-kor-Hang-Latn-2002.yaml +25 -17
  164. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +1 -1
  165. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  166. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  167. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  168. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  169. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  170. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +2 -2
  171. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +3 -3
  172. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +2 -2
  173. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +2 -2
  174. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  175. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  176. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  177. data/maps/odni-bul-Cyrl-Latn-2015.yaml +2 -2
  178. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  179. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  180. data/maps/odni-hin-Deva-Latn-2015.yaml +1 -1
  181. data/maps/odni-kat-Geor-Latn-2015.yaml +1 -1
  182. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +1 -1
  183. data/maps/odni-kir-Cyrl-Latn-2015.yaml +1 -1
  184. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  185. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +1 -1
  186. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +1 -1
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +1 -1
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +1 -1
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +1 -1
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +1 -1
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +1 -1
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -1
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +1 -1
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +5 -1
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +1 -1
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +5 -1
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  200. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  201. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  202. data/maps/ses-ara-Arab-Latn-1930.yaml +5 -1
  203. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  204. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  205. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  206. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  207. data/maps/{ungegn-amh-Ethi-Latn-2016.yaml → un-amh-Ethi-Latn-2016.yaml} +51 -24
  208. data/maps/un-ara-Arab-Latn-1971.yaml +1 -1
  209. data/maps/un-ara-Arab-Latn-1972.yaml +1 -1
  210. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  211. data/maps/un-bel-Cyrl-Latn-2007.yaml +1 -1
  212. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  213. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +2 -2
  214. data/maps/un-ell-Grek-Latn-1987-tl.yaml +2 -2
  215. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -3
  216. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  217. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  218. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  219. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  220. data/maps/un-nep-Deva-Latn-1972.yaml +204 -17
  221. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  222. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  223. data/maps/un-ukr-Cyrl-Latn-1998.yaml +35 -12
  224. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  225. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  226. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  227. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  228. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +1 -1
  229. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  230. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  231. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -1
  232. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  233. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  234. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  235. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  236. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  237. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  238. data/maps/var-tha-Thai-Thai-phonemic.yaml +1 -1
  239. data/maps/var-tha-Thai-Zsym-ipa.yaml +1 -1
  240. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +6 -2
  241. data/spec/interscript/filenames_spec.rb +384 -0
  242. data/spec/interscript_spec.rb +7 -4
  243. metadata +105 -26
  244. data/bin/interscript +0 -41
  245. data/bin/rspec +0 -29
  246. data/bin/setup +0 -8
  247. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  248. data/lib/interscript-opal.rb +0 -2
  249. data/lib/interscript/opal_map_translate.rb +0 -12
  250. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  251. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: 1965
4
- language: ukr
4
+ language: iso-639-2:ukr
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: BGN/PCGN 1965 System
8
+ alias:
9
+ ogc11122:
10
+ code: ukr_Cyrl2Latn_BGN_1965
11
+ description: Ukrainian Board on Geographic Names/Permanent Committee on Geographical Names for British Official Use(PCGN) 1965 System
8
12
  url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
9
13
  creation_date: 1947
10
14
  confirmation_date: 2019-06
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: 2019
4
- language: ukr
4
+ language: iso-639-2:ukr
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: BGN/PCGN 2019 Agreement
@@ -113,96 +113,7 @@ tests:
113
113
  expected: Znamianka
114
114
 
115
115
  map:
116
- rules:
117
- - pattern: (?<=З|з)(Г|г)
118
- result: gh
119
- - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
120
- result: Ye
121
- - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
122
- result: ye
123
- - pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
124
- result: Yi
125
- - pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
126
- result: yi
127
- - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
128
- result: "Y"
129
- - pattern: (?<!\b\u2019)\b\u0419 # й in initial position -> y
130
- result: "y"
131
- - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
132
- result: Yu
133
- - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
134
- result: yu
135
- - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
136
- result: Ya
137
- - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
138
- result: ya
139
- - pattern: \b\u2019\b # remove ’
140
- result: ""
116
+ inherit: un-ukr-Cyrl-Latn-2012
141
117
 
142
118
  characters:
143
- "\u0410": "A" # А
144
- "\u0411": "B" # Б
145
- "\u0412": "V" # В
146
- "\u0413": "H" # Г
147
- "\u0490": "G" # Ґ
148
- "\u0414": "D" # Д
149
- "\u0415": "E" # Е
150
- "\u0404": "Ie" # Є
151
- "\u0416": "Zh" # Ж
152
- "\u0417": "Z" # З
153
- "\u0418": "Y" # И
154
- "\u0406": "I" # І
155
- "\u0407": "I" # Ї
156
- "\u0419": "I" # Й
157
- "\u041a": "K" # К
158
- "\u041b": "L" # Л
159
- "\u041c": "M" # М
160
- "\u041d": "N" # Н
161
- "\u041e": "O" # О
162
- "\u041f": "P" # П
163
- "\u0420": "R" # Р
164
- "\u0421": "S" # С
165
- "\u0422": "T" # Т
166
- "\u0423": "U" # У
167
- "\u0424": "F" # Ф
168
- "\u0425": "Kh" # Х
169
- "\u0426": "Ts" # Ц
170
- "\u0427": "Ch" # Ч
171
- "\u0428": "Sh" # Ш
172
- "\u0429": "Shch" # Щ
173
- "\u042e": "Iu" # Ю
174
- "\u042f": "Ia" # Я
175
- "\u042c": "" # Ь
176
- "\u0430": "a" # а
177
- "\u0431": "b" # б
178
- "\u0432": "v" # в
179
- "\u0433": "h" # г
180
- "\u0491": "g" # ґ
181
- "\u0434": "d" # д
182
- "\u0435": "e" # е
183
- "\u0454": "ie" # є
184
- "\u0436": "zh" # ж
185
- "\u0437": "z" # з
186
- "\u0438": "y" # и
187
- "\u0456": "i" # і
188
- "\u0457": "i" # ї
189
- "\u0439": "i" # й
190
- "\u043a": "k" # к
191
- "\u043b": "l" # л
192
- "\u043c": "m" # м
193
- "\u043d": "n" # н
194
- "\u043e": "o" # о
195
- "\u043f": "p" # п
196
- "\u0440": "r" # р
197
- "\u0441": "s" # с
198
- "\u0442": "t" # т
199
- "\u0443": "u" # у
200
- "\u0444": "f" # ф
201
- "\u0445": "kh" # х
202
- "\u0446": "ts" # ц
203
- "\u0447": "ch" # ч
204
- "\u0448": "sh" # ш
205
- "\u0449": "shch" # щ
206
- "\u044e": "iu" # ю
207
- "\u044f": "ia" # я
208
- "\u044c": "" # Ь
119
+ "\u0027": '' # ' ->
@@ -0,0 +1,127 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1979
4
+ language: iso-639-2:uzb
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BGN/PCGN Romanization System -- Uzbek Cyrillic (1979)
8
+ url: http://transliteration.eki.ee/pdf/Uzbek.pdf
9
+ creation_date: 1979
10
+
11
+ notes:
12
+ - At the beginning of a syllable, after a vowel, ъ or ь.
13
+
14
+ tests:
15
+ # https://ru.wikipedia.org/wiki/Узбекский_язык
16
+ - source: Ўзбек ёзуви
17
+ expected: Ŭzbek yozuwi
18
+ - source: Ўзбек тили
19
+ expected: Ŭzbek tili
20
+ - source: катта
21
+ expected: katta
22
+ - source: куп
23
+ expected: kup
24
+ - source: кальта
25
+ expected: kalʼta
26
+ - source: Бори элға яхшилик қилғилки, мундин яхши йўқ Ким, дегайлар даҳр аро қолди фалондин яхшилик
27
+ expected: Bori elgha yakhshilik qilghilki, mundin yakhshi yŭq Kim, degaylar dahr aro qoldi falondin yakhshilik
28
+ - source: Бахр ул-худо
29
+ expected: Bakhr ul-khudo
30
+ - source: Рисале-йи маариф-и Шейбани
31
+ expected: Risale-yi maarif-i Sheybani
32
+ - source: Карами Хакка нихоят йукдур
33
+ expected: Karami Khakka nikhoyat yukdur
34
+ - source: Йахши
35
+ expected: Yakhshi
36
+ - source: Тутук белгись
37
+ expected: Tutuk belgisʼ
38
+ - source: |
39
+ Барча одамлар эркин, қадр-қиммат ва ҳуқуқларда тенг бўлиб туғиладилар.
40
+ Улар ақл ва виждон соҳибидирлар ва бир-бирлари ила биродарларча муомала қилишлари зарур.
41
+ expected: |
42
+ Barcha odamlar erkin, qadr-qimmat wa huquqlarda teng bŭlib tughiladilar.
43
+ Ular aql wa wizhdon sohibidirlar wa bir-birlari ila birodarlarcha muomala qilishlari zarur.
44
+ - source: ПАПАПАЧУКА Респект!
45
+ expected: PAPAPACHUKA Respekt!
46
+
47
+ map:
48
+ rules:
49
+ # note[1]
50
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЪъЬь])\u0415
51
+ result: Ye
52
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЪъЬь])\u0435
53
+ result: ye
54
+
55
+ characters:
56
+ '\u0410': 'A' # А
57
+ '\u0411': 'B' # Б
58
+ '\u0412': 'W' # В
59
+ '\u0413': 'G' # Г
60
+ '\u0492': 'Gh' # Ғ
61
+ '\u0414': 'D' # Д
62
+ '\u0415': 'E' # Е
63
+ '\u0401': 'Yo' # Ё
64
+ '\u0416': 'Zh' # Ж
65
+ '\u0417': 'Z' # З
66
+ '\u0418': 'I' # И
67
+ '\u0419': 'Y' # Й
68
+ '\u041A': 'K' # К
69
+ '\u049A': 'Q' # Қ
70
+ '\u041B': 'L' # Л
71
+ '\u041C': 'M' # М
72
+ '\u041D': 'N' # Н
73
+ '\u041E': 'O' # О
74
+ '\u041F': 'P' # П
75
+ '\u0420': 'R' # Р
76
+ '\u0421': 'S' # С
77
+ '\u0422': 'T' # Т
78
+ '\u0423': 'U' # У
79
+ '\u040E': 'Ŭ' # Ў
80
+ '\u0424': 'F' # Ф
81
+ '\u0425': 'Kh' # Х
82
+ '\u04B2': 'H' # Ҳ
83
+ '\u0426': 'Ts' # Ц
84
+ '\u0427': 'Ch' # Ч
85
+ '\u0428': 'Sh' # Ш
86
+ '\u042a': "\u02BC" # Ъ
87
+ '\u042c': "\u02BC" # Ь
88
+ '\u042D': 'E' # Э
89
+ '\u042E': 'Yu' # Ю
90
+ '\u042F': 'Ya' # Я
91
+
92
+ '\u0430': 'a' # а
93
+ '\u0431': 'b' # б
94
+ '\u0432': 'w' # в
95
+ '\u0433': 'g' # г
96
+ '\u0493': 'gh' # ғ
97
+ '\u0434': 'd' # д
98
+ '\u0435': 'e' # e
99
+ '\u0451': 'yo' # ё
100
+ '\u0436': 'zh' # ж
101
+ '\u0437': 'z' # з
102
+ '\u0438': 'i' # и
103
+ '\u0439': 'y' # й
104
+ '\u043A': 'k' # к
105
+ '\u049B': 'q' # қ
106
+ '\u043B': 'l' # л
107
+ '\u043C': 'm' # м
108
+ '\u043D': 'n' # н
109
+ '\u043E': 'o' # о
110
+ '\u043F': 'p' # п
111
+ '\u0440': 'r' # р
112
+ '\u0441': 's' # с
113
+ '\u0442': 't' # т
114
+ '\u0443': 'u' # у
115
+ '\u045E': 'ŭ' # ў
116
+ '\u0444': 'f' # ф
117
+ '\u0445': 'kh' # х
118
+ '\u04B3': 'h' # ҳ
119
+ '\u0446': 'ts' # ц
120
+ '\u0447': 'ch' # ч
121
+ '\u0448': 'sh' # ш
122
+ '\u044a': "\u02BC" # ъ
123
+ '\u044c': "\u02BC" # ь
124
+ '\u044D': 'e' # э
125
+ '\u044F': 'ya' # я
126
+ '\u044E': 'yu' # ю
127
+
@@ -0,0 +1,82 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2000
4
+ language: iso-639-2:uzb
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: TABLE OF CORRESPONDENCES CYRILLIC - ROMAN BGN/PCGN 2000 Agreement
8
+ description: |
9
+ In 1995, the Uzbek government adopted the Roman alphabet to replace the existing Cyrillic alphabet.
10
+ The presentation below provides a table of correspondences between the former Cyrillic alphabet and the
11
+ current Roman alphabet. When Uzbek Roman-alphabet spellings are not available, this table can be used to
12
+ convert Uzbek Cyrillic spellings. This table of correspondences supersedes the BGN/PCGN 1979 romanization
13
+ system for Uzbek.
14
+ url: http://transliteration.eki.ee/pdf/Uzbek.pdf
15
+ creation_date: 2000
16
+ confirmation_date: 2017-11
17
+
18
+ notes:
19
+ - The letter sequence ye is used initially, after the vowel characters 1, 6, 7, 10, 16, 21, 29, 30, 31, and 32, and after characters 11 and 28.
20
+ - The Unicode encoding of the apostrophe appearing in rows 27 and 28 is U+2019. The inverted apostrophe appearing in rows 32 (o‘) and 34 (g‘) is U+2018.
21
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
22
+
23
+ tests:
24
+ # https://ru.wikipedia.org/wiki/Узбекский_язык
25
+ - source: Ўзбек ёзуви
26
+ expected: O‘zbek yozuwi
27
+ - source: Ўзбек тили
28
+ expected: O‘zbek tili
29
+ - source: катта
30
+ expected: katta
31
+ - source: куп
32
+ expected: kup
33
+ - source: кальта
34
+ expected: kal’ta
35
+ - source: Бори элға яхшилик қилғилки, мундин яхши йўқ Ким, дегайлар даҳр аро қолди фалондин яхшилик
36
+ expected: Bori elg‘a yaxshilik qilg‘ilki, mundin yaxshi yo‘q Kim, degaylar dahr aro qoldi falondin yaxshilik
37
+ - source: Бахр ул-худо
38
+ expected: Baxr ul-xudo
39
+ - source: Рисале-йи маариф-и Шейбани
40
+ expected: Risale-yi maarif-i Sheybani
41
+ - source: Карами Хакка нихоят йукдур
42
+ expected: Karami Xakka nixoyat yukdur
43
+ - source: Йахши
44
+ expected: Yaxshi
45
+ - source: Тутук белгись
46
+ expected: Tutuk belgis’
47
+ - source: |
48
+ Барча одамлар эркин, қадр-қиммат ва ҳуқуқларда тенг бўлиб туғиладилар.
49
+ Улар ақл ва виждон соҳибидирлар ва бир-бирлари ила биродарларча муомала қилишлари зарур.
50
+ expected: |
51
+ Barcha odamlar erkin, qadr-qimmat wa huquqlarda teng bo‘lib tug‘iladilar.
52
+ Ular aql wa wijdon sohibidirlar wa bir-birlari ila birodarlarcha muomala qilishlari zarur.
53
+ - source: ПАПАПАЧУКА Респект!
54
+ expected: PAPAPACHUKA Respekt!
55
+
56
+ map:
57
+ inherit: bgnpcgn-uzb-Cyrl-Latn-1979
58
+
59
+ rules:
60
+ # note[1]
61
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЙйЬь])\u0415
62
+ result: Ye
63
+ - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЙйЬь])\u0435
64
+ result: ye
65
+
66
+ characters:
67
+ '\u0412': 'V' # В
68
+ '\u0492': "G\u2018" # Ғ
69
+ '\u0416': 'J' # Ж
70
+ '\u040E': "O\u2018" # Ў
71
+ '\u0425': 'X' # Х
72
+ '\u042a': "\u2019" # Ъ note[2]
73
+ '\u042c': "\u2019" # Ь note[2]
74
+
75
+ '\u0432': 'w' # в
76
+ '\u0493': "g\u2018" # ғ
77
+ '\u0436': 'j' # ж
78
+ '\u045E': "o\u2018" # ў
79
+ '\u0445': 'x' # х
80
+ '\u044a': "\u2019" # ъ note[2]
81
+ '\u044c': "\u2019" # ь note[2]
82
+
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bgnpcgn
3
3
  id: pinyin
4
- language: zho
4
+ language: iso-639-2:zho
5
5
  source_script: Hans
6
6
  destination_script: Latn
7
7
  name: ROMANIZATION OF CHINESE -- BGN/PCGN 1979 AGREEMENT
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: asm
4
+ language: iso-639-2:asm
5
5
  source_script: Beng
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Assamese Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: ben
4
+ language: iso-639-2:ben
5
5
  source_script: Beng
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Bengali Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: dev
4
+ language: iso-639-2:dev
5
5
  source_script: Deva
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Devanagri Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: gjr
4
+ language: iso-639-2:guj
5
5
  source_script: Gujr
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Gujrati Romanization
@@ -43,6 +43,8 @@ tests:
43
43
  expected: "srdār pṭēlē nkkī kryuṃ htuṃ kē kāśmīr bhārtnŏ hissŏ bnśē; 91 vrṣ phēlāṃ lāhŏr jēlmāṃ bhūkhhḍtāḷ drmiyān śhīd thyā htā jtīn dās"
44
44
  - source: "કોરોના પ્રોટોકોલ વચ્ચે આજે મેડિકલ પ્રવેશ પરીક્ષા લેવાશેઃ એન્ટ્રી ટચ ફ્રી રહેશે, એડમિટ કાર્ડ બાર કોડથી ચેક થશે"
45
45
  expected: "kŏrŏnā prŏṭŏkŏl vccē ājē mēḍikl prvēś prīkṣā lēvāśēḥ ēnṭrī ṭc phrī rhēśē, ēḍmiṭ kārḍ bār kŏḍthī cēk thśē"
46
+ - source: "૮૪૬૬૫૪૧૬૪૬૫૧"
47
+ expected: "846654164651"
46
48
 
47
49
 
48
50
 
@@ -163,4 +165,17 @@ map:
163
165
  '્': ''
164
166
  '઼': ''
165
167
  '।': '.'
166
- "‍": ''# Used for joining
168
+ "‍": ''# Used for joining
169
+
170
+ # digits
171
+
172
+ '૦': '0'
173
+ '૧': '1'
174
+ '૨': '2'
175
+ '૩': '3'
176
+ '૪': '4'
177
+ '૫': '5'
178
+ '૬': '6'
179
+ '૭': '7'
180
+ '૮': '8'
181
+ '૯': '9'
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: knd
4
+ language: iso-639-2:knd
5
5
  source_script: Knda
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Kannada Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: mlm
4
+ language: iso-639-2:mlm
5
5
  source_script: Mlym
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Malayalam Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: ori
4
+ language: iso-639-2:ori
5
5
  source_script: Orya
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Oriya Romanization
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: bis
3
3
  id: 1991
4
- language: pnj
4
+ language: iso-639-2:pnj
5
5
  source_script: Guru
6
6
  destination_script: Latn
7
7
  name: Indian script code for information interchange - ISCII - Panjabi(Gurmukhi) Romanization