interscript 0.1.3 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294)
  1. checksums.yaml +4 -4
  2. data/README.adoc +10 -11
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +62 -59
  5. data/lib/interscript/command.rb +3 -2
  6. data/lib/interscript/fs.rb +96 -0
  7. data/lib/interscript/mapping.rb +36 -17
  8. data/lib/interscript/opal.rb +196 -0
  9. data/lib/interscript/opal/entrypoint.rb +20 -0
  10. data/lib/interscript/opal/exports.rb +11 -0
  11. data/lib/interscript/opal/maps.js.erb +8 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/maps/acadsin-zho-Hani-Latn-2002.yaml +6 -2
  14. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  15. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  16. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  17. data/maps/alalc-asm-Deva-Latn-1997.yaml +259 -0
  18. data/maps/alalc-asm-Deva-Latn-2012.yaml +55 -0
  19. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  20. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  21. data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +7 -3
  22. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  25. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  26. data/maps/alalc-ell-Grek-Latn-1997.yaml +7 -4
  27. data/maps/alalc-ell-Grek-Latn-2010.yaml +3 -5
  28. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  29. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  30. data/maps/alalc-hin-Deva-Latn-1997.yaml +303 -0
  31. data/maps/alalc-hin-Deva-Latn-2011.yaml +65 -0
  32. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  33. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  34. data/maps/alalc-kat-Geok-Latn-1997.yaml +2 -3
  35. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  36. data/maps/alalc-kor-Hang-Latn-1997.yaml +6 -2
  37. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  38. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  39. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  40. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  41. data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +1 -1
  42. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  43. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  44. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  45. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  46. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  47. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  48. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  49. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  50. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  51. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  52. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  53. data/maps/alalc-san-Deva-Latn-2012.yaml +241 -0
  54. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  55. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  56. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  57. data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +1 -1
  58. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  59. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  60. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  61. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  62. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  63. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  64. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  65. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  66. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  67. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -2
  68. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  69. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -3
  70. data/maps/bgn-kor-Hang-Latn-1943.yaml +8 -4
  71. data/maps/bgn-kor-Kore-Latn-1943.yaml +4 -4
  72. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  73. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  75. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +598 -0
  76. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  77. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  78. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  79. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  80. data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +5 -1
  81. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +14 -10
  82. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  83. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  84. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  85. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +8 -5
  86. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -2
  87. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  88. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  89. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  90. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  91. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  92. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -2
  93. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  94. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  95. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +18 -18
  96. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +3 -3
  97. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +3 -3
  98. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  99. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  100. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +2 -2
  101. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  102. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  103. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +338 -0
  104. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +673 -0
  105. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  106. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  107. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  108. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  109. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  110. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  111. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  112. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  113. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -2
  114. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  115. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  116. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  117. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  118. data/maps/{bgnpcgn-chn-Hans-Latn-1979.yaml → bgnpcgn-zho-Hans-Latn-1979.yaml} +1 -1
  119. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  120. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  121. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  122. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  123. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +173 -0
  124. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  125. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +175 -0
  126. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  127. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  128. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  129. data/maps/by-bel-Cyrl-Latn-1998.yaml +9 -5
  130. data/maps/by-bel-Cyrl-Latn-2007.yaml +4 -4
  131. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  132. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  133. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  134. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  135. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  136. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  137. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  138. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  139. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  140. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  141. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +7 -8
  142. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +6 -7
  143. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -3
  144. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -3
  145. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -2
  146. data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +2 -2
  147. data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +2 -2
  148. data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +8 -4
  149. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  150. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  151. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -6
  152. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -5
  153. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -2
  154. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  155. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  156. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  157. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  158. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  159. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  160. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  161. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  162. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +11 -8
  163. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -5
  164. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  165. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  166. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  167. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  168. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +220 -0
  169. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  170. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  171. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  172. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  173. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  174. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  175. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  176. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  177. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  178. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  179. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  180. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  181. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  182. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  183. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -4
  184. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  185. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  186. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  187. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +2 -2
  188. data/maps/kp-kor-Hang-Latn-2002.yaml +29 -21
  189. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +3 -3
  190. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  191. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  192. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  193. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  194. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  195. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  196. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  197. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  198. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  199. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  200. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  201. data/maps/odni-ara-Arab-Latn-2015.yaml +315 -0
  202. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  203. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  204. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  205. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  206. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  207. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  208. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  209. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  210. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  211. data/maps/odni-kat-Geor-Latn-2015.yaml +2 -3
  212. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  213. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  214. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  215. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  216. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  217. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  218. data/maps/odni-prs-Arab-Latn-2015.yaml +228 -0
  219. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  220. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  221. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  222. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  223. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  224. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  225. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  226. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +6 -2
  227. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  228. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  229. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +5 -5
  230. data/maps/royin-tha-Thai-Latn-1968.yaml +9 -5
  231. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +5 -5
  232. data/maps/royin-tha-Thai-Latn-1999.yaml +8 -4
  233. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  234. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  235. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  236. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  237. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  238. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  239. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  240. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  241. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  242. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  243. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  244. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  245. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  246. data/maps/un-bel-Cyrl-Latn-2007.yaml +4 -4
  247. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  248. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +44 -44
  249. data/maps/un-ell-Grek-Latn-1987-tl.yaml +3 -4
  250. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -4
  251. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  252. data/maps/un-hin-Deva-Latn-2016.yaml +316 -0
  253. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  254. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  255. data/maps/un-mar-Deva-Latn-2016.yaml +102 -0
  256. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  257. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  258. data/maps/un-nep-Deva-Latn-1972.yaml +269 -0
  259. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  260. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  261. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  262. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  263. data/maps/un-rus-Cyrl-Latn-1987.yaml +2 -2
  264. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  265. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  266. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  267. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  268. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  269. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  270. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  271. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  272. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  273. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  274. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +4 -4
  275. data/maps/var-kor-Hang-Latn-mr-1939.yaml +4 -4
  276. data/maps/var-kor-Kore-Hang-2013.yaml +2 -2
  277. data/maps/var-kor-Kore-Latn-mr-1939.yaml +2 -3
  278. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  279. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  280. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  281. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  282. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  283. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  284. data/maps/var-tha-Thai-Thai-phonemic.yaml +6 -6
  285. data/maps/var-tha-Thai-Zsym-ipa.yaml +13 -13
  286. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +13 -9
  287. data/spec/interscript/filenames_spec.rb +21 -0
  288. data/spec/interscript_spec.rb +16 -5
  289. metadata +275 -27
  290. data/bin/interscript +0 -41
  291. data/bin/rspec +0 -29
  292. data/bin/setup +0 -8
  293. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  294. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 643981da933194b2464ea279e9d31b9fcd9d32519c5cd236ed805855c93755ad
4
- data.tar.gz: f54c4303bb02f0a873cfdf96287d78321648cee19c685bf338cb9f8e2f642c56
3
+ metadata.gz: 7813dba0d0cc7493ed3b9279c61283c8d305f1e05584a44aa700e9b72acb2f06
4
+ data.tar.gz: f4c87e24d7c2719b4f358198967d55e0c17d8aaac354311cb9eecc800a592b2d
5
5
  SHA512:
6
- metadata.gz: 2d8cfd0d60e2d41d8b1e31b4e61353b0bc7fd5ac4fc426d4304ccc86bc0bb6d84b4b4a2a6e44bb342afa6c20202a4bca4180a1f5037c73072e246038c6f36f1f
7
- data.tar.gz: 2a5fffac1de98702494f69d55b2de5200684195b0f7948619bfa2ae9f3f97810c731868f2550578f5ad97a9db9fa72d9c2abad24451437b7e08673dfc1cd97d8
6
+ metadata.gz: 8e23000fe8fb016dec9351241787608c892d7af48682259c10345ce417f94a626ea06a5bc8b1e7a3f084da8c0502d13b96bd06c9a53da31530f5c487b06fe4e9
7
+ data.tar.gz: b2d8cb122b2c1bbb2d989d832802a007a60ecc6d0f2984e323a983ec94ae9505664b98e0d9c33e0b2711f5fa31f4e657a720648dcb572915f69006a1719a9610
data/README.adoc CHANGED
@@ -1,6 +1,7 @@
1
1
  = Interscript: Interoperable Script Conversion Systems, with a Ruby implementation
2
2
 
3
- image:https://github.com/interscript/interscript/workflows/test/badge.svg["Build Status", link="https://github.com/interscript/interscript/actions?workflow=test"]
3
+ image:https://github.com/interscript/interscript/workflows/test/badge.svg["Ruby build status", link="https://github.com/interscript/interscript/actions?workflow=test"]
4
+ image:https://github.com/interscript/interscript/workflows/js/badge.svg["JavaScript build status", link="https://github.com/interscript/interscript/actions?workflow=js"]
4
5
 
5
6
  == Introduction
6
7
 
@@ -22,9 +23,9 @@ The goal is to achieve interoperable transliteration schemes allowing quality co
22
23
  These transliteration systems are used in the demo:
23
24
 
24
25
  `bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
25
- `iso-rus-Cyrl-Latn-iso9`:: ISO 9 Romanization of Russian
26
+ `iso-rus-Cyrl-Latn-9-1995`:: ISO 9 Romanization of Russian
26
27
  `icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
27
- `bas-rus-Cyrl-Latn-bss`:: Bulgaria Academy of Science Streamlined System for Russian
28
+ `bas-rus-Cyrl-Latn-2017-bss`:: Bulgaria Academy of Science Streamlined System for Russian
28
29
 
29
30
  image:demo/20191118-interscript-demo-cast.gif["interscript screencast"]
30
31
 
@@ -51,9 +52,7 @@ Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g
51
52
 
52
53
  [source,sh]
53
54
  ----
54
- pip3 install setuptools numpy
55
- curl -sSL -o sequitur-g2p.zip https://github.com/sequitur-g2p/sequitur-g2p/archive/806273f.zip
56
- pip3 install sequitur-g2p.zip
55
+ pip3 install -r requirments.txt
57
56
  ----
58
57
 
59
58
  Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
@@ -95,7 +94,7 @@ interscript rus-Cyrl.txt \
95
94
  --output=bgnpcgn-rus-Latn.txt
96
95
 
97
96
  interscript rus-Cyrl.txt \
98
- --system=iso-rus-Cyrl-Latn-iso9 \
97
+ --system=iso-rus-Cyrl-Latn-9-1995 \
99
98
  --output=iso-rus-Latn.txt
100
99
 
101
100
  interscript rus-Cyrl.txt \
@@ -103,7 +102,7 @@ interscript rus-Cyrl.txt \
103
102
  --output=icao-rus-Latn.txt
104
103
 
105
104
  interscript rus-Cyrl.txt \
106
- --system=bas-rus-Cyrl-Latn-bss \
105
+ --system=bas-rus-Cyrl-Latn-2017-bss \
107
106
  --output=bas-rus-Latn.txt
108
107
  ----
109
108
 
@@ -149,7 +148,7 @@ tests:
149
148
 
150
149
  map:
151
150
  rules:
152
- - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
151
+ - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
153
152
  result: Ye
154
153
  - pattern: \b\u0415 # Е initially
155
154
  result: Ye
@@ -167,7 +166,7 @@ The subsection `rules` is placed under the `map` key. All rules are applied in o
167
166
 
168
167
  Each rule has `pattern` and `result` elements.
169
168
 
170
- Pattern is a regex expression. It should be representing as a string without `//` or `%r{}` parentheses. For example `\b\u0415`. In case a rule is depend on previous or next content, lookahead or lookbehind could be used. For example a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` find every Е after upper or lower case symbols a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь.
169
+ Pattern is a regex expression. It should be representing as a string without `//` or `%r{}` parentheses. For example `\b\u0415`. In case a rule is depend on previous or next content, lookahead or lookbehind could be used. For example a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` find every Е after upper or lower case symbols a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь.
171
170
 
172
171
  Result is a replacement a for pattern's match. It can contain a string, an Unicode characters specified by a hexadecimal number, a captured group reference. String with hexadecimal number or captured group reference should be double quoted. For example `"Y\u00eb"` or `"\\1\u00b7\\2"`. Captured group are referred by double backslash and group's number.
173
172
 
@@ -256,7 +255,7 @@ the system code identifying a script conversion system has the following compone
256
255
  e.g. `bgnpcgn-rus-Cyrl-Latn-1947`:
257
256
 
258
257
  `bgnpcgn`:: the authority identifier
259
- `rus`:: an ISO 639-2 3-letter language code that this system applies to
258
+ `rus`:: an ISO 639-{1,2,3,5} language code that this system applies to (For 639-2, use (T) code)
260
259
  `Cyrl`:: an ISO 15924 script code, identifying the source script
261
260
  `Latn`:: an ISO 15924 script code, identifying the target script
262
261
  `1947`:: an identifier unit within the authority to identify this system
data/aliases.json ADDED
@@ -0,0 +1 @@
1
+ {"pan_Deva2Latn_ALA_1997":"alalc-pan-Guru-Latn-1997","kor_Hang2Latn_ALA_1997":"alalc-kor-Hang-Latn-1997","asm_Deva2Latn_ALA_1997":"alalc-asm-Deva-Latn-1997","aze_Cyrl2Latn_ALA_1997":"alalc-aze-Cyrl-Latn-1997","ukr_Cyrl2Latn_GUP_1996":"ua-ukr-Cyrl-Latn-1996","tha_Thai2Latn_RIT_1968":"royin-tha-Thai-Latn-1968","bul_Cyrl2Latn_BGN_1952":"bgnpcgn-bul-Cyrl-Latn-1952","tam_Taml2Latn_ALA_1997":"alalc-tam-Taml-Latn-1997","kor_Hang2Latn_GKN_2002":"kp-kor-Hang-Latn-2002","ell_Grek2Latn_ELOT743_1996":"bgnpcgn-ell-Grek-Latn-1996","zho_Hani2Latn_AcadSin_2002":"acadsin-zho-Hani-Latn-2002","ara_Arab2Latn_SES_1930":"ses-ara-Arab-Latn-1930","tgk_Cyrl2Latn_BGN_1994":"bgnpcgn-tgk-Cyrl-Latn-1994","fas_Arab2Latn_BGN_1958":"bgnpcgn-fas-Arab-Latn-1956","sin_Sinh2Latn_ALA_1997":"alalc-sin-Sinh-Latn-1997","uas_Arab2Latn_BGN_2007":"bgnpcgn-urd-Arab-Latn-2007","ukr_Cyrl2Latn_ALA_1997":"alalc-ukr-Cyrl-Latn-1997","bak_Cyrl2Latn_BGN_2007":"bgnpcgn-bak-Cyrl-Latn-2007","tam_Taml2Latn_ALA_2011":"alalc-tam-Taml-Latn-2011","ara_Arab2Latn_BGN_1956":"bgnpcgn-ara-Arab-Latn-1956","ell_Grek2Latn_ALA_1997":"alalc-ell-Grek-Latn-1997","rus_Cyrl2Latn_GOST_1983":"gost-rus-Cyrl-Latn-16876-71-1983","mar_Deva2Latn_ALA_1997":"alalc-mar-Deva-Latn-1997","bel_Cyrl2Latn_ALA_1997":"alalc-bel-Cyrl-Latn-1997","kat_Geor2Latn_ALA_1997":"alalc-kat-Geor-Latn-1997","bul_Cyrl2Latn_ALA_1997":"alalc-bul-Cyrl-Latn-1997","ara_Arab2Latn_ALA_1997":"alalc-ara-Arab-Latn-1997","mon_Cyrl2Latn_ALA_1997":"alalc-mon-Cyrl-Latn-1997","div_Thaa2Latn_GMV_1988":"bgnpcgn-div-Thaa-Latn-1988","hin_Deva2Latn_ALA_1997":"alalc-hin-Deva-Latn-1997","bel_Cyrl2Latn_GBO_1998":"by-bel-Cyrl-Latn-1998","ukr_Cyrl2Latn_BGN_1965":"bgnpcgn-ukr-Cyrl-Latn-1965","rus_Cyrl2Latn_ALA_1997":"alalc-rus-Cyrl-Latn-1997","tir_Thai2Latn_RIT_2000":"royin-tha-Thai-Latn-1999","guj_Gujr2Latn_ALA_1997":"alalc-guj-Gujr-Latn-1997","tel_Telu2Latn_ALA_1997":"alalc-tel-Telu-Latn-1997","mkd_Cyrl2Latn_BGN_1981":"bgnpcgn-mkd-Cyrl-Latn-1981","ori_Orya2Latn_ALA_2011":"alalc-ori-Orya-Latn
-2011","aze_Arab2Latn_ALA_1997":"alalc-aze-Arab-Latn-1997","ori_Orya2Latn_ALA_1997":"alalc-ori-Orya-Latn-1997","div_Thaa2Latn_ALA_1997":"alalc-div-Thaa-Latn-1997","rue_Cyrl2Latn_BGN_2016":"bgnpcgn-rue-Cyrl-Latn-2016","guj_Gujr2Latn_ALA_2011":"alalc-guj-Gujr-Latn-2011","kat_Geor2Latn_BGN_1981":"bgnpcgn-kat-Geor-Latn-1981","kor_Hang2Latn_MOCT_2000":"moct-kor-Hang-Latn-2000","sin_Sinh2Latn_ALA_2011":"alalc-sin-Sinh-Latn-2011","amh_Ethi2Latn_BGN_1967":"bgnpcgn-amh-Ethi-Latn-1967","srp_Cyrl2Latn_BGN_2005":"bgnpcgn-srp-Cyrl-Latn-2005","srp_Cyrl2Latn_ALA_1997":"alalc-srp-Cyrl-Latn-1997","mal_Mlym2Latn_ALA_2012":"alalc-mal-Mlym-Latn-2012","kat_Geor2Latn_GGG_2002":"ggg-kat-Geor-Latn-2002","mon_Cyrl2Latn_BGN_1964":"bgnpcgn-mon-Cyrl-Latn-1964","mal_Mlym2Latn_ALA_1997":"alalc-mal-Mlym-Latn-1997","ben_Beng2Latn_ALA_1997":"alalc-ben-Beng-Latn-1997","kor_Hang2Latn_MR_1939":"bgn-kor-Hang-Latn-1943","zho_Hani2Latn_GCH_1979":"sac-zho-Hans-Latn-1979","bul_Cyrl2Latn_BGN_2013":"bgnpcgn-bul-Cyrl-Latn-2013","ell_Grek2Latn_BGN_1962":"bgnpcgn-ell-Grek-Latn-1962","amh_Ethi2Latn_ALA_1997":"alalc-amh-Ethi-Latn-1997","pan_Deva2Latn_ALA_2011":"alalc-pan-Guru-Latn-2011","zho_Hani2Latn_WDG_1979":"var-zho-Hani-Latn-wd-1979","rus_Cyrl2Latn_BGN_1947":"bgnpcgn-rus-Cyrl-Latn-1947","bel_Cyrl2Latn_BGN_1979":"bgnpcgn-bel-Cyrl-Latn-1979","tat_Cyrl2Latn_BGN_2005":"bgnpcgn-tat-Cyrl-Latn-2007"}
data/lib/interscript.rb CHANGED
@@ -1,56 +1,33 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "yaml"
4
3
  require "interscript/mapping"
5
4
 
6
5
  # Transliteration
7
6
  module Interscript
8
7
 
9
- class << self
10
- def root_path
11
- @root_path ||= Pathname.new(File.dirname(__dir__))
12
- end
13
-
14
- def transliterate_file(system_code, input_file, output_file, maps)
15
- input = File.read(input_file)
16
- output = transliterate(system_code, input, maps)
17
-
18
- File.open(output_file, 'w') do |f|
19
- f.puts(output)
20
- end
21
- puts "Output written to: #{output_file}"
22
- end
8
+ class InvalidSystemError < StandardError; end
9
+ class ExternalProcessNotRecognizedError < StandardError; end
10
+ class ExternalProcessUnavailableError < StandardError; end
23
11
 
24
- def import_python_modules
25
- begin
26
- pyimport :g2pwrapper
27
- rescue
28
- pyimport :sys
29
- sys.path.append(root_path.to_s+"/lib/")
30
- pyimport :g2pwrapper
31
- end
32
- end
12
+ if RUBY_ENGINE == 'opal'
13
+ require "interscript/opal"
14
+ extend Opal
15
+ else
16
+ require "interscript/fs"
17
+ extend Fs
18
+ end
33
19
 
34
- def external_process(process_name, string)
35
- import_python_modules
36
- case process_name
37
- when 'sequitur.pythainlp_lexicon'
38
- return g2pwrapper.transliterate('pythainlp_lexicon', string)
39
- when 'sequitur.wiktionary_phonemic'
40
- return g2pwrapper.transliterate('wiktionary_phonemic', string)
41
- else
42
- puts "Invalid Process"
43
- end
44
- end
20
+ class << self
45
21
 
46
22
  def transliterate(system_code, string, maps={})
47
- if (!maps.has_key?system_code)
23
+ system_code = map_resolve(system_code)
24
+
25
+ unless maps.has_key? system_code
48
26
  maps[system_code] = Interscript::Mapping.for(system_code)
49
27
  end
50
28
  # mapping = Interscript::Mapping.for(system_code)
51
29
  mapping = maps[system_code]
52
30
 
53
-
54
31
  # First, apply chained transliteration as specified in the list `chain`
55
32
  chain = mapping.chain.dup
56
33
  while chain.length > 0
@@ -63,17 +40,11 @@ module Interscript
63
40
  title_case = mapping.title_case
64
41
  downcase = mapping.downcase
65
42
 
66
- # charmap = mapping.characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
67
- # dictmap = mapping.dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
68
43
  charmap = mapping.characters_hash
69
44
  dictmap = mapping.dictionary_hash
70
45
  trie = mapping.dictionary_trie
71
46
 
72
- # Segmentation
73
- string = external_process(mapping.segmentation, string) if mapping.segmentation
74
-
75
- # Transliteration/Transcription
76
- string = external_process(mapping.transcription, string) if mapping.transcription
47
+ string = external_processing(mapping, string)
77
48
 
78
49
  pos = 0
79
50
  while pos < string.to_s.size
@@ -81,14 +52,15 @@ module Interscript
81
52
  wordmatch = ""
82
53
 
83
54
  # Using Trie, find the longest matching substring
84
- while (pos + m < string.to_s.size) && (trie.partial_word?string[pos..pos+m])
55
+ while (pos + m < string.to_s.size) && (trie.partial_word?string[pos..pos+m])
85
56
  wordmatch = string[pos..pos+m] if trie.word?string[pos..pos+m]
86
57
  m += 1
87
58
  end
59
+
88
60
  m = wordmatch.length
89
61
  if m > 0
90
62
  repl = dictmap[string[pos..pos+m-1]]
91
- string[pos..pos+m-1] = repl
63
+ string = sub_replace(string, pos, m, repl)
92
64
  pos += repl.length
93
65
  else
94
66
  pos += 1
@@ -109,32 +81,62 @@ module Interscript
109
81
  # offsets[pos] += result.size - match[0].size
110
82
  # end
111
83
  # end
84
+
112
85
  mapping.rules.each do |r|
113
- output.gsub!(/#{r['pattern']}/, r['result'])
86
+ next unless output
87
+ re = mkregexp(r["pattern"])
88
+ output = output.gsub(re, r["result"])
114
89
  end
115
90
 
116
91
  charmap.each do |k, v|
117
- while (match = output&.match(/#{k}/))
92
+ re = mkregexp(k)
93
+ while (match = output&.match(re))
118
94
  pos = match.offset(0).first
119
95
  result = !downcase && up_case_around?(output, pos) ? v.upcase : v
120
- result = result[0] if result.is_a?(Array) # if more than one, choose the first one
121
- output[pos, match[0].size] = add_separator(separator, pos, result)
96
+
97
+ # if more than one, choose the first one
98
+ result = result[0] if result.is_a?(Array)
99
+
100
+ output = sub_replace(
101
+ output,
102
+ pos,
103
+ match[0].size,
104
+ add_separator(separator, pos, result)
105
+ )
122
106
  end
123
107
  end
124
108
 
125
109
  mapping.postrules.each do |r|
126
- output.gsub!(/#{r['pattern']}/, r['result'])
110
+ next unless output
111
+ re = mkregexp(r["pattern"])
112
+ output = if r["result"] == "upcase"
113
+ output.gsub(re, &:upcase)
114
+ else
115
+ output.gsub(re, r["result"])
116
+ end
127
117
  end
128
118
 
129
- if output
130
- output.sub!(/^(.)/, &:upcase) if title_case
131
- if word_separator != ''
132
- output.gsub!(/#{word_separator}#{separator}/,word_separator)
133
- output.gsub!(/#{word_separator}(.)/, &:upcase) if title_case
119
+ return unless output
120
+
121
+ re = mkregexp('^(.)')
122
+ output = output.gsub(re, &:upcase) if title_case
123
+ if word_separator != ''
124
+ re = mkregexp("#{word_separator}#{separator}")
125
+ output = output.gsub(re, word_separator)
126
+
127
+ if title_case
128
+ re = mkregexp("#{word_separator}(.)")
129
+ output = output.gsub(re, &:upcase)
134
130
  end
135
131
  end
136
132
 
137
- output ? output.unicode_normalize : output
133
+ output.unicode_normalize
134
+ end
135
+
136
+ def map_resolve(map)
137
+ map = aliases[map] if aliases.key? map
138
+ raise ArgumentError, "Map #{map} doesn't exist" unless map_exist? map
139
+ map
138
140
  end
139
141
 
140
142
  private
@@ -147,11 +149,11 @@ module Interscript
147
149
  return false if string[pos] == string[pos].downcase
148
150
 
149
151
  i = pos - 1
150
- i -= 1 while i.positive? && string[i] !~ /[[:alpha:]]/
152
+ i -= 1 while i.positive? && string[i] !~ mkregexp('[[:alpha:]]')
151
153
  before = i >= 0 && i < pos ? string[i].to_s.strip : ''
152
154
 
153
155
  i = pos + 1
154
- i += 1 while i < string.size - 1 && string[i] !~ /[[:alpha:]]/
156
+ i += 1 while i < string.size - 1 && string[i] !~ mkregexp('[[:alpha:]]')
155
157
  after = i > pos ? string[i].to_s.strip : ''
156
158
 
157
159
  before_uc = !before.empty? && before == before.upcase
@@ -159,5 +161,6 @@ module Interscript
159
161
  # before_uc && (after.empty? || after_uc) || after_uc && (before.empty? || before_uc)
160
162
  before_uc || after_uc
161
163
  end
164
+
162
165
  end
163
166
  end
@@ -1,16 +1,17 @@
1
1
  require 'thor'
2
2
  require 'interscript'
3
-
3
+ require 'json'
4
4
  module Interscript
5
5
  # Command line interface
6
6
  class Command < Thor
7
7
  desc '<file>', 'Transliterate text'
8
8
  option :system, aliases: '-s', required: true, desc: 'Transliteration system'
9
9
  option :output, aliases: '-o', required: false, desc: 'Output file'
10
+ option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json'
10
11
 
11
12
  def translit(input)
12
13
  if options[:output]
13
- Interscript.transliterate_file(options[:system], input, options[:output])
14
+ Interscript.transliterate_file(options[:system], input, options[:output], JSON.parse(options[:map]))
14
15
  else
15
16
  puts Interscript.transliterate(options[:system], IO.read(input))
16
17
  end
@@ -0,0 +1,96 @@
1
+ require 'pathname'
2
+
3
+ module Interscript
4
+ module Fs
5
+ def sub_replace(string, pos, size, repl)
6
+ string[pos..pos + size - 1] = repl
7
+ string
8
+ end
9
+
10
+ def root_path
11
+ @root_path ||= Pathname.new(File.join(File.dirname(__dir__), ".."))
12
+ end
13
+
14
+ def transliterate_file(system_code, input_file, output_file, maps={})
15
+ input = File.read(input_file)
16
+ output = transliterate(system_code, input, maps)
17
+
18
+ File.open(output_file, 'w') do |f|
19
+ f.puts(output)
20
+ end
21
+
22
+ puts "Output written to: #{output_file}"
23
+ output_file
24
+ end
25
+
26
+ def import_python_modules
27
+ begin
28
+ pyimport :g2pwrapper
29
+ rescue
30
+ pyimport :sys
31
+ sys.path.append(root_path.to_s + "/lib/")
32
+ pyimport :g2pwrapper
33
+ end
34
+ end
35
+
36
+ def external_process(process_name, string)
37
+ import_python_modules
38
+
39
+ case process_name
40
+ when 'sequitur.pythainlp_lexicon'
41
+ return g2pwrapper.transliterate('pythainlp_lexicon', string)
42
+ when 'sequitur.wiktionary_phonemic'
43
+ return g2pwrapper.transliterate('wiktionary_phonemic', string)
44
+ else
45
+ raise ExternalProcessNotRecognizedError.new
46
+ end
47
+
48
+ rescue
49
+ raise ExternalProcessUnavailableError.new
50
+ end
51
+
52
+ def external_processing(mapping, string)
53
+ # Segmentation
54
+ string = external_process(mapping.segmentation, string) if mapping.segmentation
55
+
56
+ # Transliteration/Transcription
57
+ string = external_process(mapping.transcription, string) if mapping.transcription
58
+
59
+ string
60
+ end
61
+
62
+ def aliases (refresh: false)
63
+ file = root_path.join("./aliases.json").to_s
64
+ if !refresh && File.exist?(file)
65
+ JSON.load(File.read(file))
66
+ elsif !refresh && @aliases
67
+ @aliases
68
+ else
69
+ @aliases = {}
70
+ Dir[root_path.join('./maps/*.yaml').to_s].each do |yaml_file|
71
+ org_name = File.basename(yaml_file, ".yaml")
72
+ map = YAML.load_file(yaml_file)
73
+ (map["alias"] || {}).each do |k,v|
74
+ @aliases[v["code"]] = org_name
75
+ end
76
+ end
77
+
78
+ # Try to save it to a file, but not force it.
79
+ File.write("aliases.json", JSON.dump(@aliases)) rescue nil
80
+
81
+ @aliases
82
+ end
83
+ end
84
+
85
+ private
86
+
87
+ def map_exist?(map)
88
+ File.exist?(root_path.join("./maps/" + map + ".yaml").to_s)
89
+ end
90
+
91
+ def mkregexp(regexpstring)
92
+ /#{regexpstring}/u
93
+ end
94
+
95
+ end
96
+ end
@@ -1,7 +1,8 @@
1
1
  require 'rambling-trie'
2
+ require 'yaml' unless RUBY_ENGINE == 'opal'
3
+ require 'json'
2
4
 
3
5
  module Interscript
4
- class InvalidSystemError < StandardError; end
5
6
 
6
7
  class Mapping
7
8
  attr_reader(
@@ -35,7 +36,10 @@ module Interscript
35
36
  def initialize(system_code, options = {})
36
37
  @system_code = system_code
37
38
  @depth = options.fetch(:depth, 0).to_i
38
- @system_path = options.fetch(:system_code, default_path)
39
+
40
+ unless RUBY_ENGINE == 'opal'
41
+ @system_path = options.fetch(:system_code, default_path)
42
+ end
39
43
 
40
44
  load_and_serialize_system_mappings
41
45
  end
@@ -45,10 +49,10 @@ module Interscript
45
49
  end
46
50
 
47
51
  def load_and_serialize_system_mappings
48
- if depth < 5
49
- mappings = load_system_mappings
50
- serialize_system_mappings(mappings)
51
- end
52
+ return if depth >= 5
53
+
54
+ mappings = load_system_mappings
55
+ serialize_system_mappings(mappings)
52
56
  end
53
57
 
54
58
  private
@@ -64,6 +68,18 @@ module Interscript
64
68
  end
65
69
 
66
70
  def load_system_mappings
71
+ if RUBY_ENGINE == 'opal'
72
+ load_opal_mappings
73
+ else
74
+ load_fs_mappings
75
+ end
76
+ end
77
+
78
+ def load_opal_mappings
79
+ JSON.parse(`Opal.global.InterscriptMaps[#{system_code}]`)
80
+ end
81
+
82
+ def load_fs_mappings
67
83
  YAML.load_file(system_path.join(system_code_file))
68
84
  rescue Errno::ENOENT
69
85
  raise Interscript::InvalidSystemError.new("No system mappings found")
@@ -100,24 +116,27 @@ module Interscript
100
116
 
101
117
  def include_inherited_mappings(mappings)
102
118
  inherit_systems = [].push(mappings["map"]["inherit"]).flatten
103
- for inherit_system in inherit_systems do
104
- if (inherit_system)
105
- inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
106
-
107
- @rules = [inherited_mapping.rules, rules].flatten
108
- @postrules = [inherited_mapping.postrules, postrules].flatten
109
- @characters = (inherited_mapping.characters|| {}).merge(characters)
110
- @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
111
- end
119
+
120
+ inherit_systems.each do |inherit_system|
121
+ next unless inherit_system
122
+
123
+ inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
124
+
125
+ @rules = [rules, inherited_mapping.rules].flatten
126
+ @postrules = [inherited_mapping.postrules, postrules].flatten
127
+ @characters = (inherited_mapping.characters|| {}).merge(characters)
128
+ @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
112
129
  end
130
+
131
+ @characters.compact! # the feature to ignore characters from inherited
113
132
  end
114
133
 
115
- def build_hashes()
134
+ def build_hashes
116
135
  @characters_hash = characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
117
136
  @dictionary_hash = dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
118
137
  end
119
138
 
120
- def build_trie()
139
+ def build_trie
121
140
  @dictionary_trie = Rambling::Trie.create
122
141
  dictionary_trie.concat dictionary.keys
123
142
  end