interscript 0.1.3 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (294) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +10 -11
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +62 -59
  5. data/lib/interscript/command.rb +3 -2
  6. data/lib/interscript/fs.rb +96 -0
  7. data/lib/interscript/mapping.rb +36 -17
  8. data/lib/interscript/opal.rb +196 -0
  9. data/lib/interscript/opal/entrypoint.rb +20 -0
  10. data/lib/interscript/opal/exports.rb +11 -0
  11. data/lib/interscript/opal/maps.js.erb +8 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/maps/acadsin-zho-Hani-Latn-2002.yaml +6 -2
  14. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  15. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  16. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  17. data/maps/alalc-asm-Deva-Latn-1997.yaml +259 -0
  18. data/maps/alalc-asm-Deva-Latn-2012.yaml +55 -0
  19. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  20. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  21. data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +7 -3
  22. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  25. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  26. data/maps/alalc-ell-Grek-Latn-1997.yaml +7 -4
  27. data/maps/alalc-ell-Grek-Latn-2010.yaml +3 -5
  28. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  29. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  30. data/maps/alalc-hin-Deva-Latn-1997.yaml +303 -0
  31. data/maps/alalc-hin-Deva-Latn-2011.yaml +65 -0
  32. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  33. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  34. data/maps/alalc-kat-Geok-Latn-1997.yaml +2 -3
  35. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  36. data/maps/alalc-kor-Hang-Latn-1997.yaml +6 -2
  37. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  38. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  39. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  40. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  41. data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +1 -1
  42. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  43. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  44. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  45. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  46. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  47. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  48. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  49. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  50. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  51. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  52. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  53. data/maps/alalc-san-Deva-Latn-2012.yaml +241 -0
  54. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  55. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  56. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  57. data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +1 -1
  58. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  59. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  60. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  61. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  62. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  63. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  64. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  65. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  66. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  67. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -2
  68. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  69. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -3
  70. data/maps/bgn-kor-Hang-Latn-1943.yaml +8 -4
  71. data/maps/bgn-kor-Kore-Latn-1943.yaml +4 -4
  72. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  73. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  75. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +598 -0
  76. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  77. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  78. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  79. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  80. data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +5 -1
  81. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +14 -10
  82. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  83. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  84. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  85. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +8 -5
  86. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -2
  87. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  88. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  89. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  90. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  91. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  92. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -2
  93. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  94. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  95. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +18 -18
  96. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +3 -3
  97. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +3 -3
  98. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  99. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  100. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +2 -2
  101. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  102. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  103. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +338 -0
  104. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +673 -0
  105. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  106. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  107. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  108. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  109. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  110. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  111. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  112. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  113. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -2
  114. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  115. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  116. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  117. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  118. data/maps/{bgnpcgn-chn-Hans-Latn-1979.yaml → bgnpcgn-zho-Hans-Latn-1979.yaml} +1 -1
  119. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  120. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  121. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  122. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  123. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +173 -0
  124. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  125. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +175 -0
  126. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  127. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  128. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  129. data/maps/by-bel-Cyrl-Latn-1998.yaml +9 -5
  130. data/maps/by-bel-Cyrl-Latn-2007.yaml +4 -4
  131. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  132. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  133. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  134. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  135. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  136. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  137. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  138. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  139. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  140. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  141. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +7 -8
  142. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +6 -7
  143. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -3
  144. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -3
  145. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -2
  146. data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +2 -2
  147. data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +2 -2
  148. data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +8 -4
  149. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  150. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  151. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -6
  152. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -5
  153. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -2
  154. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  155. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  156. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  157. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  158. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  159. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  160. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  161. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  162. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +11 -8
  163. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -5
  164. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  165. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  166. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  167. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  168. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +220 -0
  169. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  170. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  171. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  172. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  173. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  174. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  175. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  176. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  177. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  178. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  179. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  180. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  181. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  182. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  183. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -4
  184. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  185. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  186. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  187. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +2 -2
  188. data/maps/kp-kor-Hang-Latn-2002.yaml +29 -21
  189. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +3 -3
  190. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  191. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  192. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  193. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  194. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  195. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  196. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  197. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  198. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  199. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  200. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  201. data/maps/odni-ara-Arab-Latn-2015.yaml +315 -0
  202. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  203. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  204. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  205. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  206. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  207. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  208. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  209. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  210. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  211. data/maps/odni-kat-Geor-Latn-2015.yaml +2 -3
  212. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  213. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  214. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  215. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  216. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  217. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  218. data/maps/odni-prs-Arab-Latn-2015.yaml +228 -0
  219. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  220. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  221. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  222. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  223. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  224. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  225. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  226. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +6 -2
  227. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  228. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  229. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +5 -5
  230. data/maps/royin-tha-Thai-Latn-1968.yaml +9 -5
  231. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +5 -5
  232. data/maps/royin-tha-Thai-Latn-1999.yaml +8 -4
  233. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  234. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  235. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  236. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  237. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  238. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  239. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  240. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  241. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  242. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  243. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  244. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  245. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  246. data/maps/un-bel-Cyrl-Latn-2007.yaml +4 -4
  247. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  248. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +44 -44
  249. data/maps/un-ell-Grek-Latn-1987-tl.yaml +3 -4
  250. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -4
  251. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  252. data/maps/un-hin-Deva-Latn-2016.yaml +316 -0
  253. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  254. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  255. data/maps/un-mar-Deva-Latn-2016.yaml +102 -0
  256. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  257. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  258. data/maps/un-nep-Deva-Latn-1972.yaml +269 -0
  259. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  260. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  261. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  262. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  263. data/maps/un-rus-Cyrl-Latn-1987.yaml +2 -2
  264. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  265. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  266. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  267. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  268. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  269. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  270. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  271. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  272. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  273. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  274. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +4 -4
  275. data/maps/var-kor-Hang-Latn-mr-1939.yaml +4 -4
  276. data/maps/var-kor-Kore-Hang-2013.yaml +2 -2
  277. data/maps/var-kor-Kore-Latn-mr-1939.yaml +2 -3
  278. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  279. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  280. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  281. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  282. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  283. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  284. data/maps/var-tha-Thai-Thai-phonemic.yaml +6 -6
  285. data/maps/var-tha-Thai-Zsym-ipa.yaml +13 -13
  286. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +13 -9
  287. data/spec/interscript/filenames_spec.rb +21 -0
  288. data/spec/interscript_spec.rb +16 -5
  289. metadata +275 -27
  290. data/bin/interscript +0 -41
  291. data/bin/rspec +0 -29
  292. data/bin/setup +0 -8
  293. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  294. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 643981da933194b2464ea279e9d31b9fcd9d32519c5cd236ed805855c93755ad
4
- data.tar.gz: f54c4303bb02f0a873cfdf96287d78321648cee19c685bf338cb9f8e2f642c56
3
+ metadata.gz: 7813dba0d0cc7493ed3b9279c61283c8d305f1e05584a44aa700e9b72acb2f06
4
+ data.tar.gz: f4c87e24d7c2719b4f358198967d55e0c17d8aaac354311cb9eecc800a592b2d
5
5
  SHA512:
6
- metadata.gz: 2d8cfd0d60e2d41d8b1e31b4e61353b0bc7fd5ac4fc426d4304ccc86bc0bb6d84b4b4a2a6e44bb342afa6c20202a4bca4180a1f5037c73072e246038c6f36f1f
7
- data.tar.gz: 2a5fffac1de98702494f69d55b2de5200684195b0f7948619bfa2ae9f3f97810c731868f2550578f5ad97a9db9fa72d9c2abad24451437b7e08673dfc1cd97d8
6
+ metadata.gz: 8e23000fe8fb016dec9351241787608c892d7af48682259c10345ce417f94a626ea06a5bc8b1e7a3f084da8c0502d13b96bd06c9a53da31530f5c487b06fe4e9
7
+ data.tar.gz: b2d8cb122b2c1bbb2d989d832802a007a60ecc6d0f2984e323a983ec94ae9505664b98e0d9c33e0b2711f5fa31f4e657a720648dcb572915f69006a1719a9610
@@ -1,6 +1,7 @@
1
1
  = Interscript: Interoperable Script Conversion Systems, with a Ruby implementation
2
2
 
3
- image:https://github.com/interscript/interscript/workflows/test/badge.svg["Build Status", link="https://github.com/interscript/interscript/actions?workflow=test"]
3
+ image:https://github.com/interscript/interscript/workflows/test/badge.svg["Ruby build status", link="https://github.com/interscript/interscript/actions?workflow=test"]
4
+ image:https://github.com/interscript/interscript/workflows/js/badge.svg["JavaScript build status", link="https://github.com/interscript/interscript/actions?workflow=js"]
4
5
 
5
6
  == Introduction
6
7
 
@@ -22,9 +23,9 @@ The goal is to achieve interoperable transliteration schemes allowing quality co
22
23
  These transliteration systems are used in the demo:
23
24
 
24
25
  `bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
25
- `iso-rus-Cyrl-Latn-iso9`:: ISO 9 Romanization of Russian
26
+ `iso-rus-Cyrl-Latn-9-1995`:: ISO 9 Romanization of Russian
26
27
  `icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
27
- `bas-rus-Cyrl-Latn-bss`:: Bulgaria Academy of Science Streamlined System for Russian
28
+ `bas-rus-Cyrl-Latn-2017-bss`:: Bulgaria Academy of Science Streamlined System for Russian
28
29
 
29
30
  image:demo/20191118-interscript-demo-cast.gif["interscript screencast"]
30
31
 
@@ -51,9 +52,7 @@ Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g
51
52
 
52
53
  [source,sh]
53
54
  ----
54
- pip3 install setuptools numpy
55
- curl -sSL -o sequitur-g2p.zip https://github.com/sequitur-g2p/sequitur-g2p/archive/806273f.zip
56
- pip3 install sequitur-g2p.zip
55
+ pip3 install -r requirements.txt
57
56
  ----
58
57
 
59
58
  Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
@@ -95,7 +94,7 @@ interscript rus-Cyrl.txt \
95
94
  --output=bgnpcgn-rus-Latn.txt
96
95
 
97
96
  interscript rus-Cyrl.txt \
98
- --system=iso-rus-Cyrl-Latn-iso9 \
97
+ --system=iso-rus-Cyrl-Latn-9-1995 \
99
98
  --output=iso-rus-Latn.txt
100
99
 
101
100
  interscript rus-Cyrl.txt \
@@ -103,7 +102,7 @@ interscript rus-Cyrl.txt \
103
102
  --output=icao-rus-Latn.txt
104
103
 
105
104
  interscript rus-Cyrl.txt \
106
- --system=bas-rus-Cyrl-Latn-bss \
105
+ --system=bas-rus-Cyrl-Latn-2017-bss \
107
106
  --output=bas-rus-Latn.txt
108
107
  ----
109
108
 
@@ -149,7 +148,7 @@ tests:
149
148
 
150
149
  map:
151
150
  rules:
152
- - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
151
+ - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
153
152
  result: Ye
154
153
  - pattern: \b\u0415 # Е initially
155
154
  result: Ye
@@ -167,7 +166,7 @@ The subsection `rules` is placed under the `map` key. All rules are applied in o
167
166
 
168
167
  Each rule has `pattern` and `result` elements.
169
168
 
170
- Pattern is a regex expression. It should be representing as a string without `//` or `%r{}` parentheses. For example `\b\u0415`. In case a rule is depend on previous or next content, lookahead or lookbehind could be used. For example a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` find every Е after upper or lower case symbols a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь.
169
+ Pattern is a regular expression. It should be represented as a string without the `//` or `%r{}` delimiters, for example `\b\u0415`. If a rule depends on the preceding or following content, a lookahead or lookbehind can be used. For example, a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` finds every Е that follows an upper- or lower-case a, e, ё, и, о, у, ы, э, ю, я, й, ъ, or ь.
171
170
 
172
171
  Result is the replacement for the pattern's match. It can contain a string, Unicode characters specified by a hexadecimal number, or a captured group reference. A string with a hexadecimal number or a captured group reference should be double quoted, for example `"Y\u00eb"` or `"\\1\u00b7\\2"`. Captured groups are referred to by a double backslash followed by the group's number.
173
172
 
@@ -256,7 +255,7 @@ the system code identifying a script conversion system has the following compone
256
255
  e.g. `bgnpcgn-rus-Cyrl-Latn-1947`:
257
256
 
258
257
  `bgnpcgn`:: the authority identifier
259
- `rus`:: an ISO 639-2 3-letter language code that this system applies to
258
+ `rus`:: an ISO 639-{1,2,3,5} language code that this system applies to (for ISO 639-2, use the terminology (T) code)
260
259
  `Cyrl`:: an ISO 15924 script code, identifying the source script
261
260
  `Latn`:: an ISO 15924 script code, identifying the target script
262
261
  `1947`:: an identifier unit within the authority to identify this system
@@ -0,0 +1 @@
1
+ {"pan_Deva2Latn_ALA_1997":"alalc-pan-Guru-Latn-1997","kor_Hang2Latn_ALA_1997":"alalc-kor-Hang-Latn-1997","asm_Deva2Latn_ALA_1997":"alalc-asm-Deva-Latn-1997","aze_Cyrl2Latn_ALA_1997":"alalc-aze-Cyrl-Latn-1997","ukr_Cyrl2Latn_GUP_1996":"ua-ukr-Cyrl-Latn-1996","tha_Thai2Latn_RIT_1968":"royin-tha-Thai-Latn-1968","bul_Cyrl2Latn_BGN_1952":"bgnpcgn-bul-Cyrl-Latn-1952","tam_Taml2Latn_ALA_1997":"alalc-tam-Taml-Latn-1997","kor_Hang2Latn_GKN_2002":"kp-kor-Hang-Latn-2002","ell_Grek2Latn_ELOT743_1996":"bgnpcgn-ell-Grek-Latn-1996","zho_Hani2Latn_AcadSin_2002":"acadsin-zho-Hani-Latn-2002","ara_Arab2Latn_SES_1930":"ses-ara-Arab-Latn-1930","tgk_Cyrl2Latn_BGN_1994":"bgnpcgn-tgk-Cyrl-Latn-1994","fas_Arab2Latn_BGN_1958":"bgnpcgn-fas-Arab-Latn-1956","sin_Sinh2Latn_ALA_1997":"alalc-sin-Sinh-Latn-1997","uas_Arab2Latn_BGN_2007":"bgnpcgn-urd-Arab-Latn-2007","ukr_Cyrl2Latn_ALA_1997":"alalc-ukr-Cyrl-Latn-1997","bak_Cyrl2Latn_BGN_2007":"bgnpcgn-bak-Cyrl-Latn-2007","tam_Taml2Latn_ALA_2011":"alalc-tam-Taml-Latn-2011","ara_Arab2Latn_BGN_1956":"bgnpcgn-ara-Arab-Latn-1956","ell_Grek2Latn_ALA_1997":"alalc-ell-Grek-Latn-1997","rus_Cyrl2Latn_GOST_1983":"gost-rus-Cyrl-Latn-16876-71-1983","mar_Deva2Latn_ALA_1997":"alalc-mar-Deva-Latn-1997","bel_Cyrl2Latn_ALA_1997":"alalc-bel-Cyrl-Latn-1997","kat_Geor2Latn_ALA_1997":"alalc-kat-Geor-Latn-1997","bul_Cyrl2Latn_ALA_1997":"alalc-bul-Cyrl-Latn-1997","ara_Arab2Latn_ALA_1997":"alalc-ara-Arab-Latn-1997","mon_Cyrl2Latn_ALA_1997":"alalc-mon-Cyrl-Latn-1997","div_Thaa2Latn_GMV_1988":"bgnpcgn-div-Thaa-Latn-1988","hin_Deva2Latn_ALA_1997":"alalc-hin-Deva-Latn-1997","bel_Cyrl2Latn_GBO_1998":"by-bel-Cyrl-Latn-1998","ukr_Cyrl2Latn_BGN_1965":"bgnpcgn-ukr-Cyrl-Latn-1965","rus_Cyrl2Latn_ALA_1997":"alalc-rus-Cyrl-Latn-1997","tir_Thai2Latn_RIT_2000":"royin-tha-Thai-Latn-1999","guj_Gujr2Latn_ALA_1997":"alalc-guj-Gujr-Latn-1997","tel_Telu2Latn_ALA_1997":"alalc-tel-Telu-Latn-1997","mkd_Cyrl2Latn_BGN_1981":"bgnpcgn-mkd-Cyrl-Latn-1981","ori_Orya2Latn_ALA_2011":"alalc-ori-Orya-Latn-2011","aze_Arab2Latn_ALA_1997":"alalc-aze-Arab-Latn-1997","ori_Orya2Latn_ALA_1997":"alalc-ori-Orya-Latn-1997","div_Thaa2Latn_ALA_1997":"alalc-div-Thaa-Latn-1997","rue_Cyrl2Latn_BGN_2016":"bgnpcgn-rue-Cyrl-Latn-2016","guj_Gujr2Latn_ALA_2011":"alalc-guj-Gujr-Latn-2011","kat_Geor2Latn_BGN_1981":"bgnpcgn-kat-Geor-Latn-1981","kor_Hang2Latn_MOCT_2000":"moct-kor-Hang-Latn-2000","sin_Sinh2Latn_ALA_2011":"alalc-sin-Sinh-Latn-2011","amh_Ethi2Latn_BGN_1967":"bgnpcgn-amh-Ethi-Latn-1967","srp_Cyrl2Latn_BGN_2005":"bgnpcgn-srp-Cyrl-Latn-2005","srp_Cyrl2Latn_ALA_1997":"alalc-srp-Cyrl-Latn-1997","mal_Mlym2Latn_ALA_2012":"alalc-mal-Mlym-Latn-2012","kat_Geor2Latn_GGG_2002":"ggg-kat-Geor-Latn-2002","mon_Cyrl2Latn_BGN_1964":"bgnpcgn-mon-Cyrl-Latn-1964","mal_Mlym2Latn_ALA_1997":"alalc-mal-Mlym-Latn-1997","ben_Beng2Latn_ALA_1997":"alalc-ben-Beng-Latn-1997","kor_Hang2Latn_MR_1939":"bgn-kor-Hang-Latn-1943","zho_Hani2Latn_GCH_1979":"sac-zho-Hans-Latn-1979","bul_Cyrl2Latn_BGN_2013":"bgnpcgn-bul-Cyrl-Latn-2013","ell_Grek2Latn_BGN_1962":"bgnpcgn-ell-Grek-Latn-1962","amh_Ethi2Latn_ALA_1997":"alalc-amh-Ethi-Latn-1997","pan_Deva2Latn_ALA_2011":"alalc-pan-Guru-Latn-2011","zho_Hani2Latn_WDG_1979":"var-zho-Hani-Latn-wd-1979","rus_Cyrl2Latn_BGN_1947":"bgnpcgn-rus-Cyrl-Latn-1947","bel_Cyrl2Latn_BGN_1979":"bgnpcgn-bel-Cyrl-Latn-1979","tat_Cyrl2Latn_BGN_2005":"bgnpcgn-tat-Cyrl-Latn-2007"}
@@ -1,56 +1,33 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "yaml"
4
3
  require "interscript/mapping"
5
4
 
6
5
  # Transliteration
7
6
  module Interscript
8
7
 
9
- class << self
10
- def root_path
11
- @root_path ||= Pathname.new(File.dirname(__dir__))
12
- end
13
-
14
- def transliterate_file(system_code, input_file, output_file, maps)
15
- input = File.read(input_file)
16
- output = transliterate(system_code, input, maps)
17
-
18
- File.open(output_file, 'w') do |f|
19
- f.puts(output)
20
- end
21
- puts "Output written to: #{output_file}"
22
- end
8
+ class InvalidSystemError < StandardError; end
9
+ class ExternalProcessNotRecognizedError < StandardError; end
10
+ class ExternalProcessUnavailableError < StandardError; end
23
11
 
24
- def import_python_modules
25
- begin
26
- pyimport :g2pwrapper
27
- rescue
28
- pyimport :sys
29
- sys.path.append(root_path.to_s+"/lib/")
30
- pyimport :g2pwrapper
31
- end
32
- end
12
+ if RUBY_ENGINE == 'opal'
13
+ require "interscript/opal"
14
+ extend Opal
15
+ else
16
+ require "interscript/fs"
17
+ extend Fs
18
+ end
33
19
 
34
- def external_process(process_name, string)
35
- import_python_modules
36
- case process_name
37
- when 'sequitur.pythainlp_lexicon'
38
- return g2pwrapper.transliterate('pythainlp_lexicon', string)
39
- when 'sequitur.wiktionary_phonemic'
40
- return g2pwrapper.transliterate('wiktionary_phonemic', string)
41
- else
42
- puts "Invalid Process"
43
- end
44
- end
20
+ class << self
45
21
 
46
22
  def transliterate(system_code, string, maps={})
47
- if (!maps.has_key?system_code)
23
+ system_code = map_resolve(system_code)
24
+
25
+ unless maps.has_key? system_code
48
26
  maps[system_code] = Interscript::Mapping.for(system_code)
49
27
  end
50
28
  # mapping = Interscript::Mapping.for(system_code)
51
29
  mapping = maps[system_code]
52
30
 
53
-
54
31
  # First, apply chained transliteration as specified in the list `chain`
55
32
  chain = mapping.chain.dup
56
33
  while chain.length > 0
@@ -63,17 +40,11 @@ module Interscript
63
40
  title_case = mapping.title_case
64
41
  downcase = mapping.downcase
65
42
 
66
- # charmap = mapping.characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
67
- # dictmap = mapping.dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
68
43
  charmap = mapping.characters_hash
69
44
  dictmap = mapping.dictionary_hash
70
45
  trie = mapping.dictionary_trie
71
46
 
72
- # Segmentation
73
- string = external_process(mapping.segmentation, string) if mapping.segmentation
74
-
75
- # Transliteration/Transcription
76
- string = external_process(mapping.transcription, string) if mapping.transcription
47
+ string = external_processing(mapping, string)
77
48
 
78
49
  pos = 0
79
50
  while pos < string.to_s.size
@@ -81,14 +52,15 @@ module Interscript
81
52
  wordmatch = ""
82
53
 
83
54
  # Using Trie, find the longest matching substring
84
- while (pos + m < string.to_s.size) && (trie.partial_word?string[pos..pos+m])
55
+ while (pos + m < string.to_s.size) && (trie.partial_word?string[pos..pos+m])
85
56
  wordmatch = string[pos..pos+m] if trie.word?string[pos..pos+m]
86
57
  m += 1
87
58
  end
59
+
88
60
  m = wordmatch.length
89
61
  if m > 0
90
62
  repl = dictmap[string[pos..pos+m-1]]
91
- string[pos..pos+m-1] = repl
63
+ string = sub_replace(string, pos, m, repl)
92
64
  pos += repl.length
93
65
  else
94
66
  pos += 1
@@ -109,32 +81,62 @@ module Interscript
109
81
  # offsets[pos] += result.size - match[0].size
110
82
  # end
111
83
  # end
84
+
112
85
  mapping.rules.each do |r|
113
- output.gsub!(/#{r['pattern']}/, r['result'])
86
+ next unless output
87
+ re = mkregexp(r["pattern"])
88
+ output = output.gsub(re, r["result"])
114
89
  end
115
90
 
116
91
  charmap.each do |k, v|
117
- while (match = output&.match(/#{k}/))
92
+ re = mkregexp(k)
93
+ while (match = output&.match(re))
118
94
  pos = match.offset(0).first
119
95
  result = !downcase && up_case_around?(output, pos) ? v.upcase : v
120
- result = result[0] if result.is_a?(Array) # if more than one, choose the first one
121
- output[pos, match[0].size] = add_separator(separator, pos, result)
96
+
97
+ # if more than one, choose the first one
98
+ result = result[0] if result.is_a?(Array)
99
+
100
+ output = sub_replace(
101
+ output,
102
+ pos,
103
+ match[0].size,
104
+ add_separator(separator, pos, result)
105
+ )
122
106
  end
123
107
  end
124
108
 
125
109
  mapping.postrules.each do |r|
126
- output.gsub!(/#{r['pattern']}/, r['result'])
110
+ next unless output
111
+ re = mkregexp(r["pattern"])
112
+ output = if r["result"] == "upcase"
113
+ output.gsub(re, &:upcase)
114
+ else
115
+ output.gsub(re, r["result"])
116
+ end
127
117
  end
128
118
 
129
- if output
130
- output.sub!(/^(.)/, &:upcase) if title_case
131
- if word_separator != ''
132
- output.gsub!(/#{word_separator}#{separator}/,word_separator)
133
- output.gsub!(/#{word_separator}(.)/, &:upcase) if title_case
119
+ return unless output
120
+
121
+ re = mkregexp('^(.)')
122
+ output = output.gsub(re, &:upcase) if title_case
123
+ if word_separator != ''
124
+ re = mkregexp("#{word_separator}#{separator}")
125
+ output = output.gsub(re, word_separator)
126
+
127
+ if title_case
128
+ re = mkregexp("#{word_separator}(.)")
129
+ output = output.gsub(re, &:upcase)
134
130
  end
135
131
  end
136
132
 
137
- output ? output.unicode_normalize : output
133
+ output.unicode_normalize
134
+ end
135
+
136
+ def map_resolve(map)
137
+ map = aliases[map] if aliases.key? map
138
+ raise ArgumentError, "Map #{map} doesn't exist" unless map_exist? map
139
+ map
138
140
  end
139
141
 
140
142
  private
@@ -147,11 +149,11 @@ module Interscript
147
149
  return false if string[pos] == string[pos].downcase
148
150
 
149
151
  i = pos - 1
150
- i -= 1 while i.positive? && string[i] !~ /[[:alpha:]]/
152
+ i -= 1 while i.positive? && string[i] !~ mkregexp('[[:alpha:]]')
151
153
  before = i >= 0 && i < pos ? string[i].to_s.strip : ''
152
154
 
153
155
  i = pos + 1
154
- i += 1 while i < string.size - 1 && string[i] !~ /[[:alpha:]]/
156
+ i += 1 while i < string.size - 1 && string[i] !~ mkregexp('[[:alpha:]]')
155
157
  after = i > pos ? string[i].to_s.strip : ''
156
158
 
157
159
  before_uc = !before.empty? && before == before.upcase
@@ -159,5 +161,6 @@ module Interscript
159
161
  # before_uc && (after.empty? || after_uc) || after_uc && (before.empty? || before_uc)
160
162
  before_uc || after_uc
161
163
  end
164
+
162
165
  end
163
166
  end
@@ -1,16 +1,17 @@
1
1
  require 'thor'
2
2
  require 'interscript'
3
-
3
+ require 'json'
4
4
  module Interscript
5
5
  # Command line interface
6
6
  class Command < Thor
7
7
  desc '<file>', 'Transliterate text'
8
8
  option :system, aliases: '-s', required: true, desc: 'Transliteration system'
9
9
  option :output, aliases: '-o', required: false, desc: 'Output file'
10
+ option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json'
10
11
 
11
12
  def translit(input)
12
13
  if options[:output]
13
- Interscript.transliterate_file(options[:system], input, options[:output])
14
+ Interscript.transliterate_file(options[:system], input, options[:output], JSON.parse(options[:map]))
14
15
  else
15
16
  puts Interscript.transliterate(options[:system], IO.read(input))
16
17
  end
@@ -0,0 +1,96 @@
1
+ require 'pathname'
2
+
3
+ module Interscript
4
+ module Fs
5
+ def sub_replace(string, pos, size, repl)
6
+ string[pos..pos + size - 1] = repl
7
+ string
8
+ end
9
+
10
+ def root_path
11
+ @root_path ||= Pathname.new(File.join(File.dirname(__dir__), ".."))
12
+ end
13
+
14
+ def transliterate_file(system_code, input_file, output_file, maps={})
15
+ input = File.read(input_file)
16
+ output = transliterate(system_code, input, maps)
17
+
18
+ File.open(output_file, 'w') do |f|
19
+ f.puts(output)
20
+ end
21
+
22
+ puts "Output written to: #{output_file}"
23
+ output_file
24
+ end
25
+
26
+ def import_python_modules
27
+ begin
28
+ pyimport :g2pwrapper
29
+ rescue
30
+ pyimport :sys
31
+ sys.path.append(root_path.to_s + "/lib/")
32
+ pyimport :g2pwrapper
33
+ end
34
+ end
35
+
36
+ def external_process(process_name, string)
37
+ import_python_modules
38
+
39
+ case process_name
40
+ when 'sequitur.pythainlp_lexicon'
41
+ return g2pwrapper.transliterate('pythainlp_lexicon', string)
42
+ when 'sequitur.wiktionary_phonemic'
43
+ return g2pwrapper.transliterate('wiktionary_phonemic', string)
44
+ else
45
+ raise ExternalProcessNotRecognizedError.new
46
+ end
47
+
48
+ rescue
49
+ raise ExternalProcessUnavailableError.new
50
+ end
51
+
52
+ def external_processing(mapping, string)
53
+ # Segmentation
54
+ string = external_process(mapping.segmentation, string) if mapping.segmentation
55
+
56
+ # Transliteration/Transcription
57
+ string = external_process(mapping.transcription, string) if mapping.transcription
58
+
59
+ string
60
+ end
61
+
62
+ def aliases (refresh: false)
63
+ file = root_path.join("./aliases.json").to_s
64
+ if !refresh && File.exist?(file)
65
+ JSON.load(File.read(file))
66
+ elsif !refresh && @aliases
67
+ @aliases
68
+ else
69
+ @aliases = {}
70
+ Dir[root_path.join('./maps/*.yaml').to_s].each do |yaml_file|
71
+ org_name = File.basename(yaml_file, ".yaml")
72
+ map = YAML.load_file(yaml_file)
73
+ (map["alias"] || {}).each do |k,v|
74
+ @aliases[v["code"]] = org_name
75
+ end
76
+ end
77
+
78
+ # Try to save it to a file, but not force it.
79
+ File.write("aliases.json", JSON.dump(@aliases)) rescue nil
80
+
81
+ @aliases
82
+ end
83
+ end
84
+
85
+ private
86
+
87
+ def map_exist?(map)
88
+ File.exist?(root_path.join("./maps/" + map + ".yaml").to_s)
89
+ end
90
+
91
+ def mkregexp(regexpstring)
92
+ /#{regexpstring}/u
93
+ end
94
+
95
+ end
96
+ end
@@ -1,7 +1,8 @@
1
1
  require 'rambling-trie'
2
+ require 'yaml' unless RUBY_ENGINE == 'opal'
3
+ require 'json'
2
4
 
3
5
  module Interscript
4
- class InvalidSystemError < StandardError; end
5
6
 
6
7
  class Mapping
7
8
  attr_reader(
@@ -35,7 +36,10 @@ module Interscript
35
36
  def initialize(system_code, options = {})
36
37
  @system_code = system_code
37
38
  @depth = options.fetch(:depth, 0).to_i
38
- @system_path = options.fetch(:system_code, default_path)
39
+
40
+ unless RUBY_ENGINE == 'opal'
41
+ @system_path = options.fetch(:system_code, default_path)
42
+ end
39
43
 
40
44
  load_and_serialize_system_mappings
41
45
  end
@@ -45,10 +49,10 @@ module Interscript
45
49
  end
46
50
 
47
51
  def load_and_serialize_system_mappings
48
- if depth < 5
49
- mappings = load_system_mappings
50
- serialize_system_mappings(mappings)
51
- end
52
+ return if depth >= 5
53
+
54
+ mappings = load_system_mappings
55
+ serialize_system_mappings(mappings)
52
56
  end
53
57
 
54
58
  private
@@ -64,6 +68,18 @@ module Interscript
64
68
  end
65
69
 
66
70
  def load_system_mappings
71
+ if RUBY_ENGINE == 'opal'
72
+ load_opal_mappings
73
+ else
74
+ load_fs_mappings
75
+ end
76
+ end
77
+
78
+ def load_opal_mappings
79
+ JSON.parse(`Opal.global.InterscriptMaps[#{system_code}]`)
80
+ end
81
+
82
+ def load_fs_mappings
67
83
  YAML.load_file(system_path.join(system_code_file))
68
84
  rescue Errno::ENOENT
69
85
  raise Interscript::InvalidSystemError.new("No system mappings found")
@@ -100,24 +116,27 @@ module Interscript
100
116
 
101
117
  def include_inherited_mappings(mappings)
102
118
  inherit_systems = [].push(mappings["map"]["inherit"]).flatten
103
- for inherit_system in inherit_systems do
104
- if (inherit_system)
105
- inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
106
-
107
- @rules = [inherited_mapping.rules, rules].flatten
108
- @postrules = [inherited_mapping.postrules, postrules].flatten
109
- @characters = (inherited_mapping.characters|| {}).merge(characters)
110
- @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
111
- end
119
+
120
+ inherit_systems.each do |inherit_system|
121
+ next unless inherit_system
122
+
123
+ inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
124
+
125
+ @rules = [rules, inherited_mapping.rules].flatten
126
+ @postrules = [inherited_mapping.postrules, postrules].flatten
127
+ @characters = (inherited_mapping.characters|| {}).merge(characters)
128
+ @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
112
129
  end
130
+
131
+ @characters.compact! # the feature to ignore characters from inherited
113
132
  end
114
133
 
115
- def build_hashes()
134
+ def build_hashes
116
135
  @characters_hash = characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
117
136
  @dictionary_hash = dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
118
137
  end
119
138
 
120
- def build_trie()
139
+ def build_trie
121
140
  @dictionary_trie = Rambling::Trie.create
122
141
  dictionary_trie.concat dictionary.keys
123
142
  end