interscript 0.1.3 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +10 -11
- data/aliases.json +1 -0
- data/lib/interscript.rb +62 -59
- data/lib/interscript/command.rb +3 -2
- data/lib/interscript/fs.rb +96 -0
- data/lib/interscript/mapping.rb +36 -17
- data/lib/interscript/opal.rb +196 -0
- data/lib/interscript/opal/entrypoint.rb +20 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +8 -0
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +6 -2
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +259 -0
- data/maps/alalc-asm-Deva-Latn-2012.yaml +55 -0
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
- data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +7 -3
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +7 -4
- data/maps/alalc-ell-Grek-Latn-2010.yaml +3 -5
- data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
- data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +303 -0
- data/maps/alalc-hin-Deva-Latn-2011.yaml +65 -0
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +2 -3
- data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
- data/maps/alalc-kor-Hang-Latn-1997.yaml +6 -2
- data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
- data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
- data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
- data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
- data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +1 -1
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
- data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
- data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
- data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
- data/maps/alalc-san-Deva-Latn-2012.yaml +241 -0
- data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
- data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
- data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +1 -1
- data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
- data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -2
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -3
- data/maps/bgn-kor-Hang-Latn-1943.yaml +8 -4
- data/maps/bgn-kor-Kore-Latn-1943.yaml +4 -4
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +598 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +5 -1
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +14 -10
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +8 -5
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -2
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -2
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +18 -18
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +3 -3
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +3 -3
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +2 -2
- data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +338 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +673 -0
- data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
- data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
- data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
- data/maps/{bgnpcgn-chn-Hans-Latn-1979.yaml → bgnpcgn-zho-Hans-Latn-1979.yaml} +1 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
- data/maps/bis-kan-Kana-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +175 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +9 -5
- data/maps/by-bel-Cyrl-Latn-2007.yaml +4 -4
- data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
- data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
- data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
- data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
- data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
- data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
- data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
- data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
- data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
- data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +7 -8
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +6 -7
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -3
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -3
- data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -2
- data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +2 -2
- data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +2 -2
- data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +8 -4
- data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -6
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -5
- data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
- data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +11 -8
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -5
- data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
- data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
- data/maps/iso-kan-Kana-Latn-15919-2001.yaml +220 -0
- data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
- data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
- data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
- data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
- data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
- data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
- data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
- data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
- data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
- data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
- data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
- data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
- data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -4
- data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
- data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +2 -2
- data/maps/kp-kor-Hang-Latn-2002.yaml +29 -21
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +3 -3
- data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +163 -0
- data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +200 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
- data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +315 -0
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
- data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +2 -3
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
- data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/odni-prs-Arab-Latn-2015.yaml +228 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +6 -2
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +5 -5
- data/maps/royin-tha-Thai-Latn-1968.yaml +9 -5
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +5 -5
- data/maps/royin-tha-Thai-Latn-1999.yaml +8 -4
- data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
- data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
- data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
- data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
- data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
- data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
- data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
- data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
- data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +4 -4
- data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
- data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +44 -44
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +3 -4
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -4
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +316 -0
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +102 -0
- data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
- data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
- data/maps/un-nep-Deva-Latn-1972.yaml +269 -0
- data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +2 -2
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
- data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
- data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +4 -4
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +4 -4
- data/maps/var-kor-Kore-Hang-2013.yaml +2 -2
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +2 -3
- data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
- data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
- data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
- data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
- data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
- data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +6 -6
- data/maps/var-tha-Thai-Zsym-ipa.yaml +13 -13
- data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +13 -9
- data/spec/interscript/filenames_spec.rb +21 -0
- data/spec/interscript_spec.rb +16 -5
- metadata +275 -27
- data/bin/interscript +0 -41
- data/bin/rspec +0 -29
- data/bin/setup +0 -8
- data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
- data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7813dba0d0cc7493ed3b9279c61283c8d305f1e05584a44aa700e9b72acb2f06
|
|
4
|
+
data.tar.gz: f4c87e24d7c2719b4f358198967d55e0c17d8aaac354311cb9eecc800a592b2d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8e23000fe8fb016dec9351241787608c892d7af48682259c10345ce417f94a626ea06a5bc8b1e7a3f084da8c0502d13b96bd06c9a53da31530f5c487b06fe4e9
|
|
7
|
+
data.tar.gz: b2d8cb122b2c1bbb2d989d832802a007a60ecc6d0f2984e323a983ec94ae9505664b98e0d9c33e0b2711f5fa31f4e657a720648dcb572915f69006a1719a9610
|
data/README.adoc
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
= Interscript: Interoperable Script Conversion Systems, with a Ruby implementation
|
|
2
2
|
|
|
3
|
-
image:https://github.com/interscript/interscript/workflows/test/badge.svg["
|
|
3
|
+
image:https://github.com/interscript/interscript/workflows/test/badge.svg["Ruby build status", link="https://github.com/interscript/interscript/actions?workflow=test"]
|
|
4
|
+
image:https://github.com/interscript/interscript/workflows/js/badge.svg["JavaScript build status", link="https://github.com/interscript/interscript/actions?workflow=js"]
|
|
4
5
|
|
|
5
6
|
== Introduction
|
|
6
7
|
|
|
@@ -22,9 +23,9 @@ The goal is to achieve interoperable transliteration schemes allowing quality co
|
|
|
22
23
|
These transliteration systems are used in the demo:
|
|
23
24
|
|
|
24
25
|
`bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
|
|
25
|
-
`iso-rus-Cyrl-Latn-
|
|
26
|
+
`iso-rus-Cyrl-Latn-9-1995`:: ISO 9 Romanization of Russian
|
|
26
27
|
`icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
|
|
27
|
-
`bas-rus-Cyrl-Latn-bss`:: Bulgaria Academy of Science Streamlined System for Russian
|
|
28
|
+
`bas-rus-Cyrl-Latn-2017-bss`:: Bulgarian Academy of Sciences Streamlined System for Russian
|
|
28
29
|
|
|
29
30
|
image:demo/20191118-interscript-demo-cast.gif["interscript screencast"]
|
|
30
31
|
|
|
@@ -51,9 +52,7 @@ Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g
|
|
|
51
52
|
|
|
52
53
|
[source,sh]
|
|
53
54
|
----
|
|
54
|
-
pip3 install
|
|
55
|
-
curl -sSL -o sequitur-g2p.zip https://github.com/sequitur-g2p/sequitur-g2p/archive/806273f.zip
|
|
56
|
-
pip3 install sequitur-g2p.zip
|
|
55
|
+
pip3 install -r requirments.txt
|
|
57
56
|
----
|
|
58
57
|
|
|
59
58
|
Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
|
|
@@ -95,7 +94,7 @@ interscript rus-Cyrl.txt \
|
|
|
95
94
|
--output=bgnpcgn-rus-Latn.txt
|
|
96
95
|
|
|
97
96
|
interscript rus-Cyrl.txt \
|
|
98
|
-
--system=iso-rus-Cyrl-Latn-
|
|
97
|
+
--system=iso-rus-Cyrl-Latn-9-1995 \
|
|
99
98
|
--output=iso-rus-Latn.txt
|
|
100
99
|
|
|
101
100
|
interscript rus-Cyrl.txt \
|
|
@@ -103,7 +102,7 @@ interscript rus-Cyrl.txt \
|
|
|
103
102
|
--output=icao-rus-Latn.txt
|
|
104
103
|
|
|
105
104
|
interscript rus-Cyrl.txt \
|
|
106
|
-
--system=bas-rus-Cyrl-Latn-bss \
|
|
105
|
+
--system=bas-rus-Cyrl-Latn-2017-bss \
|
|
107
106
|
--output=bas-rus-Latn.txt
|
|
108
107
|
----
|
|
109
108
|
|
|
@@ -149,7 +148,7 @@ tests:
|
|
|
149
148
|
|
|
150
149
|
map:
|
|
151
150
|
rules:
|
|
152
|
-
- pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e,
|
|
151
|
+
- pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
|
|
153
152
|
result: Ye
|
|
154
153
|
- pattern: \b\u0415 # Е initially
|
|
155
154
|
result: Ye
|
|
@@ -167,7 +166,7 @@ The subsection `rules` is placed under the `map` key. All rules are applied in o
|
|
|
167
166
|
|
|
168
167
|
Each rule has `pattern` and `result` elements.
|
|
169
168
|
|
|
170
|
-
Pattern is a regex expression. It should be representing as a string without `//` or `%r{}` parentheses. For example `\b\u0415`. In case a rule is depend on previous or next content, lookahead or lookbehind could be used. For example a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` find every Е after upper or lower case symbols a, e,
|
|
169
|
+
The pattern is a regular expression. It should be written as a string without the `//` or `%r{}` delimiters, for example `\b\u0415`. If a rule depends on the preceding or following content, lookbehind or lookahead can be used. For example, a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` finds every Е that follows an upper- or lower-case a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь.
|
|
171
170
|
|
|
172
171
|
The result is the replacement for the pattern's match. It can contain a string, Unicode characters specified by a hexadecimal number, or a captured-group reference. A string containing a hexadecimal escape or a captured-group reference should be double-quoted, for example `"Y\u00eb"` or `"\\1\u00b7\\2"`. Captured groups are referenced by a double backslash followed by the group's number.
|
|
173
172
|
|
|
@@ -256,7 +255,7 @@ the system code identifying a script conversion system has the following compone
|
|
|
256
255
|
e.g. `bgnpcgn-rus-Cyrl-Latn-1947`:
|
|
257
256
|
|
|
258
257
|
`bgnpcgn`:: the authority identifier
|
|
259
|
-
`rus`:: an ISO 639-2
|
|
258
|
+
`rus`:: an ISO 639-{1,2,3,5} language code that this system applies to (For 639-2, use (T) code)
|
|
260
259
|
`Cyrl`:: an ISO 15924 script code, identifying the source script
|
|
261
260
|
`Latn`:: an ISO 15924 script code, identifying the target script
|
|
262
261
|
`1947`:: an identifier unit within the authority to identify this system
|
data/aliases.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"pan_Deva2Latn_ALA_1997":"alalc-pan-Guru-Latn-1997","kor_Hang2Latn_ALA_1997":"alalc-kor-Hang-Latn-1997","asm_Deva2Latn_ALA_1997":"alalc-asm-Deva-Latn-1997","aze_Cyrl2Latn_ALA_1997":"alalc-aze-Cyrl-Latn-1997","ukr_Cyrl2Latn_GUP_1996":"ua-ukr-Cyrl-Latn-1996","tha_Thai2Latn_RIT_1968":"royin-tha-Thai-Latn-1968","bul_Cyrl2Latn_BGN_1952":"bgnpcgn-bul-Cyrl-Latn-1952","tam_Taml2Latn_ALA_1997":"alalc-tam-Taml-Latn-1997","kor_Hang2Latn_GKN_2002":"kp-kor-Hang-Latn-2002","ell_Grek2Latn_ELOT743_1996":"bgnpcgn-ell-Grek-Latn-1996","zho_Hani2Latn_AcadSin_2002":"acadsin-zho-Hani-Latn-2002","ara_Arab2Latn_SES_1930":"ses-ara-Arab-Latn-1930","tgk_Cyrl2Latn_BGN_1994":"bgnpcgn-tgk-Cyrl-Latn-1994","fas_Arab2Latn_BGN_1958":"bgnpcgn-fas-Arab-Latn-1956","sin_Sinh2Latn_ALA_1997":"alalc-sin-Sinh-Latn-1997","uas_Arab2Latn_BGN_2007":"bgnpcgn-urd-Arab-Latn-2007","ukr_Cyrl2Latn_ALA_1997":"alalc-ukr-Cyrl-Latn-1997","bak_Cyrl2Latn_BGN_2007":"bgnpcgn-bak-Cyrl-Latn-2007","tam_Taml2Latn_ALA_2011":"alalc-tam-Taml-Latn-2011","ara_Arab2Latn_BGN_1956":"bgnpcgn-ara-Arab-Latn-1956","ell_Grek2Latn_ALA_1997":"alalc-ell-Grek-Latn-1997","rus_Cyrl2Latn_GOST_1983":"gost-rus-Cyrl-Latn-16876-71-1983","mar_Deva2Latn_ALA_1997":"alalc-mar-Deva-Latn-1997","bel_Cyrl2Latn_ALA_1997":"alalc-bel-Cyrl-Latn-1997","kat_Geor2Latn_ALA_1997":"alalc-kat-Geor-Latn-1997","bul_Cyrl2Latn_ALA_1997":"alalc-bul-Cyrl-Latn-1997","ara_Arab2Latn_ALA_1997":"alalc-ara-Arab-Latn-1997","mon_Cyrl2Latn_ALA_1997":"alalc-mon-Cyrl-Latn-1997","div_Thaa2Latn_GMV_1988":"bgnpcgn-div-Thaa-Latn-1988","hin_Deva2Latn_ALA_1997":"alalc-hin-Deva-Latn-1997","bel_Cyrl2Latn_GBO_1998":"by-bel-Cyrl-Latn-1998","ukr_Cyrl2Latn_BGN_1965":"bgnpcgn-ukr-Cyrl-Latn-1965","rus_Cyrl2Latn_ALA_1997":"alalc-rus-Cyrl-Latn-1997","tir_Thai2Latn_RIT_2000":"royin-tha-Thai-Latn-1999","guj_Gujr2Latn_ALA_1997":"alalc-guj-Gujr-Latn-1997","tel_Telu2Latn_ALA_1997":"alalc-tel-Telu-Latn-1997","mkd_Cyrl2Latn_BGN_1981":"bgnpcgn-mkd-Cyrl-Latn-1981","ori_Orya2Latn_ALA_2011":"alalc-ori-Orya-Latn-2
011","aze_Arab2Latn_ALA_1997":"alalc-aze-Arab-Latn-1997","ori_Orya2Latn_ALA_1997":"alalc-ori-Orya-Latn-1997","div_Thaa2Latn_ALA_1997":"alalc-div-Thaa-Latn-1997","rue_Cyrl2Latn_BGN_2016":"bgnpcgn-rue-Cyrl-Latn-2016","guj_Gujr2Latn_ALA_2011":"alalc-guj-Gujr-Latn-2011","kat_Geor2Latn_BGN_1981":"bgnpcgn-kat-Geor-Latn-1981","kor_Hang2Latn_MOCT_2000":"moct-kor-Hang-Latn-2000","sin_Sinh2Latn_ALA_2011":"alalc-sin-Sinh-Latn-2011","amh_Ethi2Latn_BGN_1967":"bgnpcgn-amh-Ethi-Latn-1967","srp_Cyrl2Latn_BGN_2005":"bgnpcgn-srp-Cyrl-Latn-2005","srp_Cyrl2Latn_ALA_1997":"alalc-srp-Cyrl-Latn-1997","mal_Mlym2Latn_ALA_2012":"alalc-mal-Mlym-Latn-2012","kat_Geor2Latn_GGG_2002":"ggg-kat-Geor-Latn-2002","mon_Cyrl2Latn_BGN_1964":"bgnpcgn-mon-Cyrl-Latn-1964","mal_Mlym2Latn_ALA_1997":"alalc-mal-Mlym-Latn-1997","ben_Beng2Latn_ALA_1997":"alalc-ben-Beng-Latn-1997","kor_Hang2Latn_MR_1939":"bgn-kor-Hang-Latn-1943","zho_Hani2Latn_GCH_1979":"sac-zho-Hans-Latn-1979","bul_Cyrl2Latn_BGN_2013":"bgnpcgn-bul-Cyrl-Latn-2013","ell_Grek2Latn_BGN_1962":"bgnpcgn-ell-Grek-Latn-1962","amh_Ethi2Latn_ALA_1997":"alalc-amh-Ethi-Latn-1997","pan_Deva2Latn_ALA_2011":"alalc-pan-Guru-Latn-2011","zho_Hani2Latn_WDG_1979":"var-zho-Hani-Latn-wd-1979","rus_Cyrl2Latn_BGN_1947":"bgnpcgn-rus-Cyrl-Latn-1947","bel_Cyrl2Latn_BGN_1979":"bgnpcgn-bel-Cyrl-Latn-1979","tat_Cyrl2Latn_BGN_2005":"bgnpcgn-tat-Cyrl-Latn-2007"}
|
data/lib/interscript.rb
CHANGED
|
@@ -1,56 +1,33 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "yaml"
|
|
4
3
|
require "interscript/mapping"
|
|
5
4
|
|
|
6
5
|
# Transliteration
|
|
7
6
|
module Interscript
|
|
8
7
|
|
|
9
|
-
class
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def transliterate_file(system_code, input_file, output_file, maps)
|
|
15
|
-
input = File.read(input_file)
|
|
16
|
-
output = transliterate(system_code, input, maps)
|
|
17
|
-
|
|
18
|
-
File.open(output_file, 'w') do |f|
|
|
19
|
-
f.puts(output)
|
|
20
|
-
end
|
|
21
|
-
puts "Output written to: #{output_file}"
|
|
22
|
-
end
|
|
8
|
+
class InvalidSystemError < StandardError; end
|
|
9
|
+
class ExternalProcessNotRecognizedError < StandardError; end
|
|
10
|
+
class ExternalProcessUnavailableError < StandardError; end
|
|
23
11
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
end
|
|
32
|
-
end
|
|
12
|
+
if RUBY_ENGINE == 'opal'
|
|
13
|
+
require "interscript/opal"
|
|
14
|
+
extend Opal
|
|
15
|
+
else
|
|
16
|
+
require "interscript/fs"
|
|
17
|
+
extend Fs
|
|
18
|
+
end
|
|
33
19
|
|
|
34
|
-
|
|
35
|
-
import_python_modules
|
|
36
|
-
case process_name
|
|
37
|
-
when 'sequitur.pythainlp_lexicon'
|
|
38
|
-
return g2pwrapper.transliterate('pythainlp_lexicon', string)
|
|
39
|
-
when 'sequitur.wiktionary_phonemic'
|
|
40
|
-
return g2pwrapper.transliterate('wiktionary_phonemic', string)
|
|
41
|
-
else
|
|
42
|
-
puts "Invalid Process"
|
|
43
|
-
end
|
|
44
|
-
end
|
|
20
|
+
class << self
|
|
45
21
|
|
|
46
22
|
def transliterate(system_code, string, maps={})
|
|
47
|
-
|
|
23
|
+
system_code = map_resolve(system_code)
|
|
24
|
+
|
|
25
|
+
unless maps.has_key? system_code
|
|
48
26
|
maps[system_code] = Interscript::Mapping.for(system_code)
|
|
49
27
|
end
|
|
50
28
|
# mapping = Interscript::Mapping.for(system_code)
|
|
51
29
|
mapping = maps[system_code]
|
|
52
30
|
|
|
53
|
-
|
|
54
31
|
# First, apply chained transliteration as specified in the list `chain`
|
|
55
32
|
chain = mapping.chain.dup
|
|
56
33
|
while chain.length > 0
|
|
@@ -63,17 +40,11 @@ module Interscript
|
|
|
63
40
|
title_case = mapping.title_case
|
|
64
41
|
downcase = mapping.downcase
|
|
65
42
|
|
|
66
|
-
# charmap = mapping.characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
|
|
67
|
-
# dictmap = mapping.dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
|
|
68
43
|
charmap = mapping.characters_hash
|
|
69
44
|
dictmap = mapping.dictionary_hash
|
|
70
45
|
trie = mapping.dictionary_trie
|
|
71
46
|
|
|
72
|
-
|
|
73
|
-
string = external_process(mapping.segmentation, string) if mapping.segmentation
|
|
74
|
-
|
|
75
|
-
# Transliteration/Transcription
|
|
76
|
-
string = external_process(mapping.transcription, string) if mapping.transcription
|
|
47
|
+
string = external_processing(mapping, string)
|
|
77
48
|
|
|
78
49
|
pos = 0
|
|
79
50
|
while pos < string.to_s.size
|
|
@@ -81,14 +52,15 @@ module Interscript
|
|
|
81
52
|
wordmatch = ""
|
|
82
53
|
|
|
83
54
|
# Using Trie, find the longest matching substring
|
|
84
|
-
while (pos + m < string.to_s.size) && (trie.partial_word?string[pos..pos+m])
|
|
55
|
+
while (pos + m < string.to_s.size) && (trie.partial_word?string[pos..pos+m])
|
|
85
56
|
wordmatch = string[pos..pos+m] if trie.word?string[pos..pos+m]
|
|
86
57
|
m += 1
|
|
87
58
|
end
|
|
59
|
+
|
|
88
60
|
m = wordmatch.length
|
|
89
61
|
if m > 0
|
|
90
62
|
repl = dictmap[string[pos..pos+m-1]]
|
|
91
|
-
string
|
|
63
|
+
string = sub_replace(string, pos, m, repl)
|
|
92
64
|
pos += repl.length
|
|
93
65
|
else
|
|
94
66
|
pos += 1
|
|
@@ -109,32 +81,62 @@ module Interscript
|
|
|
109
81
|
# offsets[pos] += result.size - match[0].size
|
|
110
82
|
# end
|
|
111
83
|
# end
|
|
84
|
+
|
|
112
85
|
mapping.rules.each do |r|
|
|
113
|
-
output
|
|
86
|
+
next unless output
|
|
87
|
+
re = mkregexp(r["pattern"])
|
|
88
|
+
output = output.gsub(re, r["result"])
|
|
114
89
|
end
|
|
115
90
|
|
|
116
91
|
charmap.each do |k, v|
|
|
117
|
-
|
|
92
|
+
re = mkregexp(k)
|
|
93
|
+
while (match = output&.match(re))
|
|
118
94
|
pos = match.offset(0).first
|
|
119
95
|
result = !downcase && up_case_around?(output, pos) ? v.upcase : v
|
|
120
|
-
|
|
121
|
-
|
|
96
|
+
|
|
97
|
+
# if more than one, choose the first one
|
|
98
|
+
result = result[0] if result.is_a?(Array)
|
|
99
|
+
|
|
100
|
+
output = sub_replace(
|
|
101
|
+
output,
|
|
102
|
+
pos,
|
|
103
|
+
match[0].size,
|
|
104
|
+
add_separator(separator, pos, result)
|
|
105
|
+
)
|
|
122
106
|
end
|
|
123
107
|
end
|
|
124
108
|
|
|
125
109
|
mapping.postrules.each do |r|
|
|
126
|
-
output
|
|
110
|
+
next unless output
|
|
111
|
+
re = mkregexp(r["pattern"])
|
|
112
|
+
output = if r["result"] == "upcase"
|
|
113
|
+
output.gsub(re, &:upcase)
|
|
114
|
+
else
|
|
115
|
+
output.gsub(re, r["result"])
|
|
116
|
+
end
|
|
127
117
|
end
|
|
128
118
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
119
|
+
return unless output
|
|
120
|
+
|
|
121
|
+
re = mkregexp('^(.)')
|
|
122
|
+
output = output.gsub(re, &:upcase) if title_case
|
|
123
|
+
if word_separator != ''
|
|
124
|
+
re = mkregexp("#{word_separator}#{separator}")
|
|
125
|
+
output = output.gsub(re, word_separator)
|
|
126
|
+
|
|
127
|
+
if title_case
|
|
128
|
+
re = mkregexp("#{word_separator}(.)")
|
|
129
|
+
output = output.gsub(re, &:upcase)
|
|
134
130
|
end
|
|
135
131
|
end
|
|
136
132
|
|
|
137
|
-
output
|
|
133
|
+
output.unicode_normalize
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def map_resolve(map)
|
|
137
|
+
map = aliases[map] if aliases.key? map
|
|
138
|
+
raise ArgumentError, "Map #{map} doesn't exist" unless map_exist? map
|
|
139
|
+
map
|
|
138
140
|
end
|
|
139
141
|
|
|
140
142
|
private
|
|
@@ -147,11 +149,11 @@ module Interscript
|
|
|
147
149
|
return false if string[pos] == string[pos].downcase
|
|
148
150
|
|
|
149
151
|
i = pos - 1
|
|
150
|
-
i -= 1 while i.positive? && string[i] !~
|
|
152
|
+
i -= 1 while i.positive? && string[i] !~ mkregexp('[[:alpha:]]')
|
|
151
153
|
before = i >= 0 && i < pos ? string[i].to_s.strip : ''
|
|
152
154
|
|
|
153
155
|
i = pos + 1
|
|
154
|
-
i += 1 while i < string.size - 1 && string[i] !~
|
|
156
|
+
i += 1 while i < string.size - 1 && string[i] !~ mkregexp('[[:alpha:]]')
|
|
155
157
|
after = i > pos ? string[i].to_s.strip : ''
|
|
156
158
|
|
|
157
159
|
before_uc = !before.empty? && before == before.upcase
|
|
@@ -159,5 +161,6 @@ module Interscript
|
|
|
159
161
|
# before_uc && (after.empty? || after_uc) || after_uc && (before.empty? || before_uc)
|
|
160
162
|
before_uc || after_uc
|
|
161
163
|
end
|
|
164
|
+
|
|
162
165
|
end
|
|
163
166
|
end
|
data/lib/interscript/command.rb
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
require 'thor'
|
|
2
2
|
require 'interscript'
|
|
3
|
-
|
|
3
|
+
require 'json'
|
|
4
4
|
module Interscript
|
|
5
5
|
# Command line interface
|
|
6
6
|
class Command < Thor
|
|
7
7
|
desc '<file>', 'Transliterate text'
|
|
8
8
|
option :system, aliases: '-s', required: true, desc: 'Transliteration system'
|
|
9
9
|
option :output, aliases: '-o', required: false, desc: 'Output file'
|
|
10
|
+
option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json'
|
|
10
11
|
|
|
11
12
|
def translit(input)
|
|
12
13
|
if options[:output]
|
|
13
|
-
Interscript.transliterate_file(options[:system], input, options[:output])
|
|
14
|
+
Interscript.transliterate_file(options[:system], input, options[:output], JSON.parse(options[:map]))
|
|
14
15
|
else
|
|
15
16
|
puts Interscript.transliterate(options[:system], IO.read(input))
|
|
16
17
|
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
|
|
3
|
+
module Interscript
|
|
4
|
+
module Fs
|
|
5
|
+
# Overwrites a slice of +string+ in place and returns the same object.
#
# The slice is the inclusive range pos..(pos + size - 1); with a +size+ of 0
# that range is empty, so +repl+ is inserted at +pos+ without removing
# anything.
#
# @param string [String] object that is mutated
# @param pos [Integer] index of the first element to replace
# @param size [Integer] number of elements to replace
# @param repl [String] replacement content
# @return [String] +string+ itself, after the replacement
def sub_replace(string, pos, size, repl)
  last = pos + size - 1
  string[pos..last] = repl
  string
end
|
|
9
|
+
|
|
10
|
+
# Returns the directory used as the base for bundled data files.
#
# The path is ".." joined onto the parent of this file's directory; it is not
# normalised, so the ".." segment stays in the string form. The Pathname is
# built once and memoised in @root_path.
#
# @return [Pathname]
def root_path
  @root_path ||= begin
    parent_dir = File.dirname(__dir__)
    Pathname.new(File.join(parent_dir, ".."))
  end
end
|
|
13
|
+
|
|
14
|
+
# Transliterates the contents of one file into another.
#
# Reads +input_file+, passes its text to +transliterate+ (defined outside this
# method) together with +system_code+ and +maps+, writes the result to
# +output_file+ with +puts+, and prints a confirmation line to stdout.
#
# @param system_code [String] transliteration system passed to +transliterate+
# @param input_file [String] path of the file to read
# @param output_file [String] path of the file to (over)write
# @param maps [Hash] extra argument forwarded unchanged to +transliterate+
# @return [String] +output_file+
def transliterate_file(system_code, input_file, output_file, maps = {})
  source_text = File.read(input_file)
  result = transliterate(system_code, source_text, maps)

  File.open(output_file, 'w') { |handle| handle.puts(result) }

  puts "Output written to: #{output_file}"
  output_file
end
|
|
25
|
+
|
|
26
|
+
# Imports the +g2pwrapper+ Python module via +pyimport+.
#
# If the first import raises a StandardError, "<root_path>/lib/" is appended
# to Python's +sys.path+ and the import is attempted once more; a failure of
# that second attempt propagates to the caller.
#
# NOTE(review): +pyimport+ is not defined in this file - presumably it comes
# from the PyCall gem; confirm against the including module.
def import_python_modules
  pyimport :g2pwrapper
rescue StandardError
  # Retry with the bundled lib directory on the Python search path.
  pyimport :sys
  sys.path.append("#{root_path}/lib/")
  pyimport :g2pwrapper
end
|
|
35
|
+
|
|
36
|
+
# Runs +string+ through an external (Python-backed) process.
#
# The process name is validated before anything else happens, and outside the
# rescue clause, so an unknown name surfaces as
# ExternalProcessNotRecognizedError instead of being converted into
# ExternalProcessUnavailableError, and no Python import is attempted for it.
#
# @param process_name [String] 'sequitur.pythainlp_lexicon' or
#   'sequitur.wiktionary_phonemic'
# @param string [String] text handed to the external process
# @return the value returned by +g2pwrapper.transliterate+
# @raise [ExternalProcessNotRecognizedError] if +process_name+ is not one of
#   the names above
# @raise [ExternalProcessUnavailableError] if importing the Python modules or
#   calling the wrapper raises a StandardError
def external_process(process_name, string)
  model =
    case process_name
    when 'sequitur.pythainlp_lexicon' then 'pythainlp_lexicon'
    when 'sequitur.wiktionary_phonemic' then 'wiktionary_phonemic'
    else raise ExternalProcessNotRecognizedError
    end

  begin
    import_python_modules
    g2pwrapper.transliterate(model, string)
  rescue StandardError
    # Any failure of the Python bridge is reported uniformly to callers.
    raise ExternalProcessUnavailableError
  end
end
|
|
51
|
+
|
|
52
|
+
# Applies the external processing stages configured on +mapping+ to +string+.
#
# Segmentation runs first when +mapping.segmentation+ is set; its output is
# then fed to the transliteration/transcription stage when
# +mapping.transcription+ is set. A stage whose setting is nil or false is
# skipped.
#
# @param mapping [#segmentation, #transcription] source of the process names
# @param string [String] text to process
# @return the processed text, or +string+ unchanged when no stage is set
def external_processing(mapping, string)
  segmenter = mapping.segmentation
  segmented = segmenter ? external_process(segmenter, string) : string

  transcriber = mapping.transcription
  transcriber ? external_process(transcriber, segmented) : segmented
end
|
|
61
|
+
|
|
62
|
+
# Returns the alias table: a Hash mapping each alias code to the name of the
# map file (basename without ".yaml") that declares it.
#
# Lookup order when +refresh+ is false:
# 1. the table already held in memory (@aliases);
# 2. the cache file <root_path>/aliases.json, if it exists.
# Otherwise (or when +refresh+ is true) every maps/*.yaml file under
# root_path is scanned for an "alias" section, the table is rebuilt, and a
# best-effort attempt is made to write it back to that same cache file.
#
# The cache is written to the path it is read from, not to the current
# working directory, so a refresh cannot be shadowed by a stale cache file.
#
# @param refresh [Boolean] force a rescan of the map files
# @return [Hash{String => String}] alias code => map name
def aliases(refresh: false)
  file = root_path.join("./aliases.json").to_s

  unless refresh
    return @aliases if @aliases

    if File.exist?(file)
      @aliases = JSON.parse(File.read(file))
      return @aliases
    end
  end

  @aliases = {}
  # Sorted so that, should two maps declare the same alias code, the winner
  # does not depend on filesystem enumeration order.
  Dir[root_path.join('./maps/*.yaml').to_s].sort.each do |yaml_file|
    org_name = File.basename(yaml_file, ".yaml")
    map = YAML.load_file(yaml_file)
    next unless map.is_a?(Hash) # e.g. an empty YAML document

    (map["alias"] || {}).each_value do |entry|
      @aliases[entry["code"]] = org_name
    end
  end

  # Try to save it to a file, but do not force it (the install location may
  # be read-only).
  begin
    File.write(file, JSON.dump(@aliases))
  rescue SystemCallError, IOError
    nil
  end

  @aliases
end
|
|
84
|
+
|
|
85
|
+
private
|
|
86
|
+
|
|
87
|
+
# Tells whether a map definition file exists for +map+.
#
# The file looked for is "./maps/<map>.yaml" resolved against root_path.
#
# @param map [String] map name without the ".yaml" extension
# @return [Boolean]
def map_exist?(map)
  relative_name = "./maps/" + map + ".yaml"
  candidate = root_path.join(relative_name)
  File.exist?(candidate.to_s)
end
|
|
90
|
+
|
|
91
|
+
# Compiles +regexpstring+ into a Regexp carrying the +u+ (UTF-8) flag.
#
# The pattern text is interpolated as-is; it is not escaped. A new Regexp is
# compiled on every call.
#
# @param regexpstring [String] pattern source, e.g. '[[:alpha:]]'
# @return [Regexp]
def mkregexp(regexpstring)
  %r{#{regexpstring}}u
end
|
|
94
|
+
|
|
95
|
+
end
|
|
96
|
+
end
|
data/lib/interscript/mapping.rb
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
require 'rambling-trie'
|
|
2
|
+
require 'yaml' unless RUBY_ENGINE == 'opal'
|
|
3
|
+
require 'json'
|
|
2
4
|
|
|
3
5
|
module Interscript
|
|
4
|
-
class InvalidSystemError < StandardError; end
|
|
5
6
|
|
|
6
7
|
class Mapping
|
|
7
8
|
attr_reader(
|
|
@@ -35,7 +36,10 @@ module Interscript
|
|
|
35
36
|
# Builds a mapping for +system_code+ and immediately loads its definition.
#
# @param system_code [String] identifier of the transliteration system
# @param options [Hash] recognised keys:
#   :depth       - inheritance nesting level, coerced with +to_i+ (default 0)
#   :system_code - value stored as @system_path; falls back to +default_path+
#
# NOTE(review): the path override is read from the :system_code option key,
# not from a :system_path key - confirm that this is intended.
def initialize(system_code, options = {})
  @system_code = system_code
  @depth = options.fetch(:depth, 0).to_i

  # No filesystem path is set up when running under Opal.
  @system_path = options.fetch(:system_code, default_path) if RUBY_ENGINE != 'opal'

  load_and_serialize_system_mappings
end
|
|
@@ -45,10 +49,10 @@ module Interscript
|
|
|
45
49
|
end
|
|
46
50
|
|
|
47
51
|
# Loads the raw mapping definition and hands it to
# +serialize_system_mappings+.
#
# Nothing is loaded once +depth+ has reached 5; the method then returns nil.
# +include_inherited_mappings+ passes depth + 1 to every inherited mapping, so
# this caps how deep an inheritance chain is followed.
#
# @return the result of +serialize_system_mappings+, or nil at depth >= 5
def load_and_serialize_system_mappings
  return unless depth < 5

  serialize_system_mappings(load_system_mappings)
end
|
|
53
57
|
|
|
54
58
|
private
|
|
@@ -64,6 +68,18 @@ module Interscript
|
|
|
64
68
|
end
|
|
65
69
|
|
|
66
70
|
# Loads the raw mapping definition from the source that fits the running
# Ruby engine: +load_opal_mappings+ under Opal, +load_fs_mappings+ otherwise.
#
# @return whatever the selected loader returns
def load_system_mappings
  return load_opal_mappings if RUBY_ENGINE == 'opal'

  load_fs_mappings
end
|
|
77
|
+
|
|
78
|
+
# Loads the mapping definition for +system_code+ when running under Opal.
#
# The backtick expression looks up +system_code+ in
# +Opal.global.InterscriptMaps+; the value found there is parsed as JSON.
#
# NOTE(review): under Opal a backtick literal is presumably inline JavaScript
# rather than a shell command - this method is only reached when
# RUBY_ENGINE == 'opal'.
#
# @return the parsed JSON document
def load_opal_mappings
  raw_json = `Opal.global.InterscriptMaps[#{system_code}]`
  JSON.parse(raw_json)
end
|
|
81
|
+
|
|
82
|
+
def load_fs_mappings
|
|
67
83
|
YAML.load_file(system_path.join(system_code_file))
|
|
68
84
|
rescue Errno::ENOENT
|
|
69
85
|
raise Interscript::InvalidSystemError.new("No system mappings found")
|
|
@@ -100,24 +116,27 @@ module Interscript
|
|
|
100
116
|
|
|
101
117
|
# Merges the data of every inherited system into this mapping.
#
# mappings["map"]["inherit"] may be a single system code, a (nested) list of
# codes, or nil; nil/false entries are skipped. For each parent, loaded with
# depth + 1:
#   * its rules are appended after this mapping's rules;
#   * its postrules are placed before this mapping's postrules;
#   * its characters and dictionary act as defaults that this mapping's own
#     entries override.
# Finally, characters whose value is nil are dropped, which lets a mapping
# blank out a character it would otherwise inherit.
#
# @param mappings [Hash] raw mapping document with a "map" section
# @return the result of +Hash#compact!+ on @characters (nil if nothing was
#   removed)
def include_inherited_mappings(mappings)
  parent_codes = [mappings["map"]["inherit"]].flatten

  parent_codes.each do |parent_code|
    next unless parent_code

    parent = Mapping.for(parent_code, depth: depth + 1)

    @rules = [rules, parent.rules].flatten
    @postrules = [parent.postrules, postrules].flatten
    @characters = (parent.characters || {}).merge(characters)
    @dictionary = (parent.dictionary || {}).merge(dictionary)
  end

  @characters.compact! # the feature to ignore characters from inherited
end
|
|
114
133
|
|
|
115
|
-
def build_hashes
|
|
134
|
+
# Builds @characters_hash and @dictionary_hash: copies of +characters+ and
# +dictionary+ whose entries are ordered from the longest key to the
# shortest. A source table that is nil yields nil.
def build_hashes
  longest_key_first = lambda do |table|
    table&.sort_by { |key, _value| key.size }&.reverse&.to_h
  end

  @characters_hash = longest_key_first.call(characters)
  @dictionary_hash = longest_key_first.call(dictionary)
end
|
|
119
138
|
|
|
120
|
-
def build_trie
|
|
139
|
+
# Creates a fresh Rambling::Trie in @dictionary_trie and adds every key of
# +dictionary+ to it.
#
# @return the result of the trie's +concat+ call
def build_trie
  @dictionary_trie = Rambling::Trie.create
  words = dictionary.keys
  dictionary_trie.concat(words)
end
|