interscript 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/interscript.rb +10 -6
- data/lib/interscript/fs.rb +0 -2
- data/lib/interscript/mapping.rb +1 -1
- data/lib/interscript/opal.rb +38 -8
- data/lib/interscript/opal/entrypoint.rb +12 -0
- data/lib/interscript/opal/map_translate.rb +7 -0
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +5 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +5 -1
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -1
- data/maps/alalc-asm-Deva-Latn-1997.yaml +9 -3
- data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-bel-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-ell-Grek-Latn-1997.yaml +5 -1
- data/maps/alalc-ell-Grek-Latn-2010.yaml +1 -2
- data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
- data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -1
- data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
- data/maps/alalc-kor-Hang-Latn-1997.yaml +5 -1
- data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
- data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
- data/maps/alalc-mar-Deva-Latn-1997.yaml +21 -2
- data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
- data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +1 -1
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
- data/maps/{alalc-pan-Deva-Latn-1997.yaml → alalc-pan-Guru-Latn-1997.yaml} +23 -4
- data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
- data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
- data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +1 -1
- data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
- data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
- data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-srp-Cyrl-Latn-2013.yaml +1 -1
- data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
- data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -1
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -1
- data/maps/bgn-kor-Hang-Latn-1943.yaml +7 -3
- data/maps/bgn-kor-Kore-Latn-1943.yaml +3 -3
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +5 -1
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +5 -1
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +2 -2
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +6 -2
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +5 -1
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +5 -1
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +5 -1
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -1
- data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -1
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +1 -1
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +1 -1
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +1 -1
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +31 -1
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
- data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
- data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
- data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
- data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
- data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +1 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +1 -1
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +1 -1
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +1 -1
- data/maps/{bis-gjr-Gujr-Latn-13194-1991.yaml → bis-guj-Gujr-Latn-13194-1991.yaml} +17 -2
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +1 -1
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +1 -1
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +1 -1
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +1 -1
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +1 -1
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +1 -1
- data/maps/by-bel-Cyrl-Latn-1998.yaml +5 -1
- data/maps/by-bel-Cyrl-Latn-2007.yaml +1 -1
- data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
- data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
- data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
- data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
- data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
- data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
- data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
- data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
- data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
- data/maps/dos-nep-Deva-Latn-1997.yaml +15 -1
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +3 -3
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +2 -2
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -2
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -2
- data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -1
- data/maps/gki-bel-Cyrl-Latn-1992.yaml +1 -1
- data/maps/gki-bel-Cyrl-Latn-2000.yaml +1 -1
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +7 -3
- data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -1
- data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -1
- data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
- data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +8 -4
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -4
- data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
- data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
- data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
- data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
- data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
- data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
- data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
- data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
- data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
- data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
- data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
- data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
- data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
- data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
- data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
- data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -3
- data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
- data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +25 -17
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +1 -1
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
- data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
- data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +3 -3
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +2 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +2 -2
- data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
- data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
- data/maps/odni-hin-Deva-Latn-2015.yaml +1 -1
- data/maps/odni-kat-Geor-Latn-2015.yaml +1 -1
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-urd-Arab-Latn-2015.yaml +1 -1
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -1
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +1 -1
- data/maps/royin-tha-Thai-Latn-1968.yaml +5 -1
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +1 -1
- data/maps/royin-tha-Thai-Latn-1999.yaml +5 -1
- data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
- data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
- data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
- data/maps/ses-ara-Arab-Latn-1930.yaml +5 -1
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
- data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
- data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
- data/maps/{ungegn-amh-Ethi-Latn-2016.yaml → un-amh-Ethi-Latn-2016.yaml} +51 -24
- data/maps/un-ara-Arab-Latn-1971.yaml +1 -1
- data/maps/un-ara-Arab-Latn-1972.yaml +1 -1
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-bel-Cyrl-Latn-2007.yaml +1 -1
- data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
- data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +2 -2
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +2 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -3
- data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
- data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
- data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
- data/maps/un-nep-Deva-Latn-1972.yaml +204 -17
- data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +35 -12
- data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
- data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
- data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +1 -1
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -1
- data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
- data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
- data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
- data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
- data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
- data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +1 -1
- data/maps/var-tha-Thai-Zsym-ipa.yaml +1 -1
- data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +6 -2
- data/spec/interscript/filenames_spec.rb +384 -0
- data/spec/interscript_spec.rb +7 -4
- metadata +105 -26
- data/bin/interscript +0 -41
- data/bin/rspec +0 -29
- data/bin/setup +0 -8
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript-opal.rb +0 -2
- data/lib/interscript/opal_map_translate.rb +0 -12
- data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
- data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 82fae2b248d9c86139b7f188da2ac72699696c9768e54a4510a6f1af2b933dc9
|
|
4
|
+
data.tar.gz: 3e2cc24b8d33f5a8ed0f8b475e4d109049439221274ecda9ee1b9c7743896e07
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5f9925a97d17f0433446a898f63d18869a73e92f9975d8259c916dae242fc5b15ff93dd6d3c28ca2ff5bcbda29489d4ea59af26d4e16f2e1416d701354a6f6e2
|
|
7
|
+
data.tar.gz: 77321c4a1001cabda8cc057950682037b637176fd05637ce78a6ea698d8c55e238e6bbcc850dda97828c719afbdae1192a8e7a93fd2307a9d52196342fb015f5
|
data/lib/interscript.rb
CHANGED
|
@@ -90,7 +90,8 @@ module Interscript
|
|
|
90
90
|
end
|
|
91
91
|
|
|
92
92
|
charmap.each do |k, v|
|
|
93
|
-
|
|
93
|
+
re = mkregexp(k)
|
|
94
|
+
while (match = output&.match(re))
|
|
94
95
|
pos = match.offset(0).first
|
|
95
96
|
result = !downcase && up_case_around?(output, pos) ? v.upcase : v
|
|
96
97
|
|
|
@@ -118,12 +119,15 @@ module Interscript
|
|
|
118
119
|
|
|
119
120
|
return unless output
|
|
120
121
|
|
|
121
|
-
|
|
122
|
+
re = mkregexp('^(.)')
|
|
123
|
+
output = output.gsub(re, &:upcase) if title_case
|
|
122
124
|
if word_separator != ''
|
|
123
|
-
|
|
125
|
+
re = mkregexp("#{word_separator}#{separator}")
|
|
126
|
+
output = output.gsub(re, word_separator)
|
|
124
127
|
|
|
125
128
|
if title_case
|
|
126
|
-
|
|
129
|
+
re = mkregexp("#{word_separator}(.)")
|
|
130
|
+
output = output.gsub(re, &:upcase)
|
|
127
131
|
end
|
|
128
132
|
end
|
|
129
133
|
|
|
@@ -140,11 +144,11 @@ module Interscript
|
|
|
140
144
|
return false if string[pos] == string[pos].downcase
|
|
141
145
|
|
|
142
146
|
i = pos - 1
|
|
143
|
-
i -= 1 while i.positive? && string[i] !~
|
|
147
|
+
i -= 1 while i.positive? && string[i] !~ mkregexp('[[:alpha:]]')
|
|
144
148
|
before = i >= 0 && i < pos ? string[i].to_s.strip : ''
|
|
145
149
|
|
|
146
150
|
i = pos + 1
|
|
147
|
-
i += 1 while i < string.size - 1 && string[i] !~
|
|
151
|
+
i += 1 while i < string.size - 1 && string[i] !~ mkregexp('[[:alpha:]]')
|
|
148
152
|
after = i > pos ? string[i].to_s.strip : ''
|
|
149
153
|
|
|
150
154
|
before_uc = !before.empty? && before == before.upcase
|
data/lib/interscript/fs.rb
CHANGED
data/lib/interscript/mapping.rb
CHANGED
|
@@ -122,7 +122,7 @@ module Interscript
|
|
|
122
122
|
|
|
123
123
|
inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
|
|
124
124
|
|
|
125
|
-
@rules = [
|
|
125
|
+
@rules = [rules, inherited_mapping.rules].flatten
|
|
126
126
|
@postrules = [inherited_mapping.postrules, postrules].flatten
|
|
127
127
|
@characters = (inherited_mapping.characters|| {}).merge(characters)
|
|
128
128
|
@dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
|
data/lib/interscript/opal.rb
CHANGED
|
@@ -1,14 +1,34 @@
|
|
|
1
|
+
require "onigmo"
|
|
2
|
+
require "onigmo/core_ext"
|
|
3
|
+
|
|
4
|
+
# Increase this if there are out-of-memory errors. This setting is
|
|
5
|
+
# tested to be big enough to handle all the maps provided.
|
|
6
|
+
Onigmo::FFI.library.memory.grow(128)
|
|
7
|
+
|
|
1
8
|
module Interscript
|
|
2
9
|
module Opal
|
|
3
|
-
ALPHA_REGEXP = '\p{L}'
|
|
4
|
-
|
|
5
10
|
def mkregexp(regexpstring)
|
|
6
|
-
|
|
7
|
-
if regexpstring
|
|
8
|
-
|
|
9
|
-
|
|
11
|
+
@cache ||= {}
|
|
12
|
+
if s = @cache[regexpstring]
|
|
13
|
+
s
|
|
14
|
+
else
|
|
15
|
+
# JS regexp is more performant than Onigmo. Let's use the JS
|
|
16
|
+
# regexp wherever possible, but use Onigmo where we must.
|
|
17
|
+
# Let's allow those characters to happen for the regexp to be
|
|
18
|
+
# considered compatible: ()|.*+?{} ** BUT NOT (? **.
|
|
19
|
+
if /[\\$^\[\]]|\(\?/.match?(regexpstring)
|
|
20
|
+
# Ruby caches its regexps internally. We can't GC. We could
|
|
21
|
+
# think about freeing them, but we really can't, because they
|
|
22
|
+
# may be in use.
|
|
23
|
+
|
|
24
|
+
# Uncomment those to keep track of Onigmo/JS regexp compilation.
|
|
25
|
+
# print '#'
|
|
26
|
+
@cache[regexpstring] = Onigmo::Regexp.new(regexpstring)
|
|
27
|
+
else
|
|
28
|
+
# print '.'
|
|
29
|
+
@cache[regexpstring] = Regexp.new(regexpstring)
|
|
30
|
+
end
|
|
10
31
|
end
|
|
11
|
-
Regexp.new("/#{regexpstring}/#{flags}")
|
|
12
32
|
end
|
|
13
33
|
|
|
14
34
|
def sub_replace(string, pos, size, repl)
|
|
@@ -19,9 +39,19 @@ module Interscript
|
|
|
19
39
|
string
|
|
20
40
|
end
|
|
21
41
|
|
|
42
|
+
# name is unused
|
|
22
43
|
def load_map_json(name, json)
|
|
23
|
-
|
|
44
|
+
JSON.load(json).each do |k,v|
|
|
45
|
+
`Opal.global.InterscriptMaps[#{k}] = #{JSON.dump(v)}`
|
|
46
|
+
end
|
|
24
47
|
end
|
|
25
48
|
|
|
26
49
|
end
|
|
27
50
|
end
|
|
51
|
+
|
|
52
|
+
class String
|
|
53
|
+
# Opal has a wrong implementation of String#unicode_normalize
|
|
54
|
+
def unicode_normalize
|
|
55
|
+
self.JS.normalize
|
|
56
|
+
end
|
|
57
|
+
end
|
data/lib/interscript/version.rb
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: acadsin
|
|
3
3
|
id: 2002
|
|
4
|
-
language: zho
|
|
4
|
+
language: iso-639-2:zho
|
|
5
5
|
source_script: Hani
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: Chinese Tongyong Pinyin Academica Sinica 2002 System
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: zho_Hani2Latn_AcadSin_2002
|
|
11
|
+
description: Chinese Tongyong Pinyin Academica Sinica 2002 System
|
|
8
12
|
url:
|
|
9
13
|
description: Chinese Tongyong Pinyin Academica Sinica 2002 System
|
|
10
14
|
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 1997
|
|
4
|
-
language: amh
|
|
4
|
+
language: iso-639-2:amh
|
|
5
5
|
source_script: Ethi
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: ALA-LC Romanization Table -- Amharic (1997)
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: amh_Ethi2Latn_ALA_1997
|
|
11
|
+
description: Amharic ALA-Library of Congress 1997 System
|
|
8
12
|
url: http://catdir.loc.gov/catdir/cpso/romanization/amharic.pdf
|
|
9
13
|
creation_date: 1997
|
|
10
14
|
description: |
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 1997
|
|
4
|
-
language: ara
|
|
4
|
+
language: iso-639-2:ara
|
|
5
5
|
source_script: Arab
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: ALA-LC Romanization Table -- Arabic (1997)
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: ara_Arab2Latn_ALA_1997
|
|
11
|
+
description: Arabic ALA-Library of Congress 1997 System
|
|
8
12
|
url: http://catdir.loc.gov/catdir/cpso/romanization/arabic.pdf
|
|
9
13
|
creation_date: 1997
|
|
10
14
|
description: |
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 1997
|
|
4
|
-
language: asm
|
|
4
|
+
language: iso-639-2:asm
|
|
5
5
|
source_script: Deva
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: Assamese Romanization, 1997
|
|
8
|
-
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: asm_Deva2Latn_ALA_1997
|
|
11
|
+
description: Assamese ALA-Library of Congress 1997 System
|
|
12
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/assamese.pdf
|
|
9
13
|
creation_date: 1997
|
|
10
14
|
description: |
|
|
11
15
|
ALA-LC Romanization table for Assamese
|
|
@@ -47,7 +51,7 @@ tests:
|
|
|
47
51
|
- source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
|
|
48
52
|
expected: "kamaumabaāira maeẏarara daehata kaobhaiḍa pajaiṭaibha"
|
|
49
53
|
- source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
|
|
50
|
-
expected: "
|
|
54
|
+
expected: "ṭauiṭaāraẏaogae khaoda sadaraī karae ei kathaā"
|
|
51
55
|
- source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
|
|
52
56
|
expected: "lakhaimapaura jailaāra naāraāẏaṇapaurara barapathaārata ājai paraśaānatai dhaāma naāmaerae ekhana baṛdadhaāśaramara śaubhaāramabha karaā haẏa"
|
|
53
57
|
|
|
@@ -119,6 +123,7 @@ map:
|
|
|
119
123
|
|
|
120
124
|
#Semivowels
|
|
121
125
|
'য়': 'ya'
|
|
126
|
+
'য': 'ẏa'
|
|
122
127
|
'য়': 'ẏa'
|
|
123
128
|
'ৰ': 'ra'
|
|
124
129
|
'ল': 'la'
|
|
@@ -156,4 +161,5 @@ map:
|
|
|
156
161
|
'\u09c8': 'ai'
|
|
157
162
|
'\u09cb': 'o'
|
|
158
163
|
'\u09cc': 'au'
|
|
164
|
+
'।': '.'
|
|
159
165
|
'\u09CD': '' # Used for joining
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 2012
|
|
4
|
+
language: iso-639-2:asm
|
|
5
|
+
source_script: Deva
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Assamese Romanization, 2012
|
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/assamese.pdf
|
|
9
|
+
creation_date: 1997
|
|
10
|
+
description: |
|
|
11
|
+
ALA-LC Romanization table for Assamese
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
|
|
15
|
+
- Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
|
16
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
|
17
|
+
made in transliteration.
|
|
18
|
+
|
|
19
|
+
- |
|
|
20
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
|
21
|
+
transliteration, with the following exceptions:
|
|
22
|
+
|
|
23
|
+
a) when another vowel is indicated by its appropriate sign; and
|
|
24
|
+
b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
|
|
25
|
+
birāma.
|
|
26
|
+
|
|
27
|
+
- Candrabindu before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
|
|
28
|
+
labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
|
|
29
|
+
|
|
30
|
+
- When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
|
|
31
|
+
|
|
32
|
+
tests:
|
|
33
|
+
- source: "ৰাজ্যিক স্বাস্থ্য মন্ত্ৰী পীয়ুষ হাজৰিকাৰ বিৰুদ্ধে দাখিল কৰা হৈছে এজাহাৰ।"
|
|
34
|
+
expected: "raājaẏaika sabaāsathaẏa manataraī paīyausha haājaraikaāra bairaudadhae daākhaila karaā haaichae ejaāhaāra."
|
|
35
|
+
- source: "কোৰোনা মহামাৰীৰ এই সময়ত সভাখনত হাজাৰ হাজাৰ লোকে মাস্ক পৰিধান নকৰাৰ লগতে সামাজিক দূৰত্ব নমনাৰ অভিযোগ উত্থাপন কৰা হৈছে"
|
|
36
|
+
expected: "kaoraonaā mahaāmaāraīra ei samayata sabhaākhanata haājaāra haājaāra laokae maāsaka paraidhaāna nakaraāra lagatae saāmaājaika daūrataba namanaāra abhaiẏaoga utathaāpana karaā haaichae"
|
|
37
|
+
|
|
38
|
+
map:
|
|
39
|
+
|
|
40
|
+
inherit: "alalc-asm-Deva-Latn-1997"
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 1997
|
|
4
|
-
language: aze
|
|
4
|
+
language: iso-639-2:aze
|
|
5
5
|
source_script: Cyrl
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: ALA-LC Romanization Table -- Azerbaijani (1997)
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: aze_Cyrl2Latn_ALA_1997
|
|
11
|
+
description: Azerbaijani ALA-Library of Congress 1997 System
|
|
8
12
|
url: https://transliteration.eki.ee/pdf/Azerbaijani.pdf
|
|
9
13
|
creation_date: 1997
|
|
10
14
|
description: |
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 1997
|
|
4
|
-
language: bel
|
|
4
|
+
language: iso-639-2:bel
|
|
5
5
|
source_script: Cyrl
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: ALA-LC Romanization Table -- Byelorussian (1997)
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: bel_Cyrl2Latn_ALA_1997
|
|
11
|
+
description: Byelorussian ALA-Library of Congress 1997 System
|
|
8
12
|
url: http://catdir.loc.gov/catdir/cpso/romanization/beloruss.pdf
|
|
9
13
|
creation_date: 1997
|
|
10
14
|
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 1997
|
|
4
|
-
language: bul
|
|
4
|
+
language: iso-639-2:bul
|
|
5
5
|
source_script: Cyrl
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: ALA-LC Romanization Table -- Bulgarian (1997)
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: bul_Cyrl2Latn_ALA_1997
|
|
11
|
+
description: Bulgarian ALA-Library of Congress Bulgarian 1997 System
|
|
8
12
|
url: http://www.rechtertie.nl/databases/judd/downloads/Bulgarian.pdf
|
|
9
13
|
creation_date: 1997
|
|
10
14
|
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 1997
|
|
4
|
-
language: ell
|
|
4
|
+
language: iso-639-2:ell
|
|
5
5
|
source_script: Grek
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: Greek Romanization, 1997
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: ell_Grek2Latn_ALA_1997
|
|
11
|
+
description: Greek ALA-Library of Congress 1997 System
|
|
8
12
|
url: http://catdir.loc.gov/catdir/cpso/romanization/greek.pdf
|
|
9
13
|
creation_date: 1997
|
|
10
14
|
description: |
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: alalc
|
|
3
3
|
id: 2010
|
|
4
|
-
language: ell
|
|
4
|
+
language: iso-639-2:ell
|
|
5
5
|
source_script: Grek
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: Greek Romanization, 2010
|
|
@@ -27,7 +27,6 @@ tests:
|
|
|
27
27
|
|
|
28
28
|
Giannēs Makrygiannēs.
|
|
29
29
|
|
|
30
|
-
|
|
31
30
|
- source: ΑΘΗΝΑ
|
|
32
31
|
expected: ATHĒNA
|
|
33
32
|
- source: μπαμπάκι
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 1997
|
|
4
|
+
language: iso-639-2:guj
|
|
5
|
+
source_script: Gujr
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Gujarati Romanization, 1997
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: guj_Gujr2Latn_ALA_1997
|
|
11
|
+
description: Gujarati ALA-Library of Congress 1997 System
|
|
12
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/gujarati.pdf
|
|
13
|
+
creation_date: 1997
|
|
14
|
+
description: |
|
|
15
|
+
ALA-LC Romanization table for Gujarati
|
|
16
|
+
|
|
17
|
+
notes:
|
|
18
|
+
|
|
19
|
+
- Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
|
20
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
|
21
|
+
made in transliteration.
|
|
22
|
+
|
|
23
|
+
- |
|
|
24
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
|
25
|
+
transliteration, with the following exceptions:
|
|
26
|
+
a) when another vowel is indicated by its appropriate sign and
|
|
27
|
+
b) when the absence of any vowel is indicated by the subscript symbol ( ્ ) called halanta or
|
|
28
|
+
virāma.
|
|
29
|
+
|
|
30
|
+
- |
|
|
31
|
+
Exception: Anusvāra is transliterated by:
|
|
32
|
+
a) ṅ before gutturals,
|
|
33
|
+
b) ñ before palatals,
|
|
34
|
+
c) ṇ before cerebrals,
|
|
35
|
+
d) n before dentals, and
|
|
36
|
+
e) m before labials.
|
|
37
|
+
|
|
38
|
+
- When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
|
|
39
|
+
|
|
40
|
+
tests:
|
|
41
|
+
- source: "અમિત શાહનો કોરોના રિપોર્ટ ૨ ઓગસ્ટે પોઝિટિવ આવ્યો હતો, ત્યારથી તેમનું સ્વાસ્થ્ય સારું નથી"
|
|
42
|
+
expected: "amita śāhanȏ kȏrȏnā ripȏrṭa 2 ȏgasṭȇ pȏjhiṭiva āvyȏ hatȏ, tyārathī tȇmanuṃ svāsthya sāruṃ nathī"
|
|
43
|
+
- source: "મેદાંતા હોસ્પિટલમાં તેમનો ઇલાજ ચાલી રહ્યો હતો"
|
|
44
|
+
expected: "mȇdāntā hȏspiṭalamāṃ tȇmanȏ ilāja cālī rahyȏ hatȏ"
|
|
45
|
+
- source: "ભારતના વિશ્વનાથન આનંદે શેનયાનમાં પહેલો ફિડે શતરંજ વિશ્વ કપ જીત્યો"
|
|
46
|
+
expected: "bhāratanā viśvanāthana ānandȇ śȇnayānamāṃ pahȇlȏ phiḍȇ śatarañja viśva kapa jītyȏ"
|
|
47
|
+
- source: "ભારતીય વડા પ્રધાન જવાહરલાલ નેહરુએ ૪૦ લાખ હિન્દુઓ અને મુસલમાનોના પારસ્પરિક સ્થાનાંતરણનું સૂચન આપ્યું"
|
|
48
|
+
expected: "bhāratīya vaḍā pradhāna javāharalāla nȇharuȇ 40 lākha hinduȏ anȇ musalamānȏnā pārasparika sthānāntaraṇanuṃ sūcana āpyuṃ"
|
|
49
|
+
- source: "લિબિયાના એલ અજિજિયામાં ધરતી પર સૌથી વધુ તાપમાન નોંધાયું. એ વખતે છાયામાં નોંધવામાં આવેલું તાપમાન ૫૮ ડિગ્રી સેલ્સિયસ હતું."
|
|
50
|
+
expected: "libiyānā ȇla ajijiyāmāṃ dharatī para sauthī vadhu tāpamāna nȏndhāyuṃ. ȇ vakhatȇ chāyāmāṃ nȏndhavāmāṃ āvȇluṃ tāpamāna 58 ḍigrī sȇlsiyasa hatuṃ."
|
|
51
|
+
- source: "પ્રથમ વિશ્વયુદ્ધઃ જર્મની અને ફ્રાન્સ વચ્ચે એસ્નેની લડાઈ શરૂ થઈ હતી"
|
|
52
|
+
expected: "prathama viśvayuddhaḥ jarmanī anȇ phrānsa vaccȇ ȇsnȇnī laḍāī śarū thaī hatī"
|
|
53
|
+
- source: "એન્ગ્લો-મિસ્ત્ર યુદ્ધઃ તેલ અલ કેબિરનું યુદ્ધ લડવામાં આવ્યું હતું."
|
|
54
|
+
expected: "ȇnglȏ-mistra yuddhaḥ tȇla ala kȇbiranuṃ yuddha laḍavāmāṃ āvyuṃ hatuṃ."
|
|
55
|
+
- source: "પુરાવા ન હતા, એ જ કારણે કેસ ચાલ્યો નહીં, પણ તેમને નજરકેદ રાખવામાં આવ્યા"
|
|
56
|
+
expected: "purāvā na hatā, ȇ ja kāraṇȇ kȇsa cālyȏ nahīṃ, paṇa tȇmanȇ najarakȇda rākhavāmāṃ āvyā"
|
|
57
|
+
- source: "સરદાર પટેલે નક્કી કર્યું હતું કે કાશ્મીર ભારતનો હિસ્સો બનશે; ૯૧ વર્ષ પહેલાં લાહોર જેલમાં ભૂખહડતાળ દરમિયાન શહીદ થયા હતા જતીન દાસ"
|
|
58
|
+
expected: "saradāra paṭȇlȇ nakkī karyuṃ hatuṃ kȇ kāśmīra bhāratanȏ hissȏ banaśȇ; 91 varsha pahȇlāṃ lāhȏra jȇlamāṃ bhūkhahaḍatāḷa daramiyāna śahīda thayā hatā jatīna dāsa"
|
|
59
|
+
- source: "કોરોના પ્રોટોકોલ વચ્ચે આજે મેડિકલ પ્રવેશ પરીક્ષા લેવાશેઃ એન્ટ્રી ટચ ફ્રી રહેશે, એડમિટ કાર્ડ બાર કોડથી ચેક થશે"
|
|
60
|
+
expected: "kȏrȏnā prȏṭȏkȏla vaccȇ ājȇ mȇḍikala pravȇśa parīkshā lȇvāśȇḥ ȇnṭrī ṭaca phrī rahȇśȇ, ȇḍamiṭa kārḍa bāra kȏḍathī cȇka thaśȇ"
|
|
61
|
+
- source: "અલ્ ક઼`ઇદ્ માં હવામાન"
|
|
62
|
+
expected: "al ka`id māṃ havāmāna"
|
|
63
|
+
- source: "મંત્રાલય તથા ખ઼.ય ના વિ૨ષ્ઠ અધિકા૨ીઓ ઉપસ્થિત ૨હ્યા હતા"
|
|
64
|
+
expected: "mantrālaya tathā kha.ya nā vi2shṭha adhikā2īȏ upasthita 2hyā hatā"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
map:
|
|
68
|
+
|
|
69
|
+
rules:
|
|
70
|
+
# note 3
|
|
71
|
+
- pattern: \u0A82(?=[કખગઘઙ])
|
|
72
|
+
result: ṅ
|
|
73
|
+
- pattern: \u0A82(?=[ચછજઝઞ])
|
|
74
|
+
result: ñ
|
|
75
|
+
- pattern: \u0A82(?=[ટઠડઢણ])
|
|
76
|
+
result: ṇ
|
|
77
|
+
- pattern: \u0A82(?=[તથદધન])
|
|
78
|
+
result: n
|
|
79
|
+
- pattern: \u0A82(?=[પફબભમ])
|
|
80
|
+
result: m
|
|
81
|
+
|
|
82
|
+
# note[2(a,b)]
|
|
83
|
+
- pattern: ([ક]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
84
|
+
result: 'k'
|
|
85
|
+
- pattern: ([ખ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
86
|
+
result: 'kh'
|
|
87
|
+
- pattern: ([ગ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
88
|
+
result: 'g'
|
|
89
|
+
- pattern: ([ઘ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
90
|
+
result: 'gh'
|
|
91
|
+
- pattern: ([ઙ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
92
|
+
result: 'ṅ'
|
|
93
|
+
- pattern: ([ચ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
94
|
+
result: 'c'
|
|
95
|
+
- pattern: ([છ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
96
|
+
result: 'ch'
|
|
97
|
+
- pattern: ([જ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
98
|
+
result: 'j'
|
|
99
|
+
- pattern: ([ઝ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
100
|
+
result: 'jh'
|
|
101
|
+
- pattern: ([ઞ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
102
|
+
result: 'ñ'
|
|
103
|
+
- pattern: ([ટ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
104
|
+
result: 'ṭ'
|
|
105
|
+
- pattern: ([ઠ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
106
|
+
result: 'ṭh'
|
|
107
|
+
- pattern: ([ડ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
108
|
+
result: 'ḍ'
|
|
109
|
+
- pattern: ([ઢ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
110
|
+
result: 'ḍh'
|
|
111
|
+
- pattern: ([ણ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
112
|
+
result: 'ṇ'
|
|
113
|
+
- pattern: ([ત]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
114
|
+
result: 't'
|
|
115
|
+
- pattern: ([થ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
116
|
+
result: 'th'
|
|
117
|
+
- pattern: ([દ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
118
|
+
result: 'd'
|
|
119
|
+
- pattern: ([ધ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
120
|
+
result: 'dh'
|
|
121
|
+
- pattern: ([ન]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
122
|
+
result: 'n'
|
|
123
|
+
- pattern: ([પ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
124
|
+
result: 'p'
|
|
125
|
+
- pattern: ([ફ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
126
|
+
result: 'ph'
|
|
127
|
+
- pattern: ([બ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
128
|
+
result: 'b'
|
|
129
|
+
- pattern: ([ભ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
130
|
+
result: 'bh'
|
|
131
|
+
- pattern: ([મ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
132
|
+
result: 'm'
|
|
133
|
+
- pattern: ([ય]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
134
|
+
result: 'y'
|
|
135
|
+
- pattern: ([ર]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
136
|
+
result: 'r'
|
|
137
|
+
- pattern: ([લ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
138
|
+
result: 'l'
|
|
139
|
+
- pattern: ([ળ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
140
|
+
result: 'ḷ'
|
|
141
|
+
- pattern: ([વ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
142
|
+
result: 'v'
|
|
143
|
+
- pattern: ([શ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
144
|
+
result: 'ś'
|
|
145
|
+
- pattern: ([ષ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
146
|
+
result: 'sh'
|
|
147
|
+
- pattern: ([સ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
148
|
+
result: 's'
|
|
149
|
+
- pattern: ([હ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
|
|
150
|
+
result: 'h'
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
characters:
|
|
154
|
+
|
|
155
|
+
'અ': 'a'
|
|
156
|
+
'આ': 'ā'
|
|
157
|
+
'ઇ': 'i'
|
|
158
|
+
'ઈ': 'ī'
|
|
159
|
+
'ઉ': 'u'
|
|
160
|
+
'ઊ': 'ū'
|
|
161
|
+
'ઋ': 'ṛ'
|
|
162
|
+
|
|
163
|
+
'ઍ': 'e'
|
|
164
|
+
'એ': 'ȇ'
|
|
165
|
+
'ઐ': 'ai'
|
|
166
|
+
|
|
167
|
+
'ઑ': 'o'
|
|
168
|
+
'ઓ': 'ȏ'
|
|
169
|
+
'ઔ': 'au'
|
|
170
|
+
|
|
171
|
+
# II. Consonants (see Note 2)
|
|
172
|
+
# Gutturals
|
|
173
|
+
'ક': 'ka'
|
|
174
|
+
'ખ': 'kha'
|
|
175
|
+
'ગ': 'ga'
|
|
176
|
+
'ઘ': 'gha'
|
|
177
|
+
'ઙ': 'ṅa'
|
|
178
|
+
|
|
179
|
+
# Palatals
|
|
180
|
+
'ચ': 'ca'
|
|
181
|
+
'છ': 'cha'
|
|
182
|
+
'જ': 'ja'
|
|
183
|
+
'ઝ': 'jha'
|
|
184
|
+
'ઞ': 'ña'
|
|
185
|
+
|
|
186
|
+
# Cerebrals
|
|
187
|
+
'ટ': 'ṭa'
|
|
188
|
+
'ઠ': 'ṭha'
|
|
189
|
+
'ડ': 'ḍa'
|
|
190
|
+
'ઢ': 'ḍha'
|
|
191
|
+
'ણ': 'ṇa'
|
|
192
|
+
|
|
193
|
+
# Dentals
|
|
194
|
+
'ત': 'ta'
|
|
195
|
+
'થ': 'tha'
|
|
196
|
+
'દ': 'da'
|
|
197
|
+
'ધ': 'dha'
|
|
198
|
+
'ન': 'na'
|
|
199
|
+
|
|
200
|
+
# Labials
|
|
201
|
+
'પ': 'pa'
|
|
202
|
+
'ફ': 'pha'
|
|
203
|
+
'બ': 'ba'
|
|
204
|
+
'ભ': 'bha'
|
|
205
|
+
'મ': 'ma'
|
|
206
|
+
|
|
207
|
+
# Semivowels
|
|
208
|
+
'ય': 'ya'
|
|
209
|
+
'ર': 'ra'
|
|
210
|
+
'લ': 'la'
|
|
211
|
+
'ળ': 'ḷa'
|
|
212
|
+
'વ': 'va'
|
|
213
|
+
|
|
214
|
+
# Sibilants
|
|
215
|
+
'શ': 'śa'
|
|
216
|
+
'ષ': 'sha'
|
|
217
|
+
'સ': 'sa'
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Aspirate
|
|
221
|
+
'હ': 'ha'
|
|
222
|
+
|
|
223
|
+
# Visarga
|
|
224
|
+
'ઃ': 'ḥ'
|
|
225
|
+
|
|
226
|
+
# Anusvāra
|
|
227
|
+
'ં': 'ṃ'
|
|
228
|
+
|
|
229
|
+
# Avagraha (see Note 4)
|
|
230
|
+
'ઽ': '’' # (apostrophe)
|
|
231
|
+
|
|
232
|
+
# Medials # Needed for connecting consonants
|
|
233
|
+
|
|
234
|
+
'ા': 'ā'
|
|
235
|
+
'િ': 'i'
|
|
236
|
+
'ી': 'ī'
|
|
237
|
+
'ુ': 'u'
|
|
238
|
+
'ૂ': 'ū'
|
|
239
|
+
'ૃ': 'ṛ'
|
|
240
|
+
'ૅ': 'e'
|
|
241
|
+
'ે': 'ȇ'
|
|
242
|
+
'ૈ': 'ai'
|
|
243
|
+
'ૉ': 'o'
|
|
244
|
+
'ો': 'ȏ'
|
|
245
|
+
'ૌ': 'au'
|
|
246
|
+
|
|
247
|
+
# digits
|
|
248
|
+
|
|
249
|
+
'૦': '0'
|
|
250
|
+
'૧': '1'
|
|
251
|
+
'૨': '2'
|
|
252
|
+
'૩': '3'
|
|
253
|
+
'૪': '4'
|
|
254
|
+
'૫': '5'
|
|
255
|
+
'૬': '6'
|
|
256
|
+
'૭': '7'
|
|
257
|
+
'૮': '8'
|
|
258
|
+
'૯': '9'
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
'્': ''
|
|
263
|
+
'઼': ''
|
|
264
|
+
'।': '.'
|
|
265
|
+
'\u09CD': '' # Used for joining
|
|
266
|
+
"": ''# Used for joining
|