interscript-maps 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.adoc +28 -0
- data/interscript-maps.gemspec +28 -0
- data/interscript-maps.yaml +235 -0
- data/libs/posix.iml +11 -0
- data/libs/unicode.iml +13 -0
- data/libs/var-Cyrl.iml +7 -0
- data/libs/var-kor.iml +17 -0
- data/maps-staging/royin-tha-Thai-Latn-1939-generic.imp +98 -0
- data/maps-staging/royin-tha-Thai-Latn-1968.imp +156 -0
- data/maps-staging/royin-tha-Thai-Latn-1999-chained.imp +161 -0
- data/maps-staging/royin-tha-Thai-Latn-1999.imp +78 -0
- data/maps-staging/var-tha-Thai-Thai-phonemic.imp +53 -0
- data/maps-staging/var-tha-Thai-Zsym-ipa.imp +273 -0
- data/maps/acadsin-zho-Hani-Latn-2002.imp +27515 -0
- data/maps/alalc-amh-Ethi-Latn-1997.imp +392 -0
- data/maps/alalc-amh-Ethi-Latn-2011.imp +85 -0
- data/maps/alalc-ara-Arab-Latn-1997.imp +1171 -0
- data/maps/alalc-asm-Deva-Latn-1997.imp +214 -0
- data/maps/alalc-asm-Deva-Latn-2012.imp +53 -0
- data/maps/alalc-aze-Arab-Latn-1997.imp +321 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.imp +101 -0
- data/maps/alalc-bel-Cyrl-Latn-1997.imp +118 -0
- data/maps/alalc-ben-Beng-Latn-1997.imp +225 -0
- data/maps/alalc-ben-Beng-Latn-2017.imp +135 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.imp +110 -0
- data/maps/alalc-div-Thaa-Latn-1997.imp +171 -0
- data/maps/alalc-ell-Grek-Latn-1997.imp +381 -0
- data/maps/alalc-ell-Grek-Latn-2010.imp +382 -0
- data/maps/alalc-guj-Gujr-Latn-1997.imp +223 -0
- data/maps/alalc-guj-Gujr-Latn-2011.imp +57 -0
- data/maps/alalc-hin-Deva-Latn-1997.imp +248 -0
- data/maps/alalc-hin-Deva-Latn-2011.imp +63 -0
- data/maps/alalc-kan-Kana-Latn-1997.imp +233 -0
- data/maps/alalc-kan-Kana-Latn-2011.imp +58 -0
- data/maps/alalc-kat-Geok-Latn-1997.imp +109 -0
- data/maps/alalc-kat-Geor-Latn-1997.imp +104 -0
- data/maps/alalc-kor-Hang-Latn-1997.imp +68 -0
- data/maps/alalc-mal-Mlym-Latn-1997.imp +260 -0
- data/maps/alalc-mal-Mlym-Latn-2012.imp +65 -0
- data/maps/alalc-mar-Deva-Latn-1997.imp +178 -0
- data/maps/alalc-mar-Deva-Latn-2011.imp +51 -0
- data/maps/alalc-mkd-Cyrl-Latn-1997.imp +125 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.imp +113 -0
- data/maps/alalc-mon-Cyrl-Latn-1997.imp +161 -0
- data/maps/alalc-ori-Orya-Latn-1997.imp +234 -0
- data/maps/alalc-ori-Orya-Latn-2011.imp +59 -0
- data/maps/alalc-pan-Guru-Latn-1997.imp +241 -0
- data/maps/alalc-pan-Guru-Latn-2011.imp +71 -0
- data/maps/alalc-per-Arab-Latn-1997.imp +318 -0
- data/maps/alalc-pli-Deva-Latn-2012.imp +140 -0
- data/maps/alalc-pra-Deva-Latn-2012.imp +52 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.imp +165 -0
- data/maps/alalc-rus-Cyrl-Latn-2012.imp +107 -0
- data/maps/alalc-san-Deva-Latn-2012.imp +207 -0
- data/maps/alalc-sin-Sinh-Latn-1997.imp +246 -0
- data/maps/alalc-sin-Sinh-Latn-2011.imp +63 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.imp +124 -0
- data/maps/alalc-srp-Cyrl-Latn-2013.imp +115 -0
- data/maps/alalc-tam-Taml-Latn-1997.imp +52 -0
- data/maps/alalc-tam-Taml-Latn-2011.imp +49 -0
- data/maps/alalc-tel-Telu-Latn-1997.imp +237 -0
- data/maps/alalc-tel-Telu-Latn-2011.imp +58 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.imp +123 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.imp +32 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.imp +194 -0
- data/maps/az-aze-Cyrl-Latn-1939.imp +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.imp +50 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.imp +160 -0
- data/maps/bas-rus-Cyrl-Latn-2017-oss.imp +165 -0
- data/maps/bgn-jpn-Hrkt-Latn-1962.imp +288 -0
- data/maps/bgn-kor-Hang-Latn-1943.imp +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.imp +33 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.imp +119 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.imp +119 -0
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.imp +393 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.imp +472 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.imp +125 -0
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.imp +111 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.imp +169 -0
- data/maps/bgnpcgn-bal-Arab-Latn-2008.imp +296 -0
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.imp +200 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.imp +137 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.imp +38 -0
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.imp +176 -0
- data/maps/bgnpcgn-deu-Latn-Latn-2000.imp +56 -0
- data/maps/bgnpcgn-div-Thaa-Latn-1972.imp +90 -0
- data/maps/bgnpcgn-div-Thaa-Latn-1988.imp +71 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.imp +443 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.imp +269 -0
- data/maps/bgnpcgn-fao-Latn-Latn-1964.imp +41 -0
- data/maps/bgnpcgn-fao-Latn-Latn-1968.imp +28 -0
- data/maps/bgnpcgn-fas-Arab-Latn-1956.imp +111 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.imp +42 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1968.imp +32 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.imp +191 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.imp +116 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.imp +43 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.imp +193 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.imp +170 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.imp +177 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.imp +40 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.imp +41 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.imp +240 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.imp +132 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.imp +174 -0
- data/maps/bgnpcgn-mon-Cyrl-Latn-1964.imp +168 -0
- data/maps/bgnpcgn-nep-Deva-Latn-2011.imp +208 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.imp +312 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.imp +552 -0
- data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.imp +445 -0
- data/maps/bgnpcgn-pus-Arab-Latn-1968.imp +289 -0
- data/maps/bgnpcgn-ron-cyrl-latn-2002.imp +165 -0
- data/maps/bgnpcgn-rue-Cyrl-Latn-2016.imp +133 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.imp +195 -0
- data/maps/bgnpcgn-sme-Latn-Latn-1984.imp +48 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.imp +55 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.imp +146 -0
- data/maps/bgnpcgn-tat-Cyrl-Latn-2007.imp +185 -0
- data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.imp +188 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.imp +136 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.imp +88 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.imp +333 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.imp +145 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.imp +74 -0
- data/maps/bgnpcgn-zho-Hans-Latn-1979.imp +7463 -0
- data/maps/bis-asm-Beng-Latn-13194-1991.imp +154 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.imp +151 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.imp +178 -0
- data/maps/bis-guj-Gujr-Latn-13194-1991.imp +172 -0
- data/maps/bis-kan-Kana-Latn-13194-1991.imp +166 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.imp +170 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.imp +168 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.imp +169 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.imp +165 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.imp +149 -0
- data/maps/by-bel-Cyrl-Latn-1998.imp +123 -0
- data/maps/by-bel-Cyrl-Latn-2007.imp +77 -0
- data/maps/din-grc-Grek-Latn-31634-2011-t1.imp +627 -0
- data/maps/din-hin-Deva-Latn-33904-2018.imp +101 -0
- data/maps/din-kat-Geor-Latn-32707-2010.imp +103 -0
- data/maps/din-mar-Deva-Latn-33904-2018.imp +83 -0
- data/maps/din-nep-Deva-Latn-33904-2018.imp +110 -0
- data/maps/din-pli-Deva-Latn-33904-2018.imp +72 -0
- data/maps/din-pra-Deva-Latn-33904-2018.imp +66 -0
- data/maps/din-san-Deva-Latn-33904-2018.imp +294 -0
- data/maps/din-tam-Taml-Latn-33903-2016.imp +187 -0
- data/maps/dos-nep-Deva-Latn-1997.imp +47 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.imp +399 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.imp +397 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +34 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +178 -0
- data/maps/ggg-kat-Geor-Latn-2002.imp +75 -0
- data/maps/gki-bel-Cyrl-Latn-1992.imp +44 -0
- data/maps/gki-bel-Cyrl-Latn-2000.imp +159 -0
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.imp +179 -0
- data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.imp +132 -0
- data/maps/hk-yue-Hani-Latn-1888.imp +29201 -0
- data/maps/icao-bel-Cyrl-Latn-9303.imp +136 -0
- data/maps/icao-bul-Cyrl-Latn-9303.imp +127 -0
- data/maps/icao-fas-Arab-Latn-9303.imp +112 -0
- data/maps/icao-heb-Hebr-Latn-9303.imp +160 -0
- data/maps/icao-mkd-Cyrl-Latn-9303.imp +126 -0
- data/maps/icao-rus-Cyrl-Latn-9303.imp +126 -0
- data/maps/icao-srp-Cyrl-Latn-9303.imp +126 -0
- data/maps/icao-ukr-Cyrl-Latn-9303.imp +127 -0
- data/maps/iso-ara-Arab-Latn-233-1984.imp +301 -0
- data/maps/iso-asm-Beng-Latn-15919-2001.imp +73 -0
- data/maps/iso-ben-Beng-Latn-15919-2001.imp +171 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +365 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +43 -0
- data/maps/iso-guj-Gujr-Latn-15919-2001.imp +214 -0
- data/maps/iso-hin-Deva-Latn-15919-2001.imp +73 -0
- data/maps/iso-inc-Deva-Latn-15919-2001.imp +61 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.imp +59 -0
- data/maps/iso-kan-Kana-Latn-15919-2001.imp +212 -0
- data/maps/iso-kat-Geor-Latn-9984-1996.imp +103 -0
- data/maps/iso-kor-Hang-Latn-1996-method1.imp +140 -0
- data/maps/iso-kor-Hang-Latn-1996-method2.imp +132 -0
- data/maps/iso-mal-Mlym-Latn-15919-2001.imp +276 -0
- data/maps/iso-mar-Deva-Latn-15919-2001.imp +68 -0
- data/maps/iso-nep-Deva-Latn-15919-2001.imp +75 -0
- data/maps/iso-ori-Orya-Latn-15919-2001.imp +188 -0
- data/maps/iso-pan-Guru-Latn-15919-2001.imp +217 -0
- data/maps/iso-pli-Beng-Latn-15919-2001.imp +66 -0
- data/maps/iso-pli-Deva-Latn-15919-2001.imp +68 -0
- data/maps/iso-pli-Sinh-Latn-15919-2001.imp +211 -0
- data/maps/iso-pli-Thai-Latn-15919-2001.imp +47 -0
- data/maps/iso-pra-Deva-Latn-15919-2001.imp +60 -0
- data/maps/iso-prs-Arab-Latn-233-3-1999.imp +352 -0
- data/maps/iso-rus-Cyrl-Latn-9-1995.imp +279 -0
- data/maps/iso-san-Deva-Latn-15919-2001.imp +215 -0
- data/maps/iso-tam-Taml-Latn-15919-2001.imp +153 -0
- data/maps/iso-tel-Telu-Latn-15919-2001.imp +214 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.imp +114 -0
- data/maps/kp-kor-Hang-Latn-2002.imp +540 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.imp +29005 -0
- data/maps/masm-mon-Cyrl-Latn-5217-2012.imp +136 -0
- data/maps/masm-mon-Latn-Cyrl-5217-2012.imp +162 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.imp +403 -0
- data/maps/moct-kor-Hang-Latn-2000.imp +475 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.imp +484 -0
- data/maps/mv-div-Thaa-Latn-1987.imp +144 -0
- data/maps/mvd-bel-Cyrl-Latn-2008.imp +224 -0
- data/maps/mvd-bel-Cyrl-Latn-2010.imp +64 -0
- data/maps/mvd-rus-Cyrl-Latn-2008.imp +110 -0
- data/maps/mvd-rus-Cyrl-Latn-2010.imp +40 -0
- data/maps/odni-ara-Arab-Latn-2004.imp +106 -0
- data/maps/odni-ara-Arab-Latn-2015.imp +281 -0
- data/maps/odni-aze-Cyrl-Latn-2015.imp +158 -0
- data/maps/odni-bel-Cyrl-Latn-2015.imp +138 -0
- data/maps/odni-bul-Cyrl-Latn-2005.imp +90 -0
- data/maps/odni-bul-Cyrl-Latn-2015.imp +103 -0
- data/maps/odni-che-Cyrl-Latn-2015.imp +165 -0
- data/maps/odni-fas-Arab-Latn-2004.imp +268 -0
- data/maps/odni-fas-Arab-Latn-2015.imp +398 -0
- data/maps/odni-hin-Deva-Latn-2004.imp +180 -0
- data/maps/odni-hin-Deva-Latn-2015.imp +256 -0
- data/maps/odni-kat-Geor-Latn-2015.imp +76 -0
- data/maps/odni-kaz-Cyrl-Latn-2015.imp +164 -0
- data/maps/odni-kir-Cyrl-Latn-2015.imp +149 -0
- data/maps/odni-kor-Hang-Latn-2015.imp +307 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.imp +28 -0
- data/maps/odni-mkd-Cyrl-Latn-2015.imp +124 -0
- data/maps/odni-prs-Arab-Latn-2004.imp +120 -0
- data/maps/odni-prs-Arab-Latn-2015.imp +225 -0
- data/maps/odni-pus-Arab-Latn-2011.imp +327 -0
- data/maps/odni-rus-Cyrl-Latn-2015.imp +79 -0
- data/maps/odni-srp-Cyrl-Latn-2005.imp +35 -0
- data/maps/odni-srp-Cyrl-Latn-2015.imp +130 -0
- data/maps/odni-tat-Cyrl-Latn-2015.imp +157 -0
- data/maps/odni-tgk-Cyrl-Latn-2015.imp +161 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.imp +159 -0
- data/maps/odni-uig-Cyrl-Latn-2015.imp +151 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.imp +136 -0
- data/maps/odni-urd-Arab-Latn-2015.imp +220 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.imp +165 -0
- data/maps/sac-zho-Hans-Latn-1979.imp +20940 -0
- data/maps/sasm-mon-Mong-Latn-general-1978.imp +294 -0
- data/maps/sasm-mon-Mong-Latn-phonetic-1978.imp +261 -0
- data/maps/ses-ara-Arab-Latn-1930.imp +225 -0
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.imp +171 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.imp +149 -0
- data/maps/ua-ukr-Cyrl-Latn-2007.imp +69 -0
- data/maps/ua-ukr-Cyrl-Latn-2010.imp +128 -0
- data/maps/un-amh-Ethi-Latn-2016.imp +483 -0
- data/maps/un-ara-Arab-Latn-1971.imp +137 -0
- data/maps/un-ara-Arab-Latn-1972.imp +155 -0
- data/maps/un-ara-Arab-Latn-2017.imp +375 -0
- data/maps/un-asm-Beng-Latn-1972.imp +188 -0
- data/maps/un-bel-Cyrl-Latn-2007.imp +78 -0
- data/maps/un-ben-Beng-Latn-2016.imp +516 -0
- data/maps/un-ell-Grek-Latn-1987-phonetic.imp +437 -0
- data/maps/un-ell-Grek-Latn-1987-tl.imp +27 -0
- data/maps/un-ell-Grek-Latn-1987-ts.imp +269 -0
- data/maps/un-guj-Gujr-Latn-1972.imp +196 -0
- data/maps/un-hin-Deva-Latn-2016.imp +356 -0
- data/maps/un-kan-Kana-Latn-2016.imp +214 -0
- data/maps/un-mal-Mlym-Latn-1972.imp +215 -0
- data/maps/un-mar-Deva-Latn-2016.imp +96 -0
- data/maps/un-mon-Mong-Latn-general-2013.imp +170 -0
- data/maps/un-mon-Mong-Latn-phonetic-2013.imp +170 -0
- data/maps/un-nep-Deva-Latn-1972.imp +295 -0
- data/maps/un-nep-Deva-Latn-2013.imp +62 -0
- data/maps/un-ori-Orya-Latn-1972.imp +208 -0
- data/maps/un-pan-Guru-Latn-1972.imp +321 -0
- data/maps/un-prs-Arab-Latn-1967.imp +214 -0
- data/maps/un-rus-Cyrl-Latn-1987.imp +96 -0
- data/maps/un-sin-Sinh-Latn-1972.imp +193 -0
- data/maps/un-tam-Taml-Latn-1972.imp +173 -0
- data/maps/un-tel-Telu-Latn-1972.imp +229 -0
- data/maps/un-ukr-Cyrl-Latn-1998.imp +58 -0
- data/maps/un-ukr-Cyrl-Latn-2012.imp +95 -0
- data/maps/un-urd-Arab-Latn-1972.imp +290 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.imp +414 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.imp +54 -0
- data/maps/var-hin-Deva-Latn-hunterian-1872.imp +212 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.imp +399 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.imp +382 -0
- data/maps/var-kor-Hang-Hang-jamo.imp +11196 -0
- data/maps/var-kor-Hang-Latn-mr-1939.imp +574 -0
- data/maps/var-kor-Kore-Hang-2013.imp +59764 -0
- data/maps/var-kor-Kore-Latn-mr-1939.imp +36 -0
- data/maps/var-mar-Deva-Latn-hunterian-1872.imp +39 -0
- data/maps/var-mon-Mong-Latn-1930.imp +101 -0
- data/maps/var-mon-Mong-Latn-lessing.imp +181 -0
- data/maps/var-mon-Mong-Latn-vpmc.imp +182 -0
- data/maps/var-pra-Deva-Latn-iast-1912.imp +36 -0
- data/maps/var-san-Deva-Latn-iast-1912.imp +147 -0
- data/maps/var-zho-Hani-Latn-wd-1979.imp +27549 -0
- metadata +335 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
metadata {
|
|
2
|
+
authority_id: az
|
|
3
|
+
id: 1958
|
|
4
|
+
language: iso-639-2:aze
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
url: https://omniglot.com/writing/azeri.htm
|
|
8
|
+
creation_date: 1958
|
|
9
|
+
description: |
|
|
10
|
+
In 1939 Joseph Stalin ordered the Cyrillic alphabet to be used by Azeri speakers in the Soviet Union.
|
|
11
|
+
|
|
12
|
+
notes:
|
|
13
|
+
- In 1947, the letter Цц was excluded from the alphabet. Previously, it was used for Russian borrowings
|
|
14
|
+
- In 1958, the letters Ээ, Юю, Яя were eliminated, and the letter Йй was replaced by Јј
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
tests {
|
|
18
|
+
# test "Юя", "Юя" # TODO why should it be left unchanged? this is the only test that's failing
|
|
19
|
+
# from internet
|
|
20
|
+
test "Азәрбајҹан әлифбасы", "Azərbaycan əlifbası"
|
|
21
|
+
test "Бүтүн инсанлар ләјагәт вә һүгугларына ҝөрә азад бәрабәр доғулурлар.\nОнларын шүурлары вә виҹданлары вар вә бир-бирләринә мүнасибәтдә гардашлыг руһунда давранмалыдырлар.", "Bütün insanlar ləyaqət və hüquqlarına görə azad bərabər doğulurlar.\nOnların şüurları və vicdanları var və bir-birlərinə münasibətdə qardaşlıq ruhunda davranmalıdırlar."
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
dependency "az-aze-Cyrl-Latn-1939", as: cyrllatn
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
stage {
|
|
28
|
+
sub "Й", "J" # Й note[2]
|
|
29
|
+
sub "й", "j" # й note[2]
|
|
30
|
+
|
|
31
|
+
run map.cyrllatn.stage.main
|
|
32
|
+
|
|
33
|
+
# CHARACTERS
|
|
34
|
+
parallel {
|
|
35
|
+
sub "Ј", "Y" # Ј note[2]
|
|
36
|
+
sub "Й", "J" # Й note[2]
|
|
37
|
+
sub "Ц", "" # Ц note[1]
|
|
38
|
+
sub "Э", "" # Э note[2]
|
|
39
|
+
sub "Ю", "" # Ю note[2]
|
|
40
|
+
sub "Я", "" # Я note[2]
|
|
41
|
+
|
|
42
|
+
sub "ј", "y" # ј note[2]
|
|
43
|
+
sub "й", "j" # й note[2]
|
|
44
|
+
sub "ц", "" # ц note[1]
|
|
45
|
+
sub "э", "" # э note[2]
|
|
46
|
+
sub "ю", "" # ю note[2]
|
|
47
|
+
sub "я", "" # я note[2]
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
metadata {
|
|
2
|
+
authority_id: bas
|
|
3
|
+
id: 2017-bss
|
|
4
|
+
language: iso-639-2:rus
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Streamlined Romanization of Russian Cyrillic -- Basic Streamlined System
|
|
8
|
+
url: https://www.researchgate.net/publication/318402098
|
|
9
|
+
creation_date: 2017-07
|
|
10
|
+
description: |
|
|
11
|
+
The streamlined approach to transliteration was initiated by the
|
|
12
|
+
author with the development of the Streamlined System for the
|
|
13
|
+
Romanization of Bulgarian, which was eventually codified by the
|
|
14
|
+
Transliteration Act of 2009 (ДВ 2009) of the Bulgarian Parliament.
|
|
15
|
+
|
|
16
|
+
The four purposes of the system below are in order of priority:
|
|
17
|
+
1. ensure a plausible phonetic approximation of Russian words by English speaking users, including those having no knowledge of the Russian language and no available additional explanations;
|
|
18
|
+
2. the system should allow for the retrieval of the original Cyrillic spellings as much as feasible;
|
|
19
|
+
3. transliterated Russian words should fit an English language environment i.e. not be perceived as too ‘un-English’; and
|
|
20
|
+
4. transliterated word forms should be streamlined and simple. (Ivanov 2003, Ivanov et al. 2010)
|
|
21
|
+
|
|
22
|
+
notes:
|
|
23
|
+
- Typical for the streamlined approach is its non-use of diacritics,
|
|
24
|
+
its use of Latin y for rendering only Cyrillic й rather than both й and
|
|
25
|
+
ы, its non-use of Latin j, as well as its use of Latin h rather than kh
|
|
26
|
+
for Cyrillic х.
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
tests {
|
|
30
|
+
test "Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа\nты могла только родиться, в той земле, что не любит шутить, а\nровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока\nне зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не\nжелезным схвачен винтом, а наскоро живьём с одним топором да долотом\nснарядил и собрал тебя ярославский расторопный мужик. Не в немецких\nботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а\nпривстал, да замахнулся, да затянул песню — кони вихрем, спицы в\nколесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул\nв испуге остановившийся пешеход — и вон она понеслась, понеслась,\nпонеслась!\n\nН.В. Гоголь", "Eh, troyka! ptitsa troyka, kto tebya vidumal? znat, u boykogo naroda\nti mogla tolko roditsya, v toy zemle, chto ne lyubit shutit, a\nrovnem-gladnem razmetnulas na polsveta, da i stupay schitat versti, poka\nne zaryabit tebe v ochi. I ne hitriy, kazhis, dorozhniy snaryad, ne\nzheleznim shvachen vintom, a naskoro zhivyem s odnim toporom da dolotom\nsnaryadil i sobral tebya yaroslavskiy rastoropniy muzhik. Ne v nemetskih\nbotfortah yamshchik: boroda da rukavitsi, i sidit chert znaet na chem; a\nprivstal, da zamahnulsya, da zatyanul pesnyu — koni vihrem, spitsi v\nkolesah smeshalis v odin gladkiy krug, tolko drognula doroga, da vskriknul\nv ispuge ostanovivshiysya peshehod — i von ona poneslas, poneslas,\nponeslas!\n\nN.V. Gogol"
|
|
31
|
+
test "ЁЖ Ёж ёж", "EZH Ezh ezh"
|
|
32
|
+
test "Цветущий сад", "Tsvetushchiy sad"
|
|
33
|
+
test "Чувство юмора", "Chuvstvo yumora"
|
|
34
|
+
test "Широкий выбор", "Shirokiy vibor"
|
|
35
|
+
test "Все подъезды заблокированны", "Vse podezdi zablokirovanni"
|
|
36
|
+
test "Ожерелье", "Ozherelye"
|
|
37
|
+
test "Ручьи", "Ruchyi"
|
|
38
|
+
test "Каньон", "Kanyon"
|
|
39
|
+
test "Бельэтаж", "Belyetazh"
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
dependency "posix", import: true
|
|
43
|
+
|
|
44
|
+
stage {
|
|
45
|
+
|
|
46
|
+
# RULES
|
|
47
|
+
sub "\u042c", "Y", after: any("ЕеЁёИиОоЭэ") # Ь (before Е, Ё, И, O, Э)
|
|
48
|
+
sub "\u044c", "y", after: any("ЕеЁёИиОоЭэ") # ь (before Е, Ё, И, O, Э)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# CHARACTERS
|
|
52
|
+
parallel {
|
|
53
|
+
# "\u0027": "" # '
|
|
54
|
+
sub "А", "A" # А
|
|
55
|
+
sub "Б", "B" # Б
|
|
56
|
+
sub "В", "V" # В
|
|
57
|
+
sub "Г", "G" # Г
|
|
58
|
+
sub "Д", "D" # Д
|
|
59
|
+
sub "Ё", "E" # Ё
|
|
60
|
+
sub "Е", "E" # Е
|
|
61
|
+
sub "Ж", "Zh" # Ж
|
|
62
|
+
sub "Ж", "ZH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
63
|
+
sub "Ж", "ZH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
64
|
+
|
|
65
|
+
sub "З", "Z" # З
|
|
66
|
+
sub "Э", "E" # Э
|
|
67
|
+
sub "И", "I" # И
|
|
68
|
+
sub "Й", "Y" # Й
|
|
69
|
+
sub "К", "K" # К
|
|
70
|
+
sub "Л", "L" # Л
|
|
71
|
+
sub "М", "M" # М
|
|
72
|
+
sub "Н", "N" # Н
|
|
73
|
+
sub "О", "O" # О
|
|
74
|
+
sub "П", "P" # П
|
|
75
|
+
sub "Р", "R" # Р
|
|
76
|
+
sub "С", "S" # С
|
|
77
|
+
sub "Т", "T" # Т
|
|
78
|
+
sub "У", "U" # У
|
|
79
|
+
sub "Ф", "F" # Ф
|
|
80
|
+
sub "Х", "H" # Х
|
|
81
|
+
sub "Ц", "Ts" # Ц
|
|
82
|
+
sub "Ц", "TS", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
83
|
+
sub "Ц", "TS", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
84
|
+
sub "Ч", "Ch" # Ч
|
|
85
|
+
sub "Ч", "CH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
86
|
+
sub "Ч", "CH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
87
|
+
sub "Ш", "Sh" # Ш
|
|
88
|
+
sub "Ш", "SH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
89
|
+
sub "Ш", "SH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
90
|
+
sub "Щ", "Shch" # Щ
|
|
91
|
+
sub "Щ", "SHCH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
92
|
+
sub "Щ", "SHCH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
93
|
+
sub "Ы", "I" # Ы
|
|
94
|
+
sub "Я", "Ya" # Я
|
|
95
|
+
sub "Я", "YA", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
96
|
+
sub "Я", "YA", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
97
|
+
sub "Ю", "Yu" # Ю
|
|
98
|
+
sub "Ю", "YU", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
99
|
+
sub "Ю", "YU", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
100
|
+
|
|
101
|
+
# Ь (before Е, Ё, И, O, Э)
|
|
102
|
+
# "\u042c\u0401": "YE" # Ё
|
|
103
|
+
# "\u042c\u0415": "YE" # Е
|
|
104
|
+
# "\u042c\u0418": "YI" # И
|
|
105
|
+
# "\u042c\u041E": "YO" # O
|
|
106
|
+
    # "\u042c\u042d": "YE" # Э
|
|
107
|
+
|
|
108
|
+
# Ь (otherwise) -> (none)
|
|
109
|
+
sub "Ь", ""
|
|
110
|
+
|
|
111
|
+
# Ъ -> (none)
|
|
112
|
+
sub "Ъ", ""
|
|
113
|
+
|
|
114
|
+
sub "а", "a" # а
|
|
115
|
+
sub "б", "b" # б
|
|
116
|
+
sub "в", "v" # в
|
|
117
|
+
sub "г", "g" # г
|
|
118
|
+
sub "д", "d" # д
|
|
119
|
+
sub "ё", "e" # ё
|
|
120
|
+
sub "е", "e" # e
|
|
121
|
+
sub "ж", "zh" # ж
|
|
122
|
+
sub "з", "z" # з
|
|
123
|
+
sub "э", "e" # э
|
|
124
|
+
sub "и", "i" # и
|
|
125
|
+
sub "й", "y" # й
|
|
126
|
+
sub "к", "k" # к
|
|
127
|
+
sub "л", "l" # л
|
|
128
|
+
sub "м", "m" # м
|
|
129
|
+
sub "н", "n" # н
|
|
130
|
+
sub "о", "o" # о
|
|
131
|
+
sub "п", "p" # п
|
|
132
|
+
sub "р", "r" # р
|
|
133
|
+
sub "с", "s" # с
|
|
134
|
+
sub "т", "t" # т
|
|
135
|
+
sub "у", "u" # у
|
|
136
|
+
sub "ф", "f" # ф
|
|
137
|
+
sub "х", "h" # х
|
|
138
|
+
sub "ц", "ts" # ц
|
|
139
|
+
sub "ч", "ch" # ч
|
|
140
|
+
sub "ш", "sh" # ш
|
|
141
|
+
sub "щ", "shch" # щ
|
|
142
|
+
sub "ы", "i" # ы
|
|
143
|
+
sub "я", "ya" # я
|
|
144
|
+
sub "ю", "yu" # ю
|
|
145
|
+
|
|
146
|
+
# ь (before е, ё, и, o, э)
|
|
147
|
+
    # "\u044c\u0435": "ye" # е
|
|
148
|
+
    # "\u044c\u0451": "ye" # ё
|
|
149
|
+
# "\u044c\u0438": "yi" # и
|
|
150
|
+
    # "\u044c\u043e": "yo" # о
|
|
151
|
+
# "\u044c\u044d": "ye" # э
|
|
152
|
+
|
|
153
|
+
# ь (otherwise) -> (none)
|
|
154
|
+
sub "ь", ""
|
|
155
|
+
|
|
156
|
+
# ъ -> (none)
|
|
157
|
+
sub "ъ", ""
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
metadata {
|
|
2
|
+
authority_id: bas
|
|
3
|
+
id: 2017-oss
|
|
4
|
+
language: iso-639-2:rus
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Streamlined Romanization of Russian Cyrillic -- Optimized Streamlined System
|
|
8
|
+
url: https://www.researchgate.net/publication/318402098
|
|
9
|
+
creation_date: 2017-07
|
|
10
|
+
description: |
|
|
11
|
+
The streamlined approach to transliteration was initiated by the
|
|
12
|
+
author with the development of the Streamlined System for the
|
|
13
|
+
Romanization of Bulgarian, which was eventually codified by the
|
|
14
|
+
Transliteration Act of 2009 (ДВ 2009) of the Bulgarian Parliament.
|
|
15
|
+
|
|
16
|
+
The four purposes of the system below are in order of priority:
|
|
17
|
+
1. ensure a plausible phonetic approximation of Russian words by English speaking users, including those having no knowledge of the Russian language and no available additional explanations;
|
|
18
|
+
2. the system should allow for the retrieval of the original Cyrillic spellings as much as feasible;
|
|
19
|
+
3. transliterated Russian words should fit an English language environment i.e. not be perceived as too ‘un-English’; and
|
|
20
|
+
4. transliterated word forms should be streamlined and simple. (Ivanov 2003, Ivanov et al. 2010)
|
|
21
|
+
|
|
22
|
+
notes:
|
|
23
|
+
- Typical for the streamlined approach is its non-use of diacritics,
|
|
24
|
+
its use of Latin y for rendering only Cyrillic й rather than both й and
|
|
25
|
+
ы, its non-use of Latin j, as well as its use of Latin h rather than kh
|
|
26
|
+
for Cyrillic х.
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
tests {
|
|
30
|
+
test "Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа ты могла только родиться, в той земле, что не любит шутить, а ровнем-гладнем разметнулась на полсвета, да и ступай считать версты, пока не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не железным схвачен винтом, а наскоро живьём с одним топором да долотом снарядил и собрал тебя ярославский расторопный мужик. Не в немецких ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а привстал, да замахнулся, да затянул песню — кони вихрем, спицы в колесах смешались в один гладкий круг, только дрогнула дорога, да вскрикнул в испуге остановившийся пешеход — и вон она понеслась, понеслась, понеслась!\nН.В. Гоголь", "`Eh, troyka! ptitsa troyka, kto tebya v`idumal? znat', u boykogo naroda t`i mogla tol'ko rodit'sya, v toy zemle, chto ne lyubit shutit', a rovnem-gladnem razmetnulas' na polsveta, da i stupay schitat' verst`i, poka ne zaryabit tebe v ochi. I ne hitr`iy, kazhis', dorozhn`iy snaryad, ne zhelezn`im shvachen vintom, a naskoro zhivy``em s odnim toporom da dolotom snaryadil i sobral tebya yaroslavskiy rastoropn`iy muzhik. Ne v nemetskih botfortah yamshchik: boroda da rukavits`i, i sidit ch``ert znaet na ch``em; a privstal, da zamahnulsya, da zatyanul pesnyu — koni vihrem, spits`i v kolesah smeshalis' v odin gladkiy krug, tol'ko drognula doroga, da vskriknul v ispuge ostanovivshiysya peshehod — i von ona poneslas', poneslas', poneslas'!\nN.V. Gogol'"
|
|
31
|
+
test "ЁЖ Ёж ёж", "``EZH ``Ezh ``ezh"
|
|
32
|
+
test "Цветущий сад", "Tsvetushchiy sad"
|
|
33
|
+
test "Чувство юмора", "Chuvstvo yumora"
|
|
34
|
+
test "Широкий выбор", "Shirokiy v`ibor"
|
|
35
|
+
test "Все подъезды заблокированны", "Vse pod\"ezd`i zablokirovann`i"
|
|
36
|
+
test "Ожерелье", "Ozherelye"
|
|
37
|
+
test "Ручьи", "Ruchyi"
|
|
38
|
+
test "Каньон", "Kanyon"
|
|
39
|
+
test "Бельэтаж", "Bely`etazh"
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
stage {
|
|
43
|
+
|
|
44
|
+
# RULES
|
|
45
|
+
sub "\u042c", "Y", after: any("ЕеЁёИиОоЭэ") # Ь (before Е, Ё, И, O, Э)
|
|
46
|
+
sub "\u044c", "y", after: any("ЕеЁёИиОоЭэ") # ь (before Е, Ё, И, O, Э)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# CHARACTERS
|
|
50
|
+
parallel {
|
|
51
|
+
# "\u0027": "" # '
|
|
52
|
+
sub "А", "A" # А
|
|
53
|
+
sub "Б", "B" # Б
|
|
54
|
+
sub "В", "V" # В
|
|
55
|
+
sub "Г", "G" # Г
|
|
56
|
+
sub "Д", "D" # Д
|
|
57
|
+
sub "Ё", "``E" # Ё
|
|
58
|
+
sub "Е", "E" # Е
|
|
59
|
+
sub "Ж", "Zh" # Ж
|
|
60
|
+
sub "Ж", "ZH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
61
|
+
sub "Ж", "ZH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
62
|
+
|
|
63
|
+
sub "З", "Z" # З
|
|
64
|
+
sub "Э", "`E" # Э
|
|
65
|
+
sub "И", "I" # И
|
|
66
|
+
sub "Й", "Y" # Й
|
|
67
|
+
sub "К", "K" # К
|
|
68
|
+
sub "Л", "L" # Л
|
|
69
|
+
sub "М", "M" # М
|
|
70
|
+
sub "Н", "N" # Н
|
|
71
|
+
sub "О", "O" # О
|
|
72
|
+
sub "П", "P" # П
|
|
73
|
+
sub "Р", "R" # Р
|
|
74
|
+
sub "С", "S" # С
|
|
75
|
+
sub "Т", "T" # Т
|
|
76
|
+
sub "У", "U" # У
|
|
77
|
+
sub "Ф", "F" # Ф
|
|
78
|
+
sub "Х", "H" # Х
|
|
79
|
+
sub "Ц", "Ts" # Ц
|
|
80
|
+
sub "Ц", "TS", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
81
|
+
sub "Ц", "TS", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
82
|
+
|
|
83
|
+
sub "Ч", "Ch" # Ч
|
|
84
|
+
sub "Ч", "CH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
85
|
+
sub "Ч", "CH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
86
|
+
|
|
87
|
+
sub "Ш", "Sh" # Ш
|
|
88
|
+
sub "Ш", "SH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
89
|
+
sub "Ш", "SH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
90
|
+
|
|
91
|
+
sub "Щ", "Shch" # Щ
|
|
92
|
+
sub "Щ", "SHCH", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
93
|
+
sub "Щ", "SHCH", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
94
|
+
|
|
95
|
+
sub "Ы", "`I" # Ы
|
|
96
|
+
sub "Я", "Ya" # Я
|
|
97
|
+
sub "Я", "YA", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
98
|
+
sub "Я", "YA", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
99
|
+
|
|
100
|
+
sub "Ю", "Yu" # Ю
|
|
101
|
+
sub "Ю", "YU", before: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
102
|
+
sub "Ю", "YU", after: any('AБBГДЕЁЖЗИЙКЛМНОПРСТУЎФХЦЧШЩЪЫЬЭЮЯІ')
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Ь (before Е, Ё, И, O, Э)
|
|
106
|
+
# "\u042c\u0401": "Y``e" # Ё
|
|
107
|
+
# "\u042c\u0415": "Ye" # Е
|
|
108
|
+
# "\u042c\u0418": "Yi" # И
|
|
109
|
+
# "\u042c\u041E": "Yo" # O
|
|
110
|
+
    # "\u042c\u042d": "Y`e" # Э
|
|
111
|
+
|
|
112
|
+
# Ь (otherwise) -> ' (or none)
|
|
113
|
+
sub "Ь", "'"
|
|
114
|
+
|
|
115
|
+
# Ъ -> " (or none)
|
|
116
|
+
sub "Ъ", "\""
|
|
117
|
+
|
|
118
|
+
sub "а", "a" # а
|
|
119
|
+
sub "б", "b" # б
|
|
120
|
+
sub "в", "v" # в
|
|
121
|
+
sub "г", "g" # г
|
|
122
|
+
sub "д", "d" # д
|
|
123
|
+
sub "ё", "``e" # ё
|
|
124
|
+
sub "е", "e" # e
|
|
125
|
+
sub "ж", "zh" # ж
|
|
126
|
+
sub "з", "z" # з
|
|
127
|
+
sub "э", "`e" # э
|
|
128
|
+
sub "и", "i" # и
|
|
129
|
+
sub "й", "y" # й
|
|
130
|
+
sub "к", "k" # к
|
|
131
|
+
sub "л", "l" # л
|
|
132
|
+
sub "м", "m" # м
|
|
133
|
+
sub "н", "n" # н
|
|
134
|
+
sub "о", "o" # о
|
|
135
|
+
sub "п", "p" # п
|
|
136
|
+
sub "р", "r" # р
|
|
137
|
+
sub "с", "s" # с
|
|
138
|
+
sub "т", "t" # т
|
|
139
|
+
sub "у", "u" # у
|
|
140
|
+
sub "ф", "f" # ф
|
|
141
|
+
sub "х", "h" # х
|
|
142
|
+
sub "ц", "ts" # ц
|
|
143
|
+
sub "ч", "ch" # ч
|
|
144
|
+
sub "ш", "sh" # ш
|
|
145
|
+
sub "щ", "shch" # щ
|
|
146
|
+
sub "ы", "`i" # ы
|
|
147
|
+
sub "я", "ya" # я
|
|
148
|
+
sub "ю", "yu" # ю
|
|
149
|
+
|
|
150
|
+
# ь (before е, ё, и, o, э)
|
|
151
|
+
    # "\u044c\u0435": "ye" # е
|
|
152
|
+
    # "\u044c\u0451": "y``e" # ё
|
|
153
|
+
# "\u044c\u0438": "yi" # и
|
|
154
|
+
    # "\u044c\u043e": "yo" # о
|
|
155
|
+
# "\u044c\u044d": "y`e" # э
|
|
156
|
+
|
|
157
|
+
# ь (otherwise) -> ' (or none)
|
|
158
|
+
sub "ь", "'"
|
|
159
|
+
|
|
160
|
+
# ъ -> " (or none)
|
|
161
|
+
sub "ъ", "\""
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
}
|
|
165
|
+
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
metadata {
|
|
2
|
+
authority_id: bgn
|
|
3
|
+
id: 1962
|
|
4
|
+
language: iso-639-2:jpn
|
|
5
|
+
source_script: Hrkt
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: BGN (Modified Hepburn) System
|
|
8
|
+
url:
|
|
9
|
+
creation_date: 1930
|
|
10
|
+
adoption_date: 1962
|
|
11
|
+
description: |
|
|
12
|
+
The BGN (Modified Hepburn) System for the transliteration of Japanese
|
|
13
|
+
has been in use by the Board on Geographic Names since about 1930 and
|
|
14
|
+
has been extensively employed in the systematic standardization of
|
|
15
|
+
thousands of geographic names of Japan in romanized form.
|
|
16
|
+
|
|
17
|
+
notes: |
|
|
18
|
+
|
|
19
|
+
1. The "tsu" forms (ツ/つ) are also used to indicate a double consonant and
|
|
20
|
+
are generally (but not always) written in smaller script or type
|
|
21
|
+
slightly to the right of or below the regular line. These characters
|
|
22
|
+
are transliterated as k before k; s before s or sh; t before t, ts, or
|
|
23
|
+
ch; and p before p. Occasionally, when a "ku" (ク/く) or "ki" (キ/き) form
|
|
24
|
+
precedes k, the u in ku or the i in ki is dropped.
|
|
25
|
+
|
|
26
|
+
2. The transliterations in parentheses are used in specific cases when
|
|
27
|
+
the kana symbol is known to be so pronounced.
|
|
28
|
+
|
|
29
|
+
3. The transliteration m is used before b, p, and m.
|
|
30
|
+
|
|
31
|
+
4. This letter has been added for use in transliterating foreign
|
|
32
|
+
words.
|
|
33
|
+
|
|
34
|
+
5. The asterisk (*) indicates standard combined forms. Those combined
|
|
35
|
+
forms not so marked are rarely used.
|
|
36
|
+
|
|
37
|
+
----
|
|
38
|
+
|
|
39
|
+
Implementation Notes:
|
|
40
|
+
|
|
41
|
+
a. Despite the mention of the term "Modified Hepburn" in the
|
|
42
|
+
specification, the handling of ん/ン in this standard is different from
|
|
43
|
+
Modified Hepburn. It follows the Traditional Hepburn in that the
|
|
44
|
+
letter m is used before b, m, p.
|
|
45
|
+
|
|
46
|
+
b. This document includes obsolete (pre-reform) combinations.
|
|
47
|
+
Pre-reform combinations will clash with modern Japanese transliteration.
|
|
48
|
+
|
|
49
|
+
c. There is no discussion on how cross-morpheme vowel sounds should be
|
|
50
|
+
handled.
|
|
51
|
+
|
|
52
|
+
d. There is no mention of a separation mark between n and a following vowel.
|
|
53
|
+
|
|
54
|
+
e. Everything not explicitly stated in the specification will be
|
|
55
|
+
assumed to be inherited from var-jpn-Hrkt-Latn-hepburn-1954.
|
|
56
|
+
|
|
57
|
+
f. Obsolete combinations can be handled by post rules, and are
|
|
58
|
+
included for the sake of completeness only. They have been commented
|
|
59
|
+
out, since they are rarely used and follow different rules than modern
|
|
60
|
+
Japanese.
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
tests {
|
|
64
|
+
# Note: these test cases follow the pre-reform standard.
|
|
65
|
+
# They are commented out for now.
|
|
66
|
+
#
|
|
67
|
+
# - source: "けふ"
|
|
68
|
+
# expected: "kyō"
|
|
69
|
+
# - source: "ぎうにう"
|
|
70
|
+
# expected: "gyūnyū"
|
|
71
|
+
# - source: "きふ" # きふ should always be kifu in Modern Japanese
|
|
72
|
+
# expected: "kyū"
|
|
73
|
+
# - source: "ちう"
|
|
74
|
+
# expected: "chū"
|
|
75
|
+
# - source: "けう"
|
|
76
|
+
# expected: "kyō"
|
|
77
|
+
# Modern Japanese test cases
|
|
78
|
+
test "しんばし", "shimbashi"
|
|
79
|
+
test "とうきょう", "tōkyō"
|
|
80
|
+
test "しんじゅく", "shinjuku"
|
|
81
|
+
test "かんおう", "kan’ō"
|
|
82
|
+
test "かのう", "kanō"
|
|
83
|
+
test "きんゆう", "kin’yū"
|
|
84
|
+
test "とうきょう", "tōkyō"
|
|
85
|
+
test "かごっま", "kagomma"
|
|
86
|
+
test "ぽっぽっや", "poppoyya"
|
|
87
|
+
test "てっら", "terra"
|
|
88
|
+
test "にゃっほー", "nyahhō"
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
dependency "var-jpn-Hrkt-Latn-hepburn-1954", as: hrktlatn
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
stage {
|
|
95
|
+
# RULES
|
|
96
|
+
# Convert ん into m before b, m, p
|
|
97
|
+
sub any("んン"), "m", after: any("ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# CHARACTERS
|
|
101
|
+
parallel {
|
|
102
|
+
# ke
|
|
103
|
+
# These are listed as alternative pronunciation, but in fact this usage of ヶ
|
|
104
|
+
# as the archaic possessive marker is not found in Kana only texts.
|
|
105
|
+
# Also it is always typed using the smaller form. (ヶ U+30F6)
|
|
106
|
+
sub "け", any(["ke", "ga", "ka", "ko"])
|
|
107
|
+
sub "ケ", any(["ke", "ga", "ka", "ko"]) # same alternatives as hiragana け; stray quote removed from "ga"
|
|
108
|
+
sub "ヶ", any(["ga", "ka", "ko"])
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# The Ha-column
|
|
112
|
+
# は is still pronounced as wa when used as a particle,
|
|
113
|
+
# the alternative pronunciations for the other four kana are obsolete.
|
|
114
|
+
sub "は", any(["ha", "wa"])
|
|
115
|
+
sub "ひ", any(["hi", "i"])
|
|
116
|
+
sub "ふ", any(["fu", "u", "o"])
|
|
117
|
+
sub "へ", any(["he", "e"])
|
|
118
|
+
sub "ほ", any(["ho", "o"])
|
|
119
|
+
sub "ハ", any(["ha", "wa"])
|
|
120
|
+
sub "ヒ", any(["hi", "i"])
|
|
121
|
+
sub "フ", any(["fu", "u", "o"])
|
|
122
|
+
sub "ヘ", any(["he", "e"])
|
|
123
|
+
sub "ホ", any(["ho", "o"])
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# The Wa-column
|
|
127
|
+
# These kana (wi and we) are only used in pre-reform texts.
|
|
128
|
+
sub "ゐ", "i"
|
|
129
|
+
sub "ゑ", "e"
|
|
130
|
+
sub "ヰ", "i"
|
|
131
|
+
sub "ヱ", "e"
|
|
132
|
+
|
|
133
|
+
# Combined forms
|
|
134
|
+
# These are obsolete forms. See Note 5.
|
|
135
|
+
# They can be handled by post-rules if ever needed.
|
|
136
|
+
# "あう": "ō"
|
|
137
|
+
# "あふ": "ō"
|
|
138
|
+
# "いふ": "yū"
|
|
139
|
+
# "えう": "yō"
|
|
140
|
+
# "えふ": "yō"
|
|
141
|
+
# "おふ": "ō"
|
|
142
|
+
# "かう": "kō"
|
|
143
|
+
# "かふ": "kō"
|
|
144
|
+
# "がう": "gō"
|
|
145
|
+
# "がふ": "gō"
|
|
146
|
+
# "きう": "kyū"
|
|
147
|
+
# "きふ": "kyū"
|
|
148
|
+
# "きやう": "kyō"
|
|
149
|
+
# "ぎう": "gyū"
|
|
150
|
+
# "ぎふ": "gyū"
|
|
151
|
+
# "ぎやう": "gyō"
|
|
152
|
+
sub "くわ", "ka"
|
|
153
|
+
sub "くわう", "kō"
|
|
154
|
+
sub "ぐわ", "ga"
|
|
155
|
+
sub "ぐわう", "gō"
|
|
156
|
+
sub "クワ", "ka"
|
|
157
|
+
sub "クワウ", "kō"
|
|
158
|
+
sub "グワ", "ga"
|
|
159
|
+
sub "グワウ", "gō"
|
|
160
|
+
# "けう": "kyō"
|
|
161
|
+
# "けふ": "kyō"
|
|
162
|
+
# "げう": "gyō"
|
|
163
|
+
# "げふ": "gyō"
|
|
164
|
+
# "こふ": "kō"
|
|
165
|
+
# "ごふ": "gō"
|
|
166
|
+
# "さう": "sō"
|
|
167
|
+
# "さふ": "sō"
|
|
168
|
+
# "ざう": "zō"
|
|
169
|
+
# "ざふ": "zō"
|
|
170
|
+
# "しう": "shū"
|
|
171
|
+
# "しふ": "shū"
|
|
172
|
+
# "しやう": "shō"
|
|
173
|
+
# "じう": "jū"
|
|
174
|
+
# "じふ": "jū"
|
|
175
|
+
# "じやう": "jō"
|
|
176
|
+
# "せう": "shō"
|
|
177
|
+
# "せふ": "shō"
|
|
178
|
+
# "ぜう": "jō"
|
|
179
|
+
# "ぜふ": "jō"
|
|
180
|
+
# "そふ": "sō"
|
|
181
|
+
# "ぞふ": "zō"
|
|
182
|
+
# "たう": "tō"
|
|
183
|
+
# "たふ": "tō"
|
|
184
|
+
# "だう": "dō"
|
|
185
|
+
# "だふ": "dō"
|
|
186
|
+
# "ちう": "chū"
|
|
187
|
+
# "ちふ": "chū"
|
|
188
|
+
# "ちやう": "chō"
|
|
189
|
+
# "ぢう": "jū"
|
|
190
|
+
# "ぢふ": "jū"
|
|
191
|
+
# "ぢや": "ja"
|
|
192
|
+
# "ぢやう": "jō"
|
|
193
|
+
# "ぢゆ": "ju"
|
|
194
|
+
# "ぢよ": "jo"
|
|
195
|
+
# "ぢよう": "jō"
|
|
196
|
+
# "てう": "chō"
|
|
197
|
+
# "てふ": "chō"
|
|
198
|
+
# "でう": "jō"
|
|
199
|
+
# "でふ": "jō"
|
|
200
|
+
# "とふ": "tō"
|
|
201
|
+
# "どふ": "dō"
|
|
202
|
+
# "なう": "nō"
|
|
203
|
+
# "なふ": "nō"
|
|
204
|
+
# "にう": "nyū"
|
|
205
|
+
# "にふ": "nyū"
|
|
206
|
+
# "にやう": "nyō"
|
|
207
|
+
# "ねう": "nyō"
|
|
208
|
+
# "ねふ": "nyō"
|
|
209
|
+
# "のふ": "nō"
|
|
210
|
+
# "はう": ["hō","ō"]
|
|
211
|
+
# "はふ": "hō"
|
|
212
|
+
# "ばふ": "bō"
|
|
213
|
+
# "ばう": "bō"
|
|
214
|
+
# "ぱう": "pō"
|
|
215
|
+
# "ぱふ": "pō"
|
|
216
|
+
# "ひう": "hyū"
|
|
217
|
+
# "ひふ": "hyū"
|
|
218
|
+
# "ひやう": "hyō"
|
|
219
|
+
# "びう": "byū"
|
|
220
|
+
# "びふ": "byū"
|
|
221
|
+
# "びやう": "byō"
|
|
222
|
+
# "ぴう": "pyū"
|
|
223
|
+
# "ぴふ": "pyū"
|
|
224
|
+
# "ぴやう": "pyō"
|
|
225
|
+
# "へう": "hyō"
|
|
226
|
+
# "へふ": "hyō"
|
|
227
|
+
# "べう": "byō"
|
|
228
|
+
# "べふ": "byō"
|
|
229
|
+
# "ぺう": "pyō"
|
|
230
|
+
# "ぺふ": "pyō"
|
|
231
|
+
# "ほふ": "hō"
|
|
232
|
+
# "ぼふ": "bō"
|
|
233
|
+
# "ぽふ": "pō"
|
|
234
|
+
# "まう": "mō"
|
|
235
|
+
# "まふ": "mō"
|
|
236
|
+
# "まを": "mō"
|
|
237
|
+
# "みやう": "myō"
|
|
238
|
+
# "みう": "myū"
|
|
239
|
+
# "みふ": "myū"
|
|
240
|
+
# "めう": "myō"
|
|
241
|
+
# "めふ": "myō"
|
|
242
|
+
# "めを": "myō"
|
|
243
|
+
# "もふ": "mō"
|
|
244
|
+
# "やう": "yō"
|
|
245
|
+
# "やふ": "yō"
|
|
246
|
+
# "よふ": "yō"
|
|
247
|
+
# "らう": "rō"
|
|
248
|
+
# "らふ": "rō"
|
|
249
|
+
# "りう": "ryū"
|
|
250
|
+
# "りふ": "ryū"
|
|
251
|
+
# "りやう": "ryō"
|
|
252
|
+
# "れう": "ryō"
|
|
253
|
+
# "れふ": "ryō"
|
|
254
|
+
# "ろふ": "rō"
|
|
255
|
+
# "わう": "wō"
|
|
256
|
+
# "わふ": "wō"
|
|
257
|
+
# "ゑふ": "yō"
|
|
258
|
+
# "をう": "ō"
|
|
259
|
+
# "をふ": "ō"
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
run map.hrktlatn.stage.main
|
|
263
|
+
|
|
264
|
+
# POSTRULES
|
|
265
|
+
# Handle obsolete forms
|
|
266
|
+
# Note that these forms are present in the rules, but will break
|
|
267
|
+
# if used with Modern Japanese. They are commented out for now.
|
|
268
|
+
#
|
|
269
|
+
# - pattern: "ef?[uo]|iyau"
|
|
270
|
+
# result: "yō"
|
|
271
|
+
# - pattern: "if?u"
|
|
272
|
+
# result: "yū"
|
|
273
|
+
# - pattern: "[ao]f?[uo]"
|
|
274
|
+
# result: "ō"
|
|
275
|
+
# - pattern: "iy"
|
|
276
|
+
# result: "y"
|
|
277
|
+
# - pattern: "ty"
|
|
278
|
+
# result: "ch"
|
|
279
|
+
# - pattern: "dy"
|
|
280
|
+
# result: "j"
|
|
281
|
+
# - pattern: "[jz]y"
|
|
282
|
+
# result: "j"
|
|
283
|
+
# - pattern: "(?<=[sc])hy"
|
|
284
|
+
# result: "h"
|
|
285
|
+
# - pattern: "sy"
|
|
286
|
+
# result: "sh"
|
|
287
|
+
|
|
288
|
+
}
|