interscript 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/interscript.rb +10 -6
- data/lib/interscript/fs.rb +0 -2
- data/lib/interscript/mapping.rb +1 -1
- data/lib/interscript/opal.rb +38 -8
- data/lib/interscript/opal/entrypoint.rb +12 -0
- data/lib/interscript/opal/map_translate.rb +7 -0
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +5 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +5 -1
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -1
- data/maps/alalc-asm-Deva-Latn-1997.yaml +9 -3
- data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-bel-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-ell-Grek-Latn-1997.yaml +5 -1
- data/maps/alalc-ell-Grek-Latn-2010.yaml +1 -2
- data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
- data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -1
- data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
- data/maps/alalc-kor-Hang-Latn-1997.yaml +5 -1
- data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
- data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
- data/maps/alalc-mar-Deva-Latn-1997.yaml +21 -2
- data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
- data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +1 -1
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
- data/maps/{alalc-pan-Deva-Latn-1997.yaml → alalc-pan-Guru-Latn-1997.yaml} +23 -4
- data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
- data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
- data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +1 -1
- data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
- data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
- data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-srp-Cyrl-Latn-2013.yaml +1 -1
- data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
- data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -1
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -1
- data/maps/bgn-kor-Hang-Latn-1943.yaml +7 -3
- data/maps/bgn-kor-Kore-Latn-1943.yaml +3 -3
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +5 -1
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +5 -1
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +2 -2
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +6 -2
- data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +5 -1
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +5 -1
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
- data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +5 -1
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -1
- data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -1
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +1 -1
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +1 -1
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +1 -1
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +31 -1
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
- data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
- data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
- data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
- data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
- data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
- data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +1 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +1 -1
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +1 -1
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +1 -1
- data/maps/{bis-gjr-Gujr-Latn-13194-1991.yaml → bis-guj-Gujr-Latn-13194-1991.yaml} +17 -2
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +1 -1
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +1 -1
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +1 -1
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +1 -1
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +1 -1
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +1 -1
- data/maps/by-bel-Cyrl-Latn-1998.yaml +5 -1
- data/maps/by-bel-Cyrl-Latn-2007.yaml +1 -1
- data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
- data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
- data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
- data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
- data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
- data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
- data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
- data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
- data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
- data/maps/dos-nep-Deva-Latn-1997.yaml +15 -1
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +3 -3
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +2 -2
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -2
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -2
- data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -1
- data/maps/gki-bel-Cyrl-Latn-1992.yaml +1 -1
- data/maps/gki-bel-Cyrl-Latn-2000.yaml +1 -1
- data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +7 -3
- data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -1
- data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -1
- data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
- data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +8 -4
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -4
- data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
- data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
- data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
- data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
- data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
- data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
- data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
- data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
- data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
- data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
- data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
- data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
- data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
- data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
- data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
- data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
- data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -3
- data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
- data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +25 -17
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +1 -1
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
- data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
- data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +3 -3
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +2 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +2 -2
- data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
- data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
- data/maps/odni-hin-Deva-Latn-2015.yaml +1 -1
- data/maps/odni-kat-Geor-Latn-2015.yaml +1 -1
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
- data/maps/odni-mkd-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-urd-Arab-Latn-2015.yaml +1 -1
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -1
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +1 -1
- data/maps/royin-tha-Thai-Latn-1968.yaml +5 -1
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +1 -1
- data/maps/royin-tha-Thai-Latn-1999.yaml +5 -1
- data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
- data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
- data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
- data/maps/ses-ara-Arab-Latn-1930.yaml +5 -1
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
- data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
- data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
- data/maps/{ungegn-amh-Ethi-Latn-2016.yaml → un-amh-Ethi-Latn-2016.yaml} +51 -24
- data/maps/un-ara-Arab-Latn-1971.yaml +1 -1
- data/maps/un-ara-Arab-Latn-1972.yaml +1 -1
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-bel-Cyrl-Latn-2007.yaml +1 -1
- data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
- data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +2 -2
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +2 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -3
- data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
- data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
- data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
- data/maps/un-nep-Deva-Latn-1972.yaml +204 -17
- data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/un-ukr-Cyrl-Latn-1998.yaml +35 -12
- data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
- data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
- data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +1 -1
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -1
- data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
- data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
- data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
- data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
- data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
- data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +1 -1
- data/maps/var-tha-Thai-Zsym-ipa.yaml +1 -1
- data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +6 -2
- data/spec/interscript/filenames_spec.rb +384 -0
- data/spec/interscript_spec.rb +7 -4
- metadata +105 -26
- data/bin/interscript +0 -41
- data/bin/rspec +0 -29
- data/bin/setup +0 -8
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript-opal.rb +0 -2
- data/lib/interscript/opal_map_translate.rb +0 -12
- data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
- data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 2016
|
|
4
|
+
language: iso-639-2:rue
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ROMANIZATION OF RUSYN, BGN/PCGN 2016 System
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: rue_Cyrl2Latn_BGN_2016
|
|
11
|
+
description: The BGN/PCGN system for Rusyn was designed for use in romanizing names written in the Rusyn alphabet.
|
|
12
|
+
url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20RUSYN.pdf
|
|
13
|
+
creation_date: 2016
|
|
14
|
+
confirmation_date: 2017
|
|
15
|
+
description: |
|
|
16
|
+
The BGN/PCGN system for Rusyn was designed for use in romanizing names written in the Rusyn
|
|
17
|
+
alphabet. There are two primary dialects of the Rusyn language: Carpatho‐Rusyn and Pannonian Rusyn.
|
|
18
|
+
The Rusyn alphabet is almost identical to the Ukrainian alphabet, but contains three characters not
|
|
19
|
+
present in the Ukrainian alphabet: ё, ы, and ъ. These letters are also absent from the alphabet used by
|
|
20
|
+
Pannonian Rusyn. This table applies to both dialects of Rusyn.
|
|
21
|
+
|
|
22
|
+
notes:
|
|
23
|
+
- |
|
|
24
|
+
The letters ё, ы, and ъ are present in the orthography of the Carpatho‐Rusyn variant of the Rusyn
|
|
25
|
+
language. This variant is predominant among Rusyn speakers in Ukraine, Poland, Slovakia, Hungary, and
|
|
26
|
+
Romania. The letters are absent from the orthography of Pannonian Rusyn, which is predominant in
|
|
27
|
+
Serbia and Croatia.
|
|
28
|
+
|
|
29
|
+
- |
|
|
30
|
+
Unicode for Latin‐script characters: Ž/ž (017D, 017E); Č/č (010C, 010D); Š/š (0160, 0161); ’ (0027).
|
|
31
|
+
|
|
32
|
+
- |
|
|
33
|
+
The Romanization columns show only lowercase forms but, when romanizing, uppercase and
|
|
34
|
+
lowercase Roman letters as appropriate should be used.
|
|
35
|
+
|
|
36
|
+
tests:
|
|
37
|
+
# Based on https://ru.wikipedia.org/wiki/Русинский_язык#Примеры_текста
|
|
38
|
+
- source: русиньскый язык
|
|
39
|
+
expected: rusyn'skyj yazyk
|
|
40
|
+
- source: руська бисіда
|
|
41
|
+
expected: rus'ka bysida
|
|
42
|
+
- source: руснацькый язык
|
|
43
|
+
expected: rusnac'kyj yazyk
|
|
44
|
+
- source: руски язик
|
|
45
|
+
expected: rusky yazyk
|
|
46
|
+
- source: |
|
|
47
|
+
Чоловік найчастїше споминать на молоды часы. Є то цалком нормалне.
|
|
48
|
+
Тадь то рокы, кідь зазнаме всякого. І доброго, і планого. В тім часї ся чоловік находить, як кібы в скаралущі.
|
|
49
|
+
Розвивать ся, як цвіт на черешни. Выпхати ся мож з того обалу лем тогды, як прийде час, кідь цалком дозріє.
|
|
50
|
+
Даколи стачіть ся неограбаным способом дотулити білого домику, такой ся пораниш, што ті буде тякнути на цілый жывот.
|
|
51
|
+
А кідь ся народиш в теплї, обколесеный ласков, розвиваш ся в добрых условіях, выпадеш із скаралущі, як міцна істота.
|
|
52
|
+
Такым потім буде і твій далшый жывот. Із добрї заложеным фундаментом. Было бы смішно сі робити надїй, же жывот є лем єдна рівна путь…
|
|
53
|
+
Кібы то так чоловік знав… Кібы ся міг іщі раз народити і піти по тій істій пути…
|
|
54
|
+
expected: |
|
|
55
|
+
Čolovik najčastjiše spomynat' na molody časy. Je to calkom normalne.
|
|
56
|
+
Tad' to roky, kid' zazname vsyakogo. I dobrogo, i planogo. V tim časji sya čolovik nachodyt', yak kiby v skaralušči.
|
|
57
|
+
Rozvyvat' sya, yak cvit na čerešny. Vypchaty sya mož z togo obalu lem togdy, yak pryjde čas, kid' calkom dozrije.
|
|
58
|
+
Dakoly stačit' sya neograbanym sposobom dotulyty bilogo domyku, takoj sya poranyš, što ti bude tyaknuty na cilyj žyvot.
|
|
59
|
+
A kid' sya narodyš v teplji, obkolesenyj laskov, rozvyvaš sya v dobrych usloviyach, vypadeš iz skaralušči, yak micna istota.
|
|
60
|
+
Takym potim bude i tvij dalšyj žyvot. Iz dobrji založenym fundamentom. Bylo by smišno si robyty nadjij, že žyvot je lem jedna rivna put'…
|
|
61
|
+
Kiby to tak čolovik znav… Kiby sya mig išči raz narodyty i pity po tij istij puty…
|
|
62
|
+
# Based on http://www.philology.ru/linguistics3/suprun-89.htm
|
|
63
|
+
- source: |
|
|
64
|
+
Вишло слунко красне, ясне,
|
|
65
|
+
и цму швета розогнало -
|
|
66
|
+
жем желену, били хмарки
|
|
67
|
+
як зоз златом да обцагло.
|
|
68
|
+
expected: |
|
|
69
|
+
Vyšlo slunko krasne, yasne,
|
|
70
|
+
y cmu šveta rozognalo -
|
|
71
|
+
žem želenu, byly chmarky
|
|
72
|
+
yak zoz zlatom da obcaglo.
|
|
73
|
+
- source: шнїг
|
|
74
|
+
expected: šnjig
|
|
75
|
+
- source: жем
|
|
76
|
+
expected: žem
|
|
77
|
+
- source: дзень
|
|
78
|
+
expected: dzen'
|
|
79
|
+
- source: спомнуц
|
|
80
|
+
expected: spomnuc
|
|
81
|
+
- source: крава
|
|
82
|
+
expected: krava
|
|
83
|
+
# Based on https://lingvoforum.net/index.php?topic=43545.0
|
|
84
|
+
- source: дївка
|
|
85
|
+
expected: djivka
|
|
86
|
+
- source: дрыв
|
|
87
|
+
expected: dryv
|
|
88
|
+
- source: фёрд
|
|
89
|
+
expected: fjord
|
|
90
|
+
- source: Ёзеф
|
|
91
|
+
expected: Jozef
|
|
92
|
+
- source: пастырї
|
|
93
|
+
expected: pastyrji
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
map:
|
|
97
|
+
characters:
|
|
98
|
+
"\u0410": "A" # А
|
|
99
|
+
"\u0411": "B" # Б
|
|
100
|
+
"\u0412": "V" # В
|
|
101
|
+
"\u0413": "H" # Г
|
|
102
|
+
"\u0414": "D" # Д
|
|
103
|
+
"\u0415": "E" # Е
|
|
104
|
+
"\u0404": "Je" # Є
|
|
105
|
+
"\u0401": "Jo" # Ё
|
|
106
|
+
"\u0416": "\u017D" # Ж => Ž note[2]
|
|
107
|
+
"\u0417": "Z" # З
|
|
108
|
+
"\u0418": "Y" # И
|
|
109
|
+
"\u0406": "I" # І
|
|
110
|
+
"\u042b": "Y" # Ы
|
|
111
|
+
"\u0407": "Ji" # Ї
|
|
112
|
+
"\u0419": "J" # Й
|
|
113
|
+
"\u041a": "K" # К
|
|
114
|
+
"\u041b": "L" # Л
|
|
115
|
+
"\u041c": "M" # М
|
|
116
|
+
"\u041d": "N" # Н
|
|
117
|
+
"\u041e": "O" # О
|
|
118
|
+
"\u041f": "P" # П
|
|
119
|
+
"\u0420": "R" # Р
|
|
120
|
+
"\u0421": "S" # С
|
|
121
|
+
"\u0422": "T" # Т
|
|
122
|
+
"\u0423": "U" # У
|
|
123
|
+
"\u0424": "F" # Ф
|
|
124
|
+
"\u0425": "Ch" # Х
|
|
125
|
+
"\u0426": "C" # Ц
|
|
126
|
+
"\u0427": "\u010C" # Ч => Č note[2]
|
|
127
|
+
"\u0428": "\u0160" # Ш => Š note[2]
|
|
128
|
+
"\u0429": "\u0160\u010C" # Щ => ŠČ
|
|
129
|
+
"\u042e": "Yu" # Ю
|
|
130
|
+
"\u042f": "Ya" # Я
|
|
131
|
+
"\u042c": "\u0027" # Ь => '
|
|
132
|
+
"\u042a": "\u0027" # Ъ => '
|
|
133
|
+
|
|
134
|
+
"\u0430": "a" # а
|
|
135
|
+
"\u0431": "b" # б
|
|
136
|
+
"\u0432": "v" # в
|
|
137
|
+
"\u0433": "g" # г
|
|
138
|
+
"\u0434": "d" # д
|
|
139
|
+
"\u0435": "e" # е
|
|
140
|
+
"\u0454": "je" # є
|
|
141
|
+
"\u0451": "jo" # ё
|
|
142
|
+
"\u0436": "\u017E" # ж => ž note[2]
|
|
143
|
+
"\u0437": "z" # з
|
|
144
|
+
"\u0438": "y" # и
|
|
145
|
+
"\u0456": "i" # і
|
|
146
|
+
"\u044b": "y" # ы
|
|
147
|
+
"\u0457": "ji" # ї
|
|
148
|
+
"\u0439": "j" # й
|
|
149
|
+
"\u043a": "k" # к
|
|
150
|
+
"\u043b": "l" # л
|
|
151
|
+
"\u043c": "m" # м
|
|
152
|
+
"\u043d": "n" # н
|
|
153
|
+
"\u043e": "o" # о
|
|
154
|
+
"\u043f": "p" # п
|
|
155
|
+
"\u0440": "r" # р
|
|
156
|
+
"\u0441": "s" # с
|
|
157
|
+
"\u0442": "t" # т
|
|
158
|
+
"\u0443": "u" # у
|
|
159
|
+
"\u0444": "f" # ф
|
|
160
|
+
"\u0445": "ch" # х
|
|
161
|
+
"\u0446": "c" # ц
|
|
162
|
+
"\u0447": "\u010D" # ч => č note[2]
|
|
163
|
+
"\u0448": "\u0161" # ш => š note[2]
|
|
164
|
+
"\u0449": "\u0161\u010D" # щ => šč
|
|
165
|
+
"\u044e": "yu" # ю
|
|
166
|
+
"\u044f": "ya" # я
|
|
167
|
+
"\u044c": "\u0027" # ь => '
|
|
168
|
+
"\u044a": "\u0027" # ъ => '
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: bgnpcgn
|
|
3
3
|
id: 1947
|
|
4
|
-
language: rus
|
|
4
|
+
language: iso-639-2:rus
|
|
5
5
|
source_script: Cyrl
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: ROMANIZATION OF RUSSIAN, BGN/PCGN 1947 System
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: rus_Cyrl2Latn_BGN_1947
|
|
11
|
+
description: Russian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1947 System
|
|
8
12
|
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/807920/ROMANIZATION_OF_RUSSIAN.pdf
|
|
9
13
|
creation_date: 1947
|
|
10
14
|
confirmation_date: 2019-06
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
---
|
|
2
2
|
authority_id: bgnpcgn
|
|
3
3
|
id: 2005
|
|
4
|
-
language: srp
|
|
4
|
+
language: iso-639-2:srp
|
|
5
5
|
source_script: Cyrl
|
|
6
6
|
destination_script: Latn
|
|
7
7
|
name: ROMANIZATION OF SERBIAN, BGN/PCGN 2005 System
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: srp_Cyrl2Latn_BGN_2005
|
|
11
|
+
description: Serbian Cyrillic Table of Correspondences US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 2005
|
|
8
12
|
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816783/TABLE_OF_CORRESPONDENCES_FOR_SERBIAN.pdf
|
|
9
13
|
creation_date: 2005
|
|
10
14
|
confirmation_date: 2019-06
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 2007
|
|
4
|
+
language: iso-639-2:tat
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: TATAR TABLE OF CORRESPONDENCES CYRILLIC - ROMAN BGN/PCGN 2007 Agreement
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: tat_Cyrl2Latn_BGN_2007
|
|
11
|
+
description: Tatar is an official language within Respublika Tatarstan, one of the republics of the Russian Federation.
|
|
12
|
+
url: https://geonames.nga.mil/gns/html/Romanization/TABLE%20OF%20CORRESPONDENCES%20FOR%20TATAR.pdf
|
|
13
|
+
creation_date: 2007
|
|
14
|
+
confirmation_date: 2017-11
|
|
15
|
+
description: |
|
|
16
|
+
Tatar is an official language within Respublika Tatarstan, one of the republics of the Russian
|
|
17
|
+
Federation. It will normally be encountered in Cyrillic script, in which case it should be romanized by means
|
|
18
|
+
of the Cyrillic-Roman table of correspondences given below.
|
|
19
|
+
|
|
20
|
+
notes:
|
|
21
|
+
- |
|
|
22
|
+
The alphabet portrayed in the above table is referred to as yaꞑalif-2.
|
|
23
|
+
A set of simpler characters is also encountered; this is known as zamanalif.
|
|
24
|
+
In this latter set, the alternative characters ä, ñ and ö are used for letters
|
|
25
|
+
2, 17 and 19 respectively where the user has difficulty reproducing ə, ꞑ, and ө.
|
|
26
|
+
Please note that all three alternatives must be used as a set, and the letters should not be intermingled.
|
|
27
|
+
- Used only in borrowed words.
|
|
28
|
+
- The first option is used in words with back vowels, the second in words with front vowels (though this does not apply to borrowed words).
|
|
29
|
+
- yı/ye is used after a vowel (except и, ю), ъ and ь, also word-initially.
|
|
30
|
+
- w is used after a vowel.
|
|
31
|
+
- After ğ or q, ый is represented i.
|
|
32
|
+
- Э is represented ’ after a vowel in words of Arabic origin.
|
|
33
|
+
- Ю and Я are represented ü and a/ä respectively after и.
|
|
34
|
+
- |
|
|
35
|
+
An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters
|
|
36
|
+
of the basic Roman script is:
|
|
37
|
+
Ə (U+018F) ə (U+0259)
|
|
38
|
+
Ğ (U+011E) ğ (U+011F)
|
|
39
|
+
İ (U+0130) ı (U+0131)
|
|
40
|
+
Ü (U+00DC) ü (U+00FC)
|
|
41
|
+
Ꞑ (U+A790) ꞑ (U+A791)
|
|
42
|
+
Ɵ (U+019F) ɵ (U+0275)
|
|
43
|
+
Ç (U+00C7) ç (U+00E7)
|
|
44
|
+
Ş (U+015E) ş (U+015F)
|
|
45
|
+
Ä (U+00C4) ä (U+00E4)
|
|
46
|
+
’ (U+2019)
|
|
47
|
+
- |
|
|
48
|
+
The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase
|
|
49
|
+
Roman letters as appropriate should be used.
|
|
50
|
+
|
|
51
|
+
tests:
|
|
52
|
+
- source: Гыйльмиев #
|
|
53
|
+
expected: Ğil’miew # note[6] in the note it's Ğilmiev, which is incorrect according to the rules
|
|
54
|
+
# https://en.wikipedia.org/wiki/Tatar_alphabet
|
|
55
|
+
- source: баеды
|
|
56
|
+
expected: bayıdı # note[4]
|
|
57
|
+
- source: кардәш
|
|
58
|
+
expected: qardəş
|
|
59
|
+
- source: калынлык һәм аеру билгесе
|
|
60
|
+
expected: qalınlıq həm ayıru bilgese # note[4]
|
|
61
|
+
- source: |
|
|
62
|
+
Барлык кешеләр дә азат һәм үз абруйлары һәм хокуклары ягыннан тиң булып туалар.
|
|
63
|
+
Аларга акыл һәм вөҗдан бирелгән һәм бер-берсенә карата туганнарча мөнасәбәттә булырга тиешләр.
|
|
64
|
+
expected: |
|
|
65
|
+
Barlıq keşelər də azat həm üz abruyları həm xoquqları yağınnan tiꞑ bulıp tualar.
|
|
66
|
+
Alarğa aqıl həm wocdan birelgən həm ber-bersenə qarata tuğannarça monasəbəttə bulırğa tieşlər.
|
|
67
|
+
# https://www.azatliq.org/a/30820571.html
|
|
68
|
+
- source: Әлдермештән Әлмәндәр
|
|
69
|
+
expected: Əldermeştən Əlməndər
|
|
70
|
+
- source: Әссәламү галәйкүм
|
|
71
|
+
expected: Əssəlamü ğaləyküm
|
|
72
|
+
- source: Танымаган кешегә
|
|
73
|
+
expected: Tanımağan keşegə # note[3]
|
|
74
|
+
- source: Иң әүвәл кул бирешеп күрешик
|
|
75
|
+
expected: İꞑ əwwəl qul bireşep küreşiq # note[5]
|
|
76
|
+
- source: Ялгышмыйсың
|
|
77
|
+
expected: Yalğışmıysıꞑ
|
|
78
|
+
- source: Нәкъ үзе
|
|
79
|
+
expected: Nəq üze
|
|
80
|
+
- source: Кирәгеннән артыгын
|
|
81
|
+
expected: Kirəgennən artığın # note[3]
|
|
82
|
+
- source: мәңгелеккә килмәгән
|
|
83
|
+
expected: məꞑgeleqkə kilməgən
|
|
84
|
+
- source: кулыңны куй
|
|
85
|
+
expected: qulıꞑnı quy
|
|
86
|
+
- source: Өммия # note[8]
|
|
87
|
+
expected: Ommiä
|
|
88
|
+
- source: Җиһангир # note[3]
|
|
89
|
+
expected: Cihangir
|
|
90
|
+
|
|
91
|
+
map:
|
|
92
|
+
rules:
|
|
93
|
+
# note[3] http://www.hintfox.com/article/sistema-glasnih-zvykov-na-tatarskom-i-anglijskom-jazikah.html
|
|
94
|
+
# back vowels: у, а, ы, о,
|
|
95
|
+
# front vowels: е, ә, и, ө, ү, э
|
|
96
|
+
- pattern: Г(?=[ЕеӘәИиӨөҮүЭэ])
|
|
97
|
+
result: G
|
|
98
|
+
- pattern: г(?=[ЕеӘәИиӨөҮүЭэ])
|
|
99
|
+
result: g
|
|
100
|
+
- pattern: К(?=[ЕеӘәИиӨөҮүЭэ])
|
|
101
|
+
result: K
|
|
102
|
+
- pattern: к(?=[ЕеӘәИиӨөҮүЭэ])
|
|
103
|
+
result: k
|
|
104
|
+
- pattern: Ю(?=[ЕеӘәИиӨөҮүЭэ])
|
|
105
|
+
result: "Y\u00FC"
|
|
106
|
+
- pattern: ю(?=[ЕеӘәИиӨөҮүЭэ])
|
|
107
|
+
result: "y\u00FC"
|
|
108
|
+
- pattern: Я(?=[ЕеӘәИиӨөҮүЭэ])
|
|
109
|
+
result: "Y\u00E4"
|
|
110
|
+
- pattern: я(?=[ЕеӘәИиӨөҮүЭэ])
|
|
111
|
+
result: "y\u00E4"
|
|
112
|
+
|
|
113
|
+
# note[4]
|
|
114
|
+
- pattern: (?<=[АаЕеӘәОоӨөҮүУуЫыЭэЯяЪъЬь])\u0415
|
|
115
|
+
result: "Y\u0131"
|
|
116
|
+
- pattern: (?<=[АаЕеӘәОоӨөҮүУуЫыЭэЯяЪъЬь])\u0435
|
|
117
|
+
result: "y\u0131"
|
|
118
|
+
|
|
119
|
+
# note[5]
|
|
120
|
+
- pattern: (?<=[АаЕеӘәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u0423
|
|
121
|
+
result: "W"
|
|
122
|
+
- pattern: (?<=[АаЕеӘәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u0443
|
|
123
|
+
result: "w"
|
|
124
|
+
- pattern: (?<=[АаЕеӘәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u04AE
|
|
125
|
+
result: "W"
|
|
126
|
+
- pattern: (?<=[АаЕеӘәИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь])\u04AF
|
|
127
|
+
result: "w"
|
|
128
|
+
|
|
129
|
+
# note[6]
|
|
130
|
+
- pattern: (?<=[Гг])ый
|
|
131
|
+
result: i
|
|
132
|
+
|
|
133
|
+
# note[8]
|
|
134
|
+
- pattern: (?<=[Ии])\u042E
|
|
135
|
+
result: "\u00DC"
|
|
136
|
+
- pattern: (?<=[Ии])\u044E
|
|
137
|
+
result: "\u00FC"
|
|
138
|
+
- pattern: (?<=[Ии])\u042F
|
|
139
|
+
result: "\u00C4"
|
|
140
|
+
- pattern: (?<=[Ии])\u044F
|
|
141
|
+
result: "\u00E4"
|
|
142
|
+
|
|
143
|
+
characters:
|
|
144
|
+
"\u0410": 'A' # А
|
|
145
|
+
"\u04D8": ["\u018F", "\u00C4"] # Ә => [Ə, Ä] note[1]
|
|
146
|
+
"\u0411": 'B' # Б
|
|
147
|
+
"\u0412": ['W', 'V'] # В note[2]
|
|
148
|
+
"\u0413": "\u011E" # Г => Ğ note[3]
|
|
149
|
+
"\u0414": 'D' # Д
|
|
150
|
+
"\u0415": 'E' # Е note[3] note[4]
|
|
151
|
+
"\u0416": 'J' # Ж
|
|
152
|
+
"\u0496": 'C' # Җ
|
|
153
|
+
"\u0417": 'Z' # З
|
|
154
|
+
"\u0418": "\u0130" # И => İ
|
|
155
|
+
"\u0419": 'Y' # Й
|
|
156
|
+
"\u041A": 'Q' # К note[3]
|
|
157
|
+
"\u041B": 'L' # Л
|
|
158
|
+
"\u041C": 'M' # М
|
|
159
|
+
"\u041D": 'N' # Н
|
|
160
|
+
"\u04A2": ["\uA790", "\u00D1"] # Ң => [Ꞑ, Ñ] note[1]
|
|
161
|
+
"\u041E": 'O' # О
|
|
162
|
+
"\u04E8": ['O', "\u00D6"] # Ө => [O, Ö] note[1]
|
|
163
|
+
"\u041F": 'P' # П
|
|
164
|
+
"\u0420": 'R' # Р
|
|
165
|
+
"\u0421": 'S' # С
|
|
166
|
+
"\u0422": 'T' # Т
|
|
167
|
+
"\u0423": 'U' # У note[5]
|
|
168
|
+
"\u04AE": "\u00DC" # Ү => Ü note[5]
|
|
169
|
+
"\u0424": 'F' # Ф
|
|
170
|
+
"\u0425": 'X' # Х
|
|
171
|
+
"\u04BA": 'H' # Һ
|
|
172
|
+
"\u0426": 'Ts' # Ц
|
|
173
|
+
"\u0427": "\u00C7" # Ч => Ç
|
|
174
|
+
"\u0428": "\u015E" # Ш => Ş
|
|
175
|
+
"\u0429": "\u015E\u00C7" # Щ
|
|
176
|
+
"\u042A": '' # Ъ
|
|
177
|
+
"\u042B": 'I' # Ы => I note[2] note[6]
|
|
178
|
+
"\u042C": "\u2019" # Ь => ’
|
|
179
|
+
"\u042D": 'E' # Э note[7]
|
|
180
|
+
"\u042E": 'Yu' # Ю note[3] note[8]
|
|
181
|
+
"\u042F": 'Ya' # Я note[3] note[8]
|
|
182
|
+
|
|
183
|
+
'\u0430': 'a' # а
|
|
184
|
+
'\u04D9': ["\u0259", "\u00E4"] # ә => [ə, ä] note[1]
|
|
185
|
+
'\u0431': 'b' # б
|
|
186
|
+
'\u0432': ['w', 'v'] # в note[2]
|
|
187
|
+
'\u0433': "\u011F" # г => ğ note[3]
|
|
188
|
+
'\u0434': 'd' # д
|
|
189
|
+
'\u0435': 'e' # e note[3] note[4]
|
|
190
|
+
'\u0436': 'j' # ж
|
|
191
|
+
'\u0497': 'c' # җ
|
|
192
|
+
'\u0437': 'z' # з
|
|
193
|
+
'\u0438': 'i' # и
|
|
194
|
+
'\u0439': 'y' # й
|
|
195
|
+
'\u043A': 'q' # к note[3]
|
|
196
|
+
'\u043B': 'l' # л
|
|
197
|
+
'\u043C': 'm' # м
|
|
198
|
+
'\u043D': 'n' # н
|
|
199
|
+
'\u04A3': ["\uA791", "\u00F1"] # ң => [ꞑ, ñ] note[1]
|
|
200
|
+
'\u043E': 'o' # о
|
|
201
|
+
'\u04E9': ['o', "\u00F6"] # ө => [o, ö] note[1]
|
|
202
|
+
'\u043F': 'p' # п
|
|
203
|
+
'\u0440': 'r' # р
|
|
204
|
+
'\u0441': 's' # с
|
|
205
|
+
'\u0442': 't' # т
|
|
206
|
+
'\u0443': 'u' # у note[5]
|
|
207
|
+
'\u04AF': "\u00FC" # ү => ü note[5]
|
|
208
|
+
'\u0444': 'f' # ф
|
|
209
|
+
'\u0445': 'x' # х
|
|
210
|
+
'\u04BB': 'h' # һ
|
|
211
|
+
'\u0446': 'ts' # ц
|
|
212
|
+
'\u0447': "\u00E7" # ч => ç
|
|
213
|
+
'\u0448': "\u015F" # ш => ş
|
|
214
|
+
'\u0449': "\u015F\u00E7" # щ => şç
|
|
215
|
+
"\u044a": '' # ъ
|
|
216
|
+
'\u044B': "\u0131" # ы => ı note[2] note[6]
|
|
217
|
+
"\u044C": "\u2019" # ь => ’
|
|
218
|
+
'\u044D': 'e' # э note[7]
|
|
219
|
+
'\u044E': 'yu' # ю note[3] note[8]
|
|
220
|
+
'\u044F': 'ya' # я note[3] note[8]
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1994
|
|
4
|
+
language: iso-639-2:tgk
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: BGN/PCGN Romanization System -- Tajik (1994)
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: tgk_Cyrl2Latn_BGN_1994
|
|
11
|
+
description: The BGN/PCGN system for Tajik was designed for use in romanizing names written in the Tajik Cyrillic alphabet.
|
|
12
|
+
url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20TAJIK.pdf
|
|
13
|
+
creation_date: 1994
|
|
14
|
+
confirmation_date: 2017-11
|
|
15
|
+
description: |
|
|
16
|
+
The BGN/PCGN system for Tajik was designed for use in romanizing names written in the Tajik Cyrillic alphabet.
|
|
17
|
+
The Tajik Cyrillic alphabet contains six characters not present in the Russian alphabet: ғ, ӣ, қ, ӯ, ҳ and ҷ. An
|
|
18
|
+
orthographic reform of the Tajik Cyrillic alphabet was implemented under the auspices of the Academy of Sciences of
|
|
19
|
+
Tajikistan. This reform was promulgated in a decree of 3 September 1998 by the government of the Republic of
|
|
20
|
+
Tajikistan. The reform abolished the characters ц, щ, ь and ы (see notes 2 through 5).
|
|
21
|
+
|
|
22
|
+
notes:
|
|
23
|
+
- |
|
|
24
|
+
The character sequences гҳ , зҳ , кҳ , and сҳ may be romanized g·h, z·h, k·h, and s·h in order
|
|
25
|
+
to differentiate those romanizations from the digraphs gh, zh, kh, and sh which are used to
|
|
26
|
+
render the characters ғ, ж, х, and ш.
|
|
27
|
+
- |
|
|
28
|
+
The obsolete character ц, abolished in 1998, should be romanized s (before a vowel and/or
|
|
29
|
+
after a consonant within a word) or ts intervocalically.
|
|
30
|
+
- The obsolete character щ, replaced by ш in 1998, should be romanized sh.
|
|
31
|
+
- The obsolete character ь, abolished in 1998, should not be romanized.
|
|
32
|
+
- The obsolete character ы, replaced by и in 1998, should be romanized i.
|
|
33
|
+
- Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character.
|
|
34
|
+
- |
|
|
35
|
+
An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
|
|
36
|
+
unmodified letters of the basic Roman script is:
|
|
37
|
+
All apostrophes appearing in romanization are U+2019
|
|
38
|
+
Í (U+00CD) í (U+00ED)
|
|
39
|
+
Ŭ (U+016C) ŭ (U+016D)
|
|
40
|
+
Ė (U+0116) ė (U+0117)
|
|
41
|
+
- |
|
|
42
|
+
The Romanization column shows only lowercase forms but, when romanizing, uppercase and
|
|
43
|
+
lowercase Roman letters as appropriate should be used.
|
|
44
|
+
|
|
45
|
+
tests:
|
|
46
|
+
# https://ru.wikipedia.org/wiki/Таджикская_письменность#Образцы_записи
|
|
47
|
+
- source: |
|
|
48
|
+
Тамоми одамон озод ба дунё меоянд ва аз лиҳози манзилату ҳуқуқ бо ҳам баробаранд.
|
|
49
|
+
Ҳама соҳиби ақлу виҷдонанд, бояд нисбат ба якдигар бародарвор муносабат намоянд.
|
|
50
|
+
expected: |
|
|
51
|
+
Tamomi odamon ozod ba dunyo meoyand va az lihozi manzilatu huquq bo ham barobarand.
|
|
52
|
+
Hama sohibi aqlu vijdonand, boyad nisbat ba yakdigar barodarvor munosabat namoyand.
|
|
53
|
+
- source: Баниодам аъзои як пайкаранд, ки дар офариниш зи як гавҳаранд. Чу узве ба дард оварад рӯзгор, дигар узвҳоро намонад қарор.
|
|
54
|
+
expected: Baniodam a’zoi yak paykarand, ki dar ofarinish zi yak gavharand. Chu uzve ba dard ovarad rŭzgor, digar uzvhoro namonad qaror.
|
|
55
|
+
- source: Саъдӣ
|
|
56
|
+
expected: Sa’dí
|
|
57
|
+
- source: Мурда будам, зинда шудам; гиря будам, ханда шудам. Давлати ишқ омаду ман давлати поянда шудам.
|
|
58
|
+
expected: Murda budam, zinda shudam; girya budam, khanda shudam. Davlati ishq omadu man davlati poyanda shudam.
|
|
59
|
+
- source: Мавлавӣ
|
|
60
|
+
expected: Mavlaví
|
|
61
|
+
- source: санг
|
|
62
|
+
expected: sang
|
|
63
|
+
- source: барг
|
|
64
|
+
expected: barg
|
|
65
|
+
- source: номвар
|
|
66
|
+
expected: nomvar
|
|
67
|
+
- source: Бағдод
|
|
68
|
+
expected: Baghdod
|
|
69
|
+
- source: ғор
|
|
70
|
+
expected: ghor
|
|
71
|
+
- source: модар
|
|
72
|
+
expected: modar
|
|
73
|
+
- source: меравам
|
|
74
|
+
expected: meravam
|
|
75
|
+
- source: дарё
|
|
76
|
+
expected: daryo
|
|
77
|
+
- source: осиёб
|
|
78
|
+
expected: osiyob
|
|
79
|
+
- source: жола
|
|
80
|
+
expected: zhola
|
|
81
|
+
- source: каждум
|
|
82
|
+
expected: kazhdum
|
|
83
|
+
- source: баъз
|
|
84
|
+
expected: ba’z
|
|
85
|
+
- source: назар
|
|
86
|
+
expected: nazar
|
|
87
|
+
- source: заҳоб
|
|
88
|
+
expected: zahob
|
|
89
|
+
- source: ихтиёр
|
|
90
|
+
expected: ikhtiyor
|
|
91
|
+
- source: зебоӣ
|
|
92
|
+
expected: zeboí
|
|
93
|
+
- source: май
|
|
94
|
+
expected: may
|
|
95
|
+
- source: кадом
|
|
96
|
+
expected: kadom
|
|
97
|
+
- source: қадам
|
|
98
|
+
expected: qadam
|
|
99
|
+
- source: лола
|
|
100
|
+
expected: lola
|
|
101
|
+
- source: мурдагӣ
|
|
102
|
+
expected: murdagí
|
|
103
|
+
- source: нон
|
|
104
|
+
expected: non
|
|
105
|
+
- source: орзу
|
|
106
|
+
expected: orzu
|
|
107
|
+
- source: панҷ
|
|
108
|
+
expected: panj
|
|
109
|
+
- source: ранг
|
|
110
|
+
expected: rang
|
|
111
|
+
- source: сар
|
|
112
|
+
expected: sar
|
|
113
|
+
- source: субҳ
|
|
114
|
+
expected: subh
|
|
115
|
+
- source: сурайё
|
|
116
|
+
expected: surayyo
|
|
117
|
+
- source: тоҷик
|
|
118
|
+
expected: tojik
|
|
119
|
+
- source: талаб
|
|
120
|
+
expected: talab
|
|
121
|
+
- source: дуд
|
|
122
|
+
expected: dud
|
|
123
|
+
- source: хӯрдан
|
|
124
|
+
expected: khŭrdan
|
|
125
|
+
- source: фурӯғ
|
|
126
|
+
expected: furŭgh
|
|
127
|
+
- source: хондан
|
|
128
|
+
expected: khondan
|
|
129
|
+
- source: ҳофиз
|
|
130
|
+
expected: hofiz
|
|
131
|
+
- source: чӣ
|
|
132
|
+
expected: chí
|
|
133
|
+
- source: ҷанг
|
|
134
|
+
expected: jang
|
|
135
|
+
- source: шаб
|
|
136
|
+
expected: shab
|
|
137
|
+
- source: таъриф
|
|
138
|
+
expected: ta’rif
|
|
139
|
+
- source: эй
|
|
140
|
+
expected: ėy
|
|
141
|
+
- source: июн
|
|
142
|
+
expected: iyun
|
|
143
|
+
- source: ягонагӣ
|
|
144
|
+
expected: yagonagí
|
|
145
|
+
- source: РАМЗҲО
|
|
146
|
+
expected: RAMZ·HO
|
|
147
|
+
|
|
148
|
+
map:
|
|
149
|
+
rules:
|
|
150
|
+
# note[1]
|
|
151
|
+
- pattern: ([ГгЗзКкСс])\u04B3
|
|
152
|
+
result: "\\1·h"
|
|
153
|
+
- pattern: ([ГгЗзКкСс])\u04B2
|
|
154
|
+
result: "\\1·H"
|
|
155
|
+
# note[2]
|
|
156
|
+
- pattern: \u0426(?=[АаЕеЁёИиОоУуЫыЭэЮюЯя])
|
|
157
|
+
result: S
|
|
158
|
+
- pattern: \u0446(?=[АаЕеЁёИиОоУуЫыЭэЮюЯя])
|
|
159
|
+
result: s
|
|
160
|
+
|
|
161
|
+
characters:
|
|
162
|
+
"\u0410": 'A' # А
|
|
163
|
+
"\u0411": 'B' # Б
|
|
164
|
+
"\u0412": 'V' # В
|
|
165
|
+
"\u0413": 'G' # Г
|
|
166
|
+
"\u0492": 'Gh' # Ғ
|
|
167
|
+
"\u0414": 'D' # Д
|
|
168
|
+
"\u0415": 'E' # Е
|
|
169
|
+
"\u0401": 'Yo' # Ё
|
|
170
|
+
"\u0416": 'Zh' # Ж
|
|
171
|
+
"\u0417": 'Z' # З
|
|
172
|
+
"\u0418": 'I' # И
|
|
173
|
+
"\u04E2": "\u00CD" # Ӣ => Í
|
|
174
|
+
"\u0419": 'Y' # Й
|
|
175
|
+
"\u041A": 'K' # К
|
|
176
|
+
"\u049A": 'Q' # Қ
|
|
177
|
+
"\u041B": 'L' # Л
|
|
178
|
+
"\u041C": 'M' # М
|
|
179
|
+
"\u041D": 'N' # Н
|
|
180
|
+
"\u041E": 'O' # О
|
|
181
|
+
"\u041F": 'P' # П
|
|
182
|
+
"\u0420": 'R' # Р
|
|
183
|
+
"\u0421": 'S' # С
|
|
184
|
+
"\u0422": 'T' # Т
|
|
185
|
+
"\u0423": 'U' # У
|
|
186
|
+
"\u04EE": "\u016C" # Ӯ => Ŭ
|
|
187
|
+
"\u0424": 'F' # Ф
|
|
188
|
+
"\u0425": 'Kh' # Х
|
|
189
|
+
"\u04B2": 'H' # Ҳ
|
|
190
|
+
"\u0427": 'Ch' # Ч
|
|
191
|
+
"\u04B6": 'J' # Ҷ
|
|
192
|
+
"\u0426": 'Ts' # Ц note[2]
|
|
193
|
+
"\u0428": 'Sh' # Ш
|
|
194
|
+
"\u0429": 'Sh' # Щ note[3]
|
|
195
|
+
"\u042A": "\u2019" # Ъ
|
|
196
|
+
"\u042B": 'I' # Ы note[5]
|
|
197
|
+
"\u042C": '' # Ь note[4]
|
|
198
|
+
"\u042D": "\u0116" # Э => Ė
|
|
199
|
+
"\u042E": 'Yu' # Ю
|
|
200
|
+
"\u042F": 'Ya' # Я
|
|
201
|
+
|
|
202
|
+
"\u0430": 'a' # а
|
|
203
|
+
"\u0431": 'b' # б
|
|
204
|
+
"\u0432": 'v' # в
|
|
205
|
+
"\u0433": 'g' # г
|
|
206
|
+
"\u0493": 'gh' # ғ
|
|
207
|
+
"\u0434": 'd' # д
|
|
208
|
+
"\u0435": 'e' # е
|
|
209
|
+
"\u0451": 'yo' # ё
|
|
210
|
+
"\u0436": 'zh' # ж
|
|
211
|
+
"\u0437": 'z' # з
|
|
212
|
+
"\u0438": 'i' # и
|
|
213
|
+
"\u04E3": "\u00ED" # ӣ => í
|
|
214
|
+
"\u0439": 'y' # й
|
|
215
|
+
"\u043A": 'k' # к
|
|
216
|
+
"\u049B": 'q' # қ
|
|
217
|
+
"\u043B": 'l' # л
|
|
218
|
+
"\u043C": 'm' # м
|
|
219
|
+
"\u043D": 'n' # н
|
|
220
|
+
"\u043E": 'o' # о
|
|
221
|
+
"\u043F": 'p' # п
|
|
222
|
+
"\u0440": 'r' # р
|
|
223
|
+
"\u0441": 's' # с
|
|
224
|
+
"\u0442": 't' # т
|
|
225
|
+
"\u0443": 'u' # у
|
|
226
|
+
"\u04EF": "\u016D" # ӯ => ŭ
|
|
227
|
+
"\u0444": 'f' # ф
|
|
228
|
+
"\u0445": 'kh' # х
|
|
229
|
+
"\u04B3": 'h' # ҳ
|
|
230
|
+
"\u0447": 'ch' # ч
|
|
231
|
+
"\u04B7": 'j' # ҷ
|
|
232
|
+
"\u0446": 'ts' # ц note[2]
|
|
233
|
+
"\u0448": 'sh' # ш
|
|
234
|
+
"\u0449": 'sh' # щ note[3]
|
|
235
|
+
"\u044a": "\u2019" # ъ
|
|
236
|
+
"\u044B": 'i' # ы note[5]
|
|
237
|
+
"\u044C": '' # ь note[4]
|
|
238
|
+
"\u044D": "\u0117" # э => ė
|
|
239
|
+
"\u044E": 'yu' # ю
|
|
240
|
+
"\u044F": 'ya' # я
|