interscript 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1988
|
|
4
|
+
language: iso-639-2:div
|
|
5
|
+
source_script: Thaa
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: ROMANIZATION OF MALDIVIAN BGN/PCGN 1988 Agreement, with modifications 2009
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: div_Thaa2Latn_GMV_1988
|
|
11
|
+
description: Maldivian (Divehi) 1988 system
|
|
12
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816778/ROMANIZATION_OF_MALDIVIAN.pdf
|
|
13
|
+
creation_date: 1988
|
|
14
|
+
confirmation_date: 2019
|
|
15
|
+
description: |
|
|
16
|
+
This romanization system supersedes the one that was approved by BGN and PCGN in 1972. An
|
|
17
|
+
official system was submitted to PCGN by the Maldivian government in 1987 and approved by BGN and
|
|
18
|
+
PCGN in 1988. The system presented here reflects the 1988 Agreement with minor modifications introduced by the government of the Maldives in 2009.
|
|
19
|
+
|
|
20
|
+
notes:
|
|
21
|
+
- Maldivian is read from right to left.
|
|
22
|
+
- The symbol ◌ appearing in the Vowel Characters table represents any Maldivian consonant character.
|
|
23
|
+
- The character އ is not romanized. If it bears a vowel character, that vowel character alone is romanized (e.g. އެނބޫދޫ En’boodhoo).
|
|
24
|
+
- |
|
|
25
|
+
When characters ށ and އ appear in combination with a supercircle (the ‘sukun’, which usually marks
|
|
26
|
+
the absence of a vowel, see the Diacritical Mark table): ށ ; އ , these characters are not romanized
|
|
27
|
+
but the following consonant is doubled (e.g. ކަޅުހުރާ Kalhehuttaa), unless the following consonant is a digraph in the
|
|
28
|
+
romanized form, in which case they are romanized h (e.g. ކެރެށްދޫ Kerehdhoo). ށ and އ appearing at the end of a word are romanized h (e.g. ވޭވައް Veyvah).
|
|
29
|
+
- Noonu (ނ) is romanized n’ when appearing without any vowel or auxiliary sign (e.g. ކަނޑުފުށި Kan’dufushi)
|
|
30
|
+
- Thaa (ތ) is romanized iy when appearing in combination with a supercircle (ތ) (e.g. ޒިޔާރަތްފުށި Ziyaaraiyfushi).
|
|
31
|
+
- The Roman-script columns show only lowercase forms but, when applying the table, uppercase and
|
|
32
|
+
lowercase Roman letters as appropriate should be used.
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
tests:
|
|
36
|
+
- source: "އިރުގައި"
|
|
37
|
+
expected: "irugai"
|
|
38
|
+
- source: "ޒިޔާރަތްފުށި"
|
|
39
|
+
expected: "ziyaaraiyfushi"
|
|
40
|
+
- source: "ރައްކާތެރިކުރުމާއި"
|
|
41
|
+
expected: "rakkaatherikurumaai"
|
|
42
|
+
- source: "ޝަހީދުންގެ ދުވަސް"
|
|
43
|
+
expected: "sh’aheedhun’ge dhuvas"
|
|
44
|
+
- source: "މަރުޙަބާ"
|
|
45
|
+
expected: "maruh’abaa"
|
|
46
|
+
- source: "ކިހިނެހް"
|
|
47
|
+
expected: "kihin’eh"
|
|
48
|
+
- source: "ކޮން ނަމެއް ކިޔަނީ"
|
|
49
|
+
expected: "kon’ n’ameh kiyan’ee"
|
|
50
|
+
- source: "ބައްއަޖޖެވުރި ހެނދުނެހް"
|
|
51
|
+
expected: "baajjevuri hen’dhun’eh"
|
|
52
|
+
- source: "މެނދުރެހް"
|
|
53
|
+
expected: "men’dhureh"
|
|
54
|
+
- source: "ހަވީރެހް"
|
|
55
|
+
expected: "haveereh"
|
|
56
|
+
|
|
57
|
+
map:
|
|
58
|
+
inherit: mv-div-Thaa-Latn-1987
|
|
59
|
+
characters:
|
|
60
|
+
#Consonants with diacritical marks (used mainly in words of Arabic origin, corresponding
|
|
61
|
+
# Arabic characters and their romanizations are shown in parentheses)
|
|
62
|
+
"ޘ": "th’"
|
|
63
|
+
"ޙ": "h’"
|
|
64
|
+
"ޚ": "kh"
|
|
65
|
+
"ޛ": "dh’"
|
|
66
|
+
"ޜ": "x"
|
|
67
|
+
"ޝ": "sh’"
|
|
68
|
+
"ޞ": "s’"
|
|
69
|
+
"ޟ": "l’"
|
|
70
|
+
"ޠ": "t’"
|
|
71
|
+
"ޡ": "z’"
|
|
72
|
+
"ޢ": "’"
|
|
73
|
+
"ޣ": "gh"
|
|
74
|
+
"ޤ": "q"
|
|
75
|
+
"ޥ": "w"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1964
|
|
4
|
+
language: iso-639-2:far
|
|
5
|
+
source_script: Latn
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: https://github.com/interscript/interscript/files/5180777/BGN_Romanization_Guide_1964_faeroese.pdf
|
|
8
|
+
creation_date: 1964
|
|
9
|
+
description: |
|
|
10
|
+
The Faeroese language is a dialect of Icelandic and, like Icelandic, employs the letter eth (Ð ð).
|
|
11
|
+
Unlike Icelandic, however, the Faeroese ð has the sound of y (as in "yes") before i, v before u, and is silent in all other cases.
|
|
12
|
+
The Icelandic letter thorn (Þ þ) does not occur in the writing of Faeroese.
|
|
13
|
+
|
|
14
|
+
To avoid the use of the unfamiliar symbol ð, the Board transliterates it as dh, as in Icelandic,
|
|
15
|
+
even though its pronunciation in the two languages is not the same.
|
|
16
|
+
|
|
17
|
+
tests:
|
|
18
|
+
- source: Fyrirgefðu
|
|
19
|
+
expected: Fyrirgefdhu
|
|
20
|
+
- source: Þakka
|
|
21
|
+
expected: Þakka
|
|
22
|
+
|
|
23
|
+
map:
|
|
24
|
+
inherit: bgnpcgn-isl-Latn-Latn-1964
|
|
25
|
+
|
|
26
|
+
characters:
|
|
27
|
+
"\u00DE": ~ # Þ transliteration removed
|
|
28
|
+
"\u00FE": ~ # þ transliteration removed
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1964
|
|
4
|
+
language: iso-639-2:isl
|
|
5
|
+
source_script: Latn
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: TRANSLITERATION OF ICELANDIC BGN/PCGN 1947 System
|
|
8
|
+
url: https://github.com/interscript/interscript/files/5180785/BGN_Romanization_Guide_1964_icelandic_1947.pdf
|
|
9
|
+
creation_date: 1964
|
|
10
|
+
description: |
|
|
11
|
+
The BGN and the PCGN in 1947 jointly agreed to the transliteration of two letters of the Icelandic alphabet which,
|
|
12
|
+
although used in writing Old English, have disappeared from the modern English alphabet.
|
|
13
|
+
|
|
14
|
+
The transliterated letters are the edh (Ð ð) and the thorn (Þ þ), pronounced as th in "thus" and th in "think," respectively.
|
|
15
|
+
|
|
16
|
+
It was felt that it was better to transliterate these letters into familiar symbols than preserve such unfamiliar letters in the nomenclature.
|
|
17
|
+
|
|
18
|
+
notes:
|
|
19
|
+
- More about "edh" letter - https://en.wikipedia.org/wiki/Eth
|
|
20
|
+
- More about "thorn" letter - https://en.wikipedia.org/wiki/Thorn_(letter)
|
|
21
|
+
|
|
22
|
+
tests:
|
|
23
|
+
- source: Fyrirgefðu
|
|
24
|
+
expected: Fyrirgefdhu
|
|
25
|
+
- source: þu ert velkominn
|
|
26
|
+
expected: thu ert velkominn
|
|
27
|
+
- source: GOÐAN DAGINN
|
|
28
|
+
expected: GODHAN DAGINN
|
|
29
|
+
- source: Þakka
|
|
30
|
+
expected: Thakka
|
|
31
|
+
|
|
32
|
+
map:
|
|
33
|
+
characters:
|
|
34
|
+
"\u00D0": "Dh" # Ð
|
|
35
|
+
"\u00F0": "dh" # ð
|
|
36
|
+
"\u00DE": "Th" # Þ
|
|
37
|
+
"\u00FE": "th" # þ
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
|
|
2
|
+
---
|
|
3
|
+
authority_id: bgnpcgn
|
|
4
|
+
id: 1979
|
|
5
|
+
language: iso-639-2:kaz
|
|
6
|
+
source_script: Cyrl
|
|
7
|
+
destination_script: Latn
|
|
8
|
+
name: Romanization of Kazakh
|
|
9
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811511/ROMANIZATION_OF_KAZAKH.pdf
|
|
10
|
+
creation_date: 1979
|
|
11
|
+
confirmation_date: 2019
|
|
12
|
+
description: |
|
|
13
|
+
The BGN/PCGN system for Kazakh was designed for use in romanizing names written in the Kazakh Cyrillic alphabet.
|
|
14
|
+
The Kazakh Cyrillic alphabet contains nine characters not present in the Russian alphabet: ә, ғ, қ, ң, ө, ұ, ү, һ and і.
|
|
15
|
+
|
|
16
|
+
notes:
|
|
17
|
+
- The character sequences гһ, зһ, кһ, нг, сһ and цһ may be romanized g·h, z·h, k·h, n·g, s·h
|
|
18
|
+
and ts·h in order to differentiate those romanizations from the digraphs gh, zh, kh, ng, sh,
|
|
19
|
+
and the letter sequence tsh, which are used to render the characters ғ, ж, х, ң, ш, and the character sequence тш.
|
|
20
|
+
- The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
|
|
21
|
+
- Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character.
|
|
22
|
+
- The Kazakh government has adopted a programme to move to using the Roman-script as the principal writing system for Kazakh.
|
|
23
|
+
- 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
|
|
24
|
+
unmodified letters of the basic Roman script is:
|
|
25
|
+
Ä (U+00C4) ä (U+00E4)
|
|
26
|
+
Ī (U+012A) ī (U+012B)
|
|
27
|
+
Ö (U+00D6) ö (U+00F6)
|
|
28
|
+
Ū (U+016A) ū (U+016B)
|
|
29
|
+
Ü (U+00DC) ü (U+00FC)
|
|
30
|
+
” (U+201D) ’ (U+2019)
|
|
31
|
+
Ė (U+0116) ė (U+0117)'
|
|
32
|
+
- The Romanization column shows only lowercase forms but, when romanizing, uppercase and
|
|
33
|
+
lowercase Roman letters as appropriate should be used.
|
|
34
|
+
|
|
35
|
+
tests:
|
|
36
|
+
- source: Өңірек
|
|
37
|
+
expected: Öngirek
|
|
38
|
+
- source: Өтебас Артезиан Құдығы
|
|
39
|
+
expected: Ötebas Artezīan Qudyghy
|
|
40
|
+
- source: Өскенбай
|
|
41
|
+
expected: Öskenbay
|
|
42
|
+
- source: Өсек Көлі
|
|
43
|
+
expected: Ösek Köli
|
|
44
|
+
- source: Өрмексу
|
|
45
|
+
expected: Örmeksū
|
|
46
|
+
- source: Өмірзақ
|
|
47
|
+
expected: Ömirzaq
|
|
48
|
+
- source: Өлеңті
|
|
49
|
+
expected: Ölengti
|
|
50
|
+
- source: Өл-Фараби Даңғылы
|
|
51
|
+
expected: Öl-Farabī Dangghyly
|
|
52
|
+
- source: Өкпекті Тауы
|
|
53
|
+
expected: Ökpekti Taūy
|
|
54
|
+
- source: Өкенсоркен Қыстауы
|
|
55
|
+
expected: Ökensorken Qystaūy
|
|
56
|
+
- source: Өзен Ойысы
|
|
57
|
+
expected: Özen Oyysy
|
|
58
|
+
- source: Өзен
|
|
59
|
+
expected: Özen
|
|
60
|
+
- source: Өгізтөбе Тауы
|
|
61
|
+
expected: Ögiztöbe Taūy
|
|
62
|
+
- source: Өгізтау Қыстауы
|
|
63
|
+
expected: Ögiztaū Qystaūy
|
|
64
|
+
- source: Өгізмүйіз Тауы
|
|
65
|
+
expected: Ögizmüyiz Taūy
|
|
66
|
+
- source: Өгізбұлақ
|
|
67
|
+
expected: Ögizbulaq
|
|
68
|
+
- source: Өгіз Үреулі
|
|
69
|
+
expected: Ögiz Üreūli
|
|
70
|
+
- source: Өгем Жотасы
|
|
71
|
+
expected: Ögem Zhotasy
|
|
72
|
+
- source: Өгем
|
|
73
|
+
expected: Ögem
|
|
74
|
+
- source: Әшім
|
|
75
|
+
expected: Äshim
|
|
76
|
+
- source: Әулиетөбе Тауы
|
|
77
|
+
expected: Äūlīetöbe Taūy
|
|
78
|
+
- source: Әулиекөл
|
|
79
|
+
expected: Äūlīeköl
|
|
80
|
+
- source: Әндіжан Құдығы
|
|
81
|
+
expected: Ändizhan Qudyghy
|
|
82
|
+
- source: Ұясай
|
|
83
|
+
expected: Uyasay
|
|
84
|
+
- source: Ұялы Метеорологиялық Станциясы
|
|
85
|
+
expected: Uyaly Meteorologīyalyq Stantsīyasy
|
|
86
|
+
- source: Ұшқын Қыстауы
|
|
87
|
+
expected: Ushqyn Qystaūy
|
|
88
|
+
- source: Үңгіртас
|
|
89
|
+
expected: Ünggirtas
|
|
90
|
+
- source: Үшқұлын
|
|
91
|
+
expected: Üshqulyn
|
|
92
|
+
- source: Құтырғы Асуы
|
|
93
|
+
expected: Qutyrghy Asūy
|
|
94
|
+
- source: Ярмы Стансасы
|
|
95
|
+
expected: Yarmy Stansasy
|
|
96
|
+
- source: Юпитер Қыстауы
|
|
97
|
+
expected: Yupīter Qystaūy
|
|
98
|
+
- source: Энгельс Көшесi
|
|
99
|
+
expected: Ėngel’s Köshesi
|
|
100
|
+
- source: Ырғызбай Жайлауы
|
|
101
|
+
expected: Yrghyzbay Zhaylaūy
|
|
102
|
+
- source: Щебнюха Тауы
|
|
103
|
+
expected: Shchebnyukha Taūy
|
|
104
|
+
- source: Шөміштікөл Соры
|
|
105
|
+
expected: Shömishtiköl Sory
|
|
106
|
+
- source: Чалов Барак Қыстауы
|
|
107
|
+
expected: Chalov Barak Qystaūy
|
|
108
|
+
- source: Чайкино
|
|
109
|
+
expected: Chaykīno
|
|
110
|
+
- source: Цуриковка
|
|
111
|
+
expected: Tsūrīkovka
|
|
112
|
+
- source: Хамитқора Қыстауы
|
|
113
|
+
expected: Khamītqora Qystaūy
|
|
114
|
+
- source: Фыкалка
|
|
115
|
+
expected: Fykalka
|
|
116
|
+
- source: Уақбай Қыстауы
|
|
117
|
+
expected: Ūaqbay Qystaūy
|
|
118
|
+
- source: Төңірекшың Тоғайы
|
|
119
|
+
expected: Töngirekshyng Toghayy
|
|
120
|
+
- source: Сабағали Қыстауы
|
|
121
|
+
expected: Sabaghalī Qystaūy
|
|
122
|
+
- source: Рысқұлов Даңғылы
|
|
123
|
+
expected: Rysqulov Dangghyly
|
|
124
|
+
- source: Пірназар Құдығы
|
|
125
|
+
expected: Pirnazar Qudyghy
|
|
126
|
+
- source: Оңтүстік Қазақстан Облысы
|
|
127
|
+
expected: Ongtüstik Qazaqstan Oblysy
|
|
128
|
+
- source: Нөмір Үшінші Суторабының Бөгені
|
|
129
|
+
expected: Nömir Üshinshi Sūtorabynyng Bögeni
|
|
130
|
+
- source: Мәмбетқазған Құдығы
|
|
131
|
+
expected: Mämbetqazghan Qudyghy
|
|
132
|
+
- source: Мемлекеттік Аудандық Электр Стансасы - Бір
|
|
133
|
+
expected: Memlekettik Aūdandyq Ėlektr Stansasy - Bir
|
|
134
|
+
- source: Линейский Белок Тауы
|
|
135
|
+
expected: Līneyskīy Belok Taūy
|
|
136
|
+
- source: Көшердік Бөгені
|
|
137
|
+
expected: Kösherdik Bögeni
|
|
138
|
+
- source: Көлфонтан Артезиан Құдығы
|
|
139
|
+
expected: Kölfontan Artezīan Qudyghy
|
|
140
|
+
- source: Изендіарал Мүйісі
|
|
141
|
+
expected: Īzendiaral Müyisi
|
|
142
|
+
- source: Злиха Метеорологиялық Станциасы
|
|
143
|
+
expected: Zlīkha Meteorologīyalyq Stantsīasy
|
|
144
|
+
- source: Жұлжұрған Көлі
|
|
145
|
+
expected: Zhulzhurghan Köli
|
|
146
|
+
- source: Ескі Үшал Қыстауы
|
|
147
|
+
expected: Eski Üshal Qystaūy
|
|
148
|
+
- source: Дөңгелексор Қыстауы
|
|
149
|
+
expected: Dönggeleksor Qystaūy
|
|
150
|
+
- source: Горько-Солёное Көлі
|
|
151
|
+
expected: Gor’ko-Solyonoe Köli
|
|
152
|
+
- source: Вагулино
|
|
153
|
+
expected: Vagūlīno
|
|
154
|
+
- source: Бөстай Учаскесі
|
|
155
|
+
expected: Böstay Ūchaskesi
|
|
156
|
+
- source: Аққолқы Тоғайы
|
|
157
|
+
expected: Aqqolqy Toghayy
|
|
158
|
+
- source: Іңқардария
|
|
159
|
+
expected: Ingqardarīya
|
|
160
|
+
|
|
161
|
+
map:
|
|
162
|
+
characters:
|
|
163
|
+
'\u0410': 'A' # А
|
|
164
|
+
'\u04D8': 'Ä' # Ә
|
|
165
|
+
'\u0411': 'B' # Б
|
|
166
|
+
'\u0412': 'V' # В
|
|
167
|
+
'\u0413': 'G' # Г
|
|
168
|
+
'\u0492': 'Gh' # Ғ
|
|
169
|
+
'\u0414': 'D' # Д
|
|
170
|
+
'\u0415': 'E' # Е
|
|
171
|
+
'\u0401': 'Yo' # Ё
|
|
172
|
+
'\u0416': 'Zh' # Ж
|
|
173
|
+
'\u0417': 'Z' # З
|
|
174
|
+
'\u0418': 'Ī' # И
|
|
175
|
+
'\u0419': 'Y' # Й
|
|
176
|
+
'\u041A': 'K' # К
|
|
177
|
+
'\u049A': 'Q' # Қ
|
|
178
|
+
'\u041B': 'L' # Л
|
|
179
|
+
'\u041C': 'M' # М
|
|
180
|
+
'\u041D': 'N' # Н
|
|
181
|
+
'\u04A2': 'Ng' # Ң
|
|
182
|
+
'\u041E': 'O' # О
|
|
183
|
+
'\u04E8': 'Ö' # Ө
|
|
184
|
+
'\u041F': 'P' # П
|
|
185
|
+
'\u0420': 'R' # Р
|
|
186
|
+
'\u0421': 'S' # С
|
|
187
|
+
'\u0422': 'T' # Т
|
|
188
|
+
'\u0423': 'Ū' # У
|
|
189
|
+
'\u04B0': 'U' # Ұ
|
|
190
|
+
'\u04AE': 'Ü' # Ү
|
|
191
|
+
'\u0424': 'F' # Ф
|
|
192
|
+
'\u0425': 'Kh' # Х
|
|
193
|
+
'\u04BA': 'H' # Һ
|
|
194
|
+
'\u0426': 'Ts' # Ц
|
|
195
|
+
'\u0427': 'Ch' # Ч
|
|
196
|
+
'\u0428': 'Sh' # Ш
|
|
197
|
+
'\u0429': 'Shch' # Щ
|
|
198
|
+
'\u042A': '”' # Ъ
|
|
199
|
+
'\u042B': 'Y' # Ы
|
|
200
|
+
'\u0406': 'I' # І
|
|
201
|
+
'\u042C': '’' # Ь
|
|
202
|
+
'\u042D': 'Ė' # Э
|
|
203
|
+
'\u042E': 'Yu' # Ю
|
|
204
|
+
'\u042F': 'Ya' # Я
|
|
205
|
+
|
|
206
|
+
'\u0430': 'a' # а
|
|
207
|
+
'\u04D9': 'ä' # ә
|
|
208
|
+
'\u0431': 'b' # б
|
|
209
|
+
'\u0432': 'v' # в
|
|
210
|
+
'\u0433': 'g' # г
|
|
211
|
+
'\u0493': 'gh' # ғ
|
|
212
|
+
'\u0434': 'd' # д
|
|
213
|
+
'\u0435': 'e' # e
|
|
214
|
+
'\u0451': 'yo' # ё
|
|
215
|
+
'\u0436': 'zh' # ж
|
|
216
|
+
'\u0437': 'z' # з
|
|
217
|
+
'\u0438': 'ī' # и
|
|
218
|
+
'\u0439': 'y' # й
|
|
219
|
+
'\u043A': 'k' # к
|
|
220
|
+
'\u049B': 'q' # қ
|
|
221
|
+
'\u043B': 'l' # л
|
|
222
|
+
'\u043C': 'm' # м
|
|
223
|
+
'\u043D': 'n' # н
|
|
224
|
+
'\u04A3': 'ng' # ң
|
|
225
|
+
'\u043E': 'o' # о
|
|
226
|
+
'\u04E9': 'ö' # ө
|
|
227
|
+
'\u043F': 'p' # п
|
|
228
|
+
'\u0440': 'r' # р
|
|
229
|
+
'\u0441': 's' # с
|
|
230
|
+
'\u0442': 't' # т
|
|
231
|
+
'\u0443': 'ū' # у
|
|
232
|
+
'\u04B1': 'u' # ұ
|
|
233
|
+
'\u04AF': 'ü' # ү
|
|
234
|
+
'\u0444': 'f' # ф
|
|
235
|
+
'\u0445': 'kh' # х
|
|
236
|
+
'\u04BB': 'h' # һ
|
|
237
|
+
'\u0446': 'ts' # ц
|
|
238
|
+
'\u0447': 'ch' # ч
|
|
239
|
+
'\u0448': 'sh' # ш
|
|
240
|
+
'\u0449': 'shch' # щ
|
|
241
|
+
'\u044A': '”' # ъ
|
|
242
|
+
'\u044B': 'y' # ы
|
|
243
|
+
'\u0456': 'i' # і
|
|
244
|
+
'\u044C': '’' # ь
|
|
245
|
+
'\u044D': 'ė' # э
|
|
246
|
+
'\u044E': 'yu' # ю
|
|
247
|
+
'\u044F': 'ya' # я
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: bgnpcgn
|
|
3
|
+
id: 1979
|
|
4
|
+
language: iso-639-2:kir
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Romanization of Kyrgyz
|
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816663/ROMANIZATION_OF_KYRGYZ.pdf
|
|
9
|
+
creation_date: 1979
|
|
10
|
+
confirmation_date: 2019
|
|
11
|
+
description: |
|
|
12
|
+
The BGN/PCGN system for Kyrgyz Cyrillic was designed for use in romanizing names written
|
|
13
|
+
in the Kyrgyz Cyrillic alphabet. The Kyrgyz Cyrillic alphabet contains three characters not present in
|
|
14
|
+
the Russian alphabet: Ң, Ө, and Ү.
|
|
15
|
+
|
|
16
|
+
notes:
|
|
17
|
+
- Both Kyrgyz and Kirghiz may frequently be seen as the language name; both these spellings are used in the
|
|
18
|
+
ISO 639 Standard on the representation of names for languages.
|
|
19
|
+
- The character sequence нг may be romanized n·g in order to differentiate that romanization
|
|
20
|
+
from the digraph ng, which is used to render the character ң.
|
|
21
|
+
- The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
|
|
22
|
+
- 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
|
|
23
|
+
unmodified letters of the basic Roman script is:
|
|
24
|
+
All apostrophes appearing in romanization are U+2019
|
|
25
|
+
Ö (U+00D6) ö (U+00F6)
|
|
26
|
+
Ü (U+00DC) ü (U+00FC)'
|
|
27
|
+
- The Romanization column shows only lowercase forms but, when romanizing, uppercase and
|
|
28
|
+
lowercase Roman letters as appropriate should be used.
|
|
29
|
+
|
|
30
|
+
tests:
|
|
31
|
+
- source: Ысык-Көл Облусу
|
|
32
|
+
expected: Ysyk-Köl Oblusu
|
|
33
|
+
- source: Ысык-Көл
|
|
34
|
+
expected: Ysyk-Köl
|
|
35
|
+
- source: Шедвик-Сай
|
|
36
|
+
expected: Shedvik-Say
|
|
37
|
+
- source: Чүй Облусу
|
|
38
|
+
expected: Chüy Oblusu
|
|
39
|
+
- source: Чүй
|
|
40
|
+
expected: Chüy
|
|
41
|
+
- source: Чирик-Сай
|
|
42
|
+
expected: Chirik-Say
|
|
43
|
+
- source: Хребет Джети-Сандал
|
|
44
|
+
expected: Khrebet Djeti-Sandal
|
|
45
|
+
- source: Узук-Булак
|
|
46
|
+
expected: Uzuk-Bulak
|
|
47
|
+
- source: Торугарт Ашуу
|
|
48
|
+
expected: Torugart Ashuu
|
|
49
|
+
- source: Торетал
|
|
50
|
+
expected: Toretal
|
|
51
|
+
- source: Терек
|
|
52
|
+
expected: Terek
|
|
53
|
+
- source: Талды-Булак
|
|
54
|
+
expected: Taldy-Bulak
|
|
55
|
+
- source: Талас Облусу
|
|
56
|
+
expected: Talas Oblusu
|
|
57
|
+
- source: Талас
|
|
58
|
+
expected: Talas
|
|
59
|
+
- source: Сарык-Кёль
|
|
60
|
+
expected: Saryk-Kyol’
|
|
61
|
+
- source: Родник Кара-Суу
|
|
62
|
+
expected: Rodnik Kara-Suu
|
|
63
|
+
- source: Родник Бейрёк-Булак
|
|
64
|
+
expected: Rodnik Beyryok-Bulak
|
|
65
|
+
- source: Перевал Сары-Челек
|
|
66
|
+
expected: Pereval Sary-Chelek
|
|
67
|
+
- source: Перевал Макмал
|
|
68
|
+
expected: Pereval Makmal
|
|
69
|
+
- source: Перевал Кара-Токой
|
|
70
|
+
expected: Pereval Kara-Tokoy
|
|
71
|
+
- source: Перевал Ашуу-Тёр
|
|
72
|
+
expected: Pereval Ashuu-Tyor
|
|
73
|
+
- source: Перевал Ашуу
|
|
74
|
+
expected: Pereval Ashuu
|
|
75
|
+
- source: Ош Шаары
|
|
76
|
+
expected: Osh Shaary
|
|
77
|
+
- source: Ош Облусу
|
|
78
|
+
expected: Osh Oblusu
|
|
79
|
+
- source: Ош
|
|
80
|
+
expected: Osh
|
|
81
|
+
- source: Ош
|
|
82
|
+
expected: Osh
|
|
83
|
+
- source: Осоавиахим
|
|
84
|
+
expected: Osoaviakhim
|
|
85
|
+
- source: Озеро Афлатук
|
|
86
|
+
expected: Ozero Aflatuk
|
|
87
|
+
- source: Нарын Облусу
|
|
88
|
+
expected: Naryn Oblusu
|
|
89
|
+
- source: Нарын
|
|
90
|
+
expected: Naryn
|
|
91
|
+
- source: Метеорологическая Станция Чамкал
|
|
92
|
+
expected: Meteorologicheskaya Stantsiya Chamkal
|
|
93
|
+
- source: Марза-Булак
|
|
94
|
+
expected: Marza-Bulak
|
|
95
|
+
- source: Макмал
|
|
96
|
+
expected: Makmal
|
|
97
|
+
- source: Кыргызстан
|
|
98
|
+
expected: Kyrgyzstan
|
|
99
|
+
- source: Кыргыз Республикасы
|
|
100
|
+
expected: Kyrgyz Respublikasy
|
|
101
|
+
- source: Куру-Сай
|
|
102
|
+
expected: Kuru-Say
|
|
103
|
+
- source: Куру-Сай
|
|
104
|
+
expected: Kuru-Say
|
|
105
|
+
- source: Кур-Пырылды
|
|
106
|
+
expected: Kur-Pyryldy
|
|
107
|
+
- source: Кок-Бель-Таш
|
|
108
|
+
expected: Kok-Bel’-Tash
|
|
109
|
+
- source: Кичи-Сандык
|
|
110
|
+
expected: Kichi-Sandyk
|
|
111
|
+
- source: Кель-Сай
|
|
112
|
+
expected: Kel’-Say
|
|
113
|
+
- source: Карагайлы
|
|
114
|
+
expected: Karagayly
|
|
115
|
+
- source: Кара-Суу
|
|
116
|
+
expected: Kara-Suu
|
|
117
|
+
- source: Жалал-Абад Облусу
|
|
118
|
+
expected: Jalal-Abad Oblusu
|
|
119
|
+
- source: Жалал-Абад
|
|
120
|
+
expected: Jalal-Abad
|
|
121
|
+
- source: Долина Беш-Башат
|
|
122
|
+
expected: Dolina Besh-Bashat
|
|
123
|
+
- source: Гора Арпа-Турча
|
|
124
|
+
expected: Gora Arpa-Turcha
|
|
125
|
+
- source: Бишкек Шаары
|
|
126
|
+
expected: Bishkek Shaary
|
|
127
|
+
- source: Бишкек
|
|
128
|
+
expected: Bishkek
|
|
129
|
+
- source: Бишкек
|
|
130
|
+
expected: Bishkek
|
|
131
|
+
- source: Баткен Облусу
|
|
132
|
+
expected: Batken Oblusu
|
|
133
|
+
- source: Баткен
|
|
134
|
+
expected: Batken
|
|
135
|
+
- source: Аяк-Терек
|
|
136
|
+
expected: Ayak-Terek
|
|
137
|
+
- source: Аюу-Чача
|
|
138
|
+
expected: Ayuu-Chacha
|
|
139
|
+
- source: Арпа
|
|
140
|
+
expected: Arpa
|
|
141
|
+
- source: Ак-Суу
|
|
142
|
+
expected: Ak-Suu
|
|
143
|
+
|
|
144
|
+
map:
|
|
145
|
+
characters:
|
|
146
|
+
'\u0410': 'A' # А
|
|
147
|
+
'\u0411': 'B' # Б
|
|
148
|
+
'\u0412': 'V' # В
|
|
149
|
+
'\u0413': 'G' # Г
|
|
150
|
+
'\u0414': 'D' # Д
|
|
151
|
+
'\u0415': 'E' # Е
|
|
152
|
+
'\u0401': 'Yo' # Ё
|
|
153
|
+
'\u0416': 'J' # Ж
|
|
154
|
+
'\u0417': 'Z' # З
|
|
155
|
+
'\u0418': 'I' # И
|
|
156
|
+
'\u0419': 'Y' # Й
|
|
157
|
+
'\u041A': 'K' # К
|
|
158
|
+
'\u041B': 'L' # Л
|
|
159
|
+
'\u041C': 'M' # М
|
|
160
|
+
'\u041D': 'N' # Н
|
|
161
|
+
'\u04A2': 'Ng' # Ң
|
|
162
|
+
'\u041E': 'O' # О
|
|
163
|
+
'\u04E8': 'Ö' # Ө
|
|
164
|
+
'\u041F': 'P' # П
|
|
165
|
+
'\u0420': 'R' # Р
|
|
166
|
+
'\u0421': 'S' # С
|
|
167
|
+
'\u0422': 'T' # Т
|
|
168
|
+
'\u0423': 'U' # У
|
|
169
|
+
'\u04AE': 'Ü' # Ү
|
|
170
|
+
'\u0424': 'F' # Ф
|
|
171
|
+
'\u0425': 'Kh' # Х
|
|
172
|
+
'\u0426': 'Ts' # Ц
|
|
173
|
+
'\u0427': 'Ch' # Ч
|
|
174
|
+
'\u0428': 'Sh' # Ш
|
|
175
|
+
'\u0429': 'Shch' # Щ
|
|
176
|
+
'\u042A': '”' # Ъ
|
|
177
|
+
'\u042B': 'Y' # Ы
|
|
178
|
+
'\u042C': '’' # Ь
|
|
179
|
+
'\u042D': 'E' # Э
|
|
180
|
+
'\u042E': 'Yu' # Ю
|
|
181
|
+
'\u042F': 'Ya' # Я
|
|
182
|
+
|
|
183
|
+
'\u0430': 'a' # а
|
|
184
|
+
'\u0431': 'b' # б
|
|
185
|
+
'\u0432': 'v' # в
|
|
186
|
+
'\u0433': 'g' # г
|
|
187
|
+
'\u0434': 'd' # д
|
|
188
|
+
'\u0435': 'e' # e
|
|
189
|
+
'\u0451': 'yo' # ё
|
|
190
|
+
'\u0436': 'j' # ж
|
|
191
|
+
'\u0437': 'z' # з
|
|
192
|
+
'\u0438': 'i' # и
|
|
193
|
+
'\u0439': 'y' # й
|
|
194
|
+
'\u043A': 'k' # к
|
|
195
|
+
'\u043B': 'l' # л
|
|
196
|
+
'\u043C': 'm' # м
|
|
197
|
+
'\u043D': 'n' # н
|
|
198
|
+
'\u04A3': 'ng' # ң
|
|
199
|
+
'\u043E': 'o' # о
|
|
200
|
+
'\u04E9': 'ö' # ө
|
|
201
|
+
'\u043F': 'p' # п
|
|
202
|
+
'\u0440': 'r' # р
|
|
203
|
+
'\u0441': 's' # с
|
|
204
|
+
'\u0442': 't' # т
|
|
205
|
+
'\u0443': 'u' # у
|
|
206
|
+
'\u04AF': 'ü' # ү
|
|
207
|
+
'\u0444': 'f' # ф
|
|
208
|
+
'\u0445': 'kh' # х
|
|
209
|
+
'\u0446': 'ts' # ц
|
|
210
|
+
'\u0447': 'ch' # ч
|
|
211
|
+
'\u0448': 'sh' # ш
|
|
212
|
+
'\u0449': 'shch' # щ
|
|
213
|
+
'\u044A': '”' # ъ
|
|
214
|
+
'\u044B': 'y' # ы
|
|
215
|
+
'\u044C': '’' # ь
|
|
216
|
+
'\u044D': 'e' # э
|
|
217
|
+
'\u044E': 'yu' # ю
|
|
218
|
+
'\u044F': 'ya' # я
|