interscript 0.1.7 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,75 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1988
|
4
|
+
language: iso-639-2:div
|
5
|
+
source_script: Thaa
|
6
|
+
destination_script: Latn
|
7
|
+
name: ROMANIZATION OF MALDIVIAN BGN/PCGN 1988 Agreement, with modifications 2009
|
8
|
+
alias:
|
9
|
+
ogc11122:
|
10
|
+
code: div_Thaa2Latn_GMV_1988
|
11
|
+
description: Maldivian (Divehi) 1988 system
|
12
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816778/ROMANIZATION_OF_MALDIVIAN.pdf
|
13
|
+
creation_date: 1988
|
14
|
+
confirmation_date: 2019
|
15
|
+
description: |
|
16
|
+
This romanization system supersedes the one that was approved by BGN and PCGN in 1972. An
|
17
|
+
official system was submitted to PCGN by the Maldivian government in 1987 and approved by BGN and
|
18
|
+
PCGN in 1988. The system presented here reflects the 1988 Agreement with minor modifications introduced by the government of the Maldives in 2009.
|
19
|
+
|
20
|
+
notes:
|
21
|
+
- Maldivian is read from right to left.
|
22
|
+
- The symbol ◌ appearing in the Vowel Characters table represents any Maldivian consonant character.
|
23
|
+
- The character އ is not romanized. If it bears a vowel character, that vowel character alone is romanized (e.g. އެނބޫދޫ En’boodhoo).
|
24
|
+
- |
|
25
|
+
When characters ށ and އ appear in combination with a supercircle (the ‘sukun’, which usually marks
|
26
|
+
the absence of a vowel, see the Diacritical Mark table): ށ ; އ , these characters are not romanized
|
27
|
+
but the following consonant is doubled (e.g. ކަޅުހުރާ Kalhehuttaa), unless the following consonant is a digraph in the
|
28
|
+
romanized form, in which case they are romanized h (e.g. ކެރެށްދޫ Kerehdhoo). ށ and އ appearing at the end of a word are romanized h (e.g. ވޭވައް Veyvah).
|
29
|
+
- Noonu (ނ) is romanized n’ when appearing without any vowel or auxiliary sign (e.g. ކަނޑުފުށި Kan’dufushi).
|
30
|
+
- Thaa (ތ) is romanized iy when appearing in combination with a supercircle (ތ) (e.g. ޒިޔާރަތްފުށި Ziyaaraiyfushi).
|
31
|
+
- The Roman-script columns show only lowercase forms but, when applying the table, uppercase and
|
32
|
+
lowercase Roman letters as appropriate should be used.
|
33
|
+
|
34
|
+
|
35
|
+
tests:
|
36
|
+
- source: "އިރުގައި"
|
37
|
+
expected: "irugai"
|
38
|
+
- source: "ޒިޔާރަތްފުށި"
|
39
|
+
expected: "ziyaaraiyfushi"
|
40
|
+
- source: "ރައްކާތެރިކުރުމާއި"
|
41
|
+
expected: "rakkaatherikurumaai"
|
42
|
+
- source: "ޝަހީދުންގެ ދުވަސް"
|
43
|
+
expected: "sh’aheedhun’ge dhuvas"
|
44
|
+
- source: "މަރުޙަބާ"
|
45
|
+
expected: "maruh’abaa"
|
46
|
+
- source: "ކިހިނެހް"
|
47
|
+
expected: "kihin’eh"
|
48
|
+
- source: "ކޮން ނަމެއް ކިޔަނީ"
|
49
|
+
expected: "kon’ n’ameh kiyan’ee"
|
50
|
+
- source: "ބައްއަޖޖެވުރި ހެނދުނެހް"
|
51
|
+
expected: "baajjevuri hen’dhun’eh"
|
52
|
+
- source: "މެނދުރެހް"
|
53
|
+
expected: "men’dhureh"
|
54
|
+
- source: "ހަވީރެހް"
|
55
|
+
expected: "haveereh"
|
56
|
+
|
57
|
+
map:
|
58
|
+
inherit: mv-div-Thaa-Latn-1987
|
59
|
+
characters:
|
60
|
+
#Consonants with diacritical marks (used mainly in words of Arabic origin, corresponding
|
61
|
+
# Arabic characters and their romanizations are shown in parentheses)
|
62
|
+
"ޘ": "th’"
|
63
|
+
"ޙ": "h’"
|
64
|
+
"ޚ": "kh"
|
65
|
+
"ޛ": "dh’"
|
66
|
+
"ޜ": "x"
|
67
|
+
"ޝ": "sh’"
|
68
|
+
"ޞ": "s’"
|
69
|
+
"ޟ": "l’"
|
70
|
+
"ޠ": "t’"
|
71
|
+
"ޡ": "z’"
|
72
|
+
"ޢ": "’"
|
73
|
+
"ޣ": "gh"
|
74
|
+
"ޤ": "q"
|
75
|
+
"ޥ": "w"
|
@@ -0,0 +1,28 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1964
|
4
|
+
language: iso-639-2:far
|
5
|
+
source_script: Latn
|
6
|
+
destination_script: Latn
|
7
|
+
name: https://github.com/interscript/interscript/files/5180777/BGN_Romanization_Guide_1964_faeroese.pdf
|
8
|
+
creation_date: 1964
|
9
|
+
description: |
|
10
|
+
The Faeroese language is a dialect of Icelandic and, like Icelandic, employs the letter eth (Ð ð).
|
11
|
+
Unlike Icelandic, however, the Faeroese ð has the sound of y (as in "yes") before i, v before u, and is silent in all other cases.
|
12
|
+
The Icelandic letter thorn (Þ þ) does not occur in the writing of Faeroese.
|
13
|
+
|
14
|
+
To avoid the use of the unfamiliar symbol ð, the Board transliterates it as dh, as in Icelandic,
|
15
|
+
even though its pronunciation in the two languages is not the same.
|
16
|
+
|
17
|
+
tests:
|
18
|
+
- source: Fyrirgefðu
|
19
|
+
expected: Fyrirgefdhu
|
20
|
+
- source: Þakka
|
21
|
+
expected: Þakka
|
22
|
+
|
23
|
+
map:
|
24
|
+
inherit: bgnpcgn-isl-Latn-Latn-1964
|
25
|
+
|
26
|
+
characters:
|
27
|
+
"\u00DE": ~ # Þ transliteration removed
|
28
|
+
"\u00FE": ~ # þ transliteration removed
|
@@ -0,0 +1,37 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1964
|
4
|
+
language: iso-639-2:isl
|
5
|
+
source_script: Latn
|
6
|
+
destination_script: Latn
|
7
|
+
name: TRANSLITERATION OF ICELANDIC BGN/PCGN 1947 System
|
8
|
+
url: https://github.com/interscript/interscript/files/5180785/BGN_Romanization_Guide_1964_icelandic_1947.pdf
|
9
|
+
creation_date: 1964
|
10
|
+
description: |
|
11
|
+
The BGN and the PCGN in 1947 jointly agreed to the transliteration of two letters of the Icelandic alphabet which,
|
12
|
+
although used in writing Old English, have disappeared from the modern English alphabet.
|
13
|
+
|
14
|
+
The transliterated letters are the edh (Ð ð) and the thorn (Þ þ), pronounced as th in "thus" and th in "think," respectively.
|
15
|
+
|
16
|
+
It was felt that it was better to transliterate these letters into familiar symbols than preserve such unfamiliar letters in the nomenclature.
|
17
|
+
|
18
|
+
notes:
|
19
|
+
- More about "edh" letter - https://en.wikipedia.org/wiki/Eth
|
20
|
+
- More about "thorn" letter - https://en.wikipedia.org/wiki/Thorn_(letter)
|
21
|
+
|
22
|
+
tests:
|
23
|
+
- source: Fyrirgefðu
|
24
|
+
expected: Fyrirgefdhu
|
25
|
+
- source: þu ert velkominn
|
26
|
+
expected: thu ert velkominn
|
27
|
+
- source: GOÐAN DAGINN
|
28
|
+
expected: GODHAN DAGINN
|
29
|
+
- source: Þakka
|
30
|
+
expected: Thakka
|
31
|
+
|
32
|
+
map:
|
33
|
+
characters:
|
34
|
+
"\u00D0": "Dh" # Ð
|
35
|
+
"\u00F0": "dh" # ð
|
36
|
+
"\u00DE": "Th" # Þ
|
37
|
+
"\u00FE": "th" # þ
|
@@ -0,0 +1,247 @@
|
|
1
|
+
|
2
|
+
---
|
3
|
+
authority_id: bgnpcgn
|
4
|
+
id: 1979
|
5
|
+
language: iso-639-2:kaz
|
6
|
+
source_script: Cyrl
|
7
|
+
destination_script: Latn
|
8
|
+
name: Romanization of Kazakh
|
9
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811511/ROMANIZATION_OF_KAZAKH.pdf
|
10
|
+
creation_date: 1979
|
11
|
+
confirmation_date: 2019
|
12
|
+
description: |
|
13
|
+
The BGN/PCGN system for Kazakh was designed for use in romanizing names written in the Kazakh Cyrillic alphabet.
|
14
|
+
The Kazakh Cyrillic alphabet contains nine characters not present in the Russian alphabet: ә, ғ, қ, ң, ө, ұ, ү, һ and і.
|
15
|
+
|
16
|
+
notes:
|
17
|
+
- The character sequences гһ, зһ, кһ, нг, сһ and цһ may be romanized g·h, z·h, k·h, n·g, s·h
|
18
|
+
and ts·h in order to differentiate those romanizations from the digraphs gh, zh, kh, ng, sh,
|
19
|
+
and the letter sequence tsh, which are used to render the characters ғ, ж, х, ң, ш, and the character sequence тш.
|
20
|
+
- The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
|
21
|
+
- Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character.
|
22
|
+
- The Kazakh government has adopted a programme to move to using the Roman-script as the principal writing system for Kazakh.
|
23
|
+
- 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
|
24
|
+
unmodified letters of the basic Roman script is:
|
25
|
+
Ä (U+00C4) ä (U+00E4)
|
26
|
+
Ī (U+012A) ī (U+012B)
|
27
|
+
Ö (U+00D6) ö (U+00F6)
|
28
|
+
Ū (U+016A) ū (U+016B)
|
29
|
+
Ü (U+00DC) ü (U+00FC)
|
30
|
+
” (U+201D) ’ (U+2019)
|
31
|
+
Ė (U+0116) ė (U+0117)'
|
32
|
+
- The Romanization column shows only lowercase forms but, when romanizing, uppercase and
|
33
|
+
lowercase Roman letters as appropriate should be used.
|
34
|
+
|
35
|
+
tests:
|
36
|
+
- source: Өңірек
|
37
|
+
expected: Öngirek
|
38
|
+
- source: Өтебас Артезиан Құдығы
|
39
|
+
expected: Ötebas Artezīan Qudyghy
|
40
|
+
- source: Өскенбай
|
41
|
+
expected: Öskenbay
|
42
|
+
- source: Өсек Көлі
|
43
|
+
expected: Ösek Köli
|
44
|
+
- source: Өрмексу
|
45
|
+
expected: Örmeksū
|
46
|
+
- source: Өмірзақ
|
47
|
+
expected: Ömirzaq
|
48
|
+
- source: Өлеңті
|
49
|
+
expected: Ölengti
|
50
|
+
- source: Өл-Фараби Даңғылы
|
51
|
+
expected: Öl-Farabī Dangghyly
|
52
|
+
- source: Өкпекті Тауы
|
53
|
+
expected: Ökpekti Taūy
|
54
|
+
- source: Өкенсоркен Қыстауы
|
55
|
+
expected: Ökensorken Qystaūy
|
56
|
+
- source: Өзен Ойысы
|
57
|
+
expected: Özen Oyysy
|
58
|
+
- source: Өзен
|
59
|
+
expected: Özen
|
60
|
+
- source: Өгізтөбе Тауы
|
61
|
+
expected: Ögiztöbe Taūy
|
62
|
+
- source: Өгізтау Қыстауы
|
63
|
+
expected: Ögiztaū Qystaūy
|
64
|
+
- source: Өгізмүйіз Тауы
|
65
|
+
expected: Ögizmüyiz Taūy
|
66
|
+
- source: Өгізбұлақ
|
67
|
+
expected: Ögizbulaq
|
68
|
+
- source: Өгіз Үреулі
|
69
|
+
expected: Ögiz Üreūli
|
70
|
+
- source: Өгем Жотасы
|
71
|
+
expected: Ögem Zhotasy
|
72
|
+
- source: Өгем
|
73
|
+
expected: Ögem
|
74
|
+
- source: Әшім
|
75
|
+
expected: Äshim
|
76
|
+
- source: Әулиетөбе Тауы
|
77
|
+
expected: Äūlīetöbe Taūy
|
78
|
+
- source: Әулиекөл
|
79
|
+
expected: Äūlīeköl
|
80
|
+
- source: Әндіжан Құдығы
|
81
|
+
expected: Ändizhan Qudyghy
|
82
|
+
- source: Ұясай
|
83
|
+
expected: Uyasay
|
84
|
+
- source: Ұялы Метеорологиялық Станциясы
|
85
|
+
expected: Uyaly Meteorologīyalyq Stantsīyasy
|
86
|
+
- source: Ұшқын Қыстауы
|
87
|
+
expected: Ushqyn Qystaūy
|
88
|
+
- source: Үңгіртас
|
89
|
+
expected: Ünggirtas
|
90
|
+
- source: Үшқұлын
|
91
|
+
expected: Üshqulyn
|
92
|
+
- source: Құтырғы Асуы
|
93
|
+
expected: Qutyrghy Asūy
|
94
|
+
- source: Ярмы Стансасы
|
95
|
+
expected: Yarmy Stansasy
|
96
|
+
- source: Юпитер Қыстауы
|
97
|
+
expected: Yupīter Qystaūy
|
98
|
+
- source: Энгельс Көшесi
|
99
|
+
expected: Ėngel’s Köshesi
|
100
|
+
- source: Ырғызбай Жайлауы
|
101
|
+
expected: Yrghyzbay Zhaylaūy
|
102
|
+
- source: Щебнюха Тауы
|
103
|
+
expected: Shchebnyukha Taūy
|
104
|
+
- source: Шөміштікөл Соры
|
105
|
+
expected: Shömishtiköl Sory
|
106
|
+
- source: Чалов Барак Қыстауы
|
107
|
+
expected: Chalov Barak Qystaūy
|
108
|
+
- source: Чайкино
|
109
|
+
expected: Chaykīno
|
110
|
+
- source: Цуриковка
|
111
|
+
expected: Tsūrīkovka
|
112
|
+
- source: Хамитқора Қыстауы
|
113
|
+
expected: Khamītqora Qystaūy
|
114
|
+
- source: Фыкалка
|
115
|
+
expected: Fykalka
|
116
|
+
- source: Уақбай Қыстауы
|
117
|
+
expected: Ūaqbay Qystaūy
|
118
|
+
- source: Төңірекшың Тоғайы
|
119
|
+
expected: Töngirekshyng Toghayy
|
120
|
+
- source: Сабағали Қыстауы
|
121
|
+
expected: Sabaghalī Qystaūy
|
122
|
+
- source: Рысқұлов Даңғылы
|
123
|
+
expected: Rysqulov Dangghyly
|
124
|
+
- source: Пірназар Құдығы
|
125
|
+
expected: Pirnazar Qudyghy
|
126
|
+
- source: Оңтүстік Қазақстан Облысы
|
127
|
+
expected: Ongtüstik Qazaqstan Oblysy
|
128
|
+
- source: Нөмір Үшінші Суторабының Бөгені
|
129
|
+
expected: Nömir Üshinshi Sūtorabynyng Bögeni
|
130
|
+
- source: Мәмбетқазған Құдығы
|
131
|
+
expected: Mämbetqazghan Qudyghy
|
132
|
+
- source: Мемлекеттік Аудандық Электр Стансасы - Бір
|
133
|
+
expected: Memlekettik Aūdandyq Ėlektr Stansasy - Bir
|
134
|
+
- source: Линейский Белок Тауы
|
135
|
+
expected: Līneyskīy Belok Taūy
|
136
|
+
- source: Көшердік Бөгені
|
137
|
+
expected: Kösherdik Bögeni
|
138
|
+
- source: Көлфонтан Артезиан Құдығы
|
139
|
+
expected: Kölfontan Artezīan Qudyghy
|
140
|
+
- source: Изендіарал Мүйісі
|
141
|
+
expected: Īzendiaral Müyisi
|
142
|
+
- source: Злиха Метеорологиялық Станциасы
|
143
|
+
expected: Zlīkha Meteorologīyalyq Stantsīasy
|
144
|
+
- source: Жұлжұрған Көлі
|
145
|
+
expected: Zhulzhurghan Köli
|
146
|
+
- source: Ескі Үшал Қыстауы
|
147
|
+
expected: Eski Üshal Qystaūy
|
148
|
+
- source: Дөңгелексор Қыстауы
|
149
|
+
expected: Dönggeleksor Qystaūy
|
150
|
+
- source: Горько-Солёное Көлі
|
151
|
+
expected: Gor’ko-Solyonoe Köli
|
152
|
+
- source: Вагулино
|
153
|
+
expected: Vagūlīno
|
154
|
+
- source: Бөстай Учаскесі
|
155
|
+
expected: Böstay Ūchaskesi
|
156
|
+
- source: Аққолқы Тоғайы
|
157
|
+
expected: Aqqolqy Toghayy
|
158
|
+
- source: Іңқардария
|
159
|
+
expected: Ingqardarīya
|
160
|
+
|
161
|
+
map:
|
162
|
+
characters:
|
163
|
+
'\u0410': 'A' # А
|
164
|
+
'\u04D8': 'Ä' # Ә
|
165
|
+
'\u0411': 'B' # Б
|
166
|
+
'\u0412': 'V' # В
|
167
|
+
'\u0413': 'G' # Г
|
168
|
+
'\u0492': 'Gh' # Ғ
|
169
|
+
'\u0414': 'D' # Д
|
170
|
+
'\u0415': 'E' # Е
|
171
|
+
'\u0401': 'Yo' # Ё
|
172
|
+
'\u0416': 'Zh' # Ж
|
173
|
+
'\u0417': 'Z' # З
|
174
|
+
'\u0418': 'Ī' # И
|
175
|
+
'\u0419': 'Y' # Й
|
176
|
+
'\u041A': 'K' # К
|
177
|
+
'\u049A': 'Q' # Қ
|
178
|
+
'\u041B': 'L' # Л
|
179
|
+
'\u041C': 'M' # М
|
180
|
+
'\u041D': 'N' # Н
|
181
|
+
'\u04A2': 'Ng' # Ң
|
182
|
+
'\u041E': 'O' # О
|
183
|
+
'\u04E8': 'Ö' # Ө
|
184
|
+
'\u041F': 'P' # П
|
185
|
+
'\u0420': 'R' # Р
|
186
|
+
'\u0421': 'S' # С
|
187
|
+
'\u0422': 'T' # Т
|
188
|
+
'\u0423': 'Ū' # У
|
189
|
+
'\u04B0': 'U' # Ұ
|
190
|
+
'\u04AE': 'Ü' # Ү
|
191
|
+
'\u0424': 'F' # Ф
|
192
|
+
'\u0425': 'Kh' # Х
|
193
|
+
'\u04BA': 'H' # Һ
|
194
|
+
'\u0426': 'Ts' # Ц
|
195
|
+
'\u0427': 'Ch' # Ч
|
196
|
+
'\u0428': 'Sh' # Ш
|
197
|
+
'\u0429': 'Shch' # Щ
|
198
|
+
'\u042A': '”' # Ъ
|
199
|
+
'\u042B': 'Y' # Ы
|
200
|
+
'\u0406': 'I' # І
|
201
|
+
'\u042C': '’' # Ь
|
202
|
+
'\u042D': 'Ė' # Э
|
203
|
+
'\u042E': 'Yu' # Ю
|
204
|
+
'\u042F': 'Ya' # Я
|
205
|
+
|
206
|
+
'\u0430': 'a' # а
|
207
|
+
'\u04D9': 'ä' # ә
|
208
|
+
'\u0431': 'b' # б
|
209
|
+
'\u0432': 'v' # в
|
210
|
+
'\u0433': 'g' # г
|
211
|
+
'\u0493': 'gh' # ғ
|
212
|
+
'\u0434': 'd' # д
|
213
|
+
'\u0435': 'e' # e
|
214
|
+
'\u0451': 'yo' # ё
|
215
|
+
'\u0436': 'zh' # ж
|
216
|
+
'\u0437': 'z' # з
|
217
|
+
'\u0438': 'ī' # и
|
218
|
+
'\u0439': 'y' # й
|
219
|
+
'\u043A': 'k' # к
|
220
|
+
'\u049B': 'q' # қ
|
221
|
+
'\u043B': 'l' # л
|
222
|
+
'\u043C': 'm' # м
|
223
|
+
'\u043D': 'n' # н
|
224
|
+
'\u04A3': 'ng' # ң
|
225
|
+
'\u043E': 'o' # о
|
226
|
+
'\u04E9': 'ö' # ө
|
227
|
+
'\u043F': 'p' # п
|
228
|
+
'\u0440': 'r' # р
|
229
|
+
'\u0441': 's' # с
|
230
|
+
'\u0442': 't' # т
|
231
|
+
'\u0443': 'ū' # у
|
232
|
+
'\u04B1': 'u' # ұ
|
233
|
+
'\u04AF': 'ü' # ү
|
234
|
+
'\u0444': 'f' # ф
|
235
|
+
'\u0445': 'kh' # х
|
236
|
+
'\u04BB': 'h' # һ
|
237
|
+
'\u0446': 'ts' # ц
|
238
|
+
'\u0447': 'ch' # ч
|
239
|
+
'\u0448': 'sh' # ш
|
240
|
+
'\u0449': 'shch' # щ
|
241
|
+
'\u044A': '”' # ъ
|
242
|
+
'\u044B': 'y' # ы
|
243
|
+
'\u0456': 'i' # і
|
244
|
+
'\u044C': '’' # ь
|
245
|
+
'\u044D': 'ė' # э
|
246
|
+
'\u044E': 'yu' # ю
|
247
|
+
'\u044F': 'ya' # я
|
@@ -0,0 +1,218 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1979
|
4
|
+
language: iso-639-2:kir
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: Romanization of Kyrgyz
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816663/ROMANIZATION_OF_KYRGYZ.pdf
|
9
|
+
creation_date: 1979
|
10
|
+
confirmation_date: 2019
|
11
|
+
description: |
|
12
|
+
The BGN/PCGN system for Kyrgyz Cyrillic was designed for use in romanizing names written
|
13
|
+
in the Kyrgyz Cyrillic alphabet. The Kyrgyz Cyrillic alphabet contains three characters not present in
|
14
|
+
the Russian alphabet: Ң, Ө, and Ү.
|
15
|
+
|
16
|
+
notes:
|
17
|
+
- Both Kyrgyz and Kirghiz may frequently be seen as the language name; both these spellings are used in the
|
18
|
+
ISO 639 Standard on the representation of names for languages.
|
19
|
+
- The character sequence нг may be romanized n·g in order to differentiate that romanization
|
20
|
+
from the digraph ng, which is used to render the character ң.
|
21
|
+
- The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
|
22
|
+
- 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
|
23
|
+
unmodified letters of the basic Roman script is:
|
24
|
+
All apostrophes appearing in romanization are U+2019
|
25
|
+
Ö (U+00D6) ö (U+00F6)
|
26
|
+
Ü (U+00DC) ü (U+00FC)'
|
27
|
+
- The Romanization column shows only lowercase forms but, when romanizing, uppercase and
|
28
|
+
lowercase Roman letters as appropriate should be used.
|
29
|
+
|
30
|
+
tests:
|
31
|
+
- source: Ысык-Көл Облусу
|
32
|
+
expected: Ysyk-Köl Oblusu
|
33
|
+
- source: Ысык-Көл
|
34
|
+
expected: Ysyk-Köl
|
35
|
+
- source: Шедвик-Сай
|
36
|
+
expected: Shedvik-Say
|
37
|
+
- source: Чүй Облусу
|
38
|
+
expected: Chüy Oblusu
|
39
|
+
- source: Чүй
|
40
|
+
expected: Chüy
|
41
|
+
- source: Чирик-Сай
|
42
|
+
expected: Chirik-Say
|
43
|
+
- source: Хребет Джети-Сандал
|
44
|
+
expected: Khrebet Djeti-Sandal
|
45
|
+
- source: Узук-Булак
|
46
|
+
expected: Uzuk-Bulak
|
47
|
+
- source: Торугарт Ашуу
|
48
|
+
expected: Torugart Ashuu
|
49
|
+
- source: Торетал
|
50
|
+
expected: Toretal
|
51
|
+
- source: Терек
|
52
|
+
expected: Terek
|
53
|
+
- source: Талды-Булак
|
54
|
+
expected: Taldy-Bulak
|
55
|
+
- source: Талас Облусу
|
56
|
+
expected: Talas Oblusu
|
57
|
+
- source: Талас
|
58
|
+
expected: Talas
|
59
|
+
- source: Сарык-Кёль
|
60
|
+
expected: Saryk-Kyol’
|
61
|
+
- source: Родник Кара-Суу
|
62
|
+
expected: Rodnik Kara-Suu
|
63
|
+
- source: Родник Бейрёк-Булак
|
64
|
+
expected: Rodnik Beyryok-Bulak
|
65
|
+
- source: Перевал Сары-Челек
|
66
|
+
expected: Pereval Sary-Chelek
|
67
|
+
- source: Перевал Макмал
|
68
|
+
expected: Pereval Makmal
|
69
|
+
- source: Перевал Кара-Токой
|
70
|
+
expected: Pereval Kara-Tokoy
|
71
|
+
- source: Перевал Ашуу-Тёр
|
72
|
+
expected: Pereval Ashuu-Tyor
|
73
|
+
- source: Перевал Ашуу
|
74
|
+
expected: Pereval Ashuu
|
75
|
+
- source: Ош Шаары
|
76
|
+
expected: Osh Shaary
|
77
|
+
- source: Ош Облусу
|
78
|
+
expected: Osh Oblusu
|
79
|
+
- source: Ош
|
80
|
+
expected: Osh
|
81
|
+
- source: Ош
|
82
|
+
expected: Osh
|
83
|
+
- source: Осоавиахим
|
84
|
+
expected: Osoaviakhim
|
85
|
+
- source: Озеро Афлатук
|
86
|
+
expected: Ozero Aflatuk
|
87
|
+
- source: Нарын Облусу
|
88
|
+
expected: Naryn Oblusu
|
89
|
+
- source: Нарын
|
90
|
+
expected: Naryn
|
91
|
+
- source: Метеорологическая Станция Чамкал
|
92
|
+
expected: Meteorologicheskaya Stantsiya Chamkal
|
93
|
+
- source: Марза-Булак
|
94
|
+
expected: Marza-Bulak
|
95
|
+
- source: Макмал
|
96
|
+
expected: Makmal
|
97
|
+
- source: Кыргызстан
|
98
|
+
expected: Kyrgyzstan
|
99
|
+
- source: Кыргыз Республикасы
|
100
|
+
expected: Kyrgyz Respublikasy
|
101
|
+
- source: Куру-Сай
|
102
|
+
expected: Kuru-Say
|
103
|
+
- source: Куру-Сай
|
104
|
+
expected: Kuru-Say
|
105
|
+
- source: Кур-Пырылды
|
106
|
+
expected: Kur-Pyryldy
|
107
|
+
- source: Кок-Бель-Таш
|
108
|
+
expected: Kok-Bel’-Tash
|
109
|
+
- source: Кичи-Сандык
|
110
|
+
expected: Kichi-Sandyk
|
111
|
+
- source: Кель-Сай
|
112
|
+
expected: Kel’-Say
|
113
|
+
- source: Карагайлы
|
114
|
+
expected: Karagayly
|
115
|
+
- source: Кара-Суу
|
116
|
+
expected: Kara-Suu
|
117
|
+
- source: Жалал-Абад Облусу
|
118
|
+
expected: Jalal-Abad Oblusu
|
119
|
+
- source: Жалал-Абад
|
120
|
+
expected: Jalal-Abad
|
121
|
+
- source: Долина Беш-Башат
|
122
|
+
expected: Dolina Besh-Bashat
|
123
|
+
- source: Гора Арпа-Турча
|
124
|
+
expected: Gora Arpa-Turcha
|
125
|
+
- source: Бишкек Шаары
|
126
|
+
expected: Bishkek Shaary
|
127
|
+
- source: Бишкек
|
128
|
+
expected: Bishkek
|
129
|
+
- source: Бишкек
|
130
|
+
expected: Bishkek
|
131
|
+
- source: Баткен Облусу
|
132
|
+
expected: Batken Oblusu
|
133
|
+
- source: Баткен
|
134
|
+
expected: Batken
|
135
|
+
- source: Аяк-Терек
|
136
|
+
expected: Ayak-Terek
|
137
|
+
- source: Аюу-Чача
|
138
|
+
expected: Ayuu-Chacha
|
139
|
+
- source: Арпа
|
140
|
+
expected: Arpa
|
141
|
+
- source: Ак-Суу
|
142
|
+
expected: Ak-Suu
|
143
|
+
|
144
|
+
map:
|
145
|
+
characters:
|
146
|
+
'\u0410': 'A' # А
|
147
|
+
'\u0411': 'B' # Б
|
148
|
+
'\u0412': 'V' # В
|
149
|
+
'\u0413': 'G' # Г
|
150
|
+
'\u0414': 'D' # Д
|
151
|
+
'\u0415': 'E' # Е
|
152
|
+
'\u0401': 'Yo' # Ё
|
153
|
+
'\u0416': 'J' # Ж
|
154
|
+
'\u0417': 'Z' # З
|
155
|
+
'\u0418': 'I' # И
|
156
|
+
'\u0419': 'Y' # Й
|
157
|
+
'\u041A': 'K' # К
|
158
|
+
'\u041B': 'L' # Л
|
159
|
+
'\u041C': 'M' # М
|
160
|
+
'\u041D': 'N' # Н
|
161
|
+
'\u04A2': 'Ng' # Ң
|
162
|
+
'\u041E': 'O' # О
|
163
|
+
'\u04E8': 'Ö' # Ө
|
164
|
+
'\u041F': 'P' # П
|
165
|
+
'\u0420': 'R' # Р
|
166
|
+
'\u0421': 'S' # С
|
167
|
+
'\u0422': 'T' # Т
|
168
|
+
'\u0423': 'U' # У
|
169
|
+
'\u04AE': 'Ü' # Ү
|
170
|
+
'\u0424': 'F' # Ф
|
171
|
+
'\u0425': 'Kh' # Х
|
172
|
+
'\u0426': 'Ts' # Ц
|
173
|
+
'\u0427': 'Ch' # Ч
|
174
|
+
'\u0428': 'Sh' # Ш
|
175
|
+
'\u0429': 'Shch' # Щ
|
176
|
+
'\u042A': '”' # Ъ
|
177
|
+
'\u042B': 'Y' # Ы
|
178
|
+
'\u042C': '’' # Ь
|
179
|
+
'\u042D': 'E' # Э
|
180
|
+
'\u042E': 'Yu' # Ю
|
181
|
+
'\u042F': 'Ya' # Я
|
182
|
+
|
183
|
+
'\u0430': 'a' # а
|
184
|
+
'\u0431': 'b' # б
|
185
|
+
'\u0432': 'v' # в
|
186
|
+
'\u0433': 'g' # г
|
187
|
+
'\u0434': 'd' # д
|
188
|
+
'\u0435': 'e' # e
|
189
|
+
'\u0451': 'yo' # ё
|
190
|
+
'\u0436': 'j' # ж
|
191
|
+
'\u0437': 'z' # з
|
192
|
+
'\u0438': 'i' # и
|
193
|
+
'\u0439': 'y' # й
|
194
|
+
'\u043A': 'k' # к
|
195
|
+
'\u043B': 'l' # л
|
196
|
+
'\u043C': 'm' # м
|
197
|
+
'\u043D': 'n' # н
|
198
|
+
'\u04A3': 'ng' # ң
|
199
|
+
'\u043E': 'o' # о
|
200
|
+
'\u04E9': 'ö' # ө
|
201
|
+
'\u043F': 'p' # п
|
202
|
+
'\u0440': 'r' # р
|
203
|
+
'\u0441': 's' # с
|
204
|
+
'\u0442': 't' # т
|
205
|
+
'\u0443': 'u' # у
|
206
|
+
'\u04AF': 'ü' # ү
|
207
|
+
'\u0444': 'f' # ф
|
208
|
+
'\u0445': 'kh' # х
|
209
|
+
'\u0446': 'ts' # ц
|
210
|
+
'\u0447': 'ch' # ч
|
211
|
+
'\u0448': 'sh' # ш
|
212
|
+
'\u0449': 'shch' # щ
|
213
|
+
'\u044A': '”' # ъ
|
214
|
+
'\u044B': 'y' # ы
|
215
|
+
'\u044C': '’' # ь
|
216
|
+
'\u044D': 'e' # э
|
217
|
+
'\u044E': 'yu' # ю
|
218
|
+
'\u044F': 'ya' # я
|