interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -74,7 +74,7 @@ map:
74
74
  '\u0426': 'Ts'
75
75
  '\u0427': 'Ch'
76
76
  '\u0428': 'Sh'
77
- '\u0429': 'St'
77
+ '\u0429': 'Sht'
78
78
  '\u042a': "U\u0306"
79
79
  '\u042c': "\\'"
80
80
  '\u042e': 'Yu'
@@ -0,0 +1,75 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1988
4
+ language: iso-639-2:div
5
+ source_script: Thaa
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF MALDIVIAN BGN/PCGN 1988 Agreement, with modifications 2009
8
+ alias:
9
+ ogc11122:
10
+ code: div_Thaa2Latn_GMV_1988
11
+ description: Maldivian (Divehi) 1988 system
12
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816778/ROMANIZATION_OF_MALDIVIAN.pdf
13
+ creation_date: 1988
14
+ confirmation_date: 2019
15
+ description: |
16
+ This romanization system supersedes the one that was approved by BGN and PCGN in 1972. An
17
+ official system was submitted to PCGN by the Maldivian government in 1987 and approved by BGN and
18
+ PCGN in 1988. The system presented here reflects the 1988 Agreement with minor modifications introduced by the government of the Maldives in 2009.
19
+
20
+ notes:
21
+ - Maldivian is read from right to left.
22
+ - The symbol ◌ appearing in the Vowel Characters table represents any Maldivian consonant character.
23
+ - The character އ is not romanized. If it bears a vowel character, that vowel character alone is romanized (e.g. އެނބޫދޫ En’boodhoo).
24
+ - |
25
+ When characters ށ and އ appear in combination with a supercircle (the ‘sukun’, which usually marks
26
+ the absence of a vowel, see the Diacritical Mark table): ށ ; އ , these characters are not romanized
27
+ but the following consonant is doubled (e.g. ކަޅުހުރާ Kalhehuttaa), unless the following consonant is a digraph in the
28
+ romanized form, in which case they are romanized h (e.g. ކެރެށްދޫ Kerehdhoo). ށ and އ appearing at the end of a word are romanized h (e.g. ވޭވައް Veyvah).
29
+ - Noonu (ނ) is romanized n’ when appearing without any vowel or auxiliary sign (e.g. ކަނޑުފުށި Kan’dufushi)
30
+ - Thaa (ތ) is romanized iy when appearing in combination with a supercircle (ތ) ,( e.g. ޒިޔާރަތްފުށި Ziyaaraiyfushi).
31
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and
32
+ lowercase Roman letters as appropriate should be used.
33
+
34
+
35
+ tests:
36
+ - source: "އިރުގައި"
37
+ expected: "irugai"
38
+ - source: "ޒިޔާރަތްފުށި"
39
+ expected: "ziyaaraiyfushi"
40
+ - source: "ރައްކާތެރިކުރުމާއި"
41
+ expected: "rakkaatherikurumaai"
42
+ - source: "ޝަހީދުންގެ ދުވަސް"
43
+ expected: "sh’aheedhun’ge dhuvas"
44
+ - source: "މަރުޙަބާ"
45
+ expected: "maruh’abaa"
46
+ - source: "ކިހިނެހް"
47
+ expected: "kihin’eh"
48
+ - source: "ކޮން ނަމެއް ކިޔަނީ"
49
+ expected: "kon’ n’ameh kiyan’ee"
50
+ - source: "ބައްއަޖޖެވުރި ހެނދުނެހް"
51
+ expected: "baajjevuri hen’dhun’eh"
52
+ - source: "މެނދުރެހް"
53
+ expected: "men’dhureh"
54
+ - source: "ހަވީރެހް"
55
+ expected: "haveereh"
56
+
57
+ map:
58
+ inherit: mv-div-Thaa-Latn-1987
59
+ characters:
60
+ #Consonants with diacritical marks (used mainly in words of Arabic origin, corresponding
61
+ # Arabic characters and their romanizations are shown in parentheses)
62
+ "ޘ": "th’"
63
+ "ޙ": "h’"
64
+ "ޚ": "kh"
65
+ "ޛ": "dh’"
66
+ "ޜ": "x"
67
+ "ޝ": "sh’"
68
+ "ޞ": "s’"
69
+ "ޟ": "l’"
70
+ "ޠ": "t’"
71
+ "ޡ": "z’"
72
+ "ޢ": "’"
73
+ "ޣ": "gh"
74
+ "ޤ": "q"
75
+ "ޥ": "w"
@@ -0,0 +1,28 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1964
4
+ language: iso-639-2:far
5
+ source_script: Latn
6
+ destination_script: Latn
7
+ name: https://github.com/interscript/interscript/files/5180777/BGN_Romanization_Guide_1964_faeroese.pdf
8
+ creation_date: 1964
9
+ description: |
10
+ The Faeroese language is a dialect of Icelandic and, like Icelandic, employs the letter eth (Ð ð).
11
+ Unlike Icelandic, however, the Faeroese ð has the sound of y (as in "yes") before i, v before u, and is silent in all other cases.
12
+ The Icelandic letter thorn (Þ þ) does not occur in the writing of Faeroese.
13
+
14
+ To avoid the use of the unfamiliar symbol ð, the Board transliterates it as dh, as in Icelandic,
15
+ even though its pronunciation in the two languages is not the same.
16
+
17
+ tests:
18
+ - source: Fyrirgefðu
19
+ expected: Fyrirgefdhu
20
+ - source: Þakka
21
+ expected: Þakka
22
+
23
+ map:
24
+ inherit: bgnpcgn-isl-Latn-Latn-1964
25
+
26
+ characters:
27
+ "\u00DE": ~ # Þ transliteration removed
28
+ "\u00FE": ~ # þ transliteration removed
@@ -0,0 +1,37 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1964
4
+ language: iso-639-2:isl
5
+ source_script: Latn
6
+ destination_script: Latn
7
+ name: TRANSLITERATION OF ICELANDIC BGN/PCGN 1947 System
8
+ url: https://github.com/interscript/interscript/files/5180785/BGN_Romanization_Guide_1964_icelandic_1947.pdf
9
+ creation_date: 1964
10
+ description: |
11
+ The BGN and the PCGN in 1947 jointly agreed to the transliteration of two letters of the Icelandic alphabet which,
12
+ although used in writing Old English, have disappeared from the modern English alphabet.
13
+
14
+ The transliterated letters are the edh (Ð ð) and the thorn (Þ þ), pronounced as th in "thus" and th in "think," respectively.
15
+
16
+ It was felt that it was better to transliterate these letters into familiar symbols than preserve such unfamiliar letters in the nomenclature.
17
+
18
+ notes:
19
+ - More about "edh" letter - https://en.wikipedia.org/wiki/Eth
20
+ - More about "thorn" letter - https://en.wikipedia.org/wiki/Thorn_(letter)
21
+
22
+ tests:
23
+ - source: Fyrirgefðu
24
+ expected: Fyrirgefdhu
25
+ - source: þu ert velkominn
26
+ expected: thu ert velkominn
27
+ - source: GOÐAN DAGINN
28
+ expected: GODHAN DAGINN
29
+ - source: Þakka
30
+ expected: Thakka
31
+
32
+ map:
33
+ characters:
34
+ "\u00D0": "Dh" # Ð
35
+ "\u00F0": "dh" # ð
36
+ "\u00DE": "Th" # Þ
37
+ "\u00FE": "th" # þ
@@ -0,0 +1,247 @@
1
+
2
+ ---
3
+ authority_id: bgnpcgn
4
+ id: 1979
5
+ language: iso-639-2:kaz
6
+ source_script: Cyrl
7
+ destination_script: Latn
8
+ name: Romanization of Kazakh
9
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811511/ROMANIZATION_OF_KAZAKH.pdf
10
+ creation_date: 1979
11
+ confirmation_date: 2019
12
+ description: |
13
+ The BGN/PCGN system for Kazakh was designed for use in romanizing names written in the Kazakh Cyrillic alphabet.
14
+ The Kazakh Cyrillic alphabet contains nine characters not present in the Russian alphabet: ә, ғ, қ, ң, ө, ұ, ү, һ and і.
15
+
16
+ notes:
17
+ - The character sequences гһ, зһ, кһ, нг, сһ and цһ may be romanized g·h, z·h, k·h, n·g, s·h
18
+ and ts·h in order to differentiate those romanizations from the digraphs gh, zh, kh, ng, sh,
19
+ and the letter sequence tsh, which are used to render the characters ғ, ж, х, ң, ш, and the character sequence тш.
20
+ - The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
21
+ - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character.
22
+ - The Kazakh government has adopted a programme to move to using the Roman-script as the principal writing system for Kazakh.
23
+ - 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
24
+ unmodified letters of the basic Roman script is:
25
+ Ä (U+00C4) ä (U+00E4)
26
+ Ī (U+012A) ī (U+012B)
27
+ Ö (U+00D6) ö (U+00F6)
28
+ Ū (U+016A) ū (U+016B)
29
+ Ü (U+00DC) ü (U+00FC)
30
+ ” (U+201D) ’ (U+2019)
31
+ Ė (U+0116) ė (U+0117)'
32
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and
33
+ lowercase Roman letters as appropriate should be used.
34
+
35
+ tests:
36
+ - source: Өңірек
37
+ expected: Öngirek
38
+ - source: Өтебас Артезиан Құдығы
39
+ expected: Ötebas Artezīan Qudyghy
40
+ - source: Өскенбай
41
+ expected: Öskenbay
42
+ - source: Өсек Көлі
43
+ expected: Ösek Köli
44
+ - source: Өрмексу
45
+ expected: Örmeksū
46
+ - source: Өмірзақ
47
+ expected: Ömirzaq
48
+ - source: Өлеңті
49
+ expected: Ölengti
50
+ - source: Өл-Фараби Даңғылы
51
+ expected: Öl-Farabī Dangghyly
52
+ - source: Өкпекті Тауы
53
+ expected: Ökpekti Taūy
54
+ - source: Өкенсоркен Қыстауы
55
+ expected: Ökensorken Qystaūy
56
+ - source: Өзен Ойысы
57
+ expected: Özen Oyysy
58
+ - source: Өзен
59
+ expected: Özen
60
+ - source: Өгізтөбе Тауы
61
+ expected: Ögiztöbe Taūy
62
+ - source: Өгізтау Қыстауы
63
+ expected: Ögiztaū Qystaūy
64
+ - source: Өгізмүйіз Тауы
65
+ expected: Ögizmüyiz Taūy
66
+ - source: Өгізбұлақ
67
+ expected: Ögizbulaq
68
+ - source: Өгіз Үреулі
69
+ expected: Ögiz Üreūli
70
+ - source: Өгем Жотасы
71
+ expected: Ögem Zhotasy
72
+ - source: Өгем
73
+ expected: Ögem
74
+ - source: Әшім
75
+ expected: Äshim
76
+ - source: Әулиетөбе Тауы
77
+ expected: Äūlīetöbe Taūy
78
+ - source: Әулиекөл
79
+ expected: Äūlīeköl
80
+ - source: Әндіжан Құдығы
81
+ expected: Ändizhan Qudyghy
82
+ - source: Ұясай
83
+ expected: Uyasay
84
+ - source: Ұялы Метеорологиялық Станциясы
85
+ expected: Uyaly Meteorologīyalyq Stantsīyasy
86
+ - source: Ұшқын Қыстауы
87
+ expected: Ushqyn Qystaūy
88
+ - source: Үңгіртас
89
+ expected: Ünggirtas
90
+ - source: Үшқұлын
91
+ expected: Üshqulyn
92
+ - source: Құтырғы Асуы
93
+ expected: Qutyrghy Asūy
94
+ - source: Ярмы Стансасы
95
+ expected: Yarmy Stansasy
96
+ - source: Юпитер Қыстауы
97
+ expected: Yupīter Qystaūy
98
+ - source: Энгельс Көшесi
99
+ expected: Ėngel’s Köshesi
100
+ - source: Ырғызбай Жайлауы
101
+ expected: Yrghyzbay Zhaylaūy
102
+ - source: Щебнюха Тауы
103
+ expected: Shchebnyukha Taūy
104
+ - source: Шөміштікөл Соры
105
+ expected: Shömishtiköl Sory
106
+ - source: Чалов Барак Қыстауы
107
+ expected: Chalov Barak Qystaūy
108
+ - source: Чайкино
109
+ expected: Chaykīno
110
+ - source: Цуриковка
111
+ expected: Tsūrīkovka
112
+ - source: Хамитқора Қыстауы
113
+ expected: Khamītqora Qystaūy
114
+ - source: Фыкалка
115
+ expected: Fykalka
116
+ - source: Уақбай Қыстауы
117
+ expected: Ūaqbay Qystaūy
118
+ - source: Төңірекшың Тоғайы
119
+ expected: Töngirekshyng Toghayy
120
+ - source: Сабағали Қыстауы
121
+ expected: Sabaghalī Qystaūy
122
+ - source: Рысқұлов Даңғылы
123
+ expected: Rysqulov Dangghyly
124
+ - source: Пірназар Құдығы
125
+ expected: Pirnazar Qudyghy
126
+ - source: Оңтүстік Қазақстан Облысы
127
+ expected: Ongtüstik Qazaqstan Oblysy
128
+ - source: Нөмір Үшінші Суторабының Бөгені
129
+ expected: Nömir Üshinshi Sūtorabynyng Bögeni
130
+ - source: Мәмбетқазған Құдығы
131
+ expected: Mämbetqazghan Qudyghy
132
+ - source: Мемлекеттік Аудандық Электр Стансасы - Бір
133
+ expected: Memlekettik Aūdandyq Ėlektr Stansasy - Bir
134
+ - source: Линейский Белок Тауы
135
+ expected: Līneyskīy Belok Taūy
136
+ - source: Көшердік Бөгені
137
+ expected: Kösherdik Bögeni
138
+ - source: Көлфонтан Артезиан Құдығы
139
+ expected: Kölfontan Artezīan Qudyghy
140
+ - source: Изендіарал Мүйісі
141
+ expected: Īzendiaral Müyisi
142
+ - source: Злиха Метеорологиялық Станциасы
143
+ expected: Zlīkha Meteorologīyalyq Stantsīasy
144
+ - source: Жұлжұрған Көлі
145
+ expected: Zhulzhurghan Köli
146
+ - source: Ескі Үшал Қыстауы
147
+ expected: Eski Üshal Qystaūy
148
+ - source: Дөңгелексор Қыстауы
149
+ expected: Dönggeleksor Qystaūy
150
+ - source: Горько-Солёное Көлі
151
+ expected: Gor’ko-Solyonoe Köli
152
+ - source: Вагулино
153
+ expected: Vagūlīno
154
+ - source: Бөстай Учаскесі
155
+ expected: Böstay Ūchaskesi
156
+ - source: Аққолқы Тоғайы
157
+ expected: Aqqolqy Toghayy
158
+ - source: Іңқардария
159
+ expected: Ingqardarīya
160
+
161
+ map:
162
+ characters:
163
+ '\u0410': 'A' # А
164
+ '\u04D8': 'Ä' # Ә
165
+ '\u0411': 'B' # Б
166
+ '\u0412': 'V' # В
167
+ '\u0413': 'G' # Г
168
+ '\u0492': 'Gh' # Ғ
169
+ '\u0414': 'D' # Д
170
+ '\u0415': 'E' # Е
171
+ '\u0401': 'Yo' # Ё
172
+ '\u0416': 'Zh' # Ж
173
+ '\u0417': 'Z' # З
174
+ '\u0418': 'Ī' # И
175
+ '\u0419': 'Y' # Й
176
+ '\u041A': 'K' # К
177
+ '\u049A': 'Q' # Қ
178
+ '\u041B': 'L' # Л
179
+ '\u041C': 'M' # М
180
+ '\u041D': 'N' # Н
181
+ '\u04A2': 'Ng' # Ң
182
+ '\u041E': 'O' # О
183
+ '\u04E8': 'Ö' # Ө
184
+ '\u041F': 'P' # П
185
+ '\u0420': 'R' # Р
186
+ '\u0421': 'S' # С
187
+ '\u0422': 'T' # Т
188
+ '\u0423': 'Ū' # У
189
+ '\u04B0': 'U' # Ұ
190
+ '\u04AE': 'Ü' # Ү
191
+ '\u0424': 'F' # Ф
192
+ '\u0425': 'Kh' # Х
193
+ '\u04BA': 'H' # Һ
194
+ '\u0426': 'Ts' # Ц
195
+ '\u0427': 'Ch' # Ч
196
+ '\u0428': 'Sh' # Ш
197
+ '\u0429': 'Shch' # Щ
198
+ '\u042A': '”' # Ъ
199
+ '\u042B': 'Y' # Ы
200
+ '\u0406': 'I' # І
201
+ '\u042C': '’' # Ь
202
+ '\u042D': 'Ė' # Э
203
+ '\u042E': 'Yu' # Ю
204
+ '\u042F': 'Ya' # Я
205
+
206
+ '\u0430': 'a' # а
207
+ '\u04D9': 'ä' # ә
208
+ '\u0431': 'b' # б
209
+ '\u0432': 'v' # в
210
+ '\u0433': 'g' # г
211
+ '\u0493': 'gh' # ғ
212
+ '\u0434': 'd' # д
213
+ '\u0435': 'e' # e
214
+ '\u0451': 'yo' # ё
215
+ '\u0436': 'zh' # ж
216
+ '\u0437': 'z' # з
217
+ '\u0438': 'ī' # и
218
+ '\u0439': 'y' # й
219
+ '\u043A': 'k' # к
220
+ '\u049B': 'q' # қ
221
+ '\u043B': 'l' # л
222
+ '\u043C': 'm' # м
223
+ '\u043D': 'n' # н
224
+ '\u04A3': 'ng' # ң
225
+ '\u043E': 'o' # о
226
+ '\u04E9': 'ö' # ө
227
+ '\u043F': 'p' # п
228
+ '\u0440': 'r' # р
229
+ '\u0441': 's' # с
230
+ '\u0442': 't' # т
231
+ '\u0443': 'ū' # у
232
+ '\u04B1': 'u' # ұ
233
+ '\u04AF': 'ü' # ү
234
+ '\u0444': 'f' # ф
235
+ '\u0445': 'kh' # х
236
+ '\u04BB': 'h' # һ
237
+ '\u0446': 'ts' # ц
238
+ '\u0447': 'ch' # ч
239
+ '\u0448': 'sh' # ш
240
+ '\u0449': 'shch' # щ
241
+ '\u044A': '”' # ъ
242
+ '\u044B': 'y' # ы
243
+ '\u0456': 'i' # і
244
+ '\u044C': '’' # ь
245
+ '\u044D': 'ė' # э
246
+ '\u044E': 'yu' # ю
247
+ '\u044F': 'ya' # я
@@ -0,0 +1,218 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1979
4
+ language: iso-639-2:kir
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Romanization of Kyrgyz
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816663/ROMANIZATION_OF_KYRGYZ.pdf
9
+ creation_date: 1979
10
+ confirmation_date: 2019
11
+ description: |
12
+ The BGN/PCGN system for Kyrgyz Cyrillic was designed for use in romanizing names written
13
+ in the Kyrgyz Cyrillic alphabet. The Kyrgyz Cyrillic alphabet contains three characters not present in
14
+ the Russian alphabet: Ң, Ө, and Ү.
15
+
16
+ notes:
17
+ - Both Kyrgyz and Kirghiz may frequently be seen as the language name; both these spellings are used in the
18
+ ISO 639 Standard on the representation of names for languages.
19
+ - The character sequence н г may be romanized n·g in order to differentiate that romanization
20
+ from the digraph ng, which is used to render the character ң.
21
+ - The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
22
+ - 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
23
+ unmodified letters of the basic Roman script is:
24
+ All apostrophes appearing in romanization are U+2019
25
+ Ö (U+00D6) ö (U+00F6)
26
+ Ü (U+00DC) ü (U+00FC)'
27
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and
28
+ lowercase Roman letters as appropriate should be used.
29
+
30
+ tests:
31
+ - source: Ысык-Көл Облусу
32
+ expected: Ysyk-Köl Oblusu
33
+ - source: Ысык-Көл
34
+ expected: Ysyk-Köl
35
+ - source: Шедвик-Сай
36
+ expected: Shedvik-Say
37
+ - source: Чүй Облусу
38
+ expected: Chüy Oblusu
39
+ - source: Чүй
40
+ expected: Chüy
41
+ - source: Чирик-Сай
42
+ expected: Chirik-Say
43
+ - source: Хребет Джети-Сандал
44
+ expected: Khrebet Djeti-Sandal
45
+ - source: Узук-Булак
46
+ expected: Uzuk-Bulak
47
+ - source: Торугарт Ашуу
48
+ expected: Torugart Ashuu
49
+ - source: Торетал
50
+ expected: Toretal
51
+ - source: Терек
52
+ expected: Terek
53
+ - source: Талды-Булак
54
+ expected: Taldy-Bulak
55
+ - source: Талас Облусу
56
+ expected: Talas Oblusu
57
+ - source: Талас
58
+ expected: Talas
59
+ - source: Сарык-Кёль
60
+ expected: Saryk-Kyol’
61
+ - source: Родник Кара-Суу
62
+ expected: Rodnik Kara-Suu
63
+ - source: Родник Бейрёк-Булак
64
+ expected: Rodnik Beyryok-Bulak
65
+ - source: Перевал Сары-Челек
66
+ expected: Pereval Sary-Chelek
67
+ - source: Перевал Макмал
68
+ expected: Pereval Makmal
69
+ - source: Перевал Кара-Токой
70
+ expected: Pereval Kara-Tokoy
71
+ - source: Перевал Ашуу-Тёр
72
+ expected: Pereval Ashuu-Tyor
73
+ - source: Перевал Ашуу
74
+ expected: Pereval Ashuu
75
+ - source: Ош Шаары
76
+ expected: Osh Shaary
77
+ - source: Ош Облусу
78
+ expected: Osh Oblusu
79
+ - source: Ош
80
+ expected: Osh
81
+ - source: Ош
82
+ expected: Osh
83
+ - source: Осоавиахим
84
+ expected: Osoaviakhim
85
+ - source: Озеро Афлатук
86
+ expected: Ozero Aflatuk
87
+ - source: Нарын Облусу
88
+ expected: Naryn Oblusu
89
+ - source: Нарын
90
+ expected: Naryn
91
+ - source: Метеорологическая Станция Чамкал
92
+ expected: Meteorologicheskaya Stantsiya Chamkal
93
+ - source: Марза-Булак
94
+ expected: Marza-Bulak
95
+ - source: Макмал
96
+ expected: Makmal
97
+ - source: Кыргызстан
98
+ expected: Kyrgyzstan
99
+ - source: Кыргыз Республикасы
100
+ expected: Kyrgyz Respublikasy
101
+ - source: Куру-Сай
102
+ expected: Kuru-Say
103
+ - source: Куру-Сай
104
+ expected: Kuru-Say
105
+ - source: Кур-Пырылды
106
+ expected: Kur-Pyryldy
107
+ - source: Кок-Бель-Таш
108
+ expected: Kok-Bel’-Tash
109
+ - source: Кичи-Сандык
110
+ expected: Kichi-Sandyk
111
+ - source: Кель-Сай
112
+ expected: Kel’-Say
113
+ - source: Карагайлы
114
+ expected: Karagayly
115
+ - source: Кара-Суу
116
+ expected: Kara-Suu
117
+ - source: Жалал-Абад Облусу
118
+ expected: Jalal-Abad Oblusu
119
+ - source: Жалал-Абад
120
+ expected: Jalal-Abad
121
+ - source: Долина Беш-Башат
122
+ expected: Dolina Besh-Bashat
123
+ - source: Гора Арпа-Турча
124
+ expected: Gora Arpa-Turcha
125
+ - source: Бишкек Шаары
126
+ expected: Bishkek Shaary
127
+ - source: Бишкек
128
+ expected: Bishkek
129
+ - source: Бишкек
130
+ expected: Bishkek
131
+ - source: Баткен Облусу
132
+ expected: Batken Oblusu
133
+ - source: Баткен
134
+ expected: Batken
135
+ - source: Аяк-Терек
136
+ expected: Ayak-Terek
137
+ - source: Аюу-Чача
138
+ expected: Ayuu-Chacha
139
+ - source: Арпа
140
+ expected: Arpa
141
+ - source: Ак-Суу
142
+ expected: Ak-Suu
143
+
144
+ map:
145
+ characters:
146
+ '\u0410': 'A' # А
147
+ '\u0411': 'B' # Б
148
+ '\u0412': 'V' # В
149
+ '\u0413': 'G' # Г
150
+ '\u0414': 'D' # Д
151
+ '\u0415': 'E' # Е
152
+ '\u0401': 'Yo' # Ё
153
+ '\u0416': 'J' # Ж
154
+ '\u0417': 'Z' # З
155
+ '\u0418': 'I' # И
156
+ '\u0419': 'Y' # Й
157
+ '\u041A': 'K' # К
158
+ '\u041B': 'L' # Л
159
+ '\u041C': 'M' # М
160
+ '\u041D': 'N' # Н
161
+ '\u04A2': 'Ng' # Ң
162
+ '\u041E': 'O' # О
163
+ '\u04E8': 'Ö' # Ө
164
+ '\u041F': 'P' # П
165
+ '\u0420': 'R' # Р
166
+ '\u0421': 'S' # С
167
+ '\u0422': 'T' # Т
168
+ '\u0423': 'U' # У
169
+ '\u04AE': 'Ü' # Ү
170
+ '\u0424': 'F' # Ф
171
+ '\u0425': 'Kh' # Х
172
+ '\u0426': 'Ts' # Ц
173
+ '\u0427': 'Ch' # Ч
174
+ '\u0428': 'Sh' # Ш
175
+ '\u0429': 'Shch' # Щ
176
+ '\u042A': '”' # Ъ
177
+ '\u042B': 'Y' # Ы
178
+ '\u042C': '’' # Ь
179
+ '\u042D': 'E' # Э
180
+ '\u042E': 'Yu' # Ю
181
+ '\u042F': 'Ya' # Я
182
+
183
+ '\u0430': 'a' # а
184
+ '\u0431': 'b' # б
185
+ '\u0432': 'v' # в
186
+ '\u0433': 'g' # г
187
+ '\u0434': 'd' # д
188
+ '\u0435': 'e' # e
189
+ '\u0451': 'yo' # ё
190
+ '\u0436': 'j' # ж
191
+ '\u0437': 'z' # з
192
+ '\u0438': 'i' # и
193
+ '\u0439': 'y' # й
194
+ '\u043A': 'k' # к
195
+ '\u043B': 'l' # л
196
+ '\u043C': 'm' # м
197
+ '\u043D': 'n' # н
198
+ '\u04A3': 'ng' # ң
199
+ '\u043E': 'o' # о
200
+ '\u04E9': 'ö' # ө
201
+ '\u043F': 'p' # п
202
+ '\u0440': 'r' # р
203
+ '\u0441': 's' # с
204
+ '\u0442': 't' # т
205
+ '\u0443': 'u' # у
206
+ '\u04AF': 'ü' # ү
207
+ '\u0444': 'f' # ф
208
+ '\u0445': 'kh' # х
209
+ '\u0446': 'ts' # ц
210
+ '\u0447': 'ch' # ч
211
+ '\u0448': 'sh' # ш
212
+ '\u0449': 'shch' # щ
213
+ '\u044A': '”' # ъ
214
+ '\u044B': 'y' # ы
215
+ '\u044C': '’' # ь
216
+ '\u044D': 'e' # э
217
+ '\u044E': 'yu' # ю
218
+ '\u044F': 'ya' # я