interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -74,7 +74,7 @@ map:
74
74
  '\u0426': 'Ts'
75
75
  '\u0427': 'Ch'
76
76
  '\u0428': 'Sh'
77
- '\u0429': 'St'
77
+ '\u0429': 'Sht'
78
78
  '\u042a': "U\u0306"
79
79
  '\u042c': "\\'"
80
80
  '\u042e': 'Yu'
@@ -0,0 +1,75 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1988
4
+ language: iso-639-2:div
5
+ source_script: Thaa
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF MALDIVIAN BGN/PCGN 1988 Agreement, with modifications 2009
8
+ alias:
9
+ ogc11122:
10
+ code: div_Thaa2Latn_GMV_1988
11
+ description: Maldivian (Divehi) 1988 system
12
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816778/ROMANIZATION_OF_MALDIVIAN.pdf
13
+ creation_date: 1988
14
+ confirmation_date: 2019
15
+ description: |
16
+ This romanization system supersedes the one that was approved by BGN and PCGN in 1972. An
17
+ official system was submitted to PCGN by the Maldivian government in 1987 and approved by BGN and
18
+ PCGN in 1988. The system presented here reflects the 1988 Agreement with minor modifications introduced by the government of the Maldives in 2009.
19
+
20
+ notes:
21
+ - Maldivian is read from right to left.
22
+ - The symbol ◌ appearing in the Vowel Characters table represents any Maldivian consonant character.
23
+ - The character އ is not romanized. If it bears a vowel character, that vowel character alone is romanized (e.g. އެނބޫދޫ En’boodhoo).
24
+ - |
25
+ When characters ށ and އ appear in combination with a supercircle (the ‘sukun’, which usually marks
26
+ the absence of a vowel, see the Diacritical Mark table): ށ ; އ , these characters are not romanized
27
+ but the following consonant is doubled (e.g. ކަޅުހުރާ Kalhehuttaa), unless the following consonant is a digraph in the
28
+ romanized form, in which case they are romanized h (e.g. ކެރެށްދޫ Kerehdhoo). ށ and އ appearing at the end of a word are romanized h (e.g. ވޭވައް Veyvah).
29
+ - Noonu (ނ) is romanized n’ when appearing without any vowel or auxiliary sign (e.g. ކަނޑުފުށި Kan’dufushi).
30
+ - Thaa (ތ) is romanized iy when appearing in combination with a supercircle (ތ) (e.g. ޒިޔާރަތްފުށި Ziyaaraiyfushi).
31
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and
32
+ lowercase Roman letters as appropriate should be used.
33
+
34
+
35
+ tests:
36
+ - source: "އިރުގައި"
37
+ expected: "irugai"
38
+ - source: "ޒިޔާރަތްފުށި"
39
+ expected: "ziyaaraiyfushi"
40
+ - source: "ރައްކާތެރިކުރުމާއި"
41
+ expected: "rakkaatherikurumaai"
42
+ - source: "ޝަހީދުންގެ ދުވަސް"
43
+ expected: "sh’aheedhun’ge dhuvas"
44
+ - source: "މަރުޙަބާ"
45
+ expected: "maruh’abaa"
46
+ - source: "ކިހިނެހް"
47
+ expected: "kihin’eh"
48
+ - source: "ކޮން ނަމެއް ކިޔަނީ"
49
+ expected: "kon’ n’ameh kiyan’ee"
50
+ - source: "ބައްއަޖޖެވުރި ހެނދުނެހް"
51
+ expected: "baajjevuri hen’dhun’eh"
52
+ - source: "މެނދުރެހް"
53
+ expected: "men’dhureh"
54
+ - source: "ހަވީރެހް"
55
+ expected: "haveereh"
56
+
57
+ map:
58
+ inherit: mv-div-Thaa-Latn-1987
59
+ characters:
60
+ #Consonants with diacritical marks (used mainly in words of Arabic origin, corresponding
61
+ # Arabic characters and their romanizations are shown in parentheses)
62
+ "ޘ": "th’"
63
+ "ޙ": "h’"
64
+ "ޚ": "kh"
65
+ "ޛ": "dh’"
66
+ "ޜ": "x"
67
+ "ޝ": "sh’"
68
+ "ޞ": "s’"
69
+ "ޟ": "l’"
70
+ "ޠ": "t’"
71
+ "ޡ": "z’"
72
+ "ޢ": "’"
73
+ "ޣ": "gh"
74
+ "ޤ": "q"
75
+ "ޥ": "w"
@@ -0,0 +1,28 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1964
4
+ language: iso-639-2:far
5
+ source_script: Latn
6
+ destination_script: Latn
7
+ name: https://github.com/interscript/interscript/files/5180777/BGN_Romanization_Guide_1964_faeroese.pdf
8
+ creation_date: 1964
9
+ description: |
10
+ The Faeroese language is a dialect of Icelandic and, like Icelandic, employs the letter eth (Ð ð).
11
+ Unlike Icelandic, however, the Faeroese ð has the sound of y (as in "yes") before i, v before u, and is silent in all other cases.
12
+ The Icelandic letter thorn (Þ þ) does not occur in the writing of Faeroese.
13
+
14
+ To avoid the use of the unfamiliar symbol ð, the Board transliterates it as dh, as in Icelandic,
15
+ even though its pronunciation in the two languages is not the same.
16
+
17
+ tests:
18
+ - source: Fyrirgefðu
19
+ expected: Fyrirgefdhu
20
+ - source: Þakka
21
+ expected: Þakka
22
+
23
+ map:
24
+ inherit: bgnpcgn-isl-Latn-Latn-1964
25
+
26
+ characters:
27
+ "\u00DE": ~ # Þ transliteration removed
28
+ "\u00FE": ~ # þ transliteration removed
@@ -0,0 +1,37 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1964
4
+ language: iso-639-2:isl
5
+ source_script: Latn
6
+ destination_script: Latn
7
+ name: TRANSLITERATION OF ICELANDIC BGN/PCGN 1947 System
8
+ url: https://github.com/interscript/interscript/files/5180785/BGN_Romanization_Guide_1964_icelandic_1947.pdf
9
+ creation_date: 1964
10
+ description: |
11
+ The BGN and the PCGN in 1947 jointly agreed to the transliteration of two letters of the Icelandic alphabet which,
12
+ although used in writing Old English, have disappeared from the modern English alphabet.
13
+
14
+ The transliterated letters are the edh (Ð ð) and the thorn (Þ þ), pronounced as th in "thus" and th in "think," respectively.
15
+
16
+ It was felt that it was better to transliterate these letters into familiar symbols than preserve such unfamiliar letters in the nomenclature.
17
+
18
+ notes:
19
+ - More about "edh" letter - https://en.wikipedia.org/wiki/Eth
20
+ - More about "thorn" letter - https://en.wikipedia.org/wiki/Thorn_(letter)
21
+
22
+ tests:
23
+ - source: Fyrirgefðu
24
+ expected: Fyrirgefdhu
25
+ - source: þu ert velkominn
26
+ expected: thu ert velkominn
27
+ - source: GOÐAN DAGINN
28
+ expected: GODHAN DAGINN
29
+ - source: Þakka
30
+ expected: Thakka
31
+
32
+ map:
33
+ characters:
34
+ "\u00D0": "Dh" # Ð
35
+ "\u00F0": "dh" # ð
36
+ "\u00DE": "Th" # Þ
37
+ "\u00FE": "th" # þ
@@ -0,0 +1,247 @@
1
+
2
+ ---
3
+ authority_id: bgnpcgn
4
+ id: 1979
5
+ language: iso-639-2:kaz
6
+ source_script: Cyrl
7
+ destination_script: Latn
8
+ name: Romanization of Kazakh
9
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811511/ROMANIZATION_OF_KAZAKH.pdf
10
+ creation_date: 1979
11
+ confirmation_date: 2019
12
+ description: |
13
+ The BGN/PCGN system for Kazakh was designed for use in romanizing names written in the Kazakh Cyrillic alphabet.
14
+ The Kazakh Cyrillic alphabet contains nine characters not present in the Russian alphabet: ә, ғ, қ, ң, ө, ұ, ү, һ and і.
15
+
16
+ notes:
17
+ - The character sequences гһ, зһ, кһ, нг, сһ and цһ may be romanized g·h, z·h, k·h, n·g, s·h
18
+ and ts·h in order to differentiate those romanizations from the digraphs gh, zh, kh, ng, sh,
19
+ and the letter sequence tsh, which are used to render the characters ғ, ж, х, ң, ш, and the character sequence тш.
20
+ - The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
21
+ - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character.
22
+ - The Kazakh government has adopted a programme to move to using the Roman-script as the principal writing system for Kazakh.
23
+ - 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
24
+ unmodified letters of the basic Roman script is:
25
+ Ä (U+00C4) ä (U+00E4)
26
+ Ī (U+012A) ī (U+012B)
27
+ Ö (U+00D6) ö (U+00F6)
28
+ Ū (U+016A) ū (U+016B)
29
+ Ü (U+00DC) ü (U+00FC)
30
+ ” (U+201D) ’ (U+2019)
31
+ Ė (U+0116) ė (U+0117)'
32
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and
33
+ lowercase Roman letters as appropriate should be used.
34
+
35
+ tests:
36
+ - source: Өңірек
37
+ expected: Öngirek
38
+ - source: Өтебас Артезиан Құдығы
39
+ expected: Ötebas Artezīan Qudyghy
40
+ - source: Өскенбай
41
+ expected: Öskenbay
42
+ - source: Өсек Көлі
43
+ expected: Ösek Köli
44
+ - source: Өрмексу
45
+ expected: Örmeksū
46
+ - source: Өмірзақ
47
+ expected: Ömirzaq
48
+ - source: Өлеңті
49
+ expected: Ölengti
50
+ - source: Өл-Фараби Даңғылы
51
+ expected: Öl-Farabī Dangghyly
52
+ - source: Өкпекті Тауы
53
+ expected: Ökpekti Taūy
54
+ - source: Өкенсоркен Қыстауы
55
+ expected: Ökensorken Qystaūy
56
+ - source: Өзен Ойысы
57
+ expected: Özen Oyysy
58
+ - source: Өзен
59
+ expected: Özen
60
+ - source: Өгізтөбе Тауы
61
+ expected: Ögiztöbe Taūy
62
+ - source: Өгізтау Қыстауы
63
+ expected: Ögiztaū Qystaūy
64
+ - source: Өгізмүйіз Тауы
65
+ expected: Ögizmüyiz Taūy
66
+ - source: Өгізбұлақ
67
+ expected: Ögizbulaq
68
+ - source: Өгіз Үреулі
69
+ expected: Ögiz Üreūli
70
+ - source: Өгем Жотасы
71
+ expected: Ögem Zhotasy
72
+ - source: Өгем
73
+ expected: Ögem
74
+ - source: Әшім
75
+ expected: Äshim
76
+ - source: Әулиетөбе Тауы
77
+ expected: Äūlīetöbe Taūy
78
+ - source: Әулиекөл
79
+ expected: Äūlīeköl
80
+ - source: Әндіжан Құдығы
81
+ expected: Ändizhan Qudyghy
82
+ - source: Ұясай
83
+ expected: Uyasay
84
+ - source: Ұялы Метеорологиялық Станциясы
85
+ expected: Uyaly Meteorologīyalyq Stantsīyasy
86
+ - source: Ұшқын Қыстауы
87
+ expected: Ushqyn Qystaūy
88
+ - source: Үңгіртас
89
+ expected: Ünggirtas
90
+ - source: Үшқұлын
91
+ expected: Üshqulyn
92
+ - source: Құтырғы Асуы
93
+ expected: Qutyrghy Asūy
94
+ - source: Ярмы Стансасы
95
+ expected: Yarmy Stansasy
96
+ - source: Юпитер Қыстауы
97
+ expected: Yupīter Qystaūy
98
+ - source: Энгельс Көшесi
99
+ expected: Ėngel’s Köshesi
100
+ - source: Ырғызбай Жайлауы
101
+ expected: Yrghyzbay Zhaylaūy
102
+ - source: Щебнюха Тауы
103
+ expected: Shchebnyukha Taūy
104
+ - source: Шөміштікөл Соры
105
+ expected: Shömishtiköl Sory
106
+ - source: Чалов Барак Қыстауы
107
+ expected: Chalov Barak Qystaūy
108
+ - source: Чайкино
109
+ expected: Chaykīno
110
+ - source: Цуриковка
111
+ expected: Tsūrīkovka
112
+ - source: Хамитқора Қыстауы
113
+ expected: Khamītqora Qystaūy
114
+ - source: Фыкалка
115
+ expected: Fykalka
116
+ - source: Уақбай Қыстауы
117
+ expected: Ūaqbay Qystaūy
118
+ - source: Төңірекшың Тоғайы
119
+ expected: Töngirekshyng Toghayy
120
+ - source: Сабағали Қыстауы
121
+ expected: Sabaghalī Qystaūy
122
+ - source: Рысқұлов Даңғылы
123
+ expected: Rysqulov Dangghyly
124
+ - source: Пірназар Құдығы
125
+ expected: Pirnazar Qudyghy
126
+ - source: Оңтүстік Қазақстан Облысы
127
+ expected: Ongtüstik Qazaqstan Oblysy
128
+ - source: Нөмір Үшінші Суторабының Бөгені
129
+ expected: Nömir Üshinshi Sūtorabynyng Bögeni
130
+ - source: Мәмбетқазған Құдығы
131
+ expected: Mämbetqazghan Qudyghy
132
+ - source: Мемлекеттік Аудандық Электр Стансасы - Бір
133
+ expected: Memlekettik Aūdandyq Ėlektr Stansasy - Bir
134
+ - source: Линейский Белок Тауы
135
+ expected: Līneyskīy Belok Taūy
136
+ - source: Көшердік Бөгені
137
+ expected: Kösherdik Bögeni
138
+ - source: Көлфонтан Артезиан Құдығы
139
+ expected: Kölfontan Artezīan Qudyghy
140
+ - source: Изендіарал Мүйісі
141
+ expected: Īzendiaral Müyisi
142
+ - source: Злиха Метеорологиялық Станциасы
143
+ expected: Zlīkha Meteorologīyalyq Stantsīasy
144
+ - source: Жұлжұрған Көлі
145
+ expected: Zhulzhurghan Köli
146
+ - source: Ескі Үшал Қыстауы
147
+ expected: Eski Üshal Qystaūy
148
+ - source: Дөңгелексор Қыстауы
149
+ expected: Dönggeleksor Qystaūy
150
+ - source: Горько-Солёное Көлі
151
+ expected: Gor’ko-Solyonoe Köli
152
+ - source: Вагулино
153
+ expected: Vagūlīno
154
+ - source: Бөстай Учаскесі
155
+ expected: Böstay Ūchaskesi
156
+ - source: Аққолқы Тоғайы
157
+ expected: Aqqolqy Toghayy
158
+ - source: Іңқардария
159
+ expected: Ingqardarīya
160
+
161
+ map:
162
+ characters:
163
+ '\u0410': 'A' # А
164
+ '\u04D8': 'Ä' # Ә
165
+ '\u0411': 'B' # Б
166
+ '\u0412': 'V' # В
167
+ '\u0413': 'G' # Г
168
+ '\u0492': 'Gh' # Ғ
169
+ '\u0414': 'D' # Д
170
+ '\u0415': 'E' # Е
171
+ '\u0401': 'Yo' # Ё
172
+ '\u0416': 'Zh' # Ж
173
+ '\u0417': 'Z' # З
174
+ '\u0418': 'Ī' # И
175
+ '\u0419': 'Y' # Й
176
+ '\u041A': 'K' # К
177
+ '\u049A': 'Q' # Қ
178
+ '\u041B': 'L' # Л
179
+ '\u041C': 'M' # М
180
+ '\u041D': 'N' # Н
181
+ '\u04A2': 'Ng' # Ң
182
+ '\u041E': 'O' # О
183
+ '\u04E8': 'Ö' # Ө
184
+ '\u041F': 'P' # П
185
+ '\u0420': 'R' # Р
186
+ '\u0421': 'S' # С
187
+ '\u0422': 'T' # Т
188
+ '\u0423': 'Ū' # У
189
+ '\u04B0': 'U' # Ұ
190
+ '\u04AE': 'Ü' # Ү
191
+ '\u0424': 'F' # Ф
192
+ '\u0425': 'Kh' # Х
193
+ '\u04BA': 'H' # Һ
194
+ '\u0426': 'Ts' # Ц
195
+ '\u0427': 'Ch' # Ч
196
+ '\u0428': 'Sh' # Ш
197
+ '\u0429': 'Shch' # Щ
198
+ '\u042A': '”' # Ъ
199
+ '\u042B': 'Y' # Ы
200
+ '\u0406': 'I' # І
201
+ '\u042C': '’' # Ь
202
+ '\u042D': 'Ė' # Э
203
+ '\u042E': 'Yu' # Ю
204
+ '\u042F': 'Ya' # Я
205
+
206
+ '\u0430': 'a' # а
207
+ '\u04D9': 'ä' # ә
208
+ '\u0431': 'b' # б
209
+ '\u0432': 'v' # в
210
+ '\u0433': 'g' # г
211
+ '\u0493': 'gh' # ғ
212
+ '\u0434': 'd' # д
213
+ '\u0435': 'e' # e
214
+ '\u0451': 'yo' # ё
215
+ '\u0436': 'zh' # ж
216
+ '\u0437': 'z' # з
217
+ '\u0438': 'ī' # и
218
+ '\u0439': 'y' # й
219
+ '\u043A': 'k' # к
220
+ '\u049B': 'q' # қ
221
+ '\u043B': 'l' # л
222
+ '\u043C': 'm' # м
223
+ '\u043D': 'n' # н
224
+ '\u04A3': 'ng' # ң
225
+ '\u043E': 'o' # о
226
+ '\u04E9': 'ö' # ө
227
+ '\u043F': 'p' # п
228
+ '\u0440': 'r' # р
229
+ '\u0441': 's' # с
230
+ '\u0442': 't' # т
231
+ '\u0443': 'ū' # у
232
+ '\u04B1': 'u' # ұ
233
+ '\u04AF': 'ü' # ү
234
+ '\u0444': 'f' # ф
235
+ '\u0445': 'kh' # х
236
+ '\u04BB': 'h' # һ
237
+ '\u0446': 'ts' # ц
238
+ '\u0447': 'ch' # ч
239
+ '\u0448': 'sh' # ш
240
+ '\u0449': 'shch' # щ
241
+ '\u044A': '”' # ъ
242
+ '\u044B': 'y' # ы
243
+ '\u0456': 'i' # і
244
+ '\u044C': '’' # ь
245
+ '\u044D': 'ė' # э
246
+ '\u044E': 'yu' # ю
247
+ '\u044F': 'ya' # я
@@ -0,0 +1,218 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1979
4
+ language: iso-639-2:kir
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Romanization of Kyrgyz
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816663/ROMANIZATION_OF_KYRGYZ.pdf
9
+ creation_date: 1979
10
+ confirmation_date: 2019
11
+ description: |
12
+ The BGN/PCGN system for Kyrgyz Cyrillic was designed for use in romanizing names written
13
+ in the Kyrgyz Cyrillic alphabet. The Kyrgyz Cyrillic alphabet contains three characters not present in
14
+ the Russian alphabet: Ң, Ө, and Ү.
15
+
16
+ notes:
17
+ - Both Kyrgyz and Kirghiz may frequently be seen as the language name; both these spellings are used in the
18
+ ISO 639 Standard on the representation of names for languages.
19
+ - The character sequence нг may be romanized n·g in order to differentiate that romanization
20
+ from the digraph ng, which is used to render the character ң.
21
+ - The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
22
+ - 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
23
+ unmodified letters of the basic Roman script is:
24
+ All apostrophes appearing in romanization are U+2019
25
+ Ö (U+00D6) ö (U+00F6)
26
+ Ü (U+00DC) ü (U+00FC)'
27
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and
28
+ lowercase Roman letters as appropriate should be used.
29
+
30
+ tests:
31
+ - source: Ысык-Көл Облусу
32
+ expected: Ysyk-Köl Oblusu
33
+ - source: Ысык-Көл
34
+ expected: Ysyk-Köl
35
+ - source: Шедвик-Сай
36
+ expected: Shedvik-Say
37
+ - source: Чүй Облусу
38
+ expected: Chüy Oblusu
39
+ - source: Чүй
40
+ expected: Chüy
41
+ - source: Чирик-Сай
42
+ expected: Chirik-Say
43
+ - source: Хребет Джети-Сандал
44
+ expected: Khrebet Djeti-Sandal
45
+ - source: Узук-Булак
46
+ expected: Uzuk-Bulak
47
+ - source: Торугарт Ашуу
48
+ expected: Torugart Ashuu
49
+ - source: Торетал
50
+ expected: Toretal
51
+ - source: Терек
52
+ expected: Terek
53
+ - source: Талды-Булак
54
+ expected: Taldy-Bulak
55
+ - source: Талас Облусу
56
+ expected: Talas Oblusu
57
+ - source: Талас
58
+ expected: Talas
59
+ - source: Сарык-Кёль
60
+ expected: Saryk-Kyol’
61
+ - source: Родник Кара-Суу
62
+ expected: Rodnik Kara-Suu
63
+ - source: Родник Бейрёк-Булак
64
+ expected: Rodnik Beyryok-Bulak
65
+ - source: Перевал Сары-Челек
66
+ expected: Pereval Sary-Chelek
67
+ - source: Перевал Макмал
68
+ expected: Pereval Makmal
69
+ - source: Перевал Кара-Токой
70
+ expected: Pereval Kara-Tokoy
71
+ - source: Перевал Ашуу-Тёр
72
+ expected: Pereval Ashuu-Tyor
73
+ - source: Перевал Ашуу
74
+ expected: Pereval Ashuu
75
+ - source: Ош Шаары
76
+ expected: Osh Shaary
77
+ - source: Ош Облусу
78
+ expected: Osh Oblusu
79
+ - source: Ош
80
+ expected: Osh
81
+ - source: Ош
82
+ expected: Osh
83
+ - source: Осоавиахим
84
+ expected: Osoaviakhim
85
+ - source: Озеро Афлатук
86
+ expected: Ozero Aflatuk
87
+ - source: Нарын Облусу
88
+ expected: Naryn Oblusu
89
+ - source: Нарын
90
+ expected: Naryn
91
+ - source: Метеорологическая Станция Чамкал
92
+ expected: Meteorologicheskaya Stantsiya Chamkal
93
+ - source: Марза-Булак
94
+ expected: Marza-Bulak
95
+ - source: Макмал
96
+ expected: Makmal
97
+ - source: Кыргызстан
98
+ expected: Kyrgyzstan
99
+ - source: Кыргыз Республикасы
100
+ expected: Kyrgyz Respublikasy
101
+ - source: Куру-Сай
102
+ expected: Kuru-Say
103
+ - source: Куру-Сай
104
+ expected: Kuru-Say
105
+ - source: Кур-Пырылды
106
+ expected: Kur-Pyryldy
107
+ - source: Кок-Бель-Таш
108
+ expected: Kok-Bel’-Tash
109
+ - source: Кичи-Сандык
110
+ expected: Kichi-Sandyk
111
+ - source: Кель-Сай
112
+ expected: Kel’-Say
113
+ - source: Карагайлы
114
+ expected: Karagayly
115
+ - source: Кара-Суу
116
+ expected: Kara-Suu
117
+ - source: Жалал-Абад Облусу
118
+ expected: Jalal-Abad Oblusu
119
+ - source: Жалал-Абад
120
+ expected: Jalal-Abad
121
+ - source: Долина Беш-Башат
122
+ expected: Dolina Besh-Bashat
123
+ - source: Гора Арпа-Турча
124
+ expected: Gora Arpa-Turcha
125
+ - source: Бишкек Шаары
126
+ expected: Bishkek Shaary
127
+ - source: Бишкек
128
+ expected: Bishkek
129
+ - source: Бишкек
130
+ expected: Bishkek
131
+ - source: Баткен Облусу
132
+ expected: Batken Oblusu
133
+ - source: Баткен
134
+ expected: Batken
135
+ - source: Аяк-Терек
136
+ expected: Ayak-Terek
137
+ - source: Аюу-Чача
138
+ expected: Ayuu-Chacha
139
+ - source: Арпа
140
+ expected: Arpa
141
+ - source: Ак-Суу
142
+ expected: Ak-Suu
143
+
144
+ map:
145
+ characters:
146
+ '\u0410': 'A' # А
147
+ '\u0411': 'B' # Б
148
+ '\u0412': 'V' # В
149
+ '\u0413': 'G' # Г
150
+ '\u0414': 'D' # Д
151
+ '\u0415': 'E' # Е
152
+ '\u0401': 'Yo' # Ё
153
+ '\u0416': 'J' # Ж
154
+ '\u0417': 'Z' # З
155
+ '\u0418': 'I' # И
156
+ '\u0419': 'Y' # Й
157
+ '\u041A': 'K' # К
158
+ '\u041B': 'L' # Л
159
+ '\u041C': 'M' # М
160
+ '\u041D': 'N' # Н
161
+ '\u04A2': 'Ng' # Ң
162
+ '\u041E': 'O' # О
163
+ '\u04E8': 'Ö' # Ө
164
+ '\u041F': 'P' # П
165
+ '\u0420': 'R' # Р
166
+ '\u0421': 'S' # С
167
+ '\u0422': 'T' # Т
168
+ '\u0423': 'U' # У
169
+ '\u04AE': 'Ü' # Ү
170
+ '\u0424': 'F' # Ф
171
+ '\u0425': 'Kh' # Х
172
+ '\u0426': 'Ts' # Ц
173
+ '\u0427': 'Ch' # Ч
174
+ '\u0428': 'Sh' # Ш
175
+ '\u0429': 'Shch' # Щ
176
+ '\u042A': '”' # Ъ
177
+ '\u042B': 'Y' # Ы
178
+ '\u042C': '’' # Ь
179
+ '\u042D': 'E' # Э
180
+ '\u042E': 'Yu' # Ю
181
+ '\u042F': 'Ya' # Я
182
+
183
+ '\u0430': 'a' # а
184
+ '\u0431': 'b' # б
185
+ '\u0432': 'v' # в
186
+ '\u0433': 'g' # г
187
+ '\u0434': 'd' # д
188
+ '\u0435': 'e' # e
189
+ '\u0451': 'yo' # ё
190
+ '\u0436': 'j' # ж
191
+ '\u0437': 'z' # з
192
+ '\u0438': 'i' # и
193
+ '\u0439': 'y' # й
194
+ '\u043A': 'k' # к
195
+ '\u043B': 'l' # л
196
+ '\u043C': 'm' # м
197
+ '\u043D': 'n' # н
198
+ '\u04A3': 'ng' # ң
199
+ '\u043E': 'o' # о
200
+ '\u04E9': 'ö' # ө
201
+ '\u043F': 'p' # п
202
+ '\u0440': 'r' # р
203
+ '\u0441': 's' # с
204
+ '\u0442': 't' # т
205
+ '\u0443': 'u' # у
206
+ '\u04AF': 'ü' # ү
207
+ '\u0444': 'f' # ф
208
+ '\u0445': 'kh' # х
209
+ '\u0446': 'ts' # ц
210
+ '\u0447': 'ch' # ч
211
+ '\u0448': 'sh' # ш
212
+ '\u0449': 'shch' # щ
213
+ '\u044A': '”' # ъ
214
+ '\u044B': 'y' # ы
215
+ '\u044C': '’' # ь
216
+ '\u044D': 'e' # э
217
+ '\u044E': 'yu' # ю
218
+ '\u044F': 'ya' # я