interscript 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,208 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgna
|
3
|
+
id: 2009
|
4
|
+
language: bul
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: Streamlined System for Romanization of Bulgarian
|
8
|
+
creation_date: 2009
|
9
|
+
adoption_date: 2009-02-27
|
10
|
+
|
11
|
+
tests:
|
12
|
+
- source: нунатак Абрит
|
13
|
+
expected: nunatak Abrit
|
14
|
+
- source: връх Академия
|
15
|
+
expected: vrah Akademiya
|
16
|
+
- source: връх Ами Буе
|
17
|
+
expected: vrah Ami Bue
|
18
|
+
- source: нос Айтос
|
19
|
+
expected: nos Aytos
|
20
|
+
- source: залив Баба Тонка
|
21
|
+
expected: zaliv Baba Tonka
|
22
|
+
- source: Балабански камък
|
23
|
+
expected: Balabanski kamak
|
24
|
+
- source: Бедечки поток
|
25
|
+
expected: Bedechki potok
|
26
|
+
- source: нос Бяга
|
27
|
+
expected: nos Byaga
|
28
|
+
- source: остров Качо
|
29
|
+
expected: ostrov Kacho # wiki ostrov Cacho
|
30
|
+
- source: Чакъров остров
|
31
|
+
expected: Chakarov ostrov
|
32
|
+
- source: връх Дъбник
|
33
|
+
expected: vrah Dabnik
|
34
|
+
- source: залив Десислава
|
35
|
+
expected: zaliv Desislava
|
36
|
+
- source: ледник Джераси
|
37
|
+
expected: lednik Dzherasi
|
38
|
+
- source: Джегова скала
|
39
|
+
expected: Dzhegova skala
|
40
|
+
- source: Нунатак Едуард
|
41
|
+
expected: Nunatak Eduard
|
42
|
+
- source: Елховска седловина
|
43
|
+
expected: Elhovska sedlovina
|
44
|
+
- source: ледник Етър
|
45
|
+
expected: lednik Etar
|
46
|
+
- source: нунатак Филип Тотю
|
47
|
+
expected: nunatak Filip Totyu
|
48
|
+
- source: ледник Габаре
|
49
|
+
expected: lednik Gabare
|
50
|
+
- source: риф Гергини
|
51
|
+
expected: rif Gergini
|
52
|
+
- source: Гяуров връх
|
53
|
+
expected: Gyaurov vrah
|
54
|
+
- source: Гуцалски рид
|
55
|
+
expected: Gutsalski rid # not found
|
56
|
+
- source: Хараламбиев остров
|
57
|
+
expected: Haralambiev ostrov
|
58
|
+
- source: връх Ичера
|
59
|
+
expected: vrah Ichera
|
60
|
+
- source: полуостров Йоан Павел II
|
61
|
+
expected: poluostrov Yoan Pavel II
|
62
|
+
- source: нос Иван Александър
|
63
|
+
expected: nos Ivan Aleksandar
|
64
|
+
- source: нос Иречек
|
65
|
+
expected: nos Irechek
|
66
|
+
- source: нос Кърджали
|
67
|
+
expected: nos Kardzhali
|
68
|
+
- source: седловина Кърнаре
|
69
|
+
expected: sedlovina Karnare
|
70
|
+
- source: нунатак Керсеблепт
|
71
|
+
expected: nunatak Kerseblept
|
72
|
+
- source: Кондофрейски възвишения
|
73
|
+
expected: Kondofreyski vazvisheniya
|
74
|
+
- source: Костинбродски проход
|
75
|
+
expected: Kostinbrodski prohod
|
76
|
+
- source: връх Кожух
|
77
|
+
expected: vrah Kozhuh
|
78
|
+
- source: Кукерски нунатаци
|
79
|
+
expected: Kukerski nunatatsi
|
80
|
+
- source: залив Лазурен бряг
|
81
|
+
expected: zaliv Lazuren bryag
|
82
|
+
- source: връх Лудогорие
|
83
|
+
expected: vrah Ludogorie
|
84
|
+
- source: Лютибродски скали
|
85
|
+
expected: Lyutibrodski skali
|
86
|
+
- source: Масларов нунатак
|
87
|
+
expected: Maslarov nunatak
|
88
|
+
- source: Михневски връх
|
89
|
+
expected: Mihnevski vrah
|
90
|
+
- source: рид Митино
|
91
|
+
expected: rid Mitino
|
92
|
+
- source: езеро Наяда
|
93
|
+
expected: ezero Nayada
|
94
|
+
- source: нос Никюп
|
95
|
+
expected: nos Nikyup
|
96
|
+
- source: рид Оборище
|
97
|
+
expected: rid Oborishte
|
98
|
+
- source: залив Олуша
|
99
|
+
expected: zaliv Olusha
|
100
|
+
- source: Оряховски възвишения
|
101
|
+
expected: Oryahovski vazvisheniya
|
102
|
+
- source: нунатак Памидово
|
103
|
+
expected: nunatak Pamidovo
|
104
|
+
- source: връх Парангалица
|
105
|
+
expected: vrah Parangalitsa
|
106
|
+
- source: Първомайски провлак
|
107
|
+
expected: Parvomayski provlak
|
108
|
+
- source: ледник Патлейна
|
109
|
+
expected: lednik Patleyna
|
110
|
+
- source: полуостров Перник
|
111
|
+
expected: poluostrov Pernik
|
112
|
+
- source: връх Петко Войвода
|
113
|
+
expected: vrah Petko Voyvoda
|
114
|
+
- source: остров Фанагория
|
115
|
+
expected: ostrov Fanagoriya
|
116
|
+
- source: нос Плас
|
117
|
+
expected: nos Plas
|
118
|
+
- source: Пресиянов рид
|
119
|
+
expected: Presiyanov rid
|
120
|
+
- source: връх Принсипе де Астуриас
|
121
|
+
expected: vrah Prinsipe de Asturias # wiki: vrah Príncipe de Asturias
|
122
|
+
- source: нунатак Ръченица
|
123
|
+
expected: nunatak Rachenitsa
|
124
|
+
- source: връх Райна Княгиня
|
125
|
+
expected: vrah Rayna Knyaginya
|
126
|
+
- source: Рид Ръжана
|
127
|
+
expected: Rid Razhana
|
128
|
+
- source: връх Ригс
|
129
|
+
expected: vrah Rigs
|
130
|
+
- source: остров Рогулят
|
131
|
+
expected: ostrov Rogulyat
|
132
|
+
- source: ледник Сабазий
|
133
|
+
expected: lednik Sabaziy
|
134
|
+
- source: ледник Съединение
|
135
|
+
expected: lednik Saedinenie
|
136
|
+
- source: нунатак Сенокос
|
137
|
+
expected: nunatak Senokos
|
138
|
+
- source: Сейдолски камък
|
139
|
+
expected: Seydolski kamak
|
140
|
+
- source: ледник Щерна
|
141
|
+
expected: lednik Shterna
|
142
|
+
- source: връх Шишман
|
143
|
+
expected: vrah Shishman
|
144
|
+
- source: ледник Сигмен
|
145
|
+
expected: lednik Sigmen
|
146
|
+
- source: Седловина Синитово
|
147
|
+
expected: Sedlovina Sinitovo
|
148
|
+
- source: Ледник Скаплизо
|
149
|
+
expected: Lednik Skaplizo
|
150
|
+
- source: залив Слънчев бряг
|
151
|
+
expected: zaliv Slanchev bryag
|
152
|
+
- source: остров Соатрис
|
153
|
+
expected: ostrov Soatris
|
154
|
+
- source: планина Софийски Университет
|
155
|
+
expected: planina Sofiyski Universitet
|
156
|
+
- source: ледник Сребърна
|
157
|
+
expected: lednik Srebarna
|
158
|
+
- source: Средногорски възвишения
|
159
|
+
expected: Srednogorski vazvisheniya
|
160
|
+
- source: Св. Евтимиев камък
|
161
|
+
expected: Sv. Evtimiev kamak
|
162
|
+
- source: база Св. Климент Охридски
|
163
|
+
expected: baza Sv. Kliment Ohridski
|
164
|
+
- source: връх Стъргел
|
165
|
+
expected: vrah Stargel
|
166
|
+
- source: нунатак Сторгозия
|
167
|
+
expected: nunatak Storgoziya # nunatak Storgozia according to wiki
|
168
|
+
- source: нунатак Сурвакари
|
169
|
+
expected: nunatak Survakari
|
170
|
+
- source: ледник Световрачене
|
171
|
+
expected: lednik Svetovrachene
|
172
|
+
- source: остров Теменуга
|
173
|
+
expected: ostrov Temenuga
|
174
|
+
- source: Тракийски възвишения
|
175
|
+
expected: Trakiyski vazvisheniya
|
176
|
+
- source: хълм Цамблак
|
177
|
+
expected: halm Tsamblak
|
178
|
+
- source: ледник Урдовиза
|
179
|
+
expected: lednik Urdoviza
|
180
|
+
- source: остров Вълчедръм
|
181
|
+
expected: ostrov Valchedram
|
182
|
+
- source: острови Вардим
|
183
|
+
expected: ostrovi Vardim
|
184
|
+
- source: Владигеров проток
|
185
|
+
expected: Vladigerov protok
|
186
|
+
- source: ледник Ябланица
|
187
|
+
expected: lednik Yablanitsa
|
188
|
+
- source: залив Ямфорина
|
189
|
+
expected: zaliv Yamforina
|
190
|
+
- source: Йовков нос
|
191
|
+
expected: Yovkov nos
|
192
|
+
- source: рид Заберново
|
193
|
+
expected: rid Zabernovo
|
194
|
+
- source: ледник Збелсурд
|
195
|
+
expected: lednik Zbelsurd
|
196
|
+
- source: Жефарович камък
|
197
|
+
expected: Zhefarovich kamak
|
198
|
+
- source: връх Зиези
|
199
|
+
expected: vrah Ziezi
|
200
|
+
- source: залив Златни пясъци
|
201
|
+
expected: zaliv Zlatni pyasatsi
|
202
|
+
- source: ледник Злокучене
|
203
|
+
expected: lednik Zlokuchene
|
204
|
+
- source: проток Злогош
|
205
|
+
expected: protok Zlogosh
|
206
|
+
|
207
|
+
map:
|
208
|
+
inherit: apcbg-bul-Cyrl-Latn-1995
|
@@ -0,0 +1,104 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1993
|
4
|
+
language: aze
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
|
9
|
+
creation_date: 1993
|
10
|
+
confirmation_date: 2019-06
|
11
|
+
description: |
|
12
|
+
Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
|
13
|
+
|
14
|
+
notes:
|
15
|
+
|
16
|
+
- The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
|
17
|
+
|
18
|
+
- The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
|
19
|
+
|
20
|
+
- Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
|
21
|
+
|
22
|
+
- |
|
23
|
+
An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
|
24
|
+
Ğ (U+011E), ğ (U+011F)
|
25
|
+
Ə (U+018F), ə (U+0259)
|
26
|
+
İ (U+0130), ı (U+0131)
|
27
|
+
Ö (U+00D6), ö (U+00F6)
|
28
|
+
Ü (U+00DC), ü (U+00FC)
|
29
|
+
Ç (U+00C7), ç (U+00E7)
|
30
|
+
Ş (U+015E), ş (U+015F)
|
31
|
+
|
32
|
+
- The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
|
33
|
+
|
34
|
+
tests:
|
35
|
+
- source:
|
36
|
+
expected:
|
37
|
+
|
38
|
+
map:
|
39
|
+
characters:
|
40
|
+
'\u0410' : 'A'
|
41
|
+
'\u0411' : 'B'
|
42
|
+
'\u0412' : 'V'
|
43
|
+
'\u0413' : 'Q'
|
44
|
+
'\u0492' : 'Ğ'
|
45
|
+
'\u0414' : 'D'
|
46
|
+
'\u0415' : 'E'
|
47
|
+
'\u04D8' : 'Ə'
|
48
|
+
'\u0416' : 'J'
|
49
|
+
'\u0417' : 'Z'
|
50
|
+
'\u0418' : 'İ'
|
51
|
+
'\u042B' : 'I'
|
52
|
+
'\u0408' : 'Y'
|
53
|
+
'\u041A' : 'K'
|
54
|
+
'\u049C' : 'G'
|
55
|
+
'\u041B' : 'L'
|
56
|
+
'\u041C' : 'M'
|
57
|
+
'\u041D' : 'N'
|
58
|
+
'\u041E' : 'O'
|
59
|
+
'\u04E8' : 'Ö'
|
60
|
+
'\u041F' : 'P'
|
61
|
+
'\u0420' : 'R'
|
62
|
+
'\u0421' : 'S'
|
63
|
+
'\u0422' : 'T'
|
64
|
+
'\u0423' : 'U'
|
65
|
+
'\u04AE' : 'Ü'
|
66
|
+
'\u0424' : 'F'
|
67
|
+
'\u0425' : 'X'
|
68
|
+
'\u04BA' : 'H'
|
69
|
+
'\u0427' : 'Ç'
|
70
|
+
'\u04B8' : 'C'
|
71
|
+
'\u0428' : 'Ş'
|
72
|
+
|
73
|
+
'\u0430' : 'a'
|
74
|
+
'\u0431' : 'b'
|
75
|
+
'\u0432' : 'v'
|
76
|
+
'\u0433' : 'q'
|
77
|
+
'\u0493' : 'ğ'
|
78
|
+
'\u0434' : 'd'
|
79
|
+
'\u0435' : 'e'
|
80
|
+
'\u04D9' : 'ə'
|
81
|
+
'\u0436' : 'j'
|
82
|
+
'\u0437' : 'z'
|
83
|
+
'\u0438' : 'i'
|
84
|
+
'\u044B' : 'ı'
|
85
|
+
'\u0458' : 'y'
|
86
|
+
'\u043A' : 'k'
|
87
|
+
'\u049D' : 'g'
|
88
|
+
'\u043B' : 'l'
|
89
|
+
'\u043C' : 'm'
|
90
|
+
'\u043D' : 'n'
|
91
|
+
'\u043E' : 'o'
|
92
|
+
'\u04E9' : 'ö'
|
93
|
+
'\u043F' : 'p'
|
94
|
+
'\u0440' : 'r'
|
95
|
+
'\u0441' : 's'
|
96
|
+
'\u0442' : 't'
|
97
|
+
'\u0443' : 'u'
|
98
|
+
'\u04AF' : 'ü'
|
99
|
+
'\u0444' : 'f'
|
100
|
+
'\u0445' : 'x'
|
101
|
+
'\u04BB' : 'h'
|
102
|
+
'\u0447' : 'ç'
|
103
|
+
'\u04B9' : 'c'
|
104
|
+
'\u0448' : 'ş'
|
@@ -0,0 +1,285 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1979
|
4
|
+
language: bel
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: United States Board on Geographic Names Foreign Names Committee Staff, 1994. Romanization Systems and Roman-Script Spelling Conventions, p. 23.
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811510/ROMANIZATION_OF_BELARUSIAN.pdf
|
9
|
+
creation_date: 1979
|
10
|
+
description: |
|
11
|
+
The BGN/PCGN system for Belarusian (formerly referred to as Byelorussian) was designed for use in
|
12
|
+
romanizing names written in the Belarusian Cyrillic alphabet. The Belarusian alphabet contains three
|
13
|
+
characters not present in the Russian alphabet: і, ў, and ’.
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- The character sequences зг, кг, сг, тс and цг may be romanized z·h, k·h, s·h, t·s and ts·h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
|
17
|
+
- All apostrophes appearing in romanization are Unicode encoding 2019.
|
18
|
+
|
19
|
+
tests:
|
20
|
+
- source: Антон
|
21
|
+
expected: Anton
|
22
|
+
- source: Вілейка
|
23
|
+
expected: Vilyeyka
|
24
|
+
- source: Брэст
|
25
|
+
expected: Brest
|
26
|
+
- source: Дубна
|
27
|
+
expected: Dubna
|
28
|
+
- source: Віцебск
|
29
|
+
expected: Vitsyebsk
|
30
|
+
- source: Асіповічы
|
31
|
+
expected: Asipovichy
|
32
|
+
- source: Гродна
|
33
|
+
expected: Hrodna
|
34
|
+
- source: Брагін
|
35
|
+
expected: Brahin
|
36
|
+
- source: Добруш
|
37
|
+
expected: Dobrush
|
38
|
+
- source: Ліда
|
39
|
+
expected: Lida
|
40
|
+
- source: Гомель
|
41
|
+
expected: Homyel’
|
42
|
+
- source: Беліца
|
43
|
+
expected: Byelitsa
|
44
|
+
- source: Ёдкавічы
|
45
|
+
expected: Yodkavichy
|
46
|
+
- source: Нёман
|
47
|
+
expected: Nyoman
|
48
|
+
- source: Жлобін
|
49
|
+
expected: Zhlobin
|
50
|
+
- source: Ружаны
|
51
|
+
expected: Ruzhany
|
52
|
+
- source: Зоя
|
53
|
+
expected: Zoya
|
54
|
+
- source: князь
|
55
|
+
expected: knyaz’
|
56
|
+
- source: Ігнат
|
57
|
+
expected: Ihnat
|
58
|
+
- source: Мінск
|
59
|
+
expected: Minsk
|
60
|
+
- source: Йосель
|
61
|
+
expected: Yosyel’
|
62
|
+
- source: Койданава
|
63
|
+
expected: Koydanava
|
64
|
+
- source: Крапіўна
|
65
|
+
expected: Krapiwna
|
66
|
+
- source: Менск
|
67
|
+
expected: Myensk
|
68
|
+
- source: Лаўна
|
69
|
+
expected: Lawna
|
70
|
+
- source: Лёсік
|
71
|
+
expected: Lyosik
|
72
|
+
- source: Купала
|
73
|
+
expected: Kupala
|
74
|
+
- source: Вілейка
|
75
|
+
expected: Vilyeyka
|
76
|
+
- source: Міхал
|
77
|
+
expected: Mikhal
|
78
|
+
- source: Вільня
|
79
|
+
expected: Vil’nya
|
80
|
+
- source: Лепель
|
81
|
+
expected: Lyepyel’
|
82
|
+
- source: Магілёў
|
83
|
+
expected: Mahilyow
|
84
|
+
- source: Няміга
|
85
|
+
expected: Nyamiha
|
86
|
+
- source: Наваградак
|
87
|
+
expected: Navahradak
|
88
|
+
- source: Баранавічы
|
89
|
+
expected: Baranavichy
|
90
|
+
- source: Орша
|
91
|
+
expected: Orsha
|
92
|
+
- source: Востраў
|
93
|
+
expected: Vostraw
|
94
|
+
- source: Пінск
|
95
|
+
expected: Pinsk
|
96
|
+
- source: Дняпро
|
97
|
+
expected: Dnyapro
|
98
|
+
- source: Рагачоў
|
99
|
+
expected: Rahachow
|
100
|
+
- source: Сураж
|
101
|
+
expected: Surazh
|
102
|
+
- source: Смаляны
|
103
|
+
expected: Smalyany
|
104
|
+
- source: Арэса
|
105
|
+
expected: Aresa
|
106
|
+
- source: Рось
|
107
|
+
expected: Ros’
|
108
|
+
- source: Талочын
|
109
|
+
expected: Talochyn
|
110
|
+
- source: Масты
|
111
|
+
expected: Masty
|
112
|
+
- source: Уладзімір
|
113
|
+
expected: Uladzimir
|
114
|
+
- source: Бабруйск
|
115
|
+
expected: Babruysk
|
116
|
+
- source: Быхаў
|
117
|
+
expected: Bykhaw
|
118
|
+
- source: Воўпа
|
119
|
+
expected: Vowpa
|
120
|
+
- source: Іўе
|
121
|
+
expected: Iwye
|
122
|
+
- source: Фолюш
|
123
|
+
expected: Folyush
|
124
|
+
- source: фортка
|
125
|
+
expected: fortka
|
126
|
+
- source: Хатынь
|
127
|
+
expected: Khatyn’
|
128
|
+
- source: Быхаў
|
129
|
+
expected: Bykhaw
|
130
|
+
- source: Ганцавічы
|
131
|
+
expected: Hantsavichy
|
132
|
+
- source: Стоўбцы
|
133
|
+
expected: Stowbtsy
|
134
|
+
- source: цьмяны
|
135
|
+
expected: ts’myany
|
136
|
+
- source: мясцовы
|
137
|
+
expected: myastsovy
|
138
|
+
- source: Астравец
|
139
|
+
expected: Astravyets
|
140
|
+
- source: Прыпяць
|
141
|
+
expected: Prypyats’
|
142
|
+
- source: Чэрыкаў
|
143
|
+
expected: Cherykaw
|
144
|
+
- source: Шчара
|
145
|
+
expected: Shchara
|
146
|
+
- source: Нарач
|
147
|
+
expected: Narach
|
148
|
+
- source: Шклоў
|
149
|
+
expected: Shklow
|
150
|
+
- source: Ашмяны
|
151
|
+
expected: Ashmyany
|
152
|
+
- source: Ыттык-Кёль
|
153
|
+
expected: Yttyk-Kyol’
|
154
|
+
- source: Кобрын
|
155
|
+
expected: Kobryn
|
156
|
+
- source: Солы
|
157
|
+
expected: Soly
|
158
|
+
- source: Копысь
|
159
|
+
expected: Kopys’
|
160
|
+
- source: рунь
|
161
|
+
expected: run’
|
162
|
+
- source: Эйсманты
|
163
|
+
expected: Eysmanty
|
164
|
+
- source: Крэва
|
165
|
+
expected: Kreva
|
166
|
+
- source: Юры
|
167
|
+
expected: Yury
|
168
|
+
- source: уюн
|
169
|
+
expected: uyun
|
170
|
+
- source: Язэп
|
171
|
+
expected: Yazep
|
172
|
+
- source: Івянец
|
173
|
+
expected: Ivyanyets
|
174
|
+
- source: з’езд
|
175
|
+
expected: z”yezd
|
176
|
+
- source: Вялiкiя Вераб’евічы
|
177
|
+
expected: Vyalikiya Vyerab”yevichy
|
178
|
+
- source: Дзям’янаўцы
|
179
|
+
expected: Dzyam”yanawtsy
|
180
|
+
- source: Задвор’е
|
181
|
+
expected: Zadvor”ye
|
182
|
+
- source: Гезгалы
|
183
|
+
expected: Hyez·haly
|
184
|
+
- source: Вадасховішча Гезгальскае
|
185
|
+
expected: Vadaskhovishcha Hyez·hal’skaye
|
186
|
+
|
187
|
+
map:
|
188
|
+
postrules:
|
189
|
+
- pattern: '\u042C' # Ь
|
190
|
+
result: "\u2019"
|
191
|
+
- pattern: '\u044C' # ь
|
192
|
+
result: "\u2019"
|
193
|
+
# Per documentation those rules are optional
|
194
|
+
rules:
|
195
|
+
- pattern: \u0417\u0413 # ЗГ
|
196
|
+
result: "Z\u00B7H" # Z·H
|
197
|
+
- pattern: \u0437\u0433 # зг
|
198
|
+
result: "z\u00B7h" # z·h
|
199
|
+
- pattern: \u041A\u0413 # КГ
|
200
|
+
result: "K\u00B7H" # K·H
|
201
|
+
- pattern: \u043A\u0433 # кг
|
202
|
+
result: "k\u00B7h" # k·h
|
203
|
+
- pattern: \u0421\u0413 # СГ
|
204
|
+
result: "S\u00B7H" # S·H
|
205
|
+
- pattern: \u0441\u0433 # сг
|
206
|
+
result: "s\u00B7h" # s·h
|
207
|
+
- pattern: \u0422\u0421 # ТС
|
208
|
+
result: "T\u00B7S" # T·S
|
209
|
+
- pattern: \u0442\u0441 # тс
|
210
|
+
result: "t\u00B7s" # t·s
|
211
|
+
- pattern: \u0426\u0413 # ЦГ
|
212
|
+
result: "TS\u00B7H" # TS·H
|
213
|
+
- pattern: \u0446\u0433 # цг
|
214
|
+
result: "ts\u00B7h" # ts·h
|
215
|
+
|
216
|
+
characters:
|
217
|
+
'\u00B4' : "\u201D" # apostrophe according to spec
|
218
|
+
'\u02BC' : "\u201D" # apostrophe according to spec
|
219
|
+
'\u2019' : "\u201D" # apostrophe in actual examples
|
220
|
+
|
221
|
+
'\u0410' : 'A' # A
|
222
|
+
'\u0411' : 'B' # Б
|
223
|
+
'\u0412' : 'V' # B
|
224
|
+
'\u0413' : 'H' # Г
|
225
|
+
'\u0414' : 'D' # Д
|
226
|
+
'\u0415' : 'Ye' # Е
|
227
|
+
'\u0401' : 'Yo' # Ё
|
228
|
+
'\u0416' : 'Zh' # Ж
|
229
|
+
'\u0417' : 'Z' # З
|
230
|
+
'\u0406' : 'I' # І
|
231
|
+
'\u0419' : 'Y' # Й
|
232
|
+
'\u041A' : 'K' # К
|
233
|
+
'\u041B' : 'L' # Л
|
234
|
+
'\u041C' : 'M' # М
|
235
|
+
'\u041D' : 'N' # Н
|
236
|
+
'\u041E' : 'O' # О
|
237
|
+
'\u041F' : 'P' # П
|
238
|
+
'\u0420' : 'R' # Р
|
239
|
+
'\u0421' : 'S' # С
|
240
|
+
'\u0422' : 'T' # Т
|
241
|
+
'\u0423' : 'U' # У
|
242
|
+
'\u040E' : 'W' # Ў
|
243
|
+
'\u0424' : 'F' # Ф
|
244
|
+
'\u0425' : 'Kh' # Х
|
245
|
+
'\u0426' : 'Ts' # Ц
|
246
|
+
'\u0427' : 'Ch' # Ч
|
247
|
+
'\u0428' : 'Sh' # Ш
|
248
|
+
'\u042B' : 'Y' # Ы
|
249
|
+
'\u042D' : 'E' # Э
|
250
|
+
'\u042E' : 'Yu' # Ю
|
251
|
+
'\u042F' : 'Ya' # Я
|
252
|
+
'\u0490' : 'G' # Ґ
|
253
|
+
|
254
|
+
'\u0430' : 'a' # а
|
255
|
+
'\u0431' : 'b' # б
|
256
|
+
'\u0432' : 'v' # в
|
257
|
+
'\u0433' : 'h' # г
|
258
|
+
'\u0434' : 'd' # д
|
259
|
+
'\u0435' : 'ye' # е
|
260
|
+
'\u0451' : 'yo' # ё
|
261
|
+
'\u0436' : 'zh' # ж
|
262
|
+
'\u0437' : 'z' # з
|
263
|
+
'\u0456' : 'i' # і
|
264
|
+
'\u0439' : 'y' # й
|
265
|
+
'\u043A' : 'k' # к
|
266
|
+
'\u043B' : 'l' # л
|
267
|
+
'\u043C' : 'm' # м
|
268
|
+
'\u043D' : 'n' # н
|
269
|
+
'\u043E' : 'o' # о
|
270
|
+
'\u043F' : 'p' # п
|
271
|
+
'\u0440' : 'r' # р
|
272
|
+
'\u0441' : 's' # с
|
273
|
+
'\u0442' : 't' # т
|
274
|
+
'\u0443' : 'u' # у
|
275
|
+
'\u045E' : 'w' # ў
|
276
|
+
'\u0444' : 'f' # ф
|
277
|
+
'\u0445' : 'kh' # х
|
278
|
+
'\u0446' : 'ts' # ц
|
279
|
+
'\u0447' : 'ch' # ч
|
280
|
+
'\u0448' : 'sh' # ш
|
281
|
+
'\u044B' : 'y' # ы
|
282
|
+
'\u044D' : 'e' # э
|
283
|
+
'\u044E' : 'yu' # ю
|
284
|
+
'\u044F' : 'ya' # я
|
285
|
+
'\u0491' : 'g' # ґ
|