interscript 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +246 -14
- data/bin/interscript +38 -17
- data/bin/setup +8 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript.rb +140 -16
- data/lib/interscript/command.rb +27 -0
- data/lib/interscript/mapping.rb +125 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
- data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
- data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +20 -5
- data/spec/spec_helper.rb +3 -1
- metadata +149 -24
- data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
- data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
- data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
- data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
- data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
- data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
- data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -18,7 +18,80 @@ notes:
|
|
18
18
|
- The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, x, ц, ш, and the character sequence тш.
|
19
19
|
- All apostrophes appearing in romanization are Unicode encoding 2019.
|
20
20
|
- The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
|
21
|
+
|
22
|
+
tests:
|
23
|
+
- source: Авдіївська Міськрада
|
24
|
+
expected: Avdiyivs’ka Mis’krada
|
25
|
+
- source: Бабаї
|
26
|
+
expected: Babayi
|
27
|
+
- source: Віленька
|
28
|
+
expected: Vilen’ka
|
29
|
+
- source: Гагарінський Район
|
30
|
+
expected: Haharins’kyy Rayon
|
31
|
+
- source: Довбушева Криниця
|
32
|
+
expected: Dovbusheva Krynytsya
|
33
|
+
- source: Дідівщина
|
34
|
+
expected: Didivshchyna
|
35
|
+
- source: Економічна
|
36
|
+
expected: Ekonomichna
|
37
|
+
- source: Єфросинівка
|
38
|
+
expected: Yefrosynivka
|
39
|
+
- source: Жигуліна Роща
|
40
|
+
expected: Zhyhulina Roshcha
|
41
|
+
- source: Загір’я
|
42
|
+
expected: Zahir”ya
|
43
|
+
- source: З’єднувальний Канал
|
44
|
+
expected: Z”yednuval’nyy Kanal
|
45
|
+
- source: Ивахи
|
46
|
+
expected: Yvakhy
|
47
|
+
- source: Івано-Франківська Міськрада
|
48
|
+
expected: Ivano-Frankivs’ka Mis’krada
|
49
|
+
- source: Їжаківка
|
50
|
+
expected: Yizhakivka
|
51
|
+
- source: Йосиповичі
|
52
|
+
expected: Yosypovychi
|
53
|
+
- source: Кабичівка
|
54
|
+
expected: Kabychivka
|
55
|
+
- source: Лазуровий Провулок
|
56
|
+
expected: Lazurovyy Provulok
|
57
|
+
- source: Мала Сейдеминуха
|
58
|
+
expected: Mala Seydemynukha
|
59
|
+
- source: Нагірний
|
60
|
+
expected: Nahirnyy
|
61
|
+
- source: Овер’янівське Озеро
|
62
|
+
expected: Over”yanivs’ke Ozero
|
63
|
+
- source: Павлопільське Водосховище
|
64
|
+
expected: Pavlopil’s’ke Vodoskhovyshche
|
65
|
+
- source: Приґородний
|
66
|
+
expected: Prygorodnyy
|
67
|
+
- source: Радгосп Правда
|
68
|
+
expected: Radhosp Pravda
|
69
|
+
- source: Садово-Хрустальненський
|
70
|
+
expected: Sadovo-Khrustal’nens’kyy
|
71
|
+
- source: Таратутине
|
72
|
+
expected: Taratutyne
|
73
|
+
- source: Улу-Узень
|
74
|
+
expected: Ulu-Uzen’
|
75
|
+
- source: Христофорівка
|
76
|
+
expected: Khrystoforivka
|
77
|
+
- source: Центральна Вулиця
|
78
|
+
expected: Tsentral’na Vulytsya
|
79
|
+
- source: Чайковичі
|
80
|
+
expected: Chaykovychi
|
81
|
+
- source: Шалаші
|
82
|
+
expected: Shalashi
|
83
|
+
- source: Щербинівка
|
84
|
+
expected: Shcherbynivka
|
85
|
+
- source: Южноукраїнська Міськрада
|
86
|
+
expected: Yuzhnoukrayins’ka Mis’krada
|
87
|
+
- source: Ясениця
|
88
|
+
expected: Yasenytsya
|
89
|
+
|
21
90
|
map:
|
91
|
+
rules:
|
92
|
+
- pattern: \b\u2019\b # ’ in the middle of a word -> ”
|
93
|
+
result: "\u201d"
|
94
|
+
|
22
95
|
characters:
|
23
96
|
"\u0430": 'a'
|
24
97
|
"\u0431": 'b'
|
@@ -46,7 +119,7 @@ map:
|
|
46
119
|
"\u0447": 'ch'
|
47
120
|
"\u0448": 'sh'
|
48
121
|
"\u0449": 'shch'
|
49
|
-
"\u044c":
|
122
|
+
"\u044c": "\u2019"
|
50
123
|
"\u044e": 'yu'
|
51
124
|
"\u044f": 'ya'
|
52
125
|
"\u0454": 'ye'
|
@@ -83,7 +156,7 @@ map:
|
|
83
156
|
"\u0427": 'Ch'
|
84
157
|
"\u0428": 'Sh'
|
85
158
|
"\u0429": 'Shch'
|
86
|
-
"\u042c":
|
159
|
+
"\u042c": "\u2019"
|
87
160
|
"\u042e": 'Yu'
|
88
161
|
"\u042f": 'Ya'
|
89
162
|
"\u0490": 'G'
|
@@ -0,0 +1,208 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 2019
|
4
|
+
language: ukr
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: BGN/PCGN 2019 Agreement
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
|
9
|
+
creation_date: 2019
|
10
|
+
confirmation_date: 2020-01
|
11
|
+
description: |
|
12
|
+
The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
|
13
|
+
in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
|
14
|
+
since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
|
15
|
+
|
16
|
+
notes:
|
17
|
+
- |
|
18
|
+
The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
|
19
|
+
of the national system within Ukraine. Note, however, that this system is not recommended for
|
20
|
+
reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
|
21
|
+
This system also lacks the methodology outlined in the 1965 System to provide additional
|
22
|
+
differentiation between digraphs and individual character sequences.
|
23
|
+
For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
|
24
|
+
sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
|
25
|
+
from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
|
26
|
+
the characters ж, х, ш, ц and the character sequence тш.
|
27
|
+
- To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
|
28
|
+
- The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
|
29
|
+
- These characters differ significantly in romanization from the BGN/PCGN 1965 system.
|
30
|
+
|
31
|
+
tests:
|
32
|
+
- source: Алушта
|
33
|
+
expected: Alushta
|
34
|
+
- source: Борщагівка
|
35
|
+
expected: Borshchahivka
|
36
|
+
- source: Вишгород
|
37
|
+
expected: Vyshhorod
|
38
|
+
- source: Гадяч
|
39
|
+
expected: Hadiach
|
40
|
+
- source: Згорани
|
41
|
+
expected: Zghorany
|
42
|
+
- source: Ґалаґан
|
43
|
+
expected: Galagan
|
44
|
+
- source: Дон
|
45
|
+
expected: Don
|
46
|
+
- source: Рівне
|
47
|
+
expected: Rivne
|
48
|
+
- source: Єнакієве
|
49
|
+
expected: Yenakiieve
|
50
|
+
- source: Наєнко
|
51
|
+
expected: Naienko
|
52
|
+
- source: Житомир
|
53
|
+
expected: Zhytomyr
|
54
|
+
- source: Запоріжжя
|
55
|
+
expected: Zaporizhzhia
|
56
|
+
- source: Закарпаття
|
57
|
+
expected: Zakarpattia
|
58
|
+
- source: Медвин
|
59
|
+
expected: Medvyn
|
60
|
+
- source: Іршава
|
61
|
+
expected: Irshava
|
62
|
+
- source: Їжакевич
|
63
|
+
expected: Yizhakevych
|
64
|
+
- source: Кадіївка
|
65
|
+
expected: Kadiivka
|
66
|
+
- source: Йосипівка
|
67
|
+
expected: Yosypivka
|
68
|
+
- source: Стрий
|
69
|
+
expected: Stryi
|
70
|
+
- source: Київ
|
71
|
+
expected: Kyiv
|
72
|
+
- source: Лебедин
|
73
|
+
expected: Lebedyn
|
74
|
+
- source: Миколаїв
|
75
|
+
expected: Mykolaiv
|
76
|
+
- source: Ніжин
|
77
|
+
expected: Nizhyn
|
78
|
+
- source: Одеса
|
79
|
+
expected: Odesa
|
80
|
+
- source: Полтава
|
81
|
+
expected: Poltava
|
82
|
+
- source: Ромни
|
83
|
+
expected: Romny
|
84
|
+
- source: Суми
|
85
|
+
expected: Sumy
|
86
|
+
- source: Тетерів
|
87
|
+
expected: Teteriv
|
88
|
+
- source: Ужгород
|
89
|
+
expected: Uzhhorod
|
90
|
+
- source: Фастів
|
91
|
+
expected: Fastiv
|
92
|
+
- source: Харків
|
93
|
+
expected: Kharkiv
|
94
|
+
- source: Біла Церква
|
95
|
+
expected: Bila Tserkva
|
96
|
+
- source: Чернівці
|
97
|
+
expected: Chernivtsi
|
98
|
+
- source: Шостка
|
99
|
+
expected: Shostka
|
100
|
+
- source: Гоща
|
101
|
+
expected: Hoshcha
|
102
|
+
- source: Русь
|
103
|
+
expected: Rus
|
104
|
+
- source: Юрій
|
105
|
+
expected: Yurii
|
106
|
+
- source: Крюківка
|
107
|
+
expected: Kriukivka
|
108
|
+
- source: Яготин
|
109
|
+
expected: Yahotyn
|
110
|
+
- source: Ічня
|
111
|
+
expected: Ichnia
|
112
|
+
- source: Знам’янка
|
113
|
+
expected: Znamianka
|
114
|
+
|
115
|
+
map:
|
116
|
+
rules:
|
117
|
+
- pattern: (?<=З|з)(Г|г)
|
118
|
+
result: gh
|
119
|
+
- pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
|
120
|
+
result: Ye
|
121
|
+
- pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
|
122
|
+
result: ye
|
123
|
+
- pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
|
124
|
+
result: Yi
|
125
|
+
- pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
|
126
|
+
result: yi
|
127
|
+
- pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
|
128
|
+
result: "Y"
|
129
|
+
- pattern: (?<!\b\u2019)\b\u0439 # й in initial position -> y
|
130
|
+
result: "y"
|
131
|
+
- pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
|
132
|
+
result: Yu
|
133
|
+
- pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
|
134
|
+
result: yu
|
135
|
+
- pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
|
136
|
+
result: Ya
|
137
|
+
- pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
|
138
|
+
result: ya
|
139
|
+
- pattern: \b\u2019\b # remove ’
|
140
|
+
result: ""
|
141
|
+
|
142
|
+
characters:
|
143
|
+
"\u0410": "A" # А
|
144
|
+
"\u0411": "B" # Б
|
145
|
+
"\u0412": "V" # В
|
146
|
+
"\u0413": "H" # Г
|
147
|
+
"\u0490": "G" # Ґ
|
148
|
+
"\u0414": "D" # Д
|
149
|
+
"\u0415": "E" # Е
|
150
|
+
"\u0404": "Ie" # Є
|
151
|
+
"\u0416": "Zh" # Ж
|
152
|
+
"\u0417": "Z" # З
|
153
|
+
"\u0418": "Y" # И
|
154
|
+
"\u0406": "I" # І
|
155
|
+
"\u0407": "I" # Ї
|
156
|
+
"\u0419": "I" # Й
|
157
|
+
"\u041a": "K" # К
|
158
|
+
"\u041b": "L" # Л
|
159
|
+
"\u041c": "M" # М
|
160
|
+
"\u041d": "N" # Н
|
161
|
+
"\u041e": "O" # О
|
162
|
+
"\u041f": "P" # П
|
163
|
+
"\u0420": "R" # Р
|
164
|
+
"\u0421": "S" # С
|
165
|
+
"\u0422": "T" # Т
|
166
|
+
"\u0423": "U" # У
|
167
|
+
"\u0424": "F" # Ф
|
168
|
+
"\u0425": "Kh" # Х
|
169
|
+
"\u0426": "Ts" # Ц
|
170
|
+
"\u0427": "Ch" # Ч
|
171
|
+
"\u0428": "Sh" # Ш
|
172
|
+
"\u0429": "Shch" # Щ
|
173
|
+
"\u042e": "Iu" # Ю
|
174
|
+
"\u042f": "Ia" # Я
|
175
|
+
"\u042c": "" # Ь
|
176
|
+
"\u0430": "a" # а
|
177
|
+
"\u0431": "b" # б
|
178
|
+
"\u0432": "v" # в
|
179
|
+
"\u0433": "h" # г
|
180
|
+
"\u0491": "g" # ґ
|
181
|
+
"\u0434": "d" # д
|
182
|
+
"\u0435": "e" # е
|
183
|
+
"\u0454": "ie" # є
|
184
|
+
"\u0436": "zh" # ж
|
185
|
+
"\u0437": "z" # з
|
186
|
+
"\u0438": "y" # и
|
187
|
+
"\u0456": "i" # і
|
188
|
+
"\u0457": "i" # ї
|
189
|
+
"\u0439": "i" # й
|
190
|
+
"\u043a": "k" # к
|
191
|
+
"\u043b": "l" # л
|
192
|
+
"\u043c": "m" # м
|
193
|
+
"\u043d": "n" # н
|
194
|
+
"\u043e": "o" # о
|
195
|
+
"\u043f": "p" # п
|
196
|
+
"\u0440": "r" # р
|
197
|
+
"\u0441": "s" # с
|
198
|
+
"\u0442": "t" # т
|
199
|
+
"\u0443": "u" # у
|
200
|
+
"\u0444": "f" # ф
|
201
|
+
"\u0445": "kh" # х
|
202
|
+
"\u0446": "ts" # ц
|
203
|
+
"\u0447": "ch" # ч
|
204
|
+
"\u0448": "sh" # ш
|
205
|
+
"\u0449": "shch" # щ
|
206
|
+
"\u044e": "iu" # ю
|
207
|
+
"\u044f": "ia" # я
|
208
|
+
"\u044c": "" # ь
|
@@ -0,0 +1,168 @@
|
|
1
|
+
---
|
2
|
+
authority_id: un
|
3
|
+
id: 1998
|
4
|
+
language: bel
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: National System of Geographic Names Transmission into Roman Alphabet in Belarus
|
8
|
+
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/20th-gegn-docs/20th_gegn_WP34.pdf
|
9
|
+
creation_date: 1998
|
10
|
+
description: |
|
11
|
+
The national system of romanization for Belarusian was approved by the State Committee on Land Resources,
|
12
|
+
Geodesy and Cartography, Republic of Belarus, on 20 March, 1998. This scheme was also supported by
|
13
|
+
the Y. Kolas Institute of Linguistics and the Republic Committee on Toponymy at the Belarusian Academy of Sciences.
|
14
|
+
While the system is still based on GOST 1983, it takes more precisely into account the peculiarities of
|
15
|
+
the Belarusian orthography. The system is reversible though there may exist some ambiguous consonant combinations.
|
16
|
+
|
17
|
+
tests: # the same as in by-bel-Cyrl-Latn-2007
|
18
|
+
- source: Аршанскi
|
19
|
+
expected: Aršanski
|
20
|
+
- source: Бешанковічы
|
21
|
+
expected: Biešankovičy
|
22
|
+
- source: Віцебск
|
23
|
+
expected: Viciebsk
|
24
|
+
- source: Гомель
|
25
|
+
expected: Homieĺ
|
26
|
+
- source: Гаўя
|
27
|
+
expected: Haŭja
|
28
|
+
- source: Добруш
|
29
|
+
expected: Dobruš
|
30
|
+
- source: Ельск
|
31
|
+
expected: Jeĺsk
|
32
|
+
- source: Бабаедава
|
33
|
+
expected: Babajedava
|
34
|
+
- source: Венцавічы
|
35
|
+
expected: Viencavičy
|
36
|
+
- source: Ёды
|
37
|
+
expected: Jody
|
38
|
+
- source: Вераб'ёвічы
|
39
|
+
expected: Vierabjovičy
|
40
|
+
- source: Мёры
|
41
|
+
expected: Miory
|
42
|
+
- source: Зэльва
|
43
|
+
expected: Zeĺva
|
44
|
+
- source: Iванава
|
45
|
+
expected: Ivanava
|
46
|
+
- source: Iўе
|
47
|
+
expected: Iŭje
|
48
|
+
- source: Лагойск
|
49
|
+
expected: Lahojsk
|
50
|
+
- source: Круглае
|
51
|
+
expected: Kruhlaje
|
52
|
+
- source: Лошыца
|
53
|
+
expected: Lošyca
|
54
|
+
- source: Любань
|
55
|
+
expected: Liubań
|
56
|
+
- source: Магілёў
|
57
|
+
expected: Mahilioŭ
|
58
|
+
- source: Нясвіж
|
59
|
+
expected: Niasviž
|
60
|
+
- source: Орша
|
61
|
+
expected: Orša
|
62
|
+
- source: Паставы
|
63
|
+
expected: Pastavy
|
64
|
+
- source: Рагачоў
|
65
|
+
expected: Rahačoŭ
|
66
|
+
- source: Смаргонь
|
67
|
+
expected: Smarhoń
|
68
|
+
- source: Талачын
|
69
|
+
expected: Talačyn
|
70
|
+
- source: Узда
|
71
|
+
expected: Uzda
|
72
|
+
- source: Шаркаўшчына
|
73
|
+
expected: Šarkaŭščyna
|
74
|
+
- source: Фаніпаль
|
75
|
+
expected: Fanipaĺ
|
76
|
+
- source: Хоцімск
|
77
|
+
expected: Chocimsk
|
78
|
+
- source: Цёмны Лес
|
79
|
+
expected: Ciomny Lies
|
80
|
+
- source: Чавусы
|
81
|
+
expected: Čavusy
|
82
|
+
- source: Шумілiна
|
83
|
+
expected: Šumilina
|
84
|
+
- source: Чыгірынка
|
85
|
+
expected: Čyhirynka
|
86
|
+
- source: Чэрвень
|
87
|
+
expected: Červień
|
88
|
+
- source: Друць
|
89
|
+
expected: Druć
|
90
|
+
- source: Чачэрск
|
91
|
+
expected: Čačersk
|
92
|
+
- source: Юхнаўка
|
93
|
+
expected: Juchnaŭka
|
94
|
+
- source: Гаюціна
|
95
|
+
expected: Hajucina
|
96
|
+
- source: Цюрлі
|
97
|
+
expected: Ciurli
|
98
|
+
- source: Любонічы
|
99
|
+
expected: Liuboničy
|
100
|
+
- source: Ямнае
|
101
|
+
expected: Jamnaje
|
102
|
+
- source: Баяры
|
103
|
+
expected: Bajary
|
104
|
+
- source: Валяр'яны
|
105
|
+
expected: Valiarjany
|
106
|
+
- source: Вязынка
|
107
|
+
expected: Viazynka
|
108
|
+
|
109
|
+
map:
|
110
|
+
inherit: gost-rus-cyrl-latn-16876-71-1983
|
111
|
+
|
112
|
+
rules:
|
113
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0415 # Е after consonants
|
114
|
+
result: IE
|
115
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0435 # е after consonants
|
116
|
+
result: ie
|
117
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0401 # Ё after consonants
|
118
|
+
result: IO
|
119
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u0451 # ё after consonants
|
120
|
+
result: io
|
121
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u042E # Ю after consonants
|
122
|
+
result: IU
|
123
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u044E # ю after consonants
|
124
|
+
result: iu
|
125
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u042F # Я after consonants
|
126
|
+
result: IA
|
127
|
+
- pattern: (?<=[БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШш])\u044F # я after consonants
|
128
|
+
result: ia
|
129
|
+
- pattern: \u0417\u042C # ЗЬ
|
130
|
+
result: "\u0179"
|
131
|
+
- pattern: \u0437\u044C # зь
|
132
|
+
result: "\u017A"
|
133
|
+
- pattern: \u041B\u042C # ЛЬ
|
134
|
+
result: "\u0139"
|
135
|
+
- pattern: \u043B\u044C # ль
|
136
|
+
result: "\u013A"
|
137
|
+
- pattern: \u0421\u042C # СЬ
|
138
|
+
result: "\u015A"
|
139
|
+
- pattern: \u0441\u044C # сь
|
140
|
+
result: "\u015B"
|
141
|
+
- pattern: \u0426\u042C # ЦЬ
|
142
|
+
result: "\u0106"
|
143
|
+
- pattern: \u0446\u044C # ць
|
144
|
+
result: "\u0107"
|
145
|
+
- pattern: \u041D\u042C # НЬ
|
146
|
+
result: "\u0143"
|
147
|
+
- pattern: \u043D\u044C # нь
|
148
|
+
result: "\u0144"
|
149
|
+
characters:
|
150
|
+
'\u0406' : 'I' # І
|
151
|
+
'\u0413' : 'H' # Г
|
152
|
+
'\u0415' : 'Je' # Е
|
153
|
+
'\u0401' : 'Jo' # Ё
|
154
|
+
'\u040E' : 'Ŭ' # Ў
|
155
|
+
'\u0425' : 'Ch' # Х
|
156
|
+
'\u042C' : '' # Ь
|
157
|
+
'\u042D' : 'E' # Э
|
158
|
+
|
159
|
+
'\u0433' : 'h' # г
|
160
|
+
'\u0456' : 'i' # і
|
161
|
+
'\u0435' : 'je' # е
|
162
|
+
'\u0451' : 'jo' # ё
|
163
|
+
'\u045E' : 'ŭ' # ў
|
164
|
+
'\u0445' : 'ch' # х
|
165
|
+
'\u044C' : '' # ь
|
166
|
+
'\u044D' : 'e' # э
|
167
|
+
|
168
|
+
'\u0027' : '' # '
|