interscript 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +246 -14
  3. data/bin/interscript +38 -17
  4. data/bin/setup +8 -0
  5. data/lib/g2pwrapper.py +34 -0
  6. data/lib/interscript.rb +140 -16
  7. data/lib/interscript/command.rb +27 -0
  8. data/lib/interscript/mapping.rb +125 -0
  9. data/lib/interscript/version.rb +1 -1
  10. data/lib/model-7 +0 -0
  11. data/lib/tha-pt-b-7 +0 -0
  12. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  13. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  14. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  15. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  18. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  19. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  21. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  22. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  23. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  24. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  25. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  26. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  27. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  28. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +57 -31
  29. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +54 -34
  30. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  31. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  32. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  33. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  34. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  35. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -2
  36. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  37. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  38. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  39. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +10 -64
  40. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +7456 -0
  41. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  42. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  43. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  44. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  45. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  46. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  47. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  48. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  49. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  50. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  51. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +145 -64
  52. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  53. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +75 -2
  54. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  57. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  59. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  60. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  61. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  62. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  63. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  64. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  65. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +108 -92
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -2
  68. data/maps/icao-heb-Hebr-Latn-9303.yaml +118 -124
  69. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -2
  70. data/maps/icao-per-Arab-Latn-9303.yaml +5 -6
  71. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  72. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -2
  73. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  74. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  75. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  76. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  77. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +2 -3
  78. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  79. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  80. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  81. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  82. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  83. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  84. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  85. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  86. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  87. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  88. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  89. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  90. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  91. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +6 -7
  92. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  93. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  94. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  95. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  96. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  97. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  98. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  99. data/maps/un-mon-Mong-Latn-2013.yaml +19 -6
  100. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  101. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  102. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  103. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  104. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  105. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  106. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  107. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  108. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  109. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  110. data/spec/interscript/mapping_spec.rb +42 -0
  111. data/spec/interscript_spec.rb +20 -5
  112. data/spec/spec_helper.rb +3 -1
  113. metadata +149 -24
  114. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  115. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  116. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  117. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  118. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  119. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  120. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
@@ -0,0 +1,208 @@
1
+ ---
2
+ authority_id: bgna
3
+ id: 2009
4
+ language: bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Streamlined System for Romanization of Bulgarian
8
+ creation_date: 2009
9
+ adoption_date: 2009-02-27
10
+
11
+ tests:
12
+ - source: нунатак Абрит
13
+ expected: nunatak Abrit
14
+ - source: връх Академия
15
+ expected: vrah Akademiya
16
+ - source: връх Ами Буе
17
+ expected: vrah Ami Bue
18
+ - source: нос Айтос
19
+ expected: nos Aytos
20
+ - source: залив Баба Тонка
21
+ expected: zaliv Baba Tonka
22
+ - source: Балабански камък
23
+ expected: Balabanski kamak
24
+ - source: Бедечки поток
25
+ expected: Bedechki potok
26
+ - source: нос Бяга
27
+ expected: nos Byaga
28
+ - source: остров Качо
29
+ expected: ostrov Kacho # wiki ostrov Cacho
30
+ - source: Чакъров остров
31
+ expected: Chakarov ostrov
32
+ - source: връх Дъбник
33
+ expected: vrah Dabnik
34
+ - source: залив Десислава
35
+ expected: zaliv Desislava
36
+ - source: ледник Джераси
37
+ expected: lednik Dzherasi
38
+ - source: Джегова скала
39
+ expected: Dzhegova skala
40
+ - source: Нунатак Едуард
41
+ expected: Nunatak Eduard
42
+ - source: Елховска седловина
43
+ expected: Elhovska sedlovina
44
+ - source: ледник Етър
45
+ expected: lednik Etar
46
+ - source: нунатак Филип Тотю
47
+ expected: nunatak Filip Totyu
48
+ - source: ледник Габаре
49
+ expected: lednik Gabare
50
+ - source: риф Гергини
51
+ expected: rif Gergini
52
+ - source: Гяуров връх
53
+ expected: Gyaurov vrah
54
+ - source: Гуцалски рид
55
+ expected: Gutsalski rid # not found
56
+ - source: Хараламбиев остров
57
+ expected: Haralambiev ostrov
58
+ - source: връх Ичера
59
+ expected: vrah Ichera
60
+ - source: полуостров Йоан Павел II
61
+ expected: poluostrov Yoan Pavel II
62
+ - source: нос Иван Александър
63
+ expected: nos Ivan Aleksandar
64
+ - source: нос Иречек
65
+ expected: nos Irechek
66
+ - source: нос Кърджали
67
+ expected: nos Kardzhali
68
+ - source: седловина Кърнаре
69
+ expected: sedlovina Karnare
70
+ - source: нунатак Керсеблепт
71
+ expected: nunatak Kerseblept
72
+ - source: Кондофрейски възвишения
73
+ expected: Kondofreyski vazvisheniya
74
+ - source: Костинбродски проход
75
+ expected: Kostinbrodski prohod
76
+ - source: връх Кожух
77
+ expected: vrah Kozhuh
78
+ - source: Кукерски нунатаци
79
+ expected: Kukerski nunatatsi
80
+ - source: залив Лазурен бряг
81
+ expected: zaliv Lazuren bryag
82
+ - source: връх Лудогорие
83
+ expected: vrah Ludogorie
84
+ - source: Лютибродски скали
85
+ expected: Lyutibrodski skali
86
+ - source: Масларов нунатак
87
+ expected: Maslarov nunatak
88
+ - source: Михневски връх
89
+ expected: Mihnevski vrah
90
+ - source: рид Митино
91
+ expected: rid Mitino
92
+ - source: езеро Наяда
93
+ expected: ezero Nayada
94
+ - source: нос Никюп
95
+ expected: nos Nikyup
96
+ - source: рид Оборище
97
+ expected: rid Oborishte
98
+ - source: залив Олуша
99
+ expected: zaliv Olusha
100
+ - source: Оряховски възвишения
101
+ expected: Oryahovski vazvisheniya
102
+ - source: нунатак Памидово
103
+ expected: nunatak Pamidovo
104
+ - source: връх Парангалица
105
+ expected: vrah Parangalitsa
106
+ - source: Първомайски провлак
107
+ expected: Parvomayski provlak
108
+ - source: ледник Патлейна
109
+ expected: lednik Patleyna
110
+ - source: полуостров Перник
111
+ expected: poluostrov Pernik
112
+ - source: връх Петко Войвода
113
+ expected: vrah Petko Voyvoda
114
+ - source: остров Фанагория
115
+ expected: ostrov Fanagoriya
116
+ - source: нос Плас
117
+ expected: nos Plas
118
+ - source: Пресиянов рид
119
+ expected: Presiyanov rid
120
+ - source: връх Принсипе де Астуриас
121
+ expected: vrah Prinsipe de Asturias # wiki: vrah Príncipe de Asturias
122
+ - source: нунатак Ръченица
123
+ expected: nunatak Rachenitsa
124
+ - source: връх Райна Княгиня
125
+ expected: vrah Rayna Knyaginya
126
+ - source: Рид Ръжана
127
+ expected: Rid Razhana
128
+ - source: връх Ригс
129
+ expected: vrah Rigs
130
+ - source: остров Рогулят
131
+ expected: ostrov Rogulyat
132
+ - source: ледник Сабазий
133
+ expected: lednik Sabaziy
134
+ - source: ледник Съединение
135
+ expected: lednik Saedinenie
136
+ - source: нунатак Сенокос
137
+ expected: nunatak Senokos
138
+ - source: Сейдолски камък
139
+ expected: Seydolski kamak
140
+ - source: ледник Щерна
141
+ expected: lednik Shterna
142
+ - source: връх Шишман
143
+ expected: vrah Shishman
144
+ - source: ледник Сигмен
145
+ expected: lednik Sigmen
146
+ - source: Седловина Синитово
147
+ expected: Sedlovina Sinitovo
148
+ - source: Ледник Скаплизо
149
+ expected: Lednik Skaplizo
150
+ - source: залив Слънчев бряг
151
+ expected: zaliv Slanchev bryag
152
+ - source: остров Соатрис
153
+ expected: ostrov Soatris
154
+ - source: планина Софийски Университет
155
+ expected: planina Sofiyski Universitet
156
+ - source: ледник Сребърна
157
+ expected: lednik Srebarna
158
+ - source: Средногорски възвишения
159
+ expected: Srednogorski vazvisheniya
160
+ - source: Св. Евтимиев камък
161
+ expected: Sv. Evtimiev kamak
162
+ - source: база Св. Климент Охридски
163
+ expected: baza Sv. Kliment Ohridski
164
+ - source: връх Стъргел
165
+ expected: vrah Stargel
166
+ - source: нунатак Сторгозия
167
+ expected: nunatak Storgoziya # nunatak Storgozia according to wiki
168
+ - source: нунатак Сурвакари
169
+ expected: nunatak Survakari
170
+ - source: ледник Световрачене
171
+ expected: lednik Svetovrachene
172
+ - source: остров Теменуга
173
+ expected: ostrov Temenuga
174
+ - source: Тракийски възвишения
175
+ expected: Trakiyski vazvisheniya
176
+ - source: хълм Цамблак
177
+ expected: halm Tsamblak
178
+ - source: ледник Урдовиза
179
+ expected: lednik Urdoviza
180
+ - source: остров Вълчедръм
181
+ expected: ostrov Valchedram
182
+ - source: острови Вардим
183
+ expected: ostrovi Vardim
184
+ - source: Владигеров проток
185
+ expected: Vladigerov protok
186
+ - source: ледник Ябланица
187
+ expected: lednik Yablanitsa
188
+ - source: залив Ямфорина
189
+ expected: zaliv Yamforina
190
+ - source: Йовков нос
191
+ expected: Yovkov nos
192
+ - source: рид Заберново
193
+ expected: rid Zabernovo
194
+ - source: ледник Збелсурд
195
+ expected: lednik Zbelsurd
196
+ - source: Жефарович камък
197
+ expected: Zhefarovich kamak
198
+ - source: връх Зиези
199
+ expected: vrah Ziezi
200
+ - source: залив Златни пясъци
201
+ expected: zaliv Zlatni pyasatsi
202
+ - source: ледник Злокучене
203
+ expected: lednik Zlokuchene
204
+ - source: проток Злогош
205
+ expected: protok Zlogosh
206
+
207
+ map:
208
+ inherit: apcbg-bul-Cyrl-Latn-1995
@@ -25,8 +25,7 @@ notes:
25
25
  - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
26
26
 
27
27
  tests:
28
- - source:
29
- expected:
28
+
30
29
  map:
31
30
  characters:
32
31
  '\u0531' : 'A'
@@ -0,0 +1,104 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1993
4
+ language: aze
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
9
+ creation_date: 1993
10
+ confirmation_date: 2019-06
11
+ description: |
12
+ Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
13
+
14
+ notes:
15
+
16
+ - The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
17
+
18
+ - The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
19
+
20
+ - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
21
+
22
+ - |
23
+ An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
24
+ Ğ (U+011E), ğ (U+011F)
25
+ Ə (U+018F), ə (U+0259)
26
+ İ (U+0130), ı (U+0131)
27
+ Ö (U+00D6), ö (U+00F6)
28
+ Ü (U+00DC), ü (U+00FC)
29
+ Ç (U+00C7), ç (U+00E7)
30
+ Ş (U+015E), ş (U+015F)
31
+
32
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
33
+
34
+ tests:
35
+ - source:
36
+ expected:
37
+
38
+ map:
39
+ characters:
40
+ '\u0410' : 'A'
41
+ '\u0411' : 'B'
42
+ '\u0412' : 'V'
42
+ '\u0413' : 'Q'
44
+ '\u0492' : 'Ğ'
45
+ '\u0414' : 'D'
46
+ '\u0415' : 'E'
47
+ '\u04D8' : 'Ə'
48
+ '\u0416' : 'J'
49
+ '\u0417' : 'Z'
50
+ '\u0418' : 'İ'
50
+ '\u042B' : 'I'
52
+ '\u0408' : 'Y'
53
+ '\u041A' : 'K'
54
+ '\u049C' : 'G'
55
+ '\u041B' : 'L'
56
+ '\u041C' : 'M'
57
+ '\u041D' : 'N'
58
+ '\u041E' : 'O'
59
+ '\u04E8' : 'Ö'
60
+ '\u041F' : 'P'
61
+ '\u0420' : 'R'
62
+ '\u0421' : 'S'
63
+ '\u0422' : 'T'
64
+ '\u0423' : 'U'
65
+ '\u04AE' : 'Ü'
66
+ '\u0424' : 'F'
67
+ '\u0425' : 'X'
68
+ '\u04BA' : 'H'
69
+ '\u0427' : 'Ç'
70
+ '\u04B8' : 'C'
71
+ '\u0428' : 'Ş'
72
+
73
+ '\u0430' : 'a'
74
+ '\u0431' : 'b'
75
+ '\u0432' : 'v'
76
+ '\u0433' : 'q'
77
+ '\u0493' : 'ğ'
78
+ '\u0434' : 'd'
79
+ '\u0435' : 'e'
80
+ '\u04D9' : 'ə'
81
+ '\u0436' : 'j'
82
+ '\u0437' : 'z'
83
+ '\u0438' : 'i'
84
+ '\u044B' : 'ı'
85
+ '\u0458' : 'y'
86
+ '\u043A' : 'k'
87
+ '\u049D' : 'g'
88
+ '\u043B' : 'l'
89
+ '\u043C' : 'm'
90
+ '\u043D' : 'n'
91
+ '\u043E' : 'o'
92
+ '\u04E9' : 'ö'
93
+ '\u043F' : 'p'
94
+ '\u0440' : 'r'
95
+ '\u0441' : 's'
96
+ '\u0442' : 't'
97
+ '\u0443' : 'u'
98
+ '\u04AF' : 'ü'
99
+ '\u0444' : 'f'
100
+ '\u0445' : 'x'
101
+ '\u04BB' : 'h'
102
+ '\u0447' : 'ç'
103
+ '\u04B9' : 'c'
104
+ '\u0448' : 'ş'
@@ -0,0 +1,285 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1979
4
+ language: bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: United States Board on Geographic Names Foreign Names Committee Staff, 1994. Romanization Systems and Roman-Script Spelling Conventions, p. 23.
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811510/ROMANIZATION_OF_BELARUSIAN.pdf
9
+ creation_date: 1979
10
+ description: |
11
+ The BGN/PCGN system for Belarusian (formerly referred to as Byelorussian) was designed for use in
12
+ romanizing names written in the Belarusian Cyrillic alphabet. The Belarusian alphabet contains three
13
+ characters not present in the Russian alphabet: і, ў, and ’.
14
+
15
+ notes:
16
+ - The character sequences зг, кг, сг, тс and цг may be romanized z·h, k·h, s·h, t·s and ts·h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, x, ш, ц, and the character sequence тш.
17
+ - All apostrophes appearing in romanization are Unicode encoding 2019.
18
+
19
+ tests:
20
+ - source: Антон
21
+ expected: Anton
22
+ - source: Вілейка
23
+ expected: Vilyeyka
24
+ - source: Брэст
25
+ expected: Brest
26
+ - source: Дубна
27
+ expected: Dubna
28
+ - source: Віцебск
29
+ expected: Vitsyebsk
30
+ - source: Асіповічы
31
+ expected: Asipovichy
32
+ - source: Гродна
33
+ expected: Hrodna
34
+ - source: Брагін
35
+ expected: Brahin
36
+ - source: Добруш
37
+ expected: Dobrush
38
+ - source: Ліда
39
+ expected: Lida
40
+ - source: Гомель
41
+ expected: Homyel’
42
+ - source: Беліца
43
+ expected: Byelitsa
44
+ - source: Ёдкавічы
45
+ expected: Yodkavichy
46
+ - source: Нёман
47
+ expected: Nyoman
48
+ - source: Жлобін
49
+ expected: Zhlobin
50
+ - source: Ружаны
51
+ expected: Ruzhany
52
+ - source: Зоя
53
+ expected: Zoya
54
+ - source: князь
55
+ expected: knyaz’
56
+ - source: Ігнат
57
+ expected: Ihnat
58
+ - source: Мінск
59
+ expected: Minsk
60
+ - source: Йосель
61
+ expected: Yosyel’
62
+ - source: Койданава
63
+ expected: Koydanava
64
+ - source: Крапіўна
65
+ expected: Krapiwna
66
+ - source: Менск
67
+ expected: Myensk
68
+ - source: Лаўна
69
+ expected: Lawna
70
+ - source: Лёсік
71
+ expected: Lyosik
72
+ - source: Купала
73
+ expected: Kupala
74
+ - source: Вілейка
75
+ expected: Vilyeyka
76
+ - source: Міхал
77
+ expected: Mikhal
78
+ - source: Вільня
79
+ expected: Vil’nya
80
+ - source: Лепель
81
+ expected: Lyepyel’
82
+ - source: Магілёў
83
+ expected: Mahilyow
84
+ - source: Няміга
85
+ expected: Nyamiha
86
+ - source: Наваградак
87
+ expected: Navahradak
88
+ - source: Баранавічы
89
+ expected: Baranavichy
90
+ - source: Орша
91
+ expected: Orsha
92
+ - source: Востраў
93
+ expected: Vostraw
94
+ - source: Пінск
95
+ expected: Pinsk
96
+ - source: Дняпро
97
+ expected: Dnyapro
98
+ - source: Рагачоў
99
+ expected: Rahachow
100
+ - source: Сураж
101
+ expected: Surazh
102
+ - source: Смаляны
103
+ expected: Smalyany
104
+ - source: Арэса
105
+ expected: Aresa
106
+ - source: Рось
107
+ expected: Ros’
108
+ - source: Талочын
109
+ expected: Talochyn
110
+ - source: Масты
111
+ expected: Masty
112
+ - source: Уладзімір
113
+ expected: Uladzimir
114
+ - source: Бабруйск
115
+ expected: Babruysk
116
+ - source: Быхаў
117
+ expected: Bykhaw
118
+ - source: Воўпа
119
+ expected: Vowpa
120
+ - source: Іўе
121
+ expected: Iwye
122
+ - source: Фолюш
123
+ expected: Folyush
124
+ - source: фортка
125
+ expected: fortka
126
+ - source: Хатынь
127
+ expected: Khatyn’
128
+ - source: Быхаў
129
+ expected: Bykhaw
130
+ - source: Ганцавічы
131
+ expected: Hantsavichy
132
+ - source: Стоўбцы
133
+ expected: Stowbtsy
134
+ - source: цьмяны
135
+ expected: ts’myany
136
+ - source: мясцовы
137
+ expected: myastsovy
138
+ - source: Астравец
139
+ expected: Astravyets
140
+ - source: Прыпяць
141
+ expected: Prypyats’
142
+ - source: Чэрыкаў
143
+ expected: Cherykaw
144
+ - source: Шчара
145
+ expected: Shchara
146
+ - source: Нарач
147
+ expected: Narach
148
+ - source: Шклоў
149
+ expected: Shklow
150
+ - source: Ашмяны
151
+ expected: Ashmyany
152
+ - source: Ыттык-Кёль
153
+ expected: Yttyk-Kyol’
154
+ - source: Кобрын
155
+ expected: Kobryn
156
+ - source: Солы
157
+ expected: Soly
158
+ - source: Копысь
159
+ expected: Kopys’
160
+ - source: рунь
161
+ expected: run’
162
+ - source: Эйсманты
163
+ expected: Eysmanty
164
+ - source: Крэва
165
+ expected: Kreva
166
+ - source: Юры
167
+ expected: Yury
168
+ - source: уюн
169
+ expected: uyun
170
+ - source: Язэп
171
+ expected: Yazep
172
+ - source: Івянец
173
+ expected: Ivyanyets
174
+ - source: з’езд
175
+ expected: z”yezd
176
+ - source: Вялiкiя Вераб’евічы
177
+ expected: Vyalikiya Vyerab”yevichy
178
+ - source: Дзям’янаўцы
179
+ expected: Dzyam”yanawtsy
180
+ - source: Задвор’е
181
+ expected: Zadvor”ye
182
+ - source: Гезгалы
183
+ expected: Hyez·haly
184
+ - source: Вадасховішча Гезгальскае
185
+ expected: Vadaskhovishcha Hyez·hal’skaye
186
+
187
+ map:
188
+ postrules:
189
+ - pattern: '\u042C' # Ь
190
+ result: "\u2019"
191
+ - pattern: '\u044C' # ь
192
+ result: "\u2019"
193
+ # Per documentation those rules are optional
194
+ rules:
195
+ - pattern: \u0417\u0413 # ЗГ
196
+ result: "Z\u00B7H" # Z·H
197
+ - pattern: \u0437\u0433 # зг
198
+ result: "z\u00B7h" # z·h
199
+ - pattern: \u041A\u0413 # КГ
200
+ result: "K\u00B7H" # K·H
201
+ - pattern: \u043A\u0433 # кг
202
+ result: "k\u00B7h" # k·h
203
+ - pattern: \u0421\u0413 # СГ
204
+ result: "S\u00B7H" # S·H
205
+ - pattern: \u0441\u0433 # сг
206
+ result: "s\u00B7h" # s·h
207
+ - pattern: \u0422\u0421 # ТС
208
+ result: "T\u00B7S" # T·S
209
+ - pattern: \u0442\u0441 # тс
210
+ result: "t\u00B7s" # t·s
211
+ - pattern: \u0426\u0413 # ЦГ
212
+ result: "TS\u00B7H" # TS·H
213
+ - pattern: \u0446\u0433 # цг
214
+ result: "ts\u00B7h" # ts·h
215
+
216
+ characters:
217
+ '\u00B4' : "\u201D" # apostrophe according to spec
218
+ '\u02BC' : "\u201D" # apostrophe according to spec
219
+ '\u2019' : "\u201D" # apostrophe in actual examples
220
+
221
+ '\u0410' : 'A' # A
222
+ '\u0411' : 'B' # Б
223
+ '\u0412' : 'V' # B
224
+ '\u0413' : 'H' # Г
225
+ '\u0414' : 'D' # Д
226
+ '\u0415' : 'Ye' # Е
227
+ '\u0401' : 'Yo' # Ё
228
+ '\u0416' : 'Zh' # Ж
229
+ '\u0417' : 'Z' # З
230
+ '\u0406' : 'I' # І
231
+ '\u0419' : 'Y' # Й
232
+ '\u041A' : 'K' # К
233
+ '\u041B' : 'L' # Л
234
+ '\u041C' : 'M' # М
235
+ '\u041D' : 'N' # Н
236
+ '\u041E' : 'O' # О
237
+ '\u041F' : 'P' # П
238
+ '\u0420' : 'R' # Р
239
+ '\u0421' : 'S' # С
240
+ '\u0422' : 'T' # Т
241
+ '\u0423' : 'U' # У
242
+ '\u040E' : 'W' # Ў
243
+ '\u0424' : 'F' # Ф
244
+ '\u0425' : 'Kh' # Х
245
+ '\u0426' : 'Ts' # Ц
246
+ '\u0427' : 'Ch' # Ч
247
+ '\u0428' : 'Sh' # Ш
248
+ '\u042B' : 'Y' # Ы
249
+ '\u042D' : 'E' # Э
250
+ '\u042E' : 'Yu' # Ю
251
+ '\u042F' : 'Ya' # Я
252
+ '\u0490' : 'G' # Ґ
253
+
254
+ '\u0430' : 'a' # а
255
+ '\u0431' : 'b' # б
256
+ '\u0432' : 'v' # в
257
+ '\u0433' : 'h' # г
258
+ '\u0434' : 'd' # д
259
+ '\u0435' : 'ye' # е
260
+ '\u0451' : 'yo' # ё
261
+ '\u0436' : 'zh' # ж
262
+ '\u0437' : 'z' # з
263
+ '\u0456' : 'i' # і
264
+ '\u0439' : 'y' # й
265
+ '\u043A' : 'k' # к
266
+ '\u043B' : 'l' # л
267
+ '\u043C' : 'm' # м
268
+ '\u043D' : 'n' # н
269
+ '\u043E' : 'o' # о
270
+ '\u043F' : 'p' # п
271
+ '\u0440' : 'r' # р
272
+ '\u0441' : 's' # с
273
+ '\u0442' : 't' # т
274
+ '\u0443' : 'u' # у
275
+ '\u045E' : 'w' # ў
276
+ '\u0444' : 'f' # ф
277
+ '\u0445' : 'kh' # х
278
+ '\u0446' : 'ts' # ц
279
+ '\u0447' : 'ch' # ч
280
+ '\u0448' : 'sh' # ш
281
+ '\u044B' : 'y' # ы
282
+ '\u044D' : 'e' # э
283
+ '\u044E' : 'yu' # ю
284
+ '\u044F' : 'ya' # я
285
+ '\u0491' : 'g' # ґ