interscript 0.1.0 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +250 -17
- data/bin/interscript +36 -17
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/g2pwrapper.py +34 -0
- data/lib/interscript-opal.rb +2 -0
- data/lib/interscript.rb +138 -38
- data/lib/interscript/command.rb +28 -0
- data/lib/interscript/fs.rb +69 -0
- data/lib/interscript/mapping.rb +142 -0
- data/lib/interscript/opal.rb +23 -0
- data/lib/interscript/opal/maps.js.erb +7 -0
- data/lib/interscript/opal_map_translate.rb +12 -0
- data/lib/interscript/version.rb +1 -1
- data/lib/model-7 +0 -0
- data/lib/tha-pt-b-7 +0 -0
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
- data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
- data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
- data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
- data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
- data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
- data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
- data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
- data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +222 -0
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
- data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
- data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
- data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
- data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
- data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +175 -0
- data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
- data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
- data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
- data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
- data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
- data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
- data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
- data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
- data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
- data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +163 -0
- data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
- data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
- data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
- data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
- data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
- data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
- data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
- data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +141 -0
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +122 -0
- data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
- data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
- data/maps/icao-per-Arab-Latn-9303.yaml +104 -0
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +118 -0
- data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +120 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
- data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +272 -0
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
- data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
- data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
- data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
- data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +110 -0
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
- data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
- data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
- data/maps/odni-mkd-cyrl-latn-2015.yaml +122 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
- data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
- data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
- data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +167 -0
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
- data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
- data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
- data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
- data/maps/ses-ara-arab-latn-1930.yaml +275 -0
- data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
- data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
- data/maps/un-ara-Arab-Latn-1971.yaml +127 -0
- data/maps/un-ara-Arab-Latn-1972.yaml +152 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +383 -0
- data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
- data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
- data/maps/un-mon-Mong-Latn-2013.yaml +93 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
- data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
- data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
- data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
- data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
- data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
- data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
- data/spec/interscript/mapping_spec.rb +42 -0
- data/spec/interscript_spec.rb +26 -0
- data/spec/spec_helper.rb +3 -0
- metadata +295 -11
@@ -0,0 +1,166 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 2005
|
4
|
+
language: srp
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: ROMANIZATION OF SERBIAN, BGN/PCGN 2005 System
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816783/TABLE_OF_CORRESPONDENCES_FOR_SERBIAN.pdf
|
9
|
+
creation_date: 2005
|
10
|
+
confirmation_date: 2019-06
|
11
|
+
description: |
|
12
|
+
The tabulation below reflects the Serbian Cyrillic alphabet and the standard Roman script equivalents
|
13
|
+
used in both Serbia and Montenegro.
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- The Serbian Cyrillic lowercase italic Д may sometimes be seen as g.
|
17
|
+
There is no specific Unicode encoding for this variant form so a comparable character
|
18
|
+
has been used here for illustrative purposes.
|
19
|
+
|
20
|
+
- The digraph dj(Dj) will occasionally be found as an alternative form of đ(Đ).
|
21
|
+
|
22
|
+
- The Serbian Cyrillic lowercase italic П may sometimes be seen as ӣ.
|
23
|
+
There is no specific Unicode encoding for this variant form so a comparable character
|
24
|
+
has been used here for illustrative purposes.
|
25
|
+
|
26
|
+
- The Serbian Cyrillic lowercase italic Т may sometimes be seen as w.
|
27
|
+
There is no specific Unicode encoding for this variant form so a comparable character
|
28
|
+
has been used here for illustrative purposes.
|
29
|
+
|
30
|
+
- |
|
31
|
+
An inventory of letter-diacritic combinations, with their Unicode encoding,
|
32
|
+
in addition to the unmodified letters of the basic Roman script is:
|
33
|
+
| Đ (U+0110) | đ (U+0111) |
|
34
|
+
| Ž (U+017D) | ž (U+017E) |
|
35
|
+
| Lj (U+01C8)* | lj (U+01C9)* |
|
36
|
+
| Ć (U+0106) | ć (U+0107) |
|
37
|
+
| Dž (U+01C5)* | dž (U+01C6)* |
|
38
|
+
| Š (U+0160) | š (U+0161) |
|
39
|
+
* Note that these characters can also be reproduced with individual letters (e.g. l+j).
|
40
|
+
|
41
|
+
- The Roman-script columns show only lowercase forms but, when applying the table,
|
42
|
+
uppercase and lowercase Roman letters as appropriate should be used.
|
43
|
+
|
44
|
+
tests:
|
45
|
+
- source: Шупља Стена
|
46
|
+
expected: Šuplja Stena
|
47
|
+
- source: Чукарица
|
48
|
+
expected: Čukarica
|
49
|
+
- source: Црна Трава
|
50
|
+
expected: Crna Trava
|
51
|
+
- source: Херцег Нови
|
52
|
+
expected: Herceg Novi
|
53
|
+
- source: Улцињ
|
54
|
+
expected: Ulcinj
|
55
|
+
- source: Ужице
|
56
|
+
expected: Užice
|
57
|
+
- source: Тресаначка Река
|
58
|
+
expected: Tresanačka Reka
|
59
|
+
- source: Сјеница
|
60
|
+
expected: Sjenica
|
61
|
+
- source: Рожаје
|
62
|
+
expected: Rožaje
|
63
|
+
- source: Пљевља
|
64
|
+
expected: Pljevlja
|
65
|
+
- source: Оџаци
|
66
|
+
expected: Odžaci
|
67
|
+
- source: Никшић
|
68
|
+
expected: Nikšić
|
69
|
+
- source: Медвеђа
|
70
|
+
expected: Medveđa
|
71
|
+
- source: Лозница
|
72
|
+
expected: Loznica
|
73
|
+
- source: Књажевац
|
74
|
+
expected: Knjaževac
|
75
|
+
- source: Зрењанин
|
76
|
+
expected: Zrenjanin
|
77
|
+
- source: Житорађа
|
78
|
+
expected: Žitorađa
|
79
|
+
- source: Ервеник
|
80
|
+
expected: Ervenik
|
81
|
+
- source: Доње Љупче
|
82
|
+
expected: Donje Ljupče
|
83
|
+
- source: Гусиње
|
84
|
+
expected: Gusinje
|
85
|
+
- source: ГУСИЊЕ
|
86
|
+
expected: GUSINJE
|
87
|
+
- source: Врњачка Бања
|
88
|
+
expected: Vrnjačka Banja
|
89
|
+
- source: Бијело Поље
|
90
|
+
expected: Bijelo Polje
|
91
|
+
- source: Алибунар
|
92
|
+
expected: Alibunar
|
93
|
+
|
94
|
+
map:
|
95
|
+
postrules:
|
96
|
+
#LJ
|
97
|
+
- pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
|
98
|
+
result: "LJ"
|
99
|
+
#NJ
|
100
|
+
- pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
|
101
|
+
result: "NJ"
|
102
|
+
#DŽ
|
103
|
+
- pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
|
104
|
+
result: "DŽ"
|
105
|
+
|
106
|
+
characters:
|
107
|
+
"\u0410": "A"
|
108
|
+
"\u0411": "B"
|
109
|
+
"\u0412": "V"
|
110
|
+
"\u0413": "G"
|
111
|
+
"\u0414": "D"
|
112
|
+
"\u0402": "\u0110" # Đ
|
113
|
+
"\u0415": "E"
|
114
|
+
"\u0416": "\u005a\u030c" # Ž
|
115
|
+
"\u0417": "Z"
|
116
|
+
"\u0418": "I"
|
117
|
+
"\u0408": "J"
|
118
|
+
"\u041A": "K"
|
119
|
+
"\u041B": "L"
|
120
|
+
"\u0409": "Lj"
|
121
|
+
"\u041C": "M"
|
122
|
+
"\u041D": "N"
|
123
|
+
"\u040A": "Nj"
|
124
|
+
"\u041E": "O"
|
125
|
+
"\u041F": "P"
|
126
|
+
"\u0420": "R"
|
127
|
+
"\u0421": "S"
|
128
|
+
"\u0422": "T"
|
129
|
+
"\u040B": "\u0043\u0301" # Ć
|
130
|
+
"\u0423": "U"
|
131
|
+
"\u0424": "F"
|
132
|
+
"\u0425": "H"
|
133
|
+
"\u0426": "C"
|
134
|
+
"\u0427": "\u0043\u030c" # Č
|
135
|
+
"\u040F": "D\u007a\u030c" # Dž
|
136
|
+
"\u0428": "\u0053\u030c" # Š
|
137
|
+
"\u0430": "a"
|
138
|
+
"\u0431": "b"
|
139
|
+
"\u0432": "v"
|
140
|
+
"\u0433": "g"
|
141
|
+
"\u0434": "d"
|
142
|
+
"\u0452": "\u0111" # đ
|
143
|
+
"\u0435": "e"
|
144
|
+
"\u0436": "\u007a\u030c" # ž
|
145
|
+
"\u0437": "z"
|
146
|
+
"\u0438": "i"
|
147
|
+
"\u0458": "j"
|
148
|
+
"\u043A": "k"
|
149
|
+
"\u043B": "l"
|
150
|
+
"\u0459": "lj"
|
151
|
+
"\u043C": "m"
|
152
|
+
"\u043D": "n"
|
153
|
+
"\u045A": "nj"
|
154
|
+
"\u043E": "o"
|
155
|
+
"\u043F": "p"
|
156
|
+
"\u0440": "r"
|
157
|
+
"\u0441": "s"
|
158
|
+
"\u0442": "t"
|
159
|
+
"\u045B": "\u0063\u0301" # ć
|
160
|
+
"\u0443": "u"
|
161
|
+
"\u0444": "f"
|
162
|
+
"\u0445": "h"
|
163
|
+
"\u0446": "c"
|
164
|
+
"\u0447": "\u0063\u030c" # č
|
165
|
+
"\u045F": "d\u007a\u030c" # dž
|
166
|
+
"\u0448": "\u0073\u030c" # š
|
@@ -0,0 +1,163 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 1965
|
4
|
+
language: ukr
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: BGN/PCGN 1965 System
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
|
9
|
+
creation_date: 1947
|
10
|
+
confirmation_date: 2019-06
|
11
|
+
description: |
|
12
|
+
The BGN/PCGN system for Ukrainian was designed for use in romanizing
|
13
|
+
names written in the Ukrainian alphabet. The Ukrainian alphabet
|
14
|
+
contains five characters not present in the Russian alphabet: ґ, є, і,
|
15
|
+
ї, and ’.
|
16
|
+
|
17
|
+
notes:
|
18
|
+
- The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
|
19
|
+
- All apostrophes appearing in romanization are Unicode encoding 2019.
|
20
|
+
- The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
|
21
|
+
|
22
|
+
tests:
|
23
|
+
- source: Авдіївська Міськрада
|
24
|
+
expected: Avdiyivs’ka Mis’krada
|
25
|
+
- source: Бабаї
|
26
|
+
expected: Babayi
|
27
|
+
- source: Віленька
|
28
|
+
expected: Vilen’ka
|
29
|
+
- source: Гагарінський Район
|
30
|
+
expected: Haharins’kyy Rayon
|
31
|
+
- source: Довбушева Криниця
|
32
|
+
expected: Dovbusheva Krynytsya
|
33
|
+
- source: Дідівщина
|
34
|
+
expected: Didivshchyna
|
35
|
+
- source: Економічна
|
36
|
+
expected: Ekonomichna
|
37
|
+
- source: Єфросинівка
|
38
|
+
expected: Yefrosynivka
|
39
|
+
- source: Жигуліна Роща
|
40
|
+
expected: Zhyhulina Roshcha
|
41
|
+
- source: Загір’я
|
42
|
+
expected: Zahir”ya
|
43
|
+
- source: З’єднувальний Канал
|
44
|
+
expected: Z”yednuval’nyy Kanal
|
45
|
+
- source: Ивахи
|
46
|
+
expected: Yvakhy
|
47
|
+
- source: Івано-Франківська Міськрада
|
48
|
+
expected: Ivano-Frankivs’ka Mis’krada
|
49
|
+
- source: Їжаківка
|
50
|
+
expected: Yizhakivka
|
51
|
+
- source: Йосиповичі
|
52
|
+
expected: Yosypovychi
|
53
|
+
- source: Кабичівка
|
54
|
+
expected: Kabychivka
|
55
|
+
- source: Лазуровий Провулок
|
56
|
+
expected: Lazurovyy Provulok
|
57
|
+
- source: Мала Сейдеминуха
|
58
|
+
expected: Mala Seydemynukha
|
59
|
+
- source: Нагірний
|
60
|
+
expected: Nahirnyy
|
61
|
+
- source: Овер’янівське Озеро
|
62
|
+
expected: Over”yanivs’ke Ozero
|
63
|
+
- source: Павлопільське Водосховище
|
64
|
+
expected: Pavlopil’s’ke Vodoskhovyshche
|
65
|
+
- source: Приґородний
|
66
|
+
expected: Prygorodnyy
|
67
|
+
- source: Радгосп Правда
|
68
|
+
expected: Radhosp Pravda
|
69
|
+
- source: Садово-Хрустальненський
|
70
|
+
expected: Sadovo-Khrustal’nens’kyy
|
71
|
+
- source: Таратутине
|
72
|
+
expected: Taratutyne
|
73
|
+
- source: Улу-Узень
|
74
|
+
expected: Ulu-Uzen’
|
75
|
+
- source: Христофорівка
|
76
|
+
expected: Khrystoforivka
|
77
|
+
- source: Центральна Вулиця
|
78
|
+
expected: Tsentral’na Vulytsya
|
79
|
+
- source: Чайковичі
|
80
|
+
expected: Chaykovychi
|
81
|
+
- source: Шалаші
|
82
|
+
expected: Shalashi
|
83
|
+
- source: Щербинівка
|
84
|
+
expected: Shcherbynivka
|
85
|
+
- source: Южноукраїнська Міськрада
|
86
|
+
expected: Yuzhnoukrayins’ka Mis’krada
|
87
|
+
- source: Ясениця
|
88
|
+
expected: Yasenytsya
|
89
|
+
|
90
|
+
map:
|
91
|
+
rules:
|
92
|
+
- pattern: \b\u2019\b # ’ in the middle of a word -> ”
|
93
|
+
result: "\u201d"
|
94
|
+
|
95
|
+
characters:
|
96
|
+
"\u0430": 'a'
|
97
|
+
"\u0431": 'b'
|
98
|
+
"\u0432": 'v'
|
99
|
+
"\u0433": 'h'
|
100
|
+
"\u0434": 'd'
|
101
|
+
"\u0435": 'e'
|
102
|
+
"\u0436": 'zh'
|
103
|
+
"\u0437": 'z'
|
104
|
+
"\u0438": 'y'
|
105
|
+
"\u0439": 'y'
|
106
|
+
"\u043a": 'k'
|
107
|
+
"\u043b": 'l'
|
108
|
+
"\u043c": 'm'
|
109
|
+
"\u043d": 'n'
|
110
|
+
"\u043e": 'o'
|
111
|
+
"\u043f": 'p'
|
112
|
+
"\u0440": 'r'
|
113
|
+
"\u0441": 's'
|
114
|
+
"\u0442": 't'
|
115
|
+
"\u0443": 'u'
|
116
|
+
"\u0444": 'f'
|
117
|
+
"\u0445": 'kh'
|
118
|
+
"\u0446": 'ts'
|
119
|
+
"\u0447": 'ch'
|
120
|
+
"\u0448": 'sh'
|
121
|
+
"\u0449": 'shch'
|
122
|
+
"\u044c": "\u2019"
|
123
|
+
"\u044e": 'yu'
|
124
|
+
"\u044f": 'ya'
|
125
|
+
"\u0454": 'ye'
|
126
|
+
"\u0456": 'i'
|
127
|
+
"\u0457": 'yi'
|
128
|
+
"\u0491": 'g'
|
129
|
+
"\ufeff": ' '
|
130
|
+
"\u0404": 'Ye'
|
131
|
+
"\u0406": 'I'
|
132
|
+
"\u0407": 'Yi'
|
133
|
+
"\u0410": 'A'
|
134
|
+
"\u0411": 'B'
|
135
|
+
"\u0412": 'V'
|
136
|
+
"\u0413": 'H'
|
137
|
+
"\u0414": 'D'
|
138
|
+
"\u0415": 'E'
|
139
|
+
"\u0416": 'Zh'
|
140
|
+
"\u0417": 'Z'
|
141
|
+
"\u0418": 'Y'
|
142
|
+
"\u0419": 'Y'
|
143
|
+
"\u041a": 'K'
|
144
|
+
"\u041b": 'L'
|
145
|
+
"\u041c": 'M'
|
146
|
+
"\u041d": 'N'
|
147
|
+
"\u041e": 'O'
|
148
|
+
"\u041f": 'P'
|
149
|
+
"\u0420": 'R'
|
150
|
+
"\u0421": 'S'
|
151
|
+
"\u0422": 'T'
|
152
|
+
"\u0423": 'U'
|
153
|
+
"\u0424": 'F'
|
154
|
+
"\u0425": 'Kh'
|
155
|
+
"\u0426": 'Ts'
|
156
|
+
"\u0427": 'Ch'
|
157
|
+
"\u0428": 'Sh'
|
158
|
+
"\u0429": 'Shch'
|
159
|
+
"\u042c": "\u2019"
|
160
|
+
"\u042e": 'Yu'
|
161
|
+
"\u042f": 'Ya'
|
162
|
+
"\u0490": 'G'
|
163
|
+
|
@@ -0,0 +1,208 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 2019
|
4
|
+
language: ukr
|
5
|
+
source_script: Cyrl
|
6
|
+
destination_script: Latn
|
7
|
+
name: BGN/PCGN 2019 Agreement
|
8
|
+
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
|
9
|
+
creation_date: 2019
|
10
|
+
confirmation_date: 2020-01
|
11
|
+
description: |
|
12
|
+
The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
|
13
|
+
in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
|
14
|
+
since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
|
15
|
+
|
16
|
+
notes:
|
17
|
+
- |
|
18
|
+
The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
|
19
|
+
of the national system within Ukraine. Note, however, that this system is not recommended for
|
20
|
+
reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
|
21
|
+
This system also lacks the methodology outlined in the 1965 System to provide additional
|
22
|
+
differentiation between digraphs and individual character sequences.
|
23
|
+
For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
|
24
|
+
sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
|
25
|
+
from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
|
26
|
+
the characters ж, х, ш, ц and the character sequence тш.
|
27
|
+
- To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
|
28
|
+
- The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
|
29
|
+
- These characters differ significantly in romanization from the BGN/PCGN 1965 system.
|
30
|
+
|
31
|
+
tests:
|
32
|
+
- source: Алушта
|
33
|
+
expected: Alushta
|
34
|
+
- source: Борщагівка
|
35
|
+
expected: Borshchahivka
|
36
|
+
- source: Вишгород
|
37
|
+
expected: Vyshhorod
|
38
|
+
- source: Гадяч
|
39
|
+
expected: Hadiach
|
40
|
+
- source: Згорани
|
41
|
+
expected: Zghorany
|
42
|
+
- source: Ґалаґан
|
43
|
+
expected: Galagan
|
44
|
+
- source: Дон
|
45
|
+
expected: Don
|
46
|
+
- source: Рівне
|
47
|
+
expected: Rivne
|
48
|
+
- source: Єнакієве
|
49
|
+
expected: Yenakiieve
|
50
|
+
- source: Наєнко
|
51
|
+
expected: Naienko
|
52
|
+
- source: Житомир
|
53
|
+
expected: Zhytomyr
|
54
|
+
- source: Запоріжжя
|
55
|
+
expected: Zaporizhzhia
|
56
|
+
- source: Закарпаття
|
57
|
+
expected: Zakarpattia
|
58
|
+
- source: Медвин
|
59
|
+
expected: Medvyn
|
60
|
+
- source: Іршава
|
61
|
+
expected: Irshava
|
62
|
+
- source: Їжакевич
|
63
|
+
expected: Yizhakevych
|
64
|
+
- source: Кадіївка
|
65
|
+
expected: Kadiivka
|
66
|
+
- source: Йосипівка
|
67
|
+
expected: Yosypivka
|
68
|
+
- source: Стрий
|
69
|
+
expected: Stryi
|
70
|
+
- source: Київ
|
71
|
+
expected: Kyiv
|
72
|
+
- source: Лебедин
|
73
|
+
expected: Lebedyn
|
74
|
+
- source: Миколаїв
|
75
|
+
expected: Mykolaiv
|
76
|
+
- source: Ніжин
|
77
|
+
expected: Nizhyn
|
78
|
+
- source: Одеса
|
79
|
+
expected: Odesa
|
80
|
+
- source: Полтава
|
81
|
+
expected: Poltava
|
82
|
+
- source: Ромни
|
83
|
+
expected: Romny
|
84
|
+
- source: Суми
|
85
|
+
expected: Sumy
|
86
|
+
- source: Тетерів
|
87
|
+
expected: Teteriv
|
88
|
+
- source: Ужгород
|
89
|
+
expected: Uzhhorod
|
90
|
+
- source: Фастів
|
91
|
+
expected: Fastiv
|
92
|
+
- source: Харків
|
93
|
+
expected: Kharkiv
|
94
|
+
- source: Біла Церква
|
95
|
+
expected: Bila Tserkva
|
96
|
+
- source: Чернівці
|
97
|
+
expected: Chernivtsi
|
98
|
+
- source: Шостка
|
99
|
+
expected: Shostka
|
100
|
+
- source: Гоща
|
101
|
+
expected: Hoshcha
|
102
|
+
- source: Русь
|
103
|
+
expected: Rus
|
104
|
+
- source: Юрій
|
105
|
+
expected: Yurii
|
106
|
+
- source: Крюківка
|
107
|
+
expected: Kriukivka
|
108
|
+
- source: Яготин
|
109
|
+
expected: Yahotyn
|
110
|
+
- source: Ічня
|
111
|
+
expected: Ichnia
|
112
|
+
- source: Знам’янка
|
113
|
+
expected: Znamianka
|
114
|
+
|
115
|
+
map:
|
116
|
+
rules:
|
117
|
+
- pattern: (?<=З|з)(Г|г)
|
118
|
+
result: gh
|
119
|
+
- pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
|
120
|
+
result: Ye
|
121
|
+
- pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
|
122
|
+
result: ye
|
123
|
+
- pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
|
124
|
+
result: Yi
|
125
|
+
- pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
|
126
|
+
result: yi
|
127
|
+
- pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
|
128
|
+
result: "Y"
|
129
|
+
- pattern: (?<!\b\u2019)\b\u0439 # й in initial position -> y
|
130
|
+
result: "y"
|
131
|
+
- pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
|
132
|
+
result: Yu
|
133
|
+
- pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
|
134
|
+
result: yu
|
135
|
+
- pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
|
136
|
+
result: Ya
|
137
|
+
- pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
|
138
|
+
result: ya
|
139
|
+
- pattern: \b\u2019\b # remove ’
|
140
|
+
result: ""
|
141
|
+
|
142
|
+
characters:
|
143
|
+
"\u0410": "A" # А
|
144
|
+
"\u0411": "B" # Б
|
145
|
+
"\u0412": "V" # В
|
146
|
+
"\u0413": "H" # Г
|
147
|
+
"\u0490": "G" # Ґ
|
148
|
+
"\u0414": "D" # Д
|
149
|
+
"\u0415": "E" # Е
|
150
|
+
"\u0404": "Ie" # Є
|
151
|
+
"\u0416": "Zh" # Ж
|
152
|
+
"\u0417": "Z" # З
|
153
|
+
"\u0418": "Y" # И
|
154
|
+
"\u0406": "I" # І
|
155
|
+
"\u0407": "I" # Ї
|
156
|
+
"\u0419": "I" # Й
|
157
|
+
"\u041a": "K" # К
|
158
|
+
"\u041b": "L" # Л
|
159
|
+
"\u041c": "M" # М
|
160
|
+
"\u041d": "N" # Н
|
161
|
+
"\u041e": "O" # О
|
162
|
+
"\u041f": "P" # П
|
163
|
+
"\u0420": "R" # Р
|
164
|
+
"\u0421": "S" # С
|
165
|
+
"\u0422": "T" # Т
|
166
|
+
"\u0423": "U" # У
|
167
|
+
"\u0424": "F" # Ф
|
168
|
+
"\u0425": "Kh" # Х
|
169
|
+
"\u0426": "Ts" # Ц
|
170
|
+
"\u0427": "Ch" # Ч
|
171
|
+
"\u0428": "Sh" # Ш
|
172
|
+
"\u0429": "Shch" # Щ
|
173
|
+
"\u042e": "Iu" # Ю
|
174
|
+
"\u042f": "Ia" # Я
|
175
|
+
"\u042c": "" # Ь
|
176
|
+
"\u0430": "a" # а
|
177
|
+
"\u0431": "b" # б
|
178
|
+
"\u0432": "v" # в
|
179
|
+
"\u0433": "h" # г
|
180
|
+
"\u0491": "g" # ґ
|
181
|
+
"\u0434": "d" # д
|
182
|
+
"\u0435": "e" # е
|
183
|
+
"\u0454": "ie" # є
|
184
|
+
"\u0436": "zh" # ж
|
185
|
+
"\u0437": "z" # з
|
186
|
+
"\u0438": "y" # и
|
187
|
+
"\u0456": "i" # і
|
188
|
+
"\u0457": "i" # ї
|
189
|
+
"\u0439": "i" # й
|
190
|
+
"\u043a": "k" # к
|
191
|
+
"\u043b": "l" # л
|
192
|
+
"\u043c": "m" # м
|
193
|
+
"\u043d": "n" # н
|
194
|
+
"\u043e": "o" # о
|
195
|
+
"\u043f": "p" # п
|
196
|
+
"\u0440": "r" # р
|
197
|
+
"\u0441": "s" # с
|
198
|
+
"\u0442": "t" # т
|
199
|
+
"\u0443": "u" # у
|
200
|
+
"\u0444": "f" # ф
|
201
|
+
"\u0445": "kh" # х
|
202
|
+
"\u0446": "ts" # ц
|
203
|
+
"\u0447": "ch" # ч
|
204
|
+
"\u0448": "sh" # ш
|
205
|
+
"\u0449": "shch" # щ
|
206
|
+
"\u044e": "iu" # ю
|
207
|
+
"\u044f": "ia" # я
|
208
|
+
"\u044c": "" # ь
|