interscript 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +36 -17
  4. data/bin/rspec +29 -0
  5. data/bin/setup +8 -0
  6. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  7. data/lib/g2pwrapper.py +34 -0
  8. data/lib/interscript-opal.rb +2 -0
  9. data/lib/interscript.rb +138 -38
  10. data/lib/interscript/command.rb +28 -0
  11. data/lib/interscript/fs.rb +69 -0
  12. data/lib/interscript/mapping.rb +142 -0
  13. data/lib/interscript/opal.rb +23 -0
  14. data/lib/interscript/opal/maps.js.erb +7 -0
  15. data/lib/interscript/opal_map_translate.rb +12 -0
  16. data/lib/interscript/version.rb +1 -1
  17. data/lib/model-7 +0 -0
  18. data/lib/tha-pt-b-7 +0 -0
  19. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  20. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  21. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  22. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  23. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  24. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  25. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  26. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  27. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  28. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  29. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  30. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  31. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +222 -0
  32. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  33. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  34. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  35. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  36. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  37. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  38. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +175 -0
  39. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  40. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  41. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  42. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  43. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  44. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  45. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  46. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  47. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  48. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  49. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  50. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  51. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  52. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  53. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  54. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  55. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  56. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  57. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  58. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  59. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  60. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  61. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
  62. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  63. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  64. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +163 -0
  65. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  66. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  67. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  68. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  69. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  70. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  71. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  72. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  73. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  74. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  75. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  76. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  77. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  78. data/maps/icao-bel-Cyrl-Latn-9303.yaml +141 -0
  79. data/maps/icao-bul-Cyrl-Latn-9303.yaml +122 -0
  80. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  81. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  82. data/maps/icao-per-Arab-Latn-9303.yaml +104 -0
  83. data/maps/icao-rus-Cyrl-Latn-9303.yaml +118 -0
  84. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  85. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +120 -0
  86. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  87. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  88. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  89. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +272 -0
  90. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  91. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  92. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  93. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  94. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  95. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  96. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  97. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  98. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +110 -0
  99. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  100. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  101. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  102. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  103. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  104. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  105. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  106. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  107. data/maps/odni-mkd-cyrl-latn-2015.yaml +122 -0
  108. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  109. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  110. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  111. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  112. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  113. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  114. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +167 -0
  115. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  116. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  117. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  118. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  119. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  120. data/maps/ses-ara-arab-latn-1930.yaml +275 -0
  121. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  122. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  123. data/maps/un-ara-Arab-Latn-1971.yaml +127 -0
  124. data/maps/un-ara-Arab-Latn-1972.yaml +152 -0
  125. data/maps/un-ara-Arab-Latn-2017.yaml +383 -0
  126. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  127. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  128. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  129. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  130. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  131. data/maps/un-mon-Mong-Latn-2013.yaml +93 -0
  132. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  133. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  134. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  135. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  136. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  137. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  138. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  139. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  140. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  141. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  142. data/spec/interscript/mapping_spec.rb +42 -0
  143. data/spec/interscript_spec.rb +26 -0
  144. data/spec/spec_helper.rb +3 -0
  145. metadata +295 -11
@@ -0,0 +1,166 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2005
4
+ language: srp
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF SERBIAN, BGN/PCGN 2005 System
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816783/TABLE_OF_CORRESPONDENCES_FOR_SERBIAN.pdf
9
+ creation_date: 2005
10
+ confirmation_date: 2019-06
11
+ description: |
12
+ The tabulation below reflects the Serbian Cyrillic alphabet and the standard Roman script equivalents
13
+ used in both Serbia and Montenegro.
14
+
15
+ notes:
16
+ - The Serbian Cyrillic lowercase italic Д may sometimes be seen as g.
17
+ There is no specific Unicode encoding for this variant form so a comparable character
18
+ has been used here for illustrative purposes.
19
+
20
+ - The digraph dj(Dj) will occasionally be found as an alternative form of đ(Đ).
21
+
22
+ - The Serbian Cyrillic lowercase italic П may sometimes be seen as ӣ.
23
+ There is no specific Unicode encoding for this variant form so a comparable character
24
+ has been used here for illustrative purposes.
25
+
26
+ - The Serbian Cyrillic lowercase italic Т may sometimes be seen as w.
27
+ There is no specific Unicode encoding for this variant form so a comparable character
28
+ has been used here for illustrative purposes.
29
+
30
+ - |
31
+ An inventory of letter-diacritic combinations, with their Unicode encoding,
32
+ in addition to the unmodified letters of the basic Roman script is:
33
+ | Đ (U+0110) | đ (U+0111) |
34
+ | Ž (U+017D) | ž (U+017E) |
35
+ | Lj (U+01C8)* | lj (U+01C9)* |
36
+ | Ć (U+0106) | ć (U+0107) |
37
+ | Dž (U+01C5)* | dž (U+01C6)* |
38
+ | Š (U+0160) | š (U+0161) |
39
+ * Note that these characters can also be reproduced with individual letters (e.g. l+j).
40
+
41
+ - The Roman-script columns show only lowercase forms but, when applying the table,
42
+ uppercase and lowercase Roman letters as appropriate should be used.
43
+
44
+ tests:
45
+ - source: Шупља Стена
46
+ expected: Šuplja Stena
47
+ - source: Чукарица
48
+ expected: Čukarica
49
+ - source: Црна Трава
50
+ expected: Crna Trava
51
+ - source: Херцег Нови
52
+ expected: Herceg Novi
53
+ - source: Улцињ
54
+ expected: Ulcinj
55
+ - source: Ужице
56
+ expected: Užice
57
+ - source: Тресаначка Река
58
+ expected: Tresanačka Reka
59
+ - source: Сјеница
60
+ expected: Sjenica
61
+ - source: Рожаје
62
+ expected: Rožaje
63
+ - source: Пљевља
64
+ expected: Pljevlja
65
+ - source: Оџаци
66
+ expected: Odžaci
67
+ - source: Никшић
68
+ expected: Nikšić
69
+ - source: Медвеђа
70
+ expected: Medveđa
71
+ - source: Лозница
72
+ expected: Loznica
73
+ - source: Књажевац
74
+ expected: Knjaževac
75
+ - source: Зрењанин
76
+ expected: Zrenjanin
77
+ - source: Житорађа
78
+ expected: Žitorađa
79
+ - source: Ервеник
80
+ expected: Ervenik
81
+ - source: Доње Љупче
82
+ expected: Donje Ljupče
83
+ - source: Гусиње
84
+ expected: Gusinje
85
+ - source: ГУСИЊЕ
86
+ expected: GUSINJE
87
+ - source: Врњачка Бања
88
+ expected: Vrnjačka Banja
89
+ - source: Бијело Поље
90
+ expected: Bijelo Polje
91
+ - source: Алибунар
92
+ expected: Alibunar
93
+
94
+ map:
95
+ postrules:
96
+ #LJ
97
+ - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
98
+ result: "LJ"
99
+ #NJ
100
+ - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
101
+ result: "NJ"
102
+ #DŽ
103
+ - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
104
+ result: "DŽ"
105
+
106
+ characters:
107
+ "\u0410": "A"
108
+ "\u0411": "B"
109
+ "\u0412": "V"
110
+ "\u0413": "G"
111
+ "\u0414": "D"
112
+ "\u0402": "\u0110" # Đ
113
+ "\u0415": "E"
114
+ "\u0416": "\u005a\u030c" # Ž
115
+ "\u0417": "Z"
116
+ "\u0418": "I"
117
+ "\u0408": "J"
118
+ "\u041A": "K"
119
+ "\u041B": "L"
120
+ "\u0409": "Lj"
121
+ "\u041C": "M"
122
+ "\u041D": "N"
123
+ "\u040A": "Nj"
124
+ "\u041E": "O"
125
+ "\u041F": "P"
126
+ "\u0420": "R"
127
+ "\u0421": "S"
128
+ "\u0422": "T"
129
+ "\u040B": "\u0043\u0301" # Ć
130
+ "\u0423": "U"
131
+ "\u0424": "F"
132
+ "\u0425": "H"
133
+ "\u0426": "C"
134
+ "\u0427": "\u0043\u030c" # Č
135
+ "\u040F": "D\u007a\u030c" # Dž
136
+ "\u0428": "\u0053\u030c" # Š
137
+ "\u0430": "a"
138
+ "\u0431": "b"
139
+ "\u0432": "v"
140
+ "\u0433": "g"
141
+ "\u0434": "d"
142
+ "\u0452": "\u0111" # đ
143
+ "\u0435": "e"
144
+ "\u0436": "\u007a\u030c" # ž
145
+ "\u0437": "z"
146
+ "\u0438": "i"
147
+ "\u0458": "j"
148
+ "\u043A": "k"
149
+ "\u043B": "l"
150
+ "\u0459": "lj"
151
+ "\u043C": "m"
152
+ "\u043D": "n"
153
+ "\u045A": "nj"
154
+ "\u043E": "o"
155
+ "\u043F": "p"
156
+ "\u0440": "r"
157
+ "\u0441": "s"
158
+ "\u0442": "t"
159
+ "\u045B": "\u0063\u0301" # ć
160
+ "\u0443": "u"
161
+ "\u0444": "f"
162
+ "\u0445": "h"
163
+ "\u0446": "c"
164
+ "\u0447": "\u0063\u030c" # č
165
+ "\u045F": "d\u007a\u030c" # dž
166
+ "\u0448": "\u0073\u030c" # š
@@ -0,0 +1,163 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 1965
4
+ language: ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1965 System
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
9
+ creation_date: 1947
10
+ confirmation_date: 2019-06
11
+ description: |
12
+ The BGN/PCGN system for Ukrainian was designed for use in romanizing
13
+ names written in the Ukrainian alphabet. The Ukrainian alphabet
14
+ contains five characters not present in the Russian alphabet: ґ, є, і,
15
+ ї, and ’.
16
+
17
+ notes:
18
+ - The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
19
+ - All apostrophes appearing in romanization are Unicode encoding 2019.
20
+ - The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
21
+
22
+ tests:
23
+ - source: Авдіївська Міськрада
24
+ expected: Avdiyivs’ka Mis’krada
25
+ - source: Бабаї
26
+ expected: Babayi
27
+ - source: Віленька
28
+ expected: Vilen’ka
29
+ - source: Гагарінський Район
30
+ expected: Haharins’kyy Rayon
31
+ - source: Довбушева Криниця
32
+ expected: Dovbusheva Krynytsya
33
+ - source: Дідівщина
34
+ expected: Didivshchyna
35
+ - source: Економічна
36
+ expected: Ekonomichna
37
+ - source: Єфросинівка
38
+ expected: Yefrosynivka
39
+ - source: Жигуліна Роща
40
+ expected: Zhyhulina Roshcha
41
+ - source: Загір’я
42
+ expected: Zahir”ya
43
+ - source: З’єднувальний Канал
44
+ expected: Z”yednuval’nyy Kanal
45
+ - source: Ивахи
46
+ expected: Yvakhy
47
+ - source: Івано-Франківська Міськрада
48
+ expected: Ivano-Frankivs’ka Mis’krada
49
+ - source: Їжаківка
50
+ expected: Yizhakivka
51
+ - source: Йосиповичі
52
+ expected: Yosypovychi
53
+ - source: Кабичівка
54
+ expected: Kabychivka
55
+ - source: Лазуровий Провулок
56
+ expected: Lazurovyy Provulok
57
+ - source: Мала Сейдеминуха
58
+ expected: Mala Seydemynukha
59
+ - source: Нагірний
60
+ expected: Nahirnyy
61
+ - source: Овер’янівське Озеро
62
+ expected: Over”yanivs’ke Ozero
63
+ - source: Павлопільське Водосховище
64
+ expected: Pavlopil’s’ke Vodoskhovyshche
65
+ - source: Приґородний
66
+ expected: Prygorodnyy
67
+ - source: Радгосп Правда
68
+ expected: Radhosp Pravda
69
+ - source: Садово-Хрустальненський
70
+ expected: Sadovo-Khrustal’nens’kyy
71
+ - source: Таратутине
72
+ expected: Taratutyne
73
+ - source: Улу-Узень
74
+ expected: Ulu-Uzen’
75
+ - source: Христофорівка
76
+ expected: Khrystoforivka
77
+ - source: Центральна Вулиця
78
+ expected: Tsentral’na Vulytsya
79
+ - source: Чайковичі
80
+ expected: Chaykovychi
81
+ - source: Шалаші
82
+ expected: Shalashi
83
+ - source: Щербинівка
84
+ expected: Shcherbynivka
85
+ - source: Южноукраїнська Міськрада
86
+ expected: Yuzhnoukrayins’ka Mis’krada
87
+ - source: Ясениця
88
+ expected: Yasenytsya
89
+
90
+ map:
91
+ rules:
92
+ - pattern: \b\u2019\b # ’ in the middle of a word -> ”
93
+ result: "\u201d"
94
+
95
+ characters:
96
+ "\u0430": 'a'
97
+ "\u0431": 'b'
98
+ "\u0432": 'v'
99
+ "\u0433": 'h'
100
+ "\u0434": 'd'
101
+ "\u0435": 'e'
102
+ "\u0436": 'zh'
103
+ "\u0437": 'z'
104
+ "\u0438": 'y'
105
+ "\u0439": 'y'
106
+ "\u043a": 'k'
107
+ "\u043b": 'l'
108
+ "\u043c": 'm'
109
+ "\u043d": 'n'
110
+ "\u043e": 'o'
111
+ "\u043f": 'p'
112
+ "\u0440": 'r'
113
+ "\u0441": 's'
114
+ "\u0442": 't'
115
+ "\u0443": 'u'
116
+ "\u0444": 'f'
117
+ "\u0445": 'kh'
118
+ "\u0446": 'ts'
119
+ "\u0447": 'ch'
120
+ "\u0448": 'sh'
121
+ "\u0449": 'shch'
122
+ "\u044c": "\u2019"
123
+ "\u044e": 'yu'
124
+ "\u044f": 'ya'
125
+ "\u0454": 'ye'
126
+ "\u0456": 'i'
127
+ "\u0457": 'yi'
128
+ "\u0491": 'g'
129
+ "\ufeff": ' '
130
+ "\u0404": 'Ye'
131
+ "\u0406": 'I'
132
+ "\u0407": 'Yi'
133
+ "\u0410": 'A'
134
+ "\u0411": 'B'
135
+ "\u0412": 'V'
136
+ "\u0413": 'H'
137
+ "\u0414": 'D'
138
+ "\u0415": 'E'
139
+ "\u0416": 'Zh'
140
+ "\u0417": 'Z'
141
+ "\u0418": 'Y'
142
+ "\u0419": 'Y'
143
+ "\u041a": 'K'
144
+ "\u041b": 'L'
145
+ "\u041c": 'M'
146
+ "\u041d": 'N'
147
+ "\u041e": 'O'
148
+ "\u041f": 'P'
149
+ "\u0420": 'R'
150
+ "\u0421": 'S'
151
+ "\u0422": 'T'
152
+ "\u0423": 'U'
153
+ "\u0424": 'F'
154
+ "\u0425": 'Kh'
155
+ "\u0426": 'Ts'
156
+ "\u0427": 'Ch'
157
+ "\u0428": 'Sh'
158
+ "\u0429": 'Shch'
159
+ "\u042c": "\u2019"
160
+ "\u042e": 'Yu'
161
+ "\u042f": 'Ya'
162
+ "\u0490": 'G'
163
+
@@ -0,0 +1,208 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2019
4
+ language: ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BGN/PCGN 2019 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
9
+ creation_date: 2019
10
+ confirmation_date: 2020-01
11
+ description: |
12
+ The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
13
+ in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
14
+ since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
15
+
16
+ notes:
17
+ - |
18
+ The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
19
+ of the national system within Ukraine. Note, however, that this system is not recommended for
20
+ reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
21
+ This system also lacks the methodology outlined in the 1965 System to provide additional
22
+ differentiation between digraphs and individual character sequences.
23
+ For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
24
+ sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
25
+ from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
26
+ the characters ж, х, ш, ц and the character sequence тш.
27
+ - To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
28
+ - The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
29
+ - These characters differ significantly in romanization from the BGN/PCGN 1965 system.
30
+
31
+ tests:
32
+ - source: Алушта
33
+ expected: Alushta
34
+ - source: Борщагівка
35
+ expected: Borshchahivka
36
+ - source: Вишгород
37
+ expected: Vyshhorod
38
+ - source: Гадяч
39
+ expected: Hadiach
40
+ - source: Згорани
41
+ expected: Zghorany
42
+ - source: Ґалаґан
43
+ expected: Galagan
44
+ - source: Дон
45
+ expected: Don
46
+ - source: Рівне
47
+ expected: Rivne
48
+ - source: Єнакієве
49
+ expected: Yenakiieve
50
+ - source: Наєнко
51
+ expected: Naienko
52
+ - source: Житомир
53
+ expected: Zhytomyr
54
+ - source: Запоріжжя
55
+ expected: Zaporizhzhia
56
+ - source: Закарпаття
57
+ expected: Zakarpattia
58
+ - source: Медвин
59
+ expected: Medvyn
60
+ - source: Іршава
61
+ expected: Irshava
62
+ - source: Їжакевич
63
+ expected: Yizhakevych
64
+ - source: Кадіївка
65
+ expected: Kadiivka
66
+ - source: Йосипівка
67
+ expected: Yosypivka
68
+ - source: Стрий
69
+ expected: Stryi
70
+ - source: Київ
71
+ expected: Kyiv
72
+ - source: Лебедин
73
+ expected: Lebedyn
74
+ - source: Миколаїв
75
+ expected: Mykolaiv
76
+ - source: Ніжин
77
+ expected: Nizhyn
78
+ - source: Одеса
79
+ expected: Odesa
80
+ - source: Полтава
81
+ expected: Poltava
82
+ - source: Ромни
83
+ expected: Romny
84
+ - source: Суми
85
+ expected: Sumy
86
+ - source: Тетерів
87
+ expected: Teteriv
88
+ - source: Ужгород
89
+ expected: Uzhhorod
90
+ - source: Фастів
91
+ expected: Fastiv
92
+ - source: Харків
93
+ expected: Kharkiv
94
+ - source: Біла Церква
95
+ expected: Bila Tserkva
96
+ - source: Чернівці
97
+ expected: Chernivtsi
98
+ - source: Шостка
99
+ expected: Shostka
100
+ - source: Гоща
101
+ expected: Hoshcha
102
+ - source: Русь
103
+ expected: Rus
104
+ - source: Юрій
105
+ expected: Yurii
106
+ - source: Крюківка
107
+ expected: Kriukivka
108
+ - source: Яготин
109
+ expected: Yahotyn
110
+ - source: Ічня
111
+ expected: Ichnia
112
+ - source: Знам’янка
113
+ expected: Znamianka
114
+
115
+ map:
116
+ rules:
117
+ - pattern: (?<=З|з)(Г|г)
118
+ result: gh
119
+ - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
120
+ result: Ye
121
+ - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
122
+ result: ye
123
+ - pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
124
+ result: Yi
125
+ - pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
126
+ result: yi
127
+ - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
128
+ result: "Y"
129
+ - pattern: (?<!\b\u2019)\b\u0439 # й in initial position -> y
130
+ result: "y"
131
+ - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
132
+ result: Yu
133
+ - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
134
+ result: yu
135
+ - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
136
+ result: Ya
137
+ - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
138
+ result: ya
139
+ - pattern: \b\u2019\b # remove ’
140
+ result: ""
141
+
142
+ characters:
143
+ "\u0410": "A" # А
144
+ "\u0411": "B" # Б
145
+ "\u0412": "V" # В
146
+ "\u0413": "H" # Г
147
+ "\u0490": "G" # Ґ
148
+ "\u0414": "D" # Д
149
+ "\u0415": "E" # Е
150
+ "\u0404": "Ie" # Є
151
+ "\u0416": "Zh" # Ж
152
+ "\u0417": "Z" # З
153
+ "\u0418": "Y" # И
154
+ "\u0406": "I" # І
155
+ "\u0407": "I" # Ї
156
+ "\u0419": "I" # Й
157
+ "\u041a": "K" # К
158
+ "\u041b": "L" # Л
159
+ "\u041c": "M" # М
160
+ "\u041d": "N" # Н
161
+ "\u041e": "O" # О
162
+ "\u041f": "P" # П
163
+ "\u0420": "R" # Р
164
+ "\u0421": "S" # С
165
+ "\u0422": "T" # Т
166
+ "\u0423": "U" # У
167
+ "\u0424": "F" # Ф
168
+ "\u0425": "Kh" # Х
169
+ "\u0426": "Ts" # Ц
170
+ "\u0427": "Ch" # Ч
171
+ "\u0428": "Sh" # Ш
172
+ "\u0429": "Shch" # Щ
173
+ "\u042e": "Iu" # Ю
174
+ "\u042f": "Ia" # Я
175
+ "\u042c": "" # Ь
176
+ "\u0430": "a" # а
177
+ "\u0431": "b" # б
178
+ "\u0432": "v" # в
179
+ "\u0433": "h" # г
180
+ "\u0491": "g" # ґ
181
+ "\u0434": "d" # д
182
+ "\u0435": "e" # е
183
+ "\u0454": "ie" # є
184
+ "\u0436": "zh" # ж
185
+ "\u0437": "z" # з
186
+ "\u0438": "y" # и
187
+ "\u0456": "i" # і
188
+ "\u0457": "i" # ї
189
+ "\u0439": "i" # й
190
+ "\u043a": "k" # к
191
+ "\u043b": "l" # л
192
+ "\u043c": "m" # м
193
+ "\u043d": "n" # н
194
+ "\u043e": "o" # о
195
+ "\u043f": "p" # п
196
+ "\u0440": "r" # р
197
+ "\u0441": "s" # с
198
+ "\u0442": "t" # т
199
+ "\u0443": "u" # у
200
+ "\u0444": "f" # ф
201
+ "\u0445": "kh" # х
202
+ "\u0446": "ts" # ц
203
+ "\u0447": "ch" # ч
204
+ "\u0448": "sh" # ш
205
+ "\u0449": "shch" # щ
206
+ "\u044e": "iu" # ю
207
+ "\u044f": "ia" # я
208
+ "\u044c": "" # ь