interscript 0.1.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (183) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +76 -128
  21. data/lib/interscript/command.rb +6 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -223
  63. data/README.adoc +0 -297
  64. data/bin/rspec +0 -29
  65. data/lib/g2pwrapper.py +0 -34
  66. data/lib/interscript/mapping.rb +0 -125
  67. data/lib/model-7 +0 -0
  68. data/lib/tha-pt-b-7 +0 -0
  69. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  70. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  71. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  72. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  73. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  74. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  75. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  76. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  77. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  78. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  79. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  80. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  81. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  82. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  83. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  84. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  85. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  86. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  87. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  88. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  89. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  90. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  91. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  92. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  93. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  94. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  95. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  96. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  97. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  98. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  99. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  100. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  101. data/maps/bgnpcgn-chn-Hans-Latn-1979.yaml +0 -7456
  102. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  103. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  104. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  105. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  106. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  107. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  108. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  109. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  110. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  111. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  112. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  113. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  114. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  115. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  116. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  117. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  118. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  119. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  120. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  121. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  122. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  123. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  124. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  125. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  126. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  127. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  128. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  129. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  130. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  131. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  132. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  133. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  134. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  135. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  136. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  137. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  138. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  139. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  140. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  141. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  142. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  143. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  144. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  145. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  146. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  147. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  148. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  149. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  150. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  151. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  152. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  153. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  154. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  155. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  156. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  157. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  158. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  159. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  160. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  161. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  162. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  163. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  164. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  165. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  166. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  167. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  168. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  169. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  170. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  171. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  172. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  173. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  174. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  175. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  176. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  177. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  178. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  179. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  180. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  181. data/spec/interscript/mapping_spec.rb +0 -42
  182. data/spec/interscript_spec.rb +0 -26
  183. data/spec/spec_helper.rb +0 -3
@@ -1,166 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2005
4
- language: srp
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ROMANIZATION OF SERBIAN, BGN/PCGN 2005 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816783/TABLE_OF_CORRESPONDENCES_FOR_SERBIAN.pdf
9
- creation_date: 2005
10
- confirmation_date: 2019-06
11
- description: |
12
- The tabulation below reflects the Serbian Cyrillic alphabet and the standard Roman script equivalents
13
- used in both Serbia and Montenegro.
14
-
15
- notes:
16
- - The Serbian Cyrillic lowercase italic Д may sometimes be seen as g.
17
- There is no specific Unicode encoding for this variant form so a comparable character
18
- has been used here for illustrative purposes.
19
-
20
- - The digraph dj(Dj) will occasionally be found as an alternative form of đ(Đ).
21
-
22
- - The Serbian Cyrillic lowercase italic П may sometimes be seen as ӣ.
23
- There is no specific Unicode encoding for this variant form so a comparable character
24
- has been used here for illustrative purposes.
25
-
26
- - The Serbian Cyrillic lowercase italic Т may sometimes be seen as w.
27
- There is no specific Unicode encoding for this variant form so a comparable character
28
- has been used here for illustrative purposes.
29
-
30
- - |
31
- An inventory of letter-diacritic combinations, with their Unicode encoding,
32
- in addition to the unmodified letters of the basic Roman script is:
33
- | Đ (U+0110) | đ (U+0111) |
34
- | Ž (U+017D) | ž (U+017E) |
35
- | Lj (U+01C8)* | lj (U+01C9)* |
36
- | Ć (U+0106) | ć (U+0107) |
37
- | Dž (U+01C5)* | dž (U+01C6)* |
38
- | Š (U+0160) | š (U+0161) |
39
- * Note that these characters can also be reproduced with individual letters (e.g. l+j).
40
-
41
- - The Roman-script columns show only lowercase forms but, when applying the table,
42
- uppercase and lowercase Roman letters as appropriate should be used.
43
-
44
- tests:
45
- - source: Шупља Стена
46
- expected: Šuplja Stena
47
- - source: Чукарица
48
- expected: Čukarica
49
- - source: Црна Трава
50
- expected: Crna Trava
51
- - source: Херцег Нови
52
- expected: Herceg Novi
53
- - source: Улцињ
54
- expected: Ulcinj
55
- - source: Ужице
56
- expected: Užice
57
- - source: Тресаначка Река
58
- expected: Tresanačka Reka
59
- - source: Сјеница
60
- expected: Sjenica
61
- - source: Рожаје
62
- expected: Rožaje
63
- - source: Пљевља
64
- expected: Pljevlja
65
- - source: Оџаци
66
- expected: Odžaci
67
- - source: Никшић
68
- expected: Nikšić
69
- - source: Медвеђа
70
- expected: Medveđa
71
- - source: Лозница
72
- expected: Loznica
73
- - source: Књажевац
74
- expected: Knjaževac
75
- - source: Зрењанин
76
- expected: Zrenjanin
77
- - source: Житорађа
78
- expected: Žitorađa
79
- - source: Ервеник
80
- expected: Ervenik
81
- - source: Доње Љупче
82
- expected: Donje Ljupče
83
- - source: Гусиње
84
- expected: Gusinje
85
- - source: ГУСИЊЕ
86
- expected: GUSINJE
87
- - source: Врњачка Бања
88
- expected: Vrnjačka Banja
89
- - source: Бијело Поље
90
- expected: Bijelo Polje
91
- - source: Алибунар
92
- expected: Alibunar
93
-
94
- map:
95
- postrules:
96
- #LJ
97
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
98
- result: "LJ"
99
- #NJ
100
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
101
- result: "NJ"
102
- #DŽ
103
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
104
- result: "DŽ"
105
-
106
- characters:
107
- "\u0410": "A"
108
- "\u0411": "B"
109
- "\u0412": "V"
110
- "\u0413": "G"
111
- "\u0414": "D"
112
- "\u0402": "\u0110" # Đ
113
- "\u0415": "E"
114
- "\u0416": "\u005a\u030c" # Ž
115
- "\u0417": "Z"
116
- "\u0418": "I"
117
- "\u0408": "J"
118
- "\u041A": "K"
119
- "\u041B": "L"
120
- "\u0409": "Lj"
121
- "\u041C": "M"
122
- "\u041D": "N"
123
- "\u040A": "Nj"
124
- "\u041E": "O"
125
- "\u041F": "P"
126
- "\u0420": "R"
127
- "\u0421": "S"
128
- "\u0422": "T"
129
- "\u040B": "\u0043\u0301" # Ć
130
- "\u0423": "U"
131
- "\u0424": "F"
132
- "\u0425": "H"
133
- "\u0426": "C"
134
- "\u0427": "\u0043\u030c" # Č
135
- "\u040F": "D\u007a\u030c" # Dž
136
- "\u0428": "\u0053\u030c" # Š
137
- "\u0430": "a"
138
- "\u0431": "b"
139
- "\u0432": "v"
140
- "\u0433": "g"
141
- "\u0434": "d"
142
- "\u0452": "\u0111" # đ
143
- "\u0435": "e"
144
- "\u0436": "\u007a\u030c" # ž
145
- "\u0437": "z"
146
- "\u0438": "i"
147
- "\u0458": "j"
148
- "\u043A": "k"
149
- "\u043B": "l"
150
- "\u0459": "lj"
151
- "\u043C": "m"
152
- "\u043D": "n"
153
- "\u045A": "nj"
154
- "\u043E": "o"
155
- "\u043F": "p"
156
- "\u0440": "r"
157
- "\u0441": "s"
158
- "\u0442": "t"
159
- "\u045B": "\u0063\u0301" # ć
160
- "\u0443": "u"
161
- "\u0444": "f"
162
- "\u0445": "h"
163
- "\u0446": "c"
164
- "\u0447": "\u0063\u030c" # č
165
- "\u045F": "d\u007a\u030c" # dž
166
- "\u0448": "\u0073\u030c" # š
@@ -1,163 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1965
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 1965 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
9
- creation_date: 1947
10
- confirmation_date: 2019-06
11
- description: |
12
- The BGN/PCGN system for Ukrainian was designed for use in romanizing
13
- names written in the Ukrainian alphabet. The Ukrainian alphabet
14
- contains five characters not present in the Russian alphabet: ґ, є, і,
15
- ї, and ’.
16
-
17
- notes:
18
- - The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
19
- - All apostrophes appearing in romanization are Unicode encoding 2019.
20
- - The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
21
-
22
- tests:
23
- - source: Авдіївська Міськрада
24
- expected: Avdiyivs’ka Mis’krada
25
- - source: Бабаї
26
- expected: Babayi
27
- - source: Віленька
28
- expected: Vilen’ka
29
- - source: Гагарінський Район
30
- expected: Haharins’kyy Rayon
31
- - source: Довбушева Криниця
32
- expected: Dovbusheva Krynytsya
33
- - source: Дідівщина
34
- expected: Didivshchyna
35
- - source: Економічна
36
- expected: Ekonomichna
37
- - source: Єфросинівка
38
- expected: Yefrosynivka
39
- - source: Жигуліна Роща
40
- expected: Zhyhulina Roshcha
41
- - source: Загір’я
42
- expected: Zahir”ya
43
- - source: З’єднувальний Канал
44
- expected: Z”yednuval’nyy Kanal
45
- - source: Ивахи
46
- expected: Yvakhy
47
- - source: Івано-Франківська Міськрада
48
- expected: Ivano-Frankivs’ka Mis’krada
49
- - source: Їжаківка
50
- expected: Yizhakivka
51
- - source: Йосиповичі
52
- expected: Yosypovychi
53
- - source: Кабичівка
54
- expected: Kabychivka
55
- - source: Лазуровий Провулок
56
- expected: Lazurovyy Provulok
57
- - source: Мала Сейдеминуха
58
- expected: Mala Seydemynukha
59
- - source: Нагірний
60
- expected: Nahirnyy
61
- - source: Овер’янівське Озеро
62
- expected: Over”yanivs’ke Ozero
63
- - source: Павлопільське Водосховище
64
- expected: Pavlopil’s’ke Vodoskhovyshche
65
- - source: Приґородний
66
- expected: Prygorodnyy
67
- - source: Радгосп Правда
68
- expected: Radhosp Pravda
69
- - source: Садово-Хрустальненський
70
- expected: Sadovo-Khrustal’nens’kyy
71
- - source: Таратутине
72
- expected: Taratutyne
73
- - source: Улу-Узень
74
- expected: Ulu-Uzen’
75
- - source: Христофорівка
76
- expected: Khrystoforivka
77
- - source: Центральна Вулиця
78
- expected: Tsentral’na Vulytsya
79
- - source: Чайковичі
80
- expected: Chaykovychi
81
- - source: Шалаші
82
- expected: Shalashi
83
- - source: Щербинівка
84
- expected: Shcherbynivka
85
- - source: Южноукраїнська Міськрада
86
- expected: Yuzhnoukrayins’ka Mis’krada
87
- - source: Ясениця
88
- expected: Yasenytsya
89
-
90
- map:
91
- rules:
92
- - pattern: \b\u2019\b # ’ in the middle of a word -> ”
93
- result: "\u201d"
94
-
95
- characters:
96
- "\u0430": 'a'
97
- "\u0431": 'b'
98
- "\u0432": 'v'
99
- "\u0433": 'h'
100
- "\u0434": 'd'
101
- "\u0435": 'e'
102
- "\u0436": 'zh'
103
- "\u0437": 'z'
104
- "\u0438": 'y'
105
- "\u0439": 'y'
106
- "\u043a": 'k'
107
- "\u043b": 'l'
108
- "\u043c": 'm'
109
- "\u043d": 'n'
110
- "\u043e": 'o'
111
- "\u043f": 'p'
112
- "\u0440": 'r'
113
- "\u0441": 's'
114
- "\u0442": 't'
115
- "\u0443": 'u'
116
- "\u0444": 'f'
117
- "\u0445": 'kh'
118
- "\u0446": 'ts'
119
- "\u0447": 'ch'
120
- "\u0448": 'sh'
121
- "\u0449": 'shch'
122
- "\u044c": "\u2019"
123
- "\u044e": 'yu'
124
- "\u044f": 'ya'
125
- "\u0454": 'ye'
126
- "\u0456": 'i'
127
- "\u0457": 'yi'
128
- "\u0491": 'g'
129
- "\ufeff": ' '
130
- "\u0404": 'Ye'
131
- "\u0406": 'I'
132
- "\u0407": 'Yi'
133
- "\u0410": 'A'
134
- "\u0411": 'B'
135
- "\u0412": 'V'
136
- "\u0413": 'H'
137
- "\u0414": 'D'
138
- "\u0415": 'E'
139
- "\u0416": 'Zh'
140
- "\u0417": 'Z'
141
- "\u0418": 'Y'
142
- "\u0419": 'Y'
143
- "\u041a": 'K'
144
- "\u041b": 'L'
145
- "\u041c": 'M'
146
- "\u041d": 'N'
147
- "\u041e": 'O'
148
- "\u041f": 'P'
149
- "\u0420": 'R'
150
- "\u0421": 'S'
151
- "\u0422": 'T'
152
- "\u0423": 'U'
153
- "\u0424": 'F'
154
- "\u0425": 'Kh'
155
- "\u0426": 'Ts'
156
- "\u0427": 'Ch'
157
- "\u0428": 'Sh'
158
- "\u0429": 'Shch'
159
- "\u042c": "\u2019"
160
- "\u042e": 'Yu'
161
- "\u042f": 'Ya'
162
- "\u0490": 'G'
163
-
@@ -1,208 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2019
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 2019 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
9
- creation_date: 2019
10
- confirmation_date: 2020-01
11
- description: |
12
- The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
13
- in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
14
- since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
15
-
16
- notes:
17
- - |
18
- The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
19
- of the national system within Ukraine. Note, however, that this system is not recommended for
20
- reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
21
- This system also lacks the methodology outlined in the 1965 System to provide additional
22
- differentiation between digraphs and individual character sequences.
23
- For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
24
- sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
25
- from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
26
- the characters ж, х, ш, ц and the character sequence тш.
27
- - To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
28
- - The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
29
- - These characters differ significantly in romanization from the BGN/PCGN 1965 system.
30
-
31
- tests:
32
- - source: Алушта
33
- expected: Alushta
34
- - source: Борщагівка
35
- expected: Borshchahivka
36
- - source: Вишгород
37
- expected: Vyshhorod
38
- - source: Гадяч
39
- expected: Hadiach
40
- - source: Згорани
41
- expected: Zghorany
42
- - source: Ґалаґан
43
- expected: Galagan
44
- - source: Дон
45
- expected: Don
46
- - source: Рівне
47
- expected: Rivne
48
- - source: Єнакієве
49
- expected: Yenakiieve
50
- - source: Наєнко
51
- expected: Naienko
52
- - source: Житомир
53
- expected: Zhytomyr
54
- - source: Запоріжжя
55
- expected: Zaporizhzhia
56
- - source: Закарпаття
57
- expected: Zakarpattia
58
- - source: Медвин
59
- expected: Medvyn
60
- - source: Іршава
61
- expected: Irshava
62
- - source: Їжакевич
63
- expected: Yizhakevych
64
- - source: Кадіївка
65
- expected: Kadiivka
66
- - source: Йосипівка
67
- expected: Yosypivka
68
- - source: Стрий
69
- expected: Stryi
70
- - source: Київ
71
- expected: Kyiv
72
- - source: Лебедин
73
- expected: Lebedyn
74
- - source: Миколаїв
75
- expected: Mykolaiv
76
- - source: Ніжин
77
- expected: Nizhyn
78
- - source: Одеса
79
- expected: Odesa
80
- - source: Полтава
81
- expected: Poltava
82
- - source: Ромни
83
- expected: Romny
84
- - source: Суми
85
- expected: Sumy
86
- - source: Тетерів
87
- expected: Teteriv
88
- - source: Ужгород
89
- expected: Uzhhorod
90
- - source: Фастів
91
- expected: Fastiv
92
- - source: Харків
93
- expected: Kharkiv
94
- - source: Біла Церква
95
- expected: Bila Tserkva
96
- - source: Чернівці
97
- expected: Chernivtsi
98
- - source: Шостка
99
- expected: Shostka
100
- - source: Гоща
101
- expected: Hoshcha
102
- - source: Русь
103
- expected: Rus
104
- - source: Юрій
105
- expected: Yurii
106
- - source: Крюківка
107
- expected: Kriukivka
108
- - source: Яготин
109
- expected: Yahotyn
110
- - source: Ічня
111
- expected: Ichnia
112
- - source: Знам’янка
113
- expected: Znamianka
114
-
115
- map:
116
- rules:
117
- - pattern: (?<=З|з)(Г|г)
118
- result: gh
119
- - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
120
- result: Ye
121
- - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
122
- result: ye
123
- - pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
124
- result: Yi
125
- - pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
126
- result: yi
127
- - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
128
- result: "Y"
129
- - pattern: (?<!\b\u2019)\b\u0439 # й in initial position -> y
130
- result: "y"
131
- - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
132
- result: Yu
133
- - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
134
- result: yu
135
- - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
136
- result: Ya
137
- - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
138
- result: ya
139
- - pattern: \b\u2019\b # remove ’
140
- result: ""
141
-
142
- characters:
143
- "\u0410": "A" # А
144
- "\u0411": "B" # Б
145
- "\u0412": "V" # В
146
- "\u0413": "H" # Г
147
- "\u0490": "G" # Ґ
148
- "\u0414": "D" # Д
149
- "\u0415": "E" # Е
150
- "\u0404": "Ie" # Є
151
- "\u0416": "Zh" # Ж
152
- "\u0417": "Z" # З
153
- "\u0418": "Y" # И
154
- "\u0406": "I" # І
155
- "\u0407": "I" # Ї
156
- "\u0419": "I" # Й
157
- "\u041a": "K" # К
158
- "\u041b": "L" # Л
159
- "\u041c": "M" # М
160
- "\u041d": "N" # Н
161
- "\u041e": "O" # О
162
- "\u041f": "P" # П
163
- "\u0420": "R" # Р
164
- "\u0421": "S" # С
165
- "\u0422": "T" # Т
166
- "\u0423": "U" # У
167
- "\u0424": "F" # Ф
168
- "\u0425": "Kh" # Х
169
- "\u0426": "Ts" # Ц
170
- "\u0427": "Ch" # Ч
171
- "\u0428": "Sh" # Ш
172
- "\u0429": "Shch" # Щ
173
- "\u042e": "Iu" # Ю
174
- "\u042f": "Ia" # Я
175
- "\u042c": "" # Ь
176
- "\u0430": "a" # а
177
- "\u0431": "b" # б
178
- "\u0432": "v" # в
179
- "\u0433": "h" # г
180
- "\u0491": "g" # ґ
181
- "\u0434": "d" # д
182
- "\u0435": "e" # е
183
- "\u0454": "ie" # є
184
- "\u0436": "zh" # ж
185
- "\u0437": "z" # з
186
- "\u0438": "y" # и
187
- "\u0456": "i" # і
188
- "\u0457": "i" # ї
189
- "\u0439": "i" # й
190
- "\u043a": "k" # к
191
- "\u043b": "l" # л
192
- "\u043c": "m" # м
193
- "\u043d": "n" # н
194
- "\u043e": "o" # о
195
- "\u043f": "p" # п
196
- "\u0440": "r" # р
197
- "\u0441": "s" # с
198
- "\u0442": "t" # т
199
- "\u0443": "u" # у
200
- "\u0444": "f" # ф
201
- "\u0445": "kh" # х
202
- "\u0446": "ts" # ц
203
- "\u0447": "ch" # ч
204
- "\u0448": "sh" # ш
205
- "\u0449": "shch" # щ
206
- "\u044e": "iu" # ю
207
- "\u044f": "ia" # я
208
- "\u044c": "" # ь