interscript 0.1.5 → 2.1.0a8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -1,166 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2005
4
- language: srp
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ROMANIZATION OF SERBIAN, BGN/PCGN 2005 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816783/TABLE_OF_CORRESPONDENCES_FOR_SERBIAN.pdf
9
- creation_date: 2005
10
- confirmation_date: 2019-06
11
- description: |
12
- The tabulation below reflects the Serbian Cyrillic alphabet and the standard Roman script equivalents
13
- used in both Serbia and Montenegro.
14
-
15
- notes:
16
- - The Serbian Cyrillic lowercase italic Д may sometimes be seen as g.
17
- There is no specific Unicode encoding for this variant form so a comparable character
18
- has been used here for illustrative purposes.
19
-
20
- - The digraph dj(Dj) will occasionally be found as an alternative form of đ(Đ).
21
-
22
- - The Serbian Cyrillic lowercase italic П may sometimes be seen as ӣ.
23
- There is no specific Unicode encoding for this variant form so a comparable character
24
- has been used here for illustrative purposes.
25
-
26
- - The Serbian Cyrillic lowercase italic Т may sometimes be seen as w.
27
- There is no specific Unicode encoding for this variant form so a comparable character
28
- has been used here for illustrative purposes.
29
-
30
- - |
31
- An inventory of letter-diacritic combinations, with their Unicode encoding,
32
- in addition to the unmodified letters of the basic Roman script is:
33
- | Đ (U+0110) | đ (U+0111) |
34
- | Ž (U+017D) | ž (U+017E) |
35
- | Lj (U+01C8)* | lj (U+01C9)* |
36
- | Ć (U+0106) | ć (U+0107) |
37
- | Dž (U+01C5)* | dž (U+01C6)* |
38
- | Š (U+0160) | š (U+0161) |
39
- * Note that these characters can also be reproduced with individual letters (e.g. l+j).
40
-
41
- - The Roman-script columns show only lowercase forms but, when applying the table,
42
- uppercase and lowercase Roman letters as appropriate should be used.
43
-
44
- tests:
45
- - source: Шупља Стена
46
- expected: Šuplja Stena
47
- - source: Чукарица
48
- expected: Čukarica
49
- - source: Црна Трава
50
- expected: Crna Trava
51
- - source: Херцег Нови
52
- expected: Herceg Novi
53
- - source: Улцињ
54
- expected: Ulcinj
55
- - source: Ужице
56
- expected: Užice
57
- - source: Тресаначка Река
58
- expected: Tresanačka Reka
59
- - source: Сјеница
60
- expected: Sjenica
61
- - source: Рожаје
62
- expected: Rožaje
63
- - source: Пљевља
64
- expected: Pljevlja
65
- - source: Оџаци
66
- expected: Odžaci
67
- - source: Никшић
68
- expected: Nikšić
69
- - source: Медвеђа
70
- expected: Medveđa
71
- - source: Лозница
72
- expected: Loznica
73
- - source: Књажевац
74
- expected: Knjaževac
75
- - source: Зрењанин
76
- expected: Zrenjanin
77
- - source: Житорађа
78
- expected: Žitorađa
79
- - source: Ервеник
80
- expected: Ervenik
81
- - source: Доње Љупче
82
- expected: Donje Ljupče
83
- - source: Гусиње
84
- expected: Gusinje
85
- - source: ГУСИЊЕ
86
- expected: GUSINJE
87
- - source: Врњачка Бања
88
- expected: Vrnjačka Banja
89
- - source: Бијело Поље
90
- expected: Bijelo Polje
91
- - source: Алибунар
92
- expected: Alibunar
93
-
94
- map:
95
- postrules:
96
- #LJ
97
- - pattern: "((?<=[[:upper:]])Lj(?=[[:upper:]])?|(?<=[[:upper:]])?Lj(?=[[:upper:]]))"
98
- result: "LJ"
99
- #NJ
100
- - pattern: "((?<=[[:upper:]])Nj(?=[[:upper:]])?|(?<=[[:upper:]])?Nj(?=[[:upper:]]))"
101
- result: "NJ"
102
- #DŽ
103
- - pattern: "((?<=[[:upper:]])Dž(?=[[:upper:]])?|(?<=[[:upper:]])?Dž(?=[[:upper:]]))"
104
- result: "DŽ"
105
-
106
- characters:
107
- "\u0410": "A"
108
- "\u0411": "B"
109
- "\u0412": "V"
110
- "\u0413": "G"
111
- "\u0414": "D"
112
- "\u0402": "\u0110" # Đ
113
- "\u0415": "E"
114
- "\u0416": "\u005a\u030c" # Ž
115
- "\u0417": "Z"
116
- "\u0418": "I"
117
- "\u0408": "J"
118
- "\u041A": "K"
119
- "\u041B": "L"
120
- "\u0409": "Lj"
121
- "\u041C": "M"
122
- "\u041D": "N"
123
- "\u040A": "Nj"
124
- "\u041E": "O"
125
- "\u041F": "P"
126
- "\u0420": "R"
127
- "\u0421": "S"
128
- "\u0422": "T"
129
- "\u040B": "\u0043\u0301" # Ć
130
- "\u0423": "U"
131
- "\u0424": "F"
132
- "\u0425": "H"
133
- "\u0426": "C"
134
- "\u0427": "\u0043\u030c" # Č
135
- "\u040F": "D\u007a\u030c" # Dž
136
- "\u0428": "\u0053\u030c" # Š
137
- "\u0430": "a"
138
- "\u0431": "b"
139
- "\u0432": "v"
140
- "\u0433": "g"
141
- "\u0434": "d"
142
- "\u0452": "\u0111" # đ
143
- "\u0435": "e"
144
- "\u0436": "\u007a\u030c" # ž
145
- "\u0437": "z"
146
- "\u0438": "i"
147
- "\u0458": "j"
148
- "\u043A": "k"
149
- "\u043B": "l"
150
- "\u0459": "lj"
151
- "\u043C": "m"
152
- "\u043D": "n"
153
- "\u045A": "nj"
154
- "\u043E": "o"
155
- "\u043F": "p"
156
- "\u0440": "r"
157
- "\u0441": "s"
158
- "\u0442": "t"
159
- "\u045B": "\u0063\u0301" # ć
160
- "\u0443": "u"
161
- "\u0444": "f"
162
- "\u0445": "h"
163
- "\u0446": "c"
164
- "\u0447": "\u0063\u030c" # č
165
- "\u045F": "d\u007a\u030c" # dž
166
- "\u0448": "\u0073\u030c" # š
@@ -1,163 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1965
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 1965 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
9
- creation_date: 1947
10
- confirmation_date: 2019-06
11
- description: |
12
- The BGN/PCGN system for Ukrainian was designed for use in romanizing
13
- names written in the Ukrainian alphabet. The Ukrainian alphabet
14
- contains five characters not present in the Russian alphabet: ґ, є, і,
15
- ї, and ’.
16
-
17
- notes:
18
- - The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
19
- - All apostrophes appearing in romanization are Unicode encoding 2019.
20
- - The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
21
-
22
- tests:
23
- - source: Авдіївська Міськрада
24
- expected: Avdiyivs’ka Mis’krada
25
- - source: Бабаї
26
- expected: Babayi
27
- - source: Віленька
28
- expected: Vilen’ka
29
- - source: Гагарінський Район
30
- expected: Haharins’kyy Rayon
31
- - source: Довбушева Криниця
32
- expected: Dovbusheva Krynytsya
33
- - source: Дідівщина
34
- expected: Didivshchyna
35
- - source: Економічна
36
- expected: Ekonomichna
37
- - source: Єфросинівка
38
- expected: Yefrosynivka
39
- - source: Жигуліна Роща
40
- expected: Zhyhulina Roshcha
41
- - source: Загір’я
42
- expected: Zahir”ya
43
- - source: З’єднувальний Канал
44
- expected: Z”yednuval’nyy Kanal
45
- - source: Ивахи
46
- expected: Yvakhy
47
- - source: Івано-Франківська Міськрада
48
- expected: Ivano-Frankivs’ka Mis’krada
49
- - source: Їжаківка
50
- expected: Yizhakivka
51
- - source: Йосиповичі
52
- expected: Yosypovychi
53
- - source: Кабичівка
54
- expected: Kabychivka
55
- - source: Лазуровий Провулок
56
- expected: Lazurovyy Provulok
57
- - source: Мала Сейдеминуха
58
- expected: Mala Seydemynukha
59
- - source: Нагірний
60
- expected: Nahirnyy
61
- - source: Овер’янівське Озеро
62
- expected: Over”yanivs’ke Ozero
63
- - source: Павлопільське Водосховище
64
- expected: Pavlopil’s’ke Vodoskhovyshche
65
- - source: Приґородний
66
- expected: Prygorodnyy
67
- - source: Радгосп Правда
68
- expected: Radhosp Pravda
69
- - source: Садово-Хрустальненський
70
- expected: Sadovo-Khrustal’nens’kyy
71
- - source: Таратутине
72
- expected: Taratutyne
73
- - source: Улу-Узень
74
- expected: Ulu-Uzen’
75
- - source: Христофорівка
76
- expected: Khrystoforivka
77
- - source: Центральна Вулиця
78
- expected: Tsentral’na Vulytsya
79
- - source: Чайковичі
80
- expected: Chaykovychi
81
- - source: Шалаші
82
- expected: Shalashi
83
- - source: Щербинівка
84
- expected: Shcherbynivka
85
- - source: Южноукраїнська Міськрада
86
- expected: Yuzhnoukrayins’ka Mis’krada
87
- - source: Ясениця
88
- expected: Yasenytsya
89
-
90
- map:
91
- rules:
92
- - pattern: \b\u2019\b # ’ in the middle of a word -> ”
93
- result: "\u201d"
94
-
95
- characters:
96
- "\u0430": 'a'
97
- "\u0431": 'b'
98
- "\u0432": 'v'
99
- "\u0433": 'h'
100
- "\u0434": 'd'
101
- "\u0435": 'e'
102
- "\u0436": 'zh'
103
- "\u0437": 'z'
104
- "\u0438": 'y'
105
- "\u0439": 'y'
106
- "\u043a": 'k'
107
- "\u043b": 'l'
108
- "\u043c": 'm'
109
- "\u043d": 'n'
110
- "\u043e": 'o'
111
- "\u043f": 'p'
112
- "\u0440": 'r'
113
- "\u0441": 's'
114
- "\u0442": 't'
115
- "\u0443": 'u'
116
- "\u0444": 'f'
117
- "\u0445": 'kh'
118
- "\u0446": 'ts'
119
- "\u0447": 'ch'
120
- "\u0448": 'sh'
121
- "\u0449": 'shch'
122
- "\u044c": "\u2019"
123
- "\u044e": 'yu'
124
- "\u044f": 'ya'
125
- "\u0454": 'ye'
126
- "\u0456": 'i'
127
- "\u0457": 'yi'
128
- "\u0491": 'g'
129
- "\ufeff": ' '
130
- "\u0404": 'Ye'
131
- "\u0406": 'I'
132
- "\u0407": 'Yi'
133
- "\u0410": 'A'
134
- "\u0411": 'B'
135
- "\u0412": 'V'
136
- "\u0413": 'H'
137
- "\u0414": 'D'
138
- "\u0415": 'E'
139
- "\u0416": 'Zh'
140
- "\u0417": 'Z'
141
- "\u0418": 'Y'
142
- "\u0419": 'Y'
143
- "\u041a": 'K'
144
- "\u041b": 'L'
145
- "\u041c": 'M'
146
- "\u041d": 'N'
147
- "\u041e": 'O'
148
- "\u041f": 'P'
149
- "\u0420": 'R'
150
- "\u0421": 'S'
151
- "\u0422": 'T'
152
- "\u0423": 'U'
153
- "\u0424": 'F'
154
- "\u0425": 'Kh'
155
- "\u0426": 'Ts'
156
- "\u0427": 'Ch'
157
- "\u0428": 'Sh'
158
- "\u0429": 'Shch'
159
- "\u042c": "\u2019"
160
- "\u042e": 'Yu'
161
- "\u042f": 'Ya'
162
- "\u0490": 'G'
163
-
@@ -1,208 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2019
4
- language: ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 2019 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
9
- creation_date: 2019
10
- confirmation_date: 2020-01
11
- description: |
12
- The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
13
- in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
14
- since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
15
-
16
- notes:
17
- - |
18
- The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
19
- of the national system within Ukraine. Note, however, that this system is not recommended for
20
- reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
21
- This system also lacks the methodology outlined in the 1965 System to provide additional
22
- differentiation between digraphs and individual character sequences.
23
- For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
24
- sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
25
- from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
26
- the characters ж, х, ш, ц and the character sequence тш.
27
- - To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
28
- - The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
29
- - These characters differ significantly in romanization from the BGN/PCGN 1965 system.
30
-
31
- tests:
32
- - source: Алушта
33
- expected: Alushta
34
- - source: Борщагівка
35
- expected: Borshchahivka
36
- - source: Вишгород
37
- expected: Vyshhorod
38
- - source: Гадяч
39
- expected: Hadiach
40
- - source: Згорани
41
- expected: Zghorany
42
- - source: Ґалаґан
43
- expected: Galagan
44
- - source: Дон
45
- expected: Don
46
- - source: Рівне
47
- expected: Rivne
48
- - source: Єнакієве
49
- expected: Yenakiieve
50
- - source: Наєнко
51
- expected: Naienko
52
- - source: Житомир
53
- expected: Zhytomyr
54
- - source: Запоріжжя
55
- expected: Zaporizhzhia
56
- - source: Закарпаття
57
- expected: Zakarpattia
58
- - source: Медвин
59
- expected: Medvyn
60
- - source: Іршава
61
- expected: Irshava
62
- - source: Їжакевич
63
- expected: Yizhakevych
64
- - source: Кадіївка
65
- expected: Kadiivka
66
- - source: Йосипівка
67
- expected: Yosypivka
68
- - source: Стрий
69
- expected: Stryi
70
- - source: Київ
71
- expected: Kyiv
72
- - source: Лебедин
73
- expected: Lebedyn
74
- - source: Миколаїв
75
- expected: Mykolaiv
76
- - source: Ніжин
77
- expected: Nizhyn
78
- - source: Одеса
79
- expected: Odesa
80
- - source: Полтава
81
- expected: Poltava
82
- - source: Ромни
83
- expected: Romny
84
- - source: Суми
85
- expected: Sumy
86
- - source: Тетерів
87
- expected: Teteriv
88
- - source: Ужгород
89
- expected: Uzhhorod
90
- - source: Фастів
91
- expected: Fastiv
92
- - source: Харків
93
- expected: Kharkiv
94
- - source: Біла Церква
95
- expected: Bila Tserkva
96
- - source: Чернівці
97
- expected: Chernivtsi
98
- - source: Шостка
99
- expected: Shostka
100
- - source: Гоща
101
- expected: Hoshcha
102
- - source: Русь
103
- expected: Rus
104
- - source: Юрій
105
- expected: Yurii
106
- - source: Крюківка
107
- expected: Kriukivka
108
- - source: Яготин
109
- expected: Yahotyn
110
- - source: Ічня
111
- expected: Ichnia
112
- - source: Знам’янка
113
- expected: Znamianka
114
-
115
- map:
116
- rules:
117
- - pattern: (?<=З|з)(Г|г)
118
- result: gh
119
- - pattern: (?<!\b\u2019)\b\u0404 # Є in initial position -> Ye
120
- result: Ye
121
- - pattern: (?<!\b\u2019)\b\u0454 # є in initial position -> ye
122
- result: ye
123
- - pattern: (?<!\b\u2019)\b\u0407 # Ї in initial position -> Yi
124
- result: Yi
125
- - pattern: (?<!\b\u2019)\b\u0457 # ї in initial position -> yi
126
- result: yi
127
- - pattern: (?<!\b\u2019)\b\u0419 # Й in initial position -> Y
128
- result: "Y"
129
- - pattern: (?<!\b\u2019)\b\u0419 # й in initial position -> y
130
- result: "y"
131
- - pattern: (?<!\b\u2019)\b\u042e # Ю in initial position -> Yu
132
- result: Yu
133
- - pattern: (?<!\b\u2019)\b\u044e # ю in initial position -> yu
134
- result: yu
135
- - pattern: (?<!\b\u2019)\b\u042f # Я in initial position -> Ya
136
- result: Ya
137
- - pattern: (?<!\b\u2019)\b\u044f # я in initial position -> ya
138
- result: ya
139
- - pattern: \b\u2019\b # remove ’
140
- result: ""
141
-
142
- characters:
143
- "\u0410": "A" # А
144
- "\u0411": "B" # Б
145
- "\u0412": "V" # В
146
- "\u0413": "H" # Г
147
- "\u0490": "G" # Ґ
148
- "\u0414": "D" # Д
149
- "\u0415": "E" # Е
150
- "\u0404": "Ie" # Є
151
- "\u0416": "Zh" # Ж
152
- "\u0417": "Z" # З
153
- "\u0418": "Y" # И
154
- "\u0406": "I" # І
155
- "\u0407": "I" # Ї
156
- "\u0419": "I" # Й
157
- "\u041a": "K" # К
158
- "\u041b": "L" # Л
159
- "\u041c": "M" # М
160
- "\u041d": "N" # Н
161
- "\u041e": "O" # О
162
- "\u041f": "P" # П
163
- "\u0420": "R" # Р
164
- "\u0421": "S" # С
165
- "\u0422": "T" # Т
166
- "\u0423": "U" # У
167
- "\u0424": "F" # Ф
168
- "\u0425": "Kh" # Х
169
- "\u0426": "Ts" # Ц
170
- "\u0427": "Ch" # Ч
171
- "\u0428": "Sh" # Ш
172
- "\u0429": "Shch" # Щ
173
- "\u042e": "Iu" # Ю
174
- "\u042f": "Ia" # Я
175
- "\u042c": "" # Ь
176
- "\u0430": "a" # а
177
- "\u0431": "b" # б
178
- "\u0432": "v" # в
179
- "\u0433": "h" # г
180
- "\u0491": "g" # ґ
181
- "\u0434": "d" # д
182
- "\u0435": "e" # е
183
- "\u0454": "ie" # є
184
- "\u0436": "zh" # ж
185
- "\u0437": "z" # з
186
- "\u0438": "y" # и
187
- "\u0456": "i" # і
188
- "\u0457": "i" # ї
189
- "\u0439": "i" # й
190
- "\u043a": "k" # к
191
- "\u043b": "l" # л
192
- "\u043c": "m" # м
193
- "\u043d": "n" # н
194
- "\u043e": "o" # о
195
- "\u043f": "p" # п
196
- "\u0440": "r" # р
197
- "\u0441": "s" # с
198
- "\u0442": "t" # т
199
- "\u0443": "u" # у
200
- "\u0444": "f" # ф
201
- "\u0445": "kh" # х
202
- "\u0446": "ts" # ц
203
- "\u0447": "ch" # ч
204
- "\u0448": "sh" # ш
205
- "\u0449": "shch" # щ
206
- "\u044e": "iu" # ю
207
- "\u044f": "ia" # я
208
- "\u044c": "" # ь