interscript 0.1.9 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,377 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2007
4
- language: iso-639-3:prs
5
- source_script: Arab
6
- destination_script: Latn
7
- name: BGN/PCGN Romanization System -- Pashto (1968)
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693760/ROMANIZATION_OF_PASHTO.pdf
9
- creation_date: 1968
10
- confirmation_date: 2017-11
11
- description: |
12
- Pashto is an Indo-Iranian language and is one of two
13
- nationally official languages in Afghanistan and one of
14
- five regionally recognised languages in Pakistan. The
15
- romanization system presented here may be applied to all
16
- Pashto geographical names. Although the BGN/PCGN policy for
17
- geographical names in Afghanistan is to apply the BGN/PCGN
18
- national system of romanization for Afghanistan (2007),
19
- which incorporates Dari elements, when applied to a Pashto
20
- geographical name, the romanized results of the BGN/PCGN
21
- national system for Afghanistan are the same as those of
22
- this Pashto romanization system1 . The Pashto alphabet uses
23
- a modified form of the Perso-Arabic script, and contains
24
- twelve additional consonants not present in standard
25
- Arabic, as well as three additional vowel characters and an
26
- additional vowel point. ڼ گ ښ ژ ږ ړ ډ ځ څ چ ټ پ :Consonants
27
- ٙ :Point Vowel; ې ۍ ى :Vowels The points used in Arabic to
28
- mark short vowels and certain other diacritical marks are
29
- not written in Pashto. Consequently, a reference source may
30
- sometimes be required to aid correct identification of the
31
- standard spellings and proper vowels and elimination of
32
- dialectal and idiosyncratic variations. In the interests of
33
- clarity, a column showing vowel pointing from Arabic to
34
- indicate short vowels has been included in the examples
35
- below, alongside the unpointed form that will usually be
36
- encountered. However it should be noted that the
37
- pronunciation of short vowels will vary. (Note: it is
38
- recommended that a font such as Scheherazade, available
39
- from www.sil.org, which includes the Unicode extended
40
- Arabic sub-range, be used to view this system2 .)
41
-
42
- notes:
43
- - 1. Alif ( ‫ا‬ ) should be romanized as follows
44
- a. Initially, it indicates that the word begins with a vowel or
45
- diphthong; the alif itself is not romanized, but rather the
46
- short vowel it “carries” is romanized; e.g.,
47
- ‫ه‬ َ‫د‬ ‫ن‬ ‫ژر‬ ‫سلَم‬ َ‫أ‬ ‫ميړ‬ → Mīṟ Aslam Zhrandah. b. When it carries a
48
- maddah (‫)آ‬ (see vowel table, row 3), it represents ā;
49
- e.g., ‫د‬ ‫ن‬ ‫ب‬ َ ‫آب‬ → Āb Band. c. Medially and
50
- finally it represents ā (see table 2, row 2); e.g., ‫ۍ‬
51
- ‫ماڼ‬ → Māṉêy d. Medially and finally in words of Arabic
52
- origin, alif may serve as the bearer of hamzah, e.g.
53
- ‫رأس‬ → ra’s. See also note 4.
54
-
55
- - 2. The characters tsē ( ‫څ‬ ) and dzē ( ‫ځ‬ ) may be
56
- romanized t͡s and d͡z (the combining double breve (
57
- Unicode 0361) appearing over the digraph) when for special
58
- reasons it is desired that confusion be avoided between
59
- ‫ت‬ (t) plus ‫س‬ (s) and between ‫د‬ (d) plus ‫ز‬ (z),
60
- respectively.
61
-
62
- - 3. Occasionally the character sequences ‫ه‬ ‫ك‬ , ‫ه‬ ‫ز‬ ,
63
- ‫ه‬ ‫س‬ , and ‫ه‬ ‫گ‬ occur. They may be romanized k·h,
64
- z·h, s·h, and g·h in order to differentiate these
65
- romanizations from the digraphs kh, zh, sh, and gh, which
66
- are used to represent the characters ‫خ‬ , ‫ژ‬, ‫ش‬ , and
67
- ‫غ‬ respectively .
68
-
69
- - 4. Hamzah ( ‫ء‬ ) should be romanized as follows a. In
70
- word-initial position, where it will appear either above or
71
- below alif ( ‫أ‬ or ‫إ‬ ), it indicates a short vowel and
72
- should not itself be romanized. In other positions it should
73
- be romanized by an apostrophe, e.g. ‫جُزء‬ → juz’. b.
74
- Yeh with hamzah ( ‫ئ‬ ) should be romanized êy, unless it
75
- represents the compound (iẕāfah) morpheme, in which case it
76
- is romanized according to note 9 below.
77
-
78
- - 5. The division of words utilized in Pashto writing is
79
- followed in romanization, except that the elements –ābād, -
80
- khwā, -shahr, -zādah, -zay and -ullāh are always romanized
81
- as part of the preceding word, e.g. ‫آباد‬ ‫ت‬ ‫م‬ َ ْ‫ح‬
82
- ‫ر‬ َ → Raḩmatābād and ‫الله‬ ‫ت‬ ‫م‬ َ ْ‫ح‬ ‫ر‬ َ →
83
- Raḩmatullāh. However, when the word for God ( ‫الله‬ )
84
- appears as a standalone word it should be written Allāh.
85
- Note also the “dagger alif” ( ٙ) above the second ‫ل‬ (lām)
86
- in the word ‫الله‬ ; this, like the short vowels, is not
87
- written in Pashto but should be romanized ā, like a full-
88
- size alif. Persian derivational endings such as –vand and
89
- endings of Turkish origin such as –lar, -lī, -lū, -i, -u, -
90
- si, and –su, should be written together with the preceding
91
- word.
92
-
93
- - 6. The Pashto preposition ‫د‬ should be romanized dê in
94
- agreement with its pronunciation, despite the fact that
95
- it is sometimes pointed with kasrah ( ٙ ).
96
-
97
- - 7. In names of Arabic origin, the l of the definite article
98
- al/ul is assimilated before the ‘sun letters’ t, s̄ , d,
99
- z̄ , r, z, s, sh, ş, ẕ, ţ, z̧ , l and n. In romanization,
100
- the article will be written al or its assimilated
101
- equivalent in name-initial position but ul or its
102
- assimilated equivalent elsewhere; the article should be
103
- separated from the name it precedes and should not be
104
- capitalized, except at the beginning of a name, e.g. جَبَل
105
- السَرَاج → Jabal us Sarāj
106
-
107
- - 8. In Arabic names, a shaddah, ٙ is used to denote the
108
- doubling of a particular consonant character, e.g. ‫مَّد‬
109
- َ‫ح‬ ‫م‬ ُ → Muḩammad. However, in Pashto this ‘doubling’
110
- is frequently omitted in both Perso-Arabic script and the
111
- resulting romanization. Guidance on doubling may be taken
112
- from an authoritative names source, such as an Afghan
113
- government source or Pashto dictionary; for example, it is
114
- usual to see Ḩājī without and ‘Abbās with the doubled
115
- consonant. The doubled y consonant is almost always
116
- retained, as in Sayyid or Qayyūm
117
-
118
- - 9. The iẕāfah morpheme is not a grammatical feature of
119
- Pashto and, if encountered in a linguistically hybrid
120
- geographical name (i.e. combining features of both Pashto
121
- and Dari), it should be treated according to the BGN/PCGN
122
- national system of romanization for Afghanistan, 2007, as –
123
- e, unless the preceding word ends with a silent heh (‫)ه‬
124
- or a vowel when it should be shown – ye, e.g.
125
- ‫صار‬ ‫ح‬ ِ ‫غر‬ → Ghar-e Ḩişār;
126
- ‫و‬ ‫ن‬ َ ‫ه‬ ٔ ‫لع‬ َ ‫ق‬ َ →
127
- Qal‘ah-ye Now.
128
-
129
-
130
- - 10. The character sequence خو when followed by ‫ا‬ or
131
- ‫ی‬ , should be romanized khw, although the w is either not
132
- pronounced, or only weakly pronounced; e.g. ‫خواجه‬ →
133
- khwājah.
134
-
135
- - 11. An inventory of letter-diacritic combinations in addition to the unmodified letters of the
136
- basic Roman script is
137
- ‘ (U+2018)
138
- ʼ (U+2019)
139
- Ā (U+0100)
140
- ā (U+0101)
141
- Á (U+00C1)
142
- á (U+00E1)
143
- Ḏ (U+0044+0331)
144
- ḏ (U+0064+0331)
145
- Ē (U+0112)
146
- ē (U+0113)
147
- Ê (U+00CA)
148
- ê (U+00EA)
149
- Ḩ (U+1E28)
150
- ḩ (U+1E29)
151
- Ī (U+012A)
152
- ī (U+012B)
153
- N̄ (U+004E+0304)
154
- n̄ (U+006E+0304)
155
- Ō (U+014C)
156
- ō (U+014D)
157
- Ṟ (U+0052+0331)
158
- ṟ (U+0072+0331)
159
- Ş (U+015E)
160
- ş (U+015F)
161
- S̄ (U+0053+0304)
162
- s̄ (U+0073+0304)
163
- Ṯ (U+0054+0331)
164
- ṯ (U+0074+0331)
165
- Ţ (U+0162)
166
- ţ (U+0163)
167
- Ū (U+016A)
168
- ū (U+016B)
169
- Z̧ (U+005A+0327)
170
- z̧ (U+007A+0327)
171
- Z̄ (U+005A+0304)
172
- z̄ (U+007A+0304)
173
- Ẕ (U+005A+0331)
174
- ẕ (U+007A+0331)
175
- Z͟ H (U+005A+0048+035F)
176
- z͟ h (U+007A+0068+035F)
177
-
178
- tests:
179
- - source: بَغْلان
180
- expected: Baghlān
181
-
182
- - source: پُوټَكَى
183
- expected: Pōṯakay
184
-
185
- - source: شِيرِين تَگَاب
186
- expected: Shīrīn Tagāb
187
-
188
- - source: کُوْټ
189
- expected: Kōṯ
190
-
191
- - source: ثَابِر
192
- expected: S̄ābir
193
-
194
- - source: جَلال آبَاد
195
- expected: Jalālābād
196
-
197
- - source: چَارِيكَار
198
- expected: Chārīkār
199
-
200
- - source: ځَدْرَاڼ
201
- expected: Dzadrāṉ
202
-
203
- - source: څَوکۍ
204
- expected: Tsowkêy
205
-
206
- - source: حَضْرَتِ إِمَام
207
- expected: Ḩaẕrat-e Imām
208
-
209
- - source: خُوْسْت
210
- expected: Khōst
211
-
212
- - source: سْپِين بُوْلْدَک
213
- expected: Spīn Bōldak
214
-
215
- - source: ډَنْډ وَ پَتَان
216
- expected: Ḏanḏ Wa Patān
217
-
218
- - source: كَنْدَهَار
219
- expected: Kandahār
220
-
221
- - source: أَنْدَړ
222
- expected: Andaṟ
223
-
224
- - source: كُنْدُز
225
- expected: Kunduz
226
-
227
- - source: مِير أَسْلَم ژْرَنْدَه
228
- expected: Mīr Aslam Zhrandah
229
-
230
- - source: ږِيرَه
231
- expected: Z͟hīrah
232
-
233
- - source: سَمَنْگَان
234
- expected: Samangān
235
-
236
- - source: كښٙتَه كَلا
237
- expected: Ks͟hêtah Kalā
238
-
239
- - source: قَيْصَار
240
- expected: Qayşār
241
-
242
- - source: فَيض آبَاد
243
- expected: Faīẕābād
244
-
245
- - source: حَضْرَتِ سُلْطَان
246
- expected: Ḩaẕrat-e Sulţān
247
-
248
- - source: ظَاهِر كَلا
249
- expected: Z̧āhir Kalā
250
-
251
- - source: پُلِ عَلَم
252
- expected: Pul-e ‘Alam
253
-
254
- - source: غَزْنِي
255
- expected: Ghaznī
256
-
257
- - source: مَزَارِ شَرِيف
258
- expected: Mazār-e Sharīf
259
-
260
- - source: قَيْصَار
261
- expected: Qayşār
262
-
263
- - source: كَنْدَهَار
264
- expected: Kandahār
265
-
266
- - source: گَرْدېز
267
- expected: Gardēz
268
-
269
- - source: کَابُل
270
- expected: Kābul
271
-
272
- - source: مَيمَنَه
273
- expected: Maīmanah
274
-
275
- - source: خَان آبَاد
276
- expected: Khānābād
277
-
278
- - source: مَاڼۍ
279
- expected: Māṉêy
280
-
281
- - source: وَاخَان
282
- expected: Wākhān
283
-
284
- - source: يَنْگِي قَلعَه
285
- expected: Yangī Qal‘ah
286
-
287
- - source: جَلال آبَاد
288
- expected: Jalālābād
289
-
290
- - source: مُرْغَاب کَابُل
291
- expected: Murghāb Kābul
292
-
293
- - source: گٙردُون
294
- expected: Gêrdōn
295
-
296
- - source: آب بَنْد
297
- expected: Āb Band
298
-
299
- - source: سْپِين بُوْلْدَک
300
- expected: Spīn Bōldak
301
-
302
- - source: جَوزجَان
303
- expected: Jowzjān
304
-
305
- - source: گَرْدېز
306
- expected: Gardēz
307
-
308
- - source: مَیدان شَهْر
309
- expected: Maīdān Shahr
310
-
311
- - source: ډَنْډِ سُفْلىٰ
312
- expected: Ḏanḏ-e Suflá
313
-
314
- - source: جَبَل السَرَاج
315
- expected: Jabal us Sarāj
316
- map:
317
- inherit: bgnpcgn-prs-Arab-Latn-2007
318
- postrules:
319
- - pattern: (?<=\b)(?<!\b[‘|’|'|-])[\u0061-\uFFFF]
320
- result: "upcase"
321
- # don't capitalize defined article in the middle of a sentence
322
- - pattern : ' Ut T' # الت
323
- result: ' ut T'
324
- - pattern : ' Us̄ S̄' # الث
325
- result: ' us̄ S̄'
326
- - pattern : ' Ud D' # الد
327
- result: ' ud D'
328
- - pattern : ' Uz̄ Z̄' # الذ
329
- result: ' uz̄ Z̄'
330
- - pattern : ' Ur R' # الر
331
- result: ' ur R'
332
- - pattern : ' Uz Z' # الز
333
- result: ' uz Z'
334
- - pattern : ' Us S' # الس
335
- result: ' us S'
336
- - pattern : ' Ush Sh' # الش
337
- result: ' ush Sh'
338
- - pattern : ' Uş Ş' # الص
339
- result: ' uş Ş'
340
- - pattern : ' Uẕ Ẕ' # الض
341
- result: ' uẕ Ẕ'
342
- - pattern : ' Uţ Ţ' # الط
343
- result: ' uţ Ţ'
344
- - pattern : ' Uz̧ Z̧' # الظ
345
- result: ' uz̧ Z̧'
346
- - pattern : ' Ul L' # الل
347
- result: ' ul L'
348
- - pattern : ' Un n' # الن
349
- result: ' un N'
350
- characters:
351
-
352
- '\u0650': 'i' # ِ kasra
353
- '\u064f': 'u' # ُ damma
354
-
355
- '\u0650\b' : '-e' # ِ kasra
356
-
357
- '\s\u0627\u0644\u0644\u0651\u064e\u0647' : 'ullāh' # Note5
358
- '\u0652' : '' # ْ sokoon
359
- '\u0659': 'ê'
360
-
361
- # Sun letters
362
- '\b\u0627\u0644\u062a' : 'ut t' # الت
363
- '\b\u0627\u0644\u062b' : 'us̄ s̄' # الث
364
- '\b\u0627\u0644\u062f' : 'ud d' # الد
365
- '\b\u0627\u0644\u0630' : 'uz̄ z̄' # الذ
366
- '\b\u0627\u0644\u0631' : 'ur r' # الر
367
- '\b\u0627\u0644\u0632' : 'uz z' # الز
368
- '\b\u0627\u0644\u0633' : 'us s' # الس
369
- '\b\u0627\u0644\u0634' : 'ush sh' # الش
370
- '\b\u0627\u0644\u0635' : 'uş ş' # الص
371
- '\b\u0627\u0644\u0636' : 'uẕ ẕ' # الض
372
- '\b\u0627\u0644\u0637' : 'uţ ţ' # الط
373
- '\b\u0627\u0644\u0638' : 'uz̧ z̧' # الظ
374
- '\b\u0627\u0644\u0644' : 'ul l' # الل
375
- '\b\u0627\u0644\u0646' : 'un n' # الن
376
-
377
- '\u0626': 'êy' # ئ
@@ -1,168 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2016
4
- language: iso-639-2:rue
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: ROMANIZATION OF RUSYN, BGN/PCGN 2016 System
8
- alias:
9
- ogc11122:
10
- code: rue_Cyrl2Latn_BGN_2016
11
- description: The BGN/PCGN system for Rusyn was designed for use in romanizing names written in the Rusyn alphabet.
12
- url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20RUSYN.pdf
13
- creation_date: 2016
14
- confirmation_date: 2017
15
- description: |
16
- The BGN/PCGN system for Rusyn was designed for use in romanizing names written in the Rusyn
17
- alphabet. There are two primary dialects of the Rusyn language: Carpatho‐Rusyn and Pannonian Rusyn.
18
- The Rusyn alphabet is almost identical to the Ukrainian alphabet, but contains three characters not
19
- present in the Ukrainian alphabet: ё, ы, and ъ. These letters are also absent from the alphabet used by
20
- Pannonian Rusyn. This table applies to both dialects of Rusyn.
21
-
22
- notes:
23
- - |
24
- The letters ё, ы, and ъ are present in the orthography of the Carpatho‐Rusyn variant of the Rusyn
25
- language. This variant is predominant among Rusyn speakers in Ukraine, Poland, Slovakia, Hungary, and
26
- Romania. The letters are absent from the orthography of Pannonian Rusyn, which is predominant in
27
- Serbia and Croatia.
28
-
29
- - |
30
- Unicode for Latin‐script characters: Ž/ž (017D, 017E); Č/č (010C, 010D); Š/š (0160, 0161); ’ (0027).
31
-
32
- - |
33
- The Romanization columns show only lowercase forms but, when romanizing, uppercase and
34
- lowercase Roman letters as appropriate should be used.
35
-
36
- tests:
37
- # Based on https://ru.wikipedia.org/wiki/Русинский_язык#Примеры_текста
38
- - source: русиньскый язык
39
- expected: rusyn'skyj yazyk
40
- - source: руська бисіда
41
- expected: rus'ka bysida
42
- - source: руснацькый язык
43
- expected: rusnac'kyj yazyk
44
- - source: руски язик
45
- expected: rusky yazyk
46
- - source: |
47
- Чоловік найчастїше споминать на молоды часы. Є то цалком нормалне.
48
- Тадь то рокы, кідь зазнаме всякого. І доброго, і планого. В тім часї ся чоловік находить, як кібы в скаралущі.
49
- Розвивать ся, як цвіт на черешни. Выпхати ся мож з того обалу лем тогды, як прийде час, кідь цалком дозріє.
50
- Даколи стачіть ся неограбаным способом дотулити білого домику, такой ся пораниш, што ті буде тякнути на цілый жывот.
51
- А кідь ся народиш в теплї, обколесеный ласков, розвиваш ся в добрых условіях, выпадеш із скаралущі, як міцна істота.
52
- Такым потім буде і твій далшый жывот. Із добрї заложеным фундаментом. Было бы смішно сі робити надїй, же жывот є лем єдна рівна путь…
53
- Кібы то так чоловік знав… Кібы ся міг іщі раз народити і піти по тій істій пути…
54
- expected: |
55
- Čolovik najčastjiše spomynat' na molody časy. Je to calkom normalne.
56
- Tad' to roky, kid' zazname vsyakogo. I dobrogo, i planogo. V tim časji sya čolovik nachodyt', yak kiby v skaralušči.
57
- Rozvyvat' sya, yak cvit na čerešny. Vypchaty sya mož z togo obalu lem togdy, yak pryjde čas, kid' calkom dozrije.
58
- Dakoly stačit' sya neograbanym sposobom dotulyty bilogo domyku, takoj sya poranyš, što ti bude tyaknuty na cilyj žyvot.
59
- A kid' sya narodyš v teplji, obkolesenyj laskov, rozvyvaš sya v dobrych usloviyach, vypadeš iz skaralušči, yak micna istota.
60
- Takym potim bude i tvij dalšyj žyvot. Iz dobrji založenym fundamentom. Bylo by smišno si robyty nadjij, že žyvot je lem jedna rivna put'…
61
- Kiby to tak čolovik znav… Kiby sya mig išči raz narodyty i pity po tij istij puty…
62
- # Based on http://www.philology.ru/linguistics3/suprun-89.htm
63
- - source: |
64
- Вишло слунко красне, ясне,
65
- и цму швета розогнало -
66
- жем желену, били хмарки
67
- як зоз златом да обцагло.
68
- expected: |
69
- Vyšlo slunko krasne, yasne,
70
- y cmu šveta rozognalo -
71
- žem želenu, byly chmarky
72
- yak zoz zlatom da obcaglo.
73
- - source: шнїг
74
- expected: šnjig
75
- - source: жем
76
- expected: žem
77
- - source: дзень
78
- expected: dzen'
79
- - source: спомнуц
80
- expected: spomnuc
81
- - source: крава
82
- expected: krava
83
- # Based on https://lingvoforum.net/index.php?topic=43545.0
84
- - source: дївка
85
- expected: djivka
86
- - source: дрыв
87
- expected: dryv
88
- - source: фёрд
89
- expected: fjord
90
- - source: Ёзеф
91
- expected: Jozef
92
- - source: пастырї
93
- expected: pastyrji
94
-
95
-
96
- map:
97
- characters:
98
- "\u0410": "A" # А
99
- "\u0411": "B" # Б
100
- "\u0412": "V" # В
101
- "\u0413": "H" # Г
102
- "\u0414": "D" # Д
103
- "\u0415": "E" # Е
104
- "\u0404": "Je" # Є
105
- "\u0401": "Jo" # Ё
106
- "\u0416": "\u017D" # Ж => Ž note[2]
107
- "\u0417": "Z" # З
108
- "\u0418": "Y" # И
109
- "\u0406": "I" # І
110
- "\u042b": "Y" # Ы
111
- "\u0407": "Ji" # Ї
112
- "\u0419": "J" # Й
113
- "\u041a": "K" # К
114
- "\u041b": "L" # Л
115
- "\u041c": "M" # М
116
- "\u041d": "N" # Н
117
- "\u041e": "O" # О
118
- "\u041f": "P" # П
119
- "\u0420": "R" # Р
120
- "\u0421": "S" # С
121
- "\u0422": "T" # Т
122
- "\u0423": "U" # У
123
- "\u0424": "F" # Ф
124
- "\u0425": "Ch" # Х
125
- "\u0426": "C" # Ц
126
- "\u0427": "\u010C" # Ч => Č note[2]
127
- "\u0428": "\u0160" # Ш => Š note[2]
128
- "\u0429": "\u0160\u010C" # Щ => ŠČ
129
- "\u042e": "Yu" # Ю
130
- "\u042f": "Ya" # Я
131
- "\u042c": "\u0027" # Ь => '
132
- "\u042a": "\u0027" # Ъ => '
133
-
134
- "\u0430": "a" # а
135
- "\u0431": "b" # б
136
- "\u0432": "v" # в
137
- "\u0433": "g" # г
138
- "\u0434": "d" # д
139
- "\u0435": "e" # е
140
- "\u0454": "je" # є
141
- "\u0451": "jo" # ё
142
- "\u0436": "\u017E" # ж => ž note[2]
143
- "\u0437": "z" # з
144
- "\u0438": "y" # и
145
- "\u0456": "i" # і
146
- "\u044b": "y" # ы
147
- "\u0457": "ji" # ї
148
- "\u0439": "j" # й
149
- "\u043a": "k" # к
150
- "\u043b": "l" # л
151
- "\u043c": "m" # м
152
- "\u043d": "n" # н
153
- "\u043e": "o" # о
154
- "\u043f": "p" # п
155
- "\u0440": "r" # р
156
- "\u0441": "s" # с
157
- "\u0442": "t" # т
158
- "\u0443": "u" # у
159
- "\u0444": "f" # ф
160
- "\u0445": "ch" # х
161
- "\u0446": "c" # ц
162
- "\u0447": "\u010D" # ч => č note[2]
163
- "\u0448": "\u0161" # ш => š note[2]
164
- "\u0449": "\u0161\u010D" # щ => šč
165
- "\u044e": "yu" # ю
166
- "\u044f": "ya" # я
167
- "\u044c": "\u0027" # ь => '
168
- "\u044a": "\u0027" # ъ => '