interscript 0.1.7 → 2.1.0b1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
@@ -1,108 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1981
4
- language: iso-639-2:arm
5
- source_script: Armn
6
- destination_script: Latn
7
- name: BGN/PCGN 1981 System
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/810208/ROMANIZATION_OF_ARMENIAN.pdf
9
- creation_date: 2013
10
- confirmation date: 2019-06
11
- description: |
12
- The BGN/PCGN system for Armenian was designed for use in romanizing
13
- names written in the Armenian alphabet. The Roman letters and letter
14
- combinations shown as equivalents to the Armenian characters reflect
15
- the eastern variety of Armenian, i.e. the language spoken in the
16
- Republic of Armenia.
17
-
18
- notes:
19
- - The character ե should be romanized ye initially and after the vowel characters ա, ե, է, ը, ի, ո, ու and օ. In all other instances, it should be romanized e.
20
- - The character ո should be romanized vo initially except in the word ով, which should be romanized ov. In all other instances, it should be romanized o.
21
- - In Soviet-era sources this upper-case digraph character is found as Եի (Unicode encoding 0535+056B).
22
- - This lower-case character may be seen either in digraph form as եւ (Unicode encoding 0565+0582) or in single character form as եւ (Unicode encoding 0587).
23
- - The characters ԵՎ , եւ and եւ should be romanized yev initially, in isolation, and after the vowel characters ա, ե, է, ը, ի, ո, ու, and օ. In all other instances these characters should be romanized ev.
24
- - All apostrophes appearing in Armenian romanization are encoded Unicode 2019.
25
- - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
26
-
27
- tests:
28
-
29
- map:
30
- characters:
31
- '\u0531' : 'A'
32
- '\u0532' : 'B'
33
- '\u0533' : 'G'
34
- '\u0534' : 'D'
35
- '\u0535' : 'Ye' #treated same as Russian 'ye'
36
- '\u0536' : 'Z'
37
- '\u0537' : 'E'
38
- '\u0538' : 'Y'
39
- '\u0539' : 'T\u2019'
40
- '\u053a' : 'Zh'
41
- '\u053b' : 'I'
42
- '\u053c' : 'L'
43
- '\u053d' : 'Kh'
44
- '\u053e' : 'Ts'
45
- '\u053f' : 'K'
46
- '\u0540' : 'H'
47
- '\u0541' : 'Dz'
48
- '\u0542' : 'Gh'
49
- '\u0543' : 'Ch'
50
- '\u0544' : 'M'
51
- '\u0545' : 'Y'
52
- '\u0546' : 'N'
53
- '\u0547' : 'Sh'
54
- '\u0548' : 'O' # VO initially and U when in combination with \u0552
55
- '\u0549' : u'Ch\u2019'
56
- '\u054a' : 'P'
57
- '\u054b' : 'J'
58
- '\u054c' : 'Rr'
59
- '\u054d' : 'S'
60
- '\u054e' : 'V'
61
- '\u054f' : 'T'
62
- '\u0550' : 'R'
63
- '\u0551' : 'Ts\u2019'
64
- '\u0548\u0552' : 'U'
65
- '\u0548\u0582' : 'U'
66
- '\u0553' : 'P\u2019'
67
- '\u0554' : 'K\u2019'
68
- '\u0555' : 'O'
69
- '\u0556' : 'F'
70
- '\u0561' : 'a'
71
- '\u0562' : 'b'
72
- '\u0563' : 'g'
73
- '\u0564' : 'd'
74
- '\u0565' : 'e' # ye initially
75
- '\u0566' : 'z'
76
- '\u0567' : 'e'
77
- '\u0568' : 'y'
78
- '\u0569' : u't\u2019'
79
- '\u056a' : 'zh'
80
- '\u056b' : 'i'
81
- '\u056c' : 'l'
82
- '\u056d' : 'kh'
83
- '\u056e' : 'ts'
84
- '\u056f' : 'k'
85
- '\u0570' : 'h'
86
- '\u0571' : 'dz'
87
- '\u0572' : 'gh'
88
- '\u0573' : 'ch'
89
- '\u0574' : 'm'
90
- '\u0575' : 'y'
91
- '\u0576' : 'n'
92
- '\u0577' : 'sh'
93
- '\u0578' : 'o' # vo initially and u when in combination with \u0582
94
- '\u0579' : 'ch\u2019'
95
- '\u057a' : 'p'
96
- '\u057b' : 'j'
97
- '\u057c' : 'rr'
98
- '\u057d' : 's'
99
- '\u057e' : 'v'
100
- '\u057f' : 't'
101
- '\u0580' : 'r'
102
- '\u0581' : 'ts\u2019'
103
- '\u0578\u0582' : 'u'
104
- '\u0583' : 'p\u2019'
105
- '\u0584' : 'k\u2019'
106
- '\u0585' : 'o'
107
- '\u0586' : 'f'
108
- '\u0587' : 'ev' # yev initially
@@ -1,104 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1993
4
- language: iso-639-2:aze
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
9
- creation_date: 1993
10
- confirmation date: 2019-06
11
- description: |
12
- Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
13
-
14
- notes:
15
-
16
- - The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
17
-
18
- - The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
19
-
20
- - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
21
-
22
- - |
23
- An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
24
- Ğ (U+011E), ğ (U+011F)
25
- Ə (U+018F), ə (U+0259)
26
- İ (U+0130), ı (U+0131)
27
- Ö (U+00D6), ö (U+00F6)
28
- Ü (U+00DC), ü (U+00FC)
29
- Ç (U+00C7), ç (U+00E7)
30
- Ş (U+015E), ş (U+015F)
31
-
32
- - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
33
-
34
- tests:
35
- - source:
36
- expected:
37
-
38
- map:
39
- characters:
40
- '\u0410' : 'A'
41
- '\u0411' : 'B'
42
- '\u0412' : 'G'
43
- '\u0413' : 'V'
44
- '\u0492' : 'Ğ'
45
- '\u0414' : 'D'
46
- '\u0415' : 'E'
47
- '\u04D8' : 'Ә'
48
- '\u0416' : 'J'
49
- '\u0417' : 'Z'
50
- '\u0418' : 'I'
51
- '\u042B' : 'İ'
52
- '\u0408' : 'Y'
53
- '\u041A' : 'K'
54
- '\u049C' : 'G'
55
- '\u041B' : 'L'
56
- '\u041C' : 'M'
57
- '\u041D' : 'N'
58
- '\u041E' : 'O'
59
- '\u04E8' : 'Ö'
60
- '\u041F' : 'P'
61
- '\u0420' : 'R'
62
- '\u0421' : 'S'
63
- '\u0422' : 'T'
64
- '\u0423' : 'U'
65
- '\u04AE' : 'Ü'
66
- '\u0424' : 'F'
67
- '\u0425' : 'X'
68
- '\u04BA' : 'H'
69
- '\u0427' : 'Ç'
70
- '\u04B8' : 'C'
71
- '\u0428' : 'Ş'
72
-
73
- '\u0430' : 'a'
74
- '\u0431' : 'b'
75
- '\u0432' : 'v'
76
- '\u0433' : 'g'
77
- '\u0493' : 'ğ'
78
- '\u0434' : 'd'
79
- '\u0435' : 'e'
80
- '\u04D9' : 'ә'
81
- '\u0436' : 'j'
82
- '\u0437' : 'z'
83
- '\u0438' : 'i'
84
- '\u044B' : 'ı'
85
- '\u0458' : 'y'
86
- '\u043A' : 'k'
87
- '\u049D' : 'g'
88
- '\u043B' : 'l'
89
- '\u043C' : 'm'
90
- '\u043D' : 'n'
91
- '\u043E' : 'o'
92
- '\u04E9' : 'ö'
93
- '\u043F' : 'p'
94
- '\u0440' : 'r'
95
- '\u0441' : 's'
96
- '\u0442' : 't'
97
- '\u0443' : 'u'
98
- '\u04AF' : 'ü'
99
- '\u0444' : 'f'
100
- '\u0445' : 'x'
101
- '\u04BB' : 'h'
102
- '\u0447' : 'ç'
103
- '\u04B9' : 'c'
104
- '\u0448' : 'ş'
@@ -1,188 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2007
4
- language: iso-639-2:rus
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
8
- alias:
9
- ogc11122:
10
- code: bak_Cyrl2Latn_BGN_2007
11
- description: Bashkir 2007 BGN/PCGN Cyrillic-Latin Table of Correspondences
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
13
- creation_date: 2007
14
- confirmation_date: 2019
15
- description: |
16
- Bashkir is an official language within Respublika Bashkortostan, one of the
17
- republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
18
- which case it should be romanized by means of the Cyrillic-Roman table of
19
- correspondences given below
20
-
21
- notes:
22
- - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
23
- - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
24
- - The letter w is used between or after vowels.
25
- - The letter w is used after e, u, ö and ə.
26
- - |
27
- An inventory of letter-diacritic combinations, with their Unicode encoding,
28
- in addition to the unmodified letters of the basic Roman script is:
29
- Ğ (U+011E) ğ (U+011F)
30
- Ź (U+0179) ź (U+017A)
31
- Ë (U+00CB) ë (U+00EB)
32
- Ñ (U+00D1) ñ (U+00F1)
33
- Ö (U+00D6) ö (U+00F6)
34
- Ś (U+015A) ś (U+015B)
35
- Ü (U+00DC) ü (U+00FC)
36
- Ç (U+00C7) ç (U+00E7)
37
- Ş (U+015E) ş (U+015F)
38
- Ə (U+018F) ə (U+0259)
39
- - |
40
- The Roman-script columns show only lowercase forms but, when applying the table,
41
- uppercase and lowercase Roman letters as appropriate should be used.
42
-
43
- tests:
44
- # adopted http://www.eki.ee/knab/lat/kblba.pdf
45
- - source: Васйылға
46
- expected: Wasyılğa
47
- - source: Еҙем
48
- expected: Yeźem
49
- - source: Раевка
50
- expected: Raevka
51
- - source: Сәйетҡол
52
- expected: Səyetqol
53
- - source: Ауырғазы
54
- expected: Awırğazı
55
- - source: Бурһыҡтау
56
- expected: Burhıqtaw
57
- - source: Мәләүез
58
- expected: Mələwez
59
- - source: Ҡыҙылъяр
60
- expected: Qıźılyar
61
- # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
62
- - source: кемдең
63
- expected: kemdeñ
64
- - source: кем
65
- expected: kem
66
- - source: был
67
- expected: bıl
68
- - source: ошо
69
- expected: oşo
70
- - source: быларҙың
71
- expected: bılarźıñ
72
- - source: һеҙҙән
73
- expected: heźźən
74
- - source: һин
75
- expected: hin
76
- - source: һеҙҙең
77
- expected: heźźeñ
78
-
79
- map:
80
- rules:
81
- # note[1]
82
- - pattern: \b\u0412(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
83
- result: "W"
84
- - pattern: \b\u0432(?=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
85
- result: "w"
86
- # note[2]
87
- - pattern: \b\u0415
88
- result: "Ye"
89
- - pattern: \b\u0435
90
- result: "ye"
91
- - pattern: (?=\b)\u0415(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
92
- result: "Ye"
93
- - pattern: (?=\b)\u0435(?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])
94
- result: "ye"
95
-
96
- # note[3] # note[4]
97
- - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0423\u04AE]
98
- result: W
99
- - pattern: (?<=[АаЕеЁёИиОоӨөУуҮЫыЭэӘәЮюЯя])[\u0443\u04AF]
100
- result: w
101
-
102
-
103
- characters:
104
- '\u0410': 'A' # А
105
- '\u0411': 'B' # Б note[1]
106
- '\u0412': 'V' # В
107
- '\u0413': 'G' # Г
108
- '\u0492': "\u011E" # Ғ
109
- '\u0414': 'D' # Д
110
- '\u0498': "\u0179" # Ҙ
111
- '\u0415': 'E' # Е note[2]
112
- '\u0401': 'Ë' # Ё
113
- '\u0416': 'J' # Ж
114
- '\u0417': 'Z' # З
115
- '\u0418': 'I' # И
116
- '\u0419': 'Y' # Й
117
- '\u041A': 'K' # К
118
- '\u04A0': 'Q' # Ҡ
119
- '\u041B': 'L' # Л
120
- '\u041C': 'M' # М
121
- '\u041D': 'N' # Н
122
- '\u04A2': 'Ñ' # Ң
123
- '\u041E': 'O' # О
124
- '\u04E8': "Ö" # Ө
125
- '\u041F': 'P' # П
126
- '\u0420': 'R' # Р
127
- '\u0421': 'S' # С
128
- '\u04AA': 'Ś' # Ҫ
129
- '\u0422': 'T' # Т
130
- '\u0423': 'U' # У
131
- '\u04AE': 'Ü' # Ү note[3]
132
- '\u0424': 'F' # Ф
133
- '\u0425': 'X' # Х
134
- '\u04BA': 'H' # Һ
135
- '\u0426': 'Ts' # Ц
136
- '\u0427': 'Ç' # Ч
137
- '\u0428': 'Ş' # Ш
138
- '\u0429': 'ŞÇ' # Щ
139
- '\u042A': '' # Ъ
140
- '\u042B': 'I' # Ы
141
- '\u042C': '' # Ь
142
- '\u042D': 'E' # Э
143
- '\u04D8': "\u018F" # Ә
144
- '\u042E': 'Yu' # Ю
145
- '\u042F': 'Ya' # Я
146
-
147
- '\u0430': 'a' # а
148
- '\u0431': 'b' # б
149
- '\u0432': 'v' # в note[1]
150
- '\u0433': 'g' # г
151
- '\u0493': "\u011F" # ғ
152
- '\u0434': 'd' # д
153
- '\u0499': 'ź' # ҙ
154
- '\u0435': 'e' # e note[2]
155
- '\u0451': 'yo' # ё
156
- '\u0436': 'j' # ж
157
- '\u0437': 'z' # з
158
- '\u0438': 'i' # и
159
- '\u0439': 'y' # й
160
- '\u043A': 'k' # к
161
- '\u04A1': 'q' # ҡ
162
- '\u043B': 'l' # л
163
- '\u043C': 'm' # м
164
- '\u043D': 'n' # н
165
- '\u04A3': 'ñ' # ң
166
- '\u043E': 'o' # о
167
- '\u04E9': "\u00F6" # ө
168
- '\u043F': 'p' # п
169
- '\u0440': 'r' # р
170
- '\u0441': 's' # с
171
- '\u04AB': 'ś' # ҫ
172
- '\u0442': 't' # т
173
- '\u0443': 'u' # у
174
- "\u04AF": 'ü' # ү note[3]
175
- '\u0444': 'f' # ф
176
- '\u0445': 'x' # х
177
- '\u04BB': 'h' # һ
178
- '\u0446': 'ts' # ц
179
- '\u0447': 'ç' # ч
180
- '\u0448': 'ş' # ш
181
- '\u0449': 'şç' # щ
182
- '\u044A': '' # ъ
183
- '\u044B': "\u0131" # ы
184
- '\u044C': '' # ь
185
- '\u044D': 'e' # э
186
- '\u04D9': "\u0259" # ә
187
- '\u044E': 'yu' # ю
188
- '\u044F': 'ya' # я
@@ -1,289 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1979
4
- language: iso-639-2:bel
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: United States Board on Geographic Names Foreign Names Committee Staff, 1994. Romanization Systems and Roman-Script Spelling Conventions, p. 23.
8
- alias:
9
- ogc11122:
10
- code: bel_Cyrl2Latn_BGN_1979
11
- description: Byelorussian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1979 System
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811510/ROMANIZATION_OF_BELARUSIAN.pdf
13
- creation_date: 1979
14
- description: |
15
- The BGN/PCGN system for Belarusian (formerly referred to as Byelorussian) was designed for use in
16
- romanizing names written in the Belarusian Cyrillic alphabet. The Belarusian alphabet contains three
17
- characters not present in the Russian alphabet: і, ў, and ’.
18
-
19
- notes:
20
- - The character sequences зг, кг, сг, тс, and цг may be romanized z·h, k·h, s·h, t·s, and ts·h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
21
- - All apostrophes appearing in romanization are Unicode encoding 2019.
22
-
23
- tests:
24
- - source: Антон
25
- expected: Anton
26
- - source: Вілейка
27
- expected: Vilyeyka
28
- - source: Брэст
29
- expected: Brest
30
- - source: Дубна
31
- expected: Dubna
32
- - source: Віцебск
33
- expected: Vitsyebsk
34
- - source: Асіповічы
35
- expected: Asipovichy
36
- - source: Гродна
37
- expected: Hrodna
38
- - source: Брагін
39
- expected: Brahin
40
- - source: Добруш
41
- expected: Dobrush
42
- - source: Ліда
43
- expected: Lida
44
- - source: Гомель
45
- expected: Homyel’
46
- - source: Беліца
47
- expected: Byelitsa
48
- - source: Ёдкавічы
49
- expected: Yodkavichy
50
- - source: Нёман
51
- expected: Nyoman
52
- - source: Жлобін
53
- expected: Zhlobin
54
- - source: Ружаны
55
- expected: Ruzhany
56
- - source: Зоя
57
- expected: Zoya
58
- - source: князь
59
- expected: knyaz’
60
- - source: Ігнат
61
- expected: Ihnat
62
- - source: Мінск
63
- expected: Minsk
64
- - source: Йосель
65
- expected: Yosyel’
66
- - source: Койданава
67
- expected: Koydanava
68
- - source: Крапіўна
69
- expected: Krapiwna
70
- - source: Менск
71
- expected: Myensk
72
- - source: Лаўна
73
- expected: Lawna
74
- - source: Лёсік
75
- expected: Lyosik
76
- - source: Купала
77
- expected: Kupala
78
- - source: Вілейка
79
- expected: Vilyeyka
80
- - source: Міхал
81
- expected: Mikhal
82
- - source: Вільня
83
- expected: Vil’nya
84
- - source: Лепель
85
- expected: Lyepyel’
86
- - source: Магілёў
87
- expected: Mahilyow
88
- - source: Няміга
89
- expected: Nyamiha
90
- - source: Наваградак
91
- expected: Navahradak
92
- - source: Баранавічы
93
- expected: Baranavichy
94
- - source: Орша
95
- expected: Orsha
96
- - source: Востраў
97
- expected: Vostraw
98
- - source: Пінск
99
- expected: Pinsk
100
- - source: Дняпро
101
- expected: Dnyapro
102
- - source: Рагачоў
103
- expected: Rahachow
104
- - source: Сураж
105
- expected: Surazh
106
- - source: Смаляны
107
- expected: Smalyany
108
- - source: Арэса
109
- expected: Aresa
110
- - source: Рось
111
- expected: Ros’
112
- - source: Талочын
113
- expected: Talochyn
114
- - source: Масты
115
- expected: Masty
116
- - source: Уладзімір
117
- expected: Uladzimir
118
- - source: Бабруйск
119
- expected: Babruysk
120
- - source: Быхаў
121
- expected: Bykhaw
122
- - source: Воўпа
123
- expected: Vowpa
124
- - source: Іўе
125
- expected: Iwye
126
- - source: Фолюш
127
- expected: Folyush
128
- - source: фортка
129
- expected: fortka
130
- - source: Хатынь
131
- expected: Khatyn’
132
- - source: Быхаў
133
- expected: Bykhaw
134
- - source: Ганцавічы
135
- expected: Hantsavichy
136
- - source: Стоўбцы
137
- expected: Stowbtsy
138
- - source: цьмяны
139
- expected: ts’myany
140
- - source: мясцовы
141
- expected: myastsovy
142
- - source: Астравец
143
- expected: Astravyets
144
- - source: Прыпяць
145
- expected: Prypyats’
146
- - source: Чэрыкаў
147
- expected: Cherykaw
148
- - source: Шчара
149
- expected: Shchara
150
- - source: Нарач
151
- expected: Narach
152
- - source: Шклоў
153
- expected: Shklow
154
- - source: Ашмяны
155
- expected: Ashmyany
156
- - source: Ыттык-Кёль
157
- expected: Yttyk-Kyol’
158
- - source: Кобрын
159
- expected: Kobryn
160
- - source: Солы
161
- expected: Soly
162
- - source: Копысь
163
- expected: Kopys’
164
- - source: рунь
165
- expected: run’
166
- - source: Эйсманты
167
- expected: Eysmanty
168
- - source: Крэва
169
- expected: Kreva
170
- - source: Юры
171
- expected: Yury
172
- - source: уюн
173
- expected: uyun
174
- - source: Язэп
175
- expected: Yazep
176
- - source: Івянец
177
- expected: Ivyanyets
178
- - source: з’езд
179
- expected: z”yezd
180
- - source: Вялікія Вераб’евічы
181
- expected: Vyalikiya Vyerab”yevichy
182
- - source: Дзям’янаўцы
183
- expected: Dzyam”yanawtsy
184
- - source: Задвор’е
185
- expected: Zadvor”ye
186
- - source: Гезгалы
187
- expected: Hyez·haly
188
- - source: Вадасховішча Гезгальскае
189
- expected: Vadaskhovishcha Hyez·hal’skaye
190
-
191
- map:
192
- postrules:
193
- - pattern: '\u042C' # Ь
194
- result: "\u2019"
195
- - pattern: '\u044C' # ь
196
- result: "\u2019"
197
- # Per the documentation, the following interpunct (·) rules are optional
198
- rules:
199
- - pattern: \u0417\u0413 # ЗГ
200
- result: "Z\u00B7H" # Z·H
201
- - pattern: \u0437\u0433 # зг
202
- result: "z\u00B7h" # z·h
203
- - pattern: \u041A\u0413 # КГ
204
- result: "K\u00B7H" # K·H
205
- - pattern: \u043A\u0433 # кг
206
- result: "k\u00B7h" # k·h
207
- - pattern: \u0421\u0413 # СГ
208
- result: "S\u00B7H" # S·H
209
- - pattern: \u0441\u0433 # сг
210
- result: "s\u00B7h" # s·h
211
- - pattern: \u0422\u0421 # ТС
212
- result: "T\u00B7S" # T·S
213
- - pattern: \u0442\u0441 # тс
214
- result: "t\u00B7s" # t·s
215
- - pattern: \u0426\u0413 # ЦГ
216
- result: "TS\u00B7H" # TS·H
217
- - pattern: \u0446\u0433 # цг
218
- result: "ts\u00B7h" # ts·h
219
-
220
- characters:
221
- '\u00B4' : "\u201D" # apostrophe according to spec
222
- '\u02BC' : "\u201D" # apostrophe according to spec
223
- '\u2019' : "\u201D" # apostrophe in actual examples
224
-
225
- '\u0410' : 'A' # A
226
- '\u0411' : 'B' # Б
227
- '\u0412' : 'V' # B
228
- '\u0413' : 'H' # Г
229
- '\u0414' : 'D' # Д
230
- '\u0415' : 'Ye' # Е
231
- '\u0401' : 'Yo' # Ё
232
- '\u0416' : 'Zh' # Ж
233
- '\u0417' : 'Z' # З
234
- '\u0406' : 'I' # І
235
- '\u0419' : 'Y' # Й
236
- '\u041A' : 'K' # К
237
- '\u041B' : 'L' # Л
238
- '\u041C' : 'M' # М
239
- '\u041D' : 'N' # Н
240
- '\u041E' : 'O' # О
241
- '\u041F' : 'P' # П
242
- '\u0420' : 'R' # Р
243
- '\u0421' : 'S' # С
244
- '\u0422' : 'T' # Т
245
- '\u0423' : 'U' # У
246
- '\u040E' : 'W' # Ў
247
- '\u0424' : 'F' # Ф
248
- '\u0425' : 'Kh' # Х
249
- '\u0426' : 'Ts' # Ц
250
- '\u0427' : 'Ch' # Ч
251
- '\u0428' : 'Sh' # Ш
252
- '\u042B' : 'Y' # Ы
253
- '\u042D' : 'E' # Э
254
- '\u042E' : 'Yu' # Ю
255
- '\u042F' : 'Ya' # Я
256
- '\u0490' : 'G' # Ґ
257
-
258
- '\u0430' : 'a' # а
259
- '\u0431' : 'b' # б
260
- '\u0432' : 'v' # в
261
- '\u0433' : 'h' # г
262
- '\u0434' : 'd' # д
263
- '\u0435' : 'ye' # е
264
- '\u0451' : 'yo' # ё
265
- '\u0436' : 'zh' # ж
266
- '\u0437' : 'z' # з
267
- '\u0456' : 'i' # і
268
- '\u0439' : 'y' # й
269
- '\u043A' : 'k' # к
270
- '\u043B' : 'l' # л
271
- '\u043C' : 'm' # м
272
- '\u043D' : 'n' # н
273
- '\u043E' : 'o' # о
274
- '\u043F' : 'p' # п
275
- '\u0440' : 'r' # р
276
- '\u0441' : 's' # с
277
- '\u0442' : 't' # т
278
- '\u0443' : 'u' # у
279
- '\u045E' : 'w' # ў
280
- '\u0444' : 'f' # ф
281
- '\u0445' : 'kh' # х
282
- '\u0446' : 'ts' # ц
283
- '\u0447' : 'ch' # ч
284
- '\u0448' : 'sh' # ш
285
- '\u044B' : 'y' # ы
286
- '\u044D' : 'e' # э
287
- '\u044E' : 'yu' # ю
288
- '\u044F' : 'ya' # я
289
- '\u0491' : 'g' # ґ