interscript 0.1.7 → 2.1.0b1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
@@ -1,166 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1965
4
- language: iso-639-2:ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 1965 System
8
- alias:
9
- ogc11122:
10
- code: ukr_Cyrl2Latn_BGN_1965
11
- description: Ukrainian Board on Geographic Names/Permanent Committee on Geographical Names for British Official Use(PCGN) 1965 System
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
13
- creation_date: 1947
14
- confirmation_date: 2019-06
15
- description: |
16
- The BGN/PCGN system for Ukrainian was designed for use in romanizing
17
- names written in the Ukrainian alphabet. The Ukrainian alphabet
18
- contains five characters not present in the Russian alphabet: ґ, є, і,
19
- ї, and ’.
20
-
21
- notes:
22
- - The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
23
- - All apostrophes appearing in romanization are Unicode encoding 2019.
24
- - The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
25
-
26
- tests:
27
- - source: Авдіївська Міськрада
28
- expected: Avdiyivs’ka Mis’krada
29
- - source: Бабаї
30
- expected: Babayi
31
- - source: Віленька
32
- expected: Vilen’ka
33
- - source: Гагарінський Район
34
- expected: Haharins’kyy Rayon
35
- - source: Довбушева Криниця
36
- expected: Dovbusheva Krynytsya
37
- - source: Дідівщина
38
- expected: Didivshchyna
39
- - source: Економічна
40
- expected: Ekonomichna
41
- - source: Єфросинівка
42
- expected: Yefrosynivka
43
- - source: Жигуліна Роща
44
- expected: Zhyhulina Roshcha
45
- - source: Загір’я
46
- expected: Zahir”ya
47
- - source: З’єднувальний Канал
48
- expected: Z”yednuval’nyy Kanal
49
- - source: Ивахи
50
- expected: Yvakhy
51
- - source: Івано-Франківська Міськрада
52
- expected: Ivano-Frankivs’ka Mis’krada
53
- - source: Їжаківка
54
- expected: Yizhakivka
55
- - source: Йосиповичі
56
- expected: Yosypovychi
57
- - source: Кабичівка
58
- expected: Kabychivka
59
- - source: Лазуровий Провулок
60
- expected: Lazurovyy Provulok
61
- - source: Мала Сейдеминуха
62
- expected: Mala Seydemynukha
63
- - source: Нагірний
64
- expected: Nahirnyy
65
- - source: Овер’янівське Озеро
66
- expected: Over”yanivs’ke Ozero
67
- - source: Павлопільське Водосховище
68
- expected: Pavlopil’s’ke Vodoskhovyshche
69
- - source: Приґородний
70
- expected: Prygorodnyy
71
- - source: Радгосп Правда
72
- expected: Radhosp Pravda
73
- - source: Садово-Хрустальненський
74
- expected: Sadovo-Khrustal’nens’kyy
75
- - source: Таратутине
76
- expected: Taratutyne
77
- - source: Улу-Узень
78
- expected: Ulu-Uzen’
79
- - source: Христофорівка
80
- expected: Khrystoforivka
81
- - source: Центральна Вулиця
82
- expected: Tsentral’na Vulytsya
83
- - source: Чайковичі
84
- expected: Chaykovychi
85
- - source: Шалаші
86
- expected: Shalashi
87
- - source: Щербинівка
88
- expected: Shcherbynivka
89
- - source: Южноукраїнська Міськрада
90
- expected: Yuzhnoukrayins’ka Mis’krada
91
- - source: Ясениця
92
- expected: Yasenytsya
93
-
94
- map:
95
- rules:
96
- - pattern: \b\u2019\b # ’ in the middle of a word -> ”
97
- result: "\u201d"
98
-
99
- characters:
100
- "\u0430": 'a'
101
- "\u0431": 'b'
102
- "\u0432": 'v'
103
- "\u0433": 'h'
104
- "\u0434": 'd'
105
- "\u0435": 'e'
106
- "\u0436": 'zh'
107
- "\u0437": 'z'
108
- "\u0438": 'y'
109
- "\u0439": 'y'
110
- "\u043a": 'k'
111
- "\u043b": 'l'
112
- "\u043c": 'm'
113
- "\u043d": 'n'
114
- "\u043e": 'o'
115
- "\u043f": 'p'
116
- "\u0440": 'r'
117
- "\u0441": 's'
118
- "\u0442": 't'
119
- "\u0443": 'u'
120
- "\u0444": 'f'
121
- "\u0445": 'kh'
122
- "\u0446": 'ts'
123
- "\u0447": 'ch'
124
- "\u0448": 'sh'
125
- "\u0449": 'shch'
126
- "\u044c": "\u2019"
127
- "\u044e": 'yu'
128
- "\u044f": 'ya'
129
- "\u0454": 'ye'
130
- "\u0456": 'i'
131
- "\u0457": 'yi'
132
- "\u0491": 'g'
133
- "\ufeff": ' '
134
- "\u0404": 'Ye'
135
- "\u0406": 'I'
136
- "\u0407": 'Yi'
137
- "\u0410": 'A'
138
- "\u0411": 'B'
139
- "\u0412": 'V'
140
- "\u0413": 'H'
141
- "\u0414": 'D'
142
- "\u0415": 'E'
143
- "\u0416": 'Zh'
144
- "\u0417": 'Z'
145
- "\u0418": 'Y'
146
- "\u0419": 'Y'
147
- "\u041a": 'K'
148
- "\u041b": 'L'
149
- "\u041c": 'M'
150
- "\u041d": 'N'
151
- "\u041e": 'O'
152
- "\u041f": 'P'
153
- "\u0420": 'R'
154
- "\u0421": 'S'
155
- "\u0422": 'T'
156
- "\u0423": 'U'
157
- "\u0424": 'F'
158
- "\u0425": 'Kh'
159
- "\u0426": 'Ts'
160
- "\u0427": 'Ch'
161
- "\u0428": 'Sh'
162
- "\u0429": 'Shch'
163
- "\u042c": "\u2019"
164
- "\u042e": 'Yu'
165
- "\u042f": 'Ya'
166
- "\u0490": 'G'
@@ -1,119 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2019
4
- language: iso-639-2:ukr
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN 2019 Agreement
8
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
9
- creation_date: 2019
10
- confirmation_date: 2020-01
11
- description: |
12
- The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
13
- in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
14
- since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
15
-
16
- notes:
17
- - |
18
- The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
19
- of the national system within Ukraine. Note, however, that this system is not recommended for
20
- reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
21
- This system also lacks the methodology outlined in the 1965 System to provide additional
22
- differentiation between digraphs and individual character sequences.
23
- For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
24
- sequences зг, кг, сг, тс, and тсг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
25
- from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
26
- the characters ж, х, ш, ц and the character sequence тш.
27
- - To use the keyboard Unicode function, hold ALT and enter in sequence listed in the table.
28
- - The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
29
- - These characters differ significantly in romanization from the BGN/PCGN 1965 system.
30
-
31
- tests:
32
- - source: Алушта
33
- expected: Alushta
34
- - source: Борщагівка
35
- expected: Borshchahivka
36
- - source: Вишгород
37
- expected: Vyshhorod
38
- - source: Гадяч
39
- expected: Hadiach
40
- - source: Згорани
41
- expected: Zghorany
42
- - source: Ґалаґан
43
- expected: Galagan
44
- - source: Дон
45
- expected: Don
46
- - source: Рівне
47
- expected: Rivne
48
- - source: Єнакієве
49
- expected: Yenakiieve
50
- - source: Наєнко
51
- expected: Naienko
52
- - source: Житомир
53
- expected: Zhytomyr
54
- - source: Запоріжжя
55
- expected: Zaporizhzhia
56
- - source: Закарпаття
57
- expected: Zakarpattia
58
- - source: Медвин
59
- expected: Medvyn
60
- - source: Іршава
61
- expected: Irshava
62
- - source: Їжакевич
63
- expected: Yizhakevych
64
- - source: Кадіївка
65
- expected: Kadiivka
66
- - source: Йосипівка
67
- expected: Yosypivka
68
- - source: Стрий
69
- expected: Stryi
70
- - source: Київ
71
- expected: Kyiv
72
- - source: Лебедин
73
- expected: Lebedyn
74
- - source: Миколаїв
75
- expected: Mykolaiv
76
- - source: Ніжин
77
- expected: Nizhyn
78
- - source: Одеса
79
- expected: Odesa
80
- - source: Полтава
81
- expected: Poltava
82
- - source: Ромни
83
- expected: Romny
84
- - source: Суми
85
- expected: Sumy
86
- - source: Тетерів
87
- expected: Teteriv
88
- - source: Ужгород
89
- expected: Uzhhorod
90
- - source: Фастів
91
- expected: Fastiv
92
- - source: Харків
93
- expected: Kharkiv
94
- - source: Біла Церква
95
- expected: Bila Tserkva
96
- - source: Чернівці
97
- expected: Chernivtsi
98
- - source: Шостка
99
- expected: Shostka
100
- - source: Гоща
101
- expected: Hoshcha
102
- - source: Русь
103
- expected: Rus
104
- - source: Юрій
105
- expected: Yurii
106
- - source: Крюківка
107
- expected: Kriukivka
108
- - source: Яготин
109
- expected: Yahotyn
110
- - source: Ічня
111
- expected: Ichnia
112
- - source: Знам’янка
113
- expected: Znamianka
114
-
115
- map:
116
- inherit: un-ukr-Cyrl-Latn-2012
117
-
118
- characters:
119
- "\u0027": '' # ' ->
@@ -1,127 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1979
4
- language: iso-639-2:uzb
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: BGN/PCGN Romanization System -- Uzbek Cyrillic (1979)
8
- url: http://transliteration.eki.ee/pdf/Uzbek.pdf
9
- creation_date: 1979
10
-
11
- notes:
12
- - The character е is romanized ye at the beginning of a syllable and after a vowel, ъ or ь.
13
-
14
- tests:
15
- # https://ru.wikipedia.org/wiki/Узбекский_язык
16
- - source: Ўзбек ёзуви
17
- expected: Ŭzbek yozuwi
18
- - source: Ўзбек тили
19
- expected: Ŭzbek tili
20
- - source: катта
21
- expected: katta
22
- - source: куп
23
- expected: kup
24
- - source: кальта
25
- expected: kalʼta
26
- - source: Бори элға яхшилик қилғилки, мундин яхши йўқ Ким, дегайлар даҳр аро қолди фалондин яхшилик
27
- expected: Bori elgha yakhshilik qilghilki, mundin yakhshi yŭq Kim, degaylar dahr aro qoldi falondin yakhshilik
28
- - source: Бахр ул-худо
29
- expected: Bakhr ul-khudo
30
- - source: Рисале-йи маариф-и Шейбани
31
- expected: Risale-yi maarif-i Sheybani
32
- - source: Карами Хакка нихоят йукдур
33
- expected: Karami Khakka nikhoyat yukdur
34
- - source: Йахши
35
- expected: Yakhshi
36
- - source: Тутук белгись
37
- expected: Tutuk belgisʼ
38
- - source: |
39
- Барча одамлар эркин, қадр-қиммат ва ҳуқуқларда тенг бўлиб туғиладилар.
40
- Улар ақл ва виждон соҳибидирлар ва бир-бирлари ила биродарларча муомала қилишлари зарур.
41
- expected: |
42
- Barcha odamlar erkin, qadr-qimmat wa huquqlarda teng bŭlib tughiladilar.
43
- Ular aql wa wizhdon sohibidirlar wa bir-birlari ila birodarlarcha muomala qilishlari zarur.
44
- - source: ПАПАПАЧУКА Респект!
45
- expected: PAPAPACHUKA Respekt!
46
-
47
- map:
48
- rules:
49
- # note[1]
50
- - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЪъЬь])\u0415
51
- result: Ye
52
- - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЪъЬь])\u0435
53
- result: ye
54
-
55
- characters:
56
- '\u0410': 'A' # А
57
- '\u0411': 'B' # Б
58
- '\u0412': 'W' # В
59
- '\u0413': 'G' # Г
60
- '\u0492': 'Gh' # Ғ
61
- '\u0414': 'D' # Д
62
- '\u0415': 'E' # Е
63
- '\u0401': 'Yo' # Ё
64
- '\u0416': 'Zh' # Ж
65
- '\u0417': 'Z' # З
66
- '\u0418': 'I' # И
67
- '\u0419': 'Y' # Й
68
- '\u041A': 'K' # К
69
- '\u049A': 'Q' # Қ
70
- '\u041B': 'L' # Л
71
- '\u041C': 'M' # М
72
- '\u041D': 'N' # Н
73
- '\u041E': 'O' # О
74
- '\u041F': 'P' # П
75
- '\u0420': 'R' # Р
76
- '\u0421': 'S' # С
77
- '\u0422': 'T' # Т
78
- '\u0423': 'U' # У
79
- '\u040E': 'Ŭ' # Ў
80
- '\u0424': 'F' # Ф
81
- '\u0425': 'Kh' # Х
82
- '\u04B2': 'H' # Ҳ
83
- '\u0426': 'Ts' # Ц
84
- '\u0427': 'Ch' # Ч
85
- '\u0428': 'Sh' # Ш
86
- '\u042a': "\u02BC" # Ъ
87
- '\u042c': "\u02BC" # Ь
88
- '\u042D': 'E' # Э
89
- '\u042E': 'Yu' # Ю
90
- '\u042F': 'Ya' # Я
91
-
92
- '\u0430': 'a' # а
93
- '\u0431': 'b' # б
94
- '\u0432': 'w' # в
95
- '\u0433': 'g' # г
96
- '\u0493': 'gh' # ғ
97
- '\u0434': 'd' # д
98
- '\u0435': 'e' # e
99
- '\u0451': 'yo' # ё
100
- '\u0436': 'zh' # ж
101
- '\u0437': 'z' # з
102
- '\u0438': 'i' # и
103
- '\u0439': 'y' # й
104
- '\u043A': 'k' # к
105
- '\u049B': 'q' # қ
106
- '\u043B': 'l' # л
107
- '\u043C': 'm' # м
108
- '\u043D': 'n' # н
109
- '\u043E': 'o' # о
110
- '\u043F': 'p' # п
111
- '\u0440': 'r' # р
112
- '\u0441': 's' # с
113
- '\u0442': 't' # т
114
- '\u0443': 'u' # у
115
- '\u045E': 'ŭ' # ў
116
- '\u0444': 'f' # ф
117
- '\u0445': 'kh' # х
118
- '\u04B3': 'h' # ҳ
119
- '\u0446': 'ts' # ц
120
- '\u0447': 'ch' # ч
121
- '\u0448': 'sh' # ш
122
- '\u044a': "\u02BC" # ъ
123
- '\u044c': "\u02BC" # ь
124
- '\u044D': 'e' # э
125
- '\u044F': 'ya' # я
126
- '\u044E': 'yu' # ю
127
-
@@ -1,82 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 2000
4
- language: iso-639-2:uzb
5
- source_script: Cyrl
6
- destination_script: Latn
7
- name: TABLE OF CORRESPONDENCES CYRILLIC - ROMAN BGN/PCGN 2000 Agreement
8
- description: |
9
- In 1995, the Uzbek government adopted the Roman alphabet to replace the existing Cyrillic alphabet.
10
- The presentation below provides a table of correspondences between the former Cyrillic alphabet and the
11
- current Roman alphabet. When Uzbek Roman-alphabet spellings are not available, this table can be used to
12
- convert Uzbek Cyrillic spellings. This table of correspondences supersedes the BGN/PCGN 1979 romanization
13
- system for Uzbek.
14
- url: http://transliteration.eki.ee/pdf/Uzbek.pdf
15
- creation_date: 2000
16
- confirmation_date: 2017-11
17
-
18
- notes:
19
- - The letter sequence ye is used initially, after the vowel characters 1, 6, 7, 10, 16, 21, 29, 30, 31, and 32, and after characters 11 and 28.
20
- - The Unicode encoding of the apostrophe appearing in rows 27 and 28 is U+2019. The inverted apostrophe appearing in rows 32 (o‘) and 34 (g‘) is U+2018.
21
- - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
22
-
23
- tests:
24
- # https://ru.wikipedia.org/wiki/Узбекский_язык
25
- - source: Ўзбек ёзуви
26
- expected: O‘zbek yozuwi
27
- - source: Ўзбек тили
28
- expected: O‘zbek tili
29
- - source: катта
30
- expected: katta
31
- - source: куп
32
- expected: kup
33
- - source: кальта
34
- expected: kal’ta
35
- - source: Бори элға яхшилик қилғилки, мундин яхши йўқ Ким, дегайлар даҳр аро қолди фалондин яхшилик
36
- expected: Bori elg‘a yaxshilik qilg‘ilki, mundin yaxshi yo‘q Kim, degaylar dahr aro qoldi falondin yaxshilik
37
- - source: Бахр ул-худо
38
- expected: Baxr ul-xudo
39
- - source: Рисале-йи маариф-и Шейбани
40
- expected: Risale-yi maarif-i Sheybani
41
- - source: Карами Хакка нихоят йукдур
42
- expected: Karami Xakka nixoyat yukdur
43
- - source: Йахши
44
- expected: Yaxshi
45
- - source: Тутук белгись
46
- expected: Tutuk belgis’
47
- - source: |
48
- Барча одамлар эркин, қадр-қиммат ва ҳуқуқларда тенг бўлиб туғиладилар.
49
- Улар ақл ва виждон соҳибидирлар ва бир-бирлари ила биродарларча муомала қилишлари зарур.
50
- expected: |
51
- Barcha odamlar erkin, qadr-qimmat wa huquqlarda teng bo‘lib tug‘iladilar.
52
- Ular aql wa wijdon sohibidirlar wa bir-birlari ila birodarlarcha muomala qilishlari zarur.
53
- - source: ПАПАПАЧУКА Респект!
54
- expected: PAPAPACHUKA Respekt!
55
-
56
- map:
57
- inherit: bgnpcgn-uzb-Cyrl-Latn-1979
58
-
59
- rules:
60
- # note[1]
61
- - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЙйЬь])\u0415
62
- result: Ye
63
- - pattern: (?<=[АаЕеЁёИиОоУуЭэЮюЯяЙйЬь])\u0435
64
- result: ye
65
-
66
- characters:
67
- '\u0412': 'V' # В
68
- '\u0492': "G\u2018" # Ғ
69
- '\u0416': 'J' # Ж
70
- '\u040E': "O\u2018" # Ў
71
- '\u0425': 'X' # Х
72
- '\u042a': "\u2019" # Ъ note[2]
73
- '\u042c': "\u2019" # Ь note[2]
74
-
75
- '\u0432': 'w' # в
76
- '\u0493': "g\u2018" # ғ
77
- '\u0436': 'j' # ж
78
- '\u045E': "o\u2018" # ў
79
- '\u0445': 'x' # х
80
- '\u044a': "\u2019" # ъ note[2]
81
- '\u044c': "\u2019" # ь note[2]
82
-