interscript 0.1.7 → 2.1.0b1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +116 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +83 -133
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +68 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +68 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +76 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +68 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/lib/interscript/visualize.rb +61 -0
  63. data/lib/interscript/visualize/group.html.erb +59 -0
  64. data/lib/interscript/visualize/json.rb +57 -0
  65. data/lib/interscript/visualize/map.html.erb +46 -0
  66. data/lib/interscript/visualize/nodes.rb +89 -0
  67. data/requirements.txt +1 -0
  68. metadata +78 -416
  69. data/README.adoc +0 -298
  70. data/lib/g2pwrapper.py +0 -34
  71. data/lib/interscript/fs.rb +0 -69
  72. data/lib/interscript/mapping.rb +0 -142
  73. data/lib/interscript/opal.rb +0 -57
  74. data/lib/interscript/opal/entrypoint.rb +0 -12
  75. data/lib/interscript/opal/map_translate.rb +0 -7
  76. data/lib/interscript/opal/maps.js.erb +0 -10
  77. data/lib/model-7 +0 -0
  78. data/lib/tha-pt-b-7 +0 -0
  79. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  80. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  81. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  82. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  83. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -165
  84. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -40
  85. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  86. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  87. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  88. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  89. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  90. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  91. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  92. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  93. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -211
  94. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -47
  95. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  96. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  97. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  98. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  99. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  100. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  101. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  102. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  103. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  104. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  105. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  106. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  107. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  108. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  109. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  110. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  111. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  112. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -172
  113. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  114. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  115. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  116. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  117. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  118. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  119. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  120. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  121. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  122. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  123. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  124. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  125. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  126. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  127. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  128. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  129. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  130. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -596
  131. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  132. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  133. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  134. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  135. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  136. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  137. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  138. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  139. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  140. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  141. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  142. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  143. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  144. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  145. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  146. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  147. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  148. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  149. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  150. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  151. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -336
  152. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -639
  153. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  154. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  155. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  156. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  157. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  158. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  159. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  160. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  161. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  162. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  163. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  164. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  165. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  166. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  167. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  168. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  169. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  170. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  171. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  172. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  173. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  174. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  175. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  176. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  177. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  178. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  179. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  180. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  181. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  182. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  183. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  184. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  185. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  186. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  187. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  188. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  189. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  190. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  191. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  192. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  193. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  194. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  195. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  196. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  197. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  198. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  199. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  200. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  201. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  202. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  203. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  204. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  205. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  206. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  207. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  208. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  209. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  210. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  211. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  212. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  213. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +0 -220
  214. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  215. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  216. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  217. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  218. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  219. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  220. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  221. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  222. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  223. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  224. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  225. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  226. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  227. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  228. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  229. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  230. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  231. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  232. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  233. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  234. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  235. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  236. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  237. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  238. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  239. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  240. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  241. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  242. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  243. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  244. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -425
  245. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  246. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  247. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  248. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  249. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  250. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  251. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  252. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  253. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  254. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  255. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  256. data/maps/odni-per-Arab-Latn-2015.yaml +0 -228
  257. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  258. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  259. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  260. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  261. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  262. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  263. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  264. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  265. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  266. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  267. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  268. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  269. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  270. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  271. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  272. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  273. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  274. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  275. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  276. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  277. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  278. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  279. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  280. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  281. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  282. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  283. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  284. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  285. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  286. data/maps/un-hin-Deva-Latn-2016.yaml +0 -222
  287. data/maps/un-mar-Deva-Latn-2016.yaml +0 -91
  288. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  289. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  290. data/maps/un-nep-Deva-Latn-1972.yaml +0 -350
  291. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  292. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  293. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  294. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  295. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  296. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  297. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  298. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  299. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  300. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  301. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  302. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  303. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  304. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  305. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  306. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  307. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  308. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  309. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  310. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  311. data/spec/interscript/filenames_spec.rb +0 -384
  312. data/spec/interscript/mapping_spec.rb +0 -42
  313. data/spec/interscript_spec.rb +0 -29
  314. data/spec/spec_helper.rb +0 -3
data/README.adoc DELETED
@@ -1,298 +0,0 @@
1
- = Interscript: Interoperable Script Conversion Systems, with a Ruby implementation
2
-
3
- image:https://github.com/interscript/interscript/workflows/test/badge.svg["Ruby build status", link="https://github.com/interscript/interscript/actions?workflow=test"]
4
- image:https://github.com/interscript/interscript/workflows/js/badge.svg["JavaScript build status", link="https://github.com/interscript/interscript/actions?workflow=js"]
5
-
6
- == Introduction
7
-
8
- This repository contains interoperable transliteration schemes from:
9
-
10
- * ALA-LC
11
- * BGN/PCGN
12
- * ICAO
13
- * ISO
14
- * UN (by UNGEGN)
15
- * Many, many other script conversion system authorities.
16
-
17
- The goal is to achieve interoperable transliteration schemes allowing quality comparisons.
18
-
19
-
20
-
21
- == Demonstration
22
-
23
- These transliteration systems are used in the demo:
24
-
25
- `bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
26
- `iso-rus-Cyrl-Latn-9-1995`:: ISO 9 Romanization of Russian
27
- `icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
28
- `bas-rus-Cyrl-Latn-2017-bss`:: Bulgaria Academy of Science Streamlined System for Russian
29
-
30
- image:demo/20191118-interscript-demo-cast.gif["interscript screencast"]
31
-
32
-
33
- == Installation
34
-
35
- === Prerequisites
36
-
37
- Linux:
38
-
39
- [source,sh]
40
- ----
41
- apt-get install swig python3-setuptools
42
- ----
43
-
44
- Windows:
45
-
46
- [source,sh]
47
- ----
48
- choco install --no-progress swig
49
- ----
50
-
51
- Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g2p[`sequitur-g2p`] module
52
-
53
- [source,sh]
54
- ----
55
- pip3 install setuptools numpy
56
- curl -sSL -o sequitur-g2p.zip https://github.com/sequitur-g2p/sequitur-g2p/archive/806273f.zip
57
- pip3 install sequitur-g2p.zip
58
- ----
59
-
60
- Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
61
-
62
- [source,sh]
63
- ----
64
- gem install interscript
65
- ----
66
-
67
- == Usage
68
-
69
- Assume you have a file ready in the source script like this:
70
-
71
- [source,sh]
72
- ----
73
- cat <<EOT > rus-Cyrl.txt
74
- Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа ты
75
- могла только родиться, в той земле, что не любит шутить, а
76
- ровнем-гладнем разметнулась на полсвета, да и ступай считать версты,
77
- пока не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не
78
- железным схвачен винтом, а наскоро живьём с одним топором да долотом
79
- снарядил и собрал тебя ярославский расторопный мужик. Не в немецких
80
- ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а
81
- привстал, да замахнулся, да затянул песню — кони вихрем, спицы в
82
- колесах смешались в один гладкий круг, только дрогнула дорога, да
83
- вскрикнул в испуге остановившийся пешеход — и вон она понеслась,
84
- понеслась, понеслась!
85
-
86
- Н.В. Гоголь
87
- EOT
88
- ----
89
-
90
- You can run `interscript` on this text using different transliteration systems.
91
-
92
- [source,sh]
93
- ----
94
- interscript rus-Cyrl.txt \
95
- --system=bgnpcgn-rus-Cyrl-Latn-1947 \
96
- --output=bgnpcgn-rus-Latn.txt
97
-
98
- interscript rus-Cyrl.txt \
99
- --system=iso-rus-Cyrl-Latn-9-1995 \
100
- --output=iso-rus-Latn.txt
101
-
102
- interscript rus-Cyrl.txt \
103
- --system=icao-rus-Cyrl-Latn-9303 \
104
- --output=icao-rus-Latn.txt
105
-
106
- interscript rus-Cyrl.txt \
107
- --system=bas-rus-Cyrl-Latn-2017-bss \
108
- --output=bas-rus-Latn.txt
109
- ----
110
-
111
- It is then easy to see the exact differences in rendering between the systems.
112
-
113
- [source,sh]
114
- ----
115
- diff bgnpcgn-rus-Latn.txt bas-rus-Latn.txt
116
- ----
117
-
118
- == Adding transliteration system
119
-
120
- Transliteration systems are stored in the `maps/` directory as YAML files.
121
- You can create a new file and add it to the directory.
122
-
123
- The file should be named as `<system-code>.yaml`, where `system-code`
124
- is in accordance with
125
- http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229].
126
-
127
- === File structure
128
-
129
- [source,yaml]
130
- ----
131
- authority_id: bgnpcgn
132
- id: 1947
133
- language: rus
134
- source_script: Cyrl
135
- destination_script: Latn
136
- name: ROMANIZATION OF RUSSIAN, BGN/PCGN 1947 System
137
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/807920/ROMANIZATION_OF_RUSSIAN.pdf
138
- creation_date: 1947
139
- confirmation_date: 2019-06
140
- description: The BGN/PCGN system for Russian was adopted ...
141
-
142
- notes:
143
- - The character e should be romanized ye initially, after the vowel ...
144
-
145
- tests:
146
- - source: ДЛИННОЕ ПОКРЫВАЛО
147
- expected: DLINNOYE POKRYVALO
148
- - source: Еловая шишка
149
- expected: Yelovaya shishka
150
-
151
- map:
152
- rules:
153
- - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
154
- result: Ye
155
- - pattern: \b\u0415 # Е initially
156
- result: Ye
157
-
158
- characters:
159
- "\u0410": "A"
160
- "\u0411": "B"
161
- "\u0412": "V"
162
- ----
163
-
164
-
165
- === Rules
166
-
167
- The `rules` subsection is placed under the `map` key. All rules are applied in the order in which they are listed, before the `characters` subsection is applied. Rules apply to the original text, not to the result of applying previous rules.
168
-
169
- Each rule has `pattern` and `result` elements.
170
-
171
- The pattern is a regular expression. It should be written as a string, without the `//` or `%r{}` delimiters; for example, `\b\u0415`. If a rule depends on the preceding or following content, lookbehind or lookahead can be used. For example, a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` finds every Е that follows the upper- or lower-case symbols a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь.
172
-
173
- The result is the replacement for the pattern's match. It can contain a string, Unicode characters specified by a hexadecimal number, or a captured group reference. A string containing a hexadecimal number or a captured group reference should be double-quoted. For example `"Y\u00eb"` or `"\\1\u00b7\\2"`. Captured groups are referred to by a double backslash followed by the group's number.
174
-
175
- Because rules are applied in order, multiple rules applicable to the same segment of a string can be addressed by rule ordering, and rules can be used as priority over characters. For example:
176
-
177
- [source,yaml]
178
- ----
179
- map:
180
- rules:
181
- - pattern: \u03B3\u03B3 # γ (before Γ, Ξ, Χ)
182
- result: ng
183
- - pattern: (?<![Γγ])\u03B3(?=[ΕεέΗηήΙιίΥυύ]) # γ (before front vowels)
184
- result: y
185
- ----
186
-
187
- (γι maps to `yi`; but γγ maps to `ng`. In the case of γγι, the first rule takes priority, and the transliteration is `ngi`: it makes the second rule impossible.)
188
-
189
- [source,yaml]
190
- ----
191
- map:
192
- rules:
193
- - pattern: (?<=\b)\u03BC[πΠ] # μπ (initially)
194
- result: b
195
- - pattern: \u03BC[πΠ] # μπ (medially)
196
- result: mb
197
- ----
198
-
199
- (The first rule applies at the start of a word; the second rule does not specify a context, as it applies in all other cases not covered by the first rule.)
200
-
201
- [source,yaml]
202
- ----
203
- map:
204
- rules:
205
- - pattern: ";"
206
- result: "?"
207
-
208
- characters:
209
- "\u00B7": ";"
210
- ----
211
-
212
- (This guarantees that any `;` are converted to `?` before any new `;` are introduced; because all three are Latin script, they could be mixed up in ordering.)
213
-
214
- Normally rules "`bleed`" each other: once a rule applies to a segment, that segment cannot trigger other rules, because it is already converted to Roman. Exceptionally, it will be necessary to have a rule add or remove characters in the original script, rather than transliterate them, so that the same context can be invoked by two rules in succession:
215
-
216
- [source,yaml]
217
- ----
218
- map:
219
- rules:
220
- - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯя])\u042b # Ы after any vowel character
221
- result: "\u00b7Ы"
222
- - pattern: \u042b(?=[АаУуЫыЭэ]) # Ы before а, у, ы, or э
223
- result: "Ы\u00b7"
224
- ----
225
-
226
- (If the result were `\u00B7Y`, the second rule could not be applied afterwards; but we want ОЫУ to transliterate as `O·Y·U`. In order to make that happen, we preserve the Ы during the rules phase, resulting in О·Ы·У; we only convert the letters to Roman script in the `characters` phase.)
227
-
228
- === Testing transliteration systems
229
-
230
- To test all transliteration systems in the `maps/` directory, run:
231
-
232
- [source,sh]
233
- ----
234
- bundle exec rspec
235
- ----
236
-
237
- The command takes the `source` texts from the `tests` section, transforms
238
- them using `rules` and `characters` from the `map` key, and compares the
239
- results with the corresponding `expected` text.
240
-
241
- To test a specific transliteration system, set the environment variable
242
- `TRANSLIT_SYSTEM` to the system code of the desired system
243
- (i.e. the "`basename`" of the system's YAML file):
244
-
245
- [source,sh]
246
- ----
247
- TRANSLIT_SYSTEM=bgnpcgn-rus-Cyrl-Latn-1947 bundle exec rspec
248
- ----
249
-
250
-
251
- == ISCS system codes
252
-
253
- In accordance with
254
- http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229],
255
- the system code identifying a script conversion system has the following components:
256
-
257
- e.g. `bgnpcgn-rus-Cyrl-Latn-1947`:
258
-
259
- `bgnpcgn`:: the authority identifier
260
- `rus`:: an ISO 639-{1,2,3,5} language code that this system applies to (For 639-2, use (T) code)
261
- `Cyrl`:: an ISO 15924 script code, identifying the source script
262
- `Latn`:: an ISO 15924 script code, identifying the target script
263
- `1947`:: an identifier unit within the authority to identify this system
264
-
265
-
266
- == Covered languages
267
-
268
- Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew.
269
-
270
-
271
- == Samples to play with
272
-
273
- * `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0&region=25&sub_region=25&type=242&vibid=4254017212287
274
-
275
- * `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0&region=76&sub_region=76&type=426&vibid=4764013188704
276
-
277
-
278
- == References
279
-
280
- Reference documents are located at the
281
- https://github.com/interscript/interscript-references[interscript-references repository].
282
- Some specifications that have distribution limitations may not be reproduced there.
283
-
284
-
285
- == Links to system definitions
286
-
287
- * https://www.iso.org/committee/48750.html[ISO/TC 46 (see standards published by WG 3)]
288
- * http://geonames.nga.mil/gns/html/romanization.html[BGN/PCGN and BGN Romanization systems (BGN)]
289
- * https://www.gov.uk/government/publications/romanization-systems[BGN/PCGN Romanization systems (PCGN)]
290
- * https://www.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems in current use]
291
- * http://catdir.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems from 1997]
292
- * http://www.eki.ee/wgrs/[UN Romanization systems]
293
- * http://www.eki.ee/knab/kblatyl2.htm[EKI KNAB systems]
294
-
295
- == Copyright and license
296
-
297
- This is a Ribose project. Copyright Ribose.
298
-
data/lib/g2pwrapper.py DELETED
@@ -1,34 +0,0 @@
1
- import g2p, SequiturTool
2
- import numpy
3
-
4
- def transliterate(model, word):
5
-
6
- class Struct:
7
- def __init__(self, **entries):
8
- self.__dict__.update(entries)
9
-
10
- model_path = {
11
- 'pythainlp_lexicon': './lib/model-7',
12
- 'wiktionary_phonemic': './lib/tha-pt-b-7'
13
- }
14
-
15
- connector_dict = {
16
- 'pythainlp_lexicon': '',
17
- 'wiktionary_phonemic': '-'
18
- }
19
-
20
-
21
- modelFile = model_path[model]
22
- connector = connector_dict[model]
23
-
24
- options = Struct(**{'profile': None, 'resource_usage': None, 'psyco': None, 'tempdir': None, 'trainSample': None, 'develSample': None, 'testSample': None, 'checkpoint': None, 'resume_from_checkpoint': None, 'shouldTranspose': None, 'modelFile': modelFile , 'newModelFile': None, 'shouldTestContinuously': None, 'shouldSelfTest': None, 'lengthConstraints': None, 'shouldSuppressNewMultigrams': None, 'viterbi': None, 'shouldRampUp': None, 'shouldWipeModel': None, 'shouldInitializeWithCounts': None, 'minIterations': 20, 'maxIterations': 100, 'eager_discount_adjustment': None, 'fixed_discount': None, 'encoding': 'UTF-8', 'phoneme_to_phoneme': None, 'test_segmental': None, 'testResult': None, 'applySample': None, 'applyWord': word, 'variants_mass': None, 'variants_number': None, 'fakeTranslator': None, 'stack_limit': None})
25
-
26
- loadSample = g2p.loadG2PSample
27
-
28
- model = SequiturTool.procureModel(options, loadSample)
29
- if not model:
30
- return 1
31
- translator = g2p.Translator(model)
32
- del model
33
-
34
- return connector.join(translator(tuple(word)))
@@ -1,69 +0,0 @@
1
- require 'pathname'
2
-
3
- module Interscript
4
- module Fs
5
- def sub_replace(string, pos, size, repl)
6
- string[pos..pos + size - 1] = repl
7
- string
8
- end
9
-
10
- def root_path
11
- @root_path ||= Pathname.new(File.join(File.dirname(__dir__), ".."))
12
- end
13
-
14
- def transliterate_file(system_code, input_file, output_file, maps={})
15
- input = File.read(input_file)
16
- output = transliterate(system_code, input, maps)
17
-
18
- File.open(output_file, 'w') do |f|
19
- f.puts(output)
20
- end
21
-
22
- puts "Output written to: #{output_file}"
23
- output_file
24
- end
25
-
26
- def import_python_modules
27
- begin
28
- pyimport :g2pwrapper
29
- rescue
30
- pyimport :sys
31
- sys.path.append(root_path.to_s + "/lib/")
32
- pyimport :g2pwrapper
33
- end
34
- end
35
-
36
- def external_process(process_name, string)
37
- import_python_modules
38
-
39
- case process_name
40
- when 'sequitur.pythainlp_lexicon'
41
- return g2pwrapper.transliterate('pythainlp_lexicon', string)
42
- when 'sequitur.wiktionary_phonemic'
43
- return g2pwrapper.transliterate('wiktionary_phonemic', string)
44
- else
45
- raise ExternalProcessNotRecognizedError.new
46
- end
47
-
48
- rescue
49
- raise ExternalProcessUnavailableError.new
50
- end
51
-
52
- def external_processing(mapping, string)
53
- # Segmentation
54
- string = external_process(mapping.segmentation, string) if mapping.segmentation
55
-
56
- # Transliteration/Transcription
57
- string = external_process(mapping.transcription, string) if mapping.transcription
58
-
59
- string
60
- end
61
-
62
- private
63
-
64
- def mkregexp(regexpstring)
65
- /#{regexpstring}/u
66
- end
67
-
68
- end
69
- end
@@ -1,142 +0,0 @@
1
- require 'rambling-trie'
2
- require 'yaml' unless RUBY_ENGINE == 'opal'
3
- require 'json'
4
-
5
- module Interscript
6
-
7
- class Mapping
8
- attr_reader(
9
- :id,
10
- :url,
11
- :name,
12
- :notes,
13
- :rules,
14
- :tests,
15
- :language,
16
- :postrules,
17
- :characters,
18
- :description,
19
- :authority_id,
20
- :creation_date,
21
- :source_script,
22
- :destination_script,
23
- :chain,
24
- :character_separator,
25
- :word_separator,
26
- :title_case,
27
- :downcase,
28
- :dictionary,
29
- :characters_hash,
30
- :dictionary_hash,
31
- :segmentation,
32
- :transcription,
33
- :dictionary_trie
34
- )
35
-
36
- def initialize(system_code, options = {})
37
- @system_code = system_code
38
- @depth = options.fetch(:depth, 0).to_i
39
-
40
- unless RUBY_ENGINE == 'opal'
41
- @system_path = options.fetch(:system_code, default_path)
42
- end
43
-
44
- load_and_serialize_system_mappings
45
- end
46
-
47
- def self.for(system_code, options = {})
48
- new(system_code, options)
49
- end
50
-
51
- def load_and_serialize_system_mappings
52
- return if depth >= 5
53
-
54
- mappings = load_system_mappings
55
- serialize_system_mappings(mappings)
56
- end
57
-
58
- private
59
-
60
- attr_reader :depth, :system_code, :system_path
61
-
62
- def system_code_file
63
- [system_code, "yaml"].join(".")
64
- end
65
-
66
- def default_path
67
- @default_path ||= Interscript.root_path.join("maps")
68
- end
69
-
70
- def load_system_mappings
71
- if RUBY_ENGINE == 'opal'
72
- load_opal_mappings
73
- else
74
- load_fs_mappings
75
- end
76
- end
77
-
78
- def load_opal_mappings
79
- JSON.parse(`InterscriptMaps[#{system_code}]`)
80
- end
81
-
82
- def load_fs_mappings
83
- YAML.load_file(system_path.join(system_code_file))
84
- rescue Errno::ENOENT
85
- raise Interscript::InvalidSystemError.new("No system mappings found")
86
- end
87
-
88
- def serialize_system_mappings(mappings)
89
- @id = mappings.fetch("id", nil)
90
- @url = mappings.fetch("url", nil)
91
- @name = mappings.fetch("name", nil)
92
- @notes = mappings.fetch("notes", nil)
93
- @tests = mappings.fetch("tests", [])
94
- @language = mappings.fetch("language", nil)
95
- @description = mappings.fetch("description", nil)
96
- @authority_id = mappings.fetch("authority_id", nil)
97
- @creation_date = mappings.fetch("creation_date", nil)
98
- @source_script = mappings.fetch("source_script", nil)
99
- @destination_script = mappings.fetch("destination_script", nil)
100
- @chain = mappings.fetch("chain", [])
101
- @character_separator = mappings["map"]["character_separator"] || nil
102
- @word_separator = mappings["map"]["word_separator"] || nil
103
- @title_case = mappings["map"]["title_case"] || false
104
- @downcase = mappings["map"]["downcase"] || false
105
- @rules = mappings["map"]["rules"] || []
106
- @postrules = mappings["map"]["postrules"] || []
107
- @characters = mappings["map"]["characters"] || {}
108
- @dictionary = mappings["map"]["dictionary"] || {}
109
- @segmentation = mappings["map"]["segementation"] || nil
110
- @transcription = mappings["map"]["transcription"] || nil
111
-
112
- include_inherited_mappings(mappings)
113
- build_hashes
114
- build_trie
115
- end
116
-
117
- def include_inherited_mappings(mappings)
118
- inherit_systems = [].push(mappings["map"]["inherit"]).flatten
119
-
120
- inherit_systems.each do |inherit_system|
121
- next unless inherit_system
122
-
123
- inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
124
-
125
- @rules = [rules, inherited_mapping.rules].flatten
126
- @postrules = [inherited_mapping.postrules, postrules].flatten
127
- @characters = (inherited_mapping.characters|| {}).merge(characters)
128
- @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
129
- end
130
- end
131
-
132
- def build_hashes
133
- @characters_hash = characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
134
- @dictionary_hash = dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
135
- end
136
-
137
- def build_trie
138
- @dictionary_trie = Rambling::Trie.create
139
- dictionary_trie.concat dictionary.keys
140
- end
141
- end
142
- end