interscript 0.1.9 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
data/README.adoc DELETED
@@ -1,296 +0,0 @@
1
- = Interscript: Interoperable Script Conversion Systems, with a Ruby implementation
2
-
3
- image:https://github.com/interscript/interscript/workflows/test/badge.svg["Ruby build status", link="https://github.com/interscript/interscript/actions?workflow=test"]
4
- image:https://github.com/interscript/interscript/workflows/js/badge.svg["JavaScript build status", link="https://github.com/interscript/interscript/actions?workflow=js"]
5
-
6
- == Introduction
7
-
8
- This repository contains interoperable transliteration schemes from:
9
-
10
- * ALA-LC
11
- * BGN/PCGN
12
- * ICAO
13
- * ISO
14
- * UN (by UNGEGN)
15
- * Many, many other script conversion system authorities.
16
-
17
- The goal is to achieve interoperable transliteration schemes allowing quality comparisons.
18
-
19
-
20
-
21
- == Demonstration
22
-
23
- These transliteration systems are used in the demo:
24
-
25
- `bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
26
- `iso-rus-Cyrl-Latn-9-1995`:: ISO 9 Romanization of Russian
27
- `icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
28
- `bas-rus-Cyrl-Latn-2017-bss`:: Bulgarian Academy of Sciences Streamlined System for Russian
29
-
30
- image:demo/20191118-interscript-demo-cast.gif["interscript screencast"]
31
-
32
-
33
- == Installation
34
-
35
- === Prerequisites
36
-
37
- Linux:
38
-
39
- [source,sh]
40
- ----
41
- apt-get install swig python3-setuptools
42
- ----
43
-
44
- Windows:
45
-
46
- [source,sh]
47
- ----
48
- choco install --no-progress swig
49
- ----
50
-
51
- Interscript depends on Python and the https://github.com/sequitur-g2p/sequitur-g2p[`sequitur-g2p`] module
52
-
53
- [source,sh]
54
- ----
55
- pip3 install -r requirements.txt
56
- ----
57
-
58
- Interscript depends on Ruby. Once you manage to install Ruby, it's easy.
59
-
60
- [source,sh]
61
- ----
62
- gem install interscript
63
- ----
64
-
65
- == Usage
66
-
67
- Assume you have a file ready in the source script like this:
68
-
69
- [source,sh]
70
- ----
71
- cat <<EOT > rus-Cyrl.txt
72
- Эх, тройка! птица тройка, кто тебя выдумал? знать, у бойкого народа ты
73
- могла только родиться, в той земле, что не любит шутить, а
74
- ровнем-гладнем разметнулась на полсвета, да и ступай считать версты,
75
- пока не зарябит тебе в очи. И не хитрый, кажись, дорожный снаряд, не
76
- железным схвачен винтом, а наскоро живьём с одним топором да долотом
77
- снарядил и собрал тебя ярославский расторопный мужик. Не в немецких
78
- ботфортах ямщик: борода да рукавицы, и сидит чёрт знает на чём; а
79
- привстал, да замахнулся, да затянул песню — кони вихрем, спицы в
80
- колесах смешались в один гладкий круг, только дрогнула дорога, да
81
- вскрикнул в испуге остановившийся пешеход — и вон она понеслась,
82
- понеслась, понеслась!
83
-
84
- Н.В. Гоголь
85
- EOT
86
- ----
87
-
88
- You can run `interscript` on this text using different transliteration systems.
89
-
90
- [source,sh]
91
- ----
92
- interscript rus-Cyrl.txt \
93
- --system=bgnpcgn-rus-Cyrl-Latn-1947 \
94
- --output=bgnpcgn-rus-Latn.txt
95
-
96
- interscript rus-Cyrl.txt \
97
- --system=iso-rus-Cyrl-Latn-9-1995 \
98
- --output=iso-rus-Latn.txt
99
-
100
- interscript rus-Cyrl.txt \
101
- --system=icao-rus-Cyrl-Latn-9303 \
102
- --output=icao-rus-Latn.txt
103
-
104
- interscript rus-Cyrl.txt \
105
- --system=bas-rus-Cyrl-Latn-2017-bss \
106
- --output=bas-rus-Latn.txt
107
- ----
108
-
109
- It is then easy to see the exact differences in rendering between the systems.
110
-
111
- [source,sh]
112
- ----
113
- diff bgnpcgn-rus-Latn.txt bas-rus-Latn.txt
114
- ----
115
-
116
- == Adding transliteration system
117
-
118
- Transliteration systems are stored in the `maps/` directory as YAML files.
119
- You can create a new file and add it to the directory.
120
-
121
- The file should be named as `<system-code>.yaml`, where `system-code`
122
- is in accordance with
123
- http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229].
124
-
125
- === File structure
126
-
127
- [source,yaml]
128
- ----
129
- authority_id: bgnpcgn
130
- id: 1947
131
- language: rus
132
- source_script: Cyrl
133
- destination_script: Latn
134
- name: ROMANIZATION OF RUSSIAN, BGN/PCGN 1947 System
135
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/807920/ROMANIZATION_OF_RUSSIAN.pdf
136
- creation_date: 1947
137
- confirmation_date: 2019-06
138
- description: The BGN/PCGN system for Russian was adopted ...
139
-
140
- notes:
141
- - The character e should be romanized ye initially, after the vowel ...
142
-
143
- tests:
144
- - source: ДЛИННОЕ ПОКРЫВАЛО
145
- expected: DLINNOYE POKRYVALO
146
- - source: Еловая шишка
147
- expected: Yelovaya shishka
148
-
149
- map:
150
- rules:
151
- - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
152
- result: Ye
153
- - pattern: \b\u0415 # Е initially
154
- result: Ye
155
-
156
- characters:
157
- "\u0410": "A"
158
- "\u0411": "B"
159
- "\u0412": "V"
160
- ----
161
-
162
-
163
- === Rules
164
-
165
- The subsection `rules` is placed under the `map` key. All rules are applied in the order in which they are listed, before the `characters` subsection is applied. Rules apply to the original text, not to the result of applying previous rules.
166
-
167
- Each rule has `pattern` and `result` elements.
168
-
169
- Pattern is a regular expression. It should be written as a string without the `//` or `%r{}` delimiters, for example `\b\u0415`. If a rule depends on the preceding or following content, lookbehind or lookahead can be used. For example, a rule with the pattern `(?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415` finds every Е after the upper- or lower-case symbols a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь.
170
-
171
- Result is the replacement for the pattern's match. It can contain a string, Unicode characters specified by hexadecimal numbers, or captured group references. A string containing a hexadecimal escape or a captured group reference should be double-quoted, for example `"Y\u00eb"` or `"\\1\u00b7\\2"`. Captured groups are referred to by a double backslash followed by the group's number.
172
-
173
- Because rules are applied in order, multiple rules applicable to the same segment of a string can be addressed by rule ordering, and rules can be used as priority over characters. For example:
174
-
175
- [source,yaml]
176
- ----
177
- map:
178
- rules:
179
- - pattern: \u03B3\u03B3 # γ (before Γ, Ξ, Χ)
180
- result: ng
181
- - pattern: (?<![Γγ])\u03B3(?=[ΕεέΗηήΙιίΥυύ]) # γ (before front vowels)
182
- result: y
183
- ----
184
-
185
- (γι maps to `yi`; but γγ maps to `ng`. In the case of γγι, the first rule takes priority, and the transliteration is `ngi`: it makes the second rule impossible.)
186
-
187
- [source,yaml]
188
- ----
189
- map:
190
- rules:
191
- - pattern: (?<=\b)\u03BC[πΠ] # μπ (initially)
192
- result: b
193
- - pattern: \u03BC[πΠ] # μπ (medially)
194
- result: mb
195
- ----
196
-
197
- (The first rule applies at the start of a word; the second rule does not specify a context, as it applies in all other cases not covered by the first rule.)
198
-
199
- [source,yaml]
200
- ----
201
- map:
202
- rules:
203
- - pattern: ";"
204
- result: "?"
205
-
206
- characters:
207
- "\u00B7": ";"
208
- ----
209
-
210
- (This guarantees that any `;` are converted to `?` before any new `;` are introduced; because all three are Latin script, they could be mixed up in ordering.)
211
-
212
- Normally rules "`bleed`" each other: once a rule applies to a segment, that segment cannot trigger other rules, because it is already converted to Roman. Exceptionally, it will be necessary to have a rule add or remove characters in the original script, rather than transliterate them, so that the same context can be invoked by two rules in succession:
213
-
214
- [source,yaml]
215
- ----
216
- map:
217
- rules:
218
- - pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯя])\u042b # Ы after any vowel character
219
- result: "\u00b7Ы"
220
- - pattern: \u042b(?=[АаУуЫыЭэ]) # Ы before а, у, ы, or э
221
- result: "Ы\u00b7"
222
- ----
223
-
224
- (If the result were `\u00B7Y`, the second rule could not be applied afterwards; but we want ОЫУ to transliterate as `O·Y·U`. In order to make that happen, we preserve the Ы during the rules phase, resulting in О·Ы·У; we only convert the letters to Roman script in the `characters` phase.)
225
-
226
- === Testing transliteration systems
227
-
228
- To test all transliteration systems in the `maps/` directory, run:
229
-
230
- [source,sh]
231
- ----
232
- bundle exec rspec
233
- ----
234
-
235
- The command takes `source` texts from the `tests` section, transforms
236
- them using `rules` and `characters` from the `map` key, and compares the
237
- results with the `expected` text of the same test entry.
238
-
239
- To test a specific transliteration system, set the environment variable
240
- `TRANSLIT_SYSTEM` to the system code of the desired system
241
- (i.e. the "`basename`" of the system's YAML file):
242
-
243
- [source,sh]
244
- ----
245
- TRANSLIT_SYSTEM=bgnpcgn-rus-Cyrl-Latn-1947 bundle exec rspec
246
- ----
247
-
248
-
249
- == ISCS system codes
250
-
251
- In accordance with
252
- http://calconnect.gitlab.io/tc-localization/csd-transcription-systems[ISO/CC 24229],
253
- the system code identifying a script conversion system has the following components:
254
-
255
- e.g. `bgnpcgn-rus-Cyrl-Latn-1947`:
256
-
257
- `bgnpcgn`:: the authority identifier
258
- `rus`:: an ISO 639-{1,2,3,5} language code that this system applies to (For 639-2, use (T) code)
259
- `Cyrl`:: an ISO 15924 script code, identifying the source script
260
- `Latn`:: an ISO 15924 script code, identifying the target script
261
- `1947`:: an identifier unit within the authority to identify this system
262
-
263
-
264
- == Covered languages
265
-
266
- Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew.
267
-
268
-
269
- == Samples to play with
270
-
271
- * `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0&region=25&sub_region=25&type=242&vibid=4254017212287
272
-
273
- * `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0&region=76&sub_region=76&type=426&vibid=4764013188704
274
-
275
-
276
- == References
277
-
278
- Reference documents are located at the
279
- https://github.com/interscript/interscript-references[interscript-references repository].
280
- Some specifications that have distribution limitations may not be reproduced there.
281
-
282
-
283
- == Links to system definitions
284
-
285
- * https://www.iso.org/committee/48750.html[ISO/TC 46 (see standards published by WG 3)]
286
- * http://geonames.nga.mil/gns/html/romanization.html[BGN/PCGN and BGN Romanization systems (BGN)]
287
- * https://www.gov.uk/government/publications/romanization-systems[BGN/PCGN Romanization systems (PCGN)]
288
- * https://www.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems in current use]
289
- * http://catdir.loc.gov/catdir/cpso/roman.html[ALA-LC Romanization systems from 1997]
290
- * http://www.eki.ee/wgrs/[UN Romanization systems]
291
- * http://www.eki.ee/knab/kblatyl2.htm[EKI KNAB systems]
292
-
293
- == Copyright and license
294
-
295
- This is a Ribose project. Copyright Ribose.
296
-
data/aliases.json DELETED
@@ -1 +0,0 @@
1
- {"pan_Deva2Latn_ALA_1997":"alalc-pan-Guru-Latn-1997","kor_Hang2Latn_ALA_1997":"alalc-kor-Hang-Latn-1997","asm_Deva2Latn_ALA_1997":"alalc-asm-Deva-Latn-1997","aze_Cyrl2Latn_ALA_1997":"alalc-aze-Cyrl-Latn-1997","ukr_Cyrl2Latn_GUP_1996":"ua-ukr-Cyrl-Latn-1996","tha_Thai2Latn_RIT_1968":"royin-tha-Thai-Latn-1968","bul_Cyrl2Latn_BGN_1952":"bgnpcgn-bul-Cyrl-Latn-1952","tam_Taml2Latn_ALA_1997":"alalc-tam-Taml-Latn-1997","kor_Hang2Latn_GKN_2002":"kp-kor-Hang-Latn-2002","ell_Grek2Latn_ELOT743_1996":"bgnpcgn-ell-Grek-Latn-1996","zho_Hani2Latn_AcadSin_2002":"acadsin-zho-Hani-Latn-2002","ara_Arab2Latn_SES_1930":"ses-ara-Arab-Latn-1930","tgk_Cyrl2Latn_BGN_1994":"bgnpcgn-tgk-Cyrl-Latn-1994","fas_Arab2Latn_BGN_1958":"bgnpcgn-fas-Arab-Latn-1956","sin_Sinh2Latn_ALA_1997":"alalc-sin-Sinh-Latn-1997","uas_Arab2Latn_BGN_2007":"bgnpcgn-urd-Arab-Latn-2007","ukr_Cyrl2Latn_ALA_1997":"alalc-ukr-Cyrl-Latn-1997","bak_Cyrl2Latn_BGN_2007":"bgnpcgn-bak-Cyrl-Latn-2007","tam_Taml2Latn_ALA_2011":"alalc-tam-Taml-Latn-2011","ara_Arab2Latn_BGN_1956":"bgnpcgn-ara-Arab-Latn-1956","ell_Grek2Latn_ALA_1997":"alalc-ell-Grek-Latn-1997","rus_Cyrl2Latn_GOST_1983":"gost-rus-Cyrl-Latn-16876-71-1983","mar_Deva2Latn_ALA_1997":"alalc-mar-Deva-Latn-1997","bel_Cyrl2Latn_ALA_1997":"alalc-bel-Cyrl-Latn-1997","kat_Geor2Latn_ALA_1997":"alalc-kat-Geor-Latn-1997","bul_Cyrl2Latn_ALA_1997":"alalc-bul-Cyrl-Latn-1997","ara_Arab2Latn_ALA_1997":"alalc-ara-Arab-Latn-1997","mon_Cyrl2Latn_ALA_1997":"alalc-mon-Cyrl-Latn-1997","div_Thaa2Latn_GMV_1988":"bgnpcgn-div-Thaa-Latn-1988","hin_Deva2Latn_ALA_1997":"alalc-hin-Deva-Latn-1997","bel_Cyrl2Latn_GBO_1998":"by-bel-Cyrl-Latn-1998","ukr_Cyrl2Latn_BGN_1965":"bgnpcgn-ukr-Cyrl-Latn-1965","rus_Cyrl2Latn_ALA_1997":"alalc-rus-Cyrl-Latn-1997","tir_Thai2Latn_RIT_2000":"royin-tha-Thai-Latn-1999","guj_Gujr2Latn_ALA_1997":"alalc-guj-Gujr-Latn-1997","tel_Telu2Latn_ALA_1997":"alalc-tel-Telu-Latn-1997","mkd_Cyrl2Latn_BGN_1981":"bgnpcgn-mkd-Cyrl-Latn-1981","ori_Orya2Latn_ALA_2011":"alalc-ori-Orya-Latn
-2011","aze_Arab2Latn_ALA_1997":"alalc-aze-Arab-Latn-1997","ori_Orya2Latn_ALA_1997":"alalc-ori-Orya-Latn-1997","div_Thaa2Latn_ALA_1997":"alalc-div-Thaa-Latn-1997","rue_Cyrl2Latn_BGN_2016":"bgnpcgn-rue-Cyrl-Latn-2016","guj_Gujr2Latn_ALA_2011":"alalc-guj-Gujr-Latn-2011","kat_Geor2Latn_BGN_1981":"bgnpcgn-kat-Geor-Latn-1981","kor_Hang2Latn_MOCT_2000":"moct-kor-Hang-Latn-2000","sin_Sinh2Latn_ALA_2011":"alalc-sin-Sinh-Latn-2011","amh_Ethi2Latn_BGN_1967":"bgnpcgn-amh-Ethi-Latn-1967","srp_Cyrl2Latn_BGN_2005":"bgnpcgn-srp-Cyrl-Latn-2005","srp_Cyrl2Latn_ALA_1997":"alalc-srp-Cyrl-Latn-1997","mal_Mlym2Latn_ALA_2012":"alalc-mal-Mlym-Latn-2012","kat_Geor2Latn_GGG_2002":"ggg-kat-Geor-Latn-2002","mon_Cyrl2Latn_BGN_1964":"bgnpcgn-mon-Cyrl-Latn-1964","mal_Mlym2Latn_ALA_1997":"alalc-mal-Mlym-Latn-1997","ben_Beng2Latn_ALA_1997":"alalc-ben-Beng-Latn-1997","kor_Hang2Latn_MR_1939":"bgn-kor-Hang-Latn-1943","zho_Hani2Latn_GCH_1979":"sac-zho-Hans-Latn-1979","bul_Cyrl2Latn_BGN_2013":"bgnpcgn-bul-Cyrl-Latn-2013","ell_Grek2Latn_BGN_1962":"bgnpcgn-ell-Grek-Latn-1962","amh_Ethi2Latn_ALA_1997":"alalc-amh-Ethi-Latn-1997","pan_Deva2Latn_ALA_2011":"alalc-pan-Guru-Latn-2011","zho_Hani2Latn_WDG_1979":"var-zho-Hani-Latn-wd-1979","rus_Cyrl2Latn_BGN_1947":"bgnpcgn-rus-Cyrl-Latn-1947","bel_Cyrl2Latn_BGN_1979":"bgnpcgn-bel-Cyrl-Latn-1979","tat_Cyrl2Latn_BGN_2005":"bgnpcgn-tat-Cyrl-Latn-2007"}
data/lib/g2pwrapper.py DELETED
@@ -1,34 +0,0 @@
1
- import g2p, SequiturTool
2
- import numpy
3
-
4
- def transliterate(model, word):
5
-
6
- class Struct:
7
- def __init__(self, **entries):
8
- self.__dict__.update(entries)
9
-
10
- model_path = {
11
- 'pythainlp_lexicon': './lib/model-7',
12
- 'wiktionary_phonemic': './lib/tha-pt-b-7'
13
- }
14
-
15
- connector_dict = {
16
- 'pythainlp_lexicon': '',
17
- 'wiktionary_phonemic': '-'
18
- }
19
-
20
-
21
- modelFile = model_path[model]
22
- connector = connector_dict[model]
23
-
24
- options = Struct(**{'profile': None, 'resource_usage': None, 'psyco': None, 'tempdir': None, 'trainSample': None, 'develSample': None, 'testSample': None, 'checkpoint': None, 'resume_from_checkpoint': None, 'shouldTranspose': None, 'modelFile': modelFile , 'newModelFile': None, 'shouldTestContinuously': None, 'shouldSelfTest': None, 'lengthConstraints': None, 'shouldSuppressNewMultigrams': None, 'viterbi': None, 'shouldRampUp': None, 'shouldWipeModel': None, 'shouldInitializeWithCounts': None, 'minIterations': 20, 'maxIterations': 100, 'eager_discount_adjustment': None, 'fixed_discount': None, 'encoding': 'UTF-8', 'phoneme_to_phoneme': None, 'test_segmental': None, 'testResult': None, 'applySample': None, 'applyWord': word, 'variants_mass': None, 'variants_number': None, 'fakeTranslator': None, 'stack_limit': None})
25
-
26
- loadSample = g2p.loadG2PSample
27
-
28
- model = SequiturTool.procureModel(options, loadSample)
29
- if not model:
30
- return 1
31
- translator = g2p.Translator(model)
32
- del model
33
-
34
- return connector.join(translator(tuple(word)))
@@ -1,96 +0,0 @@
1
- require 'pathname'
2
-
3
- module Interscript
4
- module Fs
5
- def sub_replace(string, pos, size, repl)
6
- string[pos..pos + size - 1] = repl
7
- string
8
- end
9
-
10
- def root_path
11
- @root_path ||= Pathname.new(File.join(File.dirname(__dir__), ".."))
12
- end
13
-
14
- def transliterate_file(system_code, input_file, output_file, maps={})
15
- input = File.read(input_file)
16
- output = transliterate(system_code, input, maps)
17
-
18
- File.open(output_file, 'w') do |f|
19
- f.puts(output)
20
- end
21
-
22
- puts "Output written to: #{output_file}"
23
- output_file
24
- end
25
-
26
- def import_python_modules
27
- begin
28
- pyimport :g2pwrapper
29
- rescue
30
- pyimport :sys
31
- sys.path.append(root_path.to_s + "/lib/")
32
- pyimport :g2pwrapper
33
- end
34
- end
35
-
36
- def external_process(process_name, string)
37
- import_python_modules
38
-
39
- case process_name
40
- when 'sequitur.pythainlp_lexicon'
41
- return g2pwrapper.transliterate('pythainlp_lexicon', string)
42
- when 'sequitur.wiktionary_phonemic'
43
- return g2pwrapper.transliterate('wiktionary_phonemic', string)
44
- else
45
- raise ExternalProcessNotRecognizedError.new
46
- end
47
-
48
- rescue
49
- raise ExternalProcessUnavailableError.new
50
- end
51
-
52
- def external_processing(mapping, string)
53
- # Segmentation
54
- string = external_process(mapping.segmentation, string) if mapping.segmentation
55
-
56
- # Transliteration/Transcription
57
- string = external_process(mapping.transcription, string) if mapping.transcription
58
-
59
- string
60
- end
61
-
62
- def aliases (refresh: false)
63
- file = root_path.join("./aliases.json").to_s
64
- if !refresh && File.exist?(file)
65
- JSON.load(File.read(file))
66
- elsif !refresh && @aliases
67
- @aliases
68
- else
69
- @aliases = {}
70
- Dir[root_path.join('./maps/*.yaml').to_s].each do |yaml_file|
71
- org_name = File.basename(yaml_file, ".yaml")
72
- map = YAML.load_file(yaml_file)
73
- (map["alias"] || {}).each do |k,v|
74
- @aliases[v["code"]] = org_name
75
- end
76
- end
77
-
78
- # Try to save it to a file, but not force it.
79
- File.write("aliases.json", JSON.dump(@aliases)) rescue nil
80
-
81
- @aliases
82
- end
83
- end
84
-
85
- private
86
-
87
- def map_exist?(map)
88
- File.exist?(root_path.join("./maps/" + map + ".yaml").to_s)
89
- end
90
-
91
- def mkregexp(regexpstring)
92
- /#{regexpstring}/u
93
- end
94
-
95
- end
96
- end
@@ -1,144 +0,0 @@
1
- require 'rambling-trie'
2
- require 'yaml' unless RUBY_ENGINE == 'opal'
3
- require 'json'
4
-
5
- module Interscript
6
-
7
- class Mapping
8
- attr_reader(
9
- :id,
10
- :url,
11
- :name,
12
- :notes,
13
- :rules,
14
- :tests,
15
- :language,
16
- :postrules,
17
- :characters,
18
- :description,
19
- :authority_id,
20
- :creation_date,
21
- :source_script,
22
- :destination_script,
23
- :chain,
24
- :character_separator,
25
- :word_separator,
26
- :title_case,
27
- :downcase,
28
- :dictionary,
29
- :characters_hash,
30
- :dictionary_hash,
31
- :segmentation,
32
- :transcription,
33
- :dictionary_trie
34
- )
35
-
36
- def initialize(system_code, options = {})
37
- @system_code = system_code
38
- @depth = options.fetch(:depth, 0).to_i
39
-
40
- unless RUBY_ENGINE == 'opal'
41
- @system_path = options.fetch(:system_code, default_path)
42
- end
43
-
44
- load_and_serialize_system_mappings
45
- end
46
-
47
- def self.for(system_code, options = {})
48
- new(system_code, options)
49
- end
50
-
51
- def load_and_serialize_system_mappings
52
- return if depth >= 5
53
-
54
- mappings = load_system_mappings
55
- serialize_system_mappings(mappings)
56
- end
57
-
58
- private
59
-
60
- attr_reader :depth, :system_code, :system_path
61
-
62
- def system_code_file
63
- [system_code, "yaml"].join(".")
64
- end
65
-
66
- def default_path
67
- @default_path ||= Interscript.root_path.join("maps")
68
- end
69
-
70
- def load_system_mappings
71
- if RUBY_ENGINE == 'opal'
72
- load_opal_mappings
73
- else
74
- load_fs_mappings
75
- end
76
- end
77
-
78
- def load_opal_mappings
79
- JSON.parse(`Opal.global.InterscriptMaps[#{system_code}]`)
80
- end
81
-
82
- def load_fs_mappings
83
- YAML.load_file(system_path.join(system_code_file))
84
- rescue Errno::ENOENT
85
- raise Interscript::InvalidSystemError.new("No system mappings found")
86
- end
87
-
88
- def serialize_system_mappings(mappings)
89
- @id = mappings.fetch("id", nil)
90
- @url = mappings.fetch("url", nil)
91
- @name = mappings.fetch("name", nil)
92
- @notes = mappings.fetch("notes", nil)
93
- @tests = mappings.fetch("tests", [])
94
- @language = mappings.fetch("language", nil)
95
- @description = mappings.fetch("description", nil)
96
- @authority_id = mappings.fetch("authority_id", nil)
97
- @creation_date = mappings.fetch("creation_date", nil)
98
- @source_script = mappings.fetch("source_script", nil)
99
- @destination_script = mappings.fetch("destination_script", nil)
100
- @chain = mappings.fetch("chain", [])
101
- @character_separator = mappings["map"]["character_separator"] || nil
102
- @word_separator = mappings["map"]["word_separator"] || nil
103
- @title_case = mappings["map"]["title_case"] || false
104
- @downcase = mappings["map"]["downcase"] || false
105
- @rules = mappings["map"]["rules"] || []
106
- @postrules = mappings["map"]["postrules"] || []
107
- @characters = mappings["map"]["characters"] || {}
108
- @dictionary = mappings["map"]["dictionary"] || {}
109
- @segmentation = mappings["map"]["segementation"] || nil
110
- @transcription = mappings["map"]["transcription"] || nil
111
-
112
- include_inherited_mappings(mappings)
113
- build_hashes
114
- build_trie
115
- end
116
-
117
- def include_inherited_mappings(mappings)
118
- inherit_systems = [].push(mappings["map"]["inherit"]).flatten
119
-
120
- inherit_systems.each do |inherit_system|
121
- next unless inherit_system
122
-
123
- inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
124
-
125
- @rules = [rules, inherited_mapping.rules].flatten
126
- @postrules = [inherited_mapping.postrules, postrules].flatten
127
- @characters = (inherited_mapping.characters|| {}).merge(characters)
128
- @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
129
- end
130
-
131
- @characters.compact! # the feature to ignore characters from inherited
132
- end
133
-
134
- def build_hashes
135
- @characters_hash = characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
136
- @dictionary_hash = dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
137
- end
138
-
139
- def build_trie
140
- @dictionary_trie = Rambling::Trie.create
141
- dictionary_trie.concat dictionary.keys
142
- end
143
- end
144
- end