interscript 0.1.6 → 2.1.0a9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -127
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +75 -339
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -71
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -27
  71. data/lib/interscript/opal/maps.js.erb +0 -10
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -509
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1283
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -159
  80. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  81. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -125
  82. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  83. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  84. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -624
  85. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -627
  86. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  87. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  88. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  89. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  90. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -170
  91. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  92. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  93. data/maps/alalc-pan-Deva-Latn-1997.yaml +0 -237
  94. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -221
  95. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  96. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  97. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  98. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  99. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  100. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  101. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  102. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  103. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  104. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  105. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  106. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  107. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  108. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -528
  109. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -592
  110. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  111. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  112. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  113. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -285
  114. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  115. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  116. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -701
  117. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -19
  118. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  119. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  120. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  121. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  122. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  123. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  124. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  125. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  126. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -200
  127. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -92
  128. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  129. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  130. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -162
  131. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  132. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  133. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  134. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  135. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  136. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +0 -166
  137. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +0 -173
  138. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  139. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -160
  140. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  141. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  142. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  143. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  144. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  145. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -33
  146. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  147. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  148. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  149. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  150. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -88
  151. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  152. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  153. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -186
  154. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  155. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  156. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  157. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  158. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  159. data/maps/icao-per-Arab-Latn-9303.yaml +0 -103
  160. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  161. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  162. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  163. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  164. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -609
  165. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -40
  166. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  167. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  172. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  173. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  174. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  175. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  176. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  177. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  178. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  179. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  180. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  181. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  182. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  183. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  184. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  185. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  186. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  200. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -279
  201. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  202. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  203. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  204. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  205. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  206. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  207. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  208. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  209. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  210. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  211. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
  212. data/maps/un-nep-Deva-Latn-1972.yaml +0 -163
  213. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  214. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -30
  215. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +0 -575
  216. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  217. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  218. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  219. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  220. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  221. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  222. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  223. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  224. data/spec/interscript/mapping_spec.rb +0 -42
  225. data/spec/interscript_spec.rb +0 -26
  226. data/spec/spec_helper.rb +0 -3
@@ -0,0 +1,262 @@
1
+ $main_binding = binding
2
+
3
+ class Interscript::Compiler::Ruby < Interscript::Compiler
4
# Compiles +map+ into Ruby source code and stores it in @code.
#
# The generated source consists of:
# * a prelude that defines the Interscript::Maps registry unless it is
#   already defined,
# * one add_map_alias / add_map_alias_re registration per alias of +map+,
# * one add_map_stage block per stage of +map+,
# * the cached parallel trees / regexps collected while compiling stages.
#
# @param map   the map document node to compile
# @param debug when true, generated stages record each step in $map_debug
# @return [String] the generated Ruby source (also kept in @code)
def compile(map, debug: false)
  @map = map
  @debug = debug
  @parallel_trees = {}
  @parallel_regexps = {}

  # Runtime prelude; the quoted heredoc keeps the text verbatim.
  code = <<~'RUBY'
    require 'interscript/stdlib'
    if !defined?(Interscript::Maps); module Interscript; module Maps
    module Cache; end
    class Map < Struct.new(:stages, :aliases, :aliases_re); end
    @maps = Hash.new { |h,id| h[id] = Map.new({},{},{}) }
    def self.has_map?(map); @maps.include?(map); end
    def self.add_map_alias(map,name,value) @maps[map].aliases[name] = value; end
    def self.add_map_alias_re(map,name,value) @maps[map].aliases_re[name] = value; end
    def self.add_map_stage(map,stage,&block); @maps[map].stages[stage] = block; end
    def self.get_alias(map,name); @maps[map].aliases[name]; end
    def self.get_alias_re(map,name); @maps[map].aliases_re[name]; end
    def self.transliterate(map,string,stage=:main); @maps[map].stages[stage].(string); end
    end; end; end
  RUBY

  # Every alias is registered twice: as a string and as a regexp literal.
  map.aliases.each do |name, value|
    str_src = compile_item(value.data, map, :str)
    code << "Interscript::Maps.add_map_alias(#{map.name.inspect}, #{name.inspect}, #{str_src})\n"
    # Escape "/" so the fragment can sit inside a /.../ literal.
    re_src = compile_item(value.data, map, :re).gsub('/', '\\\\/')
    code << "Interscript::Maps.add_map_alias_re(#{map.name.inspect}, #{name.inspect}, /#{re_src}/)\n"
  end

  map.stages.each do |_name, stage|
    code << compile_rule(stage, @map, true)
  end

  # Constants referenced by the stage code compiled above.
  @parallel_trees.each do |key, tree|
    code << "Interscript::Maps::Cache::PTREE_#{key} ||= #{tree.inspect}\n"
  end
  @parallel_regexps.each do |key, data|
    code << "Interscript::Maps::Cache::PRE_#{key} ||= #{data.inspect}\n"
  end

  @code = code
end
42
+
43
# Compiles a single rule node (or a whole stage) into Ruby source.
#
# Handled node kinds:
# * Stage           - wraps the compiled children in an add_map_stage block
# * Group::Parallel - tries a replacement tree first; when any child can't
#                     be expressed that way, falls back to one combined regexp
# * Rule::Sub       - a gsub! call
# * Rule::Funcall   - a call into Interscript::Stdlib::Functions
# * Rule::Run       - a call to another stage via Interscript::Maps.transliterate
#
# @param r       the node to compile
# @param map     the document used to resolve aliases and stages
# @param wrapper accepted for compatibility; not used in this method
# @return [String] generated Ruby source
# @raise [ArgumentError] for an unhandled node class
def compile_rule(r, map = @map, wrapper = false)
  code = +""
  case r
  when Interscript::Node::Stage
    code << "Interscript::Maps.add_map_stage \"#{@map.name}\", #{r.name.inspect} do |s|\n"
    code << "$map_debug ||= []\n" if @debug
    code << "s = s.dup\n"
    r.children.each do |child|
      compiled = compile_rule(child, map)
      code << compiled
      if @debug
        code << %{$map_debug << [s.dup, #{@map.name.to_s.inspect}, #{r.name.to_s.inspect}, #{child.inspect.inspect}, #{compiled.inspect}]\n}
      end
    end
    code << "s\n"
    code << "end\n"
  when Interscript::Node::Group::Parallel
    begin
      # Try to build a tree
      pairs = []
      r.children.each do |rule|
        raise ArgumentError, "Can't parallelize #{rule.class}" unless Interscript::Node::Rule::Sub === rule
        raise ArgumentError, "Can't parallelize rules with :before" if rule.before
        raise ArgumentError, "Can't parallelize rules with :after" if rule.after
        raise ArgumentError, "Can't parallelize rules with :not_before" if rule.not_before
        raise ArgumentError, "Can't parallelize rules with :not_after" if rule.not_after

        pairs << [compile_item(rule.from, map, :par), compile_item(rule.to, map, :parstr)]
      end
      key = pairs.hash.abs
      unless @parallel_trees.include? key
        @parallel_trees[key] = Interscript::Stdlib.parallel_replace_compile_tree(pairs)
      end
      code << "s = Interscript::Stdlib.parallel_replace_tree(s, Interscript::Maps::Cache::PTREE_#{key})\n"
    rescue
      # Otherwise let's build a megaregexp
      pairs = []
      Interscript::Stdlib.deterministic_sort_by_max_length(r.children).each do |rule|
        raise ArgumentError, "Can't parallelize #{rule.class}" unless Interscript::Node::Rule::Sub === rule

        pairs << [build_regexp(rule, map), compile_item(rule.to, map, :parstr)]
      end
      key = pairs.hash.abs
      unless @parallel_regexps.include? key
        regexp = Interscript::Stdlib.parallel_regexp_compile(pairs)
        @parallel_regexps[key] = [regexp, pairs.map(&:last)]
      end
      code << "s = Interscript::Stdlib.parallel_regexp_gsub(s, *Interscript::Maps::Cache::PRE_#{key})\n"
    end
  when Interscript::Node::Rule::Sub
    # Escape "/" so the pattern can sit inside a /.../ literal.
    pattern = build_regexp(r, map).gsub("/", "\\\\/")
    from = "/#{pattern}/"
    to = r.to == :upcase ? '&:upcase' : compile_item(r.to, map, :str)
    code << "s.gsub!(#{from}, #{to})\n"
  when Interscript::Node::Rule::Funcall
    # Strip the surrounding braces of the inspected hash to get keyword args.
    code << "s = Interscript::Stdlib::Functions.#{r.name}(s, #{r.kwargs.inspect[1..-2]})\n"
  when Interscript::Node::Rule::Run
    # A stage qualified with a map is looked up in that dependency's document.
    source = r.stage.map ? map.dep_aliases[r.stage.map].document : map
    stage = source.imported_stages[r.stage.name]
    code << "s = Interscript::Maps.transliterate(#{stage.doc_name.inspect}, s, #{stage.name.inspect})\n"
  else
    raise ArgumentError, "Can't compile unhandled #{r.class}"
  end
  code
end
114
+
115
# Builds the regexp source for a substitution rule.
#
# The rule's +from+ item is wrapped with lookaround assertions for the
# optional +before+ / +not_before+ (lookbehind) and +after+ / +not_after+
# (lookahead) items.
#
# @param r   rule responding to from, before, after, not_before, not_after
# @param map document used to resolve aliases
# @return [String] regexp source (without delimiters)
def build_regexp(r, map=@map)
  as_re = ->(item) { compile_item(item, map, :re) if item }

  core         = compile_item(r.from, map, :re)
  behind       = as_re.(r.before)
  ahead        = as_re.(r.after)
  not_behind   = as_re.(r.not_before)
  not_ahead    = as_re.(r.not_after)

  pattern = +""
  pattern << "(?<=#{behind})" if behind
  pattern << "(?<!#{not_behind})" if not_behind
  pattern << core
  pattern << "(?!#{not_ahead})" if not_ahead
  pattern << "(?=#{ahead})" if ahead
  pattern
end
130
+
131
# Compiles an item node for the requested +target+ context.
#
# Targets, as handled below:
# * :str    - Ruby source of a string expression (used as a replacement)
# * :re     - regexp source fragment
# * :par    - raw strings / arrays of strings for parallel replacement
# * :parstr - like :par, but the item is first reduced with first_string
#
# @param i      the item (anything Interscript::Node::Item.try_convert accepts)
# @param doc    document used to resolve aliases
# @param target one of the targets listed above
# @return the compiled representation; nil when no branch applies to +target+
# @raise [ArgumentError] when an alias can't be found or the item can't be
#   used in the given context
def compile_item(i, doc=@map, target=nil)
  i = i.first_string if %i[str parstr].include? target
  i = Interscript::Node::Item.try_convert(i)
  if target == :parstr
    parstr = true
    target = :par
  end

  case i
  when Interscript::Node::Item::Alias
    astr = if i.map
      d = doc.dep_aliases[i.map].document
      a = d.imported_aliases[i.name]
      # The map qualifier lives on the alias item itself (see the `if i.map`
      # test above), so report it from there.
      raise ArgumentError, "Alias #{i.name} of #{i.map} not found" unless a
      "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, #{a.name.inspect})"
    elsif Interscript::Stdlib::ALIASES.include?(i.name)
      if target != :re && Interscript::Stdlib.re_only_alias?(i.name)
        raise ArgumentError, "Can't use #{i.name} in a #{target} context"
      end
      stdlib_alias = true
      "Interscript::Stdlib::ALIASES[#{i.name.inspect}]"
    else
      a = doc.imported_aliases[i.name]
      raise ArgumentError, "Alias #{i.name} not found" unless a

      "Interscript::Maps.get_alias_ALIASTYPE(#{a.doc_name.inspect}, #{a.name.inspect})"
    end

    # _ALIASTYPE is a placeholder resolved to the string or regexp getter.
    if target == :str
      astr = astr.sub("_ALIASTYPE(", "(")
    elsif target == :re
      astr = "\#{#{astr.sub("_ALIASTYPE(", "_re(")}}"
    elsif parstr && stdlib_alias
      astr = Interscript::Stdlib::ALIASES[i.name]
    elsif target == :par
      # raise NotImplementedError, "Can't use aliases in parallel mode yet"
      astr = Interscript::Stdlib::ALIASES[i.name]
    end
  when Interscript::Node::Item::String
    if target == :str
      # Replace \1 with \\1, this is weird, but it works!
      i.data.gsub("\\", "\\\\\\\\").inspect
    elsif target == :par
      i.data
    elsif target == :re
      Regexp.escape(i.data)
    end
  when Interscript::Node::Item::Group
    if target == :par
      # Cartesian product of the alternatives of every child, joined.
      i.children.map do |j|
        compile_item(j, doc, target)
      end.reduce([""]) do |j,k|
        Array(j).product(Array(k)).map(&:join)
      end
    elsif target == :str
      i.children.map { |j| compile_item(j, doc, target) }.join("+")
    elsif target == :re
      i.children.map { |j| compile_item(j, doc, target) }.join
    end
  when Interscript::Node::Item::CaptureGroup
    if target != :re
      raise ArgumentError, "Can't use a CaptureGroup in a #{target} context"
    end
    "(" + compile_item(i.data, doc, target) + ")"
  when Interscript::Node::Item::Maybe,
       Interscript::Node::Item::MaybeSome,
       Interscript::Node::Item::Some

    resuffix = { Interscript::Node::Item::Maybe     => "?" ,
                 Interscript::Node::Item::Some      => "+" ,
                 Interscript::Node::Item::MaybeSome => "*" }[i.class]

    if target == :par
      raise ArgumentError, "Can't use a Maybe in a #{target} context"
    end
    # A multi-character string needs a group so the quantifier covers all of it.
    if Interscript::Node::Item::String === i.data && i.data.data.length != 1
      "(?:" + compile_item(i.data, doc, target) + ")" + resuffix
    else
      compile_item(i.data, doc, target) + resuffix
    end
  when Interscript::Node::Item::CaptureRef
    if target == :par
      raise ArgumentError, "Can't use CaptureRef in parallel mode"
    elsif target == :re
      "\\#{i.id}"
    elsif target == :str
      "\"\\\\#{i.id}\""
    end
  when Interscript::Node::Item::Any
    if target == :str
      raise ArgumentError, "Can't use Any in a string context" # A linter could find this!
    elsif target == :par
      i.data.map(&:data)
    elsif target == :re
      case i.value
      when Array
        data = i.data.map { |j| compile_item(j, doc, target) }
        "(?:"+data.join("|")+")"
      when String
        "[#{Regexp.escape(i.value)}]"
      when Range
        "[#{Regexp.escape(i.value.first)}-#{Regexp.escape(i.value.last)}]"
      end
    end
  end
end
237
+
238
# Evaluates the compiled code of this map (in $main_binding) unless the map
# is already registered in Interscript::Maps. Dependencies that are not yet
# registered are loaded first, using the same compiler class.
#
# @return the result of the eval, or nil when the map was already loaded
def load
  registered = lambda do |name|
    defined?(Interscript::Maps) && Interscript::Maps.has_map?(name)
  end
  return if registered.(@map.name)

  @map.dependencies.each do |dep|
    dep_name = dep.full_name
    next if registered.(dep_name)

    Interscript.load(dep_name, compiler: self.class).load
  end
  eval(@code, $main_binding)
end
249
+
250
# Transliterates +str+ with the given +stage+ of this map, making sure the
# compiled code has been loaded first.
#
# @param str   the string to transliterate
# @param stage the stage name to run (defaults to :main)
# @return whatever the stage registered in Interscript::Maps returns
def call(str, stage=:main)
  self.load
  map_name = @map.name
  Interscript::Maps.transliterate(map_name, str, stage)
end
254
+
255
# Returns the debug trace collected in $map_debug, or an empty array when
# nothing has been recorded.
def self.read_debug_data
  return $map_debug if $map_debug

  []
end
258
+
259
# Discards any collected debug trace by pointing $map_debug at a new,
# empty array.
def self.reset_debug_data
  fresh = []
  $map_debug = fresh
end
262
+ end
@@ -0,0 +1,67 @@
1
+ require "yaml"
2
+
3
module Interscript::DSL
  # Parsed documents, keyed by map name.
  @cache = {}

  # Parses the map called +map_name+ into a document node.
  #
  # The map file is split into two parts with the same number of lines as
  # the file: the lines inside a `metadata { ... }` block are treated as
  # YAML, everything else as Ruby DSL code. Lines belonging to the other
  # part (and the block delimiters themselves) are replaced by blank lines,
  # so line numbers reported for either part match the original file.
  #
  # @param map_name name passed to Interscript.locate
  # @return the parsed document node (cached for subsequent calls)
  # @raise [ArgumentError] when a metadata block is never closed
  def self.parse(map_name)
    # map name aliases? here may be a place to wrap it

    return @cache[map_name] if @cache[map_name]
    path = Interscript.locate(map_name)
    map_name = File.basename(path, ".imp")
    map_name = File.basename(map_name, ".iml")

    ruby = []
    yaml = []

    file = File.read(path).split("\n")
    exc_fname = File.expand_path(path, Dir.pwd)

    md_reading = false
    md_indent = nil
    file.each do |l|
      if md_reading && l =~ /\A#{Regexp.escape(md_indent)}\}\s*\z/
        # Closing brace at the indentation of the opening line.
        md_reading = false
        ruby << ""
        yaml << ""
      elsif md_reading
        ruby << ""
        yaml << l
      elsif l =~ /\A(\s*)metadata\s*\{\z/
        md_indent = Regexp.last_match(1)
        md_reading = true
        ruby << ""
        yaml << ""
      else
        yaml << ""
        ruby << l
      end
    end
    raise ArgumentError, "metadata stage isn't terminated" if md_reading
    ruby, yaml = ruby.join("\n"), yaml.join("\n")

    obj = Interscript::DSL::Document.new(map_name)
    # NOTE(review): this evaluates the map file as Ruby code; map files must
    # come from a trusted source.
    obj.instance_eval ruby, exc_fname, 1

    yaml = if yaml =~ /\A\s*\z/
      {}
    else
      # The filename is passed as a keyword; a positional filename is not
      # accepted by newer Psych versions. A document without any mapping
      # (e.g. comments only) is treated as empty metadata.
      YAML.load(yaml, filename: exc_fname) || {}
    end

    md = Interscript::DSL::Metadata.new(yaml: true) do
      yaml.each do |k,v|
        public_send(k.to_sym, v)
      end
    end
    obj.node.metadata = md.node

    @cache[map_name] = obj.node
  end
end
58
+
59
+ require 'interscript/dsl/symbol_mm'
60
+ require 'interscript/dsl/items'
61
+
62
+ require 'interscript/dsl/document'
63
+ require 'interscript/dsl/group'
64
+ require 'interscript/dsl/stage'
65
+ require 'interscript/dsl/metadata'
66
+ require 'interscript/dsl/tests'
67
+ require 'interscript/dsl/aliases'
@@ -0,0 +1,23 @@
1
# DSL context for an `aliases { ... }` block. Collects alias definitions
# into a Hash of name => Interscript::Node::AliasDef, exposed as #node.
class Interscript::DSL::Aliases
  include Interscript::DSL::Items

  attr_accessor :node

  # Evaluates +block+ in the context of this object.
  def initialize(&block)
    @node = {}
    self.instance_exec(&block)
  end

  # Defines an alias.
  #
  # @param name  a Symbol, or an Alias item whose name is used
  # @param value the value the alias stands for
  # @raise [TypeError] when the name is not a Symbol
  def def_alias(name, value)
    if Interscript::Node::Item::Alias === name
      name = name.name
    end

    unless Symbol === name
      raise TypeError, "Alias name must be a Symbol, given #{name.class}"
    end

    puts "def_alias(#{name.inspect}, #{value.inspect})" if $DEBUG
    @node[name] = Interscript::Node::AliasDef.new(name, value)
  end
end
@@ -0,0 +1,46 @@
1
# DSL context for a whole map file. Builds an Interscript::Node::Document,
# exposed as #node.
class Interscript::DSL::Document
  include Interscript::DSL::SymbolMM

  attr_accessor :node

  # @param name optional document name
  # @param block optional DSL block evaluated in the context of this object
  def initialize(name = nil, &block)
    @node = Interscript::Node::Document.new
    @node.name = name if name
    instance_exec(&block) if block
  end

  # Defines the document metadata from a DSL block.
  def metadata(&block)
    metadata = Interscript::DSL::Metadata.new(&block)
    @node.metadata = metadata.node
  end

  # Defines the document tests from a DSL block.
  def tests(&block)
    tests = Interscript::DSL::Tests.new(&block)
    @node.tests = tests.node
  end

  # Defines the document aliases from a DSL block and tags each alias
  # definition with the name of this document.
  def aliases(&block)
    aliases = Interscript::DSL::Aliases.new(&block)
    @node.aliases = aliases.node
    @node.aliases.each_value { |v| v.doc_name = @node.name }
  end

  # Declares a dependency on another map, which is parsed immediately.
  #
  # @param full_name name of the map to depend on
  # @param kargs     :as - local name for the dependency (optional);
  #                  :import - stored on the dependency, defaults to false
  def dependency(full_name, **kargs)
    puts "dependency(#{full_name.inspect}, #{kargs.inspect})" if $DEBUG
    dep = Interscript::Node::Dependency.new
    dep.name = kargs[:as]
    dep.full_name = full_name
    dep.import = kargs[:import] || false

    dep.document = Interscript::DSL.parse(full_name)
    @node.dependencies << dep
    # Only dependencies given a local name are addressable by alias.
    @node.dep_aliases[dep.name] = dep if dep.name
  end

  # Defines a stage (named :main by default) from a DSL block.
  def stage(name = :main, &block)
    puts "stage(#{name}) from #{self.inspect}" if $DEBUG
    stage = Interscript::DSL::Stage.new(name, &block)
    stage.node.doc_name = @node.name
    @node.stages[name] = stage.node
  end
end
@@ -0,0 +1,45 @@
1
# DSL context for a group of rules. Builds an Interscript::Node::Group,
# exposed as #node; every DSL call appends a rule to its children.
class Interscript::DSL::Group
  include Interscript::DSL::Items

  attr_accessor :node

  # Evaluates +block+ in the context of this object.
  def initialize(&block)
    @node = Interscript::Node::Group.new
    self.instance_exec(&block)
  end

  # Appends a rule that runs another stage.
  #
  # @param stage an Interscript::Node::Item::Stage
  # @raise [TypeError] when +stage+ is of any other class
  def run(stage)
    if stage.class != Interscript::Node::Item::Stage
      raise TypeError, "I::Node::Item::Stage expected, got #{stage.class}"
    end
    @node.children << Interscript::Node::Rule::Run.new(stage)
  end

  # Appends a substitution rule. Keyword arguments are passed on to
  # Interscript::Node::Rule::Sub; the block is accepted but not used.
  def sub(from, to, **kwargs, &block)
    puts "sub(#{from.inspect},#{to}, kwargs = #{
      kwargs.inspect
    }) from #{self.inspect}" if $DEBUG

    rule = Interscript::Node::Rule::Sub.new(from, to, **kwargs)
    @node.children << rule
  end

  # Marker usable as the replacement of a `sub` rule.
  def upcase; :upcase; end

  # One DSL method per stdlib function; each appends a Funcall rule.
  Interscript::Stdlib.available_functions.each do |fun|
    define_method fun do |**kwargs|
      puts "funcall(#{fun}, #{kwargs.inspect}) from #{self.inspect}" if $DEBUG

      rule = Interscript::Node::Rule::Funcall.new(fun, **kwargs)
      @node.children << rule
    end
  end

  # Appends a nested group whose rules are defined by +block+ and are
  # built through Interscript::DSL::Group::Parallel.
  def parallel(&block)
    puts "parallel() from #{self.inspect}" if $DEBUG
    group = Interscript::DSL::Group::Parallel.new(&block)
    @node.children << group.node
  end
end
44
+
45
+ require 'interscript/dsl/group/parallel'