immunio 1.2.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +13 -5
  3. data/ext/immunio/Rakefile +14 -6
  4. data/lib/immunio/context.rb +2 -0
  5. data/lib/immunio/plugins/action_view.rb +7 -668
  6. data/lib/immunio/plugins/action_view/action_view.rb +22 -0
  7. data/lib/immunio/plugins/action_view/active_support_hash.rb +29 -0
  8. data/lib/immunio/plugins/action_view/cache_store.rb +24 -0
  9. data/lib/immunio/plugins/action_view/erubi.rb +38 -0
  10. data/lib/immunio/plugins/action_view/erubis.rb +39 -0
  11. data/lib/immunio/plugins/action_view/fragment_caching.rb +29 -0
  12. data/lib/immunio/plugins/action_view/haml.rb +46 -0
  13. data/lib/immunio/plugins/action_view/slim.rb +42 -0
  14. data/lib/immunio/plugins/action_view/template.rb +431 -0
  15. data/lib/immunio/plugins/action_view/template_rendering.rb +45 -0
  16. data/lib/immunio/plugins/http_tracker.rb +2 -0
  17. data/lib/immunio/plugins/io.rb +34 -0
  18. data/lib/immunio/version.rb +1 -1
  19. data/lua-hooks/Makefile +36 -9
  20. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  21. data/lua-hooks/ext/luajit/Makefile +22 -15
  22. data/lua-hooks/ext/luajit/README +2 -2
  23. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  24. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  25. data/lua-hooks/ext/luajit/doc/changes.html +69 -3
  26. data/lua-hooks/ext/luajit/doc/contact.html +10 -3
  27. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  29. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  30. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +3 -4
  31. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  32. data/lua-hooks/ext/luajit/doc/ext_jit.html +3 -3
  33. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  34. data/lua-hooks/ext/luajit/doc/extensions.html +47 -20
  35. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  36. data/lua-hooks/ext/luajit/doc/install.html +74 -45
  37. data/lua-hooks/ext/luajit/doc/luajit.html +5 -5
  38. data/lua-hooks/ext/luajit/doc/running.html +3 -3
  39. data/lua-hooks/ext/luajit/doc/status.html +13 -8
  40. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  41. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1 -1
  42. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  43. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1 -1
  44. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +8 -5
  45. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +66 -11
  46. data/lua-hooks/ext/luajit/dynasm/dasm_mips64.lua +12 -0
  47. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  48. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1 -1
  49. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +1 -1
  50. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  51. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +1 -1
  52. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +5 -1
  53. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +2 -2
  54. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  55. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  56. data/lua-hooks/ext/luajit/src/Makefile +15 -11
  57. data/lua-hooks/ext/luajit/src/Makefile.dep +16 -16
  58. data/lua-hooks/ext/luajit/src/host/buildvm.c +2 -2
  59. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  60. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +9 -4
  61. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +2 -2
  62. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  63. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +14 -3
  64. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +27 -3
  65. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/host/genminilua.lua +6 -5
  67. data/lua-hooks/ext/luajit/src/host/minilua.c +1 -1
  68. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  69. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +8 -8
  70. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +2 -2
  71. data/lua-hooks/ext/luajit/src/jit/dis_arm64.lua +1216 -0
  72. data/lua-hooks/ext/luajit/src/jit/dis_arm64be.lua +12 -0
  73. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +35 -20
  74. data/lua-hooks/ext/luajit/src/jit/dis_mips64.lua +17 -0
  75. data/lua-hooks/ext/luajit/src/jit/dis_mips64el.lua +17 -0
  76. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  77. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +2 -2
  78. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  79. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +7 -4
  80. data/lua-hooks/ext/luajit/src/jit/dump.lua +17 -12
  81. data/lua-hooks/ext/luajit/src/jit/p.lua +3 -2
  82. data/lua-hooks/ext/luajit/src/jit/v.lua +2 -2
  83. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  84. data/lua-hooks/ext/luajit/src/lauxlib.h +14 -20
  85. data/lua-hooks/ext/luajit/src/lib_aux.c +38 -27
  86. data/lua-hooks/ext/luajit/src/lib_base.c +12 -5
  87. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  88. data/lua-hooks/ext/luajit/src/lib_debug.c +5 -5
  89. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -2
  90. data/lua-hooks/ext/luajit/src/lib_init.c +16 -16
  91. data/lua-hooks/ext/luajit/src/lib_io.c +6 -7
  92. data/lua-hooks/ext/luajit/src/lib_jit.c +14 -4
  93. data/lua-hooks/ext/luajit/src/lib_math.c +1 -5
  94. data/lua-hooks/ext/luajit/src/lib_os.c +1 -1
  95. data/lua-hooks/ext/luajit/src/lib_package.c +14 -23
  96. data/lua-hooks/ext/luajit/src/lib_string.c +1 -5
  97. data/lua-hooks/ext/luajit/src/lib_table.c +21 -1
  98. data/lua-hooks/ext/luajit/src/lj.supp +3 -3
  99. data/lua-hooks/ext/luajit/src/lj_alloc.c +174 -83
  100. data/lua-hooks/ext/luajit/src/lj_api.c +97 -18
  101. data/lua-hooks/ext/luajit/src/lj_arch.h +54 -22
  102. data/lua-hooks/ext/luajit/src/lj_asm.c +172 -53
  103. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +19 -16
  105. data/lua-hooks/ext/luajit/src/lj_asm_arm64.h +2022 -0
  106. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +564 -158
  107. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +19 -18
  108. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +578 -92
  109. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  113. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_buf.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_ccall.c +172 -7
  119. data/lua-hooks/ext/luajit/src/lj_ccall.h +21 -5
  120. data/lua-hooks/ext/luajit/src/lj_ccallback.c +71 -17
  121. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_cconv.c +4 -2
  123. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  124. data/lua-hooks/ext/luajit/src/lj_cdata.c +7 -5
  125. data/lua-hooks/ext/luajit/src/lj_cdata.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_clib.c +5 -5
  127. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  128. data/lua-hooks/ext/luajit/src/lj_cparse.c +11 -6
  129. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  130. data/lua-hooks/ext/luajit/src/lj_crecord.c +70 -14
  131. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_ctype.c +1 -1
  133. data/lua-hooks/ext/luajit/src/lj_ctype.h +8 -8
  134. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_def.h +6 -9
  137. data/lua-hooks/ext/luajit/src/lj_dispatch.c +3 -3
  138. data/lua-hooks/ext/luajit/src/lj_dispatch.h +2 -1
  139. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +5 -4
  140. data/lua-hooks/ext/luajit/src/lj_emit_arm64.h +419 -0
  141. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +100 -20
  142. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +4 -4
  143. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +116 -25
  144. data/lua-hooks/ext/luajit/src/lj_err.c +34 -13
  145. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +58 -49
  149. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_frame.h +33 -6
  151. data/lua-hooks/ext/luajit/src/lj_func.c +4 -2
  152. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_gc.c +16 -7
  154. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +31 -1
  156. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_ir.c +69 -96
  158. data/lua-hooks/ext/luajit/src/lj_ir.h +29 -18
  159. data/lua-hooks/ext/luajit/src/lj_ircall.h +24 -30
  160. data/lua-hooks/ext/luajit/src/lj_iropt.h +9 -9
  161. data/lua-hooks/ext/luajit/src/lj_jit.h +67 -9
  162. data/lua-hooks/ext/luajit/src/lj_lex.c +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  164. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  166. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_mcode.c +11 -10
  168. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_meta.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  172. data/lua-hooks/ext/luajit/src/lj_obj.h +7 -3
  173. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  174. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +84 -17
  175. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +3 -3
  177. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +24 -22
  178. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +11 -6
  179. data/lua-hooks/ext/luajit/src/lj_opt_split.c +11 -2
  180. data/lua-hooks/ext/luajit/src/lj_parse.c +9 -7
  181. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  182. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_record.c +201 -117
  185. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_snap.c +72 -26
  187. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_state.c +6 -6
  189. data/lua-hooks/ext/luajit/src/lj_state.h +2 -2
  190. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_strfmt.c +7 -3
  193. data/lua-hooks/ext/luajit/src/lj_strfmt.h +1 -1
  194. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +4 -3
  195. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  196. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  197. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -2
  198. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  199. data/lua-hooks/ext/luajit/src/lj_target.h +3 -3
  200. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  201. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +239 -7
  202. data/lua-hooks/ext/luajit/src/lj_target_mips.h +111 -22
  203. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  204. data/lua-hooks/ext/luajit/src/lj_target_x86.h +21 -4
  205. data/lua-hooks/ext/luajit/src/lj_trace.c +63 -18
  206. data/lua-hooks/ext/luajit/src/lj_trace.h +2 -1
  207. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -1
  208. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  209. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  210. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -1
  211. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  212. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  213. data/lua-hooks/ext/luajit/src/lj_vmmath.c +1 -1
  214. data/lua-hooks/ext/luajit/src/ljamalg.c +1 -1
  215. data/lua-hooks/ext/luajit/src/lua.h +9 -1
  216. data/lua-hooks/ext/luajit/src/luaconf.h +3 -7
  217. data/lua-hooks/ext/luajit/src/luajit.c +69 -54
  218. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  219. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  220. data/lua-hooks/ext/luajit/src/msvcbuild.bat +12 -4
  221. data/lua-hooks/ext/luajit/src/vm_arm.dasc +1 -1
  222. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +255 -32
  223. data/lua-hooks/ext/luajit/src/vm_mips.dasc +26 -23
  224. data/lua-hooks/ext/luajit/src/vm_mips64.dasc +5062 -0
  225. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +1 -1
  226. data/lua-hooks/ext/luajit/src/vm_x64.dasc +24 -25
  227. data/lua-hooks/ext/luajit/src/vm_x86.dasc +77 -4
  228. data/lua-hooks/libluahooks.darwin.a +0 -0
  229. data/lua-hooks/libluahooks.linux.a +0 -0
  230. data/lua-hooks/options.mk +1 -1
  231. metadata +37 -77
  232. data/lua-hooks/ext/all.c +0 -69
  233. data/lua-hooks/ext/libinjection/COPYING +0 -37
  234. data/lua-hooks/ext/libinjection/libinjection.h +0 -65
  235. data/lua-hooks/ext/libinjection/libinjection_html5.c +0 -847
  236. data/lua-hooks/ext/libinjection/libinjection_html5.h +0 -54
  237. data/lua-hooks/ext/libinjection/libinjection_sqli.c +0 -2301
  238. data/lua-hooks/ext/libinjection/libinjection_sqli.h +0 -295
  239. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +0 -9349
  240. data/lua-hooks/ext/libinjection/libinjection_xss.c +0 -531
  241. data/lua-hooks/ext/libinjection/libinjection_xss.h +0 -21
  242. data/lua-hooks/ext/libinjection/lualib.c +0 -145
  243. data/lua-hooks/ext/libinjection/module.mk +0 -5
  244. data/lua-hooks/ext/lpeg/HISTORY +0 -96
  245. data/lua-hooks/ext/lpeg/lpcap.c +0 -537
  246. data/lua-hooks/ext/lpeg/lpcap.h +0 -56
  247. data/lua-hooks/ext/lpeg/lpcode.c +0 -1014
  248. data/lua-hooks/ext/lpeg/lpcode.h +0 -40
  249. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  250. data/lua-hooks/ext/lpeg/lpeg.html +0 -1445
  251. data/lua-hooks/ext/lpeg/lpprint.c +0 -244
  252. data/lua-hooks/ext/lpeg/lpprint.h +0 -36
  253. data/lua-hooks/ext/lpeg/lptree.c +0 -1303
  254. data/lua-hooks/ext/lpeg/lptree.h +0 -82
  255. data/lua-hooks/ext/lpeg/lptypes.h +0 -149
  256. data/lua-hooks/ext/lpeg/lpvm.c +0 -364
  257. data/lua-hooks/ext/lpeg/lpvm.h +0 -58
  258. data/lua-hooks/ext/lpeg/makefile +0 -55
  259. data/lua-hooks/ext/lpeg/module.mk +0 -6
  260. data/lua-hooks/ext/lpeg/re.html +0 -498
  261. data/lua-hooks/ext/lua-cmsgpack/.gitignore +0 -13
  262. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +0 -45
  263. data/lua-hooks/ext/lua-cmsgpack/README.md +0 -115
  264. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +0 -970
  265. data/lua-hooks/ext/lua-cmsgpack/module.mk +0 -2
  266. data/lua-hooks/ext/lua-cmsgpack/test.lua +0 -570
  267. data/lua-hooks/ext/lua-snapshot/LICENSE +0 -7
  268. data/lua-hooks/ext/lua-snapshot/Makefile +0 -12
  269. data/lua-hooks/ext/lua-snapshot/README.md +0 -18
  270. data/lua-hooks/ext/lua-snapshot/dump.lua +0 -15
  271. data/lua-hooks/ext/lua-snapshot/module.mk +0 -2
  272. data/lua-hooks/ext/lua-snapshot/snapshot.c +0 -462
  273. data/lua-hooks/ext/luautf8/README.md +0 -152
  274. data/lua-hooks/ext/luautf8/lutf8lib.c +0 -1274
  275. data/lua-hooks/ext/luautf8/module.mk +0 -2
  276. data/lua-hooks/ext/luautf8/unidata.h +0 -3064
  277. data/lua-hooks/ext/module.mk +0 -15
  278. data/lua-hooks/ext/modules.h +0 -17
  279. data/lua-hooks/ext/perf/luacpu.c +0 -114
  280. data/lua-hooks/ext/perf/lualoadavg.c +0 -40
  281. data/lua-hooks/ext/perf/luameminfo.c +0 -38
  282. data/lua-hooks/ext/perf/luaoslib.c +0 -203
  283. data/lua-hooks/ext/perf/module.mk +0 -5
  284. data/lua-hooks/ext/sha1/luasha1.c +0 -74
  285. data/lua-hooks/ext/sha1/module.mk +0 -5
  286. data/lua-hooks/ext/sha1/sha1.c +0 -145
  287. data/lua-hooks/ext/sha2/luasha256.c +0 -77
  288. data/lua-hooks/ext/sha2/module.mk +0 -5
  289. data/lua-hooks/ext/sha2/sha256.c +0 -196
  290. data/lua-hooks/ext/sysutils/lua_utils.c +0 -56
  291. data/lua-hooks/ext/sysutils/module.mk +0 -2
@@ -1,56 +0,0 @@
1
- /*
2
- ** $Id: lpcap.h,v 1.3 2016/09/13 17:45:58 roberto Exp $
3
- */
4
-
5
- #if !defined(lpcap_h)
6
- #define lpcap_h
7
-
8
-
9
- #include "lptypes.h"
10
-
11
-
12
- /* kinds of captures */
13
- typedef enum CapKind {
14
- Cclose, /* not used in trees */
15
- Cposition,
16
- Cconst, /* ktable[key] is Lua constant */
17
- Cbackref, /* ktable[key] is "name" of group to get capture */
18
- Carg, /* 'key' is arg's number */
19
- Csimple, /* next node is pattern */
20
- Ctable, /* next node is pattern */
21
- Cfunction, /* ktable[key] is function; next node is pattern */
22
- Cquery, /* ktable[key] is table; next node is pattern */
23
- Cstring, /* ktable[key] is string; next node is pattern */
24
- Cnum, /* numbered capture; 'key' is number of value to return */
25
- Csubst, /* substitution capture; next node is pattern */
26
- Cfold, /* ktable[key] is function; next node is pattern */
27
- Cruntime, /* not used in trees (it uses another type for tree) */
28
- Cgroup /* ktable[key] is group's "name" */
29
- } CapKind;
30
-
31
-
32
- typedef struct Capture {
33
- const char *s; /* subject position */
34
- unsigned short idx; /* extra info (group name, arg index, etc.) */
35
- byte kind; /* kind of capture */
36
- byte siz; /* size of full capture + 1 (0 = not a full capture) */
37
- } Capture;
38
-
39
-
40
- typedef struct CapState {
41
- Capture *cap; /* current capture */
42
- Capture *ocap; /* (original) capture list */
43
- lua_State *L;
44
- int ptop; /* index of last argument to 'match' */
45
- const char *s; /* original string */
46
- int valuecached; /* value stored in cache slot */
47
- } CapState;
48
-
49
-
50
- int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
51
- int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
52
- int finddyncap (Capture *cap, Capture *last);
53
-
54
- #endif
55
-
56
-
@@ -1,1014 +0,0 @@
1
- /*
2
- ** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $
3
- ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4
- */
5
-
6
- #include <limits.h>
7
-
8
-
9
- #include "lua.h"
10
- #include "lauxlib.h"
11
-
12
- #include "lptypes.h"
13
- #include "lpcode.h"
14
-
15
-
16
- /* signals a "no-instruction" */
17
- #define NOINST -1
18
-
19
-
20
-
21
- static const Charset fullset_ =
22
- {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
23
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
25
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
26
-
27
- static const Charset *fullset = &fullset_;
28
-
29
- /*
30
- ** {======================================================
31
- ** Analysis and some optimizations
32
- ** =======================================================
33
- */
34
-
35
- /*
36
- ** Check whether a charset is empty (returns IFail), singleton (IChar),
37
- ** full (IAny), or none of those (ISet). When singleton, '*c' returns
38
- ** which character it is. (When generic set, the set was the input,
39
- ** so there is no need to return it.)
40
- */
41
- static Opcode charsettype (const byte *cs, int *c) {
42
- int count = 0; /* number of characters in the set */
43
- int i;
44
- int candidate = -1; /* candidate position for the singleton char */
45
- for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */
46
- int b = cs[i];
47
- if (b == 0) { /* is byte empty? */
48
- if (count > 1) /* was set neither empty nor singleton? */
49
- return ISet; /* neither full nor empty nor singleton */
50
- /* else set is still empty or singleton */
51
- }
52
- else if (b == 0xFF) { /* is byte full? */
53
- if (count < (i * BITSPERCHAR)) /* was set not full? */
54
- return ISet; /* neither full nor empty nor singleton */
55
- else count += BITSPERCHAR; /* set is still full */
56
- }
57
- else if ((b & (b - 1)) == 0) { /* has byte only one bit? */
58
- if (count > 0) /* was set not empty? */
59
- return ISet; /* neither full nor empty nor singleton */
60
- else { /* set has only one char till now; track it */
61
- count++;
62
- candidate = i;
63
- }
64
- }
65
- else return ISet; /* byte is neither empty, full, nor singleton */
66
- }
67
- switch (count) {
68
- case 0: return IFail; /* empty set */
69
- case 1: { /* singleton; find character bit inside byte */
70
- int b = cs[candidate];
71
- *c = candidate * BITSPERCHAR;
72
- if ((b & 0xF0) != 0) { *c += 4; b >>= 4; }
73
- if ((b & 0x0C) != 0) { *c += 2; b >>= 2; }
74
- if ((b & 0x02) != 0) { *c += 1; }
75
- return IChar;
76
- }
77
- default: {
78
- assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */
79
- return IAny;
80
- }
81
- }
82
- }
83
-
84
-
85
- /*
86
- ** A few basic operations on Charsets
87
- */
88
- static void cs_complement (Charset *cs) {
89
- loopset(i, cs->cs[i] = ~cs->cs[i]);
90
- }
91
-
92
- static int cs_equal (const byte *cs1, const byte *cs2) {
93
- loopset(i, if (cs1[i] != cs2[i]) return 0);
94
- return 1;
95
- }
96
-
97
- static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
98
- loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
99
- return 1;
100
- }
101
-
102
-
103
- /*
104
- ** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
105
- ** charset and return 1; else return 0.
106
- */
107
- int tocharset (TTree *tree, Charset *cs) {
108
- switch (tree->tag) {
109
- case TSet: { /* copy set */
110
- loopset(i, cs->cs[i] = treebuffer(tree)[i]);
111
- return 1;
112
- }
113
- case TChar: { /* only one char */
114
- assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
115
- loopset(i, cs->cs[i] = 0); /* erase all chars */
116
- setchar(cs->cs, tree->u.n); /* add that one */
117
- return 1;
118
- }
119
- case TAny: {
120
- loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */
121
- return 1;
122
- }
123
- default: return 0;
124
- }
125
- }
126
-
127
-
128
- /*
129
- ** Visit a TCall node taking care to stop recursion. If node not yet
130
- ** visited, return 'f(sib2(tree))', otherwise return 'def' (default
131
- ** value)
132
- */
133
- static int callrecursive (TTree *tree, int f (TTree *t), int def) {
134
- int key = tree->key;
135
- assert(tree->tag == TCall);
136
- assert(sib2(tree)->tag == TRule);
137
- if (key == 0) /* node already visited? */
138
- return def; /* return default value */
139
- else { /* first visit */
140
- int result;
141
- tree->key = 0; /* mark call as already visited */
142
- result = f(sib2(tree)); /* go to called rule */
143
- tree->key = key; /* restore tree */
144
- return result;
145
- }
146
- }
147
-
148
-
149
- /*
150
- ** Check whether a pattern tree has captures
151
- */
152
- int hascaptures (TTree *tree) {
153
- tailcall:
154
- switch (tree->tag) {
155
- case TCapture: case TRunTime:
156
- return 1;
157
- case TCall:
158
- return callrecursive(tree, hascaptures, 0);
159
- case TRule: /* do not follow siblings */
160
- tree = sib1(tree); goto tailcall;
161
- case TOpenCall: assert(0);
162
- default: {
163
- switch (numsiblings[tree->tag]) {
164
- case 1: /* return hascaptures(sib1(tree)); */
165
- tree = sib1(tree); goto tailcall;
166
- case 2:
167
- if (hascaptures(sib1(tree)))
168
- return 1;
169
- /* else return hascaptures(sib2(tree)); */
170
- tree = sib2(tree); goto tailcall;
171
- default: assert(numsiblings[tree->tag] == 0); return 0;
172
- }
173
- }
174
- }
175
- }
176
-
177
-
178
- /*
179
- ** Checks how a pattern behaves regarding the empty string,
180
- ** in one of two different ways:
181
- ** A pattern is *nullable* if it can match without consuming any character;
182
- ** A pattern is *nofail* if it never fails for any string
183
- ** (including the empty string).
184
- ** The difference is only for predicates and run-time captures;
185
- ** for other patterns, the two properties are equivalent.
186
- ** (With predicates, &'a' is nullable but not nofail. Of course,
187
- ** nofail => nullable.)
188
- ** These functions are all conservative in the following way:
189
- ** p is nullable => nullable(p)
190
- ** nofail(p) => p cannot fail
191
- ** The function assumes that TOpenCall is not nullable;
192
- ** this will be checked again when the grammar is fixed.
193
- ** Run-time captures can do whatever they want, so the result
194
- ** is conservative.
195
- */
196
- int checkaux (TTree *tree, int pred) {
197
- tailcall:
198
- switch (tree->tag) {
199
- case TChar: case TSet: case TAny:
200
- case TFalse: case TOpenCall:
201
- return 0; /* not nullable */
202
- case TRep: case TTrue:
203
- return 1; /* no fail */
204
- case TNot: case TBehind: /* can match empty, but can fail */
205
- if (pred == PEnofail) return 0;
206
- else return 1; /* PEnullable */
207
- case TAnd: /* can match empty; fail iff body does */
208
- if (pred == PEnullable) return 1;
209
- /* else return checkaux(sib1(tree), pred); */
210
- tree = sib1(tree); goto tailcall;
211
- case TRunTime: /* can fail; match empty iff body does */
212
- if (pred == PEnofail) return 0;
213
- /* else return checkaux(sib1(tree), pred); */
214
- tree = sib1(tree); goto tailcall;
215
- case TSeq:
216
- if (!checkaux(sib1(tree), pred)) return 0;
217
- /* else return checkaux(sib2(tree), pred); */
218
- tree = sib2(tree); goto tailcall;
219
- case TChoice:
220
- if (checkaux(sib2(tree), pred)) return 1;
221
- /* else return checkaux(sib1(tree), pred); */
222
- tree = sib1(tree); goto tailcall;
223
- case TCapture: case TGrammar: case TRule:
224
- /* return checkaux(sib1(tree), pred); */
225
- tree = sib1(tree); goto tailcall;
226
- case TCall: /* return checkaux(sib2(tree), pred); */
227
- tree = sib2(tree); goto tailcall;
228
- default: assert(0); return 0;
229
- }
230
- }
231
-
232
-
233
- /*
234
- ** number of characters to match a pattern (or -1 if variable)
235
- */
236
- int fixedlen (TTree *tree) {
237
- int len = 0; /* to accumulate in tail calls */
238
- tailcall:
239
- switch (tree->tag) {
240
- case TChar: case TSet: case TAny:
241
- return len + 1;
242
- case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
243
- return len;
244
- case TRep: case TRunTime: case TOpenCall:
245
- return -1;
246
- case TCapture: case TRule: case TGrammar:
247
- /* return fixedlen(sib1(tree)); */
248
- tree = sib1(tree); goto tailcall;
249
- case TCall: {
250
- int n1 = callrecursive(tree, fixedlen, -1);
251
- if (n1 < 0)
252
- return -1;
253
- else
254
- return len + n1;
255
- }
256
- case TSeq: {
257
- int n1 = fixedlen(sib1(tree));
258
- if (n1 < 0)
259
- return -1;
260
- /* else return fixedlen(sib2(tree)) + len; */
261
- len += n1; tree = sib2(tree); goto tailcall;
262
- }
263
- case TChoice: {
264
- int n1 = fixedlen(sib1(tree));
265
- int n2 = fixedlen(sib2(tree));
266
- if (n1 != n2 || n1 < 0)
267
- return -1;
268
- else
269
- return len + n1;
270
- }
271
- default: assert(0); return 0;
272
- };
273
- }
274
-
275
-
276
- /*
277
- ** Computes the 'first set' of a pattern.
278
- ** The result is a conservative approximation:
279
- ** match p ax -> x (for some x) ==> a belongs to first(p)
280
- ** or
281
- ** a not in first(p) ==> match p ax -> fail (for all x)
282
- **
283
- ** The set 'follow' is the first set of what follows the
284
- ** pattern (full set if nothing follows it).
285
- **
286
- ** The function returns 0 when this resulting set can be used for
287
- ** test instructions that avoid the pattern altogether.
288
- ** A non-zero return can happen for two reasons:
289
- ** 1) match p '' -> '' ==> return has bit 1 set
290
- ** (tests cannot be used because they would always fail for an empty input);
291
- ** 2) there is a match-time capture ==> return has bit 2 set
292
- ** (optimizations should not bypass match-time captures).
293
- */
294
- static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
295
- tailcall:
296
- switch (tree->tag) {
297
- case TChar: case TSet: case TAny: {
298
- tocharset(tree, firstset);
299
- return 0;
300
- }
301
- case TTrue: {
302
- loopset(i, firstset->cs[i] = follow->cs[i]);
303
- return 1; /* accepts the empty string */
304
- }
305
- case TFalse: {
306
- loopset(i, firstset->cs[i] = 0);
307
- return 0;
308
- }
309
- case TChoice: {
310
- Charset csaux;
311
- int e1 = getfirst(sib1(tree), follow, firstset);
312
- int e2 = getfirst(sib2(tree), follow, &csaux);
313
- loopset(i, firstset->cs[i] |= csaux.cs[i]);
314
- return e1 | e2;
315
- }
316
- case TSeq: {
317
- if (!nullable(sib1(tree))) {
318
- /* when p1 is not nullable, p2 has nothing to contribute;
319
- return getfirst(sib1(tree), fullset, firstset); */
320
- tree = sib1(tree); follow = fullset; goto tailcall;
321
- }
322
- else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
323
- Charset csaux;
324
- int e2 = getfirst(sib2(tree), follow, &csaux);
325
- int e1 = getfirst(sib1(tree), &csaux, firstset);
326
- if (e1 == 0) return 0; /* 'e1' ensures that first can be used */
327
- else if ((e1 | e2) & 2) /* one of the children has a matchtime? */
328
- return 2; /* pattern has a matchtime capture */
329
- else return e2; /* else depends on 'e2' */
330
- }
331
- }
332
- case TRep: {
333
- getfirst(sib1(tree), follow, firstset);
334
- loopset(i, firstset->cs[i] |= follow->cs[i]);
335
- return 1; /* accept the empty string */
336
- }
337
- case TCapture: case TGrammar: case TRule: {
338
- /* return getfirst(sib1(tree), follow, firstset); */
339
- tree = sib1(tree); goto tailcall;
340
- }
341
- case TRunTime: { /* function invalidates any follow info. */
342
- int e = getfirst(sib1(tree), fullset, firstset);
343
- if (e) return 2; /* function is not "protected"? */
344
- else return 0; /* pattern inside capture ensures first can be used */
345
- }
346
- case TCall: {
347
- /* return getfirst(sib2(tree), follow, firstset); */
348
- tree = sib2(tree); goto tailcall;
349
- }
350
- case TAnd: {
351
- int e = getfirst(sib1(tree), follow, firstset);
352
- loopset(i, firstset->cs[i] &= follow->cs[i]);
353
- return e;
354
- }
355
- case TNot: {
356
- if (tocharset(sib1(tree), firstset)) {
357
- cs_complement(firstset);
358
- return 1;
359
- }
360
- /* else fall through */
361
- }
362
- case TBehind: { /* instruction gives no new information */
363
- /* call 'getfirst' only to check for match-time captures */
364
- int e = getfirst(sib1(tree), follow, firstset);
365
- loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */
366
- return e | 1; /* always can accept the empty string */
367
- }
368
- default: assert(0); return 0;
369
- }
370
- }
371
-
372
-
373
- /*
374
- ** If 'headfail(tree)' true, then 'tree' can fail only depending on the
375
- ** next character of the subject.
376
- */
377
- static int headfail (TTree *tree) {
378
- tailcall:
379
- switch (tree->tag) {
380
- case TChar: case TSet: case TAny: case TFalse:
381
- return 1;
382
- case TTrue: case TRep: case TRunTime: case TNot:
383
- case TBehind:
384
- return 0;
385
- case TCapture: case TGrammar: case TRule: case TAnd:
386
- tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
387
- case TCall:
388
- tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
389
- case TSeq:
390
- if (!nofail(sib2(tree))) return 0;
391
- /* else return headfail(sib1(tree)); */
392
- tree = sib1(tree); goto tailcall;
393
- case TChoice:
394
- if (!headfail(sib1(tree))) return 0;
395
- /* else return headfail(sib2(tree)); */
396
- tree = sib2(tree); goto tailcall;
397
- default: assert(0); return 0;
398
- }
399
- }
400
-
401
-
402
- /*
403
- ** Check whether the code generation for the given tree can benefit
404
- ** from a follow set (to avoid computing the follow set when it is
405
- ** not needed)
406
- */
407
- static int needfollow (TTree *tree) {
408
- tailcall:
409
- switch (tree->tag) {
410
- case TChar: case TSet: case TAny:
411
- case TFalse: case TTrue: case TAnd: case TNot:
412
- case TRunTime: case TGrammar: case TCall: case TBehind:
413
- return 0;
414
- case TChoice: case TRep:
415
- return 1;
416
- case TCapture:
417
- tree = sib1(tree); goto tailcall;
418
- case TSeq:
419
- tree = sib2(tree); goto tailcall;
420
- default: assert(0); return 0;
421
- }
422
- }
423
-
424
- /* }====================================================== */
425
-
426
-
427
-
428
- /*
429
- ** {======================================================
430
- ** Code generation
431
- ** =======================================================
432
- */
433
-
434
-
435
- /*
436
- ** size of an instruction
437
- */
438
- int sizei (const Instruction *i) {
439
- switch((Opcode)i->i.code) {
440
- case ISet: case ISpan: return CHARSETINSTSIZE;
441
- case ITestSet: return CHARSETINSTSIZE + 1;
442
- case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
443
- case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
444
- return 2;
445
- default: return 1;
446
- }
447
- }
448
-
449
-
450
- /*
451
- ** state for the compiler
452
- */
453
- typedef struct CompileState {
454
- Pattern *p; /* pattern being compiled */
455
- int ncode; /* next position in p->code to be filled */
456
- lua_State *L;
457
- } CompileState;
458
-
459
-
460
- /*
461
- ** code generation is recursive; 'opt' indicates that the code is being
462
- ** generated as the last thing inside an optional pattern (so, if that
463
- ** code is optional too, it can reuse the 'IChoice' already in place for
464
- ** the outer pattern). 'tt' points to a previous test protecting this
465
- ** code (or NOINST). 'fl' is the follow set of the pattern.
466
- */
467
- static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
468
- const Charset *fl);
469
-
470
-
471
- void realloccode (lua_State *L, Pattern *p, int nsize) {
472
- void *ud;
473
- lua_Alloc f = lua_getallocf(L, &ud);
474
- void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
475
- nsize * sizeof(Instruction));
476
- if (newblock == NULL && nsize > 0)
477
- luaL_error(L, "not enough memory");
478
- p->code = (Instruction *)newblock;
479
- p->codesize = nsize;
480
- }
481
-
482
-
483
- static int nextinstruction (CompileState *compst) {
484
- int size = compst->p->codesize;
485
- if (compst->ncode >= size)
486
- realloccode(compst->L, compst->p, size * 2);
487
- return compst->ncode++;
488
- }
489
-
490
-
491
- #define getinstr(cs,i) ((cs)->p->code[i])
492
-
493
-
494
- static int addinstruction (CompileState *compst, Opcode op, int aux) {
495
- int i = nextinstruction(compst);
496
- getinstr(compst, i).i.code = op;
497
- getinstr(compst, i).i.aux = aux;
498
- return i;
499
- }
500
-
501
-
502
- /*
503
- ** Add an instruction followed by space for an offset (to be set later)
504
- */
505
- static int addoffsetinst (CompileState *compst, Opcode op) {
506
- int i = addinstruction(compst, op, 0); /* instruction */
507
- addinstruction(compst, (Opcode)0, 0); /* open space for offset */
508
- assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2);
509
- return i;
510
- }
511
-
512
-
513
- /*
514
- ** Set the offset of an instruction
515
- */
516
- static void setoffset (CompileState *compst, int instruction, int offset) {
517
- getinstr(compst, instruction + 1).offset = offset;
518
- }
519
-
520
-
521
- /*
522
- ** Add a capture instruction:
523
- ** 'op' is the capture instruction; 'cap' the capture kind;
524
- ** 'key' the key into ktable; 'aux' is the optional capture offset
525
- **
526
- */
527
- static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
528
- int aux) {
529
- int i = addinstruction(compst, op, joinkindoff(cap, aux));
530
- getinstr(compst, i).i.key = key;
531
- return i;
532
- }
533
-
534
-
535
- #define gethere(compst) ((compst)->ncode)
536
-
537
- #define target(code,i) ((i) + code[i + 1].offset)
538
-
539
-
540
- /*
541
- ** Patch 'instruction' to jump to 'target'
542
- */
543
- static void jumptothere (CompileState *compst, int instruction, int target) {
544
- if (instruction >= 0)
545
- setoffset(compst, instruction, target - instruction);
546
- }
547
-
548
-
549
- /*
550
- ** Patch 'instruction' to jump to current position
551
- */
552
- static void jumptohere (CompileState *compst, int instruction) {
553
- jumptothere(compst, instruction, gethere(compst));
554
- }
555
-
556
-
557
- /*
558
- ** Code an IChar instruction, or IAny if there is an equivalent
559
- ** test dominating it
560
- */
561
- static void codechar (CompileState *compst, int c, int tt) {
562
- if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar &&
563
- getinstr(compst, tt).i.aux == c)
564
- addinstruction(compst, IAny, 0);
565
- else
566
- addinstruction(compst, IChar, c);
567
- }
568
-
569
-
570
- /*
571
- ** Add a charset posfix to an instruction
572
- */
573
- static void addcharset (CompileState *compst, const byte *cs) {
574
- int p = gethere(compst);
575
- int i;
576
- for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
577
- nextinstruction(compst); /* space for buffer */
578
- /* fill buffer with charset */
579
- loopset(j, getinstr(compst, p).buff[j] = cs[j]);
580
- }
581
-
582
-
583
- /*
584
- ** code a char set, optimizing unit sets for IChar, "complete"
585
- ** sets for IAny, and empty sets for IFail; also use an IAny
586
- ** when instruction is dominated by an equivalent test.
587
- */
588
- static void codecharset (CompileState *compst, const byte *cs, int tt) {
589
- int c = 0; /* (=) to avoid warnings */
590
- Opcode op = charsettype(cs, &c);
591
- switch (op) {
592
- case IChar: codechar(compst, c, tt); break;
593
- case ISet: { /* non-trivial set? */
594
- if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet &&
595
- cs_equal(cs, getinstr(compst, tt + 2).buff))
596
- addinstruction(compst, IAny, 0);
597
- else {
598
- addinstruction(compst, ISet, 0);
599
- addcharset(compst, cs);
600
- }
601
- break;
602
- }
603
- default: addinstruction(compst, op, c); break;
604
- }
605
- }
606
-
607
-
608
- /*
609
- ** code a test set, optimizing unit sets for ITestChar, "complete"
610
- ** sets for ITestAny, and empty sets for IJmp (always fails).
611
- ** 'e' is true iff test should accept the empty string. (Test
612
- ** instructions in the current VM never accept the empty string.)
613
- */
614
- static int codetestset (CompileState *compst, Charset *cs, int e) {
615
- if (e) return NOINST; /* no test */
616
- else {
617
- int c = 0;
618
- Opcode op = charsettype(cs->cs, &c);
619
- switch (op) {
620
- case IFail: return addoffsetinst(compst, IJmp); /* always jump */
621
- case IAny: return addoffsetinst(compst, ITestAny);
622
- case IChar: {
623
- int i = addoffsetinst(compst, ITestChar);
624
- getinstr(compst, i).i.aux = c;
625
- return i;
626
- }
627
- case ISet: {
628
- int i = addoffsetinst(compst, ITestSet);
629
- addcharset(compst, cs->cs);
630
- return i;
631
- }
632
- default: assert(0); return 0;
633
- }
634
- }
635
- }
636
-
637
-
638
- /*
639
- ** Find the final destination of a sequence of jumps
640
- */
641
- static int finaltarget (Instruction *code, int i) {
642
- while (code[i].i.code == IJmp)
643
- i = target(code, i);
644
- return i;
645
- }
646
-
647
-
648
- /*
649
- ** final label (after traversing any jumps)
650
- */
651
- static int finallabel (Instruction *code, int i) {
652
- return finaltarget(code, target(code, i));
653
- }
654
-
655
-
656
- /*
657
- ** <behind(p)> == behind n; <p> (where n = fixedlen(p))
658
- */
659
- static void codebehind (CompileState *compst, TTree *tree) {
660
- if (tree->u.n > 0)
661
- addinstruction(compst, IBehind, tree->u.n);
662
- codegen(compst, sib1(tree), 0, NOINST, fullset);
663
- }
664
-
665
-
666
- /*
667
- ** Choice; optimizations:
668
- ** - when p1 is headfail or
669
- ** when first(p1) and first(p2) are disjoint, than
670
- ** a character not in first(p1) cannot go to p1, and a character
671
- ** in first(p1) cannot go to p2 (at it is not in first(p2)).
672
- ** (The optimization is not valid if p1 accepts the empty string,
673
- ** as then there is no character at all...)
674
- ** - when p2 is empty and opt is true; a IPartialCommit can reuse
675
- ** the Choice already active in the stack.
676
- */
677
- static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
678
- const Charset *fl) {
679
- int emptyp2 = (p2->tag == TTrue);
680
- Charset cs1, cs2;
681
- int e1 = getfirst(p1, fullset, &cs1);
682
- if (headfail(p1) ||
683
- (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
684
- /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
685
- int test = codetestset(compst, &cs1, 0);
686
- int jmp = NOINST;
687
- codegen(compst, p1, 0, test, fl);
688
- if (!emptyp2)
689
- jmp = addoffsetinst(compst, IJmp);
690
- jumptohere(compst, test);
691
- codegen(compst, p2, opt, NOINST, fl);
692
- jumptohere(compst, jmp);
693
- }
694
- else if (opt && emptyp2) {
695
- /* p1? == IPartialCommit; p1 */
696
- jumptohere(compst, addoffsetinst(compst, IPartialCommit));
697
- codegen(compst, p1, 1, NOINST, fullset);
698
- }
699
- else {
700
- /* <p1 / p2> ==
701
- test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
702
- int pcommit;
703
- int test = codetestset(compst, &cs1, e1);
704
- int pchoice = addoffsetinst(compst, IChoice);
705
- codegen(compst, p1, emptyp2, test, fullset);
706
- pcommit = addoffsetinst(compst, ICommit);
707
- jumptohere(compst, pchoice);
708
- jumptohere(compst, test);
709
- codegen(compst, p2, opt, NOINST, fl);
710
- jumptohere(compst, pcommit);
711
- }
712
- }
713
-
714
-
715
- /*
716
- ** And predicate
717
- ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
718
- ** (valid only when 'p' has no captures)
719
- */
720
- static void codeand (CompileState *compst, TTree *tree, int tt) {
721
- int n = fixedlen(tree);
722
- if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
723
- codegen(compst, tree, 0, tt, fullset);
724
- if (n > 0)
725
- addinstruction(compst, IBehind, n);
726
- }
727
- else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
728
- int pcommit;
729
- int pchoice = addoffsetinst(compst, IChoice);
730
- codegen(compst, tree, 0, tt, fullset);
731
- pcommit = addoffsetinst(compst, IBackCommit);
732
- jumptohere(compst, pchoice);
733
- addinstruction(compst, IFail, 0);
734
- jumptohere(compst, pcommit);
735
- }
736
- }
737
-
738
-
739
- /*
740
- ** Captures: if pattern has fixed (and not too big) length, and it
741
- ** has no nested captures, use a single IFullCapture instruction
742
- ** after the match; otherwise, enclose the pattern with OpenCapture -
743
- ** CloseCapture.
744
- */
745
- static void codecapture (CompileState *compst, TTree *tree, int tt,
746
- const Charset *fl) {
747
- int len = fixedlen(sib1(tree));
748
- if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
749
- codegen(compst, sib1(tree), 0, tt, fl);
750
- addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
751
- }
752
- else {
753
- addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
754
- codegen(compst, sib1(tree), 0, tt, fl);
755
- addinstcap(compst, ICloseCapture, Cclose, 0, 0);
756
- }
757
- }
758
-
759
-
760
- static void coderuntime (CompileState *compst, TTree *tree, int tt) {
761
- addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0);
762
- codegen(compst, sib1(tree), 0, tt, fullset);
763
- addinstcap(compst, ICloseRunTime, Cclose, 0, 0);
764
- }
765
-
766
-
767
- /*
768
- ** Repetion; optimizations:
769
- ** When pattern is a charset, can use special instruction ISpan.
770
- ** When pattern is head fail, or if it starts with characters that
771
- ** are disjoint from what follows the repetions, a simple test
772
- ** is enough (a fail inside the repetition would backtrack to fail
773
- ** again in the following pattern, so there is no need for a choice).
774
- ** When 'opt' is true, the repetion can reuse the Choice already
775
- ** active in the stack.
776
- */
777
- static void coderep (CompileState *compst, TTree *tree, int opt,
778
- const Charset *fl) {
779
- Charset st;
780
- if (tocharset(tree, &st)) {
781
- addinstruction(compst, ISpan, 0);
782
- addcharset(compst, st.cs);
783
- }
784
- else {
785
- int e1 = getfirst(tree, fullset, &st);
786
- if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
787
- /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
788
- int jmp;
789
- int test = codetestset(compst, &st, 0);
790
- codegen(compst, tree, 0, test, fullset);
791
- jmp = addoffsetinst(compst, IJmp);
792
- jumptohere(compst, test);
793
- jumptothere(compst, jmp, test);
794
- }
795
- else {
796
- /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
797
- /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
798
- int commit, l2;
799
- int test = codetestset(compst, &st, e1);
800
- int pchoice = NOINST;
801
- if (opt)
802
- jumptohere(compst, addoffsetinst(compst, IPartialCommit));
803
- else
804
- pchoice = addoffsetinst(compst, IChoice);
805
- l2 = gethere(compst);
806
- codegen(compst, tree, 0, NOINST, fullset);
807
- commit = addoffsetinst(compst, IPartialCommit);
808
- jumptothere(compst, commit, l2);
809
- jumptohere(compst, pchoice);
810
- jumptohere(compst, test);
811
- }
812
- }
813
- }
814
-
815
-
816
- /*
817
- ** Not predicate; optimizations:
818
- ** In any case, if first test fails, 'not' succeeds, so it can jump to
819
- ** the end. If pattern is headfail, that is all (it cannot fail
820
- ** in other parts); this case includes 'not' of simple sets. Otherwise,
821
- ** use the default code (a choice plus a failtwice).
822
- */
823
- static void codenot (CompileState *compst, TTree *tree) {
824
- Charset st;
825
- int e = getfirst(tree, fullset, &st);
826
- int test = codetestset(compst, &st, e);
827
- if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
828
- addinstruction(compst, IFail, 0);
829
- else {
830
- /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
831
- int pchoice = addoffsetinst(compst, IChoice);
832
- codegen(compst, tree, 0, NOINST, fullset);
833
- addinstruction(compst, IFailTwice, 0);
834
- jumptohere(compst, pchoice);
835
- }
836
- jumptohere(compst, test);
837
- }
838
-
839
-
840
- /*
841
- ** change open calls to calls, using list 'positions' to find
842
- ** correct offsets; also optimize tail calls
843
- */
844
- static void correctcalls (CompileState *compst, int *positions,
845
- int from, int to) {
846
- int i;
847
- Instruction *code = compst->p->code;
848
- for (i = from; i < to; i += sizei(&code[i])) {
849
- if (code[i].i.code == IOpenCall) {
850
- int n = code[i].i.key; /* rule number */
851
- int rule = positions[n]; /* rule position */
852
- assert(rule == from || code[rule - 1].i.code == IRet);
853
- if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
854
- code[i].i.code = IJmp; /* tail call */
855
- else
856
- code[i].i.code = ICall;
857
- jumptothere(compst, i, rule); /* call jumps to respective rule */
858
- }
859
- }
860
- assert(i == to);
861
- }
862
-
863
-
864
- /*
865
- ** Code for a grammar:
866
- ** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
867
- */
868
- static void codegrammar (CompileState *compst, TTree *grammar) {
869
- int positions[MAXRULES];
870
- int rulenumber = 0;
871
- TTree *rule;
872
- int firstcall = addoffsetinst(compst, ICall); /* call initial rule */
873
- int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */
874
- int start = gethere(compst); /* here starts the initial rule */
875
- jumptohere(compst, firstcall);
876
- for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
877
- positions[rulenumber++] = gethere(compst); /* save rule position */
878
- codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */
879
- addinstruction(compst, IRet, 0);
880
- }
881
- assert(rule->tag == TTrue);
882
- jumptohere(compst, jumptoend);
883
- correctcalls(compst, positions, start, gethere(compst));
884
- }
885
-
886
-
887
- static void codecall (CompileState *compst, TTree *call) {
888
- int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
889
- getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */
890
- assert(sib2(call)->tag == TRule);
891
- }
892
-
893
-
894
- /*
895
- ** Code first child of a sequence
896
- ** (second child is called in-place to allow tail call)
897
- ** Return 'tt' for second child
898
- */
899
- static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
900
- int tt, const Charset *fl) {
901
- if (needfollow(p1)) {
902
- Charset fl1;
903
- getfirst(p2, fl, &fl1); /* p1 follow is p2 first */
904
- codegen(compst, p1, 0, tt, &fl1);
905
- }
906
- else /* use 'fullset' as follow */
907
- codegen(compst, p1, 0, tt, fullset);
908
- if (fixedlen(p1) != 0) /* can 'p1' consume anything? */
909
- return NOINST; /* invalidate test */
910
- else return tt; /* else 'tt' still protects sib2 */
911
- }
912
-
913
-
914
- /*
915
- ** Main code-generation function: dispatch to auxiliar functions
916
- ** according to kind of tree. ('needfollow' should return true
917
- ** only for consructions that use 'fl'.)
918
- */
919
- static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
920
- const Charset *fl) {
921
- tailcall:
922
- switch (tree->tag) {
923
- case TChar: codechar(compst, tree->u.n, tt); break;
924
- case TAny: addinstruction(compst, IAny, 0); break;
925
- case TSet: codecharset(compst, treebuffer(tree), tt); break;
926
- case TTrue: break;
927
- case TFalse: addinstruction(compst, IFail, 0); break;
928
- case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
929
- case TRep: coderep(compst, sib1(tree), opt, fl); break;
930
- case TBehind: codebehind(compst, tree); break;
931
- case TNot: codenot(compst, sib1(tree)); break;
932
- case TAnd: codeand(compst, sib1(tree), tt); break;
933
- case TCapture: codecapture(compst, tree, tt, fl); break;
934
- case TRunTime: coderuntime(compst, tree, tt); break;
935
- case TGrammar: codegrammar(compst, tree); break;
936
- case TCall: codecall(compst, tree); break;
937
- case TSeq: {
938
- tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */
939
- /* codegen(compst, p2, opt, tt, fl); */
940
- tree = sib2(tree); goto tailcall;
941
- }
942
- default: assert(0);
943
- }
944
- }
945
-
946
-
947
- /*
948
- ** Optimize jumps and other jump-like instructions.
949
- ** * Update labels of instructions with labels to their final
950
- ** destinations (e.g., choice L1; ... L1: jmp L2: becomes
951
- ** choice L2)
952
- ** * Jumps to other instructions that do jumps become those
953
- ** instructions (e.g., jump to return becomes a return; jump
954
- ** to commit becomes a commit)
955
- */
956
- static void peephole (CompileState *compst) {
957
- Instruction *code = compst->p->code;
958
- int i;
959
- for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
960
- redo:
961
- switch (code[i].i.code) {
962
- case IChoice: case ICall: case ICommit: case IPartialCommit:
963
- case IBackCommit: case ITestChar: case ITestSet:
964
- case ITestAny: { /* instructions with labels */
965
- jumptothere(compst, i, finallabel(code, i)); /* optimize label */
966
- break;
967
- }
968
- case IJmp: {
969
- int ft = finaltarget(code, i);
970
- switch (code[ft].i.code) { /* jumping to what? */
971
- case IRet: case IFail: case IFailTwice:
972
- case IEnd: { /* instructions with unconditional implicit jumps */
973
- code[i] = code[ft]; /* jump becomes that instruction */
974
- code[i + 1].i.code = IAny; /* 'no-op' for target position */
975
- break;
976
- }
977
- case ICommit: case IPartialCommit:
978
- case IBackCommit: { /* inst. with unconditional explicit jumps */
979
- int fft = finallabel(code, ft);
980
- code[i] = code[ft]; /* jump becomes that instruction... */
981
- jumptothere(compst, i, fft); /* but must correct its offset */
982
- goto redo; /* reoptimize its label */
983
- }
984
- default: {
985
- jumptothere(compst, i, ft); /* optimize label */
986
- break;
987
- }
988
- }
989
- break;
990
- }
991
- default: break;
992
- }
993
- }
994
- assert(code[i - 1].i.code == IEnd);
995
- }
996
-
997
-
998
- /*
999
- ** Compile a pattern
1000
- */
1001
- Instruction *compile (lua_State *L, Pattern *p) {
1002
- CompileState compst;
1003
- compst.p = p; compst.ncode = 0; compst.L = L;
1004
- realloccode(L, p, 2); /* minimum initial size */
1005
- codegen(&compst, p->tree, 0, NOINST, fullset);
1006
- addinstruction(&compst, IEnd, 0);
1007
- realloccode(L, p, compst.ncode); /* set final size */
1008
- peephole(&compst);
1009
- return p->code;
1010
- }
1011
-
1012
-
1013
- /* }====================================================== */
1014
-