immunio 0.15.4 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (454) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +0 -27
  3. data/ext/immunio/Rakefile +9 -0
  4. data/lib/immunio/plugins/active_record.rb +1 -1
  5. data/lib/immunio/plugins/active_record_relation.rb +1 -1
  6. data/lib/immunio/plugins/environment_reporter.rb +20 -0
  7. data/lib/immunio/rufus_lua_ext/ref.rb +1 -3
  8. data/lib/immunio/version.rb +1 -1
  9. data/lib/immunio/vm.rb +1 -2
  10. data/lua-hooks/Makefile +97 -0
  11. data/lua-hooks/ext/all.c +41 -52
  12. data/lua-hooks/ext/all.o +0 -0
  13. data/lua-hooks/ext/libinjection/libinjection_html5.o +0 -0
  14. data/lua-hooks/ext/libinjection/libinjection_sqli.o +0 -0
  15. data/lua-hooks/ext/libinjection/libinjection_xss.o +0 -0
  16. data/lua-hooks/ext/libinjection/lualib.c +2 -2
  17. data/lua-hooks/ext/lpeg/lpcap.c +2 -2
  18. data/lua-hooks/ext/lpeg/lpcap.o +0 -0
  19. data/lua-hooks/ext/lpeg/lpcode.c +2 -2
  20. data/lua-hooks/ext/lpeg/lpcode.h +1 -1
  21. data/lua-hooks/ext/lpeg/lpcode.o +0 -0
  22. data/lua-hooks/ext/lpeg/lpprint.o +0 -0
  23. data/lua-hooks/ext/lpeg/lptree.c +2 -2
  24. data/lua-hooks/ext/lpeg/lptypes.h +1 -1
  25. data/lua-hooks/ext/lpeg/lpvm.c +2 -2
  26. data/lua-hooks/ext/lpeg/lpvm.o +0 -0
  27. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +16 -3
  28. data/lua-hooks/ext/lua-snapshot/snapshot.c +14 -7
  29. data/lua-hooks/ext/luajit/COPYRIGHT +56 -0
  30. data/lua-hooks/ext/luajit/Makefile +159 -0
  31. data/lua-hooks/ext/luajit/README +16 -0
  32. data/lua-hooks/ext/luajit/doc/bluequad-print.css +166 -0
  33. data/lua-hooks/ext/luajit/doc/bluequad.css +325 -0
  34. data/lua-hooks/ext/luajit/doc/changes.html +804 -0
  35. data/lua-hooks/ext/luajit/doc/contact.html +104 -0
  36. data/lua-hooks/ext/luajit/doc/ext_c_api.html +189 -0
  37. data/lua-hooks/ext/luajit/doc/ext_ffi.html +332 -0
  38. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +570 -0
  39. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +1261 -0
  40. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +603 -0
  41. data/lua-hooks/ext/luajit/doc/ext_jit.html +201 -0
  42. data/lua-hooks/ext/luajit/doc/ext_profiler.html +365 -0
  43. data/lua-hooks/ext/luajit/doc/extensions.html +448 -0
  44. data/lua-hooks/ext/luajit/doc/faq.html +186 -0
  45. data/lua-hooks/ext/luajit/doc/img/contact.png +0 -0
  46. data/lua-hooks/ext/luajit/doc/install.html +659 -0
  47. data/lua-hooks/ext/luajit/doc/luajit.html +236 -0
  48. data/lua-hooks/ext/luajit/doc/running.html +309 -0
  49. data/lua-hooks/ext/luajit/doc/status.html +118 -0
  50. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +456 -0
  51. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1125 -0
  52. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +518 -0
  53. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1166 -0
  54. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +416 -0
  55. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +953 -0
  56. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +419 -0
  57. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1919 -0
  58. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +83 -0
  59. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +12 -0
  60. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +471 -0
  61. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +1945 -0
  62. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +1094 -0
  63. data/lua-hooks/ext/luajit/etc/luajit.1 +88 -0
  64. data/lua-hooks/ext/luajit/etc/luajit.pc +25 -0
  65. data/lua-hooks/ext/luajit/src/Makefile +697 -0
  66. data/lua-hooks/ext/luajit/src/Makefile.dep +244 -0
  67. data/lua-hooks/ext/luajit/src/host/README +4 -0
  68. data/lua-hooks/ext/luajit/src/host/buildvm +0 -0
  69. data/lua-hooks/ext/luajit/src/host/buildvm.c +518 -0
  70. data/lua-hooks/ext/luajit/src/host/buildvm.h +105 -0
  71. data/lua-hooks/ext/luajit/src/host/buildvm.o +0 -0
  72. data/lua-hooks/ext/luajit/src/host/buildvm_arch.h +7449 -0
  73. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +345 -0
  74. data/lua-hooks/ext/luajit/src/host/buildvm_asm.o +0 -0
  75. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +229 -0
  76. data/lua-hooks/ext/luajit/src/host/buildvm_fold.o +0 -0
  77. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +457 -0
  78. data/lua-hooks/ext/luajit/src/host/buildvm_lib.o +0 -0
  79. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +45 -0
  80. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +368 -0
  81. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.o +0 -0
  82. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +197 -0
  83. data/lua-hooks/ext/luajit/src/host/genminilua.lua +428 -0
  84. data/lua-hooks/ext/luajit/src/host/minilua +0 -0
  85. data/lua-hooks/ext/luajit/src/host/minilua.c +7770 -0
  86. data/lua-hooks/ext/luajit/src/host/minilua.o +0 -0
  87. data/lua-hooks/ext/luajit/src/jit/bc.lua +190 -0
  88. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +661 -0
  89. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +689 -0
  90. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +428 -0
  91. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +17 -0
  92. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +591 -0
  93. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +17 -0
  94. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +838 -0
  95. data/lua-hooks/ext/luajit/src/jit/dump.lua +706 -0
  96. data/lua-hooks/ext/luajit/src/jit/p.lua +310 -0
  97. data/lua-hooks/ext/luajit/src/jit/v.lua +170 -0
  98. data/lua-hooks/ext/luajit/src/jit/vmdef.lua +362 -0
  99. data/lua-hooks/ext/luajit/src/jit/zone.lua +45 -0
  100. data/lua-hooks/ext/{lua → luajit/src}/lauxlib.h +10 -17
  101. data/lua-hooks/ext/luajit/src/lib_aux.c +356 -0
  102. data/lua-hooks/ext/luajit/src/lib_aux.o +0 -0
  103. data/lua-hooks/ext/luajit/src/lib_aux_dyn.o +0 -0
  104. data/lua-hooks/ext/luajit/src/lib_base.c +664 -0
  105. data/lua-hooks/ext/luajit/src/lib_base.o +0 -0
  106. data/lua-hooks/ext/luajit/src/lib_base_dyn.o +0 -0
  107. data/lua-hooks/ext/luajit/src/lib_bit.c +180 -0
  108. data/lua-hooks/ext/luajit/src/lib_bit.o +0 -0
  109. data/lua-hooks/ext/luajit/src/lib_bit_dyn.o +0 -0
  110. data/lua-hooks/ext/luajit/src/lib_debug.c +405 -0
  111. data/lua-hooks/ext/luajit/src/lib_debug.o +0 -0
  112. data/lua-hooks/ext/luajit/src/lib_debug_dyn.o +0 -0
  113. data/lua-hooks/ext/luajit/src/lib_ffi.c +872 -0
  114. data/lua-hooks/ext/luajit/src/lib_ffi.o +0 -0
  115. data/lua-hooks/ext/luajit/src/lib_ffi_dyn.o +0 -0
  116. data/lua-hooks/ext/luajit/src/lib_init.c +55 -0
  117. data/lua-hooks/ext/luajit/src/lib_init.o +0 -0
  118. data/lua-hooks/ext/luajit/src/lib_init_dyn.o +0 -0
  119. data/lua-hooks/ext/luajit/src/lib_io.c +541 -0
  120. data/lua-hooks/ext/luajit/src/lib_io.o +0 -0
  121. data/lua-hooks/ext/luajit/src/lib_io_dyn.o +0 -0
  122. data/lua-hooks/ext/luajit/src/lib_jit.c +767 -0
  123. data/lua-hooks/ext/luajit/src/lib_jit.o +0 -0
  124. data/lua-hooks/ext/luajit/src/lib_jit_dyn.o +0 -0
  125. data/lua-hooks/ext/luajit/src/lib_math.c +230 -0
  126. data/lua-hooks/ext/luajit/src/lib_math.o +0 -0
  127. data/lua-hooks/ext/luajit/src/lib_math_dyn.o +0 -0
  128. data/lua-hooks/ext/luajit/src/lib_os.c +292 -0
  129. data/lua-hooks/ext/luajit/src/lib_os.o +0 -0
  130. data/lua-hooks/ext/luajit/src/lib_os_dyn.o +0 -0
  131. data/lua-hooks/ext/luajit/src/lib_package.c +610 -0
  132. data/lua-hooks/ext/luajit/src/lib_package.o +0 -0
  133. data/lua-hooks/ext/luajit/src/lib_package_dyn.o +0 -0
  134. data/lua-hooks/ext/luajit/src/lib_string.c +752 -0
  135. data/lua-hooks/ext/luajit/src/lib_string.o +0 -0
  136. data/lua-hooks/ext/luajit/src/lib_string_dyn.o +0 -0
  137. data/lua-hooks/ext/luajit/src/lib_table.c +307 -0
  138. data/lua-hooks/ext/luajit/src/lib_table.o +0 -0
  139. data/lua-hooks/ext/luajit/src/lib_table_dyn.o +0 -0
  140. data/lua-hooks/ext/luajit/src/libluajit.a +0 -0
  141. data/lua-hooks/ext/luajit/src/libluajit.so +0 -0
  142. data/lua-hooks/ext/luajit/src/lj.supp +26 -0
  143. data/lua-hooks/ext/luajit/src/lj_alloc.c +1398 -0
  144. data/lua-hooks/ext/luajit/src/lj_alloc.h +17 -0
  145. data/lua-hooks/ext/luajit/src/lj_alloc.o +0 -0
  146. data/lua-hooks/ext/luajit/src/lj_alloc_dyn.o +0 -0
  147. data/lua-hooks/ext/luajit/src/lj_api.c +1210 -0
  148. data/lua-hooks/ext/luajit/src/lj_api.o +0 -0
  149. data/lua-hooks/ext/luajit/src/lj_api_dyn.o +0 -0
  150. data/lua-hooks/ext/luajit/src/lj_arch.h +509 -0
  151. data/lua-hooks/ext/luajit/src/lj_asm.c +2278 -0
  152. data/lua-hooks/ext/luajit/src/lj_asm.h +17 -0
  153. data/lua-hooks/ext/luajit/src/lj_asm.o +0 -0
  154. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +2217 -0
  155. data/lua-hooks/ext/luajit/src/lj_asm_dyn.o +0 -0
  156. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +1833 -0
  157. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2015 -0
  158. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2634 -0
  159. data/lua-hooks/ext/luajit/src/lj_bc.c +14 -0
  160. data/lua-hooks/ext/luajit/src/lj_bc.h +265 -0
  161. data/lua-hooks/ext/luajit/src/lj_bc.o +0 -0
  162. data/lua-hooks/ext/luajit/src/lj_bc_dyn.o +0 -0
  163. data/lua-hooks/ext/luajit/src/lj_bcdef.h +220 -0
  164. data/lua-hooks/ext/luajit/src/lj_bcdump.h +68 -0
  165. data/lua-hooks/ext/luajit/src/lj_bcread.c +457 -0
  166. data/lua-hooks/ext/luajit/src/lj_bcread.o +0 -0
  167. data/lua-hooks/ext/luajit/src/lj_bcread_dyn.o +0 -0
  168. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +361 -0
  169. data/lua-hooks/ext/luajit/src/lj_bcwrite.o +0 -0
  170. data/lua-hooks/ext/luajit/src/lj_bcwrite_dyn.o +0 -0
  171. data/lua-hooks/ext/luajit/src/lj_buf.c +234 -0
  172. data/lua-hooks/ext/luajit/src/lj_buf.h +105 -0
  173. data/lua-hooks/ext/luajit/src/lj_buf.o +0 -0
  174. data/lua-hooks/ext/luajit/src/lj_buf_dyn.o +0 -0
  175. data/lua-hooks/ext/luajit/src/lj_carith.c +429 -0
  176. data/lua-hooks/ext/luajit/src/lj_carith.h +37 -0
  177. data/lua-hooks/ext/luajit/src/lj_carith.o +0 -0
  178. data/lua-hooks/ext/luajit/src/lj_carith_dyn.o +0 -0
  179. data/lua-hooks/ext/luajit/src/lj_ccall.c +984 -0
  180. data/lua-hooks/ext/luajit/src/lj_ccall.h +178 -0
  181. data/lua-hooks/ext/luajit/src/lj_ccall.o +0 -0
  182. data/lua-hooks/ext/luajit/src/lj_ccall_dyn.o +0 -0
  183. data/lua-hooks/ext/luajit/src/lj_ccallback.c +712 -0
  184. data/lua-hooks/ext/luajit/src/lj_ccallback.h +25 -0
  185. data/lua-hooks/ext/luajit/src/lj_ccallback.o +0 -0
  186. data/lua-hooks/ext/luajit/src/lj_ccallback_dyn.o +0 -0
  187. data/lua-hooks/ext/luajit/src/lj_cconv.c +752 -0
  188. data/lua-hooks/ext/luajit/src/lj_cconv.h +70 -0
  189. data/lua-hooks/ext/luajit/src/lj_cconv.o +0 -0
  190. data/lua-hooks/ext/luajit/src/lj_cconv_dyn.o +0 -0
  191. data/lua-hooks/ext/luajit/src/lj_cdata.c +288 -0
  192. data/lua-hooks/ext/luajit/src/lj_cdata.h +76 -0
  193. data/lua-hooks/ext/luajit/src/lj_cdata.o +0 -0
  194. data/lua-hooks/ext/luajit/src/lj_cdata_dyn.o +0 -0
  195. data/lua-hooks/ext/luajit/src/lj_char.c +43 -0
  196. data/lua-hooks/ext/luajit/src/lj_char.h +42 -0
  197. data/lua-hooks/ext/luajit/src/lj_char.o +0 -0
  198. data/lua-hooks/ext/luajit/src/lj_char_dyn.o +0 -0
  199. data/lua-hooks/ext/luajit/src/lj_clib.c +418 -0
  200. data/lua-hooks/ext/luajit/src/lj_clib.h +29 -0
  201. data/lua-hooks/ext/luajit/src/lj_clib.o +0 -0
  202. data/lua-hooks/ext/luajit/src/lj_clib_dyn.o +0 -0
  203. data/lua-hooks/ext/luajit/src/lj_cparse.c +1862 -0
  204. data/lua-hooks/ext/luajit/src/lj_cparse.h +65 -0
  205. data/lua-hooks/ext/luajit/src/lj_cparse.o +0 -0
  206. data/lua-hooks/ext/luajit/src/lj_cparse_dyn.o +0 -0
  207. data/lua-hooks/ext/luajit/src/lj_crecord.c +1834 -0
  208. data/lua-hooks/ext/luajit/src/lj_crecord.h +38 -0
  209. data/lua-hooks/ext/luajit/src/lj_crecord.o +0 -0
  210. data/lua-hooks/ext/luajit/src/lj_crecord_dyn.o +0 -0
  211. data/lua-hooks/ext/luajit/src/lj_ctype.c +635 -0
  212. data/lua-hooks/ext/luajit/src/lj_ctype.h +461 -0
  213. data/lua-hooks/ext/luajit/src/lj_ctype.o +0 -0
  214. data/lua-hooks/ext/luajit/src/lj_ctype_dyn.o +0 -0
  215. data/lua-hooks/ext/luajit/src/lj_debug.c +699 -0
  216. data/lua-hooks/ext/luajit/src/lj_debug.h +65 -0
  217. data/lua-hooks/ext/luajit/src/lj_debug.o +0 -0
  218. data/lua-hooks/ext/luajit/src/lj_debug_dyn.o +0 -0
  219. data/lua-hooks/ext/luajit/src/lj_def.h +365 -0
  220. data/lua-hooks/ext/luajit/src/lj_dispatch.c +557 -0
  221. data/lua-hooks/ext/luajit/src/lj_dispatch.h +138 -0
  222. data/lua-hooks/ext/luajit/src/lj_dispatch.o +0 -0
  223. data/lua-hooks/ext/luajit/src/lj_dispatch_dyn.o +0 -0
  224. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +356 -0
  225. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +211 -0
  226. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +238 -0
  227. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +462 -0
  228. data/lua-hooks/ext/luajit/src/lj_err.c +794 -0
  229. data/lua-hooks/ext/luajit/src/lj_err.h +41 -0
  230. data/lua-hooks/ext/luajit/src/lj_err.o +0 -0
  231. data/lua-hooks/ext/luajit/src/lj_err_dyn.o +0 -0
  232. data/lua-hooks/ext/luajit/src/lj_errmsg.h +190 -0
  233. data/lua-hooks/ext/luajit/src/lj_ff.h +18 -0
  234. data/lua-hooks/ext/luajit/src/lj_ffdef.h +209 -0
  235. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +1247 -0
  236. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +24 -0
  237. data/lua-hooks/ext/luajit/src/lj_ffrecord.o +0 -0
  238. data/lua-hooks/ext/luajit/src/lj_ffrecord_dyn.o +0 -0
  239. data/lua-hooks/ext/luajit/src/lj_folddef.h +1138 -0
  240. data/lua-hooks/ext/luajit/src/lj_frame.h +259 -0
  241. data/lua-hooks/ext/luajit/src/lj_func.c +185 -0
  242. data/lua-hooks/ext/luajit/src/lj_func.h +24 -0
  243. data/lua-hooks/ext/luajit/src/lj_func.o +0 -0
  244. data/lua-hooks/ext/luajit/src/lj_func_dyn.o +0 -0
  245. data/lua-hooks/ext/luajit/src/lj_gc.c +845 -0
  246. data/lua-hooks/ext/luajit/src/lj_gc.h +134 -0
  247. data/lua-hooks/ext/luajit/src/lj_gc.o +0 -0
  248. data/lua-hooks/ext/luajit/src/lj_gc_dyn.o +0 -0
  249. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +787 -0
  250. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +22 -0
  251. data/lua-hooks/ext/luajit/src/lj_gdbjit.o +0 -0
  252. data/lua-hooks/ext/luajit/src/lj_gdbjit_dyn.o +0 -0
  253. data/lua-hooks/ext/luajit/src/lj_ir.c +505 -0
  254. data/lua-hooks/ext/luajit/src/lj_ir.h +577 -0
  255. data/lua-hooks/ext/luajit/src/lj_ir.o +0 -0
  256. data/lua-hooks/ext/luajit/src/lj_ir_dyn.o +0 -0
  257. data/lua-hooks/ext/luajit/src/lj_ircall.h +321 -0
  258. data/lua-hooks/ext/luajit/src/lj_iropt.h +161 -0
  259. data/lua-hooks/ext/luajit/src/lj_jit.h +440 -0
  260. data/lua-hooks/ext/luajit/src/lj_lex.c +482 -0
  261. data/lua-hooks/ext/luajit/src/lj_lex.h +86 -0
  262. data/lua-hooks/ext/luajit/src/lj_lex.o +0 -0
  263. data/lua-hooks/ext/luajit/src/lj_lex_dyn.o +0 -0
  264. data/lua-hooks/ext/luajit/src/lj_lib.c +303 -0
  265. data/lua-hooks/ext/luajit/src/lj_lib.h +115 -0
  266. data/lua-hooks/ext/luajit/src/lj_lib.o +0 -0
  267. data/lua-hooks/ext/luajit/src/lj_lib_dyn.o +0 -0
  268. data/lua-hooks/ext/luajit/src/lj_libdef.h +414 -0
  269. data/lua-hooks/ext/luajit/src/lj_load.c +168 -0
  270. data/lua-hooks/ext/luajit/src/lj_load.o +0 -0
  271. data/lua-hooks/ext/luajit/src/lj_load_dyn.o +0 -0
  272. data/lua-hooks/ext/luajit/src/lj_mcode.c +386 -0
  273. data/lua-hooks/ext/luajit/src/lj_mcode.h +30 -0
  274. data/lua-hooks/ext/luajit/src/lj_mcode.o +0 -0
  275. data/lua-hooks/ext/luajit/src/lj_mcode_dyn.o +0 -0
  276. data/lua-hooks/ext/luajit/src/lj_meta.c +477 -0
  277. data/lua-hooks/ext/luajit/src/lj_meta.h +38 -0
  278. data/lua-hooks/ext/luajit/src/lj_meta.o +0 -0
  279. data/lua-hooks/ext/luajit/src/lj_meta_dyn.o +0 -0
  280. data/lua-hooks/ext/luajit/src/lj_obj.c +50 -0
  281. data/lua-hooks/ext/luajit/src/lj_obj.h +976 -0
  282. data/lua-hooks/ext/luajit/src/lj_obj.o +0 -0
  283. data/lua-hooks/ext/luajit/src/lj_obj_dyn.o +0 -0
  284. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +78 -0
  285. data/lua-hooks/ext/luajit/src/lj_opt_dce.o +0 -0
  286. data/lua-hooks/ext/luajit/src/lj_opt_dce_dyn.o +0 -0
  287. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +2488 -0
  288. data/lua-hooks/ext/luajit/src/lj_opt_fold.o +0 -0
  289. data/lua-hooks/ext/luajit/src/lj_opt_fold_dyn.o +0 -0
  290. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +449 -0
  291. data/lua-hooks/ext/luajit/src/lj_opt_loop.o +0 -0
  292. data/lua-hooks/ext/luajit/src/lj_opt_loop_dyn.o +0 -0
  293. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +935 -0
  294. data/lua-hooks/ext/luajit/src/lj_opt_mem.o +0 -0
  295. data/lua-hooks/ext/luajit/src/lj_opt_mem_dyn.o +0 -0
  296. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +652 -0
  297. data/lua-hooks/ext/luajit/src/lj_opt_narrow.o +0 -0
  298. data/lua-hooks/ext/luajit/src/lj_opt_narrow_dyn.o +0 -0
  299. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +245 -0
  300. data/lua-hooks/ext/luajit/src/lj_opt_sink.o +0 -0
  301. data/lua-hooks/ext/luajit/src/lj_opt_sink_dyn.o +0 -0
  302. data/lua-hooks/ext/luajit/src/lj_opt_split.c +856 -0
  303. data/lua-hooks/ext/luajit/src/lj_opt_split.o +0 -0
  304. data/lua-hooks/ext/luajit/src/lj_opt_split_dyn.o +0 -0
  305. data/lua-hooks/ext/luajit/src/lj_parse.c +2725 -0
  306. data/lua-hooks/ext/luajit/src/lj_parse.h +18 -0
  307. data/lua-hooks/ext/luajit/src/lj_parse.o +0 -0
  308. data/lua-hooks/ext/luajit/src/lj_parse_dyn.o +0 -0
  309. data/lua-hooks/ext/luajit/src/lj_profile.c +368 -0
  310. data/lua-hooks/ext/luajit/src/lj_profile.h +21 -0
  311. data/lua-hooks/ext/luajit/src/lj_profile.o +0 -0
  312. data/lua-hooks/ext/luajit/src/lj_profile_dyn.o +0 -0
  313. data/lua-hooks/ext/luajit/src/lj_recdef.h +270 -0
  314. data/lua-hooks/ext/luajit/src/lj_record.c +2554 -0
  315. data/lua-hooks/ext/luajit/src/lj_record.h +45 -0
  316. data/lua-hooks/ext/luajit/src/lj_record.o +0 -0
  317. data/lua-hooks/ext/luajit/src/lj_record_dyn.o +0 -0
  318. data/lua-hooks/ext/luajit/src/lj_snap.c +870 -0
  319. data/lua-hooks/ext/luajit/src/lj_snap.h +34 -0
  320. data/lua-hooks/ext/luajit/src/lj_snap.o +0 -0
  321. data/lua-hooks/ext/luajit/src/lj_snap_dyn.o +0 -0
  322. data/lua-hooks/ext/luajit/src/lj_state.c +300 -0
  323. data/lua-hooks/ext/luajit/src/lj_state.h +35 -0
  324. data/lua-hooks/ext/luajit/src/lj_state.o +0 -0
  325. data/lua-hooks/ext/luajit/src/lj_state_dyn.o +0 -0
  326. data/lua-hooks/ext/luajit/src/lj_str.c +197 -0
  327. data/lua-hooks/ext/luajit/src/lj_str.h +27 -0
  328. data/lua-hooks/ext/luajit/src/lj_str.o +0 -0
  329. data/lua-hooks/ext/luajit/src/lj_str_dyn.o +0 -0
  330. data/lua-hooks/ext/luajit/src/lj_strfmt.c +554 -0
  331. data/lua-hooks/ext/luajit/src/lj_strfmt.h +125 -0
  332. data/lua-hooks/ext/luajit/src/lj_strfmt.o +0 -0
  333. data/lua-hooks/ext/luajit/src/lj_strfmt_dyn.o +0 -0
  334. data/lua-hooks/ext/luajit/src/lj_strscan.c +547 -0
  335. data/lua-hooks/ext/luajit/src/lj_strscan.h +39 -0
  336. data/lua-hooks/ext/luajit/src/lj_strscan.o +0 -0
  337. data/lua-hooks/ext/luajit/src/lj_strscan_dyn.o +0 -0
  338. data/lua-hooks/ext/luajit/src/lj_tab.c +666 -0
  339. data/lua-hooks/ext/luajit/src/lj_tab.h +73 -0
  340. data/lua-hooks/ext/luajit/src/lj_tab.o +0 -0
  341. data/lua-hooks/ext/luajit/src/lj_tab_dyn.o +0 -0
  342. data/lua-hooks/ext/luajit/src/lj_target.h +164 -0
  343. data/lua-hooks/ext/luajit/src/lj_target_arm.h +270 -0
  344. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +97 -0
  345. data/lua-hooks/ext/luajit/src/lj_target_mips.h +260 -0
  346. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +280 -0
  347. data/lua-hooks/ext/luajit/src/lj_target_x86.h +345 -0
  348. data/lua-hooks/ext/luajit/src/lj_trace.c +859 -0
  349. data/lua-hooks/ext/luajit/src/lj_trace.h +54 -0
  350. data/lua-hooks/ext/luajit/src/lj_trace.o +0 -0
  351. data/lua-hooks/ext/luajit/src/lj_trace_dyn.o +0 -0
  352. data/lua-hooks/ext/luajit/src/lj_traceerr.h +63 -0
  353. data/lua-hooks/ext/luajit/src/lj_udata.c +34 -0
  354. data/lua-hooks/ext/luajit/src/lj_udata.h +14 -0
  355. data/lua-hooks/ext/luajit/src/lj_udata.o +0 -0
  356. data/lua-hooks/ext/luajit/src/lj_udata_dyn.o +0 -0
  357. data/lua-hooks/ext/luajit/src/lj_vm.S +2730 -0
  358. data/lua-hooks/ext/luajit/src/lj_vm.h +114 -0
  359. data/lua-hooks/ext/luajit/src/lj_vm.o +0 -0
  360. data/lua-hooks/ext/luajit/src/lj_vm_dyn.o +0 -0
  361. data/lua-hooks/ext/luajit/src/lj_vmevent.c +58 -0
  362. data/lua-hooks/ext/luajit/src/lj_vmevent.h +59 -0
  363. data/lua-hooks/ext/luajit/src/lj_vmevent.o +0 -0
  364. data/lua-hooks/ext/luajit/src/lj_vmevent_dyn.o +0 -0
  365. data/lua-hooks/ext/luajit/src/lj_vmmath.c +152 -0
  366. data/lua-hooks/ext/luajit/src/lj_vmmath.o +0 -0
  367. data/lua-hooks/ext/luajit/src/lj_vmmath_dyn.o +0 -0
  368. data/lua-hooks/ext/luajit/src/ljamalg.c +96 -0
  369. data/lua-hooks/ext/{lua → luajit/src}/lua.h +12 -7
  370. data/lua-hooks/ext/luajit/src/lua.hpp +9 -0
  371. data/lua-hooks/ext/luajit/src/luaconf.h +156 -0
  372. data/lua-hooks/ext/luajit/src/luajit +0 -0
  373. data/lua-hooks/ext/luajit/src/luajit.c +570 -0
  374. data/lua-hooks/ext/luajit/src/luajit.h +79 -0
  375. data/lua-hooks/ext/luajit/src/luajit.o +0 -0
  376. data/lua-hooks/ext/luajit/src/lualib.h +43 -0
  377. data/lua-hooks/ext/luajit/src/msvcbuild.bat +114 -0
  378. data/lua-hooks/ext/luajit/src/ps4build.bat +103 -0
  379. data/lua-hooks/ext/luajit/src/psvitabuild.bat +93 -0
  380. data/lua-hooks/ext/luajit/src/vm_arm.dasc +4585 -0
  381. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +3764 -0
  382. data/lua-hooks/ext/luajit/src/vm_mips.dasc +4355 -0
  383. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +5252 -0
  384. data/lua-hooks/ext/luajit/src/vm_x64.dasc +4902 -0
  385. data/lua-hooks/ext/luajit/src/vm_x86.dasc +5710 -0
  386. data/lua-hooks/ext/luajit/src/xb1build.bat +101 -0
  387. data/lua-hooks/ext/luajit/src/xedkbuild.bat +92 -0
  388. data/lua-hooks/ext/luautf8/lutf8lib.c +3 -3
  389. data/lua-hooks/lib/boot.lua +37 -2
  390. metadata +372 -69
  391. data/lua-hooks/ext/bitop/README +0 -22
  392. data/lua-hooks/ext/bitop/bit.c +0 -189
  393. data/lua-hooks/ext/extconf.rb +0 -38
  394. data/lua-hooks/ext/lua/COPYRIGHT +0 -34
  395. data/lua-hooks/ext/lua/lapi.c +0 -1087
  396. data/lua-hooks/ext/lua/lapi.h +0 -16
  397. data/lua-hooks/ext/lua/lauxlib.c +0 -652
  398. data/lua-hooks/ext/lua/lbaselib.c +0 -659
  399. data/lua-hooks/ext/lua/lcode.c +0 -831
  400. data/lua-hooks/ext/lua/lcode.h +0 -76
  401. data/lua-hooks/ext/lua/ldblib.c +0 -398
  402. data/lua-hooks/ext/lua/ldebug.c +0 -638
  403. data/lua-hooks/ext/lua/ldebug.h +0 -33
  404. data/lua-hooks/ext/lua/ldo.c +0 -519
  405. data/lua-hooks/ext/lua/ldo.h +0 -57
  406. data/lua-hooks/ext/lua/ldump.c +0 -164
  407. data/lua-hooks/ext/lua/lfunc.c +0 -174
  408. data/lua-hooks/ext/lua/lfunc.h +0 -34
  409. data/lua-hooks/ext/lua/lgc.c +0 -710
  410. data/lua-hooks/ext/lua/lgc.h +0 -110
  411. data/lua-hooks/ext/lua/linit.c +0 -38
  412. data/lua-hooks/ext/lua/liolib.c +0 -556
  413. data/lua-hooks/ext/lua/llex.c +0 -463
  414. data/lua-hooks/ext/lua/llex.h +0 -81
  415. data/lua-hooks/ext/lua/llimits.h +0 -128
  416. data/lua-hooks/ext/lua/lmathlib.c +0 -263
  417. data/lua-hooks/ext/lua/lmem.c +0 -86
  418. data/lua-hooks/ext/lua/lmem.h +0 -49
  419. data/lua-hooks/ext/lua/loadlib.c +0 -705
  420. data/lua-hooks/ext/lua/loadlib_rel.c +0 -760
  421. data/lua-hooks/ext/lua/lobject.c +0 -214
  422. data/lua-hooks/ext/lua/lobject.h +0 -381
  423. data/lua-hooks/ext/lua/lopcodes.c +0 -102
  424. data/lua-hooks/ext/lua/lopcodes.h +0 -268
  425. data/lua-hooks/ext/lua/loslib.c +0 -243
  426. data/lua-hooks/ext/lua/lparser.c +0 -1339
  427. data/lua-hooks/ext/lua/lparser.h +0 -82
  428. data/lua-hooks/ext/lua/lstate.c +0 -214
  429. data/lua-hooks/ext/lua/lstate.h +0 -169
  430. data/lua-hooks/ext/lua/lstring.c +0 -111
  431. data/lua-hooks/ext/lua/lstring.h +0 -31
  432. data/lua-hooks/ext/lua/lstrlib.c +0 -871
  433. data/lua-hooks/ext/lua/ltable.c +0 -588
  434. data/lua-hooks/ext/lua/ltable.h +0 -40
  435. data/lua-hooks/ext/lua/ltablib.c +0 -287
  436. data/lua-hooks/ext/lua/ltm.c +0 -75
  437. data/lua-hooks/ext/lua/ltm.h +0 -54
  438. data/lua-hooks/ext/lua/lua.c +0 -392
  439. data/lua-hooks/ext/lua/lua.def +0 -131
  440. data/lua-hooks/ext/lua/lua.rc +0 -28
  441. data/lua-hooks/ext/lua/lua_dll.rc +0 -26
  442. data/lua-hooks/ext/lua/luac.c +0 -200
  443. data/lua-hooks/ext/lua/luac.rc +0 -1
  444. data/lua-hooks/ext/lua/luaconf.h +0 -763
  445. data/lua-hooks/ext/lua/luaconf.h.in +0 -724
  446. data/lua-hooks/ext/lua/luaconf.h.orig +0 -763
  447. data/lua-hooks/ext/lua/lualib.h +0 -53
  448. data/lua-hooks/ext/lua/lundump.c +0 -227
  449. data/lua-hooks/ext/lua/lundump.h +0 -36
  450. data/lua-hooks/ext/lua/lvm.c +0 -767
  451. data/lua-hooks/ext/lua/lvm.h +0 -36
  452. data/lua-hooks/ext/lua/lzio.c +0 -82
  453. data/lua-hooks/ext/lua/lzio.h +0 -67
  454. data/lua-hooks/ext/lua/print.c +0 -227
@@ -0,0 +1,2278 @@
1
+ /*
2
+ ** IR assembler (SSA IR -> machine code).
3
+ ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4
+ */
5
+
6
+ #define lj_asm_c
7
+ #define LUA_CORE
8
+
9
+ #include "lj_obj.h"
10
+
11
+ #if LJ_HASJIT
12
+
13
+ #include "lj_gc.h"
14
+ #include "lj_str.h"
15
+ #include "lj_tab.h"
16
+ #include "lj_frame.h"
17
+ #if LJ_HASFFI
18
+ #include "lj_ctype.h"
19
+ #endif
20
+ #include "lj_ir.h"
21
+ #include "lj_jit.h"
22
+ #include "lj_ircall.h"
23
+ #include "lj_iropt.h"
24
+ #include "lj_mcode.h"
25
+ #include "lj_iropt.h"
26
+ #include "lj_trace.h"
27
+ #include "lj_snap.h"
28
+ #include "lj_asm.h"
29
+ #include "lj_dispatch.h"
30
+ #include "lj_vm.h"
31
+ #include "lj_target.h"
32
+
33
+ #ifdef LUA_USE_ASSERT
34
+ #include <stdio.h>
35
+ #endif
36
+
37
+ /* -- Assembler state and common macros ----------------------------------- */
38
+
39
+ /* Assembler state. */
40
+ typedef struct ASMState {
41
+ RegCost cost[RID_MAX]; /* Reference and blended allocation cost for regs. */
42
+
43
+ MCode *mcp; /* Current MCode pointer (grows down). */
44
+ MCode *mclim; /* Lower limit for MCode memory + red zone. */
45
+ #ifdef LUA_USE_ASSERT
46
+ MCode *mcp_prev; /* Red zone overflow check. */
47
+ #endif
48
+
49
+ IRIns *ir; /* Copy of pointer to IR instructions/constants. */
50
+ jit_State *J; /* JIT compiler state. */
51
+
52
+ #if LJ_TARGET_X86ORX64
53
+ x86ModRM mrm; /* Fused x86 address operand. */
54
+ #endif
55
+
56
+ RegSet freeset; /* Set of free registers. */
57
+ RegSet modset; /* Set of registers modified inside the loop. */
58
+ RegSet weakset; /* Set of weakly referenced registers. */
59
+ RegSet phiset; /* Set of PHI registers. */
60
+
61
+ uint32_t flags; /* Copy of JIT compiler flags. */
62
+ int loopinv; /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */
63
+
64
+ int32_t evenspill; /* Next even spill slot. */
65
+ int32_t oddspill; /* Next odd spill slot (or 0). */
66
+
67
+ IRRef curins; /* Reference of current instruction. */
68
+ IRRef stopins; /* Stop assembly before hitting this instruction. */
69
+ IRRef orignins; /* Original T->nins. */
70
+
71
+ IRRef snapref; /* Current snapshot is active after this reference. */
72
+ IRRef snaprename; /* Rename highwater mark for snapshot check. */
73
+ SnapNo snapno; /* Current snapshot number. */
74
+ SnapNo loopsnapno; /* Loop snapshot number. */
75
+
76
+ IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
77
+ IRRef sectref; /* Section base reference (loopref or 0). */
78
+ IRRef loopref; /* Reference of LOOP instruction (or 0). */
79
+
80
+ BCReg topslot; /* Number of slots for stack check (unless 0). */
81
+ int32_t gcsteps; /* Accumulated number of GC steps (per section). */
82
+
83
+ GCtrace *T; /* Trace to assemble. */
84
+ GCtrace *parent; /* Parent trace (or NULL). */
85
+
86
+ MCode *mcbot; /* Bottom of reserved MCode. */
87
+ MCode *mctop; /* Top of generated MCode. */
88
+ MCode *mcloop; /* Pointer to loop MCode (or NULL). */
89
+ MCode *invmcp; /* Points to invertible loop branch (or NULL). */
90
+ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
91
+ MCode *realign; /* Realign loop if not NULL. */
92
+
93
+ #ifdef RID_NUM_KREF
94
+ int32_t krefk[RID_NUM_KREF];
95
+ #endif
96
+ IRRef1 phireg[RID_MAX]; /* PHI register references. */
97
+ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */
98
+ } ASMState;
99
+
100
/* Address of the IR instruction for a reference. Note: this expands to a use
** of a variable named 'as', which must be in scope wherever IR() is used.
*/
#define IR(ref)			(&as->ir[(ref)])

/* Assembler-internal references, defined in terms of the REF_* constants. */
#define ASMREF_TMP1		REF_TRUE	/* Temp. register. */
#define ASMREF_TMP2		REF_FALSE	/* Temp. register. */
#define ASMREF_L		REF_NIL		/* Stores register for L. */

/* Check for variant to invariant references: true if ref lies below the
** section base reference. The 'as' argument is parenthesized so that any
** pointer expression (e.g. &state) can be passed safely.
*/
#define iscrossref(as, ref)	((ref) < (as)->sectref)

/* Inhibit memory op fusion from variant to invariant references. */
#define FUSE_DISABLED		(~(IRRef)0)
/* True if ref lies above the fusion limit. */
#define mayfuse(as, ref)	((ref) > (as)->fuseref)
/* True if the fusion limit is set to FUSE_DISABLED. */
#define neverfuse(as)		((as)->fuseref == FUSE_DISABLED)
/* True if fusion is not disabled and the instruction is not marked as a PHI. */
#define canfuse(as, ir)		(!neverfuse(as) && !irt_isphi((ir)->t))
/* True if opcode o is one of the load opcodes listed below. */
#define opisfusableload(o) \
  ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
   (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD)
117
+
118
+ /* Sparse limit checks using a red zone before the actual limit. */
119
+ #define MCLIM_REDZONE 64
120
+
121
+ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
122
+ {
123
+ lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
124
+ }
125
+
126
+ static LJ_AINLINE void checkmclim(ASMState *as)
127
+ {
128
+ #ifdef LUA_USE_ASSERT
129
+ if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
130
+ IRIns *ir = IR(as->curins+1);
131
+ fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
132
+ as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
133
+ lua_assert(0);
134
+ }
135
+ #endif
136
+ if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
137
+ #ifdef LUA_USE_ASSERT
138
+ as->mcp_prev = as->mcp;
139
+ #endif
140
+ }
141
+
142
#if defined(RID_NUM_KREF)
/* A ref below RID_NUM_KREF is a kref; its register is RID_MIN_KREF + ref. */
#define ra_iskref(ref)		((ref) < RID_NUM_KREF)
#define ra_krefreg(ref)		((Reg)(RID_MIN_KREF + (Reg)(ref)))
#define ra_krefk(as, ref)	(as->krefk[(ref)])

/* Store constant k for kref register r and set the cost entry of r. */
static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
{
  IRRef kidx = (IRRef)(r - RID_MIN_KREF);
  as->cost[r] = REGCOST(kidx, kidx);
  as->krefk[kidx] = k;
}

#else
/* RID_NUM_KREF is not defined: constant stand-ins for the kref queries. */
#define ra_iskref(ref)		0
#define ra_krefreg(ref)		RID_MIN_GPR
#define ra_krefk(as, ref)	0
#endif
159
+
160
+ /* Arch-specific field offsets. */
161
+ static const uint8_t field_ofs[IRFL__MAX+1] = {
162
+ #define FLOFS(name, ofs) (uint8_t)(ofs),
163
+ IRFLDEF(FLOFS)
164
+ #undef FLOFS
165
+ 0
166
+ };
167
+
168
+ /* -- Target-specific instruction emitter --------------------------------- */
169
+
170
+ #if LJ_TARGET_X86ORX64
171
+ #include "lj_emit_x86.h"
172
+ #elif LJ_TARGET_ARM
173
+ #include "lj_emit_arm.h"
174
+ #elif LJ_TARGET_PPC
175
+ #include "lj_emit_ppc.h"
176
+ #elif LJ_TARGET_MIPS
177
+ #include "lj_emit_mips.h"
178
+ #else
179
+ #error "Missing instruction emitter for target CPU"
180
+ #endif
181
+
182
/* Generic register load/store from/to a stack slot, addressed off RID_SP. */
#define emit_spload(as, ir, r, ofs)	emit_loadofs(as, ir, (r), RID_SP, (ofs))
#define emit_spstore(as, ir, r, ofs)	emit_storeofs(as, ir, (r), RID_SP, (ofs))
187
+
188
+ /* -- Register allocator debugging ---------------------------------------- */
189
+
190
+ /* #define LUAJIT_DEBUG_RA */
191
+
192
+ #ifdef LUAJIT_DEBUG_RA
193
+
194
+ #include <stdio.h>
195
+ #include <stdarg.h>
196
+
197
+ #define RIDNAME(name) #name, /* Stringize one register name as a table entry. */
197
+ static const char *const ra_regname[] = { /* Indexed by register id in ra_dprintf(). */
198
+ GPRDEF(RIDNAME)
199
+ FPRDEF(RIDNAME)
200
+ VRIDDEF(RIDNAME)
201
+ NULL
202
+ };
203
+ #undef RIDNAME
204
+
205
+ static char ra_dbg_buf[65536]; /* Buffer collecting the debug output. */
206
+ static char *ra_dbg_p; /* Current write position inside ra_dbg_buf. */
207
+ static char *ra_dbg_merge; /* Start of the placeholder line set by RA_DBG_REF(). */
208
+ static MCode *ra_dbg_mcp; /* as->mcp recorded by RA_DBG_REF(), else NULL. */
210
+
211
+ static void ra_dstart(void)
212
+ {
213
+ ra_dbg_p = ra_dbg_buf;
214
+ ra_dbg_merge = NULL;
215
+ ra_dbg_mcp = NULL;
216
+ }
217
+
218
+ static void ra_dflush(void)
219
+ {
220
+ fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
221
+ ra_dstart();
222
+ }
223
+
224
+ static void ra_dprintf(ASMState *as, const char *fmt, ...)
225
+ {
226
+ char *p;
227
+ va_list argp;
228
+ va_start(argp, fmt);
229
+ p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
230
+ ra_dbg_mcp = NULL;
231
+ p += sprintf(p, "%08x \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
232
+ for (;;) {
233
+ const char *e = strchr(fmt, '$');
234
+ if (e == NULL) break;
235
+ memcpy(p, fmt, (size_t)(e-fmt));
236
+ p += e-fmt;
237
+ if (e[1] == 'r') {
238
+ Reg r = va_arg(argp, Reg) & RID_MASK;
239
+ if (r <= RID_MAX) {
240
+ const char *q;
241
+ for (q = ra_regname[r]; *q; q++)
242
+ *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
243
+ } else {
244
+ *p++ = '?';
245
+ lua_assert(0);
246
+ }
247
+ } else if (e[1] == 'f' || e[1] == 'i') {
248
+ IRRef ref;
249
+ if (e[1] == 'f')
250
+ ref = va_arg(argp, IRRef);
251
+ else
252
+ ref = va_arg(argp, IRIns *) - as->ir;
253
+ if (ref >= REF_BIAS)
254
+ p += sprintf(p, "%04d", ref - REF_BIAS);
255
+ else
256
+ p += sprintf(p, "K%03d", REF_BIAS - ref);
257
+ } else if (e[1] == 's') {
258
+ uint32_t slot = va_arg(argp, uint32_t);
259
+ p += sprintf(p, "[sp+0x%x]", sps_scale(slot));
260
+ } else if (e[1] == 'x') {
261
+ p += sprintf(p, "%08x", va_arg(argp, int32_t));
262
+ } else {
263
+ lua_assert(0);
264
+ }
265
+ fmt = e+2;
266
+ }
267
+ va_end(argp);
268
+ while (*fmt)
269
+ *p++ = *fmt++;
270
+ *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
271
+ if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
272
+ fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
273
+ p = ra_dbg_buf;
274
+ }
275
+ ra_dbg_p = p;
276
+ }
277
+
278
+ #define RA_DBG_START() ra_dstart() /* Reset the debug buffer. */
278
+ #define RA_DBG_FLUSH() ra_dflush() /* Write the debug buffer to stdout. */
279
+ #define RA_DBG_REF() \
280
+ do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
281
+ ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
282
+ #define RA_DBGX(x) ra_dprintf x /* x is a parenthesized ra_dprintf() argument list. */
283
+ /* RA_DBG_REF() above writes an empty line and remembers where it starts
** and the current as->mcp, so that the next ra_dprintf() at the same
** as->mcp overwrites it. It expects a variable named 'as' in scope.
*/
284
+ #else /* Debugging disabled: all hooks expand to no-ops. */
285
+ #define RA_DBG_START() ((void)0)
286
+ #define RA_DBG_FLUSH() ((void)0)
287
+ #define RA_DBG_REF() ((void)0)
288
+ #define RA_DBGX(x) ((void)0)
290
+ #endif
291
+
292
+ /* -- Register allocator -------------------------------------------------- */
293
+
294
+ #define ra_free(as, r) rset_set(as->freeset, (r)) /* Add r to the free set. */
294
+ #define ra_modified(as, r) rset_set(as->modset, (r)) /* Add r to the modified set. */
295
+ #define ra_weak(as, r) rset_set(as->weakset, (r)) /* Add r to the weak set. */
296
+ #define ra_noweak(as, r) rset_clear(as->weakset, (r)) /* Remove r from the weak set. */
297
+ /* An instruction counts as used if it has a register or a spill slot. */
298
+ #define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
300
+
301
+ /* Setup register allocator. */
302
+ static void ra_setup(ASMState *as)
303
+ {
304
+ Reg r;
305
+ /* Initially all regs (except the stack pointer) are free for use. */
306
+ as->freeset = RSET_INIT;
307
+ as->modset = RSET_EMPTY;
308
+ as->weakset = RSET_EMPTY;
309
+ as->phiset = RSET_EMPTY;
310
+ memset(as->phireg, 0, sizeof(as->phireg));
311
+ for (r = RID_MIN_GPR; r < RID_MAX; r++)
312
+ as->cost[r] = REGCOST(~0u, 0u);
313
+ }
314
+
315
+ /* Rematerialize constants.
** Frees the register currently holding the constant ref, marks it as
** modified and emits the instruction that loads the constant into it.
** For a kref the recorded constant is loaded with emit_loadi(). Otherwise
** the load is selected by the IR opcode of ref. Returns the freed register.
*/
316
+ static Reg ra_rematk(ASMState *as, IRRef ref)
317
+ {
318
+ IRIns *ir;
319
+ Reg r;
320
+ if (ra_iskref(ref)) {
321
+ r = ra_krefreg(ref);
322
+ lua_assert(!rset_test(as->freeset, r));
323
+ ra_free(as, r);
324
+ ra_modified(as, r);
325
+ emit_loadi(as, r, ra_krefk(as, ref));
326
+ return r;
327
+ }
328
+ ir = IR(ref);
329
+ r = ir->r;
330
+ lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
331
+ ra_free(as, r);
332
+ ra_modified(as, r);
333
+ ir->r = RID_INIT; /* Do not keep any hint. */
334
+ RA_DBGX((as, "remat $i $r", ir, r));
335
+ #if !LJ_SOFTFP
336
+ if (ir->o == IR_KNUM) { /* FP constant: load via emit_loadn(). */
337
+ emit_loadn(as, r, ir_knum(ir));
338
+ } else
339
+ #endif
340
+ if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
341
+ ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
342
+ emit_getgl(as, r, jit_base);
343
+ } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
344
+ lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */
345
+ emit_getgl(as, r, cur_L);
346
+ #if LJ_64
347
+ } else if (ir->o == IR_KINT64) { /* 64 bit constant: load via emit_loadu64(). */
348
+ emit_loadu64(as, r, ir_kint64(ir)->u64);
349
+ #endif
350
+ } else { /* Everything else is loaded as the 32 bit value ir->i. */
351
+ lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
352
+ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
353
+ emit_loadi(as, r, ir->i);
354
+ }
355
+ return r;
356
+ }
357
+
358
+ /* Force a spill. Allocate a new spill slot if needed. */
359
+ static int32_t ra_spill(ASMState *as, IRIns *ir)
360
+ {
361
+ int32_t slot = ir->s;
362
+ lua_assert(ir >= as->ir + REF_TRUE);
363
+ if (!ra_hasspill(slot)) {
364
+ if (irt_is64(ir->t)) {
365
+ slot = as->evenspill;
366
+ as->evenspill += 2;
367
+ } else if (as->oddspill) {
368
+ slot = as->oddspill;
369
+ as->oddspill = 0;
370
+ } else {
371
+ slot = as->evenspill;
372
+ as->oddspill = slot+1;
373
+ as->evenspill += 2;
374
+ }
375
+ if (as->evenspill > 256)
376
+ lj_trace_err(as->J, LJ_TRERR_SPILLOV);
377
+ ir->s = (uint8_t)slot;
378
+ }
379
+ return sps_scale(slot);
380
+ }
381
+
382
+ /* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
383
+ static Reg ra_releasetmp(ASMState *as, IRRef ref)
384
+ {
385
+ IRIns *ir = IR(ref);
386
+ Reg r = ir->r;
387
+ lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
388
+ ra_free(as, r);
389
+ ra_modified(as, r);
390
+ ir->r = RID_INIT;
391
+ return r;
392
+ }
393
+
394
+ /* Restore a register (marked as free). Rematerialize or force a spill. */
395
+ static Reg ra_restore(ASMState *as, IRRef ref)
396
+ {
397
+ if (emit_canremat(ref)) {
398
+ return ra_rematk(as, ref);
399
+ } else {
400
+ IRIns *ir = IR(ref);
401
+ int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */
402
+ Reg r = ir->r;
403
+ lua_assert(ra_hasreg(r));
404
+ ra_sethint(ir->r, r); /* Keep hint. */
405
+ ra_free(as, r);
406
+ if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
407
+ ra_modified(as, r);
408
+ RA_DBGX((as, "restore $i $r", ir, r));
409
+ emit_spload(as, ir, r, ofs);
410
+ }
411
+ return r;
412
+ }
413
+ }
414
+
415
+ /* Save a register to a spill slot. */
416
+ static void ra_save(ASMState *as, IRIns *ir, Reg r)
417
+ {
418
+ RA_DBGX((as, "save $i $r", ir, r));
419
+ emit_spstore(as, ir, r, sps_scale(ir->s));
420
+ }
421
+
422
+ #define MINCOST(name) \
422
+ if (rset_test(RSET_ALL, RID_##name) && \
423
+ LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \
424
+ cost = as->cost[RID_##name]; /* Lower 'cost' if RID_name is allocatable, allowed and cheaper. */
425
+
426
+ /* Evict the register with the lowest cost, forcing a restore.
** MINCOST is expanded once per register of the selected class (GPRs if
** there are no FPRs or allow is below RID2RSET(RID_MAX_GPR), else FPRs)
** and relies on the locals 'as', 'allow' and 'cost'. The ref to evict is
** taken from the minimum cost and handed to ra_restore(), whose register
** is returned.
*/
427
+ static Reg ra_evict(ASMState *as, RegSet allow)
428
+ {
429
+ IRRef ref;
430
+ RegCost cost = ~(RegCost)0;
431
+ lua_assert(allow != RSET_EMPTY);
432
+ if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
433
+ GPRDEF(MINCOST)
434
+ } else {
435
+ FPRDEF(MINCOST)
436
+ }
437
+ ref = regcost_ref(cost);
438
+ lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
439
+ /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
440
+ if (!irref_isk(ref) && (as->weakset & allow)) {
441
+ IRIns *ir = IR(ref);
442
+ if (!rset_test(as->weakset, ir->r)) /* Chosen ref is not weak: switch to a weak one. */
443
+ ref = regcost_ref(as->cost[rset_pickbot((as->weakset & allow))]);
444
+ }
445
+ return ra_restore(as, ref);
446
+ }
448
+
449
+ /* Pick any register (marked as free). Evict on-demand. */
450
+ static Reg ra_pick(ASMState *as, RegSet allow)
451
+ {
452
+ RegSet pick = as->freeset & allow;
453
+ if (!pick)
454
+ return ra_evict(as, allow);
455
+ else
456
+ return rset_picktop(pick);
457
+ }
458
+
459
+ /* Get a scratch register (marked as free). */
460
+ static Reg ra_scratch(ASMState *as, RegSet allow)
461
+ {
462
+ Reg r = ra_pick(as, allow);
463
+ ra_modified(as, r);
464
+ RA_DBGX((as, "scratch $r", r));
465
+ return r;
466
+ }
467
+
468
+ /* Evict all registers from a set (if not free). */
469
+ static void ra_evictset(ASMState *as, RegSet drop)
470
+ {
471
+ RegSet work;
472
+ as->modset |= drop;
473
+ #if !LJ_SOFTFP
474
+ work = (drop & ~as->freeset) & RSET_FPR;
475
+ while (work) {
476
+ Reg r = rset_pickbot(work);
477
+ ra_restore(as, regcost_ref(as->cost[r]));
478
+ rset_clear(work, r);
479
+ checkmclim(as);
480
+ }
481
+ #endif
482
+ work = (drop & ~as->freeset);
483
+ while (work) {
484
+ Reg r = rset_pickbot(work);
485
+ ra_restore(as, regcost_ref(as->cost[r]));
486
+ rset_clear(work, r);
487
+ checkmclim(as);
488
+ }
489
+ }
490
+
491
+ /* Evict (rematerialize) all registers allocated to constants. */
492
+ static void ra_evictk(ASMState *as)
493
+ {
494
+ RegSet work;
495
+ #if !LJ_SOFTFP
496
+ work = ~as->freeset & RSET_FPR;
497
+ while (work) {
498
+ Reg r = rset_pickbot(work);
499
+ IRRef ref = regcost_ref(as->cost[r]);
500
+ if (emit_canremat(ref) && irref_isk(ref)) {
501
+ ra_rematk(as, ref);
502
+ checkmclim(as);
503
+ }
504
+ rset_clear(work, r);
505
+ }
506
+ #endif
507
+ work = ~as->freeset & RSET_GPR;
508
+ while (work) {
509
+ Reg r = rset_pickbot(work);
510
+ IRRef ref = regcost_ref(as->cost[r]);
511
+ if (emit_canremat(ref) && irref_isk(ref)) {
512
+ ra_rematk(as, ref);
513
+ checkmclim(as);
514
+ }
515
+ rset_clear(work, r);
516
+ }
517
+ }
518
+
519
+ #ifdef RID_NUM_KREF
520
+ /* Allocate a register for a constant. */
521
+ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
522
+ {
523
+ /* First try to find a register which already holds the same constant. */
524
+ RegSet pick, work = ~as->freeset & RSET_GPR;
525
+ Reg r;
526
+ while (work) {
527
+ IRRef ref;
528
+ r = rset_pickbot(work);
529
+ ref = regcost_ref(as->cost[r]);
530
+ if (ref < ASMREF_L &&
531
+ k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
532
+ return r;
533
+ rset_clear(work, r);
534
+ }
535
+ pick = as->freeset & allow;
536
+ if (pick) {
537
+ /* Constants should preferably get unmodified registers. */
538
+ if ((pick & ~as->modset))
539
+ pick &= ~as->modset;
540
+ r = rset_pickbot(pick); /* Reduce conflicts with inverse allocation. */
541
+ } else {
542
+ r = ra_evict(as, allow);
543
+ }
544
+ RA_DBGX((as, "allock $x $r", k, r));
545
+ ra_setkref(as, r, k);
546
+ rset_clear(as->freeset, r);
547
+ ra_noweak(as, r);
548
+ return r;
549
+ }
550
+
551
+ /* Allocate a specific register for a constant. */
552
+ static void ra_allockreg(ASMState *as, int32_t k, Reg r)
553
+ {
554
+ Reg kr = ra_allock(as, k, RID2RSET(r));
555
+ if (kr != r) {
556
+ IRIns irdummy;
557
+ irdummy.t.irt = IRT_INT;
558
+ ra_scratch(as, RID2RSET(r));
559
+ emit_movrr(as, &irdummy, r, kr);
560
+ }
561
+ }
562
+ #else
563
+ #define ra_allockreg(as, k, r) emit_loadi(as, (r), (k))
564
+ #endif
565
+
566
+ /* Allocate a register for ref from the allowed set of registers.
566
+ ** Note: this function assumes the ref does NOT have a register yet!
567
+ ** Picks an optimal register, sets the cost and marks the register as non-free.
** Selection order when a free allowed register exists: the hinted register
** if it is free; the hinted register after rematerializing its current
** holder, if it is allowed and the holder can be rematerialized; else a
** register picked by rset_pickbot() for refs below as->loopref that are
** not PHIs, or by rset_picktop() for the rest. With no free allowed
** register, one is evicted. The result is also made non-weak.
568
+ */
569
+ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
570
+ {
571
+ IRIns *ir = IR(ref);
572
+ RegSet pick = as->freeset & allow;
573
+ Reg r;
574
+ lua_assert(ra_noreg(ir->r));
575
+ if (pick) {
576
+ /* First check register hint from propagation or PHI. */
577
+ if (ra_hashint(ir->r)) {
578
+ r = ra_gethint(ir->r);
579
+ if (rset_test(pick, r)) /* Use hint register if possible. */
580
+ goto found;
581
+ /* Rematerialization is cheaper than missing a hint. */
582
+ if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) {
583
+ ra_rematk(as, regcost_ref(as->cost[r]));
584
+ goto found;
585
+ }
586
+ RA_DBGX((as, "hintmiss $f $r", ref, r));
587
+ }
588
+ /* Invariants should preferably get unmodified registers. */
589
+ if (ref < as->loopref && !irt_isphi(ir->t)) {
590
+ if ((pick & ~as->modset))
591
+ pick &= ~as->modset;
592
+ r = rset_pickbot(pick); /* Reduce conflicts with inverse allocation. */
593
+ } else {
594
+ /* We've got plenty of regs, so get callee-save regs if possible. */
595
+ if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH))
596
+ pick &= ~RSET_SCRATCH;
597
+ r = rset_picktop(pick);
598
+ }
599
+ } else {
600
+ r = ra_evict(as, allow);
601
+ }
602
+ found: /* Common exit: bind r to ref and record its cost. */
603
+ RA_DBGX((as, "alloc $f $r", ref, r));
604
+ ir->r = (uint8_t)r;
605
+ rset_clear(as->freeset, r);
606
+ ra_noweak(as, r);
607
+ as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t));
608
+ return r;
609
+ }
611
+
612
+ /* Allocate a register on-demand. */
613
+ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
614
+ {
615
+ Reg r = IR(ref)->r;
616
+ /* Note: allow is ignored if the register is already allocated. */
617
+ if (ra_noreg(r)) r = ra_allocref(as, ref, allow);
618
+ ra_noweak(as, r);
619
+ return r;
620
+ }
621
+
622
+ /* Rename register allocation and emit move.
** The ref held by register 'down' is rebound to register 'up': the cost
** entry is copied to 'up' and cleared for 'down', 'down' becomes free and
** modified, 'up' becomes allocated and non-weak. Unless the ref has a
** spill slot, an IR_RENAME instruction carrying 'down' and the current
** snapshot number is appended to the IR.
*/
622
+ static void ra_rename(ASMState *as, Reg down, Reg up)
623
+ {
624
+ IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
625
+ IRIns *ir = IR(ref);
626
+ ir->r = (uint8_t)up;
627
+ as->cost[down] = 0;
628
+ lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); /* Both must be on the same side of RID_MAX_GPR. */
629
+ lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
630
+ ra_free(as, down); /* 'down' is free ... */
631
+ ra_modified(as, down);
632
+ rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
633
+ ra_noweak(as, up);
634
+ RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
635
+ emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
636
+ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
637
+ lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
638
+ ren = tref_ref(lj_ir_emit(as->J));
639
+ as->ir = as->T->ir; /* The IR may have been reallocated. */
640
+ IR(ren)->r = (uint8_t)down;
641
+ IR(ren)->s = SPS_NONE;
642
+ }
643
+ }
645
+
646
+ /* Pick a destination register (marked as free).
647
+ ** Caveat: allow is ignored if there's already a destination register.
648
+ ** Use ra_destreg() to get a specific register.
649
+ */
650
+ static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow)
651
+ {
652
+ Reg dest = ir->r;
653
+ if (ra_hasreg(dest)) {
654
+ ra_free(as, dest);
655
+ ra_modified(as, dest);
656
+ } else {
657
+ if (ra_hashint(dest) && rset_test((as->freeset&allow), ra_gethint(dest))) {
658
+ dest = ra_gethint(dest);
659
+ ra_modified(as, dest);
660
+ RA_DBGX((as, "dest $r", dest));
661
+ } else {
662
+ dest = ra_scratch(as, allow);
663
+ }
664
+ ir->r = dest;
665
+ }
666
+ if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest);
667
+ return dest;
668
+ }
669
+
670
+ /* Force a specific destination register (marked as free). */
671
+ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
672
+ {
673
+ Reg dest = ra_dest(as, ir, RID2RSET(r));
674
+ if (dest != r) {
675
+ lua_assert(rset_test(as->freeset, r));
676
+ ra_modified(as, r);
677
+ emit_movrr(as, ir, dest, r);
678
+ }
679
+ }
680
+
681
+ #if LJ_TARGET_X86ORX64
682
+ /* Propagate dest register to left reference. Emit moves as needed.
683
+ ** This is a required fixup step for all 2-operand machine instructions.
** A constant left operand without a register is loaded straight into dest
** where possible. Otherwise dest is propagated as a hint (unless a hint
** exists or the ref is a cross-ref) and a register of the same class as
** dest is allocated. If the registers differ, either the allocation is
** renamed (PHI whose PHI register is dest) or a move is emitted.
684
+ */
685
+ static void ra_left(ASMState *as, Reg dest, IRRef lref)
686
+ {
687
+ IRIns *ir = IR(lref);
688
+ Reg left = ir->r;
689
+ if (ra_noreg(left)) {
690
+ if (irref_isk(lref)) {
691
+ if (ir->o == IR_KNUM) {
692
+ cTValue *tv = ir_knum(ir);
693
+ /* FP remat needs a load except for +0. Still better than eviction. */
694
+ if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
695
+ emit_loadn(as, dest, tv);
696
+ return;
697
+ }
698
+ #if LJ_64
699
+ } else if (ir->o == IR_KINT64) {
700
+ emit_loadu64(as, dest, ir_kint64(ir)->u64);
701
+ return;
702
+ #endif
703
+ } else if (ir->o != IR_KPRI) { /* IR_KPRI falls through to the allocation below. */
704
+ lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
705
+ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
706
+ emit_loadi(as, dest, ir->i);
707
+ return;
708
+ }
709
+ }
710
+ if (!ra_hashint(left) && !iscrossref(as, lref))
711
+ ra_sethint(ir->r, dest); /* Propagate register hint. */
712
+ left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR);
713
+ }
714
+ ra_noweak(as, left);
715
+ /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */
716
+ if (dest != left) {
717
+ /* Use register renaming if dest is the PHI reg. */
718
+ if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
719
+ ra_modified(as, left);
720
+ ra_rename(as, left, dest);
721
+ } else {
722
+ emit_movrr(as, ir, dest, left);
723
+ }
724
+ }
725
+ }
726
+ #else
727
+ /* Similar to ra_left, except we override any hints. */
728
+ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
729
+ {
730
+ IRIns *ir = IR(lref);
731
+ Reg left = ir->r;
732
+ if (ra_noreg(left)) {
733
+ ra_sethint(ir->r, dest); /* Propagate register hint. */
734
+ left = ra_allocref(as, lref,
735
+ (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR);
736
+ }
737
+ ra_noweak(as, left);
738
+ if (dest != left) {
739
+ /* Use register renaming if dest is the PHI reg. */
740
+ if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
741
+ ra_modified(as, left);
742
+ ra_rename(as, left, dest);
743
+ } else {
744
+ emit_movrr(as, ir, dest, left);
745
+ }
746
+ }
747
+ }
748
+ #endif
749
+
750
+ #if !LJ_64
751
+ /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free).
** ir holds the low part and ir+1 the high part of the result. Whatever
** else occupies RID_RETLO/RID_RETHI is restored first. Destination
** registers that are already assigned are freed and the moves (or an
** exchange) needed to get the values out of RID_RETLO/RID_RETHI into
** them are emitted. Finally stores to existing spill slots are emitted.
*/
752
+ static void ra_destpair(ASMState *as, IRIns *ir)
753
+ {
754
+ Reg destlo = ir->r, desthi = (ir+1)->r;
755
+ /* First spill unrelated refs blocking the destination registers. */
756
+ if (!rset_test(as->freeset, RID_RETLO) &&
757
+ destlo != RID_RETLO && desthi != RID_RETLO)
758
+ ra_restore(as, regcost_ref(as->cost[RID_RETLO]));
759
+ if (!rset_test(as->freeset, RID_RETHI) &&
760
+ destlo != RID_RETHI && desthi != RID_RETHI)
761
+ ra_restore(as, regcost_ref(as->cost[RID_RETHI]));
762
+ /* Next free the destination registers (if any). */
763
+ if (ra_hasreg(destlo)) {
764
+ ra_free(as, destlo);
765
+ ra_modified(as, destlo);
766
+ } else {
767
+ destlo = RID_RETLO;
768
+ }
769
+ if (ra_hasreg(desthi)) {
770
+ ra_free(as, desthi);
771
+ ra_modified(as, desthi);
772
+ } else {
773
+ desthi = RID_RETHI;
774
+ }
775
+ /* Check for conflicts and shuffle the registers as needed. */
776
+ if (destlo == RID_RETHI) {
777
+ if (desthi == RID_RETLO) { /* Both crossed: swap the two registers. */
778
+ #if LJ_TARGET_X86
779
+ *--as->mcp = XI_XCHGa + RID_RETHI;
780
+ #else
781
+ emit_movrr(as, ir, RID_RETHI, RID_TMP);
782
+ emit_movrr(as, ir, RID_RETLO, RID_RETHI);
783
+ emit_movrr(as, ir, RID_TMP, RID_RETLO);
784
+ #endif
785
+ } else {
786
+ emit_movrr(as, ir, RID_RETHI, RID_RETLO);
787
+ if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
788
+ }
789
+ } else if (desthi == RID_RETLO) {
790
+ emit_movrr(as, ir, RID_RETLO, RID_RETHI);
791
+ if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
792
+ } else { /* No overlap: the two moves are independent. */
793
+ if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
794
+ if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
795
+ }
796
+ /* Restore spill slots (if any). */
797
+ if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
798
+ if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
799
+ }
800
+ #endif
801
+
802
+ /* -- Snapshot handling --------- ----------------------------------------- */
803
+
804
+ /* Can we rematerialize a KNUM instead of forcing a spill? */
805
+ static int asm_snap_canremat(ASMState *as)
806
+ {
807
+ Reg r;
808
+ for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++)
809
+ if (irref_isk(regcost_ref(as->cost[r])))
810
+ return 1;
811
+ return 0;
812
+ }
813
+
814
+ /* Check whether a sunk store corresponds to an allocation. */
815
+ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
816
+ {
817
+ if (irs->s == 255) {
818
+ if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
819
+ irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
820
+ IRIns *irk = IR(irs->op1);
821
+ if (irk->o == IR_AREF || irk->o == IR_HREFK)
822
+ irk = IR(irk->op1);
823
+ return (IR(irk->op1) == ira);
824
+ }
825
+ return 0;
826
+ } else {
827
+ return (ira + irs->s == irs); /* Quick check. */
828
+ }
829
+ }
830
+
831
+ /* Allocate register or spill slot for a ref that escapes to a snapshot.
** Constants, refs that already have a register or spill slot, and refs
** marked RID_SUNK are left alone. A ref marked RID_SINK is re-marked as
** RID_SUNK and the values it depends on are allocated recursively. Any
** other ref gets a weak register if one is free (or an FPR constant can
** be rematerialized), else a spill slot.
*/
832
+ static void asm_snap_alloc1(ASMState *as, IRRef ref)
833
+ {
834
+ IRIns *ir = IR(ref);
835
+ if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) {
836
+ if (ir->r == RID_SINK) {
837
+ ir->r = RID_SUNK;
838
+ #if LJ_HASFFI
839
+ if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
840
+ asm_snap_alloc1(as, ir->op2);
841
+ if (LJ_32 && (ir+1)->o == IR_HIOP)
842
+ asm_snap_alloc1(as, (ir+1)->op2);
843
+ } else
844
+ #endif
845
+ { /* Allocate stored values for TNEW, TDUP and CNEW. */
846
+ IRIns *irs;
847
+ lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
848
+ for (irs = IR(as->snapref-1); irs > ir; irs--) /* Scan backwards from the snapshot to the allocation. */
849
+ if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
850
+ lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
851
+ irs->o == IR_FSTORE || irs->o == IR_XSTORE);
852
+ asm_snap_alloc1(as, irs->op2);
853
+ if (LJ_32 && (irs+1)->o == IR_HIOP)
854
+ asm_snap_alloc1(as, (irs+1)->op2);
855
+ }
856
+ }
857
+ } else {
858
+ RegSet allow;
859
+ if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
860
+ IRIns *irc;
861
+ for (irc = IR(as->curins); irc > ir; irc--)
862
+ if ((irc->op1 == ref || irc->op2 == ref) &&
863
+ !(irc->r == RID_SINK || irc->r == RID_SUNK))
864
+ goto nosink; /* Don't sink conversion if result is used. */
865
+ asm_snap_alloc1(as, ir->op1); /* Allocate the conversion source instead. */
866
+ return;
867
+ }
868
+ nosink:
869
+ allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
870
+ if ((as->freeset & allow) ||
871
+ (allow == RSET_FPR && asm_snap_canremat(as))) {
872
+ /* Get a weak register if we have a free one or can rematerialize. */
873
+ Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
874
+ if (!irt_isphi(ir->t))
875
+ ra_weak(as, r); /* But mark it as weakly referenced. */
876
+ checkmclim(as);
877
+ RA_DBGX((as, "snapreg $f $r", ref, ir->r));
878
+ } else {
879
+ ra_spill(as, ir); /* Otherwise force a spill slot. */
880
+ RA_DBGX((as, "snapspill $f $s", ref, ir->s));
881
+ }
882
+ }
883
+ }
884
+ }
885
+
886
+ /* Allocate refs escaping to a snapshot. */
887
+ static void asm_snap_alloc(ASMState *as)
888
+ {
889
+ SnapShot *snap = &as->T->snap[as->snapno];
890
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
891
+ MSize n, nent = snap->nent;
892
+ for (n = 0; n < nent; n++) {
893
+ SnapEntry sn = map[n];
894
+ IRRef ref = snap_ref(sn);
895
+ if (!irref_isk(ref)) {
896
+ asm_snap_alloc1(as, ref);
897
+ if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
898
+ lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
899
+ asm_snap_alloc1(as, ref+1);
900
+ }
901
+ }
902
+ }
903
+ }
904
+
905
+ /* All guards for a snapshot use the same exitno. This is currently the
906
+ ** same as the snapshot number. Since the exact origin of the exit cannot
907
+ ** be determined, all guards for the same snapshot must exit with the same
908
+ ** RegSP mapping.
909
+ ** A renamed ref which has been used in a prior guard for the same snapshot
910
+ ** would cause an inconsistency. The easy way out is to force a spill slot.
911
+ */
912
+ static int asm_snap_checkrename(ASMState *as, IRRef ren)
913
+ {
914
+ SnapShot *snap = &as->T->snap[as->snapno];
915
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
916
+ MSize n, nent = snap->nent;
917
+ for (n = 0; n < nent; n++) {
918
+ SnapEntry sn = map[n];
919
+ IRRef ref = snap_ref(sn);
920
+ if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) {
921
+ IRIns *ir = IR(ref);
922
+ ra_spill(as, ir); /* Register renamed, so force a spill slot. */
923
+ RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
924
+ return 1; /* Found. */
925
+ }
926
+ }
927
+ return 0; /* Not found. */
928
+ }
929
+
930
+ /* Prepare snapshot for next guard instruction. */
931
+ static void asm_snap_prep(ASMState *as)
932
+ {
933
+ if (as->curins < as->snapref) {
934
+ do {
935
+ if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */
936
+ as->snapno--;
937
+ as->snapref = as->T->snap[as->snapno].ref;
938
+ } while (as->curins < as->snapref);
939
+ asm_snap_alloc(as);
940
+ as->snaprename = as->T->nins;
941
+ } else {
942
+ /* Process any renames above the highwater mark. */
943
+ for (; as->snaprename < as->T->nins; as->snaprename++) {
944
+ IRIns *ir = IR(as->snaprename);
945
+ if (asm_snap_checkrename(as, ir->op1))
946
+ ir->op2 = REF_BIAS-1; /* Kill rename. */
947
+ }
948
+ }
949
+ }
950
+
951
+ /* -- Miscellaneous helpers ----------------------------------------------- */
952
+
953
+ /* Calculate stack adjustment. */
954
+ static int32_t asm_stack_adjust(ASMState *as)
955
+ {
956
+ if (as->evenspill <= SPS_FIXED)
957
+ return 0;
958
+ return sps_scale(sps_align(as->evenspill));
959
+ }
960
+
961
+ /* Must match with hash*() in lj_tab.c. */
962
+ static uint32_t ir_khash(IRIns *ir)
963
+ {
964
+ uint32_t lo, hi;
965
+ if (irt_isstr(ir->t)) {
966
+ return ir_kstr(ir)->hash;
967
+ } else if (irt_isnum(ir->t)) {
968
+ lo = ir_knum(ir)->u32.lo;
969
+ hi = ir_knum(ir)->u32.hi << 1;
970
+ } else if (irt_ispri(ir->t)) {
971
+ lua_assert(!irt_isnil(ir->t));
972
+ return irt_type(ir->t)-IRT_FALSE;
973
+ } else {
974
+ lua_assert(irt_isgcv(ir->t));
975
+ lo = u32ptr(ir_kgc(ir));
976
+ hi = lo + HASH_BIAS;
977
+ }
978
+ return hashrot(lo, hi);
979
+ }
980
+
981
+ /* -- Allocations --------------------------------------------------------- */
982
+
983
+ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args);
984
+ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci);
985
+
986
+ static void asm_snew(ASMState *as, IRIns *ir)
987
+ {
988
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
989
+ IRRef args[3];
990
+ args[0] = ASMREF_L; /* lua_State *L */
991
+ args[1] = ir->op1; /* const char *str */
992
+ args[2] = ir->op2; /* size_t len */
993
+ as->gcsteps++;
994
+ asm_setupresult(as, ir, ci); /* GCstr * */
995
+ asm_gencall(as, ci, args);
996
+ }
997
+
998
+ static void asm_tnew(ASMState *as, IRIns *ir)
999
+ {
1000
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
1001
+ IRRef args[2];
1002
+ args[0] = ASMREF_L; /* lua_State *L */
1003
+ args[1] = ASMREF_TMP1; /* uint32_t ahsize */
1004
+ as->gcsteps++;
1005
+ asm_setupresult(as, ir, ci); /* GCtab * */
1006
+ asm_gencall(as, ci, args);
1007
+ ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1));
1008
+ }
1009
+
1010
+ static void asm_tdup(ASMState *as, IRIns *ir)
1011
+ {
1012
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
1013
+ IRRef args[2];
1014
+ args[0] = ASMREF_L; /* lua_State *L */
1015
+ args[1] = ir->op1; /* const GCtab *kt */
1016
+ as->gcsteps++;
1017
+ asm_setupresult(as, ir, ci); /* GCtab * */
1018
+ asm_gencall(as, ci, args);
1019
+ }
1020
+
1021
+ static void asm_gc_check(ASMState *as);
1022
+
1023
+ /* Explicit GC step. */
1024
+ static void asm_gcstep(ASMState *as, IRIns *ir)
1025
+ {
1026
+ IRIns *ira;
1027
+ for (ira = IR(as->stopins+1); ira < ir; ira++)
1028
+ if ((ira->o == IR_TNEW || ira->o == IR_TDUP ||
1029
+ (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI))) &&
1030
+ ra_used(ira))
1031
+ as->gcsteps++;
1032
+ if (as->gcsteps)
1033
+ asm_gc_check(as);
1034
+ as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1035
+ }
1036
+
1037
+ /* -- Buffer operations --------------------------------------------------- */
1038
+
1039
+ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1040
+
1041
+ static void asm_bufhdr(ASMState *as, IRIns *ir) /* Set up the buffer pointer register sb for a buffer header. */
1042
+ {
1043
+ Reg sb = ra_dest(as, ir, RSET_GPR);
1044
+ if ((ir->op2 & IRBUFHDR_APPEND)) {
1045
+ /* Rematerialize const buffer pointer instead of likely spill. */
1046
+ IRIns *irp = IR(ir->op1);
1047
+ if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1048
+ (irp == ir-2 && !ra_used(ir-1)))) {
1049
+ while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND))) /* Walk back to the non-append header. */
1050
+ irp = IR(irp->op1);
1051
+ if (irref_isk(irp->op1)) {
1052
+ ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1053
+ ir = irp; /* Use the constant operand of that header below. */
1054
+ }
1055
+ }
1056
+ } else {
1057
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1058
+ /* Passing ir isn't strictly correct, but it's an IRT_P32, too. */
1059
+ emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); /* Emits a store of tmp to SBuf.p ... */
1060
+ emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); /* ... and a load of tmp from SBuf.b. */
1061
+ }
1062
+ #if LJ_TARGET_X86ORX64
1063
+ ra_left(as, sb, ir->op1);
1064
+ #else
1065
+ ra_leftov(as, sb, ir->op1);
1066
+ #endif
1067
+ }
1068
+
1069
+ static void asm_bufput(ASMState *as, IRIns *ir) /* Assemble a put to a string buffer; picks the cheapest helper call. */
1070
+ {
1071
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; /* Default: put a GCstr. */
1072
+ IRRef args[3];
1073
+ IRIns *irs;
1074
+ int kchar = -1; /* Constant char to put, or -1 if none. */
1075
+ args[0] = ir->op1; /* SBuf * */
1076
+ args[1] = ir->op2; /* GCstr * */
1077
+ irs = IR(ir->op2);
1078
+ lua_assert(irt_isstr(irs->t));
1079
+ if (irs->o == IR_KGC) {
1080
+ GCstr *s = ir_kstr(irs);
1081
+ if (s->len == 1) { /* Optimize put of single-char string constant. */
1082
+ kchar = strdata(s)[0];
1083
+ args[1] = ASMREF_TMP1; /* int, truncated to char */
1084
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1085
+ }
1086
+ } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1087
+ if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1088
+ if (irs->op2 == IRTOSTR_NUM) {
1089
+ args[1] = ASMREF_TMP1; /* TValue * */
1090
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1091
+ } else {
1092
+ lua_assert(irt_isinteger(IR(irs->op1)->t));
1093
+ args[1] = irs->op1; /* int */
1094
+ if (irs->op2 == IRTOSTR_INT)
1095
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1096
+ else
1097
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1098
+ }
1099
+ } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1100
+ args[1] = irs->op1; /* const void * */
1101
+ args[2] = irs->op2; /* MSize */
1102
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1103
+ }
1104
+ }
1105
+ asm_setupresult(as, ir, ci); /* SBuf * */
1106
+ asm_gencall(as, ci, args);
1107
+ if (args[1] == ASMREF_TMP1) { /* The temporary needs to be filled in. */
1108
+ Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1109
+ if (kchar == -1)
1110
+ asm_tvptr(as, tmp, irs->op1); /* Number case: pass the operand via asm_tvptr(). */
1111
+ else
1112
+ ra_allockreg(as, kchar, tmp); /* Constant char case. */
1113
+ }
1114
+ }
1115
+
1116
+ static void asm_bufstr(ASMState *as, IRIns *ir)
1117
+ {
1118
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1119
+ IRRef args[1];
1120
+ args[0] = ir->op1; /* SBuf *sb */
1121
+ as->gcsteps++;
1122
+ asm_setupresult(as, ir, ci); /* GCstr * */
1123
+ asm_gencall(as, ci, args);
1124
+ }
1125
+
1126
+ /* -- Type conversions ---------------------------------------------------- */
1127
+
1128
+ static void asm_tostr(ASMState *as, IRIns *ir)
1129
+ {
1130
+ const CCallInfo *ci;
1131
+ IRRef args[2];
1132
+ args[0] = ASMREF_L;
1133
+ as->gcsteps++;
1134
+ if (ir->op2 == IRTOSTR_NUM) {
1135
+ args[1] = ASMREF_TMP1; /* cTValue * */
1136
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1137
+ } else {
1138
+ args[1] = ir->op1; /* int32_t k */
1139
+ if (ir->op2 == IRTOSTR_INT)
1140
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1141
+ else
1142
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1143
+ }
1144
+ asm_setupresult(as, ir, ci); /* GCstr * */
1145
+ asm_gencall(as, ci, args);
1146
+ if (ir->op2 == IRTOSTR_NUM)
1147
+ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
1148
+ }
1149
+
1150
+ #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1151
+ static void asm_conv64(ASMState *as, IRIns *ir) /* Assemble a 64 bit conversion as a call to an fp64 helper. */
1152
+ {
1153
+ IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); /* Source type, from the CONV at ir-1. */
1154
+ IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); /* Destination type. */
1155
+ IRCallID id;
1156
+ IRRef args[2];
1157
+ lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
1158
+ args[LJ_BE] = (ir-1)->op1; /* Operand of the CONV; index depends on endianess. */
1159
+ args[LJ_LE] = ir->op1; /* Operand of the HIOP. */
1160
+ if (st == IRT_NUM || st == IRT_FLOAT) { /* FP source: id is an offset from IRCALL_fp64_d2l. */
1161
+ id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1162
+ ir--; /* The result is set up for the CONV instead of the HIOP. */
1163
+ } else { /* Other source: id is an offset from IRCALL_fp64_l2d. */
1164
+ id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1165
+ }
1166
+ {
1167
+ #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1168
+ CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1169
+ cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1170
+ #else
1171
+ const CCallInfo *ci = &lj_ir_callinfo[id];
1172
+ #endif
1173
+ asm_setupresult(as, ir, ci);
1174
+ asm_gencall(as, ci, args);
1175
+ }
1176
+ }
1177
+ #endif
1178
+
1179
+ /* -- Memory references --------------------------------------------------- */
1180
+
1181
+ static void asm_newref(ASMState *as, IRIns *ir)
1182
+ {
1183
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1184
+ IRRef args[3];
1185
+ if (ir->r == RID_SINK)
1186
+ return;
1187
+ args[0] = ASMREF_L; /* lua_State *L */
1188
+ args[1] = ir->op1; /* GCtab *t */
1189
+ args[2] = ASMREF_TMP1; /* cTValue *key */
1190
+ asm_setupresult(as, ir, ci); /* TValue * */
1191
+ asm_gencall(as, ci, args);
1192
+ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1193
+ }
1194
+
1195
+ static void asm_lref(ASMState *as, IRIns *ir)
1196
+ {
1197
+ Reg r = ra_dest(as, ir, RSET_GPR);
1198
+ #if LJ_TARGET_X86ORX64
1199
+ ra_left(as, r, ASMREF_L);
1200
+ #else
1201
+ ra_leftov(as, r, ASMREF_L);
1202
+ #endif
1203
+ }
1204
+
1205
+ /* -- Calls --------------------------------------------------------------- */
1206
+
1207
+ /* Collect arguments from CALL* and CARG instructions. */
1208
+ static void asm_collectargs(ASMState *as, IRIns *ir,
1209
+ const CCallInfo *ci, IRRef *args)
1210
+ {
1211
+ uint32_t n = CCI_XNARGS(ci);
1212
+ lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
1213
+ if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1214
+ while (n-- > 1) {
1215
+ ir = IR(ir->op1);
1216
+ lua_assert(ir->o == IR_CARG);
1217
+ args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1218
+ }
1219
+ args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1220
+ lua_assert(IR(ir->op1)->o != IR_CARG);
1221
+ }
1222
+
1223
+ /* Reconstruct CCallInfo flags for CALLX*. */
1224
+ static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1225
+ {
1226
+ uint32_t nargs = 0;
1227
+ if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1228
+ IRIns *ira = IR(ir->op1);
1229
+ nargs++;
1230
+ while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1231
+ }
1232
+ #if LJ_HASFFI
1233
+ if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1234
+ CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1235
+ CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1236
+ nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1237
+ #if LJ_TARGET_X86
1238
+ nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1239
+ #endif
1240
+ }
1241
+ #endif
1242
+ return (nargs | (ir->t.irt << CCI_OTSHIFT));
1243
+ }
1244
+
1245
+ static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1246
+ {
1247
+ const CCallInfo *ci = &lj_ir_callinfo[id];
1248
+ IRRef args[2];
1249
+ args[0] = ir->op1;
1250
+ args[1] = ir->op2;
1251
+ asm_setupresult(as, ir, ci);
1252
+ asm_gencall(as, ci, args);
1253
+ }
1254
+
1255
+ static void asm_call(ASMState *as, IRIns *ir)
1256
+ {
1257
+ IRRef args[CCI_NARGS_MAX];
1258
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1259
+ asm_collectargs(as, ir, ci, args);
1260
+ asm_setupresult(as, ir, ci);
1261
+ asm_gencall(as, ci, args);
1262
+ }
1263
+
1264
+ #if !LJ_SOFTFP
1265
+ static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1266
+ {
1267
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1268
+ IRRef args[2];
1269
+ args[0] = lref;
1270
+ args[1] = rref;
1271
+ asm_setupresult(as, ir, ci);
1272
+ asm_gencall(as, ci, args);
1273
+ }
1274
+
1275
+ static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1276
+ {
1277
+ IRIns *irp = IR(ir->op1);
1278
+ if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1279
+ IRIns *irpp = IR(irp->op1);
1280
+ if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1281
+ irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1282
+ asm_fppow(as, ir, irpp->op1, irp->op2);
1283
+ return 1;
1284
+ }
1285
+ }
1286
+ return 0;
1287
+ }
1288
+ #endif
1289
+
1290
+ /* -- PHI and loop handling ----------------------------------------------- */
1291
+
1292
+ /* Break a PHI cycle by renaming to a free register (evict if needed). */
1293
+ static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby,
1294
+ RegSet allow)
1295
+ {
1296
+ RegSet candidates = blocked & allow;
1297
+ if (candidates) { /* If this register file has candidates. */
1298
+ /* Note: the set for ra_pick cannot be empty, since each register file
1299
+ ** has some registers never allocated to PHIs.
1300
+ */
1301
+ Reg down, up = ra_pick(as, ~blocked & allow); /* Get a free register. */
1302
+ if (candidates & ~blockedby) /* Optimize shifts, else it's a cycle. */
1303
+ candidates = candidates & ~blockedby;
1304
+ down = rset_picktop(candidates); /* Pick candidate PHI register. */
1305
+ ra_rename(as, down, up); /* And rename it to the free register. */
1306
+ }
1307
+ }
1308
+
1309
+ /* PHI register shuffling.
1310
+ **
1311
+ ** The allocator tries hard to preserve PHI register assignments across
1312
+ ** the loop body. Most of the time this loop does nothing, since there
1313
+ ** are no register mismatches.
1314
+ **
1315
+ ** If a register mismatch is detected and ...
1316
+ ** - the register is currently free: rename it.
1317
+ ** - the register is blocked by an invariant: restore/remat and rename it.
1318
+ ** - Otherwise the register is used by another PHI, so mark it as blocked.
1319
+ **
1320
+ ** The renames are order-sensitive, so just retry the loop if a register
1321
+ ** is marked as blocked, but has been freed in the meantime. A cycle is
1322
+ ** detected if all of the blocked registers are allocated. To break the
1323
+ ** cycle rename one of them to a free register and retry.
1324
+ **
1325
+ ** Note that PHI spill slots are kept in sync and don't need to be shuffled.
1326
+ */
1327
+ static void asm_phi_shuffle(ASMState *as)
1328
+ {
1329
+ RegSet work;
1330
+
1331
+ /* Find and resolve PHI register mismatches. */
1332
+ for (;;) {
1333
+ RegSet blocked = RSET_EMPTY;
1334
+ RegSet blockedby = RSET_EMPTY;
1335
+ RegSet phiset = as->phiset;
1336
+ while (phiset) { /* Check all left PHI operand registers. */
1337
+ Reg r = rset_pickbot(phiset);
1338
+ IRIns *irl = IR(as->phireg[r]);
1339
+ Reg left = irl->r;
1340
+ if (r != left) { /* Mismatch? */
1341
+ if (!rset_test(as->freeset, r)) { /* PHI register blocked? */
1342
+ IRRef ref = regcost_ref(as->cost[r]);
1343
+ /* Blocked by other PHI (w/reg)? */
1344
+ if (!ra_iskref(ref) && irt_ismarked(IR(ref)->t)) {
1345
+ rset_set(blocked, r);
1346
+ if (ra_hasreg(left))
1347
+ rset_set(blockedby, left);
1348
+ left = RID_NONE;
1349
+ } else { /* Otherwise grab register from invariant. */
1350
+ ra_restore(as, ref);
1351
+ checkmclim(as);
1352
+ }
1353
+ }
1354
+ if (ra_hasreg(left)) {
1355
+ ra_rename(as, left, r);
1356
+ checkmclim(as);
1357
+ }
1358
+ }
1359
+ rset_clear(phiset, r);
1360
+ }
1361
+ if (!blocked) break; /* Finished. */
1362
+ if (!(as->freeset & blocked)) { /* Break cycles if none are free. */
1363
+ asm_phi_break(as, blocked, blockedby, RSET_GPR);
1364
+ if (!LJ_SOFTFP) asm_phi_break(as, blocked, blockedby, RSET_FPR);
1365
+ checkmclim(as);
1366
+ } /* Else retry some more renames. */
1367
+ }
1368
+
1369
+ /* Restore/remat invariants whose registers are modified inside the loop. */
1370
+ #if !LJ_SOFTFP
1371
+ work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR;
1372
+ while (work) {
1373
+ Reg r = rset_pickbot(work);
1374
+ ra_restore(as, regcost_ref(as->cost[r]));
1375
+ rset_clear(work, r);
1376
+ checkmclim(as);
1377
+ }
1378
+ #endif
1379
+ work = as->modset & ~(as->freeset | as->phiset);
1380
+ while (work) {
1381
+ Reg r = rset_pickbot(work);
1382
+ ra_restore(as, regcost_ref(as->cost[r]));
1383
+ rset_clear(work, r);
1384
+ checkmclim(as);
1385
+ }
1386
+
1387
+ /* Allocate and save all unsaved PHI regs and clear marks. */
1388
+ work = as->phiset;
1389
+ while (work) {
1390
+ Reg r = rset_picktop(work);
1391
+ IRRef lref = as->phireg[r];
1392
+ IRIns *ir = IR(lref);
1393
+ if (ra_hasspill(ir->s)) { /* Left PHI gained a spill slot? */
1394
+ irt_clearmark(ir->t); /* Handled here, so clear marker now. */
1395
+ ra_alloc1(as, lref, RID2RSET(r));
1396
+ ra_save(as, ir, r); /* Save to spill slot inside the loop. */
1397
+ checkmclim(as);
1398
+ }
1399
+ rset_clear(work, r);
1400
+ }
1401
+ }
1402
+
1403
+ /* Copy unsynced left/right PHI spill slots. Rarely needed. */
1404
+ static void asm_phi_copyspill(ASMState *as)
1405
+ {
1406
+ int need = 0;
1407
+ IRIns *ir;
1408
+ for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--)
1409
+ if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s))
1410
+ need |= irt_isfp(ir->t) ? 2 : 1; /* Unsynced spill slot? */
1411
+ if ((need & 1)) { /* Copy integer spill slots. */
1412
+ #if !LJ_TARGET_X86ORX64
1413
+ Reg r = RID_TMP;
1414
+ #else
1415
+ Reg r = RID_RET;
1416
+ if ((as->freeset & RSET_GPR))
1417
+ r = rset_pickbot((as->freeset & RSET_GPR));
1418
+ else
1419
+ emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
1420
+ #endif
1421
+ for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
1422
+ if (ra_hasspill(ir->s)) {
1423
+ IRIns *irl = IR(ir->op1);
1424
+ if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) {
1425
+ emit_spstore(as, irl, r, sps_scale(irl->s));
1426
+ emit_spload(as, ir, r, sps_scale(ir->s));
1427
+ checkmclim(as);
1428
+ }
1429
+ }
1430
+ }
1431
+ #if LJ_TARGET_X86ORX64
1432
+ if (!rset_test(as->freeset, r))
1433
+ emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
1434
+ #endif
1435
+ }
1436
+ #if !LJ_SOFTFP
1437
+ if ((need & 2)) { /* Copy FP spill slots. */
1438
+ #if LJ_TARGET_X86
1439
+ Reg r = RID_XMM0;
1440
+ #else
1441
+ Reg r = RID_FPRET;
1442
+ #endif
1443
+ if ((as->freeset & RSET_FPR))
1444
+ r = rset_pickbot((as->freeset & RSET_FPR));
1445
+ if (!rset_test(as->freeset, r))
1446
+ emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
1447
+ for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) {
1448
+ if (ra_hasspill(ir->s)) {
1449
+ IRIns *irl = IR(ir->op1);
1450
+ if (ra_hasspill(irl->s) && irt_isfp(ir->t)) {
1451
+ emit_spstore(as, irl, r, sps_scale(irl->s));
1452
+ emit_spload(as, ir, r, sps_scale(ir->s));
1453
+ checkmclim(as);
1454
+ }
1455
+ }
1456
+ }
1457
+ if (!rset_test(as->freeset, r))
1458
+ emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP);
1459
+ }
1460
+ #endif
1461
+ }
1462
+
1463
+ /* Emit renames for left PHIs which are only spilled outside the loop. */
1464
+ static void asm_phi_fixup(ASMState *as)
1465
+ {
1466
+ RegSet work = as->phiset;
1467
+ while (work) {
1468
+ Reg r = rset_picktop(work);
1469
+ IRRef lref = as->phireg[r];
1470
+ IRIns *ir = IR(lref);
1471
+ if (irt_ismarked(ir->t)) {
1472
+ irt_clearmark(ir->t);
1473
+ /* Left PHI gained a spill slot before the loop? */
1474
+ if (ra_hasspill(ir->s)) {
1475
+ IRRef ren;
1476
+ lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
1477
+ ren = tref_ref(lj_ir_emit(as->J));
1478
+ as->ir = as->T->ir; /* The IR may have been reallocated. */
1479
+ IR(ren)->r = (uint8_t)r;
1480
+ IR(ren)->s = SPS_NONE;
1481
+ }
1482
+ }
1483
+ rset_clear(work, r);
1484
+ }
1485
+ }
1486
+
1487
+ /* Setup right PHI reference. */
1488
+ static void asm_phi(ASMState *as, IRIns *ir)
1489
+ {
1490
+ RegSet allow = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) &
1491
+ ~as->phiset;
1492
+ RegSet afree = (as->freeset & allow);
1493
+ IRIns *irl = IR(ir->op1);
1494
+ IRIns *irr = IR(ir->op2);
1495
+ if (ir->r == RID_SINK) /* Sink PHI. */
1496
+ return;
1497
+ /* Spill slot shuffling is not implemented yet (but rarely needed). */
1498
+ if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
1499
+ lj_trace_err(as->J, LJ_TRERR_NYIPHI);
1500
+ /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */
1501
+ if ((afree & (afree-1))) { /* Two or more free registers? */
1502
+ Reg r;
1503
+ if (ra_noreg(irr->r)) { /* Get a register for the right PHI. */
1504
+ r = ra_allocref(as, ir->op2, allow);
1505
+ } else { /* Duplicate right PHI, need a copy (rare). */
1506
+ r = ra_scratch(as, allow);
1507
+ emit_movrr(as, irr, r, irr->r);
1508
+ }
1509
+ ir->r = (uint8_t)r;
1510
+ rset_set(as->phiset, r);
1511
+ as->phireg[r] = (IRRef1)ir->op1;
1512
+ irt_setmark(irl->t); /* Marks left PHIs _with_ register. */
1513
+ if (ra_noreg(irl->r))
1514
+ ra_sethint(irl->r, r); /* Set register hint for left PHI. */
1515
+ } else { /* Otherwise allocate a spill slot. */
1516
+ /* This is overly restrictive, but it triggers only on synthetic code. */
1517
+ if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
1518
+ lj_trace_err(as->J, LJ_TRERR_NYIPHI);
1519
+ ra_spill(as, ir);
1520
+ irr->s = ir->s; /* Set right PHI spill slot. Sync left slot later. */
1521
+ }
1522
+ }
1523
+
1524
+ static void asm_loop_fixup(ASMState *as);
1525
+
1526
+ /* Middle part of a loop. */
1527
+ static void asm_loop(ASMState *as)
1528
+ {
1529
+ MCode *mcspill;
1530
+ /* LOOP is a guard, so the snapno is up to date. */
1531
+ as->loopsnapno = as->snapno;
1532
+ if (as->gcsteps)
1533
+ asm_gc_check(as);
1534
+ /* LOOP marks the transition from the variant to the invariant part. */
1535
+ as->flagmcp = as->invmcp = NULL;
1536
+ as->sectref = 0;
1537
+ if (!neverfuse(as)) as->fuseref = 0;
1538
+ asm_phi_shuffle(as);
1539
+ mcspill = as->mcp;
1540
+ asm_phi_copyspill(as);
1541
+ asm_loop_fixup(as);
1542
+ as->mcloop = as->mcp;
1543
+ RA_DBGX((as, "===== LOOP ====="));
1544
+ if (!as->realign) RA_DBG_FLUSH();
1545
+ if (as->mcp != mcspill)
1546
+ emit_jmp(as, mcspill);
1547
+ }
1548
+
1549
+ /* -- Target-specific assembler ------------------------------------------- */
1550
+
1551
+ #if LJ_TARGET_X86ORX64
1552
+ #include "lj_asm_x86.h"
1553
+ #elif LJ_TARGET_ARM
1554
+ #include "lj_asm_arm.h"
1555
+ #elif LJ_TARGET_PPC
1556
+ #include "lj_asm_ppc.h"
1557
+ #elif LJ_TARGET_MIPS
1558
+ #include "lj_asm_mips.h"
1559
+ #else
1560
+ #error "Missing assembler for target CPU"
1561
+ #endif
1562
+
1563
+ /* -- Instruction dispatch ------------------------------------------------ */
1564
+
1565
+ /* Assemble a single instruction. */
1566
+ static void asm_ir(ASMState *as, IRIns *ir)
1567
+ {
1568
+ switch ((IROp)ir->o) {
1569
+ /* Miscellaneous ops. */
1570
+ case IR_LOOP: asm_loop(as); break;
1571
+ case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1572
+ case IR_USE:
1573
+ ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1574
+ case IR_PHI: asm_phi(as, ir); break;
1575
+ case IR_HIOP: asm_hiop(as, ir); break;
1576
+ case IR_GCSTEP: asm_gcstep(as, ir); break;
1577
+ case IR_PROF: asm_prof(as, ir); break;
1578
+
1579
+ /* Guarded assertions. */
1580
+ case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1581
+ case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1582
+ case IR_ABC:
1583
+ asm_comp(as, ir);
1584
+ break;
1585
+ case IR_EQ: case IR_NE:
1586
+ if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1587
+ as->curins--;
1588
+ asm_href(as, ir-1, (IROp)ir->o);
1589
+ } else {
1590
+ asm_equal(as, ir);
1591
+ }
1592
+ break;
1593
+
1594
+ case IR_RETF: asm_retf(as, ir); break;
1595
+
1596
+ /* Bit ops. */
1597
+ case IR_BNOT: asm_bnot(as, ir); break;
1598
+ case IR_BSWAP: asm_bswap(as, ir); break;
1599
+ case IR_BAND: asm_band(as, ir); break;
1600
+ case IR_BOR: asm_bor(as, ir); break;
1601
+ case IR_BXOR: asm_bxor(as, ir); break;
1602
+ case IR_BSHL: asm_bshl(as, ir); break;
1603
+ case IR_BSHR: asm_bshr(as, ir); break;
1604
+ case IR_BSAR: asm_bsar(as, ir); break;
1605
+ case IR_BROL: asm_brol(as, ir); break;
1606
+ case IR_BROR: asm_bror(as, ir); break;
1607
+
1608
+ /* Arithmetic ops. */
1609
+ case IR_ADD: asm_add(as, ir); break;
1610
+ case IR_SUB: asm_sub(as, ir); break;
1611
+ case IR_MUL: asm_mul(as, ir); break;
1612
+ case IR_DIV: asm_div(as, ir); break;
1613
+ case IR_MOD: asm_mod(as, ir); break;
1614
+ case IR_POW: asm_pow(as, ir); break;
1615
+ case IR_NEG: asm_neg(as, ir); break;
1616
+ case IR_ABS: asm_abs(as, ir); break;
1617
+ case IR_ATAN2: asm_atan2(as, ir); break;
1618
+ case IR_LDEXP: asm_ldexp(as, ir); break;
1619
+ case IR_MIN: asm_min(as, ir); break;
1620
+ case IR_MAX: asm_max(as, ir); break;
1621
+ case IR_FPMATH: asm_fpmath(as, ir); break;
1622
+
1623
+ /* Overflow-checking arithmetic ops. */
1624
+ case IR_ADDOV: asm_addov(as, ir); break;
1625
+ case IR_SUBOV: asm_subov(as, ir); break;
1626
+ case IR_MULOV: asm_mulov(as, ir); break;
1627
+
1628
+ /* Memory references. */
1629
+ case IR_AREF: asm_aref(as, ir); break;
1630
+ case IR_HREF: asm_href(as, ir, 0); break;
1631
+ case IR_HREFK: asm_hrefk(as, ir); break;
1632
+ case IR_NEWREF: asm_newref(as, ir); break;
1633
+ case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1634
+ case IR_FREF: asm_fref(as, ir); break;
1635
+ case IR_STRREF: asm_strref(as, ir); break;
1636
+ case IR_LREF: asm_lref(as, ir); break;
1637
+
1638
+ /* Loads and stores. */
1639
+ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1640
+ asm_ahuvload(as, ir);
1641
+ break;
1642
+ case IR_FLOAD: asm_fload(as, ir); break;
1643
+ case IR_XLOAD: asm_xload(as, ir); break;
1644
+ case IR_SLOAD: asm_sload(as, ir); break;
1645
+
1646
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1647
+ case IR_FSTORE: asm_fstore(as, ir); break;
1648
+ case IR_XSTORE: asm_xstore(as, ir); break;
1649
+
1650
+ /* Allocations. */
1651
+ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1652
+ case IR_TNEW: asm_tnew(as, ir); break;
1653
+ case IR_TDUP: asm_tdup(as, ir); break;
1654
+ case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1655
+
1656
+ /* Buffer operations. */
1657
+ case IR_BUFHDR: asm_bufhdr(as, ir); break;
1658
+ case IR_BUFPUT: asm_bufput(as, ir); break;
1659
+ case IR_BUFSTR: asm_bufstr(as, ir); break;
1660
+
1661
+ /* Write barriers. */
1662
+ case IR_TBAR: asm_tbar(as, ir); break;
1663
+ case IR_OBAR: asm_obar(as, ir); break;
1664
+
1665
+ /* Type conversions. */
1666
+ case IR_TOBIT: asm_tobit(as, ir); break;
1667
+ case IR_CONV: asm_conv(as, ir); break;
1668
+ case IR_TOSTR: asm_tostr(as, ir); break;
1669
+ case IR_STRTO: asm_strto(as, ir); break;
1670
+
1671
+ /* Calls. */
1672
+ case IR_CALLA:
1673
+ as->gcsteps++;
1674
+ /* fallthrough */
1675
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1676
+ case IR_CALLXS: asm_callx(as, ir); break;
1677
+ case IR_CARG: break;
1678
+
1679
+ default:
1680
+ setintV(&as->J->errinfo, ir->o);
1681
+ lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1682
+ break;
1683
+ }
1684
+ }
1685
+
1686
+ /* -- Head of trace ------------------------------------------------------- */
1687
+
1688
+ /* Head of a root trace. */
1689
+ static void asm_head_root(ASMState *as)
1690
+ {
1691
+ int32_t spadj;
1692
+ asm_head_root_base(as);
1693
+ emit_setvmstate(as, (int32_t)as->T->traceno);
1694
+ spadj = asm_stack_adjust(as);
1695
+ as->T->spadjust = (uint16_t)spadj;
1696
+ emit_spsub(as, spadj);
1697
+ /* Root traces assume a checked stack for the starting proto. */
1698
+ as->T->topslot = gcref(as->T->startpt)->pt.framesize;
1699
+ }
1700
+
1701
+ /* Head of a side trace.
1702
+ **
1703
+ ** The current simplistic algorithm requires that all slots inherited
1704
+ ** from the parent are live in a register between pass 2 and pass 3. This
1705
+ ** avoids the complexity of stack slot shuffling. But of course this may
1706
+ ** overflow the register set in some cases and cause the dreaded error:
1707
+ ** "NYI: register coalescing too complex". A refined algorithm is needed.
1708
+ */
1709
+ static void asm_head_side(ASMState *as)
1710
+ {
1711
+ IRRef1 sloadins[RID_MAX];
1712
+ RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */
1713
+ RegSet live = RSET_EMPTY; /* Live parent registers. */
1714
+ IRIns *irp = &as->parent->ir[REF_BASE]; /* Parent base. */
1715
+ int32_t spadj, spdelta;
1716
+ int pass2 = 0;
1717
+ int pass3 = 0;
1718
+ IRRef i;
1719
+
1720
+ if (as->snapno && as->topslot > as->parent->topslot) {
1721
+ /* Force snap #0 alloc to prevent register overwrite in stack check. */
1722
+ as->snapno = 0;
1723
+ asm_snap_alloc(as);
1724
+ }
1725
+ allow = asm_head_side_base(as, irp, allow);
1726
+
1727
+ /* Scan all parent SLOADs and collect register dependencies. */
1728
+ for (i = as->stopins; i > REF_BASE; i--) {
1729
+ IRIns *ir = IR(i);
1730
+ RegSP rs;
1731
+ lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
1732
+ (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
1733
+ rs = as->parentmap[i - REF_FIRST];
1734
+ if (ra_hasreg(ir->r)) {
1735
+ rset_clear(allow, ir->r);
1736
+ if (ra_hasspill(ir->s)) {
1737
+ ra_save(as, ir, ir->r);
1738
+ checkmclim(as);
1739
+ }
1740
+ } else if (ra_hasspill(ir->s)) {
1741
+ irt_setmark(ir->t);
1742
+ pass2 = 1;
1743
+ }
1744
+ if (ir->r == rs) { /* Coalesce matching registers right now. */
1745
+ ra_free(as, ir->r);
1746
+ } else if (ra_hasspill(regsp_spill(rs))) {
1747
+ if (ra_hasreg(ir->r))
1748
+ pass3 = 1;
1749
+ } else if (ra_used(ir)) {
1750
+ sloadins[rs] = (IRRef1)i;
1751
+ rset_set(live, rs); /* Block live parent register. */
1752
+ }
1753
+ }
1754
+
1755
+ /* Calculate stack frame adjustment. */
1756
+ spadj = asm_stack_adjust(as);
1757
+ spdelta = spadj - (int32_t)as->parent->spadjust;
1758
+ if (spdelta < 0) { /* Don't shrink the stack frame. */
1759
+ spadj = (int32_t)as->parent->spadjust;
1760
+ spdelta = 0;
1761
+ }
1762
+ as->T->spadjust = (uint16_t)spadj;
1763
+
1764
+ /* Reload spilled target registers. */
1765
+ if (pass2) {
1766
+ for (i = as->stopins; i > REF_BASE; i--) {
1767
+ IRIns *ir = IR(i);
1768
+ if (irt_ismarked(ir->t)) {
1769
+ RegSet mask;
1770
+ Reg r;
1771
+ RegSP rs;
1772
+ irt_clearmark(ir->t);
1773
+ rs = as->parentmap[i - REF_FIRST];
1774
+ if (!ra_hasspill(regsp_spill(rs)))
1775
+ ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */
1776
+ else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
1777
+ continue; /* Same spill slot, do nothing. */
1778
+ mask = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
1779
+ if (mask == RSET_EMPTY)
1780
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1781
+ r = ra_allocref(as, i, mask);
1782
+ ra_save(as, ir, r);
1783
+ rset_clear(allow, r);
1784
+ if (r == rs) { /* Coalesce matching registers right now. */
1785
+ ra_free(as, r);
1786
+ rset_clear(live, r);
1787
+ } else if (ra_hasspill(regsp_spill(rs))) {
1788
+ pass3 = 1;
1789
+ }
1790
+ checkmclim(as);
1791
+ }
1792
+ }
1793
+ }
1794
+
1795
+ /* Store trace number and adjust stack frame relative to the parent. */
1796
+ emit_setvmstate(as, (int32_t)as->T->traceno);
1797
+ emit_spsub(as, spdelta);
1798
+
1799
+ #if !LJ_TARGET_X86ORX64
1800
+ /* Restore BASE register from parent spill slot. */
1801
+ if (ra_hasspill(irp->s))
1802
+ emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s));
1803
+ #endif
1804
+
1805
+ /* Restore target registers from parent spill slots. */
1806
+ if (pass3) {
1807
+ RegSet work = ~as->freeset & RSET_ALL;
1808
+ while (work) {
1809
+ Reg r = rset_pickbot(work);
1810
+ IRRef ref = regcost_ref(as->cost[r]);
1811
+ RegSP rs = as->parentmap[ref - REF_FIRST];
1812
+ rset_clear(work, r);
1813
+ if (ra_hasspill(regsp_spill(rs))) {
1814
+ int32_t ofs = sps_scale(regsp_spill(rs));
1815
+ ra_free(as, r);
1816
+ emit_spload(as, IR(ref), r, ofs);
1817
+ checkmclim(as);
1818
+ }
1819
+ }
1820
+ }
1821
+
1822
+ /* Shuffle registers to match up target regs with parent regs. */
1823
+ for (;;) {
1824
+ RegSet work;
1825
+
1826
+ /* Repeatedly coalesce free live registers by moving to their target. */
1827
+ while ((work = as->freeset & live) != RSET_EMPTY) {
1828
+ Reg rp = rset_pickbot(work);
1829
+ IRIns *ir = IR(sloadins[rp]);
1830
+ rset_clear(live, rp);
1831
+ rset_clear(allow, rp);
1832
+ ra_free(as, ir->r);
1833
+ emit_movrr(as, ir, ir->r, rp);
1834
+ checkmclim(as);
1835
+ }
1836
+
1837
+ /* We're done if no live registers remain. */
1838
+ if (live == RSET_EMPTY)
1839
+ break;
1840
+
1841
+ /* Break cycles by renaming one target to a temp. register. */
1842
+ if (live & RSET_GPR) {
1843
+ RegSet tmpset = as->freeset & ~live & allow & RSET_GPR;
1844
+ if (tmpset == RSET_EMPTY)
1845
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1846
+ ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset));
1847
+ }
1848
+ if (!LJ_SOFTFP && (live & RSET_FPR)) {
1849
+ RegSet tmpset = as->freeset & ~live & allow & RSET_FPR;
1850
+ if (tmpset == RSET_EMPTY)
1851
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1852
+ ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset));
1853
+ }
1854
+ checkmclim(as);
1855
+ /* Continue with coalescing to fix up the broken cycle(s). */
1856
+ }
1857
+
1858
+ /* Inherit top stack slot already checked by parent trace. */
1859
+ as->T->topslot = as->parent->topslot;
1860
+ if (as->topslot > as->T->topslot) { /* Need to check for higher slot? */
1861
+ #ifdef EXITSTATE_CHECKEXIT
1862
+ /* Highest exit + 1 indicates stack check. */
1863
+ ExitNo exitno = as->T->nsnap;
1864
+ #else
1865
+ /* Reuse the parent exit in the context of the parent trace. */
1866
+ ExitNo exitno = as->J->exitno;
1867
+ #endif
1868
+ as->T->topslot = (uint8_t)as->topslot; /* Remember for child traces. */
1869
+ asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno);
1870
+ }
1871
+ }
1872
+
1873
+ /* -- Tail of trace ------------------------------------------------------- */
1874
+
1875
+ /* Get base slot for a snapshot. */
1876
+ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
1877
+ {
1878
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
1879
+ MSize n;
1880
+ for (n = snap->nent; n > 0; n--) {
1881
+ SnapEntry sn = map[n-1];
1882
+ if ((sn & SNAP_FRAME)) {
1883
+ *gotframe = 1;
1884
+ return snap_slot(sn);
1885
+ }
1886
+ }
1887
+ return 0;
1888
+ }
1889
+
1890
+ /* Link to another trace. */
1891
+ static void asm_tail_link(ASMState *as)
1892
+ {
1893
+ SnapNo snapno = as->T->nsnap-1; /* Last snapshot. */
1894
+ SnapShot *snap = &as->T->snap[snapno];
1895
+ int gotframe = 0;
1896
+ BCReg baseslot = asm_baseslot(as, snap, &gotframe);
1897
+
1898
+ as->topslot = snap->topslot;
1899
+ checkmclim(as);
1900
+ ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
1901
+
1902
+ if (as->T->link == 0) {
1903
+ /* Setup fixed registers for exit to interpreter. */
1904
+ const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
1905
+ int32_t mres;
1906
+ if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
1907
+ BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
1908
+ if (bc_isret(bc_op(*retpc)))
1909
+ pc = retpc;
1910
+ }
1911
+ ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
1912
+ ra_allockreg(as, i32ptr(pc), RID_LPC);
1913
+ mres = (int32_t)(snap->nslots - baseslot);
1914
+ switch (bc_op(*pc)) {
1915
+ case BC_CALLM: case BC_CALLMT:
1916
+ mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
1917
+ case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
1918
+ case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
1919
+ default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
1920
+ }
1921
+ ra_allockreg(as, mres, RID_RET); /* Return MULTRES or 0. */
1922
+ } else if (baseslot) {
1923
+ /* Save modified BASE for linking to trace with higher start frame. */
1924
+ emit_setgl(as, RID_BASE, jit_base);
1925
+ }
1926
+ emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
1927
+
1928
+ /* Sync the interpreter state with the on-trace state. */
1929
+ asm_stack_restore(as, snap);
1930
+
1931
+ /* Root traces that add frames need to check the stack at the end. */
1932
+ if (!as->parent && gotframe)
1933
+ asm_stack_check(as, as->topslot, NULL, as->freeset & RSET_GPR, snapno);
1934
+ }
1935
+
1936
+ /* -- Trace setup --------------------------------------------------------- */
1937
+
1938
+ /* Clear reg/sp for all instructions and add register hints. */
1939
+ static void asm_setup_regsp(ASMState *as)
1940
+ {
1941
+ GCtrace *T = as->T;
1942
+ int sink = T->sinktags;
1943
+ IRRef nins = T->nins;
1944
+ IRIns *ir, *lastir;
1945
+ int inloop;
1946
+ #if LJ_TARGET_ARM
1947
+ uint32_t rload = 0xa6402a64;
1948
+ #endif
1949
+
1950
+ ra_setup(as);
1951
+
1952
+ /* Clear reg/sp for constants. */
1953
+ for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
1954
+ ir->prev = REGSP_INIT;
1955
+
1956
+ /* REF_BASE is used for implicit references to the BASE register. */
1957
+ lastir->prev = REGSP_HINT(RID_BASE);
1958
+
1959
+ ir = IR(nins-1);
1960
+ if (ir->o == IR_RENAME) {
1961
+ do { ir--; nins--; } while (ir->o == IR_RENAME);
1962
+ T->nins = nins; /* Remove any renames left over from ASM restart. */
1963
+ }
1964
+ as->snaprename = nins;
1965
+ as->snapref = nins;
1966
+ as->snapno = T->nsnap;
1967
+
1968
+ as->stopins = REF_BASE;
1969
+ as->orignins = nins;
1970
+ as->curins = nins;
1971
+
1972
+ /* Setup register hints for parent link instructions. */
1973
+ ir = IR(REF_FIRST);
1974
+ if (as->parent) {
1975
+ uint16_t *p;
1976
+ lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
1977
+ if (lastir - ir > LJ_MAX_JSLOTS)
1978
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1979
+ as->stopins = (IRRef)((lastir-1) - as->ir);
1980
+ for (p = as->parentmap; ir < lastir; ir++) {
1981
+ RegSP rs = ir->prev;
1982
+ *p++ = (uint16_t)rs; /* Copy original parent RegSP to parentmap. */
1983
+ if (!ra_hasspill(regsp_spill(rs)))
1984
+ ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
1985
+ else
1986
+ ir->prev = REGSP_INIT;
1987
+ }
1988
+ }
1989
+
1990
+ inloop = 0;
1991
+ as->evenspill = SPS_FIRST;
1992
+ for (lastir = IR(nins); ir < lastir; ir++) {
1993
+ if (sink) {
1994
+ if (ir->r == RID_SINK)
1995
+ continue;
1996
+ if (ir->r == RID_SUNK) { /* Revert after ASM restart. */
1997
+ ir->r = RID_SINK;
1998
+ continue;
1999
+ }
2000
+ }
2001
+ switch (ir->o) {
2002
+ case IR_LOOP:
2003
+ inloop = 1;
2004
+ break;
2005
+ #if LJ_TARGET_ARM
2006
+ case IR_SLOAD:
2007
+ if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP))
2008
+ break;
2009
+ /* fallthrough */
2010
+ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2011
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) break;
2012
+ ir->prev = (uint16_t)REGSP_HINT((rload & 15));
2013
+ rload = lj_ror(rload, 4);
2014
+ continue;
2015
+ #endif
2016
+ case IR_CALLXS: {
2017
+ CCallInfo ci;
2018
+ ci.flags = asm_callx_flags(as, ir);
2019
+ ir->prev = asm_setup_call_slots(as, ir, &ci);
2020
+ if (inloop)
2021
+ as->modset |= RSET_SCRATCH;
2022
+ continue;
2023
+ }
2024
+ case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
2025
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
2026
+ ir->prev = asm_setup_call_slots(as, ir, ci);
2027
+ if (inloop)
2028
+ as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
2029
+ (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
2030
+ continue;
2031
+ }
2032
+ #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
2033
+ case IR_HIOP:
2034
+ switch ((ir-1)->o) {
2035
+ #if LJ_SOFTFP && LJ_TARGET_ARM
2036
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2037
+ if (ra_hashint((ir-1)->r)) {
2038
+ ir->prev = (ir-1)->prev + 1;
2039
+ continue;
2040
+ }
2041
+ break;
2042
+ #endif
2043
+ #if !LJ_SOFTFP && LJ_NEED_FP64
2044
+ case IR_CONV:
2045
+ if (irt_isfp((ir-1)->t)) {
2046
+ ir->prev = REGSP_HINT(RID_FPRET);
2047
+ continue;
2048
+ }
2049
+ /* fallthrough */
2050
+ #endif
2051
+ case IR_CALLN: case IR_CALLXS:
2052
+ #if LJ_SOFTFP
2053
+ case IR_MIN: case IR_MAX:
2054
+ #endif
2055
+ (ir-1)->prev = REGSP_HINT(RID_RETLO);
2056
+ ir->prev = REGSP_HINT(RID_RETHI);
2057
+ continue;
2058
+ default:
2059
+ break;
2060
+ }
2061
+ break;
2062
+ #endif
2063
+ #if LJ_SOFTFP
2064
+ case IR_MIN: case IR_MAX:
2065
+ if ((ir+1)->o != IR_HIOP) break;
2066
+ /* fallthrough */
2067
+ #endif
2068
+ /* C calls evict all scratch regs and return results in RID_RET. */
2069
+ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
2070
+ if (REGARG_NUMGPR < 3 && as->evenspill < 3)
2071
+ as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
2072
+ #if LJ_TARGET_X86 && LJ_HASFFI
2073
+ if (0) {
2074
+ case IR_CNEW:
2075
+ if (ir->op2 != REF_NIL && as->evenspill < 4)
2076
+ as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2077
+ }
2078
+ #else
2079
+ case IR_CNEW:
2080
+ #endif
2081
+ case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2082
+ case IR_BUFSTR:
2083
+ ir->prev = REGSP_HINT(RID_RET);
2084
+ if (inloop)
2085
+ as->modset = RSET_SCRATCH;
2086
+ continue;
2087
+ case IR_STRTO: case IR_OBAR:
2088
+ if (inloop)
2089
+ as->modset = RSET_SCRATCH;
2090
+ break;
2091
+ #if !LJ_SOFTFP
2092
+ case IR_ATAN2:
2093
+ #if LJ_TARGET_X86
2094
+ if (as->evenspill < 4) /* Leave room to call atan2(). */
2095
+ as->evenspill = 4;
2096
+ #endif
2097
+ #if !LJ_TARGET_X86ORX64
2098
+ case IR_LDEXP:
2099
+ #endif
2100
+ #endif
2101
+ case IR_POW:
2102
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
2103
+ if (inloop)
2104
+ as->modset |= RSET_SCRATCH;
2105
+ #if LJ_TARGET_X86
2106
+ break;
2107
+ #else
2108
+ ir->prev = REGSP_HINT(RID_FPRET);
2109
+ continue;
2110
+ #endif
2111
+ }
2112
+ /* fallthrough for integer POW */
2113
+ case IR_DIV: case IR_MOD:
2114
+ if (!irt_isnum(ir->t)) {
2115
+ ir->prev = REGSP_HINT(RID_RET);
2116
+ if (inloop)
2117
+ as->modset |= (RSET_SCRATCH & RSET_GPR);
2118
+ continue;
2119
+ }
2120
+ break;
2121
+ case IR_FPMATH:
2122
+ #if LJ_TARGET_X86ORX64
2123
+ if (ir->op2 <= IRFPM_TRUNC) {
2124
+ if (!(as->flags & JIT_F_SSE4_1)) {
2125
+ ir->prev = REGSP_HINT(RID_XMM0);
2126
+ if (inloop)
2127
+ as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2128
+ continue;
2129
+ }
2130
+ break;
2131
+ } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
2132
+ if (as->evenspill < 4) /* Leave room to call pow(). */
2133
+ as->evenspill = 4;
2134
+ }
2135
+ #endif
2136
+ if (inloop)
2137
+ as->modset |= RSET_SCRATCH;
2138
+ #if LJ_TARGET_X86
2139
+ break;
2140
+ #else
2141
+ ir->prev = REGSP_HINT(RID_FPRET);
2142
+ continue;
2143
+ #endif
2144
+ #if LJ_TARGET_X86ORX64
2145
+ /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
2146
+ case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
2147
+ if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
2148
+ IR(ir->op2)->r = REGSP_HINT(RID_ECX);
2149
+ if (inloop)
2150
+ rset_set(as->modset, RID_ECX);
2151
+ }
2152
+ break;
2153
+ #endif
2154
+ /* Do not propagate hints across type conversions or loads. */
2155
+ case IR_TOBIT:
2156
+ case IR_XLOAD:
2157
+ #if !LJ_TARGET_ARM
2158
+ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2159
+ #endif
2160
+ break;
2161
+ case IR_CONV:
2162
+ if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM ||
2163
+ (ir->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
2164
+ break;
2165
+ /* fallthrough */
2166
+ default:
2167
+ /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
2168
+ if (irref_isk(ir->op2) && !irref_isk(ir->op1) &&
2169
+ ra_hashint(regsp_reg(IR(ir->op1)->prev))) {
2170
+ ir->prev = IR(ir->op1)->prev;
2171
+ continue;
2172
+ }
2173
+ break;
2174
+ }
2175
+ ir->prev = REGSP_INIT;
2176
+ }
2177
+ if ((as->evenspill & 1))
2178
+ as->oddspill = as->evenspill++;
2179
+ else
2180
+ as->oddspill = 0;
2181
+ }
2182
+
2183
+ /* -- Assembler core ------------------------------------------------------ */
2184
+
2185
+ /* Assemble a trace. Emits machine code for T backwards, from the last IR instruction towards the trace head, retrying while as->realign is set; then stores entry point, loop offset and code size in T. */
2186
+ void lj_asm_trace(jit_State *J, GCtrace *T)
2187
+ {
2188
+ ASMState as_;
2189
+ ASMState *as = &as_; /* Assembler state lives on the C stack for the duration of this call. */
2190
+ MCode *origtop; /* Top of the reserved MCode area as returned by lj_mcode_reserve(). */
2191
+
2192
+ /* Ensure an initialized instruction beyond the last one for HIOP checks. */
2193
+ J->cur.nins = lj_ir_nextins(J);
2194
+ J->cur.ir[J->cur.nins].o = IR_NOP;
2195
+
2196
+ /* Setup initial state. Copy some fields to reduce indirections. */
2197
+ as->J = J;
2198
+ as->T = T;
2199
+ as->ir = T->ir;
2200
+ as->flags = J->flags;
2201
+ as->loopref = J->loopref;
2202
+ as->realign = NULL; /* Non-NULL after a pass requests another pass (see loop condition below). */
2203
+ as->loopinv = 0;
2204
+ as->parent = J->parent ? traceref(J, J->parent) : NULL; /* NULL when J->parent is zero; selects asm_head_root() below. */
2205
+
2206
+ /* Reserve MCode memory. */
2207
+ as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); /* origtop is kept because as->mctop may change later. */
2208
+ as->mcp = as->mctop; /* MCode pointer starts at the top of the reserved area. */
2209
+ as->mclim = as->mcbot + MCLIM_REDZONE; /* NOTE(review): presumably the limit tested by checkmclim() -- confirm. */
2210
+ asm_setup_target(as);
2211
+
2212
+ do {
2213
+ as->mcp = as->mctop; /* Every (re)try restarts emission from the top. */
2214
+ #ifdef LUA_USE_ASSERT
2215
+ as->mcp_prev = as->mcp;
2216
+ #endif
2217
+ as->curins = T->nins; /* One past the last instruction; the loop below decrements first. */
2218
+ RA_DBG_START();
2219
+ RA_DBGX((as, "===== STOP ====="));
2220
+
2221
+ /* General trace setup. Emit tail of trace. */
2222
+ asm_tail_prep(as);
2223
+ as->mcloop = NULL;
2224
+ as->flagmcp = NULL;
2225
+ as->topslot = 0;
2226
+ as->gcsteps = 0;
2227
+ as->sectref = as->loopref;
2228
+ as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; /* FUSE_DISABLED unless the JIT_F_OPT_FUSE flag is set. */
2229
+ asm_setup_regsp(as);
2230
+ if (!as->loopref) /* loopref == 0: presumably a trace without a loop part -- confirm. */
2231
+ asm_tail_link(as);
2232
+
2233
+ /* Assemble a trace in linear backwards order. */
2234
+ for (as->curins--; as->curins > as->stopins; as->curins--) { /* as->stopins is not assigned in this function; presumably set during setup above. */
2235
+ IRIns *ir = IR(as->curins);
2236
+ lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */
2237
+ if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
2238
+ continue; /* Dead-code elimination can be soooo easy. */
2239
+ if (irt_isguard(ir->t))
2240
+ asm_snap_prep(as); /* Guards get snapshot preparation before they are assembled. */
2241
+ RA_DBG_REF();
2242
+ checkmclim(as);
2243
+ asm_ir(as, ir);
2244
+ }
2245
+ } while (as->realign); /* Retry in case the MCode needs to be realigned. */
2246
+
2247
+ /* Emit head of trace. */
2248
+ RA_DBG_REF();
2249
+ checkmclim(as);
2250
+ if (as->gcsteps > 0) {
2251
+ as->curins = as->T->snap[0].ref; /* Use the IR reference of the first snapshot for the GC check. */
2252
+ asm_snap_prep(as); /* The GC check is a guard. */
2253
+ asm_gc_check(as);
2254
+ }
2255
+ ra_evictk(as);
2256
+ if (as->parent)
2257
+ asm_head_side(as); /* Trace has a parent: side-trace head. */
2258
+ else
2259
+ asm_head_root(as); /* No parent: root-trace head. */
2260
+ asm_phi_fixup(as);
2261
+
2262
+ RA_DBGX((as, "===== START ===="));
2263
+ RA_DBG_FLUSH();
2264
+ if (as->freeset != RSET_ALL) /* Sanity check: the free set must equal RSET_ALL once the head is emitted. */
2265
+ lj_trace_err(as->J, LJ_TRERR_BADRA); /* Ouch! Should never happen. */
2266
+
2267
+ /* Set trace entry point before fixing up tail to allow link to self. */
2268
+ T->mcode = as->mcp;
2269
+ T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; /* Byte offset of the loop start from the entry point; 0 if as->mcloop is NULL. */
2270
+ if (!as->loopref)
2271
+ asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
2272
+ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); /* Size in bytes from entry point to the (possibly adjusted) top. */
2273
+ lj_mcode_sync(T->mcode, origtop); /* Called with the original top, not the possibly changed as->mctop. */
2274
+ }
2275
+
2276
+ #undef IR
2277
+
2278
+ #endif