immunio 0.15.4 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (454)
  1. checksums.yaml +4 -4
  2. data/LICENSE +0 -27
  3. data/ext/immunio/Rakefile +9 -0
  4. data/lib/immunio/plugins/active_record.rb +1 -1
  5. data/lib/immunio/plugins/active_record_relation.rb +1 -1
  6. data/lib/immunio/plugins/environment_reporter.rb +20 -0
  7. data/lib/immunio/rufus_lua_ext/ref.rb +1 -3
  8. data/lib/immunio/version.rb +1 -1
  9. data/lib/immunio/vm.rb +1 -2
  10. data/lua-hooks/Makefile +97 -0
  11. data/lua-hooks/ext/all.c +41 -52
  12. data/lua-hooks/ext/all.o +0 -0
  13. data/lua-hooks/ext/libinjection/libinjection_html5.o +0 -0
  14. data/lua-hooks/ext/libinjection/libinjection_sqli.o +0 -0
  15. data/lua-hooks/ext/libinjection/libinjection_xss.o +0 -0
  16. data/lua-hooks/ext/libinjection/lualib.c +2 -2
  17. data/lua-hooks/ext/lpeg/lpcap.c +2 -2
  18. data/lua-hooks/ext/lpeg/lpcap.o +0 -0
  19. data/lua-hooks/ext/lpeg/lpcode.c +2 -2
  20. data/lua-hooks/ext/lpeg/lpcode.h +1 -1
  21. data/lua-hooks/ext/lpeg/lpcode.o +0 -0
  22. data/lua-hooks/ext/lpeg/lpprint.o +0 -0
  23. data/lua-hooks/ext/lpeg/lptree.c +2 -2
  24. data/lua-hooks/ext/lpeg/lptypes.h +1 -1
  25. data/lua-hooks/ext/lpeg/lpvm.c +2 -2
  26. data/lua-hooks/ext/lpeg/lpvm.o +0 -0
  27. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +16 -3
  28. data/lua-hooks/ext/lua-snapshot/snapshot.c +14 -7
  29. data/lua-hooks/ext/luajit/COPYRIGHT +56 -0
  30. data/lua-hooks/ext/luajit/Makefile +159 -0
  31. data/lua-hooks/ext/luajit/README +16 -0
  32. data/lua-hooks/ext/luajit/doc/bluequad-print.css +166 -0
  33. data/lua-hooks/ext/luajit/doc/bluequad.css +325 -0
  34. data/lua-hooks/ext/luajit/doc/changes.html +804 -0
  35. data/lua-hooks/ext/luajit/doc/contact.html +104 -0
  36. data/lua-hooks/ext/luajit/doc/ext_c_api.html +189 -0
  37. data/lua-hooks/ext/luajit/doc/ext_ffi.html +332 -0
  38. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +570 -0
  39. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +1261 -0
  40. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +603 -0
  41. data/lua-hooks/ext/luajit/doc/ext_jit.html +201 -0
  42. data/lua-hooks/ext/luajit/doc/ext_profiler.html +365 -0
  43. data/lua-hooks/ext/luajit/doc/extensions.html +448 -0
  44. data/lua-hooks/ext/luajit/doc/faq.html +186 -0
  45. data/lua-hooks/ext/luajit/doc/img/contact.png +0 -0
  46. data/lua-hooks/ext/luajit/doc/install.html +659 -0
  47. data/lua-hooks/ext/luajit/doc/luajit.html +236 -0
  48. data/lua-hooks/ext/luajit/doc/running.html +309 -0
  49. data/lua-hooks/ext/luajit/doc/status.html +118 -0
  50. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +456 -0
  51. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1125 -0
  52. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +518 -0
  53. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1166 -0
  54. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +416 -0
  55. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +953 -0
  56. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +419 -0
  57. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1919 -0
  58. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +83 -0
  59. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +12 -0
  60. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +471 -0
  61. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +1945 -0
  62. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +1094 -0
  63. data/lua-hooks/ext/luajit/etc/luajit.1 +88 -0
  64. data/lua-hooks/ext/luajit/etc/luajit.pc +25 -0
  65. data/lua-hooks/ext/luajit/src/Makefile +697 -0
  66. data/lua-hooks/ext/luajit/src/Makefile.dep +244 -0
  67. data/lua-hooks/ext/luajit/src/host/README +4 -0
  68. data/lua-hooks/ext/luajit/src/host/buildvm +0 -0
  69. data/lua-hooks/ext/luajit/src/host/buildvm.c +518 -0
  70. data/lua-hooks/ext/luajit/src/host/buildvm.h +105 -0
  71. data/lua-hooks/ext/luajit/src/host/buildvm.o +0 -0
  72. data/lua-hooks/ext/luajit/src/host/buildvm_arch.h +7449 -0
  73. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +345 -0
  74. data/lua-hooks/ext/luajit/src/host/buildvm_asm.o +0 -0
  75. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +229 -0
  76. data/lua-hooks/ext/luajit/src/host/buildvm_fold.o +0 -0
  77. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +457 -0
  78. data/lua-hooks/ext/luajit/src/host/buildvm_lib.o +0 -0
  79. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +45 -0
  80. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +368 -0
  81. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.o +0 -0
  82. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +197 -0
  83. data/lua-hooks/ext/luajit/src/host/genminilua.lua +428 -0
  84. data/lua-hooks/ext/luajit/src/host/minilua +0 -0
  85. data/lua-hooks/ext/luajit/src/host/minilua.c +7770 -0
  86. data/lua-hooks/ext/luajit/src/host/minilua.o +0 -0
  87. data/lua-hooks/ext/luajit/src/jit/bc.lua +190 -0
  88. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +661 -0
  89. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +689 -0
  90. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +428 -0
  91. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +17 -0
  92. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +591 -0
  93. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +17 -0
  94. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +838 -0
  95. data/lua-hooks/ext/luajit/src/jit/dump.lua +706 -0
  96. data/lua-hooks/ext/luajit/src/jit/p.lua +310 -0
  97. data/lua-hooks/ext/luajit/src/jit/v.lua +170 -0
  98. data/lua-hooks/ext/luajit/src/jit/vmdef.lua +362 -0
  99. data/lua-hooks/ext/luajit/src/jit/zone.lua +45 -0
  100. data/lua-hooks/ext/{lua → luajit/src}/lauxlib.h +10 -17
  101. data/lua-hooks/ext/luajit/src/lib_aux.c +356 -0
  102. data/lua-hooks/ext/luajit/src/lib_aux.o +0 -0
  103. data/lua-hooks/ext/luajit/src/lib_aux_dyn.o +0 -0
  104. data/lua-hooks/ext/luajit/src/lib_base.c +664 -0
  105. data/lua-hooks/ext/luajit/src/lib_base.o +0 -0
  106. data/lua-hooks/ext/luajit/src/lib_base_dyn.o +0 -0
  107. data/lua-hooks/ext/luajit/src/lib_bit.c +180 -0
  108. data/lua-hooks/ext/luajit/src/lib_bit.o +0 -0
  109. data/lua-hooks/ext/luajit/src/lib_bit_dyn.o +0 -0
  110. data/lua-hooks/ext/luajit/src/lib_debug.c +405 -0
  111. data/lua-hooks/ext/luajit/src/lib_debug.o +0 -0
  112. data/lua-hooks/ext/luajit/src/lib_debug_dyn.o +0 -0
  113. data/lua-hooks/ext/luajit/src/lib_ffi.c +872 -0
  114. data/lua-hooks/ext/luajit/src/lib_ffi.o +0 -0
  115. data/lua-hooks/ext/luajit/src/lib_ffi_dyn.o +0 -0
  116. data/lua-hooks/ext/luajit/src/lib_init.c +55 -0
  117. data/lua-hooks/ext/luajit/src/lib_init.o +0 -0
  118. data/lua-hooks/ext/luajit/src/lib_init_dyn.o +0 -0
  119. data/lua-hooks/ext/luajit/src/lib_io.c +541 -0
  120. data/lua-hooks/ext/luajit/src/lib_io.o +0 -0
  121. data/lua-hooks/ext/luajit/src/lib_io_dyn.o +0 -0
  122. data/lua-hooks/ext/luajit/src/lib_jit.c +767 -0
  123. data/lua-hooks/ext/luajit/src/lib_jit.o +0 -0
  124. data/lua-hooks/ext/luajit/src/lib_jit_dyn.o +0 -0
  125. data/lua-hooks/ext/luajit/src/lib_math.c +230 -0
  126. data/lua-hooks/ext/luajit/src/lib_math.o +0 -0
  127. data/lua-hooks/ext/luajit/src/lib_math_dyn.o +0 -0
  128. data/lua-hooks/ext/luajit/src/lib_os.c +292 -0
  129. data/lua-hooks/ext/luajit/src/lib_os.o +0 -0
  130. data/lua-hooks/ext/luajit/src/lib_os_dyn.o +0 -0
  131. data/lua-hooks/ext/luajit/src/lib_package.c +610 -0
  132. data/lua-hooks/ext/luajit/src/lib_package.o +0 -0
  133. data/lua-hooks/ext/luajit/src/lib_package_dyn.o +0 -0
  134. data/lua-hooks/ext/luajit/src/lib_string.c +752 -0
  135. data/lua-hooks/ext/luajit/src/lib_string.o +0 -0
  136. data/lua-hooks/ext/luajit/src/lib_string_dyn.o +0 -0
  137. data/lua-hooks/ext/luajit/src/lib_table.c +307 -0
  138. data/lua-hooks/ext/luajit/src/lib_table.o +0 -0
  139. data/lua-hooks/ext/luajit/src/lib_table_dyn.o +0 -0
  140. data/lua-hooks/ext/luajit/src/libluajit.a +0 -0
  141. data/lua-hooks/ext/luajit/src/libluajit.so +0 -0
  142. data/lua-hooks/ext/luajit/src/lj.supp +26 -0
  143. data/lua-hooks/ext/luajit/src/lj_alloc.c +1398 -0
  144. data/lua-hooks/ext/luajit/src/lj_alloc.h +17 -0
  145. data/lua-hooks/ext/luajit/src/lj_alloc.o +0 -0
  146. data/lua-hooks/ext/luajit/src/lj_alloc_dyn.o +0 -0
  147. data/lua-hooks/ext/luajit/src/lj_api.c +1210 -0
  148. data/lua-hooks/ext/luajit/src/lj_api.o +0 -0
  149. data/lua-hooks/ext/luajit/src/lj_api_dyn.o +0 -0
  150. data/lua-hooks/ext/luajit/src/lj_arch.h +509 -0
  151. data/lua-hooks/ext/luajit/src/lj_asm.c +2278 -0
  152. data/lua-hooks/ext/luajit/src/lj_asm.h +17 -0
  153. data/lua-hooks/ext/luajit/src/lj_asm.o +0 -0
  154. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +2217 -0
  155. data/lua-hooks/ext/luajit/src/lj_asm_dyn.o +0 -0
  156. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +1833 -0
  157. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2015 -0
  158. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2634 -0
  159. data/lua-hooks/ext/luajit/src/lj_bc.c +14 -0
  160. data/lua-hooks/ext/luajit/src/lj_bc.h +265 -0
  161. data/lua-hooks/ext/luajit/src/lj_bc.o +0 -0
  162. data/lua-hooks/ext/luajit/src/lj_bc_dyn.o +0 -0
  163. data/lua-hooks/ext/luajit/src/lj_bcdef.h +220 -0
  164. data/lua-hooks/ext/luajit/src/lj_bcdump.h +68 -0
  165. data/lua-hooks/ext/luajit/src/lj_bcread.c +457 -0
  166. data/lua-hooks/ext/luajit/src/lj_bcread.o +0 -0
  167. data/lua-hooks/ext/luajit/src/lj_bcread_dyn.o +0 -0
  168. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +361 -0
  169. data/lua-hooks/ext/luajit/src/lj_bcwrite.o +0 -0
  170. data/lua-hooks/ext/luajit/src/lj_bcwrite_dyn.o +0 -0
  171. data/lua-hooks/ext/luajit/src/lj_buf.c +234 -0
  172. data/lua-hooks/ext/luajit/src/lj_buf.h +105 -0
  173. data/lua-hooks/ext/luajit/src/lj_buf.o +0 -0
  174. data/lua-hooks/ext/luajit/src/lj_buf_dyn.o +0 -0
  175. data/lua-hooks/ext/luajit/src/lj_carith.c +429 -0
  176. data/lua-hooks/ext/luajit/src/lj_carith.h +37 -0
  177. data/lua-hooks/ext/luajit/src/lj_carith.o +0 -0
  178. data/lua-hooks/ext/luajit/src/lj_carith_dyn.o +0 -0
  179. data/lua-hooks/ext/luajit/src/lj_ccall.c +984 -0
  180. data/lua-hooks/ext/luajit/src/lj_ccall.h +178 -0
  181. data/lua-hooks/ext/luajit/src/lj_ccall.o +0 -0
  182. data/lua-hooks/ext/luajit/src/lj_ccall_dyn.o +0 -0
  183. data/lua-hooks/ext/luajit/src/lj_ccallback.c +712 -0
  184. data/lua-hooks/ext/luajit/src/lj_ccallback.h +25 -0
  185. data/lua-hooks/ext/luajit/src/lj_ccallback.o +0 -0
  186. data/lua-hooks/ext/luajit/src/lj_ccallback_dyn.o +0 -0
  187. data/lua-hooks/ext/luajit/src/lj_cconv.c +752 -0
  188. data/lua-hooks/ext/luajit/src/lj_cconv.h +70 -0
  189. data/lua-hooks/ext/luajit/src/lj_cconv.o +0 -0
  190. data/lua-hooks/ext/luajit/src/lj_cconv_dyn.o +0 -0
  191. data/lua-hooks/ext/luajit/src/lj_cdata.c +288 -0
  192. data/lua-hooks/ext/luajit/src/lj_cdata.h +76 -0
  193. data/lua-hooks/ext/luajit/src/lj_cdata.o +0 -0
  194. data/lua-hooks/ext/luajit/src/lj_cdata_dyn.o +0 -0
  195. data/lua-hooks/ext/luajit/src/lj_char.c +43 -0
  196. data/lua-hooks/ext/luajit/src/lj_char.h +42 -0
  197. data/lua-hooks/ext/luajit/src/lj_char.o +0 -0
  198. data/lua-hooks/ext/luajit/src/lj_char_dyn.o +0 -0
  199. data/lua-hooks/ext/luajit/src/lj_clib.c +418 -0
  200. data/lua-hooks/ext/luajit/src/lj_clib.h +29 -0
  201. data/lua-hooks/ext/luajit/src/lj_clib.o +0 -0
  202. data/lua-hooks/ext/luajit/src/lj_clib_dyn.o +0 -0
  203. data/lua-hooks/ext/luajit/src/lj_cparse.c +1862 -0
  204. data/lua-hooks/ext/luajit/src/lj_cparse.h +65 -0
  205. data/lua-hooks/ext/luajit/src/lj_cparse.o +0 -0
  206. data/lua-hooks/ext/luajit/src/lj_cparse_dyn.o +0 -0
  207. data/lua-hooks/ext/luajit/src/lj_crecord.c +1834 -0
  208. data/lua-hooks/ext/luajit/src/lj_crecord.h +38 -0
  209. data/lua-hooks/ext/luajit/src/lj_crecord.o +0 -0
  210. data/lua-hooks/ext/luajit/src/lj_crecord_dyn.o +0 -0
  211. data/lua-hooks/ext/luajit/src/lj_ctype.c +635 -0
  212. data/lua-hooks/ext/luajit/src/lj_ctype.h +461 -0
  213. data/lua-hooks/ext/luajit/src/lj_ctype.o +0 -0
  214. data/lua-hooks/ext/luajit/src/lj_ctype_dyn.o +0 -0
  215. data/lua-hooks/ext/luajit/src/lj_debug.c +699 -0
  216. data/lua-hooks/ext/luajit/src/lj_debug.h +65 -0
  217. data/lua-hooks/ext/luajit/src/lj_debug.o +0 -0
  218. data/lua-hooks/ext/luajit/src/lj_debug_dyn.o +0 -0
  219. data/lua-hooks/ext/luajit/src/lj_def.h +365 -0
  220. data/lua-hooks/ext/luajit/src/lj_dispatch.c +557 -0
  221. data/lua-hooks/ext/luajit/src/lj_dispatch.h +138 -0
  222. data/lua-hooks/ext/luajit/src/lj_dispatch.o +0 -0
  223. data/lua-hooks/ext/luajit/src/lj_dispatch_dyn.o +0 -0
  224. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +356 -0
  225. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +211 -0
  226. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +238 -0
  227. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +462 -0
  228. data/lua-hooks/ext/luajit/src/lj_err.c +794 -0
  229. data/lua-hooks/ext/luajit/src/lj_err.h +41 -0
  230. data/lua-hooks/ext/luajit/src/lj_err.o +0 -0
  231. data/lua-hooks/ext/luajit/src/lj_err_dyn.o +0 -0
  232. data/lua-hooks/ext/luajit/src/lj_errmsg.h +190 -0
  233. data/lua-hooks/ext/luajit/src/lj_ff.h +18 -0
  234. data/lua-hooks/ext/luajit/src/lj_ffdef.h +209 -0
  235. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +1247 -0
  236. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +24 -0
  237. data/lua-hooks/ext/luajit/src/lj_ffrecord.o +0 -0
  238. data/lua-hooks/ext/luajit/src/lj_ffrecord_dyn.o +0 -0
  239. data/lua-hooks/ext/luajit/src/lj_folddef.h +1138 -0
  240. data/lua-hooks/ext/luajit/src/lj_frame.h +259 -0
  241. data/lua-hooks/ext/luajit/src/lj_func.c +185 -0
  242. data/lua-hooks/ext/luajit/src/lj_func.h +24 -0
  243. data/lua-hooks/ext/luajit/src/lj_func.o +0 -0
  244. data/lua-hooks/ext/luajit/src/lj_func_dyn.o +0 -0
  245. data/lua-hooks/ext/luajit/src/lj_gc.c +845 -0
  246. data/lua-hooks/ext/luajit/src/lj_gc.h +134 -0
  247. data/lua-hooks/ext/luajit/src/lj_gc.o +0 -0
  248. data/lua-hooks/ext/luajit/src/lj_gc_dyn.o +0 -0
  249. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +787 -0
  250. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +22 -0
  251. data/lua-hooks/ext/luajit/src/lj_gdbjit.o +0 -0
  252. data/lua-hooks/ext/luajit/src/lj_gdbjit_dyn.o +0 -0
  253. data/lua-hooks/ext/luajit/src/lj_ir.c +505 -0
  254. data/lua-hooks/ext/luajit/src/lj_ir.h +577 -0
  255. data/lua-hooks/ext/luajit/src/lj_ir.o +0 -0
  256. data/lua-hooks/ext/luajit/src/lj_ir_dyn.o +0 -0
  257. data/lua-hooks/ext/luajit/src/lj_ircall.h +321 -0
  258. data/lua-hooks/ext/luajit/src/lj_iropt.h +161 -0
  259. data/lua-hooks/ext/luajit/src/lj_jit.h +440 -0
  260. data/lua-hooks/ext/luajit/src/lj_lex.c +482 -0
  261. data/lua-hooks/ext/luajit/src/lj_lex.h +86 -0
  262. data/lua-hooks/ext/luajit/src/lj_lex.o +0 -0
  263. data/lua-hooks/ext/luajit/src/lj_lex_dyn.o +0 -0
  264. data/lua-hooks/ext/luajit/src/lj_lib.c +303 -0
  265. data/lua-hooks/ext/luajit/src/lj_lib.h +115 -0
  266. data/lua-hooks/ext/luajit/src/lj_lib.o +0 -0
  267. data/lua-hooks/ext/luajit/src/lj_lib_dyn.o +0 -0
  268. data/lua-hooks/ext/luajit/src/lj_libdef.h +414 -0
  269. data/lua-hooks/ext/luajit/src/lj_load.c +168 -0
  270. data/lua-hooks/ext/luajit/src/lj_load.o +0 -0
  271. data/lua-hooks/ext/luajit/src/lj_load_dyn.o +0 -0
  272. data/lua-hooks/ext/luajit/src/lj_mcode.c +386 -0
  273. data/lua-hooks/ext/luajit/src/lj_mcode.h +30 -0
  274. data/lua-hooks/ext/luajit/src/lj_mcode.o +0 -0
  275. data/lua-hooks/ext/luajit/src/lj_mcode_dyn.o +0 -0
  276. data/lua-hooks/ext/luajit/src/lj_meta.c +477 -0
  277. data/lua-hooks/ext/luajit/src/lj_meta.h +38 -0
  278. data/lua-hooks/ext/luajit/src/lj_meta.o +0 -0
  279. data/lua-hooks/ext/luajit/src/lj_meta_dyn.o +0 -0
  280. data/lua-hooks/ext/luajit/src/lj_obj.c +50 -0
  281. data/lua-hooks/ext/luajit/src/lj_obj.h +976 -0
  282. data/lua-hooks/ext/luajit/src/lj_obj.o +0 -0
  283. data/lua-hooks/ext/luajit/src/lj_obj_dyn.o +0 -0
  284. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +78 -0
  285. data/lua-hooks/ext/luajit/src/lj_opt_dce.o +0 -0
  286. data/lua-hooks/ext/luajit/src/lj_opt_dce_dyn.o +0 -0
  287. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +2488 -0
  288. data/lua-hooks/ext/luajit/src/lj_opt_fold.o +0 -0
  289. data/lua-hooks/ext/luajit/src/lj_opt_fold_dyn.o +0 -0
  290. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +449 -0
  291. data/lua-hooks/ext/luajit/src/lj_opt_loop.o +0 -0
  292. data/lua-hooks/ext/luajit/src/lj_opt_loop_dyn.o +0 -0
  293. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +935 -0
  294. data/lua-hooks/ext/luajit/src/lj_opt_mem.o +0 -0
  295. data/lua-hooks/ext/luajit/src/lj_opt_mem_dyn.o +0 -0
  296. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +652 -0
  297. data/lua-hooks/ext/luajit/src/lj_opt_narrow.o +0 -0
  298. data/lua-hooks/ext/luajit/src/lj_opt_narrow_dyn.o +0 -0
  299. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +245 -0
  300. data/lua-hooks/ext/luajit/src/lj_opt_sink.o +0 -0
  301. data/lua-hooks/ext/luajit/src/lj_opt_sink_dyn.o +0 -0
  302. data/lua-hooks/ext/luajit/src/lj_opt_split.c +856 -0
  303. data/lua-hooks/ext/luajit/src/lj_opt_split.o +0 -0
  304. data/lua-hooks/ext/luajit/src/lj_opt_split_dyn.o +0 -0
  305. data/lua-hooks/ext/luajit/src/lj_parse.c +2725 -0
  306. data/lua-hooks/ext/luajit/src/lj_parse.h +18 -0
  307. data/lua-hooks/ext/luajit/src/lj_parse.o +0 -0
  308. data/lua-hooks/ext/luajit/src/lj_parse_dyn.o +0 -0
  309. data/lua-hooks/ext/luajit/src/lj_profile.c +368 -0
  310. data/lua-hooks/ext/luajit/src/lj_profile.h +21 -0
  311. data/lua-hooks/ext/luajit/src/lj_profile.o +0 -0
  312. data/lua-hooks/ext/luajit/src/lj_profile_dyn.o +0 -0
  313. data/lua-hooks/ext/luajit/src/lj_recdef.h +270 -0
  314. data/lua-hooks/ext/luajit/src/lj_record.c +2554 -0
  315. data/lua-hooks/ext/luajit/src/lj_record.h +45 -0
  316. data/lua-hooks/ext/luajit/src/lj_record.o +0 -0
  317. data/lua-hooks/ext/luajit/src/lj_record_dyn.o +0 -0
  318. data/lua-hooks/ext/luajit/src/lj_snap.c +870 -0
  319. data/lua-hooks/ext/luajit/src/lj_snap.h +34 -0
  320. data/lua-hooks/ext/luajit/src/lj_snap.o +0 -0
  321. data/lua-hooks/ext/luajit/src/lj_snap_dyn.o +0 -0
  322. data/lua-hooks/ext/luajit/src/lj_state.c +300 -0
  323. data/lua-hooks/ext/luajit/src/lj_state.h +35 -0
  324. data/lua-hooks/ext/luajit/src/lj_state.o +0 -0
  325. data/lua-hooks/ext/luajit/src/lj_state_dyn.o +0 -0
  326. data/lua-hooks/ext/luajit/src/lj_str.c +197 -0
  327. data/lua-hooks/ext/luajit/src/lj_str.h +27 -0
  328. data/lua-hooks/ext/luajit/src/lj_str.o +0 -0
  329. data/lua-hooks/ext/luajit/src/lj_str_dyn.o +0 -0
  330. data/lua-hooks/ext/luajit/src/lj_strfmt.c +554 -0
  331. data/lua-hooks/ext/luajit/src/lj_strfmt.h +125 -0
  332. data/lua-hooks/ext/luajit/src/lj_strfmt.o +0 -0
  333. data/lua-hooks/ext/luajit/src/lj_strfmt_dyn.o +0 -0
  334. data/lua-hooks/ext/luajit/src/lj_strscan.c +547 -0
  335. data/lua-hooks/ext/luajit/src/lj_strscan.h +39 -0
  336. data/lua-hooks/ext/luajit/src/lj_strscan.o +0 -0
  337. data/lua-hooks/ext/luajit/src/lj_strscan_dyn.o +0 -0
  338. data/lua-hooks/ext/luajit/src/lj_tab.c +666 -0
  339. data/lua-hooks/ext/luajit/src/lj_tab.h +73 -0
  340. data/lua-hooks/ext/luajit/src/lj_tab.o +0 -0
  341. data/lua-hooks/ext/luajit/src/lj_tab_dyn.o +0 -0
  342. data/lua-hooks/ext/luajit/src/lj_target.h +164 -0
  343. data/lua-hooks/ext/luajit/src/lj_target_arm.h +270 -0
  344. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +97 -0
  345. data/lua-hooks/ext/luajit/src/lj_target_mips.h +260 -0
  346. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +280 -0
  347. data/lua-hooks/ext/luajit/src/lj_target_x86.h +345 -0
  348. data/lua-hooks/ext/luajit/src/lj_trace.c +859 -0
  349. data/lua-hooks/ext/luajit/src/lj_trace.h +54 -0
  350. data/lua-hooks/ext/luajit/src/lj_trace.o +0 -0
  351. data/lua-hooks/ext/luajit/src/lj_trace_dyn.o +0 -0
  352. data/lua-hooks/ext/luajit/src/lj_traceerr.h +63 -0
  353. data/lua-hooks/ext/luajit/src/lj_udata.c +34 -0
  354. data/lua-hooks/ext/luajit/src/lj_udata.h +14 -0
  355. data/lua-hooks/ext/luajit/src/lj_udata.o +0 -0
  356. data/lua-hooks/ext/luajit/src/lj_udata_dyn.o +0 -0
  357. data/lua-hooks/ext/luajit/src/lj_vm.S +2730 -0
  358. data/lua-hooks/ext/luajit/src/lj_vm.h +114 -0
  359. data/lua-hooks/ext/luajit/src/lj_vm.o +0 -0
  360. data/lua-hooks/ext/luajit/src/lj_vm_dyn.o +0 -0
  361. data/lua-hooks/ext/luajit/src/lj_vmevent.c +58 -0
  362. data/lua-hooks/ext/luajit/src/lj_vmevent.h +59 -0
  363. data/lua-hooks/ext/luajit/src/lj_vmevent.o +0 -0
  364. data/lua-hooks/ext/luajit/src/lj_vmevent_dyn.o +0 -0
  365. data/lua-hooks/ext/luajit/src/lj_vmmath.c +152 -0
  366. data/lua-hooks/ext/luajit/src/lj_vmmath.o +0 -0
  367. data/lua-hooks/ext/luajit/src/lj_vmmath_dyn.o +0 -0
  368. data/lua-hooks/ext/luajit/src/ljamalg.c +96 -0
  369. data/lua-hooks/ext/{lua → luajit/src}/lua.h +12 -7
  370. data/lua-hooks/ext/luajit/src/lua.hpp +9 -0
  371. data/lua-hooks/ext/luajit/src/luaconf.h +156 -0
  372. data/lua-hooks/ext/luajit/src/luajit +0 -0
  373. data/lua-hooks/ext/luajit/src/luajit.c +570 -0
  374. data/lua-hooks/ext/luajit/src/luajit.h +79 -0
  375. data/lua-hooks/ext/luajit/src/luajit.o +0 -0
  376. data/lua-hooks/ext/luajit/src/lualib.h +43 -0
  377. data/lua-hooks/ext/luajit/src/msvcbuild.bat +114 -0
  378. data/lua-hooks/ext/luajit/src/ps4build.bat +103 -0
  379. data/lua-hooks/ext/luajit/src/psvitabuild.bat +93 -0
  380. data/lua-hooks/ext/luajit/src/vm_arm.dasc +4585 -0
  381. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +3764 -0
  382. data/lua-hooks/ext/luajit/src/vm_mips.dasc +4355 -0
  383. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +5252 -0
  384. data/lua-hooks/ext/luajit/src/vm_x64.dasc +4902 -0
  385. data/lua-hooks/ext/luajit/src/vm_x86.dasc +5710 -0
  386. data/lua-hooks/ext/luajit/src/xb1build.bat +101 -0
  387. data/lua-hooks/ext/luajit/src/xedkbuild.bat +92 -0
  388. data/lua-hooks/ext/luautf8/lutf8lib.c +3 -3
  389. data/lua-hooks/lib/boot.lua +37 -2
  390. metadata +372 -69
  391. data/lua-hooks/ext/bitop/README +0 -22
  392. data/lua-hooks/ext/bitop/bit.c +0 -189
  393. data/lua-hooks/ext/extconf.rb +0 -38
  394. data/lua-hooks/ext/lua/COPYRIGHT +0 -34
  395. data/lua-hooks/ext/lua/lapi.c +0 -1087
  396. data/lua-hooks/ext/lua/lapi.h +0 -16
  397. data/lua-hooks/ext/lua/lauxlib.c +0 -652
  398. data/lua-hooks/ext/lua/lbaselib.c +0 -659
  399. data/lua-hooks/ext/lua/lcode.c +0 -831
  400. data/lua-hooks/ext/lua/lcode.h +0 -76
  401. data/lua-hooks/ext/lua/ldblib.c +0 -398
  402. data/lua-hooks/ext/lua/ldebug.c +0 -638
  403. data/lua-hooks/ext/lua/ldebug.h +0 -33
  404. data/lua-hooks/ext/lua/ldo.c +0 -519
  405. data/lua-hooks/ext/lua/ldo.h +0 -57
  406. data/lua-hooks/ext/lua/ldump.c +0 -164
  407. data/lua-hooks/ext/lua/lfunc.c +0 -174
  408. data/lua-hooks/ext/lua/lfunc.h +0 -34
  409. data/lua-hooks/ext/lua/lgc.c +0 -710
  410. data/lua-hooks/ext/lua/lgc.h +0 -110
  411. data/lua-hooks/ext/lua/linit.c +0 -38
  412. data/lua-hooks/ext/lua/liolib.c +0 -556
  413. data/lua-hooks/ext/lua/llex.c +0 -463
  414. data/lua-hooks/ext/lua/llex.h +0 -81
  415. data/lua-hooks/ext/lua/llimits.h +0 -128
  416. data/lua-hooks/ext/lua/lmathlib.c +0 -263
  417. data/lua-hooks/ext/lua/lmem.c +0 -86
  418. data/lua-hooks/ext/lua/lmem.h +0 -49
  419. data/lua-hooks/ext/lua/loadlib.c +0 -705
  420. data/lua-hooks/ext/lua/loadlib_rel.c +0 -760
  421. data/lua-hooks/ext/lua/lobject.c +0 -214
  422. data/lua-hooks/ext/lua/lobject.h +0 -381
  423. data/lua-hooks/ext/lua/lopcodes.c +0 -102
  424. data/lua-hooks/ext/lua/lopcodes.h +0 -268
  425. data/lua-hooks/ext/lua/loslib.c +0 -243
  426. data/lua-hooks/ext/lua/lparser.c +0 -1339
  427. data/lua-hooks/ext/lua/lparser.h +0 -82
  428. data/lua-hooks/ext/lua/lstate.c +0 -214
  429. data/lua-hooks/ext/lua/lstate.h +0 -169
  430. data/lua-hooks/ext/lua/lstring.c +0 -111
  431. data/lua-hooks/ext/lua/lstring.h +0 -31
  432. data/lua-hooks/ext/lua/lstrlib.c +0 -871
  433. data/lua-hooks/ext/lua/ltable.c +0 -588
  434. data/lua-hooks/ext/lua/ltable.h +0 -40
  435. data/lua-hooks/ext/lua/ltablib.c +0 -287
  436. data/lua-hooks/ext/lua/ltm.c +0 -75
  437. data/lua-hooks/ext/lua/ltm.h +0 -54
  438. data/lua-hooks/ext/lua/lua.c +0 -392
  439. data/lua-hooks/ext/lua/lua.def +0 -131
  440. data/lua-hooks/ext/lua/lua.rc +0 -28
  441. data/lua-hooks/ext/lua/lua_dll.rc +0 -26
  442. data/lua-hooks/ext/lua/luac.c +0 -200
  443. data/lua-hooks/ext/lua/luac.rc +0 -1
  444. data/lua-hooks/ext/lua/luaconf.h +0 -763
  445. data/lua-hooks/ext/lua/luaconf.h.in +0 -724
  446. data/lua-hooks/ext/lua/luaconf.h.orig +0 -763
  447. data/lua-hooks/ext/lua/lualib.h +0 -53
  448. data/lua-hooks/ext/lua/lundump.c +0 -227
  449. data/lua-hooks/ext/lua/lundump.h +0 -36
  450. data/lua-hooks/ext/lua/lvm.c +0 -767
  451. data/lua-hooks/ext/lua/lvm.h +0 -36
  452. data/lua-hooks/ext/lua/lzio.c +0 -82
  453. data/lua-hooks/ext/lua/lzio.h +0 -67
  454. data/lua-hooks/ext/lua/print.c +0 -227
@@ -0,0 +1,2634 @@
1
+ /*
2
+ ** x86/x64 IR assembler (SSA IR -> machine code).
3
+ ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4
+ */
5
+
6
+ /* -- Guard handling ------------------------------------------------------ */
7
+
8
+ /* Generate an exit stub group at the bottom of the reserved MCode memory. Writes the stubs starting at as->mcbot, then advances as->mcbot and as->mclim past them and returns the address of the first stub of the group. */
9
+ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
10
+ {
11
+ ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; /* Low byte of the first exit number in this group. */
12
+ MCode *mxp = as->mcbot;
13
+ MCode *mxpstart = mxp;
14
+ if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) /* (2+2) bytes per stub (incl. the high-byte push), 8 for the MOV, 5 for the JMP. */
15
+ asm_mclimit(as);
16
+ /* Push low byte of exitno for each exit stub. */
17
+ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; /* First stub: only a 2-byte push, no preceding jump. */
18
+ for (i = 1; i < EXITSTUBS_PER_GROUP; i++) {
19
+ *mxp++ = XI_JMPs; *mxp++ = (MCode)((2+2)*(EXITSTUBS_PER_GROUP - i) - 2); /* Previous stub skips the remaining stubs and lands on the shared tail below. */
20
+ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)(groupofs + i);
21
+ }
22
+ /* Push the high byte of the exitno for each exit stub group. */
23
+ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
24
+ /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
25
+ *mxp++ = XI_MOVmi;
26
+ *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
27
+ *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
28
+ *mxp++ = 2*sizeof(void *); /* 8-bit displacement: skip the two values pushed above. */
29
+ *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; /* 32-bit immediate operand of the MOV. */
30
+ /* Jump to exit handler which fills in the ExitState. */
31
+ *mxp++ = XI_JMP; mxp += 4; /* Reserve the 4-byte displacement; it is filled in on the next line. */
32
+ *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); /* mxp now points past the JMP; displacement is computed from there. */
33
+ /* Commit the code for this group (even if assembly fails later on). */
34
+ lj_mcode_commitbot(as->J, mxp);
35
+ as->mcbot = mxp;
36
+ as->mclim = as->mcbot + MCLIM_REDZONE;
37
+ return mxpstart;
38
+ }
39
+
40
+ /* Setup all needed exit stubs: make sure every stub group required to cover nexits exits exists in J->exitstubgroup[], generating the missing ones. Calls lj_trace_err() with LJ_TRERR_SNAPOV if nexits reaches EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR. */
41
+ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
42
+ {
43
+ ExitNo i;
44
+ if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
45
+ lj_trace_err(as->J, LJ_TRERR_SNAPOV);
46
+ for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) /* Ceiling division: number of groups needed. */
47
+ if (as->J->exitstubgroup[i] == NULL) /* Already generated groups are reused. */
48
+ as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
49
+ }
50
+
51
+ /* Emit conditional branch to exit for guard.
52
+ ** It's important to emit this *after* all registers have been allocated,
53
+ ** because rematerializations may invalidate the flags.
54
+ */
55
+ static void asm_guardcc(ASMState *as, int cc)
56
+ {
57
+ MCode *target = exitstub_addr(as->J, as->snapno); /* Exit stub address for the current snapshot number. */
58
+ MCode *p = as->mcp;
59
+ if (LJ_UNLIKELY(p == as->invmcp)) { /* The current emit position is the one recorded in as->invmcp. */
60
+ as->loopinv = 1;
61
+ *(int32_t *)(p+1) = jmprel(p+5, target); /* Patch the 32-bit displacement at p+1 (relative to p+5) to go to the exit stub. */
62
+ target = p; /* The guard itself now branches to p instead. */
63
+ cc ^= 1; /* Flip the low bit of cc; presumably this inverts the condition (x86 cc pairing) -- confirm. */
64
+ if (as->realign) {
65
+ emit_sjcc(as, cc, target); /* Alternate emitter used only when as->realign is set. */
66
+ return;
67
+ }
68
+ }
69
+ emit_jcc(as, cc, target);
70
+ }
71
+
72
+ /* -- Memory operand fusion ----------------------------------------------- */
73
+
74
+ /* Limit linear search to this distance (measured in IR references between ref and curins, see noconflict()). Avoids O(n^2) behavior. */
76
+ #define CONFLICT_SEARCH_LIM 31
76
+
77
+ /* Check if a reference is a signed 32 bit constant. Returns 1 and stores the value in *k if so; returns 0 and leaves *k untouched otherwise. */
78
+ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
79
+ {
80
+ if (irref_isk(ref)) {
81
+ IRIns *ir = IR(ref);
82
+ if (ir->o != IR_KINT64) { /* Any constant other than KINT64: take its 32-bit i field as is. */
83
+ *k = ir->i;
84
+ return 1;
85
+ } else if (checki32((int64_t)ir_kint64(ir)->u64)) { /* 64-bit constant is accepted only if it passes checki32(). */
86
+ *k = (int32_t)ir_kint64(ir)->u64;
87
+ return 1;
88
+ }
89
+ }
90
+ return 0; /* Not a constant, or a 64-bit constant rejected by checki32(). */
91
+ }
92
+
93
+ /* Check if there's no conflicting instruction between curins and ref.
94
+ ** Also avoid fusing loads if there are multiple references (this check is skipped when noload is non-zero). Returns 1 if ok, 0 otherwise.
95
+ */
96
+ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
97
+ {
98
+ IRIns *ir = as->ir;
99
+ IRRef i = as->curins;
100
+ if (i > ref + CONFLICT_SEARCH_LIM)
101
+ return 0; /* Give up, ref is too far away. */
102
+ while (--i > ref) { /* Walk backwards over the instructions strictly between curins and ref. */
103
+ if (ir[i].o == conflict)
104
+ return 0; /* Conflict found. */
105
+ else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
106
+ return 0; /* Another instruction in between has ref as an operand. */
107
+ }
108
+ return 1; /* Ok, no conflict. */
109
+ }
110
+
111
+ /* Fuse array base into memory operand. Sets as->mrm.ofs and returns the IR reference to use for the base register: the table object, the first operand of a constant ADD, or ref itself. */
112
+ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
113
+ {
114
+ IRIns *irb = IR(ref);
115
+ as->mrm.ofs = 0; /* Default offset; overridden below when something is fused. */
116
+ if (irb->o == IR_FLOAD) {
117
+ IRIns *ira = IR(irb->op1);
118
+ lua_assert(irb->op2 == IRFL_TAB_ARRAY);
119
+ /* We can avoid the FLOAD of t->array for colocated arrays. */
120
+ if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
121
+ !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
122
+ as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
123
+ return irb->op1; /* Table obj. */
124
+ }
125
+ } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
126
+ /* Fuse base offset (vararg load). */
127
+ as->mrm.ofs = IR(irb->op2)->i;
128
+ return irb->op1; /* Base without the constant addend. */
129
+ }
130
+ return ref; /* Otherwise use the given array base. */
131
+ }
132
+
133
+ /* Fuse array reference into memory operand. Fills as->mrm (base, idx, ofs and, for a non-constant index, scale) for the AREF in ir; registers are allocated from allow. */
134
+ static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
135
+ {
136
+ IRIns *irx;
137
+ lua_assert(ir->o == IR_AREF);
138
+ as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); /* asm_fuseabase() also initializes as->mrm.ofs. */
139
+ irx = IR(ir->op2);
140
+ if (irref_isk(ir->op2)) {
141
+ as->mrm.ofs += 8*irx->i; /* Constant index: fold index*8 into the offset. */
142
+ as->mrm.idx = RID_NONE;
143
+ } else {
144
+ rset_clear(allow, as->mrm.base); /* The index register must not be the base register. */
145
+ as->mrm.scale = XM_SCALE8;
146
+ /* Fuse a constant ADD (e.g. t[i+1]) into the offset.
147
+ ** Doesn't help much without ABCelim, but reduces register pressure.
148
+ */
149
+ if (!LJ_64 && /* Has bad effects with negative index on x64. */
150
+ mayfuse(as, ir->op2) && ra_noreg(irx->r) &&
151
+ irx->o == IR_ADD && irref_isk(irx->op2)) {
152
+ as->mrm.ofs += 8*IR(irx->op2)->i;
153
+ as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow); /* Index is the non-constant operand of the ADD. */
154
+ } else {
155
+ as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow);
156
+ }
157
+ }
158
+ }
159
+
160
+ /* Fuse array/hash/upvalue reference into memory operand.
161
+ ** Caveat: this may allocate GPRs for the base/idx registers. Be sure to
162
+ ** pass the final allow mask, excluding any GPRs used for other inputs.
163
+ ** In particular: 2-operand GPR instructions need to call ra_dest() first!
164
+ */
165
+ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
166
+ {
167
+ IRIns *ir = IR(ref);
168
+ if (ra_noreg(ir->r)) { /* Fusion is only attempted while the ref has no register assigned. */
169
+ switch ((IROp)ir->o) {
170
+ case IR_AREF:
171
+ if (mayfuse(as, ref)) {
172
+ asm_fusearef(as, ir, allow);
173
+ return;
174
+ }
175
+ break;
176
+ case IR_HREFK:
177
+ if (mayfuse(as, ref)) {
178
+ as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
179
+ as->mrm.ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); /* op2 field of the instruction referenced by ir->op2, scaled by the Node size. */
180
+ as->mrm.idx = RID_NONE;
181
+ return;
182
+ }
183
+ break;
184
+ case IR_UREFC:
185
+ if (irref_isk(ir->op1)) { /* Function is a constant, so the upvalue can be looked up right here. */
186
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
187
+ GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
188
+ as->mrm.ofs = ptr2addr(&uv->tv); /* Address of the upvalue's tv field used directly as the offset. */
189
+ as->mrm.base = as->mrm.idx = RID_NONE;
190
+ return;
191
+ }
192
+ break;
193
+ default:
194
+ lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
195
+ ir->o == IR_KKPTR);
196
+ break;
197
+ }
198
+ }
199
+ as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow); /* Not fused: the reference itself goes into a register, offset 0, no index. */
200
+ as->mrm.ofs = 0;
201
+ as->mrm.idx = RID_NONE;
202
+ }
203
+
204
+ /* Fuse FLOAD/FREF reference into memory operand. */
205
+ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
206
+ {
207
+ lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
208
+ as->mrm.ofs = field_ofs[ir->op2];
209
+ as->mrm.idx = RID_NONE;
210
+ if (irref_isk(ir->op1)) {
211
+ as->mrm.ofs += IR(ir->op1)->i;
212
+ as->mrm.base = RID_NONE;
213
+ } else {
214
+ as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
215
+ }
216
+ }
217
+
218
+ /* Fuse string reference into memory operand. */
219
+ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
220
+ {
221
+ IRIns *irr;
222
+ lua_assert(ir->o == IR_STRREF);
223
+ as->mrm.base = as->mrm.idx = RID_NONE;
224
+ as->mrm.scale = XM_SCALE1;
225
+ as->mrm.ofs = sizeof(GCstr);
226
+ if (irref_isk(ir->op1)) {
227
+ as->mrm.ofs += IR(ir->op1)->i;
228
+ } else {
229
+ Reg r = ra_alloc1(as, ir->op1, allow);
230
+ rset_clear(allow, r);
231
+ as->mrm.base = (uint8_t)r;
232
+ }
233
+ irr = IR(ir->op2);
234
+ if (irref_isk(ir->op2)) {
235
+ as->mrm.ofs += irr->i;
236
+ } else {
237
+ Reg r;
238
+ /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
239
+ if (!LJ_64 && /* Has bad effects with negative index on x64. */
240
+ mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
241
+ as->mrm.ofs += IR(irr->op2)->i;
242
+ r = ra_alloc1(as, irr->op1, allow);
243
+ } else {
244
+ r = ra_alloc1(as, ir->op2, allow);
245
+ }
246
+ if (as->mrm.base == RID_NONE)
247
+ as->mrm.base = (uint8_t)r;
248
+ else
249
+ as->mrm.idx = (uint8_t)r;
250
+ }
251
+ }
252
+
253
/* Describe the address computed by IR instruction ref as the memory operand
** as->mrm (base, idx, scale, ofs).
**   IR_KPTR/IR_KKPTR: ofs = ir->i, no base and no index register.
**   IR_STRREF:        handled by asm_fusestrref().
**   anything else:    ofs starts at 0; a fusable IR_ADD without a register
**                     is split into base + idx*scale, plus a constant
**                     accepted by asm_isk32().
** allow is the set of registers that may be allocated for base and idx.
*/
+ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
254
+ {
255
+ IRIns *ir = IR(ref);
256
+ as->mrm.idx = RID_NONE;
257
+ if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
258
+ as->mrm.ofs = ir->i;
259
+ as->mrm.base = RID_NONE;
260
+ } else if (ir->o == IR_STRREF) {
261
+ asm_fusestrref(as, ir, allow);
262
+ } else {
263
+ as->mrm.ofs = 0;
264
+ if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
265
+ /* Gather (base+idx*sz)+ofs as emitted by cdata ptr/array indexing. */
266
+ IRIns *irx;
267
+ IRRef idx;
268
+ Reg r;
269
+ if (asm_isk32(as, ir->op2, &as->mrm.ofs)) { /* Recognize x+ofs. */
270
+ ref = ir->op1;
271
+ ir = IR(ref);
/* x is not itself a fusable ADD: use x (now in ref) as the plain base. */
272
+ if (!(ir->o == IR_ADD && canfuse(as, ir) && ra_noreg(ir->r)))
273
+ goto noadd;
274
+ }
275
+ as->mrm.scale = XM_SCALE1;
/* Tentatively treat op1 as the index and op2 as the base. */
276
+ idx = ir->op1;
277
+ ref = ir->op2;
278
+ irx = IR(idx);
279
+ if (!(irx->o == IR_BSHL || irx->o == IR_ADD)) { /* Try other operand. */
280
+ idx = ir->op2;
281
+ ref = ir->op1;
282
+ irx = IR(idx);
283
+ }
284
+ if (canfuse(as, irx) && ra_noreg(irx->r)) {
285
+ if (irx->o == IR_BSHL && irref_isk(irx->op2) && IR(irx->op2)->i <= 3) {
286
+ /* Recognize idx<<b with b = 0-3, corresponding to sz = (1),2,4,8. */
287
+ idx = irx->op1;
288
+ as->mrm.scale = (uint8_t)(IR(irx->op2)->i << 6);
289
+ } else if (irx->o == IR_ADD && irx->op1 == irx->op2) {
290
+ /* FOLD does idx*2 ==> idx<<1 ==> idx+idx. */
291
+ idx = irx->op1;
292
+ as->mrm.scale = XM_SCALE2;
293
+ }
294
+ }
/* The index register is removed from the local allow set, so the base
** register allocated at noadd is a different one.
*/
295
+ r = ra_alloc1(as, idx, allow);
296
+ rset_clear(allow, r);
297
+ as->mrm.idx = (uint8_t)r;
298
+ }
299
+ noadd:
300
+ as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow);
301
+ }
302
+ }
303
+
304
+ /* Fuse load into memory operand. */
305
/* Provide IR reference ref as an instruction operand.
** Returns a register, or RID_MRM after filling as->mrm with a memory operand.
** Cases, in order:
**   - ref already has a register: return it, unless allow == RSET_EMPTY,
**     in which case the value is addressed in its spill slot (fusespill).
**   - IR_KNUM/IR_KINT64: address the constant in memory when fewer than two
**     matching registers are both free and unmodified.
**   - mayfuse(as, ref) and a supported load (SLOAD, FLOAD, ALOAD, HLOAD,
**     ULOAD, XLOAD, VLOAD): fuse the load. All but VLOAD are subject to
**     additional type/flag checks and a noconflict() check.
**   - otherwise: address the spill slot if no allowed register is free and
**     the ref qualifies, else allocate a register with ra_allocref().
*/
+ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
306
+ {
307
+ IRIns *ir = IR(ref);
308
+ if (ra_hasreg(ir->r)) {
309
+ if (allow != RSET_EMPTY) { /* Fast path. */
310
+ ra_noweak(as, ir->r);
311
+ return ir->r;
312
+ }
/* Also reached by the goto at the end of the function. */
313
+ fusespill:
314
+ /* Force a spill if only memory operands are allowed (asm_x87load). */
315
+ as->mrm.base = RID_ESP;
316
+ as->mrm.ofs = ra_spill(as, ir);
317
+ as->mrm.idx = RID_NONE;
318
+ return RID_MRM;
319
+ }
320
+ if (ir->o == IR_KNUM) {
321
+ RegSet avail = as->freeset & ~as->modset & RSET_FPR;
322
+ lua_assert(allow != RSET_EMPTY);
/* avail & (avail-1) is zero iff at most one bit is set in avail. */
323
+ if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
324
+ as->mrm.ofs = ptr2addr(ir_knum(ir));
325
+ as->mrm.base = as->mrm.idx = RID_NONE;
326
+ return RID_MRM;
327
+ }
328
+ } else if (ir->o == IR_KINT64) {
329
+ RegSet avail = as->freeset & ~as->modset & RSET_GPR;
330
+ lua_assert(allow != RSET_EMPTY);
331
+ if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
332
+ as->mrm.ofs = ptr2addr(ir_kint64(ir));
333
+ as->mrm.base = as->mrm.idx = RID_NONE;
334
+ return RID_MRM;
335
+ }
336
+ } else if (mayfuse(as, ref)) {
/* Address registers are allocated from xallow: allow itself if it holds
** any GPR, else the full GPR set (e.g. for an FPR-only or empty allow).
*/
337
+ RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
338
+ if (ir->o == IR_SLOAD) {
339
+ if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
340
+ noconflict(as, ref, IR_RETF, 0)) {
341
+ as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
/* Slots are 8 bytes wide; IRSLOAD_FRAME selects the word at offset 4. */
342
+ as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
343
+ as->mrm.idx = RID_NONE;
344
+ return RID_MRM;
345
+ }
346
+ } else if (ir->o == IR_FLOAD) {
347
+ /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
348
+ if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
349
+ noconflict(as, ref, IR_FSTORE, 0)) {
350
+ asm_fusefref(as, ir, xallow);
351
+ return RID_MRM;
352
+ }
353
+ } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
/* ir->o + IRDELTA_L2S maps the load opcode to its store counterpart. */
354
+ if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
355
+ asm_fuseahuref(as, ir->op1, xallow);
356
+ return RID_MRM;
357
+ }
358
+ } else if (ir->o == IR_XLOAD) {
359
+ /* Generic fusion is not ok for 8/16 bit operands (but see asm_comp).
360
+ ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
361
+ */
362
+ if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
363
+ noconflict(as, ref, IR_XSTORE, 0)) {
364
+ asm_fusexref(as, ir->op1, xallow);
365
+ return RID_MRM;
366
+ }
367
+ } else if (ir->o == IR_VLOAD) {
368
+ asm_fuseahuref(as, ir->op1, xallow);
369
+ return RID_MRM;
370
+ }
371
+ }
/* Nothing fused. Without a free allowed register, a non-constant ref is
** addressed in its spill slot if only memory operands are allowed, if it
** already has a spill slot or if iscrossref() holds.
*/
372
+ if (!(as->freeset & allow) && !irref_isk(ref) &&
373
+ (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
374
+ goto fusespill;
375
+ return ra_allocref(as, ref, allow);
376
+ }
377
+
378
#if LJ_64
/* Variant of asm_fuseload() for operations with a selectable operand size:
** a 32 bit value is never fused into a 64 bit operation, it always gets a
** register instead.
*/
static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
{
  if (!is64 || irt_is64(IR(ref)->t))
    return asm_fuseload(as, ref, allow);
  return ra_alloc1(as, ref, allow);
}
#else
/* No 64 bit operations to guard against: plain asm_fuseload(). */
#define asm_fuseloadm(as, ref, allow, is64)  asm_fuseload(as, (ref), (allow))
#endif
389
+
390
+ /* -- Calls --------------------------------------------------------------- */
391
+
392
+ /* Count the required number of stack slots for a call. */
393
+ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
394
+ {
395
+ uint32_t i, nargs = CCI_XNARGS(ci);
396
+ int nslots = 0;
397
+ #if LJ_64
398
+ if (LJ_ABI_WIN) {
399
+ nslots = (int)(nargs*2); /* Only matters for more than four args. */
400
+ } else {
401
+ int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
402
+ for (i = 0; i < nargs; i++)
403
+ if (args[i] && irt_isfp(IR(args[i])->t)) {
404
+ if (nfpr > 0) nfpr--; else nslots += 2;
405
+ } else {
406
+ if (ngpr > 0) ngpr--; else nslots += 2;
407
+ }
408
+ }
409
+ #else
410
+ int ngpr = 0;
411
+ if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
412
+ ngpr = 2;
413
+ else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
414
+ ngpr = 1;
415
+ for (i = 0; i < nargs; i++)
416
+ if (args[i] && irt_isfp(IR(args[i])->t)) {
417
+ nslots += irt_isnum(IR(args[i])->t) ? 2 : 1;
418
+ } else {
419
+ if (ngpr > 0) ngpr--; else nslots++;
420
+ }
421
+ #endif
422
+ return nslots;
423
+ }
424
+
425
+ /* Generate a call to a C function. */
426
/* Emit the call described by ci together with the setup of its arguments.
** args[n] are IR references, their number is CCI_XNARGS(ci). Register
** arguments are taken from the packed list gprs (5 bits per register id)
** and, on x64, from FPRs starting at REGARG_FIRSTFPR. All other arguments
** are stored at [esp+ofs], starting at STACKARG_OFS.
** emit_call() is only emitted if ci->func is non-NULL.
** NOTE(review): as->mcp is decremented when bytes are written below, so
** code appears to be generated backwards: the call is emitted first here
** but would execute after the argument setup. Confirm against the emitter.
*/
+ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
427
+ {
428
+ uint32_t n, nargs = CCI_XNARGS(ci);
429
+ int32_t ofs = STACKARG_OFS;
430
+ #if LJ_64
431
+ uint32_t gprs = REGARG_GPRS;
432
+ Reg fpr = REGARG_FIRSTFPR;
433
+ #if !LJ_ABI_WIN
434
+ MCode *patchnfpr = NULL;
435
+ #endif
436
+ #else
/* x86: cdecl passes nothing in registers, thiscall only the first entry
** of REGARG_GPRS, fastcall all of them.
*/
437
+ uint32_t gprs = 0;
438
+ if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) {
439
+ if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
440
+ gprs = (REGARG_GPRS & 31);
441
+ else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
442
+ gprs = REGARG_GPRS;
443
+ }
444
+ #endif
445
+ if ((void *)ci->func)
446
+ emit_call(as, ci->func);
447
+ #if LJ_64
448
+ if ((ci->flags & CCI_VARARG)) { /* Special handling for vararg calls. */
449
+ #if LJ_ABI_WIN
450
+ for (n = 0; n < 4 && n < nargs; n++) {
451
+ IRIns *ir = IR(args[n]);
452
+ if (irt_isfp(ir->t)) /* Duplicate FPRs in GPRs. */
453
+ emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n),
454
+ ((gprs >> (n*5)) & 31)); /* Either MOVD or MOVQ. */
455
+ }
456
+ #else
/* Reserve the immediate byte now; it is filled in at the end of this
** function, once the number of used FPRs is known.
*/
457
+ patchnfpr = --as->mcp; /* Indicate number of used FPRs in register al. */
458
+ *--as->mcp = XI_MOVrib | RID_EAX;
459
+ #endif
460
+ }
461
+ #endif
462
+ for (n = 0; n < nargs; n++) { /* Setup args. */
463
+ IRRef ref = args[n];
464
+ IRIns *ir = IR(ref);
465
+ Reg r;
/* Pick the argument register r for this argument; r == 0 means stack. */
466
+ #if LJ_64 && LJ_ABI_WIN
467
+ /* Windows/x64 argument registers are strictly positional. */
468
+ r = irt_isfp(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
469
+ fpr++; gprs >>= 5;
470
+ #elif LJ_64
471
+ /* POSIX/x64 argument registers are used in order of appearance. */
472
+ if (irt_isfp(ir->t)) {
473
+ r = fpr <= REGARG_LASTFPR ? fpr++ : 0;
474
+ } else {
475
+ r = gprs & 31; gprs >>= 5;
476
+ }
477
+ #else
478
+ if (ref && irt_isfp(ir->t)) {
479
+ r = 0;
480
+ } else {
481
+ r = gprs & 31; gprs >>= 5;
/* A zero ref is skipped, but only after it has used up its register. */
482
+ if (!ref) continue;
483
+ }
484
+ #endif
485
+ if (r) { /* Argument is in a register. */
/* NOTE(review): refs below ASMREF_TMP1 are loaded as immediates here, so
** they are presumably IR constants. Confirm against the IR ref layout.
*/
486
+ if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
487
+ #if LJ_64
488
+ if (ir->o == IR_KINT64)
489
+ emit_loadu64(as, r, ir_kint64(ir)->u64);
490
+ else
491
+ #endif
492
+ emit_loadi(as, r, ir->i);
493
+ } else {
494
+ lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
495
+ if (ra_hasreg(ir->r)) {
496
+ ra_noweak(as, ir->r);
497
+ emit_movrr(as, ir, r, ir->r);
498
+ } else {
499
+ ra_allocref(as, ref, RID2RSET(r));
500
+ }
501
+ }
502
+ } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */
503
+ lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */
504
+ if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
505
+ /* Split stores for unaligned FP consts. */
506
+ emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
507
+ emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
508
+ } else {
509
+ r = ra_alloc1(as, ref, RSET_FPR);
510
+ emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto,
511
+ r, RID_ESP, ofs);
512
+ }
/* A float takes 4 bytes on x86; every other FP stack argument takes 8. */
513
+ ofs += (LJ_32 && irt_isfloat(ir->t)) ? 4 : 8;
514
+ } else { /* Non-FP argument is on stack. */
515
+ if (LJ_32 && ref < ASMREF_TMP1) {
516
+ emit_movmroi(as, RID_ESP, ofs, ir->i);
517
+ } else {
518
+ r = ra_alloc1(as, ref, RSET_GPR);
519
+ emit_movtomro(as, REX_64 + r, RID_ESP, ofs);
520
+ }
521
+ ofs += sizeof(intptr_t);
522
+ }
523
+ checkmclim(as);
524
+ }
525
+ #if LJ_64 && !LJ_ABI_WIN
/* Fill in the byte reserved above with the number of FPRs consumed. */
526
+ if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
527
+ #endif
528
+ }
529
+
530
+ /* Setup result reg/sp for call. Evict scratch regs. */
531
/* Evicts RSET_SCRATCH, minus the FPRs for CCI_NOFPRCLOBBER calls and minus
** the destination register of ir (and of a following IR_HIOP on x86).
** If the result of ir is used:
**   FP result, x64: taken from RID_FPRET, or for CCI_CASTU64 moved from
**                   RID_RET to the destination register and/or stack slot.
**   FP result, x86: x87 st0 (or RID_RETLO/RID_RETHI for CCI_CASTU64) is
**                   stored to the slot at sps_scale(ir->s) and reloaded
**                   into the destination register, if there is one.
**   x86 with HIOP:  register pair via ra_destpair().
**   anything else:  RID_RET.
** If the result is unused, an x87 result on x86 is popped.
*/
+ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
532
+ {
533
+ RegSet drop = RSET_SCRATCH;
/* hiop: on x86 the next instruction is an IR_HIOP belonging to this call. */
534
+ int hiop = (LJ_32 && (ir+1)->o == IR_HIOP);
535
+ if ((ci->flags & CCI_NOFPRCLOBBER))
536
+ drop &= ~RSET_FPR;
537
+ if (ra_hasreg(ir->r))
538
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
539
+ if (hiop && ra_hasreg((ir+1)->r))
540
+ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
541
+ ra_evictset(as, drop); /* Evictions must be performed first. */
542
+ if (ra_used(ir)) {
543
+ if (irt_isfp(ir->t)) {
544
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
545
+ #if LJ_64
546
+ if ((ci->flags & CCI_CASTU64)) {
547
+ Reg dest = ir->r;
548
+ if (ra_hasreg(dest)) {
549
+ ra_free(as, dest);
550
+ ra_modified(as, dest);
551
+ emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
552
+ }
/* A non-zero ofs also gets the raw 64 bit result stored to the stack. */
553
+ if (ofs) emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs);
554
+ } else {
555
+ ra_destreg(as, ir, RID_FPRET);
556
+ }
557
+ #else
558
+ /* Number result is in x87 st0 for x86 calling convention. */
559
+ Reg dest = ir->r;
560
+ if (ra_hasreg(dest)) {
561
+ ra_free(as, dest);
562
+ ra_modified(as, dest);
563
+ emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
564
+ dest, RID_ESP, ofs);
565
+ }
566
+ if ((ci->flags & CCI_CASTU64)) {
567
+ emit_movtomro(as, RID_RETLO, RID_ESP, ofs);
568
+ emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
569
+ } else {
570
+ emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
571
+ irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
572
+ }
573
+ #endif
574
+ #if LJ_32
575
+ } else if (hiop) {
576
+ ra_destpair(as, ir);
577
+ #endif
578
+ } else {
579
+ lua_assert(!irt_ispri(ir->t));
580
+ ra_destreg(as, ir, RID_RET);
581
+ }
582
+ } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
583
+ emit_x87op(as, XI_FPOP); /* Pop unused result from x87 st0. */
584
+ }
585
+ }
586
+
587
+ /* Return a constant function pointer or NULL for indirect calls. */
588
+ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
589
+ {
590
+ #if LJ_32
591
+ UNUSED(as);
592
+ if (irref_isk(func))
593
+ return (void *)irf->i;
594
+ #else
595
+ if (irref_isk(func)) {
596
+ MCode *p;
597
+ if (irf->o == IR_KINT64)
598
+ p = (MCode *)(void *)ir_k64(irf)->u64;
599
+ else
600
+ p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
601
+ if (p - as->mcp == (int32_t)(p - as->mcp))
602
+ return p; /* Call target is still in +-2GB range. */
603
+ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */
604
+ }
605
+ #endif
606
+ return NULL;
607
+ }
608
+
609
/* Assemble a call whose target and arguments are described by IR
** instruction ir. The target is ir->op2, or op1 of ir->op2 if that is an
** IR_CARG. Targets accepted by asm_callx_func() are called directly, all
** others indirectly through a non-scratch GPR.
** On x86, a non-cdecl call additionally gets a stack pointer adjustment of
** 4 bytes per argument slot counted by asm_count_call_slots().
*/
+ static void asm_callx(ASMState *as, IRIns *ir)
610
+ {
611
+ IRRef args[CCI_NARGS_MAX*2];
612
+ CCallInfo ci;
613
+ IRRef func;
614
+ IRIns *irf;
615
+ int32_t spadj = 0;
616
+ ci.flags = asm_callx_flags(as, ir);
617
+ asm_collectargs(as, ir, &ci, args);
618
+ asm_setupresult(as, ir, &ci);
619
+ #if LJ_32
620
+ /* Have to readjust stack after non-cdecl calls due to callee cleanup. */
621
+ if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL)
622
+ spadj = 4 * asm_count_call_slots(as, &ci, args);
623
+ #endif
624
+ func = ir->op2; irf = IR(func);
625
+ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
626
+ ci.func = (ASMFunction)asm_callx_func(as, irf, func);
/* ci.func stays NULL for an indirect call, so asm_gencall() below emits
** no call of its own. The indirect CALL is emitted here instead.
*/
627
+ if (!(void *)ci.func) {
628
+ /* Use a (hoistable) non-scratch register for indirect calls. */
629
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
630
+ Reg r = ra_alloc1(as, func, allow);
631
+ if (LJ_32) emit_spsub(as, spadj); /* Above code may cause restores! */
632
+ emit_rr(as, XO_GROUP5, XOg_CALL, r);
633
+ } else if (LJ_32) {
634
+ emit_spsub(as, spadj);
635
+ }
636
+ asm_gencall(as, &ci, args);
637
+ }
638
+
639
+ /* -- Returns ------------------------------------------------------------- */
640
+
641
+ /* Return to lower frame. Guard that it goes to the right spot. */
642
/* ir->op2 references a constant pointer pc. delta is 1+LJ_FR2 plus the
** A operand of the bytecode instruction just before pc.
** Emits a compare of the 32 bit word at [base-4] with the address of pc,
** a guard on CC_NE, an adjustment of base by -8*delta and emit_setgl() of
** base for the jit_base field.
** Side effects on the assembler state: as->topslot is lowered by delta
** (clamped at 0) and the type of REF_BASE is marked.
*/
+ static void asm_retf(ASMState *as, IRIns *ir)
643
+ {
644
+ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
645
+ void *pc = ir_kptr(IR(ir->op2));
646
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
647
+ as->topslot -= (BCReg)delta;
/* topslot is unsigned: an underflow shows up as a negative int32_t. */
648
+ if ((int32_t)as->topslot < 0) as->topslot = 0;
649
+ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
650
+ emit_setgl(as, base, jit_base);
651
+ emit_addptr(as, base, -8*delta);
652
+ asm_guardcc(as, CC_NE);
653
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
654
+ }
655
+
656
+ /* -- Type conversions ---------------------------------------------------- */
657
+
658
/* Guarded conversion of the number in FPR left to an integer in the GPR
** destination of ir.
** Emits a truncating CVTTSD2SI into dest, a conversion of dest back to a
** number in the scratch FPR tmp (preceded by an XORPS of tmp) and a UCOMISD
** of left against tmp, with guards on CC_NE and CC_P.
** NOTE(review): the emit calls are presumably listed in reverse execution
** order (code generated backwards). Confirm against the emitter.
*/
+ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
659
+ {
660
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
661
+ Reg dest = ra_dest(as, ir, RSET_GPR);
662
+ asm_guardcc(as, CC_P);
663
+ asm_guardcc(as, CC_NE);
664
+ emit_rr(as, XO_UCOMISD, left, tmp);
665
+ emit_rr(as, XO_CVTSI2SD, tmp, dest);
666
+ emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
667
+ emit_rr(as, XO_CVTTSD2SI, dest, left);
668
+ /* Can't fuse since left is needed twice. */
669
+ }
670
+
671
/* Emits an ADDSD of op2 (possibly fused as a memory operand) to the FPR tmp
** and a MOVD from tmp to the GPR destination of ir.
** tmp is the register allocated for op1 if op1 has none yet, otherwise a
** scratch FPR. ra_left() then ties tmp to op1.
** NOTE(review): op2 is presumably the bias constant that shifts the integer
** part into the low mantissa bits. Not visible here, confirm at the
** recorder.
*/
+ static void asm_tobit(ASMState *as, IRIns *ir)
672
+ {
673
+ Reg dest = ra_dest(as, ir, RSET_GPR);
674
+ Reg tmp = ra_noreg(IR(ir->op1)->r) ?
675
+ ra_alloc1(as, ir->op1, RSET_FPR) :
676
+ ra_scratch(as, RSET_FPR);
/* op2 must not end up in tmp, since tmp is overwritten by the ADDSD. */
677
+ Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
678
+ emit_rr(as, XO_MOVDto, tmp, dest);
679
+ emit_mrm(as, XO_ADDSD, tmp, right);
680
+ ra_left(as, tmp, ir->op1);
681
+ }
682
+
683
/* Assemble a type conversion. The source type st is taken from ir->op2
** (IRCONV_SRCMASK), the destination type from ir->t, the operand is op1.
** Handled cases, in order:
**   FP destination:  FP to FP, U32 to FP on x86 (bias trick), integer to FP
**                    (with a 2^64 fixup for U64 sources on x64).
**   FP source:       guarded number to int via asm_tointg(), otherwise a
**                    truncating CVTT*2SI with a second conversion attempt
**                    for U64 (x64) or U32 (x86) destinations.
**   8/16 bit source: sign or zero extension with MOVSX/MOVZX.
**   otherwise:       32/64 bit integer moves, sign extension, no-op casts.
** 64 bit integer conversions on x86 are asserted not to arrive here.
*/
+ static void asm_conv(ASMState *as, IRIns *ir)
684
+ {
685
+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
686
+ int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64));
687
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
688
+ IRRef lref = ir->op1;
689
+ lua_assert(irt_type(ir->t) != st);
690
+ lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */
691
+ if (irt_isfp(ir->t)) {
692
+ Reg dest = ra_dest(as, ir, RSET_FPR);
693
+ if (stfp) { /* FP to FP conversion. */
694
+ Reg left = asm_fuseload(as, lref, RSET_FPR);
695
+ emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left);
696
+ if (left == dest) return; /* Avoid the XO_XORPS. */
697
+ } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
698
+ /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
699
+ cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
700
+ Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
701
+ if (irt_isfloat(ir->t))
702
+ emit_rr(as, XO_CVTSD2SS, dest, dest);
703
+ emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
704
+ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
705
+ emit_loadn(as, bias, k);
706
+ emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
707
+ return;
708
+ } else { /* Integer to FP conversion. */
/* Unsigned sources on x64 are always taken from a register. */
709
+ Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
710
+ ra_alloc1(as, lref, RSET_GPR) :
711
+ asm_fuseloadm(as, lref, RSET_GPR, st64);
712
+ if (LJ_64 && st == IRT_U64) {
713
+ MCLabel l_end = emit_label(as);
714
+ const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
715
+ emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
716
+ emit_sjcc(as, CC_NS, l_end);
717
+ emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
718
+ }
/* U32 sources on x64 use the 64 bit form of the signed conversion. */
719
+ emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
720
+ dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
721
+ }
722
+ emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
723
+ } else if (stfp) { /* FP to integer conversion. */
724
+ if (irt_isguard(ir->t)) {
725
+ /* Checked conversions are only supported from number to int. */
726
+ lua_assert(irt_isint(ir->t) && st == IRT_NUM);
727
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
728
+ } else {
729
+ Reg dest = ra_dest(as, ir, RSET_GPR);
730
+ x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
731
+ if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
732
+ /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
733
+ /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
/* tmp is modified by the fixup add, so it is either the freshly allocated
** register of the operand or a scratch copy tied to it by ra_left().
*/
734
+ Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
735
+ ra_scratch(as, RSET_FPR);
736
+ MCLabel l_end = emit_label(as);
737
+ if (LJ_32)
738
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
739
+ emit_rr(as, op, dest|REX_64, tmp);
740
+ if (st == IRT_NUM)
741
+ emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J,
742
+ LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
743
+ else
744
+ emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J,
745
+ LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
746
+ emit_sjcc(as, CC_NS, l_end);
747
+ emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
748
+ emit_rr(as, op, dest|REX_64, tmp);
749
+ ra_left(as, tmp, lref);
750
+ } else {
751
+ Reg left = asm_fuseload(as, lref, RSET_FPR);
752
+ if (LJ_64 && irt_isu32(ir->t))
753
+ emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
754
+ emit_mrm(as, op,
755
+ dest|((LJ_64 &&
756
+ (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
757
+ left);
758
+ }
759
+ }
760
+ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
761
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
762
+ RegSet allow = RSET_GPR;
763
+ x86Op op;
764
+ lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
/* Byte sources restrict the source register to RSET_GPR8. */
765
+ if (st == IRT_I8) {
766
+ op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX;
767
+ } else if (st == IRT_U8) {
768
+ op = XO_MOVZXb; allow = RSET_GPR8; dest |= FORCE_REX;
769
+ } else if (st == IRT_I16) {
770
+ op = XO_MOVSXw;
771
+ } else {
772
+ op = XO_MOVZXw;
773
+ }
774
+ left = asm_fuseload(as, lref, allow);
775
+ /* Add extra MOV if source is already in wrong register. */
776
+ if (!LJ_64 && left != RID_MRM && !rset_test(allow, left)) {
777
+ Reg tmp = ra_scratch(as, allow);
778
+ emit_rr(as, op, dest, tmp);
779
+ emit_rr(as, XO_MOV, tmp, left);
780
+ } else {
781
+ emit_mrm(as, op, dest, left);
782
+ }
783
+ } else { /* 32/64 bit integer conversions. */
784
+ if (LJ_32) { /* Only need to handle 32/32 bit no-op (cast) on x86. */
785
+ Reg dest = ra_dest(as, ir, RSET_GPR);
786
+ ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */
787
+ } else if (irt_is64(ir->t)) {
788
+ Reg dest = ra_dest(as, ir, RSET_GPR);
789
+ if (st64 || !(ir->op2 & IRCONV_SEXT)) {
790
+ /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
791
+ ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */
792
+ } else { /* 32 to 64 bit sign extension. */
793
+ Reg left = asm_fuseload(as, lref, RSET_GPR);
794
+ emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
795
+ }
796
+ } else {
797
+ Reg dest = ra_dest(as, ir, RSET_GPR);
798
+ if (st64) {
799
+ Reg left = asm_fuseload(as, lref, RSET_GPR);
800
+ /* This is either a 32 bit reg/reg mov which zeroes the hiword
801
+ ** or a load of the loword from a 64 bit address.
802
+ */
803
+ emit_mrm(as, XO_MOV, dest, left);
804
+ } else { /* 32/32 bit no-op (cast). */
805
+ ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */
806
+ }
807
+ }
808
+ }
809
+ }
810
+
811
+ #if LJ_32 && LJ_HASFFI
812
+ /* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */
813
+
814
+ /* 64 bit integer to FP conversion in 32 bit mode. */
815
/* ir is the second instruction of a pair: ir->op1 is the high word,
** (ir-1)->op1 the low word and (ir-1)->op2 holds the source type.
** Emits stores of lo/hi to [esp+0]/[esp+4], an x87 FILD of that qword, a
** conditional add of 2^64 for U64 sources and an FSTP to the stack slot at
** sps_scale(ir->s). If ir has a register, the result is reloaded from that
** slot with MOVSD/MOVSS.
** NOTE(review): ir is presumably the IR_HIOP following the IR_CONV.
** Confirm at the caller.
*/
+ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
816
+ {
817
+ Reg hi = ra_alloc1(as, ir->op1, RSET_GPR);
818
+ Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi));
819
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
820
+ Reg dest = ir->r;
821
+ if (ra_hasreg(dest)) {
822
+ ra_free(as, dest);
823
+ ra_modified(as, dest);
824
+ emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
825
+ }
826
+ emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
827
+ irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
828
+ if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
829
+ /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
830
+ MCLabel l_end = emit_label(as);
831
+ emit_rma(as, XO_FADDq, XOg_FADDq,
832
+ lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
833
+ emit_sjcc(as, CC_NS, l_end);
834
+ emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
835
+ } else {
836
+ lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
837
+ }
838
+ emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
839
+ /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
840
+ emit_rmro(as, XO_MOVto, hi, RID_ESP, 4);
841
+ emit_rmro(as, XO_MOVto, lo, RID_ESP, 0);
842
+ }
843
+
844
+ /* FP to 64 bit integer conversion in 32 bit mode. */
845
/* ir is the second instruction of a pair: source and destination types come
** from (ir-1)->op2, hi is the destination of ir and lo that of ir-1.
** The FP operand ir->op1 is loaded onto the x87 stack from memory (the
** RSET_EMPTY allow set makes asm_fuseload() produce a memory operand),
** stored as a qword at [esp+0] and read back into lo and hi.
** With JIT_F_SSE3 the truncating FISTTP is used. Otherwise the x87 control
** word is saved with FNSTCW, ORed with 0xc00 for the store and reloaded
** masked with 0xf3ff.
** For U64 destinations the value is duplicated first, so that a second
** conversion attempt after adding -2^64 is possible.
** NOTE(review): the emit calls are presumably listed in reverse execution
** order (code generated backwards). Confirm against the emitter.
*/
+ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
846
+ {
847
+ IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
848
+ IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
849
+ Reg lo, hi;
850
+ lua_assert(st == IRT_NUM || st == IRT_FLOAT);
851
+ lua_assert(dt == IRT_I64 || dt == IRT_U64);
852
+ hi = ra_dest(as, ir, RSET_GPR);
853
+ lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
/* lo doubles as a temporary for the control word below, so the result
** load is only emitted if the low word is actually used.
*/
854
+ if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
855
+ /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */
856
+ if (!(as->flags & JIT_F_SSE3)) { /* Set FPU rounding mode to default. */
857
+ emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4);
858
+ emit_rmro(as, XO_MOVto, lo, RID_ESP, 4);
859
+ emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
860
+ }
861
+ if (dt == IRT_U64) {
862
+ /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
863
+ MCLabel l_pop, l_end = emit_label(as);
864
+ emit_x87op(as, XI_FPOP);
865
+ l_pop = emit_label(as);
866
+ emit_sjmp(as, l_end);
867
+ emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
868
+ if ((as->flags & JIT_F_SSE3))
869
+ emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
870
+ else
871
+ emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
872
+ emit_rma(as, XO_FADDq, XOg_FADDq,
873
+ lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
874
+ emit_sjcc(as, CC_NS, l_pop);
875
+ emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
876
+ }
877
+ emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
878
+ if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */
879
+ emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
880
+ } else { /* Otherwise set FPU rounding mode to truncate before the store. */
881
+ emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
882
+ emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0);
883
+ emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0);
884
+ emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0);
885
+ emit_loadi(as, lo, 0xc00);
886
+ emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0);
887
+ }
888
+ if (dt == IRT_U64)
889
+ emit_x87op(as, XI_FDUP);
890
+ emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd,
891
+ st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
892
+ asm_fuseload(as, ir->op1, RSET_EMPTY));
893
+ }
894
+
895
+ static void asm_conv64(ASMState *as, IRIns *ir)
896
+ {
897
+ if (irt_isfp(ir->t))
898
+ asm_conv_fp_int64(as, ir);
899
+ else
900
+ asm_conv_int64_fp(as, ir);
901
+ }
902
+ #endif
903
+
904
/* Assemble a string to number conversion as a call to lj_strscan_num().
** Arguments: the string (ir->op1) and a pointer to the result TValue, which
** ASMREF_TMP1 receives by a LEA of the stack slot at sps_scale(ir->s).
** The status returned in RID_RET is tested and a guard on CC_E is emitted.
** The scratch registers are evicted before the call, plus the destination
** register of ir if RSET_SCRATCH does not cover all FPRs.
*/
+ static void asm_strto(ASMState *as, IRIns *ir)
905
+ {
906
+ /* Force a spill slot for the destination register (if any). */
907
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
908
+ IRRef args[2];
909
+ RegSet drop = RSET_SCRATCH;
910
+ if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
911
+ rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
912
+ ra_evictset(as, drop);
913
+ asm_guardcc(as, CC_E);
914
+ emit_rr(as, XO_TEST, RID_RET, RID_RET); /* Test return status. */
915
+ args[0] = ir->op1; /* GCstr *str */
916
+ args[1] = ASMREF_TMP1; /* TValue *n */
917
+ asm_gencall(as, ci, args);
918
+ /* Store the result to the spill slot or temp slots. */
919
+ emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
920
+ RID_ESP, sps_scale(ir->s));
921
+ }
922
+
923
+ /* -- Memory references --------------------------------------------------- */
924
+
925
+ /* Get pointer to TValue. */
926
+ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
927
+ {
928
+ IRIns *ir = IR(ref);
929
+ if (irt_isnum(ir->t)) {
930
+ /* For numbers use the constant itself or a spill slot as a TValue. */
931
+ if (irref_isk(ref))
932
+ emit_loada(as, dest, ir_knum(ir));
933
+ else
934
+ emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
935
+ } else {
936
+ /* Otherwise use g->tmptv to hold the TValue. */
937
+ if (!irref_isk(ref)) {
938
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
939
+ emit_movtomro(as, REX_64IR(ir, src), dest, 0);
940
+ } else if (!irt_ispri(ir->t)) {
941
+ emit_movmroi(as, dest, 0, ir->i);
942
+ }
943
+ if (!(LJ_64 && irt_islightud(ir->t)))
944
+ emit_movmroi(as, dest, 4, irt_toitype(ir->t));
945
+ emit_loada(as, dest, &J2G(as->J)->tmptv);
946
+ }
947
+ }
948
+
949
+ static void asm_aref(ASMState *as, IRIns *ir)
950
+ {
951
+ Reg dest = ra_dest(as, ir, RSET_GPR);
952
+ asm_fusearef(as, ir, RSET_GPR);
953
+ if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
954
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
955
+ else if (as->mrm.base != dest)
956
+ emit_rr(as, XO_MOV, dest, as->mrm.base);
957
+ }
958
+
959
+ /* Inlined hash lookup. Specialized for key type and for const keys.
960
+ ** The equivalent C code is:
961
+ ** Node *n = hashkey(t, key);
962
+ ** do {
963
+ ** if (lj_obj_equal(&n->key, key)) return &n->val;
964
+ ** } while ((n = nextnode(n)));
965
+ ** return niltv(L);
966
+ */
967
/* Parameters:
**   ir:    the hash reference. op1 is the table, op2 the key.
**   merge: IR_EQ or IR_NE turn the outcome of the lookup into guards (see
**          the two places testing merge below). Any other value yields a
**          plain lookup that loads the address of niltv into dest when the
**          key is not found and dest is used.
** dest is used as the node pointer while walking the chain.
** NOTE(review): the emit calls are presumably listed in reverse execution
** order (code generated backwards), so the hash computation at the bottom
** of this function would run first. Confirm against the emitter.
*/
+ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
968
+ {
969
+ RegSet allow = RSET_GPR;
970
+ int destused = ra_used(ir);
971
+ Reg dest = ra_dest(as, ir, allow);
972
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
973
+ Reg key = RID_NONE, tmp = RID_NONE;
974
+ IRIns *irkey = IR(ir->op2);
975
+ int isk = irref_isk(ir->op2);
976
+ IRType1 kt = irkey->t;
977
+ uint32_t khash;
978
+ MCLabel l_end, l_loop, l_next;
979
+
980
+ if (!isk) {
/* Non-constant key: number keys live in an FPR, all others in a GPR.
** Non-string keys also need a scratch GPR for the hash computation.
*/
981
+ rset_clear(allow, tab);
982
+ key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
983
+ if (!irt_isstr(kt))
984
+ tmp = ra_scratch(as, rset_exclude(allow, key));
985
+ }
986
+
987
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
988
+ l_end = emit_label(as);
989
+ if (merge == IR_NE)
990
+ asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
991
+ else if (destused)
992
+ emit_loada(as, dest, niltvg(J2G(as->J)));
993
+
994
+ /* Follow hash chain until the end. */
995
+ l_loop = emit_sjcc_label(as, CC_NZ);
996
+ emit_rr(as, XO_TEST, dest, dest);
997
+ emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
998
+ l_next = emit_label(as);
999
+
1000
+ /* Type and value comparison. */
1001
+ if (merge == IR_EQ)
1002
+ asm_guardcc(as, CC_E);
1003
+ else
1004
+ emit_sjcc(as, CC_E, l_end);
1005
+ if (irt_isnum(kt)) {
1006
+ if (isk) {
1007
+ /* Assumes -0.0 is already canonicalized to +0.0. */
1008
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
1009
+ (int32_t)ir_knum(irkey)->u32.lo);
1010
+ emit_sjcc(as, CC_NE, l_next);
1011
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
1012
+ (int32_t)ir_knum(irkey)->u32.hi);
1013
+ } else {
1014
+ emit_sjcc(as, CC_P, l_next);
1015
+ emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
1016
+ emit_sjcc(as, CC_AE, l_next);
1017
+ /* The type check avoids NaN penalties and complaints from Valgrind. */
1018
+ #if LJ_64
1019
+ emit_u32(as, LJ_TISNUM);
1020
+ emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1021
+ #else
1022
+ emit_i8(as, LJ_TISNUM);
1023
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1024
+ #endif
1025
+ }
1026
+ #if LJ_64
1027
+ } else if (irt_islightud(kt)) {
1028
+ emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
1029
+ #endif
1030
+ } else {
1031
+ if (!irt_ispri(kt)) {
1032
+ lua_assert(irt_isaddr(kt));
1033
+ if (isk)
1034
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
1035
+ ptr2addr(ir_kgc(irkey)));
1036
+ else
1037
+ emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
1038
+ emit_sjcc(as, CC_NE, l_next);
1039
+ }
1040
+ lua_assert(!irt_isnil(kt));
1041
+ emit_i8(as, irt_toitype(kt));
1042
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1043
+ }
1044
+ emit_sfixup(as, l_loop);
1045
+ checkmclim(as);
1046
+
1047
+ /* Load main position relative to tab->node into dest. */
/* Non-constant keys use 1 here, so only a constant key with hash 0 takes
** the shortcut of loading tab->node directly.
*/
1048
+ khash = isk ? ir_khash(irkey) : 1;
1049
+ if (khash == 0) {
1050
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
1051
+ } else {
1052
+ emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
/* Scale the masked hash to a byte offset: either IMUL by sizeof(Node), or
** LEA dest+dest*2 combined with a shift left by 3, i.e. a factor of 24.
*/
1053
+ if ((as->flags & JIT_F_PREFER_IMUL)) {
1054
+ emit_i8(as, sizeof(Node));
1055
+ emit_rr(as, XO_IMULi8, dest, dest);
1056
+ } else {
1057
+ emit_shifti(as, XOg_SHL, dest, 3);
1058
+ emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1059
+ }
1060
+ if (isk) {
1061
+ emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
1062
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1063
+ } else if (irt_isstr(kt)) {
1064
+ emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
1065
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1066
+ } else { /* Must match with hashrot() in lj_tab.c. */
1067
+ emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
1068
+ emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
1069
+ emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
1070
+ emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
1071
+ emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
1072
+ emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
1073
+ emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
1074
+ emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest);
1075
+ if (irt_isnum(kt)) {
1076
+ emit_rr(as, XO_ARITH(XOg_ADD), dest, dest);
1077
+ #if LJ_64
1078
+ emit_shifti(as, XOg_SHR|REX_64, dest, 32);
1079
+ emit_rr(as, XO_MOV, tmp, dest);
1080
+ emit_rr(as, XO_MOVDto, key|REX_64, dest);
1081
+ #else
1082
+ emit_rmro(as, XO_MOV, dest, RID_ESP, ra_spill(as, irkey)+4);
1083
+ emit_rr(as, XO_MOVDto, key, tmp);
1084
+ #endif
1085
+ } else {
1086
+ emit_rr(as, XO_MOV, tmp, key);
1087
+ emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
1088
+ }
1089
+ }
1090
+ }
1091
+ }
1092
+
1093
/* Assemble HREFK: guard that a hash node at a constant slot holds the
** expected constant key. ir->op1 supplies the node base register; ir->op2
** refers to the key-slot instruction, whose op1 is the key constant and
** whose op2 is the slot index (scaled by sizeof(Node) into ofs).
** If the result is used, dest receives node+ofs.
** NOTE(review): the emit_* calls look like they are issued in reverse of
** the final instruction order (guard/immediate before the compare they
** belong to) -- confirm against the emitter.
*/
+ static void asm_hrefk(ASMState *as, IRIns *ir)
1094
+ {
1095
+ IRIns *kslot = IR(ir->op2);
1096
+ IRIns *irkey = IR(kslot->op1);
1097
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); /* Byte offset of the key slot. */
1098
+ Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
1099
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
1100
+ #if !LJ_64
1101
+ MCLabel l_exit;
1102
+ #endif
1103
+ lua_assert(ofs % sizeof(Node) == 0);
1104
+ if (ra_hasreg(dest)) { /* Result used: materialize node+ofs in dest. */
1105
+ if (ofs != 0) {
1106
+ if (dest == node && !(as->flags & JIT_F_LEA_AGU))
1107
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
1108
+ else
1109
+ emit_rmro(as, XO_LEA, dest, node, ofs);
1110
+ } else if (dest != node) {
1111
+ emit_rr(as, XO_MOV, dest, node);
1112
+ }
1113
+ }
1114
+ asm_guardcc(as, CC_NE); /* Guard on key mismatch. */
1115
+ #if LJ_64
1116
+ if (!irt_ispri(irkey->t)) { /* Number or GC key: one 64 bit compare. */
1117
+ Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
1118
+ emit_rmro(as, XO_CMP, key|REX_64, node,
1119
+ ofs + (int32_t)offsetof(Node, key.u64));
1120
+ lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
1121
+ /* Assumes -0.0 is already canonicalized to +0.0. */
1122
+ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
1123
+ ((uint64_t)irt_toitype(irkey->t) << 32) |
1124
+ (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
1125
+ } else { /* Primitive key: only the type tag is compared. */
1126
+ lua_assert(!irt_isnil(irkey->t));
1127
+ emit_i8(as, irt_toitype(irkey->t));
1128
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1129
+ ofs + (int32_t)offsetof(Node, key.it));
1130
+ }
1131
+ #else
1132
+ l_exit = emit_label(as); /* Target of the short jumps on a partial mismatch. */
1133
+ if (irt_isnum(irkey->t)) { /* Number key: compare lo and hi words separately. */
1134
+ /* Assumes -0.0 is already canonicalized to +0.0. */
1135
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1136
+ ofs + (int32_t)offsetof(Node, key.u32.lo),
1137
+ (int32_t)ir_knum(irkey)->u32.lo);
1138
+ emit_sjcc(as, CC_NE, l_exit);
1139
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1140
+ ofs + (int32_t)offsetof(Node, key.u32.hi),
1141
+ (int32_t)ir_knum(irkey)->u32.hi);
1142
+ } else {
1143
+ if (!irt_ispri(irkey->t)) { /* GC key: compare the object reference too. */
1144
+ lua_assert(irt_isgcv(irkey->t));
1145
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1146
+ ofs + (int32_t)offsetof(Node, key.gcr),
1147
+ ptr2addr(ir_kgc(irkey)));
1148
+ emit_sjcc(as, CC_NE, l_exit);
1149
+ }
1150
+ lua_assert(!irt_isnil(irkey->t));
1151
+ emit_i8(as, irt_toitype(irkey->t));
1152
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1153
+ ofs + (int32_t)offsetof(Node, key.it));
1154
+ }
1155
+ #endif
1156
+ }
1157
+
1158
/* Assemble an upvalue reference: put a pointer to the upvalue's value in dest.
** Constant function (ir->op1): the address of the upvalue's `v` field is
** taken at assembly time and dest is loaded from it with one MOV.
** Otherwise the GCupval pointer is fetched from func->uvptr[ir->op2 >> 8]
** into a scratch register; IR_UREFC then takes the address of the `tv`
** field and guards on the `closed` byte compared against 1, while the
** other case loads the `v` field.
*/
+ static void asm_uref(ASMState *as, IRIns *ir)
1159
+ {
1160
+ /* NYI: Check that UREFO is still open and not aliasing a slot. */
1161
+ Reg dest = ra_dest(as, ir, RSET_GPR);
1162
+ if (irref_isk(ir->op1)) {
1163
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
1164
+ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
1165
+ emit_rma(as, XO_MOV, dest, v);
1166
+ } else {
1167
+ Reg uv = ra_scratch(as, RSET_GPR);
1168
+ Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
1169
+ if (ir->o == IR_UREFC) {
1170
+ emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
1171
+ asm_guardcc(as, CC_NE); /* Guard on closed != 1. */
1172
+ emit_i8(as, 1);
1173
+ emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
1174
+ } else {
1175
+ emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
1176
+ }
1177
+ emit_rmro(as, XO_MOV, uv, func, /* uv = func->uvptr[index], 4 bytes per entry. */
1178
+ (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
1179
+ }
1180
+ }
1181
+
1182
/* Assemble a field reference: LEA of the fused field operand into dest.
** NOTE(review): asm_fusefref() presumably fills as->mrm, which the
** RID_MRM operand then refers to -- confirm.
*/
+ static void asm_fref(ASMState *as, IRIns *ir)
1183
+ {
1184
+ Reg dest = ra_dest(as, ir, RSET_GPR);
1185
+ asm_fusefref(as, ir, RSET_GPR);
1186
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
1187
+ }
1188
+
1189
/* Assemble a string reference: compute the fused address into dest.
** Picks the cheapest form based on as->mrm after fusion:
** no base register -> load the offset as an immediate;
** base == dest and no index -> ADD the offset in place;
** anything else -> LEA of the full memory operand.
*/
+ static void asm_strref(ASMState *as, IRIns *ir)
1190
+ {
1191
+ Reg dest = ra_dest(as, ir, RSET_GPR);
1192
+ asm_fusestrref(as, ir, RSET_GPR);
1193
+ if (as->mrm.base == RID_NONE)
1194
+ emit_loadi(as, dest, as->mrm.ofs);
1195
+ else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
1196
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
1197
+ else
1198
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
1199
+ }
1200
+
1201
+ /* -- Loads and stores ---------------------------------------------------- */
1202
+
1203
/* Assemble FLOAD/XLOAD: load from a fused memory operand into dest.
** dest comes from the FPR set for FP result types, else from the GPR set.
** The move opcode is selected from the IR result type: sign/zero-extending
** moves for 8/16 bit integers, MOVSD/MOVSS for num/float, plain MOV
** otherwise (with REX_64 added for 64 bit types on LJ_64).
*/
+ static void asm_fxload(ASMState *as, IRIns *ir)
1204
+ {
1205
+ Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1206
+ x86Op xo;
1207
+ if (ir->o == IR_FLOAD)
1208
+ asm_fusefref(as, ir, RSET_GPR);
1209
+ else
1210
+ asm_fusexref(as, ir->op1, RSET_GPR);
1211
+ /* ir->op2 is ignored -- unaligned loads are ok on x86. */
1212
+ switch (irt_type(ir->t)) {
1213
+ case IRT_I8: xo = XO_MOVSXb; break;
1214
+ case IRT_U8: xo = XO_MOVZXb; break;
1215
+ case IRT_I16: xo = XO_MOVSXw; break;
1216
+ case IRT_U16: xo = XO_MOVZXw; break;
1217
+ case IRT_NUM: xo = XO_MOVSD; break;
1218
+ case IRT_FLOAT: xo = XO_MOVSS; break;
1219
+ default:
1220
+ if (LJ_64 && irt_is64(ir->t))
1221
+ dest |= REX_64;
1222
+ else
1223
+ lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
1224
+ xo = XO_MOV;
1225
+ break;
1226
+ }
1227
+ emit_mrm(as, xo, dest, RID_MRM);
1228
+ }
1229
+
1230
+ #define asm_fload(as, ir) asm_fxload(as, ir) /* FLOAD is handled by the shared FLOAD/XLOAD path. */
1231
+ #define asm_xload(as, ir) asm_fxload(as, ir) /* XLOAD is handled by the shared FLOAD/XLOAD path. */
1232
+
1233
/* Assemble FSTORE/XSTORE: store ir->op2 to a fused memory operand.
** Nothing is emitted for a sunk store (ir->r == RID_SINK).
** If the value is a 32 bit constant (asm_isk32 succeeds) and the type is
** neither 16 bit nor FP, src stays RID_NONE and an immediate store of k is
** emitted. Otherwise the value is allocated to a register: FPR for FP
** types, RSET_GPR8 for 8 bit types, any GPR otherwise.
** On non-LJ_64 builds a value that already sits outside the permitted set
** is copied into a scratch register from that set before the store.
*/
+ static void asm_fxstore(ASMState *as, IRIns *ir)
1233
+ {
1234
+ RegSet allow = RSET_GPR;
1235
+ Reg src = RID_NONE, osrc = RID_NONE;
1236
+ int32_t k = 0;
1237
+ if (ir->r == RID_SINK)
1238
+ return;
1239
+ /* The IRT_I16/IRT_U16 stores should never be simplified for constant
1240
+ ** values since mov word [mem], imm16 has a length-changing prefix.
1241
+ */
1242
+ if (irt_isi16(ir->t) || irt_isu16(ir->t) || irt_isfp(ir->t) ||
1243
+ !asm_isk32(as, ir->op2, &k)) {
1244
+ RegSet allow8 = irt_isfp(ir->t) ? RSET_FPR :
1245
+ (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
1246
+ src = osrc = ra_alloc1(as, ir->op2, allow8);
1247
+ if (!LJ_64 && !rset_test(allow8, src)) { /* Already in wrong register. */
1248
+ rset_clear(allow, osrc);
1249
+ src = ra_scratch(as, allow8);
1250
+ }
1251
+ rset_clear(allow, src); /* Keep the address registers distinct from src. */
1252
+ }
1253
+ if (ir->o == IR_FSTORE) {
1254
+ asm_fusefref(as, IR(ir->op1), allow);
1255
+ } else {
1256
+ asm_fusexref(as, ir->op1, allow);
1257
+ if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4; /* HIOP stores go 4 bytes above the fused address. */
1258
+ }
1259
+ if (ra_hasreg(src)) { /* Register source. */
1260
+ x86Op xo;
1261
+ switch (irt_type(ir->t)) {
1262
+ case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
1263
+ case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
1264
+ case IRT_NUM: xo = XO_MOVSDto; break;
1265
+ case IRT_FLOAT: xo = XO_MOVSSto; break;
1266
+ #if LJ_64
1267
+ case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
1268
+ #endif
1269
+ default:
1270
+ if (LJ_64 && irt_is64(ir->t))
1271
+ src |= REX_64;
1272
+ else
1273
+ lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
1274
+ xo = XO_MOVto;
1275
+ break;
1276
+ }
1277
+ emit_mrm(as, xo, src, RID_MRM);
1278
+ if (!LJ_64 && src != osrc) { /* Copy from the original register into the substitute. */
1279
+ ra_noweak(as, osrc);
1280
+ emit_rr(as, XO_MOV, src, osrc);
1281
+ }
1282
+ } else { /* Immediate source k. */
1283
+ if (irt_isi8(ir->t) || irt_isu8(ir->t)) {
1284
+ emit_i8(as, k);
1285
+ emit_mrm(as, XO_MOVmib, 0, RID_MRM);
1286
+ } else {
1287
+ lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
1288
+ irt_isaddr(ir->t));
1289
+ emit_i32(as, k);
1290
+ emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
1291
+ }
1292
+ }
1293
+ }
1295
+
1296
+ #define asm_fstore(as, ir) asm_fxstore(as, ir) /* FSTORE is handled by the shared FSTORE/XSTORE path. */
1297
+ #define asm_xstore(as, ir) asm_fxstore(as, ir) /* XSTORE is handled by the shared FSTORE/XSTORE path. */
1298
+
1299
/* LJ_64 only: set up the destination for a 64 bit lightuserdata load.
** Returns the allocated dest register, or RID_NONE when the result is
** unused and no type check was requested. With typecheck set, a guard is
** emitted using a scratch copy of dest: arithmetic shift right by 47,
** compare with -2, guard on CC_NE. The caller emits the actual load.
*/
+ #if LJ_64
1300
+ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1301
+ {
1302
+ if (ra_used(ir) || typecheck) {
1303
+ Reg dest = ra_dest(as, ir, RSET_GPR);
1304
+ if (typecheck) {
1305
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
1306
+ asm_guardcc(as, CC_NE);
1307
+ emit_i8(as, -2);
1308
+ emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
1309
+ emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1310
+ emit_rr(as, XO_MOV, tmp|REX_64, dest);
1311
+ }
1312
+ return dest;
1313
+ } else {
1314
+ return RID_NONE;
1315
+ }
1316
+ }
1317
+ #endif
1318
+
1319
/* Assemble a type-checked load through a fused reference (ir->op1).
** Lightuserdata on LJ_64 is delegated to asm_load_lightud64() with the
** type check always enabled. For every other type the value load is
** emitted only when the result is used, but the type guard is always
** emitted: it compares the word 4 bytes above the value address.
*/
+ static void asm_ahuvload(ASMState *as, IRIns *ir)
1320
+ {
1321
+ lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1322
+ (LJ_DUALNUM && irt_isint(ir->t)));
1323
+ #if LJ_64
1324
+ if (irt_islightud(ir->t)) {
1325
+ Reg dest = asm_load_lightud64(as, ir, 1);
1326
+ if (ra_hasreg(dest)) {
1327
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
1328
+ emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
1329
+ }
1330
+ return;
1331
+ } else
1332
+ #endif
1333
+ if (ra_used(ir)) {
1334
+ RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1335
+ Reg dest = ra_dest(as, ir, allow);
1336
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
1337
+ emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1338
+ } else {
1339
+ asm_fuseahuref(as, ir->op1, RSET_GPR); /* Still needed for the type check below. */
1340
+ }
1341
+ /* Always do the type check, even if the load result is unused. */
1342
+ as->mrm.ofs += 4; /* Address the type word. */
1343
+ asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
1344
+ if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
1345
+ lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
1346
+ emit_u32(as, LJ_TISNUM);
1347
+ emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1348
+ } else {
1349
+ emit_i8(as, irt_toitype(ir->t));
1350
+ emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
1351
+ }
1352
+ }
1353
+
1354
/* Assemble a tagged-value store through a fused reference (ir->op1).
** Nothing is emitted for a sunk store (ir->r == RID_SINK).
** Numbers are stored with one MOVSD; lightuserdata on LJ_64 with one
** 64 bit MOV. All other types store the type tag 4 bytes above the value
** address, plus the value itself: from a register, or as an immediate for
** a non-primitive constant. A primitive constant gets the tag only.
*/
+ static void asm_ahustore(ASMState *as, IRIns *ir)
1355
+ {
1356
+ if (ir->r == RID_SINK)
1357
+ return;
1358
+ if (irt_isnum(ir->t)) {
1359
+ Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
1360
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
1361
+ emit_mrm(as, XO_MOVSDto, src, RID_MRM);
1362
+ #if LJ_64
1363
+ } else if (irt_islightud(ir->t)) {
1364
+ Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1365
+ asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
1366
+ emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1367
+ #endif
1368
+ } else {
1369
+ IRIns *irr = IR(ir->op2);
1370
+ RegSet allow = RSET_GPR;
1371
+ Reg src = RID_NONE;
1372
+ if (!irref_isk(ir->op2)) { /* Only non-constant values need a register. */
1373
+ src = ra_alloc1(as, ir->op2, allow);
1374
+ rset_clear(allow, src);
1375
+ }
1376
+ asm_fuseahuref(as, ir->op1, allow);
1377
+ if (ra_hasreg(src)) {
1378
+ emit_mrm(as, XO_MOVto, src, RID_MRM);
1379
+ } else if (!irt_ispri(irr->t)) {
1380
+ lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
1381
+ emit_i32(as, irr->i);
1382
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1383
+ }
1384
+ as->mrm.ofs += 4; /* Address the type word. */
1385
+ emit_i32(as, (int32_t)irt_toitype(ir->t));
1386
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1387
+ }
1388
+ }
1389
+
1390
/* Assemble SLOAD: load a stack slot relative to the BASE register.
** The slot offset is 8*(ir->op1-1), plus 4 when IRSLOAD_FRAME is set.
** Flags in ir->op2 select the variant:
** - IRSLOAD_CONVERT with a guarded int result: load the number into an FPR
**   scratch and convert via asm_tointg(); the type check then tests for a
**   number.
** - IRSLOAD_CONVERT otherwise: convert between int and number directly
**   from memory; the type check tests for the original type.
** - IRSLOAD_TYPECHECK: emit a guard on the type word at ofs+4, even when
**   the loaded value itself is unused.
** Lightuserdata on LJ_64 is delegated to asm_load_lightud64().
*/
+ static void asm_sload(ASMState *as, IRIns *ir)
1391
+ {
1392
+ int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1393
+ IRType1 t = ir->t;
1394
+ Reg base;
1395
+ lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
1396
+ lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
1397
+ lua_assert(LJ_DUALNUM ||
1398
+ !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
1399
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1400
+ Reg left = ra_scratch(as, RSET_FPR);
1401
+ asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1402
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
1403
+ emit_rmro(as, XO_MOVSD, left, base, ofs);
1404
+ t.irt = IRT_NUM; /* Continue with a regular number type check. */
1405
+ #if LJ_64
1406
+ } else if (irt_islightud(t)) {
1407
+ Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
1408
+ if (ra_hasreg(dest)) {
1409
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
1410
+ emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
1411
+ }
1412
+ return;
1413
+ #endif
1414
+ } else if (ra_used(ir)) {
1415
+ RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR;
1416
+ Reg dest = ra_dest(as, ir, allow);
1417
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
1418
+ lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1419
+ if ((ir->op2 & IRSLOAD_CONVERT)) {
1420
+ t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1421
+ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1422
+ } else {
1423
+ emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1424
+ }
1425
+ } else {
1426
+ if (!(ir->op2 & IRSLOAD_TYPECHECK))
1427
+ return; /* No type check: avoid base alloc. */
1428
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
1429
+ }
1430
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1431
+ /* Need type check, even if the load result is unused. */
1432
+ asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
1433
+ if (LJ_64 && irt_type(t) >= IRT_NUM) {
1434
+ lua_assert(irt_isinteger(t) || irt_isnum(t));
1435
+ emit_u32(as, LJ_TISNUM);
1436
+ emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1437
+ } else {
1438
+ emit_i8(as, irt_toitype(t));
1439
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
1440
+ }
1441
+ }
1442
+ }
1443
+
1444
+ /* -- Allocations --------------------------------------------------------- */
1445
+
1446
/* Assemble CNEW/CNEWI (FFI builds only): allocate a cdata object.
** - CNEWI: allocate via lj_mem_newgco and store the immutable 4 or 8 byte
**   payload at offset sizeof(GCcdata) from the returned object (RID_RET).
**   On non-LJ_64 builds an 8 byte payload is stored as two 4 byte halves
**   taken from this instruction and the following IR_HIOP.
** - CNEW with ir->op2 != REF_NIL: call lj_cdata_newv(L, id, sz, align)
**   and return; no header initialization is emitted here.
** - Otherwise: call lj_mem_newgco(L, sz+sizeof(GCcdata)).
** For the lj_mem_newgco cases the marked/gct/ctypeid header fields are
** initialized with a single store built in ECX.
** Without LJ_HASFFI, asm_cnew() expands to a no-op.
*/
+ #if LJ_HASFFI
1447
+ static void asm_cnew(ASMState *as, IRIns *ir)
1448
+ {
1449
+ CTState *cts = ctype_ctsG(J2G(as->J));
1450
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
1451
+ CTSize sz;
1452
+ CTInfo info = lj_ctype_info(cts, id, &sz);
1453
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1454
+ IRRef args[4];
1455
+ lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1456
+
1457
+ as->gcsteps++;
1458
+ asm_setupresult(as, ir, ci); /* GCcdata * */
1459
+
1460
+ /* Initialize immutable cdata object. */
1461
+ if (ir->o == IR_CNEWI) {
1462
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1463
+ #if LJ_64
1464
+ Reg r64 = sz == 8 ? REX_64 : 0;
1465
+ if (irref_isk(ir->op2)) {
1466
+ IRIns *irk = IR(ir->op2);
1467
+ uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
1468
+ (uint64_t)(uint32_t)irk->i;
1469
+ if (sz == 4 || checki32((int64_t)k)) { /* Fits a 32 bit immediate store. */
1470
+ emit_i32(as, (int32_t)k);
1471
+ emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
1472
+ } else { /* Full 64 bit constant goes through ECX. */
1473
+ emit_movtomro(as, RID_ECX + r64, RID_RET, sizeof(GCcdata));
1474
+ emit_loadu64(as, RID_ECX, k);
1475
+ }
1476
+ } else {
1477
+ Reg r = ra_alloc1(as, ir->op2, allow);
1478
+ emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata));
1479
+ }
1480
+ #else
1481
+ int32_t ofs = sizeof(GCcdata);
1482
+ if (sz == 8) { /* Start with the upper half held by the HIOP. */
1483
+ ofs += 4; ir++;
1484
+ lua_assert(ir->o == IR_HIOP);
1485
+ }
1486
+ do {
1487
+ if (irref_isk(ir->op2)) {
1488
+ emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i);
1489
+ } else {
1490
+ Reg r = ra_alloc1(as, ir->op2, allow);
1491
+ emit_movtomro(as, r, RID_RET, ofs);
1492
+ rset_clear(allow, r);
1493
+ }
1494
+ if (ofs == sizeof(GCcdata)) break;
1495
+ ofs -= 4; ir--;
1496
+ } while (1);
1497
+ #endif
1498
+ lua_assert(sz == 4 || sz == 8);
1499
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1500
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1501
+ args[0] = ASMREF_L; /* lua_State *L */
1502
+ args[1] = ir->op1; /* CTypeID id */
1503
+ args[2] = ir->op2; /* CTSize sz */
1504
+ args[3] = ASMREF_TMP1; /* CTSize align */
1505
+ asm_gencall(as, ci, args);
1506
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1507
+ return;
1508
+ }
1509
+
1510
+ /* Combine initialization of marked, gct and ctypeid. */
1511
+ emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
1512
+ emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
1513
+ (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
1514
+ emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
1515
+ emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
1516
+
1517
+ args[0] = ASMREF_L; /* lua_State *L */
1518
+ args[1] = ASMREF_TMP1; /* MSize size */
1519
+ asm_gencall(as, ci, args);
1520
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1521
+ }
1522
+ #else
1523
+ #define asm_cnew(as, ir) ((void)0)
1524
+ #endif
1525
+
1526
+ /* -- Write barriers ------------------------------------------------------ */
1527
+
1528
/* Assemble the table write barrier for the table in ir->op1.
** Emits a test of the LJ_GC_BLACK bit in tab->marked with a short jump to
** l_end when it is clear. The remaining instructions clear the black bit
** and link the table into the gc.grayagain list (old list head goes to
** tab->gclist, tab becomes the new head).
** NOTE(review): the emit_* calls look like they are issued in reverse of
** the final instruction order, with l_end marking the end -- confirm.
*/
+ static void asm_tbar(ASMState *as, IRIns *ir)
1529
+ {
1530
+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1531
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1532
+ MCLabel l_end = emit_label(as);
1533
+ emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
1534
+ emit_setgl(as, tab, gc.grayagain);
1535
+ emit_getgl(as, tmp, gc.grayagain);
1536
+ emit_i8(as, ~LJ_GC_BLACK);
1537
+ emit_rmro(as, XO_ARITHib, XOg_AND, tab, offsetof(GCtab, marked));
1538
+ emit_sjcc(as, CC_Z, l_end);
1539
+ emit_i8(as, LJ_GC_BLACK);
1540
+ emit_rmro(as, XO_GROUP3b, XOg_TEST, tab, offsetof(GCtab, marked));
1541
+ }
1542
+
1543
/* Assemble the object write barrier; only upvalues (IR_UREFC in ir->op1)
** are supported, as asserted below.
** Emits two bit tests, each with a short jump to l_end when the tested
** bits are clear: LJ_GC_BLACK on the upvalue's marked byte (addressed
** relative to the TValue pointer in obj) and LJ_GC_WHITES on the stored
** value's marked byte. If neither jump is taken, lj_gc_barrieruv(g, tv)
** is called. All scratch registers are evicted for the call.
*/
+ static void asm_obar(ASMState *as, IRIns *ir)
1544
+ {
1545
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
1546
+ IRRef args[2];
1547
+ MCLabel l_end;
1548
+ Reg obj;
1549
+ /* No need for other object barriers (yet). */
1550
+ lua_assert(IR(ir->op1)->o == IR_UREFC);
1551
+ ra_evictset(as, RSET_SCRATCH);
1552
+ l_end = emit_label(as);
1553
+ args[0] = ASMREF_TMP1; /* global_State *g */
1554
+ args[1] = ir->op1; /* TValue *tv */
1555
+ asm_gencall(as, ci, args);
1556
+ emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
1557
+ obj = IR(ir->op1)->r; /* Register assigned to the UREFC result. */
1558
+ emit_sjcc(as, CC_Z, l_end);
1559
+ emit_i8(as, LJ_GC_WHITES);
1560
+ if (irref_isk(ir->op2)) { /* Constant value: test its marked byte by address. */
1561
+ GCobj *vp = ir_kgc(IR(ir->op2));
1562
+ emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
1563
+ } else {
1564
+ Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
1565
+ emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
1566
+ }
1567
+ emit_sjcc(as, CC_Z, l_end);
1568
+ emit_i8(as, LJ_GC_BLACK);
1569
+ emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
1570
+ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
1571
+ }
1572
+
1573
+ /* -- FP/int arithmetic and logic operations ------------------------------ */
1574
+
1575
+ /* Load reference onto x87 stack. Force a spill to memory if needed. Number constants +0 and 1 use FLDZ/FLD1, other constants are loaded by address. An unused, fusable int-to-number CONV is loaded as an integer (FILD) from the spill slot of its source. Everything else is loaded as a double from a fused memory operand. */
1576
+ static void asm_x87load(ASMState *as, IRRef ref)
1577
+ {
1578
+ IRIns *ir = IR(ref);
1579
+ if (ir->o == IR_KNUM) {
1580
+ cTValue *tv = ir_knum(ir);
1581
+ if (tvispzero(tv)) /* Use fldz only for +0. */
1582
+ emit_x87op(as, XI_FLDZ);
1583
+ else if (tvispone(tv))
1584
+ emit_x87op(as, XI_FLD1);
1585
+ else
1586
+ emit_rma(as, XO_FLDq, XOg_FLDq, tv);
1587
+ } else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT && !ra_used(ir) &&
1588
+ !irref_isk(ir->op1) && mayfuse(as, ir->op1)) {
1589
+ IRIns *iri = IR(ir->op1);
1590
+ emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri));
1591
+ } else {
1592
+ emit_mrm(as, XO_FLDq, XOg_FLDq, asm_fuseload(as, ref, RSET_EMPTY)); /* Empty register set: operand must come from memory. */
1593
+ }
1594
+ }
1595
+
1596
/* Assemble FPMATH; the operation is selected by ir->op2.
** - IRFPM_SQRT: inline SQRTSD.
** - Operations up to IRFPM_TRUNC (floor/ceil/trunc): ROUNDSD when SSE4.1
**   is available, otherwise a call to the matching lj_vm_*_sse helper with
**   argument and result in XMM0.
** - IRFPM_EXP2: first try to rejoin with a pow() via asm_fpjoin_pow().
** - Everything else: generic call, indexed from IRCALL_lj_vm_floor.
*/
+ static void asm_fpmath(ASMState *as, IRIns *ir)
1597
+ {
1598
+ IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1599
+ if (fpm == IRFPM_SQRT) {
1600
+ Reg dest = ra_dest(as, ir, RSET_FPR);
1601
+ Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
1602
+ emit_mrm(as, XO_SQRTSD, dest, left);
1603
+ } else if (fpm <= IRFPM_TRUNC) {
1604
+ if (as->flags & JIT_F_SSE4_1) { /* SSE4.1 has a rounding instruction. */
1605
+ Reg dest = ra_dest(as, ir, RSET_FPR);
1606
+ Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
1607
+ /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op.
1608
+ ** Let's pretend it's a 3-byte opcode, and compensate afterwards.
1609
+ ** This is atrocious, but the alternatives are much worse.
1610
+ */
1611
+ /* Round down/up/trunc == 1001/1010/1011. */
1612
+ emit_i8(as, 0x09 + fpm);
1613
+ emit_mrm(as, XO_ROUNDSD, dest, left);
1614
+ if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) {
1615
+ as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */
1616
+ }
1617
+ *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */
1618
+ } else { /* Call helper functions for SSE2 variant. */
1619
+ /* The modified regs must match with the *.dasc implementation. */
1620
+ RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
1621
+ if (ra_hasreg(ir->r))
1622
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
1623
+ ra_evictset(as, drop);
1624
+ ra_destreg(as, ir, RID_XMM0);
1625
+ emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse :
1626
+ fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1627
+ ra_left(as, RID_XMM0, ir->op1);
1628
+ }
1629
+ } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1630
+ /* Rejoined to pow(). */
1631
+ } else {
1632
+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1633
+ }
1634
+ }
1635
+
1636
+ #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) /* ATAN2 is always a call, never inlined. */
1637
+
1638
/* Assemble LDEXP using the x87 FSCALE instruction.
** Both operands are loaded onto the x87 stack. The result is stored with
** FSTP to the stack slot at ofs; if the instruction has a register, that
** register is freed, marked modified and reloaded from the same slot.
** NOTE(review): the emit_* calls look like they are issued in reverse of
** the final instruction order (loads last in source, first in code) --
** confirm against the emitter.
*/
+ static void asm_ldexp(ASMState *as, IRIns *ir)
1639
+ {
1640
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
1641
+ Reg dest = ir->r;
1642
+ if (ra_hasreg(dest)) {
1643
+ ra_free(as, dest);
1644
+ ra_modified(as, dest);
1645
+ emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1646
+ }
1647
+ emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1648
+ emit_x87op(as, XI_FPOP1);
1649
+ emit_x87op(as, XI_FSCALE);
1650
+ asm_x87load(as, ir->op1);
1651
+ asm_x87load(as, ir->op2);
1652
+ }
1653
+
1654
/* Assemble a number-to-integer-power operation as a call to lj_vm_powi_sse.
** The base (ir->op1) is passed in XMM0, the exponent (ir->op2) in EAX and
** the result is taken from XMM0. The registers in `drop` are evicted
** first, except for the instruction's own destination register.
*/
+ static void asm_fppowi(ASMState *as, IRIns *ir)
1655
+ {
1656
+ /* The modified regs must match with the *.dasc implementation. */
1657
+ RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
1658
+ if (ra_hasreg(ir->r))
1659
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
1660
+ ra_evictset(as, drop);
1661
+ ra_destreg(as, ir, RID_XMM0);
1662
+ emit_call(as, lj_vm_powi_sse);
1663
+ ra_left(as, RID_XMM0, ir->op1);
1664
+ ra_left(as, RID_EAX, ir->op2);
1665
+ }
1666
+
1667
/* Assemble POW. On LJ_64 builds with FFI, non-number results are routed to
** the 64 bit integer helpers (signed or unsigned, chosen by the result
** type). In every other case the number/integer-power path is used.
*/
+ static void asm_pow(ASMState *as, IRIns *ir)
1668
+ {
1669
+ #if LJ_64 && LJ_HASFFI
1670
+ if (!irt_isnum(ir->t))
1671
+ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1672
+ IRCALL_lj_carith_powu64);
1673
+ else
1674
+ #endif
1675
+ asm_fppowi(as, ir);
1676
+ }
1677
+
1678
/* Decide whether the two operands of a binary operation should be swapped.
** Returns 1 to swap, 0 to keep the order. The caller must ensure the right
** operand has no register yet (asserted). The checks are a priority list:
** the first matching rule decides.
*/
+ static int asm_swapops(ASMState *as, IRIns *ir)
1679
+ {
1680
+ IRIns *irl = IR(ir->op1);
1681
+ IRIns *irr = IR(ir->op2);
1682
+ lua_assert(ra_noreg(irr->r));
1683
+ if (!irm_iscomm(lj_ir_mode[ir->o]))
1684
+ return 0; /* Can't swap non-commutative operations. */
1685
+ if (irref_isk(ir->op2))
1686
+ return 0; /* Don't swap constants to the left. */
1687
+ if (ra_hasreg(irl->r))
1688
+ return 1; /* Swap if left already has a register. */
1689
+ if (ra_samehint(ir->r, irr->r))
1690
+ return 1; /* Swap if dest and right have matching hints. */
1691
+ if (as->curins > as->loopref) { /* In variant part? */
1692
+ if (ir->op2 < as->loopref && !irt_isphi(irr->t))
1693
+ return 0; /* Keep invariants on the right. */
1694
+ if (ir->op1 < as->loopref && !irt_isphi(irl->t))
1695
+ return 1; /* Swap invariants to the right. */
1696
+ }
1697
+ if (opisfusableload(irl->o))
1698
+ return 1; /* Swap fusable loads to the right. */
1699
+ return 0; /* Otherwise don't swap. */
1700
+ }
1701
+
1702
/* Assemble a two-operand FP arithmetic instruction: dest = dest <xo> right.
** xo is the x86 opcode to emit. The left operand is placed in dest via
** ra_left(). The right operand is taken from its existing register, from
** dest itself when both operands are the same reference, or from a fused
** load (after an optional operand swap decided by asm_swapops()).
*/
+ static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo)
1703
+ {
1704
+ IRRef lref = ir->op1;
1705
+ IRRef rref = ir->op2;
1706
+ RegSet allow = RSET_FPR;
1707
+ Reg dest;
1708
+ Reg right = IR(rref)->r;
1709
+ if (ra_hasreg(right)) { /* Keep dest away from the right operand's register. */
1710
+ rset_clear(allow, right);
1711
+ ra_noweak(as, right);
1712
+ }
1713
+ dest = ra_dest(as, ir, allow);
1714
+ if (lref == rref) {
1715
+ right = dest;
1716
+ } else if (ra_noreg(right)) {
1717
+ if (asm_swapops(as, ir)) {
1718
+ IRRef tmp = lref; lref = rref; rref = tmp;
1719
+ }
1720
+ right = asm_fuseload(as, rref, rset_clear(allow, dest));
1721
+ }
1722
+ emit_mrm(as, xo, dest, right);
1723
+ ra_left(as, dest, lref);
1724
+ }
1725
+
1726
/* Assemble a two-operand integer arithmetic instruction.
** xa selects the operation; XOg_X_IMUL is special-cased because IMUL has
** its own encodings (register/memory form and 3-operand immediate form).
** The right operand is used from its register, as a 32 bit immediate k
** (when asm_isk32 succeeds), or as a fused load. Guarded instructions
** (e.g. IR_ADDOV) get an overflow guard (CC_O).
** The first step drops a previously emitted `test r,r` when as->flagmcp
** points at the current code position, adjusting the condition code of
** the following branch so it works on the flags of this instruction.
*/
+ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
1727
+ {
1728
+ IRRef lref = ir->op1;
1729
+ IRRef rref = ir->op2;
1730
+ RegSet allow = RSET_GPR;
1731
+ Reg dest, right;
1732
+ int32_t k = 0;
1733
+ if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */
1734
+ MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
1735
+ if ((p[1] & 15) < 14) {
1736
+ if ((p[1] & 15) >= 12) p[1] -= 4; /* L <->S, NL <-> NS */
1737
+ as->flagmcp = NULL;
1738
+ as->mcp = p;
1739
+ } /* else: cannot transform LE/NLE to cc without use of OF. */
1740
+ }
1741
+ right = IR(rref)->r;
1742
+ if (ra_hasreg(right)) { /* Keep dest away from the right operand's register. */
1743
+ rset_clear(allow, right);
1744
+ ra_noweak(as, right);
1745
+ }
1746
+ dest = ra_dest(as, ir, allow);
1747
+ if (lref == rref) {
1748
+ right = dest;
1749
+ } else if (ra_noreg(right) && !asm_isk32(as, rref, &k)) {
1750
+ if (asm_swapops(as, ir)) {
1751
+ IRRef tmp = lref; lref = rref; rref = tmp;
1752
+ }
1753
+ right = asm_fuseloadm(as, rref, rset_clear(allow, dest), irt_is64(ir->t));
1754
+ }
1755
+ if (irt_isguard(ir->t)) /* For IR_ADDOV etc. */
1756
+ asm_guardcc(as, CC_O);
1757
+ if (xa != XOg_X_IMUL) {
1758
+ if (ra_hasreg(right))
1759
+ emit_mrm(as, XO_ARITH(xa), REX_64IR(ir, dest), right);
1760
+ else
1761
+ emit_gri(as, XG_ARITHi(xa), REX_64IR(ir, dest), k);
1762
+ } else if (ra_hasreg(right)) { /* IMUL r, mrm. */
1763
+ emit_mrm(as, XO_IMUL, REX_64IR(ir, dest), right);
1764
+ } else { /* IMUL r, r, k. */
1765
+ /* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */
1766
+ Reg left = asm_fuseloadm(as, lref, RSET_GPR, irt_is64(ir->t));
1767
+ x86Op xo;
1768
+ if (checki8(k)) { emit_i8(as, k); xo = XO_IMULi8;
1769
+ } else { emit_i32(as, k); xo = XO_IMULi; }
1770
+ emit_mrm(as, xo, REX_64IR(ir, dest), left);
1771
+ return; /* 3-operand form reads left directly: no ra_left() needed. */
1772
+ }
1773
+ ra_left(as, dest, lref);
1774
+ }
1775
+
1776
+ /* LEA is really a 4-operand ADD with an independent destination register,
1777
+ ** up to two source registers and an immediate. One register can be scaled
1778
+ ** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
1779
+ ** instructions.
1780
+ **
1781
+ ** Currently only a few common cases are supported:
1782
+ ** - 3-operand ADD: y = a+b; y = a+k with a and b already allocated
1783
+ ** - Left ADD fusion: y = (a+b)+k; y = (a+k)+b
1784
+ ** - Right ADD fusion: y = a+(b+k)
1785
+ ** The omitted variants have already been reduced by FOLD.
1786
+ **
1787
+ ** There are more fusion opportunities, like gathering shifts or joining
1788
+ ** common references. But these are probably not worth the trouble, since
1789
+ ** array indexing is not decomposed and already makes use of all fields
1790
+ ** of the ModRM operand.
1791
+ */
1792
/* Try to assemble an integer ADD as a single LEA.
** Returns 1 if an LEA was emitted, 0 if no supported pattern matched (the
** caller then falls back to regular arithmetic). The memory operand is
** built in as->mrm (base, idx, scale 1, ofs) from the operands:
** - left has a register: combine it with a constant or register right
**   operand, or with a fusable right-hand ADD of (reg + constant);
** - left is a fusable ADD and either its right operand or this
**   instruction's right operand is a constant.
*/
+ static int asm_lea(ASMState *as, IRIns *ir)
1793
+ {
1794
+ IRIns *irl = IR(ir->op1);
1795
+ IRIns *irr = IR(ir->op2);
1796
+ RegSet allow = RSET_GPR;
1797
+ Reg dest;
1798
+ as->mrm.base = as->mrm.idx = RID_NONE;
1799
+ as->mrm.scale = XM_SCALE1;
1800
+ as->mrm.ofs = 0;
1801
+ if (ra_hasreg(irl->r)) {
1802
+ rset_clear(allow, irl->r);
1803
+ ra_noweak(as, irl->r);
1804
+ as->mrm.base = irl->r;
1805
+ if (irref_isk(ir->op2) || ra_hasreg(irr->r)) {
1806
+ /* The PHI renaming logic does a better job in some cases. */
1807
+ if (ra_hasreg(ir->r) &&
1808
+ ((irt_isphi(irl->t) && as->phireg[ir->r] == ir->op1) ||
1809
+ (irt_isphi(irr->t) && as->phireg[ir->r] == ir->op2)))
1810
+ return 0;
1811
+ if (irref_isk(ir->op2)) { /* y = a+k */
1812
+ as->mrm.ofs = irr->i;
1813
+ } else { /* y = a+b */
1814
+ rset_clear(allow, irr->r);
1815
+ ra_noweak(as, irr->r);
1816
+ as->mrm.idx = irr->r;
1817
+ }
1818
+ } else if (irr->o == IR_ADD && mayfuse(as, ir->op2) &&
1819
+ irref_isk(irr->op2)) { /* y = a+(b+k) */
1820
+ Reg idx = ra_alloc1(as, irr->op1, allow);
1821
+ rset_clear(allow, idx);
1822
+ as->mrm.idx = (uint8_t)idx;
1823
+ as->mrm.ofs = IR(irr->op2)->i;
1824
+ } else {
1825
+ return 0;
1826
+ }
1827
+ } else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) &&
1828
+ (irref_isk(ir->op2) || irref_isk(irl->op2))) {
1829
+ Reg idx, base = ra_alloc1(as, irl->op1, allow);
1830
+ rset_clear(allow, base);
1831
+ as->mrm.base = (uint8_t)base;
1832
+ if (irref_isk(ir->op2)) { /* y = (a+b)+k */
1833
+ as->mrm.ofs = irr->i;
1834
+ idx = ra_alloc1(as, irl->op2, allow);
1835
+ } else { /* y = (a+k)+b */
1836
+ as->mrm.ofs = IR(irl->op2)->i;
1837
+ idx = ra_alloc1(as, ir->op2, allow);
1838
+ }
1839
+ rset_clear(allow, idx);
1840
+ as->mrm.idx = (uint8_t)idx;
1841
+ } else {
1842
+ return 0;
1843
+ }
1844
+ dest = ra_dest(as, ir, allow);
1845
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
1846
+ return 1; /* Success. */
1847
+ }
1848
+
1849
/* Assemble ADD. Numbers use ADDSD. Integers first try the LEA form via
** asm_lea(); regular ADD is used when JIT_F_LEA_AGU is set, when a pending
** flag test sits at the current code position (as->flagmcp == as->mcp),
** for 64 bit types, or when asm_lea() finds no pattern. The || chain
** short-circuits, so asm_lea() is only attempted when none of the earlier
** conditions hold.
*/
+ static void asm_add(ASMState *as, IRIns *ir)
1850
+ {
1851
+ if (irt_isnum(ir->t))
1852
+ asm_fparith(as, ir, XO_ADDSD);
1853
+ else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
1854
+ irt_is64(ir->t) || !asm_lea(as, ir))
1855
+ asm_intarith(as, ir, XOg_ADD);
1856
+ }
1857
+
1858
/* Assemble SUB: SUBSD for numbers, integer SUB otherwise. */
+ static void asm_sub(ASMState *as, IRIns *ir)
1859
+ {
1860
+ if (irt_isnum(ir->t))
1861
+ asm_fparith(as, ir, XO_SUBSD);
1862
+ else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
1863
+ asm_intarith(as, ir, XOg_SUB);
1864
+ }
1865
+
1866
/* Assemble MUL: MULSD for numbers, integer IMUL otherwise. */
+ static void asm_mul(ASMState *as, IRIns *ir)
1867
+ {
1868
+ if (irt_isnum(ir->t))
1869
+ asm_fparith(as, ir, XO_MULSD);
1870
+ else
1871
+ asm_intarith(as, ir, XOg_X_IMUL);
1872
+ }
1873
+
1874
/* Assemble DIV. On LJ_64 builds with FFI, non-number results are routed to
** the 64 bit integer division helpers (signed or unsigned, chosen by the
** result type). In every other case DIVSD is emitted.
*/
+ static void asm_div(ASMState *as, IRIns *ir)
1875
+ {
1876
+ #if LJ_64 && LJ_HASFFI
1877
+ if (!irt_isnum(ir->t))
1878
+ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1879
+ IRCALL_lj_carith_divu64);
1880
+ else
1881
+ #endif
1882
+ asm_fparith(as, ir, XO_DIVSD);
1883
+ }
1884
+
1885
/* Assemble MOD as a helper call. On LJ_64 builds with FFI, non-int results
** are routed to the 64 bit helpers (signed or unsigned, chosen by the
** result type). In every other case lj_vm_modi is called.
*/
+ static void asm_mod(ASMState *as, IRIns *ir)
1886
+ {
1887
+ #if LJ_64 && LJ_HASFFI
1888
+ if (!irt_isint(ir->t))
1889
+ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1890
+ IRCALL_lj_carith_modu64);
1891
+ else
1892
+ #endif
1893
+ asm_callid(as, ir, IRCALL_lj_vm_modi);
1894
+ }
1895
+
1896
/* Assemble an in-place unary integer operation from x86 group 3.
** xg selects the operation (callers in this file pass XOg_NEG or XOg_NOT).
** The operand ir->op1 is placed in dest, which is then modified in place.
*/
+ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1897
+ {
1898
+ Reg dest = ra_dest(as, ir, RSET_GPR);
1899
+ emit_rr(as, XO_GROUP3, REX_64IR(ir, xg), dest);
1900
+ ra_left(as, dest, ir->op1);
1901
+ }
1902
+
1903
/* Assemble NEG: integer NEG for non-number types. Numbers go through the
** two-operand FP path with XORPS.
** NOTE(review): presumably ir->op2 holds a sign-bit mask constant for the
** XORPS -- confirm against the IR emitter.
*/
+ static void asm_neg(ASMState *as, IRIns *ir)
1904
+ {
1905
+ if (irt_isnum(ir->t))
1906
+ asm_fparith(as, ir, XO_XORPS);
1907
+ else
1908
+ asm_neg_not(as, ir, XOg_NEG);
1909
+ }
1910
+
1911
+ #define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS) /* ABS is an ANDPS of op1 with op2; presumably op2 is a mask constant -- confirm. */
1912
+
1913
/* Assemble integer MIN/MAX as a CMP plus conditional move.
** cc is the condition under which dest is replaced by the right operand
** (callers in this file pass CC_G for min and CC_L for max).
** If ir->op2 is a constant the operand roles are exchanged, so the
** constant is placed in dest and the other operand is allocated to a
** register.
** NOTE(review): CMOV is emitted before CMP in source order; this looks
** like reverse-order code emission -- confirm against the emitter.
*/
+ static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1914
+ {
1915
+ Reg right, dest = ra_dest(as, ir, RSET_GPR);
1916
+ IRRef lref = ir->op1, rref = ir->op2;
1917
+ if (irref_isk(rref)) { lref = rref; rref = ir->op1; }
1918
+ right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, dest));
1919
+ emit_rr(as, XO_CMOV + (cc<<24), REX_64IR(ir, dest), right);
1920
+ emit_rr(as, XO_CMP, REX_64IR(ir, dest), right);
1921
+ ra_left(as, dest, lref);
1922
+ }
1923
+
1924
/* Assemble MIN: MINSD for numbers; for integers, CMP/CMOV with CC_G. */
+ static void asm_min(ASMState *as, IRIns *ir)
1925
+ {
1926
+ if (irt_isnum(ir->t))
1927
+ asm_fparith(as, ir, XO_MINSD);
1928
+ else
1929
+ asm_intmin_max(as, ir, CC_G);
1930
+ }
1931
+ /* Maximum: MAXSD for numbers, otherwise asm_intmin_max with CC_L. */
1932
+ static void asm_max(ASMState *as, IRIns *ir)
1933
+ {
1934
+ if (irt_isnum(ir->t))
1935
+ asm_fparith(as, ir, XO_MAXSD);
1936
+ else
1937
+ asm_intmin_max(as, ir, CC_L);
1938
+ }
1939
+
1940
+ /* Note: don't use LEA for overflow-checking arithmetic!
+ ** (Presumably because LEA does not produce the flags the overflow check
+ ** needs -- confirm.) The overflow-checking add/sub/mul variants therefore
+ ** map straight onto asm_intarith.
+ */
1941
+ #define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
1942
+ #define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
1943
+ #define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
1944
+ /* Bitwise NOT reuses asm_neg_not with the GROUP3 NOT opcode. */
1945
+ #define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
1946
+ /* Byte swap. The low three bits of the destination register are folded
+ ** into the opcode word itself ((dest&7) << 24) and emit_op is called
+ ** directly, storing the returned pointer back into as->mcp.
+ */
1947
+ static void asm_bswap(ASMState *as, IRIns *ir)
1948
+ {
1949
+ Reg dest = ra_dest(as, ir, RSET_GPR);
1950
+ as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
1951
+ REX_64IR(ir, 0), dest, 0, as->mcp, 1);
1952
+ ra_left(as, dest, ir->op1);
1953
+ }
1954
+ /* Bitwise AND/OR/XOR map directly onto asm_intarith. */
1955
+ #define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
1956
+ #define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
1957
+ #define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
1958
+ /* Shift/rotate emitter behind the asm_bshl/bshr/bsar/brol/bror macros;
+ ** xs selects the operation.
+ ** Constant counts are masked to the operand width (63 for 64 bit types,
+ ** otherwise 31): a masked count of 0 emits no shift instruction, 1 uses
+ ** the XO_SHIFT1 form and any other value goes through emit_shifti.
+ ** Variable counts must be in ecx, so the destination is kept out of ecx
+ ** and the count is allocated to (or moved into) ecx.
+ */
1959
+ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1960
+ {
1961
+ IRRef rref = ir->op2;
1962
+ IRIns *irr = IR(rref);
1963
+ Reg dest;
1964
+ if (irref_isk(rref)) { /* Constant shifts. */
1965
+ int shift;
1966
+ dest = ra_dest(as, ir, RSET_GPR);
1967
+ shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
1968
+ switch (shift) {
1969
+ case 0: break;
1970
+ case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
1971
+ default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
1972
+ }
1973
+ } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
1974
+ Reg right;
1975
+ dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
1976
+ if (dest == RID_ECX) { /* Got ecx anyway: shift in a scratch reg, move result to ecx. */
1977
+ dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
1978
+ emit_rr(as, XO_MOV, RID_ECX, dest);
1979
+ }
1980
+ right = irr->r;
1981
+ if (ra_noreg(right)) /* Count has no register yet: allocate it to ecx. */
1982
+ right = ra_allocref(as, rref, RID2RSET(RID_ECX));
1983
+ else if (right != RID_ECX) /* Count lives elsewhere: claim ecx as scratch. */
1984
+ ra_scratch(as, RID2RSET(RID_ECX));
1985
+ emit_rr(as, XO_SHIFTcl, REX_64IR(ir, xs), dest);
1986
+ ra_noweak(as, right);
1987
+ if (right != RID_ECX)
1988
+ emit_rr(as, XO_MOV, RID_ECX, right);
1989
+ }
1990
+ ra_left(as, dest, ir->op1);
1991
+ /*
1992
+ ** Note: avoid using the flags resulting from a shift or rotate!
1993
+ ** All of them cause a partial flag stall, except for r,1 shifts
1994
+ ** (but not rotates). And a shift count of 0 leaves the flags unmodified.
1995
+ */
1996
+ }
1997
+ /* All shifts and rotates share asm_bitshift; only the opcode differs. */
1998
+ #define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
1999
+ #define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
2000
+ #define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
2001
+ #define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
2002
+ #define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
2003
+
2004
+ /* -- Comparisons --------------------------------------------------------- */
2005
+
2006
+ /* Virtual flags for unordered FP comparisons.
+ ** They sit above the three condition-code fields packed by COMPFLAGS.
+ */
2007
+ #define VCC_U 0x1000 /* Unordered. */
2008
+ #define VCC_P 0x2000 /* Needs extra CC_P branch. */
2009
+ #define VCC_S 0x4000 /* Swap avoids CC_P branch. */
2010
+ #define VCC_PS (VCC_P|VCC_S)
2011
+
2012
+ /* Map of comparisons to flags. ORDER IR.
+ ** COMPFLAGS packs: ci (signed cc) unshifted, cu (unsigned cc) shifted by 4,
+ ** cin (signed cc without the equality part) shifted by 8, plus the VCC_*
+ ** flags in cf. Each stored cc is the inverse of the IR comparison (LT maps
+ ** to CC_GE, EQ to CC_NE, ...), presumably because guards branch to the exit
+ ** when the comparison fails.
+ ** NOTE(review): the packing assumes each CC_* value fits in 4 bits.
+ */
2013
+ #define COMPFLAGS(ci, cin, cu, cf) ((ci)+((cu)<<4)+((cin)<<8)+(cf))
2014
+ static const uint16_t asm_compmap[IR_ABC+1] = {
2015
+ /* signed non-eq unsigned flags */
2016
+ /* LT */ COMPFLAGS(CC_GE, CC_G, CC_AE, VCC_PS),
2017
+ /* GE */ COMPFLAGS(CC_L, CC_L, CC_B, 0),
2018
+ /* LE */ COMPFLAGS(CC_G, CC_G, CC_A, VCC_PS),
2019
+ /* GT */ COMPFLAGS(CC_LE, CC_L, CC_BE, 0),
2020
+ /* ULT */ COMPFLAGS(CC_AE, CC_A, CC_AE, VCC_U),
2021
+ /* UGE */ COMPFLAGS(CC_B, CC_B, CC_B, VCC_U|VCC_PS),
2022
+ /* ULE */ COMPFLAGS(CC_A, CC_A, CC_A, VCC_U),
2023
+ /* UGT */ COMPFLAGS(CC_BE, CC_B, CC_BE, VCC_U|VCC_PS),
2024
+ /* EQ */ COMPFLAGS(CC_NE, CC_NE, CC_NE, VCC_P),
2025
+ /* NE */ COMPFLAGS(CC_E, CC_E, CC_E, VCC_U|VCC_P),
2026
+ /* ABC */ COMPFLAGS(CC_BE, CC_B, CC_BE, VCC_U|VCC_PS) /* Same as UGT. */
2027
+ };
2028
+
2029
+ /* FP and integer comparisons.
+ ** Looks up the condition codes for ir->o in asm_compmap and emits the
+ ** compare together with its guard(s). Number-typed IR uses UCOMISD with an
+ ** optional extra CC_P branch. All other types use CMP or TEST, with
+ ** special cases for constant right operands, for BAND results compared
+ ** against zero and for fusable narrow loads.
+ ** NOTE(review): guards are issued before the compare they test --
+ ** presumably code is emitted in reverse of execution order; confirm
+ ** against the emitter.
+ */
2030
+ static void asm_comp(ASMState *as, IRIns *ir)
2031
+ {
2032
+ uint32_t cc = asm_compmap[ir->o];
2033
+ if (irt_isnum(ir->t)) {
2034
+ IRRef lref = ir->op1;
2035
+ IRRef rref = ir->op2;
2036
+ Reg left, right;
2037
+ MCLabel l_around;
2038
+ /*
2039
+ ** An extra CC_P branch is required to preserve ordered/unordered
2040
+ ** semantics for FP comparisons. This can be avoided by swapping
2041
+ ** the operands and inverting the condition (except for EQ and UNE).
2042
+ ** So always try to swap if possible.
2043
+ **
2044
+ ** Another option would be to swap operands to achieve better memory
2045
+ ** operand fusion. But it's unlikely that this outweighs the cost
2046
+ ** of the extra branches.
2047
+ */
2048
+ if (cc & VCC_S) { /* Swap? */
2049
+ IRRef tmp = lref; lref = rref; rref = tmp;
2050
+ cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
2051
+ }
2052
+ left = ra_alloc1(as, lref, RSET_FPR);
2053
+ right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2054
+ l_around = emit_label(as);
2055
+ asm_guardcc(as, cc >> 4); /* Shift drops the signed field; unsigned cc is now lowest. */
2056
+ if (cc & VCC_P) { /* Extra CC_P branch required? */
2057
+ if (!(cc & VCC_U)) {
2058
+ asm_guardcc(as, CC_P); /* Branch to exit for ordered comparisons. */
2059
+ } else if (l_around != as->invmcp) {
2060
+ emit_sjcc(as, CC_P, l_around); /* Branch around for unordered. */
2061
+ } else {
2062
+ /* Patched to mcloop by asm_loop_fixup. */
2063
+ as->loopinv = 2;
2064
+ if (as->realign)
2065
+ emit_sjcc(as, CC_P, as->mcp);
2066
+ else
2067
+ emit_jcc(as, CC_P, as->mcp);
2068
+ }
2069
+ }
2070
+ emit_mrm(as, XO_UCOMISD, left, right);
2071
+ } else {
2072
+ IRRef lref = ir->op1, rref = ir->op2;
2073
+ IROp leftop = (IROp)(IR(lref)->o);
2074
+ Reg r64 = REX_64IR(ir, 0);
2075
+ int32_t imm = 0;
2076
+ lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
2077
+ irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
2078
+ /* Swap constants (only for ABC) and fusable loads to the right. */
2079
+ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
2080
+ if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */
2081
+ else if ((cc & 0xa) == 0x2) cc ^= 0x55; /* A <-> B, AE <-> BE */
2082
+ lref = ir->op2; rref = ir->op1;
2083
+ }
2084
+ if (asm_isk32(as, rref, &imm)) {
2085
+ IRIns *irl = IR(lref);
2086
+ /* Check whether we can use test ins. Not for unsigned, since CF=0. */
2087
+ int usetest = (imm == 0 && (cc & 0xa) != 0x2);
2088
+ if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
2089
+ /* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */
2090
+ Reg right, left = RID_NONE;
2091
+ RegSet allow = RSET_GPR;
2092
+ if (!asm_isk32(as, irl->op2, &imm)) {
2093
+ left = ra_alloc1(as, irl->op2, allow);
2094
+ rset_clear(allow, left);
2095
+ } else { /* Try to Fuse IRT_I8/IRT_U8 loads, too. See below. */
2096
+ IRIns *irll = IR(irl->op1);
2097
+ if (opisfusableload((IROp)irll->o) &&
2098
+ (irt_isi8(irll->t) || irt_isu8(irll->t))) {
2099
+ IRType1 origt = irll->t; /* Temporarily flip types. */
2100
+ irll->t.irt = (irll->t.irt & ~IRT_TYPE) | IRT_INT;
2101
+ as->curins--; /* Skip to BAND to avoid failing in noconflict(). */
2102
+ right = asm_fuseload(as, irl->op1, RSET_GPR);
2103
+ as->curins++;
2104
+ irll->t = origt;
2105
+ if (right != RID_MRM) goto test_nofuse;
2106
+ /* Fusion succeeded, emit test byte mrm, imm8. */
2107
+ asm_guardcc(as, cc);
2108
+ emit_i8(as, (imm & 0xff));
2109
+ emit_mrm(as, XO_GROUP3b, XOg_TEST, RID_MRM);
2110
+ return;
2111
+ }
2112
+ }
2113
+ as->curins--; /* Skip to BAND to avoid failing in noconflict(). */
2114
+ right = asm_fuseloadm(as, irl->op1, allow, r64);
2115
+ as->curins++; /* Undo the above. */
2116
+ test_nofuse:
2117
+ asm_guardcc(as, cc);
2118
+ if (ra_noreg(left)) { /* BAND mask is a constant: test mrm, imm32. */
2119
+ emit_i32(as, imm);
2120
+ emit_mrm(as, XO_GROUP3, r64 + XOg_TEST, right);
2121
+ } else {
2122
+ emit_mrm(as, XO_TEST, r64 + left, right);
2123
+ }
2124
+ } else {
2125
+ Reg left;
2126
+ if (opisfusableload((IROp)irl->o) &&
2127
+ ((irt_isu8(irl->t) && checku8(imm)) ||
2128
+ ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
2129
+ (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
2130
+ /* Only the IRT_INT case is fused by asm_fuseload.
2131
+ ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
2132
+ ** are handled here.
2133
+ ** Note that cmp word [mem], imm16 should not be generated,
2134
+ ** since it has a length-changing prefix. Compares of a word
2135
+ ** against a sign-extended imm8 are ok, however.
2136
+ */
2137
+ IRType1 origt = irl->t; /* Temporarily flip types. */
2138
+ irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
2139
+ left = asm_fuseload(as, lref, RSET_GPR);
2140
+ irl->t = origt;
2141
+ if (left == RID_MRM) { /* Fusion succeeded? */
2142
+ if (irt_isu8(irl->t) || irt_isu16(irl->t))
2143
+ cc >>= 4; /* Need unsigned compare. */
2144
+ asm_guardcc(as, cc);
2145
+ emit_i8(as, imm);
2146
+ emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
2147
+ XO_ARITHib : XO_ARITHiw8, r64 + XOg_CMP, RID_MRM);
2148
+ return;
2149
+ } /* Otherwise handle register case as usual. */
2150
+ } else {
2151
+ left = asm_fuseloadm(as, lref,
2152
+ irt_isu8(ir->t) ? RSET_GPR8 : RSET_GPR, r64);
2153
+ }
2154
+ asm_guardcc(as, cc);
2155
+ if (usetest && left != RID_MRM) {
2156
+ /* Use test r,r instead of cmp r,0. */
2157
+ x86Op xo = XO_TEST;
2158
+ if (irt_isu8(ir->t)) {
2159
+ lua_assert(ir->o == IR_EQ || ir->o == IR_NE);
2160
+ xo = XO_TESTb;
2161
+ if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) {
2162
+ if (LJ_64) {
2163
+ left |= FORCE_REX;
2164
+ } else { /* Test the low byte with a 0xff mask instead. */
2165
+ emit_i32(as, 0xff);
2166
+ emit_mrm(as, XO_GROUP3, XOg_TEST, left);
2167
+ return;
2168
+ }
2169
+ }
2170
+ }
2171
+ emit_rr(as, xo, r64 + left, left);
2172
+ if (irl+1 == ir) /* Referencing previous ins? */
2173
+ as->flagmcp = as->mcp; /* Set flag to drop test r,r if possible. */
2174
+ } else {
2175
+ emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm);
2176
+ }
2177
+ }
2178
+ } else { /* Right operand is not a 32 bit constant: plain cmp r, r/m. */
2179
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
2180
+ Reg right = asm_fuseloadm(as, rref, rset_exclude(RSET_GPR, left), r64);
2181
+ asm_guardcc(as, cc);
2182
+ emit_mrm(as, XO_CMP, r64 + left, right);
2183
+ }
2184
+ }
2185
+ }
2186
+ /* Equality comparisons take the same path as all other comparisons. */
2187
+ #define asm_equal(as, ir) asm_comp(as, ir)
2188
+
2189
+ #if LJ_32 && LJ_HASFFI
2190
+ /* 64 bit integer comparisons in 32 bit mode.
+ ** ir is the hiword op and ir-1 the loword comparison; the opcode of ir-1
+ ** selects the condition codes from asm_compmap. The hiword and loword
+ ** operands come from ir and ir-1 respectively. Two compares are emitted:
+ ** the hiword one with its guard, and the loword one, which is always
+ ** guarded with the unsigned condition.
+ */
2191
+ static void asm_comp_int64(ASMState *as, IRIns *ir)
2192
+ {
2193
+ uint32_t cc = asm_compmap[(ir-1)->o];
2194
+ RegSet allow = RSET_GPR;
2195
+ Reg lefthi = RID_NONE, leftlo = RID_NONE;
2196
+ Reg righthi = RID_NONE, rightlo = RID_NONE;
2197
+ MCLabel l_around;
2198
+ x86ModRM mrm;
2199
+
2200
+ as->curins--; /* Skip loword ins. Avoids failing in noconflict(), too. */
2201
+
2202
+ /* Allocate/fuse hiword operands. */
2203
+ if (irref_isk(ir->op2)) {
2204
+ lefthi = asm_fuseload(as, ir->op1, allow);
2205
+ } else {
2206
+ lefthi = ra_alloc1(as, ir->op1, allow);
2207
+ rset_clear(allow, lefthi);
2208
+ righthi = asm_fuseload(as, ir->op2, allow);
2209
+ if (righthi == RID_MRM) { /* Keep the fused operand's address registers reserved. */
2210
+ if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
2211
+ if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
2212
+ } else {
2213
+ rset_clear(allow, righthi);
2214
+ }
2215
+ }
2216
+ mrm = as->mrm; /* Save state for hiword instruction. */
2217
+
2218
+ /* Allocate/fuse loword operands. */
2219
+ if (irref_isk((ir-1)->op2)) {
2220
+ leftlo = asm_fuseload(as, (ir-1)->op1, allow);
2221
+ } else {
2222
+ leftlo = ra_alloc1(as, (ir-1)->op1, allow);
2223
+ rset_clear(allow, leftlo);
2224
+ rightlo = asm_fuseload(as, (ir-1)->op2, allow);
2225
+ }
2226
+
2227
+ /* All register allocations must be performed _before_ this point. */
2228
+ l_around = emit_label(as);
2229
+ as->invmcp = as->flagmcp = NULL; /* Cannot use these optimizations. */
2230
+
2231
+ /* Loword comparison and branch. */
2232
+ asm_guardcc(as, cc >> 4); /* Always use unsigned compare for loword. */
2233
+ if (ra_noreg(rightlo)) { /* No right register: op2 is a constant. */
2234
+ int32_t imm = IR((ir-1)->op2)->i;
2235
+ if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM)
2236
+ emit_rr(as, XO_TEST, leftlo, leftlo);
2237
+ else
2238
+ emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm);
2239
+ } else {
2240
+ emit_mrm(as, XO_CMP, leftlo, rightlo);
2241
+ }
2242
+
2243
+ /* Hiword comparison and branches. */
2244
+ if ((cc & 15) != CC_NE)
2245
+ emit_sjcc(as, CC_NE, l_around); /* Hiword unequal: skip loword compare. */
2246
+ if ((cc & 15) != CC_E)
2247
+ asm_guardcc(as, cc >> 8); /* Hiword compare without equality check. */
2248
+ as->mrm = mrm; /* Restore state. */
2249
+ if (ra_noreg(righthi)) { /* No right register: op2 is a constant. */
2250
+ int32_t imm = IR(ir->op2)->i;
2251
+ if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM)
2252
+ emit_rr(as, XO_TEST, lefthi, lefthi);
2253
+ else
2254
+ emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm);
2255
+ } else {
2256
+ emit_mrm(as, XO_CMP, lefthi, righthi);
2257
+ }
2258
+ }
2259
+ #endif
2260
+
2261
+ /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
2262
+
2263
+ /* Hiword op of a split 64 bit op. Previous op must be the loword op.
+ ** Only does real work when built with LJ_32 && LJ_HASFFI; otherwise it
+ ** just asserts. Dispatches on the opcode of ir-1: conversions, comparisons
+ ** and XSTORE are delegated to their own 64 bit handlers; ADD/SUB/NEG emit
+ ** the hiword and loword instructions as a pair; CALLN/CALLXS only make
+ ** sure the loword result is allocated; CNEWI needs nothing.
+ */
2264
+ static void asm_hiop(ASMState *as, IRIns *ir)
2265
+ {
2266
+ #if LJ_32 && LJ_HASFFI
2267
+ /* HIOP is marked as a store because it needs its own DCE logic. */
2268
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
2269
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2270
+ if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
2271
+ as->curins--; /* Always skip the CONV. */
2272
+ if (usehi || uselo)
2273
+ asm_conv64(as, ir);
2274
+ return;
2275
+ } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
2276
+ asm_comp_int64(as, ir);
2277
+ return;
2278
+ } else if ((ir-1)->o == IR_XSTORE) {
2279
+ if ((ir-1)->r != RID_SINK)
2280
+ asm_fxstore(as, ir);
2281
+ return;
2282
+ }
2283
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
2284
+ switch ((ir-1)->o) {
2285
+ case IR_ADD:
2286
+ as->flagmcp = NULL;
2287
+ as->curins--;
2288
+ asm_intarith(as, ir, XOg_ADC); /* Hiword: add with carry. */
2289
+ asm_intarith(as, ir-1, XOg_ADD); /* Loword: plain add. */
2290
+ break;
2291
+ case IR_SUB:
2292
+ as->flagmcp = NULL;
2293
+ as->curins--;
2294
+ asm_intarith(as, ir, XOg_SBB); /* Hiword: subtract with borrow. */
2295
+ asm_intarith(as, ir-1, XOg_SUB); /* Loword: plain subtract. */
2296
+ break;
2297
+ case IR_NEG: {
2298
+ Reg dest = ra_dest(as, ir, RSET_GPR);
2299
+ emit_rr(as, XO_GROUP3, XOg_NEG, dest);
2300
+ emit_i8(as, 0);
2301
+ emit_rr(as, XO_ARITHi8, XOg_ADC, dest);
2302
+ ra_left(as, dest, ir->op1);
2303
+ as->curins--;
2304
+ asm_neg_not(as, ir-1, XOg_NEG);
2305
+ break;
2306
+ }
2307
+ case IR_CALLN:
2308
+ case IR_CALLXS:
2309
+ if (!uselo)
2310
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
2311
+ break;
2312
+ case IR_CNEWI:
2313
+ /* Nothing to do here. Handled by CNEWI itself. */
2314
+ break;
2315
+ default: lua_assert(0); break;
2316
+ }
2317
+ #else
2318
+ UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */
2319
+ #endif
2320
+ }
2321
+
2322
+ /* -- Profiling ----------------------------------------------------------- */
2323
+ /* Profiling guard: emits a byte TEST of the global hookmask against
+ ** HOOK_PROFILE together with a CC_NE guard. The ir argument is unused.
+ */
2324
+ static void asm_prof(ASMState *as, IRIns *ir)
2325
+ {
2326
+ UNUSED(ir);
2327
+ asm_guardcc(as, CC_NE);
2328
+ emit_i8(as, HOOK_PROFILE);
2329
+ emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
2330
+ }
2331
+
2332
+ /* -- Stack handling ------------------------------------------------------ */
2333
+
2334
+ /* Check Lua stack size for overflow. Use exit handler as fallback.
+ ** topslot: slot count; the emitted compare uses 8*topslot.
+ ** irp: instruction whose register holds BASE, or NULL to use RID_BASE.
+ ** allow: candidate temp registers; if empty, eax is used and
+ ** saved to/restored from [esp].
+ ** exitno: exit whose stub is the target of the emitted CC_B branch.
+ ** The emitted code loads cur_L, reads L->maxstack, subtracts the base
+ ** (from a register or from g->jit_base) and compares against 8*topslot.
+ */
2335
+ static void asm_stack_check(ASMState *as, BCReg topslot,
2336
+ IRIns *irp, RegSet allow, ExitNo exitno)
2337
+ {
2338
+ /* Try to get an unused temp. register, otherwise spill/restore eax. */
2339
+ Reg pbase = irp ? irp->r : RID_BASE;
2340
+ Reg r = allow ? rset_pickbot(allow) : RID_EAX;
2341
+ emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
2342
+ if (allow == RSET_EMPTY) /* Restore temp. register. */
2343
+ emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
2344
+ else
2345
+ ra_modified(as, r);
2346
+ emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
2347
+ if (ra_hasreg(pbase) && pbase != r)
2348
+ emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
2349
+ else /* No usable base register: subtract g->jit_base from memory. */
2350
+ emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2351
+ ptr2addr(&J2G(as->J)->jit_base));
2352
+ emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
2353
+ emit_getgl(as, r, cur_L);
2354
+ if (allow == RSET_EMPTY) /* Spill temp. register. */
2355
+ emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
2356
+ }
2357
+
2358
+ /* Restore Lua stack from on-trace state.
+ ** Walks the nent entries of the snapshot map and, for every entry not
+ ** flagged SNAP_NORESTORE, emits stores relative to RID_BASE at offset
+ ** 8*(slot-1). Numbers are stored with MOVSD. For other types the value
+ ** goes to the offset itself and a second 32 bit word goes to offset+4:
+ ** the next frame link for SNAP_CONT/SNAP_FRAME entries, otherwise the
+ ** result of irt_toitype.
+ */
2359
+ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2360
+ {
2361
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
2362
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
2363
+ MSize n, nent = snap->nent;
2364
+ /* Store the value of all modified slots to the Lua stack. */
2365
+ for (n = 0; n < nent; n++) {
2366
+ SnapEntry sn = map[n];
2367
+ BCReg s = snap_slot(sn);
2368
+ int32_t ofs = 8*((int32_t)s-1);
2369
+ IRRef ref = snap_ref(sn);
2370
+ IRIns *ir = IR(ref);
2371
+ if ((sn & SNAP_NORESTORE))
2372
+ continue;
2373
+ if (irt_isnum(ir->t)) {
2374
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
2375
+ emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
2376
+ } else {
2377
+ lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
2378
+ (LJ_DUALNUM && irt_isinteger(ir->t)));
2379
+ if (!irref_isk(ref)) {
2380
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2381
+ emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
2382
+ } else if (!irt_ispri(ir->t)) { /* Constant of non-primitive type: store immediate. */
2383
+ emit_movmroi(as, RID_BASE, ofs, ir->i);
2384
+ }
2385
+ if ((sn & (SNAP_CONT|SNAP_FRAME))) {
2386
+ if (s != 0) /* Do not overwrite link to previous frame. */
2387
+ emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
2388
+ } else {
2389
+ if (!(LJ_64 && irt_islightud(ir->t)))
2390
+ emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
2391
+ }
2392
+ }
2393
+ checkmclim(as);
2394
+ }
2395
+ lua_assert(map + nent == flinks); /* All frame links must have been consumed. */
2396
+ }
2397
+
2398
+ /* -- GC handling --------------------------------------------------------- */
2399
+
2400
+ /* Check GC threshold and do one or more GC steps.
+ ** Emits: load gc.total, compare with gc.threshold, short branch to l_end
+ ** on CC_B, otherwise a call to lj_gc_step_jit(g, as->gcsteps) whose
+ ** non-zero result triggers a guard exit. Scratch registers are evicted
+ ** first and as->gcsteps is reset to 0 afterwards.
+ */
2401
+ static void asm_gc_check(ASMState *as)
2402
+ {
2403
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
2404
+ IRRef args[2];
2405
+ MCLabel l_end;
2406
+ Reg tmp;
2407
+ ra_evictset(as, RSET_SCRATCH);
2408
+ l_end = emit_label(as);
2409
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
2410
+ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */
2411
+ emit_rr(as, XO_TEST, RID_RET, RID_RET);
2412
+ args[0] = ASMREF_TMP1; /* global_State *g */
2413
+ args[1] = ASMREF_TMP2; /* MSize steps */
2414
+ asm_gencall(as, ci, args);
2415
+ tmp = ra_releasetmp(as, ASMREF_TMP1);
2416
+ emit_loada(as, tmp, J2G(as->J));
2417
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
2418
+ /* Jump around GC step if GC total < GC threshold. */
2419
+ emit_sjcc(as, CC_B, l_end);
2420
+ emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
2421
+ emit_getgl(as, tmp, gc.total);
2422
+ as->gcsteps = 0;
2423
+ checkmclim(as);
2424
+ }
2425
+
2426
+ /* -- Loop handling ------------------------------------------------------- */
2427
+
2428
+ /* Fixup the loop branch.
+ ** Patches the bytes just below as->mctop so that the loop branch targets
+ ** as->mcp. Realigned loops get 8 bit displacements. Otherwise a 32 bit
+ ** displacement is patched and, if the loop is small enough (new loop start
+ ** within 128 bytes of the branch), as->realign is set and assembly of the
+ ** trace is aborted so it can be retried with short jumps.
+ ** as->loopinv == 2 means an additional CC_P branch has to be patched, too.
+ */
2429
+ static void asm_loop_fixup(ASMState *as)
2430
+ {
2431
+ MCode *p = as->mctop;
2432
+ MCode *target = as->mcp;
2433
+ if (as->realign) { /* Realigned loops use short jumps. */
2434
+ as->realign = NULL; /* Stop another retry. */
2435
+ lua_assert(((intptr_t)target & 15) == 0);
2436
+ if (as->loopinv) { /* Inverted loop branch? */
2437
+ p -= 5;
2438
+ p[0] = XI_JMP;
2439
+ lua_assert(target - p >= -128);
2440
+ p[-1] = (MCode)(target - p); /* Patch sjcc. */
2441
+ if (as->loopinv == 2)
2442
+ p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */
2443
+ } else {
2444
+ lua_assert(target - p >= -128);
2445
+ p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */
2446
+ p[-2] = XI_JMPs;
2447
+ }
2448
+ } else {
2449
+ MCode *newloop;
2450
+ p[-5] = XI_JMP;
2451
+ if (as->loopinv) { /* Inverted loop branch? */
2452
+ /* asm_guardcc already inverted the jcc and patched the jmp. */
2453
+ p -= 5;
2454
+ newloop = target+4;
2455
+ *(int32_t *)(p-4) = (int32_t)(target - p); /* Patch jcc. */
2456
+ if (as->loopinv == 2) {
2457
+ *(int32_t *)(p-10) = (int32_t)(target - p + 6); /* Patch opt. jp. */
2458
+ newloop = target+8;
2459
+ }
2460
+ } else { /* Otherwise just patch jmp. */
2461
+ *(int32_t *)(p-4) = (int32_t)(target - p);
2462
+ newloop = target+3;
2463
+ }
2464
+ /* Realign small loops and shorten the loop branch. */
2465
+ if (newloop >= p - 128) {
2466
+ as->realign = newloop; /* Force a retry and remember alignment. */
2467
+ as->curins = as->stopins; /* Abort asm_trace now. */
2468
+ as->T->nins = as->orignins; /* Remove any added renames. */
2469
+ }
2470
+ }
2471
+ }
2472
+
2473
+ /* -- Head of trace ------------------------------------------------------- */
2474
+
2475
+ /* Coalesce BASE register for a root trace.
+ ** If REF_BASE has a register it is freed; a modified or marked BASE gets
+ ** RID_INIT (no inheritance); and if the register is not RID_BASE a move
+ ** from RID_BASE into it is emitted.
+ */
2476
+ static void asm_head_root_base(ASMState *as)
2477
+ {
2478
+ IRIns *ir = IR(REF_BASE);
2479
+ Reg r = ir->r;
2480
+ if (ra_hasreg(r)) {
2481
+ ra_free(as, r);
2482
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
2483
+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
2484
+ if (r != RID_BASE)
2485
+ emit_rr(as, XO_MOV, r, RID_BASE);
2486
+ }
2487
+ }
2488
+
2489
+ /* Coalesce or reload BASE register for a side trace.
+ ** irp is the instruction whose register (irp->r) may already hold BASE.
+ ** Returns the allow set with any register used for coalescing removed.
+ ** If irp->r is neither the same register nor a free one, BASE is reloaded
+ ** from jit_base instead.
+ */
2490
+ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
2491
+ {
2492
+ IRIns *ir = IR(REF_BASE);
2493
+ Reg r = ir->r;
2494
+ if (ra_hasreg(r)) {
2495
+ ra_free(as, r);
2496
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
2497
+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
2498
+ if (irp->r == r) {
2499
+ rset_clear(allow, r); /* Mark same BASE register as coalesced. */
2500
+ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
2501
+ rset_clear(allow, irp->r);
2502
+ emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */
2503
+ } else {
2504
+ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
2505
+ }
2506
+ }
2507
+ return allow;
2508
+ }
2509
+
2510
+ /* -- Tail of trace ------------------------------------------------------- */
2511
+
2512
+ /* Fixup the tail code.
+ ** Writes the final stack adjustment (as->T->spadjust, if non-zero) and the
+ ** exit branch directly into the bytes below as->mctop. lnk selects the
+ ** branch target: the machine code of trace lnk, or lj_vm_exit_interp when
+ ** lnk is 0. Any bytes left over at the top are filled with NOPs and
+ ** as->mctop is lowered accordingly.
+ */
2513
+ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2514
+ {
2515
+ /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
2516
+ MCode *p = as->mctop;
2517
+ MCode *target, *q;
2518
+ int32_t spadj = as->T->spadjust;
2519
+ if (spadj == 0) { /* No adjustment: give back the space reserved for it. */
2520
+ p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
2521
+ } else {
2522
+ MCode *p1;
2523
+ /* Patch stack adjustment. */
2524
+ if (checki8(spadj)) { /* 8 bit immediate is 3 bytes shorter. */
2525
+ p -= 3;
2526
+ p1 = p-6;
2527
+ *p1 = (MCode)spadj;
2528
+ } else {
2529
+ p1 = p-9;
2530
+ *(int32_t *)p1 = spadj;
2531
+ }
2532
+ if ((as->flags & JIT_F_LEA_AGU)) { /* lea esp, [esp+imm] */
2533
+ #if LJ_64
2534
+ p1[-4] = 0x48;
2535
+ #endif
2536
+ p1[-3] = (MCode)XI_LEA;
2537
+ p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
2538
+ p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
2539
+ } else { /* add esp, imm */
2540
+ #if LJ_64
2541
+ p1[-3] = 0x48;
2542
+ #endif
2543
+ p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2544
+ p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
2545
+ }
2546
+ }
2547
+ /* Patch exit branch. */
2548
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
2549
+ *(int32_t *)(p-4) = jmprel(p, target);
2550
+ p[-5] = XI_JMP;
2551
+ /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
2552
+ for (q = as->mctop-1; q >= p; q--)
2553
+ *q = XI_NOP;
2554
+ as->mctop = p;
2555
+ }
2556
+
2557
+ /* Prepare tail of code.
+ ** Reserves space below as->mctop for the final branch and sets as->mcp
+ ** (and as->invmcp) to where code emission starts. With as->realign set,
+ ** the low 4 bits of the remembered address give the number of NOPs to
+ ** insert first. Traces without a loop additionally reserve room for the
+ ** stack pointer adjustment.
+ */
2558
+ static void asm_tail_prep(ASMState *as)
2559
+ {
2560
+ MCode *p = as->mctop;
2561
+ /* Realign and leave room for backwards loop branch or exit branch. */
2562
+ if (as->realign) {
2563
+ int i = ((int)(intptr_t)as->realign) & 15;
2564
+ /* Fill unused mcode tail with NOPs to make the prefetcher happy. */
2565
+ while (i-- > 0)
2566
+ *--p = XI_NOP;
2567
+ as->mctop = p;
2568
+ p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */
2569
+ } else {
2570
+ p -= 5; /* Space for exit branch (near jmp). */
2571
+ }
2572
+ if (as->loopref) {
2573
+ as->invmcp = as->mcp = p;
2574
+ } else {
2575
+ /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
2576
+ as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0));
2577
+ as->invmcp = NULL;
2578
+ }
2579
+ }
2580
+
2581
+ /* -- Trace setup --------------------------------------------------------- */
2582
+
2583
+ /* Ensure there are enough stack slots for call arguments.
+ ** Collects the call's arguments, counts the stack slots they need and
+ ** raises as->evenspill to that count if it is lower.
+ ** Returns a register hint for the result: RID_FPRET for FP results on
+ ** 64 bit builds, REGSP_INIT (no hint) for FP results otherwise, and
+ ** RID_RET for all non-FP results.
+ */
2584
+ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2585
+ {
2586
+ IRRef args[CCI_NARGS_MAX*2];
2587
+ int nslots;
2588
+ asm_collectargs(as, ir, ci, args);
2589
+ nslots = asm_count_call_slots(as, ci, args);
2590
+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2591
+ as->evenspill = nslots;
2592
+ #if LJ_64
2593
+ return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
2594
+ #else
2595
+ return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET);
2596
+ #endif
2597
+ }
2598
+
2599
+ /* Target-specific setup.
+ ** Here this only calls asm_exitstub_setup with the trace's snapshot count.
+ */
2600
+ static void asm_setup_target(ASMState *as)
2601
+ {
2602
+ asm_exitstub_setup(as, as->T->nsnap);
2603
+ }
2604
+
2605
+ /* -- Trace patching ------------------------------------------------------ */
2606
+
2607
+ /* Patch exit jumps of existing machine code to a new target.
+ ** T: trace whose machine code is patched in place.
+ ** exitno: exit whose stub address identifies the branches to redirect.
+ ** target: new destination for those branches.
+ ** px is the stub address minus 6, so "p + rel32 == px" matches a 6 byte
+ ** jcc at p whose destination (p + 6 + rel32) is the stub. The same trick
+ ** is used for the 5 byte jmp at the very end of the trace.
+ ** The code area is made writable via lj_mcode_patch(.., 0) and handed back
+ ** with lj_mcode_patch(.., 1) after lj_mcode_sync.
+ ** NOTE(review): the scan steps through raw bytes rather than decoded
+ ** instructions -- confirm that a false match on data bytes is impossible.
+ */
2608
+ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2609
+ {
2610
+ MCode *p = T->mcode;
2611
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
2612
+ MSize len = T->szmcode;
2613
+ MCode *px = exitstub_addr(J, exitno) - 6;
2614
+ MCode *pe = p+len-6;
2615
+ uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
2616
+ if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
2617
+ *(int32_t *)(p+len-4) = jmprel(p+len, target);
2618
+ /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
2619
+ for (; p < pe; p++)
2620
+ if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) {
2621
+ p += LJ_64 ? 11 : 10;
2622
+ break;
2623
+ }
2624
+ lua_assert(p < pe);
2625
+ for (; p < pe; p++) {
2626
+ if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { /* 0F 8x: near jcc. */
2627
+ *(int32_t *)(p+2) = jmprel(p+6, target);
2628
+ p += 5;
2629
+ }
2630
+ }
2631
+ lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
2632
+ lj_mcode_patch(J, mcarea, 1);
2633
+ }
2634
+