immunio 0.15.4 → 0.16.0
- checksums.yaml +4 -4
- data/LICENSE +0 -27
- data/ext/immunio/Rakefile +9 -0
- data/lib/immunio/plugins/active_record.rb +1 -1
- data/lib/immunio/plugins/active_record_relation.rb +1 -1
- data/lib/immunio/plugins/environment_reporter.rb +20 -0
- data/lib/immunio/rufus_lua_ext/ref.rb +1 -3
- data/lib/immunio/version.rb +1 -1
- data/lib/immunio/vm.rb +1 -2
- data/lua-hooks/Makefile +97 -0
- data/lua-hooks/ext/all.c +41 -52
- data/lua-hooks/ext/all.o +0 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.o +0 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.o +0 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.o +0 -0
- data/lua-hooks/ext/libinjection/lualib.c +2 -2
- data/lua-hooks/ext/lpeg/lpcap.c +2 -2
- data/lua-hooks/ext/lpeg/lpcap.o +0 -0
- data/lua-hooks/ext/lpeg/lpcode.c +2 -2
- data/lua-hooks/ext/lpeg/lpcode.h +1 -1
- data/lua-hooks/ext/lpeg/lpcode.o +0 -0
- data/lua-hooks/ext/lpeg/lpprint.o +0 -0
- data/lua-hooks/ext/lpeg/lptree.c +2 -2
- data/lua-hooks/ext/lpeg/lptypes.h +1 -1
- data/lua-hooks/ext/lpeg/lpvm.c +2 -2
- data/lua-hooks/ext/lpeg/lpvm.o +0 -0
- data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +16 -3
- data/lua-hooks/ext/lua-snapshot/snapshot.c +14 -7
- data/lua-hooks/ext/luajit/COPYRIGHT +56 -0
- data/lua-hooks/ext/luajit/Makefile +159 -0
- data/lua-hooks/ext/luajit/README +16 -0
- data/lua-hooks/ext/luajit/doc/bluequad-print.css +166 -0
- data/lua-hooks/ext/luajit/doc/bluequad.css +325 -0
- data/lua-hooks/ext/luajit/doc/changes.html +804 -0
- data/lua-hooks/ext/luajit/doc/contact.html +104 -0
- data/lua-hooks/ext/luajit/doc/ext_c_api.html +189 -0
- data/lua-hooks/ext/luajit/doc/ext_ffi.html +332 -0
- data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +570 -0
- data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +1261 -0
- data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +603 -0
- data/lua-hooks/ext/luajit/doc/ext_jit.html +201 -0
- data/lua-hooks/ext/luajit/doc/ext_profiler.html +365 -0
- data/lua-hooks/ext/luajit/doc/extensions.html +448 -0
- data/lua-hooks/ext/luajit/doc/faq.html +186 -0
- data/lua-hooks/ext/luajit/doc/img/contact.png +0 -0
- data/lua-hooks/ext/luajit/doc/install.html +659 -0
- data/lua-hooks/ext/luajit/doc/luajit.html +236 -0
- data/lua-hooks/ext/luajit/doc/running.html +309 -0
- data/lua-hooks/ext/luajit/doc/status.html +118 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +456 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1125 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +518 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1166 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +416 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +953 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +419 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1919 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +83 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +12 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +471 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +1945 -0
- data/lua-hooks/ext/luajit/dynasm/dynasm.lua +1094 -0
- data/lua-hooks/ext/luajit/etc/luajit.1 +88 -0
- data/lua-hooks/ext/luajit/etc/luajit.pc +25 -0
- data/lua-hooks/ext/luajit/src/Makefile +697 -0
- data/lua-hooks/ext/luajit/src/Makefile.dep +244 -0
- data/lua-hooks/ext/luajit/src/host/README +4 -0
- data/lua-hooks/ext/luajit/src/host/buildvm +0 -0
- data/lua-hooks/ext/luajit/src/host/buildvm.c +518 -0
- data/lua-hooks/ext/luajit/src/host/buildvm.h +105 -0
- data/lua-hooks/ext/luajit/src/host/buildvm.o +0 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_arch.h +7449 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +345 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_asm.o +0 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +229 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_fold.o +0 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +457 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_lib.o +0 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +45 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +368 -0
- data/lua-hooks/ext/luajit/src/host/buildvm_peobj.o +0 -0
- data/lua-hooks/ext/luajit/src/host/genlibbc.lua +197 -0
- data/lua-hooks/ext/luajit/src/host/genminilua.lua +428 -0
- data/lua-hooks/ext/luajit/src/host/minilua +0 -0
- data/lua-hooks/ext/luajit/src/host/minilua.c +7770 -0
- data/lua-hooks/ext/luajit/src/host/minilua.o +0 -0
- data/lua-hooks/ext/luajit/src/jit/bc.lua +190 -0
- data/lua-hooks/ext/luajit/src/jit/bcsave.lua +661 -0
- data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +689 -0
- data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +428 -0
- data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +17 -0
- data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +591 -0
- data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +17 -0
- data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +838 -0
- data/lua-hooks/ext/luajit/src/jit/dump.lua +706 -0
- data/lua-hooks/ext/luajit/src/jit/p.lua +310 -0
- data/lua-hooks/ext/luajit/src/jit/v.lua +170 -0
- data/lua-hooks/ext/luajit/src/jit/vmdef.lua +362 -0
- data/lua-hooks/ext/luajit/src/jit/zone.lua +45 -0
- data/lua-hooks/ext/{lua → luajit/src}/lauxlib.h +10 -17
- data/lua-hooks/ext/luajit/src/lib_aux.c +356 -0
- data/lua-hooks/ext/luajit/src/lib_aux.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_aux_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_base.c +664 -0
- data/lua-hooks/ext/luajit/src/lib_base.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_base_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_bit.c +180 -0
- data/lua-hooks/ext/luajit/src/lib_bit.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_bit_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_debug.c +405 -0
- data/lua-hooks/ext/luajit/src/lib_debug.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_debug_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_ffi.c +872 -0
- data/lua-hooks/ext/luajit/src/lib_ffi.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_ffi_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_init.c +55 -0
- data/lua-hooks/ext/luajit/src/lib_init.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_init_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_io.c +541 -0
- data/lua-hooks/ext/luajit/src/lib_io.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_io_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_jit.c +767 -0
- data/lua-hooks/ext/luajit/src/lib_jit.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_jit_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_math.c +230 -0
- data/lua-hooks/ext/luajit/src/lib_math.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_math_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_os.c +292 -0
- data/lua-hooks/ext/luajit/src/lib_os.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_os_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_package.c +610 -0
- data/lua-hooks/ext/luajit/src/lib_package.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_package_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_string.c +752 -0
- data/lua-hooks/ext/luajit/src/lib_string.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_string_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_table.c +307 -0
- data/lua-hooks/ext/luajit/src/lib_table.o +0 -0
- data/lua-hooks/ext/luajit/src/lib_table_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/libluajit.a +0 -0
- data/lua-hooks/ext/luajit/src/libluajit.so +0 -0
- data/lua-hooks/ext/luajit/src/lj.supp +26 -0
- data/lua-hooks/ext/luajit/src/lj_alloc.c +1398 -0
- data/lua-hooks/ext/luajit/src/lj_alloc.h +17 -0
- data/lua-hooks/ext/luajit/src/lj_alloc.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_alloc_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_api.c +1210 -0
- data/lua-hooks/ext/luajit/src/lj_api.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_api_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_arch.h +509 -0
- data/lua-hooks/ext/luajit/src/lj_asm.c +2278 -0
- data/lua-hooks/ext/luajit/src/lj_asm.h +17 -0
- data/lua-hooks/ext/luajit/src/lj_asm.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_asm_arm.h +2217 -0
- data/lua-hooks/ext/luajit/src/lj_asm_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_asm_mips.h +1833 -0
- data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2015 -0
- data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2634 -0
- data/lua-hooks/ext/luajit/src/lj_bc.c +14 -0
- data/lua-hooks/ext/luajit/src/lj_bc.h +265 -0
- data/lua-hooks/ext/luajit/src/lj_bc.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_bc_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_bcdef.h +220 -0
- data/lua-hooks/ext/luajit/src/lj_bcdump.h +68 -0
- data/lua-hooks/ext/luajit/src/lj_bcread.c +457 -0
- data/lua-hooks/ext/luajit/src/lj_bcread.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_bcread_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_bcwrite.c +361 -0
- data/lua-hooks/ext/luajit/src/lj_bcwrite.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_bcwrite_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_buf.c +234 -0
- data/lua-hooks/ext/luajit/src/lj_buf.h +105 -0
- data/lua-hooks/ext/luajit/src/lj_buf.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_buf_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_carith.c +429 -0
- data/lua-hooks/ext/luajit/src/lj_carith.h +37 -0
- data/lua-hooks/ext/luajit/src/lj_carith.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_carith_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ccall.c +984 -0
- data/lua-hooks/ext/luajit/src/lj_ccall.h +178 -0
- data/lua-hooks/ext/luajit/src/lj_ccall.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ccall_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ccallback.c +712 -0
- data/lua-hooks/ext/luajit/src/lj_ccallback.h +25 -0
- data/lua-hooks/ext/luajit/src/lj_ccallback.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ccallback_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_cconv.c +752 -0
- data/lua-hooks/ext/luajit/src/lj_cconv.h +70 -0
- data/lua-hooks/ext/luajit/src/lj_cconv.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_cconv_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_cdata.c +288 -0
- data/lua-hooks/ext/luajit/src/lj_cdata.h +76 -0
- data/lua-hooks/ext/luajit/src/lj_cdata.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_cdata_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_char.c +43 -0
- data/lua-hooks/ext/luajit/src/lj_char.h +42 -0
- data/lua-hooks/ext/luajit/src/lj_char.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_char_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_clib.c +418 -0
- data/lua-hooks/ext/luajit/src/lj_clib.h +29 -0
- data/lua-hooks/ext/luajit/src/lj_clib.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_clib_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_cparse.c +1862 -0
- data/lua-hooks/ext/luajit/src/lj_cparse.h +65 -0
- data/lua-hooks/ext/luajit/src/lj_cparse.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_cparse_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_crecord.c +1834 -0
- data/lua-hooks/ext/luajit/src/lj_crecord.h +38 -0
- data/lua-hooks/ext/luajit/src/lj_crecord.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_crecord_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ctype.c +635 -0
- data/lua-hooks/ext/luajit/src/lj_ctype.h +461 -0
- data/lua-hooks/ext/luajit/src/lj_ctype.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ctype_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_debug.c +699 -0
- data/lua-hooks/ext/luajit/src/lj_debug.h +65 -0
- data/lua-hooks/ext/luajit/src/lj_debug.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_debug_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_def.h +365 -0
- data/lua-hooks/ext/luajit/src/lj_dispatch.c +557 -0
- data/lua-hooks/ext/luajit/src/lj_dispatch.h +138 -0
- data/lua-hooks/ext/luajit/src/lj_dispatch.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_dispatch_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_emit_arm.h +356 -0
- data/lua-hooks/ext/luajit/src/lj_emit_mips.h +211 -0
- data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +238 -0
- data/lua-hooks/ext/luajit/src/lj_emit_x86.h +462 -0
- data/lua-hooks/ext/luajit/src/lj_err.c +794 -0
- data/lua-hooks/ext/luajit/src/lj_err.h +41 -0
- data/lua-hooks/ext/luajit/src/lj_err.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_err_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_errmsg.h +190 -0
- data/lua-hooks/ext/luajit/src/lj_ff.h +18 -0
- data/lua-hooks/ext/luajit/src/lj_ffdef.h +209 -0
- data/lua-hooks/ext/luajit/src/lj_ffrecord.c +1247 -0
- data/lua-hooks/ext/luajit/src/lj_ffrecord.h +24 -0
- data/lua-hooks/ext/luajit/src/lj_ffrecord.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ffrecord_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_folddef.h +1138 -0
- data/lua-hooks/ext/luajit/src/lj_frame.h +259 -0
- data/lua-hooks/ext/luajit/src/lj_func.c +185 -0
- data/lua-hooks/ext/luajit/src/lj_func.h +24 -0
- data/lua-hooks/ext/luajit/src/lj_func.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_func_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_gc.c +845 -0
- data/lua-hooks/ext/luajit/src/lj_gc.h +134 -0
- data/lua-hooks/ext/luajit/src/lj_gc.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_gc_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_gdbjit.c +787 -0
- data/lua-hooks/ext/luajit/src/lj_gdbjit.h +22 -0
- data/lua-hooks/ext/luajit/src/lj_gdbjit.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_gdbjit_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ir.c +505 -0
- data/lua-hooks/ext/luajit/src/lj_ir.h +577 -0
- data/lua-hooks/ext/luajit/src/lj_ir.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ir_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_ircall.h +321 -0
- data/lua-hooks/ext/luajit/src/lj_iropt.h +161 -0
- data/lua-hooks/ext/luajit/src/lj_jit.h +440 -0
- data/lua-hooks/ext/luajit/src/lj_lex.c +482 -0
- data/lua-hooks/ext/luajit/src/lj_lex.h +86 -0
- data/lua-hooks/ext/luajit/src/lj_lex.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_lex_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_lib.c +303 -0
- data/lua-hooks/ext/luajit/src/lj_lib.h +115 -0
- data/lua-hooks/ext/luajit/src/lj_lib.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_lib_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_libdef.h +414 -0
- data/lua-hooks/ext/luajit/src/lj_load.c +168 -0
- data/lua-hooks/ext/luajit/src/lj_load.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_load_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_mcode.c +386 -0
- data/lua-hooks/ext/luajit/src/lj_mcode.h +30 -0
- data/lua-hooks/ext/luajit/src/lj_mcode.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_mcode_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_meta.c +477 -0
- data/lua-hooks/ext/luajit/src/lj_meta.h +38 -0
- data/lua-hooks/ext/luajit/src/lj_meta.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_meta_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_obj.c +50 -0
- data/lua-hooks/ext/luajit/src/lj_obj.h +976 -0
- data/lua-hooks/ext/luajit/src/lj_obj.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_obj_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_dce.c +78 -0
- data/lua-hooks/ext/luajit/src/lj_opt_dce.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_dce_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_fold.c +2488 -0
- data/lua-hooks/ext/luajit/src/lj_opt_fold.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_fold_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_loop.c +449 -0
- data/lua-hooks/ext/luajit/src/lj_opt_loop.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_loop_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_mem.c +935 -0
- data/lua-hooks/ext/luajit/src/lj_opt_mem.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_mem_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +652 -0
- data/lua-hooks/ext/luajit/src/lj_opt_narrow.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_narrow_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_sink.c +245 -0
- data/lua-hooks/ext/luajit/src/lj_opt_sink.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_sink_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_split.c +856 -0
- data/lua-hooks/ext/luajit/src/lj_opt_split.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_opt_split_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_parse.c +2725 -0
- data/lua-hooks/ext/luajit/src/lj_parse.h +18 -0
- data/lua-hooks/ext/luajit/src/lj_parse.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_parse_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_profile.c +368 -0
- data/lua-hooks/ext/luajit/src/lj_profile.h +21 -0
- data/lua-hooks/ext/luajit/src/lj_profile.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_profile_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_recdef.h +270 -0
- data/lua-hooks/ext/luajit/src/lj_record.c +2554 -0
- data/lua-hooks/ext/luajit/src/lj_record.h +45 -0
- data/lua-hooks/ext/luajit/src/lj_record.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_record_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_snap.c +870 -0
- data/lua-hooks/ext/luajit/src/lj_snap.h +34 -0
- data/lua-hooks/ext/luajit/src/lj_snap.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_snap_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_state.c +300 -0
- data/lua-hooks/ext/luajit/src/lj_state.h +35 -0
- data/lua-hooks/ext/luajit/src/lj_state.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_state_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_str.c +197 -0
- data/lua-hooks/ext/luajit/src/lj_str.h +27 -0
- data/lua-hooks/ext/luajit/src/lj_str.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_str_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_strfmt.c +554 -0
- data/lua-hooks/ext/luajit/src/lj_strfmt.h +125 -0
- data/lua-hooks/ext/luajit/src/lj_strfmt.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_strfmt_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_strscan.c +547 -0
- data/lua-hooks/ext/luajit/src/lj_strscan.h +39 -0
- data/lua-hooks/ext/luajit/src/lj_strscan.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_strscan_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_tab.c +666 -0
- data/lua-hooks/ext/luajit/src/lj_tab.h +73 -0
- data/lua-hooks/ext/luajit/src/lj_tab.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_tab_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_target.h +164 -0
- data/lua-hooks/ext/luajit/src/lj_target_arm.h +270 -0
- data/lua-hooks/ext/luajit/src/lj_target_arm64.h +97 -0
- data/lua-hooks/ext/luajit/src/lj_target_mips.h +260 -0
- data/lua-hooks/ext/luajit/src/lj_target_ppc.h +280 -0
- data/lua-hooks/ext/luajit/src/lj_target_x86.h +345 -0
- data/lua-hooks/ext/luajit/src/lj_trace.c +859 -0
- data/lua-hooks/ext/luajit/src/lj_trace.h +54 -0
- data/lua-hooks/ext/luajit/src/lj_trace.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_trace_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_traceerr.h +63 -0
- data/lua-hooks/ext/luajit/src/lj_udata.c +34 -0
- data/lua-hooks/ext/luajit/src/lj_udata.h +14 -0
- data/lua-hooks/ext/luajit/src/lj_udata.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_udata_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_vm.S +2730 -0
- data/lua-hooks/ext/luajit/src/lj_vm.h +114 -0
- data/lua-hooks/ext/luajit/src/lj_vm.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_vm_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_vmevent.c +58 -0
- data/lua-hooks/ext/luajit/src/lj_vmevent.h +59 -0
- data/lua-hooks/ext/luajit/src/lj_vmevent.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_vmevent_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_vmmath.c +152 -0
- data/lua-hooks/ext/luajit/src/lj_vmmath.o +0 -0
- data/lua-hooks/ext/luajit/src/lj_vmmath_dyn.o +0 -0
- data/lua-hooks/ext/luajit/src/ljamalg.c +96 -0
- data/lua-hooks/ext/{lua → luajit/src}/lua.h +12 -7
- data/lua-hooks/ext/luajit/src/lua.hpp +9 -0
- data/lua-hooks/ext/luajit/src/luaconf.h +156 -0
- data/lua-hooks/ext/luajit/src/luajit +0 -0
- data/lua-hooks/ext/luajit/src/luajit.c +570 -0
- data/lua-hooks/ext/luajit/src/luajit.h +79 -0
- data/lua-hooks/ext/luajit/src/luajit.o +0 -0
- data/lua-hooks/ext/luajit/src/lualib.h +43 -0
- data/lua-hooks/ext/luajit/src/msvcbuild.bat +114 -0
- data/lua-hooks/ext/luajit/src/ps4build.bat +103 -0
- data/lua-hooks/ext/luajit/src/psvitabuild.bat +93 -0
- data/lua-hooks/ext/luajit/src/vm_arm.dasc +4585 -0
- data/lua-hooks/ext/luajit/src/vm_arm64.dasc +3764 -0
- data/lua-hooks/ext/luajit/src/vm_mips.dasc +4355 -0
- data/lua-hooks/ext/luajit/src/vm_ppc.dasc +5252 -0
- data/lua-hooks/ext/luajit/src/vm_x64.dasc +4902 -0
- data/lua-hooks/ext/luajit/src/vm_x86.dasc +5710 -0
- data/lua-hooks/ext/luajit/src/xb1build.bat +101 -0
- data/lua-hooks/ext/luajit/src/xedkbuild.bat +92 -0
- data/lua-hooks/ext/luautf8/lutf8lib.c +3 -3
- data/lua-hooks/lib/boot.lua +37 -2
- metadata +372 -69
- data/lua-hooks/ext/bitop/README +0 -22
- data/lua-hooks/ext/bitop/bit.c +0 -189
- data/lua-hooks/ext/extconf.rb +0 -38
- data/lua-hooks/ext/lua/COPYRIGHT +0 -34
- data/lua-hooks/ext/lua/lapi.c +0 -1087
- data/lua-hooks/ext/lua/lapi.h +0 -16
- data/lua-hooks/ext/lua/lauxlib.c +0 -652
- data/lua-hooks/ext/lua/lbaselib.c +0 -659
- data/lua-hooks/ext/lua/lcode.c +0 -831
- data/lua-hooks/ext/lua/lcode.h +0 -76
- data/lua-hooks/ext/lua/ldblib.c +0 -398
- data/lua-hooks/ext/lua/ldebug.c +0 -638
- data/lua-hooks/ext/lua/ldebug.h +0 -33
- data/lua-hooks/ext/lua/ldo.c +0 -519
- data/lua-hooks/ext/lua/ldo.h +0 -57
- data/lua-hooks/ext/lua/ldump.c +0 -164
- data/lua-hooks/ext/lua/lfunc.c +0 -174
- data/lua-hooks/ext/lua/lfunc.h +0 -34
- data/lua-hooks/ext/lua/lgc.c +0 -710
- data/lua-hooks/ext/lua/lgc.h +0 -110
- data/lua-hooks/ext/lua/linit.c +0 -38
- data/lua-hooks/ext/lua/liolib.c +0 -556
- data/lua-hooks/ext/lua/llex.c +0 -463
- data/lua-hooks/ext/lua/llex.h +0 -81
- data/lua-hooks/ext/lua/llimits.h +0 -128
- data/lua-hooks/ext/lua/lmathlib.c +0 -263
- data/lua-hooks/ext/lua/lmem.c +0 -86
- data/lua-hooks/ext/lua/lmem.h +0 -49
- data/lua-hooks/ext/lua/loadlib.c +0 -705
- data/lua-hooks/ext/lua/loadlib_rel.c +0 -760
- data/lua-hooks/ext/lua/lobject.c +0 -214
- data/lua-hooks/ext/lua/lobject.h +0 -381
- data/lua-hooks/ext/lua/lopcodes.c +0 -102
- data/lua-hooks/ext/lua/lopcodes.h +0 -268
- data/lua-hooks/ext/lua/loslib.c +0 -243
- data/lua-hooks/ext/lua/lparser.c +0 -1339
- data/lua-hooks/ext/lua/lparser.h +0 -82
- data/lua-hooks/ext/lua/lstate.c +0 -214
- data/lua-hooks/ext/lua/lstate.h +0 -169
- data/lua-hooks/ext/lua/lstring.c +0 -111
- data/lua-hooks/ext/lua/lstring.h +0 -31
- data/lua-hooks/ext/lua/lstrlib.c +0 -871
- data/lua-hooks/ext/lua/ltable.c +0 -588
- data/lua-hooks/ext/lua/ltable.h +0 -40
- data/lua-hooks/ext/lua/ltablib.c +0 -287
- data/lua-hooks/ext/lua/ltm.c +0 -75
- data/lua-hooks/ext/lua/ltm.h +0 -54
- data/lua-hooks/ext/lua/lua.c +0 -392
- data/lua-hooks/ext/lua/lua.def +0 -131
- data/lua-hooks/ext/lua/lua.rc +0 -28
- data/lua-hooks/ext/lua/lua_dll.rc +0 -26
- data/lua-hooks/ext/lua/luac.c +0 -200
- data/lua-hooks/ext/lua/luac.rc +0 -1
- data/lua-hooks/ext/lua/luaconf.h +0 -763
- data/lua-hooks/ext/lua/luaconf.h.in +0 -724
- data/lua-hooks/ext/lua/luaconf.h.orig +0 -763
- data/lua-hooks/ext/lua/lualib.h +0 -53
- data/lua-hooks/ext/lua/lundump.c +0 -227
- data/lua-hooks/ext/lua/lundump.h +0 -36
- data/lua-hooks/ext/lua/lvm.c +0 -767
- data/lua-hooks/ext/lua/lvm.h +0 -36
- data/lua-hooks/ext/lua/lzio.c +0 -82
- data/lua-hooks/ext/lua/lzio.h +0 -67
- data/lua-hooks/ext/lua/print.c +0 -227
@@ -0,0 +1,2634 @@
|
|
1
|
+
/*
|
2
|
+
** x86/x64 IR assembler (SSA IR -> machine code).
|
3
|
+
** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
|
4
|
+
*/
|
5
|
+
|
6
|
+
/* -- Guard handling ------------------------------------------------------ */
|
7
|
+
|
8
|
+
/* Generate an exit stub group at the bottom of the reserved MCode memory. */
|
9
|
+
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
|
10
|
+
{
|
11
|
+
ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
|
12
|
+
MCode *mxp = as->mcbot;
|
13
|
+
MCode *mxpstart = mxp;
|
14
|
+
if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop)
|
15
|
+
asm_mclimit(as);
|
16
|
+
/* Push low byte of exitno for each exit stub. */
|
17
|
+
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
|
18
|
+
for (i = 1; i < EXITSTUBS_PER_GROUP; i++) {
|
19
|
+
*mxp++ = XI_JMPs; *mxp++ = (MCode)((2+2)*(EXITSTUBS_PER_GROUP - i) - 2);
|
20
|
+
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)(groupofs + i);
|
21
|
+
}
|
22
|
+
/* Push the high byte of the exitno for each exit stub group. */
|
23
|
+
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
|
24
|
+
/* Store DISPATCH at original stack slot 0. Account for the two push ops. */
|
25
|
+
*mxp++ = XI_MOVmi;
|
26
|
+
*mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
|
27
|
+
*mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
|
28
|
+
*mxp++ = 2*sizeof(void *);
|
29
|
+
*(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
|
30
|
+
/* Jump to exit handler which fills in the ExitState. */
|
31
|
+
*mxp++ = XI_JMP; mxp += 4;
|
32
|
+
*((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
|
33
|
+
/* Commit the code for this group (even if assembly fails later on). */
|
34
|
+
lj_mcode_commitbot(as->J, mxp);
|
35
|
+
as->mcbot = mxp;
|
36
|
+
as->mclim = as->mcbot + MCLIM_REDZONE;
|
37
|
+
return mxpstart;
|
38
|
+
}
|
39
|
+
|
40
|
+
/* Setup all needed exit stubs. */
|
41
|
+
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
|
42
|
+
{
|
43
|
+
ExitNo i;
|
44
|
+
if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
|
45
|
+
lj_trace_err(as->J, LJ_TRERR_SNAPOV);
|
46
|
+
for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
|
47
|
+
if (as->J->exitstubgroup[i] == NULL)
|
48
|
+
as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
|
49
|
+
}
|
50
|
+
|
51
|
+
/* Emit conditional branch to exit for guard.
|
52
|
+
** It's important to emit this *after* all registers have been allocated,
|
53
|
+
** because rematerializations may invalidate the flags.
|
54
|
+
*/
|
55
|
+
static void asm_guardcc(ASMState *as, int cc)
|
56
|
+
{
|
57
|
+
MCode *target = exitstub_addr(as->J, as->snapno);
|
58
|
+
MCode *p = as->mcp;
|
59
|
+
if (LJ_UNLIKELY(p == as->invmcp)) {
|
60
|
+
as->loopinv = 1;
|
61
|
+
*(int32_t *)(p+1) = jmprel(p+5, target);
|
62
|
+
target = p;
|
63
|
+
cc ^= 1;
|
64
|
+
if (as->realign) {
|
65
|
+
emit_sjcc(as, cc, target);
|
66
|
+
return;
|
67
|
+
}
|
68
|
+
}
|
69
|
+
emit_jcc(as, cc, target);
|
70
|
+
}
|
71
|
+
|
72
|
+
/* -- Memory operand fusion ----------------------------------------------- */
|
73
|
+
|
74
|
+
/* Limit linear search to this distance. Avoids O(n^2) behavior. */
|
75
|
+
#define CONFLICT_SEARCH_LIM 31
|
76
|
+
|
77
|
+
/* Check if a reference is a signed 32 bit constant. */
|
78
|
+
static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
|
79
|
+
{
|
80
|
+
if (irref_isk(ref)) {
|
81
|
+
IRIns *ir = IR(ref);
|
82
|
+
if (ir->o != IR_KINT64) {
|
83
|
+
*k = ir->i;
|
84
|
+
return 1;
|
85
|
+
} else if (checki32((int64_t)ir_kint64(ir)->u64)) {
|
86
|
+
*k = (int32_t)ir_kint64(ir)->u64;
|
87
|
+
return 1;
|
88
|
+
}
|
89
|
+
}
|
90
|
+
return 0;
|
91
|
+
}
|
92
|
+
|
93
|
+
/* Check if there's no conflicting instruction between curins and ref.
|
94
|
+
** Also avoid fusing loads if there are multiple references.
|
95
|
+
*/
|
96
|
+
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
|
97
|
+
{
|
98
|
+
IRIns *ir = as->ir;
|
99
|
+
IRRef i = as->curins;
|
100
|
+
if (i > ref + CONFLICT_SEARCH_LIM)
|
101
|
+
return 0; /* Give up, ref is too far away. */
|
102
|
+
while (--i > ref) {
|
103
|
+
if (ir[i].o == conflict)
|
104
|
+
return 0; /* Conflict found. */
|
105
|
+
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
|
106
|
+
return 0;
|
107
|
+
}
|
108
|
+
return 1; /* Ok, no conflict. */
|
109
|
+
}
|
110
|
+
|
111
|
+
/* Fuse array base into memory operand. */
|
112
|
+
static IRRef asm_fuseabase(ASMState *as, IRRef ref)
|
113
|
+
{
|
114
|
+
IRIns *irb = IR(ref);
|
115
|
+
as->mrm.ofs = 0;
|
116
|
+
if (irb->o == IR_FLOAD) {
|
117
|
+
IRIns *ira = IR(irb->op1);
|
118
|
+
lua_assert(irb->op2 == IRFL_TAB_ARRAY);
|
119
|
+
/* We can avoid the FLOAD of t->array for colocated arrays. */
|
120
|
+
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
|
121
|
+
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
|
122
|
+
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
|
123
|
+
return irb->op1; /* Table obj. */
|
124
|
+
}
|
125
|
+
} else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
|
126
|
+
/* Fuse base offset (vararg load). */
|
127
|
+
as->mrm.ofs = IR(irb->op2)->i;
|
128
|
+
return irb->op1;
|
129
|
+
}
|
130
|
+
return ref; /* Otherwise use the given array base. */
|
131
|
+
}
|
132
|
+
|
133
|
+
/* Fuse array reference into memory operand. */
|
134
|
+
static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
|
135
|
+
{
|
136
|
+
IRIns *irx;
|
137
|
+
lua_assert(ir->o == IR_AREF);
|
138
|
+
as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow);
|
139
|
+
irx = IR(ir->op2);
|
140
|
+
if (irref_isk(ir->op2)) {
|
141
|
+
as->mrm.ofs += 8*irx->i;
|
142
|
+
as->mrm.idx = RID_NONE;
|
143
|
+
} else {
|
144
|
+
rset_clear(allow, as->mrm.base);
|
145
|
+
as->mrm.scale = XM_SCALE8;
|
146
|
+
/* Fuse a constant ADD (e.g. t[i+1]) into the offset.
|
147
|
+
** Doesn't help much without ABCelim, but reduces register pressure.
|
148
|
+
*/
|
149
|
+
if (!LJ_64 && /* Has bad effects with negative index on x64. */
|
150
|
+
mayfuse(as, ir->op2) && ra_noreg(irx->r) &&
|
151
|
+
irx->o == IR_ADD && irref_isk(irx->op2)) {
|
152
|
+
as->mrm.ofs += 8*IR(irx->op2)->i;
|
153
|
+
as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow);
|
154
|
+
} else {
|
155
|
+
as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow);
|
156
|
+
}
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
/* Fuse array/hash/upvalue reference into memory operand.
|
161
|
+
** Caveat: this may allocate GPRs for the base/idx registers. Be sure to
|
162
|
+
** pass the final allow mask, excluding any GPRs used for other inputs.
|
163
|
+
** In particular: 2-operand GPR instructions need to call ra_dest() first!
|
164
|
+
*/
|
165
|
+
static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
|
166
|
+
{
|
167
|
+
IRIns *ir = IR(ref);
|
168
|
+
if (ra_noreg(ir->r)) {
|
169
|
+
switch ((IROp)ir->o) {
|
170
|
+
case IR_AREF:
|
171
|
+
if (mayfuse(as, ref)) {
|
172
|
+
asm_fusearef(as, ir, allow);
|
173
|
+
return;
|
174
|
+
}
|
175
|
+
break;
|
176
|
+
case IR_HREFK:
|
177
|
+
if (mayfuse(as, ref)) {
|
178
|
+
as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
|
179
|
+
as->mrm.ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
|
180
|
+
as->mrm.idx = RID_NONE;
|
181
|
+
return;
|
182
|
+
}
|
183
|
+
break;
|
184
|
+
case IR_UREFC:
|
185
|
+
if (irref_isk(ir->op1)) {
|
186
|
+
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
187
|
+
GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
|
188
|
+
as->mrm.ofs = ptr2addr(&uv->tv);
|
189
|
+
as->mrm.base = as->mrm.idx = RID_NONE;
|
190
|
+
return;
|
191
|
+
}
|
192
|
+
break;
|
193
|
+
default:
|
194
|
+
lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
|
195
|
+
ir->o == IR_KKPTR);
|
196
|
+
break;
|
197
|
+
}
|
198
|
+
}
|
199
|
+
as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow);
|
200
|
+
as->mrm.ofs = 0;
|
201
|
+
as->mrm.idx = RID_NONE;
|
202
|
+
}
|
203
|
+
|
204
|
+
/* Fuse FLOAD/FREF reference into memory operand. */
|
205
|
+
static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
|
206
|
+
{
|
207
|
+
lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
|
208
|
+
as->mrm.ofs = field_ofs[ir->op2];
|
209
|
+
as->mrm.idx = RID_NONE;
|
210
|
+
if (irref_isk(ir->op1)) {
|
211
|
+
as->mrm.ofs += IR(ir->op1)->i;
|
212
|
+
as->mrm.base = RID_NONE;
|
213
|
+
} else {
|
214
|
+
as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
218
|
+
/* Fuse string reference into memory operand. */
|
219
|
+
static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
|
220
|
+
{
|
221
|
+
IRIns *irr;
|
222
|
+
lua_assert(ir->o == IR_STRREF);
|
223
|
+
as->mrm.base = as->mrm.idx = RID_NONE;
|
224
|
+
as->mrm.scale = XM_SCALE1;
|
225
|
+
as->mrm.ofs = sizeof(GCstr);
|
226
|
+
if (irref_isk(ir->op1)) {
|
227
|
+
as->mrm.ofs += IR(ir->op1)->i;
|
228
|
+
} else {
|
229
|
+
Reg r = ra_alloc1(as, ir->op1, allow);
|
230
|
+
rset_clear(allow, r);
|
231
|
+
as->mrm.base = (uint8_t)r;
|
232
|
+
}
|
233
|
+
irr = IR(ir->op2);
|
234
|
+
if (irref_isk(ir->op2)) {
|
235
|
+
as->mrm.ofs += irr->i;
|
236
|
+
} else {
|
237
|
+
Reg r;
|
238
|
+
/* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
|
239
|
+
if (!LJ_64 && /* Has bad effects with negative index on x64. */
|
240
|
+
mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
|
241
|
+
as->mrm.ofs += IR(irr->op2)->i;
|
242
|
+
r = ra_alloc1(as, irr->op1, allow);
|
243
|
+
} else {
|
244
|
+
r = ra_alloc1(as, ir->op2, allow);
|
245
|
+
}
|
246
|
+
if (as->mrm.base == RID_NONE)
|
247
|
+
as->mrm.base = (uint8_t)r;
|
248
|
+
else
|
249
|
+
as->mrm.idx = (uint8_t)r;
|
250
|
+
}
|
251
|
+
}
|
252
|
+
|
253
|
+
static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
|
254
|
+
{
|
255
|
+
IRIns *ir = IR(ref);
|
256
|
+
as->mrm.idx = RID_NONE;
|
257
|
+
if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
|
258
|
+
as->mrm.ofs = ir->i;
|
259
|
+
as->mrm.base = RID_NONE;
|
260
|
+
} else if (ir->o == IR_STRREF) {
|
261
|
+
asm_fusestrref(as, ir, allow);
|
262
|
+
} else {
|
263
|
+
as->mrm.ofs = 0;
|
264
|
+
if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
|
265
|
+
/* Gather (base+idx*sz)+ofs as emitted by cdata ptr/array indexing. */
|
266
|
+
IRIns *irx;
|
267
|
+
IRRef idx;
|
268
|
+
Reg r;
|
269
|
+
if (asm_isk32(as, ir->op2, &as->mrm.ofs)) { /* Recognize x+ofs. */
|
270
|
+
ref = ir->op1;
|
271
|
+
ir = IR(ref);
|
272
|
+
if (!(ir->o == IR_ADD && canfuse(as, ir) && ra_noreg(ir->r)))
|
273
|
+
goto noadd;
|
274
|
+
}
|
275
|
+
as->mrm.scale = XM_SCALE1;
|
276
|
+
idx = ir->op1;
|
277
|
+
ref = ir->op2;
|
278
|
+
irx = IR(idx);
|
279
|
+
if (!(irx->o == IR_BSHL || irx->o == IR_ADD)) { /* Try other operand. */
|
280
|
+
idx = ir->op2;
|
281
|
+
ref = ir->op1;
|
282
|
+
irx = IR(idx);
|
283
|
+
}
|
284
|
+
if (canfuse(as, irx) && ra_noreg(irx->r)) {
|
285
|
+
if (irx->o == IR_BSHL && irref_isk(irx->op2) && IR(irx->op2)->i <= 3) {
|
286
|
+
/* Recognize idx<<b with b = 0-3, corresponding to sz = (1),2,4,8. */
|
287
|
+
idx = irx->op1;
|
288
|
+
as->mrm.scale = (uint8_t)(IR(irx->op2)->i << 6);
|
289
|
+
} else if (irx->o == IR_ADD && irx->op1 == irx->op2) {
|
290
|
+
/* FOLD does idx*2 ==> idx<<1 ==> idx+idx. */
|
291
|
+
idx = irx->op1;
|
292
|
+
as->mrm.scale = XM_SCALE2;
|
293
|
+
}
|
294
|
+
}
|
295
|
+
r = ra_alloc1(as, idx, allow);
|
296
|
+
rset_clear(allow, r);
|
297
|
+
as->mrm.idx = (uint8_t)r;
|
298
|
+
}
|
299
|
+
noadd:
|
300
|
+
as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow);
|
301
|
+
}
|
302
|
+
}
|
303
|
+
|
304
|
+
/* Fuse load into memory operand. */
|
305
|
+
static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
306
|
+
{
|
307
|
+
IRIns *ir = IR(ref);
|
308
|
+
if (ra_hasreg(ir->r)) {
|
309
|
+
if (allow != RSET_EMPTY) { /* Fast path. */
|
310
|
+
ra_noweak(as, ir->r);
|
311
|
+
return ir->r;
|
312
|
+
}
|
313
|
+
fusespill:
|
314
|
+
/* Force a spill if only memory operands are allowed (asm_x87load). */
|
315
|
+
as->mrm.base = RID_ESP;
|
316
|
+
as->mrm.ofs = ra_spill(as, ir);
|
317
|
+
as->mrm.idx = RID_NONE;
|
318
|
+
return RID_MRM;
|
319
|
+
}
|
320
|
+
if (ir->o == IR_KNUM) {
|
321
|
+
RegSet avail = as->freeset & ~as->modset & RSET_FPR;
|
322
|
+
lua_assert(allow != RSET_EMPTY);
|
323
|
+
if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
|
324
|
+
as->mrm.ofs = ptr2addr(ir_knum(ir));
|
325
|
+
as->mrm.base = as->mrm.idx = RID_NONE;
|
326
|
+
return RID_MRM;
|
327
|
+
}
|
328
|
+
} else if (ir->o == IR_KINT64) {
|
329
|
+
RegSet avail = as->freeset & ~as->modset & RSET_GPR;
|
330
|
+
lua_assert(allow != RSET_EMPTY);
|
331
|
+
if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
|
332
|
+
as->mrm.ofs = ptr2addr(ir_kint64(ir));
|
333
|
+
as->mrm.base = as->mrm.idx = RID_NONE;
|
334
|
+
return RID_MRM;
|
335
|
+
}
|
336
|
+
} else if (mayfuse(as, ref)) {
|
337
|
+
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
|
338
|
+
if (ir->o == IR_SLOAD) {
|
339
|
+
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
|
340
|
+
noconflict(as, ref, IR_RETF, 0)) {
|
341
|
+
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
|
342
|
+
as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
|
343
|
+
as->mrm.idx = RID_NONE;
|
344
|
+
return RID_MRM;
|
345
|
+
}
|
346
|
+
} else if (ir->o == IR_FLOAD) {
|
347
|
+
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
|
348
|
+
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
|
349
|
+
noconflict(as, ref, IR_FSTORE, 0)) {
|
350
|
+
asm_fusefref(as, ir, xallow);
|
351
|
+
return RID_MRM;
|
352
|
+
}
|
353
|
+
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
|
354
|
+
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
|
355
|
+
asm_fuseahuref(as, ir->op1, xallow);
|
356
|
+
return RID_MRM;
|
357
|
+
}
|
358
|
+
} else if (ir->o == IR_XLOAD) {
|
359
|
+
/* Generic fusion is not ok for 8/16 bit operands (but see asm_comp).
|
360
|
+
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
|
361
|
+
*/
|
362
|
+
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
|
363
|
+
noconflict(as, ref, IR_XSTORE, 0)) {
|
364
|
+
asm_fusexref(as, ir->op1, xallow);
|
365
|
+
return RID_MRM;
|
366
|
+
}
|
367
|
+
} else if (ir->o == IR_VLOAD) {
|
368
|
+
asm_fuseahuref(as, ir->op1, xallow);
|
369
|
+
return RID_MRM;
|
370
|
+
}
|
371
|
+
}
|
372
|
+
if (!(as->freeset & allow) && !irref_isk(ref) &&
|
373
|
+
(allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
|
374
|
+
goto fusespill;
|
375
|
+
return ra_allocref(as, ref, allow);
|
376
|
+
}
|
377
|
+
|
378
|
+
#if LJ_64
|
379
|
+
/* Don't fuse a 32 bit load into a 64 bit operation. */
|
380
|
+
static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
|
381
|
+
{
|
382
|
+
if (is64 && !irt_is64(IR(ref)->t))
|
383
|
+
return ra_alloc1(as, ref, allow);
|
384
|
+
return asm_fuseload(as, ref, allow);
|
385
|
+
}
|
386
|
+
#else
|
387
|
+
#define asm_fuseloadm(as, ref, allow, is64) asm_fuseload(as, (ref), (allow))
|
388
|
+
#endif
|
389
|
+
|
390
|
+
/* -- Calls --------------------------------------------------------------- */
|
391
|
+
|
392
|
+
/* Count the required number of stack slots for a call. */
|
393
|
+
static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
|
394
|
+
{
|
395
|
+
uint32_t i, nargs = CCI_XNARGS(ci);
|
396
|
+
int nslots = 0;
|
397
|
+
#if LJ_64
|
398
|
+
if (LJ_ABI_WIN) {
|
399
|
+
nslots = (int)(nargs*2); /* Only matters for more than four args. */
|
400
|
+
} else {
|
401
|
+
int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
|
402
|
+
for (i = 0; i < nargs; i++)
|
403
|
+
if (args[i] && irt_isfp(IR(args[i])->t)) {
|
404
|
+
if (nfpr > 0) nfpr--; else nslots += 2;
|
405
|
+
} else {
|
406
|
+
if (ngpr > 0) ngpr--; else nslots += 2;
|
407
|
+
}
|
408
|
+
}
|
409
|
+
#else
|
410
|
+
int ngpr = 0;
|
411
|
+
if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
|
412
|
+
ngpr = 2;
|
413
|
+
else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
|
414
|
+
ngpr = 1;
|
415
|
+
for (i = 0; i < nargs; i++)
|
416
|
+
if (args[i] && irt_isfp(IR(args[i])->t)) {
|
417
|
+
nslots += irt_isnum(IR(args[i])->t) ? 2 : 1;
|
418
|
+
} else {
|
419
|
+
if (ngpr > 0) ngpr--; else nslots++;
|
420
|
+
}
|
421
|
+
#endif
|
422
|
+
return nslots;
|
423
|
+
}
|
424
|
+
|
425
|
+
/* Generate a call to a C function. */
|
426
|
+
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
427
|
+
{
|
428
|
+
uint32_t n, nargs = CCI_XNARGS(ci);
|
429
|
+
int32_t ofs = STACKARG_OFS;
|
430
|
+
#if LJ_64
|
431
|
+
uint32_t gprs = REGARG_GPRS;
|
432
|
+
Reg fpr = REGARG_FIRSTFPR;
|
433
|
+
#if !LJ_ABI_WIN
|
434
|
+
MCode *patchnfpr = NULL;
|
435
|
+
#endif
|
436
|
+
#else
|
437
|
+
uint32_t gprs = 0;
|
438
|
+
if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) {
|
439
|
+
if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL)
|
440
|
+
gprs = (REGARG_GPRS & 31);
|
441
|
+
else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL)
|
442
|
+
gprs = REGARG_GPRS;
|
443
|
+
}
|
444
|
+
#endif
|
445
|
+
if ((void *)ci->func)
|
446
|
+
emit_call(as, ci->func);
|
447
|
+
#if LJ_64
|
448
|
+
if ((ci->flags & CCI_VARARG)) { /* Special handling for vararg calls. */
|
449
|
+
#if LJ_ABI_WIN
|
450
|
+
for (n = 0; n < 4 && n < nargs; n++) {
|
451
|
+
IRIns *ir = IR(args[n]);
|
452
|
+
if (irt_isfp(ir->t)) /* Duplicate FPRs in GPRs. */
|
453
|
+
emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n),
|
454
|
+
((gprs >> (n*5)) & 31)); /* Either MOVD or MOVQ. */
|
455
|
+
}
|
456
|
+
#else
|
457
|
+
patchnfpr = --as->mcp; /* Indicate number of used FPRs in register al. */
|
458
|
+
*--as->mcp = XI_MOVrib | RID_EAX;
|
459
|
+
#endif
|
460
|
+
}
|
461
|
+
#endif
|
462
|
+
for (n = 0; n < nargs; n++) { /* Setup args. */
|
463
|
+
IRRef ref = args[n];
|
464
|
+
IRIns *ir = IR(ref);
|
465
|
+
Reg r;
|
466
|
+
#if LJ_64 && LJ_ABI_WIN
|
467
|
+
/* Windows/x64 argument registers are strictly positional. */
|
468
|
+
r = irt_isfp(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31);
|
469
|
+
fpr++; gprs >>= 5;
|
470
|
+
#elif LJ_64
|
471
|
+
/* POSIX/x64 argument registers are used in order of appearance. */
|
472
|
+
if (irt_isfp(ir->t)) {
|
473
|
+
r = fpr <= REGARG_LASTFPR ? fpr++ : 0;
|
474
|
+
} else {
|
475
|
+
r = gprs & 31; gprs >>= 5;
|
476
|
+
}
|
477
|
+
#else
|
478
|
+
if (ref && irt_isfp(ir->t)) {
|
479
|
+
r = 0;
|
480
|
+
} else {
|
481
|
+
r = gprs & 31; gprs >>= 5;
|
482
|
+
if (!ref) continue;
|
483
|
+
}
|
484
|
+
#endif
|
485
|
+
if (r) { /* Argument is in a register. */
|
486
|
+
if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
|
487
|
+
#if LJ_64
|
488
|
+
if (ir->o == IR_KINT64)
|
489
|
+
emit_loadu64(as, r, ir_kint64(ir)->u64);
|
490
|
+
else
|
491
|
+
#endif
|
492
|
+
emit_loadi(as, r, ir->i);
|
493
|
+
} else {
|
494
|
+
lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
|
495
|
+
if (ra_hasreg(ir->r)) {
|
496
|
+
ra_noweak(as, ir->r);
|
497
|
+
emit_movrr(as, ir, r, ir->r);
|
498
|
+
} else {
|
499
|
+
ra_allocref(as, ref, RID2RSET(r));
|
500
|
+
}
|
501
|
+
}
|
502
|
+
} else if (irt_isfp(ir->t)) { /* FP argument is on stack. */
|
503
|
+
lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */
|
504
|
+
if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
|
505
|
+
/* Split stores for unaligned FP consts. */
|
506
|
+
emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
|
507
|
+
emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
|
508
|
+
} else {
|
509
|
+
r = ra_alloc1(as, ref, RSET_FPR);
|
510
|
+
emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto,
|
511
|
+
r, RID_ESP, ofs);
|
512
|
+
}
|
513
|
+
ofs += (LJ_32 && irt_isfloat(ir->t)) ? 4 : 8;
|
514
|
+
} else { /* Non-FP argument is on stack. */
|
515
|
+
if (LJ_32 && ref < ASMREF_TMP1) {
|
516
|
+
emit_movmroi(as, RID_ESP, ofs, ir->i);
|
517
|
+
} else {
|
518
|
+
r = ra_alloc1(as, ref, RSET_GPR);
|
519
|
+
emit_movtomro(as, REX_64 + r, RID_ESP, ofs);
|
520
|
+
}
|
521
|
+
ofs += sizeof(intptr_t);
|
522
|
+
}
|
523
|
+
checkmclim(as);
|
524
|
+
}
|
525
|
+
#if LJ_64 && !LJ_ABI_WIN
|
526
|
+
if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR;
|
527
|
+
#endif
|
528
|
+
}
|
529
|
+
|
530
|
+
/* Setup result reg/sp for call. Evict scratch regs. */
|
531
|
+
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
532
|
+
{
|
533
|
+
RegSet drop = RSET_SCRATCH;
|
534
|
+
int hiop = (LJ_32 && (ir+1)->o == IR_HIOP);
|
535
|
+
if ((ci->flags & CCI_NOFPRCLOBBER))
|
536
|
+
drop &= ~RSET_FPR;
|
537
|
+
if (ra_hasreg(ir->r))
|
538
|
+
rset_clear(drop, ir->r); /* Dest reg handled below. */
|
539
|
+
if (hiop && ra_hasreg((ir+1)->r))
|
540
|
+
rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
|
541
|
+
ra_evictset(as, drop); /* Evictions must be performed first. */
|
542
|
+
if (ra_used(ir)) {
|
543
|
+
if (irt_isfp(ir->t)) {
|
544
|
+
int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
|
545
|
+
#if LJ_64
|
546
|
+
if ((ci->flags & CCI_CASTU64)) {
|
547
|
+
Reg dest = ir->r;
|
548
|
+
if (ra_hasreg(dest)) {
|
549
|
+
ra_free(as, dest);
|
550
|
+
ra_modified(as, dest);
|
551
|
+
emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
|
552
|
+
}
|
553
|
+
if (ofs) emit_movtomro(as, RID_RET|REX_64, RID_ESP, ofs);
|
554
|
+
} else {
|
555
|
+
ra_destreg(as, ir, RID_FPRET);
|
556
|
+
}
|
557
|
+
#else
|
558
|
+
/* Number result is in x87 st0 for x86 calling convention. */
|
559
|
+
Reg dest = ir->r;
|
560
|
+
if (ra_hasreg(dest)) {
|
561
|
+
ra_free(as, dest);
|
562
|
+
ra_modified(as, dest);
|
563
|
+
emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
|
564
|
+
dest, RID_ESP, ofs);
|
565
|
+
}
|
566
|
+
if ((ci->flags & CCI_CASTU64)) {
|
567
|
+
emit_movtomro(as, RID_RETLO, RID_ESP, ofs);
|
568
|
+
emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
|
569
|
+
} else {
|
570
|
+
emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
|
571
|
+
irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
|
572
|
+
}
|
573
|
+
#endif
|
574
|
+
#if LJ_32
|
575
|
+
} else if (hiop) {
|
576
|
+
ra_destpair(as, ir);
|
577
|
+
#endif
|
578
|
+
} else {
|
579
|
+
lua_assert(!irt_ispri(ir->t));
|
580
|
+
ra_destreg(as, ir, RID_RET);
|
581
|
+
}
|
582
|
+
} else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
|
583
|
+
emit_x87op(as, XI_FPOP); /* Pop unused result from x87 st0. */
|
584
|
+
}
|
585
|
+
}
|
586
|
+
|
587
|
+
/* Return a constant function pointer or NULL for indirect calls. */
|
588
|
+
static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
|
589
|
+
{
|
590
|
+
#if LJ_32
|
591
|
+
UNUSED(as);
|
592
|
+
if (irref_isk(func))
|
593
|
+
return (void *)irf->i;
|
594
|
+
#else
|
595
|
+
if (irref_isk(func)) {
|
596
|
+
MCode *p;
|
597
|
+
if (irf->o == IR_KINT64)
|
598
|
+
p = (MCode *)(void *)ir_k64(irf)->u64;
|
599
|
+
else
|
600
|
+
p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
|
601
|
+
if (p - as->mcp == (int32_t)(p - as->mcp))
|
602
|
+
return p; /* Call target is still in +-2GB range. */
|
603
|
+
/* Avoid the indirect case of emit_call(). Try to hoist func addr. */
|
604
|
+
}
|
605
|
+
#endif
|
606
|
+
return NULL;
|
607
|
+
}
|
608
|
+
|
609
|
+
static void asm_callx(ASMState *as, IRIns *ir)
|
610
|
+
{
|
611
|
+
IRRef args[CCI_NARGS_MAX*2];
|
612
|
+
CCallInfo ci;
|
613
|
+
IRRef func;
|
614
|
+
IRIns *irf;
|
615
|
+
int32_t spadj = 0;
|
616
|
+
ci.flags = asm_callx_flags(as, ir);
|
617
|
+
asm_collectargs(as, ir, &ci, args);
|
618
|
+
asm_setupresult(as, ir, &ci);
|
619
|
+
#if LJ_32
|
620
|
+
/* Have to readjust stack after non-cdecl calls due to callee cleanup. */
|
621
|
+
if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL)
|
622
|
+
spadj = 4 * asm_count_call_slots(as, &ci, args);
|
623
|
+
#endif
|
624
|
+
func = ir->op2; irf = IR(func);
|
625
|
+
if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
|
626
|
+
ci.func = (ASMFunction)asm_callx_func(as, irf, func);
|
627
|
+
if (!(void *)ci.func) {
|
628
|
+
/* Use a (hoistable) non-scratch register for indirect calls. */
|
629
|
+
RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
|
630
|
+
Reg r = ra_alloc1(as, func, allow);
|
631
|
+
if (LJ_32) emit_spsub(as, spadj); /* Above code may cause restores! */
|
632
|
+
emit_rr(as, XO_GROUP5, XOg_CALL, r);
|
633
|
+
} else if (LJ_32) {
|
634
|
+
emit_spsub(as, spadj);
|
635
|
+
}
|
636
|
+
asm_gencall(as, &ci, args);
|
637
|
+
}
|
638
|
+
|
639
|
+
/* -- Returns ------------------------------------------------------------- */
|
640
|
+
|
641
|
+
/* Return to lower frame. Guard that it goes to the right spot. */
|
642
|
+
static void asm_retf(ASMState *as, IRIns *ir)
|
643
|
+
{
|
644
|
+
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
|
645
|
+
void *pc = ir_kptr(IR(ir->op2));
|
646
|
+
int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
|
647
|
+
as->topslot -= (BCReg)delta;
|
648
|
+
if ((int32_t)as->topslot < 0) as->topslot = 0;
|
649
|
+
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
|
650
|
+
emit_setgl(as, base, jit_base);
|
651
|
+
emit_addptr(as, base, -8*delta);
|
652
|
+
asm_guardcc(as, CC_NE);
|
653
|
+
emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
|
654
|
+
}
|
655
|
+
|
656
|
+
/* -- Type conversions ---------------------------------------------------- */
|
657
|
+
|
658
|
+
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
|
659
|
+
{
|
660
|
+
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
|
661
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
662
|
+
asm_guardcc(as, CC_P);
|
663
|
+
asm_guardcc(as, CC_NE);
|
664
|
+
emit_rr(as, XO_UCOMISD, left, tmp);
|
665
|
+
emit_rr(as, XO_CVTSI2SD, tmp, dest);
|
666
|
+
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
|
667
|
+
emit_rr(as, XO_CVTTSD2SI, dest, left);
|
668
|
+
/* Can't fuse since left is needed twice. */
|
669
|
+
}
|
670
|
+
|
671
|
+
static void asm_tobit(ASMState *as, IRIns *ir)
|
672
|
+
{
|
673
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
674
|
+
Reg tmp = ra_noreg(IR(ir->op1)->r) ?
|
675
|
+
ra_alloc1(as, ir->op1, RSET_FPR) :
|
676
|
+
ra_scratch(as, RSET_FPR);
|
677
|
+
Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
|
678
|
+
emit_rr(as, XO_MOVDto, tmp, dest);
|
679
|
+
emit_mrm(as, XO_ADDSD, tmp, right);
|
680
|
+
ra_left(as, tmp, ir->op1);
|
681
|
+
}
|
682
|
+
|
683
|
+
static void asm_conv(ASMState *as, IRIns *ir)
|
684
|
+
{
|
685
|
+
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
|
686
|
+
int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64));
|
687
|
+
int stfp = (st == IRT_NUM || st == IRT_FLOAT);
|
688
|
+
IRRef lref = ir->op1;
|
689
|
+
lua_assert(irt_type(ir->t) != st);
|
690
|
+
lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */
|
691
|
+
if (irt_isfp(ir->t)) {
|
692
|
+
Reg dest = ra_dest(as, ir, RSET_FPR);
|
693
|
+
if (stfp) { /* FP to FP conversion. */
|
694
|
+
Reg left = asm_fuseload(as, lref, RSET_FPR);
|
695
|
+
emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left);
|
696
|
+
if (left == dest) return; /* Avoid the XO_XORPS. */
|
697
|
+
} else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
|
698
|
+
/* number = (2^52+2^51 .. u32) - (2^52+2^51) */
|
699
|
+
cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
|
700
|
+
Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
|
701
|
+
if (irt_isfloat(ir->t))
|
702
|
+
emit_rr(as, XO_CVTSD2SS, dest, dest);
|
703
|
+
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
|
704
|
+
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
|
705
|
+
emit_loadn(as, bias, k);
|
706
|
+
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
|
707
|
+
return;
|
708
|
+
} else { /* Integer to FP conversion. */
|
709
|
+
Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ?
|
710
|
+
ra_alloc1(as, lref, RSET_GPR) :
|
711
|
+
asm_fuseloadm(as, lref, RSET_GPR, st64);
|
712
|
+
if (LJ_64 && st == IRT_U64) {
|
713
|
+
MCLabel l_end = emit_label(as);
|
714
|
+
const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
|
715
|
+
emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
|
716
|
+
emit_sjcc(as, CC_NS, l_end);
|
717
|
+
emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
|
718
|
+
}
|
719
|
+
emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
|
720
|
+
dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
|
721
|
+
}
|
722
|
+
emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
|
723
|
+
} else if (stfp) { /* FP to integer conversion. */
|
724
|
+
if (irt_isguard(ir->t)) {
|
725
|
+
/* Checked conversions are only supported from number to int. */
|
726
|
+
lua_assert(irt_isint(ir->t) && st == IRT_NUM);
|
727
|
+
asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
|
728
|
+
} else {
|
729
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
730
|
+
x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
|
731
|
+
if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
|
732
|
+
/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
|
733
|
+
/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
|
734
|
+
Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
|
735
|
+
ra_scratch(as, RSET_FPR);
|
736
|
+
MCLabel l_end = emit_label(as);
|
737
|
+
if (LJ_32)
|
738
|
+
emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
|
739
|
+
emit_rr(as, op, dest|REX_64, tmp);
|
740
|
+
if (st == IRT_NUM)
|
741
|
+
emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J,
|
742
|
+
LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
|
743
|
+
else
|
744
|
+
emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J,
|
745
|
+
LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
|
746
|
+
emit_sjcc(as, CC_NS, l_end);
|
747
|
+
emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
|
748
|
+
emit_rr(as, op, dest|REX_64, tmp);
|
749
|
+
ra_left(as, tmp, lref);
|
750
|
+
} else {
|
751
|
+
Reg left = asm_fuseload(as, lref, RSET_FPR);
|
752
|
+
if (LJ_64 && irt_isu32(ir->t))
|
753
|
+
emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
|
754
|
+
emit_mrm(as, op,
|
755
|
+
dest|((LJ_64 &&
|
756
|
+
(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
|
757
|
+
left);
|
758
|
+
}
|
759
|
+
}
|
760
|
+
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
|
761
|
+
Reg left, dest = ra_dest(as, ir, RSET_GPR);
|
762
|
+
RegSet allow = RSET_GPR;
|
763
|
+
x86Op op;
|
764
|
+
lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
|
765
|
+
if (st == IRT_I8) {
|
766
|
+
op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX;
|
767
|
+
} else if (st == IRT_U8) {
|
768
|
+
op = XO_MOVZXb; allow = RSET_GPR8; dest |= FORCE_REX;
|
769
|
+
} else if (st == IRT_I16) {
|
770
|
+
op = XO_MOVSXw;
|
771
|
+
} else {
|
772
|
+
op = XO_MOVZXw;
|
773
|
+
}
|
774
|
+
left = asm_fuseload(as, lref, allow);
|
775
|
+
/* Add extra MOV if source is already in wrong register. */
|
776
|
+
if (!LJ_64 && left != RID_MRM && !rset_test(allow, left)) {
|
777
|
+
Reg tmp = ra_scratch(as, allow);
|
778
|
+
emit_rr(as, op, dest, tmp);
|
779
|
+
emit_rr(as, XO_MOV, tmp, left);
|
780
|
+
} else {
|
781
|
+
emit_mrm(as, op, dest, left);
|
782
|
+
}
|
783
|
+
} else { /* 32/64 bit integer conversions. */
|
784
|
+
if (LJ_32) { /* Only need to handle 32/32 bit no-op (cast) on x86. */
|
785
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
786
|
+
ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */
|
787
|
+
} else if (irt_is64(ir->t)) {
|
788
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
789
|
+
if (st64 || !(ir->op2 & IRCONV_SEXT)) {
|
790
|
+
/* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
|
791
|
+
ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */
|
792
|
+
} else { /* 32 to 64 bit sign extension. */
|
793
|
+
Reg left = asm_fuseload(as, lref, RSET_GPR);
|
794
|
+
emit_mrm(as, XO_MOVSXd, dest|REX_64, left);
|
795
|
+
}
|
796
|
+
} else {
|
797
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
798
|
+
if (st64) {
|
799
|
+
Reg left = asm_fuseload(as, lref, RSET_GPR);
|
800
|
+
/* This is either a 32 bit reg/reg mov which zeroes the hiword
|
801
|
+
** or a load of the loword from a 64 bit address.
|
802
|
+
*/
|
803
|
+
emit_mrm(as, XO_MOV, dest, left);
|
804
|
+
} else { /* 32/32 bit no-op (cast). */
|
805
|
+
ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */
|
806
|
+
}
|
807
|
+
}
|
808
|
+
}
|
809
|
+
}
|
810
|
+
|
811
|
+
+#if LJ_32 && LJ_HASFFI
+/* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */
+
+/* 64 bit integer to FP conversion in 32 bit mode. */
+static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
+{
+  Reg hi = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi));
+  int32_t ofs = sps_scale(ir->s);  /* Use spill slot or temp slots. */
+  Reg dest = ir->r;
+  if (ra_hasreg(dest)) {
+    ra_free(as, dest);
+    ra_modified(as, dest);
+    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
+  }
+  emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
+            irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
+  if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
+    MCLabel l_end = emit_label(as);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+             lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_end);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if u64 >= 2^63. */
+  } else {
+    lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
+  }
+  emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
+  /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
+  emit_rmro(as, XO_MOVto, hi, RID_ESP, 4);
+  emit_rmro(as, XO_MOVto, lo, RID_ESP, 0);
+}
+
+/* FP to 64 bit integer conversion in 32 bit mode. */
+static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+  Reg lo, hi;
+  lua_assert(st == IRT_NUM || st == IRT_FLOAT);
+  lua_assert(dt == IRT_I64 || dt == IRT_U64);
+  hi = ra_dest(as, ir, RSET_GPR);
+  lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
+  if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
+  /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */
+  if (!(as->flags & JIT_F_SSE3)) {  /* Set FPU rounding mode to default. */
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4);
+    emit_rmro(as, XO_MOVto, lo, RID_ESP, 4);
+    emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
+  }
+  if (dt == IRT_U64) {
+    /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */
+    MCLabel l_pop, l_end = emit_label(as);
+    emit_x87op(as, XI_FPOP);
+    l_pop = emit_label(as);
+    emit_sjmp(as, l_end);
+    emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+    if ((as->flags & JIT_F_SSE3))
+      emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+    else
+      emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rma(as, XO_FADDq, XOg_FADDq,
+             lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+    emit_sjcc(as, CC_NS, l_pop);
+    emit_rr(as, XO_TEST, hi, hi);  /* Check if out-of-range (2^63). */
+  }
+  emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
+  if ((as->flags & JIT_F_SSE3)) {  /* Truncation is easy with SSE3. */
+    emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
+  } else {  /* Otherwise set FPU rounding mode to truncate before the store. */
+    emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
+    emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0);
+    emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0);
+    emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0);
+    emit_loadi(as, lo, 0xc00);
+    emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0);
+  }
+  if (dt == IRT_U64)
+    emit_x87op(as, XI_FDUP);
+  emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd,
+           st == IRT_NUM ? XOg_FLDq : XOg_FLDd,
+           asm_fuseload(as, ir->op1, RSET_EMPTY));
+}
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+  if (irt_isfp(ir->t))
+    asm_conv_fp_int64(as, ir);
+  else
+    asm_conv_int64_fp(as, ir);
+}
+#endif
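For readers following asm_conv_fp_int64 above, here is a minimal standalone C sketch of the same compensation trick (plain C, not part of this gem or of the bundled LuaJIT sources; the helper name and test value are invented). x87 FILD only converts signed 64 bit integers, so an unsigned value with the top bit set first comes out too small by 2^64 and is then repaired by adding 2^64 — the U64x(43f00000,00000000) constant above is exactly 2^64 as a double.

/* Sketch: why adding 2^64 repairs a u64 that was converted as if signed. */
#include <stdint.h>
#include <stdio.h>

static double u64_to_double_via_signed(uint64_t u)
{
  double d = (double)(int64_t)u;  /* Off by -2^64 whenever u >= 2^63. */
  if ((int64_t)u < 0)             /* Mirrors the TEST hi,hi sign check above. */
    d += 0x1p64;                  /* Add 2^64 (the 43f00000 00000000 double). */
  return d;
}

int main(void)
{
  uint64_t u = 0xFFFFFFFFFFFFF800ULL;  /* In [2^63, 2^64-1]. */
  printf("%d\n", u64_to_double_via_signed(u) == (double)u);  /* Prints 1. */
  return 0;
}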
|
903
|
+
|
904
|
+
static void asm_strto(ASMState *as, IRIns *ir)
|
905
|
+
{
|
906
|
+
/* Force a spill slot for the destination register (if any). */
|
907
|
+
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
|
908
|
+
IRRef args[2];
|
909
|
+
RegSet drop = RSET_SCRATCH;
|
910
|
+
if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
|
911
|
+
rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
|
912
|
+
ra_evictset(as, drop);
|
913
|
+
asm_guardcc(as, CC_E);
|
914
|
+
emit_rr(as, XO_TEST, RID_RET, RID_RET); /* Test return status. */
|
915
|
+
args[0] = ir->op1; /* GCstr *str */
|
916
|
+
args[1] = ASMREF_TMP1; /* TValue *n */
|
917
|
+
asm_gencall(as, ci, args);
|
918
|
+
/* Store the result to the spill slot or temp slots. */
|
919
|
+
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
|
920
|
+
RID_ESP, sps_scale(ir->s));
|
921
|
+
}
|
922
|
+
|
923
|
+
/* -- Memory references --------------------------------------------------- */
|
924
|
+
|
925
|
+
/* Get pointer to TValue. */
|
926
|
+
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
|
927
|
+
{
|
928
|
+
IRIns *ir = IR(ref);
|
929
|
+
if (irt_isnum(ir->t)) {
|
930
|
+
/* For numbers use the constant itself or a spill slot as a TValue. */
|
931
|
+
if (irref_isk(ref))
|
932
|
+
emit_loada(as, dest, ir_knum(ir));
|
933
|
+
else
|
934
|
+
emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
|
935
|
+
} else {
|
936
|
+
/* Otherwise use g->tmptv to hold the TValue. */
|
937
|
+
if (!irref_isk(ref)) {
|
938
|
+
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
|
939
|
+
emit_movtomro(as, REX_64IR(ir, src), dest, 0);
|
940
|
+
} else if (!irt_ispri(ir->t)) {
|
941
|
+
emit_movmroi(as, dest, 0, ir->i);
|
942
|
+
}
|
943
|
+
if (!(LJ_64 && irt_islightud(ir->t)))
|
944
|
+
emit_movmroi(as, dest, 4, irt_toitype(ir->t));
|
945
|
+
emit_loada(as, dest, &J2G(as->J)->tmptv);
|
946
|
+
}
|
947
|
+
}
|
948
|
+
|
949
|
+
static void asm_aref(ASMState *as, IRIns *ir)
|
950
|
+
{
|
951
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
952
|
+
asm_fusearef(as, ir, RSET_GPR);
|
953
|
+
if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
|
954
|
+
emit_mrm(as, XO_LEA, dest, RID_MRM);
|
955
|
+
else if (as->mrm.base != dest)
|
956
|
+
emit_rr(as, XO_MOV, dest, as->mrm.base);
|
957
|
+
}
|
958
|
+
|
959
|
+
/* Inlined hash lookup. Specialized for key type and for const keys.
|
960
|
+
** The equivalent C code is:
|
961
|
+
** Node *n = hashkey(t, key);
|
962
|
+
** do {
|
963
|
+
** if (lj_obj_equal(&n->key, key)) return &n->val;
|
964
|
+
** } while ((n = nextnode(n)));
|
965
|
+
** return niltv(L);
|
966
|
+
*/
|
967
|
+
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
968
|
+
{
|
969
|
+
RegSet allow = RSET_GPR;
|
970
|
+
int destused = ra_used(ir);
|
971
|
+
Reg dest = ra_dest(as, ir, allow);
|
972
|
+
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
|
973
|
+
Reg key = RID_NONE, tmp = RID_NONE;
|
974
|
+
IRIns *irkey = IR(ir->op2);
|
975
|
+
int isk = irref_isk(ir->op2);
|
976
|
+
IRType1 kt = irkey->t;
|
977
|
+
uint32_t khash;
|
978
|
+
MCLabel l_end, l_loop, l_next;
|
979
|
+
|
980
|
+
if (!isk) {
|
981
|
+
rset_clear(allow, tab);
|
982
|
+
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
|
983
|
+
if (!irt_isstr(kt))
|
984
|
+
tmp = ra_scratch(as, rset_exclude(allow, key));
|
985
|
+
}
|
986
|
+
|
987
|
+
/* Key not found in chain: jump to exit (if merged) or load niltv. */
|
988
|
+
l_end = emit_label(as);
|
989
|
+
if (merge == IR_NE)
|
990
|
+
asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
|
991
|
+
else if (destused)
|
992
|
+
emit_loada(as, dest, niltvg(J2G(as->J)));
|
993
|
+
|
994
|
+
/* Follow hash chain until the end. */
|
995
|
+
l_loop = emit_sjcc_label(as, CC_NZ);
|
996
|
+
emit_rr(as, XO_TEST, dest, dest);
|
997
|
+
emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
|
998
|
+
l_next = emit_label(as);
|
999
|
+
|
1000
|
+
/* Type and value comparison. */
|
1001
|
+
if (merge == IR_EQ)
|
1002
|
+
asm_guardcc(as, CC_E);
|
1003
|
+
else
|
1004
|
+
emit_sjcc(as, CC_E, l_end);
|
1005
|
+
if (irt_isnum(kt)) {
|
1006
|
+
if (isk) {
|
1007
|
+
/* Assumes -0.0 is already canonicalized to +0.0. */
|
1008
|
+
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
|
1009
|
+
(int32_t)ir_knum(irkey)->u32.lo);
|
1010
|
+
emit_sjcc(as, CC_NE, l_next);
|
1011
|
+
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
|
1012
|
+
(int32_t)ir_knum(irkey)->u32.hi);
|
1013
|
+
} else {
|
1014
|
+
emit_sjcc(as, CC_P, l_next);
|
1015
|
+
emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
|
1016
|
+
emit_sjcc(as, CC_AE, l_next);
|
1017
|
+
/* The type check avoids NaN penalties and complaints from Valgrind. */
|
1018
|
+
#if LJ_64
|
1019
|
+
emit_u32(as, LJ_TISNUM);
|
1020
|
+
emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
|
1021
|
+
#else
|
1022
|
+
emit_i8(as, LJ_TISNUM);
|
1023
|
+
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
|
1024
|
+
#endif
|
1025
|
+
}
|
1026
|
+
#if LJ_64
|
1027
|
+
} else if (irt_islightud(kt)) {
|
1028
|
+
emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
|
1029
|
+
#endif
|
1030
|
+
} else {
|
1031
|
+
if (!irt_ispri(kt)) {
|
1032
|
+
lua_assert(irt_isaddr(kt));
|
1033
|
+
if (isk)
|
1034
|
+
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
|
1035
|
+
ptr2addr(ir_kgc(irkey)));
|
1036
|
+
else
|
1037
|
+
emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
|
1038
|
+
emit_sjcc(as, CC_NE, l_next);
|
1039
|
+
}
|
1040
|
+
lua_assert(!irt_isnil(kt));
|
1041
|
+
emit_i8(as, irt_toitype(kt));
|
1042
|
+
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
|
1043
|
+
}
|
1044
|
+
emit_sfixup(as, l_loop);
|
1045
|
+
checkmclim(as);
|
1046
|
+
|
1047
|
+
/* Load main position relative to tab->node into dest. */
|
1048
|
+
khash = isk ? ir_khash(irkey) : 1;
|
1049
|
+
if (khash == 0) {
|
1050
|
+
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
|
1051
|
+
} else {
|
1052
|
+
emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
|
1053
|
+
if ((as->flags & JIT_F_PREFER_IMUL)) {
|
1054
|
+
emit_i8(as, sizeof(Node));
|
1055
|
+
emit_rr(as, XO_IMULi8, dest, dest);
|
1056
|
+
} else {
|
1057
|
+
emit_shifti(as, XOg_SHL, dest, 3);
|
1058
|
+
emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
|
1059
|
+
}
|
1060
|
+
if (isk) {
|
1061
|
+
emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
|
1062
|
+
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
|
1063
|
+
} else if (irt_isstr(kt)) {
|
1064
|
+
emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
|
1065
|
+
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
|
1066
|
+
} else { /* Must match with hashrot() in lj_tab.c. */
|
1067
|
+
emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
|
1068
|
+
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
|
1069
|
+
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
|
1070
|
+
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
|
1071
|
+
emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
|
1072
|
+
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
|
1073
|
+
emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
|
1074
|
+
emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest);
|
1075
|
+
if (irt_isnum(kt)) {
|
1076
|
+
emit_rr(as, XO_ARITH(XOg_ADD), dest, dest);
|
1077
|
+
#if LJ_64
|
1078
|
+
emit_shifti(as, XOg_SHR|REX_64, dest, 32);
|
1079
|
+
emit_rr(as, XO_MOV, tmp, dest);
|
1080
|
+
emit_rr(as, XO_MOVDto, key|REX_64, dest);
|
1081
|
+
#else
|
1082
|
+
emit_rmro(as, XO_MOV, dest, RID_ESP, ra_spill(as, irkey)+4);
|
1083
|
+
emit_rr(as, XO_MOVDto, key, tmp);
|
1084
|
+
#endif
|
1085
|
+
} else {
|
1086
|
+
emit_rr(as, XO_MOV, tmp, key);
|
1087
|
+
emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
|
1088
|
+
}
|
1089
|
+
}
|
1090
|
+
}
|
1091
|
+
}
|
1092
|
+
|
1093
|
+
static void asm_hrefk(ASMState *as, IRIns *ir)
|
1094
|
+
{
|
1095
|
+
IRIns *kslot = IR(ir->op2);
|
1096
|
+
IRIns *irkey = IR(kslot->op1);
|
1097
|
+
int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
|
1098
|
+
Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
|
1099
|
+
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
|
1100
|
+
#if !LJ_64
|
1101
|
+
MCLabel l_exit;
|
1102
|
+
#endif
|
1103
|
+
lua_assert(ofs % sizeof(Node) == 0);
|
1104
|
+
if (ra_hasreg(dest)) {
|
1105
|
+
if (ofs != 0) {
|
1106
|
+
if (dest == node && !(as->flags & JIT_F_LEA_AGU))
|
1107
|
+
emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
|
1108
|
+
else
|
1109
|
+
emit_rmro(as, XO_LEA, dest, node, ofs);
|
1110
|
+
} else if (dest != node) {
|
1111
|
+
emit_rr(as, XO_MOV, dest, node);
|
1112
|
+
}
|
1113
|
+
}
|
1114
|
+
asm_guardcc(as, CC_NE);
|
1115
|
+
#if LJ_64
|
1116
|
+
if (!irt_ispri(irkey->t)) {
|
1117
|
+
Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
|
1118
|
+
emit_rmro(as, XO_CMP, key|REX_64, node,
|
1119
|
+
ofs + (int32_t)offsetof(Node, key.u64));
|
1120
|
+
lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
|
1121
|
+
/* Assumes -0.0 is already canonicalized to +0.0. */
|
1122
|
+
emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
|
1123
|
+
((uint64_t)irt_toitype(irkey->t) << 32) |
|
1124
|
+
(uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
|
1125
|
+
} else {
|
1126
|
+
lua_assert(!irt_isnil(irkey->t));
|
1127
|
+
emit_i8(as, irt_toitype(irkey->t));
|
1128
|
+
emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
|
1129
|
+
ofs + (int32_t)offsetof(Node, key.it));
|
1130
|
+
}
|
1131
|
+
#else
|
1132
|
+
l_exit = emit_label(as);
|
1133
|
+
if (irt_isnum(irkey->t)) {
|
1134
|
+
/* Assumes -0.0 is already canonicalized to +0.0. */
|
1135
|
+
emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
|
1136
|
+
ofs + (int32_t)offsetof(Node, key.u32.lo),
|
1137
|
+
(int32_t)ir_knum(irkey)->u32.lo);
|
1138
|
+
emit_sjcc(as, CC_NE, l_exit);
|
1139
|
+
emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
|
1140
|
+
ofs + (int32_t)offsetof(Node, key.u32.hi),
|
1141
|
+
(int32_t)ir_knum(irkey)->u32.hi);
|
1142
|
+
} else {
|
1143
|
+
if (!irt_ispri(irkey->t)) {
|
1144
|
+
lua_assert(irt_isgcv(irkey->t));
|
1145
|
+
emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
|
1146
|
+
ofs + (int32_t)offsetof(Node, key.gcr),
|
1147
|
+
ptr2addr(ir_kgc(irkey)));
|
1148
|
+
emit_sjcc(as, CC_NE, l_exit);
|
1149
|
+
}
|
1150
|
+
lua_assert(!irt_isnil(irkey->t));
|
1151
|
+
emit_i8(as, irt_toitype(irkey->t));
|
1152
|
+
emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
|
1153
|
+
ofs + (int32_t)offsetof(Node, key.it));
|
1154
|
+
}
|
1155
|
+
#endif
|
1156
|
+
}
|
1157
|
+
|
1158
|
+
static void asm_uref(ASMState *as, IRIns *ir)
|
1159
|
+
{
|
1160
|
+
/* NYI: Check that UREFO is still open and not aliasing a slot. */
|
1161
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
1162
|
+
if (irref_isk(ir->op1)) {
|
1163
|
+
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
1164
|
+
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
|
1165
|
+
emit_rma(as, XO_MOV, dest, v);
|
1166
|
+
} else {
|
1167
|
+
Reg uv = ra_scratch(as, RSET_GPR);
|
1168
|
+
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
|
1169
|
+
if (ir->o == IR_UREFC) {
|
1170
|
+
emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
|
1171
|
+
asm_guardcc(as, CC_NE);
|
1172
|
+
emit_i8(as, 1);
|
1173
|
+
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
|
1174
|
+
} else {
|
1175
|
+
emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
|
1176
|
+
}
|
1177
|
+
emit_rmro(as, XO_MOV, uv, func,
|
1178
|
+
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
|
1179
|
+
}
|
1180
|
+
}
|
1181
|
+
|
1182
|
+
static void asm_fref(ASMState *as, IRIns *ir)
|
1183
|
+
{
|
1184
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
1185
|
+
asm_fusefref(as, ir, RSET_GPR);
|
1186
|
+
emit_mrm(as, XO_LEA, dest, RID_MRM);
|
1187
|
+
}
|
1188
|
+
|
1189
|
+
static void asm_strref(ASMState *as, IRIns *ir)
|
1190
|
+
{
|
1191
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
1192
|
+
asm_fusestrref(as, ir, RSET_GPR);
|
1193
|
+
if (as->mrm.base == RID_NONE)
|
1194
|
+
emit_loadi(as, dest, as->mrm.ofs);
|
1195
|
+
else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
|
1196
|
+
emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
|
1197
|
+
else
|
1198
|
+
emit_mrm(as, XO_LEA, dest, RID_MRM);
|
1199
|
+
}
|
1200
|
+
|
1201
|
+
/* -- Loads and stores ---------------------------------------------------- */
|
1202
|
+
|
1203
|
+
static void asm_fxload(ASMState *as, IRIns *ir)
|
1204
|
+
{
|
1205
|
+
Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
|
1206
|
+
x86Op xo;
|
1207
|
+
if (ir->o == IR_FLOAD)
|
1208
|
+
asm_fusefref(as, ir, RSET_GPR);
|
1209
|
+
else
|
1210
|
+
asm_fusexref(as, ir->op1, RSET_GPR);
|
1211
|
+
/* ir->op2 is ignored -- unaligned loads are ok on x86. */
|
1212
|
+
switch (irt_type(ir->t)) {
|
1213
|
+
case IRT_I8: xo = XO_MOVSXb; break;
|
1214
|
+
case IRT_U8: xo = XO_MOVZXb; break;
|
1215
|
+
case IRT_I16: xo = XO_MOVSXw; break;
|
1216
|
+
case IRT_U16: xo = XO_MOVZXw; break;
|
1217
|
+
case IRT_NUM: xo = XO_MOVSD; break;
|
1218
|
+
case IRT_FLOAT: xo = XO_MOVSS; break;
|
1219
|
+
default:
|
1220
|
+
if (LJ_64 && irt_is64(ir->t))
|
1221
|
+
dest |= REX_64;
|
1222
|
+
else
|
1223
|
+
lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
|
1224
|
+
xo = XO_MOV;
|
1225
|
+
break;
|
1226
|
+
}
|
1227
|
+
emit_mrm(as, xo, dest, RID_MRM);
|
1228
|
+
}
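The "unaligned loads are ok on x86" note above is why asm_fxload can fuse an arbitrary address without an alignment check. As a point of comparison only (plain C, not from this gem; the helper name is invented), portable code usually gets the same single-MOV load by copying through memcpy instead of casting the pointer:

#include <stdint.h>
#include <string.h>

/* Sketch: unaligned 32-bit load without undefined behaviour.
** Optimizing compilers typically lower this to one mov on x86.
*/
static uint32_t load_u32_unaligned(const void *p)
{
  uint32_t v;
  memcpy(&v, p, sizeof v);
  return v;
}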
|
1229
|
+
|
1230
|
+
#define asm_fload(as, ir) asm_fxload(as, ir)
|
1231
|
+
#define asm_xload(as, ir) asm_fxload(as, ir)
|
1232
|
+
|
1233
|
+
static void asm_fxstore(ASMState *as, IRIns *ir)
|
1234
|
+
{
|
1235
|
+
RegSet allow = RSET_GPR;
|
1236
|
+
Reg src = RID_NONE, osrc = RID_NONE;
|
1237
|
+
int32_t k = 0;
|
1238
|
+
if (ir->r == RID_SINK)
|
1239
|
+
return;
|
1240
|
+
/* The IRT_I16/IRT_U16 stores should never be simplified for constant
|
1241
|
+
** values since mov word [mem], imm16 has a length-changing prefix.
|
1242
|
+
*/
|
1243
|
+
if (irt_isi16(ir->t) || irt_isu16(ir->t) || irt_isfp(ir->t) ||
|
1244
|
+
!asm_isk32(as, ir->op2, &k)) {
|
1245
|
+
RegSet allow8 = irt_isfp(ir->t) ? RSET_FPR :
|
1246
|
+
(irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
|
1247
|
+
src = osrc = ra_alloc1(as, ir->op2, allow8);
|
1248
|
+
if (!LJ_64 && !rset_test(allow8, src)) { /* Already in wrong register. */
|
1249
|
+
rset_clear(allow, osrc);
|
1250
|
+
src = ra_scratch(as, allow8);
|
1251
|
+
}
|
1252
|
+
rset_clear(allow, src);
|
1253
|
+
}
|
1254
|
+
if (ir->o == IR_FSTORE) {
|
1255
|
+
asm_fusefref(as, IR(ir->op1), allow);
|
1256
|
+
} else {
|
1257
|
+
asm_fusexref(as, ir->op1, allow);
|
1258
|
+
if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4;
|
1259
|
+
}
|
1260
|
+
if (ra_hasreg(src)) {
|
1261
|
+
x86Op xo;
|
1262
|
+
switch (irt_type(ir->t)) {
|
1263
|
+
case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
|
1264
|
+
case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
|
1265
|
+
case IRT_NUM: xo = XO_MOVSDto; break;
|
1266
|
+
case IRT_FLOAT: xo = XO_MOVSSto; break;
|
1267
|
+
#if LJ_64
|
1268
|
+
case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
|
1269
|
+
#endif
|
1270
|
+
default:
|
1271
|
+
if (LJ_64 && irt_is64(ir->t))
|
1272
|
+
src |= REX_64;
|
1273
|
+
else
|
1274
|
+
lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
|
1275
|
+
xo = XO_MOVto;
|
1276
|
+
break;
|
1277
|
+
}
|
1278
|
+
emit_mrm(as, xo, src, RID_MRM);
|
1279
|
+
if (!LJ_64 && src != osrc) {
|
1280
|
+
ra_noweak(as, osrc);
|
1281
|
+
emit_rr(as, XO_MOV, src, osrc);
|
1282
|
+
}
|
1283
|
+
} else {
|
1284
|
+
if (irt_isi8(ir->t) || irt_isu8(ir->t)) {
|
1285
|
+
emit_i8(as, k);
|
1286
|
+
emit_mrm(as, XO_MOVmib, 0, RID_MRM);
|
1287
|
+
} else {
|
1288
|
+
lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
|
1289
|
+
irt_isaddr(ir->t));
|
1290
|
+
emit_i32(as, k);
|
1291
|
+
emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
|
1292
|
+
}
|
1293
|
+
}
|
1294
|
+
}
|
1295
|
+
|
1296
|
+
#define asm_fstore(as, ir) asm_fxstore(as, ir)
|
1297
|
+
#define asm_xstore(as, ir) asm_fxstore(as, ir)
|
1298
|
+
|
1299
|
+
#if LJ_64
|
1300
|
+
static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
|
1301
|
+
{
|
1302
|
+
if (ra_used(ir) || typecheck) {
|
1303
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
1304
|
+
if (typecheck) {
|
1305
|
+
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
|
1306
|
+
asm_guardcc(as, CC_NE);
|
1307
|
+
emit_i8(as, -2);
|
1308
|
+
emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
|
1309
|
+
emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
|
1310
|
+
emit_rr(as, XO_MOV, tmp|REX_64, dest);
|
1311
|
+
}
|
1312
|
+
return dest;
|
1313
|
+
} else {
|
1314
|
+
return RID_NONE;
|
1315
|
+
}
|
1316
|
+
}
|
1317
|
+
#endif
|
1318
|
+
|
1319
|
+
static void asm_ahuvload(ASMState *as, IRIns *ir)
|
1320
|
+
{
|
1321
|
+
lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
|
1322
|
+
(LJ_DUALNUM && irt_isint(ir->t)));
|
1323
|
+
#if LJ_64
|
1324
|
+
if (irt_islightud(ir->t)) {
|
1325
|
+
Reg dest = asm_load_lightud64(as, ir, 1);
|
1326
|
+
if (ra_hasreg(dest)) {
|
1327
|
+
asm_fuseahuref(as, ir->op1, RSET_GPR);
|
1328
|
+
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
|
1329
|
+
}
|
1330
|
+
return;
|
1331
|
+
} else
|
1332
|
+
#endif
|
1333
|
+
if (ra_used(ir)) {
|
1334
|
+
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
|
1335
|
+
Reg dest = ra_dest(as, ir, allow);
|
1336
|
+
asm_fuseahuref(as, ir->op1, RSET_GPR);
|
1337
|
+
emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
|
1338
|
+
} else {
|
1339
|
+
asm_fuseahuref(as, ir->op1, RSET_GPR);
|
1340
|
+
}
|
1341
|
+
/* Always do the type check, even if the load result is unused. */
|
1342
|
+
as->mrm.ofs += 4;
|
1343
|
+
asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
|
1344
|
+
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
|
1345
|
+
lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
|
1346
|
+
emit_u32(as, LJ_TISNUM);
|
1347
|
+
emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
|
1348
|
+
} else {
|
1349
|
+
emit_i8(as, irt_toitype(ir->t));
|
1350
|
+
emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
|
1351
|
+
}
|
1352
|
+
}
|
1353
|
+
|
1354
|
+
static void asm_ahustore(ASMState *as, IRIns *ir)
|
1355
|
+
{
|
1356
|
+
if (ir->r == RID_SINK)
|
1357
|
+
return;
|
1358
|
+
if (irt_isnum(ir->t)) {
|
1359
|
+
Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
|
1360
|
+
asm_fuseahuref(as, ir->op1, RSET_GPR);
|
1361
|
+
emit_mrm(as, XO_MOVSDto, src, RID_MRM);
|
1362
|
+
#if LJ_64
|
1363
|
+
} else if (irt_islightud(ir->t)) {
|
1364
|
+
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
|
1365
|
+
asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
|
1366
|
+
emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
|
1367
|
+
#endif
|
1368
|
+
} else {
|
1369
|
+
IRIns *irr = IR(ir->op2);
|
1370
|
+
RegSet allow = RSET_GPR;
|
1371
|
+
Reg src = RID_NONE;
|
1372
|
+
if (!irref_isk(ir->op2)) {
|
1373
|
+
src = ra_alloc1(as, ir->op2, allow);
|
1374
|
+
rset_clear(allow, src);
|
1375
|
+
}
|
1376
|
+
asm_fuseahuref(as, ir->op1, allow);
|
1377
|
+
if (ra_hasreg(src)) {
|
1378
|
+
emit_mrm(as, XO_MOVto, src, RID_MRM);
|
1379
|
+
} else if (!irt_ispri(irr->t)) {
|
1380
|
+
lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
|
1381
|
+
emit_i32(as, irr->i);
|
1382
|
+
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
|
1383
|
+
}
|
1384
|
+
as->mrm.ofs += 4;
|
1385
|
+
emit_i32(as, (int32_t)irt_toitype(ir->t));
|
1386
|
+
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
|
1387
|
+
}
|
1388
|
+
}
|
1389
|
+
|
1390
|
+
static void asm_sload(ASMState *as, IRIns *ir)
|
1391
|
+
{
|
1392
|
+
int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
|
1393
|
+
IRType1 t = ir->t;
|
1394
|
+
Reg base;
|
1395
|
+
lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
|
1396
|
+
lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
|
1397
|
+
lua_assert(LJ_DUALNUM ||
|
1398
|
+
!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
|
1399
|
+
if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
|
1400
|
+
Reg left = ra_scratch(as, RSET_FPR);
|
1401
|
+
asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
|
1402
|
+
base = ra_alloc1(as, REF_BASE, RSET_GPR);
|
1403
|
+
emit_rmro(as, XO_MOVSD, left, base, ofs);
|
1404
|
+
t.irt = IRT_NUM; /* Continue with a regular number type check. */
|
1405
|
+
#if LJ_64
|
1406
|
+
} else if (irt_islightud(t)) {
|
1407
|
+
Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
|
1408
|
+
if (ra_hasreg(dest)) {
|
1409
|
+
base = ra_alloc1(as, REF_BASE, RSET_GPR);
|
1410
|
+
emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
|
1411
|
+
}
|
1412
|
+
return;
|
1413
|
+
#endif
|
1414
|
+
} else if (ra_used(ir)) {
|
1415
|
+
RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR;
|
1416
|
+
Reg dest = ra_dest(as, ir, allow);
|
1417
|
+
base = ra_alloc1(as, REF_BASE, RSET_GPR);
|
1418
|
+
lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
|
1419
|
+
if ((ir->op2 & IRSLOAD_CONVERT)) {
|
1420
|
+
t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
|
1421
|
+
emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
|
1422
|
+
} else {
|
1423
|
+
emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
|
1424
|
+
}
|
1425
|
+
} else {
|
1426
|
+
if (!(ir->op2 & IRSLOAD_TYPECHECK))
|
1427
|
+
return; /* No type check: avoid base alloc. */
|
1428
|
+
base = ra_alloc1(as, REF_BASE, RSET_GPR);
|
1429
|
+
}
|
1430
|
+
if ((ir->op2 & IRSLOAD_TYPECHECK)) {
|
1431
|
+
/* Need type check, even if the load result is unused. */
|
1432
|
+
asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
|
1433
|
+
if (LJ_64 && irt_type(t) >= IRT_NUM) {
|
1434
|
+
lua_assert(irt_isinteger(t) || irt_isnum(t));
|
1435
|
+
emit_u32(as, LJ_TISNUM);
|
1436
|
+
emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
|
1437
|
+
} else {
|
1438
|
+
emit_i8(as, irt_toitype(t));
|
1439
|
+
emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
|
1440
|
+
}
|
1441
|
+
}
|
1442
|
+
}
|
1443
|
+
|
1444
|
+
/* -- Allocations --------------------------------------------------------- */
|
1445
|
+
|
1446
|
+
#if LJ_HASFFI
|
1447
|
+
static void asm_cnew(ASMState *as, IRIns *ir)
|
1448
|
+
{
|
1449
|
+
CTState *cts = ctype_ctsG(J2G(as->J));
|
1450
|
+
CTypeID id = (CTypeID)IR(ir->op1)->i;
|
1451
|
+
CTSize sz;
|
1452
|
+
CTInfo info = lj_ctype_info(cts, id, &sz);
|
1453
|
+
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
|
1454
|
+
IRRef args[4];
|
1455
|
+
lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
|
1456
|
+
|
1457
|
+
as->gcsteps++;
|
1458
|
+
asm_setupresult(as, ir, ci); /* GCcdata * */
|
1459
|
+
|
1460
|
+
/* Initialize immutable cdata object. */
|
1461
|
+
if (ir->o == IR_CNEWI) {
|
1462
|
+
RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
|
1463
|
+
#if LJ_64
|
1464
|
+
Reg r64 = sz == 8 ? REX_64 : 0;
|
1465
|
+
if (irref_isk(ir->op2)) {
|
1466
|
+
IRIns *irk = IR(ir->op2);
|
1467
|
+
uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
|
1468
|
+
(uint64_t)(uint32_t)irk->i;
|
1469
|
+
if (sz == 4 || checki32((int64_t)k)) {
|
1470
|
+
emit_i32(as, (int32_t)k);
|
1471
|
+
emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
|
1472
|
+
} else {
|
1473
|
+
emit_movtomro(as, RID_ECX + r64, RID_RET, sizeof(GCcdata));
|
1474
|
+
emit_loadu64(as, RID_ECX, k);
|
1475
|
+
}
|
1476
|
+
} else {
|
1477
|
+
Reg r = ra_alloc1(as, ir->op2, allow);
|
1478
|
+
emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata));
|
1479
|
+
}
|
1480
|
+
#else
|
1481
|
+
int32_t ofs = sizeof(GCcdata);
|
1482
|
+
if (sz == 8) {
|
1483
|
+
ofs += 4; ir++;
|
1484
|
+
lua_assert(ir->o == IR_HIOP);
|
1485
|
+
}
|
1486
|
+
do {
|
1487
|
+
if (irref_isk(ir->op2)) {
|
1488
|
+
emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i);
|
1489
|
+
} else {
|
1490
|
+
Reg r = ra_alloc1(as, ir->op2, allow);
|
1491
|
+
emit_movtomro(as, r, RID_RET, ofs);
|
1492
|
+
rset_clear(allow, r);
|
1493
|
+
}
|
1494
|
+
if (ofs == sizeof(GCcdata)) break;
|
1495
|
+
ofs -= 4; ir--;
|
1496
|
+
} while (1);
|
1497
|
+
#endif
|
1498
|
+
lua_assert(sz == 4 || sz == 8);
|
1499
|
+
} else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
|
1500
|
+
ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
|
1501
|
+
args[0] = ASMREF_L; /* lua_State *L */
|
1502
|
+
args[1] = ir->op1; /* CTypeID id */
|
1503
|
+
args[2] = ir->op2; /* CTSize sz */
|
1504
|
+
args[3] = ASMREF_TMP1; /* CTSize align */
|
1505
|
+
asm_gencall(as, ci, args);
|
1506
|
+
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
|
1507
|
+
return;
|
1508
|
+
}
|
1509
|
+
|
1510
|
+
/* Combine initialization of marked, gct and ctypeid. */
|
1511
|
+
emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
|
1512
|
+
emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
|
1513
|
+
(int32_t)((~LJ_TCDATA<<8)+(id<<16)));
|
1514
|
+
emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
|
1515
|
+
emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
|
1516
|
+
|
1517
|
+
args[0] = ASMREF_L; /* lua_State *L */
|
1518
|
+
args[1] = ASMREF_TMP1; /* MSize size */
|
1519
|
+
asm_gencall(as, ci, args);
|
1520
|
+
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
|
1521
|
+
}
|
1522
|
+
#else
|
1523
|
+
#define asm_cnew(as, ir) ((void)0)
|
1524
|
+
#endif
|
1525
|
+
|
1526
|
+
/* -- Write barriers ------------------------------------------------------ */
|
1527
|
+
|
1528
|
+
static void asm_tbar(ASMState *as, IRIns *ir)
|
1529
|
+
{
|
1530
|
+
Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
|
1531
|
+
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
|
1532
|
+
MCLabel l_end = emit_label(as);
|
1533
|
+
emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
|
1534
|
+
emit_setgl(as, tab, gc.grayagain);
|
1535
|
+
emit_getgl(as, tmp, gc.grayagain);
|
1536
|
+
emit_i8(as, ~LJ_GC_BLACK);
|
1537
|
+
emit_rmro(as, XO_ARITHib, XOg_AND, tab, offsetof(GCtab, marked));
|
1538
|
+
emit_sjcc(as, CC_Z, l_end);
|
1539
|
+
emit_i8(as, LJ_GC_BLACK);
|
1540
|
+
emit_rmro(as, XO_GROUP3b, XOg_TEST, tab, offsetof(GCtab, marked));
|
1541
|
+
}
|
1542
|
+
|
1543
|
+
static void asm_obar(ASMState *as, IRIns *ir)
|
1544
|
+
{
|
1545
|
+
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
|
1546
|
+
IRRef args[2];
|
1547
|
+
MCLabel l_end;
|
1548
|
+
Reg obj;
|
1549
|
+
/* No need for other object barriers (yet). */
|
1550
|
+
lua_assert(IR(ir->op1)->o == IR_UREFC);
|
1551
|
+
ra_evictset(as, RSET_SCRATCH);
|
1552
|
+
l_end = emit_label(as);
|
1553
|
+
args[0] = ASMREF_TMP1; /* global_State *g */
|
1554
|
+
args[1] = ir->op1; /* TValue *tv */
|
1555
|
+
asm_gencall(as, ci, args);
|
1556
|
+
emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
|
1557
|
+
obj = IR(ir->op1)->r;
|
1558
|
+
emit_sjcc(as, CC_Z, l_end);
|
1559
|
+
emit_i8(as, LJ_GC_WHITES);
|
1560
|
+
if (irref_isk(ir->op2)) {
|
1561
|
+
GCobj *vp = ir_kgc(IR(ir->op2));
|
1562
|
+
emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
|
1563
|
+
} else {
|
1564
|
+
Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
|
1565
|
+
emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
|
1566
|
+
}
|
1567
|
+
emit_sjcc(as, CC_Z, l_end);
|
1568
|
+
emit_i8(as, LJ_GC_BLACK);
|
1569
|
+
emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
|
1570
|
+
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
|
1571
|
+
}
|
1572
|
+
|
1573
|
+
/* -- FP/int arithmetic and logic operations ------------------------------ */
|
1574
|
+
|
1575
|
+
/* Load reference onto x87 stack. Force a spill to memory if needed. */
|
1576
|
+
static void asm_x87load(ASMState *as, IRRef ref)
|
1577
|
+
{
|
1578
|
+
IRIns *ir = IR(ref);
|
1579
|
+
if (ir->o == IR_KNUM) {
|
1580
|
+
cTValue *tv = ir_knum(ir);
|
1581
|
+
if (tvispzero(tv)) /* Use fldz only for +0. */
|
1582
|
+
emit_x87op(as, XI_FLDZ);
|
1583
|
+
else if (tvispone(tv))
|
1584
|
+
emit_x87op(as, XI_FLD1);
|
1585
|
+
else
|
1586
|
+
emit_rma(as, XO_FLDq, XOg_FLDq, tv);
|
1587
|
+
} else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT && !ra_used(ir) &&
|
1588
|
+
!irref_isk(ir->op1) && mayfuse(as, ir->op1)) {
|
1589
|
+
IRIns *iri = IR(ir->op1);
|
1590
|
+
emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri));
|
1591
|
+
} else {
|
1592
|
+
emit_mrm(as, XO_FLDq, XOg_FLDq, asm_fuseload(as, ref, RSET_EMPTY));
|
1593
|
+
}
|
1594
|
+
}
|
1595
|
+
|
1596
|
+
static void asm_fpmath(ASMState *as, IRIns *ir)
|
1597
|
+
{
|
1598
|
+
IRFPMathOp fpm = (IRFPMathOp)ir->op2;
|
1599
|
+
if (fpm == IRFPM_SQRT) {
|
1600
|
+
Reg dest = ra_dest(as, ir, RSET_FPR);
|
1601
|
+
Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
|
1602
|
+
emit_mrm(as, XO_SQRTSD, dest, left);
|
1603
|
+
} else if (fpm <= IRFPM_TRUNC) {
|
1604
|
+
if (as->flags & JIT_F_SSE4_1) { /* SSE4.1 has a rounding instruction. */
|
1605
|
+
Reg dest = ra_dest(as, ir, RSET_FPR);
|
1606
|
+
Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
|
1607
|
+
/* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op.
|
1608
|
+
** Let's pretend it's a 3-byte opcode, and compensate afterwards.
|
1609
|
+
** This is atrocious, but the alternatives are much worse.
|
1610
|
+
*/
|
1611
|
+
/* Round down/up/trunc == 1001/1010/1011. */
|
1612
|
+
emit_i8(as, 0x09 + fpm);
|
1613
|
+
emit_mrm(as, XO_ROUNDSD, dest, left);
|
1614
|
+
if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) {
|
1615
|
+
as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */
|
1616
|
+
}
|
1617
|
+
*--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */
|
1618
|
+
} else { /* Call helper functions for SSE2 variant. */
|
1619
|
+
/* The modified regs must match with the *.dasc implementation. */
|
1620
|
+
RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
|
1621
|
+
if (ra_hasreg(ir->r))
|
1622
|
+
rset_clear(drop, ir->r); /* Dest reg handled below. */
|
1623
|
+
ra_evictset(as, drop);
|
1624
|
+
ra_destreg(as, ir, RID_XMM0);
|
1625
|
+
emit_call(as, fpm == IRFPM_FLOOR ? lj_vm_floor_sse :
|
1626
|
+
fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
|
1627
|
+
ra_left(as, RID_XMM0, ir->op1);
|
1628
|
+
}
|
1629
|
+
} else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
|
1630
|
+
/* Rejoined to pow(). */
|
1631
|
+
} else {
|
1632
|
+
asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
|
1633
|
+
}
|
1634
|
+
}
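For reference, the 0x09 + fpm immediate assembled above is the standard SSE4.1 ROUNDSD rounding-control byte: bit 3 (0x08) suppresses the inexact exception and bits 1:0 pick the mode, giving 0x09/0x0a/0x0b for floor/ceil/trunc. A small standalone sketch using the compiler intrinsic shows the same encoding (plain C, not part of this gem; build with -msse4.1; the function name is invented):

#include <stdio.h>
#include <smmintrin.h>  /* SSE4.1 intrinsics. */

static double floor_sse41(double x)
{
  __m128d v = _mm_set_sd(x);
  /* 0x09 == _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEG_INF: the same immediate
  ** emitted above for IRFPM_FLOOR (0x09 + 0). */
  v = _mm_round_sd(v, v, _MM_FROUND_NO_EXC | _MM_FROUND_TO_NEG_INF);
  return _mm_cvtsd_f64(v);
}

int main(void)
{
  printf("%g\n", floor_sse41(-2.5));  /* Prints -3. */
  return 0;
}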
|
1635
|
+
|
1636
|
+
#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
|
1637
|
+
|
1638
|
+
static void asm_ldexp(ASMState *as, IRIns *ir)
|
1639
|
+
{
|
1640
|
+
int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
|
1641
|
+
Reg dest = ir->r;
|
1642
|
+
if (ra_hasreg(dest)) {
|
1643
|
+
ra_free(as, dest);
|
1644
|
+
ra_modified(as, dest);
|
1645
|
+
emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
|
1646
|
+
}
|
1647
|
+
emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
|
1648
|
+
emit_x87op(as, XI_FPOP1);
|
1649
|
+
emit_x87op(as, XI_FSCALE);
|
1650
|
+
asm_x87load(as, ir->op1);
|
1651
|
+
asm_x87load(as, ir->op2);
|
1652
|
+
}
|
1653
|
+
|
1654
|
+
static void asm_fppowi(ASMState *as, IRIns *ir)
|
1655
|
+
{
|
1656
|
+
/* The modified regs must match with the *.dasc implementation. */
|
1657
|
+
RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
|
1658
|
+
if (ra_hasreg(ir->r))
|
1659
|
+
rset_clear(drop, ir->r); /* Dest reg handled below. */
|
1660
|
+
ra_evictset(as, drop);
|
1661
|
+
ra_destreg(as, ir, RID_XMM0);
|
1662
|
+
emit_call(as, lj_vm_powi_sse);
|
1663
|
+
ra_left(as, RID_XMM0, ir->op1);
|
1664
|
+
ra_left(as, RID_EAX, ir->op2);
|
1665
|
+
}
|
1666
|
+
|
1667
|
+
static void asm_pow(ASMState *as, IRIns *ir)
|
1668
|
+
{
|
1669
|
+
#if LJ_64 && LJ_HASFFI
|
1670
|
+
if (!irt_isnum(ir->t))
|
1671
|
+
asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
|
1672
|
+
IRCALL_lj_carith_powu64);
|
1673
|
+
else
|
1674
|
+
#endif
|
1675
|
+
asm_fppowi(as, ir);
|
1676
|
+
}
|
1677
|
+
|
1678
|
+
static int asm_swapops(ASMState *as, IRIns *ir)
|
1679
|
+
{
|
1680
|
+
IRIns *irl = IR(ir->op1);
|
1681
|
+
IRIns *irr = IR(ir->op2);
|
1682
|
+
lua_assert(ra_noreg(irr->r));
|
1683
|
+
if (!irm_iscomm(lj_ir_mode[ir->o]))
|
1684
|
+
return 0; /* Can't swap non-commutative operations. */
|
1685
|
+
if (irref_isk(ir->op2))
|
1686
|
+
return 0; /* Don't swap constants to the left. */
|
1687
|
+
if (ra_hasreg(irl->r))
|
1688
|
+
return 1; /* Swap if left already has a register. */
|
1689
|
+
if (ra_samehint(ir->r, irr->r))
|
1690
|
+
return 1; /* Swap if dest and right have matching hints. */
|
1691
|
+
if (as->curins > as->loopref) { /* In variant part? */
|
1692
|
+
if (ir->op2 < as->loopref && !irt_isphi(irr->t))
|
1693
|
+
return 0; /* Keep invariants on the right. */
|
1694
|
+
if (ir->op1 < as->loopref && !irt_isphi(irl->t))
|
1695
|
+
return 1; /* Swap invariants to the right. */
|
1696
|
+
}
|
1697
|
+
if (opisfusableload(irl->o))
|
1698
|
+
return 1; /* Swap fusable loads to the right. */
|
1699
|
+
return 0; /* Otherwise don't swap. */
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo)
|
1703
|
+
{
|
1704
|
+
IRRef lref = ir->op1;
|
1705
|
+
IRRef rref = ir->op2;
|
1706
|
+
RegSet allow = RSET_FPR;
|
1707
|
+
Reg dest;
|
1708
|
+
Reg right = IR(rref)->r;
|
1709
|
+
if (ra_hasreg(right)) {
|
1710
|
+
rset_clear(allow, right);
|
1711
|
+
ra_noweak(as, right);
|
1712
|
+
}
|
1713
|
+
dest = ra_dest(as, ir, allow);
|
1714
|
+
if (lref == rref) {
|
1715
|
+
right = dest;
|
1716
|
+
} else if (ra_noreg(right)) {
|
1717
|
+
if (asm_swapops(as, ir)) {
|
1718
|
+
IRRef tmp = lref; lref = rref; rref = tmp;
|
1719
|
+
}
|
1720
|
+
right = asm_fuseload(as, rref, rset_clear(allow, dest));
|
1721
|
+
}
|
1722
|
+
emit_mrm(as, xo, dest, right);
|
1723
|
+
ra_left(as, dest, lref);
|
1724
|
+
}
|
1725
|
+
|
1726
|
+
static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
|
1727
|
+
{
|
1728
|
+
IRRef lref = ir->op1;
|
1729
|
+
IRRef rref = ir->op2;
|
1730
|
+
RegSet allow = RSET_GPR;
|
1731
|
+
Reg dest, right;
|
1732
|
+
int32_t k = 0;
|
1733
|
+
if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */
|
1734
|
+
MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
|
1735
|
+
if ((p[1] & 15) < 14) {
|
1736
|
+
if ((p[1] & 15) >= 12) p[1] -= 4; /* L <-> S, NL <-> NS */
|
1737
|
+
as->flagmcp = NULL;
|
1738
|
+
as->mcp = p;
|
1739
|
+
} /* else: cannot transform LE/NLE to cc without use of OF. */
|
1740
|
+
}
|
1741
|
+
right = IR(rref)->r;
|
1742
|
+
if (ra_hasreg(right)) {
|
1743
|
+
rset_clear(allow, right);
|
1744
|
+
ra_noweak(as, right);
|
1745
|
+
}
|
1746
|
+
dest = ra_dest(as, ir, allow);
|
1747
|
+
if (lref == rref) {
|
1748
|
+
right = dest;
|
1749
|
+
} else if (ra_noreg(right) && !asm_isk32(as, rref, &k)) {
|
1750
|
+
if (asm_swapops(as, ir)) {
|
1751
|
+
IRRef tmp = lref; lref = rref; rref = tmp;
|
1752
|
+
}
|
1753
|
+
right = asm_fuseloadm(as, rref, rset_clear(allow, dest), irt_is64(ir->t));
|
1754
|
+
}
|
1755
|
+
if (irt_isguard(ir->t)) /* For IR_ADDOV etc. */
|
1756
|
+
asm_guardcc(as, CC_O);
|
1757
|
+
if (xa != XOg_X_IMUL) {
|
1758
|
+
if (ra_hasreg(right))
|
1759
|
+
emit_mrm(as, XO_ARITH(xa), REX_64IR(ir, dest), right);
|
1760
|
+
else
|
1761
|
+
emit_gri(as, XG_ARITHi(xa), REX_64IR(ir, dest), k);
|
1762
|
+
} else if (ra_hasreg(right)) { /* IMUL r, mrm. */
|
1763
|
+
emit_mrm(as, XO_IMUL, REX_64IR(ir, dest), right);
|
1764
|
+
} else { /* IMUL r, r, k. */
|
1765
|
+
/* NYI: use lea/shl/add/sub (FOLD only does 2^k) depending on CPU. */
|
1766
|
+
Reg left = asm_fuseloadm(as, lref, RSET_GPR, irt_is64(ir->t));
|
1767
|
+
x86Op xo;
|
1768
|
+
if (checki8(k)) { emit_i8(as, k); xo = XO_IMULi8;
|
1769
|
+
} else { emit_i32(as, k); xo = XO_IMULi; }
|
1770
|
+
emit_mrm(as, xo, REX_64IR(ir, dest), left);
|
1771
|
+
return;
|
1772
|
+
}
|
1773
|
+
ra_left(as, dest, lref);
|
1774
|
+
}
|
1775
|
+
|
1776
|
+
+/* LEA is really a 4-operand ADD with an independent destination register,
+** up to two source registers and an immediate. One register can be scaled
+** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
+** instructions.
+**
+** Currently only a few common cases are supported:
+** - 3-operand ADD: y = a+b; y = a+k  with a and b already allocated
+** - Left ADD fusion:  y = (a+b)+k; y = (a+k)+b
+** - Right ADD fusion: y = a+(b+k)
+** The omitted variants have already been reduced by FOLD.
+**
+** There are more fusion opportunities, like gathering shifts or joining
+** common references. But these are probably not worth the trouble, since
+** array indexing is not decomposed and already makes use of all fields
+** of the ModRM operand.
+*/
|
1792
|
+
static int asm_lea(ASMState *as, IRIns *ir)
|
1793
|
+
{
|
1794
|
+
IRIns *irl = IR(ir->op1);
|
1795
|
+
IRIns *irr = IR(ir->op2);
|
1796
|
+
RegSet allow = RSET_GPR;
|
1797
|
+
Reg dest;
|
1798
|
+
as->mrm.base = as->mrm.idx = RID_NONE;
|
1799
|
+
as->mrm.scale = XM_SCALE1;
|
1800
|
+
as->mrm.ofs = 0;
|
1801
|
+
if (ra_hasreg(irl->r)) {
|
1802
|
+
rset_clear(allow, irl->r);
|
1803
|
+
ra_noweak(as, irl->r);
|
1804
|
+
as->mrm.base = irl->r;
|
1805
|
+
if (irref_isk(ir->op2) || ra_hasreg(irr->r)) {
|
1806
|
+
/* The PHI renaming logic does a better job in some cases. */
|
1807
|
+
if (ra_hasreg(ir->r) &&
|
1808
|
+
((irt_isphi(irl->t) && as->phireg[ir->r] == ir->op1) ||
|
1809
|
+
(irt_isphi(irr->t) && as->phireg[ir->r] == ir->op2)))
|
1810
|
+
return 0;
|
1811
|
+
if (irref_isk(ir->op2)) {
|
1812
|
+
as->mrm.ofs = irr->i;
|
1813
|
+
} else {
|
1814
|
+
rset_clear(allow, irr->r);
|
1815
|
+
ra_noweak(as, irr->r);
|
1816
|
+
as->mrm.idx = irr->r;
|
1817
|
+
}
|
1818
|
+
} else if (irr->o == IR_ADD && mayfuse(as, ir->op2) &&
|
1819
|
+
irref_isk(irr->op2)) {
|
1820
|
+
Reg idx = ra_alloc1(as, irr->op1, allow);
|
1821
|
+
rset_clear(allow, idx);
|
1822
|
+
as->mrm.idx = (uint8_t)idx;
|
1823
|
+
as->mrm.ofs = IR(irr->op2)->i;
|
1824
|
+
} else {
|
1825
|
+
return 0;
|
1826
|
+
}
|
1827
|
+
} else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) &&
|
1828
|
+
(irref_isk(ir->op2) || irref_isk(irl->op2))) {
|
1829
|
+
Reg idx, base = ra_alloc1(as, irl->op1, allow);
|
1830
|
+
rset_clear(allow, base);
|
1831
|
+
as->mrm.base = (uint8_t)base;
|
1832
|
+
if (irref_isk(ir->op2)) {
|
1833
|
+
as->mrm.ofs = irr->i;
|
1834
|
+
idx = ra_alloc1(as, irl->op2, allow);
|
1835
|
+
} else {
|
1836
|
+
as->mrm.ofs = IR(irl->op2)->i;
|
1837
|
+
idx = ra_alloc1(as, ir->op2, allow);
|
1838
|
+
}
|
1839
|
+
rset_clear(allow, idx);
|
1840
|
+
as->mrm.idx = (uint8_t)idx;
|
1841
|
+
} else {
|
1842
|
+
return 0;
|
1843
|
+
}
|
1844
|
+
dest = ra_dest(as, ir, allow);
|
1845
|
+
emit_mrm(as, XO_LEA, dest, RID_MRM);
|
1846
|
+
return 1; /* Success. */
|
1847
|
+
}
|
1848
|
+
|
1849
|
+
static void asm_add(ASMState *as, IRIns *ir)
|
1850
|
+
{
|
1851
|
+
if (irt_isnum(ir->t))
|
1852
|
+
asm_fparith(as, ir, XO_ADDSD);
|
1853
|
+
else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
|
1854
|
+
irt_is64(ir->t) || !asm_lea(as, ir))
|
1855
|
+
asm_intarith(as, ir, XOg_ADD);
|
1856
|
+
}
|
1857
|
+
|
1858
|
+
static void asm_sub(ASMState *as, IRIns *ir)
|
1859
|
+
{
|
1860
|
+
if (irt_isnum(ir->t))
|
1861
|
+
asm_fparith(as, ir, XO_SUBSD);
|
1862
|
+
else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
|
1863
|
+
asm_intarith(as, ir, XOg_SUB);
|
1864
|
+
}
|
1865
|
+
|
1866
|
+
static void asm_mul(ASMState *as, IRIns *ir)
|
1867
|
+
{
|
1868
|
+
if (irt_isnum(ir->t))
|
1869
|
+
asm_fparith(as, ir, XO_MULSD);
|
1870
|
+
else
|
1871
|
+
asm_intarith(as, ir, XOg_X_IMUL);
|
1872
|
+
}
|
1873
|
+
|
1874
|
+
static void asm_div(ASMState *as, IRIns *ir)
|
1875
|
+
{
|
1876
|
+
#if LJ_64 && LJ_HASFFI
|
1877
|
+
if (!irt_isnum(ir->t))
|
1878
|
+
asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
|
1879
|
+
IRCALL_lj_carith_divu64);
|
1880
|
+
else
|
1881
|
+
#endif
|
1882
|
+
asm_fparith(as, ir, XO_DIVSD);
|
1883
|
+
}
|
1884
|
+
|
1885
|
+
static void asm_mod(ASMState *as, IRIns *ir)
|
1886
|
+
{
|
1887
|
+
#if LJ_64 && LJ_HASFFI
|
1888
|
+
if (!irt_isint(ir->t))
|
1889
|
+
asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
|
1890
|
+
IRCALL_lj_carith_modu64);
|
1891
|
+
else
|
1892
|
+
#endif
|
1893
|
+
asm_callid(as, ir, IRCALL_lj_vm_modi);
|
1894
|
+
}
|
1895
|
+
|
1896
|
+
static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
|
1897
|
+
{
|
1898
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
1899
|
+
emit_rr(as, XO_GROUP3, REX_64IR(ir, xg), dest);
|
1900
|
+
ra_left(as, dest, ir->op1);
|
1901
|
+
}
|
1902
|
+
|
1903
|
+
static void asm_neg(ASMState *as, IRIns *ir)
|
1904
|
+
{
|
1905
|
+
if (irt_isnum(ir->t))
|
1906
|
+
asm_fparith(as, ir, XO_XORPS);
|
1907
|
+
else
|
1908
|
+
asm_neg_not(as, ir, XOg_NEG);
|
1909
|
+
}
|
1910
|
+
|
1911
|
+
#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
|
1912
|
+
|
1913
|
+
static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
|
1914
|
+
{
|
1915
|
+
Reg right, dest = ra_dest(as, ir, RSET_GPR);
|
1916
|
+
IRRef lref = ir->op1, rref = ir->op2;
|
1917
|
+
if (irref_isk(rref)) { lref = rref; rref = ir->op1; }
|
1918
|
+
right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, dest));
|
1919
|
+
emit_rr(as, XO_CMOV + (cc<<24), REX_64IR(ir, dest), right);
|
1920
|
+
emit_rr(as, XO_CMP, REX_64IR(ir, dest), right);
|
1921
|
+
ra_left(as, dest, lref);
|
1922
|
+
}
|
1923
|
+
|
1924
|
+
static void asm_min(ASMState *as, IRIns *ir)
|
1925
|
+
{
|
1926
|
+
if (irt_isnum(ir->t))
|
1927
|
+
asm_fparith(as, ir, XO_MINSD);
|
1928
|
+
else
|
1929
|
+
asm_intmin_max(as, ir, CC_G);
|
1930
|
+
}
|
1931
|
+
|
1932
|
+
static void asm_max(ASMState *as, IRIns *ir)
|
1933
|
+
{
|
1934
|
+
if (irt_isnum(ir->t))
|
1935
|
+
asm_fparith(as, ir, XO_MAXSD);
|
1936
|
+
else
|
1937
|
+
asm_intmin_max(as, ir, CC_L);
|
1938
|
+
}
|
1939
|
+
|
1940
|
+
/* Note: don't use LEA for overflow-checking arithmetic! */
|
1941
|
+
#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
|
1942
|
+
#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
|
1943
|
+
#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
|
1944
|
+
|
1945
|
+
#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
|
1946
|
+
|
1947
|
+
static void asm_bswap(ASMState *as, IRIns *ir)
|
1948
|
+
{
|
1949
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
1950
|
+
as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
|
1951
|
+
REX_64IR(ir, 0), dest, 0, as->mcp, 1);
|
1952
|
+
ra_left(as, dest, ir->op1);
|
1953
|
+
}
|
1954
|
+
|
1955
|
+
#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
|
1956
|
+
#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
|
1957
|
+
#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
|
1958
|
+
|
1959
|
+
static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
|
1960
|
+
{
|
1961
|
+
IRRef rref = ir->op2;
|
1962
|
+
IRIns *irr = IR(rref);
|
1963
|
+
Reg dest;
|
1964
|
+
if (irref_isk(rref)) { /* Constant shifts. */
|
1965
|
+
int shift;
|
1966
|
+
dest = ra_dest(as, ir, RSET_GPR);
|
1967
|
+
shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
|
1968
|
+
switch (shift) {
|
1969
|
+
case 0: break;
|
1970
|
+
case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
|
1971
|
+
default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
|
1972
|
+
}
|
1973
|
+
} else { /* Variable shifts implicitly use register cl (i.e. ecx). */
|
1974
|
+
Reg right;
|
1975
|
+
dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
|
1976
|
+
if (dest == RID_ECX) {
|
1977
|
+
dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
|
1978
|
+
emit_rr(as, XO_MOV, RID_ECX, dest);
|
1979
|
+
}
|
1980
|
+
right = irr->r;
|
1981
|
+
if (ra_noreg(right))
|
1982
|
+
right = ra_allocref(as, rref, RID2RSET(RID_ECX));
|
1983
|
+
else if (right != RID_ECX)
|
1984
|
+
ra_scratch(as, RID2RSET(RID_ECX));
|
1985
|
+
emit_rr(as, XO_SHIFTcl, REX_64IR(ir, xs), dest);
|
1986
|
+
ra_noweak(as, right);
|
1987
|
+
if (right != RID_ECX)
|
1988
|
+
emit_rr(as, XO_MOV, RID_ECX, right);
|
1989
|
+
}
|
1990
|
+
ra_left(as, dest, ir->op1);
|
1991
|
+
/*
|
1992
|
+
** Note: avoid using the flags resulting from a shift or rotate!
|
1993
|
+
** All of them cause a partial flag stall, except for r,1 shifts
|
1994
|
+
** (but not rotates). And a shift count of 0 leaves the flags unmodified.
|
1995
|
+
*/
|
1996
|
+
}
|
1997
|
+
|
1998
|
+
#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
|
1999
|
+
#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
|
2000
|
+
#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
|
2001
|
+
#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
|
2002
|
+
#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
|
2003
|
+
|
2004
|
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Virtual flags for unordered FP comparisons. */
+#define VCC_U   0x1000  /* Unordered. */
+#define VCC_P   0x2000  /* Needs extra CC_P branch. */
+#define VCC_S   0x4000  /* Swap avoids CC_P branch. */
+#define VCC_PS  (VCC_P|VCC_S)
+
+/* Map of comparisons to flags. ORDER IR. */
+#define COMPFLAGS(ci, cin, cu, cf)  ((ci)+((cu)<<4)+((cin)<<8)+(cf))
+static const uint16_t asm_compmap[IR_ABC+1] = {
+  /*            signed  non-eq  unsigned  flags */
+  /* LT  */ COMPFLAGS(CC_GE, CC_G,  CC_AE, VCC_PS),
+  /* GE  */ COMPFLAGS(CC_L,  CC_L,  CC_B,  0),
+  /* LE  */ COMPFLAGS(CC_G,  CC_G,  CC_A,  VCC_PS),
+  /* GT  */ COMPFLAGS(CC_LE, CC_L,  CC_BE, 0),
+  /* ULT */ COMPFLAGS(CC_AE, CC_A,  CC_AE, VCC_U),
+  /* UGE */ COMPFLAGS(CC_B,  CC_B,  CC_B,  VCC_U|VCC_PS),
+  /* ULE */ COMPFLAGS(CC_A,  CC_A,  CC_A,  VCC_U),
+  /* UGT */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS),
+  /* EQ  */ COMPFLAGS(CC_NE, CC_NE, CC_NE, VCC_P),
+  /* NE  */ COMPFLAGS(CC_E,  CC_E,  CC_E,  VCC_U|VCC_P),
+  /* ABC */ COMPFLAGS(CC_BE, CC_B,  CC_BE, VCC_U|VCC_PS)  /* Same as UGT. */
+};
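The VCC_U/VCC_P/VCC_S flags exist because a NaN operand makes an x86 FP compare "unordered": UCOMISD reports that outcome through the parity flag, which the ordinary ordering branches never test. In C terms every ordered comparison against NaN is simply false, so a single branch cannot tell "greater or equal" apart from "unordered" — hence the extra CC_P branch or the operand swap used in asm_comp below. A tiny standalone illustration (plain C, not part of this gem):

#include <math.h>
#include <stdio.h>

int main(void)
{
  double x = NAN, y = 1.0;
  /* Both ordered comparisons are false; only isunordered() sees the NaN. */
  printf("x < y: %d, x >= y: %d, unordered: %d\n",
         x < y, x >= y, isunordered(x, y));
  return 0;
}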
|
2028
|
+
|
2029
|
+
/* FP and integer comparisons. */
|
2030
|
+
static void asm_comp(ASMState *as, IRIns *ir)
|
2031
|
+
{
|
2032
|
+
uint32_t cc = asm_compmap[ir->o];
|
2033
|
+
if (irt_isnum(ir->t)) {
|
2034
|
+
IRRef lref = ir->op1;
|
2035
|
+
IRRef rref = ir->op2;
|
2036
|
+
Reg left, right;
|
2037
|
+
MCLabel l_around;
|
2038
|
+
/*
|
2039
|
+
** An extra CC_P branch is required to preserve ordered/unordered
|
2040
|
+
** semantics for FP comparisons. This can be avoided by swapping
|
2041
|
+
** the operands and inverting the condition (except for EQ and UNE).
|
2042
|
+
** So always try to swap if possible.
|
2043
|
+
**
|
2044
|
+
** Another option would be to swap operands to achieve better memory
|
2045
|
+
** operand fusion. But it's unlikely that this outweighs the cost
|
2046
|
+
** of the extra branches.
|
2047
|
+
*/
|
2048
|
+
if (cc & VCC_S) { /* Swap? */
|
2049
|
+
IRRef tmp = lref; lref = rref; rref = tmp;
|
2050
|
+
cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
|
2051
|
+
}
|
2052
|
+
left = ra_alloc1(as, lref, RSET_FPR);
|
2053
|
+
right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
|
2054
|
+
l_around = emit_label(as);
|
2055
|
+
asm_guardcc(as, cc >> 4);
|
2056
|
+
if (cc & VCC_P) { /* Extra CC_P branch required? */
|
2057
|
+
if (!(cc & VCC_U)) {
|
2058
|
+
asm_guardcc(as, CC_P); /* Branch to exit for ordered comparisons. */
|
2059
|
+
} else if (l_around != as->invmcp) {
|
2060
|
+
emit_sjcc(as, CC_P, l_around); /* Branch around for unordered. */
|
2061
|
+
} else {
|
2062
|
+
/* Patched to mcloop by asm_loop_fixup. */
|
2063
|
+
as->loopinv = 2;
|
2064
|
+
if (as->realign)
|
2065
|
+
emit_sjcc(as, CC_P, as->mcp);
|
2066
|
+
else
|
2067
|
+
emit_jcc(as, CC_P, as->mcp);
|
2068
|
+
}
|
2069
|
+
}
|
2070
|
+
emit_mrm(as, XO_UCOMISD, left, right);
|
2071
|
+
} else {
|
2072
|
+
IRRef lref = ir->op1, rref = ir->op2;
|
2073
|
+
IROp leftop = (IROp)(IR(lref)->o);
|
2074
|
+
Reg r64 = REX_64IR(ir, 0);
|
2075
|
+
int32_t imm = 0;
|
2076
|
+
lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
|
2077
|
+
irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
|
2078
|
+
/* Swap constants (only for ABC) and fusable loads to the right. */
|
2079
|
+
if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
|
2080
|
+
if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */
|
2081
|
+
else if ((cc & 0xa) == 0x2) cc ^= 0x55; /* A <-> B, AE <-> BE */
|
2082
|
+
lref = ir->op2; rref = ir->op1;
|
2083
|
+
}
|
2084
|
+
if (asm_isk32(as, rref, &imm)) {
|
2085
|
+
IRIns *irl = IR(lref);
|
2086
|
+
/* Check whether we can use test ins. Not for unsigned, since CF=0. */
|
2087
|
+
int usetest = (imm == 0 && (cc & 0xa) != 0x2);
|
2088
|
+
if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
|
2089
|
+
/* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */
|
2090
|
+
Reg right, left = RID_NONE;
|
2091
|
+
RegSet allow = RSET_GPR;
|
2092
|
+
if (!asm_isk32(as, irl->op2, &imm)) {
|
2093
|
+
left = ra_alloc1(as, irl->op2, allow);
|
2094
|
+
rset_clear(allow, left);
|
2095
|
+
} else { /* Try to Fuse IRT_I8/IRT_U8 loads, too. See below. */
|
2096
|
+
IRIns *irll = IR(irl->op1);
|
2097
|
+
if (opisfusableload((IROp)irll->o) &&
|
2098
|
+
(irt_isi8(irll->t) || irt_isu8(irll->t))) {
|
2099
|
+
IRType1 origt = irll->t; /* Temporarily flip types. */
|
2100
|
+
irll->t.irt = (irll->t.irt & ~IRT_TYPE) | IRT_INT;
|
2101
|
+
as->curins--; /* Skip to BAND to avoid failing in noconflict(). */
|
2102
|
+
right = asm_fuseload(as, irl->op1, RSET_GPR);
|
2103
|
+
as->curins++;
|
2104
|
+
irll->t = origt;
|
2105
|
+
if (right != RID_MRM) goto test_nofuse;
|
2106
|
+
/* Fusion succeeded, emit test byte mrm, imm8. */
|
2107
|
+
asm_guardcc(as, cc);
|
2108
|
+
emit_i8(as, (imm & 0xff));
|
2109
|
+
emit_mrm(as, XO_GROUP3b, XOg_TEST, RID_MRM);
|
2110
|
+
return;
|
2111
|
+
}
|
2112
|
+
}
|
2113
|
+
as->curins--; /* Skip to BAND to avoid failing in noconflict(). */
|
2114
|
+
right = asm_fuseloadm(as, irl->op1, allow, r64);
|
2115
|
+
as->curins++; /* Undo the above. */
|
2116
|
+
test_nofuse:
|
2117
|
+
asm_guardcc(as, cc);
|
2118
|
+
if (ra_noreg(left)) {
|
2119
|
+
emit_i32(as, imm);
|
2120
|
+
emit_mrm(as, XO_GROUP3, r64 + XOg_TEST, right);
|
2121
|
+
} else {
|
2122
|
+
emit_mrm(as, XO_TEST, r64 + left, right);
|
2123
|
+
}
|
2124
|
+
} else {
|
2125
|
+
Reg left;
|
2126
|
+
if (opisfusableload((IROp)irl->o) &&
|
2127
|
+
            ((irt_isu8(irl->t) && checku8(imm)) ||
             ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
             (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
          /* Only the IRT_INT case is fused by asm_fuseload.
          ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
          ** are handled here.
          ** Note that cmp word [mem], imm16 should not be generated,
          ** since it has a length-changing prefix. Compares of a word
          ** against a sign-extended imm8 are ok, however.
          */
          IRType1 origt = irl->t;  /* Temporarily flip types. */
          irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
          left = asm_fuseload(as, lref, RSET_GPR);
          irl->t = origt;
          if (left == RID_MRM) {  /* Fusion succeeded? */
            if (irt_isu8(irl->t) || irt_isu16(irl->t))
              cc >>= 4;  /* Need unsigned compare. */
            asm_guardcc(as, cc);
            emit_i8(as, imm);
            emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
                         XO_ARITHib : XO_ARITHiw8, r64 + XOg_CMP, RID_MRM);
            return;
          }  /* Otherwise handle register case as usual. */
        } else {
          left = asm_fuseloadm(as, lref,
                               irt_isu8(ir->t) ? RSET_GPR8 : RSET_GPR, r64);
        }
        asm_guardcc(as, cc);
        if (usetest && left != RID_MRM) {
          /* Use test r,r instead of cmp r,0. */
          x86Op xo = XO_TEST;
          if (irt_isu8(ir->t)) {
            lua_assert(ir->o == IR_EQ || ir->o == IR_NE);
            xo = XO_TESTb;
            if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) {
              if (LJ_64) {
                left |= FORCE_REX;
              } else {
                emit_i32(as, 0xff);
                emit_mrm(as, XO_GROUP3, XOg_TEST, left);
                return;
              }
            }
          }
          emit_rr(as, xo, r64 + left, left);
          if (irl+1 == ir)  /* Referencing previous ins? */
            as->flagmcp = as->mcp;  /* Set flag to drop test r,r if possible. */
        } else {
          emit_gmrmi(as, XG_ARITHi(XOg_CMP), r64 + left, imm);
        }
      }
    } else {
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      Reg right = asm_fuseloadm(as, rref, rset_exclude(RSET_GPR, left), r64);
      asm_guardcc(as, cc);
      emit_mrm(as, XO_CMP, r64 + left, right);
    }
  }
}

#define asm_equal(as, ir)  asm_comp(as, ir)

#if LJ_32 && LJ_HASFFI
/* 64 bit integer comparisons in 32 bit mode. */
static void asm_comp_int64(ASMState *as, IRIns *ir)
{
  uint32_t cc = asm_compmap[(ir-1)->o];
  RegSet allow = RSET_GPR;
  Reg lefthi = RID_NONE, leftlo = RID_NONE;
  Reg righthi = RID_NONE, rightlo = RID_NONE;
  MCLabel l_around;
  x86ModRM mrm;

  as->curins--;  /* Skip loword ins. Avoids failing in noconflict(), too. */

  /* Allocate/fuse hiword operands. */
  if (irref_isk(ir->op2)) {
    lefthi = asm_fuseload(as, ir->op1, allow);
  } else {
    lefthi = ra_alloc1(as, ir->op1, allow);
    rset_clear(allow, lefthi);
    righthi = asm_fuseload(as, ir->op2, allow);
    if (righthi == RID_MRM) {
      if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base);
      if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx);
    } else {
      rset_clear(allow, righthi);
    }
  }
  mrm = as->mrm;  /* Save state for hiword instruction. */

  /* Allocate/fuse loword operands. */
  if (irref_isk((ir-1)->op2)) {
    leftlo = asm_fuseload(as, (ir-1)->op1, allow);
  } else {
    leftlo = ra_alloc1(as, (ir-1)->op1, allow);
    rset_clear(allow, leftlo);
    rightlo = asm_fuseload(as, (ir-1)->op2, allow);
  }

  /* All register allocations must be performed _before_ this point. */
  l_around = emit_label(as);
  as->invmcp = as->flagmcp = NULL;  /* Cannot use these optimizations. */

  /* Loword comparison and branch. */
  asm_guardcc(as, cc >> 4);  /* Always use unsigned compare for loword. */
  if (ra_noreg(rightlo)) {
    int32_t imm = IR((ir-1)->op2)->i;
    if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM)
      emit_rr(as, XO_TEST, leftlo, leftlo);
    else
      emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm);
  } else {
    emit_mrm(as, XO_CMP, leftlo, rightlo);
  }

  /* Hiword comparison and branches. */
  if ((cc & 15) != CC_NE)
    emit_sjcc(as, CC_NE, l_around);  /* Hiword unequal: skip loword compare. */
  if ((cc & 15) != CC_E)
    asm_guardcc(as, cc >> 8);  /* Hiword compare without equality check. */
  as->mrm = mrm;  /* Restore state. */
  if (ra_noreg(righthi)) {
    int32_t imm = IR(ir->op2)->i;
    if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM)
      emit_rr(as, XO_TEST, lefthi, lefthi);
    else
      emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm);
  } else {
    emit_mrm(as, XO_CMP, lefthi, righthi);
  }
}
#endif

/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */

/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
#if LJ_32 && LJ_HASFFI
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
    as->curins--;  /* Always skip the CONV. */
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
    asm_comp_int64(as, ir);
    return;
  } else if ((ir-1)->o == IR_XSTORE) {
    if ((ir-1)->r != RID_SINK)
      asm_fxstore(as, ir);
    return;
  }
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
  case IR_ADD:
    as->flagmcp = NULL;
    as->curins--;
    asm_intarith(as, ir, XOg_ADC);
    asm_intarith(as, ir-1, XOg_ADD);
    break;
  case IR_SUB:
    as->flagmcp = NULL;
    as->curins--;
    asm_intarith(as, ir, XOg_SBB);
    asm_intarith(as, ir-1, XOg_SUB);
    break;
  case IR_NEG: {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    emit_rr(as, XO_GROUP3, XOg_NEG, dest);
    emit_i8(as, 0);
    emit_rr(as, XO_ARITHi8, XOg_ADC, dest);
    ra_left(as, dest, ir->op1);
    as->curins--;
    asm_neg_not(as, ir-1, XOg_NEG);
    break;
  }
  case IR_CALLN:
  case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  case IR_CNEWI:
    /* Nothing to do here. Handled by CNEWI itself. */
    break;
  default: lua_assert(0); break;
  }
#else
  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on x64 or without FFI. */
#endif
}

/* -- Profiling ----------------------------------------------------------- */

static void asm_prof(ASMState *as, IRIns *ir)
{
  UNUSED(ir);
  asm_guardcc(as, CC_NE);
  emit_i8(as, HOOK_PROFILE);
  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
}

/* -- Stack handling ------------------------------------------------------ */

/* Check Lua stack size for overflow. Use exit handler as fallback. */
static void asm_stack_check(ASMState *as, BCReg topslot,
                            IRIns *irp, RegSet allow, ExitNo exitno)
{
  /* Try to get an unused temp. register, otherwise spill/restore eax. */
  Reg pbase = irp ? irp->r : RID_BASE;
  Reg r = allow ? rset_pickbot(allow) : RID_EAX;
  emit_jcc(as, CC_B, exitstub_addr(as->J, exitno));
  if (allow == RSET_EMPTY)  /* Restore temp. register. */
    emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
  else
    ra_modified(as, r);
  emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
  if (ra_hasreg(pbase) && pbase != r)
    emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
  else
    emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
              ptr2addr(&J2G(as->J)->jit_base));
  emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
  emit_getgl(as, r, cur_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
}

/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
    } else {
      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
                 (LJ_DUALNUM && irt_isinteger(ir->t)));
      if (!irref_isk(ref)) {
        Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
        emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
      } else if (!irt_ispri(ir->t)) {
        emit_movmroi(as, RID_BASE, ofs, ir->i);
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        if (s != 0)  /* Do not overwrite link to previous frame. */
          emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
      } else {
        if (!(LJ_64 && irt_islightud(ir->t)))
          emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
      }
    }
    checkmclim(as);
  }
  lua_assert(map + nent == flinks);
}

/* -- GC handling --------------------------------------------------------- */

/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  emit_rr(as, XO_TEST, RID_RET, RID_RET);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps */
  asm_gencall(as, ci, args);
  tmp = ra_releasetmp(as, ASMREF_TMP1);
  emit_loada(as, tmp, J2G(as->J));
  emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_sjcc(as, CC_B, l_end);
  emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
  emit_getgl(as, tmp, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}

/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->realign) {  /* Realigned loops use short jumps. */
    as->realign = NULL;  /* Stop another retry. */
    lua_assert(((intptr_t)target & 15) == 0);
    if (as->loopinv) {  /* Inverted loop branch? */
      p -= 5;
      p[0] = XI_JMP;
      lua_assert(target - p >= -128);
      p[-1] = (MCode)(target - p);  /* Patch sjcc. */
      if (as->loopinv == 2)
        p[-3] = (MCode)(target - p + 2);  /* Patch opt. short jp. */
    } else {
      lua_assert(target - p >= -128);
      p[-1] = (MCode)(int8_t)(target - p);  /* Patch short jmp. */
      p[-2] = XI_JMPs;
    }
  } else {
    MCode *newloop;
    p[-5] = XI_JMP;
    if (as->loopinv) {  /* Inverted loop branch? */
      /* asm_guardcc already inverted the jcc and patched the jmp. */
      p -= 5;
      newloop = target+4;
      *(int32_t *)(p-4) = (int32_t)(target - p);  /* Patch jcc. */
      if (as->loopinv == 2) {
        *(int32_t *)(p-10) = (int32_t)(target - p + 6);  /* Patch opt. jp. */
        newloop = target+8;
      }
    } else {  /* Otherwise just patch jmp. */
      *(int32_t *)(p-4) = (int32_t)(target - p);
      newloop = target+3;
    }
    /* Realign small loops and shorten the loop branch. */
    if (newloop >= p - 128) {
      as->realign = newloop;  /* Force a retry and remember alignment. */
      as->curins = as->stopins;  /* Abort asm_trace now. */
      as->T->nins = as->orignins;  /* Remove any added renames. */
    }
  }
}

/* -- Head of trace ------------------------------------------------------- */

/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (r != RID_BASE)
      emit_rr(as, XO_MOV, r, RID_BASE);
  }
}

/* Coalesce or reload BASE register for a side trace. */
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      rset_clear(allow, irp->r);
      emit_rr(as, XO_MOV, r, irp->r);  /* Move from coalesced parent reg. */
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return allow;
}

/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
  MCode *p = as->mctop;
  MCode *target, *q;
  int32_t spadj = as->T->spadjust;
  if (spadj == 0) {
    p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
  } else {
    MCode *p1;
    /* Patch stack adjustment. */
    if (checki8(spadj)) {
      p -= 3;
      p1 = p-6;
      *p1 = (MCode)spadj;
    } else {
      p1 = p-9;
      *(int32_t *)p1 = spadj;
    }
    if ((as->flags & JIT_F_LEA_AGU)) {
#if LJ_64
      p1[-4] = 0x48;
#endif
      p1[-3] = (MCode)XI_LEA;
      p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
      p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
    } else {
#if LJ_64
      p1[-3] = 0x48;
#endif
      p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
      p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
    }
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = XI_JMP;
  /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
  for (q = as->mctop-1; q >= p; q--)
    *q = XI_NOP;
  as->mctop = p;
}

/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop;
  /* Realign and leave room for backwards loop branch or exit branch. */
  if (as->realign) {
    int i = ((int)(intptr_t)as->realign) & 15;
    /* Fill unused mcode tail with NOPs to make the prefetcher happy. */
    while (i-- > 0)
      *--p = XI_NOP;
    as->mctop = p;
    p -= (as->loopinv ? 5 : 2);  /* Space for short/near jmp. */
  } else {
    p -= 5;  /* Space for exit branch (near jmp). */
  }
  if (as->loopref) {
    as->invmcp = as->mcp = p;
  } else {
    /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
    as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0));
    as->invmcp = NULL;
  }
}

/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX*2];
  int nslots;
  asm_collectargs(as, ir, ci, args);
  nslots = asm_count_call_slots(as, ci, args);
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
#if LJ_64
  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
#else
  return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET);
#endif
}

/* Target-specific setup. */
static void asm_setup_target(ASMState *as)
{
  asm_exitstub_setup(as, as->T->nsnap);
}

/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  MSize len = T->szmcode;
  MCode *px = exitstub_addr(J, exitno) - 6;
  MCode *pe = p+len-6;
  uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
  if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
    *(int32_t *)(p+len-4) = jmprel(p+len, target);
  /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
  for (; p < pe; p++)
    if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) {
      p += LJ_64 ? 11 : 10;
      break;
    }
  lua_assert(p < pe);
  for (; p < pe; p++) {
    if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
      *(int32_t *)(p+2) = jmprel(p+6, target);
      p += 5;
    }
  }
  lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
  lj_mcode_patch(J, mcarea, 1);
}