immunio 1.2.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -5
- data/ext/immunio/Rakefile +14 -6
- data/lib/immunio/context.rb +2 -0
- data/lib/immunio/plugins/action_view.rb +7 -668
- data/lib/immunio/plugins/action_view/action_view.rb +22 -0
- data/lib/immunio/plugins/action_view/active_support_hash.rb +29 -0
- data/lib/immunio/plugins/action_view/cache_store.rb +24 -0
- data/lib/immunio/plugins/action_view/erubi.rb +38 -0
- data/lib/immunio/plugins/action_view/erubis.rb +39 -0
- data/lib/immunio/plugins/action_view/fragment_caching.rb +29 -0
- data/lib/immunio/plugins/action_view/haml.rb +46 -0
- data/lib/immunio/plugins/action_view/slim.rb +42 -0
- data/lib/immunio/plugins/action_view/template.rb +431 -0
- data/lib/immunio/plugins/action_view/template_rendering.rb +45 -0
- data/lib/immunio/plugins/http_tracker.rb +2 -0
- data/lib/immunio/plugins/io.rb +34 -0
- data/lib/immunio/version.rb +1 -1
- data/lua-hooks/Makefile +36 -9
- data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
- data/lua-hooks/ext/luajit/Makefile +22 -15
- data/lua-hooks/ext/luajit/README +2 -2
- data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
- data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
- data/lua-hooks/ext/luajit/doc/changes.html +69 -3
- data/lua-hooks/ext/luajit/doc/contact.html +10 -3
- data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +3 -4
- data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_jit.html +3 -3
- data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
- data/lua-hooks/ext/luajit/doc/extensions.html +47 -20
- data/lua-hooks/ext/luajit/doc/faq.html +2 -2
- data/lua-hooks/ext/luajit/doc/install.html +74 -45
- data/lua-hooks/ext/luajit/doc/luajit.html +5 -5
- data/lua-hooks/ext/luajit/doc/running.html +3 -3
- data/lua-hooks/ext/luajit/doc/status.html +13 -8
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +8 -5
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +66 -11
- data/lua-hooks/ext/luajit/dynasm/dasm_mips64.lua +12 -0
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +5 -1
- data/lua-hooks/ext/luajit/dynasm/dynasm.lua +2 -2
- data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
- data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
- data/lua-hooks/ext/luajit/src/Makefile +15 -11
- data/lua-hooks/ext/luajit/src/Makefile.dep +16 -16
- data/lua-hooks/ext/luajit/src/host/buildvm.c +2 -2
- data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +9 -4
- data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +2 -2
- data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +14 -3
- data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +27 -3
- data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
- data/lua-hooks/ext/luajit/src/host/genminilua.lua +6 -5
- data/lua-hooks/ext/luajit/src/host/minilua.c +1 -1
- data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/bcsave.lua +8 -8
- data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +2 -2
- data/lua-hooks/ext/luajit/src/jit/dis_arm64.lua +1216 -0
- data/lua-hooks/ext/luajit/src/jit/dis_arm64be.lua +12 -0
- data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +35 -20
- data/lua-hooks/ext/luajit/src/jit/dis_mips64.lua +17 -0
- data/lua-hooks/ext/luajit/src/jit/dis_mips64el.lua +17 -0
- data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +2 -2
- data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +7 -4
- data/lua-hooks/ext/luajit/src/jit/dump.lua +17 -12
- data/lua-hooks/ext/luajit/src/jit/p.lua +3 -2
- data/lua-hooks/ext/luajit/src/jit/v.lua +2 -2
- data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
- data/lua-hooks/ext/luajit/src/lauxlib.h +14 -20
- data/lua-hooks/ext/luajit/src/lib_aux.c +38 -27
- data/lua-hooks/ext/luajit/src/lib_base.c +12 -5
- data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_debug.c +5 -5
- data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -2
- data/lua-hooks/ext/luajit/src/lib_init.c +16 -16
- data/lua-hooks/ext/luajit/src/lib_io.c +6 -7
- data/lua-hooks/ext/luajit/src/lib_jit.c +14 -4
- data/lua-hooks/ext/luajit/src/lib_math.c +1 -5
- data/lua-hooks/ext/luajit/src/lib_os.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_package.c +14 -23
- data/lua-hooks/ext/luajit/src/lib_string.c +1 -5
- data/lua-hooks/ext/luajit/src/lib_table.c +21 -1
- data/lua-hooks/ext/luajit/src/lj.supp +3 -3
- data/lua-hooks/ext/luajit/src/lj_alloc.c +174 -83
- data/lua-hooks/ext/luajit/src/lj_api.c +97 -18
- data/lua-hooks/ext/luajit/src/lj_arch.h +54 -22
- data/lua-hooks/ext/luajit/src/lj_asm.c +172 -53
- data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_asm_arm.h +19 -16
- data/lua-hooks/ext/luajit/src/lj_asm_arm64.h +2022 -0
- data/lua-hooks/ext/luajit/src/lj_asm_mips.h +564 -158
- data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +19 -18
- data/lua-hooks/ext/luajit/src/lj_asm_x86.h +578 -92
- data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_buf.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_buf.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ccall.c +172 -7
- data/lua-hooks/ext/luajit/src/lj_ccall.h +21 -5
- data/lua-hooks/ext/luajit/src/lj_ccallback.c +71 -17
- data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cconv.c +4 -2
- data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cdata.c +7 -5
- data/lua-hooks/ext/luajit/src/lj_cdata.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_clib.c +5 -5
- data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cparse.c +11 -6
- data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_crecord.c +70 -14
- data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ctype.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_ctype.h +8 -8
- data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_def.h +6 -9
- data/lua-hooks/ext/luajit/src/lj_dispatch.c +3 -3
- data/lua-hooks/ext/luajit/src/lj_dispatch.h +2 -1
- data/lua-hooks/ext/luajit/src/lj_emit_arm.h +5 -4
- data/lua-hooks/ext/luajit/src/lj_emit_arm64.h +419 -0
- data/lua-hooks/ext/luajit/src/lj_emit_mips.h +100 -20
- data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +4 -4
- data/lua-hooks/ext/luajit/src/lj_emit_x86.h +116 -25
- data/lua-hooks/ext/luajit/src/lj_err.c +34 -13
- data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ffrecord.c +58 -49
- data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_frame.h +33 -6
- data/lua-hooks/ext/luajit/src/lj_func.c +4 -2
- data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_gc.c +16 -7
- data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_gdbjit.c +31 -1
- data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ir.c +69 -96
- data/lua-hooks/ext/luajit/src/lj_ir.h +29 -18
- data/lua-hooks/ext/luajit/src/lj_ircall.h +24 -30
- data/lua-hooks/ext/luajit/src/lj_iropt.h +9 -9
- data/lua-hooks/ext/luajit/src/lj_jit.h +67 -9
- data/lua-hooks/ext/luajit/src/lj_lex.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_mcode.c +11 -10
- data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_meta.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_obj.h +7 -3
- data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_fold.c +84 -17
- data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_mem.c +3 -3
- data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +24 -22
- data/lua-hooks/ext/luajit/src/lj_opt_sink.c +11 -6
- data/lua-hooks/ext/luajit/src/lj_opt_split.c +11 -2
- data/lua-hooks/ext/luajit/src/lj_parse.c +9 -7
- data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_record.c +201 -117
- data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_snap.c +72 -26
- data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_state.c +6 -6
- data/lua-hooks/ext/luajit/src/lj_state.h +2 -2
- data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_strfmt.c +7 -3
- data/lua-hooks/ext/luajit/src/lj_strfmt.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +4 -3
- data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_tab.c +1 -2
- data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target.h +3 -3
- data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_arm64.h +239 -7
- data/lua-hooks/ext/luajit/src/lj_target_mips.h +111 -22
- data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_x86.h +21 -4
- data/lua-hooks/ext/luajit/src/lj_trace.c +63 -18
- data/lua-hooks/ext/luajit/src/lj_trace.h +2 -1
- data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_vm.h +5 -1
- data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_vmmath.c +1 -1
- data/lua-hooks/ext/luajit/src/ljamalg.c +1 -1
- data/lua-hooks/ext/luajit/src/lua.h +9 -1
- data/lua-hooks/ext/luajit/src/luaconf.h +3 -7
- data/lua-hooks/ext/luajit/src/luajit.c +69 -54
- data/lua-hooks/ext/luajit/src/luajit.h +4 -4
- data/lua-hooks/ext/luajit/src/lualib.h +1 -1
- data/lua-hooks/ext/luajit/src/msvcbuild.bat +12 -4
- data/lua-hooks/ext/luajit/src/vm_arm.dasc +1 -1
- data/lua-hooks/ext/luajit/src/vm_arm64.dasc +255 -32
- data/lua-hooks/ext/luajit/src/vm_mips.dasc +26 -23
- data/lua-hooks/ext/luajit/src/vm_mips64.dasc +5062 -0
- data/lua-hooks/ext/luajit/src/vm_ppc.dasc +1 -1
- data/lua-hooks/ext/luajit/src/vm_x64.dasc +24 -25
- data/lua-hooks/ext/luajit/src/vm_x86.dasc +77 -4
- data/lua-hooks/libluahooks.darwin.a +0 -0
- data/lua-hooks/libluahooks.linux.a +0 -0
- data/lua-hooks/options.mk +1 -1
- metadata +37 -77
- data/lua-hooks/ext/all.c +0 -69
- data/lua-hooks/ext/libinjection/COPYING +0 -37
- data/lua-hooks/ext/libinjection/libinjection.h +0 -65
- data/lua-hooks/ext/libinjection/libinjection_html5.c +0 -847
- data/lua-hooks/ext/libinjection/libinjection_html5.h +0 -54
- data/lua-hooks/ext/libinjection/libinjection_sqli.c +0 -2301
- data/lua-hooks/ext/libinjection/libinjection_sqli.h +0 -295
- data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +0 -9349
- data/lua-hooks/ext/libinjection/libinjection_xss.c +0 -531
- data/lua-hooks/ext/libinjection/libinjection_xss.h +0 -21
- data/lua-hooks/ext/libinjection/lualib.c +0 -145
- data/lua-hooks/ext/libinjection/module.mk +0 -5
- data/lua-hooks/ext/lpeg/HISTORY +0 -96
- data/lua-hooks/ext/lpeg/lpcap.c +0 -537
- data/lua-hooks/ext/lpeg/lpcap.h +0 -56
- data/lua-hooks/ext/lpeg/lpcode.c +0 -1014
- data/lua-hooks/ext/lpeg/lpcode.h +0 -40
- data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
- data/lua-hooks/ext/lpeg/lpeg.html +0 -1445
- data/lua-hooks/ext/lpeg/lpprint.c +0 -244
- data/lua-hooks/ext/lpeg/lpprint.h +0 -36
- data/lua-hooks/ext/lpeg/lptree.c +0 -1303
- data/lua-hooks/ext/lpeg/lptree.h +0 -82
- data/lua-hooks/ext/lpeg/lptypes.h +0 -149
- data/lua-hooks/ext/lpeg/lpvm.c +0 -364
- data/lua-hooks/ext/lpeg/lpvm.h +0 -58
- data/lua-hooks/ext/lpeg/makefile +0 -55
- data/lua-hooks/ext/lpeg/module.mk +0 -6
- data/lua-hooks/ext/lpeg/re.html +0 -498
- data/lua-hooks/ext/lua-cmsgpack/.gitignore +0 -13
- data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +0 -45
- data/lua-hooks/ext/lua-cmsgpack/README.md +0 -115
- data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +0 -970
- data/lua-hooks/ext/lua-cmsgpack/module.mk +0 -2
- data/lua-hooks/ext/lua-cmsgpack/test.lua +0 -570
- data/lua-hooks/ext/lua-snapshot/LICENSE +0 -7
- data/lua-hooks/ext/lua-snapshot/Makefile +0 -12
- data/lua-hooks/ext/lua-snapshot/README.md +0 -18
- data/lua-hooks/ext/lua-snapshot/dump.lua +0 -15
- data/lua-hooks/ext/lua-snapshot/module.mk +0 -2
- data/lua-hooks/ext/lua-snapshot/snapshot.c +0 -462
- data/lua-hooks/ext/luautf8/README.md +0 -152
- data/lua-hooks/ext/luautf8/lutf8lib.c +0 -1274
- data/lua-hooks/ext/luautf8/module.mk +0 -2
- data/lua-hooks/ext/luautf8/unidata.h +0 -3064
- data/lua-hooks/ext/module.mk +0 -15
- data/lua-hooks/ext/modules.h +0 -17
- data/lua-hooks/ext/perf/luacpu.c +0 -114
- data/lua-hooks/ext/perf/lualoadavg.c +0 -40
- data/lua-hooks/ext/perf/luameminfo.c +0 -38
- data/lua-hooks/ext/perf/luaoslib.c +0 -203
- data/lua-hooks/ext/perf/module.mk +0 -5
- data/lua-hooks/ext/sha1/luasha1.c +0 -74
- data/lua-hooks/ext/sha1/module.mk +0 -5
- data/lua-hooks/ext/sha1/sha1.c +0 -145
- data/lua-hooks/ext/sha2/luasha256.c +0 -77
- data/lua-hooks/ext/sha2/module.mk +0 -5
- data/lua-hooks/ext/sha2/sha256.c +0 -196
- data/lua-hooks/ext/sysutils/lua_utils.c +0 -56
- data/lua-hooks/ext/sysutils/module.mk +0 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** ARM IR assembler (SSA IR -> machine code).
|
|
3
|
-
** Copyright (C) 2005-
|
|
3
|
+
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
/* -- Register allocator extensions --------------------------------------- */
|
|
@@ -909,7 +909,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
|
|
909
909
|
|
|
910
910
|
static void asm_uref(ASMState *as, IRIns *ir)
|
|
911
911
|
{
|
|
912
|
-
/* NYI: Check that UREFO is still open and not aliasing a slot. */
|
|
913
912
|
Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
914
913
|
if (irref_isk(ir->op1)) {
|
|
915
914
|
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
|
@@ -998,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir)
|
|
|
998
997
|
|
|
999
998
|
static void asm_fload(ASMState *as, IRIns *ir)
|
|
1000
999
|
{
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1000
|
+
if (ir->op1 == REF_NIL) {
|
|
1001
|
+
lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
|
|
1002
|
+
} else {
|
|
1003
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
1004
|
+
Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
1005
|
+
ARMIns ai = asm_fxloadins(ir);
|
|
1006
|
+
int32_t ofs;
|
|
1007
|
+
if (ir->op2 == IRFL_TAB_ARRAY) {
|
|
1008
|
+
ofs = asm_fuseabase(as, ir->op1);
|
|
1009
|
+
if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
|
1010
|
+
emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
|
|
1011
|
+
return;
|
|
1012
|
+
}
|
|
1010
1013
|
}
|
|
1014
|
+
ofs = field_ofs[ir->op2];
|
|
1015
|
+
if ((ai & 0x04000000))
|
|
1016
|
+
emit_lso(as, ai, dest, idx, ofs);
|
|
1017
|
+
else
|
|
1018
|
+
emit_lsox(as, ai, dest, idx, ofs);
|
|
1011
1019
|
}
|
|
1012
|
-
ofs = field_ofs[ir->op2];
|
|
1013
|
-
if ((ai & 0x04000000))
|
|
1014
|
-
emit_lso(as, ai, dest, idx, ofs);
|
|
1015
|
-
else
|
|
1016
|
-
emit_lsox(as, ai, dest, idx, ofs);
|
|
1017
1020
|
}
|
|
1018
1021
|
|
|
1019
1022
|
static void asm_fstore(ASMState *as, IRIns *ir)
|
|
@@ -0,0 +1,2022 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** ARM64 IR assembler (SSA IR -> machine code).
|
|
3
|
+
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
|
|
4
|
+
**
|
|
5
|
+
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
|
|
6
|
+
** Sponsored by Cisco Systems, Inc.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/* -- Register allocator extensions --------------------------------------- */
|
|
10
|
+
|
|
11
|
+
/* Allocate a register with a hint. */
|
|
12
|
+
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
|
|
13
|
+
{
|
|
14
|
+
Reg r = IR(ref)->r;
|
|
15
|
+
if (ra_noreg(r)) {
|
|
16
|
+
if (!ra_hashint(r) && !iscrossref(as, ref))
|
|
17
|
+
ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
|
|
18
|
+
r = ra_allocref(as, ref, allow);
|
|
19
|
+
}
|
|
20
|
+
ra_noweak(as, r);
|
|
21
|
+
return r;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/* Allocate two source registers for three-operand instructions. */
|
|
25
|
+
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
|
|
26
|
+
{
|
|
27
|
+
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
|
|
28
|
+
Reg left = irl->r, right = irr->r;
|
|
29
|
+
if (ra_hasreg(left)) {
|
|
30
|
+
ra_noweak(as, left);
|
|
31
|
+
if (ra_noreg(right))
|
|
32
|
+
right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
|
|
33
|
+
else
|
|
34
|
+
ra_noweak(as, right);
|
|
35
|
+
} else if (ra_hasreg(right)) {
|
|
36
|
+
ra_noweak(as, right);
|
|
37
|
+
left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
|
|
38
|
+
} else if (ra_hashint(right)) {
|
|
39
|
+
right = ra_allocref(as, ir->op2, allow);
|
|
40
|
+
left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
|
|
41
|
+
} else {
|
|
42
|
+
left = ra_allocref(as, ir->op1, allow);
|
|
43
|
+
right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
|
|
44
|
+
}
|
|
45
|
+
return left | (right << 8);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/* -- Guard handling ------------------------------------------------------ */
|
|
49
|
+
|
|
50
|
+
/* Setup all needed exit stubs. */
|
|
51
|
+
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
|
|
52
|
+
{
|
|
53
|
+
ExitNo i;
|
|
54
|
+
MCode *mxp = as->mctop;
|
|
55
|
+
if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
|
|
56
|
+
asm_mclimit(as);
|
|
57
|
+
/* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
|
|
58
|
+
for (i = nexits-1; (int32_t)i >= 0; i--)
|
|
59
|
+
*--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
|
|
60
|
+
*--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
|
|
61
|
+
mxp--;
|
|
62
|
+
*mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
|
|
63
|
+
*--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
|
|
64
|
+
as->mctop = mxp;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
|
|
68
|
+
{
|
|
69
|
+
/* Keep this in-sync with exitstub_trace_addr(). */
|
|
70
|
+
return as->mctop + exitno + 3;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/* Emit conditional branch to exit for guard. */
|
|
74
|
+
static void asm_guardcc(ASMState *as, A64CC cc)
|
|
75
|
+
{
|
|
76
|
+
MCode *target = asm_exitstub_addr(as, as->snapno);
|
|
77
|
+
MCode *p = as->mcp;
|
|
78
|
+
if (LJ_UNLIKELY(p == as->invmcp)) {
|
|
79
|
+
as->loopinv = 1;
|
|
80
|
+
*p = A64I_B | ((target-p) & 0x03ffffffu);
|
|
81
|
+
emit_cond_branch(as, cc^1, p-1);
|
|
82
|
+
return;
|
|
83
|
+
}
|
|
84
|
+
emit_cond_branch(as, cc, target);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/* Emit test and branch instruction to exit for guard. */
|
|
88
|
+
static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
|
|
89
|
+
{
|
|
90
|
+
MCode *target = asm_exitstub_addr(as, as->snapno);
|
|
91
|
+
MCode *p = as->mcp;
|
|
92
|
+
if (LJ_UNLIKELY(p == as->invmcp)) {
|
|
93
|
+
as->loopinv = 1;
|
|
94
|
+
*p = A64I_B | ((target-p) & 0x03ffffffu);
|
|
95
|
+
emit_tnb(as, ai^0x01000000u, r, bit, p-1);
|
|
96
|
+
return;
|
|
97
|
+
}
|
|
98
|
+
emit_tnb(as, ai, r, bit, target);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
/* Emit compare and branch instruction to exit for guard. */
|
|
102
|
+
static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
|
|
103
|
+
{
|
|
104
|
+
MCode *target = asm_exitstub_addr(as, as->snapno);
|
|
105
|
+
MCode *p = as->mcp;
|
|
106
|
+
if (LJ_UNLIKELY(p == as->invmcp)) {
|
|
107
|
+
as->loopinv = 1;
|
|
108
|
+
*p = A64I_B | ((target-p) & 0x03ffffffu);
|
|
109
|
+
emit_cnb(as, ai^0x01000000u, r, p-1);
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
emit_cnb(as, ai, r, target);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/* -- Operand fusion ------------------------------------------------------ */
|
|
116
|
+
|
|
117
|
+
/* Limit linear search to this distance. Avoids O(n^2) behavior. */
|
|
118
|
+
#define CONFLICT_SEARCH_LIM 31
|
|
119
|
+
|
|
120
|
+
static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
|
|
121
|
+
{
|
|
122
|
+
if (irref_isk(ref)) {
|
|
123
|
+
IRIns *ir = IR(ref);
|
|
124
|
+
if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
|
|
125
|
+
*k = ir->i;
|
|
126
|
+
return 1;
|
|
127
|
+
} else if (checki32((int64_t)ir_k64(ir)->u64)) {
|
|
128
|
+
*k = (int32_t)ir_k64(ir)->u64;
|
|
129
|
+
return 1;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
return 0;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/* Check if there's no conflicting instruction between curins and ref. */
|
|
136
|
+
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
|
|
137
|
+
{
|
|
138
|
+
IRIns *ir = as->ir;
|
|
139
|
+
IRRef i = as->curins;
|
|
140
|
+
if (i > ref + CONFLICT_SEARCH_LIM)
|
|
141
|
+
return 0; /* Give up, ref is too far away. */
|
|
142
|
+
while (--i > ref)
|
|
143
|
+
if (ir[i].o == conflict)
|
|
144
|
+
return 0; /* Conflict found. */
|
|
145
|
+
return 1; /* Ok, no conflict. */
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/* Fuse the array base of colocated arrays. */
|
|
149
|
+
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
|
|
150
|
+
{
|
|
151
|
+
IRIns *ir = IR(ref);
|
|
152
|
+
if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
|
|
153
|
+
!neverfuse(as) && noconflict(as, ref, IR_NEWREF))
|
|
154
|
+
return (int32_t)sizeof(GCtab);
|
|
155
|
+
return 0;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
#define FUSE_REG 0x40000000
|
|
159
|
+
|
|
160
|
+
/* Fuse array/hash/upvalue reference into register+offset operand. */
|
|
161
|
+
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
|
|
162
|
+
A64Ins ins)
|
|
163
|
+
{
|
|
164
|
+
IRIns *ir = IR(ref);
|
|
165
|
+
if (ra_noreg(ir->r)) {
|
|
166
|
+
if (ir->o == IR_AREF) {
|
|
167
|
+
if (mayfuse(as, ref)) {
|
|
168
|
+
if (irref_isk(ir->op2)) {
|
|
169
|
+
IRRef tab = IR(ir->op1)->op1;
|
|
170
|
+
int32_t ofs = asm_fuseabase(as, tab);
|
|
171
|
+
IRRef refa = ofs ? tab : ir->op1;
|
|
172
|
+
ofs += 8*IR(ir->op2)->i;
|
|
173
|
+
if (emit_checkofs(ins, ofs)) {
|
|
174
|
+
*ofsp = ofs;
|
|
175
|
+
return ra_alloc1(as, refa, allow);
|
|
176
|
+
}
|
|
177
|
+
} else {
|
|
178
|
+
Reg base = ra_alloc1(as, ir->op1, allow);
|
|
179
|
+
*ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
|
|
180
|
+
return base;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
} else if (ir->o == IR_HREFK) {
|
|
184
|
+
if (mayfuse(as, ref)) {
|
|
185
|
+
int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
|
|
186
|
+
if (emit_checkofs(ins, ofs)) {
|
|
187
|
+
*ofsp = ofs;
|
|
188
|
+
return ra_alloc1(as, ir->op1, allow);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
} else if (ir->o == IR_UREFC) {
|
|
192
|
+
if (irref_isk(ir->op1)) {
|
|
193
|
+
GCfunc *fn = ir_kfunc(IR(ir->op1));
|
|
194
|
+
GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
|
|
195
|
+
int64_t ofs = glofs(as, &uv->tv);
|
|
196
|
+
if (emit_checkofs(ins, ofs)) {
|
|
197
|
+
*ofsp = (int32_t)ofs;
|
|
198
|
+
return RID_GL;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
*ofsp = 0;
|
|
204
|
+
return ra_alloc1(as, ref, allow);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/* Fuse m operand into arithmetic/logic instructions. */
|
|
208
|
+
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
|
|
209
|
+
{
|
|
210
|
+
IRIns *ir = IR(ref);
|
|
211
|
+
if (ra_hasreg(ir->r)) {
|
|
212
|
+
ra_noweak(as, ir->r);
|
|
213
|
+
return A64F_M(ir->r);
|
|
214
|
+
} else if (irref_isk(ref)) {
|
|
215
|
+
uint32_t m;
|
|
216
|
+
int64_t k = get_k64val(ir);
|
|
217
|
+
if ((ai & 0x1f000000) == 0x0a000000)
|
|
218
|
+
m = emit_isk13(k, irt_is64(ir->t));
|
|
219
|
+
else
|
|
220
|
+
m = emit_isk12(k);
|
|
221
|
+
if (m)
|
|
222
|
+
return m;
|
|
223
|
+
} else if (mayfuse(as, ref)) {
|
|
224
|
+
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
|
|
225
|
+
(ir->o == IR_ADD && ir->op1 == ir->op2)) {
|
|
226
|
+
A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
|
|
227
|
+
ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
|
|
228
|
+
int shift = ir->o == IR_ADD ? 1 :
|
|
229
|
+
(IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
|
|
230
|
+
IRIns *irl = IR(ir->op1);
|
|
231
|
+
if (sh == A64SH_LSL &&
|
|
232
|
+
irl->o == IR_CONV &&
|
|
233
|
+
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
|
|
234
|
+
shift <= 4 &&
|
|
235
|
+
canfuse(as, irl)) {
|
|
236
|
+
Reg m = ra_alloc1(as, irl->op1, allow);
|
|
237
|
+
return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
|
|
238
|
+
} else {
|
|
239
|
+
Reg m = ra_alloc1(as, ir->op1, allow);
|
|
240
|
+
return A64F_M(m) | A64F_SH(sh, shift);
|
|
241
|
+
}
|
|
242
|
+
} else if (ir->o == IR_CONV &&
|
|
243
|
+
ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
|
|
244
|
+
Reg m = ra_alloc1(as, ir->op1, allow);
|
|
245
|
+
return A64F_M(m) | A64F_EX(A64EX_SXTW);
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
return A64F_M(ra_allocref(as, ref, allow));
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/* Fuse XLOAD/XSTORE reference into load/store operand. */
|
|
252
|
+
static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
|
|
253
|
+
RegSet allow)
|
|
254
|
+
{
|
|
255
|
+
IRIns *ir = IR(ref);
|
|
256
|
+
Reg base;
|
|
257
|
+
int32_t ofs = 0;
|
|
258
|
+
if (ra_noreg(ir->r) && canfuse(as, ir)) {
|
|
259
|
+
if (ir->o == IR_ADD) {
|
|
260
|
+
if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
|
|
261
|
+
ref = ir->op1;
|
|
262
|
+
} else {
|
|
263
|
+
Reg rn, rm;
|
|
264
|
+
IRRef lref = ir->op1, rref = ir->op2;
|
|
265
|
+
IRIns *irl = IR(lref);
|
|
266
|
+
if (mayfuse(as, irl->op1)) {
|
|
267
|
+
unsigned int shift = 4;
|
|
268
|
+
if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
|
|
269
|
+
shift = (IR(irl->op2)->i & 63);
|
|
270
|
+
} else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
|
|
271
|
+
shift = 1;
|
|
272
|
+
}
|
|
273
|
+
if ((ai >> 30) == shift) {
|
|
274
|
+
lref = irl->op1;
|
|
275
|
+
irl = IR(lref);
|
|
276
|
+
ai |= A64I_LS_SH;
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
if (irl->o == IR_CONV &&
|
|
280
|
+
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
|
|
281
|
+
canfuse(as, irl)) {
|
|
282
|
+
lref = irl->op1;
|
|
283
|
+
ai |= A64I_LS_SXTWx;
|
|
284
|
+
} else {
|
|
285
|
+
ai |= A64I_LS_LSLx;
|
|
286
|
+
}
|
|
287
|
+
rm = ra_alloc1(as, lref, allow);
|
|
288
|
+
rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
|
|
289
|
+
emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
|
|
290
|
+
return;
|
|
291
|
+
}
|
|
292
|
+
} else if (ir->o == IR_STRREF) {
|
|
293
|
+
if (asm_isk32(as, ir->op2, &ofs)) {
|
|
294
|
+
ref = ir->op1;
|
|
295
|
+
} else if (asm_isk32(as, ir->op1, &ofs)) {
|
|
296
|
+
ref = ir->op2;
|
|
297
|
+
} else {
|
|
298
|
+
Reg rn = ra_alloc1(as, ir->op1, allow);
|
|
299
|
+
IRIns *irr = IR(ir->op2);
|
|
300
|
+
uint32_t m;
|
|
301
|
+
if (irr+1 == ir && !ra_used(irr) &&
|
|
302
|
+
irr->o == IR_ADD && irref_isk(irr->op2)) {
|
|
303
|
+
ofs = sizeof(GCstr) + IR(irr->op2)->i;
|
|
304
|
+
if (emit_checkofs(ai, ofs)) {
|
|
305
|
+
Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
|
|
306
|
+
m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
|
|
307
|
+
goto skipopm;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
|
|
311
|
+
ofs = sizeof(GCstr);
|
|
312
|
+
skipopm:
|
|
313
|
+
emit_lso(as, ai, rd, rd, ofs);
|
|
314
|
+
emit_dn(as, A64I_ADDx^m, rd, rn);
|
|
315
|
+
return;
|
|
316
|
+
}
|
|
317
|
+
ofs += sizeof(GCstr);
|
|
318
|
+
if (!emit_checkofs(ai, ofs)) {
|
|
319
|
+
Reg rn = ra_alloc1(as, ref, allow);
|
|
320
|
+
Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
|
|
321
|
+
emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
base = ra_alloc1(as, ref, allow);
|
|
327
|
+
emit_lso(as, ai, (rd & 31), base, ofs);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/* Fuse FP multiply-add/sub. */
|
|
331
|
+
static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
|
|
332
|
+
{
|
|
333
|
+
IRRef lref = ir->op1, rref = ir->op2;
|
|
334
|
+
IRIns *irm;
|
|
335
|
+
if (lref != rref &&
|
|
336
|
+
((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
|
|
337
|
+
ra_noreg(irm->r)) ||
|
|
338
|
+
(mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
|
|
339
|
+
(rref = lref, ai = air, ra_noreg(irm->r))))) {
|
|
340
|
+
Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
341
|
+
Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
|
|
342
|
+
Reg left = ra_alloc2(as, irm,
|
|
343
|
+
rset_exclude(rset_exclude(RSET_FPR, dest), add));
|
|
344
|
+
Reg right = (left >> 8); left &= 255;
|
|
345
|
+
emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
|
|
346
|
+
return 1;
|
|
347
|
+
}
|
|
348
|
+
return 0;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
/* Fuse BAND + BSHL/BSHR into UBFM. */
|
|
352
|
+
static int asm_fuseandshift(ASMState *as, IRIns *ir)
|
|
353
|
+
{
|
|
354
|
+
IRIns *irl = IR(ir->op1);
|
|
355
|
+
lua_assert(ir->o == IR_BAND);
|
|
356
|
+
if (canfuse(as, irl) && irref_isk(ir->op2)) {
|
|
357
|
+
uint64_t mask = get_k64val(IR(ir->op2));
|
|
358
|
+
if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
|
|
359
|
+
int32_t shmask = irt_is64(irl->t) ? 63 : 31;
|
|
360
|
+
int32_t shift = (IR(irl->op2)->i & shmask);
|
|
361
|
+
int32_t imms = shift;
|
|
362
|
+
if (irl->o == IR_BSHL) {
|
|
363
|
+
mask >>= shift;
|
|
364
|
+
shift = (shmask-shift+1) & shmask;
|
|
365
|
+
imms = 0;
|
|
366
|
+
}
|
|
367
|
+
if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */
|
|
368
|
+
Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
369
|
+
Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
|
|
370
|
+
A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
|
|
371
|
+
imms += 63 - emit_clz64(mask);
|
|
372
|
+
if (imms > shmask) imms = shmask;
|
|
373
|
+
emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
|
|
374
|
+
return 1;
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
return 0;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/* Fuse BOR(BSHL, BSHR) into EXTR/ROR.
** Matches a funnel-shift/rotate pattern where the two constant shift
** amounts sum to the operand width. Returns 1 on success, 0 otherwise.
*/
static int asm_fuseorshift(ASMState *as, IRIns *ir)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  lua_assert(ir->o == IR_BOR);
  if (canfuse(as, irl) && canfuse(as, irr) &&
      ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
       (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
    if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
      IRRef lref = irl->op1, rref = irr->op1;
      uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
      if (irl->o == IR_BSHR) {  /* BSHR needs to be the right operand. */
        uint32_t tmp2;
        IRRef tmp1 = lref; lref = rref; rref = tmp1;
        tmp2 = lshift; lshift = rshift; rshift = tmp2;
      }
      if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
        A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
        Reg dest = ra_dest(as, ir, RSET_GPR);
        Reg left = ra_alloc1(as, lref, RSET_GPR);
        Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
        /* EXTR's imms field holds the extraction position (= right shift). */
        emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
        return 1;
      }
    }
  }
  return 0;
}
|
|
409
|
+
|
|
410
|
+
/* -- Calls --------------------------------------------------------------- */
|
|
411
|
+
|
|
412
|
+
/* Generate a call to a C function.
** Assigns arguments to the AAPCS64 argument registers (GPR and FPR pools
** advance independently); overflow arguments are spilled to the stack.
** NOTE(review): emit_* calls appear to prepend instructions (the assembler
** generates machine code backwards), so the call itself is emitted before
** the argument setup — confirm against the shared assembler core.
*/
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 0;  /* Running stack offset for overflow arguments. */
  Reg gpr, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  /* Mark all GPR argument registers as cheap to evict. */
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
  for (n = 0; n < nargs; n++) { /* Setup args. */
    IRRef ref = args[n];
    IRIns *ir = IR(ref);
    if (ref) {
      if (irt_isfp(ir->t)) {
        if (fpr <= REGARG_LASTFPR) {
          lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
          ra_leftov(as, fpr, ref);
          fpr++;
        } else {
          /* Out of FP argument registers: pass on the stack. */
          Reg r = ra_alloc1(as, ref, RSET_FPR);
          emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
          ofs += 8;
        }
      } else {
        if (gpr <= REGARG_LASTGPR) {
          lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
          ra_leftov(as, gpr, ref);
          gpr++;
        } else {
          /* Out of integer argument registers: pass on the stack. */
          Reg r = ra_alloc1(as, ref, RSET_GPR);
          emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
          ofs += 8;
        }
      }
    }
  }
}
|
|
451
|
+
|
|
452
|
+
/* Setup result reg/sp for call. Evict scratch regs.
** The destination register of ir (if any) is exempted from eviction and
** then bound to the ABI return register (or moved from RID_RET for
** CCI_CASTU64-style integer-to-FP result casts).
*/
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if (ci->flags & CCI_CASTU64) {
        /* Result is returned in a GPR but consumed as FP: move it over. */
        Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
        emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
                dest, RID_RET);
      } else {
        ra_destreg(as, ir, RID_FPRET);
      }
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
  UNUSED(ci);
}
|
|
475
|
+
|
|
476
|
+
/* Assemble an indirect C call (IR_CALLXS).
** Collects the argument chain, then calls either a constant address or,
** for a dynamic target, a register outside the argument-register range.
*/
static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  /* Skip a trailing CARG link holding the function reference. */
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(ir_k64(irf)->u64);
  } else {  /* Need a non-argument register for indirect calls. */
    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
    emit_n(as, A64I_BLR, freg);
    ci.func = (ASMFunction)(void *)0;  /* Tell asm_gencall not to emit a call. */
  }
  asm_gencall(as, &ci, args);
}
|
|
496
|
+
|
|
497
|
+
/* -- Returns ------------------------------------------------------------- */
|
|
498
|
+
|
|
499
|
+
/* Return to lower frame. Guard that it goes to the right spot.
** Compares the frame link slot against the expected return PC, then
** adjusts BASE downward and keeps g->jit_base and the spill slot in sync.
*/
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  /* Slot delta of the returned-from frame (+1 frame link, +LJ_FR2 pad). */
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  /* Need to force a spill on REF_BASE now to update the stack slot. */
  emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guardcc(as, CC_NE);
  emit_nm(as, A64I_CMPx, RID_TMP,
          ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_lso(as, A64I_LDRx, RID_TMP, base, -8);  /* Load frame link (return PC). */
}
|
|
517
|
+
|
|
518
|
+
/* -- Type conversions ---------------------------------------------------- */
|
|
519
|
+
|
|
520
|
+
/* Checked conversion of a double in 'left' to an integer.
** Converts to int32, converts back and compares: the guard fails if the
** round-trip is inexact (value was not an exact integer).
*/
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  asm_guardcc(as, CC_NE);
  emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
  emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
  emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
}
|
|
529
|
+
|
|
530
|
+
/* IR_TOBIT: convert a double to int32 via the magic-number addition trick
** (op2 holds the 2^52+2^51 bias constant); the low 32 bits of the sum's
** mantissa are the result.
*/
static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));  /* Low 32 bits of the sum. */
  emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
}
|
|
540
|
+
|
|
541
|
+
/* IR_CONV: type conversions between FP and integer types of all widths.
** The source type is encoded in op2 (IRCONV_SRCMASK); the destination
** type is the instruction's own type. Dispatches to the matching A64
** convert/extend/move instruction.
*/
static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
              (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      /* Select SCVTF/UCVTF variant by dest precision, source width and sign. */
      A64Ins ai = irt_isfloat(ir->t) ?
        (((IRT_IS64 >> st) & 1) ?
         (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
         (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
        (((IRT_IS64 >> st) & 1) ?
         (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
         (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
      emit_dn(as, ai, (dest & 31), left);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg dest = ra_dest(as, ir, RSET_GPR);
      /* Select FCVTZS/FCVTZU variant by dest width/sign and source precision. */
      A64Ins ai = irt_is64(ir->t) ?
        (st == IRT_NUM ?
         (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
         (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
        (st == IRT_NUM ?
         (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
         (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
      emit_dn(as, ai, dest, (left & 31));
    }
  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, lref, RSET_GPR);
    A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
                st == IRT_U8 ? A64I_UXTBw :
                st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
    lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
    emit_dn(as, ai, dest, left);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (irt_is64(ir->t)) {
      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
        /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      } else {  /* 32 to 64 bit sign extension. */
        Reg left = ra_alloc1(as, lref, RSET_GPR);
        emit_dn(as, A64I_SXTW, dest, left);
      }
    } else {
      if (st64) {
        /* This is either a 32 bit reg/reg mov which zeroes the hiword
        ** or a load of the loword from a 64 bit address.
        */
        Reg left = ra_alloc1(as, lref, RSET_GPR);
        emit_dm(as, A64I_MOVw, dest, left);
      } else {  /* 32/32 bit no-op (cast). */
        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
      }
    }
  }
}
|
|
612
|
+
|
|
613
|
+
/* IR_STRTO: convert a string to a number via lj_strscan_num().
** The callee writes the TValue result into a stack slot (ASMREF_TMP1
** points at it); a zero return from the call triggers the guard.
*/
static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  Reg dest = 0, tmp;
  int destused = ra_used(ir);
  int32_t ofs = 0;
  ra_evictset(as, RSET_SCRATCH);
  if (destused) {
    if (ra_hasspill(ir->s)) {
      /* Result already has a spill slot: let the callee write it there
      ** directly and reload into the register (if any) afterwards.
      */
      ofs = sps_scale(ir->s);
      destused = 0;
      if (ra_hasreg(ir->r)) {
        ra_free(as, ir->r);
        ra_modified(as, ir->r);
        emit_spload(as, ir, ir->r, ofs);
      }
    } else {
      dest = ra_dest(as, ir, RSET_FPR);
    }
  }
  if (destused)
    emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
  asm_guardcnb(as, A64I_CBZ, RID_RET);  /* Guard on scan failure (0 return). */
  args[0] = ir->op1; /* GCstr *str */
  args[1] = ASMREF_TMP1; /* TValue *n */
  asm_gencall(as, ci, args);
  tmp = ra_releasetmp(as, ASMREF_TMP1);
  emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);  /* TMP1 = sp + ofs. */
}
|
|
643
|
+
|
|
644
|
+
/* -- Memory references --------------------------------------------------- */
|
|
645
|
+
|
|
646
|
+
/* Store tagged value for ref at base+ofs.
** Builds the 64 bit boxed representation: integers get the tag added
** above the zero-extended value, GC values get the type tag shifted into
** bits 47+; constants are materialized directly.
*/
static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
{
  RegSet allow = rset_exclude(RSET_GPR, base);
  IRIns *ir = IR(ref);
  lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
  if (irref_isk(ref)) {
    /* Constant: materialize the fully-tagged 64 bit value and store it. */
    TValue k;
    lj_ir_kvalue(as->J->L, &k, ir);
    emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
  } else {
    Reg src = ra_alloc1(as, ref, allow);
    rset_clear(allow, src);
    if (irt_isinteger(ir->t)) {
      /* tag | zero-extended int: ADD with UXTW extension of src. */
      Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
      emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
    } else {
      /* GC value: pointer + (itype << 47). */
      Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
    }
  }
}
|
|
670
|
+
|
|
671
|
+
/* Get pointer to TValue.
** Number constants can serve as their own TValue; anything else is
** spilled (numbers) or boxed into g->tmptv, and dest gets its address.
*/
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref)) {
      /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
    } else {
      /* Otherwise force a spill and use the spill slot. */
      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
    }
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    asm_tvstore64(as, dest, 0, ref);
    ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
  }
}
|
|
689
|
+
|
|
690
|
+
/* IR_AREF: compute the address of an array slot (8 bytes per slot).
** A constant index folds into an ADD immediate (possibly fused with a
** colocated-array base offset); otherwise an ADD with the index shifted
** left by 3 is used.
*/
static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
    if (k) {  /* Offset fits in a 12 bit immediate. */
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_dn(as, A64I_ADDx^k, dest, base);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  /* dest = base + (uint32_t)idx << 3. */
  emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
}
|
|
709
|
+
|
|
710
|
+
/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
** NOTE(review): emission order is reversed relative to execution order
** (the assembler generates code backwards) — the miss/exit path is
** emitted first, the hash computation last. Confirm against the shared
** assembler core before reordering anything here.
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = 0, tmp = RID_TMP;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  int isk = irref_isk(ir->op2);
  IRType1 kt = irkey->t;
  uint32_t k = 0;  /* Nonzero: key constant encoded as a 12 bit immediate. */
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;
  rset_clear(allow, tab);

  /* Allocate/materialize the key, depending on type and constness. */
  if (!isk) {
    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
    rset_clear(allow, key);
    if (!irt_isstr(kt)) {
      tmp = ra_scratch(as, allow);
      rset_clear(allow, tmp);
    }
  } else if (irt_isnum(kt)) {
    int64_t val = (int64_t)ir_knum(irkey)->u64;
    if (!(k = emit_isk12(val))) {
      key = ra_allock(as, val, allow);
      rset_clear(allow, key);
    }
  } else if (!irt_ispri(kt)) {
    if (!(k = emit_isk12(irkey->i))) {
      key = ra_alloc1(as, refkey, allow);
      rset_clear(allow, key);
    }
  }

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guardcc(as, CC_AL);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));

  /* Follow hash chain until the end. */
  l_loop = --as->mcp;  /* Placeholder; patched to a B.NE below. */
  emit_n(as, A64I_CMPx^A64I_K12^0, dest);
  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ)
    asm_guardcc(as, CC_EQ);
  else
    emit_cond_branch(as, CC_EQ, l_end);

  if (irt_isnum(kt)) {
    if (isk) {
      /* Assumes -0.0 is already canonicalized to +0.0. */
      if (k)
        emit_n(as, A64I_CMPx^k, tmp);
      else
        emit_nm(as, A64I_CMPx, key, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
    } else {
      /* Non-constant number key: check tag is below LJ_TISNUM, then FCMP. */
      Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
      Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
      rset_clear(allow, tisnum);
      emit_nm(as, A64I_FCMPd, key, ftmp);
      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
      emit_cond_branch(as, CC_LO, l_next);
      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
    }
  } else if (irt_isaddr(kt)) {
    Reg scr;
    if (isk) {
      /* Compare against the fully tagged 64 bit constant key. */
      int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
      scr = ra_allock(as, kk, allow);
      emit_nm(as, A64I_CMPx, scr, tmp);
      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
    } else {
      scr = ra_scratch(as, allow);
      emit_nm(as, A64I_CMPx, tmp, scr);
      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
    }
    rset_clear(allow, scr);
  } else {
    /* Primitive (true/false) key: compare the tag word only. */
    Reg type, scr;
    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
    type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
    scr = ra_scratch(as, rset_clear(allow, type));
    rset_clear(allow, scr);
    emit_nm(as, A64I_CMPw, scr, type);
    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
  }

  /* Patch the chain-follow placeholder to branch back on non-nil next. */
  *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
  if (!isk && irt_isaddr(kt)) {
    /* Build the tagged key value in tmp for the comparisons above. */
    Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
    rset_clear(allow, type);
  }
  /* Load main position relative to tab->node into dest. */
  khash = isk ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
  } else {
    /* dest = node + (hash & hmask) * sizeof(Node) (Node is 24 bytes: 3x+8x). */
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
    emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
    if (isk) {
      Reg tmphash = ra_allock(as, khash, allow);
      emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else if (irt_isstr(kt)) {
      /* Fetch of str->hash is cheaper than ra_allock. */
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
      emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
      emit_dnm(as, A64I_SUBw, dest, dest, tmp);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
      emit_dnm(as, A64I_EORw, dest, dest, tmp);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
      emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
      if (irt_isnum(kt)) {
        emit_dnm(as, A64I_ADDw, dest, dest, dest);
        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
        emit_dm(as, A64I_MOVw, tmp, dest);
        emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
      } else {
        checkmclim(as);
        emit_dm(as, A64I_MOVw, tmp, key);
        emit_dnm(as, A64I_EORw, dest, dest,
                 ra_allock(as, irt_toitype(kt) << 15, allow));
        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
        emit_dm(as, A64I_MOVx, dest, key);
      }
    }
  }
}
|
|
864
|
+
|
|
865
|
+
/* IR_HREFK: access a hash slot at a known node offset, with a guard that
** the slot still holds the expected constant key. Large node offsets
** that don't fit the load addressing mode are added into dest first.
*/
static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  int bigofs = !emit_checkofs(A64I_LDRx, ofs);  /* Offset too big for LDR? */
  RegSet allow = RSET_GPR;
  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, allow);
  Reg key = ra_scratch(as, rset_clear(allow, node));
  Reg idx = node;
  uint64_t k;
  lua_assert(ofs % sizeof(Node) == 0);
  rset_clear(allow, key);
  if (bigofs) {
    /* Fold the big offset into dest and address the key relative to it. */
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
  }
  asm_guardcc(as, CC_NE);
  /* Build the expected tagged 64 bit key constant. */
  if (irt_ispri(irkey->t)) {
    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
  } else if (irt_isnum(irkey->t)) {
    k = ir_knum(irkey)->u64;
  } else {
    k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
  }
  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow));
  emit_lso(as, A64I_LDRx, key, idx, kofs);
  if (bigofs)
    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
}
|
|
900
|
+
|
|
901
|
+
/* IR_UREF/IR_UREFC: get the address of an upvalue's value.
** Constant functions resolve the upvalue pointer at assembly time.
** IR_UREFC additionally guards that the upvalue is closed and then
** points into the upvalue object itself.
*/
static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, A64I_LDRx, dest, v);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      /* Guard uv->closed != 1 fails; dest = &uv->tv. */
      asm_guardcc(as, CC_NE);
      emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
      emit_opk(as, A64I_ADDx, dest, uv,
               (int32_t)offsetof(GCupval, tv), RSET_GPR);
      emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    emit_lso(as, A64I_LDRx, uv, func,
             (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
  }
}
|
|
924
|
+
|
|
925
|
+
/* IR_FREF generates no code on its own; the field reference is always
** fused into the dependent load/store, so its result must be unused here.
*/
static void asm_fref(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir);
  lua_assert(!ra_used(ir));
}
|
|
930
|
+
|
|
931
|
+
/* IR_STRREF: compute the address of a character inside a string
** (string data starts sizeof(GCstr) bytes past the object header).
*/
static void asm_strref(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg dest = ra_dest(as, ir, allow);
  Reg base = ra_alloc1(as, ir->op1, allow);
  IRIns *irr = IR(ir->op2);
  int32_t ofs = sizeof(GCstr);
  uint32_t m;
  rset_clear(allow, base);
  if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
    /* Constant index that fits a 12 bit immediate: single ADD. */
    emit_dn(as, A64I_ADDx^m, dest, base);
  } else {
    /* dest = base + index, then add the header size. */
    emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
    emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
  }
}
|
|
947
|
+
|
|
948
|
+
/* -- Loads and stores ---------------------------------------------------- */
|
|
949
|
+
|
|
950
|
+
/* Map an IR result type to the matching A64 load instruction. */
static A64Ins asm_fxloadins(IRIns *ir)
{
  IRType t = irt_type(ir->t);
  if (t == IRT_I8) return A64I_LDRB ^ A64I_LS_S;  /* Sign-extending byte load. */
  if (t == IRT_U8) return A64I_LDRB;
  if (t == IRT_I16) return A64I_LDRH ^ A64I_LS_S;  /* Sign-extending halfword load. */
  if (t == IRT_U16) return A64I_LDRH;
  if (t == IRT_NUM) return A64I_LDRd;
  if (t == IRT_FLOAT) return A64I_LDRs;
  /* Everything else is a plain GPR load, sized by the IR type width. */
  return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw;
}
|
|
962
|
+
|
|
963
|
+
/* Map an IR value type to the matching A64 store instruction. */
static A64Ins asm_fxstoreins(IRIns *ir)
{
  IRType t = irt_type(ir->t);
  if (t == IRT_I8 || t == IRT_U8) return A64I_STRB;
  if (t == IRT_I16 || t == IRT_U16) return A64I_STRH;
  if (t == IRT_NUM) return A64I_STRd;
  if (t == IRT_FLOAT) return A64I_STRs;
  /* Everything else is a plain GPR store, sized by the IR type width. */
  return irt_is64(ir->t) ? A64I_STRx : A64I_STRw;
}
|
|
973
|
+
|
|
974
|
+
/* IR_FLOAD: load an object field.
** op1 == REF_NIL addresses global_State fields relative to RID_GL;
** t->array loads for colocated arrays fold into a simple ADD.
*/
static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx;
  A64Ins ai = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op1 == REF_NIL) {
    /* Field of global_State: op2 is a word index relative to GG_OFS(g). */
    idx = RID_GL;
    ofs = (ir->op2 << 2) - GG_OFS(g);
  } else {
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
        emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
        return;
      }
    }
    ofs = field_ofs[ir->op2];
  }
  emit_lso(as, ai, (dest & 31), idx, ofs);
}
|
|
996
|
+
|
|
997
|
+
/* IR_FSTORE: store to an object field addressed by a (fused) IR_FREF.
** A RID_SINK marker means the store has been sunk and emits nothing.
*/
static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);  /* The FREF supplying object and field. */
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
  }
}
|
|
1007
|
+
|
|
1008
|
+
/* IR_XLOAD: load from a raw (C) address, with address-mode fusion. */
static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));  /* A64 handles alignment; flag unused. */
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}
|
|
1014
|
+
|
|
1015
|
+
/* IR_XSTORE: store to a raw (C) address, with address-mode fusion.
** A RID_SINK marker means the store has been sunk and emits nothing.
*/
static void asm_xstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src));
  }
}
|
|
1023
|
+
|
|
1024
|
+
/* IR_ALOAD/IR_HLOAD/IR_ULOAD/IR_VLOAD: load a tagged value and guard on
** its type tag. The boxed 64 bit value is loaded into tmp; depending on
** the result type, the payload is untagged into dest (mask for GC refs,
** FMOV for numbers, 32 bit move for integers).
*/
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  Reg idx, tmp, type;
  int32_t ofs = 0;
  RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
             irt_isint(ir->t));
  if (ra_used(ir)) {
    Reg dest = ra_dest(as, ir, allow);
    tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
    if (irt_isaddr(ir->t)) {
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);  /* Strip tag. */
    } else if (irt_isnum(ir->t)) {
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    } else if (irt_isint(ir->t)) {
      emit_dm(as, A64I_MOVw, dest, dest);  /* Zero hiword. */
    }
  } else {
    tmp = ra_scratch(as, gpr);
  }
  type = ra_scratch(as, rset_clear(gpr, tmp));
  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
  /* Always do the type check, even if the load result is unused. */
  asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
  if (irt_type(ir->t) >= IRT_NUM) {
    lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
    /* Numbers/ints: tag (hiword) must be below LJ_TISNUM. */
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
            ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
  } else if (irt_isaddr(ir->t)) {
    /* GC refs: extract the tag with an arithmetic shift and compare. */
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
  } else if (irt_isnil(ir->t)) {
    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);  /* tmp == -1 (nil). */
  } else {
    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
            ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
  }
  if (ofs & FUSE_REG)
    emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
  else
    emit_lso(as, A64I_LDRx, tmp, idx, ofs);
}
|
|
1066
|
+
|
|
1067
|
+
/* IR_ASTORE/IR_HSTORE/IR_USTORE: store a tagged value.
** Numbers store the FP register directly; other values are re-tagged
** into RID_TMP (or a constant is materialized for primitives) before the
** 64 bit store. A RID_SINK marker means the store has been sunk.
*/
static void asm_ahustore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    RegSet allow = RSET_GPR;
    Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
    int32_t ofs = 0;
    if (irt_isnum(ir->t)) {
      src = ra_alloc1(as, ir->op2, RSET_FPR);
      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
      if (ofs & FUSE_REG)
        emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31));
      else
        emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
    } else {
      if (!irt_ispri(ir->t)) {
        src = ra_alloc1(as, ir->op2, allow);
        rset_clear(allow, src);
        if (irt_isinteger(ir->t))
          type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
        else
          type = ra_allock(as, irt_toitype(ir->t), allow);
      } else {
        /* Primitive: store the fully tagged constant directly. */
        tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
      }
      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
                           A64I_STRx);
      if (ofs & FUSE_REG)
        emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
      else
        emit_lso(as, A64I_STRx, tmp, idx, ofs);
      if (ra_hasreg(src)) {
        /* Combine payload and tag into RID_TMP for the store above. */
        if (irt_isinteger(ir->t)) {
          emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
        } else {
          emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
        }
      }
    }
  }
}
|
|
1107
|
+
|
|
1108
|
+
/* Load from a Lua stack slot (IR_SLOAD), with optional type check and
** optional int<->num conversion. Slot offset is relative to REF_BASE.
*/
static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-2);  /* Slot -> byte offset from BASE. */
  IRType1 t = ir->t;
  Reg dest = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    /* Guarded num->int conversion: load as number, then convert+check. */
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    Reg tmp = RID_NONE;
    if ((ir->op2 & IRSLOAD_CONVERT))
      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
    lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
    base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
    if (irt_isaddr(t)) {
      /* Mask off the type tag to recover the 47-bit GC pointer. */
      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
    } else if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
	emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
	/* If value is already loaded for type check, move it to FPR. */
	if ((ir->op2 & IRSLOAD_TYPECHECK))
	  emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
	else
	  dest = tmp;
	t.irt = IRT_NUM;  /* Check for original type. */
      } else {
	emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
	dest = tmp;
	t.irt = IRT_INT;  /* Check for original type. */
      }
    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
      /* Zero-extend: the typecheck path loads the full 64-bit TValue. */
      emit_dm(as, A64I_MOVw, dest, dest);
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
dotypecheck:
  rset_clear(allow, base);
  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
    Reg tmp;
    if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
      tmp = dest;  /* Reuse the GPR destination for the tag check. */
    } else {
      tmp = ra_scratch(as, allow);
      rset_clear(allow, tmp);
    }
    if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
    /* Need type check, even if the load result is unused. */
    asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
    if (irt_type(t) >= IRT_NUM) {
      lua_assert(irt_isinteger(t) || irt_isnum(t));
      /* Compare the high tag bits against LJ_TISNUM. */
      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
	      ra_allock(as, LJ_TISNUM << 15, allow), tmp);
    } else if (irt_isnil(t)) {
      /* CMN with 1: nil is encoded as all-ones. */
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
    } else if (irt_ispri(t)) {
      emit_nm(as, A64I_CMPx,
	      ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
    } else {
      /* GC types: extract the tag with ASR #47 and compare. */
      Reg type = ra_scratch(as, allow);
      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
    }
    emit_lso(as, A64I_LDRx, tmp, base, ofs);
    return;
  }
  if (ra_hasreg(dest)) {
    /* Untyped load: pick width by result type (LJ_BE: int in high half). */
    emit_lso(as, irt_isnum(t) ? A64I_LDRd :
	     (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
	     ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
  }
}
|
|
1186
|
+
|
|
1187
|
+
/* -- Allocations --------------------------------------------------------- */
|
|
1188
|
+
|
|
1189
|
+
#if LJ_HASFFI
/* Allocate a cdata object (IR_CNEW/IR_CNEWI).
** Three shapes: immutable cdata with payload (CNEWI), VLA/VLS/aligned
** cdata via lj_cdata_newv, or plain fixed-size via lj_mem_newgco.
*/
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  as->gcsteps++;
  asm_setupresult(as, ir, ci);  /* GCcdata * */
  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    int32_t ofs = sizeof(GCcdata);
    Reg r = ra_alloc1(as, ir->op2, allow);
    lua_assert(sz == 4 || sz == 8);
    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;  /* lj_cdata_newv initializes the header itself. */
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  {
    /* Small ids fit a MOVZ into X1; larger ones need a constant register. */
    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
    emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
    if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
  }
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
	       ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif
|
|
1237
|
+
|
|
1238
|
+
/* -- Write barriers ------------------------------------------------------ */
|
|
1239
|
+
|
|
1240
|
+
/* Table write barrier (IR_TBAR): if the table is black, clear its black
** bit and link it onto g->gc.grayagain; otherwise skip to l_end.
** NOTE(review): emit_* calls appear in reverse execution order.
*/
static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
		     rset_exclude(rset_exclude(RSET_GPR, tab), link));
  Reg mark = RID_TMP;
  MCLabel l_end = emit_label(as);
  /* tab->gclist = old grayagain head; store cleared mark byte. */
  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
  /* g->gc.grayagain = tab. */
  emit_lso(as, A64I_STRx, tab, gr,
	   (int32_t)offsetof(global_State, gc.grayagain));
  emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
  emit_lso(as, A64I_LDRx, link, gr,
	   (int32_t)offsetof(global_State, gc.grayagain));
  /* Branch over the barrier body if the black bit is clear. */
  emit_cond_branch(as, CC_EQ, l_end);
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
  emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
}
|
|
1259
|
+
|
|
1260
|
+
/* Object write barrier (IR_OBAR) for upvalues: call lj_gc_barrieruv when
** the upvalue is black and the stored value is white; skip otherwise.
** NOTE(review): emit_* calls appear in reverse execution order.
*/
static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  RegSet allow = RSET_GPR;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(allow, obj));
  /* Skip the call unless (upvalue black) && (value white). */
  emit_cond_branch(as, CC_EQ, l_end);
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
  emit_cond_branch(as, CC_EQ, l_end);
  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  /* obj points at the uv's TValue; back up to the GCupval mark byte. */
  emit_lso(as, A64I_LDRB, tmp, obj,
	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
|
|
1286
|
+
|
|
1287
|
+
/* -- Arithmetic and logic operations ------------------------------------- */
|
|
1288
|
+
|
|
1289
|
+
/* Emit a two-operand FP arithmetic instruction for ir, using opcode ai. */
static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg rd = ra_dest(as, ir, RSET_FPR);
  Reg pair = ra_alloc2(as, ir, RSET_FPR);  /* Packed: right<<8 | left. */
  Reg rn = (pair & 255);
  Reg rm = (pair >> 8);
  emit_dnm(as, ai, (rd & 31), (rn & 31), (rm & 31));
}
|
|
1296
|
+
|
|
1297
|
+
/* Emit a single-operand FP instruction for ir, using opcode ai. */
static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
{
  Reg rd = ra_dest(as, ir, RSET_FPR);
  Reg rn = ra_hintalloc(as, ir->op1, rd, RSET_FPR);
  emit_dn(as, ai, (rd & 31), (rn & 31));
}
|
|
1303
|
+
|
|
1304
|
+
/* Dispatch an IR_FPMATH op: inline sqrt and the rounding ops, try to
** fuse exp2 with a preceding pow, and fall back to a runtime call.
*/
static void asm_fpmath(ASMState *as, IRIns *ir)
{
  IRFPMathOp op = (IRFPMathOp)ir->op2;
  if (op == IRFPM_SQRT) {
    asm_fpunary(as, ir, A64I_FSQRTd);
    return;
  }
  if (op <= IRFPM_TRUNC) {
    /* FLOOR/CEIL/TRUNC map to the FRINT rounding-mode variants. */
    A64Ins rint = (op == IRFPM_FLOOR) ? A64I_FRINTMd :
		  (op == IRFPM_CEIL) ? A64I_FRINTPd : A64I_FRINTZd;
    asm_fpunary(as, ir, rint);
    return;
  }
  if (op == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
    return;  /* Fused into a pow call. */
  asm_callid(as, ir, IRCALL_lj_vm_floor + op);
}
|
|
1318
|
+
|
|
1319
|
+
/* True if the instruction at ref could be fused as an operand by its user:
** a shift, a self-add (x+x, i.e. a shift by 1) or a sign-extending
** int->i64 conversion. Factored out of asm_swapops, which previously
** duplicated this condition verbatim for both operands.
*/
static int asm_op_fusable(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  return (ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
	 (ir->o == IR_ADD && ir->op1 == ir->op2) ||
	 (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT));
}

/* Decide whether to swap the operands of a commutative operation.
** Returns 1 when lref/rref should be exchanged: constants and fusable
** operands belong on the right, where asm_fuseopm can absorb them.
*/
static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
{
  if (irref_isk(rref))
    return 0;  /* Don't swap constants to the left. */
  if (irref_isk(lref))
    return 1;  /* But swap constants to the right. */
  if (asm_op_fusable(as, rref))
    return 0;  /* Don't swap fusable operands to the left. */
  if (asm_op_fusable(as, lref))
    return 1;  /* But swap fusable operands to the right. */
  return 0;  /* Otherwise don't swap. */
}
|
|
1338
|
+
|
|
1339
|
+
static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
|
|
1340
|
+
{
|
|
1341
|
+
IRRef lref = ir->op1, rref = ir->op2;
|
|
1342
|
+
Reg left, dest = ra_dest(as, ir, RSET_GPR);
|
|
1343
|
+
uint32_t m;
|
|
1344
|
+
if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
|
|
1345
|
+
IRRef tmp = lref; lref = rref; rref = tmp;
|
|
1346
|
+
}
|
|
1347
|
+
left = ra_hintalloc(as, lref, dest, RSET_GPR);
|
|
1348
|
+
if (irt_is64(ir->t)) ai |= A64I_X;
|
|
1349
|
+
m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
|
|
1350
|
+
if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */
|
|
1351
|
+
asm_guardcc(as, CC_VS);
|
|
1352
|
+
ai |= A64I_S;
|
|
1353
|
+
}
|
|
1354
|
+
emit_dn(as, ai^m, dest, left);
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
/* Like asm_intop, but if the last emitted instruction was a compare with
** zero (flagmcp), drop it and use the flag-setting form of this op instead.
*/
static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
{
  if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
    as->flagmcp = NULL;
    as->mcp++;     /* Discard the compare (code grows downwards). */
    ai |= A64I_S;  /* This op now sets the flags itself. */
  }
  asm_intop(as, ir, ai);
}
|
|
1366
|
+
|
|
1367
|
+
/* Integer negation: NEG (32- or 64-bit per result type). */
static void asm_intneg(ASMState *as, IRIns *ir)
{
  Reg rd = ra_dest(as, ir, RSET_GPR);
  Reg rm = ra_hintalloc(as, ir->op1, rd, RSET_GPR);
  A64Ins ai = irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw;
  emit_dm(as, ai, rd, rm);
}
|
|
1373
|
+
|
|
1374
|
+
/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
/* Integer multiply. For IR_MULOV: widen via SMULL, then guard that the
** 64-bit product sign-extends from 32 bits (i.e. no signed overflow).
** NOTE(review): emit_* calls appear in reverse execution order.
*/
static void asm_intmul(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
  Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  if (irt_isguard(ir->t)) {  /* IR_MULOV */
    asm_guardcc(as, CC_NE);
    emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
    /* Overflow iff (product >> 32) != (low word >> 31 sign-extended). */
    emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
    emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
    emit_dnm(as, A64I_SMULL, dest, right, left);
  } else {
    emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
  }
}
|
|
1390
|
+
|
|
1391
|
+
/* IR_ADD: integer add, or FP add with an attempted FMADD fusion. */
static void asm_add(ASMState *as, IRIns *ir)
{
  if (!irt_isnum(ir->t)) {
    asm_intop_s(as, ir, A64I_ADDw);
  } else if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) {
    asm_fparith(as, ir, A64I_FADDd);
  }
}
|
|
1400
|
+
|
|
1401
|
+
/* IR_SUB: integer subtract, or FP subtract with an attempted FMSUB fusion. */
static void asm_sub(ASMState *as, IRIns *ir)
{
  if (!irt_isnum(ir->t)) {
    asm_intop_s(as, ir, A64I_SUBw);
  } else if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) {
    asm_fparith(as, ir, A64I_FSUBd);
  }
}
|
|
1410
|
+
|
|
1411
|
+
/* IR_MUL: FP multiply or integer multiply. */
static void asm_mul(ASMState *as, IRIns *ir)
{
  if (!irt_isnum(ir->t))
    asm_intmul(as, ir);
  else
    asm_fparith(as, ir, A64I_FMULd);
}
|
|
1419
|
+
|
|
1420
|
+
/* IR_DIV: FP divide, or (with FFI) a call for 64-bit integer division. */
static void asm_div(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI
  if (!irt_isnum(ir->t)) {
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					  IRCALL_lj_carith_divu64);
    return;
  }
#endif
  asm_fparith(as, ir, A64I_FDIVd);
}
|
|
1430
|
+
|
|
1431
|
+
/* IR_POW: call lj_vm_powi, or (with FFI) the 64-bit integer pow helpers. */
static void asm_pow(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI
  if (!irt_isnum(ir->t)) {
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					  IRCALL_lj_carith_powu64);
    return;
  }
#endif
  asm_callid(as, ir, IRCALL_lj_vm_powi);
}
|
|
1441
|
+
|
|
1442
|
+
/* Overflow-checked variants share the plain handlers, which detect the
** guard via irt_isguard() and switch to the flag-setting forms.
*/
#define asm_addov(as, ir)	asm_add(as, ir)
#define asm_subov(as, ir)	asm_sub(as, ir)
#define asm_mulov(as, ir)	asm_mul(as, ir)

/* abs is a single FP instruction; atan2/ldexp go through libm calls. */
#define asm_abs(as, ir)		asm_fpunary(as, ir, A64I_FABS)
#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
|
|
1449
|
+
|
|
1450
|
+
/* IR_MOD: call lj_vm_modi, or (with FFI) the 64-bit integer mod helpers. */
static void asm_mod(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI
  if (!irt_isint(ir->t)) {
    asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					  IRCALL_lj_carith_modu64);
    return;
  }
#endif
  asm_callid(as, ir, IRCALL_lj_vm_modi);
}
|
|
1460
|
+
|
|
1461
|
+
/* IR_NEG: FP or integer negation by result type. */
static void asm_neg(ASMState *as, IRIns *ir)
{
  if (!irt_isnum(ir->t))
    asm_intneg(as, ir);
  else
    asm_fpunary(as, ir, A64I_FNEGd);
}
|
|
1469
|
+
|
|
1470
|
+
/* IR_BAND: try to fuse with an adjacent shift first; otherwise emit AND,
** absorbing a pending cmp-with-zero by switching to ANDS.
*/
static void asm_band(ASMState *as, IRIns *ir)
{
  A64Ins ai = A64I_ANDw;
  if (asm_fuseandshift(as, ir))
    return;  /* Fused into UBFM/SBFM. */
  if (as->flagmcp == as->mcp) {
    /* Try to drop cmp r, #0. */
    as->flagmcp = NULL;
    as->mcp++;        /* Discard the compare (code grows downwards). */
    ai = A64I_ANDSw;  /* ANDS sets the flags the compare produced. */
  }
  asm_intop(as, ir, ai);
}
|
|
1483
|
+
|
|
1484
|
+
/* IR_BOR/IR_BXOR: fuse a BNOT operand into ORN/EON when possible,
** otherwise emit a plain ORR/EOR via asm_intop.
*/
static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
{
  IRRef lref = ir->op1, rref = ir->op2;
  IRIns *irl = IR(lref), *irr = IR(rref);
  if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
      (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
    Reg left, dest = ra_dest(as, ir, RSET_GPR);
    uint32_t m;
    if (irl->o == IR_BNOT) {
      /* Put the BNOT on the right so its operand can be fused. */
      IRRef tmp = lref; lref = rref; rref = tmp;
    }
    left = ra_alloc1(as, lref, RSET_GPR);
    ai |= A64I_ON;  /* ORR->ORN / EOR->EON: invert the second operand. */
    if (irt_is64(ir->t)) ai |= A64I_X;
    /* Fuse the BNOT's *operand*, skipping the BNOT itself. */
    m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
    emit_dn(as, ai^m, dest, left);
  } else {
    asm_intop(as, ir, ai);
  }
}
|
|
1504
|
+
|
|
1505
|
+
/* IR_BOR: try the or-with-shift fusion first, else the generic path. */
static void asm_bor(ASMState *as, IRIns *ir)
{
  if (!asm_fuseorshift(as, ir))
    asm_borbxor(as, ir, A64I_ORRw);
}
|
|
1511
|
+
|
|
1512
|
+
/* IR_BXOR shares the OR/XOR path (EOR, fusing BNOT into EON). */
#define asm_bxor(as, ir)	asm_borbxor(as, ir, A64I_EORw)
|
|
1513
|
+
|
|
1514
|
+
/* IR_BNOT: bitwise NOT via MVN, fusing a shifted operand if possible.
** Note: the operand is fused before A64I_X is or-ed in; the width bit
** only affects the final opcode, not the fused operand encoding.
*/
static void asm_bnot(ASMState *as, IRIns *ir)
{
  A64Ins ai = A64I_MVNw;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
  if (irt_is64(ir->t)) ai |= A64I_X;
  emit_d(as, ai^m, dest);
}
|
|
1522
|
+
|
|
1523
|
+
/* IR_BSWAP: byte-reverse with REV (32- or 64-bit per result type). */
static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg rd = ra_dest(as, ir, RSET_GPR);
  Reg rn = ra_alloc1(as, ir->op1, RSET_GPR);
  A64Ins ai = irt_is64(ir->t) ? A64I_REVx : A64I_REVw;
  emit_dn(as, ai, rd, rn);
}
|
|
1529
|
+
|
|
1530
|
+
/* Emit a shift/rotate. Constant shifts are encoded as UBFM/SBFM/EXTR
** immediates; a BSHL feeding a constant BSHR/BSAR is fused into a single
** UBFX/SBFX/UBFIZ/SBFIZ. Variable shifts use the register-shift forms.
*/
static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
{
  int32_t shmask = irt_is64(ir->t) ? 63 : 31;
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    Reg left, dest = ra_dest(as, ir, RSET_GPR);
    int32_t shift = (IR(ir->op2)->i & shmask);
    IRIns *irl = IR(ir->op1);
    if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;  /* 64-bit opcode. */

    /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
    if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
      if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
	int32_t shift2 = (IR(irl->op2)->i & shmask);
	shift = ((shift - shift2) & shmask);  /* Net right shift. */
	shmask -= shift2;  /* Shrink the extracted field accordingly. */
	ir = irl;          /* Take the operand from the fused BSHL. */
      }
    }

    left = ra_alloc1(as, ir->op1, RSET_GPR);
    switch (sh) {
    case A64SH_LSL:
      /* LSL #s == UBFM with immr = -s mod width, imms = width-1-s. */
      emit_dn(as, ai | A64F_IMMS(shmask-shift) |
	      A64F_IMMR((shmask-shift+1)&shmask), dest, left);
      break;
    case A64SH_LSR: case A64SH_ASR:
      emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
      break;
    case A64SH_ROR:
      /* ROR by constant == EXTR with both sources the same register. */
      emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
      break;
    }
  } else {  /* Variable-length shifts. */
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
  }
}
|
|
1569
|
+
|
|
1570
|
+
/* Shift/rotate ops all route through asm_bitshift with the base 32-bit
** bitfield/extract opcode; rotate-left has no encoding and never occurs.
*/
#define asm_bshl(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
#define asm_bshr(as, ir)	asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
#define asm_bsar(as, ir)	asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
#define asm_bror(as, ir)	asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
#define asm_brol(as, ir)	lua_assert(0)
|
|
1575
|
+
|
|
1576
|
+
/* Integer min/max: CMP then CSEL on the given condition.
** (Emitted in reverse: CSEL first, then the CMP it consumes.)
*/
static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
{
  Reg rd = ra_dest(as, ir, RSET_GPR);
  Reg rn = ra_hintalloc(as, ir->op1, rd, RSET_GPR);
  Reg rm = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, rn));
  emit_dnm(as, A64I_CSELw|A64F_CC(cc), rd, rn, rm);
  emit_nm(as, A64I_CMPw, rn, rm);
}
|
|
1584
|
+
|
|
1585
|
+
/* FP min/max: FCMP then FCSEL on the given condition.
** (Emitted in reverse: FCSEL first, then the FCMP it consumes.)
*/
static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
{
  Reg rd = (ra_dest(as, ir, RSET_FPR) & 31);
  Reg pair = ra_alloc2(as, ir, RSET_FPR);  /* Packed: right<<8 | left. */
  Reg rn = (pair & 31);
  Reg rm = ((pair >> 8) & 31);
  emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), rd, rn, rm);
  emit_nm(as, A64I_FCMPd, rn, rm);
}
|
|
1593
|
+
|
|
1594
|
+
/* IR_MIN/IR_MAX dispatcher: pick the FP or integer variant by type. */
static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
{
  if (!irt_isnum(ir->t))
    asm_intmin_max(as, ir, cc);
  else
    asm_fpmin_max(as, ir, fcc);
}
|
|
1601
|
+
|
|
1602
|
+
/* max uses GT/HI, min uses LT/LO (unsigned FP conds absorb NaN ordering). */
#define asm_max(as, ir)		asm_min_max(as, ir, CC_GT, CC_HI)
#define asm_min(as, ir)		asm_min_max(as, ir, CC_LT, CC_LO)
|
|
1604
|
+
|
|
1605
|
+
/* -- Comparisons --------------------------------------------------------- */
|
|
1606
|
+
|
|
1607
|
+
/* Map of comparisons to flags. ORDER IR. */
/* Each entry packs two *negated* condition codes (we guard on the exit,
** i.e. when the comparison fails): low nibble = integer CC, high nibble
** = FP CC. The "swp" column marks ops whose FP operands are swapped.
*/
static const uint8_t asm_compmap[IR_ABC+1] = {
  /* op  FP swp  int cc   FP cc */
  /* LT       */ CC_GE + (CC_HS << 4),
  /* GE    x  */ CC_LT + (CC_HI << 4),
  /* LE       */ CC_GT + (CC_HI << 4),
  /* GT    x  */ CC_LE + (CC_HS << 4),
  /* ULT   x  */ CC_HS + (CC_LS << 4),
  /* UGE      */ CC_LO + (CC_LO << 4),
  /* ULE   x  */ CC_HI + (CC_LO << 4),
  /* UGT      */ CC_LS + (CC_LS << 4),
  /* EQ       */ CC_NE + (CC_NE << 4),
  /* NE       */ CC_EQ + (CC_EQ << 4),
  /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
};
|
|
1622
|
+
|
|
1623
|
+
/* FP comparisons. */
/* Emit an FP compare and guard on its negated condition. A compare
** against the constant +0.0 uses FCMP with zero; otherwise both operands
** are allocated and possibly swapped per asm_compmap's "swp" column.
*/
static void asm_fpcomp(ASMState *as, IRIns *ir)
{
  Reg left, right;
  A64Ins ai;
  /* Derive the swap flag from the opcode bit pattern (see asm_compmap). */
  int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
  if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
    left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
    right = 0;
    ai = A64I_FCMPZd;  /* Compare against literal #0.0. */
  } else {
    left = ra_alloc2(as, ir, RSET_FPR);  /* Packed: right<<8 | left. */
    if (swp) {
      right = (left & 31); left = ((left >> 8) & 31);
    } else {
      right = ((left >> 8) & 31); left &= 31;
    }
    ai = A64I_FCMPd;
  }
  asm_guardcc(as, (asm_compmap[ir->o] >> 4));  /* High nibble = FP CC. */
  emit_nm(as, ai, left, right);
}
|
|
1645
|
+
|
|
1646
|
+
/* Integer comparisons. */
/* Emit an integer compare and guard on its negated condition, with
** several fusions against a constant-zero right operand:
**  - BAND + cmp #0 + branch  ->  TBZ/TBNZ (single-bit mask) or TST
**  - cmp #0 + beq/bne        ->  CBZ/CBNZ
**  - signed cmp #0           ->  may be absorbed by a later flag-setting
**                                op via as->flagmcp (see asm_intop_s).
*/
static void asm_intcomp(ASMState *as, IRIns *ir)
{
  A64CC oldcc, cc = (asm_compmap[ir->o] & 15);  /* Low nibble = int CC. */
  A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
  IRRef lref = ir->op1, rref = ir->op2;
  Reg left;
  uint32_t m;
  int cmpprev0 = 0;
  lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
	     irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
  if (asm_swapops(as, lref, rref)) {
    IRRef tmp = lref; lref = rref; rref = tmp;
    if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
    else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
  }
  oldcc = cc;
  if (irref_isk(rref) && get_k64val(IR(rref)) == 0) {
    IRIns *irl = IR(lref);
    if (cc == CC_GE) cc = CC_PL;  /* Sign-bit conditions for cmp #0. */
    else if (cc == CC_LT) cc = CC_MI;
    else if (cc > CC_NE) goto nocombine;  /* Other conds don't work with tst. */
    cmpprev0 = (irl+1 == ir);  /* Left operand is the previous ins? */
    /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
    if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
      IRRef blref = irl->op1, brref = irl->op2;
      uint32_t m2 = 0;
      Reg bleft;
      if (asm_swapops(as, blref, brref)) {
	Reg tmp = blref; blref = brref; brref = tmp;
      }
      if (irref_isk(brref)) {
	uint64_t k = get_k64val(IR(brref));
	if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
	  /* Single-bit mask: test that bit directly with TBZ/TBNZ. */
	  asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
		       ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
	  return;
	}
	m2 = emit_isk13(k, irt_is64(irl->t));  /* Try logical immediate. */
      }
      bleft = ra_alloc1(as, blref, RSET_GPR);
      ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
      if (!m2)
	m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
      asm_guardcc(as, cc);
      emit_n(as, ai^m2, bleft);
      return;
    }
    if (cc == CC_EQ || cc == CC_NE) {
      /* Combine cmp-bcc into cbz/cbnz. */
      ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
      if (irt_is64(ir->t)) ai |= A64I_X;
      asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
      return;
    }
  }
nocombine:
  left = ra_alloc1(as, lref, RSET_GPR);
  m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
  asm_guardcc(as, cc);
  emit_n(as, ai^m, left);
  /* Signed comparison with zero and referencing previous ins? */
  if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
    as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
}
|
|
1711
|
+
|
|
1712
|
+
/* IR comparison dispatcher: FP or integer compare by operand type. */
static void asm_comp(ASMState *as, IRIns *ir)
{
  if (!irt_isnum(ir->t))
    asm_intcomp(as, ir);
  else
    asm_fpcomp(as, ir);
}
|
|
1719
|
+
|
|
1720
|
+
/* IR_EQ/IR_NE share the generic comparison path. */
#define asm_equal(as, ir)	asm_comp(as, ir)
|
|
1721
|
+
|
|
1722
|
+
/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
|
|
1723
|
+
|
|
1724
|
+
/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
|
|
1725
|
+
static void asm_hiop(ASMState *as, IRIns *ir)
|
|
1726
|
+
{
|
|
1727
|
+
UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */
|
|
1728
|
+
}
|
|
1729
|
+
|
|
1730
|
+
/* -- Profiling ----------------------------------------------------------- */
|
|
1731
|
+
|
|
1732
|
+
/* IR_PROF: exit the trace when the profiler hook flag is set.
** Loads g->hookmask, tests HOOK_PROFILE and guards on NE.
*/
static void asm_prof(ASMState *as, IRIns *ir)
{
  uint32_t k = emit_isk13(HOOK_PROFILE, 0);  /* As a logical immediate. */
  lua_assert(k != 0);
  UNUSED(ir);
  asm_guardcc(as, CC_NE);
  emit_n(as, A64I_TSTw^k, RID_TMP);
  emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
}
|
|
1741
|
+
|
|
1742
|
+
/* -- Stack handling ------------------------------------------------------ */
|
|
1743
|
+
|
|
1744
|
+
/* Check Lua stack size for overflow. Use exit handler as fallback. */
/* Computes L->maxstack - BASE and exits to exitno unless at least
** 8*topslot bytes remain. In a side-trace head (irp != NULL) registers
** must not be spilled, so a spilled parent BASE may temporarily borrow
** RID_RET, saved/restored via [sp, #0].
*/
static void asm_stack_check(ASMState *as, BCReg topslot,
			    IRIns *irp, RegSet allow, ExitNo exitno)
{
  Reg pbase;
  uint32_t k;
  if (irp) {
    if (!ra_hasspill(irp->s)) {
      pbase = irp->r;  /* Parent BASE is live in a register. */
      lua_assert(ra_hasreg(pbase));
    } else if (allow) {
      pbase = rset_pickbot(allow);  /* Any free register will do. */
    } else {
      /* No free register: reuse RID_RET around the check. */
      pbase = RID_RET;
      emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0);  /* Restore temp register. */
    }
  } else {
    pbase = RID_BASE;
  }
  emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
  k = emit_isk12((8*topslot));  /* Required headroom in bytes. */
  lua_assert(k);
  emit_n(as, A64I_CMPx^k, RID_TMP);
  emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
  emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
	   (int32_t)offsetof(lua_State, maxstack));
  if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
    if (ra_hasspill(irp->s))
      emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
    emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
    if (ra_hasspill(irp->s) && !allow)
      emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0);  /* Save temp register. */
  } else {
    emit_getgl(as, RID_TMP, cur_L);
  }
}
|
|
1780
|
+
|
|
1781
|
+
/* Restore Lua stack from on-trace state. */
/* Walk a snapshot and store every modified slot back to the Lua stack:
** numbers as raw doubles, everything else as a full tagged TValue.
*/
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
#ifdef LUA_USE_ASSERT
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
#endif
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);  /* Slot -> offset from BASE. */
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;  /* Slot is unchanged on trace; nothing to write back. */
    if (irt_isnum(ir->t)) {
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
    } else {
      asm_tvstore64(as, RID_BASE, ofs, ref);
    }
    checkmclim(as);
  }
  lua_assert(map + nent == flinks);
}
|
|
1808
|
+
|
|
1809
|
+
/* -- GC handling --------------------------------------------------------- */
|
|
1810
|
+
|
|
1811
|
+
/* Check GC threshold and do one or more GC steps. */
/* Compares g->gc.total against g->gc.threshold and, when exceeded, calls
** lj_gc_step_jit(g, steps); exits the trace if the GC entered a phase
** where on-trace objects can't be kept in sync.
** NOTE(review): emit_* calls appear in reverse execution order.
*/
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp1, tmp2;
  ra_evictset(as, RSET_SCRATCH);  /* The call clobbers all scratch regs. */
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcnb(as, A64I_CBNZ, RID_RET);  /* Assumes asm_snap_prep() is done. */
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp2, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_cond_branch(as, CC_LS, l_end);
  emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
  emit_lso(as, A64I_LDRx, tmp2, tmp1,
	   (int32_t)offsetof(global_State, gc.threshold));
  emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
	   (int32_t)offsetof(global_State, gc.total));
  ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
  as->gcsteps = 0;
  checkmclim(as);
}
|
|
1839
|
+
|
|
1840
|
+
/* -- Loop handling ------------------------------------------------------- */
|
|
1841
|
+
|
|
1842
|
+
/* Fixup the loop branch. */
/* Patch the branch at the end of the trace to jump back to the loop
** start. Inverted branches (tbz/tbnz vs bcc/cbz) carry their offset in
** a 14- or 19-bit field; the plain case rewrites the final B.
*/
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    /* tbz/tbnz (opcode 0x36xxxxxx) has a 14-bit offset, others 19-bit. */
    uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
    ptrdiff_t delta = target - (p - 2);
    /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
    p[-2] |= ((uint32_t)delta & mask) << 5;
  } else {
    ptrdiff_t delta = target - (p - 1);
    p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu);
  }
}
|
|
1857
|
+
|
|
1858
|
+
/* -- Head of trace ------------------------------------------------------- */
|
|
1859
|
+
|
|
1860
|
+
/* Reload L register from g->cur_L. */
|
|
1861
|
+
static void asm_head_lreg(ASMState *as)
|
|
1862
|
+
{
|
|
1863
|
+
IRIns *ir = IR(ASMREF_L);
|
|
1864
|
+
if (ra_used(ir)) {
|
|
1865
|
+
Reg r = ra_dest(as, ir, RSET_GPR);
|
|
1866
|
+
emit_getgl(as, r, cur_L);
|
|
1867
|
+
ra_evictk(as);
|
|
1868
|
+
}
|
|
1869
|
+
}
|
|
1870
|
+
|
|
1871
|
+
/* Coalesce BASE register for a root trace. */
|
|
1872
|
+
static void asm_head_root_base(ASMState *as)
|
|
1873
|
+
{
|
|
1874
|
+
IRIns *ir;
|
|
1875
|
+
asm_head_lreg(as);
|
|
1876
|
+
ir = IR(REF_BASE);
|
|
1877
|
+
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
|
|
1878
|
+
ra_spill(as, ir);
|
|
1879
|
+
ra_destreg(as, ir, RID_BASE);
|
|
1880
|
+
}
|
|
1881
|
+
|
|
1882
|
+
/* Coalesce BASE register for a side trace.
** irp: presumably the parent trace's BASE instruction -- verify at callers.
** allow: set of candidate registers; the one taken by BASE is removed and
** the remaining set is returned.
*/
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir;
  asm_head_lreg(as);
  ir = IR(REF_BASE);
  /* Spill BASE if its register is modified by the trace or ir is marked. */
  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
    ra_spill(as, ir);
  if (ra_hasspill(irp->s)) {
    /* Parent keeps BASE in a spill slot: any allowed register will do. */
    rset_clear(allow, ra_dest(as, ir, allow));
  } else {
    /* Parent keeps BASE in a register: coalesce into that same register. */
    Reg r = irp->r;
    lua_assert(ra_hasreg(r));
    rset_clear(allow, r);
    /* If r currently holds a live value, restore it elsewhere first. */
    if (r != ir->r && !rset_test(as->freeset, r))
      ra_restore(as, regcost_ref(as->cost[r]));
    ra_destreg(as, ir, r);
  }
  return allow;
}
|
|
1902
|
+
|
|
1903
|
+
/* -- Tail of trace ------------------------------------------------------- */
|
|
1904
|
+
|
|
1905
|
+
/* Fixup the tail code.
** Fills the two slots reserved by asm_tail_prep(): a stack-pointer
** adjustment (or NOP) and the final exit branch to the linked trace
** (lnk != 0) or back to the interpreter (lnk == 0).
*/
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
  int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
  if (spadj == 0) {
    /* No adjustment needed: drop the reserved slot, keep a NOP filler. */
    *--p = A64I_LE(A64I_NOP);
    as->mctop = p;
  } else {
    /* Patch stack adjustment. */
    uint32_t k = emit_isk12(spadj);  /* Must encode as a 12 bit immediate. */
    lua_assert(k);
    p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu);  /* 26 bit branch offset. */
}
|
|
1925
|
+
|
|
1926
|
+
/* Prepare tail of code. */
|
|
1927
|
+
static void asm_tail_prep(ASMState *as)
|
|
1928
|
+
{
|
|
1929
|
+
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
|
|
1930
|
+
if (as->loopref) {
|
|
1931
|
+
as->invmcp = as->mcp = p;
|
|
1932
|
+
} else {
|
|
1933
|
+
as->mcp = p-1; /* Leave room for stack pointer adjustment. */
|
|
1934
|
+
as->invmcp = NULL;
|
|
1935
|
+
}
|
|
1936
|
+
*p = 0; /* Prevent load/store merging. */
|
|
1937
|
+
}
|
|
1938
|
+
|
|
1939
|
+
/* -- Trace setup --------------------------------------------------------- */
|
|
1940
|
+
|
|
1941
|
+
/* Ensure there are enough stack slots for call arguments. */
|
|
1942
|
+
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
|
1943
|
+
{
|
|
1944
|
+
IRRef args[CCI_NARGS_MAX*2];
|
|
1945
|
+
uint32_t i, nargs = CCI_XNARGS(ci);
|
|
1946
|
+
int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
|
|
1947
|
+
asm_collectargs(as, ir, ci, args);
|
|
1948
|
+
for (i = 0; i < nargs; i++) {
|
|
1949
|
+
if (args[i] && irt_isfp(IR(args[i])->t)) {
|
|
1950
|
+
if (nfpr > 0) nfpr--; else nslots += 2;
|
|
1951
|
+
} else {
|
|
1952
|
+
if (ngpr > 0) ngpr--; else nslots += 2;
|
|
1953
|
+
}
|
|
1954
|
+
}
|
|
1955
|
+
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
|
|
1956
|
+
as->evenspill = nslots;
|
|
1957
|
+
return REGSP_HINT(RID_RET);
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
/* Allocate exit stubs for all snapshots of the trace. */
static void asm_setup_target(ASMState *as)
{
  ExitNo nexits = as->T->nsnap;
  if (as->parent)
    nexits++;  /* May need extra exit for asm_stack_check on side traces. */
  asm_exitstub_setup(as, nexits);
}
|
|
1965
|
+
|
|
1966
|
+
#if LJ_BE
|
|
1967
|
+
/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
|
|
1968
|
+
static void asm_mcode_fixup(MCode *mcode, MSize size)
|
|
1969
|
+
{
|
|
1970
|
+
MCode *pe = (MCode *)((char *)mcode + size);
|
|
1971
|
+
while (mcode < pe) {
|
|
1972
|
+
MCode ins = *mcode;
|
|
1973
|
+
*mcode++ = lj_bswap(ins);
|
|
1974
|
+
}
|
|
1975
|
+
}
|
|
1976
|
+
#define LJ_TARGET_MCODE_FIXUP 1
|
|
1977
|
+
#endif
|
|
1978
|
+
|
|
1979
|
+
/* -- Trace patching ------------------------------------------------------ */
|
|
1980
|
+
|
|
1981
|
+
/* Patch exit jumps of existing machine code to a new target. */
|
|
1982
|
+
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
|
|
1983
|
+
{
|
|
1984
|
+
MCode *p = T->mcode;
|
|
1985
|
+
MCode *pe = (MCode *)((char *)p + T->szmcode);
|
|
1986
|
+
MCode *cstart = NULL, *cend = p;
|
|
1987
|
+
MCode *mcarea = lj_mcode_patch(J, p, 0);
|
|
1988
|
+
MCode *px = exitstub_trace_addr(T, exitno);
|
|
1989
|
+
for (; p < pe; p++) {
|
|
1990
|
+
/* Look for exitstub branch, replace with branch to target. */
|
|
1991
|
+
MCode ins = A64I_LE(*p);
|
|
1992
|
+
if ((ins & 0xff000000u) == 0x54000000u &&
|
|
1993
|
+
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
|
|
1994
|
+
/* Patch bcc exitstub. */
|
|
1995
|
+
*p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
|
|
1996
|
+
cend = p+1;
|
|
1997
|
+
if (!cstart) cstart = p;
|
|
1998
|
+
} else if ((ins & 0xfc000000u) == 0x14000000u &&
|
|
1999
|
+
((ins ^ (px-p)) & 0x03ffffffu) == 0) {
|
|
2000
|
+
/* Patch b exitstub. */
|
|
2001
|
+
*p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
|
|
2002
|
+
cend = p+1;
|
|
2003
|
+
if (!cstart) cstart = p;
|
|
2004
|
+
} else if ((ins & 0x7e000000u) == 0x34000000u &&
|
|
2005
|
+
((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
|
|
2006
|
+
/* Patch cbz/cbnz exitstub. */
|
|
2007
|
+
*p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
|
|
2008
|
+
cend = p+1;
|
|
2009
|
+
if (!cstart) cstart = p;
|
|
2010
|
+
} else if ((ins & 0x7e000000u) == 0x36000000u &&
|
|
2011
|
+
((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
|
|
2012
|
+
/* Patch tbz/tbnz exitstub. */
|
|
2013
|
+
*p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
|
|
2014
|
+
cend = p+1;
|
|
2015
|
+
if (!cstart) cstart = p;
|
|
2016
|
+
}
|
|
2017
|
+
}
|
|
2018
|
+
lua_assert(cstart != NULL);
|
|
2019
|
+
lj_mcode_sync(cstart, cend);
|
|
2020
|
+
lj_mcode_patch(J, mcarea, 1);
|
|
2021
|
+
}
|
|
2022
|
+
|