immunio 1.1.2 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/immunio/version.rb +1 -1
- data/lua-hooks/Makefile +56 -109
- data/lua-hooks/ext/all.c +3 -14
- data/lua-hooks/ext/libinjection/module.mk +5 -0
- data/lua-hooks/ext/lpeg/module.mk +6 -0
- data/lua-hooks/ext/lua-cmsgpack/module.mk +2 -0
- data/lua-hooks/ext/lua-snapshot/module.mk +2 -0
- data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
- data/lua-hooks/ext/luajit/Makefile +2 -2
- data/lua-hooks/ext/luajit/README +2 -2
- data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
- data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
- data/lua-hooks/ext/luajit/doc/changes.html +15 -2
- data/lua-hooks/ext/luajit/doc/contact.html +3 -3
- data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +4 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_jit.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
- data/lua-hooks/ext/luajit/doc/extensions.html +9 -2
- data/lua-hooks/ext/luajit/doc/faq.html +2 -2
- data/lua-hooks/ext/luajit/doc/install.html +22 -18
- data/lua-hooks/ext/luajit/doc/luajit.html +3 -3
- data/lua-hooks/ext/luajit/doc/running.html +2 -2
- data/lua-hooks/ext/luajit/doc/status.html +2 -2
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +3 -3
- data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +34 -7
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +427 -102
- data/lua-hooks/ext/luajit/dynasm/dynasm.lua +5 -5
- data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
- data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
- data/lua-hooks/ext/luajit/src/Makefile +36 -21
- data/lua-hooks/ext/luajit/src/Makefile.dep +3 -1
- data/lua-hooks/ext/luajit/src/host/buildvm.c +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +10 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +1 -1
- data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
- data/lua-hooks/ext/luajit/src/host/genminilua.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/bcsave.lua +2 -2
- data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +163 -73
- data/lua-hooks/ext/luajit/src/jit/dump.lua +2 -1
- data/lua-hooks/ext/luajit/src/jit/p.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/v.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
- data/lua-hooks/ext/luajit/src/lib_aux.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_base.c +4 -5
- data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_debug.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -5
- data/lua-hooks/ext/luajit/src/lib_init.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_io.c +2 -3
- data/lua-hooks/ext/luajit/src/lib_jit.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_math.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_os.c +2 -2
- data/lua-hooks/ext/luajit/src/lib_package.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_string.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_table.c +1 -1
- data/lua-hooks/ext/luajit/src/lj.supp +15 -0
- data/lua-hooks/ext/luajit/src/lj_alloc.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_api.c +4 -1
- data/lua-hooks/ext/luajit/src/lj_arch.h +33 -7
- data/lua-hooks/ext/luajit/src/lj_asm.c +12 -5
- data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_asm_arm.h +3 -13
- data/lua-hooks/ext/luajit/src/lj_asm_mips.h +337 -71
- data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2 -2
- data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2 -2
- data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_buf.c +2 -4
- data/lua-hooks/ext/luajit/src/lj_buf.h +1 -3
- data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ccall.c +37 -14
- data/lua-hooks/ext/luajit/src/lj_ccall.h +3 -3
- data/lua-hooks/ext/luajit/src/lj_ccallback.c +16 -7
- data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cconv.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cdata.c +10 -1
- data/lua-hooks/ext/luajit/src/lj_cdata.h +3 -1
- data/lua-hooks/ext/luajit/src/lj_clib.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cparse.c +27 -6
- data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_crecord.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ctype.c +10 -8
- data/lua-hooks/ext/luajit/src/lj_ctype.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_def.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_dispatch.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_dispatch.h +21 -4
- data/lua-hooks/ext/luajit/src/lj_emit_arm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_emit_mips.h +7 -5
- data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_emit_x86.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_err.c +69 -31
- data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ffrecord.c +10 -40
- data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_frame.h +12 -1
- data/lua-hooks/ext/luajit/src/lj_func.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_gc.c +2 -2
- data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_gdbjit.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ir.c +31 -15
- data/lua-hooks/ext/luajit/src/lj_ir.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ircall.h +29 -1
- data/lua-hooks/ext/luajit/src/lj_iropt.h +2 -1
- data/lua-hooks/ext/luajit/src/lj_jit.h +2 -1
- data/lua-hooks/ext/luajit/src/lj_lex.c +28 -1
- data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_mcode.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_meta.c +8 -8
- data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_obj.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_fold.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_mem.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_sink.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_split.c +10 -5
- data/lua-hooks/ext/luajit/src/lj_parse.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_record.c +13 -5
- data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_snap.c +20 -23
- data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_state.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_state.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_strfmt.c +12 -98
- data/lua-hooks/ext/luajit/src/lj_strfmt.h +4 -4
- data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +591 -0
- data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_tab.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_arm64.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_mips.h +30 -2
- data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_x86.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_trace.c +7 -2
- data/lua-hooks/ext/luajit/src/lj_trace.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -3
- data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_vm.h +5 -3
- data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_vmmath.c +15 -15
- data/lua-hooks/ext/luajit/src/ljamalg.c +2 -1
- data/lua-hooks/ext/luajit/src/lua.h +1 -0
- data/lua-hooks/ext/luajit/src/luaconf.h +2 -2
- data/lua-hooks/ext/luajit/src/luajit.c +1 -1
- data/lua-hooks/ext/luajit/src/luajit.h +4 -4
- data/lua-hooks/ext/luajit/src/lualib.h +1 -1
- data/lua-hooks/ext/luajit/src/msvcbuild.bat +1 -1
- data/lua-hooks/ext/luajit/src/ps4build.bat +26 -6
- data/lua-hooks/ext/luajit/src/vm_arm.dasc +17 -9
- data/lua-hooks/ext/luajit/src/vm_arm64.dasc +1 -1
- data/lua-hooks/ext/luajit/src/vm_mips.dasc +1562 -656
- data/lua-hooks/ext/luajit/src/vm_ppc.dasc +3 -7
- data/lua-hooks/ext/luajit/src/vm_x64.dasc +10 -2
- data/lua-hooks/ext/luajit/src/vm_x86.dasc +5 -8
- data/lua-hooks/ext/luautf8/module.mk +2 -0
- data/lua-hooks/ext/module.mk +15 -0
- data/lua-hooks/ext/modules.h +17 -0
- data/lua-hooks/ext/perf/luacpu.c +1 -1
- data/lua-hooks/ext/perf/lualoadavg.c +1 -1
- data/lua-hooks/ext/perf/luameminfo.c +1 -1
- data/lua-hooks/ext/perf/luaoslib.c +124 -2
- data/lua-hooks/ext/perf/module.mk +5 -0
- data/lua-hooks/ext/sha1/luasha1.c +4 -2
- data/lua-hooks/ext/sha1/module.mk +5 -0
- data/lua-hooks/ext/sha2/luasha256.c +4 -2
- data/lua-hooks/ext/sha2/module.mk +5 -0
- data/lua-hooks/ext/sysutils/lua_utils.c +56 -0
- data/lua-hooks/ext/sysutils/module.mk +2 -0
- data/lua-hooks/lib/boot.lua +2 -1
- data/lua-hooks/lib/hooks/module.mk +31 -0
- data/lua-hooks/lib/hooks/xss/module.mk +4 -0
- data/lua-hooks/lib/lexers/module.mk +10 -0
- data/lua-hooks/lib/module.mk +38 -0
- data/lua-hooks/lib/schema/module.mk +3 -0
- data/lua-hooks/options.mk +59 -0
- metadata +21 -2
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
------------------------------------------------------------------------------
|
|
2
2
|
-- DynASM x64 module.
|
|
3
3
|
--
|
|
4
|
-
-- Copyright (C) 2005-
|
|
4
|
+
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
|
5
5
|
-- See dynasm.lua for full copyright notice.
|
|
6
6
|
------------------------------------------------------------------------------
|
|
7
7
|
-- This module just sets 64 bit mode for the combined x86/x64 module.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** DynASM x86 encoding engine.
|
|
3
|
-
** Copyright (C) 2005-
|
|
3
|
+
** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
|
4
4
|
** Released under the MIT license. See dynasm.lua for full copyright notice.
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
|
|
|
170
170
|
dasm_State *D = Dst_REF;
|
|
171
171
|
dasm_ActList p = D->actionlist + start;
|
|
172
172
|
dasm_Section *sec = D->section;
|
|
173
|
-
int pos = sec->pos, ofs = sec->ofs, mrm =
|
|
173
|
+
int pos = sec->pos, ofs = sec->ofs, mrm = -1;
|
|
174
174
|
int *b;
|
|
175
175
|
|
|
176
176
|
if (pos >= sec->epos) {
|
|
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
|
|
|
193
193
|
b[pos++] = n;
|
|
194
194
|
switch (action) {
|
|
195
195
|
case DASM_DISP:
|
|
196
|
-
if (n == 0) { if (
|
|
196
|
+
if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
|
|
197
197
|
case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
|
|
198
198
|
case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
|
|
199
199
|
case DASM_IMM_D: ofs += 4; break;
|
|
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
|
|
|
203
203
|
case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
|
|
204
204
|
case DASM_SPACE: p++; ofs += n; break;
|
|
205
205
|
case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
|
|
206
|
-
case DASM_VREG: CK((n&-
|
|
207
|
-
if (*p
|
|
206
|
+
case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
|
|
207
|
+
if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
|
|
208
|
+
if (*p < 0x20 && (n&7) == 4) ofs++;
|
|
209
|
+
switch ((*p++ >> 3) & 3) {
|
|
210
|
+
case 3: n |= b[pos-3];
|
|
211
|
+
case 2: n |= b[pos-2];
|
|
212
|
+
case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
|
|
213
|
+
}
|
|
214
|
+
continue;
|
|
208
215
|
}
|
|
209
|
-
mrm =
|
|
216
|
+
mrm = -1;
|
|
210
217
|
} else {
|
|
211
218
|
int *pl, n;
|
|
212
219
|
switch (action) {
|
|
@@ -391,7 +398,27 @@ int dasm_encode(Dst_DECL, void *buffer)
|
|
|
391
398
|
case DASM_IMM_D: wd: dasmd(n); break;
|
|
392
399
|
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
|
|
393
400
|
case DASM_IMM_W: dasmw(n); break;
|
|
394
|
-
case DASM_VREG: {
|
|
401
|
+
case DASM_VREG: {
|
|
402
|
+
int t = *p++;
|
|
403
|
+
unsigned char *ex = cp - (t&7);
|
|
404
|
+
if ((n & 8) && t < 0xa0) {
|
|
405
|
+
if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
|
|
406
|
+
n &= 7;
|
|
407
|
+
} else if (n & 0x10) {
|
|
408
|
+
if (*ex & 0x80) {
|
|
409
|
+
*ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
|
|
410
|
+
}
|
|
411
|
+
while (++ex < cp) ex[-1] = *ex;
|
|
412
|
+
if (mark) mark--;
|
|
413
|
+
cp--;
|
|
414
|
+
n &= 7;
|
|
415
|
+
}
|
|
416
|
+
if (t >= 0xc0) n <<= 4;
|
|
417
|
+
else if (t >= 0x40) n <<= 3;
|
|
418
|
+
else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
|
|
419
|
+
cp[-1] ^= n;
|
|
420
|
+
break;
|
|
421
|
+
}
|
|
395
422
|
case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
|
|
396
423
|
b++; n = (int)(ptrdiff_t)D->globals[-n];
|
|
397
424
|
case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
------------------------------------------------------------------------------
|
|
2
2
|
-- DynASM x86/x64 module.
|
|
3
3
|
--
|
|
4
|
-
-- Copyright (C) 2005-
|
|
4
|
+
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
|
5
5
|
-- See dynasm.lua for full copyright notice.
|
|
6
6
|
------------------------------------------------------------------------------
|
|
7
7
|
|
|
@@ -11,9 +11,9 @@ local x64 = x64
|
|
|
11
11
|
local _info = {
|
|
12
12
|
arch = x64 and "x64" or "x86",
|
|
13
13
|
description = "DynASM x86/x64 module",
|
|
14
|
-
version = "1.
|
|
15
|
-
vernum =
|
|
16
|
-
release = "
|
|
14
|
+
version = "1.4.0",
|
|
15
|
+
vernum = 10400,
|
|
16
|
+
release = "2015-10-18",
|
|
17
17
|
author = "Mike Pall",
|
|
18
18
|
license = "MIT",
|
|
19
19
|
}
|
|
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
|
|
|
27
27
|
local _s = string
|
|
28
28
|
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
|
|
29
29
|
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
|
|
30
|
-
local concat, sort = table.concat, table.sort
|
|
30
|
+
local concat, sort, remove = table.concat, table.sort, table.remove
|
|
31
31
|
local bit = bit or require("bit")
|
|
32
|
-
local band, shl, shr = bit.band, bit.lshift, bit.rshift
|
|
32
|
+
local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
|
|
33
33
|
|
|
34
34
|
-- Inherited tables and callbacks.
|
|
35
35
|
local g_opt, g_arch
|
|
@@ -41,7 +41,7 @@ local action_names = {
|
|
|
41
41
|
-- int arg, 1 buffer pos:
|
|
42
42
|
"DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
|
|
43
43
|
-- action arg (1 byte), int arg, 1 buffer pos (reg/num):
|
|
44
|
-
"VREG", "SPACE",
|
|
44
|
+
"VREG", "SPACE",
|
|
45
45
|
-- ptrdiff_t arg, 1 buffer pos (address): !x64
|
|
46
46
|
"SETLABEL", "REL_A",
|
|
47
47
|
-- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
|
|
@@ -83,6 +83,21 @@ local actargs = { 0 }
|
|
|
83
83
|
-- Current number of section buffer positions for dasm_put().
|
|
84
84
|
local secpos = 1
|
|
85
85
|
|
|
86
|
+
-- VREG kind encodings, pre-shifted by 5 bits.
|
|
87
|
+
local map_vreg = {
|
|
88
|
+
["modrm.rm.m"] = 0x00,
|
|
89
|
+
["modrm.rm.r"] = 0x20,
|
|
90
|
+
["opcode"] = 0x20,
|
|
91
|
+
["sib.base"] = 0x20,
|
|
92
|
+
["sib.index"] = 0x40,
|
|
93
|
+
["modrm.reg"] = 0x80,
|
|
94
|
+
["vex.v"] = 0xa0,
|
|
95
|
+
["imm.hi"] = 0xc0,
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
-- Current number of VREG actions contributing to REX/VEX shrinkage.
|
|
99
|
+
local vreg_shrink_count = 0
|
|
100
|
+
|
|
86
101
|
------------------------------------------------------------------------------
|
|
87
102
|
|
|
88
103
|
-- Compute action numbers for action names.
|
|
@@ -134,6 +149,21 @@ local function waction(action, a, num)
|
|
|
134
149
|
if a or num then secpos = secpos + (num or 1) end
|
|
135
150
|
end
|
|
136
151
|
|
|
152
|
+
-- Optionally add a VREG action.
|
|
153
|
+
local function wvreg(kind, vreg, psz, sk, defer)
|
|
154
|
+
if not vreg then return end
|
|
155
|
+
waction("VREG", vreg)
|
|
156
|
+
local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
|
|
157
|
+
if b < (sk or 0) then
|
|
158
|
+
vreg_shrink_count = vreg_shrink_count + 1
|
|
159
|
+
end
|
|
160
|
+
if not defer then
|
|
161
|
+
b = b + vreg_shrink_count * 8
|
|
162
|
+
vreg_shrink_count = 0
|
|
163
|
+
end
|
|
164
|
+
wputxb(b + (psz or 0))
|
|
165
|
+
end
|
|
166
|
+
|
|
137
167
|
-- Add call to embedded DynASM C code.
|
|
138
168
|
local function wcall(func, args)
|
|
139
169
|
wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
|
|
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
|
|
|
299
329
|
local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
|
|
300
330
|
if needrex then map_reg_needrex[iname] = true end
|
|
301
331
|
local name
|
|
302
|
-
if sz == "o" then name = format("
|
|
332
|
+
if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
|
|
303
333
|
elseif sz == "f" then name = format("st%d", i)
|
|
304
334
|
else name = format("r%d%s", i, sz == addrsize and "" or sz) end
|
|
305
335
|
map_archdef[name] = iname
|
|
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
|
|
|
326
356
|
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
|
|
327
357
|
map_reg_valid_index[map_archdef.esp] = false
|
|
328
358
|
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
|
|
359
|
+
if x64 then map_reg_needrex[map_archdef.Rb] = true end
|
|
329
360
|
map_archdef["Ra"] = "@"..addrsize
|
|
330
361
|
|
|
331
362
|
-- FP registers (internally tword sized, but use "f" as operand size).
|
|
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
|
|
|
334
365
|
-- SSE registers (oword sized, but qword and dword accessible).
|
|
335
366
|
mkrmap("o", "xmm")
|
|
336
367
|
|
|
368
|
+
-- AVX registers (yword sized, but oword, qword and dword accessible).
|
|
369
|
+
mkrmap("y", "ymm")
|
|
370
|
+
|
|
337
371
|
-- Operand size prefixes to codes.
|
|
338
372
|
local map_opsize = {
|
|
339
|
-
byte = "b", word = "w", dword = "d", qword = "q", oword = "o",
|
|
340
|
-
aword = addrsize,
|
|
373
|
+
byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
|
|
374
|
+
tword = "t", aword = addrsize,
|
|
341
375
|
}
|
|
342
376
|
|
|
343
377
|
-- Operand size code to number.
|
|
344
378
|
local map_opsizenum = {
|
|
345
|
-
b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
|
|
379
|
+
b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
|
|
346
380
|
}
|
|
347
381
|
|
|
348
382
|
-- Operand size code to name.
|
|
349
383
|
local map_opsizename = {
|
|
350
|
-
b = "byte", w = "word", d = "dword", q = "qword", o = "oword",
|
|
351
|
-
f = "fpword",
|
|
384
|
+
b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
|
|
385
|
+
t = "tword", f = "fpword",
|
|
352
386
|
}
|
|
353
387
|
|
|
354
388
|
-- Valid index register scale factors.
|
|
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
|
|
|
460
494
|
end
|
|
461
495
|
|
|
462
496
|
-- Put multi-byte opcode with operand-size dependent modifications.
|
|
463
|
-
local function wputop(sz, op, rex)
|
|
497
|
+
local function wputop(sz, op, rex, vex, vregr, vregxb)
|
|
498
|
+
local psz, sk = 0, nil
|
|
499
|
+
if vex then
|
|
500
|
+
local tail
|
|
501
|
+
if vex.m == 1 and band(rex, 11) == 0 then
|
|
502
|
+
if x64 and vregxb then
|
|
503
|
+
sk = map_vreg["modrm.reg"]
|
|
504
|
+
else
|
|
505
|
+
wputb(0xc5)
|
|
506
|
+
tail = shl(bxor(band(rex, 4), 4), 5)
|
|
507
|
+
psz = 3
|
|
508
|
+
end
|
|
509
|
+
end
|
|
510
|
+
if not tail then
|
|
511
|
+
wputb(0xc4)
|
|
512
|
+
wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
|
|
513
|
+
tail = shl(band(rex, 8), 4)
|
|
514
|
+
psz = 4
|
|
515
|
+
end
|
|
516
|
+
local reg, vreg = 0, nil
|
|
517
|
+
if vex.v then
|
|
518
|
+
reg = vex.v.reg
|
|
519
|
+
if not reg then werror("bad vex operand") end
|
|
520
|
+
if reg < 0 then reg = 0; vreg = vex.v.vreg end
|
|
521
|
+
end
|
|
522
|
+
if sz == "y" or vex.l then tail = tail + 4 end
|
|
523
|
+
wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
|
|
524
|
+
wvreg("vex.v", vreg)
|
|
525
|
+
rex = 0
|
|
526
|
+
if op >= 256 then werror("bad vex opcode") end
|
|
527
|
+
else
|
|
528
|
+
if rex ~= 0 then
|
|
529
|
+
if not x64 then werror("bad operand size") end
|
|
530
|
+
elseif (vregr or vregxb) and x64 then
|
|
531
|
+
rex = 0x10
|
|
532
|
+
sk = map_vreg["vex.v"]
|
|
533
|
+
end
|
|
534
|
+
end
|
|
464
535
|
local r
|
|
465
|
-
if rex ~= 0 and not x64 then werror("bad operand size") end
|
|
466
536
|
if sz == "w" then wputb(102) end
|
|
467
537
|
-- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
|
|
468
538
|
if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
|
|
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
|
|
|
471
541
|
if rex ~= 0 then
|
|
472
542
|
local opc3 = band(op, 0xffff00)
|
|
473
543
|
if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
|
|
474
|
-
wputb(64 + band(rex, 15)); rex = 0
|
|
544
|
+
wputb(64 + band(rex, 15)); rex = 0; psz = 2
|
|
475
545
|
end
|
|
476
546
|
end
|
|
477
|
-
wputb(shr(op, 16)); op = band(op, 0xffff)
|
|
547
|
+
wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
|
|
478
548
|
end
|
|
479
549
|
if op >= 256 then
|
|
480
550
|
local b = shr(op, 8)
|
|
481
|
-
if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
|
|
482
|
-
wputb(b)
|
|
483
|
-
op = band(op, 255)
|
|
551
|
+
if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
|
|
552
|
+
wputb(b); op = band(op, 255); psz = psz + 1
|
|
484
553
|
end
|
|
485
|
-
if rex ~= 0 then wputb(64 + band(rex, 15)) end
|
|
554
|
+
if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
|
|
486
555
|
if sz == "b" then op = op - 1 end
|
|
487
556
|
wputb(op)
|
|
557
|
+
return psz, sk
|
|
488
558
|
end
|
|
489
559
|
|
|
490
560
|
-- Put ModRM or SIB formatted byte.
|
|
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
|
|
|
494
564
|
end
|
|
495
565
|
|
|
496
566
|
-- Put ModRM/SIB plus optional displacement.
|
|
497
|
-
local function wputmrmsib(t, imark, s, vsreg)
|
|
567
|
+
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
|
|
498
568
|
local vreg, vxreg
|
|
499
569
|
local reg, xreg = t.reg, t.xreg
|
|
500
570
|
if reg and reg < 0 then reg = 0; vreg = t.vreg end
|
|
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
|
|
|
504
574
|
-- Register mode.
|
|
505
575
|
if sub(t.mode, 1, 1) == "r" then
|
|
506
576
|
wputmodrm(3, s, reg)
|
|
507
|
-
|
|
508
|
-
|
|
577
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
|
|
578
|
+
wvreg("modrm.rm.r", vreg, psz+1, sk)
|
|
509
579
|
return
|
|
510
580
|
end
|
|
511
581
|
|
|
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
|
|
|
519
589
|
-- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
|
|
520
590
|
wputmodrm(0, s, 4)
|
|
521
591
|
if imark == "I" then waction("MARK") end
|
|
522
|
-
|
|
592
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
|
|
523
593
|
wputmodrm(t.xsc, xreg, 5)
|
|
524
|
-
|
|
594
|
+
wvreg("sib.index", vxreg, psz+2, sk)
|
|
525
595
|
else
|
|
526
596
|
-- Pure 32 bit displacement.
|
|
527
597
|
if x64 and tdisp ~= "table" then
|
|
528
598
|
wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
|
|
599
|
+
wvreg("modrm.reg", vsreg, psz+1, sk)
|
|
529
600
|
if imark == "I" then waction("MARK") end
|
|
530
601
|
wputmodrm(0, 4, 5)
|
|
531
602
|
else
|
|
532
603
|
riprel = x64
|
|
533
604
|
wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
|
|
605
|
+
wvreg("modrm.reg", vsreg, psz+1, sk)
|
|
534
606
|
if imark == "I" then waction("MARK") end
|
|
535
607
|
end
|
|
536
|
-
if vsreg then waction("VREG", vsreg); wputxb(2) end
|
|
537
608
|
end
|
|
538
609
|
if riprel then -- Emit rip-relative displacement.
|
|
539
610
|
if match("UWSiI", imark) then
|
|
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
|
|
|
561
632
|
if xreg or band(reg, 7) == 4 then
|
|
562
633
|
wputmodrm(m or 2, s, 4) -- ModRM.
|
|
563
634
|
if m == nil or imark == "I" then waction("MARK") end
|
|
564
|
-
|
|
635
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
|
|
565
636
|
wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
|
|
566
|
-
|
|
567
|
-
|
|
637
|
+
wvreg("sib.index", vxreg, psz+2, sk, vreg)
|
|
638
|
+
wvreg("sib.base", vreg, psz+2, sk)
|
|
568
639
|
else
|
|
569
640
|
wputmodrm(m or 2, s, reg) -- ModRM.
|
|
570
641
|
if (imark == "I" and (m == 1 or m == 2)) or
|
|
571
642
|
(m == nil and (vsreg or vreg)) then waction("MARK") end
|
|
572
|
-
|
|
573
|
-
|
|
643
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
|
|
644
|
+
wvreg("modrm.rm.m", vreg, psz+1, sk)
|
|
574
645
|
end
|
|
575
646
|
|
|
576
647
|
-- Put displacement.
|
|
@@ -881,9 +952,15 @@ end
|
|
|
881
952
|
-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
|
|
882
953
|
-- The spare 3 bits are either filled with the last hex digit or
|
|
883
954
|
-- the result from a previous "r"/"R". The opcode is restored.
|
|
955
|
+
-- "u" Use VEX encoding, vvvv unused.
|
|
956
|
+
-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
|
|
957
|
+
-- removed from the list used by future characters).
|
|
958
|
+
-- "L" Force VEX.L
|
|
884
959
|
--
|
|
885
960
|
-- All of the following characters force a flush of the opcode:
|
|
886
961
|
-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
|
|
962
|
+
-- "s" stores a 4 bit immediate from the last register operand,
|
|
963
|
+
-- followed by 4 zero bits.
|
|
887
964
|
-- "S" stores a signed 8 bit immediate from the last operand.
|
|
888
965
|
-- "U" stores an unsigned 8 bit immediate from the last operand.
|
|
889
966
|
-- "W" stores an unsigned 16 bit immediate from the last operand.
|
|
@@ -1081,10 +1158,11 @@ local map_op = {
|
|
|
1081
1158
|
btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
|
|
1082
1159
|
bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
|
|
1083
1160
|
|
|
1084
|
-
shld_3 = "mriqdw:0FA4RmU|
|
|
1085
|
-
shrd_3 = "mriqdw:0FACRmU|
|
|
1161
|
+
shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
|
|
1162
|
+
shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
|
|
1086
1163
|
|
|
1087
1164
|
rdtsc_0 = "0F31", -- P1+
|
|
1165
|
+
rdpmc_0 = "0F33", -- P6+
|
|
1088
1166
|
cpuid_0 = "0FA2", -- P1+
|
|
1089
1167
|
|
|
1090
1168
|
-- floating point ops
|
|
@@ -1190,7 +1268,7 @@ local map_op = {
|
|
|
1190
1268
|
cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
|
|
1191
1269
|
cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
|
|
1192
1270
|
cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
|
|
1193
|
-
cvtss2si_2 = "rr/do:
|
|
1271
|
+
cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
|
|
1194
1272
|
cvttpd2dq_2 = "rmo:660FE6rM",
|
|
1195
1273
|
cvttps2dq_2 = "rmo:F30F5BrM",
|
|
1196
1274
|
cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
|
|
@@ -1225,46 +1303,14 @@ local map_op = {
|
|
|
1225
1303
|
movups_2 = "rmo:0F10rM|mro:0F11Rm",
|
|
1226
1304
|
orpd_2 = "rmo:660F56rM",
|
|
1227
1305
|
orps_2 = "rmo:0F56rM",
|
|
1228
|
-
packssdw_2 = "rmo:660F6BrM",
|
|
1229
|
-
packsswb_2 = "rmo:660F63rM",
|
|
1230
|
-
packuswb_2 = "rmo:660F67rM",
|
|
1231
|
-
paddb_2 = "rmo:660FFCrM",
|
|
1232
|
-
paddd_2 = "rmo:660FFErM",
|
|
1233
|
-
paddq_2 = "rmo:660FD4rM",
|
|
1234
|
-
paddsb_2 = "rmo:660FECrM",
|
|
1235
|
-
paddsw_2 = "rmo:660FEDrM",
|
|
1236
|
-
paddusb_2 = "rmo:660FDCrM",
|
|
1237
|
-
paddusw_2 = "rmo:660FDDrM",
|
|
1238
|
-
paddw_2 = "rmo:660FFDrM",
|
|
1239
|
-
pand_2 = "rmo:660FDBrM",
|
|
1240
|
-
pandn_2 = "rmo:660FDFrM",
|
|
1241
1306
|
pause_0 = "F390",
|
|
1242
|
-
|
|
1243
|
-
pavgw_2 = "rmo:660FE3rM",
|
|
1244
|
-
pcmpeqb_2 = "rmo:660F74rM",
|
|
1245
|
-
pcmpeqd_2 = "rmo:660F76rM",
|
|
1246
|
-
pcmpeqw_2 = "rmo:660F75rM",
|
|
1247
|
-
pcmpgtb_2 = "rmo:660F64rM",
|
|
1248
|
-
pcmpgtd_2 = "rmo:660F66rM",
|
|
1249
|
-
pcmpgtw_2 = "rmo:660F65rM",
|
|
1250
|
-
pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
|
|
1307
|
+
pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
|
|
1251
1308
|
pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
|
|
1252
|
-
pmaddwd_2 = "rmo:660FF5rM",
|
|
1253
|
-
pmaxsw_2 = "rmo:660FEErM",
|
|
1254
|
-
pmaxub_2 = "rmo:660FDErM",
|
|
1255
|
-
pminsw_2 = "rmo:660FEArM",
|
|
1256
|
-
pminub_2 = "rmo:660FDArM",
|
|
1257
1309
|
pmovmskb_2 = "rr/do:660FD7rM",
|
|
1258
|
-
pmulhuw_2 = "rmo:660FE4rM",
|
|
1259
|
-
pmulhw_2 = "rmo:660FE5rM",
|
|
1260
|
-
pmullw_2 = "rmo:660FD5rM",
|
|
1261
|
-
pmuludq_2 = "rmo:660FF4rM",
|
|
1262
|
-
por_2 = "rmo:660FEBrM",
|
|
1263
1310
|
prefetchnta_1 = "xb:n0F180m",
|
|
1264
1311
|
prefetcht0_1 = "xb:n0F181m",
|
|
1265
1312
|
prefetcht1_1 = "xb:n0F182m",
|
|
1266
1313
|
prefetcht2_1 = "xb:n0F183m",
|
|
1267
|
-
psadbw_2 = "rmo:660FF6rM",
|
|
1268
1314
|
pshufd_3 = "rmio:660F70rMU",
|
|
1269
1315
|
pshufhw_3 = "rmio:F30F70rMU",
|
|
1270
1316
|
pshuflw_3 = "rmio:F20F70rMU",
|
|
@@ -1278,23 +1324,6 @@ local map_op = {
|
|
|
1278
1324
|
psrldq_2 = "rio:660F733mU",
|
|
1279
1325
|
psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
|
|
1280
1326
|
psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
|
|
1281
|
-
psubb_2 = "rmo:660FF8rM",
|
|
1282
|
-
psubd_2 = "rmo:660FFArM",
|
|
1283
|
-
psubq_2 = "rmo:660FFBrM",
|
|
1284
|
-
psubsb_2 = "rmo:660FE8rM",
|
|
1285
|
-
psubsw_2 = "rmo:660FE9rM",
|
|
1286
|
-
psubusb_2 = "rmo:660FD8rM",
|
|
1287
|
-
psubusw_2 = "rmo:660FD9rM",
|
|
1288
|
-
psubw_2 = "rmo:660FF9rM",
|
|
1289
|
-
punpckhbw_2 = "rmo:660F68rM",
|
|
1290
|
-
punpckhdq_2 = "rmo:660F6ArM",
|
|
1291
|
-
punpckhqdq_2 = "rmo:660F6DrM",
|
|
1292
|
-
punpckhwd_2 = "rmo:660F69rM",
|
|
1293
|
-
punpcklbw_2 = "rmo:660F60rM",
|
|
1294
|
-
punpckldq_2 = "rmo:660F62rM",
|
|
1295
|
-
punpcklqdq_2 = "rmo:660F6CrM",
|
|
1296
|
-
punpcklwd_2 = "rmo:660F61rM",
|
|
1297
|
-
pxor_2 = "rmo:660FEFrM",
|
|
1298
1327
|
rcpps_2 = "rmo:0F53rM",
|
|
1299
1328
|
rcpss_2 = "rro:F30F53rM|rx/od:",
|
|
1300
1329
|
rsqrtps_2 = "rmo:0F52rM",
|
|
@@ -1352,7 +1381,7 @@ local map_op = {
|
|
|
1352
1381
|
dpps_3 = "rmio:660F3A40rMU",
|
|
1353
1382
|
extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
|
|
1354
1383
|
insertps_3 = "rrio:660F3A41rMU|rxi/od:",
|
|
1355
|
-
movntdqa_2 = "
|
|
1384
|
+
movntdqa_2 = "rxo:660F382ArM",
|
|
1356
1385
|
mpsadbw_3 = "rmio:660F3A42rMU",
|
|
1357
1386
|
packusdw_2 = "rmo:660F382BrM",
|
|
1358
1387
|
pblendvb_3 = "rmRo:660F3810rM",
|
|
@@ -1412,6 +1441,238 @@ local map_op = {
|
|
|
1412
1441
|
movntsd_2 = "xr/qo:nF20F2BRm",
|
|
1413
1442
|
movntss_2 = "xr/do:F30F2BRm",
|
|
1414
1443
|
-- popcnt is also in SSE4.2
|
|
1444
|
+
|
|
1445
|
+
-- AES-NI
|
|
1446
|
+
aesdec_2 = "rmo:660F38DErM",
|
|
1447
|
+
aesdeclast_2 = "rmo:660F38DFrM",
|
|
1448
|
+
aesenc_2 = "rmo:660F38DCrM",
|
|
1449
|
+
aesenclast_2 = "rmo:660F38DDrM",
|
|
1450
|
+
aesimc_2 = "rmo:660F38DBrM",
|
|
1451
|
+
aeskeygenassist_3 = "rmio:660F3ADFrMU",
|
|
1452
|
+
pclmulqdq_3 = "rmio:660F3A44rMU",
|
|
1453
|
+
|
|
1454
|
+
-- AVX FP ops
|
|
1455
|
+
vaddsubpd_3 = "rrmoy:660FVD0rM",
|
|
1456
|
+
vaddsubps_3 = "rrmoy:F20FVD0rM",
|
|
1457
|
+
vandpd_3 = "rrmoy:660FV54rM",
|
|
1458
|
+
vandps_3 = "rrmoy:0FV54rM",
|
|
1459
|
+
vandnpd_3 = "rrmoy:660FV55rM",
|
|
1460
|
+
vandnps_3 = "rrmoy:0FV55rM",
|
|
1461
|
+
vblendpd_4 = "rrmioy:660F3AV0DrMU",
|
|
1462
|
+
vblendps_4 = "rrmioy:660F3AV0CrMU",
|
|
1463
|
+
vblendvpd_4 = "rrmroy:660F3AV4BrMs",
|
|
1464
|
+
vblendvps_4 = "rrmroy:660F3AV4ArMs",
|
|
1465
|
+
vbroadcastf128_2 = "rx/yo:660F38u1ArM",
|
|
1466
|
+
vcmppd_4 = "rrmioy:660FVC2rMU",
|
|
1467
|
+
vcmpps_4 = "rrmioy:0FVC2rMU",
|
|
1468
|
+
vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
|
|
1469
|
+
vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
|
|
1470
|
+
vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
|
|
1471
|
+
vcomiss_2 = "rro:0Fu2FrM|rx/od:",
|
|
1472
|
+
vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
|
|
1473
|
+
vcvtdq2ps_2 = "rmoy:0Fu5BrM",
|
|
1474
|
+
vcvtpd2dq_2 = "rmoy:F20FuE6rM",
|
|
1475
|
+
vcvtpd2ps_2 = "rmoy:660Fu5ArM",
|
|
1476
|
+
vcvtps2dq_2 = "rmoy:660Fu5BrM",
|
|
1477
|
+
vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
|
|
1478
|
+
vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
|
|
1479
|
+
vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
|
|
1480
|
+
vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
|
|
1481
|
+
vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
|
|
1482
|
+
vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
|
|
1483
|
+
vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
|
|
1484
|
+
vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
|
|
1485
|
+
vcvttps2dq_2 = "rmoy:F30Fu5BrM",
|
|
1486
|
+
vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
|
|
1487
|
+
vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
|
|
1488
|
+
vdppd_4 = "rrmio:660F3AV41rMU",
|
|
1489
|
+
vdpps_4 = "rrmioy:660F3AV40rMU",
|
|
1490
|
+
vextractf128_3 = "mri/oy:660F3AuL19RmU",
|
|
1491
|
+
vextractps_3 = "mri/do:660F3Au17RmU",
|
|
1492
|
+
vhaddpd_3 = "rrmoy:660FV7CrM",
|
|
1493
|
+
vhaddps_3 = "rrmoy:F20FV7CrM",
|
|
1494
|
+
vhsubpd_3 = "rrmoy:660FV7DrM",
|
|
1495
|
+
vhsubps_3 = "rrmoy:F20FV7DrM",
|
|
1496
|
+
vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
|
|
1497
|
+
vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
|
|
1498
|
+
vldmxcsr_1 = "xd:0FuAE2m",
|
|
1499
|
+
vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
|
|
1500
|
+
vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
|
|
1501
|
+
vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
|
|
1502
|
+
vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
|
|
1503
|
+
vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
|
|
1504
|
+
vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
|
|
1505
|
+
vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
|
|
1506
|
+
vmovhlps_3 = "rrro:0FV12rM",
|
|
1507
|
+
vmovhpd_2 = "xr/qo:660Fu17Rm",
|
|
1508
|
+
vmovhpd_3 = "rrx/ooq:660FV16rM",
|
|
1509
|
+
vmovhps_2 = "xr/qo:0Fu17Rm",
|
|
1510
|
+
vmovhps_3 = "rrx/ooq:0FV16rM",
|
|
1511
|
+
vmovlhps_3 = "rrro:0FV16rM",
|
|
1512
|
+
vmovlpd_2 = "xr/qo:660Fu13Rm",
|
|
1513
|
+
vmovlpd_3 = "rrx/ooq:660FV12rM",
|
|
1514
|
+
vmovlps_2 = "xr/qo:0Fu13Rm",
|
|
1515
|
+
vmovlps_3 = "rrx/ooq:0FV12rM",
|
|
1516
|
+
vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
|
|
1517
|
+
vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
|
|
1518
|
+
vmovntpd_2 = "xroy:660Fu2BRm",
|
|
1519
|
+
vmovntps_2 = "xroy:0Fu2BRm",
|
|
1520
|
+
vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
|
|
1521
|
+
vmovsd_3 = "rrro:F20FV10rM",
|
|
1522
|
+
vmovshdup_2 = "rmoy:F30Fu16rM",
|
|
1523
|
+
vmovsldup_2 = "rmoy:F30Fu12rM",
|
|
1524
|
+
vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
|
|
1525
|
+
vmovss_3 = "rrro:F30FV10rM",
|
|
1526
|
+
vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
|
|
1527
|
+
vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
|
|
1528
|
+
vorpd_3 = "rrmoy:660FV56rM",
|
|
1529
|
+
vorps_3 = "rrmoy:0FV56rM",
|
|
1530
|
+
vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
|
|
1531
|
+
vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
|
|
1532
|
+
vperm2f128_4 = "rrmiy:660F3AV06rMU",
|
|
1533
|
+
vptestpd_2 = "rmoy:660F38u0FrM",
|
|
1534
|
+
vptestps_2 = "rmoy:660F38u0ErM",
|
|
1535
|
+
vrcpps_2 = "rmoy:0Fu53rM",
|
|
1536
|
+
vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
|
|
1537
|
+
vrsqrtps_2 = "rmoy:0Fu52rM",
|
|
1538
|
+
vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
|
|
1539
|
+
vroundpd_3 = "rmioy:660F3AV09rMU",
|
|
1540
|
+
vroundps_3 = "rmioy:660F3AV08rMU",
|
|
1541
|
+
vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
|
|
1542
|
+
vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
|
|
1543
|
+
vshufpd_4 = "rrmioy:660FVC6rMU",
|
|
1544
|
+
vshufps_4 = "rrmioy:0FVC6rMU",
|
|
1545
|
+
vsqrtps_2 = "rmoy:0Fu51rM",
|
|
1546
|
+
vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
|
|
1547
|
+
vsqrtpd_2 = "rmoy:660Fu51rM",
|
|
1548
|
+
vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
|
|
1549
|
+
vstmxcsr_1 = "xd:0FuAE3m",
|
|
1550
|
+
vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
|
|
1551
|
+
vucomiss_2 = "rro:0Fu2ErM|rx/od:",
|
|
1552
|
+
vunpckhpd_3 = "rrmoy:660FV15rM",
|
|
1553
|
+
vunpckhps_3 = "rrmoy:0FV15rM",
|
|
1554
|
+
vunpcklpd_3 = "rrmoy:660FV14rM",
|
|
1555
|
+
vunpcklps_3 = "rrmoy:0FV14rM",
|
|
1556
|
+
vxorpd_3 = "rrmoy:660FV57rM",
|
|
1557
|
+
vxorps_3 = "rrmoy:0FV57rM",
|
|
1558
|
+
vzeroall_0 = "0FuL77",
|
|
1559
|
+
vzeroupper_0 = "0Fu77",
|
|
1560
|
+
|
|
1561
|
+
-- AVX2 FP ops
|
|
1562
|
+
vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
|
|
1563
|
+
vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
|
|
1564
|
+
-- *vgather* (!vsib)
|
|
1565
|
+
vpermpd_3 = "rmiy:660F3AuX01rMU",
|
|
1566
|
+
vpermps_3 = "rrmy:660F38V16rM",
|
|
1567
|
+
|
|
1568
|
+
-- AVX, AVX2 integer ops
|
|
1569
|
+
-- In general, xmm requires AVX, ymm requires AVX2.
|
|
1570
|
+
vaesdec_3 = "rrmo:660F38VDErM",
|
|
1571
|
+
vaesdeclast_3 = "rrmo:660F38VDFrM",
|
|
1572
|
+
vaesenc_3 = "rrmo:660F38VDCrM",
|
|
1573
|
+
vaesenclast_3 = "rrmo:660F38VDDrM",
|
|
1574
|
+
vaesimc_2 = "rmo:660F38uDBrM",
|
|
1575
|
+
vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
|
|
1576
|
+
vlddqu_2 = "rxoy:F20FuF0rM",
|
|
1577
|
+
vmaskmovdqu_2 = "rro:660FuF7rM",
|
|
1578
|
+
vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
|
|
1579
|
+
vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
|
|
1580
|
+
vmovntdq_2 = "xroy:660FuE7Rm",
|
|
1581
|
+
vmovntdqa_2 = "rxoy:660F38u2ArM",
|
|
1582
|
+
vmpsadbw_4 = "rrmioy:660F3AV42rMU",
|
|
1583
|
+
vpabsb_2 = "rmoy:660F38u1CrM",
|
|
1584
|
+
vpabsd_2 = "rmoy:660F38u1ErM",
|
|
1585
|
+
vpabsw_2 = "rmoy:660F38u1DrM",
|
|
1586
|
+
vpackusdw_3 = "rrmoy:660F38V2BrM",
|
|
1587
|
+
vpalignr_4 = "rrmioy:660F3AV0FrMU",
|
|
1588
|
+
vpblendvb_4 = "rrmroy:660F3AV4CrMs",
|
|
1589
|
+
vpblendw_4 = "rrmioy:660F3AV0ErMU",
|
|
1590
|
+
vpclmulqdq_4 = "rrmio:660F3AV44rMU",
|
|
1591
|
+
vpcmpeqq_3 = "rrmoy:660F38V29rM",
|
|
1592
|
+
vpcmpestri_3 = "rmio:660F3Au61rMU",
|
|
1593
|
+
vpcmpestrm_3 = "rmio:660F3Au60rMU",
|
|
1594
|
+
vpcmpgtq_3 = "rrmoy:660F38V37rM",
|
|
1595
|
+
vpcmpistri_3 = "rmio:660F3Au63rMU",
|
|
1596
|
+
vpcmpistrm_3 = "rmio:660F3Au62rMU",
|
|
1597
|
+
vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
|
|
1598
|
+
vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
|
|
1599
|
+
vpextrd_3 = "mri/do:660F3Au16RmU",
|
|
1600
|
+
vpextrq_3 = "mri/qo:660F3Au16RmU",
|
|
1601
|
+
vphaddw_3 = "rrmoy:660F38V01rM",
|
|
1602
|
+
vphaddd_3 = "rrmoy:660F38V02rM",
|
|
1603
|
+
vphaddsw_3 = "rrmoy:660F38V03rM",
|
|
1604
|
+
vphminposuw_2 = "rmo:660F38u41rM",
|
|
1605
|
+
vphsubw_3 = "rrmoy:660F38V05rM",
|
|
1606
|
+
vphsubd_3 = "rrmoy:660F38V06rM",
|
|
1607
|
+
vphsubsw_3 = "rrmoy:660F38V07rM",
|
|
1608
|
+
vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
|
|
1609
|
+
vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
|
|
1610
|
+
vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
|
|
1611
|
+
vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
|
|
1612
|
+
vpmaddubsw_3 = "rrmoy:660F38V04rM",
|
|
1613
|
+
vpmaxsb_3 = "rrmoy:660F38V3CrM",
|
|
1614
|
+
vpmaxsd_3 = "rrmoy:660F38V3DrM",
|
|
1615
|
+
vpmaxuw_3 = "rrmoy:660F38V3ErM",
|
|
1616
|
+
vpmaxud_3 = "rrmoy:660F38V3FrM",
|
|
1617
|
+
vpminsb_3 = "rrmoy:660F38V38rM",
|
|
1618
|
+
vpminsd_3 = "rrmoy:660F38V39rM",
|
|
1619
|
+
vpminuw_3 = "rrmoy:660F38V3ArM",
|
|
1620
|
+
vpminud_3 = "rrmoy:660F38V3BrM",
|
|
1621
|
+
vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
|
|
1622
|
+
vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
|
|
1623
|
+
vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
|
|
1624
|
+
vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
|
|
1625
|
+
vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
|
|
1626
|
+
vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
|
|
1627
|
+
vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
|
|
1628
|
+
vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
|
|
1629
|
+
vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
|
|
1630
|
+
vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
|
|
1631
|
+
vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
|
|
1632
|
+
vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
|
|
1633
|
+
vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
|
|
1634
|
+
vpmuldq_3 = "rrmoy:660F38V28rM",
|
|
1635
|
+
vpmulhrsw_3 = "rrmoy:660F38V0BrM",
|
|
1636
|
+
vpmulld_3 = "rrmoy:660F38V40rM",
|
|
1637
|
+
vpshufb_3 = "rrmoy:660F38V00rM",
|
|
1638
|
+
vpshufd_3 = "rmioy:660Fu70rMU",
|
|
1639
|
+
vpshufhw_3 = "rmioy:F30Fu70rMU",
|
|
1640
|
+
vpshuflw_3 = "rmioy:F20Fu70rMU",
|
|
1641
|
+
vpsignb_3 = "rrmoy:660F38V08rM",
|
|
1642
|
+
vpsignw_3 = "rrmoy:660F38V09rM",
|
|
1643
|
+
vpsignd_3 = "rrmoy:660F38V0ArM",
|
|
1644
|
+
vpslldq_3 = "rrioy:660Fv737mU",
|
|
1645
|
+
vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
|
|
1646
|
+
vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
|
|
1647
|
+
vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
|
|
1648
|
+
vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
|
|
1649
|
+
vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
|
|
1650
|
+
vpsrldq_3 = "rrioy:660Fv733mU",
|
|
1651
|
+
vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
|
|
1652
|
+
vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
|
|
1653
|
+
vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
|
|
1654
|
+
vptest_2 = "rmoy:660F38u17rM",
|
|
1655
|
+
|
|
1656
|
+
-- AVX2 integer ops
|
|
1657
|
+
vbroadcasti128_2 = "rx/yo:660F38u5ArM",
|
|
1658
|
+
vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
|
|
1659
|
+
vextracti128_3 = "mri/oy:660F3AuL39RmU",
|
|
1660
|
+
vpblendd_4 = "rrmioy:660F3AV02rMU",
|
|
1661
|
+
vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
|
|
1662
|
+
vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
|
|
1663
|
+
vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
|
|
1664
|
+
vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
|
|
1665
|
+
vpermd_3 = "rrmy:660F38V36rM",
|
|
1666
|
+
vpermq_3 = "rmiy:660F3AuX00rMU",
|
|
1667
|
+
-- *vpgather* (!vsib)
|
|
1668
|
+
vperm2i128_4 = "rrmiy:660F3AV46rMU",
|
|
1669
|
+
vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
|
|
1670
|
+
vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
|
|
1671
|
+
vpsllvd_3 = "rrmoy:660F38V47rM",
|
|
1672
|
+
vpsllvq_3 = "rrmoy:660F38VX47rM",
|
|
1673
|
+
vpsravd_3 = "rrmoy:660F38V46rM",
|
|
1674
|
+
vpsrlvd_3 = "rrmoy:660F38V45rM",
|
|
1675
|
+
vpsrlvq_3 = "rrmoy:660F38VX45rM",
|
|
1415
1676
|
}
|
|
1416
1677
|
|
|
1417
1678
|
------------------------------------------------------------------------------
|
|
@@ -1462,28 +1723,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
|
|
|
1462
1723
|
map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
|
|
1463
1724
|
end
|
|
1464
1725
|
|
|
1465
|
-
-- SSE FP arithmetic ops.
|
|
1726
|
+
-- SSE / AVX FP arithmetic ops.
|
|
1466
1727
|
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
|
|
1467
1728
|
sub = 12, min = 13, div = 14, max = 15 } do
|
|
1468
1729
|
map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
|
|
1469
1730
|
map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
|
|
1470
1731
|
map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
|
|
1471
1732
|
map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
|
|
1733
|
+
if n ~= 1 then
|
|
1734
|
+
map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
|
|
1735
|
+
map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
|
|
1736
|
+
map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
|
|
1737
|
+
map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
|
|
1738
|
+
end
|
|
1739
|
+
end
|
|
1740
|
+
|
|
1741
|
+
-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
|
|
1742
|
+
for name,n in pairs{
|
|
1743
|
+
paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
|
|
1744
|
+
paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
|
|
1745
|
+
packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
|
|
1746
|
+
paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
|
|
1747
|
+
pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
|
|
1748
|
+
pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
|
|
1749
|
+
pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
|
|
1750
|
+
pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
|
|
1751
|
+
pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
|
|
1752
|
+
pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
|
|
1753
|
+
psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
|
|
1754
|
+
psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
|
|
1755
|
+
punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
|
|
1756
|
+
punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
|
|
1757
|
+
punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
|
|
1758
|
+
} do
|
|
1759
|
+
map_op[name.."_2"] = format("rmo:660F%02XrM", n)
|
|
1760
|
+
map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
|
|
1472
1761
|
end
|
|
1473
1762
|
|
|
1474
1763
|
------------------------------------------------------------------------------
|
|
1475
1764
|
|
|
1765
|
+
local map_vexarg = { u = false, v = 1, V = 2 }
|
|
1766
|
+
|
|
1476
1767
|
-- Process pattern string.
|
|
1477
1768
|
local function dopattern(pat, args, sz, op, needrex)
|
|
1478
|
-
local digit, addin
|
|
1769
|
+
local digit, addin, vex
|
|
1479
1770
|
local opcode = 0
|
|
1480
1771
|
local szov = sz
|
|
1481
1772
|
local narg = 1
|
|
1482
1773
|
local rex = 0
|
|
1483
1774
|
|
|
1484
1775
|
-- Limit number of section buffer positions used by a single dasm_put().
|
|
1485
|
-
-- A single opcode needs a maximum of
|
|
1486
|
-
if secpos+
|
|
1776
|
+
-- A single opcode needs a maximum of 6 positions.
|
|
1777
|
+
if secpos+6 > maxsecpos then wflush() end
|
|
1487
1778
|
|
|
1488
1779
|
-- Process each character.
|
|
1489
1780
|
for c in gmatch(pat.."|", ".") do
|
|
@@ -1497,6 +1788,8 @@ local function dopattern(pat, args, sz, op, needrex)
|
|
|
1497
1788
|
szov = nil
|
|
1498
1789
|
elseif c == "X" then -- Force REX.W.
|
|
1499
1790
|
rex = 8
|
|
1791
|
+
elseif c == "L" then -- Force VEX.L.
|
|
1792
|
+
vex.l = true
|
|
1500
1793
|
elseif c == "r" then -- Merge 1st operand regno. into opcode.
|
|
1501
1794
|
addin = args[1]; opcode = opcode + (addin.reg % 8)
|
|
1502
1795
|
if narg < 2 then narg = 2 end
|
|
@@ -1520,21 +1813,42 @@ local function dopattern(pat, args, sz, op, needrex)
|
|
|
1520
1813
|
if t.xreg and t.xreg > 7 then rex = rex + 2 end
|
|
1521
1814
|
if s > 7 then rex = rex + 4 end
|
|
1522
1815
|
if needrex then rex = rex + 16 end
|
|
1523
|
-
wputop(szov, opcode, rex
|
|
1816
|
+
local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
|
|
1817
|
+
opcode = nil
|
|
1524
1818
|
local imark = sub(pat, -1) -- Force a mark (ugly).
|
|
1525
1819
|
-- Put ModRM/SIB with regno/last digit as spare.
|
|
1526
|
-
wputmrmsib(t, imark, s, addin and addin.vreg)
|
|
1820
|
+
wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
|
|
1527
1821
|
addin = nil
|
|
1822
|
+
elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
|
|
1823
|
+
local b = band(opcode, 255); opcode = shr(opcode, 8)
|
|
1824
|
+
local m = 1
|
|
1825
|
+
if b == 0x38 then m = 2
|
|
1826
|
+
elseif b == 0x3a then m = 3 end
|
|
1827
|
+
if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
|
|
1828
|
+
if b ~= 0x0f then
|
|
1829
|
+
werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
|
|
1830
|
+
"' in pattern `"..pat.."' for `"..op.."'")
|
|
1831
|
+
end
|
|
1832
|
+
local v = map_vexarg[c]
|
|
1833
|
+
if v then v = remove(args, v) end
|
|
1834
|
+
b = band(opcode, 255)
|
|
1835
|
+
local p = 0
|
|
1836
|
+
if b == 0x66 then p = 1
|
|
1837
|
+
elseif b == 0xf3 then p = 2
|
|
1838
|
+
elseif b == 0xf2 then p = 3 end
|
|
1839
|
+
if p ~= 0 then opcode = shr(opcode, 8) end
|
|
1840
|
+
if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
|
|
1841
|
+
vex = { m = m, p = p, v = v }
|
|
1528
1842
|
else
|
|
1529
1843
|
if opcode then -- Flush opcode.
|
|
1530
1844
|
if szov == "q" and rex == 0 then rex = rex + 8 end
|
|
1531
1845
|
if needrex then rex = rex + 16 end
|
|
1532
1846
|
if addin and addin.reg == -1 then
|
|
1533
|
-
wputop(szov, opcode - 7, rex)
|
|
1534
|
-
|
|
1847
|
+
local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
|
|
1848
|
+
wvreg("opcode", addin.vreg, psz, sk)
|
|
1535
1849
|
else
|
|
1536
1850
|
if addin and addin.reg > 7 then rex = rex + 1 end
|
|
1537
|
-
wputop(szov, opcode, rex)
|
|
1851
|
+
wputop(szov, opcode, rex, vex)
|
|
1538
1852
|
end
|
|
1539
1853
|
opcode = nil
|
|
1540
1854
|
end
|
|
@@ -1571,6 +1885,14 @@ local function dopattern(pat, args, sz, op, needrex)
|
|
|
1571
1885
|
else
|
|
1572
1886
|
wputlabel("REL_", imm, 2)
|
|
1573
1887
|
end
|
|
1888
|
+
elseif c == "s" then
|
|
1889
|
+
local reg = a.reg
|
|
1890
|
+
if reg < 0 then
|
|
1891
|
+
wputb(0)
|
|
1892
|
+
wvreg("imm.hi", a.vreg)
|
|
1893
|
+
else
|
|
1894
|
+
wputb(shl(reg, 4))
|
|
1895
|
+
end
|
|
1574
1896
|
else
|
|
1575
1897
|
werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
|
|
1576
1898
|
end
|
|
@@ -1647,11 +1969,14 @@ map_op[".template__"] = function(params, template, nparams)
|
|
|
1647
1969
|
if pat == "" then pat = lastpat else lastpat = pat end
|
|
1648
1970
|
if matchtm(tm, args) then
|
|
1649
1971
|
local prefix = sub(szm, 1, 1)
|
|
1650
|
-
if prefix == "/" then --
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1972
|
+
if prefix == "/" then -- Exactly match leading operand sizes.
|
|
1973
|
+
for i = #szm,1,-1 do
|
|
1974
|
+
if i == 1 then
|
|
1975
|
+
dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
|
|
1976
|
+
return
|
|
1977
|
+
elseif args[i-1].opsize ~= sub(szm, i, i) then
|
|
1978
|
+
break
|
|
1979
|
+
end
|
|
1655
1980
|
end
|
|
1656
1981
|
else -- Match common operand size.
|
|
1657
1982
|
local szp = sz
|
|
@@ -1716,8 +2041,8 @@ if x64 then
|
|
|
1716
2041
|
rex = a.reg > 7 and 9 or 8
|
|
1717
2042
|
end
|
|
1718
2043
|
end
|
|
1719
|
-
wputop(sz, opcode, rex)
|
|
1720
|
-
|
|
2044
|
+
local psz, sk = wputop(sz, opcode, rex, nil, vreg)
|
|
2045
|
+
wvreg("opcode", vreg, psz, sk)
|
|
1721
2046
|
waction("IMM_D", format("(unsigned int)(%s)", op64))
|
|
1722
2047
|
waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
|
|
1723
2048
|
end
|