immunio 1.1.2 → 1.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/immunio/version.rb +1 -1
- data/lua-hooks/Makefile +56 -109
- data/lua-hooks/ext/all.c +3 -14
- data/lua-hooks/ext/libinjection/module.mk +5 -0
- data/lua-hooks/ext/lpeg/module.mk +6 -0
- data/lua-hooks/ext/lua-cmsgpack/module.mk +2 -0
- data/lua-hooks/ext/lua-snapshot/module.mk +2 -0
- data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
- data/lua-hooks/ext/luajit/Makefile +2 -2
- data/lua-hooks/ext/luajit/README +2 -2
- data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
- data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
- data/lua-hooks/ext/luajit/doc/changes.html +15 -2
- data/lua-hooks/ext/luajit/doc/contact.html +3 -3
- data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +4 -2
- data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_jit.html +2 -2
- data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
- data/lua-hooks/ext/luajit/doc/extensions.html +9 -2
- data/lua-hooks/ext/luajit/doc/faq.html +2 -2
- data/lua-hooks/ext/luajit/doc/install.html +22 -18
- data/lua-hooks/ext/luajit/doc/luajit.html +3 -3
- data/lua-hooks/ext/luajit/doc/running.html +2 -2
- data/lua-hooks/ext/luajit/doc/status.html +2 -2
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +4 -4
- data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +3 -3
- data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +34 -7
- data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +427 -102
- data/lua-hooks/ext/luajit/dynasm/dynasm.lua +5 -5
- data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
- data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
- data/lua-hooks/ext/luajit/src/Makefile +36 -21
- data/lua-hooks/ext/luajit/src/Makefile.dep +3 -1
- data/lua-hooks/ext/luajit/src/host/buildvm.c +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +10 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
- data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +1 -1
- data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
- data/lua-hooks/ext/luajit/src/host/genminilua.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/bcsave.lua +2 -2
- data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +163 -73
- data/lua-hooks/ext/luajit/src/jit/dump.lua +2 -1
- data/lua-hooks/ext/luajit/src/jit/p.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/v.lua +1 -1
- data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
- data/lua-hooks/ext/luajit/src/lib_aux.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_base.c +4 -5
- data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_debug.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -5
- data/lua-hooks/ext/luajit/src/lib_init.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_io.c +2 -3
- data/lua-hooks/ext/luajit/src/lib_jit.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_math.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_os.c +2 -2
- data/lua-hooks/ext/luajit/src/lib_package.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_string.c +1 -1
- data/lua-hooks/ext/luajit/src/lib_table.c +1 -1
- data/lua-hooks/ext/luajit/src/lj.supp +15 -0
- data/lua-hooks/ext/luajit/src/lj_alloc.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_api.c +4 -1
- data/lua-hooks/ext/luajit/src/lj_arch.h +33 -7
- data/lua-hooks/ext/luajit/src/lj_asm.c +12 -5
- data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_asm_arm.h +3 -13
- data/lua-hooks/ext/luajit/src/lj_asm_mips.h +337 -71
- data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2 -2
- data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2 -2
- data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_buf.c +2 -4
- data/lua-hooks/ext/luajit/src/lj_buf.h +1 -3
- data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ccall.c +37 -14
- data/lua-hooks/ext/luajit/src/lj_ccall.h +3 -3
- data/lua-hooks/ext/luajit/src/lj_ccallback.c +16 -7
- data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cconv.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cdata.c +10 -1
- data/lua-hooks/ext/luajit/src/lj_cdata.h +3 -1
- data/lua-hooks/ext/luajit/src/lj_clib.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_cparse.c +27 -6
- data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_crecord.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ctype.c +10 -8
- data/lua-hooks/ext/luajit/src/lj_ctype.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_def.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_dispatch.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_dispatch.h +21 -4
- data/lua-hooks/ext/luajit/src/lj_emit_arm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_emit_mips.h +7 -5
- data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_emit_x86.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_err.c +69 -31
- data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ffrecord.c +10 -40
- data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_frame.h +12 -1
- data/lua-hooks/ext/luajit/src/lj_func.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_gc.c +2 -2
- data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_gdbjit.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ir.c +31 -15
- data/lua-hooks/ext/luajit/src/lj_ir.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_ircall.h +29 -1
- data/lua-hooks/ext/luajit/src/lj_iropt.h +2 -1
- data/lua-hooks/ext/luajit/src/lj_jit.h +2 -1
- data/lua-hooks/ext/luajit/src/lj_lex.c +28 -1
- data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_mcode.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_meta.c +8 -8
- data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_obj.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_fold.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_mem.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_sink.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_opt_split.c +10 -5
- data/lua-hooks/ext/luajit/src/lj_parse.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_record.c +13 -5
- data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_snap.c +20 -23
- data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_state.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_state.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_strfmt.c +12 -98
- data/lua-hooks/ext/luajit/src/lj_strfmt.h +4 -4
- data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +591 -0
- data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_tab.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_arm64.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_mips.h +30 -2
- data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_target_x86.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_trace.c +7 -2
- data/lua-hooks/ext/luajit/src/lj_trace.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -3
- data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_vm.h +5 -3
- data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
- data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
- data/lua-hooks/ext/luajit/src/lj_vmmath.c +15 -15
- data/lua-hooks/ext/luajit/src/ljamalg.c +2 -1
- data/lua-hooks/ext/luajit/src/lua.h +1 -0
- data/lua-hooks/ext/luajit/src/luaconf.h +2 -2
- data/lua-hooks/ext/luajit/src/luajit.c +1 -1
- data/lua-hooks/ext/luajit/src/luajit.h +4 -4
- data/lua-hooks/ext/luajit/src/lualib.h +1 -1
- data/lua-hooks/ext/luajit/src/msvcbuild.bat +1 -1
- data/lua-hooks/ext/luajit/src/ps4build.bat +26 -6
- data/lua-hooks/ext/luajit/src/vm_arm.dasc +17 -9
- data/lua-hooks/ext/luajit/src/vm_arm64.dasc +1 -1
- data/lua-hooks/ext/luajit/src/vm_mips.dasc +1562 -656
- data/lua-hooks/ext/luajit/src/vm_ppc.dasc +3 -7
- data/lua-hooks/ext/luajit/src/vm_x64.dasc +10 -2
- data/lua-hooks/ext/luajit/src/vm_x86.dasc +5 -8
- data/lua-hooks/ext/luautf8/module.mk +2 -0
- data/lua-hooks/ext/module.mk +15 -0
- data/lua-hooks/ext/modules.h +17 -0
- data/lua-hooks/ext/perf/luacpu.c +1 -1
- data/lua-hooks/ext/perf/lualoadavg.c +1 -1
- data/lua-hooks/ext/perf/luameminfo.c +1 -1
- data/lua-hooks/ext/perf/luaoslib.c +124 -2
- data/lua-hooks/ext/perf/module.mk +5 -0
- data/lua-hooks/ext/sha1/luasha1.c +4 -2
- data/lua-hooks/ext/sha1/module.mk +5 -0
- data/lua-hooks/ext/sha2/luasha256.c +4 -2
- data/lua-hooks/ext/sha2/module.mk +5 -0
- data/lua-hooks/ext/sysutils/lua_utils.c +56 -0
- data/lua-hooks/ext/sysutils/module.mk +2 -0
- data/lua-hooks/lib/boot.lua +2 -1
- data/lua-hooks/lib/hooks/module.mk +31 -0
- data/lua-hooks/lib/hooks/xss/module.mk +4 -0
- data/lua-hooks/lib/lexers/module.mk +10 -0
- data/lua-hooks/lib/module.mk +38 -0
- data/lua-hooks/lib/schema/module.mk +3 -0
- data/lua-hooks/options.mk +59 -0
- metadata +21 -2
@@ -1,7 +1,7 @@
|
|
1
1
|
------------------------------------------------------------------------------
|
2
2
|
-- DynASM x64 module.
|
3
3
|
--
|
4
|
-
-- Copyright (C) 2005-
|
4
|
+
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
5
5
|
-- See dynasm.lua for full copyright notice.
|
6
6
|
------------------------------------------------------------------------------
|
7
7
|
-- This module just sets 64 bit mode for the combined x86/x64 module.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
/*
|
2
2
|
** DynASM x86 encoding engine.
|
3
|
-
** Copyright (C) 2005-
|
3
|
+
** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
4
4
|
** Released under the MIT license. See dynasm.lua for full copyright notice.
|
5
5
|
*/
|
6
6
|
|
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
|
|
170
170
|
dasm_State *D = Dst_REF;
|
171
171
|
dasm_ActList p = D->actionlist + start;
|
172
172
|
dasm_Section *sec = D->section;
|
173
|
-
int pos = sec->pos, ofs = sec->ofs, mrm =
|
173
|
+
int pos = sec->pos, ofs = sec->ofs, mrm = -1;
|
174
174
|
int *b;
|
175
175
|
|
176
176
|
if (pos >= sec->epos) {
|
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
|
|
193
193
|
b[pos++] = n;
|
194
194
|
switch (action) {
|
195
195
|
case DASM_DISP:
|
196
|
-
if (n == 0) { if (
|
196
|
+
if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
|
197
197
|
case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
|
198
198
|
case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
|
199
199
|
case DASM_IMM_D: ofs += 4; break;
|
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
|
|
203
203
|
case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
|
204
204
|
case DASM_SPACE: p++; ofs += n; break;
|
205
205
|
case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
|
206
|
-
case DASM_VREG: CK((n&-
|
207
|
-
if (*p
|
206
|
+
case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
|
207
|
+
if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
|
208
|
+
if (*p < 0x20 && (n&7) == 4) ofs++;
|
209
|
+
switch ((*p++ >> 3) & 3) {
|
210
|
+
case 3: n |= b[pos-3];
|
211
|
+
case 2: n |= b[pos-2];
|
212
|
+
case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
|
213
|
+
}
|
214
|
+
continue;
|
208
215
|
}
|
209
|
-
mrm =
|
216
|
+
mrm = -1;
|
210
217
|
} else {
|
211
218
|
int *pl, n;
|
212
219
|
switch (action) {
|
@@ -391,7 +398,27 @@ int dasm_encode(Dst_DECL, void *buffer)
|
|
391
398
|
case DASM_IMM_D: wd: dasmd(n); break;
|
392
399
|
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
|
393
400
|
case DASM_IMM_W: dasmw(n); break;
|
394
|
-
case DASM_VREG: {
|
401
|
+
case DASM_VREG: {
|
402
|
+
int t = *p++;
|
403
|
+
unsigned char *ex = cp - (t&7);
|
404
|
+
if ((n & 8) && t < 0xa0) {
|
405
|
+
if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
|
406
|
+
n &= 7;
|
407
|
+
} else if (n & 0x10) {
|
408
|
+
if (*ex & 0x80) {
|
409
|
+
*ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
|
410
|
+
}
|
411
|
+
while (++ex < cp) ex[-1] = *ex;
|
412
|
+
if (mark) mark--;
|
413
|
+
cp--;
|
414
|
+
n &= 7;
|
415
|
+
}
|
416
|
+
if (t >= 0xc0) n <<= 4;
|
417
|
+
else if (t >= 0x40) n <<= 3;
|
418
|
+
else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
|
419
|
+
cp[-1] ^= n;
|
420
|
+
break;
|
421
|
+
}
|
395
422
|
case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
|
396
423
|
b++; n = (int)(ptrdiff_t)D->globals[-n];
|
397
424
|
case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
|
@@ -1,7 +1,7 @@
|
|
1
1
|
------------------------------------------------------------------------------
|
2
2
|
-- DynASM x86/x64 module.
|
3
3
|
--
|
4
|
-
-- Copyright (C) 2005-
|
4
|
+
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
|
5
5
|
-- See dynasm.lua for full copyright notice.
|
6
6
|
------------------------------------------------------------------------------
|
7
7
|
|
@@ -11,9 +11,9 @@ local x64 = x64
|
|
11
11
|
local _info = {
|
12
12
|
arch = x64 and "x64" or "x86",
|
13
13
|
description = "DynASM x86/x64 module",
|
14
|
-
version = "1.
|
15
|
-
vernum =
|
16
|
-
release = "
|
14
|
+
version = "1.4.0",
|
15
|
+
vernum = 10400,
|
16
|
+
release = "2015-10-18",
|
17
17
|
author = "Mike Pall",
|
18
18
|
license = "MIT",
|
19
19
|
}
|
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
|
|
27
27
|
local _s = string
|
28
28
|
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
|
29
29
|
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
|
30
|
-
local concat, sort = table.concat, table.sort
|
30
|
+
local concat, sort, remove = table.concat, table.sort, table.remove
|
31
31
|
local bit = bit or require("bit")
|
32
|
-
local band, shl, shr = bit.band, bit.lshift, bit.rshift
|
32
|
+
local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
|
33
33
|
|
34
34
|
-- Inherited tables and callbacks.
|
35
35
|
local g_opt, g_arch
|
@@ -41,7 +41,7 @@ local action_names = {
|
|
41
41
|
-- int arg, 1 buffer pos:
|
42
42
|
"DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
|
43
43
|
-- action arg (1 byte), int arg, 1 buffer pos (reg/num):
|
44
|
-
"VREG", "SPACE",
|
44
|
+
"VREG", "SPACE",
|
45
45
|
-- ptrdiff_t arg, 1 buffer pos (address): !x64
|
46
46
|
"SETLABEL", "REL_A",
|
47
47
|
-- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
|
@@ -83,6 +83,21 @@ local actargs = { 0 }
|
|
83
83
|
-- Current number of section buffer positions for dasm_put().
|
84
84
|
local secpos = 1
|
85
85
|
|
86
|
+
-- VREG kind encodings, pre-shifted by 5 bits.
|
87
|
+
local map_vreg = {
|
88
|
+
["modrm.rm.m"] = 0x00,
|
89
|
+
["modrm.rm.r"] = 0x20,
|
90
|
+
["opcode"] = 0x20,
|
91
|
+
["sib.base"] = 0x20,
|
92
|
+
["sib.index"] = 0x40,
|
93
|
+
["modrm.reg"] = 0x80,
|
94
|
+
["vex.v"] = 0xa0,
|
95
|
+
["imm.hi"] = 0xc0,
|
96
|
+
}
|
97
|
+
|
98
|
+
-- Current number of VREG actions contributing to REX/VEX shrinkage.
|
99
|
+
local vreg_shrink_count = 0
|
100
|
+
|
86
101
|
------------------------------------------------------------------------------
|
87
102
|
|
88
103
|
-- Compute action numbers for action names.
|
@@ -134,6 +149,21 @@ local function waction(action, a, num)
|
|
134
149
|
if a or num then secpos = secpos + (num or 1) end
|
135
150
|
end
|
136
151
|
|
152
|
+
-- Optionally add a VREG action.
|
153
|
+
local function wvreg(kind, vreg, psz, sk, defer)
|
154
|
+
if not vreg then return end
|
155
|
+
waction("VREG", vreg)
|
156
|
+
local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
|
157
|
+
if b < (sk or 0) then
|
158
|
+
vreg_shrink_count = vreg_shrink_count + 1
|
159
|
+
end
|
160
|
+
if not defer then
|
161
|
+
b = b + vreg_shrink_count * 8
|
162
|
+
vreg_shrink_count = 0
|
163
|
+
end
|
164
|
+
wputxb(b + (psz or 0))
|
165
|
+
end
|
166
|
+
|
137
167
|
-- Add call to embedded DynASM C code.
|
138
168
|
local function wcall(func, args)
|
139
169
|
wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
|
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
|
|
299
329
|
local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
|
300
330
|
if needrex then map_reg_needrex[iname] = true end
|
301
331
|
local name
|
302
|
-
if sz == "o" then name = format("
|
332
|
+
if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
|
303
333
|
elseif sz == "f" then name = format("st%d", i)
|
304
334
|
else name = format("r%d%s", i, sz == addrsize and "" or sz) end
|
305
335
|
map_archdef[name] = iname
|
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
|
|
326
356
|
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
|
327
357
|
map_reg_valid_index[map_archdef.esp] = false
|
328
358
|
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
|
359
|
+
if x64 then map_reg_needrex[map_archdef.Rb] = true end
|
329
360
|
map_archdef["Ra"] = "@"..addrsize
|
330
361
|
|
331
362
|
-- FP registers (internally tword sized, but use "f" as operand size).
|
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
|
|
334
365
|
-- SSE registers (oword sized, but qword and dword accessible).
|
335
366
|
mkrmap("o", "xmm")
|
336
367
|
|
368
|
+
-- AVX registers (yword sized, but oword, qword and dword accessible).
|
369
|
+
mkrmap("y", "ymm")
|
370
|
+
|
337
371
|
-- Operand size prefixes to codes.
|
338
372
|
local map_opsize = {
|
339
|
-
byte = "b", word = "w", dword = "d", qword = "q", oword = "o",
|
340
|
-
aword = addrsize,
|
373
|
+
byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
|
374
|
+
tword = "t", aword = addrsize,
|
341
375
|
}
|
342
376
|
|
343
377
|
-- Operand size code to number.
|
344
378
|
local map_opsizenum = {
|
345
|
-
b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
|
379
|
+
b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
|
346
380
|
}
|
347
381
|
|
348
382
|
-- Operand size code to name.
|
349
383
|
local map_opsizename = {
|
350
|
-
b = "byte", w = "word", d = "dword", q = "qword", o = "oword",
|
351
|
-
f = "fpword",
|
384
|
+
b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
|
385
|
+
t = "tword", f = "fpword",
|
352
386
|
}
|
353
387
|
|
354
388
|
-- Valid index register scale factors.
|
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
|
|
460
494
|
end
|
461
495
|
|
462
496
|
-- Put multi-byte opcode with operand-size dependent modifications.
|
463
|
-
local function wputop(sz, op, rex)
|
497
|
+
local function wputop(sz, op, rex, vex, vregr, vregxb)
|
498
|
+
local psz, sk = 0, nil
|
499
|
+
if vex then
|
500
|
+
local tail
|
501
|
+
if vex.m == 1 and band(rex, 11) == 0 then
|
502
|
+
if x64 and vregxb then
|
503
|
+
sk = map_vreg["modrm.reg"]
|
504
|
+
else
|
505
|
+
wputb(0xc5)
|
506
|
+
tail = shl(bxor(band(rex, 4), 4), 5)
|
507
|
+
psz = 3
|
508
|
+
end
|
509
|
+
end
|
510
|
+
if not tail then
|
511
|
+
wputb(0xc4)
|
512
|
+
wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
|
513
|
+
tail = shl(band(rex, 8), 4)
|
514
|
+
psz = 4
|
515
|
+
end
|
516
|
+
local reg, vreg = 0, nil
|
517
|
+
if vex.v then
|
518
|
+
reg = vex.v.reg
|
519
|
+
if not reg then werror("bad vex operand") end
|
520
|
+
if reg < 0 then reg = 0; vreg = vex.v.vreg end
|
521
|
+
end
|
522
|
+
if sz == "y" or vex.l then tail = tail + 4 end
|
523
|
+
wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
|
524
|
+
wvreg("vex.v", vreg)
|
525
|
+
rex = 0
|
526
|
+
if op >= 256 then werror("bad vex opcode") end
|
527
|
+
else
|
528
|
+
if rex ~= 0 then
|
529
|
+
if not x64 then werror("bad operand size") end
|
530
|
+
elseif (vregr or vregxb) and x64 then
|
531
|
+
rex = 0x10
|
532
|
+
sk = map_vreg["vex.v"]
|
533
|
+
end
|
534
|
+
end
|
464
535
|
local r
|
465
|
-
if rex ~= 0 and not x64 then werror("bad operand size") end
|
466
536
|
if sz == "w" then wputb(102) end
|
467
537
|
-- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
|
468
538
|
if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
|
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
|
|
471
541
|
if rex ~= 0 then
|
472
542
|
local opc3 = band(op, 0xffff00)
|
473
543
|
if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
|
474
|
-
wputb(64 + band(rex, 15)); rex = 0
|
544
|
+
wputb(64 + band(rex, 15)); rex = 0; psz = 2
|
475
545
|
end
|
476
546
|
end
|
477
|
-
wputb(shr(op, 16)); op = band(op, 0xffff)
|
547
|
+
wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
|
478
548
|
end
|
479
549
|
if op >= 256 then
|
480
550
|
local b = shr(op, 8)
|
481
|
-
if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
|
482
|
-
wputb(b)
|
483
|
-
op = band(op, 255)
|
551
|
+
if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
|
552
|
+
wputb(b); op = band(op, 255); psz = psz + 1
|
484
553
|
end
|
485
|
-
if rex ~= 0 then wputb(64 + band(rex, 15)) end
|
554
|
+
if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
|
486
555
|
if sz == "b" then op = op - 1 end
|
487
556
|
wputb(op)
|
557
|
+
return psz, sk
|
488
558
|
end
|
489
559
|
|
490
560
|
-- Put ModRM or SIB formatted byte.
|
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
|
|
494
564
|
end
|
495
565
|
|
496
566
|
-- Put ModRM/SIB plus optional displacement.
|
497
|
-
local function wputmrmsib(t, imark, s, vsreg)
|
567
|
+
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
|
498
568
|
local vreg, vxreg
|
499
569
|
local reg, xreg = t.reg, t.xreg
|
500
570
|
if reg and reg < 0 then reg = 0; vreg = t.vreg end
|
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
|
|
504
574
|
-- Register mode.
|
505
575
|
if sub(t.mode, 1, 1) == "r" then
|
506
576
|
wputmodrm(3, s, reg)
|
507
|
-
|
508
|
-
|
577
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
|
578
|
+
wvreg("modrm.rm.r", vreg, psz+1, sk)
|
509
579
|
return
|
510
580
|
end
|
511
581
|
|
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
|
|
519
589
|
-- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
|
520
590
|
wputmodrm(0, s, 4)
|
521
591
|
if imark == "I" then waction("MARK") end
|
522
|
-
|
592
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
|
523
593
|
wputmodrm(t.xsc, xreg, 5)
|
524
|
-
|
594
|
+
wvreg("sib.index", vxreg, psz+2, sk)
|
525
595
|
else
|
526
596
|
-- Pure 32 bit displacement.
|
527
597
|
if x64 and tdisp ~= "table" then
|
528
598
|
wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
|
599
|
+
wvreg("modrm.reg", vsreg, psz+1, sk)
|
529
600
|
if imark == "I" then waction("MARK") end
|
530
601
|
wputmodrm(0, 4, 5)
|
531
602
|
else
|
532
603
|
riprel = x64
|
533
604
|
wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
|
605
|
+
wvreg("modrm.reg", vsreg, psz+1, sk)
|
534
606
|
if imark == "I" then waction("MARK") end
|
535
607
|
end
|
536
|
-
if vsreg then waction("VREG", vsreg); wputxb(2) end
|
537
608
|
end
|
538
609
|
if riprel then -- Emit rip-relative displacement.
|
539
610
|
if match("UWSiI", imark) then
|
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
|
|
561
632
|
if xreg or band(reg, 7) == 4 then
|
562
633
|
wputmodrm(m or 2, s, 4) -- ModRM.
|
563
634
|
if m == nil or imark == "I" then waction("MARK") end
|
564
|
-
|
635
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
|
565
636
|
wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
|
566
|
-
|
567
|
-
|
637
|
+
wvreg("sib.index", vxreg, psz+2, sk, vreg)
|
638
|
+
wvreg("sib.base", vreg, psz+2, sk)
|
568
639
|
else
|
569
640
|
wputmodrm(m or 2, s, reg) -- ModRM.
|
570
641
|
if (imark == "I" and (m == 1 or m == 2)) or
|
571
642
|
(m == nil and (vsreg or vreg)) then waction("MARK") end
|
572
|
-
|
573
|
-
|
643
|
+
wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
|
644
|
+
wvreg("modrm.rm.m", vreg, psz+1, sk)
|
574
645
|
end
|
575
646
|
|
576
647
|
-- Put displacement.
|
@@ -881,9 +952,15 @@ end
|
|
881
952
|
-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
|
882
953
|
-- The spare 3 bits are either filled with the last hex digit or
|
883
954
|
-- the result from a previous "r"/"R". The opcode is restored.
|
955
|
+
-- "u" Use VEX encoding, vvvv unused.
|
956
|
+
-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
|
957
|
+
-- removed from the list used by future characters).
|
958
|
+
-- "L" Force VEX.L
|
884
959
|
--
|
885
960
|
-- All of the following characters force a flush of the opcode:
|
886
961
|
-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
|
962
|
+
-- "s" stores a 4 bit immediate from the last register operand,
|
963
|
+
-- followed by 4 zero bits.
|
887
964
|
-- "S" stores a signed 8 bit immediate from the last operand.
|
888
965
|
-- "U" stores an unsigned 8 bit immediate from the last operand.
|
889
966
|
-- "W" stores an unsigned 16 bit immediate from the last operand.
|
@@ -1081,10 +1158,11 @@ local map_op = {
|
|
1081
1158
|
btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
|
1082
1159
|
bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
|
1083
1160
|
|
1084
|
-
shld_3 = "mriqdw:0FA4RmU|
|
1085
|
-
shrd_3 = "mriqdw:0FACRmU|
|
1161
|
+
shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
|
1162
|
+
shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
|
1086
1163
|
|
1087
1164
|
rdtsc_0 = "0F31", -- P1+
|
1165
|
+
rdpmc_0 = "0F33", -- P6+
|
1088
1166
|
cpuid_0 = "0FA2", -- P1+
|
1089
1167
|
|
1090
1168
|
-- floating point ops
|
@@ -1190,7 +1268,7 @@ local map_op = {
|
|
1190
1268
|
cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
|
1191
1269
|
cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
|
1192
1270
|
cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
|
1193
|
-
cvtss2si_2 = "rr/do:
|
1271
|
+
cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
|
1194
1272
|
cvttpd2dq_2 = "rmo:660FE6rM",
|
1195
1273
|
cvttps2dq_2 = "rmo:F30F5BrM",
|
1196
1274
|
cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
|
@@ -1225,46 +1303,14 @@ local map_op = {
|
|
1225
1303
|
movups_2 = "rmo:0F10rM|mro:0F11Rm",
|
1226
1304
|
orpd_2 = "rmo:660F56rM",
|
1227
1305
|
orps_2 = "rmo:0F56rM",
|
1228
|
-
packssdw_2 = "rmo:660F6BrM",
|
1229
|
-
packsswb_2 = "rmo:660F63rM",
|
1230
|
-
packuswb_2 = "rmo:660F67rM",
|
1231
|
-
paddb_2 = "rmo:660FFCrM",
|
1232
|
-
paddd_2 = "rmo:660FFErM",
|
1233
|
-
paddq_2 = "rmo:660FD4rM",
|
1234
|
-
paddsb_2 = "rmo:660FECrM",
|
1235
|
-
paddsw_2 = "rmo:660FEDrM",
|
1236
|
-
paddusb_2 = "rmo:660FDCrM",
|
1237
|
-
paddusw_2 = "rmo:660FDDrM",
|
1238
|
-
paddw_2 = "rmo:660FFDrM",
|
1239
|
-
pand_2 = "rmo:660FDBrM",
|
1240
|
-
pandn_2 = "rmo:660FDFrM",
|
1241
1306
|
pause_0 = "F390",
|
1242
|
-
|
1243
|
-
pavgw_2 = "rmo:660FE3rM",
|
1244
|
-
pcmpeqb_2 = "rmo:660F74rM",
|
1245
|
-
pcmpeqd_2 = "rmo:660F76rM",
|
1246
|
-
pcmpeqw_2 = "rmo:660F75rM",
|
1247
|
-
pcmpgtb_2 = "rmo:660F64rM",
|
1248
|
-
pcmpgtd_2 = "rmo:660F66rM",
|
1249
|
-
pcmpgtw_2 = "rmo:660F65rM",
|
1250
|
-
pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
|
1307
|
+
pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
|
1251
1308
|
pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
|
1252
|
-
pmaddwd_2 = "rmo:660FF5rM",
|
1253
|
-
pmaxsw_2 = "rmo:660FEErM",
|
1254
|
-
pmaxub_2 = "rmo:660FDErM",
|
1255
|
-
pminsw_2 = "rmo:660FEArM",
|
1256
|
-
pminub_2 = "rmo:660FDArM",
|
1257
1309
|
pmovmskb_2 = "rr/do:660FD7rM",
|
1258
|
-
pmulhuw_2 = "rmo:660FE4rM",
|
1259
|
-
pmulhw_2 = "rmo:660FE5rM",
|
1260
|
-
pmullw_2 = "rmo:660FD5rM",
|
1261
|
-
pmuludq_2 = "rmo:660FF4rM",
|
1262
|
-
por_2 = "rmo:660FEBrM",
|
1263
1310
|
prefetchnta_1 = "xb:n0F180m",
|
1264
1311
|
prefetcht0_1 = "xb:n0F181m",
|
1265
1312
|
prefetcht1_1 = "xb:n0F182m",
|
1266
1313
|
prefetcht2_1 = "xb:n0F183m",
|
1267
|
-
psadbw_2 = "rmo:660FF6rM",
|
1268
1314
|
pshufd_3 = "rmio:660F70rMU",
|
1269
1315
|
pshufhw_3 = "rmio:F30F70rMU",
|
1270
1316
|
pshuflw_3 = "rmio:F20F70rMU",
|
@@ -1278,23 +1324,6 @@ local map_op = {
|
|
1278
1324
|
psrldq_2 = "rio:660F733mU",
|
1279
1325
|
psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
|
1280
1326
|
psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
|
1281
|
-
psubb_2 = "rmo:660FF8rM",
|
1282
|
-
psubd_2 = "rmo:660FFArM",
|
1283
|
-
psubq_2 = "rmo:660FFBrM",
|
1284
|
-
psubsb_2 = "rmo:660FE8rM",
|
1285
|
-
psubsw_2 = "rmo:660FE9rM",
|
1286
|
-
psubusb_2 = "rmo:660FD8rM",
|
1287
|
-
psubusw_2 = "rmo:660FD9rM",
|
1288
|
-
psubw_2 = "rmo:660FF9rM",
|
1289
|
-
punpckhbw_2 = "rmo:660F68rM",
|
1290
|
-
punpckhdq_2 = "rmo:660F6ArM",
|
1291
|
-
punpckhqdq_2 = "rmo:660F6DrM",
|
1292
|
-
punpckhwd_2 = "rmo:660F69rM",
|
1293
|
-
punpcklbw_2 = "rmo:660F60rM",
|
1294
|
-
punpckldq_2 = "rmo:660F62rM",
|
1295
|
-
punpcklqdq_2 = "rmo:660F6CrM",
|
1296
|
-
punpcklwd_2 = "rmo:660F61rM",
|
1297
|
-
pxor_2 = "rmo:660FEFrM",
|
1298
1327
|
rcpps_2 = "rmo:0F53rM",
|
1299
1328
|
rcpss_2 = "rro:F30F53rM|rx/od:",
|
1300
1329
|
rsqrtps_2 = "rmo:0F52rM",
|
@@ -1352,7 +1381,7 @@ local map_op = {
|
|
1352
1381
|
dpps_3 = "rmio:660F3A40rMU",
|
1353
1382
|
extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
|
1354
1383
|
insertps_3 = "rrio:660F3A41rMU|rxi/od:",
|
1355
|
-
movntdqa_2 = "
|
1384
|
+
movntdqa_2 = "rxo:660F382ArM",
|
1356
1385
|
mpsadbw_3 = "rmio:660F3A42rMU",
|
1357
1386
|
packusdw_2 = "rmo:660F382BrM",
|
1358
1387
|
pblendvb_3 = "rmRo:660F3810rM",
|
@@ -1412,6 +1441,238 @@ local map_op = {
|
|
1412
1441
|
movntsd_2 = "xr/qo:nF20F2BRm",
|
1413
1442
|
movntss_2 = "xr/do:F30F2BRm",
|
1414
1443
|
-- popcnt is also in SSE4.2
|
1444
|
+
|
1445
|
+
-- AES-NI
|
1446
|
+
aesdec_2 = "rmo:660F38DErM",
|
1447
|
+
aesdeclast_2 = "rmo:660F38DFrM",
|
1448
|
+
aesenc_2 = "rmo:660F38DCrM",
|
1449
|
+
aesenclast_2 = "rmo:660F38DDrM",
|
1450
|
+
aesimc_2 = "rmo:660F38DBrM",
|
1451
|
+
aeskeygenassist_3 = "rmio:660F3ADFrMU",
|
1452
|
+
pclmulqdq_3 = "rmio:660F3A44rMU",
|
1453
|
+
|
1454
|
+
-- AVX FP ops
|
1455
|
+
vaddsubpd_3 = "rrmoy:660FVD0rM",
|
1456
|
+
vaddsubps_3 = "rrmoy:F20FVD0rM",
|
1457
|
+
vandpd_3 = "rrmoy:660FV54rM",
|
1458
|
+
vandps_3 = "rrmoy:0FV54rM",
|
1459
|
+
vandnpd_3 = "rrmoy:660FV55rM",
|
1460
|
+
vandnps_3 = "rrmoy:0FV55rM",
|
1461
|
+
vblendpd_4 = "rrmioy:660F3AV0DrMU",
|
1462
|
+
vblendps_4 = "rrmioy:660F3AV0CrMU",
|
1463
|
+
vblendvpd_4 = "rrmroy:660F3AV4BrMs",
|
1464
|
+
vblendvps_4 = "rrmroy:660F3AV4ArMs",
|
1465
|
+
vbroadcastf128_2 = "rx/yo:660F38u1ArM",
|
1466
|
+
vcmppd_4 = "rrmioy:660FVC2rMU",
|
1467
|
+
vcmpps_4 = "rrmioy:0FVC2rMU",
|
1468
|
+
vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
|
1469
|
+
vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
|
1470
|
+
vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
|
1471
|
+
vcomiss_2 = "rro:0Fu2FrM|rx/od:",
|
1472
|
+
vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
|
1473
|
+
vcvtdq2ps_2 = "rmoy:0Fu5BrM",
|
1474
|
+
vcvtpd2dq_2 = "rmoy:F20FuE6rM",
|
1475
|
+
vcvtpd2ps_2 = "rmoy:660Fu5ArM",
|
1476
|
+
vcvtps2dq_2 = "rmoy:660Fu5BrM",
|
1477
|
+
vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
|
1478
|
+
vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
|
1479
|
+
vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
|
1480
|
+
vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
|
1481
|
+
vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
|
1482
|
+
vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
|
1483
|
+
vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
|
1484
|
+
vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
|
1485
|
+
vcvttps2dq_2 = "rmoy:F30Fu5BrM",
|
1486
|
+
vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
|
1487
|
+
vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
|
1488
|
+
vdppd_4 = "rrmio:660F3AV41rMU",
|
1489
|
+
vdpps_4 = "rrmioy:660F3AV40rMU",
|
1490
|
+
vextractf128_3 = "mri/oy:660F3AuL19RmU",
|
1491
|
+
vextractps_3 = "mri/do:660F3Au17RmU",
|
1492
|
+
vhaddpd_3 = "rrmoy:660FV7CrM",
|
1493
|
+
vhaddps_3 = "rrmoy:F20FV7CrM",
|
1494
|
+
vhsubpd_3 = "rrmoy:660FV7DrM",
|
1495
|
+
vhsubps_3 = "rrmoy:F20FV7DrM",
|
1496
|
+
vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
|
1497
|
+
vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
|
1498
|
+
vldmxcsr_1 = "xd:0FuAE2m",
|
1499
|
+
vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
|
1500
|
+
vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
|
1501
|
+
vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
|
1502
|
+
vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
|
1503
|
+
vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
|
1504
|
+
vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
|
1505
|
+
vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
|
1506
|
+
vmovhlps_3 = "rrro:0FV12rM",
|
1507
|
+
vmovhpd_2 = "xr/qo:660Fu17Rm",
|
1508
|
+
vmovhpd_3 = "rrx/ooq:660FV16rM",
|
1509
|
+
vmovhps_2 = "xr/qo:0Fu17Rm",
|
1510
|
+
vmovhps_3 = "rrx/ooq:0FV16rM",
|
1511
|
+
vmovlhps_3 = "rrro:0FV16rM",
|
1512
|
+
vmovlpd_2 = "xr/qo:660Fu13Rm",
|
1513
|
+
vmovlpd_3 = "rrx/ooq:660FV12rM",
|
1514
|
+
vmovlps_2 = "xr/qo:0Fu13Rm",
|
1515
|
+
vmovlps_3 = "rrx/ooq:0FV12rM",
|
1516
|
+
vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
|
1517
|
+
vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
|
1518
|
+
vmovntpd_2 = "xroy:660Fu2BRm",
|
1519
|
+
vmovntps_2 = "xroy:0Fu2BRm",
|
1520
|
+
vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
|
1521
|
+
vmovsd_3 = "rrro:F20FV10rM",
|
1522
|
+
vmovshdup_2 = "rmoy:F30Fu16rM",
|
1523
|
+
vmovsldup_2 = "rmoy:F30Fu12rM",
|
1524
|
+
vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
|
1525
|
+
vmovss_3 = "rrro:F30FV10rM",
|
1526
|
+
vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
|
1527
|
+
vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
|
1528
|
+
vorpd_3 = "rrmoy:660FV56rM",
|
1529
|
+
vorps_3 = "rrmoy:0FV56rM",
|
1530
|
+
vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
|
1531
|
+
vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
|
1532
|
+
vperm2f128_4 = "rrmiy:660F3AV06rMU",
|
1533
|
+
vptestpd_2 = "rmoy:660F38u0FrM",
|
1534
|
+
vptestps_2 = "rmoy:660F38u0ErM",
|
1535
|
+
vrcpps_2 = "rmoy:0Fu53rM",
|
1536
|
+
vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
|
1537
|
+
vrsqrtps_2 = "rmoy:0Fu52rM",
|
1538
|
+
vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
|
1539
|
+
vroundpd_3 = "rmioy:660F3AV09rMU",
|
1540
|
+
vroundps_3 = "rmioy:660F3AV08rMU",
|
1541
|
+
vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
|
1542
|
+
vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
|
1543
|
+
vshufpd_4 = "rrmioy:660FVC6rMU",
|
1544
|
+
vshufps_4 = "rrmioy:0FVC6rMU",
|
1545
|
+
vsqrtps_2 = "rmoy:0Fu51rM",
|
1546
|
+
vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
|
1547
|
+
vsqrtpd_2 = "rmoy:660Fu51rM",
|
1548
|
+
vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
|
1549
|
+
vstmxcsr_1 = "xd:0FuAE3m",
|
1550
|
+
vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
|
1551
|
+
vucomiss_2 = "rro:0Fu2ErM|rx/od:",
|
1552
|
+
vunpckhpd_3 = "rrmoy:660FV15rM",
|
1553
|
+
vunpckhps_3 = "rrmoy:0FV15rM",
|
1554
|
+
vunpcklpd_3 = "rrmoy:660FV14rM",
|
1555
|
+
vunpcklps_3 = "rrmoy:0FV14rM",
|
1556
|
+
vxorpd_3 = "rrmoy:660FV57rM",
|
1557
|
+
vxorps_3 = "rrmoy:0FV57rM",
|
1558
|
+
vzeroall_0 = "0FuL77",
|
1559
|
+
vzeroupper_0 = "0Fu77",
|
1560
|
+
|
1561
|
+
-- AVX2 FP ops
|
1562
|
+
vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
|
1563
|
+
vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
|
1564
|
+
-- *vgather* (!vsib)
|
1565
|
+
vpermpd_3 = "rmiy:660F3AuX01rMU",
|
1566
|
+
vpermps_3 = "rrmy:660F38V16rM",
|
1567
|
+
|
1568
|
+
-- AVX, AVX2 integer ops
|
1569
|
+
-- In general, xmm requires AVX, ymm requires AVX2.
|
1570
|
+
vaesdec_3 = "rrmo:660F38VDErM",
|
1571
|
+
vaesdeclast_3 = "rrmo:660F38VDFrM",
|
1572
|
+
vaesenc_3 = "rrmo:660F38VDCrM",
|
1573
|
+
vaesenclast_3 = "rrmo:660F38VDDrM",
|
1574
|
+
vaesimc_2 = "rmo:660F38uDBrM",
|
1575
|
+
vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
|
1576
|
+
vlddqu_2 = "rxoy:F20FuF0rM",
|
1577
|
+
vmaskmovdqu_2 = "rro:660FuF7rM",
|
1578
|
+
vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
|
1579
|
+
vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
|
1580
|
+
vmovntdq_2 = "xroy:660FuE7Rm",
|
1581
|
+
vmovntdqa_2 = "rxoy:660F38u2ArM",
|
1582
|
+
vmpsadbw_4 = "rrmioy:660F3AV42rMU",
|
1583
|
+
vpabsb_2 = "rmoy:660F38u1CrM",
|
1584
|
+
vpabsd_2 = "rmoy:660F38u1ErM",
|
1585
|
+
vpabsw_2 = "rmoy:660F38u1DrM",
|
1586
|
+
vpackusdw_3 = "rrmoy:660F38V2BrM",
|
1587
|
+
vpalignr_4 = "rrmioy:660F3AV0FrMU",
|
1588
|
+
vpblendvb_4 = "rrmroy:660F3AV4CrMs",
|
1589
|
+
vpblendw_4 = "rrmioy:660F3AV0ErMU",
|
1590
|
+
vpclmulqdq_4 = "rrmio:660F3AV44rMU",
|
1591
|
+
vpcmpeqq_3 = "rrmoy:660F38V29rM",
|
1592
|
+
vpcmpestri_3 = "rmio:660F3Au61rMU",
|
1593
|
+
vpcmpestrm_3 = "rmio:660F3Au60rMU",
|
1594
|
+
vpcmpgtq_3 = "rrmoy:660F38V37rM",
|
1595
|
+
vpcmpistri_3 = "rmio:660F3Au63rMU",
|
1596
|
+
vpcmpistrm_3 = "rmio:660F3Au62rMU",
|
1597
|
+
vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
|
1598
|
+
vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
|
1599
|
+
vpextrd_3 = "mri/do:660F3Au16RmU",
|
1600
|
+
vpextrq_3 = "mri/qo:660F3Au16RmU",
|
1601
|
+
vphaddw_3 = "rrmoy:660F38V01rM",
|
1602
|
+
vphaddd_3 = "rrmoy:660F38V02rM",
|
1603
|
+
vphaddsw_3 = "rrmoy:660F38V03rM",
|
1604
|
+
vphminposuw_2 = "rmo:660F38u41rM",
|
1605
|
+
vphsubw_3 = "rrmoy:660F38V05rM",
|
1606
|
+
vphsubd_3 = "rrmoy:660F38V06rM",
|
1607
|
+
vphsubsw_3 = "rrmoy:660F38V07rM",
|
1608
|
+
vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
|
1609
|
+
vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
|
1610
|
+
vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
|
1611
|
+
vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
|
1612
|
+
vpmaddubsw_3 = "rrmoy:660F38V04rM",
|
1613
|
+
vpmaxsb_3 = "rrmoy:660F38V3CrM",
|
1614
|
+
vpmaxsd_3 = "rrmoy:660F38V3DrM",
|
1615
|
+
vpmaxuw_3 = "rrmoy:660F38V3ErM",
|
1616
|
+
vpmaxud_3 = "rrmoy:660F38V3FrM",
|
1617
|
+
vpminsb_3 = "rrmoy:660F38V38rM",
|
1618
|
+
vpminsd_3 = "rrmoy:660F38V39rM",
|
1619
|
+
vpminuw_3 = "rrmoy:660F38V3ArM",
|
1620
|
+
vpminud_3 = "rrmoy:660F38V3BrM",
|
1621
|
+
vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
|
1622
|
+
vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
|
1623
|
+
vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
|
1624
|
+
vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
|
1625
|
+
vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
|
1626
|
+
vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
|
1627
|
+
vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
|
1628
|
+
vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
|
1629
|
+
vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
|
1630
|
+
vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
|
1631
|
+
vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
|
1632
|
+
vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
|
1633
|
+
vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
|
1634
|
+
vpmuldq_3 = "rrmoy:660F38V28rM",
|
1635
|
+
vpmulhrsw_3 = "rrmoy:660F38V0BrM",
|
1636
|
+
vpmulld_3 = "rrmoy:660F38V40rM",
|
1637
|
+
vpshufb_3 = "rrmoy:660F38V00rM",
|
1638
|
+
vpshufd_3 = "rmioy:660Fu70rMU",
|
1639
|
+
vpshufhw_3 = "rmioy:F30Fu70rMU",
|
1640
|
+
vpshuflw_3 = "rmioy:F20Fu70rMU",
|
1641
|
+
vpsignb_3 = "rrmoy:660F38V08rM",
|
1642
|
+
vpsignw_3 = "rrmoy:660F38V09rM",
|
1643
|
+
vpsignd_3 = "rrmoy:660F38V0ArM",
|
1644
|
+
vpslldq_3 = "rrioy:660Fv737mU",
|
1645
|
+
vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
|
1646
|
+
vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
|
1647
|
+
vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
|
1648
|
+
vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
|
1649
|
+
vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
|
1650
|
+
vpsrldq_3 = "rrioy:660Fv733mU",
|
1651
|
+
vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
|
1652
|
+
vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
|
1653
|
+
vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
|
1654
|
+
vptest_2 = "rmoy:660F38u17rM",
|
1655
|
+
|
1656
|
+
-- AVX2 integer ops
|
1657
|
+
vbroadcasti128_2 = "rx/yo:660F38u5ArM",
|
1658
|
+
vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
|
1659
|
+
vextracti128_3 = "mri/oy:660F3AuL39RmU",
|
1660
|
+
vpblendd_4 = "rrmioy:660F3AV02rMU",
|
1661
|
+
vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
|
1662
|
+
vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
|
1663
|
+
vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
|
1664
|
+
vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
|
1665
|
+
vpermd_3 = "rrmy:660F38V36rM",
|
1666
|
+
vpermq_3 = "rmiy:660F3AuX00rMU",
|
1667
|
+
-- *vpgather* (!vsib)
|
1668
|
+
vperm2i128_4 = "rrmiy:660F3AV46rMU",
|
1669
|
+
vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
|
1670
|
+
vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
|
1671
|
+
vpsllvd_3 = "rrmoy:660F38V47rM",
|
1672
|
+
vpsllvq_3 = "rrmoy:660F38VX47rM",
|
1673
|
+
vpsravd_3 = "rrmoy:660F38V46rM",
|
1674
|
+
vpsrlvd_3 = "rrmoy:660F38V45rM",
|
1675
|
+
vpsrlvq_3 = "rrmoy:660F38VX45rM",
|
1415
1676
|
}
|
1416
1677
|
|
1417
1678
|
------------------------------------------------------------------------------
|
@@ -1462,28 +1723,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
|
|
1462
1723
|
map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
|
1463
1724
|
end
|
1464
1725
|
|
1465
|
-
-- SSE FP arithmetic ops.
|
1726
|
+
-- SSE / AVX FP arithmetic ops.
|
1466
1727
|
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
|
1467
1728
|
sub = 12, min = 13, div = 14, max = 15 } do
|
1468
1729
|
map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
|
1469
1730
|
map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
|
1470
1731
|
map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
|
1471
1732
|
map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
|
1733
|
+
if n ~= 1 then
|
1734
|
+
map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
|
1735
|
+
map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
|
1736
|
+
map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
|
1737
|
+
map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
|
1738
|
+
end
|
1739
|
+
end
|
1740
|
+
|
1741
|
+
-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
|
1742
|
+
for name,n in pairs{
|
1743
|
+
paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
|
1744
|
+
paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
|
1745
|
+
packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
|
1746
|
+
paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
|
1747
|
+
pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
|
1748
|
+
pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
|
1749
|
+
pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
|
1750
|
+
pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
|
1751
|
+
pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
|
1752
|
+
pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
|
1753
|
+
psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
|
1754
|
+
psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
|
1755
|
+
punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
|
1756
|
+
punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
|
1757
|
+
punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
|
1758
|
+
} do
|
1759
|
+
map_op[name.."_2"] = format("rmo:660F%02XrM", n)
|
1760
|
+
map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
|
1472
1761
|
end
|
1473
1762
|
|
1474
1763
|
------------------------------------------------------------------------------
|
1475
1764
|
|
1765
|
+
local map_vexarg = { u = false, v = 1, V = 2 }
|
1766
|
+
|
1476
1767
|
-- Process pattern string.
|
1477
1768
|
local function dopattern(pat, args, sz, op, needrex)
|
1478
|
-
local digit, addin
|
1769
|
+
local digit, addin, vex
|
1479
1770
|
local opcode = 0
|
1480
1771
|
local szov = sz
|
1481
1772
|
local narg = 1
|
1482
1773
|
local rex = 0
|
1483
1774
|
|
1484
1775
|
-- Limit number of section buffer positions used by a single dasm_put().
|
1485
|
-
-- A single opcode needs a maximum of
|
1486
|
-
if secpos+
|
1776
|
+
-- A single opcode needs a maximum of 6 positions.
|
1777
|
+
if secpos+6 > maxsecpos then wflush() end
|
1487
1778
|
|
1488
1779
|
-- Process each character.
|
1489
1780
|
for c in gmatch(pat.."|", ".") do
|
@@ -1497,6 +1788,8 @@ local function dopattern(pat, args, sz, op, needrex)
|
|
1497
1788
|
szov = nil
|
1498
1789
|
elseif c == "X" then -- Force REX.W.
|
1499
1790
|
rex = 8
|
1791
|
+
elseif c == "L" then -- Force VEX.L.
|
1792
|
+
vex.l = true
|
1500
1793
|
elseif c == "r" then -- Merge 1st operand regno. into opcode.
|
1501
1794
|
addin = args[1]; opcode = opcode + (addin.reg % 8)
|
1502
1795
|
if narg < 2 then narg = 2 end
|
@@ -1520,21 +1813,42 @@ local function dopattern(pat, args, sz, op, needrex)
|
|
1520
1813
|
if t.xreg and t.xreg > 7 then rex = rex + 2 end
|
1521
1814
|
if s > 7 then rex = rex + 4 end
|
1522
1815
|
if needrex then rex = rex + 16 end
|
1523
|
-
wputop(szov, opcode, rex
|
1816
|
+
local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
|
1817
|
+
opcode = nil
|
1524
1818
|
local imark = sub(pat, -1) -- Force a mark (ugly).
|
1525
1819
|
-- Put ModRM/SIB with regno/last digit as spare.
|
1526
|
-
wputmrmsib(t, imark, s, addin and addin.vreg)
|
1820
|
+
wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
|
1527
1821
|
addin = nil
|
1822
|
+
elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
|
1823
|
+
local b = band(opcode, 255); opcode = shr(opcode, 8)
|
1824
|
+
local m = 1
|
1825
|
+
if b == 0x38 then m = 2
|
1826
|
+
elseif b == 0x3a then m = 3 end
|
1827
|
+
if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
|
1828
|
+
if b ~= 0x0f then
|
1829
|
+
werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
|
1830
|
+
"' in pattern `"..pat.."' for `"..op.."'")
|
1831
|
+
end
|
1832
|
+
local v = map_vexarg[c]
|
1833
|
+
if v then v = remove(args, v) end
|
1834
|
+
b = band(opcode, 255)
|
1835
|
+
local p = 0
|
1836
|
+
if b == 0x66 then p = 1
|
1837
|
+
elseif b == 0xf3 then p = 2
|
1838
|
+
elseif b == 0xf2 then p = 3 end
|
1839
|
+
if p ~= 0 then opcode = shr(opcode, 8) end
|
1840
|
+
if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
|
1841
|
+
vex = { m = m, p = p, v = v }
|
1528
1842
|
else
|
1529
1843
|
if opcode then -- Flush opcode.
|
1530
1844
|
if szov == "q" and rex == 0 then rex = rex + 8 end
|
1531
1845
|
if needrex then rex = rex + 16 end
|
1532
1846
|
if addin and addin.reg == -1 then
|
1533
|
-
wputop(szov, opcode - 7, rex)
|
1534
|
-
|
1847
|
+
local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
|
1848
|
+
wvreg("opcode", addin.vreg, psz, sk)
|
1535
1849
|
else
|
1536
1850
|
if addin and addin.reg > 7 then rex = rex + 1 end
|
1537
|
-
wputop(szov, opcode, rex)
|
1851
|
+
wputop(szov, opcode, rex, vex)
|
1538
1852
|
end
|
1539
1853
|
opcode = nil
|
1540
1854
|
end
|
@@ -1571,6 +1885,14 @@ local function dopattern(pat, args, sz, op, needrex)
|
|
1571
1885
|
else
|
1572
1886
|
wputlabel("REL_", imm, 2)
|
1573
1887
|
end
|
1888
|
+
elseif c == "s" then
|
1889
|
+
local reg = a.reg
|
1890
|
+
if reg < 0 then
|
1891
|
+
wputb(0)
|
1892
|
+
wvreg("imm.hi", a.vreg)
|
1893
|
+
else
|
1894
|
+
wputb(shl(reg, 4))
|
1895
|
+
end
|
1574
1896
|
else
|
1575
1897
|
werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
|
1576
1898
|
end
|
@@ -1647,11 +1969,14 @@ map_op[".template__"] = function(params, template, nparams)
|
|
1647
1969
|
if pat == "" then pat = lastpat else lastpat = pat end
|
1648
1970
|
if matchtm(tm, args) then
|
1649
1971
|
local prefix = sub(szm, 1, 1)
|
1650
|
-
if prefix == "/" then --
|
1651
|
-
|
1652
|
-
|
1653
|
-
|
1654
|
-
|
1972
|
+
if prefix == "/" then -- Exactly match leading operand sizes.
|
1973
|
+
for i = #szm,1,-1 do
|
1974
|
+
if i == 1 then
|
1975
|
+
dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
|
1976
|
+
return
|
1977
|
+
elseif args[i-1].opsize ~= sub(szm, i, i) then
|
1978
|
+
break
|
1979
|
+
end
|
1655
1980
|
end
|
1656
1981
|
else -- Match common operand size.
|
1657
1982
|
local szp = sz
|
@@ -1716,8 +2041,8 @@ if x64 then
|
|
1716
2041
|
rex = a.reg > 7 and 9 or 8
|
1717
2042
|
end
|
1718
2043
|
end
|
1719
|
-
wputop(sz, opcode, rex)
|
1720
|
-
|
2044
|
+
local psz, sk = wputop(sz, opcode, rex, nil, vreg)
|
2045
|
+
wvreg("opcode", vreg, psz, sk)
|
1721
2046
|
waction("IMM_D", format("(unsigned int)(%s)", op64))
|
1722
2047
|
waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
|
1723
2048
|
end
|