immunio 1.1.2 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. checksums.yaml +4 -4
  2. data/lib/immunio/version.rb +1 -1
  3. data/lua-hooks/Makefile +56 -109
  4. data/lua-hooks/ext/all.c +3 -14
  5. data/lua-hooks/ext/libinjection/module.mk +5 -0
  6. data/lua-hooks/ext/lpeg/module.mk +6 -0
  7. data/lua-hooks/ext/lua-cmsgpack/module.mk +2 -0
  8. data/lua-hooks/ext/lua-snapshot/module.mk +2 -0
  9. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  10. data/lua-hooks/ext/luajit/Makefile +2 -2
  11. data/lua-hooks/ext/luajit/README +2 -2
  12. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  13. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  14. data/lua-hooks/ext/luajit/doc/changes.html +15 -2
  15. data/lua-hooks/ext/luajit/doc/contact.html +3 -3
  16. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  17. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  18. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  19. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +4 -2
  20. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  21. data/lua-hooks/ext/luajit/doc/ext_jit.html +2 -2
  22. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  23. data/lua-hooks/ext/luajit/doc/extensions.html +9 -2
  24. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  25. data/lua-hooks/ext/luajit/doc/install.html +22 -18
  26. data/lua-hooks/ext/luajit/doc/luajit.html +3 -3
  27. data/lua-hooks/ext/luajit/doc/running.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/status.html +2 -2
  29. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  30. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +4 -4
  31. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  32. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +4 -4
  33. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +1 -1
  34. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +4 -4
  35. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  36. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +4 -4
  37. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +3 -3
  38. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  39. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +34 -7
  40. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +427 -102
  41. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +5 -5
  42. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  43. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  44. data/lua-hooks/ext/luajit/src/Makefile +36 -21
  45. data/lua-hooks/ext/luajit/src/Makefile.dep +3 -1
  46. data/lua-hooks/ext/luajit/src/host/buildvm.c +1 -1
  47. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  48. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +10 -1
  49. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +1 -1
  50. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  51. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +1 -1
  52. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  53. data/lua-hooks/ext/luajit/src/host/genminilua.lua +1 -1
  54. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  55. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +2 -2
  56. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +1 -1
  57. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +1 -1
  58. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  59. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +1 -1
  60. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  61. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +163 -73
  62. data/lua-hooks/ext/luajit/src/jit/dump.lua +2 -1
  63. data/lua-hooks/ext/luajit/src/jit/p.lua +1 -1
  64. data/lua-hooks/ext/luajit/src/jit/v.lua +1 -1
  65. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/lib_aux.c +1 -1
  67. data/lua-hooks/ext/luajit/src/lib_base.c +4 -5
  68. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  69. data/lua-hooks/ext/luajit/src/lib_debug.c +1 -1
  70. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -5
  71. data/lua-hooks/ext/luajit/src/lib_init.c +1 -1
  72. data/lua-hooks/ext/luajit/src/lib_io.c +2 -3
  73. data/lua-hooks/ext/luajit/src/lib_jit.c +1 -1
  74. data/lua-hooks/ext/luajit/src/lib_math.c +1 -1
  75. data/lua-hooks/ext/luajit/src/lib_os.c +2 -2
  76. data/lua-hooks/ext/luajit/src/lib_package.c +1 -1
  77. data/lua-hooks/ext/luajit/src/lib_string.c +1 -1
  78. data/lua-hooks/ext/luajit/src/lib_table.c +1 -1
  79. data/lua-hooks/ext/luajit/src/lj.supp +15 -0
  80. data/lua-hooks/ext/luajit/src/lj_alloc.c +1 -1
  81. data/lua-hooks/ext/luajit/src/lj_api.c +4 -1
  82. data/lua-hooks/ext/luajit/src/lj_arch.h +33 -7
  83. data/lua-hooks/ext/luajit/src/lj_asm.c +12 -5
  84. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  85. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +3 -13
  86. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +337 -71
  87. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2 -2
  88. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2 -2
  89. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  90. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  91. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  92. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  93. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  94. data/lua-hooks/ext/luajit/src/lj_buf.c +2 -4
  95. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -3
  96. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  97. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  98. data/lua-hooks/ext/luajit/src/lj_ccall.c +37 -14
  99. data/lua-hooks/ext/luajit/src/lj_ccall.h +3 -3
  100. data/lua-hooks/ext/luajit/src/lj_ccallback.c +16 -7
  101. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  102. data/lua-hooks/ext/luajit/src/lj_cconv.c +1 -1
  103. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_cdata.c +10 -1
  105. data/lua-hooks/ext/luajit/src/lj_cdata.h +3 -1
  106. data/lua-hooks/ext/luajit/src/lj_clib.c +1 -1
  107. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  108. data/lua-hooks/ext/luajit/src/lj_cparse.c +27 -6
  109. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_crecord.c +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_ctype.c +10 -8
  113. data/lua-hooks/ext/luajit/src/lj_ctype.h +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_def.h +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_dispatch.c +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_dispatch.h +21 -4
  119. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +1 -1
  120. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +7 -5
  121. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +1 -1
  123. data/lua-hooks/ext/luajit/src/lj_err.c +69 -31
  124. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  125. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  127. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +10 -40
  128. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  129. data/lua-hooks/ext/luajit/src/lj_frame.h +12 -1
  130. data/lua-hooks/ext/luajit/src/lj_func.c +1 -1
  131. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_gc.c +2 -2
  133. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  134. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_ir.c +31 -15
  137. data/lua-hooks/ext/luajit/src/lj_ir.h +1 -1
  138. data/lua-hooks/ext/luajit/src/lj_ircall.h +29 -1
  139. data/lua-hooks/ext/luajit/src/lj_iropt.h +2 -1
  140. data/lua-hooks/ext/luajit/src/lj_jit.h +2 -1
  141. data/lua-hooks/ext/luajit/src/lj_lex.c +28 -1
  142. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  143. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  144. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  145. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_mcode.c +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_meta.c +8 -8
  149. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  151. data/lua-hooks/ext/luajit/src/lj_obj.h +1 -1
  152. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +1 -1
  154. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +1 -1
  156. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +1 -1
  158. data/lua-hooks/ext/luajit/src/lj_opt_split.c +10 -5
  159. data/lua-hooks/ext/luajit/src/lj_parse.c +1 -1
  160. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  161. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  162. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_record.c +13 -5
  164. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_snap.c +20 -23
  166. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_state.c +1 -1
  168. data/lua-hooks/ext/luajit/src/lj_state.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_strfmt.c +12 -98
  172. data/lua-hooks/ext/luajit/src/lj_strfmt.h +4 -4
  173. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +591 -0
  174. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  175. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -1
  177. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  178. data/lua-hooks/ext/luajit/src/lj_target.h +1 -1
  179. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  180. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +1 -1
  181. data/lua-hooks/ext/luajit/src/lj_target_mips.h +30 -2
  182. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_target_x86.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_trace.c +7 -2
  185. data/lua-hooks/ext/luajit/src/lj_trace.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -3
  187. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  189. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -3
  190. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_vmmath.c +15 -15
  193. data/lua-hooks/ext/luajit/src/ljamalg.c +2 -1
  194. data/lua-hooks/ext/luajit/src/lua.h +1 -0
  195. data/lua-hooks/ext/luajit/src/luaconf.h +2 -2
  196. data/lua-hooks/ext/luajit/src/luajit.c +1 -1
  197. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  198. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  199. data/lua-hooks/ext/luajit/src/msvcbuild.bat +1 -1
  200. data/lua-hooks/ext/luajit/src/ps4build.bat +26 -6
  201. data/lua-hooks/ext/luajit/src/vm_arm.dasc +17 -9
  202. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +1 -1
  203. data/lua-hooks/ext/luajit/src/vm_mips.dasc +1562 -656
  204. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +3 -7
  205. data/lua-hooks/ext/luajit/src/vm_x64.dasc +10 -2
  206. data/lua-hooks/ext/luajit/src/vm_x86.dasc +5 -8
  207. data/lua-hooks/ext/luautf8/module.mk +2 -0
  208. data/lua-hooks/ext/module.mk +15 -0
  209. data/lua-hooks/ext/modules.h +17 -0
  210. data/lua-hooks/ext/perf/luacpu.c +1 -1
  211. data/lua-hooks/ext/perf/lualoadavg.c +1 -1
  212. data/lua-hooks/ext/perf/luameminfo.c +1 -1
  213. data/lua-hooks/ext/perf/luaoslib.c +124 -2
  214. data/lua-hooks/ext/perf/module.mk +5 -0
  215. data/lua-hooks/ext/sha1/luasha1.c +4 -2
  216. data/lua-hooks/ext/sha1/module.mk +5 -0
  217. data/lua-hooks/ext/sha2/luasha256.c +4 -2
  218. data/lua-hooks/ext/sha2/module.mk +5 -0
  219. data/lua-hooks/ext/sysutils/lua_utils.c +56 -0
  220. data/lua-hooks/ext/sysutils/module.mk +2 -0
  221. data/lua-hooks/lib/boot.lua +2 -1
  222. data/lua-hooks/lib/hooks/module.mk +31 -0
  223. data/lua-hooks/lib/hooks/xss/module.mk +4 -0
  224. data/lua-hooks/lib/lexers/module.mk +10 -0
  225. data/lua-hooks/lib/module.mk +38 -0
  226. data/lua-hooks/lib/schema/module.mk +3 -0
  227. data/lua-hooks/options.mk +59 -0
  228. metadata +21 -2
@@ -1,7 +1,7 @@
1
1
  ------------------------------------------------------------------------------
2
2
  -- DynASM x64 module.
3
3
  --
4
- -- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
4
+ -- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
5
5
  -- See dynasm.lua for full copyright notice.
6
6
  ------------------------------------------------------------------------------
7
7
  -- This module just sets 64 bit mode for the combined x86/x64 module.
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** DynASM x86 encoding engine.
3
- ** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
3
+ ** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
4
4
  ** Released under the MIT license. See dynasm.lua for full copyright notice.
5
5
  */
6
6
 
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
170
170
  dasm_State *D = Dst_REF;
171
171
  dasm_ActList p = D->actionlist + start;
172
172
  dasm_Section *sec = D->section;
173
- int pos = sec->pos, ofs = sec->ofs, mrm = 4;
173
+ int pos = sec->pos, ofs = sec->ofs, mrm = -1;
174
174
  int *b;
175
175
 
176
176
  if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
193
193
  b[pos++] = n;
194
194
  switch (action) {
195
195
  case DASM_DISP:
196
- if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
196
+ if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
197
197
  case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
198
198
  case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
199
199
  case DASM_IMM_D: ofs += 4; break;
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
203
203
  case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
204
204
  case DASM_SPACE: p++; ofs += n; break;
205
205
  case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
206
- case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
207
- if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
206
+ case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
207
+ if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
208
+ if (*p < 0x20 && (n&7) == 4) ofs++;
209
+ switch ((*p++ >> 3) & 3) {
210
+ case 3: n |= b[pos-3];
211
+ case 2: n |= b[pos-2];
212
+ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
213
+ }
214
+ continue;
208
215
  }
209
- mrm = 4;
216
+ mrm = -1;
210
217
  } else {
211
218
  int *pl, n;
212
219
  switch (action) {
@@ -391,7 +398,27 @@ int dasm_encode(Dst_DECL, void *buffer)
391
398
  case DASM_IMM_D: wd: dasmd(n); break;
392
399
  case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
393
400
  case DASM_IMM_W: dasmw(n); break;
394
- case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
401
+ case DASM_VREG: {
402
+ int t = *p++;
403
+ unsigned char *ex = cp - (t&7);
404
+ if ((n & 8) && t < 0xa0) {
405
+ if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
406
+ n &= 7;
407
+ } else if (n & 0x10) {
408
+ if (*ex & 0x80) {
409
+ *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
410
+ }
411
+ while (++ex < cp) ex[-1] = *ex;
412
+ if (mark) mark--;
413
+ cp--;
414
+ n &= 7;
415
+ }
416
+ if (t >= 0xc0) n <<= 4;
417
+ else if (t >= 0x40) n <<= 3;
418
+ else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
419
+ cp[-1] ^= n;
420
+ break;
421
+ }
395
422
  case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
396
423
  b++; n = (int)(ptrdiff_t)D->globals[-n];
397
424
  case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
@@ -1,7 +1,7 @@
1
1
  ------------------------------------------------------------------------------
2
2
  -- DynASM x86/x64 module.
3
3
  --
4
- -- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
4
+ -- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
5
5
  -- See dynasm.lua for full copyright notice.
6
6
  ------------------------------------------------------------------------------
7
7
 
@@ -11,9 +11,9 @@ local x64 = x64
11
11
  local _info = {
12
12
  arch = x64 and "x64" or "x86",
13
13
  description = "DynASM x86/x64 module",
14
- version = "1.3.0",
15
- vernum = 10300,
16
- release = "2011-05-05",
14
+ version = "1.4.0",
15
+ vernum = 10400,
16
+ release = "2015-10-18",
17
17
  author = "Mike Pall",
18
18
  license = "MIT",
19
19
  }
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
27
27
  local _s = string
28
28
  local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29
29
  local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30
- local concat, sort = table.concat, table.sort
30
+ local concat, sort, remove = table.concat, table.sort, table.remove
31
31
  local bit = bit or require("bit")
32
- local band, shl, shr = bit.band, bit.lshift, bit.rshift
32
+ local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33
33
 
34
34
  -- Inherited tables and callbacks.
35
35
  local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
41
41
  -- int arg, 1 buffer pos:
42
42
  "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43
43
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44
- "VREG", "SPACE", -- !x64: VREG support NYI.
44
+ "VREG", "SPACE",
45
45
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
46
46
  "SETLABEL", "REL_A",
47
47
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
83
83
  -- Current number of section buffer positions for dasm_put().
84
84
  local secpos = 1
85
85
 
86
+ -- VREG kind encodings, pre-shifted by 5 bits.
87
+ local map_vreg = {
88
+ ["modrm.rm.m"] = 0x00,
89
+ ["modrm.rm.r"] = 0x20,
90
+ ["opcode"] = 0x20,
91
+ ["sib.base"] = 0x20,
92
+ ["sib.index"] = 0x40,
93
+ ["modrm.reg"] = 0x80,
94
+ ["vex.v"] = 0xa0,
95
+ ["imm.hi"] = 0xc0,
96
+ }
97
+
98
+ -- Current number of VREG actions contributing to REX/VEX shrinkage.
99
+ local vreg_shrink_count = 0
100
+
86
101
  ------------------------------------------------------------------------------
87
102
 
88
103
  -- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
134
149
  if a or num then secpos = secpos + (num or 1) end
135
150
  end
136
151
 
152
+ -- Optionally add a VREG action.
153
+ local function wvreg(kind, vreg, psz, sk, defer)
154
+ if not vreg then return end
155
+ waction("VREG", vreg)
156
+ local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
157
+ if b < (sk or 0) then
158
+ vreg_shrink_count = vreg_shrink_count + 1
159
+ end
160
+ if not defer then
161
+ b = b + vreg_shrink_count * 8
162
+ vreg_shrink_count = 0
163
+ end
164
+ wputxb(b + (psz or 0))
165
+ end
166
+
137
167
  -- Add call to embedded DynASM C code.
138
168
  local function wcall(func, args)
139
169
  wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
299
329
  local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
300
330
  if needrex then map_reg_needrex[iname] = true end
301
331
  local name
302
- if sz == "o" then name = format("xmm%d", i)
332
+ if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
303
333
  elseif sz == "f" then name = format("st%d", i)
304
334
  else name = format("r%d%s", i, sz == addrsize and "" or sz) end
305
335
  map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326
356
  mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327
357
  map_reg_valid_index[map_archdef.esp] = false
328
358
  if x64 then map_reg_valid_index[map_archdef.rsp] = false end
359
+ if x64 then map_reg_needrex[map_archdef.Rb] = true end
329
360
  map_archdef["Ra"] = "@"..addrsize
330
361
 
331
362
  -- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
334
365
  -- SSE registers (oword sized, but qword and dword accessible).
335
366
  mkrmap("o", "xmm")
336
367
 
368
+ -- AVX registers (yword sized, but oword, qword and dword accessible).
369
+ mkrmap("y", "ymm")
370
+
337
371
  -- Operand size prefixes to codes.
338
372
  local map_opsize = {
339
- byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
340
- aword = addrsize,
373
+ byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
374
+ tword = "t", aword = addrsize,
341
375
  }
342
376
 
343
377
  -- Operand size code to number.
344
378
  local map_opsizenum = {
345
- b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
379
+ b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
346
380
  }
347
381
 
348
382
  -- Operand size code to name.
349
383
  local map_opsizename = {
350
- b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
351
- f = "fpword",
384
+ b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
385
+ t = "tword", f = "fpword",
352
386
  }
353
387
 
354
388
  -- Valid index register scale factors.
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
460
494
  end
461
495
 
462
496
  -- Put multi-byte opcode with operand-size dependent modifications.
463
- local function wputop(sz, op, rex)
497
+ local function wputop(sz, op, rex, vex, vregr, vregxb)
498
+ local psz, sk = 0, nil
499
+ if vex then
500
+ local tail
501
+ if vex.m == 1 and band(rex, 11) == 0 then
502
+ if x64 and vregxb then
503
+ sk = map_vreg["modrm.reg"]
504
+ else
505
+ wputb(0xc5)
506
+ tail = shl(bxor(band(rex, 4), 4), 5)
507
+ psz = 3
508
+ end
509
+ end
510
+ if not tail then
511
+ wputb(0xc4)
512
+ wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
513
+ tail = shl(band(rex, 8), 4)
514
+ psz = 4
515
+ end
516
+ local reg, vreg = 0, nil
517
+ if vex.v then
518
+ reg = vex.v.reg
519
+ if not reg then werror("bad vex operand") end
520
+ if reg < 0 then reg = 0; vreg = vex.v.vreg end
521
+ end
522
+ if sz == "y" or vex.l then tail = tail + 4 end
523
+ wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
524
+ wvreg("vex.v", vreg)
525
+ rex = 0
526
+ if op >= 256 then werror("bad vex opcode") end
527
+ else
528
+ if rex ~= 0 then
529
+ if not x64 then werror("bad operand size") end
530
+ elseif (vregr or vregxb) and x64 then
531
+ rex = 0x10
532
+ sk = map_vreg["vex.v"]
533
+ end
534
+ end
464
535
  local r
465
- if rex ~= 0 and not x64 then werror("bad operand size") end
466
536
  if sz == "w" then wputb(102) end
467
537
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
468
538
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
471
541
  if rex ~= 0 then
472
542
  local opc3 = band(op, 0xffff00)
473
543
  if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
474
- wputb(64 + band(rex, 15)); rex = 0
544
+ wputb(64 + band(rex, 15)); rex = 0; psz = 2
475
545
  end
476
546
  end
477
- wputb(shr(op, 16)); op = band(op, 0xffff)
547
+ wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
478
548
  end
479
549
  if op >= 256 then
480
550
  local b = shr(op, 8)
481
- if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
482
- wputb(b)
483
- op = band(op, 255)
551
+ if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
552
+ wputb(b); op = band(op, 255); psz = psz + 1
484
553
  end
485
- if rex ~= 0 then wputb(64 + band(rex, 15)) end
554
+ if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
486
555
  if sz == "b" then op = op - 1 end
487
556
  wputb(op)
557
+ return psz, sk
488
558
  end
489
559
 
490
560
  -- Put ModRM or SIB formatted byte.
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
494
564
  end
495
565
 
496
566
  -- Put ModRM/SIB plus optional displacement.
497
- local function wputmrmsib(t, imark, s, vsreg)
567
+ local function wputmrmsib(t, imark, s, vsreg, psz, sk)
498
568
  local vreg, vxreg
499
569
  local reg, xreg = t.reg, t.xreg
500
570
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
504
574
  -- Register mode.
505
575
  if sub(t.mode, 1, 1) == "r" then
506
576
  wputmodrm(3, s, reg)
507
- if vsreg then waction("VREG", vsreg); wputxb(2) end
508
- if vreg then waction("VREG", vreg); wputxb(0) end
577
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
578
+ wvreg("modrm.rm.r", vreg, psz+1, sk)
509
579
  return
510
580
  end
511
581
 
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
519
589
  -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
520
590
  wputmodrm(0, s, 4)
521
591
  if imark == "I" then waction("MARK") end
522
- if vsreg then waction("VREG", vsreg); wputxb(2) end
592
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
523
593
  wputmodrm(t.xsc, xreg, 5)
524
- if vxreg then waction("VREG", vxreg); wputxb(3) end
594
+ wvreg("sib.index", vxreg, psz+2, sk)
525
595
  else
526
596
  -- Pure 32 bit displacement.
527
597
  if x64 and tdisp ~= "table" then
528
598
  wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
599
+ wvreg("modrm.reg", vsreg, psz+1, sk)
529
600
  if imark == "I" then waction("MARK") end
530
601
  wputmodrm(0, 4, 5)
531
602
  else
532
603
  riprel = x64
533
604
  wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
605
+ wvreg("modrm.reg", vsreg, psz+1, sk)
534
606
  if imark == "I" then waction("MARK") end
535
607
  end
536
- if vsreg then waction("VREG", vsreg); wputxb(2) end
537
608
  end
538
609
  if riprel then -- Emit rip-relative displacement.
539
610
  if match("UWSiI", imark) then
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
561
632
  if xreg or band(reg, 7) == 4 then
562
633
  wputmodrm(m or 2, s, 4) -- ModRM.
563
634
  if m == nil or imark == "I" then waction("MARK") end
564
- if vsreg then waction("VREG", vsreg); wputxb(2) end
635
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
565
636
  wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
566
- if vxreg then waction("VREG", vxreg); wputxb(3) end
567
- if vreg then waction("VREG", vreg); wputxb(1) end
637
+ wvreg("sib.index", vxreg, psz+2, sk, vreg)
638
+ wvreg("sib.base", vreg, psz+2, sk)
568
639
  else
569
640
  wputmodrm(m or 2, s, reg) -- ModRM.
570
641
  if (imark == "I" and (m == 1 or m == 2)) or
571
642
  (m == nil and (vsreg or vreg)) then waction("MARK") end
572
- if vsreg then waction("VREG", vsreg); wputxb(2) end
573
- if vreg then waction("VREG", vreg); wputxb(1) end
643
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
644
+ wvreg("modrm.rm.m", vreg, psz+1, sk)
574
645
  end
575
646
 
576
647
  -- Put displacement.
@@ -881,9 +952,15 @@ end
881
952
  -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
882
953
  -- The spare 3 bits are either filled with the last hex digit or
883
954
  -- the result from a previous "r"/"R". The opcode is restored.
955
+ -- "u" Use VEX encoding, vvvv unused.
956
+ -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
957
+ -- removed from the list used by future characters).
958
+ -- "L" Force VEX.L
884
959
  --
885
960
  -- All of the following characters force a flush of the opcode:
886
961
  -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
962
+ -- "s" stores a 4 bit immediate from the last register operand,
963
+ -- followed by 4 zero bits.
887
964
  -- "S" stores a signed 8 bit immediate from the last operand.
888
965
  -- "U" stores an unsigned 8 bit immediate from the last operand.
889
966
  -- "W" stores an unsigned 16 bit immediate from the last operand.
@@ -1081,10 +1158,11 @@ local map_op = {
1081
1158
  btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
1082
1159
  bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
1083
1160
 
1084
- shld_3 = "mriqdw:0FA4RmU|mrCqdw:0FA5Rm",
1085
- shrd_3 = "mriqdw:0FACRmU|mrCqdw:0FADRm",
1161
+ shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
1162
+ shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
1086
1163
 
1087
1164
  rdtsc_0 = "0F31", -- P1+
1165
+ rdpmc_0 = "0F33", -- P6+
1088
1166
  cpuid_0 = "0FA2", -- P1+
1089
1167
 
1090
1168
  -- floating point ops
@@ -1190,7 +1268,7 @@ local map_op = {
1190
1268
  cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
1191
1269
  cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
1192
1270
  cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
1193
- cvtss2si_2 = "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
1271
+ cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
1194
1272
  cvttpd2dq_2 = "rmo:660FE6rM",
1195
1273
  cvttps2dq_2 = "rmo:F30F5BrM",
1196
1274
  cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
@@ -1225,46 +1303,14 @@ local map_op = {
1225
1303
  movups_2 = "rmo:0F10rM|mro:0F11Rm",
1226
1304
  orpd_2 = "rmo:660F56rM",
1227
1305
  orps_2 = "rmo:0F56rM",
1228
- packssdw_2 = "rmo:660F6BrM",
1229
- packsswb_2 = "rmo:660F63rM",
1230
- packuswb_2 = "rmo:660F67rM",
1231
- paddb_2 = "rmo:660FFCrM",
1232
- paddd_2 = "rmo:660FFErM",
1233
- paddq_2 = "rmo:660FD4rM",
1234
- paddsb_2 = "rmo:660FECrM",
1235
- paddsw_2 = "rmo:660FEDrM",
1236
- paddusb_2 = "rmo:660FDCrM",
1237
- paddusw_2 = "rmo:660FDDrM",
1238
- paddw_2 = "rmo:660FFDrM",
1239
- pand_2 = "rmo:660FDBrM",
1240
- pandn_2 = "rmo:660FDFrM",
1241
1306
  pause_0 = "F390",
1242
- pavgb_2 = "rmo:660FE0rM",
1243
- pavgw_2 = "rmo:660FE3rM",
1244
- pcmpeqb_2 = "rmo:660F74rM",
1245
- pcmpeqd_2 = "rmo:660F76rM",
1246
- pcmpeqw_2 = "rmo:660F75rM",
1247
- pcmpgtb_2 = "rmo:660F64rM",
1248
- pcmpgtd_2 = "rmo:660F66rM",
1249
- pcmpgtw_2 = "rmo:660F65rM",
1250
- pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
1307
+ pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1251
1308
  pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1252
- pmaddwd_2 = "rmo:660FF5rM",
1253
- pmaxsw_2 = "rmo:660FEErM",
1254
- pmaxub_2 = "rmo:660FDErM",
1255
- pminsw_2 = "rmo:660FEArM",
1256
- pminub_2 = "rmo:660FDArM",
1257
1309
  pmovmskb_2 = "rr/do:660FD7rM",
1258
- pmulhuw_2 = "rmo:660FE4rM",
1259
- pmulhw_2 = "rmo:660FE5rM",
1260
- pmullw_2 = "rmo:660FD5rM",
1261
- pmuludq_2 = "rmo:660FF4rM",
1262
- por_2 = "rmo:660FEBrM",
1263
1310
  prefetchnta_1 = "xb:n0F180m",
1264
1311
  prefetcht0_1 = "xb:n0F181m",
1265
1312
  prefetcht1_1 = "xb:n0F182m",
1266
1313
  prefetcht2_1 = "xb:n0F183m",
1267
- psadbw_2 = "rmo:660FF6rM",
1268
1314
  pshufd_3 = "rmio:660F70rMU",
1269
1315
  pshufhw_3 = "rmio:F30F70rMU",
1270
1316
  pshuflw_3 = "rmio:F20F70rMU",
@@ -1278,23 +1324,6 @@ local map_op = {
1278
1324
  psrldq_2 = "rio:660F733mU",
1279
1325
  psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1280
1326
  psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1281
- psubb_2 = "rmo:660FF8rM",
1282
- psubd_2 = "rmo:660FFArM",
1283
- psubq_2 = "rmo:660FFBrM",
1284
- psubsb_2 = "rmo:660FE8rM",
1285
- psubsw_2 = "rmo:660FE9rM",
1286
- psubusb_2 = "rmo:660FD8rM",
1287
- psubusw_2 = "rmo:660FD9rM",
1288
- psubw_2 = "rmo:660FF9rM",
1289
- punpckhbw_2 = "rmo:660F68rM",
1290
- punpckhdq_2 = "rmo:660F6ArM",
1291
- punpckhqdq_2 = "rmo:660F6DrM",
1292
- punpckhwd_2 = "rmo:660F69rM",
1293
- punpcklbw_2 = "rmo:660F60rM",
1294
- punpckldq_2 = "rmo:660F62rM",
1295
- punpcklqdq_2 = "rmo:660F6CrM",
1296
- punpcklwd_2 = "rmo:660F61rM",
1297
- pxor_2 = "rmo:660FEFrM",
1298
1327
  rcpps_2 = "rmo:0F53rM",
1299
1328
  rcpss_2 = "rro:F30F53rM|rx/od:",
1300
1329
  rsqrtps_2 = "rmo:0F52rM",
@@ -1352,7 +1381,7 @@ local map_op = {
1352
1381
  dpps_3 = "rmio:660F3A40rMU",
1353
1382
  extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
1354
1383
  insertps_3 = "rrio:660F3A41rMU|rxi/od:",
1355
- movntdqa_2 = "rmo:660F382ArM",
1384
+ movntdqa_2 = "rxo:660F382ArM",
1356
1385
  mpsadbw_3 = "rmio:660F3A42rMU",
1357
1386
  packusdw_2 = "rmo:660F382BrM",
1358
1387
  pblendvb_3 = "rmRo:660F3810rM",
@@ -1412,6 +1441,238 @@ local map_op = {
1412
1441
  movntsd_2 = "xr/qo:nF20F2BRm",
1413
1442
  movntss_2 = "xr/do:F30F2BRm",
1414
1443
  -- popcnt is also in SSE4.2
1444
+
1445
+ -- AES-NI
1446
+ aesdec_2 = "rmo:660F38DErM",
1447
+ aesdeclast_2 = "rmo:660F38DFrM",
1448
+ aesenc_2 = "rmo:660F38DCrM",
1449
+ aesenclast_2 = "rmo:660F38DDrM",
1450
+ aesimc_2 = "rmo:660F38DBrM",
1451
+ aeskeygenassist_3 = "rmio:660F3ADFrMU",
1452
+ pclmulqdq_3 = "rmio:660F3A44rMU",
1453
+
1454
+ -- AVX FP ops
1455
+ vaddsubpd_3 = "rrmoy:660FVD0rM",
1456
+ vaddsubps_3 = "rrmoy:F20FVD0rM",
1457
+ vandpd_3 = "rrmoy:660FV54rM",
1458
+ vandps_3 = "rrmoy:0FV54rM",
1459
+ vandnpd_3 = "rrmoy:660FV55rM",
1460
+ vandnps_3 = "rrmoy:0FV55rM",
1461
+ vblendpd_4 = "rrmioy:660F3AV0DrMU",
1462
+ vblendps_4 = "rrmioy:660F3AV0CrMU",
1463
+ vblendvpd_4 = "rrmroy:660F3AV4BrMs",
1464
+ vblendvps_4 = "rrmroy:660F3AV4ArMs",
1465
+ vbroadcastf128_2 = "rx/yo:660F38u1ArM",
1466
+ vcmppd_4 = "rrmioy:660FVC2rMU",
1467
+ vcmpps_4 = "rrmioy:0FVC2rMU",
1468
+ vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
1469
+ vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
1470
+ vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
1471
+ vcomiss_2 = "rro:0Fu2FrM|rx/od:",
1472
+ vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
1473
+ vcvtdq2ps_2 = "rmoy:0Fu5BrM",
1474
+ vcvtpd2dq_2 = "rmoy:F20FuE6rM",
1475
+ vcvtpd2ps_2 = "rmoy:660Fu5ArM",
1476
+ vcvtps2dq_2 = "rmoy:660Fu5BrM",
1477
+ vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
1478
+ vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1479
+ vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
1480
+ vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1481
+ vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1482
+ vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
1483
+ vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1484
+ vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1485
+ vcvttps2dq_2 = "rmoy:F30Fu5BrM",
1486
+ vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1487
+ vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1488
+ vdppd_4 = "rrmio:660F3AV41rMU",
1489
+ vdpps_4 = "rrmioy:660F3AV40rMU",
1490
+ vextractf128_3 = "mri/oy:660F3AuL19RmU",
1491
+ vextractps_3 = "mri/do:660F3Au17RmU",
1492
+ vhaddpd_3 = "rrmoy:660FV7CrM",
1493
+ vhaddps_3 = "rrmoy:F20FV7CrM",
1494
+ vhsubpd_3 = "rrmoy:660FV7DrM",
1495
+ vhsubps_3 = "rrmoy:F20FV7DrM",
1496
+ vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
1497
+ vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
1498
+ vldmxcsr_1 = "xd:0FuAE2m",
1499
+ vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1500
+ vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1501
+ vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
1502
+ vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
1503
+ vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1504
+ vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1505
+ vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
1506
+ vmovhlps_3 = "rrro:0FV12rM",
1507
+ vmovhpd_2 = "xr/qo:660Fu17Rm",
1508
+ vmovhpd_3 = "rrx/ooq:660FV16rM",
1509
+ vmovhps_2 = "xr/qo:0Fu17Rm",
1510
+ vmovhps_3 = "rrx/ooq:0FV16rM",
1511
+ vmovlhps_3 = "rrro:0FV16rM",
1512
+ vmovlpd_2 = "xr/qo:660Fu13Rm",
1513
+ vmovlpd_3 = "rrx/ooq:660FV12rM",
1514
+ vmovlps_2 = "xr/qo:0Fu13Rm",
1515
+ vmovlps_3 = "rrx/ooq:0FV12rM",
1516
+ vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1517
+ vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1518
+ vmovntpd_2 = "xroy:660Fu2BRm",
1519
+ vmovntps_2 = "xroy:0Fu2BRm",
1520
+ vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1521
+ vmovsd_3 = "rrro:F20FV10rM",
1522
+ vmovshdup_2 = "rmoy:F30Fu16rM",
1523
+ vmovsldup_2 = "rmoy:F30Fu12rM",
1524
+ vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1525
+ vmovss_3 = "rrro:F30FV10rM",
1526
+ vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
1527
+ vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
1528
+ vorpd_3 = "rrmoy:660FV56rM",
1529
+ vorps_3 = "rrmoy:0FV56rM",
1530
+ vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1531
+ vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1532
+ vperm2f128_4 = "rrmiy:660F3AV06rMU",
1533
+ vptestpd_2 = "rmoy:660F38u0FrM",
1534
+ vptestps_2 = "rmoy:660F38u0ErM",
1535
+ vrcpps_2 = "rmoy:0Fu53rM",
1536
+ vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
1537
+ vrsqrtps_2 = "rmoy:0Fu52rM",
1538
+ vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
1539
+ vroundpd_3 = "rmioy:660F3AV09rMU",
1540
+ vroundps_3 = "rmioy:660F3AV08rMU",
1541
+ vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
1542
+ vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
1543
+ vshufpd_4 = "rrmioy:660FVC6rMU",
1544
+ vshufps_4 = "rrmioy:0FVC6rMU",
1545
+ vsqrtps_2 = "rmoy:0Fu51rM",
1546
+ vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
1547
+ vsqrtpd_2 = "rmoy:660Fu51rM",
1548
+ vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
1549
+ vstmxcsr_1 = "xd:0FuAE3m",
1550
+ vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
1551
+ vucomiss_2 = "rro:0Fu2ErM|rx/od:",
1552
+ vunpckhpd_3 = "rrmoy:660FV15rM",
1553
+ vunpckhps_3 = "rrmoy:0FV15rM",
1554
+ vunpcklpd_3 = "rrmoy:660FV14rM",
1555
+ vunpcklps_3 = "rrmoy:0FV14rM",
1556
+ vxorpd_3 = "rrmoy:660FV57rM",
1557
+ vxorps_3 = "rrmoy:0FV57rM",
1558
+ vzeroall_0 = "0FuL77",
1559
+ vzeroupper_0 = "0Fu77",
1560
+
1561
+ -- AVX2 FP ops
1562
+ vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1563
+ vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
1564
+ -- *vgather* (!vsib)
1565
+ vpermpd_3 = "rmiy:660F3AuX01rMU",
1566
+ vpermps_3 = "rrmy:660F38V16rM",
1567
+
1568
+ -- AVX, AVX2 integer ops
1569
+ -- In general, xmm requires AVX, ymm requires AVX2.
1570
+ vaesdec_3 = "rrmo:660F38VDErM",
1571
+ vaesdeclast_3 = "rrmo:660F38VDFrM",
1572
+ vaesenc_3 = "rrmo:660F38VDCrM",
1573
+ vaesenclast_3 = "rrmo:660F38VDDrM",
1574
+ vaesimc_2 = "rmo:660F38uDBrM",
1575
+ vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
1576
+ vlddqu_2 = "rxoy:F20FuF0rM",
1577
+ vmaskmovdqu_2 = "rro:660FuF7rM",
1578
+ vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1579
+ vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1580
+ vmovntdq_2 = "xroy:660FuE7Rm",
1581
+ vmovntdqa_2 = "rxoy:660F38u2ArM",
1582
+ vmpsadbw_4 = "rrmioy:660F3AV42rMU",
1583
+ vpabsb_2 = "rmoy:660F38u1CrM",
1584
+ vpabsd_2 = "rmoy:660F38u1ErM",
1585
+ vpabsw_2 = "rmoy:660F38u1DrM",
1586
+ vpackusdw_3 = "rrmoy:660F38V2BrM",
1587
+ vpalignr_4 = "rrmioy:660F3AV0FrMU",
1588
+ vpblendvb_4 = "rrmroy:660F3AV4CrMs",
1589
+ vpblendw_4 = "rrmioy:660F3AV0ErMU",
1590
+ vpclmulqdq_4 = "rrmio:660F3AV44rMU",
1591
+ vpcmpeqq_3 = "rrmoy:660F38V29rM",
1592
+ vpcmpestri_3 = "rmio:660F3Au61rMU",
1593
+ vpcmpestrm_3 = "rmio:660F3Au60rMU",
1594
+ vpcmpgtq_3 = "rrmoy:660F38V37rM",
1595
+ vpcmpistri_3 = "rmio:660F3Au63rMU",
1596
+ vpcmpistrm_3 = "rmio:660F3Au62rMU",
1597
+ vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1598
+ vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1599
+ vpextrd_3 = "mri/do:660F3Au16RmU",
1600
+ vpextrq_3 = "mri/qo:660F3Au16RmU",
1601
+ vphaddw_3 = "rrmoy:660F38V01rM",
1602
+ vphaddd_3 = "rrmoy:660F38V02rM",
1603
+ vphaddsw_3 = "rrmoy:660F38V03rM",
1604
+ vphminposuw_2 = "rmo:660F38u41rM",
1605
+ vphsubw_3 = "rrmoy:660F38V05rM",
1606
+ vphsubd_3 = "rrmoy:660F38V06rM",
1607
+ vphsubsw_3 = "rrmoy:660F38V07rM",
1608
+ vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
1609
+ vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
1610
+ vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
1611
+ vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
1612
+ vpmaddubsw_3 = "rrmoy:660F38V04rM",
1613
+ vpmaxsb_3 = "rrmoy:660F38V3CrM",
1614
+ vpmaxsd_3 = "rrmoy:660F38V3DrM",
1615
+ vpmaxuw_3 = "rrmoy:660F38V3ErM",
1616
+ vpmaxud_3 = "rrmoy:660F38V3FrM",
1617
+ vpminsb_3 = "rrmoy:660F38V38rM",
1618
+ vpminsd_3 = "rrmoy:660F38V39rM",
1619
+ vpminuw_3 = "rrmoy:660F38V3ArM",
1620
+ vpminud_3 = "rrmoy:660F38V3BrM",
1621
+ vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1622
+ vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
1623
+ vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
1624
+ vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
1625
+ vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
1626
+ vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
1627
+ vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
1628
+ vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
1629
+ vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
1630
+ vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
1631
+ vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
1632
+ vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
1633
+ vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
1634
+ vpmuldq_3 = "rrmoy:660F38V28rM",
1635
+ vpmulhrsw_3 = "rrmoy:660F38V0BrM",
1636
+ vpmulld_3 = "rrmoy:660F38V40rM",
1637
+ vpshufb_3 = "rrmoy:660F38V00rM",
1638
+ vpshufd_3 = "rmioy:660Fu70rMU",
1639
+ vpshufhw_3 = "rmioy:F30Fu70rMU",
1640
+ vpshuflw_3 = "rmioy:F20Fu70rMU",
1641
+ vpsignb_3 = "rrmoy:660F38V08rM",
1642
+ vpsignw_3 = "rrmoy:660F38V09rM",
1643
+ vpsignd_3 = "rrmoy:660F38V0ArM",
1644
+ vpslldq_3 = "rrioy:660Fv737mU",
1645
+ vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1646
+ vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1647
+ vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1648
+ vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1649
+ vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1650
+ vpsrldq_3 = "rrioy:660Fv733mU",
1651
+ vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1652
+ vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1653
+ vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1654
+ vptest_2 = "rmoy:660F38u17rM",
1655
+
1656
+ -- AVX2 integer ops
1657
+ vbroadcasti128_2 = "rx/yo:660F38u5ArM",
1658
+ vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
1659
+ vextracti128_3 = "mri/oy:660F3AuL39RmU",
1660
+ vpblendd_4 = "rrmioy:660F3AV02rMU",
1661
+ vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1662
+ vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1663
+ vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1664
+ vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1665
+ vpermd_3 = "rrmy:660F38V36rM",
1666
+ vpermq_3 = "rmiy:660F3AuX00rMU",
1667
+ -- *vpgather* (!vsib)
1668
+ vperm2i128_4 = "rrmiy:660F3AV46rMU",
1669
+ vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1670
+ vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1671
+ vpsllvd_3 = "rrmoy:660F38V47rM",
1672
+ vpsllvq_3 = "rrmoy:660F38VX47rM",
1673
+ vpsravd_3 = "rrmoy:660F38V46rM",
1674
+ vpsrlvd_3 = "rrmoy:660F38V45rM",
1675
+ vpsrlvq_3 = "rrmoy:660F38VX45rM",
1415
1676
  }
1416
1677
 
1417
1678
  ------------------------------------------------------------------------------
@@ -1462,28 +1723,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
1462
1723
  map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
1463
1724
  end
1464
1725
 
1465
- -- SSE FP arithmetic ops.
1726
+ -- SSE / AVX FP arithmetic ops.
1466
1727
  for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
1467
1728
  sub = 12, min = 13, div = 14, max = 15 } do
1468
1729
  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
1469
1730
  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
1470
1731
  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
1471
1732
  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
1733
+ if n ~= 1 then
1734
+ map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
1735
+ map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
1736
+ map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
1737
+ map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
1738
+ end
1739
+ end
1740
+
1741
+ -- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
1742
+ for name,n in pairs{
1743
+ paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
1744
+ paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
1745
+ packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
1746
+ paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
1747
+ pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
1748
+ pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
1749
+ pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
1750
+ pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
1751
+ pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
1752
+ pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
1753
+ psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
1754
+ psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
1755
+ punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
1756
+ punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
1757
+ punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
1758
+ } do
1759
+ map_op[name.."_2"] = format("rmo:660F%02XrM", n)
1760
+ map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
1472
1761
  end
1473
1762
 
1474
1763
  ------------------------------------------------------------------------------
1475
1764
 
1765
+ local map_vexarg = { u = false, v = 1, V = 2 }
1766
+
1476
1767
  -- Process pattern string.
1477
1768
  local function dopattern(pat, args, sz, op, needrex)
1478
- local digit, addin
1769
+ local digit, addin, vex
1479
1770
  local opcode = 0
1480
1771
  local szov = sz
1481
1772
  local narg = 1
1482
1773
  local rex = 0
1483
1774
 
1484
1775
  -- Limit number of section buffer positions used by a single dasm_put().
1485
- -- A single opcode needs a maximum of 5 positions.
1486
- if secpos+5 > maxsecpos then wflush() end
1776
+ -- A single opcode needs a maximum of 6 positions.
1777
+ if secpos+6 > maxsecpos then wflush() end
1487
1778
 
1488
1779
  -- Process each character.
1489
1780
  for c in gmatch(pat.."|", ".") do
@@ -1497,6 +1788,8 @@ local function dopattern(pat, args, sz, op, needrex)
1497
1788
  szov = nil
1498
1789
  elseif c == "X" then -- Force REX.W.
1499
1790
  rex = 8
1791
+ elseif c == "L" then -- Force VEX.L.
1792
+ vex.l = true
1500
1793
  elseif c == "r" then -- Merge 1st operand regno. into opcode.
1501
1794
  addin = args[1]; opcode = opcode + (addin.reg % 8)
1502
1795
  if narg < 2 then narg = 2 end
@@ -1520,21 +1813,42 @@ local function dopattern(pat, args, sz, op, needrex)
1520
1813
  if t.xreg and t.xreg > 7 then rex = rex + 2 end
1521
1814
  if s > 7 then rex = rex + 4 end
1522
1815
  if needrex then rex = rex + 16 end
1523
- wputop(szov, opcode, rex); opcode = nil
1816
+ local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1817
+ opcode = nil
1524
1818
  local imark = sub(pat, -1) -- Force a mark (ugly).
1525
1819
  -- Put ModRM/SIB with regno/last digit as spare.
1526
- wputmrmsib(t, imark, s, addin and addin.vreg)
1820
+ wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1527
1821
  addin = nil
1822
+ elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1823
+ local b = band(opcode, 255); opcode = shr(opcode, 8)
1824
+ local m = 1
1825
+ if b == 0x38 then m = 2
1826
+ elseif b == 0x3a then m = 3 end
1827
+ if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1828
+ if b ~= 0x0f then
1829
+ werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1830
+ "' in pattern `"..pat.."' for `"..op.."'")
1831
+ end
1832
+ local v = map_vexarg[c]
1833
+ if v then v = remove(args, v) end
1834
+ b = band(opcode, 255)
1835
+ local p = 0
1836
+ if b == 0x66 then p = 1
1837
+ elseif b == 0xf3 then p = 2
1838
+ elseif b == 0xf2 then p = 3 end
1839
+ if p ~= 0 then opcode = shr(opcode, 8) end
1840
+ if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1841
+ vex = { m = m, p = p, v = v }
1528
1842
  else
1529
1843
  if opcode then -- Flush opcode.
1530
1844
  if szov == "q" and rex == 0 then rex = rex + 8 end
1531
1845
  if needrex then rex = rex + 16 end
1532
1846
  if addin and addin.reg == -1 then
1533
- wputop(szov, opcode - 7, rex)
1534
- waction("VREG", addin.vreg); wputxb(0)
1847
+ local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1848
+ wvreg("opcode", addin.vreg, psz, sk)
1535
1849
  else
1536
1850
  if addin and addin.reg > 7 then rex = rex + 1 end
1537
- wputop(szov, opcode, rex)
1851
+ wputop(szov, opcode, rex, vex)
1538
1852
  end
1539
1853
  opcode = nil
1540
1854
  end
@@ -1571,6 +1885,14 @@ local function dopattern(pat, args, sz, op, needrex)
1571
1885
  else
1572
1886
  wputlabel("REL_", imm, 2)
1573
1887
  end
1888
+ elseif c == "s" then
1889
+ local reg = a.reg
1890
+ if reg < 0 then
1891
+ wputb(0)
1892
+ wvreg("imm.hi", a.vreg)
1893
+ else
1894
+ wputb(shl(reg, 4))
1895
+ end
1574
1896
  else
1575
1897
  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1576
1898
  end
@@ -1647,11 +1969,14 @@ map_op[".template__"] = function(params, template, nparams)
1647
1969
  if pat == "" then pat = lastpat else lastpat = pat end
1648
1970
  if matchtm(tm, args) then
1649
1971
  local prefix = sub(szm, 1, 1)
1650
- if prefix == "/" then -- Match both operand sizes.
1651
- if args[1].opsize == sub(szm, 2, 2) and
1652
- args[2].opsize == sub(szm, 3, 3) then
1653
- dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1654
- return
1972
+ if prefix == "/" then -- Exactly match leading operand sizes.
1973
+ for i = #szm,1,-1 do
1974
+ if i == 1 then
1975
+ dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1976
+ return
1977
+ elseif args[i-1].opsize ~= sub(szm, i, i) then
1978
+ break
1979
+ end
1655
1980
  end
1656
1981
  else -- Match common operand size.
1657
1982
  local szp = sz
@@ -1716,8 +2041,8 @@ if x64 then
1716
2041
  rex = a.reg > 7 and 9 or 8
1717
2042
  end
1718
2043
  end
1719
- wputop(sz, opcode, rex)
1720
- if vreg then waction("VREG", vreg); wputxb(0) end
2044
+ local psz, sk = wputop(sz, opcode, rex, nil, vreg)
2045
+ wvreg("opcode", vreg, psz, sk)
1721
2046
  waction("IMM_D", format("(unsigned int)(%s)", op64))
1722
2047
  waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
1723
2048
  end