immunio 1.1.2 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. checksums.yaml +4 -4
  2. data/lib/immunio/version.rb +1 -1
  3. data/lua-hooks/Makefile +56 -109
  4. data/lua-hooks/ext/all.c +3 -14
  5. data/lua-hooks/ext/libinjection/module.mk +5 -0
  6. data/lua-hooks/ext/lpeg/module.mk +6 -0
  7. data/lua-hooks/ext/lua-cmsgpack/module.mk +2 -0
  8. data/lua-hooks/ext/lua-snapshot/module.mk +2 -0
  9. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  10. data/lua-hooks/ext/luajit/Makefile +2 -2
  11. data/lua-hooks/ext/luajit/README +2 -2
  12. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  13. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  14. data/lua-hooks/ext/luajit/doc/changes.html +15 -2
  15. data/lua-hooks/ext/luajit/doc/contact.html +3 -3
  16. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  17. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  18. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  19. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +4 -2
  20. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  21. data/lua-hooks/ext/luajit/doc/ext_jit.html +2 -2
  22. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  23. data/lua-hooks/ext/luajit/doc/extensions.html +9 -2
  24. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  25. data/lua-hooks/ext/luajit/doc/install.html +22 -18
  26. data/lua-hooks/ext/luajit/doc/luajit.html +3 -3
  27. data/lua-hooks/ext/luajit/doc/running.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/status.html +2 -2
  29. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  30. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +4 -4
  31. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  32. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +4 -4
  33. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +1 -1
  34. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +4 -4
  35. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  36. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +4 -4
  37. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +3 -3
  38. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  39. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +34 -7
  40. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +427 -102
  41. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +5 -5
  42. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  43. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  44. data/lua-hooks/ext/luajit/src/Makefile +36 -21
  45. data/lua-hooks/ext/luajit/src/Makefile.dep +3 -1
  46. data/lua-hooks/ext/luajit/src/host/buildvm.c +1 -1
  47. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  48. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +10 -1
  49. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +1 -1
  50. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  51. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +1 -1
  52. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  53. data/lua-hooks/ext/luajit/src/host/genminilua.lua +1 -1
  54. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  55. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +2 -2
  56. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +1 -1
  57. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +1 -1
  58. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  59. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +1 -1
  60. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  61. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +163 -73
  62. data/lua-hooks/ext/luajit/src/jit/dump.lua +2 -1
  63. data/lua-hooks/ext/luajit/src/jit/p.lua +1 -1
  64. data/lua-hooks/ext/luajit/src/jit/v.lua +1 -1
  65. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/lib_aux.c +1 -1
  67. data/lua-hooks/ext/luajit/src/lib_base.c +4 -5
  68. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  69. data/lua-hooks/ext/luajit/src/lib_debug.c +1 -1
  70. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -5
  71. data/lua-hooks/ext/luajit/src/lib_init.c +1 -1
  72. data/lua-hooks/ext/luajit/src/lib_io.c +2 -3
  73. data/lua-hooks/ext/luajit/src/lib_jit.c +1 -1
  74. data/lua-hooks/ext/luajit/src/lib_math.c +1 -1
  75. data/lua-hooks/ext/luajit/src/lib_os.c +2 -2
  76. data/lua-hooks/ext/luajit/src/lib_package.c +1 -1
  77. data/lua-hooks/ext/luajit/src/lib_string.c +1 -1
  78. data/lua-hooks/ext/luajit/src/lib_table.c +1 -1
  79. data/lua-hooks/ext/luajit/src/lj.supp +15 -0
  80. data/lua-hooks/ext/luajit/src/lj_alloc.c +1 -1
  81. data/lua-hooks/ext/luajit/src/lj_api.c +4 -1
  82. data/lua-hooks/ext/luajit/src/lj_arch.h +33 -7
  83. data/lua-hooks/ext/luajit/src/lj_asm.c +12 -5
  84. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  85. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +3 -13
  86. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +337 -71
  87. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2 -2
  88. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2 -2
  89. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  90. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  91. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  92. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  93. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  94. data/lua-hooks/ext/luajit/src/lj_buf.c +2 -4
  95. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -3
  96. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  97. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  98. data/lua-hooks/ext/luajit/src/lj_ccall.c +37 -14
  99. data/lua-hooks/ext/luajit/src/lj_ccall.h +3 -3
  100. data/lua-hooks/ext/luajit/src/lj_ccallback.c +16 -7
  101. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  102. data/lua-hooks/ext/luajit/src/lj_cconv.c +1 -1
  103. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_cdata.c +10 -1
  105. data/lua-hooks/ext/luajit/src/lj_cdata.h +3 -1
  106. data/lua-hooks/ext/luajit/src/lj_clib.c +1 -1
  107. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  108. data/lua-hooks/ext/luajit/src/lj_cparse.c +27 -6
  109. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_crecord.c +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_ctype.c +10 -8
  113. data/lua-hooks/ext/luajit/src/lj_ctype.h +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_def.h +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_dispatch.c +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_dispatch.h +21 -4
  119. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +1 -1
  120. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +7 -5
  121. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +1 -1
  123. data/lua-hooks/ext/luajit/src/lj_err.c +69 -31
  124. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  125. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  127. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +10 -40
  128. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  129. data/lua-hooks/ext/luajit/src/lj_frame.h +12 -1
  130. data/lua-hooks/ext/luajit/src/lj_func.c +1 -1
  131. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_gc.c +2 -2
  133. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  134. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_ir.c +31 -15
  137. data/lua-hooks/ext/luajit/src/lj_ir.h +1 -1
  138. data/lua-hooks/ext/luajit/src/lj_ircall.h +29 -1
  139. data/lua-hooks/ext/luajit/src/lj_iropt.h +2 -1
  140. data/lua-hooks/ext/luajit/src/lj_jit.h +2 -1
  141. data/lua-hooks/ext/luajit/src/lj_lex.c +28 -1
  142. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  143. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  144. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  145. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_mcode.c +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_meta.c +8 -8
  149. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  151. data/lua-hooks/ext/luajit/src/lj_obj.h +1 -1
  152. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +1 -1
  154. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +1 -1
  156. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +1 -1
  158. data/lua-hooks/ext/luajit/src/lj_opt_split.c +10 -5
  159. data/lua-hooks/ext/luajit/src/lj_parse.c +1 -1
  160. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  161. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  162. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_record.c +13 -5
  164. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_snap.c +20 -23
  166. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_state.c +1 -1
  168. data/lua-hooks/ext/luajit/src/lj_state.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_strfmt.c +12 -98
  172. data/lua-hooks/ext/luajit/src/lj_strfmt.h +4 -4
  173. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +591 -0
  174. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  175. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -1
  177. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  178. data/lua-hooks/ext/luajit/src/lj_target.h +1 -1
  179. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  180. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +1 -1
  181. data/lua-hooks/ext/luajit/src/lj_target_mips.h +30 -2
  182. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_target_x86.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_trace.c +7 -2
  185. data/lua-hooks/ext/luajit/src/lj_trace.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -3
  187. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  189. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -3
  190. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_vmmath.c +15 -15
  193. data/lua-hooks/ext/luajit/src/ljamalg.c +2 -1
  194. data/lua-hooks/ext/luajit/src/lua.h +1 -0
  195. data/lua-hooks/ext/luajit/src/luaconf.h +2 -2
  196. data/lua-hooks/ext/luajit/src/luajit.c +1 -1
  197. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  198. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  199. data/lua-hooks/ext/luajit/src/msvcbuild.bat +1 -1
  200. data/lua-hooks/ext/luajit/src/ps4build.bat +26 -6
  201. data/lua-hooks/ext/luajit/src/vm_arm.dasc +17 -9
  202. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +1 -1
  203. data/lua-hooks/ext/luajit/src/vm_mips.dasc +1562 -656
  204. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +3 -7
  205. data/lua-hooks/ext/luajit/src/vm_x64.dasc +10 -2
  206. data/lua-hooks/ext/luajit/src/vm_x86.dasc +5 -8
  207. data/lua-hooks/ext/luautf8/module.mk +2 -0
  208. data/lua-hooks/ext/module.mk +15 -0
  209. data/lua-hooks/ext/modules.h +17 -0
  210. data/lua-hooks/ext/perf/luacpu.c +1 -1
  211. data/lua-hooks/ext/perf/lualoadavg.c +1 -1
  212. data/lua-hooks/ext/perf/luameminfo.c +1 -1
  213. data/lua-hooks/ext/perf/luaoslib.c +124 -2
  214. data/lua-hooks/ext/perf/module.mk +5 -0
  215. data/lua-hooks/ext/sha1/luasha1.c +4 -2
  216. data/lua-hooks/ext/sha1/module.mk +5 -0
  217. data/lua-hooks/ext/sha2/luasha256.c +4 -2
  218. data/lua-hooks/ext/sha2/module.mk +5 -0
  219. data/lua-hooks/ext/sysutils/lua_utils.c +56 -0
  220. data/lua-hooks/ext/sysutils/module.mk +2 -0
  221. data/lua-hooks/lib/boot.lua +2 -1
  222. data/lua-hooks/lib/hooks/module.mk +31 -0
  223. data/lua-hooks/lib/hooks/xss/module.mk +4 -0
  224. data/lua-hooks/lib/lexers/module.mk +10 -0
  225. data/lua-hooks/lib/module.mk +38 -0
  226. data/lua-hooks/lib/schema/module.mk +3 -0
  227. data/lua-hooks/options.mk +59 -0
  228. metadata +21 -2
@@ -1,7 +1,7 @@
1
1
  ------------------------------------------------------------------------------
2
2
  -- DynASM x64 module.
3
3
  --
4
- -- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
4
+ -- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
5
5
  -- See dynasm.lua for full copyright notice.
6
6
  ------------------------------------------------------------------------------
7
7
  -- This module just sets 64 bit mode for the combined x86/x64 module.
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** DynASM x86 encoding engine.
3
- ** Copyright (C) 2005-2015 Mike Pall. All rights reserved.
3
+ ** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
4
4
  ** Released under the MIT license. See dynasm.lua for full copyright notice.
5
5
  */
6
6
 
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
170
170
  dasm_State *D = Dst_REF;
171
171
  dasm_ActList p = D->actionlist + start;
172
172
  dasm_Section *sec = D->section;
173
- int pos = sec->pos, ofs = sec->ofs, mrm = 4;
173
+ int pos = sec->pos, ofs = sec->ofs, mrm = -1;
174
174
  int *b;
175
175
 
176
176
  if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
193
193
  b[pos++] = n;
194
194
  switch (action) {
195
195
  case DASM_DISP:
196
- if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
196
+ if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
197
197
  case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
198
198
  case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
199
199
  case DASM_IMM_D: ofs += 4; break;
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
203
203
  case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
204
204
  case DASM_SPACE: p++; ofs += n; break;
205
205
  case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
206
- case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
207
- if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
206
+ case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
207
+ if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
208
+ if (*p < 0x20 && (n&7) == 4) ofs++;
209
+ switch ((*p++ >> 3) & 3) {
210
+ case 3: n |= b[pos-3];
211
+ case 2: n |= b[pos-2];
212
+ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
213
+ }
214
+ continue;
208
215
  }
209
- mrm = 4;
216
+ mrm = -1;
210
217
  } else {
211
218
  int *pl, n;
212
219
  switch (action) {
@@ -391,7 +398,27 @@ int dasm_encode(Dst_DECL, void *buffer)
391
398
  case DASM_IMM_D: wd: dasmd(n); break;
392
399
  case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
393
400
  case DASM_IMM_W: dasmw(n); break;
394
- case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
401
+ case DASM_VREG: {
402
+ int t = *p++;
403
+ unsigned char *ex = cp - (t&7);
404
+ if ((n & 8) && t < 0xa0) {
405
+ if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
406
+ n &= 7;
407
+ } else if (n & 0x10) {
408
+ if (*ex & 0x80) {
409
+ *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
410
+ }
411
+ while (++ex < cp) ex[-1] = *ex;
412
+ if (mark) mark--;
413
+ cp--;
414
+ n &= 7;
415
+ }
416
+ if (t >= 0xc0) n <<= 4;
417
+ else if (t >= 0x40) n <<= 3;
418
+ else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
419
+ cp[-1] ^= n;
420
+ break;
421
+ }
395
422
  case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
396
423
  b++; n = (int)(ptrdiff_t)D->globals[-n];
397
424
  case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
@@ -1,7 +1,7 @@
1
1
  ------------------------------------------------------------------------------
2
2
  -- DynASM x86/x64 module.
3
3
  --
4
- -- Copyright (C) 2005-2015 Mike Pall. All rights reserved.
4
+ -- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
5
5
  -- See dynasm.lua for full copyright notice.
6
6
  ------------------------------------------------------------------------------
7
7
 
@@ -11,9 +11,9 @@ local x64 = x64
11
11
  local _info = {
12
12
  arch = x64 and "x64" or "x86",
13
13
  description = "DynASM x86/x64 module",
14
- version = "1.3.0",
15
- vernum = 10300,
16
- release = "2011-05-05",
14
+ version = "1.4.0",
15
+ vernum = 10400,
16
+ release = "2015-10-18",
17
17
  author = "Mike Pall",
18
18
  license = "MIT",
19
19
  }
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
27
27
  local _s = string
28
28
  local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29
29
  local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30
- local concat, sort = table.concat, table.sort
30
+ local concat, sort, remove = table.concat, table.sort, table.remove
31
31
  local bit = bit or require("bit")
32
- local band, shl, shr = bit.band, bit.lshift, bit.rshift
32
+ local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33
33
 
34
34
  -- Inherited tables and callbacks.
35
35
  local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
41
41
  -- int arg, 1 buffer pos:
42
42
  "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43
43
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44
- "VREG", "SPACE", -- !x64: VREG support NYI.
44
+ "VREG", "SPACE",
45
45
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
46
46
  "SETLABEL", "REL_A",
47
47
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
83
83
  -- Current number of section buffer positions for dasm_put().
84
84
  local secpos = 1
85
85
 
86
+ -- VREG kind encodings, pre-shifted by 5 bits.
87
+ local map_vreg = {
88
+ ["modrm.rm.m"] = 0x00,
89
+ ["modrm.rm.r"] = 0x20,
90
+ ["opcode"] = 0x20,
91
+ ["sib.base"] = 0x20,
92
+ ["sib.index"] = 0x40,
93
+ ["modrm.reg"] = 0x80,
94
+ ["vex.v"] = 0xa0,
95
+ ["imm.hi"] = 0xc0,
96
+ }
97
+
98
+ -- Current number of VREG actions contributing to REX/VEX shrinkage.
99
+ local vreg_shrink_count = 0
100
+
86
101
  ------------------------------------------------------------------------------
87
102
 
88
103
  -- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
134
149
  if a or num then secpos = secpos + (num or 1) end
135
150
  end
136
151
 
152
+ -- Optionally add a VREG action.
153
+ local function wvreg(kind, vreg, psz, sk, defer)
154
+ if not vreg then return end
155
+ waction("VREG", vreg)
156
+ local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
157
+ if b < (sk or 0) then
158
+ vreg_shrink_count = vreg_shrink_count + 1
159
+ end
160
+ if not defer then
161
+ b = b + vreg_shrink_count * 8
162
+ vreg_shrink_count = 0
163
+ end
164
+ wputxb(b + (psz or 0))
165
+ end
166
+
137
167
  -- Add call to embedded DynASM C code.
138
168
  local function wcall(func, args)
139
169
  wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
299
329
  local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
300
330
  if needrex then map_reg_needrex[iname] = true end
301
331
  local name
302
- if sz == "o" then name = format("xmm%d", i)
332
+ if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
303
333
  elseif sz == "f" then name = format("st%d", i)
304
334
  else name = format("r%d%s", i, sz == addrsize and "" or sz) end
305
335
  map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326
356
  mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327
357
  map_reg_valid_index[map_archdef.esp] = false
328
358
  if x64 then map_reg_valid_index[map_archdef.rsp] = false end
359
+ if x64 then map_reg_needrex[map_archdef.Rb] = true end
329
360
  map_archdef["Ra"] = "@"..addrsize
330
361
 
331
362
  -- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
334
365
  -- SSE registers (oword sized, but qword and dword accessible).
335
366
  mkrmap("o", "xmm")
336
367
 
368
+ -- AVX registers (yword sized, but oword, qword and dword accessible).
369
+ mkrmap("y", "ymm")
370
+
337
371
  -- Operand size prefixes to codes.
338
372
  local map_opsize = {
339
- byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
340
- aword = addrsize,
373
+ byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
374
+ tword = "t", aword = addrsize,
341
375
  }
342
376
 
343
377
  -- Operand size code to number.
344
378
  local map_opsizenum = {
345
- b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
379
+ b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
346
380
  }
347
381
 
348
382
  -- Operand size code to name.
349
383
  local map_opsizename = {
350
- b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
351
- f = "fpword",
384
+ b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
385
+ t = "tword", f = "fpword",
352
386
  }
353
387
 
354
388
  -- Valid index register scale factors.
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
460
494
  end
461
495
 
462
496
  -- Put multi-byte opcode with operand-size dependent modifications.
463
- local function wputop(sz, op, rex)
497
+ local function wputop(sz, op, rex, vex, vregr, vregxb)
498
+ local psz, sk = 0, nil
499
+ if vex then
500
+ local tail
501
+ if vex.m == 1 and band(rex, 11) == 0 then
502
+ if x64 and vregxb then
503
+ sk = map_vreg["modrm.reg"]
504
+ else
505
+ wputb(0xc5)
506
+ tail = shl(bxor(band(rex, 4), 4), 5)
507
+ psz = 3
508
+ end
509
+ end
510
+ if not tail then
511
+ wputb(0xc4)
512
+ wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
513
+ tail = shl(band(rex, 8), 4)
514
+ psz = 4
515
+ end
516
+ local reg, vreg = 0, nil
517
+ if vex.v then
518
+ reg = vex.v.reg
519
+ if not reg then werror("bad vex operand") end
520
+ if reg < 0 then reg = 0; vreg = vex.v.vreg end
521
+ end
522
+ if sz == "y" or vex.l then tail = tail + 4 end
523
+ wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
524
+ wvreg("vex.v", vreg)
525
+ rex = 0
526
+ if op >= 256 then werror("bad vex opcode") end
527
+ else
528
+ if rex ~= 0 then
529
+ if not x64 then werror("bad operand size") end
530
+ elseif (vregr or vregxb) and x64 then
531
+ rex = 0x10
532
+ sk = map_vreg["vex.v"]
533
+ end
534
+ end
464
535
  local r
465
- if rex ~= 0 and not x64 then werror("bad operand size") end
466
536
  if sz == "w" then wputb(102) end
467
537
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
468
538
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
471
541
  if rex ~= 0 then
472
542
  local opc3 = band(op, 0xffff00)
473
543
  if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
474
- wputb(64 + band(rex, 15)); rex = 0
544
+ wputb(64 + band(rex, 15)); rex = 0; psz = 2
475
545
  end
476
546
  end
477
- wputb(shr(op, 16)); op = band(op, 0xffff)
547
+ wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
478
548
  end
479
549
  if op >= 256 then
480
550
  local b = shr(op, 8)
481
- if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
482
- wputb(b)
483
- op = band(op, 255)
551
+ if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
552
+ wputb(b); op = band(op, 255); psz = psz + 1
484
553
  end
485
- if rex ~= 0 then wputb(64 + band(rex, 15)) end
554
+ if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
486
555
  if sz == "b" then op = op - 1 end
487
556
  wputb(op)
557
+ return psz, sk
488
558
  end
489
559
 
490
560
  -- Put ModRM or SIB formatted byte.
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
494
564
  end
495
565
 
496
566
  -- Put ModRM/SIB plus optional displacement.
497
- local function wputmrmsib(t, imark, s, vsreg)
567
+ local function wputmrmsib(t, imark, s, vsreg, psz, sk)
498
568
  local vreg, vxreg
499
569
  local reg, xreg = t.reg, t.xreg
500
570
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
504
574
  -- Register mode.
505
575
  if sub(t.mode, 1, 1) == "r" then
506
576
  wputmodrm(3, s, reg)
507
- if vsreg then waction("VREG", vsreg); wputxb(2) end
508
- if vreg then waction("VREG", vreg); wputxb(0) end
577
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
578
+ wvreg("modrm.rm.r", vreg, psz+1, sk)
509
579
  return
510
580
  end
511
581
 
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
519
589
  -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
520
590
  wputmodrm(0, s, 4)
521
591
  if imark == "I" then waction("MARK") end
522
- if vsreg then waction("VREG", vsreg); wputxb(2) end
592
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
523
593
  wputmodrm(t.xsc, xreg, 5)
524
- if vxreg then waction("VREG", vxreg); wputxb(3) end
594
+ wvreg("sib.index", vxreg, psz+2, sk)
525
595
  else
526
596
  -- Pure 32 bit displacement.
527
597
  if x64 and tdisp ~= "table" then
528
598
  wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
599
+ wvreg("modrm.reg", vsreg, psz+1, sk)
529
600
  if imark == "I" then waction("MARK") end
530
601
  wputmodrm(0, 4, 5)
531
602
  else
532
603
  riprel = x64
533
604
  wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
605
+ wvreg("modrm.reg", vsreg, psz+1, sk)
534
606
  if imark == "I" then waction("MARK") end
535
607
  end
536
- if vsreg then waction("VREG", vsreg); wputxb(2) end
537
608
  end
538
609
  if riprel then -- Emit rip-relative displacement.
539
610
  if match("UWSiI", imark) then
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
561
632
  if xreg or band(reg, 7) == 4 then
562
633
  wputmodrm(m or 2, s, 4) -- ModRM.
563
634
  if m == nil or imark == "I" then waction("MARK") end
564
- if vsreg then waction("VREG", vsreg); wputxb(2) end
635
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
565
636
  wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
566
- if vxreg then waction("VREG", vxreg); wputxb(3) end
567
- if vreg then waction("VREG", vreg); wputxb(1) end
637
+ wvreg("sib.index", vxreg, psz+2, sk, vreg)
638
+ wvreg("sib.base", vreg, psz+2, sk)
568
639
  else
569
640
  wputmodrm(m or 2, s, reg) -- ModRM.
570
641
  if (imark == "I" and (m == 1 or m == 2)) or
571
642
  (m == nil and (vsreg or vreg)) then waction("MARK") end
572
- if vsreg then waction("VREG", vsreg); wputxb(2) end
573
- if vreg then waction("VREG", vreg); wputxb(1) end
643
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
644
+ wvreg("modrm.rm.m", vreg, psz+1, sk)
574
645
  end
575
646
 
576
647
  -- Put displacement.
@@ -881,9 +952,15 @@ end
881
952
  -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
882
953
  -- The spare 3 bits are either filled with the last hex digit or
883
954
  -- the result from a previous "r"/"R". The opcode is restored.
955
+ -- "u" Use VEX encoding, vvvv unused.
956
+ -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
957
+ -- removed from the list used by future characters).
958
+ -- "L" Force VEX.L
884
959
  --
885
960
  -- All of the following characters force a flush of the opcode:
886
961
  -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
962
+ -- "s" stores a 4 bit immediate from the last register operand,
963
+ -- followed by 4 zero bits.
887
964
  -- "S" stores a signed 8 bit immediate from the last operand.
888
965
  -- "U" stores an unsigned 8 bit immediate from the last operand.
889
966
  -- "W" stores an unsigned 16 bit immediate from the last operand.
@@ -1081,10 +1158,11 @@ local map_op = {
1081
1158
  btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
1082
1159
  bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
1083
1160
 
1084
- shld_3 = "mriqdw:0FA4RmU|mrCqdw:0FA5Rm",
1085
- shrd_3 = "mriqdw:0FACRmU|mrCqdw:0FADRm",
1161
+ shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
1162
+ shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
1086
1163
 
1087
1164
  rdtsc_0 = "0F31", -- P1+
1165
+ rdpmc_0 = "0F33", -- P6+
1088
1166
  cpuid_0 = "0FA2", -- P1+
1089
1167
 
1090
1168
  -- floating point ops
@@ -1190,7 +1268,7 @@ local map_op = {
1190
1268
  cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
1191
1269
  cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
1192
1270
  cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
1193
- cvtss2si_2 = "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
1271
+ cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
1194
1272
  cvttpd2dq_2 = "rmo:660FE6rM",
1195
1273
  cvttps2dq_2 = "rmo:F30F5BrM",
1196
1274
  cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
@@ -1225,46 +1303,14 @@ local map_op = {
1225
1303
  movups_2 = "rmo:0F10rM|mro:0F11Rm",
1226
1304
  orpd_2 = "rmo:660F56rM",
1227
1305
  orps_2 = "rmo:0F56rM",
1228
- packssdw_2 = "rmo:660F6BrM",
1229
- packsswb_2 = "rmo:660F63rM",
1230
- packuswb_2 = "rmo:660F67rM",
1231
- paddb_2 = "rmo:660FFCrM",
1232
- paddd_2 = "rmo:660FFErM",
1233
- paddq_2 = "rmo:660FD4rM",
1234
- paddsb_2 = "rmo:660FECrM",
1235
- paddsw_2 = "rmo:660FEDrM",
1236
- paddusb_2 = "rmo:660FDCrM",
1237
- paddusw_2 = "rmo:660FDDrM",
1238
- paddw_2 = "rmo:660FFDrM",
1239
- pand_2 = "rmo:660FDBrM",
1240
- pandn_2 = "rmo:660FDFrM",
1241
1306
  pause_0 = "F390",
1242
- pavgb_2 = "rmo:660FE0rM",
1243
- pavgw_2 = "rmo:660FE3rM",
1244
- pcmpeqb_2 = "rmo:660F74rM",
1245
- pcmpeqd_2 = "rmo:660F76rM",
1246
- pcmpeqw_2 = "rmo:660F75rM",
1247
- pcmpgtb_2 = "rmo:660F64rM",
1248
- pcmpgtd_2 = "rmo:660F66rM",
1249
- pcmpgtw_2 = "rmo:660F65rM",
1250
- pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
1307
+ pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1251
1308
  pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1252
- pmaddwd_2 = "rmo:660FF5rM",
1253
- pmaxsw_2 = "rmo:660FEErM",
1254
- pmaxub_2 = "rmo:660FDErM",
1255
- pminsw_2 = "rmo:660FEArM",
1256
- pminub_2 = "rmo:660FDArM",
1257
1309
  pmovmskb_2 = "rr/do:660FD7rM",
1258
- pmulhuw_2 = "rmo:660FE4rM",
1259
- pmulhw_2 = "rmo:660FE5rM",
1260
- pmullw_2 = "rmo:660FD5rM",
1261
- pmuludq_2 = "rmo:660FF4rM",
1262
- por_2 = "rmo:660FEBrM",
1263
1310
  prefetchnta_1 = "xb:n0F180m",
1264
1311
  prefetcht0_1 = "xb:n0F181m",
1265
1312
  prefetcht1_1 = "xb:n0F182m",
1266
1313
  prefetcht2_1 = "xb:n0F183m",
1267
- psadbw_2 = "rmo:660FF6rM",
1268
1314
  pshufd_3 = "rmio:660F70rMU",
1269
1315
  pshufhw_3 = "rmio:F30F70rMU",
1270
1316
  pshuflw_3 = "rmio:F20F70rMU",
@@ -1278,23 +1324,6 @@ local map_op = {
1278
1324
  psrldq_2 = "rio:660F733mU",
1279
1325
  psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1280
1326
  psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1281
- psubb_2 = "rmo:660FF8rM",
1282
- psubd_2 = "rmo:660FFArM",
1283
- psubq_2 = "rmo:660FFBrM",
1284
- psubsb_2 = "rmo:660FE8rM",
1285
- psubsw_2 = "rmo:660FE9rM",
1286
- psubusb_2 = "rmo:660FD8rM",
1287
- psubusw_2 = "rmo:660FD9rM",
1288
- psubw_2 = "rmo:660FF9rM",
1289
- punpckhbw_2 = "rmo:660F68rM",
1290
- punpckhdq_2 = "rmo:660F6ArM",
1291
- punpckhqdq_2 = "rmo:660F6DrM",
1292
- punpckhwd_2 = "rmo:660F69rM",
1293
- punpcklbw_2 = "rmo:660F60rM",
1294
- punpckldq_2 = "rmo:660F62rM",
1295
- punpcklqdq_2 = "rmo:660F6CrM",
1296
- punpcklwd_2 = "rmo:660F61rM",
1297
- pxor_2 = "rmo:660FEFrM",
1298
1327
  rcpps_2 = "rmo:0F53rM",
1299
1328
  rcpss_2 = "rro:F30F53rM|rx/od:",
1300
1329
  rsqrtps_2 = "rmo:0F52rM",
@@ -1352,7 +1381,7 @@ local map_op = {
1352
1381
  dpps_3 = "rmio:660F3A40rMU",
1353
1382
  extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
1354
1383
  insertps_3 = "rrio:660F3A41rMU|rxi/od:",
1355
- movntdqa_2 = "rmo:660F382ArM",
1384
+ movntdqa_2 = "rxo:660F382ArM",
1356
1385
  mpsadbw_3 = "rmio:660F3A42rMU",
1357
1386
  packusdw_2 = "rmo:660F382BrM",
1358
1387
  pblendvb_3 = "rmRo:660F3810rM",
@@ -1412,6 +1441,238 @@ local map_op = {
1412
1441
  movntsd_2 = "xr/qo:nF20F2BRm",
1413
1442
  movntss_2 = "xr/do:F30F2BRm",
1414
1443
  -- popcnt is also in SSE4.2
1444
+
1445
+ -- AES-NI
1446
+ aesdec_2 = "rmo:660F38DErM",
1447
+ aesdeclast_2 = "rmo:660F38DFrM",
1448
+ aesenc_2 = "rmo:660F38DCrM",
1449
+ aesenclast_2 = "rmo:660F38DDrM",
1450
+ aesimc_2 = "rmo:660F38DBrM",
1451
+ aeskeygenassist_3 = "rmio:660F3ADFrMU",
1452
+ pclmulqdq_3 = "rmio:660F3A44rMU",
1453
+
1454
+ -- AVX FP ops
1455
+ vaddsubpd_3 = "rrmoy:660FVD0rM",
1456
+ vaddsubps_3 = "rrmoy:F20FVD0rM",
1457
+ vandpd_3 = "rrmoy:660FV54rM",
1458
+ vandps_3 = "rrmoy:0FV54rM",
1459
+ vandnpd_3 = "rrmoy:660FV55rM",
1460
+ vandnps_3 = "rrmoy:0FV55rM",
1461
+ vblendpd_4 = "rrmioy:660F3AV0DrMU",
1462
+ vblendps_4 = "rrmioy:660F3AV0CrMU",
1463
+ vblendvpd_4 = "rrmroy:660F3AV4BrMs",
1464
+ vblendvps_4 = "rrmroy:660F3AV4ArMs",
1465
+ vbroadcastf128_2 = "rx/yo:660F38u1ArM",
1466
+ vcmppd_4 = "rrmioy:660FVC2rMU",
1467
+ vcmpps_4 = "rrmioy:0FVC2rMU",
1468
+ vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
1469
+ vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
1470
+ vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
1471
+ vcomiss_2 = "rro:0Fu2FrM|rx/od:",
1472
+ vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
1473
+ vcvtdq2ps_2 = "rmoy:0Fu5BrM",
1474
+ vcvtpd2dq_2 = "rmoy:F20FuE6rM",
1475
+ vcvtpd2ps_2 = "rmoy:660Fu5ArM",
1476
+ vcvtps2dq_2 = "rmoy:660Fu5BrM",
1477
+ vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
1478
+ vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1479
+ vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
1480
+ vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1481
+ vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1482
+ vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
1483
+ vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1484
+ vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1485
+ vcvttps2dq_2 = "rmoy:F30Fu5BrM",
1486
+ vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1487
+ vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1488
+ vdppd_4 = "rrmio:660F3AV41rMU",
1489
+ vdpps_4 = "rrmioy:660F3AV40rMU",
1490
+ vextractf128_3 = "mri/oy:660F3AuL19RmU",
1491
+ vextractps_3 = "mri/do:660F3Au17RmU",
1492
+ vhaddpd_3 = "rrmoy:660FV7CrM",
1493
+ vhaddps_3 = "rrmoy:F20FV7CrM",
1494
+ vhsubpd_3 = "rrmoy:660FV7DrM",
1495
+ vhsubps_3 = "rrmoy:F20FV7DrM",
1496
+ vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
1497
+ vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
1498
+ vldmxcsr_1 = "xd:0FuAE2m",
1499
+ vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1500
+ vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1501
+ vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
1502
+ vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
1503
+ vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1504
+ vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1505
+ vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
1506
+ vmovhlps_3 = "rrro:0FV12rM",
1507
+ vmovhpd_2 = "xr/qo:660Fu17Rm",
1508
+ vmovhpd_3 = "rrx/ooq:660FV16rM",
1509
+ vmovhps_2 = "xr/qo:0Fu17Rm",
1510
+ vmovhps_3 = "rrx/ooq:0FV16rM",
1511
+ vmovlhps_3 = "rrro:0FV16rM",
1512
+ vmovlpd_2 = "xr/qo:660Fu13Rm",
1513
+ vmovlpd_3 = "rrx/ooq:660FV12rM",
1514
+ vmovlps_2 = "xr/qo:0Fu13Rm",
1515
+ vmovlps_3 = "rrx/ooq:0FV12rM",
1516
+ vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1517
+ vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1518
+ vmovntpd_2 = "xroy:660Fu2BRm",
1519
+ vmovntps_2 = "xroy:0Fu2BRm",
1520
+ vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1521
+ vmovsd_3 = "rrro:F20FV10rM",
1522
+ vmovshdup_2 = "rmoy:F30Fu16rM",
1523
+ vmovsldup_2 = "rmoy:F30Fu12rM",
1524
+ vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1525
+ vmovss_3 = "rrro:F30FV10rM",
1526
+ vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
1527
+ vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
1528
+ vorpd_3 = "rrmoy:660FV56rM",
1529
+ vorps_3 = "rrmoy:0FV56rM",
1530
+ vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1531
+ vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1532
+ vperm2f128_4 = "rrmiy:660F3AV06rMU",
1533
+ vptestpd_2 = "rmoy:660F38u0FrM",
1534
+ vptestps_2 = "rmoy:660F38u0ErM",
1535
+ vrcpps_2 = "rmoy:0Fu53rM",
1536
+ vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
1537
+ vrsqrtps_2 = "rmoy:0Fu52rM",
1538
+ vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
1539
+ vroundpd_3 = "rmioy:660F3AV09rMU",
1540
+ vroundps_3 = "rmioy:660F3AV08rMU",
1541
+ vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
1542
+ vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
1543
+ vshufpd_4 = "rrmioy:660FVC6rMU",
1544
+ vshufps_4 = "rrmioy:0FVC6rMU",
1545
+ vsqrtps_2 = "rmoy:0Fu51rM",
1546
+ vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
1547
+ vsqrtpd_2 = "rmoy:660Fu51rM",
1548
+ vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
1549
+ vstmxcsr_1 = "xd:0FuAE3m",
1550
+ vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
1551
+ vucomiss_2 = "rro:0Fu2ErM|rx/od:",
1552
+ vunpckhpd_3 = "rrmoy:660FV15rM",
1553
+ vunpckhps_3 = "rrmoy:0FV15rM",
1554
+ vunpcklpd_3 = "rrmoy:660FV14rM",
1555
+ vunpcklps_3 = "rrmoy:0FV14rM",
1556
+ vxorpd_3 = "rrmoy:660FV57rM",
1557
+ vxorps_3 = "rrmoy:0FV57rM",
1558
+ vzeroall_0 = "0FuL77",
1559
+ vzeroupper_0 = "0Fu77",
1560
+
1561
+ -- AVX2 FP ops
1562
+ vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1563
+ vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
1564
+ -- *vgather* (!vsib)
1565
+ vpermpd_3 = "rmiy:660F3AuX01rMU",
1566
+ vpermps_3 = "rrmy:660F38V16rM",
1567
+
1568
+ -- AVX, AVX2 integer ops
1569
+ -- In general, xmm requires AVX, ymm requires AVX2.
1570
+ vaesdec_3 = "rrmo:660F38VDErM",
1571
+ vaesdeclast_3 = "rrmo:660F38VDFrM",
1572
+ vaesenc_3 = "rrmo:660F38VDCrM",
1573
+ vaesenclast_3 = "rrmo:660F38VDDrM",
1574
+ vaesimc_2 = "rmo:660F38uDBrM",
1575
+ vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
1576
+ vlddqu_2 = "rxoy:F20FuF0rM",
1577
+ vmaskmovdqu_2 = "rro:660FuF7rM",
1578
+ vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1579
+ vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1580
+ vmovntdq_2 = "xroy:660FuE7Rm",
1581
+ vmovntdqa_2 = "rxoy:660F38u2ArM",
1582
+ vmpsadbw_4 = "rrmioy:660F3AV42rMU",
1583
+ vpabsb_2 = "rmoy:660F38u1CrM",
1584
+ vpabsd_2 = "rmoy:660F38u1ErM",
1585
+ vpabsw_2 = "rmoy:660F38u1DrM",
1586
+ vpackusdw_3 = "rrmoy:660F38V2BrM",
1587
+ vpalignr_4 = "rrmioy:660F3AV0FrMU",
1588
+ vpblendvb_4 = "rrmroy:660F3AV4CrMs",
1589
+ vpblendw_4 = "rrmioy:660F3AV0ErMU",
1590
+ vpclmulqdq_4 = "rrmio:660F3AV44rMU",
1591
+ vpcmpeqq_3 = "rrmoy:660F38V29rM",
1592
+ vpcmpestri_3 = "rmio:660F3Au61rMU",
1593
+ vpcmpestrm_3 = "rmio:660F3Au60rMU",
1594
+ vpcmpgtq_3 = "rrmoy:660F38V37rM",
1595
+ vpcmpistri_3 = "rmio:660F3Au63rMU",
1596
+ vpcmpistrm_3 = "rmio:660F3Au62rMU",
1597
+ vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1598
+ vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1599
+ vpextrd_3 = "mri/do:660F3Au16RmU",
1600
+ vpextrq_3 = "mri/qo:660F3Au16RmU",
1601
+ vphaddw_3 = "rrmoy:660F38V01rM",
1602
+ vphaddd_3 = "rrmoy:660F38V02rM",
1603
+ vphaddsw_3 = "rrmoy:660F38V03rM",
1604
+ vphminposuw_2 = "rmo:660F38u41rM",
1605
+ vphsubw_3 = "rrmoy:660F38V05rM",
1606
+ vphsubd_3 = "rrmoy:660F38V06rM",
1607
+ vphsubsw_3 = "rrmoy:660F38V07rM",
1608
+ vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
1609
+ vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
1610
+ vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
1611
+ vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
1612
+ vpmaddubsw_3 = "rrmoy:660F38V04rM",
1613
+ vpmaxsb_3 = "rrmoy:660F38V3CrM",
1614
+ vpmaxsd_3 = "rrmoy:660F38V3DrM",
1615
+ vpmaxuw_3 = "rrmoy:660F38V3ErM",
1616
+ vpmaxud_3 = "rrmoy:660F38V3FrM",
1617
+ vpminsb_3 = "rrmoy:660F38V38rM",
1618
+ vpminsd_3 = "rrmoy:660F38V39rM",
1619
+ vpminuw_3 = "rrmoy:660F38V3ArM",
1620
+ vpminud_3 = "rrmoy:660F38V3BrM",
1621
+ vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1622
+ vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
1623
+ vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
1624
+ vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
1625
+ vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
1626
+ vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
1627
+ vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
1628
+ vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
1629
+ vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
1630
+ vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
1631
+ vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
1632
+ vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
1633
+ vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
1634
+ vpmuldq_3 = "rrmoy:660F38V28rM",
1635
+ vpmulhrsw_3 = "rrmoy:660F38V0BrM",
1636
+ vpmulld_3 = "rrmoy:660F38V40rM",
1637
+ vpshufb_3 = "rrmoy:660F38V00rM",
1638
+ vpshufd_3 = "rmioy:660Fu70rMU",
1639
+ vpshufhw_3 = "rmioy:F30Fu70rMU",
1640
+ vpshuflw_3 = "rmioy:F20Fu70rMU",
1641
+ vpsignb_3 = "rrmoy:660F38V08rM",
1642
+ vpsignw_3 = "rrmoy:660F38V09rM",
1643
+ vpsignd_3 = "rrmoy:660F38V0ArM",
1644
+ vpslldq_3 = "rrioy:660Fv737mU",
1645
+ vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1646
+ vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1647
+ vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1648
+ vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1649
+ vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1650
+ vpsrldq_3 = "rrioy:660Fv733mU",
1651
+ vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1652
+ vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1653
+ vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1654
+ vptest_2 = "rmoy:660F38u17rM",
1655
+
1656
+ -- AVX2 integer ops
1657
+ vbroadcasti128_2 = "rx/yo:660F38u5ArM",
1658
+ vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
1659
+ vextracti128_3 = "mri/oy:660F3AuL39RmU",
1660
+ vpblendd_4 = "rrmioy:660F3AV02rMU",
1661
+ vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1662
+ vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1663
+ vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1664
+ vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1665
+ vpermd_3 = "rrmy:660F38V36rM",
1666
+ vpermq_3 = "rmiy:660F3AuX00rMU",
1667
+ -- *vpgather* (!vsib)
1668
+ vperm2i128_4 = "rrmiy:660F3AV46rMU",
1669
+ vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1670
+ vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1671
+ vpsllvd_3 = "rrmoy:660F38V47rM",
1672
+ vpsllvq_3 = "rrmoy:660F38VX47rM",
1673
+ vpsravd_3 = "rrmoy:660F38V46rM",
1674
+ vpsrlvd_3 = "rrmoy:660F38V45rM",
1675
+ vpsrlvq_3 = "rrmoy:660F38VX45rM",
1415
1676
  }
1416
1677
 
1417
1678
  ------------------------------------------------------------------------------
@@ -1462,28 +1723,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
1462
1723
  map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
1463
1724
  end
1464
1725
 
1465
- -- SSE FP arithmetic ops.
1726
+ -- SSE / AVX FP arithmetic ops.
1466
1727
  for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
1467
1728
  sub = 12, min = 13, div = 14, max = 15 } do
1468
1729
  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
1469
1730
  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
1470
1731
  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
1471
1732
  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
1733
+ if n ~= 1 then
1734
+ map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
1735
+ map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
1736
+ map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
1737
+ map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
1738
+ end
1739
+ end
1740
+
1741
+ -- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
1742
+ for name,n in pairs{
1743
+ paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
1744
+ paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
1745
+ packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
1746
+ paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
1747
+ pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
1748
+ pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
1749
+ pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
1750
+ pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
1751
+ pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
1752
+ pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
1753
+ psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
1754
+ psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
1755
+ punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
1756
+ punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
1757
+ punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
1758
+ } do
1759
+ map_op[name.."_2"] = format("rmo:660F%02XrM", n)
1760
+ map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
1472
1761
  end
1473
1762
 
1474
1763
  ------------------------------------------------------------------------------
1475
1764
 
1765
+ local map_vexarg = { u = false, v = 1, V = 2 }
1766
+
1476
1767
  -- Process pattern string.
1477
1768
  local function dopattern(pat, args, sz, op, needrex)
1478
- local digit, addin
1769
+ local digit, addin, vex
1479
1770
  local opcode = 0
1480
1771
  local szov = sz
1481
1772
  local narg = 1
1482
1773
  local rex = 0
1483
1774
 
1484
1775
  -- Limit number of section buffer positions used by a single dasm_put().
1485
- -- A single opcode needs a maximum of 5 positions.
1486
- if secpos+5 > maxsecpos then wflush() end
1776
+ -- A single opcode needs a maximum of 6 positions.
1777
+ if secpos+6 > maxsecpos then wflush() end
1487
1778
 
1488
1779
  -- Process each character.
1489
1780
  for c in gmatch(pat.."|", ".") do
@@ -1497,6 +1788,8 @@ local function dopattern(pat, args, sz, op, needrex)
1497
1788
  szov = nil
1498
1789
  elseif c == "X" then -- Force REX.W.
1499
1790
  rex = 8
1791
+ elseif c == "L" then -- Force VEX.L.
1792
+ vex.l = true
1500
1793
  elseif c == "r" then -- Merge 1st operand regno. into opcode.
1501
1794
  addin = args[1]; opcode = opcode + (addin.reg % 8)
1502
1795
  if narg < 2 then narg = 2 end
@@ -1520,21 +1813,42 @@ local function dopattern(pat, args, sz, op, needrex)
1520
1813
  if t.xreg and t.xreg > 7 then rex = rex + 2 end
1521
1814
  if s > 7 then rex = rex + 4 end
1522
1815
  if needrex then rex = rex + 16 end
1523
- wputop(szov, opcode, rex); opcode = nil
1816
+ local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1817
+ opcode = nil
1524
1818
  local imark = sub(pat, -1) -- Force a mark (ugly).
1525
1819
  -- Put ModRM/SIB with regno/last digit as spare.
1526
- wputmrmsib(t, imark, s, addin and addin.vreg)
1820
+ wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1527
1821
  addin = nil
1822
+ elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1823
+ local b = band(opcode, 255); opcode = shr(opcode, 8)
1824
+ local m = 1
1825
+ if b == 0x38 then m = 2
1826
+ elseif b == 0x3a then m = 3 end
1827
+ if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1828
+ if b ~= 0x0f then
1829
+ werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1830
+ "' in pattern `"..pat.."' for `"..op.."'")
1831
+ end
1832
+ local v = map_vexarg[c]
1833
+ if v then v = remove(args, v) end
1834
+ b = band(opcode, 255)
1835
+ local p = 0
1836
+ if b == 0x66 then p = 1
1837
+ elseif b == 0xf3 then p = 2
1838
+ elseif b == 0xf2 then p = 3 end
1839
+ if p ~= 0 then opcode = shr(opcode, 8) end
1840
+ if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1841
+ vex = { m = m, p = p, v = v }
1528
1842
  else
1529
1843
  if opcode then -- Flush opcode.
1530
1844
  if szov == "q" and rex == 0 then rex = rex + 8 end
1531
1845
  if needrex then rex = rex + 16 end
1532
1846
  if addin and addin.reg == -1 then
1533
- wputop(szov, opcode - 7, rex)
1534
- waction("VREG", addin.vreg); wputxb(0)
1847
+ local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1848
+ wvreg("opcode", addin.vreg, psz, sk)
1535
1849
  else
1536
1850
  if addin and addin.reg > 7 then rex = rex + 1 end
1537
- wputop(szov, opcode, rex)
1851
+ wputop(szov, opcode, rex, vex)
1538
1852
  end
1539
1853
  opcode = nil
1540
1854
  end
@@ -1571,6 +1885,14 @@ local function dopattern(pat, args, sz, op, needrex)
1571
1885
  else
1572
1886
  wputlabel("REL_", imm, 2)
1573
1887
  end
1888
+ elseif c == "s" then
1889
+ local reg = a.reg
1890
+ if reg < 0 then
1891
+ wputb(0)
1892
+ wvreg("imm.hi", a.vreg)
1893
+ else
1894
+ wputb(shl(reg, 4))
1895
+ end
1574
1896
  else
1575
1897
  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1576
1898
  end
@@ -1647,11 +1969,14 @@ map_op[".template__"] = function(params, template, nparams)
1647
1969
  if pat == "" then pat = lastpat else lastpat = pat end
1648
1970
  if matchtm(tm, args) then
1649
1971
  local prefix = sub(szm, 1, 1)
1650
- if prefix == "/" then -- Match both operand sizes.
1651
- if args[1].opsize == sub(szm, 2, 2) and
1652
- args[2].opsize == sub(szm, 3, 3) then
1653
- dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1654
- return
1972
+ if prefix == "/" then -- Exactly match leading operand sizes.
1973
+ for i = #szm,1,-1 do
1974
+ if i == 1 then
1975
+ dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1976
+ return
1977
+ elseif args[i-1].opsize ~= sub(szm, i, i) then
1978
+ break
1979
+ end
1655
1980
  end
1656
1981
  else -- Match common operand size.
1657
1982
  local szp = sz
@@ -1716,8 +2041,8 @@ if x64 then
1716
2041
  rex = a.reg > 7 and 9 or 8
1717
2042
  end
1718
2043
  end
1719
- wputop(sz, opcode, rex)
1720
- if vreg then waction("VREG", vreg); wputxb(0) end
2044
+ local psz, sk = wputop(sz, opcode, rex, nil, vreg)
2045
+ wvreg("opcode", vreg, psz, sk)
1721
2046
  waction("IMM_D", format("(unsigned int)(%s)", op64))
1722
2047
  waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
1723
2048
  end