immunio 1.2.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +13 -5
  3. data/ext/immunio/Rakefile +14 -6
  4. data/lib/immunio/context.rb +2 -0
  5. data/lib/immunio/plugins/action_view.rb +7 -668
  6. data/lib/immunio/plugins/action_view/action_view.rb +22 -0
  7. data/lib/immunio/plugins/action_view/active_support_hash.rb +29 -0
  8. data/lib/immunio/plugins/action_view/cache_store.rb +24 -0
  9. data/lib/immunio/plugins/action_view/erubi.rb +38 -0
  10. data/lib/immunio/plugins/action_view/erubis.rb +39 -0
  11. data/lib/immunio/plugins/action_view/fragment_caching.rb +29 -0
  12. data/lib/immunio/plugins/action_view/haml.rb +46 -0
  13. data/lib/immunio/plugins/action_view/slim.rb +42 -0
  14. data/lib/immunio/plugins/action_view/template.rb +431 -0
  15. data/lib/immunio/plugins/action_view/template_rendering.rb +45 -0
  16. data/lib/immunio/plugins/http_tracker.rb +2 -0
  17. data/lib/immunio/plugins/io.rb +34 -0
  18. data/lib/immunio/version.rb +1 -1
  19. data/lua-hooks/Makefile +36 -9
  20. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  21. data/lua-hooks/ext/luajit/Makefile +22 -15
  22. data/lua-hooks/ext/luajit/README +2 -2
  23. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  24. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  25. data/lua-hooks/ext/luajit/doc/changes.html +69 -3
  26. data/lua-hooks/ext/luajit/doc/contact.html +10 -3
  27. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  29. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  30. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +3 -4
  31. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  32. data/lua-hooks/ext/luajit/doc/ext_jit.html +3 -3
  33. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  34. data/lua-hooks/ext/luajit/doc/extensions.html +47 -20
  35. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  36. data/lua-hooks/ext/luajit/doc/install.html +74 -45
  37. data/lua-hooks/ext/luajit/doc/luajit.html +5 -5
  38. data/lua-hooks/ext/luajit/doc/running.html +3 -3
  39. data/lua-hooks/ext/luajit/doc/status.html +13 -8
  40. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  41. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1 -1
  42. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  43. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1 -1
  44. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +8 -5
  45. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +66 -11
  46. data/lua-hooks/ext/luajit/dynasm/dasm_mips64.lua +12 -0
  47. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  48. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1 -1
  49. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +1 -1
  50. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  51. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +1 -1
  52. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +5 -1
  53. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +2 -2
  54. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  55. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  56. data/lua-hooks/ext/luajit/src/Makefile +15 -11
  57. data/lua-hooks/ext/luajit/src/Makefile.dep +16 -16
  58. data/lua-hooks/ext/luajit/src/host/buildvm.c +2 -2
  59. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  60. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +9 -4
  61. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +2 -2
  62. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  63. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +14 -3
  64. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +27 -3
  65. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/host/genminilua.lua +6 -5
  67. data/lua-hooks/ext/luajit/src/host/minilua.c +1 -1
  68. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  69. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +8 -8
  70. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +2 -2
  71. data/lua-hooks/ext/luajit/src/jit/dis_arm64.lua +1216 -0
  72. data/lua-hooks/ext/luajit/src/jit/dis_arm64be.lua +12 -0
  73. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +35 -20
  74. data/lua-hooks/ext/luajit/src/jit/dis_mips64.lua +17 -0
  75. data/lua-hooks/ext/luajit/src/jit/dis_mips64el.lua +17 -0
  76. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  77. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +2 -2
  78. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  79. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +7 -4
  80. data/lua-hooks/ext/luajit/src/jit/dump.lua +17 -12
  81. data/lua-hooks/ext/luajit/src/jit/p.lua +3 -2
  82. data/lua-hooks/ext/luajit/src/jit/v.lua +2 -2
  83. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  84. data/lua-hooks/ext/luajit/src/lauxlib.h +14 -20
  85. data/lua-hooks/ext/luajit/src/lib_aux.c +38 -27
  86. data/lua-hooks/ext/luajit/src/lib_base.c +12 -5
  87. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  88. data/lua-hooks/ext/luajit/src/lib_debug.c +5 -5
  89. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -2
  90. data/lua-hooks/ext/luajit/src/lib_init.c +16 -16
  91. data/lua-hooks/ext/luajit/src/lib_io.c +6 -7
  92. data/lua-hooks/ext/luajit/src/lib_jit.c +14 -4
  93. data/lua-hooks/ext/luajit/src/lib_math.c +1 -5
  94. data/lua-hooks/ext/luajit/src/lib_os.c +1 -1
  95. data/lua-hooks/ext/luajit/src/lib_package.c +14 -23
  96. data/lua-hooks/ext/luajit/src/lib_string.c +1 -5
  97. data/lua-hooks/ext/luajit/src/lib_table.c +21 -1
  98. data/lua-hooks/ext/luajit/src/lj.supp +3 -3
  99. data/lua-hooks/ext/luajit/src/lj_alloc.c +174 -83
  100. data/lua-hooks/ext/luajit/src/lj_api.c +97 -18
  101. data/lua-hooks/ext/luajit/src/lj_arch.h +54 -22
  102. data/lua-hooks/ext/luajit/src/lj_asm.c +172 -53
  103. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +19 -16
  105. data/lua-hooks/ext/luajit/src/lj_asm_arm64.h +2022 -0
  106. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +564 -158
  107. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +19 -18
  108. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +578 -92
  109. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  113. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_buf.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_ccall.c +172 -7
  119. data/lua-hooks/ext/luajit/src/lj_ccall.h +21 -5
  120. data/lua-hooks/ext/luajit/src/lj_ccallback.c +71 -17
  121. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_cconv.c +4 -2
  123. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  124. data/lua-hooks/ext/luajit/src/lj_cdata.c +7 -5
  125. data/lua-hooks/ext/luajit/src/lj_cdata.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_clib.c +5 -5
  127. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  128. data/lua-hooks/ext/luajit/src/lj_cparse.c +11 -6
  129. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  130. data/lua-hooks/ext/luajit/src/lj_crecord.c +70 -14
  131. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_ctype.c +1 -1
  133. data/lua-hooks/ext/luajit/src/lj_ctype.h +8 -8
  134. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_def.h +6 -9
  137. data/lua-hooks/ext/luajit/src/lj_dispatch.c +3 -3
  138. data/lua-hooks/ext/luajit/src/lj_dispatch.h +2 -1
  139. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +5 -4
  140. data/lua-hooks/ext/luajit/src/lj_emit_arm64.h +419 -0
  141. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +100 -20
  142. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +4 -4
  143. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +116 -25
  144. data/lua-hooks/ext/luajit/src/lj_err.c +34 -13
  145. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +58 -49
  149. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_frame.h +33 -6
  151. data/lua-hooks/ext/luajit/src/lj_func.c +4 -2
  152. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_gc.c +16 -7
  154. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +31 -1
  156. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_ir.c +69 -96
  158. data/lua-hooks/ext/luajit/src/lj_ir.h +29 -18
  159. data/lua-hooks/ext/luajit/src/lj_ircall.h +24 -30
  160. data/lua-hooks/ext/luajit/src/lj_iropt.h +9 -9
  161. data/lua-hooks/ext/luajit/src/lj_jit.h +67 -9
  162. data/lua-hooks/ext/luajit/src/lj_lex.c +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  164. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  166. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_mcode.c +11 -10
  168. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_meta.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  172. data/lua-hooks/ext/luajit/src/lj_obj.h +7 -3
  173. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  174. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +84 -17
  175. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +3 -3
  177. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +24 -22
  178. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +11 -6
  179. data/lua-hooks/ext/luajit/src/lj_opt_split.c +11 -2
  180. data/lua-hooks/ext/luajit/src/lj_parse.c +9 -7
  181. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  182. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_record.c +201 -117
  185. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_snap.c +72 -26
  187. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_state.c +6 -6
  189. data/lua-hooks/ext/luajit/src/lj_state.h +2 -2
  190. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_strfmt.c +7 -3
  193. data/lua-hooks/ext/luajit/src/lj_strfmt.h +1 -1
  194. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +4 -3
  195. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  196. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  197. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -2
  198. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  199. data/lua-hooks/ext/luajit/src/lj_target.h +3 -3
  200. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  201. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +239 -7
  202. data/lua-hooks/ext/luajit/src/lj_target_mips.h +111 -22
  203. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  204. data/lua-hooks/ext/luajit/src/lj_target_x86.h +21 -4
  205. data/lua-hooks/ext/luajit/src/lj_trace.c +63 -18
  206. data/lua-hooks/ext/luajit/src/lj_trace.h +2 -1
  207. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -1
  208. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  209. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  210. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -1
  211. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  212. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  213. data/lua-hooks/ext/luajit/src/lj_vmmath.c +1 -1
  214. data/lua-hooks/ext/luajit/src/ljamalg.c +1 -1
  215. data/lua-hooks/ext/luajit/src/lua.h +9 -1
  216. data/lua-hooks/ext/luajit/src/luaconf.h +3 -7
  217. data/lua-hooks/ext/luajit/src/luajit.c +69 -54
  218. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  219. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  220. data/lua-hooks/ext/luajit/src/msvcbuild.bat +12 -4
  221. data/lua-hooks/ext/luajit/src/vm_arm.dasc +1 -1
  222. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +255 -32
  223. data/lua-hooks/ext/luajit/src/vm_mips.dasc +26 -23
  224. data/lua-hooks/ext/luajit/src/vm_mips64.dasc +5062 -0
  225. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +1 -1
  226. data/lua-hooks/ext/luajit/src/vm_x64.dasc +24 -25
  227. data/lua-hooks/ext/luajit/src/vm_x86.dasc +77 -4
  228. data/lua-hooks/libluahooks.darwin.a +0 -0
  229. data/lua-hooks/libluahooks.linux.a +0 -0
  230. data/lua-hooks/options.mk +1 -1
  231. metadata +37 -77
  232. data/lua-hooks/ext/all.c +0 -69
  233. data/lua-hooks/ext/libinjection/COPYING +0 -37
  234. data/lua-hooks/ext/libinjection/libinjection.h +0 -65
  235. data/lua-hooks/ext/libinjection/libinjection_html5.c +0 -847
  236. data/lua-hooks/ext/libinjection/libinjection_html5.h +0 -54
  237. data/lua-hooks/ext/libinjection/libinjection_sqli.c +0 -2301
  238. data/lua-hooks/ext/libinjection/libinjection_sqli.h +0 -295
  239. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +0 -9349
  240. data/lua-hooks/ext/libinjection/libinjection_xss.c +0 -531
  241. data/lua-hooks/ext/libinjection/libinjection_xss.h +0 -21
  242. data/lua-hooks/ext/libinjection/lualib.c +0 -145
  243. data/lua-hooks/ext/libinjection/module.mk +0 -5
  244. data/lua-hooks/ext/lpeg/HISTORY +0 -96
  245. data/lua-hooks/ext/lpeg/lpcap.c +0 -537
  246. data/lua-hooks/ext/lpeg/lpcap.h +0 -56
  247. data/lua-hooks/ext/lpeg/lpcode.c +0 -1014
  248. data/lua-hooks/ext/lpeg/lpcode.h +0 -40
  249. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  250. data/lua-hooks/ext/lpeg/lpeg.html +0 -1445
  251. data/lua-hooks/ext/lpeg/lpprint.c +0 -244
  252. data/lua-hooks/ext/lpeg/lpprint.h +0 -36
  253. data/lua-hooks/ext/lpeg/lptree.c +0 -1303
  254. data/lua-hooks/ext/lpeg/lptree.h +0 -82
  255. data/lua-hooks/ext/lpeg/lptypes.h +0 -149
  256. data/lua-hooks/ext/lpeg/lpvm.c +0 -364
  257. data/lua-hooks/ext/lpeg/lpvm.h +0 -58
  258. data/lua-hooks/ext/lpeg/makefile +0 -55
  259. data/lua-hooks/ext/lpeg/module.mk +0 -6
  260. data/lua-hooks/ext/lpeg/re.html +0 -498
  261. data/lua-hooks/ext/lua-cmsgpack/.gitignore +0 -13
  262. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +0 -45
  263. data/lua-hooks/ext/lua-cmsgpack/README.md +0 -115
  264. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +0 -970
  265. data/lua-hooks/ext/lua-cmsgpack/module.mk +0 -2
  266. data/lua-hooks/ext/lua-cmsgpack/test.lua +0 -570
  267. data/lua-hooks/ext/lua-snapshot/LICENSE +0 -7
  268. data/lua-hooks/ext/lua-snapshot/Makefile +0 -12
  269. data/lua-hooks/ext/lua-snapshot/README.md +0 -18
  270. data/lua-hooks/ext/lua-snapshot/dump.lua +0 -15
  271. data/lua-hooks/ext/lua-snapshot/module.mk +0 -2
  272. data/lua-hooks/ext/lua-snapshot/snapshot.c +0 -462
  273. data/lua-hooks/ext/luautf8/README.md +0 -152
  274. data/lua-hooks/ext/luautf8/lutf8lib.c +0 -1274
  275. data/lua-hooks/ext/luautf8/module.mk +0 -2
  276. data/lua-hooks/ext/luautf8/unidata.h +0 -3064
  277. data/lua-hooks/ext/module.mk +0 -15
  278. data/lua-hooks/ext/modules.h +0 -17
  279. data/lua-hooks/ext/perf/luacpu.c +0 -114
  280. data/lua-hooks/ext/perf/lualoadavg.c +0 -40
  281. data/lua-hooks/ext/perf/luameminfo.c +0 -38
  282. data/lua-hooks/ext/perf/luaoslib.c +0 -203
  283. data/lua-hooks/ext/perf/module.mk +0 -5
  284. data/lua-hooks/ext/sha1/luasha1.c +0 -74
  285. data/lua-hooks/ext/sha1/module.mk +0 -5
  286. data/lua-hooks/ext/sha1/sha1.c +0 -145
  287. data/lua-hooks/ext/sha2/luasha256.c +0 -77
  288. data/lua-hooks/ext/sha2/module.mk +0 -5
  289. data/lua-hooks/ext/sha2/sha256.c +0 -196
  290. data/lua-hooks/ext/sysutils/lua_utils.c +0 -56
  291. data/lua-hooks/ext/sysutils/module.mk +0 -2
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** PPC IR assembler (SSA IR -> machine code).
3
- ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
3
+ ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
4
  */
5
5
 
6
6
  /* -- Register allocator extensions --------------------------------------- */
@@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
393
393
  emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
394
394
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
395
395
  emit_lsptr(as, PPCI_LFS, (fbias & 31),
396
- (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
397
- RSET_GPR);
396
+ (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
398
397
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
399
398
  emit_fb(as, PPCI_FCTIWZ, tmp, left);
400
399
  }
@@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
433
432
  Reg left = ra_alloc1(as, lref, allow);
434
433
  Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
435
434
  Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
436
- const float *kbias;
437
435
  if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
438
436
  emit_fab(as, PPCI_FSUB, dest, dest, fbias);
439
437
  emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
440
- kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
441
- if (st == IRT_U32) kbias++;
442
- emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
438
+ emit_lsptr(as, PPCI_LFS, (fbias & 31),
439
+ &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
443
440
  rset_clear(allow, hibias));
444
441
  emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
445
442
  RID_SP, SPOFS_TMPLO);
@@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
472
469
  emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
473
470
  emit_fab(as, PPCI_FSUB, tmp, left, tmp);
474
471
  emit_lsptr(as, PPCI_LFS, (tmp & 31),
475
- (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
476
- RSET_GPR);
472
+ (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
477
473
  } else {
478
474
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
479
475
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
@@ -717,7 +713,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
717
713
 
718
714
  static void asm_uref(ASMState *as, IRIns *ir)
719
715
  {
720
- /* NYI: Check that UREFO is still open and not aliasing a slot. */
721
716
  Reg dest = ra_dest(as, ir, RSET_GPR);
722
717
  if (irref_isk(ir->op1)) {
723
718
  GCfunc *fn = ir_kfunc(IR(ir->op1));
@@ -809,17 +804,23 @@ static PPCIns asm_fxstoreins(IRIns *ir)
809
804
  static void asm_fload(ASMState *as, IRIns *ir)
810
805
  {
811
806
  Reg dest = ra_dest(as, ir, RSET_GPR);
812
- Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
813
807
  PPCIns pi = asm_fxloadins(ir);
808
+ Reg idx;
814
809
  int32_t ofs;
815
- if (ir->op2 == IRFL_TAB_ARRAY) {
816
- ofs = asm_fuseabase(as, ir->op1);
817
- if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
818
- emit_tai(as, PPCI_ADDI, dest, idx, ofs);
819
- return;
810
+ if (ir->op1 == REF_NIL) {
811
+ idx = RID_JGL;
812
+ ofs = (ir->op2 << 2) - 32768;
813
+ } else {
814
+ idx = ra_alloc1(as, ir->op1, RSET_GPR);
815
+ if (ir->op2 == IRFL_TAB_ARRAY) {
816
+ ofs = asm_fuseabase(as, ir->op1);
817
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
818
+ emit_tai(as, PPCI_ADDI, dest, idx, ofs);
819
+ return;
820
+ }
820
821
  }
822
+ ofs = field_ofs[ir->op2];
821
823
  }
822
- ofs = field_ofs[ir->op2];
823
824
  lua_assert(!irt_isi8(ir->t));
824
825
  emit_tai(as, pi, dest, idx, ofs);
825
826
  }
@@ -975,7 +976,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
975
976
  emit_fab(as, PPCI_FSUB, dest, dest, fbias);
976
977
  emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
977
978
  emit_lsptr(as, PPCI_LFS, (fbias & 31),
978
- (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
979
+ (void *)&as->J->k32[LJ_K32_2P52_2P31],
979
980
  rset_clear(allow, hibias));
980
981
  emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
981
982
  emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  ** x86/x64 IR assembler (SSA IR -> machine code).
3
- ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
3
+ ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
4
4
  */
5
5
 
6
6
  /* -- Guard handling ------------------------------------------------------ */
@@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
21
21
  }
22
22
  /* Push the high byte of the exitno for each exit stub group. */
23
23
  *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
24
+ #if !LJ_GC64
24
25
  /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
25
26
  *mxp++ = XI_MOVmi;
26
27
  *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
27
28
  *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
28
29
  *mxp++ = 2*sizeof(void *);
29
30
  *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
31
+ #endif
30
32
  /* Jump to exit handler which fills in the ExitState. */
31
33
  *mxp++ = XI_JMP; mxp += 4;
32
34
  *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
@@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc)
62
64
  target = p;
63
65
  cc ^= 1;
64
66
  if (as->realign) {
67
+ if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
68
+ as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */
65
69
  emit_sjcc(as, cc, target);
66
70
  return;
67
71
  }
68
72
  }
73
+ if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
74
+ as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */
69
75
  emit_jcc(as, cc, target);
70
76
  }
71
77
 
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
79
85
  {
80
86
  if (irref_isk(ref)) {
81
87
  IRIns *ir = IR(ref);
88
+ #if LJ_GC64
89
+ if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
90
+ *k = ir->i;
91
+ return 1;
92
+ } else if (checki32((int64_t)ir_k64(ir)->u64)) {
93
+ *k = (int32_t)ir_k64(ir)->u64;
94
+ return 1;
95
+ }
96
+ #else
82
97
  if (ir->o != IR_KINT64) {
83
98
  *k = ir->i;
84
99
  return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
86
101
  *k = (int32_t)ir_kint64(ir)->u64;
87
102
  return 1;
88
103
  }
104
+ #endif
89
105
  }
90
106
  return 0;
91
107
  }
@@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
185
201
  if (irref_isk(ir->op1)) {
186
202
  GCfunc *fn = ir_kfunc(IR(ir->op1));
187
203
  GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
204
+ #if LJ_GC64
205
+ int64_t ofs = dispofs(as, &uv->tv);
206
+ if (checki32(ofs) && checki32(ofs+4)) {
207
+ as->mrm.ofs = (int32_t)ofs;
208
+ as->mrm.base = RID_DISPATCH;
209
+ as->mrm.idx = RID_NONE;
210
+ return;
211
+ }
212
+ #else
188
213
  as->mrm.ofs = ptr2addr(&uv->tv);
189
214
  as->mrm.base = as->mrm.idx = RID_NONE;
190
215
  return;
216
+ #endif
191
217
  }
192
218
  break;
193
219
  default:
@@ -205,14 +231,40 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
205
231
  static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
206
232
  {
207
233
  lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
208
- as->mrm.ofs = field_ofs[ir->op2];
209
234
  as->mrm.idx = RID_NONE;
235
+ if (ir->op1 == REF_NIL) {
236
+ #if LJ_GC64
237
+ as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch);
238
+ as->mrm.base = RID_DISPATCH;
239
+ #else
240
+ as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J));
241
+ as->mrm.base = RID_NONE;
242
+ #endif
243
+ return;
244
+ }
245
+ as->mrm.ofs = field_ofs[ir->op2];
210
246
  if (irref_isk(ir->op1)) {
211
- as->mrm.ofs += IR(ir->op1)->i;
247
+ IRIns *op1 = IR(ir->op1);
248
+ #if LJ_GC64
249
+ if (ir->op1 == REF_NIL) {
250
+ as->mrm.ofs -= GG_OFS(dispatch);
251
+ as->mrm.base = RID_DISPATCH;
252
+ return;
253
+ } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
254
+ intptr_t ofs = dispofs(as, ir_kptr(op1));
255
+ if (checki32(as->mrm.ofs + ofs)) {
256
+ as->mrm.ofs += (int32_t)ofs;
257
+ as->mrm.base = RID_DISPATCH;
258
+ return;
259
+ }
260
+ }
261
+ #else
262
+ as->mrm.ofs += op1->i;
212
263
  as->mrm.base = RID_NONE;
213
- } else {
214
- as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
264
+ return;
265
+ #endif
215
266
  }
267
+ as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
216
268
  }
217
269
 
218
270
  /* Fuse string reference into memory operand. */
@@ -223,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
223
275
  as->mrm.base = as->mrm.idx = RID_NONE;
224
276
  as->mrm.scale = XM_SCALE1;
225
277
  as->mrm.ofs = sizeof(GCstr);
226
- if (irref_isk(ir->op1)) {
278
+ if (!LJ_GC64 && irref_isk(ir->op1)) {
227
279
  as->mrm.ofs += IR(ir->op1)->i;
228
280
  } else {
229
281
  Reg r = ra_alloc1(as, ir->op1, allow);
@@ -255,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
255
307
  IRIns *ir = IR(ref);
256
308
  as->mrm.idx = RID_NONE;
257
309
  if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
310
+ #if LJ_GC64
311
+ intptr_t ofs = dispofs(as, ir_kptr(ir));
312
+ if (checki32(ofs)) {
313
+ as->mrm.ofs = (int32_t)ofs;
314
+ as->mrm.base = RID_DISPATCH;
315
+ return;
316
+ }
317
+ } if (0) {
318
+ #else
258
319
  as->mrm.ofs = ir->i;
259
320
  as->mrm.base = RID_NONE;
260
321
  } else if (ir->o == IR_STRREF) {
261
322
  asm_fusestrref(as, ir, allow);
323
+ #endif
262
324
  } else {
263
325
  as->mrm.ofs = 0;
264
326
  if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -301,7 +363,45 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
301
363
  }
302
364
  }
303
365
 
304
- /* Fuse load into memory operand. */
366
+ /* Fuse load of 64 bit IR constant into memory operand. */
367
+ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
368
+ {
369
+ const uint64_t *k = &ir_k64(ir)->u64;
370
+ if (!LJ_GC64 || checki32((intptr_t)k)) {
371
+ as->mrm.ofs = ptr2addr(k);
372
+ as->mrm.base = RID_NONE;
373
+ #if LJ_GC64
374
+ } else if (checki32(dispofs(as, k))) {
375
+ as->mrm.ofs = (int32_t)dispofs(as, k);
376
+ as->mrm.base = RID_DISPATCH;
377
+ } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
378
+ checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
379
+ as->mrm.ofs = (int32_t)mcpofs(as, k);
380
+ as->mrm.base = RID_RIP;
381
+ } else {
382
+ if (ir->i) {
383
+ lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
384
+ } else {
385
+ while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
386
+ *(uint64_t*)as->mcbot = *k;
387
+ ir->i = (int32_t)(as->mctop - as->mcbot);
388
+ as->mcbot += 8;
389
+ as->mclim = as->mcbot + MCLIM_REDZONE;
390
+ }
391
+ as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
392
+ as->mrm.base = RID_RIP;
393
+ #endif
394
+ }
395
+ as->mrm.idx = RID_NONE;
396
+ return RID_MRM;
397
+ }
398
+
399
+ /* Fuse load into memory operand.
400
+ **
401
+ ** Important caveat: this may emit RIP-relative loads! So don't place any
402
+ ** code emitters between this function and the use of its result.
403
+ ** The only permitted exception is asm_guardcc().
404
+ */
305
405
  static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
306
406
  {
307
407
  IRIns *ir = IR(ref);
@@ -320,26 +420,35 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
320
420
  if (ir->o == IR_KNUM) {
321
421
  RegSet avail = as->freeset & ~as->modset & RSET_FPR;
322
422
  lua_assert(allow != RSET_EMPTY);
323
- if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
324
- as->mrm.ofs = ptr2addr(ir_knum(ir));
325
- as->mrm.base = as->mrm.idx = RID_NONE;
326
- return RID_MRM;
327
- }
328
- } else if (ir->o == IR_KINT64) {
423
+ if (!(avail & (avail-1))) /* Fuse if less than two regs available. */
424
+ return asm_fuseloadk64(as, ir);
425
+ } else if (ref == REF_BASE || ir->o == IR_KINT64) {
329
426
  RegSet avail = as->freeset & ~as->modset & RSET_GPR;
330
427
  lua_assert(allow != RSET_EMPTY);
331
428
  if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
332
- as->mrm.ofs = ptr2addr(ir_kint64(ir));
333
- as->mrm.base = as->mrm.idx = RID_NONE;
334
- return RID_MRM;
429
+ if (ref == REF_BASE) {
430
+ #if LJ_GC64
431
+ as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base);
432
+ as->mrm.base = RID_DISPATCH;
433
+ #else
434
+ as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base);
435
+ as->mrm.base = RID_NONE;
436
+ #endif
437
+ as->mrm.idx = RID_NONE;
438
+ return RID_MRM;
439
+ } else {
440
+ return asm_fuseloadk64(as, ir);
441
+ }
335
442
  }
336
443
  } else if (mayfuse(as, ref)) {
337
444
  RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
338
445
  if (ir->o == IR_SLOAD) {
339
446
  if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
340
- noconflict(as, ref, IR_RETF, 0)) {
447
+ noconflict(as, ref, IR_RETF, 0) &&
448
+ !(LJ_GC64 && irt_isaddr(ir->t))) {
341
449
  as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
342
- as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
450
+ as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
451
+ (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
343
452
  as->mrm.idx = RID_NONE;
344
453
  return RID_MRM;
345
454
  }
@@ -351,7 +460,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
351
460
  return RID_MRM;
352
461
  }
353
462
  } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
354
- if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
463
+ if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
464
+ !(LJ_GC64 && irt_isaddr(ir->t))) {
355
465
  asm_fuseahuref(as, ir->op1, xallow);
356
466
  return RID_MRM;
357
467
  }
@@ -364,12 +474,16 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
364
474
  asm_fusexref(as, ir->op1, xallow);
365
475
  return RID_MRM;
366
476
  }
367
- } else if (ir->o == IR_VLOAD) {
477
+ } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
368
478
  asm_fuseahuref(as, ir->op1, xallow);
369
479
  return RID_MRM;
370
480
  }
371
481
  }
372
- if (!(as->freeset & allow) && !irref_isk(ref) &&
482
+ if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) {
483
+ asm_fusefref(as, ir, RSET_EMPTY);
484
+ return RID_MRM;
485
+ }
486
+ if (!(as->freeset & allow) && !emit_canremat(ref) &&
373
487
  (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
374
488
  goto fusespill;
375
489
  return ra_allocref(as, ref, allow);
@@ -485,8 +599,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
485
599
  if (r) { /* Argument is in a register. */
486
600
  if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
487
601
  #if LJ_64
488
- if (ir->o == IR_KINT64)
489
- emit_loadu64(as, r, ir_kint64(ir)->u64);
602
+ if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64)
603
+ emit_loadu64(as, r, ir_k64(ir)->u64);
490
604
  else
491
605
  #endif
492
606
  emit_loadi(as, r, ir->i);
@@ -642,6 +756,9 @@ static void asm_callx(ASMState *as, IRIns *ir)
642
756
  static void asm_retf(ASMState *as, IRIns *ir)
643
757
  {
644
758
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
759
+ #if LJ_FR2
760
+ Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
761
+ #endif
645
762
  void *pc = ir_kptr(IR(ir->op2));
646
763
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
647
764
  as->topslot -= (BCReg)delta;
@@ -650,7 +767,12 @@ static void asm_retf(ASMState *as, IRIns *ir)
650
767
  emit_setgl(as, base, jit_base);
651
768
  emit_addptr(as, base, -8*delta);
652
769
  asm_guardcc(as, CC_NE);
770
+ #if LJ_FR2
771
+ emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
772
+ emit_loadu64(as, rpc, u64ptr(pc));
773
+ #else
653
774
  emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
775
+ #endif
654
776
  }
655
777
 
656
778
  /* -- Type conversions ---------------------------------------------------- */
@@ -674,8 +796,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
674
796
  Reg tmp = ra_noreg(IR(ir->op1)->r) ?
675
797
  ra_alloc1(as, ir->op1, RSET_FPR) :
676
798
  ra_scratch(as, RSET_FPR);
677
- Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
799
+ Reg right;
678
800
  emit_rr(as, XO_MOVDto, tmp, dest);
801
+ right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
679
802
  emit_mrm(as, XO_ADDSD, tmp, right);
680
803
  ra_left(as, tmp, ir->op1);
681
804
  }
@@ -696,13 +819,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
696
819
  if (left == dest) return; /* Avoid the XO_XORPS. */
697
820
  } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
698
821
  /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
699
- cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
822
+ cTValue *k = &as->J->k64[LJ_K64_TOBIT];
700
823
  Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
701
824
  if (irt_isfloat(ir->t))
702
825
  emit_rr(as, XO_CVTSD2SS, dest, dest);
703
826
  emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
704
827
  emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
705
- emit_loadn(as, bias, k);
828
+ emit_rma(as, XO_MOVSD, bias, k);
706
829
  emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
707
830
  return;
708
831
  } else { /* Integer to FP conversion. */
@@ -711,7 +834,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
711
834
  asm_fuseloadm(as, lref, RSET_GPR, st64);
712
835
  if (LJ_64 && st == IRT_U64) {
713
836
  MCLabel l_end = emit_label(as);
714
- const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
837
+ cTValue *k = &as->J->k64[LJ_K64_2P64];
715
838
  emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
716
839
  emit_sjcc(as, CC_NS, l_end);
717
840
  emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
@@ -738,23 +861,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
738
861
  emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
739
862
  emit_rr(as, op, dest|REX_64, tmp);
740
863
  if (st == IRT_NUM)
741
- emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J,
742
- LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
864
+ emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
743
865
  else
744
- emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J,
745
- LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
866
+ emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
746
867
  emit_sjcc(as, CC_NS, l_end);
747
868
  emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
748
869
  emit_rr(as, op, dest|REX_64, tmp);
749
870
  ra_left(as, tmp, lref);
750
871
  } else {
751
- Reg left = asm_fuseload(as, lref, RSET_FPR);
752
872
  if (LJ_64 && irt_isu32(ir->t))
753
873
  emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
754
874
  emit_mrm(as, op,
755
875
  dest|((LJ_64 &&
756
876
  (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
757
- left);
877
+ asm_fuseload(as, lref, RSET_FPR));
758
878
  }
759
879
  }
760
880
  } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
@@ -828,8 +948,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
828
948
  if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
829
949
  /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
830
950
  MCLabel l_end = emit_label(as);
831
- emit_rma(as, XO_FADDq, XOg_FADDq,
832
- lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
951
+ emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]);
833
952
  emit_sjcc(as, CC_NS, l_end);
834
953
  emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
835
954
  } else {
@@ -869,8 +988,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
869
988
  emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
870
989
  else
871
990
  emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
872
- emit_rma(as, XO_FADDq, XOg_FADDq,
873
- lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
991
+ emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
874
992
  emit_sjcc(as, CC_NS, l_pop);
875
993
  emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
876
994
  }
@@ -934,6 +1052,25 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
934
1052
  emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
935
1053
  } else {
936
1054
  /* Otherwise use g->tmptv to hold the TValue. */
1055
+ #if LJ_GC64
1056
+ if (irref_isk(ref)) {
1057
+ TValue k;
1058
+ lj_ir_kvalue(as->J->L, &k, ir);
1059
+ emit_movmroi(as, dest, 4, k.u32.hi);
1060
+ emit_movmroi(as, dest, 0, k.u32.lo);
1061
+ } else {
1062
+ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1063
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
1064
+ if (irt_is64(ir->t)) {
1065
+ emit_u32(as, irt_toitype(ir->t) << 15);
1066
+ emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
1067
+ } else {
1068
+ /* Currently, no caller passes integers that might end up here. */
1069
+ emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
1070
+ }
1071
+ emit_movtomro(as, REX_64IR(ir, src), dest, 0);
1072
+ }
1073
+ #else
937
1074
  if (!irref_isk(ref)) {
938
1075
  Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
939
1076
  emit_movtomro(as, REX_64IR(ir, src), dest, 0);
@@ -942,6 +1079,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
942
1079
  }
943
1080
  if (!(LJ_64 && irt_islightud(ir->t)))
944
1081
  emit_movmroi(as, dest, 4, irt_toitype(ir->t));
1082
+ #endif
945
1083
  emit_loada(as, dest, &J2G(as->J)->tmptv);
946
1084
  }
947
1085
  }
@@ -951,9 +1089,9 @@ static void asm_aref(ASMState *as, IRIns *ir)
951
1089
  Reg dest = ra_dest(as, ir, RSET_GPR);
952
1090
  asm_fusearef(as, ir, RSET_GPR);
953
1091
  if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
954
- emit_mrm(as, XO_LEA, dest, RID_MRM);
1092
+ emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
955
1093
  else if (as->mrm.base != dest)
956
- emit_rr(as, XO_MOV, dest, as->mrm.base);
1094
+ emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
957
1095
  }
958
1096
 
959
1097
  /* Inlined hash lookup. Specialized for key type and for const keys.
@@ -980,7 +1118,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
980
1118
  if (!isk) {
981
1119
  rset_clear(allow, tab);
982
1120
  key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
983
- if (!irt_isstr(kt))
1121
+ if (LJ_GC64 || !irt_isstr(kt))
984
1122
  tmp = ra_scratch(as, rset_exclude(allow, key));
985
1123
  }
986
1124
 
@@ -993,8 +1131,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
993
1131
 
994
1132
  /* Follow hash chain until the end. */
995
1133
  l_loop = emit_sjcc_label(as, CC_NZ);
996
- emit_rr(as, XO_TEST, dest, dest);
997
- emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
1134
+ emit_rr(as, XO_TEST, dest|REX_GC64, dest);
1135
+ emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
998
1136
  l_next = emit_label(as);
999
1137
 
1000
1138
  /* Type and value comparison. */
@@ -1015,7 +1153,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
1015
1153
  emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
1016
1154
  emit_sjcc(as, CC_AE, l_next);
1017
1155
  /* The type check avoids NaN penalties and complaints from Valgrind. */
1018
- #if LJ_64
1156
+ #if LJ_64 && !LJ_GC64
1019
1157
  emit_u32(as, LJ_TISNUM);
1020
1158
  emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1021
1159
  #else
@@ -1023,10 +1161,28 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
1023
1161
  emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1024
1162
  #endif
1025
1163
  }
1026
- #if LJ_64
1164
+ #if LJ_64 && !LJ_GC64
1027
1165
  } else if (irt_islightud(kt)) {
1028
1166
  emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
1029
1167
  #endif
1168
+ #if LJ_GC64
1169
+ } else if (irt_isaddr(kt)) {
1170
+ if (isk) {
1171
+ TValue k;
1172
+ k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
1173
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
1174
+ k.u32.lo);
1175
+ emit_sjcc(as, CC_NE, l_next);
1176
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
1177
+ k.u32.hi);
1178
+ } else {
1179
+ emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
1180
+ }
1181
+ } else {
1182
+ lua_assert(irt_ispri(kt) && !irt_isnil(kt));
1183
+ emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
1184
+ emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1185
+ #else
1030
1186
  } else {
1031
1187
  if (!irt_ispri(kt)) {
1032
1188
  lua_assert(irt_isaddr(kt));
@@ -1040,16 +1196,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
1040
1196
  lua_assert(!irt_isnil(kt));
1041
1197
  emit_i8(as, irt_toitype(kt));
1042
1198
  emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1199
+ #endif
1043
1200
  }
1044
1201
  emit_sfixup(as, l_loop);
1045
1202
  checkmclim(as);
1203
+ #if LJ_GC64
1204
+ if (!isk && irt_isaddr(kt)) {
1205
+ emit_rr(as, XO_OR, tmp|REX_64, key);
1206
+ emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
1207
+ }
1208
+ #endif
1046
1209
 
1047
1210
  /* Load main position relative to tab->node into dest. */
1048
1211
  khash = isk ? ir_khash(irkey) : 1;
1049
1212
  if (khash == 0) {
1050
- emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
1213
+ emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
1051
1214
  } else {
1052
- emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
1215
+ emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
1053
1216
  if ((as->flags & JIT_F_PREFER_IMUL)) {
1054
1217
  emit_i8(as, sizeof(Node));
1055
1218
  emit_rr(as, XO_IMULi8, dest, dest);
@@ -1084,7 +1247,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
1084
1247
  #endif
1085
1248
  } else {
1086
1249
  emit_rr(as, XO_MOV, tmp, key);
1250
+ #if LJ_GC64
1251
+ checkmclim(as);
1252
+ emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
1253
+ if ((as->flags & JIT_F_BMI2)) {
1254
+ emit_i8(as, 32);
1255
+ emit_mrm(as, XV_RORX|VEX_64, dest, key);
1256
+ } else {
1257
+ emit_shifti(as, XOg_SHR|REX_64, dest, 32);
1258
+ emit_rr(as, XO_MOV, dest|REX_64, key|REX_64);
1259
+ }
1260
+ #else
1087
1261
  emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
1262
+ #endif
1088
1263
  }
1089
1264
  }
1090
1265
  }
@@ -1104,11 +1279,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1104
1279
  if (ra_hasreg(dest)) {
1105
1280
  if (ofs != 0) {
1106
1281
  if (dest == node && !(as->flags & JIT_F_LEA_AGU))
1107
- emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
1282
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
1108
1283
  else
1109
- emit_rmro(as, XO_LEA, dest, node, ofs);
1284
+ emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
1110
1285
  } else if (dest != node) {
1111
- emit_rr(as, XO_MOV, dest, node);
1286
+ emit_rr(as, XO_MOV, dest|REX_GC64, node);
1112
1287
  }
1113
1288
  }
1114
1289
  asm_guardcc(as, CC_NE);
@@ -1120,13 +1295,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1120
1295
  lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
1121
1296
  /* Assumes -0.0 is already canonicalized to +0.0. */
1122
1297
  emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
1298
+ #if LJ_GC64
1299
+ ((uint64_t)irt_toitype(irkey->t) << 47) |
1300
+ (uint64_t)ir_kgc(irkey));
1301
+ #else
1123
1302
  ((uint64_t)irt_toitype(irkey->t) << 32) |
1124
1303
  (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
1304
+ #endif
1125
1305
  } else {
1126
1306
  lua_assert(!irt_isnil(irkey->t));
1307
+ #if LJ_GC64
1308
+ emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
1309
+ emit_rmro(as, XO_ARITHi, XOg_CMP, node,
1310
+ ofs + (int32_t)offsetof(Node, key.it));
1311
+ #else
1127
1312
  emit_i8(as, irt_toitype(irkey->t));
1128
1313
  emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1129
1314
  ofs + (int32_t)offsetof(Node, key.it));
1315
+ #endif
1130
1316
  }
1131
1317
  #else
1132
1318
  l_exit = emit_label(as);
@@ -1157,25 +1343,25 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1157
1343
 
1158
1344
  static void asm_uref(ASMState *as, IRIns *ir)
1159
1345
  {
1160
- /* NYI: Check that UREFO is still open and not aliasing a slot. */
1161
1346
  Reg dest = ra_dest(as, ir, RSET_GPR);
1162
1347
  if (irref_isk(ir->op1)) {
1163
1348
  GCfunc *fn = ir_kfunc(IR(ir->op1));
1164
1349
  MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
1165
- emit_rma(as, XO_MOV, dest, v);
1350
+ emit_rma(as, XO_MOV, dest|REX_GC64, v);
1166
1351
  } else {
1167
1352
  Reg uv = ra_scratch(as, RSET_GPR);
1168
1353
  Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
1169
1354
  if (ir->o == IR_UREFC) {
1170
- emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
1355
+ emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
1171
1356
  asm_guardcc(as, CC_NE);
1172
1357
  emit_i8(as, 1);
1173
1358
  emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
1174
1359
  } else {
1175
- emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
1360
+ emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
1176
1361
  }
1177
- emit_rmro(as, XO_MOV, uv, func,
1178
- (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
1362
+ emit_rmro(as, XO_MOV, uv|REX_GC64, func,
1363
+ (int32_t)offsetof(GCfuncL, uvptr) +
1364
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
1179
1365
  }
1180
1366
  }
1181
1367
 
@@ -1193,9 +1379,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
1193
1379
  if (as->mrm.base == RID_NONE)
1194
1380
  emit_loadi(as, dest, as->mrm.ofs);
1195
1381
  else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
1196
- emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
1382
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
1197
1383
  else
1198
- emit_mrm(as, XO_LEA, dest, RID_MRM);
1384
+ emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
1199
1385
  }
1200
1386
 
1201
1387
  /* -- Loads and stores ---------------------------------------------------- */
@@ -1264,7 +1450,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1264
1450
  case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
1265
1451
  case IRT_NUM: xo = XO_MOVSDto; break;
1266
1452
  case IRT_FLOAT: xo = XO_MOVSSto; break;
1267
- #if LJ_64
1453
+ #if LJ_64 && !LJ_GC64
1268
1454
  case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
1269
1455
  #endif
1270
1456
  default:
@@ -1296,7 +1482,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1296
1482
  #define asm_fstore(as, ir) asm_fxstore(as, ir)
1297
1483
  #define asm_xstore(as, ir) asm_fxstore(as, ir)
1298
1484
 
1299
- #if LJ_64
1485
+ #if LJ_64 && !LJ_GC64
1300
1486
  static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1301
1487
  {
1302
1488
  if (ra_used(ir) || typecheck) {
@@ -1318,9 +1504,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1318
1504
 
1319
1505
  static void asm_ahuvload(ASMState *as, IRIns *ir)
1320
1506
  {
1507
+ #if LJ_GC64
1508
+ Reg tmp = RID_NONE;
1509
+ #endif
1321
1510
  lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1322
1511
  (LJ_DUALNUM && irt_isint(ir->t)));
1323
- #if LJ_64
1512
+ #if LJ_64 && !LJ_GC64
1324
1513
  if (irt_islightud(ir->t)) {
1325
1514
  Reg dest = asm_load_lightud64(as, ir, 1);
1326
1515
  if (ra_hasreg(dest)) {
@@ -1334,20 +1523,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1334
1523
  RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1335
1524
  Reg dest = ra_dest(as, ir, allow);
1336
1525
  asm_fuseahuref(as, ir->op1, RSET_GPR);
1526
+ #if LJ_GC64
1527
+ if (irt_isaddr(ir->t)) {
1528
+ emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1529
+ asm_guardcc(as, CC_NE);
1530
+ emit_i8(as, irt_toitype(ir->t));
1531
+ emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1532
+ emit_i8(as, XI_O16);
1533
+ if ((as->flags & JIT_F_BMI2)) {
1534
+ emit_i8(as, 47);
1535
+ emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
1536
+ } else {
1537
+ emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1538
+ emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
1539
+ }
1540
+ return;
1541
+ } else
1542
+ #endif
1337
1543
  emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1338
1544
  } else {
1339
- asm_fuseahuref(as, ir->op1, RSET_GPR);
1545
+ RegSet gpr = RSET_GPR;
1546
+ #if LJ_GC64
1547
+ if (irt_isaddr(ir->t)) {
1548
+ tmp = ra_scratch(as, RSET_GPR);
1549
+ gpr = rset_exclude(gpr, tmp);
1550
+ }
1551
+ #endif
1552
+ asm_fuseahuref(as, ir->op1, gpr);
1340
1553
  }
1341
1554
  /* Always do the type check, even if the load result is unused. */
1342
1555
  as->mrm.ofs += 4;
1343
1556
  asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
1344
1557
  if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
1345
1558
  lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
1559
+ #if LJ_GC64
1560
+ emit_u32(as, LJ_TISNUM << 15);
1561
+ #else
1346
1562
  emit_u32(as, LJ_TISNUM);
1563
+ #endif
1564
+ emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1565
+ #if LJ_GC64
1566
+ } else if (irt_isaddr(ir->t)) {
1567
+ as->mrm.ofs -= 4;
1568
+ emit_i8(as, irt_toitype(ir->t));
1569
+ emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
1570
+ emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1571
+ emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
1572
+ } else if (irt_isnil(ir->t)) {
1573
+ as->mrm.ofs -= 4;
1574
+ emit_i8(as, -1);
1575
+ emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
1576
+ } else {
1577
+ emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
1347
1578
  emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1579
+ #else
1348
1580
  } else {
1349
1581
  emit_i8(as, irt_toitype(ir->t));
1350
1582
  emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
1583
+ #endif
1351
1584
  }
1352
1585
  }
1353
1586
 
@@ -1359,11 +1592,27 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1359
1592
  Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
1360
1593
  asm_fuseahuref(as, ir->op1, RSET_GPR);
1361
1594
  emit_mrm(as, XO_MOVSDto, src, RID_MRM);
1362
- #if LJ_64
1595
+ #if LJ_64 && !LJ_GC64
1363
1596
  } else if (irt_islightud(ir->t)) {
1364
1597
  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1365
1598
  asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
1366
1599
  emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1600
+ #endif
1601
+ #if LJ_GC64
1602
+ } else if (irref_isk(ir->op2)) {
1603
+ TValue k;
1604
+ lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
1605
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
1606
+ if (tvisnil(&k)) {
1607
+ emit_i32(as, -1);
1608
+ emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
1609
+ } else {
1610
+ emit_u32(as, k.u32.lo);
1611
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1612
+ as->mrm.ofs += 4;
1613
+ emit_u32(as, k.u32.hi);
1614
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1615
+ }
1367
1616
  #endif
1368
1617
  } else {
1369
1618
  IRIns *irr = IR(ir->op2);
@@ -1375,6 +1624,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1375
1624
  }
1376
1625
  asm_fuseahuref(as, ir->op1, allow);
1377
1626
  if (ra_hasreg(src)) {
1627
+ #if LJ_GC64
1628
+ if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
1629
+ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1630
+ as->mrm.ofs += 4;
1631
+ emit_u32(as, irt_toitype(ir->t) << 15);
1632
+ emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
1633
+ as->mrm.ofs -= 4;
1634
+ emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1635
+ return;
1636
+ }
1637
+ #endif
1378
1638
  emit_mrm(as, XO_MOVto, src, RID_MRM);
1379
1639
  } else if (!irt_ispri(irr->t)) {
1380
1640
  lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
@@ -1382,14 +1642,20 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1382
1642
  emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1383
1643
  }
1384
1644
  as->mrm.ofs += 4;
1645
+ #if LJ_GC64
1646
+ lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
1647
+ emit_i32(as, LJ_TNUMX << 15);
1648
+ #else
1385
1649
  emit_i32(as, (int32_t)irt_toitype(ir->t));
1650
+ #endif
1386
1651
  emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1387
1652
  }
1388
1653
  }
1389
1654
 
1390
1655
  static void asm_sload(ASMState *as, IRIns *ir)
1391
1656
  {
1392
- int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1657
+ int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
1658
+ (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1393
1659
  IRType1 t = ir->t;
1394
1660
  Reg base;
1395
1661
  lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
@@ -1402,7 +1668,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1402
1668
  base = ra_alloc1(as, REF_BASE, RSET_GPR);
1403
1669
  emit_rmro(as, XO_MOVSD, left, base, ofs);
1404
1670
  t.irt = IRT_NUM; /* Continue with a regular number type check. */
1405
- #if LJ_64
1671
+ #if LJ_64 && !LJ_GC64
1406
1672
  } else if (irt_islightud(t)) {
1407
1673
  Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
1408
1674
  if (ra_hasreg(dest)) {
@@ -1420,6 +1686,36 @@ static void asm_sload(ASMState *as, IRIns *ir)
1420
1686
  t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1421
1687
  emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1422
1688
  } else {
1689
+ #if LJ_GC64
1690
+ if (irt_isaddr(t)) {
1691
+ /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
1692
+ **
1693
+ ** mov r64, [addr] rorx r64, [addr], 47
1694
+ ** ror r64, 47
1695
+ ** cmp r16, itype cmp r16, itype
1696
+ ** jne ->exit jne ->exit
1697
+ ** shr r64, 16 shr r64, 16
1698
+ */
1699
+ emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1700
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1701
+ asm_guardcc(as, CC_NE);
1702
+ emit_i8(as, irt_toitype(t));
1703
+ emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1704
+ emit_i8(as, XI_O16);
1705
+ }
1706
+ if ((as->flags & JIT_F_BMI2)) {
1707
+ emit_i8(as, 47);
1708
+ emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
1709
+ } else {
1710
+ if ((ir->op2 & IRSLOAD_TYPECHECK))
1711
+ emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1712
+ else
1713
+ emit_shifti(as, XOg_SHL|REX_64, dest, 17);
1714
+ emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
1715
+ }
1716
+ return;
1717
+ } else
1718
+ #endif
1423
1719
  emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1424
1720
  }
1425
1721
  } else {
@@ -1432,11 +1728,42 @@ static void asm_sload(ASMState *as, IRIns *ir)
1432
1728
  asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
1433
1729
  if (LJ_64 && irt_type(t) >= IRT_NUM) {
1434
1730
  lua_assert(irt_isinteger(t) || irt_isnum(t));
1731
+ #if LJ_GC64
1732
+ emit_u32(as, LJ_TISNUM << 15);
1733
+ #else
1435
1734
  emit_u32(as, LJ_TISNUM);
1735
+ #endif
1736
+ emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1737
+ #if LJ_GC64
1738
+ } else if (irt_isnil(t)) {
1739
+ /* LJ_GC64 type check for nil:
1740
+ **
1741
+ ** cmp qword [addr], -1
1742
+ ** jne ->exit
1743
+ */
1744
+ emit_i8(as, -1);
1745
+ emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
1746
+ } else if (irt_ispri(t)) {
1747
+ emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
1436
1748
  emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1749
+ } else {
1750
+ /* LJ_GC64 type check only:
1751
+ **
1752
+ ** mov r64, [addr]
1753
+ ** sar r64, 47
1754
+ ** cmp r32, itype
1755
+ ** jne ->exit
1756
+ */
1757
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
1758
+ emit_i8(as, irt_toitype(t));
1759
+ emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
1760
+ emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1761
+ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4);
1762
+ #else
1437
1763
  } else {
1438
1764
  emit_i8(as, irt_toitype(t));
1439
1765
  emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
1766
+ #endif
1440
1767
  }
1441
1768
  }
1442
1769
  }
@@ -1464,8 +1791,9 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1464
1791
  Reg r64 = sz == 8 ? REX_64 : 0;
1465
1792
  if (irref_isk(ir->op2)) {
1466
1793
  IRIns *irk = IR(ir->op2);
1467
- uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
1468
- (uint64_t)(uint32_t)irk->i;
1794
+ uint64_t k = (irk->o == IR_KINT64 ||
1795
+ (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ?
1796
+ ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i;
1469
1797
  if (sz == 4 || checki32((int64_t)k)) {
1470
1798
  emit_i32(as, (int32_t)k);
1471
1799
  emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
@@ -1530,7 +1858,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1530
1858
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1531
1859
  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1532
1860
  MCLabel l_end = emit_label(as);
1533
- emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
1861
+ emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
1534
1862
  emit_setgl(as, tab, gc.grayagain);
1535
1863
  emit_getgl(as, tmp, gc.grayagain);
1536
1864
  emit_i8(as, ~LJ_GC_BLACK);
@@ -1956,7 +2284,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
1956
2284
  #define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
1957
2285
  #define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
1958
2286
 
1959
- static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2287
+ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
1960
2288
  {
1961
2289
  IRRef rref = ir->op2;
1962
2290
  IRIns *irr = IR(rref);
@@ -1965,11 +2293,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1965
2293
  int shift;
1966
2294
  dest = ra_dest(as, ir, RSET_GPR);
1967
2295
  shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
2296
+ if (!xv && shift && (as->flags & JIT_F_BMI2)) {
2297
+ Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
2298
+ if (left != dest) { /* BMI2 rotate right by constant. */
2299
+ emit_i8(as, xs == XOg_ROL ? -shift : shift);
2300
+ emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
2301
+ return;
2302
+ }
2303
+ }
1968
2304
  switch (shift) {
1969
2305
  case 0: break;
1970
2306
  case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
1971
2307
  default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
1972
2308
  }
2309
+ } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
2310
+ Reg left, right;
2311
+ dest = ra_dest(as, ir, RSET_GPR);
2312
+ right = ra_alloc1(as, rref, RSET_GPR);
2313
+ left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
2314
+ irt_is64(ir->t));
2315
+ emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
2316
+ return;
1973
2317
  } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
1974
2318
  Reg right;
1975
2319
  dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
@@ -1995,11 +2339,11 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1995
2339
  */
1996
2340
  }
1997
2341
 
1998
- #define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
1999
- #define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
2000
- #define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
2001
- #define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
2002
- #define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
2342
+ #define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
2343
+ #define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
2344
+ #define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
2345
+ #define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
2346
+ #define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
2003
2347
 
2004
2348
  /* -- Comparisons --------------------------------------------------------- */
2005
2349
 
@@ -2050,7 +2394,6 @@ static void asm_comp(ASMState *as, IRIns *ir)
2050
2394
  cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
2051
2395
  }
2052
2396
  left = ra_alloc1(as, lref, RSET_FPR);
2053
- right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2054
2397
  l_around = emit_label(as);
2055
2398
  asm_guardcc(as, cc >> 4);
2056
2399
  if (cc & VCC_P) { /* Extra CC_P branch required? */
@@ -2067,6 +2410,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
2067
2410
  emit_jcc(as, CC_P, as->mcp);
2068
2411
  }
2069
2412
  }
2413
+ right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2070
2414
  emit_mrm(as, XO_UCOMISD, left, right);
2071
2415
  } else {
2072
2416
  IRRef lref = ir->op1, rref = ir->op2;
@@ -2343,13 +2687,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2343
2687
  emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
2344
2688
  else
2345
2689
  ra_modified(as, r);
2346
- emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
2690
+ emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
2347
2691
  if (ra_hasreg(pbase) && pbase != r)
2348
- emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
2692
+ emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
2349
2693
  else
2694
+ #if LJ_GC64
2695
+ emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
2696
+ (int32_t)dispofs(as, &J2G(as->J)->jit_base));
2697
+ #else
2350
2698
  emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2351
2699
  ptr2addr(&J2G(as->J)->jit_base));
2352
- emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
2700
+ #endif
2701
+ emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
2353
2702
  emit_getgl(as, r, cur_L);
2354
2703
  if (allow == RSET_EMPTY) /* Spill temp. register. */
2355
2704
  emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
@@ -2359,13 +2708,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2359
2708
  static void asm_stack_restore(ASMState *as, SnapShot *snap)
2360
2709
  {
2361
2710
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
2362
- SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
2711
+ #if !LJ_FR2 || defined(LUA_USE_ASSERT)
2712
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2713
+ #endif
2363
2714
  MSize n, nent = snap->nent;
2364
2715
  /* Store the value of all modified slots to the Lua stack. */
2365
2716
  for (n = 0; n < nent; n++) {
2366
2717
  SnapEntry sn = map[n];
2367
2718
  BCReg s = snap_slot(sn);
2368
- int32_t ofs = 8*((int32_t)s-1);
2719
+ int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
2369
2720
  IRRef ref = snap_ref(sn);
2370
2721
  IRIns *ir = IR(ref);
2371
2722
  if ((sn & SNAP_NORESTORE))
@@ -2378,16 +2729,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2378
2729
  (LJ_DUALNUM && irt_isinteger(ir->t)));
2379
2730
  if (!irref_isk(ref)) {
2380
2731
  Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2732
+ #if LJ_GC64
2733
+ if (irt_is64(ir->t)) {
2734
+ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
2735
+ emit_u32(as, irt_toitype(ir->t) << 15);
2736
+ emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
2737
+ } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
2738
+ emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
2739
+ } else {
2740
+ emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
2741
+ }
2742
+ #endif
2381
2743
  emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
2744
+ #if LJ_GC64
2745
+ } else {
2746
+ TValue k;
2747
+ lj_ir_kvalue(as->J->L, &k, ir);
2748
+ if (tvisnil(&k)) {
2749
+ emit_i32(as, -1);
2750
+ emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
2751
+ } else {
2752
+ emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
2753
+ emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
2754
+ }
2755
+ #else
2382
2756
  } else if (!irt_ispri(ir->t)) {
2383
2757
  emit_movmroi(as, RID_BASE, ofs, ir->i);
2758
+ #endif
2384
2759
  }
2385
2760
  if ((sn & (SNAP_CONT|SNAP_FRAME))) {
2761
+ #if !LJ_FR2
2386
2762
  if (s != 0) /* Do not overwrite link to previous frame. */
2387
2763
  emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
2764
+ #endif
2765
+ #if !LJ_GC64
2388
2766
  } else {
2389
2767
  if (!(LJ_64 && irt_islightud(ir->t)))
2390
2768
  emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
2769
+ #endif
2391
2770
  }
2392
2771
  }
2393
2772
  checkmclim(as);
@@ -2413,11 +2792,15 @@ static void asm_gc_check(ASMState *as)
2413
2792
  args[1] = ASMREF_TMP2; /* MSize steps */
2414
2793
  asm_gencall(as, ci, args);
2415
2794
  tmp = ra_releasetmp(as, ASMREF_TMP1);
2795
+ #if LJ_GC64
2796
+ emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
2797
+ #else
2416
2798
  emit_loada(as, tmp, J2G(as->J));
2799
+ #endif
2417
2800
  emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
2418
2801
  /* Jump around GC step if GC total < GC threshold. */
2419
2802
  emit_sjcc(as, CC_B, l_end);
2420
- emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
2803
+ emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
2421
2804
  emit_getgl(as, tmp, gc.total);
2422
2805
  as->gcsteps = 0;
2423
2806
  checkmclim(as);
@@ -2482,7 +2865,7 @@ static void asm_head_root_base(ASMState *as)
2482
2865
  if (rset_test(as->modset, r) || irt_ismarked(ir->t))
2483
2866
  ir->r = RID_INIT; /* No inheritance for modified BASE register. */
2484
2867
  if (r != RID_BASE)
2485
- emit_rr(as, XO_MOV, r, RID_BASE);
2868
+ emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
2486
2869
  }
2487
2870
  }
2488
2871
 
@@ -2498,8 +2881,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
2498
2881
  if (irp->r == r) {
2499
2882
  rset_clear(allow, r); /* Mark same BASE register as coalesced. */
2500
2883
  } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
2884
+ /* Move from coalesced parent reg. */
2501
2885
  rset_clear(allow, irp->r);
2502
- emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */
2886
+ emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
2503
2887
  } else {
2504
2888
  emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
2505
2889
  }
@@ -2600,10 +2984,111 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2600
2984
  static void asm_setup_target(ASMState *as)
2601
2985
  {
2602
2986
  asm_exitstub_setup(as, as->T->nsnap);
2987
+ as->mrm.base = 0;
2603
2988
  }
2604
2989
 
2605
2990
  /* -- Trace patching ------------------------------------------------------ */
2606
2991
 
2992
+ static const uint8_t map_op1[256] = {
2993
+ 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20,
2994
+ 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,
2995
+ 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
2996
+ 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
2997
+ #if LJ_64
2998
+ 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
2999
+ #else
3000
+ 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
3001
+ #endif
3002
+ 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
3003
+ 0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51,
3004
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
3005
+ 0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
3006
+ 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51,
3007
+ #if LJ_64
3008
+ 0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
3009
+ #else
3010
+ 0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
3011
+ #endif
3012
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
3013
+ 0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51,
3014
+ 0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
3015
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51,
3016
+ 0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92
3017
+ };
3018
+
3019
+ static const uint8_t map_op2[256] = {
3020
+ 0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94,
3021
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3022
+ 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3023
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51,
3024
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3025
+ 0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3026
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3027
+ 0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3028
+ 0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,
3029
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3030
+ 0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93,
3031
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93,
3032
+ 0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
3033
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3034
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
3035
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52
3036
+ };
3037
+
3038
+ static uint32_t asm_x86_inslen(const uint8_t* p)
3039
+ {
3040
+ uint32_t result = 0;
3041
+ uint32_t prefixes = 0;
3042
+ uint32_t x = map_op1[*p];
3043
+ for (;;) {
3044
+ switch (x >> 4) {
3045
+ case 0: return result + x + (prefixes & 4);
3046
+ case 1: prefixes |= x; x = map_op1[*++p]; result++; break;
3047
+ case 2: x = map_op2[*++p]; break;
3048
+ case 3: p++; goto mrm;
3049
+ case 4: result -= (prefixes & 2); /* fallthrough */
3050
+ case 5: return result + (x & 15);
3051
+ case 6: /* Group 3. */
3052
+ if (p[1] & 0x38) x = 2;
3053
+ else if ((prefixes & 2) && (x == 0x66)) x = 4;
3054
+ goto mrm;
3055
+ case 7: /* VEX c4/c5. */
3056
+ if (LJ_32 && p[1] < 0xc0) {
3057
+ x = 2;
3058
+ goto mrm;
3059
+ }
3060
+ if (x == 0x70) {
3061
+ x = *++p & 0x1f;
3062
+ result++;
3063
+ if (x >= 2) {
3064
+ p += 2;
3065
+ result += 2;
3066
+ goto mrm;
3067
+ }
3068
+ }
3069
+ p++;
3070
+ result++;
3071
+ x = map_op2[*++p];
3072
+ break;
3073
+ case 8: result -= (prefixes & 2); /* fallthrough */
3074
+ case 9: mrm: /* ModR/M and possibly SIB. */
3075
+ result += (x & 15);
3076
+ x = *++p;
3077
+ switch (x >> 6) {
3078
+ case 0: if ((x & 7) == 5) return result + 4; break;
3079
+ case 1: result++; break;
3080
+ case 2: result += 4; break;
3081
+ case 3: return result;
3082
+ }
3083
+ if ((x & 7) == 4) {
3084
+ result++;
3085
+ if (x < 0x40 && (p[1] & 7) == 5) result += 4;
3086
+ }
3087
+ return result;
3088
+ }
3089
+ }
3090
+ }
3091
+
2607
3092
  /* Patch exit jumps of existing machine code to a new target. */
2608
3093
  void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2609
3094
  {
@@ -2612,22 +3097,23 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2612
3097
  MSize len = T->szmcode;
2613
3098
  MCode *px = exitstub_addr(J, exitno) - 6;
2614
3099
  MCode *pe = p+len-6;
2615
- uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
3100
+ #if LJ_GC64
3101
+ uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
3102
+ #else
3103
+ uint32_t statei = u32ptr(&J2G(J)->vmstate);
3104
+ #endif
2616
3105
  if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
2617
3106
  *(int32_t *)(p+len-4) = jmprel(p+len, target);
2618
3107
  /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
2619
- for (; p < pe; p++)
2620
- if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) {
2621
- p += LJ_64 ? 11 : 10;
3108
+ for (; p < pe; p += asm_x86_inslen(p)) {
3109
+ intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
3110
+ if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
2622
3111
  break;
2623
- }
3112
+ }
2624
3113
  lua_assert(p < pe);
2625
- for (; p < pe; p++) {
2626
- if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
3114
+ for (; p < pe; p += asm_x86_inslen(p))
3115
+ if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
2627
3116
  *(int32_t *)(p+2) = jmprel(p+6, target);
2628
- p += 5;
2629
- }
2630
- }
2631
3117
  lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
2632
3118
  lj_mcode_patch(J, mcarea, 1);
2633
3119
  }