immunio 1.2.1 → 2.0.2

Files changed (291)
  1. checksums.yaml +4 -4
  2. data/README.md +13 -5
  3. data/ext/immunio/Rakefile +14 -6
  4. data/lib/immunio/context.rb +2 -0
  5. data/lib/immunio/plugins/action_view.rb +7 -668
  6. data/lib/immunio/plugins/action_view/action_view.rb +22 -0
  7. data/lib/immunio/plugins/action_view/active_support_hash.rb +29 -0
  8. data/lib/immunio/plugins/action_view/cache_store.rb +24 -0
  9. data/lib/immunio/plugins/action_view/erubi.rb +38 -0
  10. data/lib/immunio/plugins/action_view/erubis.rb +39 -0
  11. data/lib/immunio/plugins/action_view/fragment_caching.rb +29 -0
  12. data/lib/immunio/plugins/action_view/haml.rb +46 -0
  13. data/lib/immunio/plugins/action_view/slim.rb +42 -0
  14. data/lib/immunio/plugins/action_view/template.rb +431 -0
  15. data/lib/immunio/plugins/action_view/template_rendering.rb +45 -0
  16. data/lib/immunio/plugins/http_tracker.rb +2 -0
  17. data/lib/immunio/plugins/io.rb +34 -0
  18. data/lib/immunio/version.rb +1 -1
  19. data/lua-hooks/Makefile +36 -9
  20. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  21. data/lua-hooks/ext/luajit/Makefile +22 -15
  22. data/lua-hooks/ext/luajit/README +2 -2
  23. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  24. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  25. data/lua-hooks/ext/luajit/doc/changes.html +69 -3
  26. data/lua-hooks/ext/luajit/doc/contact.html +10 -3
  27. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  29. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  30. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +3 -4
  31. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  32. data/lua-hooks/ext/luajit/doc/ext_jit.html +3 -3
  33. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  34. data/lua-hooks/ext/luajit/doc/extensions.html +47 -20
  35. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  36. data/lua-hooks/ext/luajit/doc/install.html +74 -45
  37. data/lua-hooks/ext/luajit/doc/luajit.html +5 -5
  38. data/lua-hooks/ext/luajit/doc/running.html +3 -3
  39. data/lua-hooks/ext/luajit/doc/status.html +13 -8
  40. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  41. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1 -1
  42. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  43. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1 -1
  44. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +8 -5
  45. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +66 -11
  46. data/lua-hooks/ext/luajit/dynasm/dasm_mips64.lua +12 -0
  47. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  48. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1 -1
  49. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +1 -1
  50. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  51. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +1 -1
  52. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +5 -1
  53. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +2 -2
  54. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  55. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  56. data/lua-hooks/ext/luajit/src/Makefile +15 -11
  57. data/lua-hooks/ext/luajit/src/Makefile.dep +16 -16
  58. data/lua-hooks/ext/luajit/src/host/buildvm.c +2 -2
  59. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  60. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +9 -4
  61. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +2 -2
  62. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  63. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +14 -3
  64. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +27 -3
  65. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/host/genminilua.lua +6 -5
  67. data/lua-hooks/ext/luajit/src/host/minilua.c +1 -1
  68. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  69. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +8 -8
  70. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +2 -2
  71. data/lua-hooks/ext/luajit/src/jit/dis_arm64.lua +1216 -0
  72. data/lua-hooks/ext/luajit/src/jit/dis_arm64be.lua +12 -0
  73. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +35 -20
  74. data/lua-hooks/ext/luajit/src/jit/dis_mips64.lua +17 -0
  75. data/lua-hooks/ext/luajit/src/jit/dis_mips64el.lua +17 -0
  76. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  77. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +2 -2
  78. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  79. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +7 -4
  80. data/lua-hooks/ext/luajit/src/jit/dump.lua +17 -12
  81. data/lua-hooks/ext/luajit/src/jit/p.lua +3 -2
  82. data/lua-hooks/ext/luajit/src/jit/v.lua +2 -2
  83. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  84. data/lua-hooks/ext/luajit/src/lauxlib.h +14 -20
  85. data/lua-hooks/ext/luajit/src/lib_aux.c +38 -27
  86. data/lua-hooks/ext/luajit/src/lib_base.c +12 -5
  87. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  88. data/lua-hooks/ext/luajit/src/lib_debug.c +5 -5
  89. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -2
  90. data/lua-hooks/ext/luajit/src/lib_init.c +16 -16
  91. data/lua-hooks/ext/luajit/src/lib_io.c +6 -7
  92. data/lua-hooks/ext/luajit/src/lib_jit.c +14 -4
  93. data/lua-hooks/ext/luajit/src/lib_math.c +1 -5
  94. data/lua-hooks/ext/luajit/src/lib_os.c +1 -1
  95. data/lua-hooks/ext/luajit/src/lib_package.c +14 -23
  96. data/lua-hooks/ext/luajit/src/lib_string.c +1 -5
  97. data/lua-hooks/ext/luajit/src/lib_table.c +21 -1
  98. data/lua-hooks/ext/luajit/src/lj.supp +3 -3
  99. data/lua-hooks/ext/luajit/src/lj_alloc.c +174 -83
  100. data/lua-hooks/ext/luajit/src/lj_api.c +97 -18
  101. data/lua-hooks/ext/luajit/src/lj_arch.h +54 -22
  102. data/lua-hooks/ext/luajit/src/lj_asm.c +172 -53
  103. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +19 -16
  105. data/lua-hooks/ext/luajit/src/lj_asm_arm64.h +2022 -0
  106. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +564 -158
  107. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +19 -18
  108. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +578 -92
  109. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  113. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_buf.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_ccall.c +172 -7
  119. data/lua-hooks/ext/luajit/src/lj_ccall.h +21 -5
  120. data/lua-hooks/ext/luajit/src/lj_ccallback.c +71 -17
  121. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_cconv.c +4 -2
  123. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  124. data/lua-hooks/ext/luajit/src/lj_cdata.c +7 -5
  125. data/lua-hooks/ext/luajit/src/lj_cdata.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_clib.c +5 -5
  127. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  128. data/lua-hooks/ext/luajit/src/lj_cparse.c +11 -6
  129. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  130. data/lua-hooks/ext/luajit/src/lj_crecord.c +70 -14
  131. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_ctype.c +1 -1
  133. data/lua-hooks/ext/luajit/src/lj_ctype.h +8 -8
  134. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_def.h +6 -9
  137. data/lua-hooks/ext/luajit/src/lj_dispatch.c +3 -3
  138. data/lua-hooks/ext/luajit/src/lj_dispatch.h +2 -1
  139. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +5 -4
  140. data/lua-hooks/ext/luajit/src/lj_emit_arm64.h +419 -0
  141. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +100 -20
  142. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +4 -4
  143. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +116 -25
  144. data/lua-hooks/ext/luajit/src/lj_err.c +34 -13
  145. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +58 -49
  149. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_frame.h +33 -6
  151. data/lua-hooks/ext/luajit/src/lj_func.c +4 -2
  152. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_gc.c +16 -7
  154. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +31 -1
  156. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_ir.c +69 -96
  158. data/lua-hooks/ext/luajit/src/lj_ir.h +29 -18
  159. data/lua-hooks/ext/luajit/src/lj_ircall.h +24 -30
  160. data/lua-hooks/ext/luajit/src/lj_iropt.h +9 -9
  161. data/lua-hooks/ext/luajit/src/lj_jit.h +67 -9
  162. data/lua-hooks/ext/luajit/src/lj_lex.c +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  164. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  166. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_mcode.c +11 -10
  168. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_meta.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  172. data/lua-hooks/ext/luajit/src/lj_obj.h +7 -3
  173. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  174. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +84 -17
  175. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +3 -3
  177. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +24 -22
  178. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +11 -6
  179. data/lua-hooks/ext/luajit/src/lj_opt_split.c +11 -2
  180. data/lua-hooks/ext/luajit/src/lj_parse.c +9 -7
  181. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  182. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_record.c +201 -117
  185. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_snap.c +72 -26
  187. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_state.c +6 -6
  189. data/lua-hooks/ext/luajit/src/lj_state.h +2 -2
  190. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_strfmt.c +7 -3
  193. data/lua-hooks/ext/luajit/src/lj_strfmt.h +1 -1
  194. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +4 -3
  195. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  196. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  197. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -2
  198. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  199. data/lua-hooks/ext/luajit/src/lj_target.h +3 -3
  200. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  201. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +239 -7
  202. data/lua-hooks/ext/luajit/src/lj_target_mips.h +111 -22
  203. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  204. data/lua-hooks/ext/luajit/src/lj_target_x86.h +21 -4
  205. data/lua-hooks/ext/luajit/src/lj_trace.c +63 -18
  206. data/lua-hooks/ext/luajit/src/lj_trace.h +2 -1
  207. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -1
  208. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  209. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  210. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -1
  211. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  212. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  213. data/lua-hooks/ext/luajit/src/lj_vmmath.c +1 -1
  214. data/lua-hooks/ext/luajit/src/ljamalg.c +1 -1
  215. data/lua-hooks/ext/luajit/src/lua.h +9 -1
  216. data/lua-hooks/ext/luajit/src/luaconf.h +3 -7
  217. data/lua-hooks/ext/luajit/src/luajit.c +69 -54
  218. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  219. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  220. data/lua-hooks/ext/luajit/src/msvcbuild.bat +12 -4
  221. data/lua-hooks/ext/luajit/src/vm_arm.dasc +1 -1
  222. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +255 -32
  223. data/lua-hooks/ext/luajit/src/vm_mips.dasc +26 -23
  224. data/lua-hooks/ext/luajit/src/vm_mips64.dasc +5062 -0
  225. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +1 -1
  226. data/lua-hooks/ext/luajit/src/vm_x64.dasc +24 -25
  227. data/lua-hooks/ext/luajit/src/vm_x86.dasc +77 -4
  228. data/lua-hooks/libluahooks.darwin.a +0 -0
  229. data/lua-hooks/libluahooks.linux.a +0 -0
  230. data/lua-hooks/options.mk +1 -1
  231. metadata +37 -77
  232. data/lua-hooks/ext/all.c +0 -69
  233. data/lua-hooks/ext/libinjection/COPYING +0 -37
  234. data/lua-hooks/ext/libinjection/libinjection.h +0 -65
  235. data/lua-hooks/ext/libinjection/libinjection_html5.c +0 -847
  236. data/lua-hooks/ext/libinjection/libinjection_html5.h +0 -54
  237. data/lua-hooks/ext/libinjection/libinjection_sqli.c +0 -2301
  238. data/lua-hooks/ext/libinjection/libinjection_sqli.h +0 -295
  239. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +0 -9349
  240. data/lua-hooks/ext/libinjection/libinjection_xss.c +0 -531
  241. data/lua-hooks/ext/libinjection/libinjection_xss.h +0 -21
  242. data/lua-hooks/ext/libinjection/lualib.c +0 -145
  243. data/lua-hooks/ext/libinjection/module.mk +0 -5
  244. data/lua-hooks/ext/lpeg/HISTORY +0 -96
  245. data/lua-hooks/ext/lpeg/lpcap.c +0 -537
  246. data/lua-hooks/ext/lpeg/lpcap.h +0 -56
  247. data/lua-hooks/ext/lpeg/lpcode.c +0 -1014
  248. data/lua-hooks/ext/lpeg/lpcode.h +0 -40
  249. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  250. data/lua-hooks/ext/lpeg/lpeg.html +0 -1445
  251. data/lua-hooks/ext/lpeg/lpprint.c +0 -244
  252. data/lua-hooks/ext/lpeg/lpprint.h +0 -36
  253. data/lua-hooks/ext/lpeg/lptree.c +0 -1303
  254. data/lua-hooks/ext/lpeg/lptree.h +0 -82
  255. data/lua-hooks/ext/lpeg/lptypes.h +0 -149
  256. data/lua-hooks/ext/lpeg/lpvm.c +0 -364
  257. data/lua-hooks/ext/lpeg/lpvm.h +0 -58
  258. data/lua-hooks/ext/lpeg/makefile +0 -55
  259. data/lua-hooks/ext/lpeg/module.mk +0 -6
  260. data/lua-hooks/ext/lpeg/re.html +0 -498
  261. data/lua-hooks/ext/lua-cmsgpack/.gitignore +0 -13
  262. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +0 -45
  263. data/lua-hooks/ext/lua-cmsgpack/README.md +0 -115
  264. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +0 -970
  265. data/lua-hooks/ext/lua-cmsgpack/module.mk +0 -2
  266. data/lua-hooks/ext/lua-cmsgpack/test.lua +0 -570
  267. data/lua-hooks/ext/lua-snapshot/LICENSE +0 -7
  268. data/lua-hooks/ext/lua-snapshot/Makefile +0 -12
  269. data/lua-hooks/ext/lua-snapshot/README.md +0 -18
  270. data/lua-hooks/ext/lua-snapshot/dump.lua +0 -15
  271. data/lua-hooks/ext/lua-snapshot/module.mk +0 -2
  272. data/lua-hooks/ext/lua-snapshot/snapshot.c +0 -462
  273. data/lua-hooks/ext/luautf8/README.md +0 -152
  274. data/lua-hooks/ext/luautf8/lutf8lib.c +0 -1274
  275. data/lua-hooks/ext/luautf8/module.mk +0 -2
  276. data/lua-hooks/ext/luautf8/unidata.h +0 -3064
  277. data/lua-hooks/ext/module.mk +0 -15
  278. data/lua-hooks/ext/modules.h +0 -17
  279. data/lua-hooks/ext/perf/luacpu.c +0 -114
  280. data/lua-hooks/ext/perf/lualoadavg.c +0 -40
  281. data/lua-hooks/ext/perf/luameminfo.c +0 -38
  282. data/lua-hooks/ext/perf/luaoslib.c +0 -203
  283. data/lua-hooks/ext/perf/module.mk +0 -5
  284. data/lua-hooks/ext/sha1/luasha1.c +0 -74
  285. data/lua-hooks/ext/sha1/module.mk +0 -5
  286. data/lua-hooks/ext/sha1/sha1.c +0 -145
  287. data/lua-hooks/ext/sha2/luasha256.c +0 -77
  288. data/lua-hooks/ext/sha2/module.mk +0 -5
  289. data/lua-hooks/ext/sha2/sha256.c +0 -196
  290. data/lua-hooks/ext/sysutils/lua_utils.c +0 -56
  291. data/lua-hooks/ext/sysutils/module.mk +0 -2
--- a/data/lua-hooks/ext/luajit/src/lj_asm_ppc.h
+++ b/data/lua-hooks/ext/luajit/src/lj_asm_ppc.h
@@ -1,6 +1,6 @@
 /*
 ** PPC IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
   emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
   emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
   emit_lsptr(as, PPCI_LFS, (fbias & 31),
-             (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
-             RSET_GPR);
+             (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
   emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
   emit_fb(as, PPCI_FCTIWZ, tmp, left);
 }
@@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
     Reg left = ra_alloc1(as, lref, allow);
     Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
     Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
-    const float *kbias;
     if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
     emit_fab(as, PPCI_FSUB, dest, dest, fbias);
     emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
-    kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
-    if (st == IRT_U32) kbias++;
-    emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
+    emit_lsptr(as, PPCI_LFS, (fbias & 31),
+               &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
                rset_clear(allow, hibias));
     emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
              RID_SP, SPOFS_TMPLO);
@@ -717,7 +713,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 
 static void asm_uref(ASMState *as, IRIns *ir)
 {
-  /* NYI: Check that UREFO is still open and not aliasing a slot. */
   Reg dest = ra_dest(as, ir, RSET_GPR);
   if (irref_isk(ir->op1)) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
@@ -809,17 +804,23 @@ static PPCIns asm_fxstoreins(IRIns *ir)
 static void asm_fload(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
   PPCIns pi = asm_fxloadins(ir);
+  Reg idx;
   int32_t ofs;
-  if (ir->op2 == IRFL_TAB_ARRAY) {
-    ofs = asm_fuseabase(as, ir->op1);
-    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
-      emit_tai(as, PPCI_ADDI, dest, idx, ofs);
-      return;
+  if (ir->op1 == REF_NIL) {
+    idx = RID_JGL;
+    ofs = (ir->op2 << 2) - 32768;
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+        emit_tai(as, PPCI_ADDI, dest, idx, ofs);
+        return;
+      }
     }
+    ofs = field_ofs[ir->op2];
   }
-  ofs = field_ofs[ir->op2];
   lua_assert(!irt_isi8(ir->t));
   emit_tai(as, pi, dest, idx, ofs);
 }
@@ -975,7 +976,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     emit_fab(as, PPCI_FSUB, dest, dest, fbias);
     emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
     emit_lsptr(as, PPCI_LFS, (fbias & 31),
-               (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
+               (void *)&as->J->k32[LJ_K32_2P52_2P31],
                rset_clear(allow, hibias));
     emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
     emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
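Note on the PPC hunks above: every `lj_ir_k64_find(as->J, U64x(...))` call, a search through the trace's interned 64-bit constants at assembly time, is replaced by indexing a small per-JIT-state constant table (`as->J->k32[LJ_K32_*]`). A minimal, self-contained sketch of that pattern follows; the `K32_*` names mirror the diff's `LJ_K32_*` indices, but the struct layout and initializer here are illustrative assumptions, not LuaJIT's actual definitions:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Enum-indexed constant slots, mirroring the diff's LJ_K32_* indices. */
enum { K32_2P52_2P31, K32_2P52, K32__MAX };

typedef struct JitState {
  float k32[K32__MAX];  /* FP constants at a fixed, statically known offset. */
} JitState;

int main(void)
{
  JitState J;
  J.k32[K32_2P52_2P31] = 0x1.000008p52f;  /* 2^52 + 2^31 */
  J.k32[K32_2P52]      = 0x1p52f;         /* 2^52 */
  /* The emitter can now reference &J.k32[idx] directly instead of calling a
  ** search like lj_ir_k64_find() over interned constants on every use. */
  uint32_t bits[2];
  memcpy(bits, J.k32, sizeof(bits));
  printf("%08x %08x\n", bits[0], bits[1]);  /* 59800004 59800000: the same bit
                                            ** patterns as the old inline
                                            ** U64x(59800004,59800000). */
  return 0;
}
```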
--- a/data/lua-hooks/ext/luajit/src/lj_asm_x86.h
+++ b/data/lua-hooks/ext/luajit/src/lj_asm_x86.h
@@ -1,6 +1,6 @@
 /*
 ** x86/x64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Guard handling ------------------------------------------------------ */
@@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
   }
   /* Push the high byte of the exitno for each exit stub group. */
   *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
+#if !LJ_GC64
   /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
   *mxp++ = XI_MOVmi;
   *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
   *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
   *mxp++ = 2*sizeof(void *);
   *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
+#endif
   /* Jump to exit handler which fills in the ExitState. */
   *mxp++ = XI_JMP; mxp += 4;
   *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
@@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc)
     target = p;
     cc ^= 1;
     if (as->realign) {
+      if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+        as->mrm.ofs += 2;  /* Fixup RIP offset for pending fused load. */
       emit_sjcc(as, cc, target);
       return;
     }
   }
+  if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+    as->mrm.ofs += 6;  /* Fixup RIP offset for pending fused load. */
   emit_jcc(as, cc, target);
 }
 
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
 {
   if (irref_isk(ref)) {
     IRIns *ir = IR(ref);
+#if LJ_GC64
+    if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
+      *k = ir->i;
+      return 1;
+    } else if (checki32((int64_t)ir_k64(ir)->u64)) {
+      *k = (int32_t)ir_k64(ir)->u64;
+      return 1;
+    }
+#else
     if (ir->o != IR_KINT64) {
       *k = ir->i;
       return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
       *k = (int32_t)ir_kint64(ir)->u64;
       return 1;
     }
+#endif
   }
   return 0;
 }
@@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
     if (irref_isk(ir->op1)) {
       GCfunc *fn = ir_kfunc(IR(ir->op1));
       GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+#if LJ_GC64
+      int64_t ofs = dispofs(as, &uv->tv);
+      if (checki32(ofs) && checki32(ofs+4)) {
+        as->mrm.ofs = (int32_t)ofs;
+        as->mrm.base = RID_DISPATCH;
+        as->mrm.idx = RID_NONE;
+        return;
+      }
+#else
       as->mrm.ofs = ptr2addr(&uv->tv);
       as->mrm.base = as->mrm.idx = RID_NONE;
       return;
+#endif
     }
     break;
   default:
@@ -205,14 +231,40 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
 static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
 {
   lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
-  as->mrm.ofs = field_ofs[ir->op2];
   as->mrm.idx = RID_NONE;
+  if (ir->op1 == REF_NIL) {
+#if LJ_GC64
+    as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch);
+    as->mrm.base = RID_DISPATCH;
+#else
+    as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J));
+    as->mrm.base = RID_NONE;
+#endif
+    return;
+  }
+  as->mrm.ofs = field_ofs[ir->op2];
   if (irref_isk(ir->op1)) {
-    as->mrm.ofs += IR(ir->op1)->i;
+    IRIns *op1 = IR(ir->op1);
+#if LJ_GC64
+    if (ir->op1 == REF_NIL) {
+      as->mrm.ofs -= GG_OFS(dispatch);
+      as->mrm.base = RID_DISPATCH;
+      return;
+    } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
+      intptr_t ofs = dispofs(as, ir_kptr(op1));
+      if (checki32(as->mrm.ofs + ofs)) {
+        as->mrm.ofs += (int32_t)ofs;
+        as->mrm.base = RID_DISPATCH;
+        return;
+      }
+    }
+#else
+    as->mrm.ofs += op1->i;
     as->mrm.base = RID_NONE;
-  } else {
-    as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+    return;
+#endif
   }
+  as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
 }
 
 /* Fuse string reference into memory operand. */
@@ -223,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
   as->mrm.base = as->mrm.idx = RID_NONE;
   as->mrm.scale = XM_SCALE1;
   as->mrm.ofs = sizeof(GCstr);
-  if (irref_isk(ir->op1)) {
+  if (!LJ_GC64 && irref_isk(ir->op1)) {
     as->mrm.ofs += IR(ir->op1)->i;
   } else {
     Reg r = ra_alloc1(as, ir->op1, allow);
@@ -255,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
   IRIns *ir = IR(ref);
   as->mrm.idx = RID_NONE;
   if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+#if LJ_GC64
+    intptr_t ofs = dispofs(as, ir_kptr(ir));
+    if (checki32(ofs)) {
+      as->mrm.ofs = (int32_t)ofs;
+      as->mrm.base = RID_DISPATCH;
+      return;
+    }
+  } if (0) {
+#else
     as->mrm.ofs = ir->i;
     as->mrm.base = RID_NONE;
   } else if (ir->o == IR_STRREF) {
     asm_fusestrref(as, ir, allow);
+#endif
   } else {
     as->mrm.ofs = 0;
     if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -301,7 +363,45 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
   }
 }
 
-/* Fuse load into memory operand. */
+/* Fuse load of 64 bit IR constant into memory operand. */
+static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
+{
+  const uint64_t *k = &ir_k64(ir)->u64;
+  if (!LJ_GC64 || checki32((intptr_t)k)) {
+    as->mrm.ofs = ptr2addr(k);
+    as->mrm.base = RID_NONE;
+#if LJ_GC64
+  } else if (checki32(dispofs(as, k))) {
+    as->mrm.ofs = (int32_t)dispofs(as, k);
+    as->mrm.base = RID_DISPATCH;
+  } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
+             checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
+    as->mrm.ofs = (int32_t)mcpofs(as, k);
+    as->mrm.base = RID_RIP;
+  } else {
+    if (ir->i) {
+      lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
+    } else {
+      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+      *(uint64_t*)as->mcbot = *k;
+      ir->i = (int32_t)(as->mctop - as->mcbot);
+      as->mcbot += 8;
+      as->mclim = as->mcbot + MCLIM_REDZONE;
+    }
+    as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
+    as->mrm.base = RID_RIP;
+#endif
+  }
+  as->mrm.idx = RID_NONE;
+  return RID_MRM;
+}
+
+/* Fuse load into memory operand.
+**
+** Important caveat: this may emit RIP-relative loads! So don't place any
+** code emitters between this function and the use of its result.
+** The only permitted exception is asm_guardcc().
+*/
 static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
 {
   IRIns *ir = IR(ref);
@@ -320,26 +420,35 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
   if (ir->o == IR_KNUM) {
     RegSet avail = as->freeset & ~as->modset & RSET_FPR;
     lua_assert(allow != RSET_EMPTY);
-    if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
-      as->mrm.ofs = ptr2addr(ir_knum(ir));
-      as->mrm.base = as->mrm.idx = RID_NONE;
-      return RID_MRM;
-    }
-  } else if (ir->o == IR_KINT64) {
+    if (!(avail & (avail-1)))  /* Fuse if less than two regs available. */
+      return asm_fuseloadk64(as, ir);
+  } else if (ref == REF_BASE || ir->o == IR_KINT64) {
     RegSet avail = as->freeset & ~as->modset & RSET_GPR;
     lua_assert(allow != RSET_EMPTY);
     if (!(avail & (avail-1))) {  /* Fuse if less than two regs available. */
-      as->mrm.ofs = ptr2addr(ir_kint64(ir));
-      as->mrm.base = as->mrm.idx = RID_NONE;
-      return RID_MRM;
+      if (ref == REF_BASE) {
+#if LJ_GC64
+        as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base);
+        as->mrm.base = RID_DISPATCH;
+#else
+        as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base);
+        as->mrm.base = RID_NONE;
+#endif
+        as->mrm.idx = RID_NONE;
+        return RID_MRM;
+      } else {
+        return asm_fuseloadk64(as, ir);
+      }
     }
   } else if (mayfuse(as, ref)) {
     RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
     if (ir->o == IR_SLOAD) {
       if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
-          noconflict(as, ref, IR_RETF, 0)) {
+          noconflict(as, ref, IR_RETF, 0) &&
+          !(LJ_GC64 && irt_isaddr(ir->t))) {
         as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
-        as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
+        as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
+                      (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
         as->mrm.idx = RID_NONE;
         return RID_MRM;
       }
@@ -351,7 +460,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
         return RID_MRM;
       }
     } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
-      if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
+      if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
+          !(LJ_GC64 && irt_isaddr(ir->t))) {
         asm_fuseahuref(as, ir->op1, xallow);
         return RID_MRM;
       }
@@ -364,12 +474,16 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
       asm_fusexref(as, ir->op1, xallow);
       return RID_MRM;
     }
-    } else if (ir->o == IR_VLOAD) {
+    } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
       asm_fuseahuref(as, ir->op1, xallow);
       return RID_MRM;
     }
   }
-  if (!(as->freeset & allow) && !irref_isk(ref) &&
+  if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) {
+    asm_fusefref(as, ir, RSET_EMPTY);
+    return RID_MRM;
+  }
+  if (!(as->freeset & allow) && !emit_canremat(ref) &&
       (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
     goto fusespill;
   return ra_allocref(as, ref, allow);
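The new `asm_fuseloadk64()` above picks among three LJ_GC64 addressing strategies: address the constant relative to the DISPATCH register when it is within a 32-bit offset, use RIP-relative addressing when it already sits near the machine code, or else embed the 8-byte constant just below the machine-code top (`as->mctop`) and cache its distance in `ir->i` so later fuses reuse the same copy. A rough, self-contained sketch of just that embedding step; the buffer names and sizes here are invented for illustration, and LuaJIT's real code additionally maintains `mclim`/`MCLIM_REDZONE` while emitting code downward from `mctop`:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XI_INT3 0xcc  /* Padding byte, as in the diff. */

typedef struct CodeArea {
  uint8_t *mcbot;  /* Grows upward: embedded constants. */
  uint8_t *mctop;  /* Machine code is emitted downward from here. */
} CodeArea;

/* Embed k once below mctop; return its distance from mctop so a
** RIP-relative offset can be computed at any later emit position. */
static int32_t embed_k64(CodeArea *ca, uint64_t k, int32_t *cached_dist)
{
  if (*cached_dist)  /* Already embedded (the diff caches this in ir->i). */
    return *cached_dist;
  while ((uintptr_t)ca->mcbot & 7) *ca->mcbot++ = XI_INT3;  /* Align to 8. */
  memcpy(ca->mcbot, &k, 8);
  *cached_dist = (int32_t)(ca->mctop - ca->mcbot);
  ca->mcbot += 8;
  return *cached_dist;
}

int main(void)
{
  static uint8_t area[4096];
  CodeArea ca = { area, area + sizeof(area) };
  int32_t dist = 0;
  embed_k64(&ca, 0x4330000000000000ull, &dist);
  uint64_t back;
  memcpy(&back, ca.mctop - dist, 8);  /* Recover the constant via mctop. */
  printf("dist=%d k=%016llx\n", dist, (unsigned long long)back);
  return 0;
}
```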
@@ -485,8 +599,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
     if (r) {  /* Argument is in a register. */
       if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
 #if LJ_64
-        if (ir->o == IR_KINT64)
-          emit_loadu64(as, r, ir_kint64(ir)->u64);
+        if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64)
+          emit_loadu64(as, r, ir_k64(ir)->u64);
         else
 #endif
           emit_loadi(as, r, ir->i);
@@ -642,6 +756,9 @@ static void asm_callx(ASMState *as, IRIns *ir)
 static void asm_retf(ASMState *as, IRIns *ir)
 {
   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+#if LJ_FR2
+  Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
+#endif
   void *pc = ir_kptr(IR(ir->op2));
   int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
   as->topslot -= (BCReg)delta;
@@ -650,7 +767,12 @@ static void asm_retf(ASMState *as, IRIns *ir)
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
+#if LJ_FR2
+  emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
+  emit_loadu64(as, rpc, u64ptr(pc));
+#else
   emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
+#endif
 }
 
 /* -- Type conversions ---------------------------------------------------- */
@@ -674,8 +796,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
   Reg tmp = ra_noreg(IR(ir->op1)->r) ?
               ra_alloc1(as, ir->op1, RSET_FPR) :
               ra_scratch(as, RSET_FPR);
-  Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
+  Reg right;
   emit_rr(as, XO_MOVDto, tmp, dest);
+  right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
   emit_mrm(as, XO_ADDSD, tmp, right);
   ra_left(as, tmp, ir->op1);
 }
@@ -696,13 +819,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
     if (left == dest) return;  /* Avoid the XO_XORPS. */
   } else if (LJ_32 && st == IRT_U32) {  /* U32 to FP conversion on x86. */
     /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
-    cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
+    cTValue *k = &as->J->k64[LJ_K64_TOBIT];
     Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
     if (irt_isfloat(ir->t))
       emit_rr(as, XO_CVTSD2SS, dest, dest);
     emit_rr(as, XO_SUBSD, dest, bias);  /* Subtract 2^52+2^51 bias. */
     emit_rr(as, XO_XORPS, dest, bias);  /* Merge bias and integer. */
-    emit_loadn(as, bias, k);
+    emit_rma(as, XO_MOVSD, bias, k);
     emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
     return;
   } else {  /* Integer to FP conversion. */
@@ -711,7 +834,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
               asm_fuseloadm(as, lref, RSET_GPR, st64);
     if (LJ_64 && st == IRT_U64) {
       MCLabel l_end = emit_label(as);
-      const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
+      cTValue *k = &as->J->k64[LJ_K64_2P64];
       emit_rma(as, XO_ADDSD, dest, k);  /* Add 2^64 to compensate. */
       emit_sjcc(as, CC_NS, l_end);
       emit_rr(as, XO_TEST, left|REX_64, left);  /* Check if u64 >= 2^63. */
@@ -738,23 +861,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
       emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
       emit_rr(as, op, dest|REX_64, tmp);
       if (st == IRT_NUM)
-        emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J,
-                 LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
+        emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
       else
-        emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J,
-                 LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
+        emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
       emit_sjcc(as, CC_NS, l_end);
       emit_rr(as, XO_TEST, dest|REX_64, dest);  /* Check if dest negative. */
       emit_rr(as, op, dest|REX_64, tmp);
       ra_left(as, tmp, lref);
     } else {
-      Reg left = asm_fuseload(as, lref, RSET_FPR);
       if (LJ_64 && irt_isu32(ir->t))
         emit_rr(as, XO_MOV, dest, dest);  /* Zero hiword. */
       emit_mrm(as, op,
                dest|((LJ_64 &&
                       (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
-               left);
+               asm_fuseload(as, lref, RSET_FPR));
     }
   }
 } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
@@ -828,8 +948,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
   if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
     /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
     MCLabel l_end = emit_label(as);
-    emit_rma(as, XO_FADDq, XOg_FADDq,
-             lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
+    emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]);
     emit_sjcc(as, CC_NS, l_end);
     emit_rr(as, XO_TEST, hi, hi);  /* Check if u64 >= 2^63. */
   } else {
@@ -869,8 +988,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
     emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
   else
     emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
-  emit_rma(as, XO_FADDq, XOg_FADDq,
-           lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+  emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
   emit_sjcc(as, CC_NS, l_pop);
   emit_rr(as, XO_TEST, hi, hi);  /* Check if out-of-range (2^63). */
 }
@@ -934,6 +1052,25 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
   } else {
     /* Otherwise use g->tmptv to hold the TValue. */
+#if LJ_GC64
+    if (irref_isk(ref)) {
+      TValue k;
+      lj_ir_kvalue(as->J->L, &k, ir);
+      emit_movmroi(as, dest, 4, k.u32.hi);
+      emit_movmroi(as, dest, 0, k.u32.lo);
+    } else {
+      /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+      if (irt_is64(ir->t)) {
+        emit_u32(as, irt_toitype(ir->t) << 15);
+        emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+      } else {
+        /* Currently, no caller passes integers that might end up here. */
+        emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+      }
+      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+    }
+#else
     if (!irref_isk(ref)) {
       Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
       emit_movtomro(as, REX_64IR(ir, src), dest, 0);
@@ -942,6 +1079,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
     }
     if (!(LJ_64 && irt_islightud(ir->t)))
       emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+#endif
     emit_loada(as, dest, &J2G(as->J)->tmptv);
   }
 }
@@ -951,9 +1089,9 @@ static void asm_aref(ASMState *as, IRIns *ir)
   Reg dest = ra_dest(as, ir, RSET_GPR);
   asm_fusearef(as, ir, RSET_GPR);
   if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
-    emit_mrm(as, XO_LEA, dest, RID_MRM);
+    emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
   else if (as->mrm.base != dest)
-    emit_rr(as, XO_MOV, dest, as->mrm.base);
+    emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
 }
 
 /* Inlined hash lookup. Specialized for key type and for const keys.
@@ -980,7 +1118,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   if (!isk) {
     rset_clear(allow, tab);
     key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
-    if (!irt_isstr(kt))
+    if (LJ_GC64 || !irt_isstr(kt))
      tmp = ra_scratch(as, rset_exclude(allow, key));
   }
 
@@ -993,8 +1131,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 
   /* Follow hash chain until the end. */
   l_loop = emit_sjcc_label(as, CC_NZ);
-  emit_rr(as, XO_TEST, dest, dest);
-  emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
+  emit_rr(as, XO_TEST, dest|REX_GC64, dest);
+  emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
   l_next = emit_label(as);
 
   /* Type and value comparison. */
@@ -1015,7 +1153,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
       emit_sjcc(as, CC_AE, l_next);
       /* The type check avoids NaN penalties and complaints from Valgrind. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
       emit_u32(as, LJ_TISNUM);
       emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
 #else
@@ -1023,10 +1161,28 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
 #endif
     }
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (irt_islightud(kt)) {
     emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
 #endif
+#if LJ_GC64
+  } else if (irt_isaddr(kt)) {
+    if (isk) {
+      TValue k;
+      k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
+                 k.u32.lo);
+      emit_sjcc(as, CC_NE, l_next);
+      emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
+                 k.u32.hi);
+    } else {
+      emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
+    }
+  } else {
+    lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+    emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
+    emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
+#else
   } else {
     if (!irt_ispri(kt)) {
       lua_assert(irt_isaddr(kt));
@@ -1040,16 +1196,23 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
     lua_assert(!irt_isnil(kt));
     emit_i8(as, irt_toitype(kt));
     emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+#endif
   }
   emit_sfixup(as, l_loop);
   checkmclim(as);
+#if LJ_GC64
+  if (!isk && irt_isaddr(kt)) {
+    emit_rr(as, XO_OR, tmp|REX_64, key);
+    emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
+  }
+#endif
 
   /* Load main position relative to tab->node into dest. */
   khash = isk ? ir_khash(irkey) : 1;
   if (khash == 0) {
-    emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
+    emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
   } else {
-    emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
+    emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
     if ((as->flags & JIT_F_PREFER_IMUL)) {
       emit_i8(as, sizeof(Node));
       emit_rr(as, XO_IMULi8, dest, dest);
@@ -1084,7 +1247,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 #endif
     } else {
       emit_rr(as, XO_MOV, tmp, key);
+#if LJ_GC64
+      checkmclim(as);
+      emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
+      if ((as->flags & JIT_F_BMI2)) {
+        emit_i8(as, 32);
+        emit_mrm(as, XV_RORX|VEX_64, dest, key);
+      } else {
+        emit_shifti(as, XOg_SHR|REX_64, dest, 32);
+        emit_rr(as, XO_MOV, dest|REX_64, key|REX_64);
+      }
+#else
       emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
+#endif
     }
   }
 }
@@ -1104,11 +1279,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
   if (ra_hasreg(dest)) {
     if (ofs != 0) {
       if (dest == node && !(as->flags & JIT_F_LEA_AGU))
-        emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
+        emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
       else
-        emit_rmro(as, XO_LEA, dest, node, ofs);
+        emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
     } else if (dest != node) {
-      emit_rr(as, XO_MOV, dest, node);
+      emit_rr(as, XO_MOV, dest|REX_GC64, node);
     }
   }
   asm_guardcc(as, CC_NE);
@@ -1120,13 +1295,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
     lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
     /* Assumes -0.0 is already canonicalized to +0.0. */
     emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
+#if LJ_GC64
+                 ((uint64_t)irt_toitype(irkey->t) << 47) |
+                 (uint64_t)ir_kgc(irkey));
+#else
                  ((uint64_t)irt_toitype(irkey->t) << 32) |
                  (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
+#endif
   } else {
     lua_assert(!irt_isnil(irkey->t));
+#if LJ_GC64
+    emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
+    emit_rmro(as, XO_ARITHi, XOg_CMP, node,
+              ofs + (int32_t)offsetof(Node, key.it));
+#else
     emit_i8(as, irt_toitype(irkey->t));
     emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
               ofs + (int32_t)offsetof(Node, key.it));
+#endif
   }
 #else
   l_exit = emit_label(as);
@@ -1157,25 +1343,25 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
 
 static void asm_uref(ASMState *as, IRIns *ir)
 {
-  /* NYI: Check that UREFO is still open and not aliasing a slot. */
   Reg dest = ra_dest(as, ir, RSET_GPR);
   if (irref_isk(ir->op1)) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
-    emit_rma(as, XO_MOV, dest, v);
+    emit_rma(as, XO_MOV, dest|REX_GC64, v);
   } else {
     Reg uv = ra_scratch(as, RSET_GPR);
     Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
     if (ir->o == IR_UREFC) {
-      emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
+      emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
       asm_guardcc(as, CC_NE);
       emit_i8(as, 1);
       emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
     } else {
-      emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
+      emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
     }
-    emit_rmro(as, XO_MOV, uv, func,
-              (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+    emit_rmro(as, XO_MOV, uv|REX_GC64, func,
+              (int32_t)offsetof(GCfuncL, uvptr) +
+              (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -1193,9 +1379,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
   if (as->mrm.base == RID_NONE)
     emit_loadi(as, dest, as->mrm.ofs);
   else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
-    emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
+    emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
   else
-    emit_mrm(as, XO_LEA, dest, RID_MRM);
+    emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
 }
 
 /* -- Loads and stores ---------------------------------------------------- */
@@ -1264,7 +1450,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
   case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
   case IRT_NUM: xo = XO_MOVSDto; break;
   case IRT_FLOAT: xo = XO_MOVSSto; break;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   case IRT_LIGHTUD: lua_assert(0);  /* NYI: mask 64 bit lightuserdata. */
 #endif
   default:
@@ -1296,7 +1482,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
 #define asm_fstore(as, ir)      asm_fxstore(as, ir)
 #define asm_xstore(as, ir)      asm_fxstore(as, ir)
 
-#if LJ_64
+#if LJ_64 && !LJ_GC64
 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 {
   if (ra_used(ir) || typecheck) {
@@ -1318,9 +1504,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
+#if LJ_GC64
+  Reg tmp = RID_NONE;
+#endif
   lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
              (LJ_DUALNUM && irt_isint(ir->t)));
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   if (irt_islightud(ir->t)) {
     Reg dest = asm_load_lightud64(as, ir, 1);
     if (ra_hasreg(dest)) {
@@ -1334,20 +1523,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
+#if LJ_GC64
+    if (irt_isaddr(ir->t)) {
+      emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+      asm_guardcc(as, CC_NE);
+      emit_i8(as, irt_toitype(ir->t));
+      emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+      emit_i8(as, XI_O16);
+      if ((as->flags & JIT_F_BMI2)) {
+        emit_i8(as, 47);
+        emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
+      } else {
+        emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+        emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+      }
+      return;
+    } else
+#endif
     emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
   } else {
-    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    RegSet gpr = RSET_GPR;
+#if LJ_GC64
+    if (irt_isaddr(ir->t)) {
+      tmp = ra_scratch(as, RSET_GPR);
+      gpr = rset_exclude(gpr, tmp);
+    }
+#endif
+    asm_fuseahuref(as, ir->op1, gpr);
   }
   /* Always do the type check, even if the load result is unused. */
   as->mrm.ofs += 4;
   asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
   if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
     lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
+#if LJ_GC64
+    emit_u32(as, LJ_TISNUM << 15);
+#else
     emit_u32(as, LJ_TISNUM);
+#endif
+    emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#if LJ_GC64
+  } else if (irt_isaddr(ir->t)) {
+    as->mrm.ofs -= 4;
+    emit_i8(as, irt_toitype(ir->t));
+    emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
+    emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+    emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
+  } else if (irt_isnil(ir->t)) {
+    as->mrm.ofs -= 4;
+    emit_i8(as, -1);
+    emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
+  } else {
+    emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
     emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#else
   } else {
     emit_i8(as, irt_toitype(ir->t));
     emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
+#endif
   }
 }
 
@@ -1359,11 +1592,27 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
     Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
     emit_mrm(as, XO_MOVSDto, src, RID_MRM);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (irt_islightud(ir->t)) {
     Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
     asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
     emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+#endif
+#if LJ_GC64
+  } else if (irref_isk(ir->op2)) {
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
+    asm_fuseahuref(as, ir->op1, RSET_GPR);
+    if (tvisnil(&k)) {
+      emit_i32(as, -1);
+      emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
+    } else {
+      emit_u32(as, k.u32.lo);
+      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+      as->mrm.ofs += 4;
+      emit_u32(as, k.u32.hi);
+      emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+    }
 #endif
   } else {
     IRIns *irr = IR(ir->op2);
@@ -1375,6 +1624,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
     }
     asm_fuseahuref(as, ir->op1, allow);
     if (ra_hasreg(src)) {
+#if LJ_GC64
+      if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
+        /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+        as->mrm.ofs += 4;
+        emit_u32(as, irt_toitype(ir->t) << 15);
+        emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
+        as->mrm.ofs -= 4;
+        emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+        return;
+      }
+#endif
       emit_mrm(as, XO_MOVto, src, RID_MRM);
     } else if (!irt_ispri(irr->t)) {
       lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
@@ -1382,14 +1642,20 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
       emit_mrm(as, XO_MOVmi, 0, RID_MRM);
     }
     as->mrm.ofs += 4;
+#if LJ_GC64
+    lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
+    emit_i32(as, LJ_TNUMX << 15);
+#else
     emit_i32(as, (int32_t)irt_toitype(ir->t));
+#endif
     emit_mrm(as, XO_MOVmi, 0, RID_MRM);
   }
 }
 
 static void asm_sload(ASMState *as, IRIns *ir)
 {
-  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+  int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
+                (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
   IRType1 t = ir->t;
   Reg base;
   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
@@ -1402,7 +1668,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
     base = ra_alloc1(as, REF_BASE, RSET_GPR);
     emit_rmro(as, XO_MOVSD, left, base, ofs);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
   } else if (irt_islightud(t)) {
     Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
     if (ra_hasreg(dest)) {
@@ -1420,6 +1686,36 @@ static void asm_sload(ASMState *as, IRIns *ir)
       t.irt = irt_isint(t) ? IRT_NUM : IRT_INT;  /* Check for original type. */
       emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
     } else {
+#if LJ_GC64
+      if (irt_isaddr(t)) {
+        /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
+        **
+        **  mov r64, [addr]    rorx r64, [addr], 47
+        **  ror r64, 47
+        **  cmp r16, itype     cmp r16, itype
+        **  jne ->exit         jne ->exit
+        **  shr r64, 16        shr r64, 16
+        */
+        emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+        if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+          asm_guardcc(as, CC_NE);
+          emit_i8(as, irt_toitype(t));
+          emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+          emit_i8(as, XI_O16);
+        }
+        if ((as->flags & JIT_F_BMI2)) {
+          emit_i8(as, 47);
+          emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
+        } else {
+          if ((ir->op2 & IRSLOAD_TYPECHECK))
+            emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+          else
+            emit_shifti(as, XOg_SHL|REX_64, dest, 17);
+          emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+        }
+        return;
+      } else
+#endif
      emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
    }
  } else {
@@ -1432,11 +1728,42 @@ static void asm_sload(ASMState *as, IRIns *ir)
     asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
     if (LJ_64 && irt_type(t) >= IRT_NUM) {
       lua_assert(irt_isinteger(t) || irt_isnum(t));
+#if LJ_GC64
+      emit_u32(as, LJ_TISNUM << 15);
+#else
       emit_u32(as, LJ_TISNUM);
+#endif
+      emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+#if LJ_GC64
+    } else if (irt_isnil(t)) {
+      /* LJ_GC64 type check for nil:
+      **
+      **   cmp qword [addr], -1
+      **   jne ->exit
+      */
+      emit_i8(as, -1);
+      emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
+    } else if (irt_ispri(t)) {
+      emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
       emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+    } else {
+      /* LJ_GC64 type check only:
+      **
+      **   mov r64, [addr]
+      **   sar r64, 47
+      **   cmp r32, itype
+      **   jne ->exit
+      */
+      Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
+      emit_i8(as, irt_toitype(t));
+      emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+      emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+      emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4);
+#else
     } else {
       emit_i8(as, irt_toitype(t));
       emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
+#endif
     }
   }
 }
@@ -1464,8 +1791,9 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1464
1791
  Reg r64 = sz == 8 ? REX_64 : 0;
1465
1792
  if (irref_isk(ir->op2)) {
1466
1793
  IRIns *irk = IR(ir->op2);
1467
- uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
1468
- (uint64_t)(uint32_t)irk->i;
1794
+ uint64_t k = (irk->o == IR_KINT64 ||
1795
+ (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ?
1796
+ ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i;
1469
1797
  if (sz == 4 || checki32((int64_t)k)) {
1470
1798
  emit_i32(as, (int32_t)k);
1471
1799
  emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
@@ -1530,7 +1858,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1530
1858
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1531
1859
  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1532
1860
  MCLabel l_end = emit_label(as);
1533
- emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
1861
+ emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
1534
1862
  emit_setgl(as, tab, gc.grayagain);
1535
1863
  emit_getgl(as, tmp, gc.grayagain);
1536
1864
  emit_i8(as, ~LJ_GC_BLACK);
@@ -1956,7 +2284,7 @@ static void asm_bswap(ASMState *as, IRIns *ir)
 #define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
 #define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
 
-static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
 {
   IRRef rref = ir->op2;
   IRIns *irr = IR(rref);
@@ -1965,11 +2293,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
     int shift;
     dest = ra_dest(as, ir, RSET_GPR);
     shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
+    if (!xv && shift && (as->flags & JIT_F_BMI2)) {
+      Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
+      if (left != dest) {  /* BMI2 rotate right by constant. */
+        emit_i8(as, xs == XOg_ROL ? -shift : shift);
+        emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
+        return;
+      }
+    }
     switch (shift) {
     case 0: break;
     case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
     default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
     }
+  } else if ((as->flags & JIT_F_BMI2) && xv) {  /* BMI2 variable shifts. */
+    Reg left, right;
+    dest = ra_dest(as, ir, RSET_GPR);
+    right = ra_alloc1(as, rref, RSET_GPR);
+    left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
+                         irt_is64(ir->t));
+    emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
+    return;
   } else {  /* Variable shifts implicitly use register cl (i.e. ecx). */
     Reg right;
     dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
@@ -1995,11 +2339,11 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
   */
 }
 
-#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
-#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
-#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
-#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
-#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
+#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
+#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
+#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
 
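The new xv argument selects a BMI2 encoding when the CPU reports it (JIT_F_BMI2): SHLX/SHRX/SARX take the shift count in any register — folded into the inverted VEX.vvvv field, which is why the emitter XORs `right << 19` into the opcode word — and leave the flags untouched, while RORX rotates by an immediate with no CL or flags dependency at all. A left rotate by a constant reuses RORX via `-shift`, since rotating left by n equals rotating right by 64-n. What these instructions compute, as a plain C sketch (stand-in helpers, not LuaJIT code):

```c
#include <stdint.h>

static uint64_t rorx64(uint64_t x, unsigned imm)  /* RORX r64, r/m64, imm8 */
{
  imm &= 63;
  return imm ? (x >> imm) | (x << (64 - imm)) : x;  /* avoid UB shift by 64 */
}

static uint64_t shrx64(uint64_t x, uint64_t cnt)  /* SHRX: count mod 64 */
{
  return x >> (cnt & 63);
}

static uint64_t shlx64(uint64_t x, uint64_t cnt)  /* SHLX: count mod 64 */
{
  return x << (cnt & 63);
}
```
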
  /* -- Comparisons --------------------------------------------------------- */
 
@@ -2050,7 +2394,6 @@ static void asm_comp(ASMState *as, IRIns *ir)
       cc ^= (VCC_PS|(5<<4));  /* A <-> B, AE <-> BE, PS <-> none */
     }
     left = ra_alloc1(as, lref, RSET_FPR);
-    right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
     l_around = emit_label(as);
     asm_guardcc(as, cc >> 4);
     if (cc & VCC_P) {  /* Extra CC_P branch required? */
@@ -2067,6 +2410,7 @@ static void asm_comp(ASMState *as, IRIns *ir)
         emit_jcc(as, CC_P, as->mcp);
       }
     }
+    right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
     emit_mrm(as, XO_UCOMISD, left, right);
   } else {
     IRRef lref = ir->op1, rref = ir->op2;
@@ -2343,13 +2687,18 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
     emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
   else
     ra_modified(as, r);
-  emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
+  emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
   if (ra_hasreg(pbase) && pbase != r)
-    emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
+    emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
   else
+#if LJ_GC64
+    emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
+              (int32_t)dispofs(as, &J2G(as->J)->jit_base));
+#else
     emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
               ptr2addr(&J2G(as->J)->jit_base));
-  emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
+#endif
+  emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
   emit_getgl(as, r, cur_L);
   if (allow == RSET_EMPTY)  /* Spill temp. register. */
     emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
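
The #if LJ_GC64 arm exists because the old form addressed jit_base through a 32-bit absolute displacement (RID_NONE plus ptr2addr), which cannot reach an arbitrarily placed global_State on a 64-bit heap. GC64 builds instead keep the dispatch table in RID_DISPATCH and reach every global_State field at a constant offset from it. The address arithmetic behind `[RID_DISPATCH + dispofs(...)]`, sketched with illustrative names:

```c
#include <stdint.h>

/* Non-GC64: the field's absolute address itself must fit in a disp32. */
static void *field_addr_abs(uintptr_t addr32)
{
  return (void *)addr32;
}

/* LJ_GC64: base register + constant offset always encodes, wherever the
** GG_State blob ended up in memory. */
static void *field_addr_disp(char *dispatch, int32_t ofs)
{
  return dispatch + ofs;
}
```
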
@@ -2359,13 +2708,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
 static void asm_stack_restore(ASMState *as, SnapShot *snap)
 {
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
-  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
+#if !LJ_FR2 || defined(LUA_USE_ASSERT)
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
   MSize n, nent = snap->nent;
   /* Store the value of all modified slots to the Lua stack. */
   for (n = 0; n < nent; n++) {
     SnapEntry sn = map[n];
     BCReg s = snap_slot(sn);
-    int32_t ofs = 8*((int32_t)s-1);
+    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
     IRRef ref = snap_ref(sn);
     IRIns *ir = IR(ref);
     if ((sn & SNAP_NORESTORE))
@@ -2378,16 +2729,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
                (LJ_DUALNUM && irt_isinteger(ir->t)));
     if (!irref_isk(ref)) {
       Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+#if LJ_GC64
+      if (irt_is64(ir->t)) {
+        /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+        emit_u32(as, irt_toitype(ir->t) << 15);
+        emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
+      } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
+        emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
+      } else {
+        emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
+      }
+#endif
       emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
+#if LJ_GC64
+    } else {
+      TValue k;
+      lj_ir_kvalue(as->J->L, &k, ir);
+      if (tvisnil(&k)) {
+        emit_i32(as, -1);
+        emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
+      } else {
+        emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
+        emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
+      }
+#else
     } else if (!irt_ispri(ir->t)) {
       emit_movmroi(as, RID_BASE, ofs, ir->i);
+#endif
     }
     if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+#if !LJ_FR2
       if (s != 0)  /* Do not overwrite link to previous frame. */
         emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+#endif
+#if !LJ_GC64
     } else {
       if (!(LJ_64 && irt_islightud(ir->t)))
         emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+#endif
     }
   }
   checkmclim(as);
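
Two things changed here for GC64/FR2: slot offsets shift by LJ_FR2 because the two-slot frame info now occupies an extra stack slot, and constants are materialized as whole 64-bit TValues — nil as a single qword store of -1 (its bit pattern is all ones), everything else as two dword halves. A C model of that constant store (union layout is illustrative; the hi/lo split assumes little-endian x86):

```c
#include <stdint.h>

typedef union {
  uint64_t u64;
  struct { uint32_t lo, hi; } u32;  /* little-endian split */
} TV64;

static void restore_const(TV64 *slot, const TV64 *k, int is_nil)
{
  if (is_nil) {
    slot->u64 = ~(uint64_t)0;   /* mov qword [BASE+ofs], -1 */
  } else {
    slot->u32.hi = k->u32.hi;   /* mov dword [BASE+ofs+4], k.u32.hi */
    slot->u32.lo = k->u32.lo;   /* mov dword [BASE+ofs], k.u32.lo */
  }
}
```
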
@@ -2413,11 +2792,15 @@ static void asm_gc_check(ASMState *as)
   args[1] = ASMREF_TMP2;  /* MSize steps */
   asm_gencall(as, ci, args);
   tmp = ra_releasetmp(as, ASMREF_TMP1);
+#if LJ_GC64
+  emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
+#else
   emit_loada(as, tmp, J2G(as->J));
+#endif
   emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
   /* Jump around GC step if GC total < GC threshold. */
   emit_sjcc(as, CC_B, l_end);
-  emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
+  emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
   emit_getgl(as, tmp, gc.total);
   as->gcsteps = 0;
   checkmclim(as);
@@ -2482,7 +2865,7 @@ static void asm_head_root_base(ASMState *as)
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (r != RID_BASE)
-      emit_rr(as, XO_MOV, r, RID_BASE);
+      emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
   }
 }
 
@@ -2498,8 +2881,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
     if (irp->r == r) {
       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      /* Move from coalesced parent reg. */
       rset_clear(allow, irp->r);
-      emit_rr(as, XO_MOV, r, irp->r);  /* Move from coalesced parent reg. */
+      emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
@@ -2600,10 +2984,111 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 static void asm_setup_target(ASMState *as)
 {
   asm_exitstub_setup(as, as->T->nsnap);
+  as->mrm.base = 0;
 }
 
 /* -- Trace patching ------------------------------------------------------ */
 
+static const uint8_t map_op1[256] = {
+  0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20,
+  0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,
+  0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
+  0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
+#if LJ_64
+  0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
+#else
+  0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
+#endif
+  0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
+  0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51,
+  0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+  0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
+  0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51,
+#if LJ_64
+  0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
+#else
+  0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
+#endif
+  0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
+  0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51,
+  0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
+  0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51,
+  0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92
+};
+
+static const uint8_t map_op2[256] = {
+  0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93,
+  0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+  0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52
+};
+
+static uint32_t asm_x86_inslen(const uint8_t* p)
+{
+  uint32_t result = 0;
+  uint32_t prefixes = 0;
+  uint32_t x = map_op1[*p];
+  for (;;) {
+    switch (x >> 4) {
+    case 0: return result + x + (prefixes & 4);
+    case 1: prefixes |= x; x = map_op1[*++p]; result++; break;
+    case 2: x = map_op2[*++p]; break;
+    case 3: p++; goto mrm;
+    case 4: result -= (prefixes & 2);  /* fallthrough */
+    case 5: return result + (x & 15);
+    case 6:  /* Group 3. */
+      if (p[1] & 0x38) x = 2;
+      else if ((prefixes & 2) && (x == 0x66)) x = 4;
+      goto mrm;
+    case 7:  /* VEX c4/c5. */
+      if (LJ_32 && p[1] < 0xc0) {
+        x = 2;
+        goto mrm;
+      }
+      if (x == 0x70) {
+        x = *++p & 0x1f;
+        result++;
+        if (x >= 2) {
+          p += 2;
+          result += 2;
+          goto mrm;
+        }
+      }
+      p++;
+      result++;
+      x = map_op2[*++p];
+      break;
+    case 8: result -= (prefixes & 2);  /* fallthrough */
+    case 9: mrm:  /* ModR/M and possibly SIB. */
+      result += (x & 15);
+      x = *++p;
+      switch (x >> 6) {
+      case 0: if ((x & 7) == 5) return result + 4; break;
+      case 1: result++; break;
+      case 2: result += 4; break;
+      case 3: return result;
+      }
+      if ((x & 7) == 4) {
+        result++;
+        if (x < 0x40 && (p[1] & 7) == 5) result += 4;
+      }
+      return result;
+    }
+  }
+}
+
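
asm_x86_inslen is a table-driven length decoder for the instruction subset the JIT emits: each map_op1/map_op2 entry packs a handler class in the high nibble (prefix, two-byte escape, immediate, group 3, VEX, ModR/M) and a byte count in the low nibble, and the loop accumulates prefixes until it can total the instruction. lj_asm_patchexit below relies on it to walk machine code a whole instruction at a time; the old byte-wise scan breaks once REX/VEX prefixes make the immediate's offset variable. Minimal usage sketch (hypothetical helper, not from the tree):

```c
/* Count whole instructions in a span of generated machine code. */
static uint32_t count_insns(const uint8_t *mc, const uint8_t *mcend)
{
  uint32_t n = 0;
  while (mc < mcend) {
    mc += asm_x86_inslen(mc);  /* never advances into the middle of an insn */
    n++;
  }
  return n;
}
```
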
 /* Patch exit jumps of existing machine code to a new target. */
 void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
 {
@@ -2612,22 +3097,23 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
   MSize len = T->szmcode;
   MCode *px = exitstub_addr(J, exitno) - 6;
   MCode *pe = p+len-6;
-  uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
+#if LJ_GC64
+  uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
+#else
+  uint32_t statei = u32ptr(&J2G(J)->vmstate);
+#endif
   if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
     *(int32_t *)(p+len-4) = jmprel(p+len, target);
   /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
-  for (; p < pe; p++)
-    if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) {
-      p += LJ_64 ? 11 : 10;
+  for (; p < pe; p += asm_x86_inslen(p)) {
+    intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
+    if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
       break;
-    }
+  }
   lua_assert(p < pe);
-  for (; p < pe; p++) {
-    if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
+  for (; p < pe; p += asm_x86_inslen(p))
+    if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
       *(int32_t *)(p+2) = jmprel(p+6, target);
-      p += 5;
-    }
-  }
   lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
   lj_mcode_patch(J, mcarea, 1);
 }
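
For reference, the patch loop's `(*(uint16_t *)p & 0xf0ff) == 0x800f` test matches any two-byte `jcc rel32` opcode (0F 80..0F 8F read little-endian), and the stored displacement is relative to the end of the 6-byte instruction, which is all jmprel computes. In essence (a sketch; the real macro also asserts the displacement fits in 32 bits):

```c
#include <stdint.h>

/* Retarget a 6-byte "jcc rel32" at p to a new destination. */
static void retarget_jcc(uint8_t *p, uint8_t *target)
{
  *(int32_t *)(p + 2) = (int32_t)(target - (p + 6));
}
```
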