immunio 0.15.4 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (454)
  1. checksums.yaml +4 -4
  2. data/LICENSE +0 -27
  3. data/ext/immunio/Rakefile +9 -0
  4. data/lib/immunio/plugins/active_record.rb +1 -1
  5. data/lib/immunio/plugins/active_record_relation.rb +1 -1
  6. data/lib/immunio/plugins/environment_reporter.rb +20 -0
  7. data/lib/immunio/rufus_lua_ext/ref.rb +1 -3
  8. data/lib/immunio/version.rb +1 -1
  9. data/lib/immunio/vm.rb +1 -2
  10. data/lua-hooks/Makefile +97 -0
  11. data/lua-hooks/ext/all.c +41 -52
  12. data/lua-hooks/ext/all.o +0 -0
  13. data/lua-hooks/ext/libinjection/libinjection_html5.o +0 -0
  14. data/lua-hooks/ext/libinjection/libinjection_sqli.o +0 -0
  15. data/lua-hooks/ext/libinjection/libinjection_xss.o +0 -0
  16. data/lua-hooks/ext/libinjection/lualib.c +2 -2
  17. data/lua-hooks/ext/lpeg/lpcap.c +2 -2
  18. data/lua-hooks/ext/lpeg/lpcap.o +0 -0
  19. data/lua-hooks/ext/lpeg/lpcode.c +2 -2
  20. data/lua-hooks/ext/lpeg/lpcode.h +1 -1
  21. data/lua-hooks/ext/lpeg/lpcode.o +0 -0
  22. data/lua-hooks/ext/lpeg/lpprint.o +0 -0
  23. data/lua-hooks/ext/lpeg/lptree.c +2 -2
  24. data/lua-hooks/ext/lpeg/lptypes.h +1 -1
  25. data/lua-hooks/ext/lpeg/lpvm.c +2 -2
  26. data/lua-hooks/ext/lpeg/lpvm.o +0 -0
  27. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +16 -3
  28. data/lua-hooks/ext/lua-snapshot/snapshot.c +14 -7
  29. data/lua-hooks/ext/luajit/COPYRIGHT +56 -0
  30. data/lua-hooks/ext/luajit/Makefile +159 -0
  31. data/lua-hooks/ext/luajit/README +16 -0
  32. data/lua-hooks/ext/luajit/doc/bluequad-print.css +166 -0
  33. data/lua-hooks/ext/luajit/doc/bluequad.css +325 -0
  34. data/lua-hooks/ext/luajit/doc/changes.html +804 -0
  35. data/lua-hooks/ext/luajit/doc/contact.html +104 -0
  36. data/lua-hooks/ext/luajit/doc/ext_c_api.html +189 -0
  37. data/lua-hooks/ext/luajit/doc/ext_ffi.html +332 -0
  38. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +570 -0
  39. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +1261 -0
  40. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +603 -0
  41. data/lua-hooks/ext/luajit/doc/ext_jit.html +201 -0
  42. data/lua-hooks/ext/luajit/doc/ext_profiler.html +365 -0
  43. data/lua-hooks/ext/luajit/doc/extensions.html +448 -0
  44. data/lua-hooks/ext/luajit/doc/faq.html +186 -0
  45. data/lua-hooks/ext/luajit/doc/img/contact.png +0 -0
  46. data/lua-hooks/ext/luajit/doc/install.html +659 -0
  47. data/lua-hooks/ext/luajit/doc/luajit.html +236 -0
  48. data/lua-hooks/ext/luajit/doc/running.html +309 -0
  49. data/lua-hooks/ext/luajit/doc/status.html +118 -0
  50. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +456 -0
  51. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1125 -0
  52. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +518 -0
  53. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1166 -0
  54. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +416 -0
  55. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +953 -0
  56. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +419 -0
  57. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1919 -0
  58. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +83 -0
  59. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +12 -0
  60. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +471 -0
  61. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +1945 -0
  62. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +1094 -0
  63. data/lua-hooks/ext/luajit/etc/luajit.1 +88 -0
  64. data/lua-hooks/ext/luajit/etc/luajit.pc +25 -0
  65. data/lua-hooks/ext/luajit/src/Makefile +697 -0
  66. data/lua-hooks/ext/luajit/src/Makefile.dep +244 -0
  67. data/lua-hooks/ext/luajit/src/host/README +4 -0
  68. data/lua-hooks/ext/luajit/src/host/buildvm +0 -0
  69. data/lua-hooks/ext/luajit/src/host/buildvm.c +518 -0
  70. data/lua-hooks/ext/luajit/src/host/buildvm.h +105 -0
  71. data/lua-hooks/ext/luajit/src/host/buildvm.o +0 -0
  72. data/lua-hooks/ext/luajit/src/host/buildvm_arch.h +7449 -0
  73. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +345 -0
  74. data/lua-hooks/ext/luajit/src/host/buildvm_asm.o +0 -0
  75. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +229 -0
  76. data/lua-hooks/ext/luajit/src/host/buildvm_fold.o +0 -0
  77. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +457 -0
  78. data/lua-hooks/ext/luajit/src/host/buildvm_lib.o +0 -0
  79. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +45 -0
  80. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +368 -0
  81. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.o +0 -0
  82. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +197 -0
  83. data/lua-hooks/ext/luajit/src/host/genminilua.lua +428 -0
  84. data/lua-hooks/ext/luajit/src/host/minilua +0 -0
  85. data/lua-hooks/ext/luajit/src/host/minilua.c +7770 -0
  86. data/lua-hooks/ext/luajit/src/host/minilua.o +0 -0
  87. data/lua-hooks/ext/luajit/src/jit/bc.lua +190 -0
  88. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +661 -0
  89. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +689 -0
  90. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +428 -0
  91. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +17 -0
  92. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +591 -0
  93. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +17 -0
  94. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +838 -0
  95. data/lua-hooks/ext/luajit/src/jit/dump.lua +706 -0
  96. data/lua-hooks/ext/luajit/src/jit/p.lua +310 -0
  97. data/lua-hooks/ext/luajit/src/jit/v.lua +170 -0
  98. data/lua-hooks/ext/luajit/src/jit/vmdef.lua +362 -0
  99. data/lua-hooks/ext/luajit/src/jit/zone.lua +45 -0
  100. data/lua-hooks/ext/{lua → luajit/src}/lauxlib.h +10 -17
  101. data/lua-hooks/ext/luajit/src/lib_aux.c +356 -0
  102. data/lua-hooks/ext/luajit/src/lib_aux.o +0 -0
  103. data/lua-hooks/ext/luajit/src/lib_aux_dyn.o +0 -0
  104. data/lua-hooks/ext/luajit/src/lib_base.c +664 -0
  105. data/lua-hooks/ext/luajit/src/lib_base.o +0 -0
  106. data/lua-hooks/ext/luajit/src/lib_base_dyn.o +0 -0
  107. data/lua-hooks/ext/luajit/src/lib_bit.c +180 -0
  108. data/lua-hooks/ext/luajit/src/lib_bit.o +0 -0
  109. data/lua-hooks/ext/luajit/src/lib_bit_dyn.o +0 -0
  110. data/lua-hooks/ext/luajit/src/lib_debug.c +405 -0
  111. data/lua-hooks/ext/luajit/src/lib_debug.o +0 -0
  112. data/lua-hooks/ext/luajit/src/lib_debug_dyn.o +0 -0
  113. data/lua-hooks/ext/luajit/src/lib_ffi.c +872 -0
  114. data/lua-hooks/ext/luajit/src/lib_ffi.o +0 -0
  115. data/lua-hooks/ext/luajit/src/lib_ffi_dyn.o +0 -0
  116. data/lua-hooks/ext/luajit/src/lib_init.c +55 -0
  117. data/lua-hooks/ext/luajit/src/lib_init.o +0 -0
  118. data/lua-hooks/ext/luajit/src/lib_init_dyn.o +0 -0
  119. data/lua-hooks/ext/luajit/src/lib_io.c +541 -0
  120. data/lua-hooks/ext/luajit/src/lib_io.o +0 -0
  121. data/lua-hooks/ext/luajit/src/lib_io_dyn.o +0 -0
  122. data/lua-hooks/ext/luajit/src/lib_jit.c +767 -0
  123. data/lua-hooks/ext/luajit/src/lib_jit.o +0 -0
  124. data/lua-hooks/ext/luajit/src/lib_jit_dyn.o +0 -0
  125. data/lua-hooks/ext/luajit/src/lib_math.c +230 -0
  126. data/lua-hooks/ext/luajit/src/lib_math.o +0 -0
  127. data/lua-hooks/ext/luajit/src/lib_math_dyn.o +0 -0
  128. data/lua-hooks/ext/luajit/src/lib_os.c +292 -0
  129. data/lua-hooks/ext/luajit/src/lib_os.o +0 -0
  130. data/lua-hooks/ext/luajit/src/lib_os_dyn.o +0 -0
  131. data/lua-hooks/ext/luajit/src/lib_package.c +610 -0
  132. data/lua-hooks/ext/luajit/src/lib_package.o +0 -0
  133. data/lua-hooks/ext/luajit/src/lib_package_dyn.o +0 -0
  134. data/lua-hooks/ext/luajit/src/lib_string.c +752 -0
  135. data/lua-hooks/ext/luajit/src/lib_string.o +0 -0
  136. data/lua-hooks/ext/luajit/src/lib_string_dyn.o +0 -0
  137. data/lua-hooks/ext/luajit/src/lib_table.c +307 -0
  138. data/lua-hooks/ext/luajit/src/lib_table.o +0 -0
  139. data/lua-hooks/ext/luajit/src/lib_table_dyn.o +0 -0
  140. data/lua-hooks/ext/luajit/src/libluajit.a +0 -0
  141. data/lua-hooks/ext/luajit/src/libluajit.so +0 -0
  142. data/lua-hooks/ext/luajit/src/lj.supp +26 -0
  143. data/lua-hooks/ext/luajit/src/lj_alloc.c +1398 -0
  144. data/lua-hooks/ext/luajit/src/lj_alloc.h +17 -0
  145. data/lua-hooks/ext/luajit/src/lj_alloc.o +0 -0
  146. data/lua-hooks/ext/luajit/src/lj_alloc_dyn.o +0 -0
  147. data/lua-hooks/ext/luajit/src/lj_api.c +1210 -0
  148. data/lua-hooks/ext/luajit/src/lj_api.o +0 -0
  149. data/lua-hooks/ext/luajit/src/lj_api_dyn.o +0 -0
  150. data/lua-hooks/ext/luajit/src/lj_arch.h +509 -0
  151. data/lua-hooks/ext/luajit/src/lj_asm.c +2278 -0
  152. data/lua-hooks/ext/luajit/src/lj_asm.h +17 -0
  153. data/lua-hooks/ext/luajit/src/lj_asm.o +0 -0
  154. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +2217 -0
  155. data/lua-hooks/ext/luajit/src/lj_asm_dyn.o +0 -0
  156. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +1833 -0
  157. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2015 -0
  158. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2634 -0
  159. data/lua-hooks/ext/luajit/src/lj_bc.c +14 -0
  160. data/lua-hooks/ext/luajit/src/lj_bc.h +265 -0
  161. data/lua-hooks/ext/luajit/src/lj_bc.o +0 -0
  162. data/lua-hooks/ext/luajit/src/lj_bc_dyn.o +0 -0
  163. data/lua-hooks/ext/luajit/src/lj_bcdef.h +220 -0
  164. data/lua-hooks/ext/luajit/src/lj_bcdump.h +68 -0
  165. data/lua-hooks/ext/luajit/src/lj_bcread.c +457 -0
  166. data/lua-hooks/ext/luajit/src/lj_bcread.o +0 -0
  167. data/lua-hooks/ext/luajit/src/lj_bcread_dyn.o +0 -0
  168. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +361 -0
  169. data/lua-hooks/ext/luajit/src/lj_bcwrite.o +0 -0
  170. data/lua-hooks/ext/luajit/src/lj_bcwrite_dyn.o +0 -0
  171. data/lua-hooks/ext/luajit/src/lj_buf.c +234 -0
  172. data/lua-hooks/ext/luajit/src/lj_buf.h +105 -0
  173. data/lua-hooks/ext/luajit/src/lj_buf.o +0 -0
  174. data/lua-hooks/ext/luajit/src/lj_buf_dyn.o +0 -0
  175. data/lua-hooks/ext/luajit/src/lj_carith.c +429 -0
  176. data/lua-hooks/ext/luajit/src/lj_carith.h +37 -0
  177. data/lua-hooks/ext/luajit/src/lj_carith.o +0 -0
  178. data/lua-hooks/ext/luajit/src/lj_carith_dyn.o +0 -0
  179. data/lua-hooks/ext/luajit/src/lj_ccall.c +984 -0
  180. data/lua-hooks/ext/luajit/src/lj_ccall.h +178 -0
  181. data/lua-hooks/ext/luajit/src/lj_ccall.o +0 -0
  182. data/lua-hooks/ext/luajit/src/lj_ccall_dyn.o +0 -0
  183. data/lua-hooks/ext/luajit/src/lj_ccallback.c +712 -0
  184. data/lua-hooks/ext/luajit/src/lj_ccallback.h +25 -0
  185. data/lua-hooks/ext/luajit/src/lj_ccallback.o +0 -0
  186. data/lua-hooks/ext/luajit/src/lj_ccallback_dyn.o +0 -0
  187. data/lua-hooks/ext/luajit/src/lj_cconv.c +752 -0
  188. data/lua-hooks/ext/luajit/src/lj_cconv.h +70 -0
  189. data/lua-hooks/ext/luajit/src/lj_cconv.o +0 -0
  190. data/lua-hooks/ext/luajit/src/lj_cconv_dyn.o +0 -0
  191. data/lua-hooks/ext/luajit/src/lj_cdata.c +288 -0
  192. data/lua-hooks/ext/luajit/src/lj_cdata.h +76 -0
  193. data/lua-hooks/ext/luajit/src/lj_cdata.o +0 -0
  194. data/lua-hooks/ext/luajit/src/lj_cdata_dyn.o +0 -0
  195. data/lua-hooks/ext/luajit/src/lj_char.c +43 -0
  196. data/lua-hooks/ext/luajit/src/lj_char.h +42 -0
  197. data/lua-hooks/ext/luajit/src/lj_char.o +0 -0
  198. data/lua-hooks/ext/luajit/src/lj_char_dyn.o +0 -0
  199. data/lua-hooks/ext/luajit/src/lj_clib.c +418 -0
  200. data/lua-hooks/ext/luajit/src/lj_clib.h +29 -0
  201. data/lua-hooks/ext/luajit/src/lj_clib.o +0 -0
  202. data/lua-hooks/ext/luajit/src/lj_clib_dyn.o +0 -0
  203. data/lua-hooks/ext/luajit/src/lj_cparse.c +1862 -0
  204. data/lua-hooks/ext/luajit/src/lj_cparse.h +65 -0
  205. data/lua-hooks/ext/luajit/src/lj_cparse.o +0 -0
  206. data/lua-hooks/ext/luajit/src/lj_cparse_dyn.o +0 -0
  207. data/lua-hooks/ext/luajit/src/lj_crecord.c +1834 -0
  208. data/lua-hooks/ext/luajit/src/lj_crecord.h +38 -0
  209. data/lua-hooks/ext/luajit/src/lj_crecord.o +0 -0
  210. data/lua-hooks/ext/luajit/src/lj_crecord_dyn.o +0 -0
  211. data/lua-hooks/ext/luajit/src/lj_ctype.c +635 -0
  212. data/lua-hooks/ext/luajit/src/lj_ctype.h +461 -0
  213. data/lua-hooks/ext/luajit/src/lj_ctype.o +0 -0
  214. data/lua-hooks/ext/luajit/src/lj_ctype_dyn.o +0 -0
  215. data/lua-hooks/ext/luajit/src/lj_debug.c +699 -0
  216. data/lua-hooks/ext/luajit/src/lj_debug.h +65 -0
  217. data/lua-hooks/ext/luajit/src/lj_debug.o +0 -0
  218. data/lua-hooks/ext/luajit/src/lj_debug_dyn.o +0 -0
  219. data/lua-hooks/ext/luajit/src/lj_def.h +365 -0
  220. data/lua-hooks/ext/luajit/src/lj_dispatch.c +557 -0
  221. data/lua-hooks/ext/luajit/src/lj_dispatch.h +138 -0
  222. data/lua-hooks/ext/luajit/src/lj_dispatch.o +0 -0
  223. data/lua-hooks/ext/luajit/src/lj_dispatch_dyn.o +0 -0
  224. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +356 -0
  225. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +211 -0
  226. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +238 -0
  227. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +462 -0
  228. data/lua-hooks/ext/luajit/src/lj_err.c +794 -0
  229. data/lua-hooks/ext/luajit/src/lj_err.h +41 -0
  230. data/lua-hooks/ext/luajit/src/lj_err.o +0 -0
  231. data/lua-hooks/ext/luajit/src/lj_err_dyn.o +0 -0
  232. data/lua-hooks/ext/luajit/src/lj_errmsg.h +190 -0
  233. data/lua-hooks/ext/luajit/src/lj_ff.h +18 -0
  234. data/lua-hooks/ext/luajit/src/lj_ffdef.h +209 -0
  235. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +1247 -0
  236. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +24 -0
  237. data/lua-hooks/ext/luajit/src/lj_ffrecord.o +0 -0
  238. data/lua-hooks/ext/luajit/src/lj_ffrecord_dyn.o +0 -0
  239. data/lua-hooks/ext/luajit/src/lj_folddef.h +1138 -0
  240. data/lua-hooks/ext/luajit/src/lj_frame.h +259 -0
  241. data/lua-hooks/ext/luajit/src/lj_func.c +185 -0
  242. data/lua-hooks/ext/luajit/src/lj_func.h +24 -0
  243. data/lua-hooks/ext/luajit/src/lj_func.o +0 -0
  244. data/lua-hooks/ext/luajit/src/lj_func_dyn.o +0 -0
  245. data/lua-hooks/ext/luajit/src/lj_gc.c +845 -0
  246. data/lua-hooks/ext/luajit/src/lj_gc.h +134 -0
  247. data/lua-hooks/ext/luajit/src/lj_gc.o +0 -0
  248. data/lua-hooks/ext/luajit/src/lj_gc_dyn.o +0 -0
  249. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +787 -0
  250. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +22 -0
  251. data/lua-hooks/ext/luajit/src/lj_gdbjit.o +0 -0
  252. data/lua-hooks/ext/luajit/src/lj_gdbjit_dyn.o +0 -0
  253. data/lua-hooks/ext/luajit/src/lj_ir.c +505 -0
  254. data/lua-hooks/ext/luajit/src/lj_ir.h +577 -0
  255. data/lua-hooks/ext/luajit/src/lj_ir.o +0 -0
  256. data/lua-hooks/ext/luajit/src/lj_ir_dyn.o +0 -0
  257. data/lua-hooks/ext/luajit/src/lj_ircall.h +321 -0
  258. data/lua-hooks/ext/luajit/src/lj_iropt.h +161 -0
  259. data/lua-hooks/ext/luajit/src/lj_jit.h +440 -0
  260. data/lua-hooks/ext/luajit/src/lj_lex.c +482 -0
  261. data/lua-hooks/ext/luajit/src/lj_lex.h +86 -0
  262. data/lua-hooks/ext/luajit/src/lj_lex.o +0 -0
  263. data/lua-hooks/ext/luajit/src/lj_lex_dyn.o +0 -0
  264. data/lua-hooks/ext/luajit/src/lj_lib.c +303 -0
  265. data/lua-hooks/ext/luajit/src/lj_lib.h +115 -0
  266. data/lua-hooks/ext/luajit/src/lj_lib.o +0 -0
  267. data/lua-hooks/ext/luajit/src/lj_lib_dyn.o +0 -0
  268. data/lua-hooks/ext/luajit/src/lj_libdef.h +414 -0
  269. data/lua-hooks/ext/luajit/src/lj_load.c +168 -0
  270. data/lua-hooks/ext/luajit/src/lj_load.o +0 -0
  271. data/lua-hooks/ext/luajit/src/lj_load_dyn.o +0 -0
  272. data/lua-hooks/ext/luajit/src/lj_mcode.c +386 -0
  273. data/lua-hooks/ext/luajit/src/lj_mcode.h +30 -0
  274. data/lua-hooks/ext/luajit/src/lj_mcode.o +0 -0
  275. data/lua-hooks/ext/luajit/src/lj_mcode_dyn.o +0 -0
  276. data/lua-hooks/ext/luajit/src/lj_meta.c +477 -0
  277. data/lua-hooks/ext/luajit/src/lj_meta.h +38 -0
  278. data/lua-hooks/ext/luajit/src/lj_meta.o +0 -0
  279. data/lua-hooks/ext/luajit/src/lj_meta_dyn.o +0 -0
  280. data/lua-hooks/ext/luajit/src/lj_obj.c +50 -0
  281. data/lua-hooks/ext/luajit/src/lj_obj.h +976 -0
  282. data/lua-hooks/ext/luajit/src/lj_obj.o +0 -0
  283. data/lua-hooks/ext/luajit/src/lj_obj_dyn.o +0 -0
  284. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +78 -0
  285. data/lua-hooks/ext/luajit/src/lj_opt_dce.o +0 -0
  286. data/lua-hooks/ext/luajit/src/lj_opt_dce_dyn.o +0 -0
  287. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +2488 -0
  288. data/lua-hooks/ext/luajit/src/lj_opt_fold.o +0 -0
  289. data/lua-hooks/ext/luajit/src/lj_opt_fold_dyn.o +0 -0
  290. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +449 -0
  291. data/lua-hooks/ext/luajit/src/lj_opt_loop.o +0 -0
  292. data/lua-hooks/ext/luajit/src/lj_opt_loop_dyn.o +0 -0
  293. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +935 -0
  294. data/lua-hooks/ext/luajit/src/lj_opt_mem.o +0 -0
  295. data/lua-hooks/ext/luajit/src/lj_opt_mem_dyn.o +0 -0
  296. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +652 -0
  297. data/lua-hooks/ext/luajit/src/lj_opt_narrow.o +0 -0
  298. data/lua-hooks/ext/luajit/src/lj_opt_narrow_dyn.o +0 -0
  299. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +245 -0
  300. data/lua-hooks/ext/luajit/src/lj_opt_sink.o +0 -0
  301. data/lua-hooks/ext/luajit/src/lj_opt_sink_dyn.o +0 -0
  302. data/lua-hooks/ext/luajit/src/lj_opt_split.c +856 -0
  303. data/lua-hooks/ext/luajit/src/lj_opt_split.o +0 -0
  304. data/lua-hooks/ext/luajit/src/lj_opt_split_dyn.o +0 -0
  305. data/lua-hooks/ext/luajit/src/lj_parse.c +2725 -0
  306. data/lua-hooks/ext/luajit/src/lj_parse.h +18 -0
  307. data/lua-hooks/ext/luajit/src/lj_parse.o +0 -0
  308. data/lua-hooks/ext/luajit/src/lj_parse_dyn.o +0 -0
  309. data/lua-hooks/ext/luajit/src/lj_profile.c +368 -0
  310. data/lua-hooks/ext/luajit/src/lj_profile.h +21 -0
  311. data/lua-hooks/ext/luajit/src/lj_profile.o +0 -0
  312. data/lua-hooks/ext/luajit/src/lj_profile_dyn.o +0 -0
  313. data/lua-hooks/ext/luajit/src/lj_recdef.h +270 -0
  314. data/lua-hooks/ext/luajit/src/lj_record.c +2554 -0
  315. data/lua-hooks/ext/luajit/src/lj_record.h +45 -0
  316. data/lua-hooks/ext/luajit/src/lj_record.o +0 -0
  317. data/lua-hooks/ext/luajit/src/lj_record_dyn.o +0 -0
  318. data/lua-hooks/ext/luajit/src/lj_snap.c +870 -0
  319. data/lua-hooks/ext/luajit/src/lj_snap.h +34 -0
  320. data/lua-hooks/ext/luajit/src/lj_snap.o +0 -0
  321. data/lua-hooks/ext/luajit/src/lj_snap_dyn.o +0 -0
  322. data/lua-hooks/ext/luajit/src/lj_state.c +300 -0
  323. data/lua-hooks/ext/luajit/src/lj_state.h +35 -0
  324. data/lua-hooks/ext/luajit/src/lj_state.o +0 -0
  325. data/lua-hooks/ext/luajit/src/lj_state_dyn.o +0 -0
  326. data/lua-hooks/ext/luajit/src/lj_str.c +197 -0
  327. data/lua-hooks/ext/luajit/src/lj_str.h +27 -0
  328. data/lua-hooks/ext/luajit/src/lj_str.o +0 -0
  329. data/lua-hooks/ext/luajit/src/lj_str_dyn.o +0 -0
  330. data/lua-hooks/ext/luajit/src/lj_strfmt.c +554 -0
  331. data/lua-hooks/ext/luajit/src/lj_strfmt.h +125 -0
  332. data/lua-hooks/ext/luajit/src/lj_strfmt.o +0 -0
  333. data/lua-hooks/ext/luajit/src/lj_strfmt_dyn.o +0 -0
  334. data/lua-hooks/ext/luajit/src/lj_strscan.c +547 -0
  335. data/lua-hooks/ext/luajit/src/lj_strscan.h +39 -0
  336. data/lua-hooks/ext/luajit/src/lj_strscan.o +0 -0
  337. data/lua-hooks/ext/luajit/src/lj_strscan_dyn.o +0 -0
  338. data/lua-hooks/ext/luajit/src/lj_tab.c +666 -0
  339. data/lua-hooks/ext/luajit/src/lj_tab.h +73 -0
  340. data/lua-hooks/ext/luajit/src/lj_tab.o +0 -0
  341. data/lua-hooks/ext/luajit/src/lj_tab_dyn.o +0 -0
  342. data/lua-hooks/ext/luajit/src/lj_target.h +164 -0
  343. data/lua-hooks/ext/luajit/src/lj_target_arm.h +270 -0
  344. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +97 -0
  345. data/lua-hooks/ext/luajit/src/lj_target_mips.h +260 -0
  346. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +280 -0
  347. data/lua-hooks/ext/luajit/src/lj_target_x86.h +345 -0
  348. data/lua-hooks/ext/luajit/src/lj_trace.c +859 -0
  349. data/lua-hooks/ext/luajit/src/lj_trace.h +54 -0
  350. data/lua-hooks/ext/luajit/src/lj_trace.o +0 -0
  351. data/lua-hooks/ext/luajit/src/lj_trace_dyn.o +0 -0
  352. data/lua-hooks/ext/luajit/src/lj_traceerr.h +63 -0
  353. data/lua-hooks/ext/luajit/src/lj_udata.c +34 -0
  354. data/lua-hooks/ext/luajit/src/lj_udata.h +14 -0
  355. data/lua-hooks/ext/luajit/src/lj_udata.o +0 -0
  356. data/lua-hooks/ext/luajit/src/lj_udata_dyn.o +0 -0
  357. data/lua-hooks/ext/luajit/src/lj_vm.S +2730 -0
  358. data/lua-hooks/ext/luajit/src/lj_vm.h +114 -0
  359. data/lua-hooks/ext/luajit/src/lj_vm.o +0 -0
  360. data/lua-hooks/ext/luajit/src/lj_vm_dyn.o +0 -0
  361. data/lua-hooks/ext/luajit/src/lj_vmevent.c +58 -0
  362. data/lua-hooks/ext/luajit/src/lj_vmevent.h +59 -0
  363. data/lua-hooks/ext/luajit/src/lj_vmevent.o +0 -0
  364. data/lua-hooks/ext/luajit/src/lj_vmevent_dyn.o +0 -0
  365. data/lua-hooks/ext/luajit/src/lj_vmmath.c +152 -0
  366. data/lua-hooks/ext/luajit/src/lj_vmmath.o +0 -0
  367. data/lua-hooks/ext/luajit/src/lj_vmmath_dyn.o +0 -0
  368. data/lua-hooks/ext/luajit/src/ljamalg.c +96 -0
  369. data/lua-hooks/ext/{lua → luajit/src}/lua.h +12 -7
  370. data/lua-hooks/ext/luajit/src/lua.hpp +9 -0
  371. data/lua-hooks/ext/luajit/src/luaconf.h +156 -0
  372. data/lua-hooks/ext/luajit/src/luajit +0 -0
  373. data/lua-hooks/ext/luajit/src/luajit.c +570 -0
  374. data/lua-hooks/ext/luajit/src/luajit.h +79 -0
  375. data/lua-hooks/ext/luajit/src/luajit.o +0 -0
  376. data/lua-hooks/ext/luajit/src/lualib.h +43 -0
  377. data/lua-hooks/ext/luajit/src/msvcbuild.bat +114 -0
  378. data/lua-hooks/ext/luajit/src/ps4build.bat +103 -0
  379. data/lua-hooks/ext/luajit/src/psvitabuild.bat +93 -0
  380. data/lua-hooks/ext/luajit/src/vm_arm.dasc +4585 -0
  381. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +3764 -0
  382. data/lua-hooks/ext/luajit/src/vm_mips.dasc +4355 -0
  383. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +5252 -0
  384. data/lua-hooks/ext/luajit/src/vm_x64.dasc +4902 -0
  385. data/lua-hooks/ext/luajit/src/vm_x86.dasc +5710 -0
  386. data/lua-hooks/ext/luajit/src/xb1build.bat +101 -0
  387. data/lua-hooks/ext/luajit/src/xedkbuild.bat +92 -0
  388. data/lua-hooks/ext/luautf8/lutf8lib.c +3 -3
  389. data/lua-hooks/lib/boot.lua +37 -2
  390. metadata +372 -69
  391. data/lua-hooks/ext/bitop/README +0 -22
  392. data/lua-hooks/ext/bitop/bit.c +0 -189
  393. data/lua-hooks/ext/extconf.rb +0 -38
  394. data/lua-hooks/ext/lua/COPYRIGHT +0 -34
  395. data/lua-hooks/ext/lua/lapi.c +0 -1087
  396. data/lua-hooks/ext/lua/lapi.h +0 -16
  397. data/lua-hooks/ext/lua/lauxlib.c +0 -652
  398. data/lua-hooks/ext/lua/lbaselib.c +0 -659
  399. data/lua-hooks/ext/lua/lcode.c +0 -831
  400. data/lua-hooks/ext/lua/lcode.h +0 -76
  401. data/lua-hooks/ext/lua/ldblib.c +0 -398
  402. data/lua-hooks/ext/lua/ldebug.c +0 -638
  403. data/lua-hooks/ext/lua/ldebug.h +0 -33
  404. data/lua-hooks/ext/lua/ldo.c +0 -519
  405. data/lua-hooks/ext/lua/ldo.h +0 -57
  406. data/lua-hooks/ext/lua/ldump.c +0 -164
  407. data/lua-hooks/ext/lua/lfunc.c +0 -174
  408. data/lua-hooks/ext/lua/lfunc.h +0 -34
  409. data/lua-hooks/ext/lua/lgc.c +0 -710
  410. data/lua-hooks/ext/lua/lgc.h +0 -110
  411. data/lua-hooks/ext/lua/linit.c +0 -38
  412. data/lua-hooks/ext/lua/liolib.c +0 -556
  413. data/lua-hooks/ext/lua/llex.c +0 -463
  414. data/lua-hooks/ext/lua/llex.h +0 -81
  415. data/lua-hooks/ext/lua/llimits.h +0 -128
  416. data/lua-hooks/ext/lua/lmathlib.c +0 -263
  417. data/lua-hooks/ext/lua/lmem.c +0 -86
  418. data/lua-hooks/ext/lua/lmem.h +0 -49
  419. data/lua-hooks/ext/lua/loadlib.c +0 -705
  420. data/lua-hooks/ext/lua/loadlib_rel.c +0 -760
  421. data/lua-hooks/ext/lua/lobject.c +0 -214
  422. data/lua-hooks/ext/lua/lobject.h +0 -381
  423. data/lua-hooks/ext/lua/lopcodes.c +0 -102
  424. data/lua-hooks/ext/lua/lopcodes.h +0 -268
  425. data/lua-hooks/ext/lua/loslib.c +0 -243
  426. data/lua-hooks/ext/lua/lparser.c +0 -1339
  427. data/lua-hooks/ext/lua/lparser.h +0 -82
  428. data/lua-hooks/ext/lua/lstate.c +0 -214
  429. data/lua-hooks/ext/lua/lstate.h +0 -169
  430. data/lua-hooks/ext/lua/lstring.c +0 -111
  431. data/lua-hooks/ext/lua/lstring.h +0 -31
  432. data/lua-hooks/ext/lua/lstrlib.c +0 -871
  433. data/lua-hooks/ext/lua/ltable.c +0 -588
  434. data/lua-hooks/ext/lua/ltable.h +0 -40
  435. data/lua-hooks/ext/lua/ltablib.c +0 -287
  436. data/lua-hooks/ext/lua/ltm.c +0 -75
  437. data/lua-hooks/ext/lua/ltm.h +0 -54
  438. data/lua-hooks/ext/lua/lua.c +0 -392
  439. data/lua-hooks/ext/lua/lua.def +0 -131
  440. data/lua-hooks/ext/lua/lua.rc +0 -28
  441. data/lua-hooks/ext/lua/lua_dll.rc +0 -26
  442. data/lua-hooks/ext/lua/luac.c +0 -200
  443. data/lua-hooks/ext/lua/luac.rc +0 -1
  444. data/lua-hooks/ext/lua/luaconf.h +0 -763
  445. data/lua-hooks/ext/lua/luaconf.h.in +0 -724
  446. data/lua-hooks/ext/lua/luaconf.h.orig +0 -763
  447. data/lua-hooks/ext/lua/lualib.h +0 -53
  448. data/lua-hooks/ext/lua/lundump.c +0 -227
  449. data/lua-hooks/ext/lua/lundump.h +0 -36
  450. data/lua-hooks/ext/lua/lvm.c +0 -767
  451. data/lua-hooks/ext/lua/lvm.h +0 -36
  452. data/lua-hooks/ext/lua/lzio.c +0 -82
  453. data/lua-hooks/ext/lua/lzio.h +0 -67
  454. data/lua-hooks/ext/lua/print.c +0 -227
@@ -0,0 +1,4902 @@
1
+ |// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2
+ |// Bytecode interpreter, fast functions and helper functions.
3
+ |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4
+ |
5
+ |.arch x64
6
+ |.section code_op, code_sub // Two named code sections: code_op and code_sub.
7
+ |
8
+ |.actionlist build_actionlist
9
+ |.globals GLOB_
10
+ |.globalnames globnames
11
+ |.externnames extnames
12
+ |// NOTE(review): build_actionlist, GLOB_, globnames and extnames are presumably the C-side identifiers DynASM emits for its generated tables - confirm against the DynASM directive reference.
13
+ |//-----------------------------------------------------------------------
14
+ |
15
+ |.if WIN // X64WIN is defined only when WIN is set at preprocessing time.
16
+ |.define X64WIN, 1 // Windows/x64 calling conventions.
17
+ |.endif // Later '.if X64WIN' blocks select the register assignments, the C argument registers and the stack layout.
18
+ |
19
+ |// Fixed register assignments for the interpreter.
20
+ |// This is very fragile and has many dependencies. Caveat emptor.
21
+ |.define BASE, rdx // Not C callee-save, refetched anyway.
22
+ |.if X64WIN // Windows build: KBASE/PC/DISPATCH live in rdi/rsi/rbx.
23
+ |.define KBASE, rdi // Must be C callee-save.
24
+ |.define PC, rsi // Must be C callee-save.
25
+ |.define DISPATCH, rbx // Must be C callee-save.
26
+ |.define KBASEd, edi // 32-bit view of KBASE.
27
+ |.define PCd, esi // 32-bit view of PC.
28
+ |.define DISPATCHd, ebx // 32-bit view of DISPATCH.
29
+ |.else // Non-Windows build: KBASE/PC/DISPATCH live in r15/rbx/r14.
30
+ |.define KBASE, r15 // Must be C callee-save.
31
+ |.define PC, rbx // Must be C callee-save.
32
+ |.define DISPATCH, r14 // Must be C callee-save.
33
+ |.define KBASEd, r15d // 32-bit view of KBASE.
34
+ |.define PCd, ebx // 32-bit view of PC.
35
+ |.define DISPATCHd, r14d // 32-bit view of DISPATCH.
36
+ |.endif
37
+ |// The names below are the same for both builds. Suffixes: d = 32-bit view, W = 16-bit view, H = bits 8-15, L = low byte.
38
+ |.define RA, rcx
39
+ |.define RAd, ecx
40
+ |.define RAH, ch
41
+ |.define RAL, cl
42
+ |.define RB, rbp // Must be rbp (C callee-save).
43
+ |.define RBd, ebp
44
+ |.define RC, rax // Must be rax.
45
+ |.define RCd, eax
46
+ |.define RCW, ax
47
+ |.define RCH, ah
48
+ |.define RCL, al
49
+ |.define OP, RBd // OP is the 32-bit view of RB, so writing OP overwrites RB and vice versa.
50
+ |.define RD, RC // RD is an alias of RC (rax), the two names share one register.
51
+ |.define RDd, RCd
52
+ |.define RDW, RCW
53
+ |.define RDL, RCL
54
+ |.define TMPR, r10
55
+ |.define TMPRd, r10d
56
+ |.define ITYPE, r11 // Written by the settp and checktp_nc macros when they build or extract a type tag.
57
+ |.define ITYPEd, r11d
58
+ |
59
+ |.if X64WIN
60
+ |.define CARG1, rcx // x64/WIN64 C call arguments. Same register as RA.
61
+ |.define CARG2, rdx // Same register as BASE.
62
+ |.define CARG3, r8
63
+ |.define CARG4, r9
64
+ |.define CARG1d, ecx
65
+ |.define CARG2d, edx
66
+ |.define CARG3d, r8d
67
+ |.define CARG4d, r9d // Only four register arguments are named here. A fifth argument uses the ARG5 stack slot of the X64WIN stack layout.
68
+ |.else
69
+ |.define CARG1, rdi // x64/POSIX C call arguments.
70
+ |.define CARG2, rsi
71
+ |.define CARG3, rdx // Same register as BASE.
72
+ |.define CARG4, rcx // Same register as RA.
73
+ |.define CARG5, r8 // CARG5 and CARG6 exist only in this non-Windows branch.
74
+ |.define CARG6, r9
75
+ |.define CARG1d, edi
76
+ |.define CARG2d, esi
77
+ |.define CARG3d, edx
78
+ |.define CARG4d, ecx
79
+ |.define CARG5d, r8d
80
+ |.define CARG6d, r9d
81
+ |.endif
82
+ |
83
+ |// Type definitions. Some of these are only used for documentation.
84
+ |.type L, lua_State // Each .type line pairs a short assembler-side name with a C type name.
85
+ |.type GL, global_State
86
+ |.type TVALUE, TValue
87
+ |.type GCOBJ, GCobj
88
+ |.type STR, GCstr
89
+ |.type TAB, GCtab
90
+ |.type LFUNC, GCfuncL // Used by ins_callt as LFUNC:RB->pc to address the pc field through RB.
91
+ |.type CFUNC, GCfuncC
92
+ |.type PROTO, GCproto
93
+ |.type UPVAL, GCupval
94
+ |.type NODE, Node
95
+ |.type NARGS, int // The only name here bound to a plain C scalar type.
96
+ |.type TRACE, GCtrace
97
+ |.type SBUF, SBuf
98
+ |
99
+ |// Stack layout while in interpreter. Must match with lj_frame.h.
100
+ |//-----------------------------------------------------------------------
101
+ |.if X64WIN // x64/Windows stack layout
102
+ |
103
+ |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104
+ |.macro saveregs_ // Saves rdi, rsi, rbx (KBASE, PC, DISPATCH in this branch), then reserves CFRAME_SPACE below them.
105
+ | push rdi; push rsi; push rbx
106
+ | sub rsp, CFRAME_SPACE
107
+ |.endmacro
108
+ |.macro saveregs // Full entry sequence: rbp (RB) first, then saveregs_.
109
+ | push rbp; saveregs_
110
+ |.endmacro
111
+ |.macro restoreregs // Exact mirror of saveregs: release CFRAME_SPACE, then pop in reverse push order.
112
+ | add rsp, CFRAME_SPACE
113
+ | pop rbx; pop rsi; pop rdi; pop rbp
114
+ |.endmacro
115
+ |// All slot offsets below are relative to rsp after saveregs has run (four pushes plus CFRAME_SPACE = nine awords below the return address).
116
+ |.define SAVE_CFRAME, aword [rsp+aword*13]
117
+ |.define SAVE_PC, aword [rsp+aword*12]
118
+ |.define SAVE_L, aword [rsp+aword*11]
119
+ |.define SAVE_ERRF, dword [rsp+dword*21] // Upper dword of the aword*10 slot (assuming 4-byte dword, 8-byte aword).
120
+ |.define SAVE_NRES, dword [rsp+dword*20] // Lower dword of the same aword*10 slot.
121
+ |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122
+ |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123
+ |.define SAVE_R4, aword [rsp+aword*8] // Slot written by 'push rbp' in saveregs.
124
+ |.define SAVE_R3, aword [rsp+aword*7] // Slot written by 'push rdi'.
125
+ |.define SAVE_R2, aword [rsp+aword*6] // Slot written by 'push rsi'.
126
+ |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. Slot written by 'push rbx'.
127
+ |.define ARG5, aword [rsp+aword*4]
128
+ |.define CSAVE_4, aword [rsp+aword*3]
129
+ |.define CSAVE_3, aword [rsp+aword*2]
130
+ |.define CSAVE_2, aword [rsp+aword*1]
131
+ |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132
+ |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133
+ |
134
+ |.define ARG5d, dword [rsp+dword*8] // Same address as ARG5 (dword*8 == aword*4), accessed as 32 bits.
135
+ |.define TMP1, ARG5 // TMP1 overlaps ARG5
136
+ |.define TMP1d, ARG5d
137
+ |.define TMP1hi, dword [rsp+dword*9] // Upper dword of the TMP1/ARG5 slot.
138
+ |.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139
+ |
140
+ |//-----------------------------------------------------------------------
141
+ |.else // x64/POSIX stack layout
142
+ |
143
+ |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144
+ |.macro saveregs_ // Saves rbx, r15, r14 (PC, KBASE, DISPATCH in this branch), optionally r13 and r12, then reserves CFRAME_SPACE.
145
+ | push rbx; push r15; push r14
146
+ |.if NO_UNWIND // Two extra saves when NO_UNWIND is set. This shifts the SAVE_R* and SAVE_RET slots up by two awords below.
147
+ | push r13; push r12
148
+ |.endif
149
+ | sub rsp, CFRAME_SPACE
150
+ |.endmacro
151
+ |.macro saveregs // Full entry sequence: rbp (RB) first, then saveregs_.
152
+ | push rbp; saveregs_
153
+ |.endmacro
154
+ |.macro restoreregs // Exact mirror of saveregs: release CFRAME_SPACE, then pop in reverse push order.
155
+ | add rsp, CFRAME_SPACE
156
+ |.if NO_UNWIND
157
+ | pop r12; pop r13
158
+ |.endif
159
+ | pop r14; pop r15; pop rbx; pop rbp
160
+ |.endmacro
161
+ |// All slot offsets below are relative to rsp after saveregs has run.
162
+ |//----- 16 byte aligned,
163
+ |.if NO_UNWIND // Six pushes plus CFRAME_SPACE = eleven awords below the return address.
164
+ |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165
+ |.define SAVE_R4, aword [rsp+aword*10] // Slot written by 'push rbp'.
166
+ |.define SAVE_R3, aword [rsp+aword*9] // Slot written by 'push rbx'.
167
+ |.define SAVE_R2, aword [rsp+aword*8] // Slot written by 'push r15'.
168
+ |.define SAVE_R1, aword [rsp+aword*7] // Slot written by 'push r14'.
169
+ |.define SAVE_RU2, aword [rsp+aword*6] // Slot written by 'push r13'.
170
+ |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. Slot written by 'push r12'.
171
+ |.else // Four pushes plus CFRAME_SPACE = nine awords below the return address.
172
+ |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173
+ |.define SAVE_R4, aword [rsp+aword*8] // Slot written by 'push rbp'.
174
+ |.define SAVE_R3, aword [rsp+aword*7] // Slot written by 'push rbx'.
175
+ |.define SAVE_R2, aword [rsp+aword*6] // Slot written by 'push r15'.
176
+ |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. Slot written by 'push r14'.
177
+ |.endif
178
+ |.define SAVE_CFRAME, aword [rsp+aword*4]
179
+ |.define SAVE_PC, aword [rsp+aword*3]
180
+ |.define SAVE_L, aword [rsp+aword*2]
181
+ |.define SAVE_ERRF, dword [rsp+dword*3] // Upper dword of the aword*1 slot (assuming 4-byte dword, 8-byte aword).
182
+ |.define SAVE_NRES, dword [rsp+dword*2] // Lower dword of the same aword*1 slot.
183
+ |.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184
+ |//----- 16 byte aligned
185
+ |
186
+ |.define TMP1d, dword [rsp] // Lower dword of TMP1.
187
+ |.define TMP1hi, dword [rsp+dword*1] // Upper dword of TMP1.
188
+ |.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
189
+ |
190
+ |.endif
191
+ |
192
+ |//-----------------------------------------------------------------------
193
+ |
194
+ |// Instruction headers. Each macro finishes operand decoding for one instruction format after ins_NEXT has filled RA, OP and RC/RD.
195
+ |.macro ins_A; .endmacro // Empty: emits no code.
196
+ |.macro ins_AD; .endmacro // Empty: RD already holds the upper 16 bits of the instruction word.
197
+ |.macro ins_AJ; .endmacro // Empty: emits no code.
198
+ |.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro // Splits the 16-bit value in RC: bits 8-15 go to RB, low byte stays in RC. RB is read out first because the second movzx overwrites RC.
199
+ |.macro ins_AB_; movzx RBd, RCH; .endmacro // Extracts only RB. RC keeps the full 16-bit value.
200
+ |.macro ins_A_C; movzx RCd, RCL; .endmacro // Extracts only RC (low byte). RB is left untouched.
201
+ |.macro ins_AND; not RD; .endmacro // Replaces RD with its bitwise complement (64-bit not on rax).
203
+ |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
204
+ |.macro ins_NEXT // Fetches the instruction at PC, decodes OP/RA/RD and jumps to the handler for OP.
205
+ | mov RCd, [PC] // Load the 32-bit instruction word into RC (eax).
206
+ | movzx RAd, RCH // RA = bits 8-15 of the word.
207
+ | movzx OP, RCL // OP = low byte of the word. OP is RBd, so RB is overwritten here.
208
+ | add PC, 4 // Advance PC past this instruction.
209
+ | shr RCd, 16 // RC/RD = upper 16 bits of the word.
210
+ | jmp aword [DISPATCH+OP*8] // Indirect jump through the table at DISPATCH, one 8-byte entry per opcode.
211
+ |.endmacro
212
+ |
213
+ |// Instruction footer.
214
+ |.if 1
215
+ | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. Always selected, since the condition above is the constant 1.
216
+ | .define ins_next, ins_NEXT
217
+ | .define ins_next_, ins_NEXT
218
+ |.else
219
+ | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220
+ | // Affects only certain kinds of benchmarks (and only with -j off).
221
+ | // Around 10%-30% slower on Core2, a lot slower on P4.
222
+ | .macro ins_next
223
+ | jmp ->ins_next // Jump to the single shared dispatch sequence.
224
+ | .endmacro
225
+ | .macro ins_next_
226
+ | ->ins_next: // Defines the shared label, so this macro may only be expanded once.
227
+ | ins_NEXT
228
+ | .endmacro
229
+ |.endif
230
+ |
231
+ |// Call decode and dispatch.
232
+ |.macro ins_callt
233
+ | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234
+ | mov PC, LFUNC:RB->pc // PC = the pc field of the function object in RB.
235
+ | mov RAd, [PC] // Load the 32 bit instruction word at that PC.
236
+ | movzx OP, RAL // OP = RAL; must be read before RA is overwritten on the next line.
237
+ | movzx RAd, RAH // RA = RAH. The upper 16 bits of the word are not extracted here (unlike ins_NEXT).
238
+ | add PC, 4 // PC points past the instruction being dispatched.
239
+ | jmp aword [DISPATCH+OP*8] // Indirect jump through the table at DISPATCH, 8 bytes per entry.
240
+ |.endmacro
241
+ |
242
+ |.macro ins_call
243
+ | // BASE = new base, RB = LFUNC, RD = nargs+1
244
+ | mov [BASE-8], PC // Store the current PC at [BASE-8], the slot ins_callt expects to be set.
245
+ | ins_callt
246
+ |.endmacro
247
+ |
248
+ |//-----------------------------------------------------------------------
249
+ |
250
+ |// Macros to clear or set tags.
251
+ |.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro // Zeroes the top 17 bits of a 64 bit reg, keeping the low 47. Modifies flags.
252
+ |.macro settp, reg, tp // In place: reg |= tp<<47. Clobbers ITYPE.
253
+ | mov64 ITYPE, ((int64_t)tp<<47)
254
+ | or reg, ITYPE
255
+ |.endmacro
256
+ |.macro settp, dst, reg, tp // dst = (tp<<47) | reg. ITYPE is not used.
257
+ | mov64 dst, ((int64_t)tp<<47) // dst is overwritten before reg is read, so dst must not be the same register as reg.
258
+ | or dst, reg
259
+ |.endmacro
260
+ |.macro setint, reg // settp with the LJ_TISNUM tag (in place, clobbers ITYPE).
261
+ | settp reg, LJ_TISNUM
262
+ |.endmacro
263
+ |.macro setint, dst, reg // Three-operand settp with the LJ_TISNUM tag.
264
+ | settp dst, reg, LJ_TISNUM
265
+ |.endmacro
266
+ |
267
+ |// Macros to test operand types. All of them clobber ITYPE and the flags.
268
+ |.macro checktp_nc, reg, tp, target // Jump to target unless the tag of reg equals tp; reg is left unchanged.
269
+ | mov ITYPE, reg
270
+ | sar ITYPE, 47 // Arithmetic shift: ITYPE = sign-extended bits 47-63 of the value.
271
+ | cmp ITYPEd, tp
272
+ | jne target
273
+ |.endmacro
274
+ |.macro checktp, reg, tp, target // Like checktp_nc, but also strips the tag bits from reg.
275
+ | mov ITYPE, reg
276
+ | cleartp reg // Done before the branch, so reg is stripped even when the check fails.
277
+ | sar ITYPE, 47
278
+ | cmp ITYPEd, tp // Sets the flags tested by jne (cleartp's flag changes are overwritten).
279
+ | jne target
280
+ |.endmacro
281
+ |.macro checktptp, src, tp, target // Same body as checktp_nc; only the parameter name differs.
282
+ | mov ITYPE, src
283
+ | sar ITYPE, 47
284
+ | cmp ITYPEd, tp
285
+ | jne target
286
+ |.endmacro
287
+ |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro // checktp for LJ_TSTR.
288
+ |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro // checktp for LJ_TTAB.
289
+ |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro // checktp for LJ_TFUNC.
290
+ |
291
+ |.macro checknumx, reg, target, jump // Compare the tag of reg with LJ_TISNUM, then branch with the given jump instruction.
292
+ | mov ITYPE, reg
293
+ | sar ITYPE, 47
294
+ | cmp ITYPEd, LJ_TISNUM
295
+ | jump target
296
+ |.endmacro
297
+ |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro // Jump if tag != LJ_TISNUM.
298
+ |.macro checkinttp, src, target; checknumx src, target, jne; .endmacro // Same expansion as checkint.
299
+ |.macro checknum, reg, target; checknumx reg, target, jae; .endmacro // Jump if tag >= LJ_TISNUM (unsigned compare).
300
+ |.macro checknumtp, src, target; checknumx src, target, jae; .endmacro // Same expansion as checknum; used with memory operands such as [BASE].
301
+ |.macro checknumber, src, target; checknumx src, target, ja; .endmacro // Jump if tag > LJ_TISNUM (unsigned compare).
302
+ |
303
+ |.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro // reg = all bits set except bit 47.
304
+ |.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro // reg = all bits set except bit 48.
305
+ |
306
+ |// These operands must be used with movzx. They address the 4 byte instruction word just before PC.
307
+ |.define PC_OP, byte [PC-4] // Byte 0 of that word.
308
+ |.define PC_RA, byte [PC-3] // Byte 1.
309
+ |.define PC_RB, byte [PC-1] // Byte 3.
310
+ |.define PC_RC, byte [PC-2] // Byte 2.
311
+ |.define PC_RD, word [PC-2] // Bytes 2-3 as one 16 bit operand; overlaps PC_RC (low byte) and PC_RB (high byte).
312
+ |
313
+ |.macro branchPC, reg
314
+ | lea PC, [PC+reg*4-BCBIAS_J*4] // PC += (reg - BCBIAS_J) * 4. lea does not modify the flags.
315
+ |.endmacro
316
+ |
317
+ |// Assumes DISPATCH is relative to GL. DISPATCH_GL/DISPATCH_J yield a displacement (GG_DISP2G/GG_DISP2J plus the field offset) that is added to DISPATCH, e.g. [DISPATCH+DISPATCH_GL(vmstate)].
318
+ #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
319
+ #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
320
+ |// PC2PROTO: field offset minus sizeof(GCproto), i.e. a negative displacement. Used relative to a bytecode pointer, e.g. [KBASE+PC2PROTO(k)] with KBASE = LFUNC->pc (presumably the bytecode directly follows the GCproto struct).
321
+ #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
322
+ |
323
+ |// Decrement hashed hotcount and trigger trace recorder if zero.
324
+ |.macro hotloop, reg // reg is a scratch register and is overwritten.
325
+ | mov reg, PCd
326
+ | shr reg, 1
327
+ | and reg, HOTCOUNT_PCMASK // reg = (PCd >> 1) & HOTCOUNT_PCMASK, used as a byte offset below.
328
+ | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // Subtract HOTCOUNT_LOOP from the selected 16 bit counter.
329
+ | jb ->vm_hotloop // Taken when the subtraction borrows (unsigned underflow of the counter).
330
+ |.endmacro
331
+ |
332
+ |.macro hotcall, reg // Same as hotloop, but subtracts HOTCOUNT_CALL and targets vm_hotcall.
333
+ | mov reg, PCd
334
+ | shr reg, 1
335
+ | and reg, HOTCOUNT_PCMASK // reg = (PCd >> 1) & HOTCOUNT_PCMASK.
336
+ | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
337
+ | jb ->vm_hotcall // Taken when the subtraction borrows.
338
+ |.endmacro
339
+ |
340
+ |// Set current VM state. The value stored is the bitwise complement of LJ_VMST_<st>.
341
+ |.macro set_vmstate, st
342
+ | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st // ".." pastes st onto the prefix, e.g. set_vmstate C stores ~LJ_VMST_C.
343
+ |.endmacro
344
+ |
345
+ |.macro fpop1; fstp st1; .endmacro // Copy st0 into st1 and pop: removes the second x87 stack entry, keeping the top value.
346
+ |
347
+ |// Synthesize SSE FP constants. Each macro loads a 64 bit pattern into tmp and copies it to reg; tmp is clobbered.
348
+ |.macro sseconst_abs, reg, tmp // Synthesize abs mask.
349
+ | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp // Every bit set except the top (sign) bit; U64x presumably joins the two hex halves.
350
+ |.endmacro
351
+ |
352
+ |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
353
+ | mov64 tmp, U64x(val,00000000); movd reg, tmp // val (8 hex digits) forms the high half, the low half is zero.
354
+ |.endmacro
355
+ |
356
+ |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357
+ | sseconst_hi reg, tmp, 80000000 // Only the top (sign) bit set.
358
+ |.endmacro
359
+ |.macro sseconst_1, reg, tmp // Synthesize 1.0.
360
+ | sseconst_hi reg, tmp, 3ff00000 // IEEE 754 double bit pattern 0x3ff0000000000000.
361
+ |.endmacro
362
+ |.macro sseconst_m1, reg, tmp // Synthesize -1.0.
363
+ | sseconst_hi reg, tmp, bff00000 // Same as 1.0 with the sign bit set.
364
+ |.endmacro
365
+ |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
366
+ | sseconst_hi reg, tmp, 43300000 // IEEE 754 double bit pattern 0x4330000000000000.
367
+ |.endmacro
368
+ |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
369
+ | sseconst_hi reg, tmp, 43380000 // 2^52 pattern with the top mantissa bit also set.
370
+ |.endmacro
371
+ |
372
+ |// Move table write barrier back. Overwrites reg.
373
+ |.macro barrierback, tab, reg
374
+ | and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
375
+ | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current head of g->gc.grayagain.
376
+ | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head.
377
+ | mov tab->gclist, reg // Link the old head behind tab.
378
+ |.endmacro
379
+ |
380
+ |//-----------------------------------------------------------------------
381
+
382
+ /* Generate subroutines used by opcodes and other parts of the VM. */
383
+ /* The .code_sub section should be last to help static branch prediction. */
384
+ static void build_subroutines(BuildCtx *ctx)
385
+ {
386
+ |.code_sub
387
+ |
388
+ |//-----------------------------------------------------------------------
389
+ |//-- Return handling ----------------------------------------------------
390
+ |//-----------------------------------------------------------------------
391
+ |
392
+ |->vm_returnp:
393
+ | test PCd, FRAME_P
394
+ | jz ->cont_dispatch
395
+ |
396
+ | // Return from pcall or xpcall fast func.
397
+ | and PC, -8
398
+ | sub BASE, PC // Restore caller base.
399
+ | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
400
+ | mov PC, [BASE-8] // Fetch PC of previous frame.
401
+ | // Prepending may overwrite the pcall frame, so do it at the end.
402
+ | mov_true ITYPE
403
+ | mov aword [BASE+RA], ITYPE // Prepend true to results.
404
+ |
405
+ |->vm_returnc:
406
+ | add RDd, 1 // RD = nresults+1
407
+ | jz ->vm_unwind_yield
408
+ | mov MULTRES, RDd
409
+ | test PC, FRAME_TYPE
410
+ | jz ->BC_RET_Z // Handle regular return to Lua.
411
+ |
412
+ |->vm_return:
413
+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
414
+ | xor PC, FRAME_C
415
+ | test PCd, FRAME_TYPE
416
+ | jnz ->vm_returnp
417
+ |
418
+ | // Return to C.
419
+ | set_vmstate C
420
+ | and PC, -8
421
+ | sub PC, BASE
422
+ | neg PC // Previous base = BASE - delta.
423
+ |
424
+ | sub RDd, 1
425
+ | jz >2
426
+ |1: // Move results down.
427
+ | mov RB, [BASE+RA]
428
+ | mov [BASE-16], RB
429
+ | add BASE, 8
430
+ | sub RDd, 1
431
+ | jnz <1
432
+ |2:
433
+ | mov L:RB, SAVE_L
434
+ | mov L:RB->base, PC
435
+ |3:
436
+ | mov RDd, MULTRES
437
+ | mov RAd, SAVE_NRES // RA = wanted nresults+1
438
+ |4:
439
+ | cmp RAd, RDd
440
+ | jne >6 // More/less results wanted?
441
+ |5:
442
+ | sub BASE, 16
443
+ | mov L:RB->top, BASE
444
+ |
445
+ |->vm_leave_cp:
446
+ | mov RA, SAVE_CFRAME // Restore previous C frame.
447
+ | mov L:RB->cframe, RA
448
+ | xor eax, eax // Ok return status for vm_pcall.
449
+ |
450
+ |->vm_leave_unw:
451
+ | restoreregs
452
+ | ret
453
+ |
454
+ |6:
455
+ | jb >7 // Less results wanted?
456
+ | // More results wanted. Check stack size and fill up results with nil.
457
+ | cmp BASE, L:RB->maxstack
458
+ | ja >8
459
+ | mov aword [BASE-16], LJ_TNIL
460
+ | add BASE, 8
461
+ | add RDd, 1
462
+ | jmp <4
463
+ |
464
+ |7: // Less results wanted.
465
+ | test RAd, RAd
466
+ | jz <5 // But check for LUA_MULTRET+1.
467
+ | sub RA, RD // Negative result!
468
+ | lea BASE, [BASE+RA*8] // Correct top.
469
+ | jmp <5
470
+ |
471
+ |8: // Corner case: need to grow stack for filling up results.
472
+ | // This can happen if:
473
+ | // - A C function grows the stack (a lot).
474
+ | // - The GC shrinks the stack in between.
475
+ | // - A return back from a lua_call() with (high) nresults adjustment.
476
+ | mov L:RB->top, BASE // Save current top held in BASE (yes).
477
+ | mov MULTRES, RDd // Need to fill only remainder with nil.
478
+ | mov CARG2d, RAd
479
+ | mov CARG1, L:RB
480
+ | call extern lj_state_growstack // (lua_State *L, int n)
481
+ | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
482
+ | jmp <3
483
+ |
484
+ |->vm_unwind_yield:
485
+ | mov al, LUA_YIELD
486
+ | jmp ->vm_unwind_c_eh
487
+ |
488
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
489
+ | // (void *cframe, int errcode)
490
+ | mov eax, CARG2d // Error return status for vm_pcall.
491
+ | mov rsp, CARG1
492
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
493
+ | mov L:RB, SAVE_L
494
+ | mov GL:RB, L:RB->glref
495
+ | mov dword GL:RB->vmstate, ~LJ_VMST_C
496
+ | jmp ->vm_leave_unw
497
+ |
498
+ |->vm_unwind_rethrow:
499
+ |.if not X64WIN
500
+ | mov CARG1, SAVE_L
501
+ | mov CARG2d, eax
502
+ | restoreregs
503
+ | jmp extern lj_err_throw // (lua_State *L, int errcode)
504
+ |.endif
505
+ |
506
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
507
+ | // (void *cframe)
508
+ | and CARG1, CFRAME_RAWMASK
509
+ | mov rsp, CARG1
510
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
511
+ | mov L:RB, SAVE_L
512
+ | mov RDd, 1+1 // Really 1+2 results, incr. later.
513
+ | mov BASE, L:RB->base
514
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
515
+ | add DISPATCH, GG_G2DISP
516
+ | mov PC, [BASE-8] // Fetch PC of previous frame.
517
+ | mov_false RA
518
+ | mov RB, [BASE]
519
+ | mov [BASE-16], RA // Prepend false to error message.
520
+ | mov [BASE-8], RB
521
+ | mov RA, -16 // Results start at BASE+RA = BASE-16.
522
+ | set_vmstate INTERP
523
+ | jmp ->vm_returnc // Increments RD/MULTRES and returns.
524
+ |
525
+ |//-----------------------------------------------------------------------
526
+ |//-- Grow stack for calls -----------------------------------------------
527
+ |//-----------------------------------------------------------------------
528
+ |
529
+ |->vm_growstack_c: // Grow stack for C function.
530
+ | mov CARG2d, LUA_MINSTACK
531
+ | jmp >2
532
+ |
533
+ |->vm_growstack_v: // Grow stack for vararg Lua function.
534
+ | sub RD, 8
535
+ | jmp >1
536
+ |
537
+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
538
+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
539
+ | lea RD, [BASE+NARGS:RD*8-8]
540
+ |1:
541
+ | movzx RAd, byte [PC-4+PC2PROTO(framesize)]
542
+ | add PC, 4 // Must point after first instruction.
543
+ | mov L:RB->base, BASE
544
+ | mov L:RB->top, RD
545
+ | mov SAVE_PC, PC
546
+ | mov CARG2, RA
547
+ |2:
548
+ | // RB = L, L->base = new base, L->top = top
549
+ | mov CARG1, L:RB
550
+ | call extern lj_state_growstack // (lua_State *L, int n)
551
+ | mov BASE, L:RB->base
552
+ | mov RD, L:RB->top
553
+ | mov LFUNC:RB, [BASE-16]
554
+ | cleartp LFUNC:RB
555
+ | sub RD, BASE
556
+ | shr RDd, 3
557
+ | add NARGS:RDd, 1
558
+ | // BASE = new base, RB = LFUNC, RD = nargs+1
559
+ | ins_callt // Just retry the call.
560
+ |
561
+ |//-----------------------------------------------------------------------
562
+ |//-- Entry points into the assembler VM ---------------------------------
563
+ |//-----------------------------------------------------------------------
564
+ |
565
+ |->vm_resume: // Setup C frame and resume thread.
566
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
567
+ | saveregs
568
+ | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
569
+ | mov SAVE_L, CARG1
570
+ | mov RA, CARG2
571
+ | mov PCd, FRAME_CP
572
+ | xor RDd, RDd
573
+ | lea KBASE, [esp+CFRAME_RESUME]
574
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
575
+ | add DISPATCH, GG_G2DISP
576
+ | mov SAVE_PC, RD // Any value outside of bytecode is ok.
577
+ | mov SAVE_CFRAME, RD
578
+ | mov SAVE_NRES, RDd
579
+ | mov SAVE_ERRF, RDd
580
+ | mov L:RB->cframe, KBASE
581
+ | cmp byte L:RB->status, RDL
582
+ | je >2 // Initial resume (like a call).
583
+ |
584
+ | // Resume after yield (like a return).
585
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
586
+ | set_vmstate INTERP
587
+ | mov byte L:RB->status, RDL
588
+ | mov BASE, L:RB->base
589
+ | mov RD, L:RB->top
590
+ | sub RD, RA
591
+ | shr RDd, 3
592
+ | add RDd, 1 // RD = nresults+1
593
+ | sub RA, BASE // RA = resultofs
594
+ | mov PC, [BASE-8]
595
+ | mov MULTRES, RDd
596
+ | test PCd, FRAME_TYPE
597
+ | jz ->BC_RET_Z
598
+ | jmp ->vm_return
599
+ |
600
+ |->vm_pcall: // Setup protected C frame and enter VM.
601
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
602
+ | saveregs
603
+ | mov PCd, FRAME_CP
604
+ | mov SAVE_ERRF, CARG4d
605
+ | jmp >1
606
+ |
607
+ |->vm_call: // Setup C frame and enter VM.
608
+ | // (lua_State *L, TValue *base, int nres1)
609
+ | saveregs
610
+ | mov PCd, FRAME_C
611
+ |
612
+ |1: // Entry point for vm_pcall above (PC = ftype).
613
+ | mov SAVE_NRES, CARG3d
614
+ | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
615
+ | mov SAVE_L, CARG1
616
+ | mov RA, CARG2
617
+ |
618
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
619
+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
620
+ | mov SAVE_CFRAME, KBASE
621
+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
622
+ | add DISPATCH, GG_G2DISP
623
+ | mov L:RB->cframe, rsp
624
+ |
625
+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
626
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
627
+ | set_vmstate INTERP
628
+ | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
629
+ | add PC, RA
630
+ | sub PC, BASE // PC = frame delta + frame type
631
+ |
632
+ | mov RD, L:RB->top
633
+ | sub RD, RA
634
+ | shr NARGS:RDd, 3
635
+ | add NARGS:RDd, 1 // RD = nargs+1
636
+ |
637
+ |->vm_call_dispatch:
638
+ | mov LFUNC:RB, [RA-16]
639
+ | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
640
+ |
641
+ |->vm_call_dispatch_f:
642
+ | mov BASE, RA
643
+ | ins_call
644
+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
645
+ |
646
+ |->vm_cpcall: // Setup protected C frame, call C.
647
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
648
+ | saveregs
649
+ | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
650
+ | mov SAVE_L, CARG1
651
+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
652
+ |
653
+ | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
654
+ | sub KBASE, L:RB->top
655
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
656
+ | mov SAVE_ERRF, 0 // No error function.
657
+ | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
658
+ | add DISPATCH, GG_G2DISP
659
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
660
+ |
661
+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
662
+ | mov SAVE_CFRAME, KBASE
663
+ | mov L:RB->cframe, rsp
664
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
665
+ |
666
+ | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
667
+ | // TValue * (new base) or NULL returned in eax (RC).
668
+ | test RC, RC
669
+ | jz ->vm_leave_cp // No base? Just remove C frame.
670
+ | mov RA, RC
671
+ | mov PCd, FRAME_CP
672
+ | jmp <2 // Else continue with the call.
673
+ |
674
+ |//-----------------------------------------------------------------------
675
+ |//-- Metamethod handling ------------------------------------------------
676
+ |//-----------------------------------------------------------------------
677
+ |
678
+ |//-- Continuation dispatch ----------------------------------------------
679
+ |
680
+ |->cont_dispatch:
681
+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
682
+ | add RA, BASE
683
+ | and PC, -8
684
+ | mov RB, BASE
685
+ | sub BASE, PC // Restore caller BASE.
686
+ | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
687
+ | mov RC, RA // ... in [RC]
688
+ | mov PC, [RB-24] // Restore PC from [cont|PC].
689
+ | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
690
+ |.if FFI
691
+ | cmp RA, 1
692
+ | jbe >1
693
+ |.endif
694
+ | mov LFUNC:KBASE, [BASE-16]
695
+ | cleartp LFUNC:KBASE
696
+ | mov KBASE, LFUNC:KBASE->pc
697
+ | mov KBASE, [KBASE+PC2PROTO(k)]
698
+ | // BASE = base, RC = result, RB = meta base
699
+ | jmp RA // Jump to continuation.
700
+ |
701
+ |.if FFI
702
+ |1:
703
+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
704
+ | // cont = 0: Tail call from C function.
705
+ | sub RB, BASE
706
+ | shr RBd, 3
707
+ | lea RDd, [RBd-3]
708
+ | jmp ->vm_call_tail
709
+ |.endif
710
+ |
711
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
712
+ | movzx RAd, PC_RB
713
+ | sub RB, 32
714
+ | lea RA, [BASE+RA*8]
715
+ | sub RA, RB
716
+ | je ->cont_ra
717
+ | neg RA
718
+ | shr RAd, 3
719
+ |.if X64WIN
720
+ | mov CARG3d, RAd
721
+ | mov L:CARG1, SAVE_L
722
+ | mov L:CARG1->base, BASE
723
+ | mov RC, [RC]
724
+ | mov [RB], RC
725
+ | mov CARG2, RB
726
+ |.else
727
+ | mov L:CARG1, SAVE_L
728
+ | mov L:CARG1->base, BASE
729
+ | mov CARG3d, RAd
730
+ | mov RA, [RC]
731
+ | mov [RB], RA
732
+ | mov CARG2, RB
733
+ |.endif
734
+ | jmp ->BC_CAT_Z
735
+ |
736
+ |//-- Table indexing metamethods -----------------------------------------
737
+ |
738
+ |->vmeta_tgets:
739
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
740
+ | mov TMP1, STR:RC
741
+ | lea RC, TMP1
742
+ | cmp PC_OP, BC_GGET
743
+ | jne >1
744
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
745
+ | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
746
+ | mov [RB], TAB:RA
747
+ | jmp >2
748
+ |
749
+ |->vmeta_tgetb:
750
+ | movzx RCd, PC_RC
751
+ |.if DUALNUM
752
+ | setint RC
753
+ | mov TMP1, RC
754
+ |.else
755
+ | cvtsi2sd xmm0, RCd
756
+ | movsd TMP1, xmm0
757
+ |.endif
758
+ | lea RC, TMP1
759
+ | jmp >1
760
+ |
761
+ |->vmeta_tgetv:
762
+ | movzx RCd, PC_RC // Reload TValue *k from RC.
763
+ | lea RC, [BASE+RC*8]
764
+ |1:
765
+ | movzx RBd, PC_RB // Reload TValue *t from RB.
766
+ | lea RB, [BASE+RB*8]
767
+ |2:
768
+ | mov L:CARG1, SAVE_L
769
+ | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
770
+ | mov CARG2, RB
771
+ | mov CARG3, RC
772
+ | mov L:RB, L:CARG1
773
+ | mov SAVE_PC, PC
774
+ | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
775
+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
776
+ | mov BASE, L:RB->base
777
+ | test RC, RC
778
+ | jz >3
779
+ |->cont_ra: // BASE = base, RC = result
780
+ | movzx RAd, PC_RA
781
+ | mov RB, [RC]
782
+ | mov [BASE+RA*8], RB
783
+ | ins_next
784
+ |
785
+ |3: // Call __index metamethod.
786
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
787
+ | mov RA, L:RB->top
788
+ | mov [RA-24], PC // [cont|PC]
789
+ | lea PC, [RA+FRAME_CONT]
790
+ | sub PC, BASE
791
+ | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
792
+ | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
793
+ | cleartp LFUNC:RB
794
+ | jmp ->vm_call_dispatch_f
795
+ |
796
+ |->vmeta_tgetr:
797
+ | mov CARG1, TAB:RB
798
+ | mov RB, BASE // Save BASE.
799
+ | mov CARG2d, RCd // Caveat: CARG2 == BASE
800
+ | call extern lj_tab_getinth // (GCtab *t, int32_t key)
801
+ | // cTValue * or NULL returned in eax (RC).
802
+ | movzx RAd, PC_RA
803
+ | mov BASE, RB // Restore BASE.
804
+ | test RC, RC
805
+ | jnz ->BC_TGETR_Z
806
+ | mov ITYPE, LJ_TNIL
807
+ | jmp ->BC_TGETR2_Z
808
+ |
809
+ |//-----------------------------------------------------------------------
810
+ |
811
+ |->vmeta_tsets:
812
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
813
+ | mov TMP1, STR:RC
814
+ | lea RC, TMP1
815
+ | cmp PC_OP, BC_GSET
816
+ | jne >1
817
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
818
+ | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
819
+ | mov [RB], TAB:RA
820
+ | jmp >2
821
+ |
822
+ |->vmeta_tsetb:
823
+ | movzx RCd, PC_RC
824
+ |.if DUALNUM
825
+ | setint RC
826
+ | mov TMP1, RC
827
+ |.else
828
+ | cvtsi2sd xmm0, RCd
829
+ | movsd TMP1, xmm0
830
+ |.endif
831
+ | lea RC, TMP1
832
+ | jmp >1
833
+ |
834
+ |->vmeta_tsetv:
835
+ | movzx RCd, PC_RC // Reload TValue *k from RC.
836
+ | lea RC, [BASE+RC*8]
837
+ |1:
838
+ | movzx RBd, PC_RB // Reload TValue *t from RB.
839
+ | lea RB, [BASE+RB*8]
840
+ |2:
841
+ | mov L:CARG1, SAVE_L
842
+ | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
843
+ | mov CARG2, RB
844
+ | mov CARG3, RC
845
+ | mov L:RB, L:CARG1
846
+ | mov SAVE_PC, PC
847
+ | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
848
+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
849
+ | mov BASE, L:RB->base
850
+ | test RC, RC
851
+ | jz >3
852
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
853
+ | movzx RAd, PC_RA
854
+ | mov RB, [BASE+RA*8]
855
+ | mov [RC], RB
856
+ |->cont_nop: // BASE = base, (RC = result)
857
+ | ins_next
858
+ |
859
+ |3: // Call __newindex metamethod.
860
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
861
+ | mov RA, L:RB->top
862
+ | mov [RA-24], PC // [cont|PC]
863
+ | movzx RCd, PC_RA
864
+ | // Copy value to third argument.
865
+ | mov RB, [BASE+RC*8]
866
+ | mov [RA+16], RB
867
+ | lea PC, [RA+FRAME_CONT]
868
+ | sub PC, BASE
869
+ | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
870
+ | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
871
+ | cleartp LFUNC:RB
872
+ | jmp ->vm_call_dispatch_f
873
+ |
874
+ |->vmeta_tsetr:
875
+ |.if X64WIN
876
+ | mov L:CARG1, SAVE_L
877
+ | mov CARG3d, RCd
878
+ | mov L:CARG1->base, BASE
879
+ | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE.
880
+ |.else
881
+ | mov L:CARG1, SAVE_L
882
+ | mov CARG2, TAB:RB
883
+ | mov L:CARG1->base, BASE
884
+ | mov RB, BASE // Save BASE.
885
+ | mov CARG3d, RCd // Caveat: CARG3 == BASE.
886
+ |.endif
887
+ | mov SAVE_PC, PC
888
+ | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
889
+ | // TValue * returned in eax (RC).
890
+ | movzx RAd, PC_RA
891
+ | mov BASE, RB // Restore BASE.
892
+ | jmp ->BC_TSETR_Z
893
+ |
894
+ |//-- Comparison metamethods ---------------------------------------------
895
+ |
896
+ |->vmeta_comp:
897
+ | movzx RDd, PC_RD
898
+ | movzx RAd, PC_RA
899
+ | mov L:RB, SAVE_L
900
+ | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
901
+ |.if X64WIN
902
+ | lea CARG3, [BASE+RD*8]
903
+ | lea CARG2, [BASE+RA*8]
904
+ |.else
905
+ | lea CARG2, [BASE+RA*8]
906
+ | lea CARG3, [BASE+RD*8]
907
+ |.endif
908
+ | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
909
+ | movzx CARG4d, PC_OP
910
+ | mov SAVE_PC, PC
911
+ | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
912
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
913
+ |3:
914
+ | mov BASE, L:RB->base
915
+ | cmp RC, 1
916
+ | ja ->vmeta_binop
917
+ |4:
918
+ | lea PC, [PC+4]
919
+ | jb >6
920
+ |5:
921
+ | movzx RDd, PC_RD
922
+ | branchPC RD
923
+ |6:
924
+ | ins_next
925
+ |
926
+ |->cont_condt: // BASE = base, RC = result
927
+ | add PC, 4
928
+ | mov ITYPE, [RC]
929
+ | sar ITYPE, 47
930
+ | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
931
+ | jb <5
932
+ | jmp <6
933
+ |
934
+ |->cont_condf: // BASE = base, RC = result
935
+ | mov ITYPE, [RC]
936
+ | sar ITYPE, 47
937
+ | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
938
+ | jmp <4
939
+ |
940
+ |->vmeta_equal:
941
+ | cleartp TAB:RD
942
+ | sub PC, 4
943
+ |.if X64WIN
944
+ | mov CARG3, RD
945
+ | mov CARG4d, RBd
946
+ | mov L:RB, SAVE_L
947
+ | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
948
+ | mov CARG2, RA
949
+ | mov CARG1, L:RB // Caveat: CARG1 == RA.
950
+ |.else
951
+ | mov CARG2, RA
952
+ | mov CARG4d, RBd // Caveat: CARG4 == RA.
953
+ | mov L:RB, SAVE_L
954
+ | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
955
+ | mov CARG3, RD
956
+ | mov CARG1, L:RB
957
+ |.endif
958
+ | mov SAVE_PC, PC
959
+ | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
960
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
961
+ | jmp <3
962
+ |
963
+ |->vmeta_equal_cd:
964
+ |.if FFI
965
+ | sub PC, 4
966
+ | mov L:RB, SAVE_L
967
+ | mov L:RB->base, BASE
968
+ | mov CARG1, L:RB
969
+ | mov CARG2d, dword [PC-4]
970
+ | mov SAVE_PC, PC
971
+ | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
972
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
973
+ | jmp <3
974
+ |.endif
975
+ |
976
+ |->vmeta_istype:
977
+ | mov L:RB, SAVE_L
978
+ | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
979
+ | mov CARG2d, RAd
980
+ | mov CARG3d, RDd
981
+ | mov L:CARG1, L:RB
982
+ | mov SAVE_PC, PC
983
+ | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
984
+ | mov BASE, L:RB->base
985
+ | jmp <6
986
+ |
987
+ |//-- Arithmetic metamethods ---------------------------------------------
988
+ |
989
+ |->vmeta_arith_vno:
990
+ |.if DUALNUM
991
+ | movzx RBd, PC_RB
992
+ | movzx RCd, PC_RC
993
+ |.endif
994
+ |->vmeta_arith_vn:
995
+ | lea RC, [KBASE+RC*8]
996
+ | jmp >1
997
+ |
998
+ |->vmeta_arith_nvo:
999
+ |.if DUALNUM
1000
+ | movzx RBd, PC_RB
1001
+ | movzx RCd, PC_RC
1002
+ |.endif
1003
+ |->vmeta_arith_nv:
1004
+ | lea TMPR, [KBASE+RC*8]
1005
+ | lea RC, [BASE+RB*8]
1006
+ | mov RB, TMPR
1007
+ | jmp >2
1008
+ |
1009
+ |->vmeta_unm:
1010
+ | lea RC, [BASE+RD*8]
1011
+ | mov RB, RC
1012
+ | jmp >2
1013
+ |
1014
+ |->vmeta_arith_vvo:
1015
+ |.if DUALNUM
1016
+ | movzx RBd, PC_RB
1017
+ | movzx RCd, PC_RC
1018
+ |.endif
1019
+ |->vmeta_arith_vv:
1020
+ | lea RC, [BASE+RC*8]
1021
+ |1:
1022
+ | lea RB, [BASE+RB*8]
1023
+ |2:
1024
+ | lea RA, [BASE+RA*8]
1025
+ |.if X64WIN
1026
+ | mov CARG3, RB
1027
+ | mov CARG4, RC
1028
+ | movzx RCd, PC_OP
1029
+ | mov ARG5d, RCd
1030
+ | mov L:RB, SAVE_L
1031
+ | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1032
+ | mov CARG2, RA
1033
+ | mov CARG1, L:RB // Caveat: CARG1 == RA.
1034
+ |.else
1035
+ | movzx CARG5d, PC_OP
1036
+ | mov CARG2, RA
1037
+ | mov CARG4, RC // Caveat: CARG4 == RA.
1038
+ | mov L:CARG1, SAVE_L
1039
+ | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1040
+ | mov CARG3, RB
1041
+ | mov L:RB, L:CARG1
1042
+ |.endif
1043
+ | mov SAVE_PC, PC
1044
+ | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1045
+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1046
+ | mov BASE, L:RB->base
1047
+ | test RC, RC
1048
+ | jz ->cont_nop
1049
+ |
1050
+ | // Call metamethod for binary op.
1051
+ |->vmeta_binop:
1052
+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
1053
+ | mov RA, RC
1054
+ | sub RC, BASE
1055
+ | mov [RA-24], PC // [cont|PC]
1056
+ | lea PC, [RC+FRAME_CONT]
1057
+ | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1058
+ | jmp ->vm_call_dispatch
1059
+ |
1060
+ |->vmeta_len:
1061
+ | movzx RDd, PC_RD
1062
+ | mov L:RB, SAVE_L
1063
+ | mov L:RB->base, BASE
1064
+ | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1065
+ | mov L:CARG1, L:RB
1066
+ | mov SAVE_PC, PC
1067
+ | call extern lj_meta_len // (lua_State *L, TValue *o)
1068
+ | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1069
+ | mov BASE, L:RB->base
1070
+ #if LJ_52
1071
+ | test RC, RC
1072
+ | jne ->vmeta_binop // Binop call for compatibility.
1073
+ | movzx RDd, PC_RD
1074
+ | mov TAB:CARG1, [BASE+RD*8]
1075
+ | cleartp TAB:CARG1
1076
+ | jmp ->BC_LEN_Z
1077
+ #else
1078
+ | jmp ->vmeta_binop // Binop call for compatibility.
1079
+ #endif
1080
+ |
1081
+ |//-- Call metamethod ----------------------------------------------------
1082
+ |
1083
+ |->vmeta_call_ra:
1084
+ | lea RA, [BASE+RA*8+16]
1085
+ |->vmeta_call: // Resolve and call __call metamethod.
1086
+ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087
+ | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1088
+ | mov RB, RA
1089
+ |.if X64WIN
1090
+ | mov L:TMPR, SAVE_L
1091
+ | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092
+ | lea CARG2, [RA-16]
1093
+ | lea CARG3, [RA+NARGS:RD*8-8]
1094
+ | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1095
+ |.else
1096
+ | mov L:CARG1, SAVE_L
1097
+ | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098
+ | lea CARG2, [RA-16]
1099
+ | lea CARG3, [RA+NARGS:RD*8-8]
1100
+ |.endif
1101
+ | mov SAVE_PC, PC
1102
+ | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1103
+ | mov RA, RB
1104
+ | mov L:RB, SAVE_L
1105
+ | mov BASE, L:RB->base
1106
+ | mov NARGS:RDd, TMP1d
1107
+ | mov LFUNC:RB, [RA-16]
1108
+ | cleartp LFUNC:RB
1109
+ | add NARGS:RDd, 1
1110
+ | // This is fragile. L->base must not move, KBASE must always be defined.
1111
+ | cmp KBASE, BASE // Continue with CALLT if flag set.
1112
+ | je ->BC_CALLT_Z
1113
+ | mov BASE, RA
1114
+ | ins_call // Otherwise call resolved metamethod.
1115
+ |
1116
+ |//-- Argument coercion for 'for' statement ------------------------------
1117
+ |
1118
+ |->vmeta_for:
1119
+ | mov L:RB, SAVE_L
1120
+ | mov L:RB->base, BASE
1121
+ | mov CARG2, RA // Caveat: CARG2 == BASE
1122
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1123
+ | mov SAVE_PC, PC
1124
+ | call extern lj_meta_for // (lua_State *L, TValue *base)
1125
+ | mov BASE, L:RB->base
1126
+ | mov RCd, [PC-4]
1127
+ | movzx RAd, RCH
1128
+ | movzx OP, RCL
1129
+ | shr RCd, 16
1130
+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1131
+ |
1132
+ |//-----------------------------------------------------------------------
1133
+ |//-- Fast functions -----------------------------------------------------
1134
+ |//-----------------------------------------------------------------------
1135
+ |
1136
+ |.macro .ffunc, name
1137
+ |->ff_ .. name:
1138
+ |.endmacro
1139
+ |
1140
+ |.macro .ffunc_1, name
1141
+ |->ff_ .. name:
1142
+ | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1143
+ |.endmacro
1144
+ |
1145
+ |.macro .ffunc_2, name
1146
+ |->ff_ .. name:
1147
+ | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1148
+ |.endmacro
1149
+ |
1150
+ |.macro .ffunc_n, name, op
1151
+ | .ffunc_1 name
1152
+ | checknumtp [BASE], ->fff_fallback
1153
+ | op xmm0, qword [BASE]
1154
+ |.endmacro
1155
+ |
1156
+ |.macro .ffunc_n, name
1157
+ | .ffunc_n name, movsd
1158
+ |.endmacro
1159
+ |
1160
+ |.macro .ffunc_nn, name
1161
+ | .ffunc_2 name
1162
+ | checknumtp [BASE], ->fff_fallback
1163
+ | checknumtp [BASE+8], ->fff_fallback
1164
+ | movsd xmm0, qword [BASE]
1165
+ | movsd xmm1, qword [BASE+8]
1166
+ |.endmacro
1167
+ |
1168
+ |// Inlined GC threshold check. Caveat: uses label 1.
1169
+ |.macro ffgccheck
1170
+ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1171
+ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1172
+ | jb >1
1173
+ | call ->fff_gcstep
1174
+ |1:
1175
+ |.endmacro
1176
+ |
1177
+ |//-- Base library: checks -----------------------------------------------
1178
+ |
1179
+ |.ffunc_1 assert
1180
+ | mov ITYPE, [BASE]
1181
+ | mov RB, ITYPE
1182
+ | sar ITYPE, 47
1183
+ | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
1184
+ | mov PC, [BASE-8]
1185
+ | mov MULTRES, RDd
1186
+ | mov RB, [BASE]
1187
+ | mov [BASE-16], RB
1188
+ | sub RDd, 2
1189
+ | jz >2
1190
+ | mov RA, BASE
1191
+ |1:
1192
+ | add RA, 8
1193
+ | mov RB, [RA]
1194
+ | mov [RA-16], RB
1195
+ | sub RDd, 1
1196
+ | jnz <1
1197
+ |2:
1198
+ | mov RDd, MULTRES
1199
+ | jmp ->fff_res_
1200
+ |
1201
+ |.ffunc_1 type
1202
+ | mov RC, [BASE]
1203
+ | sar RC, 47 // Shift the tag bits of the argument down.
1204
+ | mov RBd, LJ_TISNUM
1205
+ | cmp RCd, RBd
1206
+ | cmovb RCd, RBd // Tags below LJ_TISNUM (unsigned) are replaced by LJ_TISNUM.
1207
+ | not RCd // Complement of the tag is used as the upvalue index below.
1208
+ |2:
1209
+ | mov CFUNC:RB, [BASE-16]
1210
+ | cleartp CFUNC:RB
1211
+ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] // Load upvalue[RC] of the called C function.
1212
+ | mov PC, [BASE-8]
1213
+ | settp STR:RC, LJ_TSTR
1214
+ | mov [BASE-16], STR:RC // Result: the string taken from the upvalue.
1215
+ | jmp ->fff_res1
1216
+ |
1217
+ |//-- Base library: getters and setters ---------------------------------
1218
+ |
1219
+ |.ffunc_1 getmetatable
1220
+ | mov TAB:RB, [BASE]
1221
+ | mov PC, [BASE-8]
1222
+ | checktab TAB:RB, >6 // Not a table: handled at label 6.
1223
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
1224
+ | mov TAB:RB, TAB:RB->metatable
1225
+ |2:
1226
+ | test TAB:RB, TAB:RB
1227
+ | mov aword [BASE-16], LJ_TNIL // Result defaults to nil (mov leaves the flags intact).
1228
+ | jz ->fff_res1 // No metatable: return nil.
1229
+ | settp TAB:RC, TAB:RB, LJ_TTAB
1230
+ | mov [BASE-16], TAB:RC // Store metatable as default result.
1231
+ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1232
+ | mov RAd, TAB:RB->hmask
1233
+ | and RAd, STR:RC->hash // Hash slot index = hmask & hash of the key string.
1234
+ | settp STR:RC, LJ_TSTR
1235
+ | imul RAd, #NODE
1236
+ | add NODE:RA, TAB:RB->node // RA = first node of the hash chain.
1237
+ |3: // Rearranged logic, because we expect _not_ to find the key.
1238
+ | cmp NODE:RA->key, STR:RC
1239
+ | je >5
1240
+ |4:
1241
+ | mov NODE:RA, NODE:RA->next
1242
+ | test NODE:RA, NODE:RA
1243
+ | jnz <3
1244
+ | jmp ->fff_res1 // Not found, keep default result.
1245
+ |5:
1246
+ | mov RB, NODE:RA->val
1247
+ | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1248
+ | mov [BASE-16], RB // Return value of mt.__metatable.
1249
+ | jmp ->fff_res1
1250
+ |
1251
+ |6: // NOTE(review): ITYPE is presumably the tag left behind by checktab (macro not visible here) -- confirm.
1252
+ | cmp ITYPEd, LJ_TUDATA; je <1
1253
+ | cmp ITYPEd, LJ_TISNUM; ja >7
1254
+ | mov ITYPEd, LJ_TISNUM // Tags not above LJ_TISNUM all use the LJ_TISNUM slot.
1255
+ |7:
1256
+ | not ITYPEd
1257
+ | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] // Base metatable indexed by the complemented tag.
1258
+ | jmp <2
1259
+ |
1260
+ |.ffunc_2 setmetatable
1261
+ | mov TAB:RB, [BASE]
1262
+ | mov TAB:TMPR, TAB:RB // Keep the original 1st argument for the result store below.
1263
+ | checktab TAB:RB, ->fff_fallback
1264
+ | // Fast path: no mt for table yet and not clearing the mt.
1265
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1266
+ | mov TAB:RA, [BASE+8]
1267
+ | checktab TAB:RA, ->fff_fallback // 2nd argument must be a table as well.
1268
+ | mov TAB:RB->metatable, TAB:RA
1269
+ | mov PC, [BASE-8]
1270
+ | mov [BASE-16], TAB:TMPR // Return original table.
1271
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1272
+ | jz >1
1273
+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1274
+ | barrierback TAB:RB, RC
1275
+ |1:
1276
+ | jmp ->fff_res1
1277
+ |
1278
+ |.ffunc_2 rawget
1279
+ |.if X64WIN
1280
+ | mov TAB:RA, [BASE]
1281
+ | checktab TAB:RA, ->fff_fallback
1282
+ | mov RB, BASE // Save BASE.
1283
+ | lea CARG3, [BASE+8] // Key pointer; must be computed before BASE is overwritten below.
1284
+ | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1285
+ | mov CARG1, SAVE_L
1286
+ |.else
1287
+ | mov TAB:CARG2, [BASE]
1288
+ | checktab TAB:CARG2, ->fff_fallback
1289
+ | mov RB, BASE // Save BASE.
1290
+ | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1291
+ | mov CARG1, SAVE_L
1292
+ |.endif
1293
+ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1294
+ | // cTValue * returned in eax (RD).
1295
+ | mov BASE, RB // Restore BASE.
1296
+ | // Copy table slot.
1297
+ | mov RB, [RD]
1298
+ | mov PC, [BASE-8]
1299
+ | mov [BASE-16], RB
1300
+ | jmp ->fff_res1
1301
+ |
1302
+ |//-- Base library: conversions ------------------------------------------
1303
+ |
1304
+ |.ffunc tonumber
1305
+ | // Only handles the number case inline (without a base argument).
1306
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1307
+ | mov RB, [BASE]
1308
+ | checknumber RB, ->fff_fallback
1309
+ | mov PC, [BASE-8]
1310
+ | mov [BASE-16], RB // The argument is returned unchanged.
1311
+ | jmp ->fff_res1
1312
+ |
1313
+ |.ffunc_1 tostring
1314
+ | // Only handles the string or number case inline.
1315
+ | mov PC, [BASE-8]
1316
+ | mov STR:RB, [BASE]
1317
+ | checktp_nc STR:RB, LJ_TSTR, >3 // Not a string: try the number case at label 3.
1318
+ | // A __tostring method in the string base metatable is ignored.
1319
+ |2:
1320
+ | mov [BASE-16], STR:RB // Store the (tagged) string as the single result.
1321
+ | jmp ->fff_res1
1322
+ |3: // Handle numbers inline, unless a number base metatable is present.
1323
+ | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1324
+ | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1325
+ | jne ->fff_fallback
1326
+ | ffgccheck // Caveat: uses label 1.
1327
+ | mov L:RB, SAVE_L
1328
+ | mov L:RB->base, BASE // Add frame since C call can throw.
1329
+ | mov SAVE_PC, PC // Redundant (but a defined value).
1330
+ |.if not X64WIN
1331
+ | mov CARG2, BASE // Otherwise: CARG2 == BASE
1332
+ |.endif
1333
+ | mov L:CARG1, L:RB
1334
+ |.if DUALNUM
1335
+ | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1336
+ |.else
1337
+ | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1338
+ |.endif
1339
+ | // GCstr returned in eax (RD).
1340
+ | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
1341
+ | settp STR:RB, RD, LJ_TSTR
1342
+ | jmp <2
1343
+ |
1344
+ |//-- Base library: iterators -------------------------------------------
1345
+ |
1346
+ |.ffunc_1 next
1347
+ | je >2 // Missing 2nd arg? (Flags still come from the nargs compare in .ffunc_1.)
1348
+ |1:
1349
+ |.if X64WIN
1350
+ | mov RA, [BASE]
1351
+ | checktab RA, ->fff_fallback
1352
+ |.else
1353
+ | mov CARG2, [BASE]
1354
+ | checktab CARG2, ->fff_fallback
1355
+ |.endif
1356
+ | mov L:RB, SAVE_L
1357
+ | mov L:RB->base, BASE // Add frame since C call can throw.
1358
+ | mov L:RB->top, BASE // Dummy frame length is ok.
1359
+ | mov PC, [BASE-8]
1360
+ |.if X64WIN
1361
+ | lea CARG3, [BASE+8]
1362
+ | mov CARG2, RA // Caveat: CARG2 == BASE.
1363
+ | mov CARG1, L:RB
1364
+ |.else
1365
+ | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1366
+ | mov CARG1, L:RB
1367
+ |.endif
1368
+ | mov SAVE_PC, PC // Needed for ITERN fallback.
1369
+ | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1370
+ | // Flag returned in eax (RD).
1371
+ | mov BASE, L:RB->base
1372
+ | test RDd, RDd; jz >3 // End of traversal?
1373
+ | // Copy key and value to results.
1374
+ | mov RB, [BASE+8]
1375
+ | mov RD, [BASE+16]
1376
+ | mov [BASE-16], RB
1377
+ | mov [BASE-8], RD
1378
+ |->fff_res2: // Shared exit: return two results.
1379
+ | mov RDd, 1+2
1380
+ | jmp ->fff_res
1381
+ |2: // Set missing 2nd arg to nil.
1382
+ | mov aword [BASE+8], LJ_TNIL
1383
+ | jmp <1
1384
+ |3: // End of traversal: return nil.
1385
+ | mov aword [BASE-16], LJ_TNIL
1386
+ | jmp ->fff_res1
1387
+ |
1388
+ |.ffunc_1 pairs
1389
+ | mov TAB:RB, [BASE]
1390
+ | mov TMPR, TAB:RB // Keep the original table argument for the 2nd result.
1391
+ | checktab TAB:RB, ->fff_fallback
1392
+ #if LJ_52
1393
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback // With LJ_52: any metatable -> fallback.
1394
+ #endif
1395
+ | mov CFUNC:RD, [BASE-16]
1396
+ | cleartp CFUNC:RD
1397
+ | mov CFUNC:RD, CFUNC:RD->upvalue[0] // 1st result is taken from upvalue[0] of the called function.
1398
+ | settp CFUNC:RD, LJ_TFUNC
1399
+ | mov PC, [BASE-8]
1400
+ | mov [BASE-16], CFUNC:RD
1401
+ | mov [BASE-8], TMPR // 2nd result: the table.
1402
+ | mov aword [BASE], LJ_TNIL // 3rd result: nil.
1403
+ | mov RDd, 1+3
1404
+ | jmp ->fff_res
1405
+ |
1406
+ |.ffunc_2 ipairs_aux
1407
+ | mov TAB:RB, [BASE]
1408
+ | checktab TAB:RB, ->fff_fallback
1409
+ |.if DUALNUM
1410
+ | mov RA, [BASE+8]
1411
+ | checkint RA, ->fff_fallback
1412
+ |.else
1413
+ | checknumtp [BASE+8], ->fff_fallback
1414
+ | movsd xmm0, qword [BASE+8]
1415
+ |.endif
1416
+ | mov PC, [BASE-8]
1417
+ |.if DUALNUM
1418
+ | add RAd, 1 // Next index = index + 1.
1419
+ | setint ITYPE, RA
1420
+ | mov [BASE-16], ITYPE // 1st result: the incremented index.
1421
+ |.else
1422
+ | sseconst_1 xmm1, TMPR
1423
+ | addsd xmm0, xmm1 // Next index = index + 1.
1424
+ | cvttsd2si RAd, xmm0 // Truncated integer copy of the index for the array lookup.
1425
+ | movsd qword [BASE-16], xmm0 // 1st result: the incremented index.
1426
+ |.endif
1427
+ | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1428
+ | mov RD, TAB:RB->array
1429
+ | lea RD, [RD+RA*8]
1430
+ |1:
1431
+ | cmp aword [RD], LJ_TNIL; je ->fff_res0 // nil slot: return no results.
1432
+ | // Copy array slot.
1433
+ | mov RB, [RD]
1434
+ | mov [BASE-8], RB
1435
+ | jmp ->fff_res2
1436
+ |2: // Check for empty hash part first. Otherwise call C function.
1437
+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1438
+ |.if X64WIN
1439
+ | mov TMPR, BASE
1440
+ | mov CARG2d, RAd
1441
+ | mov CARG1, TAB:RB
1442
+ | mov RB, TMPR
1443
+ |.else
1444
+ | mov CARG1, TAB:RB
1445
+ | mov RB, BASE // Save BASE.
1446
+ | mov CARG2d, RAd // Caveat: CARG2 == BASE
1447
+ |.endif
1448
+ | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1449
+ | // cTValue * or NULL returned in eax (RD).
1450
+ | mov BASE, RB
1451
+ | test RD, RD
1452
+ | jnz <1 // Slot found: check and copy it via the array-part path.
1453
+ |->fff_res0: // Shared exit: return zero results.
1454
+ | mov RDd, 1+0
1455
+ | jmp ->fff_res
1456
+ |
1457
+ |.ffunc_1 ipairs
1458
+ | mov TAB:RB, [BASE]
1459
+ | mov TMPR, TAB:RB // Keep the original table argument for the 2nd result.
1460
+ | checktab TAB:RB, ->fff_fallback
1461
+ #if LJ_52
1462
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback // With LJ_52: any metatable -> fallback.
1463
+ #endif
1464
+ | mov CFUNC:RD, [BASE-16]
1465
+ | cleartp CFUNC:RD
1466
+ | mov CFUNC:RD, CFUNC:RD->upvalue[0] // 1st result is taken from upvalue[0] of the called function.
1467
+ | settp CFUNC:RD, LJ_TFUNC
1468
+ | mov PC, [BASE-8]
1469
+ | mov [BASE-16], CFUNC:RD
1470
+ | mov [BASE-8], TMPR // 2nd result: the table.
1471
+ |.if DUALNUM
1472
+ | mov64 RD, ((int64_t)LJ_TISNUM<<47) // LJ_TISNUM tag with a zero payload.
1473
+ | mov [BASE], RD
1474
+ |.else
1475
+ | mov qword [BASE], 0 // All-zero bits, i.e. the double +0.0.
1476
+ |.endif
1477
+ | mov RDd, 1+3
1478
+ | jmp ->fff_res
1479
+ |
1480
+ |//-- Base library: catch errors ----------------------------------------
1481
+ |
1482
+ |.ffunc_1 pcall
1483
+ | lea RA, [BASE+16]
1484
+ | sub NARGS:RDd, 1 // The called function itself no longer counts as an argument.
1485
+ | mov PCd, 16+FRAME_PCALL
1486
+ |1: // Common tail, also entered from xpcall below.
1487
+ | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1488
+ | shr RB, HOOK_ACTIVE_SHIFT
1489
+ | and RB, 1 // RB = state of the hook-active bit (0 or 1).
1490
+ | add PC, RB // Remember active hook before pcall.
1491
+ | // Note: this does a (harmless) copy of the function to the PC slot, too.
1492
+ | mov KBASE, RD
1493
+ |2: // Shift the slots up by 8 bytes, highest slot first.
1494
+ | mov RB, [RA+KBASE*8-24]
1495
+ | mov [RA+KBASE*8-16], RB
1496
+ | sub KBASE, 1
1497
+ | ja <2
1498
+ | jmp ->vm_call_dispatch
1499
+ |
1500
+ |.ffunc_2 xpcall
1501
+ | mov LFUNC:RA, [BASE+8]
1502
+ | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback // 2nd argument must be a function.
1503
+ | mov LFUNC:RB, [BASE] // Swap function and traceback.
1504
+ | mov [BASE], LFUNC:RA
1505
+ | mov [BASE+8], LFUNC:RB
1506
+ | lea RA, [BASE+24]
1507
+ | sub NARGS:RDd, 2 // Neither of the two leading arguments counts any more.
1508
+ | mov PCd, 24+FRAME_PCALL
1509
+ | jmp <1 // Continue in the common tail inside pcall.
1510
+ |
1511
+ |//-- Coroutine library --------------------------------------------------
1512
+ |
1513
+ |.macro coroutine_resume_wrap, resume
1514
+ |.if resume
1515
+ |.ffunc_1 coroutine_resume
1516
+ | mov L:RB, [BASE] // Coroutine comes from the 1st argument (type checked below).
1517
+ | cleartp L:RB
1518
+ |.else
1519
+ |.ffunc coroutine_wrap_aux
1520
+ | mov CFUNC:RB, [BASE-16]
1521
+ | cleartp CFUNC:RB
1522
+ | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine comes from upvalue[0] of the called function.
1523
+ | cleartp L:RB
1524
+ |.endif
1525
+ | mov PC, [BASE-8]
1526
+ | mov SAVE_PC, PC
1527
+ | mov TMP1, L:RB // Keep the coroutine pointer across the calls below.
1528
+ |.if resume
1529
+ | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1530
+ |.endif
1531
+ | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1532
+ | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1533
+ | mov RA, L:RB->top
1534
+ | je >1 // Taken if status == LUA_YIELD; falls through for a lower status (i.e. 0).
1535
+ | cmp RA, L:RB->base // Check for presence of initial func.
1536
+ | je ->fff_fallback
1537
+ | mov PC, [RA-8] // Move initial function up.
1538
+ | mov [RA], PC
1539
+ | add RA, 8
1540
+ |1:
1541
+ |.if resume
1542
+ | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1543
+ |.else
1544
+ | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1545
+ |.endif
1546
+ | cmp PC, L:RB->maxstack; ja ->fff_fallback
1547
+ | mov L:RB->top, PC // New top of the coroutine stack.
1548
+ |
1549
+ | mov L:RB, SAVE_L // From here on L:RB is the calling state again.
1550
+ | mov L:RB->base, BASE
1551
+ |.if resume
1552
+ | add BASE, 8 // Keep resumed thread in stack for GC.
1553
+ |.endif
1554
+ | mov L:RB->top, BASE
1555
+ |.if resume
1556
+ | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1557
+ |.else
1558
+ | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1559
+ |.endif
1560
+ | sub RB, PC // Relative to PC.
1561
+ |
1562
+ | cmp PC, RA
1563
+ | je >3 // No arguments to move.
1564
+ |2: // Move args to coroutine.
1565
+ | mov RC, [PC+RB]
1566
+ | mov [PC-8], RC
1567
+ | sub PC, 8
1568
+ | cmp PC, RA
1569
+ | jne <2
1570
+ |3:
1571
+ | mov CARG2, RA
1572
+ | mov CARG1, TMP1
1573
+ | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1574
+ |
1575
+ | mov L:RB, SAVE_L
1576
+ | mov L:PC, TMP1 // L:PC = the coroutine.
1577
+ | mov BASE, L:RB->base
1578
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1579
+ | set_vmstate INTERP
1580
+ |
1581
+ | cmp eax, LUA_YIELD
1582
+ | ja >8 // Status above LUA_YIELD: error path.
1583
+ |4:
1584
+ | mov RA, L:PC->base
1585
+ | mov KBASE, L:PC->top
1586
+ | mov L:PC->top, RA // Clear coroutine stack.
1587
+ | mov PC, KBASE
1588
+ | sub PC, RA // PC = size of the results in bytes.
1589
+ | je >6 // No results?
1590
+ | lea RD, [BASE+PC]
1591
+ | shr PCd, 3 // Bytes -> number of results.
1592
+ | cmp RD, L:RB->maxstack
1593
+ | ja >9 // Need to grow stack?
1594
+ |
1595
+ | mov RB, BASE
1596
+ | sub RB, RA // RB = offset from coroutine stack to the caller's stack.
1597
+ |5: // Move results from coroutine.
1598
+ | mov RD, [RA]
1599
+ | mov [RA+RB], RD
1600
+ | add RA, 8
1601
+ | cmp RA, KBASE
1602
+ | jne <5
1603
+ |6:
1604
+ |.if resume
1605
+ | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1606
+ | mov_true ITYPE // Prepend true to results.
1607
+ | mov [BASE-8], ITYPE
1608
+ |.else
1609
+ | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1610
+ |.endif
1611
+ |7:
1612
+ | mov PC, SAVE_PC
1613
+ | mov MULTRES, RDd
1614
+ |.if resume
1615
+ | mov RA, -8 // Results start at BASE-8 (the prepended boolean).
1616
+ |.else
1617
+ | xor RAd, RAd // Results start at BASE.
1618
+ |.endif
1619
+ | test PCd, FRAME_TYPE
1620
+ | jz ->BC_RET_Z
1621
+ | jmp ->vm_return
1622
+ |
1623
+ |8: // Coroutine returned with error (at co->top-1).
1624
+ |.if resume
1625
+ | mov_false ITYPE // Prepend false to results.
1626
+ | mov [BASE-8], ITYPE
1627
+ | mov RA, L:PC->top
1628
+ | sub RA, 8
1629
+ | mov L:PC->top, RA // Clear error from coroutine stack.
1630
+ | // Copy error message.
1631
+ | mov RD, [RA]
1632
+ | mov [BASE], RD
1633
+ | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1634
+ | jmp <7
1635
+ |.else
1636
+ | mov CARG2, L:PC
1637
+ | mov CARG1, L:RB
1638
+ | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1639
+ | // Error function does not return.
1640
+ |.endif
1641
+ |
1642
+ |9: // Handle stack expansion on return from yield.
1643
+ | mov L:RA, TMP1
1644
+ | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1645
+ | mov CARG2, PC // PC holds the number of results here.
1646
+ | mov CARG1, L:RB
1647
+ | call extern lj_state_growstack // (lua_State *L, int n)
1648
+ | mov L:PC, TMP1
1649
+ | mov BASE, L:RB->base
1650
+ | jmp <4 // Retry the stack move.
1651
+ |.endmacro
1652
+ |
1653
+ | coroutine_resume_wrap 1 // coroutine.resume
1654
+ | coroutine_resume_wrap 0 // coroutine.wrap
1655
+ |
1656
+ |.ffunc coroutine_yield
1657
+ | mov L:RB, SAVE_L
1658
+ | test aword L:RB->cframe, CFRAME_RESUME
1659
+ | jz ->fff_fallback // CFRAME_RESUME bit not set in L->cframe: fallback.
1660
+ | mov L:RB->base, BASE
1661
+ | lea RD, [BASE+NARGS:RD*8-8]
1662
+ | mov L:RB->top, RD // L->top = BASE + nargs*8.
1663
+ | xor RDd, RDd
1664
+ | mov aword L:RB->cframe, RD // Clear L->cframe.
1665
+ | mov al, LUA_YIELD
1666
+ | mov byte L:RB->status, al // L->status = LUA_YIELD (value also stays in al).
1667
+ | jmp ->vm_leave_unw
1668
+ |
1669
+ |//-- Math library -------------------------------------------------------
1670
+ |
1671
+ | .ffunc_1 math_abs
1672
+ | mov RB, [BASE]
1673
+ |.if DUALNUM
1674
+ | checkint RB, >3
1675
+ | cmp RBd, 0; jns ->fff_resi // Non-negative integer: return as is.
1676
+ | neg RBd; js >2 // Still negative after negation: result does not fit an int32.
1677
+ |->fff_resbit:
1678
+ |->fff_resi: // Shared exit: tag RBd as an integer and return it.
1679
+ | setint RB
1680
+ |->fff_resRB: // Shared exit: return the tagged value in RB.
1681
+ | mov PC, [BASE-8]
1682
+ | mov [BASE-16], RB
1683
+ | jmp ->fff_res1
1684
+ |2:
1685
+ | mov64 RB, U64x(41e00000,00000000) // 2^31.
1686
+ | jmp ->fff_resRB
1687
+ |3:
1688
+ | ja ->fff_fallback
1689
+ |.else
1690
+ | checknum RB, ->fff_fallback
1691
+ |.endif
1692
+ | shl RB, 1 // Shift left then right by one: clears bit 63 (the sign bit).
1693
+ | shr RB, 1
1694
+ | mov PC, [BASE-8]
1695
+ | mov [BASE-16], RB
1696
+ | jmp ->fff_res1
1697
+ |
1698
+ |.ffunc_n math_sqrt, sqrtsd
1699
+ |->fff_resxmm0: // Shared exit: return the double in xmm0 as the single result.
1700
+ | mov PC, [BASE-8]
1701
+ | movsd qword [BASE-16], xmm0
1702
+ | // fallthrough
1703
+ |
1704
+ |->fff_res1: // Shared exit: one result, already stored at [BASE-16].
1705
+ | mov RDd, 1+1
1706
+ |->fff_res: // Shared exit: RD = nresults+1.
1707
+ | mov MULTRES, RDd
1708
+ |->fff_res_: // Entry for callers that have already set MULTRES.
1709
+ | test PCd, FRAME_TYPE
1710
+ | jnz >7
1711
+ |5:
1712
+ | cmp PC_RB, RDL // More results expected?
1713
+ | ja >6
1714
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1715
+ | movzx RAd, PC_RA
1716
+ | neg RA
1717
+ | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1718
+ | ins_next
1719
+ |
1720
+ |6: // Fill up results with nil.
1721
+ | mov aword [BASE+RD*8-24], LJ_TNIL
1722
+ | add RD, 1
1723
+ | jmp <5 // Re-check against the expected result count.
1724
+ |
1725
+ |7: // Non-standard return case.
1726
+ | mov RA, -16 // Results start at BASE+RA = BASE-16.
1727
+ | jmp ->vm_return
1728
+ |
1729
+ |.macro math_round, func
1730
+ | .ffunc math_ .. func
1731
+ |.if DUALNUM
1732
+ | mov RB, [BASE]
1733
+ | checknumx RB, ->fff_resRB, je // NOTE(review): presumably the integer case, returned unchanged -- confirm against checknumx.
1734
+ | ja ->fff_fallback
1735
+ |.else
1736
+ | checknumtp [BASE], ->fff_fallback
1737
+ |.endif
1738
+ | movsd xmm0, qword [BASE]
1739
+ | call ->vm_ .. func .. _sse // Helper takes and returns its value in xmm0.
1740
+ |.if DUALNUM
1741
+ | cvttsd2si RBd, xmm0
1742
+ | cmp RBd, 0x80000000 // 0x80000000 is what cvttsd2si yields for unrepresentable inputs.
1743
+ | jne ->fff_resi
1744
+ | cvtsi2sd xmm1, RBd // Check whether the value really equals -2^31.
1745
+ | ucomisd xmm0, xmm1
1746
+ | jp ->fff_resxmm0 // Unordered (NaN): return the double.
1747
+ | je ->fff_resi
1748
+ |.endif
1749
+ | jmp ->fff_resxmm0
1750
+ |.endmacro
1751
+ |
1752
+ | math_round floor
1753
+ | math_round ceil
1754
+ |
1755
+ |.ffunc math_log
1756
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1757
+ | checknumtp [BASE], ->fff_fallback
1758
+ | movsd xmm0, qword [BASE]
1759
+ | mov RB, BASE // Keep BASE in RB across the C call.
1760
+ | call extern log
1761
+ | mov BASE, RB // Restore BASE.
1762
+ | jmp ->fff_resxmm0
1763
+ |
1764
+ |.macro math_extern, func
1765
+ | .ffunc_n math_ .. func
1766
+ | mov RB, BASE // Keep BASE in RB across the C call.
1767
+ | call extern func // One-argument C math function; argument is in xmm0.
1768
+ | mov BASE, RB // Restore BASE.
1769
+ | jmp ->fff_resxmm0
1770
+ |.endmacro
1771
+ |
1772
+ |.macro math_extern2, func
1773
+ | .ffunc_nn math_ .. func
1774
+ | mov RB, BASE // Keep BASE in RB across the C call.
1775
+ | call extern func // Two-argument C math function; arguments are in xmm0 and xmm1.
1776
+ | mov BASE, RB // Restore BASE.
1777
+ | jmp ->fff_resxmm0
1778
+ |.endmacro
1779
+ |
1780
+ | math_extern log10
1781
+ | math_extern exp
1782
+ | math_extern sin
1783
+ | math_extern cos
1784
+ | math_extern tan
1785
+ | math_extern asin
1786
+ | math_extern acos
1787
+ | math_extern atan
1788
+ | math_extern sinh
1789
+ | math_extern cosh
1790
+ | math_extern tanh
1791
+ | math_extern2 pow
1792
+ | math_extern2 atan2
1793
+ | math_extern2 fmod
1794
+ |
1795
+ |.ffunc_2 math_ldexp
1796
+ | checknumtp [BASE], ->fff_fallback
1797
+ | checknumtp [BASE+8], ->fff_fallback
1798
+ | fld qword [BASE+8] // Push the 2nd argument first ...
1799
+ | fld qword [BASE] // ... so the 1st argument ends up in st0 and the 2nd in st1.
1800
+ | fscale // x87: st0 = st0 * 2^trunc(st1).
1801
+ | fpop1
1802
+ | mov PC, [BASE-8]
1803
+ | fstp qword [BASE-16] // Store the result and pop it.
1804
+ | jmp ->fff_res1
1805
+ |
1806
+ |.ffunc_n math_frexp
1807
+ | // Calls C frexp(double x, int *exp): x is already in xmm0 (from .ffunc_n),
+ | // the exponent is written to the TMP1 stack slot.
+ | mov RB, BASE // Save BASE first: on X64WIN the pointer argument overwrites it.
1808
+ |.if X64WIN
+ | lea CARG2, TMP1 // Win64 uses positional slots: the int * is argument #2. Caveat: CARG2 == BASE.
+ |.else
+ | lea CARG1, TMP1 // Otherwise the int * is the first integer-class argument.
+ |.endif
1809
+ | call extern frexp
1810
+ | mov BASE, RB // Restore BASE.
1811
+ | mov RBd, TMP1d // Exponent written by frexp.
1812
+ | mov PC, [BASE-8]
1813
+ | movsd qword [BASE-16], xmm0 // 1st result: value returned by frexp.
1814
+ |.if DUALNUM
1815
+ | setint RB
1816
+ | mov [BASE-8], RB // 2nd result: exponent as a tagged integer.
1817
+ |.else
1818
+ | cvtsi2sd xmm1, RBd
1819
+ | movsd qword [BASE-8], xmm1 // 2nd result: exponent as a double.
1820
+ |.endif
1821
+ | mov RDd, 1+2
1822
+ | jmp ->fff_res
1823
+ |
1824
+ |.ffunc_n math_modf
1825
+ | // Calls C modf(double x, double *iptr): x is already in xmm0 (from .ffunc_n),
+ | // the integral part is written straight into the 1st result slot [BASE-16].
+ | mov PC, [BASE-8]
1826
+ | mov RB, BASE // Save BASE first: on X64WIN the pointer argument overwrites it.
1827
+ |.if X64WIN
+ | lea CARG2, [BASE-16] // Win64 uses positional slots: the double * is argument #2. Caveat: CARG2 == BASE.
+ |.else
+ | lea CARG1, [BASE-16] // Otherwise the double * is the first integer-class argument.
+ |.endif
1828
+ | call extern modf
1829
+ | mov BASE, RB // Restore BASE.
1830
+ | mov PC, [BASE-8]
1831
+ | movsd qword [BASE-8], xmm0 // 2nd result: value returned by modf.
1832
+ | mov RDd, 1+2
1833
+ | jmp ->fff_res
1834
+ |
1835
+ |.macro math_minmax, name, cmovop, sseop
1836
+ | .ffunc name
1837
+ | mov RAd, 2 // RA counts like nargs+1: the next argument to look at is [BASE+RA*8-8].
1838
+ |.if DUALNUM
1839
+ | mov RB, [BASE]
1840
+ | checkint RB, >4
1841
+ |1: // Handle integers.
1842
+ | cmp RAd, RDd; jae ->fff_resRB // All arguments consumed: return RB.
1843
+ | mov TMPR, [BASE+RA*8-8]
1844
+ | checkint TMPR, >3
1845
+ | cmp RBd, TMPRd
1846
+ | cmovop RB, TMPR // Replace the running result when the condition given by cmovop holds.
1847
+ | add RAd, 1
1848
+ | jmp <1
1849
+ |3:
1850
+ | ja ->fff_fallback
1851
+ | // Convert intermediate result to number and continue below.
1852
+ | cvtsi2sd xmm0, RBd
1853
+ | jmp >6
1854
+ |4:
1855
+ | ja ->fff_fallback
1856
+ |.else
1857
+ | checknumtp [BASE], ->fff_fallback
1858
+ |.endif
1859
+ |
1860
+ | movsd xmm0, qword [BASE]
1861
+ |5: // Handle numbers or integers.
1862
+ | cmp RAd, RDd; jae ->fff_resxmm0 // All arguments consumed: return xmm0.
1863
+ |.if DUALNUM
1864
+ | mov RB, [BASE+RA*8-8]
1865
+ | checknumx RB, >6, jb
1866
+ | ja ->fff_fallback
1867
+ | cvtsi2sd xmm1, RBd // Integer argument: convert it to a double.
1868
+ | jmp >7
1869
+ |.else
1870
+ | checknumtp [BASE+RA*8-8], ->fff_fallback
1871
+ |.endif
1872
+ |6:
1873
+ | movsd xmm1, qword [BASE+RA*8-8]
1874
+ |7:
1875
+ | sseop xmm0, xmm1 // Fold the argument into the running result.
1876
+ | add RAd, 1
1877
+ | jmp <5
1878
+ |.endmacro
1879
+ |
1880
+ | math_minmax math_min, cmovg, minsd
1881
+ | math_minmax math_max, cmovl, maxsd
1882
+ |
1883
+ |//-- String library -----------------------------------------------------
1884
+ |
1885
+ |.ffunc string_byte // Only handle the 1-arg case here.
1885
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1886
+ | mov STR:RB, [BASE]
1887
+ | checkstr STR:RB, ->fff_fallback
1888
+ | mov PC, [BASE-8]
1889
+ | cmp dword STR:RB->len, 1
1890
+ | jb ->fff_res0 // Return no results for empty string.
1891
+ | movzx RBd, byte STR:RB[1] // Byte located directly after the GCstr header.
1892
+ |.if DUALNUM
1893
+ | jmp ->fff_resi
1894
+ |.else
1895
+ | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1896
+ |.endif
1898
+ |
1899
+ |.ffunc string_char // Only handle the 1-arg case here.
1899
+ | ffgccheck
1900
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1901
+ |.if DUALNUM
1902
+ | mov RB, [BASE]
1903
+ | checkint RB, ->fff_fallback
1904
+ |.else
1905
+ | checknumtp [BASE], ->fff_fallback
1906
+ | cvttsd2si RBd, qword [BASE]
1907
+ |.endif
1908
+ | cmp RBd, 255; ja ->fff_fallback // Unsigned compare: negative values fall back, too.
1909
+ | mov TMP1d, RBd
1910
+ | mov TMPRd, 1 // Length of the new string.
1911
+ | lea RD, TMP1 // Points to stack. Little-endian.
1912
+ |->fff_newstr: // Shared exit: RD = source pointer, TMPRd = length.
1913
+ | mov L:RB, SAVE_L
1914
+ | mov L:RB->base, BASE
1915
+ | mov CARG3d, TMPRd // Zero-extended to size_t.
1916
+ | mov CARG2, RD
1917
+ | mov CARG1, L:RB
1918
+ | mov SAVE_PC, PC
1919
+ | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1920
+ |->fff_resstr: // Shared exit: L:RB = lua_State, RD = GCstr to return.
1921
+ | // GCstr * returned in eax (RD).
1922
+ | mov BASE, L:RB->base
1923
+ | mov PC, [BASE-8]
1924
+ | settp STR:RD, LJ_TSTR
1925
+ | mov [BASE-16], STR:RD
1926
+ | jmp ->fff_res1
1928
+ |
1929
+ |.ffunc string_sub
1930
+ | ffgccheck
1931
+ | mov TMPRd, -1 // Default end = -1, used when only two arguments are given.
1932
+ | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1933
+ | jna >1 // Exactly two arguments: keep the default end.
1934
+ |.if DUALNUM
1935
+ | mov TMPR, [BASE+16]
1936
+ | checkint TMPR, ->fff_fallback
1937
+ |.else
1938
+ | checknumtp [BASE+16], ->fff_fallback
1939
+ | cvttsd2si TMPRd, qword [BASE+16]
1940
+ |.endif
1941
+ |1:
1942
+ | mov STR:RB, [BASE]
1943
+ | checkstr STR:RB, ->fff_fallback
1944
+ |.if DUALNUM
1945
+ | mov ITYPE, [BASE+8]
1946
+ | mov RAd, ITYPEd // Must clear hiword for lea below.
1947
+ | sar ITYPE, 47
1948
+ | cmp ITYPEd, LJ_TISNUM
1949
+ | jne ->fff_fallback
1950
+ |.else
1951
+ | checknumtp [BASE+8], ->fff_fallback
1952
+ | cvttsd2si RAd, qword [BASE+8]
1953
+ |.endif
1954
+ | mov RCd, STR:RB->len
1955
+ | cmp RCd, TMPRd // len < end? (unsigned compare)
1956
+ | jb >5
1957
+ |2:
1958
+ | test RAd, RAd // start <= 0?
1959
+ | jle >7
1960
+ |3:
1961
+ | sub TMPRd, RAd // start > end?
1962
+ | jl ->fff_emptystr
1963
+ | lea RD, [STR:RB+RAd+#STR-1] // Pointer to character number 'start' (1-based) of the string data.
1964
+ | add TMPRd, 1 // Length = end - start + 1.
1965
+ |4:
1966
+ | jmp ->fff_newstr
1967
+ |
1968
+ |5: // Negative end or overflow.
1969
+ | jl >6
1970
+ | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1971
+ | jmp <2
1972
+ |6: // Overflow.
1973
+ | mov TMPRd, RCd // end = len
1974
+ | jmp <2
1975
+ |
1976
+ |7: // Negative start or underflow.
1977
+ | je >8
1978
+ | add RAd, RCd // start = start+(len+1)
1979
+ | add RAd, 1
1980
+ | jg <3 // start > 0?
1981
+ |8: // Underflow.
1982
+ | mov RAd, 1 // start = 1
1983
+ | jmp <3
1984
+ |
1985
+ |->fff_emptystr: // Range underflow.
1986
+ | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1987
+ | jmp <4
1988
+ |
1989
+ |.macro ffstring_op, name
1990
+ | .ffunc_1 string_ .. name
1991
+ | ffgccheck
1992
+ |.if X64WIN
1993
+ | mov STR:TMPR, [BASE]
1994
+ | checkstr STR:TMPR, ->fff_fallback
1995
+ |.else
1996
+ | mov STR:CARG2, [BASE]
1997
+ | checkstr STR:CARG2, ->fff_fallback
1998
+ |.endif
1999
+ | mov L:RB, SAVE_L
2000
+ | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // Use the global temporary buffer.
2001
+ | mov L:RB->base, BASE
2002
+ |.if X64WIN
2003
+ | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
2004
+ |.endif
2005
+ | mov RC, SBUF:CARG1->b
2006
+ | mov SBUF:CARG1->L, L:RB
2007
+ | mov SBUF:CARG1->p, RC // p = b: write position set to the buffer start.
2008
+ | mov SAVE_PC, PC
2009
+ | call extern lj_buf_putstr_ .. name
2010
+ | mov CARG1, rax // Return value of the call above becomes the argument of lj_buf_tostr.
2011
+ | call extern lj_buf_tostr
2012
+ | jmp ->fff_resstr
2013
+ |.endmacro
2014
+ |
2015
+ |ffstring_op reverse
2016
+ |ffstring_op lower
2017
+ |ffstring_op upper
2018
+ |
2019
+ |//-- Bit library --------------------------------------------------------
2020
+ |
2021
+ |.macro .ffunc_bit, name, kind, fdef
2022
+ | fdef name // fdef is the entry macro to use (e.g. .ffunc_1 or .ffunc_2).
2023
+ |.if kind == 2
2024
+ | sseconst_tobit xmm1, RB // kind 2: load the constant up front, also on the integer path.
2025
+ |.endif
2026
+ |.if DUALNUM
2027
+ | mov RB, [BASE]
2028
+ | checkint RB, >1
2029
+ |.if kind > 0
2030
+ | jmp >2 // Integer argument: skip the number conversion.
2031
+ |.else
2032
+ | jmp ->fff_resbit // kind 0: an integer argument is the result already.
2033
+ |.endif
2034
+ |1:
2035
+ | ja ->fff_fallback
2036
+ | movd xmm0, RB
2037
+ |.else
2038
+ | checknumtp [BASE], ->fff_fallback
2039
+ | movsd xmm0, qword [BASE]
2040
+ |.endif
2041
+ |.if kind < 2
2042
+ | sseconst_tobit xmm1, RB
2043
+ |.endif
2044
+ | addsd xmm0, xmm1 // Add the tobit constant ...
2045
+ | movd RBd, xmm0 // ... and take the low 32 bits of the sum as the integer.
2046
+ |2:
2047
+ |.endmacro
2048
+ |
2049
+ |.macro .ffunc_bit, name, kind
2050
+ | .ffunc_bit name, kind, .ffunc_1 // Two-parameter form defaults to a .ffunc_1 entry.
2051
+ |.endmacro
2052
+ |
2053
+ |.ffunc_bit bit_tobit, 0
2054
+ | jmp ->fff_resbit
2055
+ |
2056
+ |.macro .ffunc_bit_op, name, ins
2057
+ | .ffunc_bit name, 2
2058
+ | mov TMPRd, NARGS:RDd // Save for fallback.
2059
+ | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument.
2060
+ |1:
2061
+ | cmp RD, BASE
2062
+ | jbe ->fff_resbit // Back at the 1st argument: done.
2063
+ |.if DUALNUM
2064
+ | mov RA, [RD]
2065
+ | checkint RA, >2
2066
+ | ins RBd, RAd
2067
+ | sub RD, 8 // Walk the arguments from last to first.
2068
+ | jmp <1
2069
+ |2:
2070
+ | ja ->fff_fallback_bit_op
2071
+ | movd xmm0, RA
2072
+ |.else
2073
+ | checknumtp [RD], ->fff_fallback_bit_op
2074
+ | movsd xmm0, qword [RD]
2075
+ |.endif
2076
+ | addsd xmm0, xmm1 // xmm1 still holds the tobit constant loaded by .ffunc_bit (kind 2).
2077
+ | movd RAd, xmm0
2078
+ | ins RBd, RAd
2079
+ | sub RD, 8 // Walk the arguments from last to first.
2080
+ | jmp <1
2081
+ |.endmacro
2082
+ |
2083
+ |.ffunc_bit_op bit_band, and
2084
+ |.ffunc_bit_op bit_bor, or
2085
+ |.ffunc_bit_op bit_bxor, xor
2086
+ |
2087
+ |.ffunc_bit bit_bswap, 1
2088
+ | bswap RBd
2089
+ | jmp ->fff_resbit
2090
+ |
2091
+ |.ffunc_bit bit_bnot, 1
2092
+ | not RBd
2093
+ |.if DUALNUM
2094
+ | jmp ->fff_resbit
2095
+ |.else
2096
+ |->fff_resbit: // Non-DUALNUM definition: return RBd converted to a double.
2097
+ | cvtsi2sd xmm0, RBd
2098
+ | jmp ->fff_resxmm0
2099
+ |.endif
2100
+ |
2101
+ |->fff_fallback_bit_op: // Fallback for .ffunc_bit_op, which reuses RD as a pointer.
2102
+ | mov NARGS:RDd, TMPRd // Restore for fallback
2103
+ | jmp ->fff_fallback
2104
+ |
2105
+ |.macro .ffunc_bit_sh, name, ins
2106
+ |.if DUALNUM
2107
+ | .ffunc_bit name, 1, .ffunc_2
2108
+ | // Note: no inline conversion from number for 2nd argument!
2109
+ | mov RA, [BASE+8]
2110
+ | checkint RA, ->fff_fallback
2111
+ |.else
2112
+ | .ffunc_nn name
2113
+ | sseconst_tobit xmm2, RB
2114
+ | addsd xmm0, xmm2 // Convert both arguments with the same tobit constant.
2115
+ | addsd xmm1, xmm2
2116
+ | movd RBd, xmm0 // Value to shift or rotate.
2117
+ | movd RAd, xmm1 // Shift or rotate count.
2118
+ |.endif
2119
+ | ins RBd, cl // Assumes RA is ecx.
2120
+ | jmp ->fff_resbit
2121
+ |.endmacro
2122
+ |
2123
+ |.ffunc_bit_sh bit_lshift, shl
2124
+ |.ffunc_bit_sh bit_rshift, shr
2125
+ |.ffunc_bit_sh bit_arshift, sar
2126
+ |.ffunc_bit_sh bit_rol, rol
2127
+ |.ffunc_bit_sh bit_ror, ror
2128
+ |
2129
+ |//-----------------------------------------------------------------------
2130
+ |
2131
+ |->fff_fallback_2:
2132
+ | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2133
+ | jmp ->fff_fallback
2134
+ |->fff_fallback_1:
2135
+ | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2136
+ |->fff_fallback: // Call fast function fallback handler.
2137
+ | // BASE = new base, RD = nargs+1
2138
+ | mov L:RB, SAVE_L
2139
+ | mov PC, [BASE-8] // Fallback may overwrite PC.
2140
+ | mov SAVE_PC, PC // Redundant (but a defined value).
2141
+ | mov L:RB->base, BASE
2142
+ | lea RD, [BASE+NARGS:RD*8-8]
2143
+ | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2144
+ | mov L:RB->top, RD // L->top = BASE + nargs*8.
2145
+ | mov CFUNC:RD, [BASE-16]
2146
+ | cleartp CFUNC:RD
2147
+ | cmp RA, L:RB->maxstack
2148
+ | ja >5 // Need to grow stack.
2149
+ | mov CARG1, L:RB
2150
+ | call aword CFUNC:RD->f // (lua_State *L)
2151
+ | mov BASE, L:RB->base
2152
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2153
+ | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2154
+ |1:
2155
+ | mov RA, L:RB->top
2156
+ | sub RA, BASE
2157
+ | shr RAd, 3
2158
+ | test RDd, RDd // Flags are consumed by the jne below (lea and mov do not change them).
2159
+ | lea NARGS:RDd, [RAd+1] // nargs+1 recomputed from L->top - BASE.
2160
+ | mov LFUNC:RB, [BASE-16]
2161
+ | jne ->vm_call_tail // Returned -1?
2162
+ | cleartp LFUNC:RB
2163
+ | ins_callt // Returned 0: retry fast path.
2164
+ |
2165
+ |// Reconstruct previous base for vmeta_call during tailcall.
2166
+ |->vm_call_tail:
2167
+ | mov RA, BASE
2168
+ | test PCd, FRAME_TYPE
2169
+ | jnz >3
2170
+ | movzx RBd, PC_RA
2171
+ | neg RB
2172
+ | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2173
+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
2174
+ |3:
2175
+ | mov RB, PC
2176
+ | and RB, -8 // Mask off the low 3 bits of PC to get the frame delta.
2177
+ | sub BASE, RB
2178
+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
2179
+ |
2180
+ |5: // Grow stack for fallback handler.
2181
+ | mov CARG2d, LUA_MINSTACK
2182
+ | mov CARG1, L:RB
2183
+ | call extern lj_state_growstack // (lua_State *L, int n)
2184
+ | mov BASE, L:RB->base
2185
+ | xor RDd, RDd // Simulate a return 0.
2186
+ | jmp <1 // Dumb retry (goes through ff first).
2187
+ |
2188
+ |->fff_gcstep: // Call GC step function.
2189
+ | // BASE = new base, RD = nargs+1
2190
+ | pop RB // Must keep stack at same level.
2191
+ | mov TMP1, RB // Save return address
2192
+ | mov L:RB, SAVE_L
2193
+ | mov SAVE_PC, PC // Redundant (but a defined value).
2194
+ | mov L:RB->base, BASE
2195
+ | lea RD, [BASE+NARGS:RD*8-8]
2196
+ | mov CARG1, L:RB
2197
+ | mov L:RB->top, RD // L->top = BASE + nargs*8.
2198
+ | call extern lj_gc_step // (lua_State *L)
2199
+ | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
2200
+ | mov RD, L:RB->top
2201
+ | sub RD, BASE
2202
+ | shr RDd, 3
2203
+ | add NARGS:RDd, 1 // RD = nargs+1 again, recomputed from L->top - L->base.
2204
+ | mov RB, TMP1
2205
+ | push RB // Restore return address.
2206
+ | ret
2207
+ |
2208
+ |//-----------------------------------------------------------------------
2209
+ |//-- Special dispatch targets -------------------------------------------
2210
+ |//-----------------------------------------------------------------------
2211
+ |
2212
+ |->vm_record: // Dispatch target for recording phase.
2213
+ |.if JIT
2214
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2215
+ | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2216
+ | jnz >5
2217
+ | // Decrement the hookcount for consistency, but always do the call.
2218
+ | test RDL, HOOK_ACTIVE
2219
+ | jnz >1
2220
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2221
+ | jz >1
2222
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2223
+ | jmp >1
2224
+ |.endif
2225
+ |
2226
+ |->vm_rethook: // Dispatch target for return hooks.
2227
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2228
+ | test RDL, HOOK_ACTIVE // Hook already active?
2229
+ | jnz >5
2230
+ | jmp >1
2231
+ |
2232
+ |->vm_inshook: // Dispatch target for instr/line hooks.
2233
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2234
+ | test RDL, HOOK_ACTIVE // Hook already active?
2235
+ | jnz >5
2236
+ |
2237
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2238
+ | jz >5 // Neither line nor count hook requested: just re-dispatch.
2239
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2240
+ | jz >1 // hookcount reached zero: call the dispatcher.
2241
+ | test RDL, LUA_MASKLINE
2242
+ | jz >5
2243
+ |1: // Common path: call lj_dispatch_ins, then re-dispatch.
2244
+ | mov L:RB, SAVE_L
2245
+ | mov L:RB->base, BASE
2246
+ | mov CARG2, PC // Caveat: CARG2 == BASE
2247
+ | mov CARG1, L:RB
2248
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2249
+ | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2250
+ |3:
2251
+ | mov BASE, L:RB->base
2252
+ |4:
2253
+ | movzx RAd, PC_RA
2254
+ |5:
2255
+ | movzx OP, PC_OP
2256
+ | movzx RDd, PC_RD
2257
+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2258
+ |
2259
+ |->cont_hook: // Continue from hook yield.
2260
+ | add PC, 4 // Advance PC by 4 bytes.
2261
+ | mov RA, [RB-40]
2262
+ | mov MULTRES, RAd // Restore MULTRES for *M ins.
2263
+ | jmp <4 // Reload RA and re-dispatch via label 4 of the hook code above.
2264
+ |
2265
+ |->vm_hotloop: // Hot loop counter underflow.
2266
+ |.if JIT
2267
+ | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2268
+ | cleartp LFUNC:RB
2269
+ | mov RB, LFUNC:RB->pc
2270
+ | movzx RDd, byte [RB+PC2PROTO(framesize)]
2271
+ | lea RD, [BASE+RD*8] // RD = BASE + framesize*8, stored as L->top below.
2272
+ | mov L:RB, SAVE_L
2273
+ | mov L:RB->base, BASE
2274
+ | mov L:RB->top, RD
2275
+ | mov CARG2, PC
2276
+ | lea CARG1, [DISPATCH+GG_DISP2J]
2277
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2278
+ | mov SAVE_PC, PC
2279
+ | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2280
+ | jmp <3 // Reload BASE and re-dispatch via label 3 of the hook code above.
2281
+ |.endif
2282
+ |
2283
+ |->vm_callhook: // Dispatch target for call hooks.
2284
+ | mov SAVE_PC, PC
2285
+ |.if JIT
2286
+ | jmp >1
2287
+ |.endif
2288
+ |
2289
+ |->vm_hotcall: // Hot call counter underflow.
2290
+ |.if JIT
2291
+ | mov SAVE_PC, PC
2292
+ | or PC, 1 // Marker for hot call.
2293
+ |1:
2294
+ |.endif
2295
+ | lea RD, [BASE+NARGS:RD*8-8]
2296
+ | mov L:RB, SAVE_L
2297
+ | mov L:RB->base, BASE
2298
+ | mov L:RB->top, RD
2299
+ | mov CARG2, PC
2300
+ | mov CARG1, L:RB
2301
+ | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2302
+ | // ASMFunction returned in eax/rax (RD).
2303
+ | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2304
+ |.if JIT
2305
+ | and PC, -2
2306
+ |.endif
2307
+ | mov BASE, L:RB->base
2308
+ | mov RA, RD
2309
+ | mov RD, L:RB->top
2310
+ | sub RD, BASE
2311
+ | mov RB, RA
2312
+ | movzx RAd, PC_RA
2313
+ | shr RDd, 3
2314
+ | add NARGS:RDd, 1
2315
+ | jmp RB
2316
+ |
2317
+ |->cont_stitch: // Trace stitching.
2318
+ |.if JIT
2319
+ | // BASE = base, RC = result, RB = mbase
2320
+ | mov ITYPEd, [RB-24] // Save previous trace number.
2321
+ | mov TMPRd, MULTRES
2322
+ | movzx RAd, PC_RA
2323
+ | lea RA, [BASE+RA*8] // Call base.
2324
+ | sub TMPRd, 1
2325
+ | jz >2
2326
+ |1: // Move results down.
2327
+ | mov RB, [RC]
2328
+ | mov [RA], RB
2329
+ | add RC, 8
2330
+ | add RA, 8
2331
+ | sub TMPRd, 1
2332
+ | jnz <1
2333
+ |2:
2334
+ | movzx RCd, PC_RA
2335
+ | movzx RBd, PC_RB
2336
+ | add RC, RB
2337
+ | lea RC, [BASE+RC*8-8]
2338
+ |3:
2339
+ | cmp RC, RA
2340
+ | ja >9 // More results wanted?
2341
+ |
2342
+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
2343
+ | mov TRACE:RD, [RA+ITYPE*8]
2344
+ | test TRACE:RD, TRACE:RD
2345
+ | jz ->cont_nop
2346
+ | movzx RDd, word TRACE:RD->link
2347
+ | cmp RDd, RBd
2348
+ | je ->cont_nop // Blacklisted.
2349
+ | test RDd, RDd
2350
+ | jne =>BC_JLOOP // Jump to stitched trace.
2351
+ |
2352
+ | // Stitch a new trace to the previous trace.
2353
+ | mov [DISPATCH+DISPATCH_J(exitno)], RB
2354
+ | mov L:RB, SAVE_L
2355
+ | mov L:RB->base, BASE
2356
+ | mov CARG2, PC
2357
+ | lea CARG1, [DISPATCH+GG_DISP2J]
2358
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2359
+ | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2360
+ | mov BASE, L:RB->base
2361
+ | jmp ->cont_nop
2362
+ |
2363
+ |9: // Fill up results with nil.
2364
+ | mov aword [RA], LJ_TNIL
2365
+ | add RA, 8
2366
+ | jmp <3
2367
+ |.endif
2368
+ |
2369
+ |->vm_profhook: // Dispatch target for profiler hook.
2370
+ #if LJ_HASPROFILE
2371
+ | mov L:RB, SAVE_L
2372
+ | mov L:RB->base, BASE
2373
+ | mov CARG2, PC // Caveat: CARG2 == BASE
2374
+ | mov CARG1, L:RB
2375
+ | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2376
+ | mov BASE, L:RB->base
2377
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2378
+ | sub PC, 4
2379
+ | jmp ->cont_nop
2380
+ #endif
2381
+ |
2382
+ |//-----------------------------------------------------------------------
2383
+ |//-- Trace exit handler -------------------------------------------------
2384
+ |//-----------------------------------------------------------------------
2385
+ |
2386
+ |// Called from an exit stub with the exit number on the stack.
2387
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2388
+ |->vm_exit_handler:
2389
+ |.if JIT
2390
+ | push r13; push r12
2391
+ | push r11; push r10; push r9; push r8
2392
+ | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
2393
+ | push rbx; push rdx; push rcx; push rax
2394
+ | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2395
+ | mov RCH, byte [rbp-16]
2396
+ | mov [rbp-8], r15; mov [rbp-16], r14
2397
+ | // Caveat: DISPATCH is rbx.
2398
+ | mov DISPATCH, [ebp]
2399
+ | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2400
+ | set_vmstate EXIT
2401
+ | mov [DISPATCH+DISPATCH_J(exitno)], RC
2402
+ | mov [DISPATCH+DISPATCH_J(parent)], RA
2403
+ |.if X64WIN
2404
+ | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2405
+ |.else
2406
+ | sub rsp, 16*8 // Room for SSE regs.
2407
+ |.endif
2408
+ | add rbp, -128
2409
+ | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2410
+ | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2411
+ | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2412
+ | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2413
+ | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2414
+ | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2415
+ | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2416
+ | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2417
+ | // Caveat: RB is rbp.
2418
+ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2419
+ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2420
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2421
+ | mov L:RB->base, BASE
2422
+ |.if X64WIN
2423
+ | lea CARG2, [rsp+4*8]
2424
+ |.else
2425
+ | mov CARG2, rsp
2426
+ |.endif
2427
+ | lea CARG1, [DISPATCH+GG_DISP2J]
2428
+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2429
+ | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2430
+ | // MULTRES or negated error code returned in eax (RD).
2431
+ | mov RA, L:RB->cframe
2432
+ | and RA, CFRAME_RAWMASK
2433
+ | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2434
+ | mov BASE, L:RB->base
2435
+ | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2436
+ | jmp >1
2437
+ |.endif
2438
+ |->vm_exit_interp:
2439
+ | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2440
+ |.if JIT
2441
+ | // Restore additional callee-save registers only used in compiled code.
2442
+ |.if X64WIN
2443
+ | lea RA, [rsp+10*16+4*8]
2444
+ |1:
2445
+ | movdqa xmm15, [RA-10*16]
2446
+ | movdqa xmm14, [RA-9*16]
2447
+ | movdqa xmm13, [RA-8*16]
2448
+ | movdqa xmm12, [RA-7*16]
2449
+ | movdqa xmm11, [RA-6*16]
2450
+ | movdqa xmm10, [RA-5*16]
2451
+ | movdqa xmm9, [RA-4*16]
2452
+ | movdqa xmm8, [RA-3*16]
2453
+ | movdqa xmm7, [RA-2*16]
2454
+ | mov rsp, RA // Reposition stack to C frame.
2455
+ | movdqa xmm6, [RA-1*16]
2456
+ | mov r15, CSAVE_1
2457
+ | mov r14, CSAVE_2
2458
+ | mov r13, CSAVE_3
2459
+ | mov r12, CSAVE_4
2460
+ |.else
2461
+ | lea RA, [rsp+16]
2462
+ |1:
2463
+ | mov r13, [RA-8]
2464
+ | mov r12, [RA]
2465
+ | mov rsp, RA // Reposition stack to C frame.
2466
+ |.endif
2467
+ | test RDd, RDd; js >9 // Check for error from exit.
2468
+ | mov L:RB, SAVE_L
2469
+ | mov MULTRES, RDd
2470
+ | mov LFUNC:KBASE, [BASE-16]
2471
+ | cleartp LFUNC:KBASE
2472
+ | mov KBASE, LFUNC:KBASE->pc
2473
+ | mov KBASE, [KBASE+PC2PROTO(k)]
2474
+ | mov L:RB->base, BASE
2475
+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2476
+ | set_vmstate INTERP
2477
+ | // Modified copy of ins_next which handles function header dispatch, too.
2478
+ | mov RCd, [PC]
2479
+ | movzx RAd, RCH
2480
+ | movzx OP, RCL
2481
+ | add PC, 4
2482
+ | shr RCd, 16
2483
+ | cmp OP, BC_FUNCF // Function header?
2484
+ | jb >3
2485
+ | cmp OP, BC_FUNCC+2 // Fast function?
2486
+ | jae >4
2487
+ |2:
2488
+ | mov RCd, MULTRES // RC/RD holds nres+1.
2489
+ |3:
2490
+ | jmp aword [DISPATCH+OP*8]
2491
+ |
2492
+ |4: // Check frame below fast function.
2493
+ | mov RC, [BASE-8]
2494
+ | test RCd, FRAME_TYPE
2495
+ | jnz <2 // Trace stitching continuation?
2496
+ | // Otherwise set KBASE for Lua function below fast function.
2497
+ | movzx RCd, byte [RC-3]
2498
+ | neg RC
2499
+ | mov LFUNC:KBASE, [BASE+RC*8-24]
2500
+ | cleartp LFUNC:KBASE
2501
+ | mov KBASE, LFUNC:KBASE->pc
2502
+ | mov KBASE, [KBASE+PC2PROTO(k)]
2503
+ | jmp <2
2504
+ |
2505
+ |9: // Rethrow error from the right C frame.
2506
+ | neg RD
2507
+ | mov CARG1, L:RB
2508
+ | mov CARG2, RD
2509
+ | call extern lj_err_throw // (lua_State *L, int errcode)
2510
+ |.endif
2511
+ |
2512
+ |//-----------------------------------------------------------------------
2513
+ |//-- Math helper functions ----------------------------------------------
2514
+ |//-----------------------------------------------------------------------
2515
+ |
2516
+ |// FP value rounding. Called by math.floor/math.ceil fast functions
2517
+ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2518
+ |.macro vm_round, name, mode, cond // mode: 2 = trunc, 1 = ceil, anything else = floor. cond is not referenced in this body.
2519
+ |->name: // Emits two global labels for the same code: name and name_sse.
2520
+ |->name .. _sse:
2521
+ | sseconst_abs xmm2, RD // xmm2 = mask that is ANDed with x below to form |x|.
2522
+ | sseconst_2p52 xmm3, RD // xmm3 = the 2^52 constant used by the compare and add/sub below.
2523
+ | movaps xmm1, xmm0
2524
+ | andpd xmm1, xmm2 // |x|
2525
+ | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2526
+ | jbe >1 // Also taken when the compare is unordered; xmm0 is returned untouched.
2527
+ | andnpd xmm2, xmm0 // Isolate sign bit.
2528
+ |.if mode == 2 // trunc(x)?
2529
+ | movaps xmm0, xmm1 // Keep |x| for the compare against the rounded value.
2530
+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2531
+ | subsd xmm1, xmm3
2532
+ | sseconst_1 xmm3, RD
2533
+ | cmpsd xmm0, xmm1, 1 // |x| < result?
2534
+ | andpd xmm0, xmm3 // Compare mask & constant: the constant if yes, else 0.
2535
+ | subsd xmm1, xmm0 // If yes, subtract 1.0.
2536
+ | orpd xmm1, xmm2 // Merge sign bit back in.
2537
+ |.else
2538
+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2539
+ | subsd xmm1, xmm3
2540
+ | orpd xmm1, xmm2 // Merge sign bit back in.
2541
+ | .if mode == 1 // ceil(x)?
2542
+ | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2543
+ | cmpsd xmm0, xmm1, 6 // x > result?
2544
+ | .else // floor(x)?
2545
+ | sseconst_1 xmm2, RD
2546
+ | cmpsd xmm0, xmm1, 1 // x < result?
2547
+ | .endif
2548
+ | andpd xmm0, xmm2 // Compare mask & constant: the constant if yes, else 0.
2549
+ | subsd xmm1, xmm0 // If yes, subtract +-1.
2550
+ |.endif
2551
+ | movaps xmm0, xmm1 // Move the rounded value into the return register.
2552
+ |1:
2553
+ | ret
2554
+ |.endmacro
2555
+ |
2556
+ | vm_round vm_floor, 0, 1
2557
+ | vm_round vm_ceil, 1, JIT
2558
+ | vm_round vm_trunc, 2, JIT
2559
+ |
2560
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith. Computes x - y*q, where q is x/y adjusted downwards as shown below.
2561
+ |->vm_mod:
2562
+ |// Args in xmm0/xmm1, return value in xmm0.
2563
+ |// Caveat: xmm0-xmm5 and RC (eax) modified!
2564
+ | movaps xmm5, xmm0 // Keep x; xmm0 is reused for the quotient.
2565
+ | divsd xmm0, xmm1 // xmm0 = x/y
2566
+ | sseconst_abs xmm2, RD // xmm2 = mask that is ANDed with x/y below to form |x/y|.
2567
+ | sseconst_2p52 xmm3, RD // xmm3 = the 2^52 constant used by the compare and add/sub below.
2568
+ | movaps xmm4, xmm0
2569
+ | andpd xmm4, xmm2 // |x/y|
2570
+ | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2571
+ | jbe >1 // Also taken when the compare is unordered.
2572
+ | andnpd xmm2, xmm0 // Isolate sign bit.
2573
+ | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2574
+ | subsd xmm4, xmm3
2575
+ | orpd xmm4, xmm2 // Merge sign bit back in.
2576
+ | sseconst_1 xmm2, RD
2577
+ | cmpsd xmm0, xmm4, 1 // x/y < result?
2578
+ | andpd xmm0, xmm2 // Compare mask & constant: the constant if yes, else 0.
2579
+ | subsd xmm4, xmm0 // If yes, subtract 1.0.
2580
+ | movaps xmm0, xmm5 // xmm0 = x again.
2581
+ | mulsd xmm1, xmm4 // y * adjusted quotient
2582
+ | subsd xmm0, xmm1 // x - y * adjusted quotient
2583
+ | ret
2584
+ |1: // Quotient not adjusted: use x/y from xmm0 as is.
2585
+ | mulsd xmm1, xmm0 // y * (x/y)
2586
+ | movaps xmm0, xmm5 // xmm0 = x again.
2587
+ | subsd xmm0, xmm1 // x - y * (x/y)
2588
+ | ret
2589
+ |
2590
+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. Raises x (xmm0) to the integer power i (eax) by repeated squaring.
2591
+ |->vm_powi_sse:
2592
+ | cmp eax, 1; jle >6 // i<=1?
2593
+ | // Now 1 < (unsigned)i <= 0x80000000.
2594
+ |1: // Handle leading zeros: while bit 0 of i is clear, square x and shift i right.
2595
+ | test eax, 1; jnz >2
2596
+ | mulsd xmm0, xmm0
2597
+ | shr eax, 1
2598
+ | jmp <1
2599
+ |2:
2600
+ | shr eax, 1; jz >5 // Drop the set bit just found; if no bits remain, xmm0 is the result.
2601
+ | movaps xmm1, xmm0 // xmm1 accumulates the product of the powers selected by set bits.
2602
+ |3: // Handle trailing bits.
2603
+ | mulsd xmm0, xmm0
2604
+ | shr eax, 1; jz >4 // i is now zero: the bit shifted out was its last set bit.
2605
+ | jnc <3 // Shifted-out bit was 0: only square.
2606
+ | mulsd xmm1, xmm0 // Shifted-out bit was 1: fold this power into the accumulator.
2607
+ | jmp <3
2608
+ |4:
2609
+ | mulsd xmm0, xmm1 // Combine the final power with the accumulator.
2610
+ |5:
2611
+ | ret
2612
+ |6: // i <= 1. The flags are still those of the cmp eax, 1 at entry.
2613
+ | je <5 // x^1 ==> x
2614
+ | jb >7 // x^0 ==> 1
2615
+ | neg eax // Negative i: compute x^(-i) with the loop above, then take the reciprocal.
2616
+ | call <1
2617
+ | sseconst_1 xmm1, RD
2618
+ | divsd xmm1, xmm0 // xmm1 = constant / x^(-i)
2619
+ | movaps xmm0, xmm1
2620
+ | ret
2621
+ |7:
2622
+ | sseconst_1 xmm0, RD
2623
+ | ret
2624
+ |
2625
+ |//-----------------------------------------------------------------------
2626
+ |//-- Miscellaneous functions --------------------------------------------
2627
+ |//-----------------------------------------------------------------------
2628
+ |
2629
+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]): runs cpuid for function f and stores eax, ebx, ecx, edx to res[0..3].
2630
+ |->vm_cpuid:
2631
+ | mov eax, CARG1d // eax = f, the function number handed to cpuid.
2632
+ | .if X64WIN; push rsi; mov rsi, CARG2; .endif
2633
+ | push rbx // Saved because cpuid writes ebx (stored to res[1] below); restored before ret.
2634
+ | cpuid // NOTE(review): ecx is not set before this; a function that takes a sub-leaf in ecx would see whatever the caller left there.
2635
+ | mov [rsi], eax // NOTE(review): only X64WIN builds load rsi from CARG2 above; other builds presumably have CARG2 == rsi already - confirm.
2636
+ | mov [rsi+4], ebx
2637
+ | mov [rsi+8], ecx
2638
+ | mov [rsi+12], edx
2639
+ | pop rbx
2640
+ | .if X64WIN; pop rsi; .endif
2641
+ | ret // NOTE(review): no instruction writes eax after cpuid, so the int result is cpuid's eax output.
2642
+ |
2643
+ |//-----------------------------------------------------------------------
2644
+ |//-- Assertions ---------------------------------------------------------
2645
+ |//-----------------------------------------------------------------------
2646
+ |
2647
+ |->assert_bad_for_arg_type:
2648
+ #ifdef LUA_USE_ASSERT
2649
+ | int3
2650
+ #endif
2651
+ | int3
2652
+ |
2653
+ |//-----------------------------------------------------------------------
2654
+ |//-- FFI helper functions -----------------------------------------------
2655
+ |//-----------------------------------------------------------------------
2656
+ |
2657
+ |// Handler for callback functions. Callback slot number in ah/al.
2658
+ |->vm_ffi_callback:
2659
+ |.if FFI
2660
+ |.type CTSTATE, CTState, PC
2661
+ | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2662
+ | lea DISPATCH, [ebp+GG_G2DISP]
2663
+ | mov CTSTATE, GL:ebp->ctype_state
2664
+ | movzx eax, ax
2665
+ | mov CTSTATE->cb.slot, eax
2666
+ | mov CTSTATE->cb.gpr[0], CARG1
2667
+ | mov CTSTATE->cb.gpr[1], CARG2
2668
+ | mov CTSTATE->cb.gpr[2], CARG3
2669
+ | mov CTSTATE->cb.gpr[3], CARG4
2670
+ | movsd qword CTSTATE->cb.fpr[0], xmm0
2671
+ | movsd qword CTSTATE->cb.fpr[1], xmm1
2672
+ | movsd qword CTSTATE->cb.fpr[2], xmm2
2673
+ | movsd qword CTSTATE->cb.fpr[3], xmm3
2674
+ |.if X64WIN
2675
+ | lea rax, [rsp+CFRAME_SIZE+4*8]
2676
+ |.else
2677
+ | lea rax, [rsp+CFRAME_SIZE]
2678
+ | mov CTSTATE->cb.gpr[4], CARG5
2679
+ | mov CTSTATE->cb.gpr[5], CARG6
2680
+ | movsd qword CTSTATE->cb.fpr[4], xmm4
2681
+ | movsd qword CTSTATE->cb.fpr[5], xmm5
2682
+ | movsd qword CTSTATE->cb.fpr[6], xmm6
2683
+ | movsd qword CTSTATE->cb.fpr[7], xmm7
2684
+ |.endif
2685
+ | mov CTSTATE->cb.stack, rax
2686
+ | mov CARG2, rsp
2687
+ | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2688
+ | mov CARG1, CTSTATE
2689
+ | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2690
+ | // lua_State * returned in eax (RD).
2691
+ | set_vmstate INTERP
2692
+ | mov BASE, L:RD->base
2693
+ | mov RD, L:RD->top
2694
+ | sub RD, BASE
2695
+ | mov LFUNC:RB, [BASE-16]
2696
+ | cleartp LFUNC:RB
2697
+ | shr RD, 3
2698
+ | add RD, 1
2699
+ | ins_callt
2700
+ |.endif
2701
+ |
2702
+ |->cont_ffi_callback: // Return from FFI callback.
2703
+ |.if FFI
2704
+ | mov L:RA, SAVE_L
2705
+ | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2706
+ | mov aword CTSTATE->L, L:RA
2707
+ | mov L:RA->base, BASE
2708
+ | mov L:RA->top, RB
2709
+ | mov CARG1, CTSTATE
2710
+ | mov CARG2, RC
2711
+ | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2712
+ | mov rax, CTSTATE->cb.gpr[0]
2713
+ | movsd xmm0, qword CTSTATE->cb.fpr[0]
2714
+ | jmp ->vm_leave_unw
2715
+ |.endif
2716
+ |
2717
+ |->vm_ffi_call: // Call C function via FFI.
2718
+ | // Caveat: needs special frame unwinding, see below.
2719
+ |.if FFI
2720
+ | .type CCSTATE, CCallState, rbx
2721
+ | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
2722
+ |
2723
+ | // Readjust stack.
2724
+ | mov eax, CCSTATE->spadj
2725
+ | sub rsp, rax
2726
+ |
2727
+ | // Copy stack slots.
2728
+ | movzx ecx, byte CCSTATE->nsp
2729
+ | sub ecx, 1
2730
+ | js >2
2731
+ |1:
2732
+ | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
2733
+ | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
2734
+ | sub ecx, 1
2735
+ | jns <1
2736
+ |2:
2737
+ |
2738
+ | movzx eax, byte CCSTATE->nfpr
2739
+ | mov CARG1, CCSTATE->gpr[0]
2740
+ | mov CARG2, CCSTATE->gpr[1]
2741
+ | mov CARG3, CCSTATE->gpr[2]
2742
+ | mov CARG4, CCSTATE->gpr[3]
2743
+ |.if not X64WIN
2744
+ | mov CARG5, CCSTATE->gpr[4]
2745
+ | mov CARG6, CCSTATE->gpr[5]
2746
+ |.endif
2747
+ | test eax, eax; jz >5
2748
+ | movaps xmm0, CCSTATE->fpr[0]
2749
+ | movaps xmm1, CCSTATE->fpr[1]
2750
+ | movaps xmm2, CCSTATE->fpr[2]
2751
+ | movaps xmm3, CCSTATE->fpr[3]
2752
+ |.if not X64WIN
2753
+ | cmp eax, 4; jbe >5
2754
+ | movaps xmm4, CCSTATE->fpr[4]
2755
+ | movaps xmm5, CCSTATE->fpr[5]
2756
+ | movaps xmm6, CCSTATE->fpr[6]
2757
+ | movaps xmm7, CCSTATE->fpr[7]
2758
+ |.endif
2759
+ |5:
2760
+ |
2761
+ | call aword CCSTATE->func
2762
+ |
2763
+ | mov CCSTATE->gpr[0], rax
2764
+ | movaps CCSTATE->fpr[0], xmm0
2765
+ |.if not X64WIN
2766
+ | mov CCSTATE->gpr[1], rdx
2767
+ | movaps CCSTATE->fpr[1], xmm1
2768
+ |.endif
2769
+ |
2770
+ | mov rbx, [rbp-8]; leave; ret
2771
+ |.endif
2772
+ |// Note: vm_ffi_call must be the last function in this object file!
2773
+ |
2774
+ |//-----------------------------------------------------------------------
2775
+ }
2776
+
2777
+ /* Generate the code for a single instruction. */
2778
+ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2779
+ {
2780
+ int vk = 0;
2781
+ |// Note: aligning all instructions does not pay off.
2782
+ |=>defop:
2783
+
2784
+ switch (op) {
2785
+
2786
+ /* -- Comparison ops ---------------------------------------------------- */
2787
+
2788
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
2789
+
2790
+ |.macro jmp_comp, lt, ge, le, gt, target // Emits exactly one jump to target; the C variable op picks which of the four given jump mnemonics is used.
2791
+ ||switch (op) { /* Evaluated in C when the code for this opcode is generated. */
2792
+ ||case BC_ISLT:
2793
+ | lt target // Mnemonic passed in the first slot.
2794
+ ||break;
2795
+ ||case BC_ISGE:
2796
+ | ge target // Mnemonic passed in the second slot.
2797
+ ||break;
2798
+ ||case BC_ISLE:
2799
+ | le target // Mnemonic passed in the third slot.
2800
+ ||break;
2801
+ ||case BC_ISGT:
2802
+ | gt target // Mnemonic passed in the fourth slot.
2803
+ ||break;
2804
+ ||default: break; /* Shut up GCC. No jump is emitted for any other opcode. */
2805
+ ||}
2806
+ |.endmacro
2807
+
2808
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2809
+ | // RA = src1, RD = src2, JMP with RD = target
2810
+ | ins_AD
2811
+ | mov ITYPE, [BASE+RA*8]
2812
+ | mov RB, [BASE+RD*8]
2813
+ | mov RA, ITYPE
2814
+ | mov RD, RB
2815
+ | sar ITYPE, 47
2816
+ | sar RB, 47
2817
+ |.if DUALNUM
2818
+ | cmp ITYPEd, LJ_TISNUM; jne >7
2819
+ | cmp RBd, LJ_TISNUM; jne >8
2820
+ | add PC, 4
2821
+ | cmp RAd, RDd
2822
+ | jmp_comp jge, jl, jg, jle, >9
2823
+ |6:
2824
+ | movzx RDd, PC_RD
2825
+ | branchPC RD
2826
+ |9:
2827
+ | ins_next
2828
+ |
2829
+ |7: // RA is not an integer.
2830
+ | ja ->vmeta_comp
2831
+ | // RA is a number.
2832
+ | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2833
+ | // RA is a number, RD is an integer.
2834
+ | cvtsi2sd xmm0, RDd
2835
+ | jmp >2
2836
+ |
2837
+ |8: // RA is an integer, RD is not an integer.
2838
+ | ja ->vmeta_comp
2839
+ | // RA is an integer, RD is a number.
2840
+ | cvtsi2sd xmm1, RAd
2841
+ | movd xmm0, RD
2842
+ | jmp >3
2843
+ |.else
2844
+ | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2845
+ | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2846
+ |.endif
2847
+ |1:
2848
+ | movd xmm0, RD
2849
+ |2:
2850
+ | movd xmm1, RA
2851
+ |3:
2852
+ | add PC, 4
2853
+ | ucomisd xmm0, xmm1
2854
+ | // Unordered: all of ZF CF PF set, ordered: PF clear.
2855
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2856
+ |.if DUALNUM
2857
+ | jmp_comp jbe, ja, jb, jae, <9
2858
+ | jmp <6
2859
+ |.else
2860
+ | jmp_comp jbe, ja, jb, jae, >1
2861
+ | movzx RDd, PC_RD
2862
+ | branchPC RD
2863
+ |1:
2864
+ | ins_next
2865
+ |.endif
2866
+ break;
2867
+
2868
+ case BC_ISEQV: case BC_ISNEV:
2869
+ vk = op == BC_ISEQV;
2870
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
2871
+ | mov RB, [BASE+RD*8]
2872
+ | mov ITYPE, [BASE+RA*8]
2873
+ | add PC, 4
2874
+ | mov RD, RB
2875
+ | mov RA, ITYPE
2876
+ | sar RB, 47
2877
+ | sar ITYPE, 47
2878
+ |.if DUALNUM
2879
+ | cmp RBd, LJ_TISNUM; jne >7
2880
+ | cmp ITYPEd, LJ_TISNUM; jne >8
2881
+ | cmp RDd, RAd
2882
+ if (vk) {
2883
+ | jne >9
2884
+ } else {
2885
+ | je >9
2886
+ }
2887
+ | movzx RDd, PC_RD
2888
+ | branchPC RD
2889
+ |9:
2890
+ | ins_next
2891
+ |
2892
+ |7: // RD is not an integer.
2893
+ | ja >5
2894
+ | // RD is a number.
2895
+ | movd xmm1, RD
2896
+ | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2897
+ | // RD is a number, RA is an integer.
2898
+ | cvtsi2sd xmm0, RAd
2899
+ | jmp >2
2900
+ |
2901
+ |8: // RD is an integer, RA is not an integer.
2902
+ | ja >5
2903
+ | // RD is an integer, RA is a number.
2904
+ | cvtsi2sd xmm1, RDd
2905
+ | jmp >1
2906
+ |
2907
+ |.else
2908
+ | cmp RBd, LJ_TISNUM; jae >5
2909
+ | cmp ITYPEd, LJ_TISNUM; jae >5
2910
+ | movd xmm1, RD
2911
+ |.endif
2912
+ |1:
2913
+ | movd xmm0, RA
2914
+ |2:
2915
+ | ucomisd xmm0, xmm1
2916
+ |4:
2917
+ iseqne_fp:
2918
+ if (vk) {
2919
+ | jp >2 // Unordered means not equal.
2920
+ | jne >2
2921
+ } else {
2922
+ | jp >2 // Unordered means not equal.
2923
+ | je >1
2924
+ }
2925
+ iseqne_end:
2926
+ if (vk) {
2927
+ |1: // EQ: Branch to the target.
2928
+ | movzx RDd, PC_RD
2929
+ | branchPC RD
2930
+ |2: // NE: Fallthrough to next instruction.
2931
+ |.if not FFI
2932
+ |3:
2933
+ |.endif
2934
+ } else {
2935
+ |.if not FFI
2936
+ |3:
2937
+ |.endif
2938
+ |2: // NE: Branch to the target.
2939
+ | movzx RDd, PC_RD
2940
+ | branchPC RD
2941
+ |1: // EQ: Fallthrough to next instruction.
2942
+ }
2943
+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2944
+ op == BC_ISEQN || op == BC_ISNEN)) {
2945
+ | jmp <9
2946
+ } else {
2947
+ | ins_next
2948
+ }
2949
+ |
2950
+ if (op == BC_ISEQV || op == BC_ISNEV) {
2951
+ |5: // Either or both types are not numbers.
2952
+ |.if FFI
2953
+ | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2954
+ | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2955
+ |.endif
2956
+ | cmp RA, RD
2957
+ | je <1 // Same GCobjs or pvalues?
2958
+ | cmp RBd, ITYPEd
2959
+ | jne <2 // Not the same type?
2960
+ | cmp RBd, LJ_TISTABUD
2961
+ | ja <2 // Different objects and not table/ud?
2962
+ |
2963
+ | // Different tables or userdatas. Need to check __eq metamethod.
2964
+ | // Field metatable must be at same offset for GCtab and GCudata!
2965
+ | cleartp TAB:RA
2966
+ | mov TAB:RB, TAB:RA->metatable
2967
+ | test TAB:RB, TAB:RB
2968
+ | jz <2 // No metatable?
2969
+ | test byte TAB:RB->nomm, 1<<MM_eq
2970
+ | jnz <2 // Or 'no __eq' flag set?
2971
+ if (vk) {
2972
+ | xor RBd, RBd // ne = 0
2973
+ } else {
2974
+ | mov RBd, 1 // ne = 1
2975
+ }
2976
+ | jmp ->vmeta_equal // Handle __eq metamethod.
2977
+ } else {
2978
+ |.if FFI
2979
+ |3:
2980
+ | cmp ITYPEd, LJ_TCDATA
2981
+ if (LJ_DUALNUM && vk) {
2982
+ | jne <9
2983
+ } else {
2984
+ | jne <2
2985
+ }
2986
+ | jmp ->vmeta_equal_cd
2987
+ |.endif
2988
+ }
2989
+ break;
2990
+ case BC_ISEQS: case BC_ISNES:
2991
+ vk = op == BC_ISEQS;
2992
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
2993
+ | mov RB, [BASE+RA*8]
2994
+ | add PC, 4
2995
+ | checkstr RB, >3
2996
+ | cmp RB, [KBASE+RD*8]
2997
+ iseqne_test:
2998
+ if (vk) {
2999
+ | jne >2
3000
+ } else {
3001
+ | je >1
3002
+ }
3003
+ goto iseqne_end;
3004
+ case BC_ISEQN: case BC_ISNEN:
3005
+ vk = op == BC_ISEQN;
3006
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
3007
+ | mov RB, [BASE+RA*8]
3008
+ | add PC, 4
3009
+ |.if DUALNUM
3010
+ | checkint RB, >7
3011
+ | mov RD, [KBASE+RD*8]
3012
+ | checkint RD, >8
3013
+ | cmp RBd, RDd
3014
+ if (vk) {
3015
+ | jne >9
3016
+ } else {
3017
+ | je >9
3018
+ }
3019
+ | movzx RDd, PC_RD
3020
+ | branchPC RD
3021
+ |9:
3022
+ | ins_next
3023
+ |
3024
+ |7: // RA is not an integer.
3025
+ | ja >3
3026
+ | // RA is a number.
3027
+ | mov RD, [KBASE+RD*8]
3028
+ | checkint RD, >1
3029
+ | // RA is a number, RD is an integer.
3030
+ | cvtsi2sd xmm0, RDd
3031
+ | jmp >2
3032
+ |
3033
+ |8: // RA is an integer, RD is a number.
3034
+ | cvtsi2sd xmm0, RBd
3035
+ | movd xmm1, RD
3036
+ | ucomisd xmm0, xmm1
3037
+ | jmp >4
3038
+ |1:
3039
+ | movd xmm0, RD
3040
+ |.else
3041
+ | checknum RB, >3
3042
+ |1:
3043
+ | movsd xmm0, qword [KBASE+RD*8]
3044
+ |.endif
3045
+ |2:
3046
+ | ucomisd xmm0, qword [BASE+RA*8]
3047
+ |4:
3048
+ goto iseqne_fp;
3049
+ case BC_ISEQP: case BC_ISNEP:
3050
+ vk = op == BC_ISEQP;
3051
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3052
+ | mov RB, [BASE+RA*8]
3053
+ | sar RB, 47
3054
+ | add PC, 4
3055
+ | cmp RBd, RDd
3056
+ if (!LJ_HASFFI) goto iseqne_test;
3057
+ if (vk) {
3058
+ | jne >3
3059
+ | movzx RDd, PC_RD
3060
+ | branchPC RD
3061
+ |2:
3062
+ | ins_next
3063
+ |3:
3064
+ | cmp RBd, LJ_TCDATA; jne <2
3065
+ | jmp ->vmeta_equal_cd
3066
+ } else {
3067
+ | je >2
3068
+ | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3069
+ | movzx RDd, PC_RD
3070
+ | branchPC RD
3071
+ |2:
3072
+ | ins_next
3073
+ }
3074
+ break;
3075
+
3076
+ /* -- Unary test and copy ops ------------------------------------------- */
3077
+
3078
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3079
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3080
+ | mov ITYPE, [BASE+RD*8]
3081
+ | add PC, 4
3082
+ if (op == BC_ISTC || op == BC_ISFC) {
3083
+ | mov RB, ITYPE
3084
+ }
3085
+ | sar ITYPE, 47
3086
+ | cmp ITYPEd, LJ_TISTRUECOND
3087
+ if (op == BC_IST || op == BC_ISTC) {
3088
+ | jae >1
3089
+ } else {
3090
+ | jb >1
3091
+ }
3092
+ if (op == BC_ISTC || op == BC_ISFC) {
3093
+ | mov [BASE+RA*8], RB
3094
+ }
3095
+ | movzx RDd, PC_RD
3096
+ | branchPC RD
3097
+ |1: // Fallthrough to the next instruction.
3098
+ | ins_next
3099
+ break;
3100
+
3101
+ case BC_ISTYPE:
3102
+ | ins_AD // RA = src, RD = -type
3103
+ | mov RB, [BASE+RA*8]
3104
+ | sar RB, 47
3105
+ | add RBd, RDd
3106
+ | jne ->vmeta_istype
3107
+ | ins_next
3108
+ break;
3109
+ case BC_ISNUM:
3110
+ | ins_AD // RA = src, RD = -(TISNUM-1)
3111
+ | checknumtp [BASE+RA*8], ->vmeta_istype
3112
+ | ins_next
3113
+ break;
3114
+
3115
+ /* -- Unary ops --------------------------------------------------------- */
3116
+
3117
+ case BC_MOV:
3118
+ | ins_AD // RA = dst, RD = src
3119
+ | mov RB, [BASE+RD*8]
3120
+ | mov [BASE+RA*8], RB
3121
+ | ins_next_
3122
+ break;
3123
+ case BC_NOT:
3124
+ | ins_AD // RA = dst, RD = src
3125
+ | mov RB, [BASE+RD*8]
3126
+ | sar RB, 47
3127
+ | mov RCd, 2
3128
+ | cmp RB, LJ_TISTRUECOND
3129
+ | sbb RCd, 0
3130
+ | shl RC, 47
3131
+ | not RC
3132
+ | mov [BASE+RA*8], RC
3133
+ | ins_next
3134
+ break;
3135
+ case BC_UNM:
3136
+ | ins_AD // RA = dst, RD = src
3137
+ | mov RB, [BASE+RD*8]
3138
+ |.if DUALNUM
3139
+ | checkint RB, >5
3140
+ | neg RBd
3141
+ | jo >4
3142
+ | setint RB
3143
+ |9:
3144
+ | mov [BASE+RA*8], RB
3145
+ | ins_next
3146
+ |4:
3147
+ | mov64 RB, U64x(41e00000,00000000) // 2^31.
3148
+ | jmp <9
3149
+ |5:
3150
+ | ja ->vmeta_unm
3151
+ |.else
3152
+ | checknum RB, ->vmeta_unm
3153
+ |.endif
3154
+ | mov64 RD, U64x(80000000,00000000)
3155
+ | xor RB, RD
3156
+ |.if DUALNUM
3157
+ | jmp <9
3158
+ |.else
3159
+ | mov [BASE+RA*8], RB
3160
+ | ins_next
3161
+ |.endif
3162
+ break;
3163
+ case BC_LEN:
3164
+ | ins_AD // RA = dst, RD = src
3165
+ | mov RD, [BASE+RD*8]
3166
+ | checkstr RD, >2
3167
+ |.if DUALNUM
3168
+ | mov RDd, dword STR:RD->len
3169
+ |1:
3170
+ | setint RD
3171
+ | mov [BASE+RA*8], RD
3172
+ |.else
3173
+ | xorps xmm0, xmm0
3174
+ | cvtsi2sd xmm0, dword STR:RD->len
3175
+ |1:
3176
+ | movsd qword [BASE+RA*8], xmm0
3177
+ |.endif
3178
+ | ins_next
3179
+ |2:
3180
+ | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3181
+ | mov TAB:CARG1, TAB:RD
3182
+ #if LJ_52
3183
+ | mov TAB:RB, TAB:RD->metatable
3184
+ | cmp TAB:RB, 0
3185
+ | jnz >9
3186
+ |3:
3187
+ #endif
3188
+ |->BC_LEN_Z:
3189
+ | mov RB, BASE // Save BASE.
3190
+ | call extern lj_tab_len // (GCtab *t)
3191
+ | // Length of table returned in eax (RD).
3192
+ |.if DUALNUM
3193
+ | // Nothing to do.
3194
+ |.else
3195
+ | cvtsi2sd xmm0, RDd
3196
+ |.endif
3197
+ | mov BASE, RB // Restore BASE.
3198
+ | movzx RAd, PC_RA
3199
+ | jmp <1
3200
+ #if LJ_52
3201
+ |9: // Check for __len.
3202
+ | test byte TAB:RB->nomm, 1<<MM_len
3203
+ | jnz <3
3204
+ | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3205
+ #endif
3206
+ break;
3207
+
3208
+ /* -- Binary ops -------------------------------------------------------- */
3209
+
3210
+ |.macro ins_arithpre, sseins, ssereg // Type-checks both operands, loads the first into xmm0 and applies sseins to ssereg with the second as memory operand.
3211
+ | ins_ABC
3212
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); /* Operand form index; presumably relies on the VN/NV/VV opcode groups being equally spaced - confirm against the BCOp enum. */
3213
+ ||switch (vk) {
3214
+ ||case 0: /* First operand from the stack slot RB, second from the constant slot RC. */
3215
+ | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3216
+ | .if DUALNUM
3217
+ | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3218
+ | .endif
3219
+ | movsd xmm0, qword [BASE+RB*8]
3220
+ | sseins ssereg, qword [KBASE+RC*8]
3221
+ || break;
3222
+ ||case 1: /* First operand from the constant slot RC, second from the stack slot RB. */
3223
+ | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3224
+ | .if DUALNUM
3225
+ | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3226
+ | .endif
3227
+ | movsd xmm0, qword [KBASE+RC*8]
3228
+ | sseins ssereg, qword [BASE+RB*8]
3229
+ || break;
3230
+ ||default: /* Both operands from stack slots: RB first, RC second. */
3231
+ | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3232
+ | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3233
+ | movsd xmm0, qword [BASE+RB*8]
3234
+ | sseins ssereg, qword [BASE+RC*8]
3235
+ || break;
3236
+ ||}
3237
+ |.endmacro
3238
+ |
3239
+ |.macro ins_arithdn, intins // Integer arithmetic: applies intins to two operands, storing the result to slot RA; failed integer checks or overflow jump to the vmeta_arith_*o labels.
3240
+ | ins_ABC
3241
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); /* Operand form index; presumably relies on the VN/NV/VV opcode groups being equally spaced - confirm against the BCOp enum. */
3242
+ ||switch (vk) {
3243
+ ||case 0: /* Stack slot RB (op) constant slot RC; result in RB. */
3244
+ | mov RB, [BASE+RB*8]
3245
+ | mov RC, [KBASE+RC*8]
3246
+ | checkint RB, ->vmeta_arith_vno
3247
+ | checkint RC, ->vmeta_arith_vno
3248
+ | intins RBd, RCd; jo ->vmeta_arith_vno // Overflow leaves via the same label as a failed integer check.
3249
+ || break;
3250
+ ||case 1: /* Constant slot RC (op) stack slot RB; result in RC. */
3251
+ | mov RB, [BASE+RB*8]
3252
+ | mov RC, [KBASE+RC*8]
3253
+ | checkint RB, ->vmeta_arith_nvo
3254
+ | checkint RC, ->vmeta_arith_nvo
3255
+ | intins RCd, RBd; jo ->vmeta_arith_nvo // Operands swapped relative to case 0, so the result lands in RC.
3256
+ || break;
3257
+ ||default: /* Stack slot RB (op) stack slot RC; result in RB. */
3258
+ | mov RB, [BASE+RB*8]
3259
+ | mov RC, [BASE+RC*8]
3260
+ | checkint RB, ->vmeta_arith_vvo
3261
+ | checkint RC, ->vmeta_arith_vvo
3262
+ | intins RBd, RCd; jo ->vmeta_arith_vvo
3263
+ || break;
3264
+ ||}
3265
+ ||if (vk == 1) { /* Store from whichever register received the result above. */
3266
+ | setint RC
3267
+ | mov [BASE+RA*8], RC
3268
+ ||} else {
3269
+ | setint RB
3270
+ | mov [BASE+RA*8], RB
3271
+ ||}
3272
+ | ins_next
3273
+ |.endmacro
3274
+ |
3275
+ |.macro ins_arithpost // Stores the result held in xmm0 to the destination slot RA.
3276
+ | movsd qword [BASE+RA*8], xmm0
3277
+ |.endmacro
3278
+ |
3279
+ |.macro ins_arith, sseins // One-argument form: complete FP arithmetic instruction built from the pre/post macros.
3280
+ | ins_arithpre sseins, xmm0 // Operand checks and loads, then sseins applied to xmm0.
3281
+ | ins_arithpost // Store xmm0 to slot RA.
3282
+ | ins_next
3283
+ |.endmacro
3284
+ |
3285
+ |.macro ins_arith, intins, sseins // Two-argument form: picks the integer or the FP implementation depending on DUALNUM.
3286
+ |.if DUALNUM
3287
+ | ins_arithdn intins // DUALNUM build: only intins is used here.
3288
+ |.else
3289
+ | ins_arith, sseins // Otherwise expand the one-argument ins_arith with sseins.
3290
+ |.endif
3291
+ |.endmacro
3292
+
3293
+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
3294
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3295
+ | ins_arith add, addsd
3296
+ break;
3297
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3298
+ | ins_arith sub, subsd
3299
+ break;
3300
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
3301
+ | ins_arith imul, mulsd
3302
+ break;
3303
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3304
+ | ins_arith divsd
3305
+ break;
3306
+ case BC_MODVN:
3307
+ | ins_arithpre movsd, xmm1
3308
+ |->BC_MODVN_Z:
3309
+ | call ->vm_mod
3310
+ | ins_arithpost
3311
+ | ins_next
3312
+ break;
3313
+ case BC_MODNV: case BC_MODVV:
3314
+ | ins_arithpre movsd, xmm1
3315
+ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3316
+ break;
3317
+ case BC_POW:
3318
+ | ins_arithpre movsd, xmm1
3319
+ | mov RB, BASE
3320
+ | call extern pow
3321
+ | movzx RAd, PC_RA
3322
+ | mov BASE, RB
3323
+ | ins_arithpost
3324
+ | ins_next
3325
+ break;
3326
+
3327
+ case BC_CAT:
3328
+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
3329
+ | mov L:CARG1, SAVE_L
3330
+ | mov L:CARG1->base, BASE
3331
+ | lea CARG2, [BASE+RC*8]
3332
+ | mov CARG3d, RCd
3333
+ | sub CARG3d, RBd
3334
+ |->BC_CAT_Z:
3335
+ | mov L:RB, L:CARG1
3336
+ | mov SAVE_PC, PC
3337
+ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3338
+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3339
+ | mov BASE, L:RB->base
3340
+ | test RC, RC
3341
+ | jnz ->vmeta_binop
3342
+ | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3343
+ | movzx RAd, PC_RA
3344
+ | mov RC, [BASE+RB*8]
3345
+ | mov [BASE+RA*8], RC
3346
+ | ins_next
3347
+ break;
3348
+
3349
+ /* -- Constant ops ------------------------------------------------------ */
3350
+
3351
+ case BC_KSTR:
3352
+ | ins_AND // RA = dst, RD = str const (~)
3353
+ | mov RD, [KBASE+RD*8]
3354
+ | settp RD, LJ_TSTR
3355
+ | mov [BASE+RA*8], RD
3356
+ | ins_next
3357
+ break;
3358
+ case BC_KCDATA:
3359
+ |.if FFI
3360
+ | ins_AND // RA = dst, RD = cdata const (~)
3361
+ | mov RD, [KBASE+RD*8]
3362
+ | settp RD, LJ_TCDATA
3363
+ | mov [BASE+RA*8], RD
3364
+ | ins_next
3365
+ |.endif
3366
+ break;
3367
+ case BC_KSHORT:
3368
+ | ins_AD // RA = dst, RD = signed int16 literal
3369
+ |.if DUALNUM
3370
+ | movsx RDd, RDW
3371
+ | setint RD
3372
+ | mov [BASE+RA*8], RD
3373
+ |.else
3374
+ | movsx RDd, RDW // Sign-extend literal.
3375
+ | cvtsi2sd xmm0, RDd
3376
+ | movsd qword [BASE+RA*8], xmm0
3377
+ |.endif
3378
+ | ins_next
3379
+ break;
3380
+ case BC_KNUM:
3381
+ | ins_AD // RA = dst, RD = num const
3382
+ | movsd xmm0, qword [KBASE+RD*8]
3383
+ | movsd qword [BASE+RA*8], xmm0
3384
+ | ins_next
3385
+ break;
3386
+ case BC_KPRI:
3387
+ | ins_AD // RA = dst, RD = primitive type (~)
3388
+ | shl RD, 47
3389
+ | not RD
3390
+ | mov [BASE+RA*8], RD
3391
+ | ins_next
3392
+ break;
3393
+ case BC_KNIL:
3394
+ | ins_AD // RA = dst_start, RD = dst_end
3395
+ | lea RA, [BASE+RA*8+8]
3396
+ | lea RD, [BASE+RD*8]
3397
+ | mov RB, LJ_TNIL
3398
+ | mov [RA-8], RB // Sets minimum 2 slots.
3399
+ |1:
3400
+ | mov [RA], RB
3401
+ | add RA, 8
3402
+ | cmp RA, RD
3403
+ | jbe <1
3404
+ | ins_next
3405
+ break;
3406
+
3407
+ /* -- Upvalue and function ops ------------------------------------------ */
3408
+
3409
+ case BC_UGET:
3410
+ | ins_AD // RA = dst, RD = upvalue #
3411
+ | mov LFUNC:RB, [BASE-16]
3412
+ | cleartp LFUNC:RB
3413
+ | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
3414
+ | mov RB, UPVAL:RB->v
3415
+ | mov RD, [RB]
3416
+ | mov [BASE+RA*8], RD
3417
+ | ins_next
3418
+ break;
3419
+ case BC_USETV:
3420
+ #define TV2MARKOFS \
3421
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3422
+ | ins_AD // RA = upvalue #, RD = src
3423
+ | mov LFUNC:RB, [BASE-16]
3424
+ | cleartp LFUNC:RB
3425
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3426
+ | cmp byte UPVAL:RB->closed, 0
3427
+ | mov RB, UPVAL:RB->v
3428
+ | mov RA, [BASE+RD*8]
3429
+ | mov [RB], RA
3430
+ | jz >1
3431
+ | // Check barrier for closed upvalue.
3432
+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3433
+ | jnz >2
3434
+ |1:
3435
+ | ins_next
3436
+ |
3437
+ |2: // Upvalue is black. Check if new value is collectable and white.
3438
+ | mov RD, RA
3439
+ | sar RD, 47
3440
+ | sub RDd, LJ_TISGCV
3441
+ | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3442
+ | jbe <1
3443
+ | cleartp GCOBJ:RA
3444
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3445
+ | jz <1
3446
+ | // Crossed a write barrier. Move the barrier forward.
3447
+ |.if not X64WIN
3448
+ | mov CARG2, RB
3449
+ | mov RB, BASE // Save BASE.
3450
+ |.else
3451
+ | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3452
+ |.endif
3453
+ | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3454
+ | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3455
+ | mov BASE, RB // Restore BASE.
3456
+ | jmp <1
3457
+ break;
3458
+ #undef TV2MARKOFS
3459
+ case BC_USETS:
3460
+ | ins_AND // RA = upvalue #, RD = str const (~)
3461
+ | mov LFUNC:RB, [BASE-16]
3462
+ | cleartp LFUNC:RB
3463
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3464
+ | mov STR:RA, [KBASE+RD*8]
3465
+ | mov RD, UPVAL:RB->v
3466
+ | settp STR:ITYPE, STR:RA, LJ_TSTR
3467
+ | mov [RD], STR:ITYPE
3468
+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3469
+ | jnz >2
3470
+ |1:
3471
+ | ins_next
3472
+ |
3473
+ |2: // Check if string is white and ensure upvalue is closed.
3474
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3475
+ | jz <1
3476
+ | cmp byte UPVAL:RB->closed, 0
3477
+ | jz <1
3478
+ | // Crossed a write barrier. Move the barrier forward.
3479
+ | mov RB, BASE // Save BASE (CARG2 == BASE).
3480
+ | mov CARG2, RD
3481
+ | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3482
+ | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3483
+ | mov BASE, RB // Restore BASE.
3484
+ | jmp <1
3485
+ break;
3486
+ case BC_USETN:
3487
+ | ins_AD // RA = upvalue #, RD = num const
3488
+ | mov LFUNC:RB, [BASE-16]
3489
+ | cleartp LFUNC:RB
3490
+ | movsd xmm0, qword [KBASE+RD*8]
3491
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3492
+ | mov RA, UPVAL:RB->v
3493
+ | movsd qword [RA], xmm0
3494
+ | ins_next
3495
+ break;
3496
+ case BC_USETP:
3497
+ | ins_AD // RA = upvalue #, RD = primitive type (~)
3498
+ | mov LFUNC:RB, [BASE-16]
3499
+ | cleartp LFUNC:RB
3500
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3501
+ | shl RD, 47
3502
+ | not RD
3503
+ | mov RA, UPVAL:RB->v
3504
+ | mov [RA], RD
3505
+ | ins_next
3506
+ break;
3507
+ case BC_UCLO:
3508
+ | ins_AD // RA = level, RD = target
3509
+ | branchPC RD // Do this first to free RD.
3510
+ | mov L:RB, SAVE_L
3511
+ | cmp dword L:RB->openupval, 0
3512
+ | je >1
3513
+ | mov L:RB->base, BASE
3514
+ | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3515
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3516
+ | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3517
+ | mov BASE, L:RB->base
3518
+ |1:
3519
+ | ins_next
3520
+ break;
3521
+
3522
+ case BC_FNEW:
3523
+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3524
+ | mov L:RB, SAVE_L
3525
+ | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3526
+ | mov CARG3, [BASE-16]
3527
+ | cleartp CARG3
3528
+ | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3529
+ | mov CARG1, L:RB
3530
+ | mov SAVE_PC, PC
3531
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3532
+ | call extern lj_func_newL_gc
3533
+ | // GCfuncL * returned in eax (RC).
3534
+ | mov BASE, L:RB->base
3535
+ | movzx RAd, PC_RA
3536
+ | settp LFUNC:RC, LJ_TFUNC
3537
+ | mov [BASE+RA*8], LFUNC:RC
3538
+ | ins_next
3539
+ break;
3540
+
3541
+ /* -- Table ops --------------------------------------------------------- */
3542
+
3543
+ case BC_TNEW:
3544
+ | ins_AD // RA = dst, RD = hbits|asize
3545
+ | mov L:RB, SAVE_L
3546
+ | mov L:RB->base, BASE
3547
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3548
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3549
+ | mov SAVE_PC, PC
3550
+ | jae >5
3551
+ |1:
3552
+ | mov CARG3d, RDd
3553
+ | and RDd, 0x7ff
3554
+ | shr CARG3d, 11
3555
+ | cmp RDd, 0x7ff
3556
+ | je >3
3557
+ |2:
3558
+ | mov L:CARG1, L:RB
3559
+ | mov CARG2d, RDd
3560
+ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3561
+ | // Table * returned in eax (RC).
3562
+ | mov BASE, L:RB->base
3563
+ | movzx RAd, PC_RA
3564
+ | settp TAB:RC, LJ_TTAB
3565
+ | mov [BASE+RA*8], TAB:RC
3566
+ | ins_next
3567
+ |3: // Turn 0x7ff into 0x801.
3568
+ | mov RDd, 0x801
3569
+ | jmp <2
3570
+ |5:
3571
+ | mov L:CARG1, L:RB
3572
+ | call extern lj_gc_step_fixtop // (lua_State *L)
3573
+ | movzx RDd, PC_RD
3574
+ | jmp <1
3575
+ break;
3576
+ case BC_TDUP:
3577
+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
3578
+ | mov L:RB, SAVE_L
3579
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3580
+ | mov SAVE_PC, PC
3581
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3582
+ | mov L:RB->base, BASE
3583
+ | jae >3
3584
+ |2:
3585
+ | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3586
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3587
+ | call extern lj_tab_dup // (lua_State *L, Table *kt)
3588
+ | // Table * returned in eax (RC).
3589
+ | mov BASE, L:RB->base
3590
+ | movzx RAd, PC_RA
3591
+ | settp TAB:RC, LJ_TTAB
3592
+ | mov [BASE+RA*8], TAB:RC
3593
+ | ins_next
3594
+ |3:
3595
+ | mov L:CARG1, L:RB
3596
+ | call extern lj_gc_step_fixtop // (lua_State *L)
3597
+ | movzx RDd, PC_RD // Need to reload RD.
3598
+ | not RD
3599
+ | jmp <2
3600
+ break;
3601
+
3602
+ case BC_GGET:
3603
+ | ins_AND // RA = dst, RD = str const (~)
3604
+ | mov LFUNC:RB, [BASE-16]
3605
+ | cleartp LFUNC:RB
3606
+ | mov TAB:RB, LFUNC:RB->env
3607
+ | mov STR:RC, [KBASE+RD*8]
3608
+ | jmp ->BC_TGETS_Z
3609
+ break;
3610
+ case BC_GSET:
3611
+ | ins_AND // RA = src, RD = str const (~)
3612
+ | mov LFUNC:RB, [BASE-16]
3613
+ | cleartp LFUNC:RB
3614
+ | mov TAB:RB, LFUNC:RB->env
3615
+ | mov STR:RC, [KBASE+RD*8]
3616
+ | jmp ->BC_TSETS_Z
3617
+ break;
3618
+
3619
+ case BC_TGETV:
3620
+ | ins_ABC // RA = dst, RB = table, RC = key
3621
+ | mov TAB:RB, [BASE+RB*8]
3622
+ | mov RC, [BASE+RC*8]
3623
+ | checktab TAB:RB, ->vmeta_tgetv
3624
+ |
3625
+ | // Integer key?
3626
+ |.if DUALNUM
3627
+ | checkint RC, >5
3628
+ |.else
3629
+ | // Convert number to int and back and compare.
3630
+ | checknum RC, >5
3631
+ | movd xmm0, RC
3632
+ | cvttsd2si RCd, xmm0
3633
+ | cvtsi2sd xmm1, RCd
3634
+ | ucomisd xmm0, xmm1
3635
+ | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3636
+ |.endif
3637
+ | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3638
+ | jae ->vmeta_tgetv // Not in array part? Use fallback.
3639
+ | shl RCd, 3
3640
+ | add RC, TAB:RB->array
3641
+ | // Get array slot.
3642
+ | mov ITYPE, [RC]
3643
+ | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3644
+ | je >2
3645
+ |1:
3646
+ | mov [BASE+RA*8], ITYPE
3647
+ | ins_next
3648
+ |
3649
+ |2: // Check for __index if table value is nil.
3650
+ | mov TAB:TMPR, TAB:RB->metatable
3651
+ | test TAB:TMPR, TAB:TMPR
3652
+ | jz <1
3653
+ | test byte TAB:TMPR->nomm, 1<<MM_index
3654
+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3655
+ | jmp <1
3656
+ |
3657
+ |5: // String key?
3658
+ | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3659
+ | cleartp STR:RC
3660
+ | jmp ->BC_TGETS_Z
3661
+ break;
3662
+ case BC_TGETS:
3663
+ | ins_ABC // RA = dst, RB = table, RC = str const (~)
3664
+ | mov TAB:RB, [BASE+RB*8]
3665
+ | not RC
3666
+ | mov STR:RC, [KBASE+RC*8]
3667
+ | checktab TAB:RB, ->vmeta_tgets
3668
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3669
+ | mov TMPRd, TAB:RB->hmask
3670
+ | and TMPRd, STR:RC->hash
3671
+ | imul TMPRd, #NODE
3672
+ | add NODE:TMPR, TAB:RB->node
3673
+ | settp ITYPE, STR:RC, LJ_TSTR
3674
+ |1:
3675
+ | cmp NODE:TMPR->key, ITYPE
3676
+ | jne >4
3677
+ | // Get node value.
3678
+ | mov ITYPE, NODE:TMPR->val
3679
+ | cmp ITYPE, LJ_TNIL
3680
+ | je >5 // Key found, but nil value?
3681
+ |2:
3682
+ | mov [BASE+RA*8], ITYPE
3683
+ | ins_next
3684
+ |
3685
+ |4: // Follow hash chain.
3686
+ | mov NODE:TMPR, NODE:TMPR->next
3687
+ | test NODE:TMPR, NODE:TMPR
3688
+ | jnz <1
3689
+ | // End of hash chain: key not found, nil result.
3690
+ | mov ITYPE, LJ_TNIL
3691
+ |
3692
+ |5: // Check for __index if table value is nil.
3693
+ | mov TAB:TMPR, TAB:RB->metatable
3694
+ | test TAB:TMPR, TAB:TMPR
3695
+ | jz <2 // No metatable: done.
3696
+ | test byte TAB:TMPR->nomm, 1<<MM_index
3697
+ | jnz <2 // 'no __index' flag set: done.
3698
+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3699
+ break;
3700
+ case BC_TGETB:
3701
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
3702
+ | mov TAB:RB, [BASE+RB*8]
3703
+ | checktab TAB:RB, ->vmeta_tgetb
3704
+ | cmp RCd, TAB:RB->asize
3705
+ | jae ->vmeta_tgetb
3706
+ | shl RCd, 3
3707
+ | add RC, TAB:RB->array
3708
+ | // Get array slot.
3709
+ | mov ITYPE, [RC]
3710
+ | cmp ITYPE, LJ_TNIL
3711
+ | je >2
3712
+ |1:
3713
+ | mov [BASE+RA*8], ITYPE
3714
+ | ins_next
3715
+ |
3716
+ |2: // Check for __index if table value is nil.
3717
+ | mov TAB:TMPR, TAB:RB->metatable
3718
+ | test TAB:TMPR, TAB:TMPR
3719
+ | jz <1
3720
+ | test byte TAB:TMPR->nomm, 1<<MM_index
3721
+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3722
+ | jmp <1
3723
+ break;
3724
+ case BC_TGETR:
3725
+ | ins_ABC // RA = dst, RB = table, RC = key
3726
+ | mov TAB:RB, [BASE+RB*8]
3727
+ | cleartp TAB:RB
3728
+ |.if DUALNUM
3729
+ | mov RCd, dword [BASE+RC*8]
3730
+ |.else
3731
+ | cvttsd2si RCd, qword [BASE+RC*8]
3732
+ |.endif
3733
+ | cmp RCd, TAB:RB->asize
3734
+ | jae ->vmeta_tgetr // Not in array part? Use fallback.
3735
+ | shl RCd, 3
3736
+ | add RC, TAB:RB->array
3737
+ | // Get array slot.
3738
+ |->BC_TGETR_Z:
3739
+ | mov ITYPE, [RC]
3740
+ |->BC_TGETR2_Z:
3741
+ | mov [BASE+RA*8], ITYPE
3742
+ | ins_next
3743
+ break;
3744
+
3745
+ case BC_TSETV:
3746
+ | ins_ABC // RA = src, RB = table, RC = key
3747
+ | mov TAB:RB, [BASE+RB*8]
3748
+ | mov RC, [BASE+RC*8]
3749
+ | checktab TAB:RB, ->vmeta_tsetv
3750
+ |
3751
+ | // Integer key?
3752
+ |.if DUALNUM
3753
+ | checkint RC, >5
3754
+ |.else
3755
+ | // Convert number to int and back and compare.
3756
+ | checknum RC, >5
3757
+ | movd xmm0, RC
3758
+ | cvttsd2si RCd, xmm0
3759
+ | cvtsi2sd xmm1, RCd
3760
+ | ucomisd xmm0, xmm1
3761
+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3762
+ |.endif
3763
+ | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3764
+ | jae ->vmeta_tsetv
3765
+ | shl RCd, 3
3766
+ | add RC, TAB:RB->array
3767
+ | cmp aword [RC], LJ_TNIL
3768
+ | je >3 // Previous value is nil?
3769
+ |1:
3770
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3771
+ | jnz >7
3772
+ |2: // Set array slot.
3773
+ | mov RB, [BASE+RA*8]
3774
+ | mov [RC], RB
3775
+ | ins_next
3776
+ |
3777
+ |3: // Check for __newindex if previous value is nil.
3778
+ | mov TAB:TMPR, TAB:RB->metatable
3779
+ | test TAB:TMPR, TAB:TMPR
3780
+ | jz <1
3781
+ | test byte TAB:TMPR->nomm, 1<<MM_newindex
3782
+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3783
+ | jmp <1
3784
+ |
3785
+ |5: // String key?
3786
+ | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3787
+ | cleartp STR:RC
3788
+ | jmp ->BC_TSETS_Z
3789
+ |
3790
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
3791
+ | barrierback TAB:RB, TMPR
3792
+ | jmp <2
3793
+ break;
3794
+ case BC_TSETS:
3795
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
3796
+ | mov TAB:RB, [BASE+RB*8]
3797
+ | not RC
3798
+ | mov STR:RC, [KBASE+RC*8]
3799
+ | checktab TAB:RB, ->vmeta_tsets
3800
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3801
+ | mov TMPRd, TAB:RB->hmask
3802
+ | and TMPRd, STR:RC->hash
3803
+ | imul TMPRd, #NODE
3804
+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3805
+ | add NODE:TMPR, TAB:RB->node
3806
+ | settp ITYPE, STR:RC, LJ_TSTR
3807
+ |1:
3808
+ | cmp NODE:TMPR->key, ITYPE
3809
+ | jne >5
3810
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
3811
+ | cmp aword [TMPR], LJ_TNIL
3812
+ | je >4 // Previous value is nil?
3813
+ |2:
3814
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3815
+ | jnz >7
3816
+ |3: // Set node value.
3817
+ | mov ITYPE, [BASE+RA*8]
3818
+ | mov [TMPR], ITYPE
3819
+ | ins_next
3820
+ |
3821
+ |4: // Check for __newindex if previous value is nil.
3822
+ | mov TAB:ITYPE, TAB:RB->metatable
3823
+ | test TAB:ITYPE, TAB:ITYPE
3824
+ | jz <2
3825
+ | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3826
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3827
+ | jmp <2
3828
+ |
3829
+ |5: // Follow hash chain.
3830
+ | mov NODE:TMPR, NODE:TMPR->next
3831
+ | test NODE:TMPR, NODE:TMPR
3832
+ | jnz <1
3833
+ | // End of hash chain: key not found, add a new one.
3834
+ |
3835
+ | // But check for __newindex first.
3836
+ | mov TAB:TMPR, TAB:RB->metatable
3837
+ | test TAB:TMPR, TAB:TMPR
3838
+ | jz >6 // No metatable: continue.
3839
+ | test byte TAB:TMPR->nomm, 1<<MM_newindex
3840
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3841
+ |6:
3842
+ | mov TMP1, ITYPE
3843
+ | mov L:CARG1, SAVE_L
3844
+ | mov L:CARG1->base, BASE
3845
+ | lea CARG3, TMP1
3846
+ | mov CARG2, TAB:RB
3847
+ | mov SAVE_PC, PC
3848
+ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3849
+ | // Handles write barrier for the new key. TValue * returned in rax (RC).
3850
+ | mov L:CARG1, SAVE_L
3851
+ | mov BASE, L:CARG1->base
3852
+ | mov TMPR, rax
3853
+ | movzx RAd, PC_RA
3854
+ | jmp <2 // Must check write barrier for value.
3855
+ |
3856
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
3857
+ | barrierback TAB:RB, ITYPE
3858
+ | jmp <3
3859
+ break;
3860
+ case BC_TSETB:
3861
+ | ins_ABC // RA = src, RB = table, RC = byte literal
3862
+ | mov TAB:RB, [BASE+RB*8]
3863
+ | checktab TAB:RB, ->vmeta_tsetb
3864
+ | cmp RCd, TAB:RB->asize
3865
+ | jae ->vmeta_tsetb
3866
+ | shl RCd, 3
3867
+ | add RC, TAB:RB->array
3868
+ | cmp aword [RC], LJ_TNIL
3869
+ | je >3 // Previous value is nil?
3870
+ |1:
3871
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3872
+ | jnz >7
3873
+ |2: // Set array slot.
3874
+ | mov ITYPE, [BASE+RA*8]
3875
+ | mov [RC], ITYPE
3876
+ | ins_next
3877
+ |
3878
+ |3: // Check for __newindex if previous value is nil.
3879
+ | mov TAB:TMPR, TAB:RB->metatable
3880
+ | test TAB:TMPR, TAB:TMPR
3881
+ | jz <1
3882
+ | test byte TAB:TMPR->nomm, 1<<MM_newindex
3883
+ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3884
+ | jmp <1
3885
+ |
3886
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
3887
+ | barrierback TAB:RB, TMPR
3888
+ | jmp <2
3889
+ break;
3890
+ case BC_TSETR:
3891
+ | ins_ABC // RA = src, RB = table, RC = key
3892
+ | mov TAB:RB, [BASE+RB*8]
3893
+ | cleartp TAB:RB
3894
+ |.if DUALNUM
3895
+ | mov RC, [BASE+RC*8]
3896
+ |.else
3897
+ | cvttsd2si RCd, qword [BASE+RC*8]
3898
+ |.endif
3899
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3900
+ | jnz >7
3901
+ |2:
3902
+ | cmp RCd, TAB:RB->asize
3903
+ | jae ->vmeta_tsetr
3904
+ | shl RCd, 3
3905
+ | add RC, TAB:RB->array
3906
+ | // Set array slot.
3907
+ |->BC_TSETR_Z:
3908
+ | mov ITYPE, [BASE+RA*8]
3909
+ | mov [RC], ITYPE
3910
+ | ins_next
3911
+ |
3912
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
3913
+ | barrierback TAB:RB, TMPR
3914
+ | jmp <2
3915
+ break;
3916
+
3917
+ case BC_TSETM:
3918
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
3919
+ |1:
3920
+ | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3921
+ | lea RA, [BASE+RA*8]
3922
+ | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3923
+ | cleartp TAB:RB
3924
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3925
+ | jnz >7
3926
+ |2:
3927
+ | mov RDd, MULTRES
3928
+ | sub RDd, 1
3929
+ | jz >4 // Nothing to copy?
3930
+ | add RDd, TMPRd // Compute needed size.
3931
+ | cmp RDd, TAB:RB->asize
3932
+ | ja >5 // Doesn't fit into array part?
3933
+ | sub RDd, TMPRd
3934
+ | shl TMPRd, 3
3935
+ | add TMPR, TAB:RB->array
3936
+ |3: // Copy result slots to table.
3937
+ | mov RB, [RA]
3938
+ | add RA, 8
3939
+ | mov [TMPR], RB
3940
+ | add TMPR, 8
3941
+ | sub RDd, 1
3942
+ | jnz <3
3943
+ |4:
3944
+ | ins_next
3945
+ |
3946
+ |5: // Need to resize array part.
3947
+ | mov L:CARG1, SAVE_L
3948
+ | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3949
+ | mov CARG2, TAB:RB
3950
+ | mov CARG3d, RDd
3951
+ | mov L:RB, L:CARG1
3952
+ | mov SAVE_PC, PC
3953
+ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3954
+ | mov BASE, L:RB->base
3955
+ | movzx RAd, PC_RA // Restore RA.
3956
+ | movzx RDd, PC_RD // Restore RD.
3957
+ | jmp <1 // Retry.
3958
+ |
3959
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
3960
+ | barrierback TAB:RB, RD
3961
+ | jmp <2
3962
+ break;
3963
+
3964
+ /* -- Calls and vararg handling ----------------------------------------- */
3965
+
3966
+ case BC_CALL: case BC_CALLM:
3967
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3968
+ if (op == BC_CALLM) {
3969
+ | add NARGS:RDd, MULTRES
3970
+ }
3971
+ | mov LFUNC:RB, [BASE+RA*8]
3972
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
3973
+ | lea BASE, [BASE+RA*8+16]
3974
+ | ins_call
3975
+ break;
3976
+
3977
+ case BC_CALLMT:
3978
+ | ins_AD // RA = base, RD = extra_nargs
3979
+ | add NARGS:RDd, MULTRES
3980
+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3981
+ break;
3982
+ case BC_CALLT:
3983
+ | ins_AD // RA = base, RD = nargs+1
3984
+ | lea RA, [BASE+RA*8+16]
3985
+ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3986
+ | mov LFUNC:RB, [RA-16]
3987
+ | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
3988
+ |->BC_CALLT_Z:
3989
+ | mov PC, [BASE-8]
3990
+ | test PCd, FRAME_TYPE
3991
+ | jnz >7
3992
+ |1:
3993
+ | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
3994
+ | mov MULTRES, NARGS:RDd
3995
+ | sub NARGS:RDd, 1
3996
+ | jz >3
3997
+ |2: // Move args down.
3998
+ | mov RB, [RA]
3999
+ | add RA, 8
4000
+ | mov [KBASE], RB
4001
+ | add KBASE, 8
4002
+ | sub NARGS:RDd, 1
4003
+ | jnz <2
4004
+ |
4005
+ | mov LFUNC:RB, [BASE-16]
4006
+ |3:
4007
+ | cleartp LFUNC:RB
4008
+ | mov NARGS:RDd, MULTRES
4009
+ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4010
+ | ja >5
4011
+ |4:
4012
+ | ins_callt
4013
+ |
4014
+ |5: // Tailcall to a fast function.
4015
+ | test PCd, FRAME_TYPE // Lua frame below?
4016
+ | jnz <4
4017
+ | movzx RAd, PC_RA
4018
+ | neg RA
4019
+ | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4020
+ | cleartp LFUNC:KBASE
4021
+ | mov KBASE, LFUNC:KBASE->pc
4022
+ | mov KBASE, [KBASE+PC2PROTO(k)]
4023
+ | jmp <4
4024
+ |
4025
+ |7: // Tailcall from a vararg function.
4026
+ | sub PC, FRAME_VARG
4027
+ | test PCd, FRAME_TYPEP
4028
+ | jnz >8 // Vararg frame below?
4029
+ | sub BASE, PC // Need to relocate BASE/KBASE down.
4030
+ | mov KBASE, BASE
4031
+ | mov PC, [BASE-8]
4032
+ | jmp <1
4033
+ |8:
4034
+ | add PCd, FRAME_VARG
4035
+ | jmp <1
4036
+ break;
4037
+
4038
+ case BC_ITERC:
4039
+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4040
+ | lea RA, [BASE+RA*8+16] // fb = base+2
4041
+ | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4042
+ | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4043
+ | mov [RA], RB
4044
+ | mov [RA+8], RC
4045
+ | mov LFUNC:RB, [RA-40] // Copy callable. fb[-1] = fb[-5]
4046
+ | mov [RA-16], LFUNC:RB
4047
+ | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4048
+ | checkfunc LFUNC:RB, ->vmeta_call
4049
+ | mov BASE, RA
4050
+ | ins_call
4051
+ break;
4052
+
4053
+ case BC_ITERN:
4054
+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4055
+ |.if JIT
4056
+ | // NYI: add hotloop, record BC_ITERN.
4057
+ |.endif
4058
+ | mov TAB:RB, [BASE+RA*8-16]
4059
+ | cleartp TAB:RB
4060
+ | mov RCd, [BASE+RA*8-8] // Get index from control var.
4061
+ | mov TMPRd, TAB:RB->asize
4062
+ | add PC, 4
4063
+ | mov ITYPE, TAB:RB->array
4064
+ |1: // Traverse array part.
4065
+ | cmp RCd, TMPRd; jae >5 // Index points after array part?
4066
+ | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4067
+ |.if not DUALNUM
4068
+ | cvtsi2sd xmm0, RCd
4069
+ |.endif
4070
+ | // Copy array slot to returned value.
4071
+ | mov RB, [ITYPE+RC*8]
4072
+ | mov [BASE+RA*8+8], RB
4073
+ | // Return array index as a numeric key.
4074
+ |.if DUALNUM
4075
+ | setint ITYPE, RC
4076
+ | mov [BASE+RA*8], ITYPE
4077
+ |.else
4078
+ | movsd qword [BASE+RA*8], xmm0
4079
+ |.endif
4080
+ | add RCd, 1
4081
+ | mov [BASE+RA*8-8], RCd // Update control var.
4082
+ |2:
4083
+ | movzx RDd, PC_RD // Get target from ITERL.
4084
+ | branchPC RD
4085
+ |3:
4086
+ | ins_next
4087
+ |
4088
+ |4: // Skip holes in array part.
4089
+ | add RCd, 1
4090
+ | jmp <1
4091
+ |
4092
+ |5: // Traverse hash part.
4093
+ | sub RCd, TMPRd
4094
+ |6:
4095
+ | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4096
+ | imul ITYPEd, RCd, #NODE
4097
+ | add NODE:ITYPE, TAB:RB->node
4098
+ | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4099
+ | lea TMPRd, [RCd+TMPRd+1]
4100
+ | // Copy key and value from hash slot.
4101
+ | mov RB, NODE:ITYPE->key
4102
+ | mov RC, NODE:ITYPE->val
4103
+ | mov [BASE+RA*8], RB
4104
+ | mov [BASE+RA*8+8], RC
4105
+ | mov [BASE+RA*8-8], TMPRd
4106
+ | jmp <2
4107
+ |
4108
+ |7: // Skip holes in hash part.
4109
+ | add RCd, 1
4110
+ | jmp <6
4111
+ break;
4112
+
4113
+ case BC_ISNEXT:
4114
+ | ins_AD // RA = base, RD = target (points to ITERN)
4115
+ | mov CFUNC:RB, [BASE+RA*8-24]
4116
+ | checkfunc CFUNC:RB, >5
4117
+ | checktptp [BASE+RA*8-16], LJ_TTAB, >5
4118
+ | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
4119
+ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4120
+ | branchPC RD
4121
+ | mov64 TMPR, U64x(fffe7fff, 00000000)
4122
+ | mov [BASE+RA*8-8], TMPR // Initialize control var.
4123
+ |1:
4124
+ | ins_next
4125
+ |5: // Despecialize bytecode if any of the checks fail.
4126
+ | mov PC_OP, BC_JMP
4127
+ | branchPC RD
4128
+ | mov byte [PC], BC_ITERC
4129
+ | jmp <1
4130
+ break;
4131
+
4132
+ case BC_VARG:
4133
+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4134
+ | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4135
+ | lea RA, [BASE+RA*8]
4136
+ | sub TMPR, [BASE-8]
4137
+ | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4138
+ | test RB, RB
4139
+ | jz >5 // Copy all varargs?
4140
+ | lea RB, [RA+RB*8-8]
4141
+ | cmp TMPR, BASE // No vararg slots?
4142
+ | jnb >2
4143
+ |1: // Copy vararg slots to destination slots.
4144
+ | mov RC, [TMPR-16]
4145
+ | add TMPR, 8
4146
+ | mov [RA], RC
4147
+ | add RA, 8
4148
+ | cmp RA, RB // All destination slots filled?
4149
+ | jnb >3
4150
+ | cmp TMPR, BASE // No more vararg slots?
4151
+ | jb <1
4152
+ |2: // Fill up remainder with nil.
4153
+ | mov aword [RA], LJ_TNIL
4154
+ | add RA, 8
4155
+ | cmp RA, RB
4156
+ | jb <2
4157
+ |3:
4158
+ | ins_next
4159
+ |
4160
+ |5: // Copy all varargs.
4161
+ | mov MULTRES, 1 // MULTRES = 0+1
4162
+ | mov RC, BASE
4163
+ | sub RC, TMPR
4164
+ | jbe <3 // No vararg slots?
4165
+ | mov RBd, RCd
4166
+ | shr RBd, 3
4167
+ | add RBd, 1
4168
+ | mov MULTRES, RBd // MULTRES = #varargs+1
4169
+ | mov L:RB, SAVE_L
4170
+ | add RC, RA
4171
+ | cmp RC, L:RB->maxstack
4172
+ | ja >7 // Need to grow stack?
4173
+ |6: // Copy all vararg slots.
4174
+ | mov RC, [TMPR-16]
4175
+ | add TMPR, 8
4176
+ | mov [RA], RC
4177
+ | add RA, 8
4178
+ | cmp TMPR, BASE // No more vararg slots?
4179
+ | jb <6
4180
+ | jmp <3
4181
+ |
4182
+ |7: // Grow stack for varargs.
4183
+ | mov L:RB->base, BASE
4184
+ | mov L:RB->top, RA
4185
+ | mov SAVE_PC, PC
4186
+ | sub TMPR, BASE // Need delta, because BASE may change.
4187
+ | mov TMP1hi, TMPRd
4188
+ | mov CARG2d, MULTRES
4189
+ | sub CARG2d, 1
4190
+ | mov CARG1, L:RB
4191
+ | call extern lj_state_growstack // (lua_State *L, int n)
4192
+ | mov BASE, L:RB->base
4193
+ | movsxd TMPR, TMP1hi
4194
+ | mov RA, L:RB->top
4195
+ | add TMPR, BASE
4196
+ | jmp <6
4197
+ break;
4198
+
4199
+ /* -- Returns ----------------------------------------------------------- */
4200
+
4201
+ case BC_RETM:
4202
+ | ins_AD // RA = results, RD = extra_nresults
4203
+ | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4204
+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4205
+ break;
4206
+
4207
+ case BC_RET: case BC_RET0: case BC_RET1:
4208
+ | ins_AD // RA = results, RD = nresults+1
4209
+ if (op != BC_RET0) {
4210
+ | shl RAd, 3
4211
+ }
4212
+ |1:
4213
+ | mov PC, [BASE-8]
4214
+ | mov MULTRES, RDd // Save nresults+1.
4215
+ | test PCd, FRAME_TYPE // Check frame type marker.
4216
+ | jnz >7 // Not returning to a fixarg Lua func?
4217
+ switch (op) {
4218
+ case BC_RET:
4219
+ |->BC_RET_Z:
4220
+ | mov KBASE, BASE // Use KBASE for result move.
4221
+ | sub RDd, 1
4222
+ | jz >3
4223
+ |2: // Move results down.
4224
+ | mov RB, [KBASE+RA]
4225
+ | mov [KBASE-16], RB
4226
+ | add KBASE, 8
4227
+ | sub RDd, 1
4228
+ | jnz <2
4229
+ |3:
4230
+ | mov RDd, MULTRES // Note: MULTRES may be >255.
4231
+ | movzx RBd, PC_RB // So cannot compare with RDL!
4232
+ |5:
4233
+ | cmp RBd, RDd // More results expected?
4234
+ | ja >6
4235
+ break;
4236
+ case BC_RET1:
4237
+ | mov RB, [BASE+RA]
4238
+ | mov [BASE-16], RB
4239
+ /* fallthrough */
4240
+ case BC_RET0:
4241
+ |5:
4242
+ | cmp PC_RB, RDL // More results expected?
4243
+ | ja >6
4244
+ default:
4245
+ break;
4246
+ }
4247
+ | movzx RAd, PC_RA
4248
+ | neg RA
4249
+ | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4250
+ | mov LFUNC:KBASE, [BASE-16]
4251
+ | cleartp LFUNC:KBASE
4252
+ | mov KBASE, LFUNC:KBASE->pc
4253
+ | mov KBASE, [KBASE+PC2PROTO(k)]
4254
+ | ins_next
4255
+ |
4256
+ |6: // Fill up results with nil.
4257
+ if (op == BC_RET) {
4258
+ | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4259
+ | add KBASE, 8
4260
+ } else {
4261
+ | mov aword [BASE+RD*8-24], LJ_TNIL
4262
+ }
4263
+ | add RD, 1
4264
+ | jmp <5
4265
+ |
4266
+ |7: // Non-standard return case.
4267
+ | lea RB, [PC-FRAME_VARG]
4268
+ | test RBd, FRAME_TYPEP
4269
+ | jnz ->vm_return
4270
+ | // Return from vararg function: relocate BASE down and RA up.
4271
+ | sub BASE, RB
4272
+ if (op != BC_RET0) {
4273
+ | add RA, RB
4274
+ }
4275
+ | jmp <1
4276
+ break;
4277
+
4278
+ /* -- Loops and branches ------------------------------------------------ */
4279
+
4280
+ |.define FOR_IDX, [RA]
4281
+ |.define FOR_STOP, [RA+8]
4282
+ |.define FOR_STEP, [RA+16]
4283
+ |.define FOR_EXT, [RA+24]
4284
+
4285
+ case BC_FORL:
4286
+ |.if JIT
4287
+ | hotloop RBd
4288
+ |.endif
4289
+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4290
+ break;
4291
+
4292
+ case BC_JFORI:
4293
+ case BC_JFORL:
4294
+ #if !LJ_HASJIT
4295
+ break;
4296
+ #endif
4297
+ case BC_FORI:
4298
+ case BC_IFORL:
4299
+ vk = (op == BC_IFORL || op == BC_JFORL);
4300
+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4301
+ | lea RA, [BASE+RA*8]
4302
+ if (LJ_DUALNUM) {
4303
+ | mov RB, FOR_IDX
4304
+ | checkint RB, >9
4305
+ | mov TMPR, FOR_STOP
4306
+ if (!vk) {
4307
+ | checkint TMPR, ->vmeta_for
4308
+ | mov ITYPE, FOR_STEP
4309
+ | test ITYPEd, ITYPEd; js >5
4310
+ | sar ITYPE, 47;
4311
+ | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4312
+ } else {
4313
+ #ifdef LUA_USE_ASSERT
4314
+ | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4315
+ | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4316
+ #endif
4317
+ | mov ITYPE, FOR_STEP
4318
+ | test ITYPEd, ITYPEd; js >5
4319
+ | add RBd, ITYPEd; jo >1
4320
+ | setint RB
4321
+ | mov FOR_IDX, RB
4322
+ }
4323
+ | cmp RBd, TMPRd
4324
+ | mov FOR_EXT, RB
4325
+ if (op == BC_FORI) {
4326
+ | jle >7
4327
+ |1:
4328
+ |6:
4329
+ | branchPC RD
4330
+ } else if (op == BC_JFORI) {
4331
+ | branchPC RD
4332
+ | movzx RDd, PC_RD
4333
+ | jle =>BC_JLOOP
4334
+ |1:
4335
+ |6:
4336
+ } else if (op == BC_IFORL) {
4337
+ | jg >7
4338
+ |6:
4339
+ | branchPC RD
4340
+ |1:
4341
+ } else {
4342
+ | jle =>BC_JLOOP
4343
+ |1:
4344
+ |6:
4345
+ }
4346
+ |7:
4347
+ | ins_next
4348
+ |
4349
+ |5: // Invert check for negative step.
4350
+ if (!vk) {
4351
+ | sar ITYPE, 47;
4352
+ | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4353
+ } else {
4354
+ | add RBd, ITYPEd; jo <1
4355
+ | setint RB
4356
+ | mov FOR_IDX, RB
4357
+ }
4358
+ | cmp RBd, TMPRd
4359
+ | mov FOR_EXT, RB
4360
+ if (op == BC_FORI) {
4361
+ | jge <7
4362
+ } else if (op == BC_JFORI) {
4363
+ | branchPC RD
4364
+ | movzx RDd, PC_RD
4365
+ | jge =>BC_JLOOP
4366
+ } else if (op == BC_IFORL) {
4367
+ | jl <7
4368
+ } else {
4369
+ | jge =>BC_JLOOP
4370
+ }
4371
+ | jmp <6
4372
+ |9: // Fallback to FP variant.
4373
+ if (!vk) {
4374
+ | jae ->vmeta_for
4375
+ }
4376
+ } else if (!vk) {
4377
+ | checknumtp FOR_IDX, ->vmeta_for
4378
+ }
4379
+ if (!vk) {
4380
+ | checknumtp FOR_STOP, ->vmeta_for
4381
+ } else {
4382
+ #ifdef LUA_USE_ASSERT
4383
+ | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4384
+ | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4385
+ #endif
4386
+ }
4387
+ | mov RB, FOR_STEP
4388
+ if (!vk) {
4389
+ | checknum RB, ->vmeta_for
4390
+ }
4391
+ | movsd xmm0, qword FOR_IDX
4392
+ | movsd xmm1, qword FOR_STOP
4393
+ if (vk) {
4394
+ | addsd xmm0, qword FOR_STEP
4395
+ | movsd qword FOR_IDX, xmm0
4396
+ | test RB, RB; js >3
4397
+ } else {
4398
+ | jl >3
4399
+ }
4400
+ | ucomisd xmm1, xmm0
4401
+ |1:
4402
+ | movsd qword FOR_EXT, xmm0
4403
+ if (op == BC_FORI) {
4404
+ |.if DUALNUM
4405
+ | jnb <7
4406
+ |.else
4407
+ | jnb >2
4408
+ | branchPC RD
4409
+ |.endif
4410
+ } else if (op == BC_JFORI) {
4411
+ | branchPC RD
4412
+ | movzx RDd, PC_RD
4413
+ | jnb =>BC_JLOOP
4414
+ } else if (op == BC_IFORL) {
4415
+ |.if DUALNUM
4416
+ | jb <7
4417
+ |.else
4418
+ | jb >2
4419
+ | branchPC RD
4420
+ |.endif
4421
+ } else {
4422
+ | jnb =>BC_JLOOP
4423
+ }
4424
+ |.if DUALNUM
4425
+ | jmp <6
4426
+ |.else
4427
+ |2:
4428
+ | ins_next
4429
+ |.endif
4430
+ |
4431
+ |3: // Invert comparison if step is negative.
4432
+ | ucomisd xmm0, xmm1
4433
+ | jmp <1
4434
+ break;
4435
+
4436
+ case BC_ITERL:
4437
+ |.if JIT
4438
+ | hotloop RBd
4439
+ |.endif
4440
+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4441
+ break;
4442
+
4443
+ case BC_JITERL:
4444
+ #if !LJ_HASJIT
4445
+ break;
4446
+ #endif
4447
+ case BC_IITERL:
4448
+ | ins_AJ // RA = base, RD = target
4449
+ | lea RA, [BASE+RA*8]
4450
+ | mov RB, [RA]
4451
+ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4452
+ if (op == BC_JITERL) {
4453
+ | mov [RA-8], RB
4454
+ | jmp =>BC_JLOOP
4455
+ } else {
4456
+ | branchPC RD // Otherwise save control var + branch.
4457
+ | mov [RA-8], RB
4458
+ }
4459
+ |1:
4460
+ | ins_next
4461
+ break;
4462
+
4463
+ case BC_LOOP:
4464
+ | ins_A // RA = base, RD = target (loop extent)
4465
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
4466
+ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4467
+ |.if JIT
4468
+ | hotloop RBd
4469
+ |.endif
4470
+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4471
+ break;
4472
+
4473
+ case BC_ILOOP:
4474
+ | ins_A // RA = base, RD = target (loop extent)
4475
+ | ins_next
4476
+ break;
4477
+
4478
+ case BC_JLOOP:
4479
+ |.if JIT
4480
+ | ins_AD // RA = base (ignored), RD = traceno
4481
+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
4482
+ | mov TRACE:RD, [RA+RD*8]
4483
+ | mov RD, TRACE:RD->mcode
4484
+ | mov L:RB, SAVE_L
4485
+ | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4486
+ | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4487
+ | // Save additional callee-save registers only used in compiled code.
4488
+ |.if X64WIN
4489
+ | mov CSAVE_4, r12
4490
+ | mov CSAVE_3, r13
4491
+ | mov CSAVE_2, r14
4492
+ | mov CSAVE_1, r15
4493
+ | mov RA, rsp
4494
+ | sub rsp, 10*16+4*8
4495
+ | movdqa [RA-1*16], xmm6
4496
+ | movdqa [RA-2*16], xmm7
4497
+ | movdqa [RA-3*16], xmm8
4498
+ | movdqa [RA-4*16], xmm9
4499
+ | movdqa [RA-5*16], xmm10
4500
+ | movdqa [RA-6*16], xmm11
4501
+ | movdqa [RA-7*16], xmm12
4502
+ | movdqa [RA-8*16], xmm13
4503
+ | movdqa [RA-9*16], xmm14
4504
+ | movdqa [RA-10*16], xmm15
4505
+ |.else
4506
+ | sub rsp, 16
4507
+ | mov [rsp+16], r12
4508
+ | mov [rsp+8], r13
4509
+ |.endif
4510
+ | jmp RD
4511
+ |.endif
4512
+ break;
4513
+
4514
+ case BC_JMP:
4515
+ | ins_AJ // RA = unused, RD = target
4516
+ | branchPC RD
4517
+ | ins_next
4518
+ break;
4519
+
4520
+ /* -- Function headers -------------------------------------------------- */
4521
+
4522
+ /*
4523
+ ** Reminder: A function may be called with func/args above L->maxstack,
4524
+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4525
+ ** too. This means all FUNC* ops (including fast functions) must check
4526
+ ** for stack overflow _before_ adding more slots!
4527
+ */
4528
+
4529
+ case BC_FUNCF:
4530
+ |.if JIT
4531
+ | hotcall RBd
4532
+ |.endif
4533
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
4534
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4535
+ break;
4536
+
4537
+ case BC_JFUNCF:
4538
+ #if !LJ_HASJIT
4539
+ break;
4540
+ #endif
4541
+ case BC_IFUNCF:
4542
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4543
+ | mov KBASE, [PC-4+PC2PROTO(k)]
4544
+ | mov L:RB, SAVE_L
4545
+ | lea RA, [BASE+RA*8] // Top of frame.
4546
+ | cmp RA, L:RB->maxstack
4547
+ | ja ->vm_growstack_f
4548
+ | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4549
+ | cmp NARGS:RDd, RAd // Check for missing parameters.
4550
+ | jbe >3
4551
+ |2:
4552
+ if (op == BC_JFUNCF) {
4553
+ | movzx RDd, PC_RD
4554
+ | jmp =>BC_JLOOP
4555
+ } else {
4556
+ | ins_next
4557
+ }
4558
+ |
4559
+ |3: // Clear missing parameters.
4560
+ | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4561
+ | add NARGS:RDd, 1
4562
+ | cmp NARGS:RDd, RAd
4563
+ | jbe <3
4564
+ | jmp <2
4565
+ break;
4566
+
4567
+ case BC_JFUNCV:
4568
+ #if !LJ_HASJIT
4569
+ break;
4570
+ #endif
4571
+ | int3 // NYI: compiled vararg functions
4572
+ break; /* NYI: compiled vararg functions. */
4573
+
4574
+ case BC_IFUNCV:
4575
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4576
+ | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4577
+ | lea RD, [BASE+NARGS:RD*8+8]
4578
+ | mov LFUNC:KBASE, [BASE-16]
4579
+ | mov [RD-8], RB // Store delta + FRAME_VARG.
4580
+ | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4581
+ | mov L:RB, SAVE_L
4582
+ | lea RA, [RD+RA*8]
4583
+ | cmp RA, L:RB->maxstack
4584
+ | ja ->vm_growstack_v // Need to grow stack.
4585
+ | mov RA, BASE
4586
+ | mov BASE, RD
4587
+ | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4588
+ | test RBd, RBd
4589
+ | jz >2
4590
+ | add RA, 8
4591
+ |1: // Copy fixarg slots up to new frame.
4592
+ | add RA, 8
4593
+ | cmp RA, BASE
4594
+ | jnb >3 // Less args than parameters?
4595
+ | mov KBASE, [RA-16]
4596
+ | mov [RD], KBASE
4597
+ | add RD, 8
4598
+ | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4599
+ | sub RBd, 1
4600
+ | jnz <1
4601
+ |2:
4602
+ if (op == BC_JFUNCV) {
4603
+ | movzx RDd, PC_RD
4604
+ | jmp =>BC_JLOOP
4605
+ } else {
4606
+ | mov KBASE, [PC-4+PC2PROTO(k)]
4607
+ | ins_next
4608
+ }
4609
+ |
4610
+ |3: // Clear missing parameters.
4611
+ | mov aword [RD], LJ_TNIL
4612
+ | add RD, 8
4613
+ | sub RBd, 1
4614
+ | jnz <3
4615
+ | jmp <2
4616
+ break;
4617
+
4618
+ case BC_FUNCC:
4619
+ case BC_FUNCCW:
4620
+ | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4621
+ | mov CFUNC:RB, [BASE-16]
4622
+ | cleartp CFUNC:RB
4623
+ | mov KBASE, CFUNC:RB->f
4624
+ | mov L:RB, SAVE_L
4625
+ | lea RD, [BASE+NARGS:RD*8-8]
4626
+ | mov L:RB->base, BASE
4627
+ | lea RA, [RD+8*LUA_MINSTACK]
4628
+ | cmp RA, L:RB->maxstack
4629
+ | mov L:RB->top, RD
4630
+ if (op == BC_FUNCC) {
4631
+ | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4632
+ } else {
4633
+ | mov CARG2, KBASE
4634
+ | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4635
+ }
4636
+ | ja ->vm_growstack_c // Need to grow stack.
4637
+ | set_vmstate C
4638
+ if (op == BC_FUNCC) {
4639
+ | call KBASE // (lua_State *L)
4640
+ } else {
4641
+ | // (lua_State *L, lua_CFunction f)
4642
+ | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4643
+ }
4644
+ | // nresults returned in eax (RD).
4645
+ | mov BASE, L:RB->base
4646
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4647
+ | set_vmstate INTERP
4648
+ | lea RA, [BASE+RD*8]
4649
+ | neg RA
4650
+ | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4651
+ | mov PC, [BASE-8] // Fetch PC of caller.
4652
+ | jmp ->vm_returnc
4653
+ break;
4654
+
4655
+ /* ---------------------------------------------------------------------- */
4656
+
4657
+ default:
4658
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4659
+ exit(2);
4660
+ break;
4661
+ }
4662
+ }
4663
+
4664
+ static int build_backend(BuildCtx *ctx) /* Emits the subroutines, then the code for every bytecode op; returns BC__MAX. */
4665
+ {
4666
+ int op;
4667
+ dasm_growpc(Dst, BC__MAX); /* Presumably reserves BC__MAX DynASM pc labels (the =>BC_* targets used by build_ins) -- confirm against the DynASM API. */
4668
+ build_subroutines(ctx); /* Subroutines are emitted before the per-opcode code. */
4669
+ |.code_op
4670
+ for (op = 0; op < BC__MAX; op++) /* One build_ins() call per opcode number in [0, BC__MAX). */
4671
+ build_ins(ctx, (BCOp)op, op); /* The opcode is passed twice: once as BCOp, once as a plain int. */
4672
+ return BC__MAX;
4673
+ }
4674
+
4675
+ /* Emit pseudo frame-info for all assembler functions: writes unwind tables as assembler text to ctx->fp, in a layout selected by ctx->mode. */
4676
+ static void emit_asm_debug(BuildCtx *ctx)
4677
+ {
4678
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of the vm_ffi_call global from the start of ctx->code. */
4679
+ switch (ctx->mode) {
4680
+ case BUILD_elfasm: /* Emits .debug_frame, plus .eh_frame unless LJ_NO_UNWIND is set. */
4681
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4682
+ fprintf(ctx->fp, /* Header record (.Lframe0) referenced by both .debug_frame entries below. */
4683
+ ".Lframe0:\n"
4684
+ "\t.long .LECIE0-.LSCIE0\n"
4685
+ ".LSCIE0:\n"
4686
+ "\t.long 0xffffffff\n"
4687
+ "\t.byte 0x1\n"
4688
+ "\t.string \"\"\n"
4689
+ "\t.uleb128 0x1\n"
4690
+ "\t.sleb128 -8\n"
4691
+ "\t.byte 0x10\n"
4692
+ "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4693
+ "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4694
+ "\t.align 8\n"
4695
+ ".LECIE0:\n\n");
4696
+ fprintf(ctx->fp, /* Entry starting at .Lbegin; the two %d fields receive fcofs and CFRAME_SIZE, in that order. */
4697
+ ".LSFDE0:\n"
4698
+ "\t.long .LEFDE0-.LASFDE0\n"
4699
+ ".LASFDE0:\n"
4700
+ "\t.long .Lframe0\n"
4701
+ "\t.quad .Lbegin\n"
4702
+ "\t.quad %d\n"
4703
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4704
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4705
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4706
+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4707
+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4708
+ #if LJ_NO_UNWIND
4709
+ "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4710
+ "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4711
+ #endif
4712
+ "\t.align 8\n"
4713
+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4714
+ #if LJ_HASFFI
4715
+ fprintf(ctx->fp, /* Separate entry for lj_vm_ffi_call; its %d field is the code size remaining after fcofs. */
4716
+ ".LSFDE1:\n"
4717
+ "\t.long .LEFDE1-.LASFDE1\n"
4718
+ ".LASFDE1:\n"
4719
+ "\t.long .Lframe0\n"
4720
+ "\t.quad lj_vm_ffi_call\n"
4721
+ "\t.quad %d\n"
4722
+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4723
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4724
+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4725
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4726
+ "\t.align 8\n"
4727
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4728
+ #endif
4729
+ #if !LJ_NO_UNWIND
4730
+ #if (defined(__sun__) && defined(__svr4__))
4731
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4732
+ #else
4733
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4734
+ #endif
4735
+ fprintf(ctx->fp, /* .eh_frame header record (.Lframe1); it references lj_err_unwind_dwarf. */
4736
+ ".Lframe1:\n"
4737
+ "\t.long .LECIE1-.LSCIE1\n"
4738
+ ".LSCIE1:\n"
4739
+ "\t.long 0\n"
4740
+ "\t.byte 0x1\n"
4741
+ "\t.string \"zPR\"\n"
4742
+ "\t.uleb128 0x1\n"
4743
+ "\t.sleb128 -8\n"
4744
+ "\t.byte 0x10\n"
4745
+ "\t.uleb128 6\n" /* augmentation length */
4746
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
4747
+ "\t.long lj_err_unwind_dwarf-.\n"
4748
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
4749
+ "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4750
+ "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4751
+ "\t.align 8\n"
4752
+ ".LECIE1:\n\n");
4753
+ fprintf(ctx->fp, /* .eh_frame entry for .Lbegin, tied to .Lframe1; same fcofs/CFRAME_SIZE arguments as .LSFDE0. */
4754
+ ".LSFDE2:\n"
4755
+ "\t.long .LEFDE2-.LASFDE2\n"
4756
+ ".LASFDE2:\n"
4757
+ "\t.long .LASFDE2-.Lframe1\n"
4758
+ "\t.long .Lbegin-.\n"
4759
+ "\t.long %d\n"
4760
+ "\t.uleb128 0\n" /* augmentation length */
4761
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4762
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4763
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4764
+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4765
+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4766
+ "\t.align 8\n"
4767
+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4768
+ #if LJ_HASFFI
4769
+ fprintf(ctx->fp, /* Second .eh_frame header record (.Lframe2); unlike .Lframe1 it has no lj_err_unwind_dwarf reference. */
4770
+ ".Lframe2:\n"
4771
+ "\t.long .LECIE2-.LSCIE2\n"
4772
+ ".LSCIE2:\n"
4773
+ "\t.long 0\n"
4774
+ "\t.byte 0x1\n"
4775
+ "\t.string \"zR\"\n"
4776
+ "\t.uleb128 0x1\n"
4777
+ "\t.sleb128 -8\n"
4778
+ "\t.byte 0x10\n"
4779
+ "\t.uleb128 1\n" /* augmentation length */
4780
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
4781
+ "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4782
+ "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4783
+ "\t.align 8\n"
4784
+ ".LECIE2:\n\n");
4785
+ fprintf(ctx->fp, /* .eh_frame entry for lj_vm_ffi_call, tied to .Lframe2; %d is the code size remaining after fcofs. */
4786
+ ".LSFDE3:\n"
4787
+ "\t.long .LEFDE3-.LASFDE3\n"
4788
+ ".LASFDE3:\n"
4789
+ "\t.long .LASFDE3-.Lframe2\n"
4790
+ "\t.long lj_vm_ffi_call-.\n"
4791
+ "\t.long %d\n"
4792
+ "\t.uleb128 0\n" /* augmentation length */
4793
+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4794
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4795
+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4796
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4797
+ "\t.align 8\n"
4798
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4799
+ #endif
4800
+ #endif
4801
+ break;
4802
+ #if !LJ_NO_UNWIND
4803
+ /* Mental note: never let Apple design an assembler.
4804
+ ** Or a linker. Or a plastic case. But I digress.
4805
+ */
4806
+ case BUILD_machasm: { /* Emits a __TEXT,__eh_frame section with one entry per non-empty symbol. */
4807
+ #if LJ_HASFFI
4808
+ int fcsize = 0; /* Size of _lj_vm_ffi_call; stays 0 if that symbol is never matched in the loop below. */
4809
+ #endif
4810
+ int i;
4811
+ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4812
+ fprintf(ctx->fp, /* Header record (EH_frame1) referenced by every per-symbol entry emitted in the loop. */
4813
+ "EH_frame1:\n"
4814
+ "\t.set L$set$x,LECIEX-LSCIEX\n"
4815
+ "\t.long L$set$x\n"
4816
+ "LSCIEX:\n"
4817
+ "\t.long 0\n"
4818
+ "\t.byte 0x1\n"
4819
+ "\t.ascii \"zPR\\0\"\n"
4820
+ "\t.byte 0x1\n"
4821
+ "\t.byte 128-8\n"
4822
+ "\t.byte 0x10\n"
4823
+ "\t.byte 6\n" /* augmentation length */
4824
+ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4825
+ "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4826
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
4827
+ "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4828
+ "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4829
+ "\t.align 3\n"
4830
+ "LECIEX:\n\n");
4831
+ for (i = 0; i < ctx->nsym; i++) {
4832
+ const char *name = ctx->sym[i].name;
4833
+ int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; /* Distance to the next symbol's offset. NOTE(review): reads sym[i+1] even for the last i; presumably the table carries an extra end entry -- confirm. */
4834
+ if (size == 0) continue; /* Skip symbols that cover no code. */
4835
+ #if LJ_HASFFI
4836
+ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } /* Remembered here, emitted separately after the loop. */
4837
+ #endif
4838
+ fprintf(ctx->fp,
4839
+ "%s.eh:\n"
4840
+ "LSFDE%d:\n"
4841
+ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4842
+ "\t.long L$set$%d\n"
4843
+ "LASFDE%d:\n"
4844
+ "\t.long LASFDE%d-EH_frame1\n"
4845
+ "\t.long %s-.\n"
4846
+ "\t.long %d\n"
4847
+ "\t.byte 0\n" /* augmentation length */
4848
+ "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4849
+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4850
+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4851
+ "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4852
+ "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4853
+ "\t.align 3\n"
4854
+ "LEFDE%d:\n\n",
4855
+ name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); /* Labels are made unique by the symbol index i. */
4856
+ }
4857
+ #if LJ_HASFFI
4858
+ if (fcsize) { /* Only when _lj_vm_ffi_call was found with a non-zero size. */
4859
+ fprintf(ctx->fp,
4860
+ "EH_frame2:\n"
4861
+ "\t.set L$set$y,LECIEY-LSCIEY\n"
4862
+ "\t.long L$set$y\n"
4863
+ "LSCIEY:\n"
4864
+ "\t.long 0\n"
4865
+ "\t.byte 0x1\n"
4866
+ "\t.ascii \"zR\\0\"\n"
4867
+ "\t.byte 0x1\n"
4868
+ "\t.byte 128-8\n"
4869
+ "\t.byte 0x10\n"
4870
+ "\t.byte 1\n" /* augmentation length */
4871
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
4872
+ "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4873
+ "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4874
+ "\t.align 3\n"
4875
+ "LECIEY:\n\n");
4876
+ fprintf(ctx->fp, /* Entry for _lj_vm_ffi_call, tied to EH_frame2; %d receives fcsize. */
4877
+ "_lj_vm_ffi_call.eh:\n"
4878
+ "LSFDEY:\n"
4879
+ "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4880
+ "\t.long L$set$yy\n"
4881
+ "LASFDEY:\n"
4882
+ "\t.long LASFDEY-EH_frame2\n"
4883
+ "\t.long _lj_vm_ffi_call-.\n"
4884
+ "\t.long %d\n"
4885
+ "\t.byte 0\n" /* augmentation length */
4886
+ "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4887
+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4888
+ "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4889
+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4890
+ "\t.align 3\n"
4891
+ "LEFDEY:\n\n", fcsize);
4892
+ }
4893
+ #endif
4894
+ fprintf(ctx->fp, ".subsections_via_symbols\n");
4895
+ }
4896
+ break;
4897
+ #endif
4898
+ default: /* Difficult for other modes. */
4899
+ break;
4900
+ }
4901
+ }
4902
+