immunio 1.1.2 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. checksums.yaml +4 -4
  2. data/lib/immunio/version.rb +1 -1
  3. data/lua-hooks/Makefile +56 -109
  4. data/lua-hooks/ext/all.c +3 -14
  5. data/lua-hooks/ext/libinjection/module.mk +5 -0
  6. data/lua-hooks/ext/lpeg/module.mk +6 -0
  7. data/lua-hooks/ext/lua-cmsgpack/module.mk +2 -0
  8. data/lua-hooks/ext/lua-snapshot/module.mk +2 -0
  9. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  10. data/lua-hooks/ext/luajit/Makefile +2 -2
  11. data/lua-hooks/ext/luajit/README +2 -2
  12. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  13. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  14. data/lua-hooks/ext/luajit/doc/changes.html +15 -2
  15. data/lua-hooks/ext/luajit/doc/contact.html +3 -3
  16. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  17. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  18. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  19. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +4 -2
  20. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  21. data/lua-hooks/ext/luajit/doc/ext_jit.html +2 -2
  22. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  23. data/lua-hooks/ext/luajit/doc/extensions.html +9 -2
  24. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  25. data/lua-hooks/ext/luajit/doc/install.html +22 -18
  26. data/lua-hooks/ext/luajit/doc/luajit.html +3 -3
  27. data/lua-hooks/ext/luajit/doc/running.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/status.html +2 -2
  29. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  30. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +4 -4
  31. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  32. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +4 -4
  33. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +1 -1
  34. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +4 -4
  35. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  36. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +4 -4
  37. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +3 -3
  38. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  39. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +34 -7
  40. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +427 -102
  41. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +5 -5
  42. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  43. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  44. data/lua-hooks/ext/luajit/src/Makefile +36 -21
  45. data/lua-hooks/ext/luajit/src/Makefile.dep +3 -1
  46. data/lua-hooks/ext/luajit/src/host/buildvm.c +1 -1
  47. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  48. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +10 -1
  49. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +1 -1
  50. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  51. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +1 -1
  52. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  53. data/lua-hooks/ext/luajit/src/host/genminilua.lua +1 -1
  54. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  55. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +2 -2
  56. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +1 -1
  57. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +1 -1
  58. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  59. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +1 -1
  60. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  61. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +163 -73
  62. data/lua-hooks/ext/luajit/src/jit/dump.lua +2 -1
  63. data/lua-hooks/ext/luajit/src/jit/p.lua +1 -1
  64. data/lua-hooks/ext/luajit/src/jit/v.lua +1 -1
  65. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/lib_aux.c +1 -1
  67. data/lua-hooks/ext/luajit/src/lib_base.c +4 -5
  68. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  69. data/lua-hooks/ext/luajit/src/lib_debug.c +1 -1
  70. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -5
  71. data/lua-hooks/ext/luajit/src/lib_init.c +1 -1
  72. data/lua-hooks/ext/luajit/src/lib_io.c +2 -3
  73. data/lua-hooks/ext/luajit/src/lib_jit.c +1 -1
  74. data/lua-hooks/ext/luajit/src/lib_math.c +1 -1
  75. data/lua-hooks/ext/luajit/src/lib_os.c +2 -2
  76. data/lua-hooks/ext/luajit/src/lib_package.c +1 -1
  77. data/lua-hooks/ext/luajit/src/lib_string.c +1 -1
  78. data/lua-hooks/ext/luajit/src/lib_table.c +1 -1
  79. data/lua-hooks/ext/luajit/src/lj.supp +15 -0
  80. data/lua-hooks/ext/luajit/src/lj_alloc.c +1 -1
  81. data/lua-hooks/ext/luajit/src/lj_api.c +4 -1
  82. data/lua-hooks/ext/luajit/src/lj_arch.h +33 -7
  83. data/lua-hooks/ext/luajit/src/lj_asm.c +12 -5
  84. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  85. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +3 -13
  86. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +337 -71
  87. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2 -2
  88. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2 -2
  89. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  90. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  91. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  92. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  93. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  94. data/lua-hooks/ext/luajit/src/lj_buf.c +2 -4
  95. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -3
  96. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  97. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  98. data/lua-hooks/ext/luajit/src/lj_ccall.c +37 -14
  99. data/lua-hooks/ext/luajit/src/lj_ccall.h +3 -3
  100. data/lua-hooks/ext/luajit/src/lj_ccallback.c +16 -7
  101. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  102. data/lua-hooks/ext/luajit/src/lj_cconv.c +1 -1
  103. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_cdata.c +10 -1
  105. data/lua-hooks/ext/luajit/src/lj_cdata.h +3 -1
  106. data/lua-hooks/ext/luajit/src/lj_clib.c +1 -1
  107. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  108. data/lua-hooks/ext/luajit/src/lj_cparse.c +27 -6
  109. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_crecord.c +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_ctype.c +10 -8
  113. data/lua-hooks/ext/luajit/src/lj_ctype.h +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_def.h +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_dispatch.c +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_dispatch.h +21 -4
  119. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +1 -1
  120. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +7 -5
  121. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +1 -1
  123. data/lua-hooks/ext/luajit/src/lj_err.c +69 -31
  124. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  125. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  127. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +10 -40
  128. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  129. data/lua-hooks/ext/luajit/src/lj_frame.h +12 -1
  130. data/lua-hooks/ext/luajit/src/lj_func.c +1 -1
  131. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_gc.c +2 -2
  133. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  134. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_ir.c +31 -15
  137. data/lua-hooks/ext/luajit/src/lj_ir.h +1 -1
  138. data/lua-hooks/ext/luajit/src/lj_ircall.h +29 -1
  139. data/lua-hooks/ext/luajit/src/lj_iropt.h +2 -1
  140. data/lua-hooks/ext/luajit/src/lj_jit.h +2 -1
  141. data/lua-hooks/ext/luajit/src/lj_lex.c +28 -1
  142. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  143. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  144. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  145. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_mcode.c +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_meta.c +8 -8
  149. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  151. data/lua-hooks/ext/luajit/src/lj_obj.h +1 -1
  152. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +1 -1
  154. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +1 -1
  156. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +1 -1
  158. data/lua-hooks/ext/luajit/src/lj_opt_split.c +10 -5
  159. data/lua-hooks/ext/luajit/src/lj_parse.c +1 -1
  160. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  161. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  162. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_record.c +13 -5
  164. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_snap.c +20 -23
  166. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_state.c +1 -1
  168. data/lua-hooks/ext/luajit/src/lj_state.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_strfmt.c +12 -98
  172. data/lua-hooks/ext/luajit/src/lj_strfmt.h +4 -4
  173. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +591 -0
  174. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  175. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -1
  177. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  178. data/lua-hooks/ext/luajit/src/lj_target.h +1 -1
  179. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  180. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +1 -1
  181. data/lua-hooks/ext/luajit/src/lj_target_mips.h +30 -2
  182. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_target_x86.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_trace.c +7 -2
  185. data/lua-hooks/ext/luajit/src/lj_trace.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -3
  187. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  189. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -3
  190. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_vmmath.c +15 -15
  193. data/lua-hooks/ext/luajit/src/ljamalg.c +2 -1
  194. data/lua-hooks/ext/luajit/src/lua.h +1 -0
  195. data/lua-hooks/ext/luajit/src/luaconf.h +2 -2
  196. data/lua-hooks/ext/luajit/src/luajit.c +1 -1
  197. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  198. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  199. data/lua-hooks/ext/luajit/src/msvcbuild.bat +1 -1
  200. data/lua-hooks/ext/luajit/src/ps4build.bat +26 -6
  201. data/lua-hooks/ext/luajit/src/vm_arm.dasc +17 -9
  202. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +1 -1
  203. data/lua-hooks/ext/luajit/src/vm_mips.dasc +1562 -656
  204. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +3 -7
  205. data/lua-hooks/ext/luajit/src/vm_x64.dasc +10 -2
  206. data/lua-hooks/ext/luajit/src/vm_x86.dasc +5 -8
  207. data/lua-hooks/ext/luautf8/module.mk +2 -0
  208. data/lua-hooks/ext/module.mk +15 -0
  209. data/lua-hooks/ext/modules.h +17 -0
  210. data/lua-hooks/ext/perf/luacpu.c +1 -1
  211. data/lua-hooks/ext/perf/lualoadavg.c +1 -1
  212. data/lua-hooks/ext/perf/luameminfo.c +1 -1
  213. data/lua-hooks/ext/perf/luaoslib.c +124 -2
  214. data/lua-hooks/ext/perf/module.mk +5 -0
  215. data/lua-hooks/ext/sha1/luasha1.c +4 -2
  216. data/lua-hooks/ext/sha1/module.mk +5 -0
  217. data/lua-hooks/ext/sha2/luasha256.c +4 -2
  218. data/lua-hooks/ext/sha2/module.mk +5 -0
  219. data/lua-hooks/ext/sysutils/lua_utils.c +56 -0
  220. data/lua-hooks/ext/sysutils/module.mk +2 -0
  221. data/lua-hooks/lib/boot.lua +2 -1
  222. data/lua-hooks/lib/hooks/module.mk +31 -0
  223. data/lua-hooks/lib/hooks/xss/module.mk +4 -0
  224. data/lua-hooks/lib/lexers/module.mk +10 -0
  225. data/lua-hooks/lib/module.mk +38 -0
  226. data/lua-hooks/lib/schema/module.mk +3 -0
  227. data/lua-hooks/options.mk +59 -0
  228. metadata +21 -2
@@ -1,5 +1,5 @@
1
1
  @rem Script to build LuaJIT with MSVC.
2
- @rem Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
2
+ @rem Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
3
3
  @rem
4
4
  @rem Either open a "Visual Studio .NET Command Prompt"
5
5
  @rem (Note that the Express Edition does not contain an x64 compiler)
@@ -2,7 +2,19 @@
2
2
  @rem Donated to the public domain.
3
3
  @rem
4
4
  @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5
+ @rem or "VS2015 x64 Native Tools Command Prompt".
6
+ @rem
5
7
  @rem Then cd to this directory and run this script.
8
+ @rem
9
+ @rem Recommended invocation:
10
+ @rem
11
+ @rem ps4build          release build, amalgamated, 64-bit GC
12
+ @rem ps4build debug    debug build, amalgamated, 64-bit GC
13
+ @rem
14
+ @rem Additional command-line options (not generally recommended):
15
+ @rem
16
+ @rem gc32 (before debug)    32-bit GC
17
+ @rem noamalg (after debug)  non-amalgamated build
6
18
 
7
19
  @if not defined INCLUDE goto :FAIL
8
20
  @if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -15,6 +27,14 @@
15
27
  @set DASMDIR=..\dynasm
16
28
  @set DASM=%DASMDIR%\dynasm.lua
17
29
  @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
30
+ @set GC64=-DLUAJIT_ENABLE_GC64
31
+ @set DASC=vm_x64.dasc
32
+
33
+ @if "%1" neq "gc32" goto :NOGC32
34
+ @shift
35
+ @set GC64=
36
+ @set DASC=vm_x86.dasc
37
+ :NOGC32
18
38
 
19
39
  %LJCOMPILE% host\minilua.c
20
40
  @if errorlevel 1 goto :BAD
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^
28
48
  @if not errorlevel 8 goto :FAIL
29
49
 
30
50
  @set DASMFLAGS=-D P64 -D NO_UNWIND
31
- minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
51
+ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
32
52
  @if errorlevel 1 goto :BAD
33
53
 
34
- %LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
54
+ %LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
35
55
  @if errorlevel 1 goto :BAD
36
56
  %LJLINK% /out:buildvm.exe buildvm*.obj
37
57
  @if errorlevel 1 goto :BAD
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
54
74
  @if errorlevel 1 goto :BAD
55
75
 
56
76
  @rem ---- Cross compiler ----
57
- @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI
77
+ @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
58
78
  @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
59
79
  @set INCLUDE=""
60
80
 
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s
63
83
  @if "%1" neq "debug" goto :NODEBUG
64
84
  @shift
65
85
  @set LJCOMPILE=%LJCOMPILE% -g -O0
66
- @set TARGETLIB=libluajitD.a
86
+ @set TARGETLIB=libluajitD_ps4.a
67
87
  goto :BUILD
68
88
  :NODEBUG
69
89
  @set LJCOMPILE=%LJCOMPILE% -O2
70
- @set TARGETLIB=libluajit.a
90
+ @set TARGETLIB=libluajit_ps4.a
71
91
  :BUILD
72
92
  del %TARGETLIB%
73
- @if "%1"=="amalg" goto :AMALG
93
+ @if "%1" neq "noamalg" goto :AMALG
74
94
  for %%f in (lj_*.c lib_*.c) do (
75
95
  %LJCOMPILE% %%f
76
96
  @if errorlevel 1 goto :BAD
@@ -1,6 +1,6 @@
1
1
  |// Low-level VM code for ARM CPUs.
2
2
  |// Bytecode interpreter, fast functions and helper functions.
3
- |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
3
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
4
4
  |
5
5
  |.arch arm
6
6
  |.section code_op, code_sub
@@ -372,6 +372,17 @@ static void build_subroutines(BuildCtx *ctx)
372
372
  | str CARG1, [BASE, #-4] // Prepend false to error message.
373
373
  | st_vmstate CARG2
374
374
  | b ->vm_returnc
375
+ |
376
+ |->vm_unwind_ext: // Complete external unwind.
377
+ #if !LJ_NO_UNWIND
378
+ | push {r0, r1, r2, lr}
379
+ | bl extern _Unwind_Complete
380
+ | ldr r0, [sp]
381
+ | bl extern _Unwind_DeleteException
382
+ | pop {r0, r1, r2, lr}
383
+ | mov r0, r1
384
+ | bx r2
385
+ #endif
375
386
  |
376
387
  |//-----------------------------------------------------------------------
377
388
  |//-- Grow stack for calls -----------------------------------------------
@@ -2086,7 +2097,7 @@ static void build_subroutines(BuildCtx *ctx)
2086
2097
  | // RA = resultptr, CARG4 = meta base
2087
2098
  | ldr RB, SAVE_MULTRES
2088
2099
  | ldr INS, [PC, #-4]
2089
- | ldr CARG3, [CARG4, #-24] // Save previous trace number.
2100
+ | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
2090
2101
  | subs RB, RB, #8
2091
2102
  | decode_RA8 RC, INS // Call base.
2092
2103
  | beq >2
@@ -2101,23 +2112,20 @@ static void build_subroutines(BuildCtx *ctx)
2101
2112
  | decode_RA8 RA, INS
2102
2113
  | decode_RB8 RB, INS
2103
2114
  | add RA, RA, RB
2104
- | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
2105
2115
  |3:
2106
2116
  | cmp RA, RC
2107
2117
  | mvn CARG2, #~LJ_TNIL
2108
2118
  | bhi >9 // More results wanted?
2109
2119
  |
2110
- | ldr TRACE:RA, [CARG1, CARG3, lsl #2]
2111
- | cmp TRACE:RA, #0
2112
- | beq ->cont_nop
2113
- | ldrh RC, TRACE:RA->link
2114
- | cmp RC, CARG3
2120
+ | ldrh RA, TRACE:CARG3->traceno
2121
+ | ldrh RC, TRACE:CARG3->link
2122
+ | cmp RC, RA
2115
2123
  | beq ->cont_nop // Blacklisted.
2116
2124
  | cmp RC, #0
2117
2125
  | bne =>BC_JLOOP // Jump to stitched trace.
2118
2126
  |
2119
2127
  | // Stitch a new trace to the previous trace.
2120
- | str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
2128
+ | str RA, [DISPATCH, #DISPATCH_J(exitno)]
2121
2129
  | str L, [DISPATCH, #DISPATCH_J(L)]
2122
2130
  | str BASE, L->base
2123
2131
  | sub CARG1, DISPATCH, #-GG_DISP2J
@@ -1,6 +1,6 @@
1
1
  |// Low-level VM code for ARM64 CPUs.
2
2
  |// Bytecode interpreter, fast functions and helper functions.
3
- |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
3
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
4
4
  |
5
5
  |.arch arm64
6
6
  |.section code_op, code_sub
@@ -1,6 +1,9 @@
1
1
  |// Low-level VM code for MIPS CPUs.
2
2
  |// Bytecode interpreter, fast functions and helper functions.
3
- |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
3
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
4
+ |//
5
+ |// MIPS soft-float support contributed by Djordje Kovacevic and
6
+ |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4
7
  |
5
8
  |.arch mips
6
9
  |.section code_op, code_sub
@@ -18,6 +21,12 @@
18
21
  |// Fixed register assignments for the interpreter.
19
22
  |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20
23
  |
24
+ |.macro .FPU, a, b
25
+ |.if FPU
26
+ | a, b
27
+ |.endif
28
+ |.endmacro
29
+ |
21
30
  |// The following must be C callee-save (but BASE is often refetched).
22
31
  |.define BASE, r16 // Base of current Lua stack frame.
23
32
  |.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
25
34
  |.define DISPATCH, r19 // Opcode dispatch table.
26
35
  |.define LREG, r20 // Register holding lua_State (also in SAVE_L).
27
36
  |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
28
- |// NYI: r22 currently unused.
29
37
  |
30
38
  |.define JGL, r30 // On-trace: global_State + 32768.
31
39
  |
32
40
  |// Constants for type-comparisons, stores and conversions. C callee-save.
41
+ |.define TISNUM, r22
33
42
  |.define TISNIL, r30
43
+ |.if FPU
34
44
  |.define TOBIT, f30 // 2^52 + 2^51.
45
+ |.endif
35
46
  |
36
47
  |// The following temporaries are not saved across C calls, except for RA.
37
48
  |.define RA, r23 // Callee-save.
@@ -56,13 +67,33 @@
56
67
  |.define CRET1, r2
57
68
  |.define CRET2, r3
58
69
  |
70
+ |.if ENDIAN_LE
71
+ |.define SFRETLO, CRET1
72
+ |.define SFRETHI, CRET2
73
+ |.define SFARG1LO, CARG1
74
+ |.define SFARG1HI, CARG2
75
+ |.define SFARG2LO, CARG3
76
+ |.define SFARG2HI, CARG4
77
+ |.else
78
+ |.define SFRETLO, CRET2
79
+ |.define SFRETHI, CRET1
80
+ |.define SFARG1LO, CARG2
81
+ |.define SFARG1HI, CARG1
82
+ |.define SFARG2LO, CARG4
83
+ |.define SFARG2HI, CARG3
84
+ |.endif
85
+ |
86
+ |.if FPU
59
87
  |.define FARG1, f12
60
88
  |.define FARG2, f14
61
89
  |
62
90
  |.define FRET1, f0
63
91
  |.define FRET2, f2
92
+ |.endif
64
93
  |
65
94
  |// Stack layout while in interpreter. Must match with lj_frame.h.
95
+ |.if FPU // MIPS32 hard-float.
96
+ |
66
97
  |.define CFRAME_SPACE, 112 // Delta for sp.
67
98
  |
68
99
  |.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
72
103
  |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
73
104
  |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
74
105
  |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
106
+ |
107
+ |.else // MIPS32 soft-float
108
+ |
109
+ |.define CFRAME_SPACE, 64 // Delta for sp.
110
+ |
111
+ |.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
112
+ |.define SAVE_NRES, 72(sp)
113
+ |.define SAVE_CFRAME, 68(sp)
114
+ |.define SAVE_L, 64(sp)
115
+ |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
116
+ |.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
117
+ |
118
+ |.endif
119
+ |
75
120
  |.define SAVE_PC, 20(sp)
76
121
  |.define ARG5, 16(sp)
77
122
  |.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
83
128
  |.define ARG5_OFS, 16
84
129
  |.define SAVE_MULTRES, ARG5
85
130
  |
131
+ |//-----------------------------------------------------------------------
132
+ |
86
133
  |.macro saveregs
87
134
  | addiu sp, sp, -CFRAME_SPACE
88
135
  | sw ra, SAVE_GPR_+9*4(sp)
89
136
  | sw r30, SAVE_GPR_+8*4(sp)
90
- | sdc1 f30, SAVE_FPR_+5*8(sp)
137
+ | .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91
138
  | sw r23, SAVE_GPR_+7*4(sp)
92
139
  | sw r22, SAVE_GPR_+6*4(sp)
93
- | sdc1 f28, SAVE_FPR_+4*8(sp)
140
+ | .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94
141
  | sw r21, SAVE_GPR_+5*4(sp)
95
142
  | sw r20, SAVE_GPR_+4*4(sp)
96
- | sdc1 f26, SAVE_FPR_+3*8(sp)
143
+ | .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97
144
  | sw r19, SAVE_GPR_+3*4(sp)
98
145
  | sw r18, SAVE_GPR_+2*4(sp)
99
- | sdc1 f24, SAVE_FPR_+2*8(sp)
146
+ | .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100
147
  | sw r17, SAVE_GPR_+1*4(sp)
101
148
  | sw r16, SAVE_GPR_+0*4(sp)
102
- | sdc1 f22, SAVE_FPR_+1*8(sp)
103
- | sdc1 f20, SAVE_FPR_+0*8(sp)
149
+ | .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
150
+ | .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104
151
  |.endmacro
105
152
  |
106
153
  |.macro restoreregs_ret
107
154
  | lw ra, SAVE_GPR_+9*4(sp)
108
155
  | lw r30, SAVE_GPR_+8*4(sp)
109
- | ldc1 f30, SAVE_FPR_+5*8(sp)
156
+ | .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110
157
  | lw r23, SAVE_GPR_+7*4(sp)
111
158
  | lw r22, SAVE_GPR_+6*4(sp)
112
- | ldc1 f28, SAVE_FPR_+4*8(sp)
159
+ | .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113
160
  | lw r21, SAVE_GPR_+5*4(sp)
114
161
  | lw r20, SAVE_GPR_+4*4(sp)
115
- | ldc1 f26, SAVE_FPR_+3*8(sp)
162
+ | .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116
163
  | lw r19, SAVE_GPR_+3*4(sp)
117
164
  | lw r18, SAVE_GPR_+2*4(sp)
118
- | ldc1 f24, SAVE_FPR_+2*8(sp)
165
+ | .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119
166
  | lw r17, SAVE_GPR_+1*4(sp)
120
167
  | lw r16, SAVE_GPR_+0*4(sp)
121
- | ldc1 f22, SAVE_FPR_+1*8(sp)
122
- | ldc1 f20, SAVE_FPR_+0*8(sp)
168
+ | .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
169
+ | .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123
170
  | jr ra
124
171
  | addiu sp, sp, CFRAME_SPACE
125
172
  |.endmacro
@@ -153,13 +200,23 @@
153
200
  |//-----------------------------------------------------------------------
154
201
  |
155
202
  |// Endian-specific defines.
156
- |.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8)
157
- |.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4)
158
- |.define HI, LJ_ENDIAN_SELECT(4,0)
159
- |.define LO, LJ_ENDIAN_SELECT(0,4)
160
- |.define OFS_RD, LJ_ENDIAN_SELECT(2,0)
161
- |.define OFS_RA, LJ_ENDIAN_SELECT(1,2)
162
- |.define OFS_OP, LJ_ENDIAN_SELECT(0,3)
203
+ |.if ENDIAN_LE
204
+ |.define FRAME_PC, -4
205
+ |.define FRAME_FUNC, -8
206
+ |.define HI, 4
207
+ |.define LO, 0
208
+ |.define OFS_RD, 2
209
+ |.define OFS_RA, 1
210
+ |.define OFS_OP, 0
211
+ |.else
212
+ |.define FRAME_PC, -8
213
+ |.define FRAME_FUNC, -4
214
+ |.define HI, 0
215
+ |.define LO, 4
216
+ |.define OFS_RD, 0
217
+ |.define OFS_RA, 2
218
+ |.define OFS_OP, 3
219
+ |.endif
163
220
  |
164
221
  |// Instruction decode.
165
222
  |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -354,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
354
411
  |. sll TMP2, TMP2, 3
355
412
  |1:
356
413
  | addiu TMP1, TMP1, -8
357
- | ldc1 f0, 0(RA)
414
+ | lw SFRETHI, HI(RA)
415
+ | lw SFRETLO, LO(RA)
358
416
  | addiu RA, RA, 8
359
- | sdc1 f0, 0(BASE)
417
+ | sw SFRETHI, HI(BASE)
418
+ | sw SFRETLO, LO(BASE)
360
419
  | bnez TMP1, <1
361
420
  |. addiu BASE, BASE, 8
362
421
  |
@@ -425,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
425
484
  | and sp, CARG1, AT
426
485
  |->vm_unwind_ff_eh: // Landing pad for external unwinder.
427
486
  | lw L, SAVE_L
428
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
487
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
429
489
  | li TISNIL, LJ_TNIL
430
490
  | lw BASE, L->base
431
491
  | lw DISPATCH, L->glref // Setup pointer to dispatch table.
432
- | mtc1 TMP3, TOBIT
492
+ | .FPU mtc1 TMP3, TOBIT
433
493
  | li TMP1, LJ_TFALSE
434
494
  | li_vmstate INTERP
435
495
  | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
436
- | cvt.d.s TOBIT, TOBIT
496
+ | .FPU cvt.d.s TOBIT, TOBIT
437
497
  | addiu RA, BASE, -8 // Results start at BASE-8.
438
498
  | addiu DISPATCH, DISPATCH, GG_G2DISP
439
499
  | sw TMP1, HI(RA) // Prepend false to error message.
@@ -496,13 +556,14 @@ static void build_subroutines(BuildCtx *ctx)
496
556
  | sw L, DISPATCH_GL(cur_L)(DISPATCH)
497
557
  | move RA, BASE
498
558
  | lw BASE, L->base
559
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
499
560
  | lw TMP1, L->top
500
561
  | lw PC, FRAME_PC(BASE)
501
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
562
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
502
563
  | subu RD, TMP1, BASE
503
- | mtc1 TMP3, TOBIT
564
+ | .FPU mtc1 TMP3, TOBIT
504
565
  | sb r0, L->status
505
- | cvt.d.s TOBIT, TOBIT
566
+ | .FPU cvt.d.s TOBIT, TOBIT
506
567
  | li_vmstate INTERP
507
568
  | addiu RD, RD, 8
508
569
  | st_vmstate
@@ -540,13 +601,14 @@ static void build_subroutines(BuildCtx *ctx)
540
601
  |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
541
602
  | sw L, DISPATCH_GL(cur_L)(DISPATCH)
542
603
  | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
543
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
604
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
605
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
544
606
  | lw TMP1, L->top
545
- | mtc1 TMP3, TOBIT
607
+ | .FPU mtc1 TMP3, TOBIT
546
608
  | addu PC, PC, BASE
547
609
  | subu NARGS8:RC, TMP1, BASE
548
610
  | subu PC, PC, TMP2 // PC = frame delta + frame type
549
- | cvt.d.s TOBIT, TOBIT
611
+ | .FPU cvt.d.s TOBIT, TOBIT
550
612
  | li_vmstate INTERP
551
613
  | li TISNIL, LJ_TNIL
552
614
  | st_vmstate
@@ -628,7 +690,8 @@ static void build_subroutines(BuildCtx *ctx)
628
690
  |->cont_cat: // RA = resultptr, RB = meta base
629
691
  | lw INS, -4(PC)
630
692
  | addiu CARG2, RB, -16
631
- | ldc1 f0, 0(RA)
693
+ | lw SFRETHI, HI(RA)
694
+ | lw SFRETLO, LO(RA)
632
695
  | decode_RB8a MULTRES, INS
633
696
  | decode_RA8a RA, INS
634
697
  | decode_RB8b MULTRES
@@ -636,11 +699,13 @@ static void build_subroutines(BuildCtx *ctx)
636
699
  | addu TMP1, BASE, MULTRES
637
700
  | sw BASE, L->base
638
701
  | subu CARG3, CARG2, TMP1
702
+ | sw SFRETHI, HI(CARG2)
639
703
  | bne TMP1, CARG2, ->BC_CAT_Z
640
- |. sdc1 f0, 0(CARG2)
704
+ |. sw SFRETLO, LO(CARG2)
641
705
  | addu RA, BASE, RA
706
+ | sw SFRETHI, HI(RA)
642
707
  | b ->cont_nop
643
- |. sdc1 f0, 0(RA)
708
+ |. sw SFRETLO, LO(RA)
644
709
  |
645
710
  |//-- Table indexing metamethods -----------------------------------------
646
711
  |
@@ -663,10 +728,9 @@ static void build_subroutines(BuildCtx *ctx)
663
728
  |. sw TMP1, HI(CARG3)
664
729
  |
665
730
  |->vmeta_tgetb: // TMP0 = index
666
- | mtc1 TMP0, f0
667
- | cvt.d.w f0, f0
668
731
  | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
669
- | sdc1 f0, 0(CARG3)
732
+ | sw TMP0, LO(CARG3)
733
+ | sw TISNUM, HI(CARG3)
670
734
  |
671
735
  |->vmeta_tgetv:
672
736
  |1:
@@ -678,9 +742,11 @@ static void build_subroutines(BuildCtx *ctx)
678
742
  | // Returns TValue * (finished) or NULL (metamethod).
679
743
  | beqz CRET1, >3
680
744
  |. addiu TMP1, BASE, -FRAME_CONT
681
- | ldc1 f0, 0(CRET1)
745
+ | lw SFARG1HI, HI(CRET1)
746
+ | lw SFARG2HI, LO(CRET1)
682
747
  | ins_next1
683
- | sdc1 f0, 0(RA)
748
+ | sw SFARG1HI, HI(RA)
749
+ | sw SFARG2HI, LO(RA)
684
750
  | ins_next2
685
751
  |
686
752
  |3: // Call __index metamethod.
@@ -697,10 +763,11 @@ static void build_subroutines(BuildCtx *ctx)
697
763
  | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
698
764
  |. nop
699
765
  | // Returns cTValue * or NULL.
700
- | beqz CRET1, >1
701
- |. nop
766
+ | beqz CRET1, ->BC_TGETR_Z
767
+ |. move SFARG2HI, TISNIL
768
+ | lw SFARG2HI, HI(CRET1)
702
769
  | b ->BC_TGETR_Z
703
- |. ldc1 f0, 0(CRET1)
770
+ |. lw SFARG2LO, LO(CRET1)
704
771
  |
705
772
  |//-----------------------------------------------------------------------
706
773
  |
@@ -723,10 +790,9 @@ static void build_subroutines(BuildCtx *ctx)
723
790
  |. sw TMP1, HI(CARG3)
724
791
  |
725
792
  |->vmeta_tsetb: // TMP0 = index
726
- | mtc1 TMP0, f0
727
- | cvt.d.w f0, f0
728
793
  | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
729
- | sdc1 f0, 0(CARG3)
794
+ | sw TMP0, LO(CARG3)
795
+ | sw TISNUM, HI(CARG3)
730
796
  |
731
797
  |->vmeta_tsetv:
732
798
  |1:
@@ -736,11 +802,13 @@ static void build_subroutines(BuildCtx *ctx)
736
802
  | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
737
803
  |. move CARG1, L
738
804
  | // Returns TValue * (finished) or NULL (metamethod).
805
+ | lw SFARG1HI, HI(RA)
739
806
  | beqz CRET1, >3
740
- |. ldc1 f0, 0(RA)
807
+ |. lw SFARG1LO, LO(RA)
741
808
  | // NOBARRIER: lj_meta_tset ensures the table is not black.
742
809
  | ins_next1
743
- | sdc1 f0, 0(CRET1)
810
+ | sw SFARG1HI, HI(CRET1)
811
+ | sw SFARG1LO, LO(CRET1)
744
812
  | ins_next2
745
813
  |
746
814
  |3: // Call __newindex metamethod.
@@ -750,7 +818,8 @@ static void build_subroutines(BuildCtx *ctx)
750
818
  | sw PC, -16+HI(BASE) // [cont|PC]
751
819
  | subu PC, BASE, TMP1
752
820
  | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
753
- | sdc1 f0, 16(BASE) // Copy value to third argument.
821
+ | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
822
+ | sw SFARG1LO, 16+LO(BASE)
754
823
  | b ->vm_call_dispatch_f
755
824
  |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
756
825
  |
@@ -767,7 +836,9 @@ static void build_subroutines(BuildCtx *ctx)
767
836
  |//-- Comparison metamethods ---------------------------------------------
768
837
  |
769
838
  |->vmeta_comp:
770
- | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT.
839
+ | // RA/RD point to o1/o2.
840
+ | move CARG2, RA
841
+ | move CARG3, RD
771
842
  | load_got lj_meta_comp
772
843
  | addiu PC, PC, -4
773
844
  | sw BASE, L->base
@@ -793,11 +864,13 @@ static void build_subroutines(BuildCtx *ctx)
793
864
  |
794
865
  |->cont_ra: // RA = resultptr
795
866
  | lbu TMP1, -4+OFS_RA(PC)
796
- | ldc1 f0, 0(RA)
867
+ | lw SFRETHI, HI(RA)
868
+ | lw SFRETLO, LO(RA)
797
869
  | sll TMP1, TMP1, 3
798
870
  | addu TMP1, BASE, TMP1
871
+ | sw SFRETHI, HI(TMP1)
799
872
  | b ->cont_nop
800
- |. sdc1 f0, 0(TMP1)
873
+ |. sw SFRETLO, LO(TMP1)
801
874
  |
802
875
  |->cont_condt: // RA = resultptr
803
876
  | lw TMP0, HI(RA)
@@ -812,8 +885,11 @@ static void build_subroutines(BuildCtx *ctx)
812
885
  |. addiu TMP2, AT, -1 // Branch if result is false.
813
886
  |
814
887
  |->vmeta_equal:
815
- | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
888
+ | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
816
889
  | load_got lj_meta_equal
890
+ | move CARG2, SFARG1LO
891
+ | move CARG3, SFARG2LO
892
+ | move CARG4, TMP0
817
893
  | addiu PC, PC, -4
818
894
  | sw BASE, L->base
819
895
  | sw PC, SAVE_PC
@@ -852,14 +928,16 @@ static void build_subroutines(BuildCtx *ctx)
852
928
  |//-- Arithmetic metamethods ---------------------------------------------
853
929
  |
854
930
  |->vmeta_unm:
855
- | move CARG4, CARG3
931
+ | move RC, RB
856
932
  |
857
933
  |->vmeta_arith:
858
934
  | load_got lj_meta_arith
859
935
  | decode_OP1 TMP0, INS
860
936
  | sw BASE, L->base
861
- | sw PC, SAVE_PC
862
937
  | move CARG2, RA
938
+ | sw PC, SAVE_PC
939
+ | move CARG3, RB
940
+ | move CARG4, RC
863
941
  | sw TMP0, ARG5
864
942
  | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
865
943
  |. move CARG1, L
@@ -967,40 +1045,52 @@ static void build_subroutines(BuildCtx *ctx)
967
1045
  |
968
1046
  |.macro .ffunc_1, name
969
1047
  |->ff_ .. name:
1048
+ | lw SFARG1HI, HI(BASE)
970
1049
  | beqz NARGS8:RC, ->fff_fallback
971
- |. lw CARG3, HI(BASE)
972
- | lw CARG1, LO(BASE)
1050
+ |. lw SFARG1LO, LO(BASE)
973
1051
  |.endmacro
974
1052
  |
975
1053
  |.macro .ffunc_2, name
976
1054
  |->ff_ .. name:
977
1055
  | sltiu AT, NARGS8:RC, 16
978
- | lw CARG3, HI(BASE)
1056
+ | lw SFARG1HI, HI(BASE)
979
1057
  | bnez AT, ->fff_fallback
980
- |. lw CARG4, 8+HI(BASE)
981
- | lw CARG1, LO(BASE)
982
- | lw CARG2, 8+LO(BASE)
1058
+ |. lw SFARG2HI, 8+HI(BASE)
1059
+ | lw SFARG1LO, LO(BASE)
1060
+ | lw SFARG2LO, 8+LO(BASE)
983
1061
  |.endmacro
984
1062
  |
985
1063
  |.macro .ffunc_n, name // Caveat: has delay slot!
986
1064
  |->ff_ .. name:
987
- | lw CARG3, HI(BASE)
1065
+ | lw SFARG1HI, HI(BASE)
1066
+ |.if FPU
1067
+ | ldc1 FARG1, 0(BASE)
1068
+ |.else
1069
+ | lw SFARG1LO, LO(BASE)
1070
+ |.endif
988
1071
  | beqz NARGS8:RC, ->fff_fallback
989
- |. ldc1 FARG1, 0(BASE)
990
- | sltiu AT, CARG3, LJ_TISNUM
1072
+ |. sltiu AT, SFARG1HI, LJ_TISNUM
991
1073
  | beqz AT, ->fff_fallback
992
1074
  |.endmacro
993
1075
  |
994
1076
  |.macro .ffunc_nn, name // Caveat: has delay slot!
995
1077
  |->ff_ .. name:
996
1078
  | sltiu AT, NARGS8:RC, 16
997
- | lw CARG3, HI(BASE)
1079
+ | lw SFARG1HI, HI(BASE)
998
1080
  | bnez AT, ->fff_fallback
999
- |. lw CARG4, 8+HI(BASE)
1000
- | ldc1 FARG1, 0(BASE)
1001
- | ldc1 FARG2, 8(BASE)
1002
- | sltiu TMP0, CARG3, LJ_TISNUM
1003
- | sltiu TMP1, CARG4, LJ_TISNUM
1081
+ |. lw SFARG2HI, 8+HI(BASE)
1082
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
1083
+ |.if FPU
1084
+ | ldc1 FARG1, 0(BASE)
1085
+ |.else
1086
+ | lw SFARG1LO, LO(BASE)
1087
+ |.endif
1088
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
1089
+ |.if FPU
1090
+ | ldc1 FARG2, 8(BASE)
1091
+ |.else
1092
+ | lw SFARG2LO, 8+LO(BASE)
1093
+ |.endif
1004
1094
  | and TMP0, TMP0, TMP1
1005
1095
  | beqz TMP0, ->fff_fallback
1006
1096
  |.endmacro
@@ -1016,52 +1106,54 @@ static void build_subroutines(BuildCtx *ctx)
1016
1106
  |//-- Base library: checks -----------------------------------------------
1017
1107
  |
1018
1108
  |.ffunc_1 assert
1019
- | sltiu AT, CARG3, LJ_TISTRUECOND
1109
+ | sltiu AT, SFARG1HI, LJ_TISTRUECOND
1020
1110
  | beqz AT, ->fff_fallback
1021
1111
  |. addiu RA, BASE, -8
1022
1112
  | lw PC, FRAME_PC(BASE)
1023
1113
  | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1024
1114
  | addu TMP2, RA, NARGS8:RC
1025
- | sw CARG3, HI(RA)
1115
+ | sw SFARG1HI, HI(RA)
1026
1116
  | addiu TMP1, BASE, 8
1027
1117
  | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
1028
- |. sw CARG1, LO(RA)
1118
+ |. sw SFARG1LO, LO(RA)
1029
1119
  |1:
1030
- | ldc1 f0, 0(TMP1)
1031
- | sdc1 f0, -8(TMP1)
1120
+ | lw SFRETHI, HI(TMP1)
1121
+ | lw SFRETLO, LO(TMP1)
1122
+ | sw SFRETHI, -8+HI(TMP1)
1123
+ | sw SFRETLO, -8+LO(TMP1)
1032
1124
  | bne TMP1, TMP2, <1
1033
1125
  |. addiu TMP1, TMP1, 8
1034
1126
  | b ->fff_res
1035
1127
  |. nop
1036
1128
  |
1037
1129
  |.ffunc type
1038
- | lw CARG3, HI(BASE)
1039
- | li TMP1, LJ_TISNUM
1130
+ | lw SFARG1HI, HI(BASE)
1040
1131
  | beqz NARGS8:RC, ->fff_fallback
1041
- |. sltiu TMP0, CARG3, LJ_TISNUM
1042
- | movz TMP1, CARG3, TMP0
1043
- | not TMP1, TMP1
1132
+ |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1133
+ | movn SFARG1HI, TISNUM, TMP0
1134
+ | not TMP1, SFARG1HI
1044
1135
  | sll TMP1, TMP1, 3
1045
1136
  | addu TMP1, CFUNC:RB, TMP1
1046
- | b ->fff_resn
1047
- |. ldc1 FRET1, CFUNC:TMP1->upvalue
1137
+ | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1138
+ | b ->fff_restv
1139
+ |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1048
1140
  |
1049
1141
  |//-- Base library: getters and setters ---------------------------------
1050
1142
  |
1051
1143
  |.ffunc_1 getmetatable
1052
1144
  | li AT, LJ_TTAB
1053
- | bne CARG3, AT, >6
1145
+ | bne SFARG1HI, AT, >6
1054
1146
  |. li AT, LJ_TUDATA
1055
1147
  |1: // Field metatable must be at same offset for GCtab and GCudata!
1056
- | lw TAB:CARG1, TAB:CARG1->metatable
1148
+ | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1057
1149
  |2:
1058
1150
  | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1059
- | beqz TAB:CARG1, ->fff_restv
1060
- |. li CARG3, LJ_TNIL
1061
- | lw TMP0, TAB:CARG1->hmask
1062
- | li CARG3, LJ_TTAB // Use metatable as default result.
1151
+ | beqz TAB:SFARG1LO, ->fff_restv
1152
+ |. li SFARG1HI, LJ_TNIL
1153
+ | lw TMP0, TAB:SFARG1LO->hmask
1154
+ | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1063
1155
  | lw TMP1, STR:RC->hash
1064
- | lw NODE:TMP2, TAB:CARG1->node
1156
+ | lw NODE:TMP2, TAB:SFARG1LO->node
1065
1157
  | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
1066
1158
  | sll TMP0, TMP1, 5
1067
1159
  | sll TMP1, TMP1, 3
@@ -1073,7 +1165,7 @@ static void build_subroutines(BuildCtx *ctx)
1073
1165
  | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1074
1166
  | lw NODE:TMP3, NODE:TMP2->next
1075
1167
  | bne CARG4, AT, >4
1076
- |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
1168
+ |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1077
1169
  | beq TMP0, STR:RC, >5
1078
1170
  |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1079
1171
  |4:
@@ -1082,36 +1174,35 @@ static void build_subroutines(BuildCtx *ctx)
1082
1174
  | b <3
1083
1175
  |. nop
1084
1176
  |5:
1085
- | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value.
1177
+ | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1086
1178
  |. nop
1087
- | move CARG3, CARG2 // Return value of mt.__metatable.
1179
+ | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1088
1180
  | b ->fff_restv
1089
- |. move CARG1, TMP1
1181
+ |. move SFARG1LO, TMP1
1090
1182
  |
1091
1183
  |6:
1092
- | beq CARG3, AT, <1
1093
- |. sltiu TMP0, CARG3, LJ_TISNUM
1094
- | li TMP1, LJ_TISNUM
1095
- | movz TMP1, CARG3, TMP0
1096
- | not TMP1, TMP1
1184
+ | beq SFARG1HI, AT, <1
1185
+ |. sltu AT, TISNUM, SFARG1HI
1186
+ | movz SFARG1HI, TISNUM, AT
1187
+ | not TMP1, SFARG1HI
1097
1188
  | sll TMP1, TMP1, 2
1098
1189
  | addu TMP1, DISPATCH, TMP1
1099
1190
  | b <2
1100
- |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1191
+ |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1101
1192
  |
1102
1193
  |.ffunc_2 setmetatable
1103
1194
  | // Fast path: no mt for table yet and not clearing the mt.
1104
1195
  | li AT, LJ_TTAB
1105
- | bne CARG3, AT, ->fff_fallback
1106
- |. addiu CARG4, CARG4, -LJ_TTAB
1107
- | lw TAB:TMP1, TAB:CARG1->metatable
1108
- | lbu TMP3, TAB:CARG1->marked
1109
- | or AT, CARG4, TAB:TMP1
1196
+ | bne SFARG1HI, AT, ->fff_fallback
1197
+ |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
1198
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
1199
+ | lbu TMP3, TAB:SFARG1LO->marked
1200
+ | or AT, SFARG2HI, TAB:TMP1
1110
1201
  | bnez AT, ->fff_fallback
1111
1202
  |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1112
1203
  | beqz AT, ->fff_restv
1113
- |. sw TAB:CARG2, TAB:CARG1->metatable
1114
- | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv
1204
+ |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
1205
+ | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1115
1206
  |
1116
1207
  |.ffunc rawget
1117
1208
  | lw CARG4, HI(BASE)
@@ -1125,44 +1216,44 @@ static void build_subroutines(BuildCtx *ctx)
1125
1216
  | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1126
1217
  |. move CARG1, L
1127
1218
  | // Returns cTValue *.
1128
- | b ->fff_resn
1129
- |. ldc1 FRET1, 0(CRET1)
1219
+ | lw SFARG1HI, HI(CRET1)
1220
+ | b ->fff_restv
1221
+ |. lw SFARG1LO, LO(CRET1)
1130
1222
  |
1131
1223
  |//-- Base library: conversions ------------------------------------------
1132
1224
  |
1133
1225
  |.ffunc tonumber
1134
1226
  | // Only handles the number case inline (without a base argument).
1135
1227
  | lw CARG1, HI(BASE)
1136
- | xori AT, NARGS8:RC, 8
1137
- | sltiu CARG1, CARG1, LJ_TISNUM
1138
- | movn CARG1, r0, AT
1139
- | beqz CARG1, ->fff_fallback // Exactly one number argument.
1140
- |. ldc1 FRET1, 0(BASE)
1141
- | b ->fff_resn
1142
- |. nop
1228
+ | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1229
+ | sltu TMP0, TISNUM, CARG1
1230
+ | or AT, AT, TMP0
1231
+ | bnez AT, ->fff_fallback
1232
+ |. lw SFARG1HI, HI(BASE)
1233
+ | b ->fff_restv
1234
+ |. lw SFARG1LO, LO(BASE)
1143
1235
  |
1144
1236
  |.ffunc_1 tostring
1145
1237
  | // Only handles the string or number case inline.
1146
1238
  | li AT, LJ_TSTR
1147
1239
  | // A __tostring method in the string base metatable is ignored.
1148
- | beq CARG3, AT, ->fff_restv // String key?
1240
+ | beq SFARG1HI, AT, ->fff_restv // String key?
1149
1241
  | // Handle numbers inline, unless a number base metatable is present.
1150
1242
  |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1151
- | sltiu TMP0, CARG3, LJ_TISNUM
1152
- | sltiu TMP1, TMP1, 1
1153
- | and TMP0, TMP0, TMP1
1154
- | beqz TMP0, ->fff_fallback
1243
+ | sltu TMP0, TISNUM, SFARG1HI
1244
+ | or TMP0, TMP0, TMP1
1245
+ | bnez TMP0, ->fff_fallback
1155
1246
  |. sw BASE, L->base // Add frame since C call can throw.
1156
1247
  | ffgccheck
1157
1248
  |. sw PC, SAVE_PC // Redundant (but a defined value).
1158
- | load_got lj_strfmt_num
1249
+ | load_got lj_strfmt_number
1159
1250
  | move CARG1, L
1160
- | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np)
1251
+ | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1161
1252
  |. move CARG2, BASE
1162
1253
  | // Returns GCstr *.
1163
- | li CARG3, LJ_TSTR
1254
+ | li SFARG1HI, LJ_TSTR
1164
1255
  | b ->fff_restv
1165
- |. move CARG1, CRET1
1256
+ |. move SFARG1LO, CRET1
1166
1257
  |
1167
1258
  |//-- Base library: iterators -------------------------------------------
1168
1259
  |
@@ -1184,31 +1275,38 @@ static void build_subroutines(BuildCtx *ctx)
1184
1275
  |. move CARG1, L
1185
1276
  | // Returns 0 at end of traversal.
1186
1277
  | beqz CRET1, ->fff_restv // End of traversal: return nil.
1187
- |. li CARG3, LJ_TNIL
1188
- | ldc1 f0, 8(BASE) // Copy key and value to results.
1278
+ |. li SFARG1HI, LJ_TNIL
1279
+ | lw TMP0, 8+HI(BASE)
1280
+ | lw TMP1, 8+LO(BASE)
1189
1281
  | addiu RA, BASE, -8
1190
- | ldc1 f2, 16(BASE)
1191
- | li RD, (2+1)*8
1192
- | sdc1 f0, 0(RA)
1282
+ | lw TMP2, 16+HI(BASE)
1283
+ | lw TMP3, 16+LO(BASE)
1284
+ | sw TMP0, HI(RA)
1285
+ | sw TMP1, LO(RA)
1286
+ | sw TMP2, 8+HI(RA)
1287
+ | sw TMP3, 8+LO(RA)
1193
1288
  | b ->fff_res
1194
- |. sdc1 f2, 8(RA)
1289
+ |. li RD, (2+1)*8
1195
1290
  |
1196
1291
  |.ffunc_1 pairs
1197
1292
  | li AT, LJ_TTAB
1198
- | bne CARG3, AT, ->fff_fallback
1293
+ | bne SFARG1HI, AT, ->fff_fallback
1199
1294
  |. lw PC, FRAME_PC(BASE)
1200
1295
  #if LJ_52
1201
- | lw TAB:TMP2, TAB:CARG1->metatable
1202
- | ldc1 f0, CFUNC:RB->upvalue[0]
1296
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
1297
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1298
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1203
1299
  | bnez TAB:TMP2, ->fff_fallback
1204
1300
  #else
1205
- | ldc1 f0, CFUNC:RB->upvalue[0]
1301
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1302
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1206
1303
  #endif
1207
1304
  |. addiu RA, BASE, -8
1208
1305
  | sw TISNIL, 8+HI(BASE)
1209
- | li RD, (3+1)*8
1306
+ | sw TMP0, HI(RA)
1307
+ | sw TMP1, LO(RA)
1210
1308
  | b ->fff_res
1211
- |. sdc1 f0, 0(RA)
1309
+ |. li RD, (3+1)*8
1212
1310
  |
1213
1311
  |.ffunc ipairs_aux
1214
1312
  | sltiu AT, NARGS8:RC, 16
@@ -1216,35 +1314,32 @@ static void build_subroutines(BuildCtx *ctx)
1216
1314
  | lw TAB:CARG1, LO(BASE)
1217
1315
  | lw CARG4, 8+HI(BASE)
1218
1316
  | bnez AT, ->fff_fallback
1219
- |. ldc1 FARG2, 8(BASE)
1220
- | addiu CARG3, CARG3, -LJ_TTAB
1221
- | sltiu AT, CARG4, LJ_TISNUM
1222
- | li TMP0, 1
1223
- | movn AT, r0, CARG3
1224
- | mtc1 TMP0, FARG1
1225
- | beqz AT, ->fff_fallback
1317
+ |. addiu CARG3, CARG3, -LJ_TTAB
1318
+ | xor CARG4, CARG4, TISNUM
1319
+ | and AT, CARG3, CARG4
1320
+ | bnez AT, ->fff_fallback
1226
1321
  |. lw PC, FRAME_PC(BASE)
1227
- | trunc.w.d FRET1, FARG2
1228
- | cvt.d.w FARG1, FARG1
1322
+ | lw TMP2, 8+LO(BASE)
1229
1323
  | lw TMP0, TAB:CARG1->asize
1230
1324
  | lw TMP1, TAB:CARG1->array
1231
- | mfc1 TMP2, FRET1
1232
- | addiu RA, BASE, -8
1233
- | add.d FARG2, FARG2, FARG1
1234
1325
  | addiu TMP2, TMP2, 1
1326
+ | sw TISNUM, -8+HI(BASE)
1235
1327
  | sltu AT, TMP2, TMP0
1328
+ | sw TMP2, -8+LO(BASE)
1329
+ | beqz AT, >2 // Not in array part?
1330
+ |. addiu RA, BASE, -8
1236
1331
  | sll TMP3, TMP2, 3
1237
1332
  | addu TMP3, TMP1, TMP3
1238
- | beqz AT, >2 // Not in array part?
1239
- |. sdc1 FARG2, 0(RA)
1240
- | lw TMP2, HI(TMP3)
1241
- | ldc1 f0, 0(TMP3)
1333
+ | lw TMP1, HI(TMP3)
1334
+ | lw TMP2, LO(TMP3)
1242
1335
  |1:
1243
- | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results.
1336
+ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1244
1337
  |. li RD, (0+1)*8
1245
- | li RD, (2+1)*8
1338
+ | sw TMP1, 8+HI(RA)
1339
+ | sw TMP2, 8+LO(RA)
1246
1340
  | b ->fff_res
1247
- |. sdc1 f0, 8(RA)
1341
+ |. li RD, (2+1)*8
1342
+ |
1248
1343
  |2: // Check for empty hash part first. Otherwise call C function.
1249
1344
  | lw TMP0, TAB:CARG1->hmask
1250
1345
  | load_got lj_tab_getinth
@@ -1255,27 +1350,30 @@ static void build_subroutines(BuildCtx *ctx)
1255
1350
  | // Returns cTValue * or NULL.
1256
1351
  | beqz CRET1, ->fff_res
1257
1352
  |. li RD, (0+1)*8
1258
- | lw TMP2, HI(CRET1)
1353
+ | lw TMP1, HI(CRET1)
1259
1354
  | b <1
1260
- |. ldc1 f0, 0(CRET1)
1355
+ |. lw TMP2, LO(CRET1)
1261
1356
  |
1262
1357
  |.ffunc_1 ipairs
1263
1358
  | li AT, LJ_TTAB
1264
- | bne CARG3, AT, ->fff_fallback
1359
+ | bne SFARG1HI, AT, ->fff_fallback
1265
1360
  |. lw PC, FRAME_PC(BASE)
1266
1361
  #if LJ_52
1267
- | lw TAB:TMP2, TAB:CARG1->metatable
1268
- | ldc1 f0, CFUNC:RB->upvalue[0]
1362
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
1363
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1364
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1269
1365
  | bnez TAB:TMP2, ->fff_fallback
1270
1366
  #else
1271
- | ldc1 f0, CFUNC:RB->upvalue[0]
1367
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1368
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1272
1369
  #endif
1273
1370
  |. addiu RA, BASE, -8
1274
- | sw r0, 8+HI(BASE)
1371
+ | sw TISNUM, 8+HI(BASE)
1275
1372
  | sw r0, 8+LO(BASE)
1276
- | li RD, (3+1)*8
1373
+ | sw TMP0, HI(RA)
1374
+ | sw TMP1, LO(RA)
1277
1375
  | b ->fff_res
1278
- |. sdc1 f0, 0(RA)
1376
+ |. li RD, (3+1)*8
1279
1377
  |
1280
1378
  |//-- Base library: catch errors ----------------------------------------
1281
1379
  |
@@ -1295,8 +1393,9 @@ static void build_subroutines(BuildCtx *ctx)
1295
1393
  | sltiu AT, NARGS8:RC, 16
1296
1394
  | lw CARG4, 8+HI(BASE)
1297
1395
  | bnez AT, ->fff_fallback
1298
- |. ldc1 FARG2, 8(BASE)
1299
- | ldc1 FARG1, 0(BASE)
1396
+ |. lw CARG3, 8+LO(BASE)
1397
+ | lw CARG1, LO(BASE)
1398
+ | lw CARG2, HI(BASE)
1300
1399
  | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1301
1400
  | li AT, LJ_TFUNC
1302
1401
  | move TMP2, BASE
@@ -1304,9 +1403,11 @@ static void build_subroutines(BuildCtx *ctx)
1304
1403
  | addiu BASE, BASE, 16
1305
1404
  | // Remember active hook before pcall.
1306
1405
  | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1307
- | sdc1 FARG2, 0(TMP2) // Swap function and traceback.
1406
+ | sw CARG3, LO(TMP2) // Swap function and traceback.
1407
+ | sw CARG4, HI(TMP2)
1308
1408
  | andi TMP3, TMP3, 1
1309
- | sdc1 FARG1, 8(TMP2)
1409
+ | sw CARG1, 8+LO(TMP2)
1410
+ | sw CARG2, 8+HI(TMP2)
1310
1411
  | addiu PC, TMP3, 16+FRAME_PCALL
1311
1412
  | b ->vm_call_dispatch
1312
1413
  |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1315,7 +1416,10 @@ static void build_subroutines(BuildCtx *ctx)
1315
1416
  |
1316
1417
  |.macro coroutine_resume_wrap, resume
1317
1418
  |.if resume
1318
- |.ffunc_1 coroutine_resume
1419
+ |.ffunc coroutine_resume
1420
+ | lw CARG3, HI(BASE)
1421
+ | beqz NARGS8:RC, ->fff_fallback
1422
+ |. lw CARG1, LO(BASE)
1319
1423
  | li AT, LJ_TTHREAD
1320
1424
  | bne CARG3, AT, ->fff_fallback
1321
1425
  |.else
@@ -1350,11 +1454,13 @@ static void build_subroutines(BuildCtx *ctx)
1350
1454
  | move CARG3, CARG2
1351
1455
  | sw BASE, L->top
1352
1456
  |2: // Move args to coroutine.
1353
- | ldc1 f0, 0(BASE)
1457
+ | lw SFRETHI, HI(BASE)
1458
+ | lw SFRETLO, LO(BASE)
1354
1459
  | sltu AT, BASE, TMP1
1355
1460
  | beqz AT, >3
1356
1461
  |. addiu BASE, BASE, 8
1357
- | sdc1 f0, 0(CARG3)
1462
+ | sw SFRETHI, HI(CARG3)
1463
+ | sw SFRETLO, LO(CARG3)
1358
1464
  | b <2
1359
1465
  |. addiu CARG3, CARG3, 8
1360
1466
  |3:
@@ -1380,10 +1486,12 @@ static void build_subroutines(BuildCtx *ctx)
1380
1486
  | sw TMP2, L:RA->top // Clear coroutine stack.
1381
1487
  | move TMP1, BASE
1382
1488
  |5: // Move results from coroutine.
1383
- | ldc1 f0, 0(TMP2)
1489
+ | lw SFRETHI, HI(TMP2)
1490
+ | lw SFRETLO, LO(TMP2)
1384
1491
  | addiu TMP2, TMP2, 8
1385
1492
  | sltu AT, TMP2, TMP3
1386
- | sdc1 f0, 0(TMP1)
1493
+ | sw SFRETHI, HI(TMP1)
1494
+ | sw SFRETLO, LO(TMP1)
1387
1495
  | bnez AT, <5
1388
1496
  |. addiu TMP1, TMP1, 8
1389
1497
  |6:
@@ -1408,12 +1516,14 @@ static void build_subroutines(BuildCtx *ctx)
1408
1516
  |.if resume
1409
1517
  | addiu TMP3, TMP3, -8
1410
1518
  | li TMP1, LJ_TFALSE
1411
- | ldc1 f0, 0(TMP3)
1519
+ | lw SFRETHI, HI(TMP3)
1520
+ | lw SFRETLO, LO(TMP3)
1412
1521
  | sw TMP3, L:RA->top // Remove error from coroutine stack.
1413
1522
  | li RD, (2+1)*8
1414
1523
  | sw TMP1, -8+HI(BASE) // Prepend false to results.
1415
1524
  | addiu RA, BASE, -8
1416
- | sdc1 f0, 0(BASE) // Copy error message.
1525
+ | sw SFRETHI, HI(BASE) // Copy error message.
1526
+ | sw SFRETLO, LO(BASE)
1417
1527
  | b <7
1418
1528
  |. andi TMP0, PC, FRAME_TYPE
1419
1529
  |.else
@@ -1449,20 +1559,29 @@ static void build_subroutines(BuildCtx *ctx)
1449
1559
  |
1450
1560
  |//-- Math library -------------------------------------------------------
1451
1561
  |
1452
- |.ffunc_n math_abs
1453
- |. abs.d FRET1, FARG1
1454
- |->fff_resn:
1455
- | lw PC, FRAME_PC(BASE)
1456
- | addiu RA, BASE, -8
1457
- | b ->fff_res1
1458
- |. sdc1 FRET1, -8(BASE)
1562
+ |.ffunc_1 math_abs
1563
+ | bne SFARG1HI, TISNUM, >1
1564
+ |. sra TMP0, SFARG1LO, 31
1565
+ | xor TMP1, SFARG1LO, TMP0
1566
+ | subu SFARG1LO, TMP1, TMP0
1567
+ | bgez SFARG1LO, ->fff_restv
1568
+ |. nop
1569
+ | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1570
+ | b ->fff_restv
1571
+ |. li SFARG1LO, 0
1572
+ |1:
1573
+ | sltiu AT, SFARG1HI, LJ_TISNUM
1574
+ | beqz AT, ->fff_fallback
1575
+ |. sll SFARG1HI, SFARG1HI, 1
1576
+ | srl SFARG1HI, SFARG1HI, 1
1577
+ |// fallthrough
1459
1578
  |
1460
1579
  |->fff_restv:
1461
- | // CARG3/CARG1 = TValue result.
1580
+ | // SFARG1LO/SFARG1HI = TValue result.
1462
1581
  | lw PC, FRAME_PC(BASE)
1463
- | sw CARG3, -8+HI(BASE)
1582
+ | sw SFARG1HI, -8+HI(BASE)
1464
1583
  | addiu RA, BASE, -8
1465
- | sw CARG1, -8+LO(BASE)
1584
+ | sw SFARG1LO, -8+LO(BASE)
1466
1585
  |->fff_res1:
1467
1586
  | // RA = results, PC = return.
1468
1587
  | li RD, (1+1)*8
@@ -1491,15 +1610,19 @@ static void build_subroutines(BuildCtx *ctx)
1491
1610
  |. sw TISNIL, -8+HI(TMP1)
1492
1611
  |
1493
1612
  |.macro math_extern, func
1494
- |->ff_math_ .. func:
1495
- | lw CARG3, HI(BASE)
1613
+ | .ffunc math_ .. func
1614
+ | lw SFARG1HI, HI(BASE)
1496
1615
  | beqz NARGS8:RC, ->fff_fallback
1497
1616
  |. load_got func
1498
- | sltiu AT, CARG3, LJ_TISNUM
1617
+ | sltiu AT, SFARG1HI, LJ_TISNUM
1499
1618
  | beqz AT, ->fff_fallback
1500
- |. nop
1501
- | call_extern
1619
+ |.if FPU
1502
1620
  |. ldc1 FARG1, 0(BASE)
1621
+ |.else
1622
+ |. lw SFARG1LO, LO(BASE)
1623
+ |.endif
1624
+ | call_extern
1625
+ |. nop
1503
1626
  | b ->fff_resn
1504
1627
  |. nop
1505
1628
  |.endmacro
@@ -1513,10 +1636,22 @@ static void build_subroutines(BuildCtx *ctx)
1513
1636
  |. nop
1514
1637
  |.endmacro
1515
1638
  |
1639
+ |// TODO: Return integer type if result is integer (own sf implementation).
1516
1640
  |.macro math_round, func
1517
- | .ffunc_n math_ .. func
1518
- |. nop
1641
+ |->ff_math_ .. func:
1642
+ | lw SFARG1HI, HI(BASE)
1643
+ | beqz NARGS8:RC, ->fff_fallback
1644
+ |. lw SFARG1LO, LO(BASE)
1645
+ | beq SFARG1HI, TISNUM, ->fff_restv
1646
+ |. sltu AT, SFARG1HI, TISNUM
1647
+ | beqz AT, ->fff_fallback
1648
+ |.if FPU
1649
+ |. ldc1 FARG1, 0(BASE)
1519
1650
  | bal ->vm_ .. func
1651
+ |.else
1652
+ |. load_got func
1653
+ | call_extern
1654
+ |.endif
1520
1655
  |. nop
1521
1656
  | b ->fff_resn
1522
1657
  |. nop
@@ -1526,15 +1661,19 @@ static void build_subroutines(BuildCtx *ctx)
1526
1661
  | math_round ceil
1527
1662
  |
1528
1663
  |.ffunc math_log
1529
- | lw CARG3, HI(BASE)
1530
1664
  | li AT, 8
1531
1665
  | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1532
- |. load_got log
1533
- | sltiu AT, CARG3, LJ_TISNUM
1666
+ |. lw SFARG1HI, HI(BASE)
1667
+ | sltiu AT, SFARG1HI, LJ_TISNUM
1534
1668
  | beqz AT, ->fff_fallback
1535
- |. nop
1669
+ |. load_got log
1670
+ |.if FPU
1536
1671
  | call_extern
1537
1672
  |. ldc1 FARG1, 0(BASE)
1673
+ |.else
1674
+ | call_extern
1675
+ |. lw SFARG1LO, LO(BASE)
1676
+ |.endif
1538
1677
  | b ->fff_resn
1539
1678
  |. nop
1540
1679
  |
@@ -1553,17 +1692,43 @@ static void build_subroutines(BuildCtx *ctx)
1553
1692
  | math_extern2 atan2
1554
1693
  | math_extern2 fmod
1555
1694
  |
1695
+ |.if FPU
1556
1696
  |.ffunc_n math_sqrt
1557
1697
  |. sqrt.d FRET1, FARG1
1558
- | b ->fff_resn
1559
- |. nop
1698
+ |// fallthrough to ->fff_resn
1699
+ |.else
1700
+ | math_extern sqrt
1701
+ |.endif
1702
+ |
1703
+ |->fff_resn:
1704
+ | lw PC, FRAME_PC(BASE)
1705
+ | addiu RA, BASE, -8
1706
+ |.if FPU
1707
+ | b ->fff_res1
1708
+ |. sdc1 FRET1, -8(BASE)
1709
+ |.else
1710
+ | sw SFRETHI, -8+HI(BASE)
1711
+ | b ->fff_res1
1712
+ |. sw SFRETLO, -8+LO(BASE)
1713
+ |.endif
1714
+ |
1560
1715
  |
1561
- |.ffunc_nn math_ldexp
1562
- | trunc.w.d FARG2, FARG2
1716
+ |.ffunc math_ldexp
1717
+ | sltiu AT, NARGS8:RC, 16
1718
+ | lw SFARG1HI, HI(BASE)
1719
+ | bnez AT, ->fff_fallback
1720
+ |. lw CARG4, 8+HI(BASE)
1721
+ | bne CARG4, TISNUM, ->fff_fallback
1563
1722
  | load_got ldexp
1564
- | mfc1 CARG3, FARG2
1723
+ |. sltu AT, SFARG1HI, TISNUM
1724
+ | beqz AT, ->fff_fallback
1725
+ |.if FPU
1726
+ |. ldc1 FARG1, 0(BASE)
1727
+ |.else
1728
+ |. lw SFARG1LO, LO(BASE)
1729
+ |.endif
1565
1730
  | call_extern
1566
- |. nop
1731
+ |. lw CARG3, 8+LO(BASE)
1567
1732
  | b ->fff_resn
1568
1733
  |. nop
1569
1734
  |
@@ -1574,10 +1739,17 @@ static void build_subroutines(BuildCtx *ctx)
1574
1739
  |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1575
1740
  | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1576
1741
  | addiu RA, BASE, -8
1742
+ |.if FPU
1577
1743
  | mtc1 TMP1, FARG2
1578
1744
  | sdc1 FRET1, 0(RA)
1579
1745
  | cvt.d.w FARG2, FARG2
1580
1746
  | sdc1 FARG2, 8(RA)
1747
+ |.else
1748
+ | sw SFRETLO, LO(RA)
1749
+ | sw SFRETHI, HI(RA)
1750
+ | sw TMP1, 8+LO(RA)
1751
+ | sw TISNUM, 8+HI(RA)
1752
+ |.endif
1581
1753
  | b ->fff_res
1582
1754
  |. li RD, (2+1)*8
1583
1755
  |
@@ -1587,39 +1759,98 @@ static void build_subroutines(BuildCtx *ctx)
1587
1759
  | call_extern
1588
1760
  |. addiu CARG3, BASE, -8
1589
1761
  | addiu RA, BASE, -8
1762
+ |.if FPU
1590
1763
  | sdc1 FRET1, 0(BASE)
1764
+ |.else
1765
+ | sw SFRETLO, LO(BASE)
1766
+ | sw SFRETHI, HI(BASE)
1767
+ |.endif
1591
1768
  | b ->fff_res
1592
1769
  |. li RD, (2+1)*8
1593
1770
  |
1594
- |.macro math_minmax, name, ismax
1595
- |->ff_ .. name:
1596
- | lw CARG3, HI(BASE)
1597
- | beqz NARGS8:RC, ->fff_fallback
1598
- |. ldc1 FRET1, 0(BASE)
1599
- | sltiu AT, CARG3, LJ_TISNUM
1771
+ |.macro math_minmax, name, intins, fpins
1772
+ | .ffunc_1 name
1773
+ | addu TMP3, BASE, NARGS8:RC
1774
+ | bne SFARG1HI, TISNUM, >5
1775
+ |. addiu TMP2, BASE, 8
1776
+ |1: // Handle integers.
1777
+ |. lw SFARG2HI, HI(TMP2)
1778
+ | beq TMP2, TMP3, ->fff_restv
1779
+ |. lw SFARG2LO, LO(TMP2)
1780
+ | bne SFARG2HI, TISNUM, >3
1781
+ |. slt AT, SFARG1LO, SFARG2LO
1782
+ | intins SFARG1LO, SFARG2LO, AT
1783
+ | b <1
1784
+ |. addiu TMP2, TMP2, 8
1785
+ |
1786
+ |3: // Convert intermediate result to number and continue with number loop.
1787
+ | sltiu AT, SFARG2HI, LJ_TISNUM
1600
1788
  | beqz AT, ->fff_fallback
1601
- |. addu TMP2, BASE, NARGS8:RC
1602
- | addiu TMP1, BASE, 8
1603
- | beq TMP1, TMP2, ->fff_resn
1604
- |1:
1605
- |. lw CARG3, HI(TMP1)
1606
- | ldc1 FARG1, 0(TMP1)
1607
- | addiu TMP1, TMP1, 8
1608
- | sltiu AT, CARG3, LJ_TISNUM
1789
+ |.if FPU
1790
+ |. mtc1 SFARG1LO, FRET1
1791
+ | cvt.d.w FRET1, FRET1
1792
+ | b >7
1793
+ |. ldc1 FARG1, 0(TMP2)
1794
+ |.else
1795
+ |. nop
1796
+ | bal ->vm_sfi2d_1
1797
+ |. nop
1798
+ | b >7
1799
+ |. nop
1800
+ |.endif
1801
+ |
1802
+ |5:
1803
+ |. sltiu AT, SFARG1HI, LJ_TISNUM
1609
1804
  | beqz AT, ->fff_fallback
1610
- |.if ismax
1611
- |. c.olt.d FARG1, FRET1
1805
+ |.if FPU
1806
+ |. ldc1 FRET1, 0(BASE)
1807
+ |.endif
1808
+ |
1809
+ |6: // Handle numbers.
1810
+ |. lw SFARG2HI, HI(TMP2)
1811
+ |.if FPU
1812
+ | beq TMP2, TMP3, ->fff_resn
1612
1813
  |.else
1613
- |. c.olt.d FRET1, FARG1
1814
+ | beq TMP2, TMP3, ->fff_restv
1614
1815
  |.endif
1615
- | bne TMP1, TMP2, <1
1616
- |. movf.d FRET1, FARG1
1617
- | b ->fff_resn
1816
+ |. sltiu AT, SFARG2HI, LJ_TISNUM
1817
+ | beqz AT, >8
1818
+ |.if FPU
1819
+ |. ldc1 FARG1, 0(TMP2)
1820
+ |.else
1821
+ |. lw SFARG2LO, LO(TMP2)
1822
+ |.endif
1823
+ |7:
1824
+ |.if FPU
1825
+ | c.olt.d FRET1, FARG1
1826
+ | fpins FRET1, FARG1
1827
+ |.else
1828
+ | bal ->vm_sfcmpolt
1829
+ |. nop
1830
+ | intins SFARG1LO, SFARG2LO, CRET1
1831
+ | intins SFARG1HI, SFARG2HI, CRET1
1832
+ |.endif
1833
+ | b <6
1834
+ |. addiu TMP2, TMP2, 8
1835
+ |
1836
+ |8: // Convert integer to number and continue with number loop.
1837
+ | bne SFARG2HI, TISNUM, ->fff_fallback
1838
+ |.if FPU
1839
+ |. lwc1 FARG1, LO(TMP2)
1840
+ | b <7
1841
+ |. cvt.d.w FARG1, FARG1
1842
+ |.else
1843
+ |. nop
1844
+ | bal ->vm_sfi2d_2
1618
1845
  |. nop
1846
+ | b <7
1847
+ |. nop
1848
+ |.endif
1849
+ |
1619
1850
  |.endmacro
1620
1851
  |
1621
- | math_minmax math_min, 0
1622
- | math_minmax math_max, 1
1852
+ | math_minmax math_min, movz, movf.d
1853
+ | math_minmax math_max, movn, movt.d
1623
1854
  |
1624
1855
  |//-- String library -----------------------------------------------------
1625
1856
  |
@@ -1632,32 +1863,30 @@ static void build_subroutines(BuildCtx *ctx)
1632
1863
  | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1633
1864
  |. nop
1634
1865
  | lw TMP0, STR:CARG1->len
1635
- | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1636
1866
  | addiu RA, BASE, -8
1867
+ | lw PC, FRAME_PC(BASE)
1637
1868
  | sltu RD, r0, TMP0
1638
- | mtc1 TMP1, f0
1869
+ | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1639
1870
  | addiu RD, RD, 1
1640
- | cvt.d.w f0, f0
1641
- | lw PC, FRAME_PC(BASE)
1642
1871
  | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1872
+ | sw TISNUM, HI(RA)
1643
1873
  | b ->fff_res
1644
- |. sdc1 f0, 0(RA)
1874
+ |. sw TMP1, LO(RA)
1645
1875
  |
1646
1876
  |.ffunc string_char // Only handle the 1-arg case here.
1647
1877
  | ffgccheck
1648
- | lw CARG3, HI(BASE)
1649
- | ldc1 FARG1, 0(BASE)
1650
- | li AT, 8
1651
- | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1652
- |. sltiu AT, CARG3, LJ_TISNUM
1653
- | beqz AT, ->fff_fallback
1878
+ |. lw CARG3, HI(BASE)
1879
+ | lw CARG1, LO(BASE)
1880
+ | li TMP1, 255
1881
+ | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1882
+ | xor TMP0, CARG3, TISNUM // Integer.
1883
+ | sltu TMP1, TMP1, CARG1 // !(255 < n).
1884
+ | or AT, AT, TMP0
1885
+ | or AT, AT, TMP1
1886
+ | bnez AT, ->fff_fallback
1654
1887
  |. li CARG3, 1
1655
- | trunc.w.d FARG1, FARG1
1656
1888
  | addiu CARG2, sp, ARG5_OFS
1657
- | sltiu AT, TMP0, 256
1658
- | mfc1 TMP0, FARG1
1659
- | beqz AT, ->fff_fallback
1660
- |. sw TMP0, ARG5
1889
+ | sb CARG1, ARG5
1661
1890
  |->fff_newstr:
1662
1891
  | load_got lj_str_new
1663
1892
  | sw BASE, L->base
@@ -1667,34 +1896,28 @@ static void build_subroutines(BuildCtx *ctx)
1667
1896
  | // Returns GCstr *.
1668
1897
  | lw BASE, L->base
1669
1898
  |->fff_resstr:
1670
- | move CARG1, CRET1
1899
+ | move SFARG1LO, CRET1
1671
1900
  | b ->fff_restv
1672
- |. li CARG3, LJ_TSTR
1901
+ |. li SFARG1HI, LJ_TSTR
1673
1902
  |
1674
1903
  |.ffunc string_sub
1675
1904
  | ffgccheck
1676
- | addiu AT, NARGS8:RC, -16
1905
+ |. addiu AT, NARGS8:RC, -16
1677
1906
  | lw CARG3, 16+HI(BASE)
1678
- | ldc1 f0, 16(BASE)
1679
1907
  | lw TMP0, HI(BASE)
1680
1908
  | lw STR:CARG1, LO(BASE)
1681
1909
  | bltz AT, ->fff_fallback
1682
- | lw CARG2, 8+HI(BASE)
1683
- | ldc1 f2, 8(BASE)
1910
+ |. lw CARG2, 8+HI(BASE)
1684
1911
  | beqz AT, >1
1685
1912
  |. li CARG4, -1
1686
- | trunc.w.d f0, f0
1687
- | sltiu AT, CARG3, LJ_TISNUM
1688
- | beqz AT, ->fff_fallback
1689
- |. mfc1 CARG4, f0
1913
+ | bne CARG3, TISNUM, ->fff_fallback
1914
+ |. lw CARG4, 16+LO(BASE)
1690
1915
  |1:
1691
- | sltiu AT, CARG2, LJ_TISNUM
1692
- | beqz AT, ->fff_fallback
1916
+ | bne CARG2, TISNUM, ->fff_fallback
1693
1917
  |. li AT, LJ_TSTR
1694
- | trunc.w.d f2, f2
1695
1918
  | bne TMP0, AT, ->fff_fallback
1696
- |. lw CARG2, STR:CARG1->len
1697
- | mfc1 CARG3, f2
1919
+ |. lw CARG3, 8+LO(BASE)
1920
+ | lw CARG2, STR:CARG1->len
1698
1921
  | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1699
1922
  | slt AT, CARG4, r0
1700
1923
  | addiu TMP0, CARG2, 1
@@ -1716,14 +1939,14 @@ static void build_subroutines(BuildCtx *ctx)
1716
1939
  | bgez CARG3, ->fff_newstr
1717
1940
  |. addiu CARG3, CARG3, 1 // len++
1718
1941
  |->fff_emptystr: // Return empty string.
1719
- | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
1942
+ | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
1720
1943
  | b ->fff_restv
1721
- |. li CARG3, LJ_TSTR
1944
+ |. li SFARG1HI, LJ_TSTR
1722
1945
  |
1723
1946
  |.macro ffstring_op, name
1724
1947
  | .ffunc string_ .. name
1725
1948
  | ffgccheck
1726
- | lw CARG3, HI(BASE)
1949
+ |. lw CARG3, HI(BASE)
1727
1950
  | lw STR:CARG2, LO(BASE)
1728
1951
  | beqz NARGS8:RC, ->fff_fallback
1729
1952
  |. li AT, LJ_TSTR
@@ -1749,27 +1972,96 @@ static void build_subroutines(BuildCtx *ctx)
1749
1972
  |
1750
1973
  |//-- Bit library --------------------------------------------------------
1751
1974
  |
1975
+ |->vm_tobit_fb:
1976
+ | beqz TMP1, ->fff_fallback
1977
+ |.if FPU
1978
+ |. ldc1 FARG1, 0(BASE)
1979
+ | add.d FARG1, FARG1, TOBIT
1980
+ | jr ra
1981
+ |. mfc1 CRET1, FARG1
1982
+ |.else
1983
+ |// FP number to bit conversion for soft-float.
1984
+ |->vm_tobit:
1985
+ | sll TMP0, SFARG1HI, 1
1986
+ | lui AT, 0x0020
1987
+ | addu TMP0, TMP0, AT
1988
+ | slt AT, TMP0, r0
1989
+ | movz SFARG1LO, r0, AT
1990
+ | beqz AT, >2
1991
+ |. li TMP1, 0x3e0
1992
+ | not TMP1, TMP1
1993
+ | sra TMP0, TMP0, 21
1994
+ | subu TMP0, TMP1, TMP0
1995
+ | slt AT, TMP0, r0
1996
+ | bnez AT, >1
1997
+ |. sll TMP1, SFARG1HI, 11
1998
+ | lui AT, 0x8000
1999
+ | or TMP1, TMP1, AT
2000
+ | srl AT, SFARG1LO, 21
2001
+ | or TMP1, TMP1, AT
2002
+ | slt AT, SFARG1HI, r0
2003
+ | beqz AT, >2
2004
+ |. srlv SFARG1LO, TMP1, TMP0
2005
+ | subu SFARG1LO, r0, SFARG1LO
2006
+ |2:
2007
+ | jr ra
2008
+ |. move CRET1, SFARG1LO
2009
+ |1:
2010
+ | addiu TMP0, TMP0, 21
2011
+ | srlv TMP1, SFARG1LO, TMP0
2012
+ | li AT, 20
2013
+ | subu TMP0, AT, TMP0
2014
+ | sll SFARG1LO, SFARG1HI, 12
2015
+ | sllv AT, SFARG1LO, TMP0
2016
+ | or SFARG1LO, TMP1, AT
2017
+ | slt AT, SFARG1HI, r0
2018
+ | beqz AT, <2
2019
+ |. nop
2020
+ | jr ra
2021
+ |. subu CRET1, r0, SFARG1LO
2022
+ |.endif
2023
+ |
1752
2024
  |.macro .ffunc_bit, name
1753
- | .ffunc_n bit_..name
1754
- |. add.d FARG1, FARG1, TOBIT
1755
- | mfc1 CRET1, FARG1
2025
+ | .ffunc_1 bit_..name
2026
+ | beq SFARG1HI, TISNUM, >6
2027
+ |. move CRET1, SFARG1LO
2028
+ | bal ->vm_tobit_fb
2029
+ |. sltu TMP1, SFARG1HI, TISNUM
2030
+ |6:
1756
2031
  |.endmacro
1757
2032
  |
1758
2033
  |.macro .ffunc_bit_op, name, ins
1759
2034
  | .ffunc_bit name
1760
- | addiu TMP1, BASE, 8
1761
- | addu TMP2, BASE, NARGS8:RC
2035
+ | addiu TMP2, BASE, 8
2036
+ | addu TMP3, BASE, NARGS8:RC
1762
2037
  |1:
1763
- | lw CARG4, HI(TMP1)
1764
- | beq TMP1, TMP2, ->fff_resi
1765
- |. ldc1 FARG1, 0(TMP1)
1766
- | sltiu AT, CARG4, LJ_TISNUM
1767
- | beqz AT, ->fff_fallback
1768
- | add.d FARG1, FARG1, TOBIT
1769
- | mfc1 CARG2, FARG1
1770
- | ins CRET1, CRET1, CARG2
2038
+ | lw SFARG1HI, HI(TMP2)
2039
+ | beq TMP2, TMP3, ->fff_resi
2040
+ |. lw SFARG1LO, LO(TMP2)
2041
+ |.if FPU
2042
+ | bne SFARG1HI, TISNUM, >2
2043
+ |. addiu TMP2, TMP2, 8
1771
2044
  | b <1
1772
- |. addiu TMP1, TMP1, 8
2045
+ |. ins CRET1, CRET1, SFARG1LO
2046
+ |2:
2047
+ | ldc1 FARG1, -8(TMP2)
2048
+ | sltu TMP1, SFARG1HI, TISNUM
2049
+ | beqz TMP1, ->fff_fallback
2050
+ |. add.d FARG1, FARG1, TOBIT
2051
+ | mfc1 SFARG1LO, FARG1
2052
+ | b <1
2053
+ |. ins CRET1, CRET1, SFARG1LO
2054
+ |.else
2055
+ | beq SFARG1HI, TISNUM, >2
2056
+ |. move CRET2, CRET1
2057
+ | bal ->vm_tobit_fb
2058
+ |. sltu TMP1, SFARG1HI, TISNUM
2059
+ | move SFARG1LO, CRET2
2060
+ |2:
2061
+ | ins CRET1, CRET1, SFARG1LO
2062
+ | b <1
2063
+ |. addiu TMP2, TMP2, 8
2064
+ |.endif
1773
2065
  |.endmacro
1774
2066
  |
1775
2067
  |.ffunc_bit_op band, and
@@ -1793,24 +2085,28 @@ static void build_subroutines(BuildCtx *ctx)
1793
2085
  |. not CRET1, CRET1
1794
2086
  |
1795
2087
  |.macro .ffunc_bit_sh, name, ins, shmod
1796
- | .ffunc_nn bit_..name
1797
- |. add.d FARG1, FARG1, TOBIT
1798
- | add.d FARG2, FARG2, TOBIT
1799
- | mfc1 CARG1, FARG1
1800
- | mfc1 CARG2, FARG2
2088
+ | .ffunc_2 bit_..name
2089
+ | beq SFARG1HI, TISNUM, >1
2090
+ |. nop
2091
+ | bal ->vm_tobit_fb
2092
+ |. sltu TMP1, SFARG1HI, TISNUM
2093
+ | move SFARG1LO, CRET1
2094
+ |1:
2095
+ | bne SFARG2HI, TISNUM, ->fff_fallback
2096
+ |. nop
1801
2097
  |.if shmod == 1
1802
2098
  | li AT, 32
1803
- | subu TMP0, AT, CARG2
1804
- | sllv CARG2, CARG1, CARG2
1805
- | srlv CARG1, CARG1, TMP0
2099
+ | subu TMP0, AT, SFARG2LO
2100
+ | sllv SFARG2LO, SFARG1LO, SFARG2LO
2101
+ | srlv SFARG1LO, SFARG1LO, TMP0
1806
2102
  |.elif shmod == 2
1807
2103
  | li AT, 32
1808
- | subu TMP0, AT, CARG2
1809
- | srlv CARG2, CARG1, CARG2
1810
- | sllv CARG1, CARG1, TMP0
2104
+ | subu TMP0, AT, SFARG2LO
2105
+ | srlv SFARG2LO, SFARG1LO, SFARG2LO
2106
+ | sllv SFARG1LO, SFARG1LO, TMP0
1811
2107
  |.endif
1812
2108
  | b ->fff_resi
1813
- |. ins CRET1, CARG1, CARG2
2109
+ |. ins CRET1, SFARG1LO, SFARG2LO
1814
2110
  |.endmacro
1815
2111
  |
1816
2112
  |.ffunc_bit_sh lshift, sllv, 0
@@ -1822,9 +2118,11 @@ static void build_subroutines(BuildCtx *ctx)
1822
2118
  |
1823
2119
  |.ffunc_bit tobit
1824
2120
  |->fff_resi:
1825
- | mtc1 CRET1, FRET1
1826
- | b ->fff_resn
1827
- |. cvt.d.w FRET1, FRET1
2121
+ | lw PC, FRAME_PC(BASE)
2122
+ | addiu RA, BASE, -8
2123
+ | sw TISNUM, -8+HI(BASE)
2124
+ | b ->fff_res1
2125
+ |. sw CRET1, -8+LO(BASE)
1828
2126
  |
1829
2127
  |//-----------------------------------------------------------------------
1830
2128
  |
@@ -2015,17 +2313,19 @@ static void build_subroutines(BuildCtx *ctx)
2015
2313
  |.if JIT
2016
2314
  | // RA = resultptr, RB = meta base
2017
2315
  | lw INS, -4(PC)
2018
- | lw TMP3, -24+LO(RB) // Save previous trace number.
2316
+ | lw TMP2, -24+LO(RB) // Save previous trace.
2019
2317
  | decode_RA8a RC, INS
2020
2318
  | addiu AT, MULTRES, -8
2021
2319
  | decode_RA8b RC
2022
2320
  | beqz AT, >2
2023
2321
  |. addu RC, BASE, RC // Call base.
2024
2322
  |1: // Move results down.
2025
- | ldc1 f0, 0(RA)
2323
+ | lw SFRETHI, HI(RA)
2324
+ | lw SFRETLO, LO(RA)
2026
2325
  | addiu AT, AT, -8
2027
2326
  | addiu RA, RA, 8
2028
- | sdc1 f0, 0(RC)
2327
+ | sw SFRETHI, HI(RC)
2328
+ | sw SFRETLO, LO(RC)
2029
2329
  | bnez AT, <1
2030
2330
  |. addiu RC, RC, 8
2031
2331
  |2:
@@ -2034,17 +2334,13 @@ static void build_subroutines(BuildCtx *ctx)
2034
2334
  | decode_RA8b RA
2035
2335
  | decode_RB8b RB
2036
2336
  | addu RA, RA, RB
2037
- | lw TMP1, DISPATCH_J(trace)(DISPATCH)
2038
2337
  | addu RA, BASE, RA
2039
2338
  |3:
2040
2339
  | sltu AT, RC, RA
2041
2340
  | bnez AT, >9 // More results wanted?
2042
- |. sll TMP2, TMP3, 2
2341
+ |. nop
2043
2342
  |
2044
- | addu TMP2, TMP1, TMP2
2045
- | lw TRACE:TMP2, 0(TMP2)
2046
- | beqz TRACE:TMP2, ->cont_nop
2047
- |. nop
2343
+ | lhu TMP3, TRACE:TMP2->traceno
2048
2344
  | lhu RD, TRACE:TMP2->link
2049
2345
  | beq RD, TMP3, ->cont_nop // Blacklisted.
2050
2346
  |. load_got lj_dispatch_stitch
@@ -2086,14 +2382,23 @@ static void build_subroutines(BuildCtx *ctx)
2086
2382
  |//-----------------------------------------------------------------------
2087
2383
  |
2088
2384
  |.macro savex_, a, b
2385
+ |.if FPU
2089
2386
  | sdc1 f..a, 16+a*8(sp)
2090
2387
  | sw r..a, 16+32*8+a*4(sp)
2091
2388
  | sw r..b, 16+32*8+b*4(sp)
2389
+ |.else
2390
+ | sw r..a, 16+a*4(sp)
2391
+ | sw r..b, 16+b*4(sp)
2392
+ |.endif
2092
2393
  |.endmacro
2093
2394
  |
2094
2395
  |->vm_exit_handler:
2095
2396
  |.if JIT
2397
+ |.if FPU
2096
2398
  | addiu sp, sp, -(16+32*8+32*4)
2399
+ |.else
2400
+ | addiu sp, sp, -(16+32*4)
2401
+ |.endif
2097
2402
  | savex_ 0, 1
2098
2403
  | savex_ 2, 3
2099
2404
  | savex_ 4, 5
@@ -2108,17 +2413,25 @@ static void build_subroutines(BuildCtx *ctx)
2108
2413
  | savex_ 22, 23
2109
2414
  | savex_ 24, 25
2110
2415
  | savex_ 26, 27
2416
+ |.if FPU
2111
2417
  | sdc1 f28, 16+28*8(sp)
2112
- | sw r28, 16+32*8+28*4(sp)
2113
2418
  | sdc1 f30, 16+30*8(sp)
2419
+ | sw r28, 16+32*8+28*4(sp)
2114
2420
  | sw r30, 16+32*8+30*4(sp)
2115
2421
  | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2422
+ | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2423
+ | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2424
+ |.else
2425
+ | sw r28, 16+28*4(sp)
2426
+ | sw r30, 16+30*4(sp)
2427
+ | sw r0, 16+31*4(sp) // Clear RID_TMP.
2428
+ | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2429
+ | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2430
+ |.endif
2116
2431
  | li_vmstate EXIT
2117
- | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2118
2432
  | addiu DISPATCH, JGL, -GG_DISP2G-32768
2119
2433
  | lw TMP1, 0(TMP2) // Load exit number.
2120
2434
  | st_vmstate
2121
- | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
2122
2435
  | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2123
2436
  | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2124
2437
  | load_got lj_trace_exit
@@ -2148,15 +2461,16 @@ static void build_subroutines(BuildCtx *ctx)
2148
2461
  |1:
2149
2462
  | bltz CRET1, >9 // Check for error from exit.
2150
2463
  |. lw LFUNC:RB, FRAME_FUNC(BASE)
2151
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2464
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2152
2465
  | sll MULTRES, CRET1, 3
2153
2466
  | li TISNIL, LJ_TNIL
2467
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2154
2468
  | sw MULTRES, SAVE_MULTRES
2155
- | mtc1 TMP3, TOBIT
2469
+ | .FPU mtc1 TMP3, TOBIT
2156
2470
  | lw TMP1, LFUNC:RB->pc
2157
2471
  | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2158
2472
  | lw KBASE, PC2PROTO(k)(TMP1)
2159
- | cvt.d.s TOBIT, TOBIT
2473
+ | .FPU cvt.d.s TOBIT, TOBIT
2160
2474
  | // Modified copy of ins_next which handles function header dispatch, too.
2161
2475
  | lw INS, 0(PC)
2162
2476
  | addiu PC, PC, 4
@@ -2164,7 +2478,7 @@ static void build_subroutines(BuildCtx *ctx)
2164
2478
  | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2165
2479
  | decode_OP4a TMP1, INS
2166
2480
  | decode_OP4b TMP1
2167
- | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header?
2481
+ | sltiu TMP2, TMP1, BC_FUNCF*4
2168
2482
  | addu TMP0, DISPATCH, TMP1
2169
2483
  | decode_RD8a RD, INS
2170
2484
  | lw AT, 0(TMP0)
@@ -2205,8 +2519,9 @@ static void build_subroutines(BuildCtx *ctx)
2205
2519
  |//-- Math helper functions ----------------------------------------------
2206
2520
  |//-----------------------------------------------------------------------
2207
2521
  |
2522
+ |// Hard-float round to integer.
2208
2523
  |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2209
- |.macro vm_round, func
2524
+ |.macro vm_round_hf, func
2210
2525
  | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2211
2526
  | mtc1 r0, f4
2212
2527
  | mtc1 TMP0, f5
@@ -2248,6 +2563,12 @@ static void build_subroutines(BuildCtx *ctx)
2248
2563
  |. mov.d FRET1, FARG1
2249
2564
  |.endmacro
2250
2565
  |
2566
+ |.macro vm_round, func
2567
+ |.if FPU
2568
+ | vm_round_hf, func
2569
+ |.endif
2570
+ |.endmacro
2571
+ |
2251
2572
  |->vm_floor:
2252
2573
  | vm_round floor
2253
2574
  |->vm_ceil:
@@ -2257,29 +2578,201 @@ static void build_subroutines(BuildCtx *ctx)
2257
2578
  | vm_round trunc
2258
2579
  |.endif
2259
2580
  |
2260
- |//-----------------------------------------------------------------------
2261
- |//-- Miscellaneous functions --------------------------------------------
2262
- |//-----------------------------------------------------------------------
2581
+ |// Soft-float integer to number conversion.
2582
+ |.macro sfi2d, AHI, ALO
2583
+ |.if not FPU
2584
+ | beqz ALO, >9 // Handle zero first.
2585
+ |. sra TMP0, ALO, 31
2586
+ | xor TMP1, ALO, TMP0
2587
+ | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2588
+ | clz AHI, TMP1
2589
+ | andi TMP0, TMP0, 0x800 // Mask sign bit.
2590
+ | li AT, 0x3ff+31-1
2591
+ | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2592
+ | subu AHI, AT, AHI // Exponent - 1 in AHI.
2593
+ | sll ALO, TMP1, 21
2594
+ | or AHI, AHI, TMP0 // Sign | Exponent.
2595
+ | srl TMP1, TMP1, 11
2596
+ | sll AHI, AHI, 20 // Align left.
2597
+ | jr ra
2598
+ |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
2599
+ |9:
2600
+ | jr ra
2601
+ |. li AHI, 0
2602
+ |.endif
2603
+ |.endmacro
2263
2604
  |
2264
- |//-----------------------------------------------------------------------
2265
- |//-- FFI helper functions -----------------------------------------------
2266
- |//-----------------------------------------------------------------------
2605
+ |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
2606
+ |->vm_sfi2d_1:
2607
+ | sfi2d SFARG1HI, SFARG1LO
2608
+ |
2609
+ |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2610
+ |->vm_sfi2d_2:
2611
+ | sfi2d SFARG2HI, SFARG2LO
2612
+ |
2613
+ |// Soft-float comparison. Equivalent to c.eq.d.
2614
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2615
+ |->vm_sfcmpeq:
2616
+ |.if not FPU
2617
+ | sll AT, SFARG1HI, 1
2618
+ | sll TMP0, SFARG2HI, 1
2619
+ | or CRET1, SFARG1LO, SFARG2LO
2620
+ | or TMP1, AT, TMP0
2621
+ | or TMP1, TMP1, CRET1
2622
+ | beqz TMP1, >8 // Both args +-0: return 1.
2623
+ |. sltu CRET1, r0, SFARG1LO
2624
+ | lui TMP1, 0xffe0
2625
+ | addu AT, AT, CRET1
2626
+ | sltu CRET1, r0, SFARG2LO
2627
+ | sltu AT, TMP1, AT
2628
+ | addu TMP0, TMP0, CRET1
2629
+ | sltu TMP0, TMP1, TMP0
2630
+ | or TMP1, AT, TMP0
2631
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
2632
+ |. xor TMP0, SFARG1HI, SFARG2HI
2633
+ | xor TMP1, SFARG1LO, SFARG2LO
2634
+ | or AT, TMP0, TMP1
2635
+ | jr ra
2636
+ |. sltiu CRET1, AT, 1 // Same values: return 1.
2637
+ |8:
2638
+ | jr ra
2639
+ |. li CRET1, 1
2640
+ |9:
2641
+ | jr ra
2642
+ |. li CRET1, 0
2643
+ |.endif
2267
2644
  |
2268
- |// Handler for callback functions. Callback slot number in r1, g in r2.
2269
- |->vm_ffi_callback:
2270
- |.if FFI
2271
- |.type CTSTATE, CTState, PC
2272
- | saveregs
2273
- | lw CTSTATE, GL:r2->ctype_state
2274
- | addiu DISPATCH, r2, GG_G2DISP
2645
+ |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2646
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2647
+ |->vm_sfcmpult:
2648
+ |.if not FPU
2649
+ | b >1
2650
+ |. li CRET2, 1
2651
+ |.endif
2652
+ |
2653
+ |->vm_sfcmpolt:
2654
+ |.if not FPU
2655
+ | li CRET2, 0
2656
+ |1:
2657
+ | sll AT, SFARG1HI, 1
2658
+ | sll TMP0, SFARG2HI, 1
2659
+ | or CRET1, SFARG1LO, SFARG2LO
2660
+ | or TMP1, AT, TMP0
2661
+ | or TMP1, TMP1, CRET1
2662
+ | beqz TMP1, >8 // Both args +-0: return 0.
2663
+ |. sltu CRET1, r0, SFARG1LO
2664
+ | lui TMP1, 0xffe0
2665
+ | addu AT, AT, CRET1
2666
+ | sltu CRET1, r0, SFARG2LO
2667
+ | sltu AT, TMP1, AT
2668
+ | addu TMP0, TMP0, CRET1
2669
+ | sltu TMP0, TMP1, TMP0
2670
+ | or TMP1, AT, TMP0
2671
+ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2672
+ |. and AT, SFARG1HI, SFARG2HI
2673
+ | bltz AT, >5 // Both args negative?
2674
+ |. nop
2675
+ | beq SFARG1HI, SFARG2HI, >8
2676
+ |. sltu CRET1, SFARG1LO, SFARG2LO
2677
+ | jr ra
2678
+ |. slt CRET1, SFARG1HI, SFARG2HI
2679
+ |5: // Swap conditions if both operands are negative.
2680
+ | beq SFARG1HI, SFARG2HI, >8
2681
+ |. sltu CRET1, SFARG2LO, SFARG1LO
2682
+ | jr ra
2683
+ |. slt CRET1, SFARG2HI, SFARG1HI
2684
+ |8:
2685
+ | jr ra
2686
+ |. nop
2687
+ |9:
2688
+ | jr ra
2689
+ |. move CRET1, CRET2
2690
+ |.endif
2691
+ |
2692
+ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2693
+ |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2694
+ |->vm_sfcmpolex:
2695
+ |.if not FPU
2696
+ | sll AT, SFARG1HI, 1
2697
+ | sll TMP0, SFARG2HI, 1
2698
+ | or CRET1, SFARG1LO, SFARG2LO
2699
+ | or TMP1, AT, TMP0
2700
+ | or TMP1, TMP1, CRET1
2701
+ | beqz TMP1, >8 // Both args +-0: return 1.
2702
+ |. sltu CRET1, r0, SFARG1LO
2703
+ | lui TMP1, 0xffe0
2704
+ | addu AT, AT, CRET1
2705
+ | sltu CRET1, r0, SFARG2LO
2706
+ | sltu AT, TMP1, AT
2707
+ | addu TMP0, TMP0, CRET1
2708
+ | sltu TMP0, TMP1, TMP0
2709
+ | or TMP1, AT, TMP0
2710
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
2711
+ |. and AT, SFARG1HI, SFARG2HI
2712
+ | xor AT, AT, TMP3
2713
+ | bltz AT, >5 // Both args negative?
2714
+ |. nop
2715
+ | beq SFARG1HI, SFARG2HI, >6
2716
+ |. sltu CRET1, SFARG2LO, SFARG1LO
2717
+ | jr ra
2718
+ |. slt CRET1, SFARG2HI, SFARG1HI
2719
+ |5: // Swap conditions if both operands are negative.
2720
+ | beq SFARG1HI, SFARG2HI, >6
2721
+ |. sltu CRET1, SFARG1LO, SFARG2LO
2722
+ | slt CRET1, SFARG1HI, SFARG2HI
2723
+ |6:
2724
+ | jr ra
2725
+ |. nop
2726
+ |8:
2727
+ | jr ra
2728
+ |. li CRET1, 1
2729
+ |9:
2730
+ | jr ra
2731
+ |. li CRET1, 0
2732
+ |.endif
2733
+ |
2734
+ |.macro sfmin_max, name, intins
2735
+ |->vm_sf .. name:
2736
+ |.if JIT and not FPU
2737
+ | move TMP2, ra
2738
+ | bal ->vm_sfcmpolt
2739
+ |. nop
2740
+ | move TMP0, CRET1
2741
+ | move SFRETHI, SFARG1HI
2742
+ | move SFRETLO, SFARG1LO
2743
+ | move ra, TMP2
2744
+ | intins SFRETHI, SFARG2HI, TMP0
2745
+ | jr ra
2746
+ |. intins SFRETLO, SFARG2LO, TMP0
2747
+ |.endif
2748
+ |.endmacro
2749
+ |
2750
+ | sfmin_max min, movz
2751
+ | sfmin_max max, movn
2752
+ |
2753
+ |//-----------------------------------------------------------------------
2754
+ |//-- Miscellaneous functions --------------------------------------------
2755
+ |//-----------------------------------------------------------------------
2756
+ |
2757
+ |//-----------------------------------------------------------------------
2758
+ |//-- FFI helper functions -----------------------------------------------
2759
+ |//-----------------------------------------------------------------------
2760
+ |
2761
+ |// Handler for callback functions. Callback slot number in r1, g in r2.
2762
+ |->vm_ffi_callback:
2763
+ |.if FFI
2764
+ |.type CTSTATE, CTState, PC
2765
+ | saveregs
2766
+ | lw CTSTATE, GL:r2->ctype_state
2767
+ | addiu DISPATCH, r2, GG_G2DISP
2275
2768
  | load_got lj_ccallback_enter
2276
2769
  | sw r1, CTSTATE->cb.slot
2277
2770
  | sw CARG1, CTSTATE->cb.gpr[0]
2278
2771
  | sw CARG2, CTSTATE->cb.gpr[1]
2279
- | sdc1 FARG1, CTSTATE->cb.fpr[0]
2772
+ | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2280
2773
  | sw CARG3, CTSTATE->cb.gpr[2]
2281
2774
  | sw CARG4, CTSTATE->cb.gpr[3]
2282
- | sdc1 FARG2, CTSTATE->cb.fpr[1]
2775
+ | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2283
2776
  | addiu TMP0, sp, CFRAME_SPACE+16
2284
2777
  | sw TMP0, CTSTATE->cb.stack
2285
2778
  | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2289,15 +2782,16 @@ static void build_subroutines(BuildCtx *ctx)
2289
2782
  | // Returns lua_State *.
2290
2783
  | lw BASE, L:CRET1->base
2291
2784
  | lw RC, L:CRET1->top
2785
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2292
2786
  | move L, CRET1
2293
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2787
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2294
2788
  | lw LFUNC:RB, FRAME_FUNC(BASE)
2295
- | mtc1 TMP3, TOBIT
2789
+ | .FPU mtc1 TMP3, TOBIT
2296
2790
  | li_vmstate INTERP
2297
2791
  | li TISNIL, LJ_TNIL
2298
2792
  | subu RC, RC, BASE
2299
2793
  | st_vmstate
2300
- | cvt.d.s TOBIT, TOBIT
2794
+ | .FPU cvt.d.s TOBIT, TOBIT
2301
2795
  | ins_callt
2302
2796
  |.endif
2303
2797
  |
@@ -2311,11 +2805,11 @@ static void build_subroutines(BuildCtx *ctx)
2311
2805
  | move CARG2, RA
2312
2806
  | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2313
2807
  |. move CARG1, CTSTATE
2808
+ | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2314
2809
  | lw CRET1, CTSTATE->cb.gpr[0]
2315
- | ldc1 FRET1, CTSTATE->cb.fpr[0]
2316
- | lw CRET2, CTSTATE->cb.gpr[1]
2810
+ | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2317
2811
  | b ->vm_leave_unw
2318
- |. ldc1 FRET2, CTSTATE->cb.fpr[1]
2812
+ |. lw CRET2, CTSTATE->cb.gpr[1]
2319
2813
  |.endif
2320
2814
  |
2321
2815
  |->vm_ffi_call: // Call C function via FFI.
@@ -2347,8 +2841,8 @@ static void build_subroutines(BuildCtx *ctx)
2347
2841
  | lw CARG2, CCSTATE->gpr[1]
2348
2842
  | lw CARG3, CCSTATE->gpr[2]
2349
2843
  | lw CARG4, CCSTATE->gpr[3]
2350
- | ldc1 FARG1, CCSTATE->fpr[0]
2351
- | ldc1 FARG2, CCSTATE->fpr[1]
2844
+ | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2845
+ | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2352
2846
  | jalr CFUNCADDR
2353
2847
  |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2354
2848
  | lw CCSTATE:TMP1, -12(r16)
@@ -2356,8 +2850,13 @@ static void build_subroutines(BuildCtx *ctx)
2356
2850
  | lw ra, -4(r16)
2357
2851
  | sw CRET1, CCSTATE:TMP1->gpr[0]
2358
2852
  | sw CRET2, CCSTATE:TMP1->gpr[1]
2853
+ |.if FPU
2359
2854
  | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2360
2855
  | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2856
+ |.else
2857
+ | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2858
+ | sw CARG2, CCSTATE:TMP1->gpr[3]
2859
+ |.endif
2361
2860
  | move sp, r16
2362
2861
  | jr ra
2363
2862
  |. move r16, TMP2
@@ -2381,82 +2880,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2381
2880
 
2382
2881
  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2383
2882
  | // RA = src1*8, RD = src2*8, JMP with RD = target
2384
- | addu CARG2, BASE, RA
2385
- | addu CARG3, BASE, RD
2386
- | lw TMP0, HI(CARG2)
2387
- | lw TMP1, HI(CARG3)
2388
- | ldc1 f0, 0(CARG2)
2389
- | ldc1 f2, 0(CARG3)
2390
- | sltiu TMP0, TMP0, LJ_TISNUM
2391
- | sltiu TMP1, TMP1, LJ_TISNUM
2883
+ |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2884
+ | addu RA, BASE, RA
2885
+ | addu RD, BASE, RD
2886
+ | lw RAHI, HI(RA)
2887
+ | lw RDHI, HI(RD)
2392
2888
  | lhu TMP2, OFS_RD(PC)
2393
- | and TMP0, TMP0, TMP1
2394
2889
  | addiu PC, PC, 4
2395
- | beqz TMP0, ->vmeta_comp
2396
- |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
2397
- | decode_RD4b TMP2
2398
- | addu TMP2, TMP2, TMP1
2399
- if (op == BC_ISLT || op == BC_ISGE) {
2400
- | c.olt.d f0, f2
2401
- } else {
2402
- | c.ole.d f0, f2
2403
- }
2404
- if (op == BC_ISLT || op == BC_ISLE) {
2405
- | movf TMP2, r0
2406
- } else {
2407
- | movt TMP2, r0
2408
- }
2409
- | addu PC, PC, TMP2
2890
+ | bne RAHI, TISNUM, >2
2891
+ |. lw RALO, LO(RA)
2892
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2893
+ | lw RDLO, LO(RD)
2894
+ | bne RDHI, TISNUM, >5
2895
+ |. decode_RD4b TMP2
2896
+ | slt AT, SFARG1LO, SFARG2LO
2897
+ | addu TMP2, TMP2, TMP3
2898
+ | movop TMP2, r0, AT
2410
2899
  |1:
2900
+ | addu PC, PC, TMP2
2411
2901
  | ins_next
2902
+ |
2903
+ |2: // RA is not an integer.
2904
+ | sltiu AT, RAHI, LJ_TISNUM
2905
+ | beqz AT, ->vmeta_comp
2906
+ |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2907
+ | sltiu AT, RDHI, LJ_TISNUM
2908
+ |.if FPU
2909
+ | ldc1 FRA, 0(RA)
2910
+ | ldc1 FRD, 0(RD)
2911
+ |.else
2912
+ | lw RDLO, LO(RD)
2913
+ |.endif
2914
+ | beqz AT, >4
2915
+ |. decode_RD4b TMP2
2916
+ |3: // RA and RD are both numbers.
2917
+ |.if FPU
2918
+ | fcomp f20, f22
2919
+ | addu TMP2, TMP2, TMP3
2920
+ | b <1
2921
+ |. fmovop TMP2, r0
2922
+ |.else
2923
+ | bal sfcomp
2924
+ |. addu TMP2, TMP2, TMP3
2925
+ | b <1
2926
+ |. movop TMP2, r0, CRET1
2927
+ |.endif
2928
+ |
2929
+ |4: // RA is a number, RD is not a number.
2930
+ | bne RDHI, TISNUM, ->vmeta_comp
2931
+ | // RA is a number, RD is an integer. Convert RD to a number.
2932
+ |.if FPU
2933
+ |. lwc1 FRD, LO(RD)
2934
+ | b <3
2935
+ |. cvt.d.w FRD, FRD
2936
+ |.else
2937
+ |. nop
2938
+ |.if "RDHI" == "SFARG1HI"
2939
+ | bal ->vm_sfi2d_1
2940
+ |.else
2941
+ | bal ->vm_sfi2d_2
2942
+ |.endif
2943
+ |. nop
2944
+ | b <3
2945
+ |. nop
2946
+ |.endif
2947
+ |
2948
+ |5: // RA is an integer, RD is not an integer
2949
+ | sltiu AT, RDHI, LJ_TISNUM
2950
+ | beqz AT, ->vmeta_comp
2951
+ | // RA is an integer, RD is a number. Convert RA to a number.
2952
+ |.if FPU
2953
+ |. mtc1 RALO, FRA
2954
+ | ldc1 FRD, 0(RD)
2955
+ | b <3
2956
+ | cvt.d.w FRA, FRA
2957
+ |.else
2958
+ |. nop
2959
+ |.if "RAHI" == "SFARG1HI"
2960
+ | bal ->vm_sfi2d_1
2961
+ |.else
2962
+ | bal ->vm_sfi2d_2
2963
+ |.endif
2964
+ |. nop
2965
+ | b <3
2966
+ |. nop
2967
+ |.endif
2968
+ |.endmacro
2969
+ |
2970
+ if (op == BC_ISLT) {
2971
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
2972
+ } else if (op == BC_ISGE) {
2973
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
2974
+ } else if (op == BC_ISLE) {
2975
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
2976
+ } else {
2977
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
2978
+ }
2412
2979
  break;
2413
2980
 
2414
2981
  case BC_ISEQV: case BC_ISNEV:
2415
2982
  vk = op == BC_ISEQV;
2416
2983
  | // RA = src1*8, RD = src2*8, JMP with RD = target
2417
2984
  | addu RA, BASE, RA
2418
- | addiu PC, PC, 4
2419
- | lw TMP0, HI(RA)
2420
- | ldc1 f0, 0(RA)
2985
+ | addiu PC, PC, 4
2421
2986
  | addu RD, BASE, RD
2987
+ | lw SFARG1HI, HI(RA)
2422
2988
  | lhu TMP2, -4+OFS_RD(PC)
2423
- | lw TMP1, HI(RD)
2424
- | ldc1 f2, 0(RD)
2989
+ | lw SFARG2HI, HI(RD)
2425
2990
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2426
- | sltiu AT, TMP0, LJ_TISNUM
2427
- | sltiu CARG1, TMP1, LJ_TISNUM
2428
- | decode_RD4b TMP2
2429
- | and AT, AT, CARG1
2430
- | beqz AT, >5
2431
- |. addu TMP2, TMP2, TMP3
2432
- | c.eq.d f0, f2
2991
+ | sltu AT, TISNUM, SFARG1HI
2992
+ | sltu TMP0, TISNUM, SFARG2HI
2993
+ | or AT, AT, TMP0
2433
2994
  if (vk) {
2434
- | movf TMP2, r0
2995
+ | beqz AT, ->BC_ISEQN_Z
2435
2996
  } else {
2436
- | movt TMP2, r0
2997
+ | beqz AT, ->BC_ISNEN_Z
2437
2998
  }
2438
- |1:
2439
- | addu PC, PC, TMP2
2440
- | ins_next
2441
- |5: // Either or both types are not numbers.
2442
- | lw CARG2, LO(RA)
2443
- | lw CARG3, LO(RD)
2999
+ |. decode_RD4b TMP2
3000
+ | // Either or both types are not numbers.
3001
+ | lw SFARG1LO, LO(RA)
3002
+ | lw SFARG2LO, LO(RD)
3003
+ | addu TMP2, TMP2, TMP3
2444
3004
  |.if FFI
2445
3005
  | li TMP3, LJ_TCDATA
2446
- | beq TMP0, TMP3, ->vmeta_equal_cd
3006
+ | beq SFARG1HI, TMP3, ->vmeta_equal_cd
2447
3007
  |.endif
2448
- |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive?
3008
+ |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
2449
3009
  |.if FFI
2450
- | beq TMP1, TMP3, ->vmeta_equal_cd
3010
+ | beq SFARG2HI, TMP3, ->vmeta_equal_cd
2451
3011
  |.endif
2452
- |. xor TMP3, CARG2, CARG3 // Same tv?
2453
- | xor TMP1, TMP1, TMP0 // Same type?
2454
- | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata?
3012
+ |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
3013
+ | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
3014
+ | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
2455
3015
  | movz TMP3, r0, AT // Ignore tv if primitive.
2456
- | movn CARG1, r0, TMP1 // Tab/ud and same type?
2457
- | or AT, TMP1, TMP3 // Same type && (pri||same tv).
2458
- | movz CARG1, r0, AT
2459
- | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv.
3016
+ | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
3017
+ | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
3018
+ | movz TMP0, r0, AT
3019
+ | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
2460
3020
  if (vk) {
2461
3021
  |. movn TMP2, r0, AT
2462
3022
  } else {
@@ -2464,15 +3024,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2464
3024
  }
2465
3025
  | // Different tables or userdatas. Need to check __eq metamethod.
2466
3026
  | // Field metatable must be at same offset for GCtab and GCudata!
2467
- | lw TAB:TMP1, TAB:CARG2->metatable
2468
- | beqz TAB:TMP1, <1 // No metatable?
3027
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
3028
+ | beqz TAB:TMP1, >1 // No metatable?
2469
3029
  |. nop
2470
3030
  | lbu TMP1, TAB:TMP1->nomm
2471
3031
  | andi TMP1, TMP1, 1<<MM_eq
2472
- | bnez TMP1, <1 // Or 'no __eq' flag set?
3032
+ | bnez TMP1, >1 // Or 'no __eq' flag set?
2473
3033
  |. nop
2474
3034
  | b ->vmeta_equal // Handle __eq metamethod.
2475
- |. li CARG4, 1-vk // ne = 0 or 1.
3035
+ |. li TMP0, 1-vk // ne = 0 or 1.
3036
+ |1:
3037
+ | addu PC, PC, TMP2
3038
+ | ins_next
2476
3039
  break;
2477
3040
 
2478
3041
  case BC_ISEQS: case BC_ISNES:
@@ -2509,38 +3072,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2509
3072
  vk = op == BC_ISEQN;
2510
3073
  | // RA = src*8, RD = num_const*8, JMP with RD = target
2511
3074
  | addu RA, BASE, RA
2512
- | addiu PC, PC, 4
2513
- | lw TMP0, HI(RA)
2514
- | ldc1 f0, 0(RA)
2515
- | addu RD, KBASE, RD
2516
- | lhu TMP2, -4+OFS_RD(PC)
2517
- | ldc1 f2, 0(RD)
3075
+ | addu RD, KBASE, RD
3076
+ | lw SFARG1HI, HI(RA)
3077
+ | lw SFARG2HI, HI(RD)
3078
+ | lhu TMP2, OFS_RD(PC)
3079
+ | addiu PC, PC, 4
2518
3080
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2519
- | sltiu AT, TMP0, LJ_TISNUM
2520
3081
  | decode_RD4b TMP2
2521
- |.if FFI
2522
- | beqz AT, >5
2523
- |.else
2524
- | beqz AT, >1
2525
- |.endif
2526
- |. addu TMP2, TMP2, TMP3
2527
- | c.eq.d f0, f2
2528
3082
  if (vk) {
2529
- | movf TMP2, r0
2530
- | addu PC, PC, TMP2
3083
+ |->BC_ISEQN_Z:
3084
+ } else {
3085
+ |->BC_ISNEN_Z:
3086
+ }
3087
+ | bne SFARG1HI, TISNUM, >3
3088
+ |. lw SFARG1LO, LO(RA)
3089
+ | lw SFARG2LO, LO(RD)
3090
+ | addu TMP2, TMP2, TMP3
3091
+ | bne SFARG2HI, TISNUM, >6
3092
+ |. xor AT, SFARG1LO, SFARG2LO
3093
+ if (vk) {
3094
+ | movn TMP2, r0, AT
2531
3095
  |1:
3096
+ | addu PC, PC, TMP2
3097
+ |2:
2532
3098
  } else {
2533
- | movt TMP2, r0
3099
+ | movz TMP2, r0, AT
2534
3100
  |1:
3101
+ |2:
2535
3102
  | addu PC, PC, TMP2
2536
3103
  }
2537
3104
  | ins_next
3105
+ |
3106
+ |3: // RA is not an integer.
3107
+ | sltiu AT, SFARG1HI, LJ_TISNUM
2538
3108
  |.if FFI
2539
- |5:
2540
- | li AT, LJ_TCDATA
2541
- | beq TMP0, AT, ->vmeta_equal_cd
3109
+ | beqz AT, >8
3110
+ |.else
3111
+ | beqz AT, <2
3112
+ |.endif
3113
+ |. addu TMP2, TMP2, TMP3
3114
+ | sltiu AT, SFARG2HI, LJ_TISNUM
3115
+ |.if FPU
3116
+ | ldc1 f20, 0(RA)
3117
+ | ldc1 f22, 0(RD)
3118
+ |.endif
3119
+ | beqz AT, >5
3120
+ |. lw SFARG2LO, LO(RD)
3121
+ |4: // RA and RD are both numbers.
3122
+ |.if FPU
3123
+ | c.eq.d f20, f22
3124
+ | b <1
3125
+ if (vk) {
3126
+ |. movf TMP2, r0
3127
+ } else {
3128
+ |. movt TMP2, r0
3129
+ }
3130
+ |.else
3131
+ | bal ->vm_sfcmpeq
2542
3132
  |. nop
2543
3133
  | b <1
3134
+ if (vk) {
3135
+ |. movz TMP2, r0, CRET1
3136
+ } else {
3137
+ |. movn TMP2, r0, CRET1
3138
+ }
3139
+ |.endif
3140
+ |
3141
+ |5: // RA is a number, RD is not a number.
3142
+ |.if FFI
3143
+ | bne SFARG2HI, TISNUM, >9
3144
+ |.else
3145
+ | bne SFARG2HI, TISNUM, <2
3146
+ |.endif
3147
+ | // RA is a number, RD is an integer. Convert RD to a number.
3148
+ |.if FPU
3149
+ |. lwc1 f22, LO(RD)
3150
+ | b <4
3151
+ |. cvt.d.w f22, f22
3152
+ |.else
3153
+ |. nop
3154
+ | bal ->vm_sfi2d_2
3155
+ |. nop
3156
+ | b <4
3157
+ |. nop
3158
+ |.endif
3159
+ |
3160
+ |6: // RA is an integer, RD is not an integer
3161
+ | sltiu AT, SFARG2HI, LJ_TISNUM
3162
+ |.if FFI
3163
+ | beqz AT, >9
3164
+ |.else
3165
+ | beqz AT, <2
3166
+ |.endif
3167
+ | // RA is an integer, RD is a number. Convert RA to a number.
3168
+ |.if FPU
3169
+ |. mtc1 SFARG1LO, f20
3170
+ | ldc1 f22, 0(RD)
3171
+ | b <4
3172
+ | cvt.d.w f20, f20
3173
+ |.else
3174
+ |. nop
3175
+ | bal ->vm_sfi2d_1
3176
+ |. nop
3177
+ | b <4
3178
+ |. nop
3179
+ |.endif
3180
+ |
3181
+ |.if FFI
3182
+ |8:
3183
+ | li AT, LJ_TCDATA
3184
+ | bne SFARG1HI, AT, <2
3185
+ |. nop
3186
+ | b ->vmeta_equal_cd
3187
+ |. nop
3188
+ |9:
3189
+ | li AT, LJ_TCDATA
3190
+ | bne SFARG2HI, AT, <2
3191
+ |. nop
3192
+ | b ->vmeta_equal_cd
2544
3193
  |. nop
2545
3194
  |.endif
2546
3195
  break;
@@ -2592,7 +3241,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2592
3241
  | addu PC, PC, TMP2
2593
3242
  } else {
2594
3243
  | sltiu TMP0, TMP0, LJ_TISTRUECOND
2595
- | ldc1 f0, 0(RD)
3244
+ | lw SFRETHI, HI(RD)
3245
+ | lw SFRETLO, LO(RD)
2596
3246
  if (op == BC_ISTC) {
2597
3247
  | beqz TMP0, >1
2598
3248
  } else {
@@ -2602,7 +3252,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2602
3252
  | decode_RD4b TMP2
2603
3253
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2604
3254
  | addu TMP2, TMP2, TMP3
2605
- | sdc1 f0, 0(RA)
3255
+ | sw SFRETHI, HI(RA)
3256
+ | sw SFRETLO, LO(RA)
2606
3257
  | addu PC, PC, TMP2
2607
3258
  |1:
2608
3259
  }
@@ -2634,10 +3285,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2634
3285
  case BC_MOV:
2635
3286
  | // RA = dst*8, RD = src*8
2636
3287
  | addu RD, BASE, RD
2637
- | addu RA, BASE, RA
2638
- | ldc1 f0, 0(RD)
3288
+ | addu RA, BASE, RA
3289
+ | lw SFRETHI, HI(RD)
3290
+ | lw SFRETLO, LO(RD)
2639
3291
  | ins_next1
2640
- | sdc1 f0, 0(RA)
3292
+ | sw SFRETHI, HI(RA)
3293
+ | sw SFRETLO, LO(RA)
2641
3294
  | ins_next2
2642
3295
  break;
2643
3296
  case BC_NOT:
@@ -2654,16 +3307,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2654
3307
  break;
2655
3308
  case BC_UNM:
2656
3309
  | // RA = dst*8, RD = src*8
2657
- | addu CARG3, BASE, RD
3310
+ | addu RB, BASE, RD
3311
+ | lw SFARG1HI, HI(RB)
2658
3312
  | addu RA, BASE, RA
2659
- | lw TMP0, HI(CARG3)
2660
- | ldc1 f0, 0(CARG3)
2661
- | sltiu AT, TMP0, LJ_TISNUM
2662
- | beqz AT, ->vmeta_unm
2663
- |. neg.d f0, f0
3313
+ | bne SFARG1HI, TISNUM, >2
3314
+ |. lw SFARG1LO, LO(RB)
3315
+ | lui TMP1, 0x8000
3316
+ | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
3317
+ |. negu SFARG1LO, SFARG1LO
3318
+ |1:
2664
3319
  | ins_next1
2665
- | sdc1 f0, 0(RA)
3320
+ | sw SFARG1HI, HI(RA)
3321
+ | sw SFARG1LO, LO(RA)
2666
3322
  | ins_next2
3323
+ |2:
3324
+ | sltiu AT, SFARG1HI, LJ_TISNUM
3325
+ | beqz AT, ->vmeta_unm
3326
+ |. lui TMP1, 0x8000
3327
+ | b <1
3328
+ |. xor SFARG1HI, SFARG1HI, TMP1
2667
3329
  break;
2668
3330
  case BC_LEN:
2669
3331
  | // RA = dst*8, RD = src*8
@@ -2674,12 +3336,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2674
3336
  | li AT, LJ_TSTR
2675
3337
  | bne TMP0, AT, >2
2676
3338
  |. li AT, LJ_TTAB
2677
- | lw CRET1, STR:CARG1->len
3339
+ | lw CRET1, STR:CARG1->len
2678
3340
  |1:
2679
- | mtc1 CRET1, f0
2680
- | cvt.d.w f0, f0
2681
3341
  | ins_next1
2682
- | sdc1 f0, 0(RA)
3342
+ | sw TISNUM, HI(RA)
3343
+ | sw CRET1, LO(RA)
2683
3344
  | ins_next2
2684
3345
  |2:
2685
3346
  | bne TMP0, AT, ->vmeta_len
@@ -2710,104 +3371,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2710
3371
 
2711
3372
  /* -- Binary ops -------------------------------------------------------- */
2712
3373
 
2713
- |.macro ins_arithpre
3374
+ |.macro fpmod, a, b, c
3375
+ | bal ->vm_floor // floor(b/c)
3376
+ |. div.d FARG1, b, c
3377
+ | mul.d a, FRET1, c
3378
+ | sub.d a, b, a // b - floor(b/c)*c
3379
+ |.endmacro
3380
+
3381
+ |.macro sfpmod
3382
+ | addiu sp, sp, -16
3383
+ |
3384
+ | load_got __divdf3
3385
+ | sw SFARG1HI, HI(sp)
3386
+ | sw SFARG1LO, LO(sp)
3387
+ | sw SFARG2HI, 8+HI(sp)
3388
+ | call_extern
3389
+ |. sw SFARG2LO, 8+LO(sp)
3390
+ |
3391
+ | load_got floor
3392
+ | move SFARG1HI, SFRETHI
3393
+ | call_extern
3394
+ |. move SFARG1LO, SFRETLO
3395
+ |
3396
+ | load_got __muldf3
3397
+ | move SFARG1HI, SFRETHI
3398
+ | move SFARG1LO, SFRETLO
3399
+ | lw SFARG2HI, 8+HI(sp)
3400
+ | call_extern
3401
+ |. lw SFARG2LO, 8+LO(sp)
3402
+ |
3403
+ | load_got __subdf3
3404
+ | lw SFARG1HI, HI(sp)
3405
+ | lw SFARG1LO, LO(sp)
3406
+ | move SFARG2HI, SFRETHI
3407
+ | call_extern
3408
+ |. move SFARG2LO, SFRETLO
3409
+ |
3410
+ | addiu sp, sp, 16
3411
+ |.endmacro
3412
+
3413
+ |.macro ins_arithpre, label
2714
3414
  ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2715
- | decode_RB8a RB, INS
2716
- | decode_RB8b RB
2717
- | decode_RDtoRC8 RC, RD
2718
3415
  | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2719
3416
  ||switch (vk) {
2720
3417
  ||case 0:
2721
- | addu CARG3, BASE, RB
2722
- | addu CARG4, KBASE, RC
2723
- | lw TMP1, HI(CARG3)
2724
- | ldc1 f20, 0(CARG3)
2725
- | ldc1 f22, 0(CARG4)
2726
- | sltiu AT, TMP1, LJ_TISNUM
3418
+ | decode_RB8a RB, INS
3419
+ | decode_RB8b RB
3420
+ | decode_RDtoRC8 RC, RD
3421
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
3422
+ | addu RB, BASE, RB
3423
+ |.if "label" ~= "none"
3424
+ | b label
3425
+ |.endif
3426
+ |. addu RC, KBASE, RC
2727
3427
  || break;
2728
3428
  ||case 1:
2729
- | addu CARG4, BASE, RB
2730
- | addu CARG3, KBASE, RC
2731
- | lw TMP1, HI(CARG4)
2732
- | ldc1 f22, 0(CARG4)
2733
- | ldc1 f20, 0(CARG3)
2734
- | sltiu AT, TMP1, LJ_TISNUM
3429
+ | decode_RB8a RC, INS
3430
+ | decode_RB8b RC
3431
+ | decode_RDtoRC8 RB, RD
3432
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
3433
+ | addu RC, BASE, RC
3434
+ |.if "label" ~= "none"
3435
+ | b label
3436
+ |.endif
3437
+ |. addu RB, KBASE, RB
2735
3438
  || break;
2736
3439
  ||default:
2737
- | addu CARG3, BASE, RB
2738
- | addu CARG4, BASE, RC
2739
- | lw TMP1, HI(CARG3)
2740
- | lw TMP2, HI(CARG4)
2741
- | ldc1 f20, 0(CARG3)
2742
- | ldc1 f22, 0(CARG4)
2743
- | sltiu AT, TMP1, LJ_TISNUM
2744
- | sltiu TMP0, TMP2, LJ_TISNUM
2745
- | and AT, AT, TMP0
3440
+ | decode_RB8a RB, INS
3441
+ | decode_RB8b RB
3442
+ | decode_RDtoRC8 RC, RD
3443
+ | // RA = dst*8, RB = src1*8, RC = src2*8
3444
+ | addu RB, BASE, RB
3445
+ |.if "label" ~= "none"
3446
+ | b label
3447
+ |.endif
3448
+ |. addu RC, BASE, RC
2746
3449
  || break;
2747
3450
  ||}
2748
- | beqz AT, ->vmeta_arith
2749
- |. addu RA, BASE, RA
2750
3451
  |.endmacro
2751
3452
  |
2752
- |.macro fpmod, a, b, c
2753
- |->BC_MODVN_Z:
2754
- | bal ->vm_floor // floor(b/c)
2755
- |. div.d FARG1, b, c
2756
- | mul.d a, FRET1, c
2757
- | sub.d a, b, a // b - floor(b/c)*c
2758
- |.endmacro
3453
+ |.macro ins_arith, intins, fpins, fpcall, label
3454
+ | ins_arithpre none
2759
3455
  |
2760
- |.macro ins_arith, ins
2761
- | ins_arithpre
2762
- |.if "ins" == "fpmod_"
2763
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2764
- |. nop
3456
+ |.if "label" ~= "none"
3457
+ |label:
3458
+ |.endif
3459
+ |
3460
+ | lw SFARG1HI, HI(RB)
3461
+ | lw SFARG2HI, HI(RC)
3462
+ |
3463
+ |.if "intins" ~= "div"
3464
+ |
3465
+ | // Check for two integers.
3466
+ | lw SFARG1LO, LO(RB)
3467
+ | bne SFARG1HI, TISNUM, >5
3468
+ |. lw SFARG2LO, LO(RC)
3469
+ | bne SFARG2HI, TISNUM, >5
3470
+ |
3471
+ |.if "intins" == "addu"
3472
+ |. intins CRET1, SFARG1LO, SFARG2LO
3473
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3474
+ | xor TMP2, CRET1, SFARG2LO
3475
+ | and TMP1, TMP1, TMP2
3476
+ | bltz TMP1, ->vmeta_arith
3477
+ |. addu RA, BASE, RA
3478
+ |.elif "intins" == "subu"
3479
+ |. intins CRET1, SFARG1LO, SFARG2LO
3480
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3481
+ | xor TMP2, SFARG1LO, SFARG2LO
3482
+ | and TMP1, TMP1, TMP2
3483
+ | bltz TMP1, ->vmeta_arith
3484
+ |. addu RA, BASE, RA
3485
+ |.elif "intins" == "mult"
3486
+ |. intins SFARG1LO, SFARG2LO
3487
+ | mflo CRET1
3488
+ | mfhi TMP2
3489
+ | sra TMP1, CRET1, 31
3490
+ | bne TMP1, TMP2, ->vmeta_arith
3491
+ |. addu RA, BASE, RA
2765
3492
  |.else
2766
- | ins f0, f20, f22
3493
+ |. load_got lj_vm_modi
3494
+ | beqz SFARG2LO, ->vmeta_arith
3495
+ |. addu RA, BASE, RA
3496
+ |.if ENDIAN_BE
3497
+ | move CARG1, SFARG1LO
3498
+ |.endif
3499
+ | call_extern
3500
+ |. move CARG2, SFARG2LO
3501
+ |.endif
3502
+ |
2767
3503
  | ins_next1
2768
- | sdc1 f0, 0(RA)
3504
+ | sw TISNUM, HI(RA)
3505
+ | sw CRET1, LO(RA)
3506
+ |3:
2769
3507
  | ins_next2
3508
+ |
3509
+ |.elif not FPU
3510
+ |
3511
+ | lw SFARG1LO, LO(RB)
3512
+ | lw SFARG2LO, LO(RC)
3513
+ |
3514
+ |.endif
3515
+ |
3516
+ |5: // Check for two numbers.
3517
+ | .FPU ldc1 f20, 0(RB)
3518
+ | sltiu AT, SFARG1HI, LJ_TISNUM
3519
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
3520
+ | .FPU ldc1 f22, 0(RC)
3521
+ | and AT, AT, TMP0
3522
+ | beqz AT, ->vmeta_arith
3523
+ |. addu RA, BASE, RA
3524
+ |
3525
+ |.if FPU
3526
+ | fpins FRET1, f20, f22
3527
+ |.elif "fpcall" == "sfpmod"
3528
+ | sfpmod
3529
+ |.else
3530
+ | load_got fpcall
3531
+ | call_extern
3532
+ |. nop
3533
+ |.endif
3534
+ |
3535
+ | ins_next1
3536
+ |.if not FPU
3537
+ | sw SFRETHI, HI(RA)
3538
+ |.endif
3539
+ |.if "intins" ~= "div"
3540
+ | b <3
3541
+ |.endif
3542
+ |.if FPU
3543
+ |. sdc1 FRET1, 0(RA)
3544
+ |.else
3545
+ |. sw SFRETLO, LO(RA)
2770
3546
  |.endif
3547
+ |.if "intins" == "div"
3548
+ | ins_next2
3549
+ |.endif
3550
+ |
2771
3551
  |.endmacro
2772
3552
 
2773
3553
  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2774
- | ins_arith add.d
3554
+ | ins_arith addu, add.d, __adddf3, none
2775
3555
  break;
2776
3556
  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2777
- | ins_arith sub.d
3557
+ | ins_arith subu, sub.d, __subdf3, none
2778
3558
  break;
2779
3559
  case BC_MULVN: case BC_MULNV: case BC_MULVV:
2780
- | ins_arith mul.d
3560
+ | ins_arith mult, mul.d, __muldf3, none
2781
3561
  break;
2782
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2783
- | ins_arith div.d
3562
+ case BC_DIVVN:
3563
+ | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3564
+ break;
3565
+ case BC_DIVNV: case BC_DIVVV:
3566
+ | ins_arithpre ->BC_DIVVN_Z
2784
3567
  break;
2785
3568
  case BC_MODVN:
2786
- | ins_arith fpmod
3569
+ | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
2787
3570
  break;
2788
3571
  case BC_MODNV: case BC_MODVV:
2789
- | ins_arith fpmod_
3572
+ | ins_arithpre ->BC_MODVN_Z
2790
3573
  break;
2791
3574
  case BC_POW:
2792
- | decode_RB8a RB, INS
2793
- | decode_RB8b RB
2794
- | decode_RDtoRC8 RC, RD
2795
- | addu CARG3, BASE, RB
2796
- | addu CARG4, BASE, RC
2797
- | lw TMP1, HI(CARG3)
2798
- | lw TMP2, HI(CARG4)
2799
- | ldc1 FARG1, 0(CARG3)
2800
- | ldc1 FARG2, 0(CARG4)
2801
- | sltiu AT, TMP1, LJ_TISNUM
2802
- | sltiu TMP0, TMP2, LJ_TISNUM
3575
+ | ins_arithpre none
3576
+ | lw SFARG1HI, HI(RB)
3577
+ | lw SFARG2HI, HI(RC)
3578
+ | sltiu AT, SFARG1HI, LJ_TISNUM
3579
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
2803
3580
  | and AT, AT, TMP0
2804
3581
  | load_got pow
2805
3582
  | beqz AT, ->vmeta_arith
2806
3583
  |. addu RA, BASE, RA
3584
+ |.if FPU
3585
+ | ldc1 FARG1, 0(RB)
3586
+ | ldc1 FARG2, 0(RC)
3587
+ |.else
3588
+ | lw SFARG1LO, LO(RB)
3589
+ | lw SFARG2LO, LO(RC)
3590
+ |.endif
2807
3591
  | call_extern
2808
3592
  |. nop
2809
3593
  | ins_next1
3594
+ |.if FPU
2810
3595
  | sdc1 FRET1, 0(RA)
3596
+ |.else
3597
+ | sw SFRETHI, HI(RA)
3598
+ | sw SFRETLO, LO(RA)
3599
+ |.endif
2811
3600
  | ins_next2
2812
3601
  break;
2813
3602
 
@@ -2830,10 +3619,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2830
3619
  | bnez CRET1, ->vmeta_binop
2831
3620
  |. lw BASE, L->base
2832
3621
  | addu RB, BASE, MULTRES
2833
- | ldc1 f0, 0(RB)
3622
+ | lw SFRETHI, HI(RB)
3623
+ | lw SFRETLO, LO(RB)
2834
3624
  | addu RA, BASE, RA
2835
3625
  | ins_next1
2836
- | sdc1 f0, 0(RA) // Copy result from RB to RA.
3626
+ | sw SFRETHI, HI(RA)
3627
+ | sw SFRETLO, LO(RA)
2837
3628
  | ins_next2
2838
3629
  break;
2839
3630
 
@@ -2868,20 +3659,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2868
3659
  case BC_KSHORT:
2869
3660
  | // RA = dst*8, RD = int16_literal*8
2870
3661
  | sra RD, INS, 16
2871
- | mtc1 RD, f0
2872
3662
  | addu RA, BASE, RA
2873
- | cvt.d.w f0, f0
2874
3663
  | ins_next1
2875
- | sdc1 f0, 0(RA)
3664
+ | sw TISNUM, HI(RA)
3665
+ | sw RD, LO(RA)
2876
3666
  | ins_next2
2877
3667
  break;
2878
3668
  case BC_KNUM:
2879
3669
  | // RA = dst*8, RD = num_const*8
2880
3670
  | addu RD, KBASE, RD
2881
3671
  | addu RA, BASE, RA
2882
- | ldc1 f0, 0(RD)
3672
+ | lw SFRETHI, HI(RD)
3673
+ | lw SFRETLO, LO(RD)
2883
3674
  | ins_next1
2884
- | sdc1 f0, 0(RA)
3675
+ | sw SFRETHI, HI(RA)
3676
+ | sw SFRETLO, LO(RA)
2885
3677
  | ins_next2
2886
3678
  break;
2887
3679
  case BC_KPRI:
@@ -2917,9 +3709,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2917
3709
  | lw UPVAL:RB, LFUNC:RD->uvptr
2918
3710
  | ins_next1
2919
3711
  | lw TMP1, UPVAL:RB->v
2920
- | ldc1 f0, 0(TMP1)
3712
+ | lw SFRETHI, HI(TMP1)
3713
+ | lw SFRETLO, LO(TMP1)
2921
3714
  | addu RA, BASE, RA
2922
- | sdc1 f0, 0(RA)
3715
+ | sw SFRETHI, HI(RA)
3716
+ | sw SFRETLO, LO(RA)
2923
3717
  | ins_next2
2924
3718
  break;
2925
3719
  case BC_USETV:
@@ -2928,26 +3722,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2928
3722
  | srl RA, RA, 1
2929
3723
  | addu RD, BASE, RD
2930
3724
  | addu RA, RA, LFUNC:RB
2931
- | ldc1 f0, 0(RD)
2932
3725
  | lw UPVAL:RB, LFUNC:RA->uvptr
3726
+ | lw SFRETHI, HI(RD)
3727
+ | lw SFRETLO, LO(RD)
2933
3728
  | lbu TMP3, UPVAL:RB->marked
2934
3729
  | lw CARG2, UPVAL:RB->v
2935
3730
  | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2936
3731
  | lbu TMP0, UPVAL:RB->closed
2937
- | lw TMP2, HI(RD)
2938
- | sdc1 f0, 0(CARG2)
3732
+ | sw SFRETHI, HI(CARG2)
3733
+ | sw SFRETLO, LO(CARG2)
2939
3734
  | li AT, LJ_GC_BLACK|1
2940
3735
  | or TMP3, TMP3, TMP0
2941
3736
  | beq TMP3, AT, >2 // Upvalue is closed and black?
2942
- |. addiu TMP2, TMP2, -(LJ_TNUMX+1)
3737
+ |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
2943
3738
  |1:
2944
3739
  | ins_next
2945
3740
  |
2946
3741
  |2: // Check if new value is collectable.
2947
3742
  | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
2948
3743
  | beqz AT, <1 // tvisgcv(v)
2949
- |. lw TMP1, LO(RD)
2950
- | lbu TMP3, GCOBJ:TMP1->gch.marked
3744
+ |. nop
3745
+ | lbu TMP3, GCOBJ:SFRETLO->gch.marked
2951
3746
  | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2952
3747
  | beqz TMP3, <1
2953
3748
  |. load_got lj_gc_barrieruv
@@ -2995,11 +3790,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2995
3790
  | srl RA, RA, 1
2996
3791
  | addu RD, KBASE, RD
2997
3792
  | addu RA, RA, LFUNC:RB
2998
- | ldc1 f0, 0(RD)
2999
- | lw UPVAL:RB, LFUNC:RA->uvptr
3793
+ | lw UPVAL:RB, LFUNC:RA->uvptr
3794
+ | lw SFRETHI, HI(RD)
3795
+ | lw SFRETLO, LO(RD)
3796
+ | lw TMP1, UPVAL:RB->v
3000
3797
  | ins_next1
3001
- | lw TMP1, UPVAL:RB->v
3002
- | sdc1 f0, 0(TMP1)
3798
+ | sw SFRETHI, HI(TMP1)
3799
+ | sw SFRETLO, LO(TMP1)
3003
3800
  | ins_next2
3004
3801
  break;
3005
3802
  case BC_USETP:
@@ -3009,10 +3806,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3009
3806
  | srl TMP0, RD, 3
3010
3807
  | addu RA, RA, LFUNC:RB
3011
3808
  | not TMP0, TMP0
3012
- | lw UPVAL:RB, LFUNC:RA->uvptr
3809
+ | lw UPVAL:RB, LFUNC:RA->uvptr
3013
3810
  | ins_next1
3014
- | lw TMP1, UPVAL:RB->v
3015
- | sw TMP0, HI(TMP1)
3811
+ | lw TMP1, UPVAL:RB->v
3812
+ | sw TMP0, HI(TMP1)
3016
3813
  | ins_next2
3017
3814
  break;
3018
3815
 
@@ -3048,8 +3845,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3048
3845
  | li TMP0, LJ_TFUNC
3049
3846
  | ins_next1
3050
3847
  | addu RA, BASE, RA
3051
- | sw TMP0, HI(RA)
3052
3848
  | sw LFUNC:CRET1, LO(RA)
3849
+ | sw TMP0, HI(RA)
3053
3850
  | ins_next2
3054
3851
  break;
3055
3852
 
@@ -3130,31 +3927,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3130
3927
  | lw TMP2, HI(CARG3)
3131
3928
  | lw TAB:RB, LO(CARG2)
3132
3929
  | li AT, LJ_TTAB
3133
- | ldc1 f0, 0(CARG3)
3134
3930
  | bne TMP1, AT, ->vmeta_tgetv
3135
3931
  |. addu RA, BASE, RA
3136
- | sltiu AT, TMP2, LJ_TISNUM
3137
- | beqz AT, >5
3138
- |. li AT, LJ_TSTR
3139
- |
3140
- | // Convert number key to integer, check for integerness and range.
3141
- | cvt.w.d f2, f0
3142
- | lw TMP0, TAB:RB->asize
3143
- | mfc1 TMP2, f2
3144
- | cvt.d.w f4, f2
3932
+ | bne TMP2, TISNUM, >5
3933
+ |. lw RC, LO(CARG3)
3934
+ | lw TMP0, TAB:RB->asize
3145
3935
  | lw TMP1, TAB:RB->array
3146
- | c.eq.d f0, f4
3147
- | sltu AT, TMP2, TMP0
3148
- | movf AT, r0
3149
- | sll TMP2, TMP2, 3
3936
+ | sltu AT, RC, TMP0
3937
+ | sll TMP2, RC, 3
3150
3938
  | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3151
3939
  |. addu TMP2, TMP1, TMP2
3152
- | lw TMP0, HI(TMP2)
3153
- | beq TMP0, TISNIL, >2
3154
- |. ldc1 f0, 0(TMP2)
3940
+ | lw SFRETHI, HI(TMP2)
3941
+ | beq SFRETHI, TISNIL, >2
3942
+ |. lw SFRETLO, LO(TMP2)
3155
3943
  |1:
3156
3944
  | ins_next1
3157
- | sdc1 f0, 0(RA)
3945
+ | sw SFRETHI, HI(RA)
3946
+ | sw SFRETLO, LO(RA)
3158
3947
  | ins_next2
3159
3948
  |
3160
3949
  |2: // Check for __index if table value is nil.
@@ -3169,8 +3958,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3169
3958
  |. nop
3170
3959
  |
3171
3960
  |5:
3961
+ | li AT, LJ_TSTR
3172
3962
  | bne TMP2, AT, ->vmeta_tgetv
3173
- |. lw STR:RC, LO(CARG3)
3963
+ |. nop
3174
3964
  | b ->BC_TGETS_Z // String key?
3175
3965
  |. nop
3176
3966
  break;
@@ -3202,18 +3992,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3202
3992
  | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3203
3993
  | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3204
3994
  | lw NODE:TMP1, NODE:TMP2->next
3205
- | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
3995
+ | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3206
3996
  | addiu CARG1, CARG1, -LJ_TSTR
3207
3997
  | xor TMP0, TMP0, STR:RC
3208
3998
  | or AT, CARG1, TMP0
3209
3999
  | bnez AT, >4
3210
4000
  |. lw TAB:TMP3, TAB:RB->metatable
3211
- | beq CARG2, TISNIL, >5 // Key found, but nil value?
3212
- |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2)
4001
+ | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
4002
+ |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3213
4003
  |3:
3214
4004
  | ins_next1
3215
- | sw CARG2, HI(RA)
3216
- | sw CARG1, LO(RA)
4005
+ | sw SFRETHI, HI(RA)
4006
+ | sw SFRETLO, LO(RA)
3217
4007
  | ins_next2
3218
4008
  |
3219
4009
  |4: // Follow hash chain.
@@ -3223,7 +4013,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3223
4013
  |
3224
4014
  |5: // Check for __index if table value is nil.
3225
4015
  | beqz TAB:TMP3, <3 // No metatable: done.
3226
- |. li CARG2, LJ_TNIL
4016
+ |. li SFRETHI, LJ_TNIL
3227
4017
  | lbu TMP0, TAB:TMP3->nomm
3228
4018
  | andi TMP0, TMP0, 1<<MM_index
3229
4019
  | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3248,12 +4038,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3248
4038
  | sltu AT, TMP0, TMP1
3249
4039
  | beqz AT, ->vmeta_tgetb
3250
4040
  |. addu RC, TMP2, RC
3251
- | lw TMP1, HI(RC)
3252
- | beq TMP1, TISNIL, >5
3253
- |. ldc1 f0, 0(RC)
4041
+ | lw SFRETHI, HI(RC)
4042
+ | beq SFRETHI, TISNIL, >5
4043
+ |. lw SFRETLO, LO(RC)
3254
4044
  |1:
3255
4045
  | ins_next1
3256
- | sdc1 f0, 0(RA)
4046
+ | sw SFRETHI, HI(RA)
4047
+ | sw SFRETLO, LO(RA)
3257
4048
  | ins_next2
3258
4049
  |
3259
4050
  |5: // Check for __index if table value is nil.
@@ -3264,7 +4055,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3264
4055
  | andi TMP1, TMP1, 1<<MM_index
3265
4056
  | bnez TMP1, <1 // 'no __index' flag set: done.
3266
4057
  |. nop
3267
- | b ->vmeta_tgetb // Caveat: preserve TMP0!
4058
+ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3268
4059
  |. nop
3269
4060
  break;
3270
4061
  case BC_TGETR:
@@ -3272,23 +4063,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3272
4063
  | decode_RB8a RB, INS
3273
4064
  | decode_RB8b RB
3274
4065
  | decode_RDtoRC8 RC, RD
3275
- | addu CARG2, BASE, RB
3276
- | addu CARG3, BASE, RC
3277
- | lw TAB:CARG1, LO(CARG2)
3278
- | ldc1 f0, 0(CARG3)
3279
- | trunc.w.d f2, f0
3280
- | lw TMP0, TAB:CARG1->asize
3281
- | mfc1 CARG2, f2
4066
+ | addu RB, BASE, RB
4067
+ | addu RC, BASE, RC
4068
+ | lw TAB:CARG1, LO(RB)
4069
+ | lw CARG2, LO(RC)
4070
+ | addu RA, BASE, RA
4071
+ | lw TMP0, TAB:CARG1->asize
3282
4072
  | lw TMP1, TAB:CARG1->array
3283
4073
  | sltu AT, CARG2, TMP0
3284
4074
  | sll TMP2, CARG2, 3
3285
4075
  | beqz AT, ->vmeta_tgetr // In array part?
3286
- |. addu TMP2, TMP1, TMP2
3287
- | ldc1 f0, 0(TMP2)
4076
+ |. addu CRET1, TMP1, TMP2
4077
+ | lw SFARG2HI, HI(CRET1)
4078
+ | lw SFARG2LO, LO(CRET1)
3288
4079
  |->BC_TGETR_Z:
3289
- | addu RA, BASE, RA
3290
4080
  | ins_next1
3291
- | sdc1 f0, 0(RA)
4081
+ | sw SFARG2HI, HI(RA)
4082
+ | sw SFARG2LO, LO(RA)
3292
4083
  | ins_next2
3293
4084
  break;
3294
4085
 
@@ -3303,33 +4094,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3303
4094
  | lw TMP2, HI(CARG3)
3304
4095
  | lw TAB:RB, LO(CARG2)
3305
4096
  | li AT, LJ_TTAB
3306
- | ldc1 f0, 0(CARG3)
3307
4097
  | bne TMP1, AT, ->vmeta_tsetv
3308
4098
  |. addu RA, BASE, RA
3309
- | sltiu AT, TMP2, LJ_TISNUM
3310
- | beqz AT, >5
3311
- |. li AT, LJ_TSTR
3312
- |
3313
- | // Convert number key to integer, check for integerness and range.
3314
- | cvt.w.d f2, f0
3315
- | lw TMP0, TAB:RB->asize
3316
- | mfc1 TMP2, f2
3317
- | cvt.d.w f4, f2
4099
+ | bne TMP2, TISNUM, >5
4100
+ |. lw RC, LO(CARG3)
4101
+ | lw TMP0, TAB:RB->asize
3318
4102
  | lw TMP1, TAB:RB->array
3319
- | c.eq.d f0, f4
3320
- | sltu AT, TMP2, TMP0
3321
- | movf AT, r0
3322
- | sll TMP2, TMP2, 3
4103
+ | sltu AT, RC, TMP0
4104
+ | sll TMP2, RC, 3
3323
4105
  | beqz AT, ->vmeta_tsetv // Integer key and in array part?
3324
4106
  |. addu TMP1, TMP1, TMP2
3325
- | lbu TMP3, TAB:RB->marked
3326
4107
  | lw TMP0, HI(TMP1)
4108
+ | lbu TMP3, TAB:RB->marked
4109
+ | lw SFRETHI, HI(RA)
3327
4110
  | beq TMP0, TISNIL, >3
3328
- |. ldc1 f0, 0(RA)
4111
+ |. lw SFRETLO, LO(RA)
3329
4112
  |1:
3330
- | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3331
- | bnez AT, >7
3332
- |. sdc1 f0, 0(TMP1)
4113
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4114
+ | sw SFRETHI, HI(TMP1)
4115
+ | bnez AT, >7
4116
+ |. sw SFRETLO, LO(TMP1)
3333
4117
  |2:
3334
4118
  | ins_next
3335
4119
  |
@@ -3345,8 +4129,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3345
4129
  |. nop
3346
4130
  |
3347
4131
  |5:
4132
+ | li AT, LJ_TSTR
3348
4133
  | bne TMP2, AT, ->vmeta_tsetv
3349
- |. lw STR:RC, LO(CARG3)
4134
+ |. nop
3350
4135
  | b ->BC_TSETS_Z // String key?
3351
4136
  |. nop
3352
4137
  |
@@ -3378,7 +4163,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3378
4163
  | sll TMP1, TMP1, 3
3379
4164
  | subu TMP1, TMP0, TMP1
3380
4165
  | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4166
+ |.if FPU
3381
4167
  | ldc1 f20, 0(RA)
4168
+ |.else
4169
+ | lw SFRETHI, HI(RA)
4170
+ | lw SFRETLO, LO(RA)
4171
+ |.endif
3382
4172
  |1:
3383
4173
  | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3384
4174
  | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3392,8 +4182,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3392
4182
  |. lw TAB:TMP0, TAB:RB->metatable
3393
4183
  |2:
3394
4184
  | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4185
+ |.if FPU
3395
4186
  | bnez AT, >7
3396
4187
  |. sdc1 f20, NODE:TMP2->val
4188
+ |.else
4189
+ | sw SFRETHI, NODE:TMP2->val.u32.hi
4190
+ | bnez AT, >7
4191
+ |. sw SFRETLO, NODE:TMP2->val.u32.lo
4192
+ |.endif
3397
4193
  |3:
3398
4194
  | ins_next
3399
4195
  |
@@ -3431,8 +4227,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3431
4227
  |. move CARG1, L
3432
4228
  | // Returns TValue *.
3433
4229
  | lw BASE, L->base
4230
+ |.if FPU
3434
4231
  | b <3 // No 2nd write barrier needed.
3435
4232
  |. sdc1 f20, 0(CRET1)
4233
+ |.else
4234
+ | lw SFARG1HI, HI(RA)
4235
+ | lw SFARG1LO, LO(RA)
4236
+ | sw SFARG1HI, HI(CRET1)
4237
+ | b <3 // No 2nd write barrier needed.
4238
+ |. sw SFARG1LO, LO(CRET1)
4239
+ |.endif
3436
4240
  |
3437
4241
  |7: // Possible table write barrier for the value. Skip valiswhite check.
3438
4242
  | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3457,11 +4261,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3457
4261
  | lw TMP1, HI(RC)
3458
4262
  | lbu TMP3, TAB:RB->marked
3459
4263
  | beq TMP1, TISNIL, >5
3460
- |. ldc1 f0, 0(RA)
3461
4264
  |1:
4265
+ |. lw SFRETHI, HI(RA)
4266
+ | lw SFRETLO, LO(RA)
3462
4267
  | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4268
+ | sw SFRETHI, HI(RC)
3463
4269
  | bnez AT, >7
3464
- |. sdc1 f0, 0(RC)
4270
+ |. sw SFRETLO, LO(RC)
3465
4271
  |2:
3466
4272
  | ins_next
3467
4273
  |
@@ -3473,7 +4279,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3473
4279
  | andi TMP1, TMP1, 1<<MM_newindex
3474
4280
  | bnez TMP1, <1 // 'no __newindex' flag set: done.
3475
4281
  |. nop
3476
- | b ->vmeta_tsetb // Caveat: preserve TMP0!
4282
+ | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
3477
4283
  |. nop
3478
4284
  |
3479
4285
  |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -3486,13 +4292,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3486
4292
  | decode_RDtoRC8 RC, RD
3487
4293
  | addu CARG1, BASE, RB
3488
4294
  | addu CARG3, BASE, RC
3489
- | lw TAB:CARG2, LO(CARG1)
3490
- | ldc1 f0, 0(CARG3)
3491
- | trunc.w.d f2, f0
3492
- | lbu TMP3, TAB:CARG2->marked
4295
+ | lw TAB:CARG2, LO(CARG1)
4296
+ | lw CARG3, LO(CARG3)
4297
+ | lbu TMP3, TAB:CARG2->marked
3493
4298
  | lw TMP0, TAB:CARG2->asize
3494
- | mfc1 CARG3, f2
3495
- | lw TMP1, TAB:CARG2->array
4299
+ | lw TMP1, TAB:CARG2->array
3496
4300
  | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3497
4301
  | bnez AT, >7
3498
4302
  |. addu RA, BASE, RA
@@ -3500,18 +4304,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3500
4304
  | sltu AT, CARG3, TMP0
3501
4305
  | sll TMP2, CARG3, 3
3502
4306
  | beqz AT, ->vmeta_tsetr // In array part?
3503
- |. ldc1 f20, 0(RA)
3504
- | addu CRET1, TMP1, TMP2
4307
+ |. addu CRET1, TMP1, TMP2
3505
4308
  |->BC_TSETR_Z:
4309
+ | lw SFARG1HI, HI(RA)
4310
+ | lw SFARG1LO, LO(RA)
3506
4311
  | ins_next1
3507
- | sdc1 f20, 0(CRET1)
4312
+ | sw SFARG1HI, HI(CRET1)
4313
+ | sw SFARG1LO, LO(CRET1)
3508
4314
  | ins_next2
3509
4315
  |
3510
4316
  |7: // Possible table write barrier for the value. Skip valiswhite check.
3511
4317
  | barrierback TAB:RB, TMP3, TMP0, <2
3512
4318
  break;
3513
4319
 
3514
-
3515
4320
  case BC_TSETM:
3516
4321
  | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
3517
4322
  | addu RA, BASE, RA
@@ -3533,10 +4338,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3533
4338
  | addu TMP1, TMP1, CARG1
3534
4339
  | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3535
4340
  |3: // Copy result slots to table.
3536
- | ldc1 f0, 0(RA)
4341
+ | lw SFRETHI, HI(RA)
4342
+ | lw SFRETLO, LO(RA)
3537
4343
  | addiu RA, RA, 8
3538
4344
  | sltu AT, RA, TMP2
3539
- | sdc1 f0, 0(TMP1)
4345
+ | sw SFRETHI, HI(TMP1)
4346
+ | sw SFRETLO, LO(TMP1)
3540
4347
  | bnez AT, <3
3541
4348
  |. addiu TMP1, TMP1, 8
3542
4349
  | bnez TMP0, >7
@@ -3611,10 +4418,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3611
4418
  | beqz NARGS8:RC, >3
3612
4419
  |. move TMP3, NARGS8:RC
3613
4420
  |2:
3614
- | ldc1 f0, 0(RA)
4421
+ | lw SFRETHI, HI(RA)
4422
+ | lw SFRETLO, LO(RA)
3615
4423
  | addiu RA, RA, 8
3616
4424
  | addiu TMP3, TMP3, -8
3617
- | sdc1 f0, 0(TMP2)
4425
+ | sw SFRETHI, HI(TMP2)
4426
+ | sw SFRETLO, LO(TMP2)
3618
4427
  | bnez TMP3, <2
3619
4428
  |. addiu TMP2, TMP2, 8
3620
4429
  |3:
@@ -3651,12 +4460,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3651
4460
  | li AT, LJ_TFUNC
3652
4461
  | lw TMP1, -24+HI(BASE)
3653
4462
  | lw LFUNC:RB, -24+LO(BASE)
3654
- | ldc1 f2, -8(BASE)
3655
- | ldc1 f0, -16(BASE)
4463
+ | lw SFARG1HI, -16+HI(BASE)
4464
+ | lw SFARG1LO, -16+LO(BASE)
4465
+ | lw SFARG2HI, -8+HI(BASE)
4466
+ | lw SFARG2LO, -8+LO(BASE)
3656
4467
  | sw TMP1, HI(BASE) // Copy callable.
3657
4468
  | sw LFUNC:RB, LO(BASE)
3658
- | sdc1 f2, 16(BASE) // Copy control var.
3659
- | sdc1 f0, 8(BASE) // Copy state.
4469
+ | sw SFARG1HI, 8+HI(BASE) // Copy state.
4470
+ | sw SFARG1LO, 8+LO(BASE)
4471
+ | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4472
+ | sw SFARG2LO, 16+LO(BASE)
3660
4473
  | addiu BASE, BASE, 8
3661
4474
  | bne TMP1, AT, ->vmeta_call
3662
4475
  |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3679,20 +4492,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3679
4492
  | beqz AT, >5 // Index points after array part?
3680
4493
  |. sll TMP3, RC, 3
3681
4494
  | addu TMP3, TMP1, TMP3
3682
- | lw TMP2, HI(TMP3)
3683
- | ldc1 f0, 0(TMP3)
3684
- | mtc1 RC, f2
4495
+ | lw SFARG1HI, HI(TMP3)
4496
+ | lw SFARG1LO, LO(TMP3)
3685
4497
  | lhu RD, -4+OFS_RD(PC)
3686
- | beq TMP2, TISNIL, <1 // Skip holes in array part.
4498
+ | sw TISNUM, HI(RA)
4499
+ | sw RC, LO(RA)
4500
+ | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
3687
4501
  |. addiu RC, RC, 1
3688
- | cvt.d.w f2, f2
4502
+ | sw SFARG1HI, 8+HI(RA)
4503
+ | sw SFARG1LO, 8+LO(RA)
3689
4504
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3690
- | sdc1 f0, 8(RA)
3691
4505
  | decode_RD4b RD
3692
4506
  | addu RD, RD, TMP3
3693
4507
  | sw RC, -8+LO(RA) // Update control var.
3694
4508
  | addu PC, PC, RD
3695
- | sdc1 f2, 0(RA)
3696
4509
  |3:
3697
4510
  | ins_next
3698
4511
  |
@@ -3707,18 +4520,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3707
4520
  | sll RB, RC, 3
3708
4521
  | subu TMP3, TMP3, RB
3709
4522
  | addu NODE:TMP3, TMP3, TMP2
3710
- | lw RB, HI(NODE:TMP3)
3711
- | ldc1 f0, 0(NODE:TMP3)
4523
+ | lw SFARG1HI, NODE:TMP3->val.u32.hi
4524
+ | lw SFARG1LO, NODE:TMP3->val.u32.lo
3712
4525
  | lhu RD, -4+OFS_RD(PC)
3713
- | beq RB, TISNIL, <6 // Skip holes in hash part.
4526
+ | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
3714
4527
  |. addiu RC, RC, 1
3715
- | ldc1 f2, NODE:TMP3->key
4528
+ | lw SFARG2HI, NODE:TMP3->key.u32.hi
4529
+ | lw SFARG2LO, NODE:TMP3->key.u32.lo
3716
4530
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3717
- | sdc1 f0, 8(RA)
4531
+ | sw SFARG1HI, 8+HI(RA)
4532
+ | sw SFARG1LO, 8+LO(RA)
3718
4533
  | addu RC, RC, TMP0
3719
4534
  | decode_RD4b RD
3720
4535
  | addu RD, RD, TMP3
3721
- | sdc1 f2, 0(RA)
4536
+ | sw SFARG2HI, HI(RA)
4537
+ | sw SFARG2LO, LO(RA)
3722
4538
  | addu PC, PC, RD
3723
4539
  | b <3
3724
4540
  |. sw RC, -8+LO(RA) // Update control var.
@@ -3798,9 +4614,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3798
4614
  | bnez AT, >7
3799
4615
  |. addiu MULTRES, TMP1, 8
3800
4616
  |6:
3801
- | ldc1 f0, 0(RC)
4617
+ | lw SFRETHI, HI(RC)
4618
+ | lw SFRETLO, LO(RC)
3802
4619
  | addiu RC, RC, 8
3803
- | sdc1 f0, 0(RA)
4620
+ | sw SFRETHI, HI(RA)
4621
+ | sw SFRETLO, LO(RA)
3804
4622
  | sltu AT, RC, TMP3
3805
4623
  | bnez AT, <6 // More vararg slots?
3806
4624
  |. addiu RA, RA, 8
@@ -3856,10 +4674,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3856
4674
  | beqz RC, >3
3857
4675
  |. subu BASE, TMP2, TMP0
3858
4676
  |2:
3859
- | ldc1 f0, 0(RA)
4677
+ | lw SFRETHI, HI(RA)
4678
+ | lw SFRETLO, LO(RA)
3860
4679
  | addiu RA, RA, 8
3861
4680
  | addiu RC, RC, -8
3862
- | sdc1 f0, 0(TMP2)
4681
+ | sw SFRETHI, HI(TMP2)
4682
+ | sw SFRETLO, LO(TMP2)
3863
4683
  | bnez RC, <2
3864
4684
  |. addiu TMP2, TMP2, 8
3865
4685
  |3:
@@ -3900,14 +4720,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3900
4720
  | lw INS, -4(PC)
3901
4721
  | addiu TMP2, BASE, -8
3902
4722
  if (op == BC_RET1) {
3903
- | ldc1 f0, 0(RA)
4723
+ | lw SFRETHI, HI(RA)
4724
+ | lw SFRETLO, LO(RA)
3904
4725
  }
3905
4726
  | decode_RB8a RB, INS
3906
4727
  | decode_RA8a RA, INS
3907
4728
  | decode_RB8b RB
3908
4729
  | decode_RA8b RA
3909
4730
  if (op == BC_RET1) {
3910
- | sdc1 f0, 0(TMP2)
4731
+ | sw SFRETHI, HI(TMP2)
4732
+ | sw SFRETLO, LO(TMP2)
3911
4733
  }
3912
4734
  | subu BASE, TMP2, RA
3913
4735
  |5:
@@ -3949,69 +4771,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3949
4771
  | // RA = base*8, RD = target (after end of loop or start of loop)
3950
4772
  vk = (op == BC_IFORL || op == BC_JFORL);
3951
4773
  | addu RA, BASE, RA
3952
- if (vk) {
3953
- | ldc1 f0, FORL_IDX*8(RA)
3954
- | ldc1 f4, FORL_STEP*8(RA)
3955
- | ldc1 f2, FORL_STOP*8(RA)
3956
- | lw TMP3, FORL_STEP*8+HI(RA)
3957
- | add.d f0, f0, f4
3958
- | sdc1 f0, FORL_IDX*8(RA)
3959
- } else {
3960
- | lw TMP1, FORL_IDX*8+HI(RA)
3961
- | lw TMP3, FORL_STEP*8+HI(RA)
3962
- | lw TMP2, FORL_STOP*8+HI(RA)
3963
- | sltiu TMP1, TMP1, LJ_TISNUM
3964
- | sltiu TMP0, TMP3, LJ_TISNUM
3965
- | sltiu TMP2, TMP2, LJ_TISNUM
3966
- | and TMP1, TMP1, TMP0
3967
- | and TMP1, TMP1, TMP2
3968
- | ldc1 f0, FORL_IDX*8(RA)
3969
- | beqz TMP1, ->vmeta_for
3970
- |. ldc1 f2, FORL_STOP*8(RA)
3971
- }
4774
+ | lw SFARG1HI, FORL_IDX*8+HI(RA)
4775
+ | lw SFARG1LO, FORL_IDX*8+LO(RA)
3972
4776
  if (op != BC_JFORL) {
3973
4777
  | srl RD, RD, 1
3974
- | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535)
4778
+ | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4779
+ | addu TMP2, RD, TMP2
4780
+ }
4781
+ if (!vk) {
4782
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
4783
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
4784
+ | bne SFARG1HI, TISNUM, >5
4785
+ |. lw SFRETHI, FORL_STEP*8+HI(RA)
4786
+ | xor AT, SFARG2HI, TISNUM
4787
+ | lw SFRETLO, FORL_STEP*8+LO(RA)
4788
+ | xor TMP0, SFRETHI, TISNUM
4789
+ | or AT, AT, TMP0
4790
+ | bnez AT, ->vmeta_for
4791
+ |. slt AT, SFRETLO, r0
4792
+ | slt CRET1, SFARG2LO, SFARG1LO
4793
+ | slt TMP1, SFARG1LO, SFARG2LO
4794
+ | movn CRET1, TMP1, AT
4795
+ } else {
4796
+ | bne SFARG1HI, TISNUM, >5
4797
+ |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4798
+ | lw SFRETLO, FORL_STOP*8+LO(RA)
4799
+ | move TMP3, SFARG1LO
4800
+ | addu SFARG1LO, SFARG1LO, SFARG2LO
4801
+ | xor TMP0, SFARG1LO, TMP3
4802
+ | xor TMP1, SFARG1LO, SFARG2LO
4803
+ | and TMP0, TMP0, TMP1
4804
+ | slt TMP1, SFARG1LO, SFRETLO
4805
+ | slt CRET1, SFRETLO, SFARG1LO
4806
+ | slt AT, SFARG2LO, r0
4807
+ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4808
+ | movn CRET1, TMP1, AT
4809
+ | or CRET1, CRET1, TMP0
4810
+ }
4811
+ |1:
4812
+ if (op == BC_FORI) {
4813
+ | movz TMP2, r0, CRET1
4814
+ | addu PC, PC, TMP2
4815
+ } else if (op == BC_JFORI) {
4816
+ | addu PC, PC, TMP2
4817
+ | lhu RD, -4+OFS_RD(PC)
4818
+ } else if (op == BC_IFORL) {
4819
+ | movn TMP2, r0, CRET1
4820
+ | addu PC, PC, TMP2
4821
+ }
4822
+ if (vk) {
4823
+ | sw SFARG1HI, FORL_IDX*8+HI(RA)
4824
+ | sw SFARG1LO, FORL_IDX*8+LO(RA)
3975
4825
  }
3976
- | c.le.d 0, f0, f2
3977
- | c.le.d 1, f2, f0
3978
- | sdc1 f0, FORL_EXT*8(RA)
4826
+ | ins_next1
4827
+ | sw SFARG1HI, FORL_EXT*8+HI(RA)
4828
+ | sw SFARG1LO, FORL_EXT*8+LO(RA)
4829
+ |2:
3979
4830
  if (op == BC_JFORI) {
3980
- | li TMP1, 1
3981
- | li TMP2, 1
3982
- | addu TMP0, RD, TMP0
3983
- | slt TMP3, TMP3, r0
3984
- | movf TMP1, r0, 0
3985
- | addu PC, PC, TMP0
3986
- | movf TMP2, r0, 1
3987
- | lhu RD, -4+OFS_RD(PC)
3988
- | movn TMP1, TMP2, TMP3
3989
- | bnez TMP1, =>BC_JLOOP
4831
+ | beqz CRET1, =>BC_JLOOP
3990
4832
  |. decode_RD8b RD
3991
4833
  } else if (op == BC_JFORL) {
3992
- | li TMP1, 1
3993
- | li TMP2, 1
3994
- | slt TMP3, TMP3, r0
3995
- | movf TMP1, r0, 0
3996
- | movf TMP2, r0, 1
3997
- | movn TMP1, TMP2, TMP3
3998
- | bnez TMP1, =>BC_JLOOP
4834
+ | beqz CRET1, =>BC_JLOOP
4835
+ }
4836
+ | ins_next2
4837
+ |
4838
+ |5: // FP loop.
4839
+ |.if FPU
4840
+ if (!vk) {
4841
+ | ldc1 f0, FORL_IDX*8(RA)
4842
+ | ldc1 f2, FORL_STOP*8(RA)
4843
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
4844
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
4845
+ | sltiu AT, SFRETHI, LJ_TISNUM
4846
+ | and TMP0, TMP0, TMP1
4847
+ | and AT, AT, TMP0
4848
+ | beqz AT, ->vmeta_for
4849
+ |. slt TMP3, SFRETHI, r0
4850
+ | c.ole.d 0, f0, f2
4851
+ | c.ole.d 1, f2, f0
4852
+ | li CRET1, 1
4853
+ | movt CRET1, r0, 0
4854
+ | movt AT, r0, 1
4855
+ | b <1
4856
+ |. movn CRET1, AT, TMP3
4857
+ } else {
4858
+ | ldc1 f0, FORL_IDX*8(RA)
4859
+ | ldc1 f4, FORL_STEP*8(RA)
4860
+ | ldc1 f2, FORL_STOP*8(RA)
4861
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
4862
+ | add.d f0, f0, f4
4863
+ | c.ole.d 0, f0, f2
4864
+ | c.ole.d 1, f2, f0
4865
+ | slt TMP3, SFARG2HI, r0
4866
+ | li CRET1, 1
4867
+ | li AT, 1
4868
+ | movt CRET1, r0, 0
4869
+ | movt AT, r0, 1
4870
+ | movn CRET1, AT, TMP3
4871
+ if (op == BC_IFORL) {
4872
+ | movn TMP2, r0, CRET1
4873
+ | addu PC, PC, TMP2
4874
+ }
4875
+ | sdc1 f0, FORL_IDX*8(RA)
4876
+ | ins_next1
4877
+ | b <2
4878
+ |. sdc1 f0, FORL_EXT*8(RA)
4879
+ }
4880
+ |.else
4881
+ if (!vk) {
4882
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
4883
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
4884
+ | sltiu AT, SFRETHI, LJ_TISNUM
4885
+ | and TMP0, TMP0, TMP1
4886
+ | and AT, AT, TMP0
4887
+ | beqz AT, ->vmeta_for
4888
+ |. nop
4889
+ | bal ->vm_sfcmpolex
4890
+ |. move TMP3, SFRETHI
4891
+ | b <1
3999
4892
  |. nop
4000
4893
  } else {
4001
- | addu TMP1, RD, TMP0
4002
- | slt TMP3, TMP3, r0
4003
- | move TMP2, TMP1
4004
- if (op == BC_FORI) {
4005
- | movt TMP1, r0, 0
4006
- | movt TMP2, r0, 1
4894
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
4895
+ | load_got __adddf3
4896
+ | call_extern
4897
+ |. sw TMP2, ARG5
4898
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
4899
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
4900
+ | move SFARG1HI, SFRETHI
4901
+ | move SFARG1LO, SFRETLO
4902
+ | bal ->vm_sfcmpolex
4903
+ |. lw TMP3, FORL_STEP*8+HI(RA)
4904
+ if ( op == BC_JFORL ) {
4905
+ | lhu RD, -4+OFS_RD(PC)
4906
+ | lw TMP2, ARG5
4907
+ | b <1
4908
+ |. decode_RD8b RD
4007
4909
  } else {
4008
- | movf TMP1, r0, 0
4009
- | movf TMP2, r0, 1
4910
+ | b <1
4911
+ |. lw TMP2, ARG5
4010
4912
  }
4011
- | movn TMP1, TMP2, TMP3
4012
- | addu PC, PC, TMP1
4013
4913
  }
4014
- | ins_next
4914
+ |.endif
4015
4915
  break;
4016
4916
 
4017
4917
  case BC_ITERL:
@@ -4260,8 +5160,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4260
5160
  fcofs, CFRAME_SIZE);
4261
5161
  for (i = 23; i >= 16; i--)
4262
5162
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5163
+ #if !LJ_SOFTFP
4263
5164
  for (i = 30; i >= 20; i -= 2)
4264
5165
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5166
+ #endif
4265
5167
  fprintf(ctx->fp,
4266
5168
  "\t.align 2\n"
4267
5169
  ".LEFDE0:\n\n");
@@ -4279,6 +5181,7 @@ static void emit_asm_debug(BuildCtx *ctx)
4279
5181
  "\t.align 2\n"
4280
5182
  ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4281
5183
  #endif
5184
+ #if !LJ_NO_UNWIND
4282
5185
  fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
4283
5186
  fprintf(ctx->fp,
4284
5187
  "\t.globl lj_err_unwind_dwarf\n"
@@ -4312,8 +5215,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4312
5215
  fcofs, CFRAME_SIZE);
4313
5216
  for (i = 23; i >= 16; i--)
4314
5217
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5218
+ #if !LJ_SOFTFP
4315
5219
  for (i = 30; i >= 20; i -= 2)
4316
5220
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5221
+ #endif
4317
5222
  fprintf(ctx->fp,
4318
5223
  "\t.align 2\n"
4319
5224
  ".LEFDE2:\n\n");
@@ -4346,6 +5251,7 @@ static void emit_asm_debug(BuildCtx *ctx)
4346
5251
  "\t.byte 0xd\n\t.uleb128 0x10\n"
4347
5252
  "\t.align 2\n"
4348
5253
  ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
5254
+ #endif
4349
5255
  #endif
4350
5256
  break;
4351
5257
  default: