immunio 1.1.2 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. checksums.yaml +4 -4
  2. data/lib/immunio/version.rb +1 -1
  3. data/lua-hooks/Makefile +56 -109
  4. data/lua-hooks/ext/all.c +3 -14
  5. data/lua-hooks/ext/libinjection/module.mk +5 -0
  6. data/lua-hooks/ext/lpeg/module.mk +6 -0
  7. data/lua-hooks/ext/lua-cmsgpack/module.mk +2 -0
  8. data/lua-hooks/ext/lua-snapshot/module.mk +2 -0
  9. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  10. data/lua-hooks/ext/luajit/Makefile +2 -2
  11. data/lua-hooks/ext/luajit/README +2 -2
  12. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  13. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  14. data/lua-hooks/ext/luajit/doc/changes.html +15 -2
  15. data/lua-hooks/ext/luajit/doc/contact.html +3 -3
  16. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  17. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  18. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  19. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +4 -2
  20. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  21. data/lua-hooks/ext/luajit/doc/ext_jit.html +2 -2
  22. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  23. data/lua-hooks/ext/luajit/doc/extensions.html +9 -2
  24. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  25. data/lua-hooks/ext/luajit/doc/install.html +22 -18
  26. data/lua-hooks/ext/luajit/doc/luajit.html +3 -3
  27. data/lua-hooks/ext/luajit/doc/running.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/status.html +2 -2
  29. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  30. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +4 -4
  31. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  32. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +4 -4
  33. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +1 -1
  34. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +4 -4
  35. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  36. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +4 -4
  37. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +3 -3
  38. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  39. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +34 -7
  40. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +427 -102
  41. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +5 -5
  42. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  43. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  44. data/lua-hooks/ext/luajit/src/Makefile +36 -21
  45. data/lua-hooks/ext/luajit/src/Makefile.dep +3 -1
  46. data/lua-hooks/ext/luajit/src/host/buildvm.c +1 -1
  47. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  48. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +10 -1
  49. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +1 -1
  50. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  51. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +1 -1
  52. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  53. data/lua-hooks/ext/luajit/src/host/genminilua.lua +1 -1
  54. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  55. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +2 -2
  56. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +1 -1
  57. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +1 -1
  58. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  59. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +1 -1
  60. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  61. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +163 -73
  62. data/lua-hooks/ext/luajit/src/jit/dump.lua +2 -1
  63. data/lua-hooks/ext/luajit/src/jit/p.lua +1 -1
  64. data/lua-hooks/ext/luajit/src/jit/v.lua +1 -1
  65. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/lib_aux.c +1 -1
  67. data/lua-hooks/ext/luajit/src/lib_base.c +4 -5
  68. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  69. data/lua-hooks/ext/luajit/src/lib_debug.c +1 -1
  70. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -5
  71. data/lua-hooks/ext/luajit/src/lib_init.c +1 -1
  72. data/lua-hooks/ext/luajit/src/lib_io.c +2 -3
  73. data/lua-hooks/ext/luajit/src/lib_jit.c +1 -1
  74. data/lua-hooks/ext/luajit/src/lib_math.c +1 -1
  75. data/lua-hooks/ext/luajit/src/lib_os.c +2 -2
  76. data/lua-hooks/ext/luajit/src/lib_package.c +1 -1
  77. data/lua-hooks/ext/luajit/src/lib_string.c +1 -1
  78. data/lua-hooks/ext/luajit/src/lib_table.c +1 -1
  79. data/lua-hooks/ext/luajit/src/lj.supp +15 -0
  80. data/lua-hooks/ext/luajit/src/lj_alloc.c +1 -1
  81. data/lua-hooks/ext/luajit/src/lj_api.c +4 -1
  82. data/lua-hooks/ext/luajit/src/lj_arch.h +33 -7
  83. data/lua-hooks/ext/luajit/src/lj_asm.c +12 -5
  84. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  85. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +3 -13
  86. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +337 -71
  87. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +2 -2
  88. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +2 -2
  89. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  90. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  91. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  92. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  93. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  94. data/lua-hooks/ext/luajit/src/lj_buf.c +2 -4
  95. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -3
  96. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  97. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  98. data/lua-hooks/ext/luajit/src/lj_ccall.c +37 -14
  99. data/lua-hooks/ext/luajit/src/lj_ccall.h +3 -3
  100. data/lua-hooks/ext/luajit/src/lj_ccallback.c +16 -7
  101. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  102. data/lua-hooks/ext/luajit/src/lj_cconv.c +1 -1
  103. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_cdata.c +10 -1
  105. data/lua-hooks/ext/luajit/src/lj_cdata.h +3 -1
  106. data/lua-hooks/ext/luajit/src/lj_clib.c +1 -1
  107. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  108. data/lua-hooks/ext/luajit/src/lj_cparse.c +27 -6
  109. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_crecord.c +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_ctype.c +10 -8
  113. data/lua-hooks/ext/luajit/src/lj_ctype.h +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_def.h +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_dispatch.c +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_dispatch.h +21 -4
  119. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +1 -1
  120. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +7 -5
  121. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +1 -1
  123. data/lua-hooks/ext/luajit/src/lj_err.c +69 -31
  124. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  125. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  127. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +10 -40
  128. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  129. data/lua-hooks/ext/luajit/src/lj_frame.h +12 -1
  130. data/lua-hooks/ext/luajit/src/lj_func.c +1 -1
  131. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_gc.c +2 -2
  133. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  134. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_ir.c +31 -15
  137. data/lua-hooks/ext/luajit/src/lj_ir.h +1 -1
  138. data/lua-hooks/ext/luajit/src/lj_ircall.h +29 -1
  139. data/lua-hooks/ext/luajit/src/lj_iropt.h +2 -1
  140. data/lua-hooks/ext/luajit/src/lj_jit.h +2 -1
  141. data/lua-hooks/ext/luajit/src/lj_lex.c +28 -1
  142. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  143. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  144. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  145. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_mcode.c +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_meta.c +8 -8
  149. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  151. data/lua-hooks/ext/luajit/src/lj_obj.h +1 -1
  152. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +1 -1
  154. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +1 -1
  156. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +1 -1
  158. data/lua-hooks/ext/luajit/src/lj_opt_split.c +10 -5
  159. data/lua-hooks/ext/luajit/src/lj_parse.c +1 -1
  160. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  161. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  162. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_record.c +13 -5
  164. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_snap.c +20 -23
  166. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_state.c +1 -1
  168. data/lua-hooks/ext/luajit/src/lj_state.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_strfmt.c +12 -98
  172. data/lua-hooks/ext/luajit/src/lj_strfmt.h +4 -4
  173. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +591 -0
  174. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  175. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -1
  177. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  178. data/lua-hooks/ext/luajit/src/lj_target.h +1 -1
  179. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  180. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +1 -1
  181. data/lua-hooks/ext/luajit/src/lj_target_mips.h +30 -2
  182. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_target_x86.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_trace.c +7 -2
  185. data/lua-hooks/ext/luajit/src/lj_trace.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -3
  187. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  189. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -3
  190. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_vmmath.c +15 -15
  193. data/lua-hooks/ext/luajit/src/ljamalg.c +2 -1
  194. data/lua-hooks/ext/luajit/src/lua.h +1 -0
  195. data/lua-hooks/ext/luajit/src/luaconf.h +2 -2
  196. data/lua-hooks/ext/luajit/src/luajit.c +1 -1
  197. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  198. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  199. data/lua-hooks/ext/luajit/src/msvcbuild.bat +1 -1
  200. data/lua-hooks/ext/luajit/src/ps4build.bat +26 -6
  201. data/lua-hooks/ext/luajit/src/vm_arm.dasc +17 -9
  202. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +1 -1
  203. data/lua-hooks/ext/luajit/src/vm_mips.dasc +1562 -656
  204. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +3 -7
  205. data/lua-hooks/ext/luajit/src/vm_x64.dasc +10 -2
  206. data/lua-hooks/ext/luajit/src/vm_x86.dasc +5 -8
  207. data/lua-hooks/ext/luautf8/module.mk +2 -0
  208. data/lua-hooks/ext/module.mk +15 -0
  209. data/lua-hooks/ext/modules.h +17 -0
  210. data/lua-hooks/ext/perf/luacpu.c +1 -1
  211. data/lua-hooks/ext/perf/lualoadavg.c +1 -1
  212. data/lua-hooks/ext/perf/luameminfo.c +1 -1
  213. data/lua-hooks/ext/perf/luaoslib.c +124 -2
  214. data/lua-hooks/ext/perf/module.mk +5 -0
  215. data/lua-hooks/ext/sha1/luasha1.c +4 -2
  216. data/lua-hooks/ext/sha1/module.mk +5 -0
  217. data/lua-hooks/ext/sha2/luasha256.c +4 -2
  218. data/lua-hooks/ext/sha2/module.mk +5 -0
  219. data/lua-hooks/ext/sysutils/lua_utils.c +56 -0
  220. data/lua-hooks/ext/sysutils/module.mk +2 -0
  221. data/lua-hooks/lib/boot.lua +2 -1
  222. data/lua-hooks/lib/hooks/module.mk +31 -0
  223. data/lua-hooks/lib/hooks/xss/module.mk +4 -0
  224. data/lua-hooks/lib/lexers/module.mk +10 -0
  225. data/lua-hooks/lib/module.mk +38 -0
  226. data/lua-hooks/lib/schema/module.mk +3 -0
  227. data/lua-hooks/options.mk +59 -0
  228. metadata +21 -2
@@ -1,5 +1,5 @@
1
1
  @rem Script to build LuaJIT with MSVC.
2
- @rem Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
2
+ @rem Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
3
3
  @rem
4
4
  @rem Either open a "Visual Studio .NET Command Prompt"
5
5
  @rem (Note that the Express Edition does not contain an x64 compiler)
@@ -2,7 +2,19 @@
2
2
  @rem Donated to the public domain.
3
3
  @rem
4
4
  @rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5
+ @rem or "VS2015 x64 Native Tools Command Prompt".
6
+ @rem
5
7
  @rem Then cd to this directory and run this script.
8
+ @rem
9
+ @rem Recommended invocation:
10
+ @rem
11
+ @rem ps4build        release build, amalgamated, 64-bit GC
12
+ @rem ps4build debug  debug build, amalgamated, 64-bit GC
13
+ @rem
14
+ @rem Additional command-line options (not generally recommended):
15
+ @rem
16
+ @rem gc32 (before debug)    32-bit GC
17
+ @rem noamalg (after debug)  non-amalgamated build
6
18
 
7
19
  @if not defined INCLUDE goto :FAIL
8
20
  @if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -15,6 +27,14 @@
15
27
  @set DASMDIR=..\dynasm
16
28
  @set DASM=%DASMDIR%\dynasm.lua
17
29
  @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
30
+ @set GC64=-DLUAJIT_ENABLE_GC64
31
+ @set DASC=vm_x64.dasc
32
+
33
+ @if "%1" neq "gc32" goto :NOGC32
34
+ @shift
35
+ @set GC64=
36
+ @set DASC=vm_x86.dasc
37
+ :NOGC32
18
38
 
19
39
  %LJCOMPILE% host\minilua.c
20
40
  @if errorlevel 1 goto :BAD
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^
28
48
  @if not errorlevel 8 goto :FAIL
29
49
 
30
50
  @set DASMFLAGS=-D P64 -D NO_UNWIND
31
- minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
51
+ minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
32
52
  @if errorlevel 1 goto :BAD
33
53
 
34
- %LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
54
+ %LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
35
55
  @if errorlevel 1 goto :BAD
36
56
  %LJLINK% /out:buildvm.exe buildvm*.obj
37
57
  @if errorlevel 1 goto :BAD
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
54
74
  @if errorlevel 1 goto :BAD
55
75
 
56
76
  @rem ---- Cross compiler ----
57
- @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI
77
+ @set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
58
78
  @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
59
79
  @set INCLUDE=""
60
80
 
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s
63
83
  @if "%1" neq "debug" goto :NODEBUG
64
84
  @shift
65
85
  @set LJCOMPILE=%LJCOMPILE% -g -O0
66
- @set TARGETLIB=libluajitD.a
86
+ @set TARGETLIB=libluajitD_ps4.a
67
87
  goto :BUILD
68
88
  :NODEBUG
69
89
  @set LJCOMPILE=%LJCOMPILE% -O2
70
- @set TARGETLIB=libluajit.a
90
+ @set TARGETLIB=libluajit_ps4.a
71
91
  :BUILD
72
92
  del %TARGETLIB%
73
- @if "%1"=="amalg" goto :AMALG
93
+ @if "%1" neq "noamalg" goto :AMALG
74
94
  for %%f in (lj_*.c lib_*.c) do (
75
95
  %LJCOMPILE% %%f
76
96
  @if errorlevel 1 goto :BAD
@@ -1,6 +1,6 @@
1
1
  |// Low-level VM code for ARM CPUs.
2
2
  |// Bytecode interpreter, fast functions and helper functions.
3
- |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
3
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
4
4
  |
5
5
  |.arch arm
6
6
  |.section code_op, code_sub
@@ -372,6 +372,17 @@ static void build_subroutines(BuildCtx *ctx)
372
372
  | str CARG1, [BASE, #-4] // Prepend false to error message.
373
373
  | st_vmstate CARG2
374
374
  | b ->vm_returnc
375
+ |
376
+ |->vm_unwind_ext: // Complete external unwind.
377
+ #if !LJ_NO_UNWIND
378
+ | push {r0, r1, r2, lr}
379
+ | bl extern _Unwind_Complete
380
+ | ldr r0, [sp]
381
+ | bl extern _Unwind_DeleteException
382
+ | pop {r0, r1, r2, lr}
383
+ | mov r0, r1
384
+ | bx r2
385
+ #endif
375
386
  |
376
387
  |//-----------------------------------------------------------------------
377
388
  |//-- Grow stack for calls -----------------------------------------------
@@ -2086,7 +2097,7 @@ static void build_subroutines(BuildCtx *ctx)
2086
2097
  | // RA = resultptr, CARG4 = meta base
2087
2098
  | ldr RB, SAVE_MULTRES
2088
2099
  | ldr INS, [PC, #-4]
2089
- | ldr CARG3, [CARG4, #-24] // Save previous trace number.
2100
+ | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
2090
2101
  | subs RB, RB, #8
2091
2102
  | decode_RA8 RC, INS // Call base.
2092
2103
  | beq >2
@@ -2101,23 +2112,20 @@ static void build_subroutines(BuildCtx *ctx)
2101
2112
  | decode_RA8 RA, INS
2102
2113
  | decode_RB8 RB, INS
2103
2114
  | add RA, RA, RB
2104
- | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
2105
2115
  |3:
2106
2116
  | cmp RA, RC
2107
2117
  | mvn CARG2, #~LJ_TNIL
2108
2118
  | bhi >9 // More results wanted?
2109
2119
  |
2110
- | ldr TRACE:RA, [CARG1, CARG3, lsl #2]
2111
- | cmp TRACE:RA, #0
2112
- | beq ->cont_nop
2113
- | ldrh RC, TRACE:RA->link
2114
- | cmp RC, CARG3
2120
+ | ldrh RA, TRACE:CARG3->traceno
2121
+ | ldrh RC, TRACE:CARG3->link
2122
+ | cmp RC, RA
2115
2123
  | beq ->cont_nop // Blacklisted.
2116
2124
  | cmp RC, #0
2117
2125
  | bne =>BC_JLOOP // Jump to stitched trace.
2118
2126
  |
2119
2127
  | // Stitch a new trace to the previous trace.
2120
- | str CARG3, [DISPATCH, #DISPATCH_J(exitno)]
2128
+ | str RA, [DISPATCH, #DISPATCH_J(exitno)]
2121
2129
  | str L, [DISPATCH, #DISPATCH_J(L)]
2122
2130
  | str BASE, L->base
2123
2131
  | sub CARG1, DISPATCH, #-GG_DISP2J
@@ -1,6 +1,6 @@
1
1
  |// Low-level VM code for ARM64 CPUs.
2
2
  |// Bytecode interpreter, fast functions and helper functions.
3
- |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
3
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
4
4
  |
5
5
  |.arch arm64
6
6
  |.section code_op, code_sub
@@ -1,6 +1,9 @@
1
1
  |// Low-level VM code for MIPS CPUs.
2
2
  |// Bytecode interpreter, fast functions and helper functions.
3
- |// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
3
+ |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
4
+ |//
5
+ |// MIPS soft-float support contributed by Djordje Kovacevic and
6
+ |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4
7
  |
5
8
  |.arch mips
6
9
  |.section code_op, code_sub
@@ -18,6 +21,12 @@
18
21
  |// Fixed register assignments for the interpreter.
19
22
  |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20
23
  |
24
+ |.macro .FPU, a, b
25
+ |.if FPU
26
+ | a, b
27
+ |.endif
28
+ |.endmacro
29
+ |
21
30
  |// The following must be C callee-save (but BASE is often refetched).
22
31
  |.define BASE, r16 // Base of current Lua stack frame.
23
32
  |.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
25
34
  |.define DISPATCH, r19 // Opcode dispatch table.
26
35
  |.define LREG, r20 // Register holding lua_State (also in SAVE_L).
27
36
  |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
28
- |// NYI: r22 currently unused.
29
37
  |
30
38
  |.define JGL, r30 // On-trace: global_State + 32768.
31
39
  |
32
40
  |// Constants for type-comparisons, stores and conversions. C callee-save.
41
+ |.define TISNUM, r22
33
42
  |.define TISNIL, r30
43
+ |.if FPU
34
44
  |.define TOBIT, f30 // 2^52 + 2^51.
45
+ |.endif
35
46
  |
36
47
  |// The following temporaries are not saved across C calls, except for RA.
37
48
  |.define RA, r23 // Callee-save.
@@ -56,13 +67,33 @@
56
67
  |.define CRET1, r2
57
68
  |.define CRET2, r3
58
69
  |
70
+ |.if ENDIAN_LE
71
+ |.define SFRETLO, CRET1
72
+ |.define SFRETHI, CRET2
73
+ |.define SFARG1LO, CARG1
74
+ |.define SFARG1HI, CARG2
75
+ |.define SFARG2LO, CARG3
76
+ |.define SFARG2HI, CARG4
77
+ |.else
78
+ |.define SFRETLO, CRET2
79
+ |.define SFRETHI, CRET1
80
+ |.define SFARG1LO, CARG2
81
+ |.define SFARG1HI, CARG1
82
+ |.define SFARG2LO, CARG4
83
+ |.define SFARG2HI, CARG3
84
+ |.endif
85
+ |
86
+ |.if FPU
59
87
  |.define FARG1, f12
60
88
  |.define FARG2, f14
61
89
  |
62
90
  |.define FRET1, f0
63
91
  |.define FRET2, f2
92
+ |.endif
64
93
  |
65
94
  |// Stack layout while in interpreter. Must match with lj_frame.h.
95
+ |.if FPU // MIPS32 hard-float.
96
+ |
66
97
  |.define CFRAME_SPACE, 112 // Delta for sp.
67
98
  |
68
99
  |.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
72
103
  |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
73
104
  |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
74
105
  |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
106
+ |
107
+ |.else // MIPS32 soft-float
108
+ |
109
+ |.define CFRAME_SPACE, 64 // Delta for sp.
110
+ |
111
+ |.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
112
+ |.define SAVE_NRES, 72(sp)
113
+ |.define SAVE_CFRAME, 68(sp)
114
+ |.define SAVE_L, 64(sp)
115
+ |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
116
+ |.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
117
+ |
118
+ |.endif
119
+ |
75
120
  |.define SAVE_PC, 20(sp)
76
121
  |.define ARG5, 16(sp)
77
122
  |.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
83
128
  |.define ARG5_OFS, 16
84
129
  |.define SAVE_MULTRES, ARG5
85
130
  |
131
+ |//-----------------------------------------------------------------------
132
+ |
86
133
  |.macro saveregs
87
134
  | addiu sp, sp, -CFRAME_SPACE
88
135
  | sw ra, SAVE_GPR_+9*4(sp)
89
136
  | sw r30, SAVE_GPR_+8*4(sp)
90
- | sdc1 f30, SAVE_FPR_+5*8(sp)
137
+ | .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91
138
  | sw r23, SAVE_GPR_+7*4(sp)
92
139
  | sw r22, SAVE_GPR_+6*4(sp)
93
- | sdc1 f28, SAVE_FPR_+4*8(sp)
140
+ | .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94
141
  | sw r21, SAVE_GPR_+5*4(sp)
95
142
  | sw r20, SAVE_GPR_+4*4(sp)
96
- | sdc1 f26, SAVE_FPR_+3*8(sp)
143
+ | .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97
144
  | sw r19, SAVE_GPR_+3*4(sp)
98
145
  | sw r18, SAVE_GPR_+2*4(sp)
99
- | sdc1 f24, SAVE_FPR_+2*8(sp)
146
+ | .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100
147
  | sw r17, SAVE_GPR_+1*4(sp)
101
148
  | sw r16, SAVE_GPR_+0*4(sp)
102
- | sdc1 f22, SAVE_FPR_+1*8(sp)
103
- | sdc1 f20, SAVE_FPR_+0*8(sp)
149
+ | .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
150
+ | .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104
151
  |.endmacro
105
152
  |
106
153
  |.macro restoreregs_ret
107
154
  | lw ra, SAVE_GPR_+9*4(sp)
108
155
  | lw r30, SAVE_GPR_+8*4(sp)
109
- | ldc1 f30, SAVE_FPR_+5*8(sp)
156
+ | .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110
157
  | lw r23, SAVE_GPR_+7*4(sp)
111
158
  | lw r22, SAVE_GPR_+6*4(sp)
112
- | ldc1 f28, SAVE_FPR_+4*8(sp)
159
+ | .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113
160
  | lw r21, SAVE_GPR_+5*4(sp)
114
161
  | lw r20, SAVE_GPR_+4*4(sp)
115
- | ldc1 f26, SAVE_FPR_+3*8(sp)
162
+ | .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116
163
  | lw r19, SAVE_GPR_+3*4(sp)
117
164
  | lw r18, SAVE_GPR_+2*4(sp)
118
- | ldc1 f24, SAVE_FPR_+2*8(sp)
165
+ | .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119
166
  | lw r17, SAVE_GPR_+1*4(sp)
120
167
  | lw r16, SAVE_GPR_+0*4(sp)
121
- | ldc1 f22, SAVE_FPR_+1*8(sp)
122
- | ldc1 f20, SAVE_FPR_+0*8(sp)
168
+ | .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
169
+ | .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123
170
  | jr ra
124
171
  | addiu sp, sp, CFRAME_SPACE
125
172
  |.endmacro
@@ -153,13 +200,23 @@
153
200
  |//-----------------------------------------------------------------------
154
201
  |
155
202
  |// Endian-specific defines.
156
- |.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8)
157
- |.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4)
158
- |.define HI, LJ_ENDIAN_SELECT(4,0)
159
- |.define LO, LJ_ENDIAN_SELECT(0,4)
160
- |.define OFS_RD, LJ_ENDIAN_SELECT(2,0)
161
- |.define OFS_RA, LJ_ENDIAN_SELECT(1,2)
162
- |.define OFS_OP, LJ_ENDIAN_SELECT(0,3)
203
+ |.if ENDIAN_LE
204
+ |.define FRAME_PC, -4
205
+ |.define FRAME_FUNC, -8
206
+ |.define HI, 4
207
+ |.define LO, 0
208
+ |.define OFS_RD, 2
209
+ |.define OFS_RA, 1
210
+ |.define OFS_OP, 0
211
+ |.else
212
+ |.define FRAME_PC, -8
213
+ |.define FRAME_FUNC, -4
214
+ |.define HI, 0
215
+ |.define LO, 4
216
+ |.define OFS_RD, 0
217
+ |.define OFS_RA, 2
218
+ |.define OFS_OP, 3
219
+ |.endif
163
220
  |
164
221
  |// Instruction decode.
165
222
  |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -354,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
354
411
  |. sll TMP2, TMP2, 3
355
412
  |1:
356
413
  | addiu TMP1, TMP1, -8
357
- | ldc1 f0, 0(RA)
414
+ | lw SFRETHI, HI(RA)
415
+ | lw SFRETLO, LO(RA)
358
416
  | addiu RA, RA, 8
359
- | sdc1 f0, 0(BASE)
417
+ | sw SFRETHI, HI(BASE)
418
+ | sw SFRETLO, LO(BASE)
360
419
  | bnez TMP1, <1
361
420
  |. addiu BASE, BASE, 8
362
421
  |
@@ -425,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
425
484
  | and sp, CARG1, AT
426
485
  |->vm_unwind_ff_eh: // Landing pad for external unwinder.
427
486
  | lw L, SAVE_L
428
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
487
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
429
489
  | li TISNIL, LJ_TNIL
430
490
  | lw BASE, L->base
431
491
  | lw DISPATCH, L->glref // Setup pointer to dispatch table.
432
- | mtc1 TMP3, TOBIT
492
+ | .FPU mtc1 TMP3, TOBIT
433
493
  | li TMP1, LJ_TFALSE
434
494
  | li_vmstate INTERP
435
495
  | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
436
- | cvt.d.s TOBIT, TOBIT
496
+ | .FPU cvt.d.s TOBIT, TOBIT
437
497
  | addiu RA, BASE, -8 // Results start at BASE-8.
438
498
  | addiu DISPATCH, DISPATCH, GG_G2DISP
439
499
  | sw TMP1, HI(RA) // Prepend false to error message.
@@ -496,13 +556,14 @@ static void build_subroutines(BuildCtx *ctx)
496
556
  | sw L, DISPATCH_GL(cur_L)(DISPATCH)
497
557
  | move RA, BASE
498
558
  | lw BASE, L->base
559
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
499
560
  | lw TMP1, L->top
500
561
  | lw PC, FRAME_PC(BASE)
501
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
562
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
502
563
  | subu RD, TMP1, BASE
503
- | mtc1 TMP3, TOBIT
564
+ | .FPU mtc1 TMP3, TOBIT
504
565
  | sb r0, L->status
505
- | cvt.d.s TOBIT, TOBIT
566
+ | .FPU cvt.d.s TOBIT, TOBIT
506
567
  | li_vmstate INTERP
507
568
  | addiu RD, RD, 8
508
569
  | st_vmstate
@@ -540,13 +601,14 @@ static void build_subroutines(BuildCtx *ctx)
540
601
  |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
541
602
  | sw L, DISPATCH_GL(cur_L)(DISPATCH)
542
603
  | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
543
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
604
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
605
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
544
606
  | lw TMP1, L->top
545
- | mtc1 TMP3, TOBIT
607
+ | .FPU mtc1 TMP3, TOBIT
546
608
  | addu PC, PC, BASE
547
609
  | subu NARGS8:RC, TMP1, BASE
548
610
  | subu PC, PC, TMP2 // PC = frame delta + frame type
549
- | cvt.d.s TOBIT, TOBIT
611
+ | .FPU cvt.d.s TOBIT, TOBIT
550
612
  | li_vmstate INTERP
551
613
  | li TISNIL, LJ_TNIL
552
614
  | st_vmstate
@@ -628,7 +690,8 @@ static void build_subroutines(BuildCtx *ctx)
628
690
  |->cont_cat: // RA = resultptr, RB = meta base
629
691
  | lw INS, -4(PC)
630
692
  | addiu CARG2, RB, -16
631
- | ldc1 f0, 0(RA)
693
+ | lw SFRETHI, HI(RA)
694
+ | lw SFRETLO, LO(RA)
632
695
  | decode_RB8a MULTRES, INS
633
696
  | decode_RA8a RA, INS
634
697
  | decode_RB8b MULTRES
@@ -636,11 +699,13 @@ static void build_subroutines(BuildCtx *ctx)
636
699
  | addu TMP1, BASE, MULTRES
637
700
  | sw BASE, L->base
638
701
  | subu CARG3, CARG2, TMP1
702
+ | sw SFRETHI, HI(CARG2)
639
703
  | bne TMP1, CARG2, ->BC_CAT_Z
640
- |. sdc1 f0, 0(CARG2)
704
+ |. sw SFRETLO, LO(CARG2)
641
705
  | addu RA, BASE, RA
706
+ | sw SFRETHI, HI(RA)
642
707
  | b ->cont_nop
643
- |. sdc1 f0, 0(RA)
708
+ |. sw SFRETLO, LO(RA)
644
709
  |
645
710
  |//-- Table indexing metamethods -----------------------------------------
646
711
  |
@@ -663,10 +728,9 @@ static void build_subroutines(BuildCtx *ctx)
663
728
  |. sw TMP1, HI(CARG3)
664
729
  |
665
730
  |->vmeta_tgetb: // TMP0 = index
666
- | mtc1 TMP0, f0
667
- | cvt.d.w f0, f0
668
731
  | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
669
- | sdc1 f0, 0(CARG3)
732
+ | sw TMP0, LO(CARG3)
733
+ | sw TISNUM, HI(CARG3)
670
734
  |
671
735
  |->vmeta_tgetv:
672
736
  |1:
@@ -678,9 +742,11 @@ static void build_subroutines(BuildCtx *ctx)
678
742
  | // Returns TValue * (finished) or NULL (metamethod).
679
743
  | beqz CRET1, >3
680
744
  |. addiu TMP1, BASE, -FRAME_CONT
681
- | ldc1 f0, 0(CRET1)
745
+ | lw SFARG1HI, HI(CRET1)
746
+ | lw SFARG2HI, LO(CRET1)
682
747
  | ins_next1
683
- | sdc1 f0, 0(RA)
748
+ | sw SFARG1HI, HI(RA)
749
+ | sw SFARG2HI, LO(RA)
684
750
  | ins_next2
685
751
  |
686
752
  |3: // Call __index metamethod.
@@ -697,10 +763,11 @@ static void build_subroutines(BuildCtx *ctx)
697
763
  | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
698
764
  |. nop
699
765
  | // Returns cTValue * or NULL.
700
- | beqz CRET1, >1
701
- |. nop
766
+ | beqz CRET1, ->BC_TGETR_Z
767
+ |. move SFARG2HI, TISNIL
768
+ | lw SFARG2HI, HI(CRET1)
702
769
  | b ->BC_TGETR_Z
703
- |. ldc1 f0, 0(CRET1)
770
+ |. lw SFARG2LO, LO(CRET1)
704
771
  |
705
772
  |//-----------------------------------------------------------------------
706
773
  |
@@ -723,10 +790,9 @@ static void build_subroutines(BuildCtx *ctx)
723
790
  |. sw TMP1, HI(CARG3)
724
791
  |
725
792
  |->vmeta_tsetb: // TMP0 = index
726
- | mtc1 TMP0, f0
727
- | cvt.d.w f0, f0
728
793
  | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
729
- | sdc1 f0, 0(CARG3)
794
+ | sw TMP0, LO(CARG3)
795
+ | sw TISNUM, HI(CARG3)
730
796
  |
731
797
  |->vmeta_tsetv:
732
798
  |1:
@@ -736,11 +802,13 @@ static void build_subroutines(BuildCtx *ctx)
736
802
  | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
737
803
  |. move CARG1, L
738
804
  | // Returns TValue * (finished) or NULL (metamethod).
805
+ | lw SFARG1HI, HI(RA)
739
806
  | beqz CRET1, >3
740
- |. ldc1 f0, 0(RA)
807
+ |. lw SFARG1LO, LO(RA)
741
808
  | // NOBARRIER: lj_meta_tset ensures the table is not black.
742
809
  | ins_next1
743
- | sdc1 f0, 0(CRET1)
810
+ | sw SFARG1HI, HI(CRET1)
811
+ | sw SFARG1LO, LO(CRET1)
744
812
  | ins_next2
745
813
  |
746
814
  |3: // Call __newindex metamethod.
@@ -750,7 +818,8 @@ static void build_subroutines(BuildCtx *ctx)
750
818
  | sw PC, -16+HI(BASE) // [cont|PC]
751
819
  | subu PC, BASE, TMP1
752
820
  | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
753
- | sdc1 f0, 16(BASE) // Copy value to third argument.
821
+ | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
822
+ | sw SFARG1LO, 16+LO(BASE)
754
823
  | b ->vm_call_dispatch_f
755
824
  |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
756
825
  |
@@ -767,7 +836,9 @@ static void build_subroutines(BuildCtx *ctx)
767
836
  |//-- Comparison metamethods ---------------------------------------------
768
837
  |
769
838
  |->vmeta_comp:
770
- | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT.
839
+ | // RA/RD point to o1/o2.
840
+ | move CARG2, RA
841
+ | move CARG3, RD
771
842
  | load_got lj_meta_comp
772
843
  | addiu PC, PC, -4
773
844
  | sw BASE, L->base
@@ -793,11 +864,13 @@ static void build_subroutines(BuildCtx *ctx)
793
864
  |
794
865
  |->cont_ra: // RA = resultptr
795
866
  | lbu TMP1, -4+OFS_RA(PC)
796
- | ldc1 f0, 0(RA)
867
+ | lw SFRETHI, HI(RA)
868
+ | lw SFRETLO, LO(RA)
797
869
  | sll TMP1, TMP1, 3
798
870
  | addu TMP1, BASE, TMP1
871
+ | sw SFRETHI, HI(TMP1)
799
872
  | b ->cont_nop
800
- |. sdc1 f0, 0(TMP1)
873
+ |. sw SFRETLO, LO(TMP1)
801
874
  |
802
875
  |->cont_condt: // RA = resultptr
803
876
  | lw TMP0, HI(RA)
@@ -812,8 +885,11 @@ static void build_subroutines(BuildCtx *ctx)
812
885
  |. addiu TMP2, AT, -1 // Branch if result is false.
813
886
  |
814
887
  |->vmeta_equal:
815
- | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
888
+ | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
816
889
  | load_got lj_meta_equal
890
+ | move CARG2, SFARG1LO
891
+ | move CARG3, SFARG2LO
892
+ | move CARG4, TMP0
817
893
  | addiu PC, PC, -4
818
894
  | sw BASE, L->base
819
895
  | sw PC, SAVE_PC
@@ -852,14 +928,16 @@ static void build_subroutines(BuildCtx *ctx)
852
928
  |//-- Arithmetic metamethods ---------------------------------------------
853
929
  |
854
930
  |->vmeta_unm:
855
- | move CARG4, CARG3
931
+ | move RC, RB
856
932
  |
857
933
  |->vmeta_arith:
858
934
  | load_got lj_meta_arith
859
935
  | decode_OP1 TMP0, INS
860
936
  | sw BASE, L->base
861
- | sw PC, SAVE_PC
862
937
  | move CARG2, RA
938
+ | sw PC, SAVE_PC
939
+ | move CARG3, RB
940
+ | move CARG4, RC
863
941
  | sw TMP0, ARG5
864
942
  | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
865
943
  |. move CARG1, L
@@ -967,40 +1045,52 @@ static void build_subroutines(BuildCtx *ctx)
967
1045
  |
968
1046
  |.macro .ffunc_1, name
969
1047
  |->ff_ .. name:
1048
+ | lw SFARG1HI, HI(BASE)
970
1049
  | beqz NARGS8:RC, ->fff_fallback
971
- |. lw CARG3, HI(BASE)
972
- | lw CARG1, LO(BASE)
1050
+ |. lw SFARG1LO, LO(BASE)
973
1051
  |.endmacro
974
1052
  |
975
1053
  |.macro .ffunc_2, name
976
1054
  |->ff_ .. name:
977
1055
  | sltiu AT, NARGS8:RC, 16
978
- | lw CARG3, HI(BASE)
1056
+ | lw SFARG1HI, HI(BASE)
979
1057
  | bnez AT, ->fff_fallback
980
- |. lw CARG4, 8+HI(BASE)
981
- | lw CARG1, LO(BASE)
982
- | lw CARG2, 8+LO(BASE)
1058
+ |. lw SFARG2HI, 8+HI(BASE)
1059
+ | lw SFARG1LO, LO(BASE)
1060
+ | lw SFARG2LO, 8+LO(BASE)
983
1061
  |.endmacro
984
1062
  |
985
1063
  |.macro .ffunc_n, name // Caveat: has delay slot!
986
1064
  |->ff_ .. name:
987
- | lw CARG3, HI(BASE)
1065
+ | lw SFARG1HI, HI(BASE)
1066
+ |.if FPU
1067
+ | ldc1 FARG1, 0(BASE)
1068
+ |.else
1069
+ | lw SFARG1LO, LO(BASE)
1070
+ |.endif
988
1071
  | beqz NARGS8:RC, ->fff_fallback
989
- |. ldc1 FARG1, 0(BASE)
990
- | sltiu AT, CARG3, LJ_TISNUM
1072
+ |. sltiu AT, SFARG1HI, LJ_TISNUM
991
1073
  | beqz AT, ->fff_fallback
992
1074
  |.endmacro
993
1075
  |
994
1076
  |.macro .ffunc_nn, name // Caveat: has delay slot!
995
1077
  |->ff_ .. name:
996
1078
  | sltiu AT, NARGS8:RC, 16
997
- | lw CARG3, HI(BASE)
1079
+ | lw SFARG1HI, HI(BASE)
998
1080
  | bnez AT, ->fff_fallback
999
- |. lw CARG4, 8+HI(BASE)
1000
- | ldc1 FARG1, 0(BASE)
1001
- | ldc1 FARG2, 8(BASE)
1002
- | sltiu TMP0, CARG3, LJ_TISNUM
1003
- | sltiu TMP1, CARG4, LJ_TISNUM
1081
+ |. lw SFARG2HI, 8+HI(BASE)
1082
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
1083
+ |.if FPU
1084
+ | ldc1 FARG1, 0(BASE)
1085
+ |.else
1086
+ | lw SFARG1LO, LO(BASE)
1087
+ |.endif
1088
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
1089
+ |.if FPU
1090
+ | ldc1 FARG2, 8(BASE)
1091
+ |.else
1092
+ | lw SFARG2LO, 8+LO(BASE)
1093
+ |.endif
1004
1094
  | and TMP0, TMP0, TMP1
1005
1095
  | beqz TMP0, ->fff_fallback
1006
1096
  |.endmacro
@@ -1016,52 +1106,54 @@ static void build_subroutines(BuildCtx *ctx)
1016
1106
  |//-- Base library: checks -----------------------------------------------
1017
1107
  |
1018
1108
  |.ffunc_1 assert
1019
- | sltiu AT, CARG3, LJ_TISTRUECOND
1109
+ | sltiu AT, SFARG1HI, LJ_TISTRUECOND
1020
1110
  | beqz AT, ->fff_fallback
1021
1111
  |. addiu RA, BASE, -8
1022
1112
  | lw PC, FRAME_PC(BASE)
1023
1113
  | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1024
1114
  | addu TMP2, RA, NARGS8:RC
1025
- | sw CARG3, HI(RA)
1115
+ | sw SFARG1HI, HI(RA)
1026
1116
  | addiu TMP1, BASE, 8
1027
1117
  | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
1028
- |. sw CARG1, LO(RA)
1118
+ |. sw SFARG1LO, LO(RA)
1029
1119
  |1:
1030
- | ldc1 f0, 0(TMP1)
1031
- | sdc1 f0, -8(TMP1)
1120
+ | lw SFRETHI, HI(TMP1)
1121
+ | lw SFRETLO, LO(TMP1)
1122
+ | sw SFRETHI, -8+HI(TMP1)
1123
+ | sw SFRETLO, -8+LO(TMP1)
1032
1124
  | bne TMP1, TMP2, <1
1033
1125
  |. addiu TMP1, TMP1, 8
1034
1126
  | b ->fff_res
1035
1127
  |. nop
1036
1128
  |
1037
1129
  |.ffunc type
1038
- | lw CARG3, HI(BASE)
1039
- | li TMP1, LJ_TISNUM
1130
+ | lw SFARG1HI, HI(BASE)
1040
1131
  | beqz NARGS8:RC, ->fff_fallback
1041
- |. sltiu TMP0, CARG3, LJ_TISNUM
1042
- | movz TMP1, CARG3, TMP0
1043
- | not TMP1, TMP1
1132
+ |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1133
+ | movn SFARG1HI, TISNUM, TMP0
1134
+ | not TMP1, SFARG1HI
1044
1135
  | sll TMP1, TMP1, 3
1045
1136
  | addu TMP1, CFUNC:RB, TMP1
1046
- | b ->fff_resn
1047
- |. ldc1 FRET1, CFUNC:TMP1->upvalue
1137
+ | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1138
+ | b ->fff_restv
1139
+ |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1048
1140
  |
1049
1141
  |//-- Base library: getters and setters ---------------------------------
1050
1142
  |
1051
1143
  |.ffunc_1 getmetatable
1052
1144
  | li AT, LJ_TTAB
1053
- | bne CARG3, AT, >6
1145
+ | bne SFARG1HI, AT, >6
1054
1146
  |. li AT, LJ_TUDATA
1055
1147
  |1: // Field metatable must be at same offset for GCtab and GCudata!
1056
- | lw TAB:CARG1, TAB:CARG1->metatable
1148
+ | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1057
1149
  |2:
1058
1150
  | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1059
- | beqz TAB:CARG1, ->fff_restv
1060
- |. li CARG3, LJ_TNIL
1061
- | lw TMP0, TAB:CARG1->hmask
1062
- | li CARG3, LJ_TTAB // Use metatable as default result.
1151
+ | beqz TAB:SFARG1LO, ->fff_restv
1152
+ |. li SFARG1HI, LJ_TNIL
1153
+ | lw TMP0, TAB:SFARG1LO->hmask
1154
+ | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1063
1155
  | lw TMP1, STR:RC->hash
1064
- | lw NODE:TMP2, TAB:CARG1->node
1156
+ | lw NODE:TMP2, TAB:SFARG1LO->node
1065
1157
  | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
1066
1158
  | sll TMP0, TMP1, 5
1067
1159
  | sll TMP1, TMP1, 3
@@ -1073,7 +1165,7 @@ static void build_subroutines(BuildCtx *ctx)
1073
1165
  | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1074
1166
  | lw NODE:TMP3, NODE:TMP2->next
1075
1167
  | bne CARG4, AT, >4
1076
- |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
1168
+ |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1077
1169
  | beq TMP0, STR:RC, >5
1078
1170
  |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1079
1171
  |4:
@@ -1082,36 +1174,35 @@ static void build_subroutines(BuildCtx *ctx)
1082
1174
  | b <3
1083
1175
  |. nop
1084
1176
  |5:
1085
- | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value.
1177
+ | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1086
1178
  |. nop
1087
- | move CARG3, CARG2 // Return value of mt.__metatable.
1179
+ | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1088
1180
  | b ->fff_restv
1089
- |. move CARG1, TMP1
1181
+ |. move SFARG1LO, TMP1
1090
1182
  |
1091
1183
  |6:
1092
- | beq CARG3, AT, <1
1093
- |. sltiu TMP0, CARG3, LJ_TISNUM
1094
- | li TMP1, LJ_TISNUM
1095
- | movz TMP1, CARG3, TMP0
1096
- | not TMP1, TMP1
1184
+ | beq SFARG1HI, AT, <1
1185
+ |. sltu AT, TISNUM, SFARG1HI
1186
+ | movz SFARG1HI, TISNUM, AT
1187
+ | not TMP1, SFARG1HI
1097
1188
  | sll TMP1, TMP1, 2
1098
1189
  | addu TMP1, DISPATCH, TMP1
1099
1190
  | b <2
1100
- |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1191
+ |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1101
1192
  |
1102
1193
  |.ffunc_2 setmetatable
1103
1194
  | // Fast path: no mt for table yet and not clearing the mt.
1104
1195
  | li AT, LJ_TTAB
1105
- | bne CARG3, AT, ->fff_fallback
1106
- |. addiu CARG4, CARG4, -LJ_TTAB
1107
- | lw TAB:TMP1, TAB:CARG1->metatable
1108
- | lbu TMP3, TAB:CARG1->marked
1109
- | or AT, CARG4, TAB:TMP1
1196
+ | bne SFARG1HI, AT, ->fff_fallback
1197
+ |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
1198
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
1199
+ | lbu TMP3, TAB:SFARG1LO->marked
1200
+ | or AT, SFARG2HI, TAB:TMP1
1110
1201
  | bnez AT, ->fff_fallback
1111
1202
  |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1112
1203
  | beqz AT, ->fff_restv
1113
- |. sw TAB:CARG2, TAB:CARG1->metatable
1114
- | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv
1204
+ |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
1205
+ | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1115
1206
  |
1116
1207
  |.ffunc rawget
1117
1208
  | lw CARG4, HI(BASE)
@@ -1125,44 +1216,44 @@ static void build_subroutines(BuildCtx *ctx)
1125
1216
  | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1126
1217
  |. move CARG1, L
1127
1218
  | // Returns cTValue *.
1128
- | b ->fff_resn
1129
- |. ldc1 FRET1, 0(CRET1)
1219
+ | lw SFARG1HI, HI(CRET1)
1220
+ | b ->fff_restv
1221
+ |. lw SFARG1LO, LO(CRET1)
1130
1222
  |
1131
1223
  |//-- Base library: conversions ------------------------------------------
1132
1224
  |
1133
1225
  |.ffunc tonumber
1134
1226
  | // Only handles the number case inline (without a base argument).
1135
1227
  | lw CARG1, HI(BASE)
1136
- | xori AT, NARGS8:RC, 8
1137
- | sltiu CARG1, CARG1, LJ_TISNUM
1138
- | movn CARG1, r0, AT
1139
- | beqz CARG1, ->fff_fallback // Exactly one number argument.
1140
- |. ldc1 FRET1, 0(BASE)
1141
- | b ->fff_resn
1142
- |. nop
1228
+ | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1229
+ | sltu TMP0, TISNUM, CARG1
1230
+ | or AT, AT, TMP0
1231
+ | bnez AT, ->fff_fallback
1232
+ |. lw SFARG1HI, HI(BASE)
1233
+ | b ->fff_restv
1234
+ |. lw SFARG1LO, LO(BASE)
1143
1235
  |
1144
1236
  |.ffunc_1 tostring
1145
1237
  | // Only handles the string or number case inline.
1146
1238
  | li AT, LJ_TSTR
1147
1239
  | // A __tostring method in the string base metatable is ignored.
1148
- | beq CARG3, AT, ->fff_restv // String key?
1240
+ | beq SFARG1HI, AT, ->fff_restv // String key?
1149
1241
  | // Handle numbers inline, unless a number base metatable is present.
1150
1242
  |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1151
- | sltiu TMP0, CARG3, LJ_TISNUM
1152
- | sltiu TMP1, TMP1, 1
1153
- | and TMP0, TMP0, TMP1
1154
- | beqz TMP0, ->fff_fallback
1243
+ | sltu TMP0, TISNUM, SFARG1HI
1244
+ | or TMP0, TMP0, TMP1
1245
+ | bnez TMP0, ->fff_fallback
1155
1246
  |. sw BASE, L->base // Add frame since C call can throw.
1156
1247
  | ffgccheck
1157
1248
  |. sw PC, SAVE_PC // Redundant (but a defined value).
1158
- | load_got lj_strfmt_num
1249
+ | load_got lj_strfmt_number
1159
1250
  | move CARG1, L
1160
- | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np)
1251
+ | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1161
1252
  |. move CARG2, BASE
1162
1253
  | // Returns GCstr *.
1163
- | li CARG3, LJ_TSTR
1254
+ | li SFARG1HI, LJ_TSTR
1164
1255
  | b ->fff_restv
1165
- |. move CARG1, CRET1
1256
+ |. move SFARG1LO, CRET1
1166
1257
  |
1167
1258
  |//-- Base library: iterators -------------------------------------------
1168
1259
  |
@@ -1184,31 +1275,38 @@ static void build_subroutines(BuildCtx *ctx)
1184
1275
  |. move CARG1, L
1185
1276
  | // Returns 0 at end of traversal.
1186
1277
  | beqz CRET1, ->fff_restv // End of traversal: return nil.
1187
- |. li CARG3, LJ_TNIL
1188
- | ldc1 f0, 8(BASE) // Copy key and value to results.
1278
+ |. li SFARG1HI, LJ_TNIL
1279
+ | lw TMP0, 8+HI(BASE)
1280
+ | lw TMP1, 8+LO(BASE)
1189
1281
  | addiu RA, BASE, -8
1190
- | ldc1 f2, 16(BASE)
1191
- | li RD, (2+1)*8
1192
- | sdc1 f0, 0(RA)
1282
+ | lw TMP2, 16+HI(BASE)
1283
+ | lw TMP3, 16+LO(BASE)
1284
+ | sw TMP0, HI(RA)
1285
+ | sw TMP1, LO(RA)
1286
+ | sw TMP2, 8+HI(RA)
1287
+ | sw TMP3, 8+LO(RA)
1193
1288
  | b ->fff_res
1194
- |. sdc1 f2, 8(RA)
1289
+ |. li RD, (2+1)*8
1195
1290
  |
1196
1291
  |.ffunc_1 pairs
1197
1292
  | li AT, LJ_TTAB
1198
- | bne CARG3, AT, ->fff_fallback
1293
+ | bne SFARG1HI, AT, ->fff_fallback
1199
1294
  |. lw PC, FRAME_PC(BASE)
1200
1295
  #if LJ_52
1201
- | lw TAB:TMP2, TAB:CARG1->metatable
1202
- | ldc1 f0, CFUNC:RB->upvalue[0]
1296
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
1297
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1298
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1203
1299
  | bnez TAB:TMP2, ->fff_fallback
1204
1300
  #else
1205
- | ldc1 f0, CFUNC:RB->upvalue[0]
1301
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1302
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1206
1303
  #endif
1207
1304
  |. addiu RA, BASE, -8
1208
1305
  | sw TISNIL, 8+HI(BASE)
1209
- | li RD, (3+1)*8
1306
+ | sw TMP0, HI(RA)
1307
+ | sw TMP1, LO(RA)
1210
1308
  | b ->fff_res
1211
- |. sdc1 f0, 0(RA)
1309
+ |. li RD, (3+1)*8
1212
1310
  |
1213
1311
  |.ffunc ipairs_aux
1214
1312
  | sltiu AT, NARGS8:RC, 16
@@ -1216,35 +1314,32 @@ static void build_subroutines(BuildCtx *ctx)
1216
1314
  | lw TAB:CARG1, LO(BASE)
1217
1315
  | lw CARG4, 8+HI(BASE)
1218
1316
  | bnez AT, ->fff_fallback
1219
- |. ldc1 FARG2, 8(BASE)
1220
- | addiu CARG3, CARG3, -LJ_TTAB
1221
- | sltiu AT, CARG4, LJ_TISNUM
1222
- | li TMP0, 1
1223
- | movn AT, r0, CARG3
1224
- | mtc1 TMP0, FARG1
1225
- | beqz AT, ->fff_fallback
1317
+ |. addiu CARG3, CARG3, -LJ_TTAB
1318
+ | xor CARG4, CARG4, TISNUM
1319
+ | and AT, CARG3, CARG4
1320
+ | bnez AT, ->fff_fallback
1226
1321
  |. lw PC, FRAME_PC(BASE)
1227
- | trunc.w.d FRET1, FARG2
1228
- | cvt.d.w FARG1, FARG1
1322
+ | lw TMP2, 8+LO(BASE)
1229
1323
  | lw TMP0, TAB:CARG1->asize
1230
1324
  | lw TMP1, TAB:CARG1->array
1231
- | mfc1 TMP2, FRET1
1232
- | addiu RA, BASE, -8
1233
- | add.d FARG2, FARG2, FARG1
1234
1325
  | addiu TMP2, TMP2, 1
1326
+ | sw TISNUM, -8+HI(BASE)
1235
1327
  | sltu AT, TMP2, TMP0
1328
+ | sw TMP2, -8+LO(BASE)
1329
+ | beqz AT, >2 // Not in array part?
1330
+ |. addiu RA, BASE, -8
1236
1331
  | sll TMP3, TMP2, 3
1237
1332
  | addu TMP3, TMP1, TMP3
1238
- | beqz AT, >2 // Not in array part?
1239
- |. sdc1 FARG2, 0(RA)
1240
- | lw TMP2, HI(TMP3)
1241
- | ldc1 f0, 0(TMP3)
1333
+ | lw TMP1, HI(TMP3)
1334
+ | lw TMP2, LO(TMP3)
1242
1335
  |1:
1243
- | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results.
1336
+ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1244
1337
  |. li RD, (0+1)*8
1245
- | li RD, (2+1)*8
1338
+ | sw TMP1, 8+HI(RA)
1339
+ | sw TMP2, 8+LO(RA)
1246
1340
  | b ->fff_res
1247
- |. sdc1 f0, 8(RA)
1341
+ |. li RD, (2+1)*8
1342
+ |
1248
1343
  |2: // Check for empty hash part first. Otherwise call C function.
1249
1344
  | lw TMP0, TAB:CARG1->hmask
1250
1345
  | load_got lj_tab_getinth
@@ -1255,27 +1350,30 @@ static void build_subroutines(BuildCtx *ctx)
1255
1350
  | // Returns cTValue * or NULL.
1256
1351
  | beqz CRET1, ->fff_res
1257
1352
  |. li RD, (0+1)*8
1258
- | lw TMP2, HI(CRET1)
1353
+ | lw TMP1, HI(CRET1)
1259
1354
  | b <1
1260
- |. ldc1 f0, 0(CRET1)
1355
+ |. lw TMP2, LO(CRET1)
1261
1356
  |
1262
1357
  |.ffunc_1 ipairs
1263
1358
  | li AT, LJ_TTAB
1264
- | bne CARG3, AT, ->fff_fallback
1359
+ | bne SFARG1HI, AT, ->fff_fallback
1265
1360
  |. lw PC, FRAME_PC(BASE)
1266
1361
  #if LJ_52
1267
- | lw TAB:TMP2, TAB:CARG1->metatable
1268
- | ldc1 f0, CFUNC:RB->upvalue[0]
1362
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
1363
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1364
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1269
1365
  | bnez TAB:TMP2, ->fff_fallback
1270
1366
  #else
1271
- | ldc1 f0, CFUNC:RB->upvalue[0]
1367
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1368
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1272
1369
  #endif
1273
1370
  |. addiu RA, BASE, -8
1274
- | sw r0, 8+HI(BASE)
1371
+ | sw TISNUM, 8+HI(BASE)
1275
1372
  | sw r0, 8+LO(BASE)
1276
- | li RD, (3+1)*8
1373
+ | sw TMP0, HI(RA)
1374
+ | sw TMP1, LO(RA)
1277
1375
  | b ->fff_res
1278
- |. sdc1 f0, 0(RA)
1376
+ |. li RD, (3+1)*8
1279
1377
  |
1280
1378
  |//-- Base library: catch errors ----------------------------------------
1281
1379
  |
@@ -1295,8 +1393,9 @@ static void build_subroutines(BuildCtx *ctx)
1295
1393
  | sltiu AT, NARGS8:RC, 16
1296
1394
  | lw CARG4, 8+HI(BASE)
1297
1395
  | bnez AT, ->fff_fallback
1298
- |. ldc1 FARG2, 8(BASE)
1299
- | ldc1 FARG1, 0(BASE)
1396
+ |. lw CARG3, 8+LO(BASE)
1397
+ | lw CARG1, LO(BASE)
1398
+ | lw CARG2, HI(BASE)
1300
1399
  | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1301
1400
  | li AT, LJ_TFUNC
1302
1401
  | move TMP2, BASE
@@ -1304,9 +1403,11 @@ static void build_subroutines(BuildCtx *ctx)
1304
1403
  | addiu BASE, BASE, 16
1305
1404
  | // Remember active hook before pcall.
1306
1405
  | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1307
- | sdc1 FARG2, 0(TMP2) // Swap function and traceback.
1406
+ | sw CARG3, LO(TMP2) // Swap function and traceback.
1407
+ | sw CARG4, HI(TMP2)
1308
1408
  | andi TMP3, TMP3, 1
1309
- | sdc1 FARG1, 8(TMP2)
1409
+ | sw CARG1, 8+LO(TMP2)
1410
+ | sw CARG2, 8+HI(TMP2)
1310
1411
  | addiu PC, TMP3, 16+FRAME_PCALL
1311
1412
  | b ->vm_call_dispatch
1312
1413
  |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1315,7 +1416,10 @@ static void build_subroutines(BuildCtx *ctx)
1315
1416
  |
1316
1417
  |.macro coroutine_resume_wrap, resume
1317
1418
  |.if resume
1318
- |.ffunc_1 coroutine_resume
1419
+ |.ffunc coroutine_resume
1420
+ | lw CARG3, HI(BASE)
1421
+ | beqz NARGS8:RC, ->fff_fallback
1422
+ |. lw CARG1, LO(BASE)
1319
1423
  | li AT, LJ_TTHREAD
1320
1424
  | bne CARG3, AT, ->fff_fallback
1321
1425
  |.else
@@ -1350,11 +1454,13 @@ static void build_subroutines(BuildCtx *ctx)
1350
1454
  | move CARG3, CARG2
1351
1455
  | sw BASE, L->top
1352
1456
  |2: // Move args to coroutine.
1353
- | ldc1 f0, 0(BASE)
1457
+ | lw SFRETHI, HI(BASE)
1458
+ | lw SFRETLO, LO(BASE)
1354
1459
  | sltu AT, BASE, TMP1
1355
1460
  | beqz AT, >3
1356
1461
  |. addiu BASE, BASE, 8
1357
- | sdc1 f0, 0(CARG3)
1462
+ | sw SFRETHI, HI(CARG3)
1463
+ | sw SFRETLO, LO(CARG3)
1358
1464
  | b <2
1359
1465
  |. addiu CARG3, CARG3, 8
1360
1466
  |3:
@@ -1380,10 +1486,12 @@ static void build_subroutines(BuildCtx *ctx)
1380
1486
  | sw TMP2, L:RA->top // Clear coroutine stack.
1381
1487
  | move TMP1, BASE
1382
1488
  |5: // Move results from coroutine.
1383
- | ldc1 f0, 0(TMP2)
1489
+ | lw SFRETHI, HI(TMP2)
1490
+ | lw SFRETLO, LO(TMP2)
1384
1491
  | addiu TMP2, TMP2, 8
1385
1492
  | sltu AT, TMP2, TMP3
1386
- | sdc1 f0, 0(TMP1)
1493
+ | sw SFRETHI, HI(TMP1)
1494
+ | sw SFRETLO, LO(TMP1)
1387
1495
  | bnez AT, <5
1388
1496
  |. addiu TMP1, TMP1, 8
1389
1497
  |6:
@@ -1408,12 +1516,14 @@ static void build_subroutines(BuildCtx *ctx)
1408
1516
  |.if resume
1409
1517
  | addiu TMP3, TMP3, -8
1410
1518
  | li TMP1, LJ_TFALSE
1411
- | ldc1 f0, 0(TMP3)
1519
+ | lw SFRETHI, HI(TMP3)
1520
+ | lw SFRETLO, LO(TMP3)
1412
1521
  | sw TMP3, L:RA->top // Remove error from coroutine stack.
1413
1522
  | li RD, (2+1)*8
1414
1523
  | sw TMP1, -8+HI(BASE) // Prepend false to results.
1415
1524
  | addiu RA, BASE, -8
1416
- | sdc1 f0, 0(BASE) // Copy error message.
1525
+ | sw SFRETHI, HI(BASE) // Copy error message.
1526
+ | sw SFRETLO, LO(BASE)
1417
1527
  | b <7
1418
1528
  |. andi TMP0, PC, FRAME_TYPE
1419
1529
  |.else
@@ -1449,20 +1559,29 @@ static void build_subroutines(BuildCtx *ctx)
1449
1559
  |
1450
1560
  |//-- Math library -------------------------------------------------------
1451
1561
  |
1452
- |.ffunc_n math_abs
1453
- |. abs.d FRET1, FARG1
1454
- |->fff_resn:
1455
- | lw PC, FRAME_PC(BASE)
1456
- | addiu RA, BASE, -8
1457
- | b ->fff_res1
1458
- |. sdc1 FRET1, -8(BASE)
1562
+ |.ffunc_1 math_abs
1563
+ | bne SFARG1HI, TISNUM, >1
1564
+ |. sra TMP0, SFARG1LO, 31
1565
+ | xor TMP1, SFARG1LO, TMP0
1566
+ | subu SFARG1LO, TMP1, TMP0
1567
+ | bgez SFARG1LO, ->fff_restv
1568
+ |. nop
1569
+ | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1570
+ | b ->fff_restv
1571
+ |. li SFARG1LO, 0
1572
+ |1:
1573
+ | sltiu AT, SFARG1HI, LJ_TISNUM
1574
+ | beqz AT, ->fff_fallback
1575
+ |. sll SFARG1HI, SFARG1HI, 1
1576
+ | srl SFARG1HI, SFARG1HI, 1
1577
+ |// fallthrough
1459
1578
  |
1460
1579
  |->fff_restv:
1461
- | // CARG3/CARG1 = TValue result.
1580
+ | // SFARG1LO/SFARG1HI = TValue result.
1462
1581
  | lw PC, FRAME_PC(BASE)
1463
- | sw CARG3, -8+HI(BASE)
1582
+ | sw SFARG1HI, -8+HI(BASE)
1464
1583
  | addiu RA, BASE, -8
1465
- | sw CARG1, -8+LO(BASE)
1584
+ | sw SFARG1LO, -8+LO(BASE)
1466
1585
  |->fff_res1:
1467
1586
  | // RA = results, PC = return.
1468
1587
  | li RD, (1+1)*8
@@ -1491,15 +1610,19 @@ static void build_subroutines(BuildCtx *ctx)
1491
1610
  |. sw TISNIL, -8+HI(TMP1)
1492
1611
  |
1493
1612
  |.macro math_extern, func
1494
- |->ff_math_ .. func:
1495
- | lw CARG3, HI(BASE)
1613
+ | .ffunc math_ .. func
1614
+ | lw SFARG1HI, HI(BASE)
1496
1615
  | beqz NARGS8:RC, ->fff_fallback
1497
1616
  |. load_got func
1498
- | sltiu AT, CARG3, LJ_TISNUM
1617
+ | sltiu AT, SFARG1HI, LJ_TISNUM
1499
1618
  | beqz AT, ->fff_fallback
1500
- |. nop
1501
- | call_extern
1619
+ |.if FPU
1502
1620
  |. ldc1 FARG1, 0(BASE)
1621
+ |.else
1622
+ |. lw SFARG1LO, LO(BASE)
1623
+ |.endif
1624
+ | call_extern
1625
+ |. nop
1503
1626
  | b ->fff_resn
1504
1627
  |. nop
1505
1628
  |.endmacro
@@ -1513,10 +1636,22 @@ static void build_subroutines(BuildCtx *ctx)
1513
1636
  |. nop
1514
1637
  |.endmacro
1515
1638
  |
1639
+ |// TODO: Return integer type if result is integer (own sf implementation).
1516
1640
  |.macro math_round, func
1517
- | .ffunc_n math_ .. func
1518
- |. nop
1641
+ |->ff_math_ .. func:
1642
+ | lw SFARG1HI, HI(BASE)
1643
+ | beqz NARGS8:RC, ->fff_fallback
1644
+ |. lw SFARG1LO, LO(BASE)
1645
+ | beq SFARG1HI, TISNUM, ->fff_restv
1646
+ |. sltu AT, SFARG1HI, TISNUM
1647
+ | beqz AT, ->fff_fallback
1648
+ |.if FPU
1649
+ |. ldc1 FARG1, 0(BASE)
1519
1650
  | bal ->vm_ .. func
1651
+ |.else
1652
+ |. load_got func
1653
+ | call_extern
1654
+ |.endif
1520
1655
  |. nop
1521
1656
  | b ->fff_resn
1522
1657
  |. nop
@@ -1526,15 +1661,19 @@ static void build_subroutines(BuildCtx *ctx)
1526
1661
  | math_round ceil
1527
1662
  |
1528
1663
  |.ffunc math_log
1529
- | lw CARG3, HI(BASE)
1530
1664
  | li AT, 8
1531
1665
  | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1532
- |. load_got log
1533
- | sltiu AT, CARG3, LJ_TISNUM
1666
+ |. lw SFARG1HI, HI(BASE)
1667
+ | sltiu AT, SFARG1HI, LJ_TISNUM
1534
1668
  | beqz AT, ->fff_fallback
1535
- |. nop
1669
+ |. load_got log
1670
+ |.if FPU
1536
1671
  | call_extern
1537
1672
  |. ldc1 FARG1, 0(BASE)
1673
+ |.else
1674
+ | call_extern
1675
+ |. lw SFARG1LO, LO(BASE)
1676
+ |.endif
1538
1677
  | b ->fff_resn
1539
1678
  |. nop
1540
1679
  |
@@ -1553,17 +1692,43 @@ static void build_subroutines(BuildCtx *ctx)
1553
1692
  | math_extern2 atan2
1554
1693
  | math_extern2 fmod
1555
1694
  |
1695
+ |.if FPU
1556
1696
  |.ffunc_n math_sqrt
1557
1697
  |. sqrt.d FRET1, FARG1
1558
- | b ->fff_resn
1559
- |. nop
1698
+ |// fallthrough to ->fff_resn
1699
+ |.else
1700
+ | math_extern sqrt
1701
+ |.endif
1702
+ |
1703
+ |->fff_resn:
1704
+ | lw PC, FRAME_PC(BASE)
1705
+ | addiu RA, BASE, -8
1706
+ |.if FPU
1707
+ | b ->fff_res1
1708
+ |. sdc1 FRET1, -8(BASE)
1709
+ |.else
1710
+ | sw SFRETHI, -8+HI(BASE)
1711
+ | b ->fff_res1
1712
+ |. sw SFRETLO, -8+LO(BASE)
1713
+ |.endif
1714
+ |
1560
1715
  |
1561
- |.ffunc_nn math_ldexp
1562
- | trunc.w.d FARG2, FARG2
1716
+ |.ffunc math_ldexp
1717
+ | sltiu AT, NARGS8:RC, 16
1718
+ | lw SFARG1HI, HI(BASE)
1719
+ | bnez AT, ->fff_fallback
1720
+ |. lw CARG4, 8+HI(BASE)
1721
+ | bne CARG4, TISNUM, ->fff_fallback
1563
1722
  | load_got ldexp
1564
- | mfc1 CARG3, FARG2
1723
+ |. sltu AT, SFARG1HI, TISNUM
1724
+ | beqz AT, ->fff_fallback
1725
+ |.if FPU
1726
+ |. ldc1 FARG1, 0(BASE)
1727
+ |.else
1728
+ |. lw SFARG1LO, LO(BASE)
1729
+ |.endif
1565
1730
  | call_extern
1566
- |. nop
1731
+ |. lw CARG3, 8+LO(BASE)
1567
1732
  | b ->fff_resn
1568
1733
  |. nop
1569
1734
  |
@@ -1574,10 +1739,17 @@ static void build_subroutines(BuildCtx *ctx)
1574
1739
  |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1575
1740
  | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1576
1741
  | addiu RA, BASE, -8
1742
+ |.if FPU
1577
1743
  | mtc1 TMP1, FARG2
1578
1744
  | sdc1 FRET1, 0(RA)
1579
1745
  | cvt.d.w FARG2, FARG2
1580
1746
  | sdc1 FARG2, 8(RA)
1747
+ |.else
1748
+ | sw SFRETLO, LO(RA)
1749
+ | sw SFRETHI, HI(RA)
1750
+ | sw TMP1, 8+LO(RA)
1751
+ | sw TISNUM, 8+HI(RA)
1752
+ |.endif
1581
1753
  | b ->fff_res
1582
1754
  |. li RD, (2+1)*8
1583
1755
  |
@@ -1587,39 +1759,98 @@ static void build_subroutines(BuildCtx *ctx)
1587
1759
  | call_extern
1588
1760
  |. addiu CARG3, BASE, -8
1589
1761
  | addiu RA, BASE, -8
1762
+ |.if FPU
1590
1763
  | sdc1 FRET1, 0(BASE)
1764
+ |.else
1765
+ | sw SFRETLO, LO(BASE)
1766
+ | sw SFRETHI, HI(BASE)
1767
+ |.endif
1591
1768
  | b ->fff_res
1592
1769
  |. li RD, (2+1)*8
1593
1770
  |
1594
- |.macro math_minmax, name, ismax
1595
- |->ff_ .. name:
1596
- | lw CARG3, HI(BASE)
1597
- | beqz NARGS8:RC, ->fff_fallback
1598
- |. ldc1 FRET1, 0(BASE)
1599
- | sltiu AT, CARG3, LJ_TISNUM
1771
+ |.macro math_minmax, name, intins, fpins
1772
+ | .ffunc_1 name
1773
+ | addu TMP3, BASE, NARGS8:RC
1774
+ | bne SFARG1HI, TISNUM, >5
1775
+ |. addiu TMP2, BASE, 8
1776
+ |1: // Handle integers.
1777
+ |. lw SFARG2HI, HI(TMP2)
1778
+ | beq TMP2, TMP3, ->fff_restv
1779
+ |. lw SFARG2LO, LO(TMP2)
1780
+ | bne SFARG2HI, TISNUM, >3
1781
+ |. slt AT, SFARG1LO, SFARG2LO
1782
+ | intins SFARG1LO, SFARG2LO, AT
1783
+ | b <1
1784
+ |. addiu TMP2, TMP2, 8
1785
+ |
1786
+ |3: // Convert intermediate result to number and continue with number loop.
1787
+ | sltiu AT, SFARG2HI, LJ_TISNUM
1600
1788
  | beqz AT, ->fff_fallback
1601
- |. addu TMP2, BASE, NARGS8:RC
1602
- | addiu TMP1, BASE, 8
1603
- | beq TMP1, TMP2, ->fff_resn
1604
- |1:
1605
- |. lw CARG3, HI(TMP1)
1606
- | ldc1 FARG1, 0(TMP1)
1607
- | addiu TMP1, TMP1, 8
1608
- | sltiu AT, CARG3, LJ_TISNUM
1789
+ |.if FPU
1790
+ |. mtc1 SFARG1LO, FRET1
1791
+ | cvt.d.w FRET1, FRET1
1792
+ | b >7
1793
+ |. ldc1 FARG1, 0(TMP2)
1794
+ |.else
1795
+ |. nop
1796
+ | bal ->vm_sfi2d_1
1797
+ |. nop
1798
+ | b >7
1799
+ |. nop
1800
+ |.endif
1801
+ |
1802
+ |5:
1803
+ |. sltiu AT, SFARG1HI, LJ_TISNUM
1609
1804
  | beqz AT, ->fff_fallback
1610
- |.if ismax
1611
- |. c.olt.d FARG1, FRET1
1805
+ |.if FPU
1806
+ |. ldc1 FRET1, 0(BASE)
1807
+ |.endif
1808
+ |
1809
+ |6: // Handle numbers.
1810
+ |. lw SFARG2HI, HI(TMP2)
1811
+ |.if FPU
1812
+ | beq TMP2, TMP3, ->fff_resn
1612
1813
  |.else
1613
- |. c.olt.d FRET1, FARG1
1814
+ | beq TMP2, TMP3, ->fff_restv
1614
1815
  |.endif
1615
- | bne TMP1, TMP2, <1
1616
- |. movf.d FRET1, FARG1
1617
- | b ->fff_resn
1816
+ |. sltiu AT, SFARG2HI, LJ_TISNUM
1817
+ | beqz AT, >8
1818
+ |.if FPU
1819
+ |. ldc1 FARG1, 0(TMP2)
1820
+ |.else
1821
+ |. lw SFARG2LO, LO(TMP2)
1822
+ |.endif
1823
+ |7:
1824
+ |.if FPU
1825
+ | c.olt.d FRET1, FARG1
1826
+ | fpins FRET1, FARG1
1827
+ |.else
1828
+ | bal ->vm_sfcmpolt
1829
+ |. nop
1830
+ | intins SFARG1LO, SFARG2LO, CRET1
1831
+ | intins SFARG1HI, SFARG2HI, CRET1
1832
+ |.endif
1833
+ | b <6
1834
+ |. addiu TMP2, TMP2, 8
1835
+ |
1836
+ |8: // Convert integer to number and continue with number loop.
1837
+ | bne SFARG2HI, TISNUM, ->fff_fallback
1838
+ |.if FPU
1839
+ |. lwc1 FARG1, LO(TMP2)
1840
+ | b <7
1841
+ |. cvt.d.w FARG1, FARG1
1842
+ |.else
1843
+ |. nop
1844
+ | bal ->vm_sfi2d_2
1618
1845
  |. nop
1846
+ | b <7
1847
+ |. nop
1848
+ |.endif
1849
+ |
1619
1850
  |.endmacro
1620
1851
  |
1621
- | math_minmax math_min, 0
1622
- | math_minmax math_max, 1
1852
+ | math_minmax math_min, movz, movf.d
1853
+ | math_minmax math_max, movn, movt.d
1623
1854
  |
1624
1855
  |//-- String library -----------------------------------------------------
1625
1856
  |
@@ -1632,32 +1863,30 @@ static void build_subroutines(BuildCtx *ctx)
1632
1863
  | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1633
1864
  |. nop
1634
1865
  | lw TMP0, STR:CARG1->len
1635
- | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1636
1866
  | addiu RA, BASE, -8
1867
+ | lw PC, FRAME_PC(BASE)
1637
1868
  | sltu RD, r0, TMP0
1638
- | mtc1 TMP1, f0
1869
+ | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1639
1870
  | addiu RD, RD, 1
1640
- | cvt.d.w f0, f0
1641
- | lw PC, FRAME_PC(BASE)
1642
1871
  | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1872
+ | sw TISNUM, HI(RA)
1643
1873
  | b ->fff_res
1644
- |. sdc1 f0, 0(RA)
1874
+ |. sw TMP1, LO(RA)
1645
1875
  |
1646
1876
  |.ffunc string_char // Only handle the 1-arg case here.
1647
1877
  | ffgccheck
1648
- | lw CARG3, HI(BASE)
1649
- | ldc1 FARG1, 0(BASE)
1650
- | li AT, 8
1651
- | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1652
- |. sltiu AT, CARG3, LJ_TISNUM
1653
- | beqz AT, ->fff_fallback
1878
+ |. lw CARG3, HI(BASE)
1879
+ | lw CARG1, LO(BASE)
1880
+ | li TMP1, 255
1881
+ | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1882
+ | xor TMP0, CARG3, TISNUM // Integer.
1883
+ | sltu TMP1, TMP1, CARG1 // !(255 < n).
1884
+ | or AT, AT, TMP0
1885
+ | or AT, AT, TMP1
1886
+ | bnez AT, ->fff_fallback
1654
1887
  |. li CARG3, 1
1655
- | trunc.w.d FARG1, FARG1
1656
1888
  | addiu CARG2, sp, ARG5_OFS
1657
- | sltiu AT, TMP0, 256
1658
- | mfc1 TMP0, FARG1
1659
- | beqz AT, ->fff_fallback
1660
- |. sw TMP0, ARG5
1889
+ | sb CARG1, ARG5
1661
1890
  |->fff_newstr:
1662
1891
  | load_got lj_str_new
1663
1892
  | sw BASE, L->base
@@ -1667,34 +1896,28 @@ static void build_subroutines(BuildCtx *ctx)
1667
1896
  | // Returns GCstr *.
1668
1897
  | lw BASE, L->base
1669
1898
  |->fff_resstr:
1670
- | move CARG1, CRET1
1899
+ | move SFARG1LO, CRET1
1671
1900
  | b ->fff_restv
1672
- |. li CARG3, LJ_TSTR
1901
+ |. li SFARG1HI, LJ_TSTR
1673
1902
  |
1674
1903
  |.ffunc string_sub
1675
1904
  | ffgccheck
1676
- | addiu AT, NARGS8:RC, -16
1905
+ |. addiu AT, NARGS8:RC, -16
1677
1906
  | lw CARG3, 16+HI(BASE)
1678
- | ldc1 f0, 16(BASE)
1679
1907
  | lw TMP0, HI(BASE)
1680
1908
  | lw STR:CARG1, LO(BASE)
1681
1909
  | bltz AT, ->fff_fallback
1682
- | lw CARG2, 8+HI(BASE)
1683
- | ldc1 f2, 8(BASE)
1910
+ |. lw CARG2, 8+HI(BASE)
1684
1911
  | beqz AT, >1
1685
1912
  |. li CARG4, -1
1686
- | trunc.w.d f0, f0
1687
- | sltiu AT, CARG3, LJ_TISNUM
1688
- | beqz AT, ->fff_fallback
1689
- |. mfc1 CARG4, f0
1913
+ | bne CARG3, TISNUM, ->fff_fallback
1914
+ |. lw CARG4, 16+LO(BASE)
1690
1915
  |1:
1691
- | sltiu AT, CARG2, LJ_TISNUM
1692
- | beqz AT, ->fff_fallback
1916
+ | bne CARG2, TISNUM, ->fff_fallback
1693
1917
  |. li AT, LJ_TSTR
1694
- | trunc.w.d f2, f2
1695
1918
  | bne TMP0, AT, ->fff_fallback
1696
- |. lw CARG2, STR:CARG1->len
1697
- | mfc1 CARG3, f2
1919
+ |. lw CARG3, 8+LO(BASE)
1920
+ | lw CARG2, STR:CARG1->len
1698
1921
  | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1699
1922
  | slt AT, CARG4, r0
1700
1923
  | addiu TMP0, CARG2, 1
@@ -1716,14 +1939,14 @@ static void build_subroutines(BuildCtx *ctx)
1716
1939
  | bgez CARG3, ->fff_newstr
1717
1940
  |. addiu CARG3, CARG3, 1 // len++
1718
1941
  |->fff_emptystr: // Return empty string.
1719
- | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
1942
+ | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
1720
1943
  | b ->fff_restv
1721
- |. li CARG3, LJ_TSTR
1944
+ |. li SFARG1HI, LJ_TSTR
1722
1945
  |
1723
1946
  |.macro ffstring_op, name
1724
1947
  | .ffunc string_ .. name
1725
1948
  | ffgccheck
1726
- | lw CARG3, HI(BASE)
1949
+ |. lw CARG3, HI(BASE)
1727
1950
  | lw STR:CARG2, LO(BASE)
1728
1951
  | beqz NARGS8:RC, ->fff_fallback
1729
1952
  |. li AT, LJ_TSTR
@@ -1749,27 +1972,96 @@ static void build_subroutines(BuildCtx *ctx)
1749
1972
  |
1750
1973
  |//-- Bit library --------------------------------------------------------
1751
1974
  |
1975
+ |->vm_tobit_fb:
1976
+ | beqz TMP1, ->fff_fallback
1977
+ |.if FPU
1978
+ |. ldc1 FARG1, 0(BASE)
1979
+ | add.d FARG1, FARG1, TOBIT
1980
+ | jr ra
1981
+ |. mfc1 CRET1, FARG1
1982
+ |.else
1983
+ |// FP number to bit conversion for soft-float.
1984
+ |->vm_tobit:
1985
+ | sll TMP0, SFARG1HI, 1
1986
+ | lui AT, 0x0020
1987
+ | addu TMP0, TMP0, AT
1988
+ | slt AT, TMP0, r0
1989
+ | movz SFARG1LO, r0, AT
1990
+ | beqz AT, >2
1991
+ |. li TMP1, 0x3e0
1992
+ | not TMP1, TMP1
1993
+ | sra TMP0, TMP0, 21
1994
+ | subu TMP0, TMP1, TMP0
1995
+ | slt AT, TMP0, r0
1996
+ | bnez AT, >1
1997
+ |. sll TMP1, SFARG1HI, 11
1998
+ | lui AT, 0x8000
1999
+ | or TMP1, TMP1, AT
2000
+ | srl AT, SFARG1LO, 21
2001
+ | or TMP1, TMP1, AT
2002
+ | slt AT, SFARG1HI, r0
2003
+ | beqz AT, >2
2004
+ |. srlv SFARG1LO, TMP1, TMP0
2005
+ | subu SFARG1LO, r0, SFARG1LO
2006
+ |2:
2007
+ | jr ra
2008
+ |. move CRET1, SFARG1LO
2009
+ |1:
2010
+ | addiu TMP0, TMP0, 21
2011
+ | srlv TMP1, SFARG1LO, TMP0
2012
+ | li AT, 20
2013
+ | subu TMP0, AT, TMP0
2014
+ | sll SFARG1LO, SFARG1HI, 12
2015
+ | sllv AT, SFARG1LO, TMP0
2016
+ | or SFARG1LO, TMP1, AT
2017
+ | slt AT, SFARG1HI, r0
2018
+ | beqz AT, <2
2019
+ |. nop
2020
+ | jr ra
2021
+ |. subu CRET1, r0, SFARG1LO
2022
+ |.endif
2023
+ |
1752
2024
  |.macro .ffunc_bit, name
1753
- | .ffunc_n bit_..name
1754
- |. add.d FARG1, FARG1, TOBIT
1755
- | mfc1 CRET1, FARG1
2025
+ | .ffunc_1 bit_..name
2026
+ | beq SFARG1HI, TISNUM, >6
2027
+ |. move CRET1, SFARG1LO
2028
+ | bal ->vm_tobit_fb
2029
+ |. sltu TMP1, SFARG1HI, TISNUM
2030
+ |6:
1756
2031
  |.endmacro
1757
2032
  |
1758
2033
  |.macro .ffunc_bit_op, name, ins
1759
2034
  | .ffunc_bit name
1760
- | addiu TMP1, BASE, 8
1761
- | addu TMP2, BASE, NARGS8:RC
2035
+ | addiu TMP2, BASE, 8
2036
+ | addu TMP3, BASE, NARGS8:RC
1762
2037
  |1:
1763
- | lw CARG4, HI(TMP1)
1764
- | beq TMP1, TMP2, ->fff_resi
1765
- |. ldc1 FARG1, 0(TMP1)
1766
- | sltiu AT, CARG4, LJ_TISNUM
1767
- | beqz AT, ->fff_fallback
1768
- | add.d FARG1, FARG1, TOBIT
1769
- | mfc1 CARG2, FARG1
1770
- | ins CRET1, CRET1, CARG2
2038
+ | lw SFARG1HI, HI(TMP2)
2039
+ | beq TMP2, TMP3, ->fff_resi
2040
+ |. lw SFARG1LO, LO(TMP2)
2041
+ |.if FPU
2042
+ | bne SFARG1HI, TISNUM, >2
2043
+ |. addiu TMP2, TMP2, 8
1771
2044
  | b <1
1772
- |. addiu TMP1, TMP1, 8
2045
+ |. ins CRET1, CRET1, SFARG1LO
2046
+ |2:
2047
+ | ldc1 FARG1, -8(TMP2)
2048
+ | sltu TMP1, SFARG1HI, TISNUM
2049
+ | beqz TMP1, ->fff_fallback
2050
+ |. add.d FARG1, FARG1, TOBIT
2051
+ | mfc1 SFARG1LO, FARG1
2052
+ | b <1
2053
+ |. ins CRET1, CRET1, SFARG1LO
2054
+ |.else
2055
+ | beq SFARG1HI, TISNUM, >2
2056
+ |. move CRET2, CRET1
2057
+ | bal ->vm_tobit_fb
2058
+ |. sltu TMP1, SFARG1HI, TISNUM
2059
+ | move SFARG1LO, CRET2
2060
+ |2:
2061
+ | ins CRET1, CRET1, SFARG1LO
2062
+ | b <1
2063
+ |. addiu TMP2, TMP2, 8
2064
+ |.endif
1773
2065
  |.endmacro
1774
2066
  |
1775
2067
  |.ffunc_bit_op band, and
@@ -1793,24 +2085,28 @@ static void build_subroutines(BuildCtx *ctx)
1793
2085
  |. not CRET1, CRET1
1794
2086
  |
1795
2087
  |.macro .ffunc_bit_sh, name, ins, shmod
1796
- | .ffunc_nn bit_..name
1797
- |. add.d FARG1, FARG1, TOBIT
1798
- | add.d FARG2, FARG2, TOBIT
1799
- | mfc1 CARG1, FARG1
1800
- | mfc1 CARG2, FARG2
2088
+ | .ffunc_2 bit_..name
2089
+ | beq SFARG1HI, TISNUM, >1
2090
+ |. nop
2091
+ | bal ->vm_tobit_fb
2092
+ |. sltu TMP1, SFARG1HI, TISNUM
2093
+ | move SFARG1LO, CRET1
2094
+ |1:
2095
+ | bne SFARG2HI, TISNUM, ->fff_fallback
2096
+ |. nop
1801
2097
  |.if shmod == 1
1802
2098
  | li AT, 32
1803
- | subu TMP0, AT, CARG2
1804
- | sllv CARG2, CARG1, CARG2
1805
- | srlv CARG1, CARG1, TMP0
2099
+ | subu TMP0, AT, SFARG2LO
2100
+ | sllv SFARG2LO, SFARG1LO, SFARG2LO
2101
+ | srlv SFARG1LO, SFARG1LO, TMP0
1806
2102
  |.elif shmod == 2
1807
2103
  | li AT, 32
1808
- | subu TMP0, AT, CARG2
1809
- | srlv CARG2, CARG1, CARG2
1810
- | sllv CARG1, CARG1, TMP0
2104
+ | subu TMP0, AT, SFARG2LO
2105
+ | srlv SFARG2LO, SFARG1LO, SFARG2LO
2106
+ | sllv SFARG1LO, SFARG1LO, TMP0
1811
2107
  |.endif
1812
2108
  | b ->fff_resi
1813
- |. ins CRET1, CARG1, CARG2
2109
+ |. ins CRET1, SFARG1LO, SFARG2LO
1814
2110
  |.endmacro
1815
2111
  |
1816
2112
  |.ffunc_bit_sh lshift, sllv, 0
@@ -1822,9 +2118,11 @@ static void build_subroutines(BuildCtx *ctx)
1822
2118
  |
1823
2119
  |.ffunc_bit tobit
1824
2120
  |->fff_resi:
1825
- | mtc1 CRET1, FRET1
1826
- | b ->fff_resn
1827
- |. cvt.d.w FRET1, FRET1
2121
+ | lw PC, FRAME_PC(BASE)
2122
+ | addiu RA, BASE, -8
2123
+ | sw TISNUM, -8+HI(BASE)
2124
+ | b ->fff_res1
2125
+ |. sw CRET1, -8+LO(BASE)
1828
2126
  |
1829
2127
  |//-----------------------------------------------------------------------
1830
2128
  |
@@ -2015,17 +2313,19 @@ static void build_subroutines(BuildCtx *ctx)
2015
2313
  |.if JIT
2016
2314
  | // RA = resultptr, RB = meta base
2017
2315
  | lw INS, -4(PC)
2018
- | lw TMP3, -24+LO(RB) // Save previous trace number.
2316
+ | lw TMP2, -24+LO(RB) // Save previous trace.
2019
2317
  | decode_RA8a RC, INS
2020
2318
  | addiu AT, MULTRES, -8
2021
2319
  | decode_RA8b RC
2022
2320
  | beqz AT, >2
2023
2321
  |. addu RC, BASE, RC // Call base.
2024
2322
  |1: // Move results down.
2025
- | ldc1 f0, 0(RA)
2323
+ | lw SFRETHI, HI(RA)
2324
+ | lw SFRETLO, LO(RA)
2026
2325
  | addiu AT, AT, -8
2027
2326
  | addiu RA, RA, 8
2028
- | sdc1 f0, 0(RC)
2327
+ | sw SFRETHI, HI(RC)
2328
+ | sw SFRETLO, LO(RC)
2029
2329
  | bnez AT, <1
2030
2330
  |. addiu RC, RC, 8
2031
2331
  |2:
@@ -2034,17 +2334,13 @@ static void build_subroutines(BuildCtx *ctx)
2034
2334
  | decode_RA8b RA
2035
2335
  | decode_RB8b RB
2036
2336
  | addu RA, RA, RB
2037
- | lw TMP1, DISPATCH_J(trace)(DISPATCH)
2038
2337
  | addu RA, BASE, RA
2039
2338
  |3:
2040
2339
  | sltu AT, RC, RA
2041
2340
  | bnez AT, >9 // More results wanted?
2042
- |. sll TMP2, TMP3, 2
2341
+ |. nop
2043
2342
  |
2044
- | addu TMP2, TMP1, TMP2
2045
- | lw TRACE:TMP2, 0(TMP2)
2046
- | beqz TRACE:TMP2, ->cont_nop
2047
- |. nop
2343
+ | lhu TMP3, TRACE:TMP2->traceno
2048
2344
  | lhu RD, TRACE:TMP2->link
2049
2345
  | beq RD, TMP3, ->cont_nop // Blacklisted.
2050
2346
  |. load_got lj_dispatch_stitch
@@ -2086,14 +2382,23 @@ static void build_subroutines(BuildCtx *ctx)
2086
2382
  |//-----------------------------------------------------------------------
2087
2383
  |
2088
2384
  |.macro savex_, a, b
2385
+ |.if FPU
2089
2386
  | sdc1 f..a, 16+a*8(sp)
2090
2387
  | sw r..a, 16+32*8+a*4(sp)
2091
2388
  | sw r..b, 16+32*8+b*4(sp)
2389
+ |.else
2390
+ | sw r..a, 16+a*4(sp)
2391
+ | sw r..b, 16+b*4(sp)
2392
+ |.endif
2092
2393
  |.endmacro
2093
2394
  |
2094
2395
  |->vm_exit_handler:
2095
2396
  |.if JIT
2397
+ |.if FPU
2096
2398
  | addiu sp, sp, -(16+32*8+32*4)
2399
+ |.else
2400
+ | addiu sp, sp, -(16+32*4)
2401
+ |.endif
2097
2402
  | savex_ 0, 1
2098
2403
  | savex_ 2, 3
2099
2404
  | savex_ 4, 5
@@ -2108,17 +2413,25 @@ static void build_subroutines(BuildCtx *ctx)
2108
2413
  | savex_ 22, 23
2109
2414
  | savex_ 24, 25
2110
2415
  | savex_ 26, 27
2416
+ |.if FPU
2111
2417
  | sdc1 f28, 16+28*8(sp)
2112
- | sw r28, 16+32*8+28*4(sp)
2113
2418
  | sdc1 f30, 16+30*8(sp)
2419
+ | sw r28, 16+32*8+28*4(sp)
2114
2420
  | sw r30, 16+32*8+30*4(sp)
2115
2421
  | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2422
+ | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2423
+ | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2424
+ |.else
2425
+ | sw r28, 16+28*4(sp)
2426
+ | sw r30, 16+30*4(sp)
2427
+ | sw r0, 16+31*4(sp) // Clear RID_TMP.
2428
+ | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2429
+ | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2430
+ |.endif
2116
2431
  | li_vmstate EXIT
2117
- | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2118
2432
  | addiu DISPATCH, JGL, -GG_DISP2G-32768
2119
2433
  | lw TMP1, 0(TMP2) // Load exit number.
2120
2434
  | st_vmstate
2121
- | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
2122
2435
  | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2123
2436
  | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2124
2437
  | load_got lj_trace_exit
@@ -2148,15 +2461,16 @@ static void build_subroutines(BuildCtx *ctx)
2148
2461
  |1:
2149
2462
  | bltz CRET1, >9 // Check for error from exit.
2150
2463
  |. lw LFUNC:RB, FRAME_FUNC(BASE)
2151
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2464
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2152
2465
  | sll MULTRES, CRET1, 3
2153
2466
  | li TISNIL, LJ_TNIL
2467
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2154
2468
  | sw MULTRES, SAVE_MULTRES
2155
- | mtc1 TMP3, TOBIT
2469
+ | .FPU mtc1 TMP3, TOBIT
2156
2470
  | lw TMP1, LFUNC:RB->pc
2157
2471
  | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2158
2472
  | lw KBASE, PC2PROTO(k)(TMP1)
2159
- | cvt.d.s TOBIT, TOBIT
2473
+ | .FPU cvt.d.s TOBIT, TOBIT
2160
2474
  | // Modified copy of ins_next which handles function header dispatch, too.
2161
2475
  | lw INS, 0(PC)
2162
2476
  | addiu PC, PC, 4
@@ -2164,7 +2478,7 @@ static void build_subroutines(BuildCtx *ctx)
2164
2478
  | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2165
2479
  | decode_OP4a TMP1, INS
2166
2480
  | decode_OP4b TMP1
2167
- | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header?
2481
+ | sltiu TMP2, TMP1, BC_FUNCF*4
2168
2482
  | addu TMP0, DISPATCH, TMP1
2169
2483
  | decode_RD8a RD, INS
2170
2484
  | lw AT, 0(TMP0)
@@ -2205,8 +2519,9 @@ static void build_subroutines(BuildCtx *ctx)
2205
2519
  |//-- Math helper functions ----------------------------------------------
2206
2520
  |//-----------------------------------------------------------------------
2207
2521
  |
2522
+ |// Hard-float round to integer.
2208
2523
  |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2209
- |.macro vm_round, func
2524
+ |.macro vm_round_hf, func
2210
2525
  | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2211
2526
  | mtc1 r0, f4
2212
2527
  | mtc1 TMP0, f5
@@ -2248,6 +2563,12 @@ static void build_subroutines(BuildCtx *ctx)
2248
2563
  |. mov.d FRET1, FARG1
2249
2564
  |.endmacro
2250
2565
  |
2566
+ |.macro vm_round, func
2567
+ |.if FPU
2568
+ | vm_round_hf, func
2569
+ |.endif
2570
+ |.endmacro
2571
+ |
2251
2572
  |->vm_floor:
2252
2573
  | vm_round floor
2253
2574
  |->vm_ceil:
@@ -2257,29 +2578,201 @@ static void build_subroutines(BuildCtx *ctx)
2257
2578
  | vm_round trunc
2258
2579
  |.endif
2259
2580
  |
2260
- |//-----------------------------------------------------------------------
2261
- |//-- Miscellaneous functions --------------------------------------------
2262
- |//-----------------------------------------------------------------------
2581
+ |// Soft-float integer to number conversion.
2582
+ |.macro sfi2d, AHI, ALO
2583
+ |.if not FPU
2584
+ | beqz ALO, >9 // Handle zero first.
2585
+ |. sra TMP0, ALO, 31
2586
+ | xor TMP1, ALO, TMP0
2587
+ | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2588
+ | clz AHI, TMP1
2589
+ | andi TMP0, TMP0, 0x800 // Mask sign bit.
2590
+ | li AT, 0x3ff+31-1
2591
+ | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2592
+ | subu AHI, AT, AHI // Exponent - 1 in AHI.
2593
+ | sll ALO, TMP1, 21
2594
+ | or AHI, AHI, TMP0 // Sign | Exponent.
2595
+ | srl TMP1, TMP1, 11
2596
+ | sll AHI, AHI, 20 // Align left.
2597
+ | jr ra
2598
+ |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
2599
+ |9:
2600
+ | jr ra
2601
+ |. li AHI, 0
2602
+ |.endif
2603
+ |.endmacro
2263
2604
  |
2264
- |//-----------------------------------------------------------------------
2265
- |//-- FFI helper functions -----------------------------------------------
2266
- |//-----------------------------------------------------------------------
2605
+ |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
2606
+ |->vm_sfi2d_1:
2607
+ | sfi2d SFARG1HI, SFARG1LO
2608
+ |
2609
+ |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2610
+ |->vm_sfi2d_2:
2611
+ | sfi2d SFARG2HI, SFARG2LO
2612
+ |
2613
+ |// Soft-float comparison. Equivalent to c.eq.d.
2614
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2615
+ |->vm_sfcmpeq:
2616
+ |.if not FPU
2617
+ | sll AT, SFARG1HI, 1
2618
+ | sll TMP0, SFARG2HI, 1
2619
+ | or CRET1, SFARG1LO, SFARG2LO
2620
+ | or TMP1, AT, TMP0
2621
+ | or TMP1, TMP1, CRET1
2622
+ | beqz TMP1, >8 // Both args +-0: return 1.
2623
+ |. sltu CRET1, r0, SFARG1LO
2624
+ | lui TMP1, 0xffe0
2625
+ | addu AT, AT, CRET1
2626
+ | sltu CRET1, r0, SFARG2LO
2627
+ | sltu AT, TMP1, AT
2628
+ | addu TMP0, TMP0, CRET1
2629
+ | sltu TMP0, TMP1, TMP0
2630
+ | or TMP1, AT, TMP0
2631
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
2632
+ |. xor TMP0, SFARG1HI, SFARG2HI
2633
+ | xor TMP1, SFARG1LO, SFARG2LO
2634
+ | or AT, TMP0, TMP1
2635
+ | jr ra
2636
+ |. sltiu CRET1, AT, 1 // Same values: return 1.
2637
+ |8:
2638
+ | jr ra
2639
+ |. li CRET1, 1
2640
+ |9:
2641
+ | jr ra
2642
+ |. li CRET1, 0
2643
+ |.endif
2267
2644
  |
2268
- |// Handler for callback functions. Callback slot number in r1, g in r2.
2269
- |->vm_ffi_callback:
2270
- |.if FFI
2271
- |.type CTSTATE, CTState, PC
2272
- | saveregs
2273
- | lw CTSTATE, GL:r2->ctype_state
2274
- | addiu DISPATCH, r2, GG_G2DISP
2645
+ |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2646
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2647
+ |->vm_sfcmpult:
2648
+ |.if not FPU
2649
+ | b >1
2650
+ |. li CRET2, 1
2651
+ |.endif
2652
+ |
2653
+ |->vm_sfcmpolt:
2654
+ |.if not FPU
2655
+ | li CRET2, 0
2656
+ |1:
2657
+ | sll AT, SFARG1HI, 1
2658
+ | sll TMP0, SFARG2HI, 1
2659
+ | or CRET1, SFARG1LO, SFARG2LO
2660
+ | or TMP1, AT, TMP0
2661
+ | or TMP1, TMP1, CRET1
2662
+ | beqz TMP1, >8 // Both args +-0: return 0.
2663
+ |. sltu CRET1, r0, SFARG1LO
2664
+ | lui TMP1, 0xffe0
2665
+ | addu AT, AT, CRET1
2666
+ | sltu CRET1, r0, SFARG2LO
2667
+ | sltu AT, TMP1, AT
2668
+ | addu TMP0, TMP0, CRET1
2669
+ | sltu TMP0, TMP1, TMP0
2670
+ | or TMP1, AT, TMP0
2671
+ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2672
+ |. and AT, SFARG1HI, SFARG2HI
2673
+ | bltz AT, >5 // Both args negative?
2674
+ |. nop
2675
+ | beq SFARG1HI, SFARG2HI, >8
2676
+ |. sltu CRET1, SFARG1LO, SFARG2LO
2677
+ | jr ra
2678
+ |. slt CRET1, SFARG1HI, SFARG2HI
2679
+ |5: // Swap conditions if both operands are negative.
2680
+ | beq SFARG1HI, SFARG2HI, >8
2681
+ |. sltu CRET1, SFARG2LO, SFARG1LO
2682
+ | jr ra
2683
+ |. slt CRET1, SFARG2HI, SFARG1HI
2684
+ |8:
2685
+ | jr ra
2686
+ |. nop
2687
+ |9:
2688
+ | jr ra
2689
+ |. move CRET1, CRET2
2690
+ |.endif
2691
+ |
2692
+ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2693
+ |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2694
+ |->vm_sfcmpolex:
2695
+ |.if not FPU
2696
+ | sll AT, SFARG1HI, 1
2697
+ | sll TMP0, SFARG2HI, 1
2698
+ | or CRET1, SFARG1LO, SFARG2LO
2699
+ | or TMP1, AT, TMP0
2700
+ | or TMP1, TMP1, CRET1
2701
+ | beqz TMP1, >8 // Both args +-0: return 1.
2702
+ |. sltu CRET1, r0, SFARG1LO
2703
+ | lui TMP1, 0xffe0
2704
+ | addu AT, AT, CRET1
2705
+ | sltu CRET1, r0, SFARG2LO
2706
+ | sltu AT, TMP1, AT
2707
+ | addu TMP0, TMP0, CRET1
2708
+ | sltu TMP0, TMP1, TMP0
2709
+ | or TMP1, AT, TMP0
2710
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
2711
+ |. and AT, SFARG1HI, SFARG2HI
2712
+ | xor AT, AT, TMP3
2713
+ | bltz AT, >5 // Both args negative?
2714
+ |. nop
2715
+ | beq SFARG1HI, SFARG2HI, >6
2716
+ |. sltu CRET1, SFARG2LO, SFARG1LO
2717
+ | jr ra
2718
+ |. slt CRET1, SFARG2HI, SFARG1HI
2719
+ |5: // Swap conditions if both operands are negative.
2720
+ | beq SFARG1HI, SFARG2HI, >6
2721
+ |. sltu CRET1, SFARG1LO, SFARG2LO
2722
+ | slt CRET1, SFARG1HI, SFARG2HI
2723
+ |6:
2724
+ | jr ra
2725
+ |. nop
2726
+ |8:
2727
+ | jr ra
2728
+ |. li CRET1, 1
2729
+ |9:
2730
+ | jr ra
2731
+ |. li CRET1, 0
2732
+ |.endif
2733
+ |
2734
+ |.macro sfmin_max, name, intins
2735
+ |->vm_sf .. name:
2736
+ |.if JIT and not FPU
2737
+ | move TMP2, ra
2738
+ | bal ->vm_sfcmpolt
2739
+ |. nop
2740
+ | move TMP0, CRET1
2741
+ | move SFRETHI, SFARG1HI
2742
+ | move SFRETLO, SFARG1LO
2743
+ | move ra, TMP2
2744
+ | intins SFRETHI, SFARG2HI, TMP0
2745
+ | jr ra
2746
+ |. intins SFRETLO, SFARG2LO, TMP0
2747
+ |.endif
2748
+ |.endmacro
2749
+ |
2750
+ | sfmin_max min, movz
2751
+ | sfmin_max max, movn
2752
+ |
2753
+ |//-----------------------------------------------------------------------
2754
+ |//-- Miscellaneous functions --------------------------------------------
2755
+ |//-----------------------------------------------------------------------
2756
+ |
2757
+ |//-----------------------------------------------------------------------
2758
+ |//-- FFI helper functions -----------------------------------------------
2759
+ |//-----------------------------------------------------------------------
2760
+ |
2761
+ |// Handler for callback functions. Callback slot number in r1, g in r2.
2762
+ |->vm_ffi_callback:
2763
+ |.if FFI
2764
+ |.type CTSTATE, CTState, PC
2765
+ | saveregs
2766
+ | lw CTSTATE, GL:r2->ctype_state
2767
+ | addiu DISPATCH, r2, GG_G2DISP
2275
2768
  | load_got lj_ccallback_enter
2276
2769
  | sw r1, CTSTATE->cb.slot
2277
2770
  | sw CARG1, CTSTATE->cb.gpr[0]
2278
2771
  | sw CARG2, CTSTATE->cb.gpr[1]
2279
- | sdc1 FARG1, CTSTATE->cb.fpr[0]
2772
+ | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2280
2773
  | sw CARG3, CTSTATE->cb.gpr[2]
2281
2774
  | sw CARG4, CTSTATE->cb.gpr[3]
2282
- | sdc1 FARG2, CTSTATE->cb.fpr[1]
2775
+ | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2283
2776
  | addiu TMP0, sp, CFRAME_SPACE+16
2284
2777
  | sw TMP0, CTSTATE->cb.stack
2285
2778
  | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2289,15 +2782,16 @@ static void build_subroutines(BuildCtx *ctx)
2289
2782
  | // Returns lua_State *.
2290
2783
  | lw BASE, L:CRET1->base
2291
2784
  | lw RC, L:CRET1->top
2785
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2292
2786
  | move L, CRET1
2293
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2787
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2294
2788
  | lw LFUNC:RB, FRAME_FUNC(BASE)
2295
- | mtc1 TMP3, TOBIT
2789
+ | .FPU mtc1 TMP3, TOBIT
2296
2790
  | li_vmstate INTERP
2297
2791
  | li TISNIL, LJ_TNIL
2298
2792
  | subu RC, RC, BASE
2299
2793
  | st_vmstate
2300
- | cvt.d.s TOBIT, TOBIT
2794
+ | .FPU cvt.d.s TOBIT, TOBIT
2301
2795
  | ins_callt
2302
2796
  |.endif
2303
2797
  |
@@ -2311,11 +2805,11 @@ static void build_subroutines(BuildCtx *ctx)
2311
2805
  | move CARG2, RA
2312
2806
  | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2313
2807
  |. move CARG1, CTSTATE
2808
+ | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2314
2809
  | lw CRET1, CTSTATE->cb.gpr[0]
2315
- | ldc1 FRET1, CTSTATE->cb.fpr[0]
2316
- | lw CRET2, CTSTATE->cb.gpr[1]
2810
+ | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2317
2811
  | b ->vm_leave_unw
2318
- |. ldc1 FRET2, CTSTATE->cb.fpr[1]
2812
+ |. lw CRET2, CTSTATE->cb.gpr[1]
2319
2813
  |.endif
2320
2814
  |
2321
2815
  |->vm_ffi_call: // Call C function via FFI.
@@ -2347,8 +2841,8 @@ static void build_subroutines(BuildCtx *ctx)
2347
2841
  | lw CARG2, CCSTATE->gpr[1]
2348
2842
  | lw CARG3, CCSTATE->gpr[2]
2349
2843
  | lw CARG4, CCSTATE->gpr[3]
2350
- | ldc1 FARG1, CCSTATE->fpr[0]
2351
- | ldc1 FARG2, CCSTATE->fpr[1]
2844
+ | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2845
+ | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2352
2846
  | jalr CFUNCADDR
2353
2847
  |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2354
2848
  | lw CCSTATE:TMP1, -12(r16)
@@ -2356,8 +2850,13 @@ static void build_subroutines(BuildCtx *ctx)
2356
2850
  | lw ra, -4(r16)
2357
2851
  | sw CRET1, CCSTATE:TMP1->gpr[0]
2358
2852
  | sw CRET2, CCSTATE:TMP1->gpr[1]
2853
+ |.if FPU
2359
2854
  | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2360
2855
  | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2856
+ |.else
2857
+ | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2858
+ | sw CARG2, CCSTATE:TMP1->gpr[3]
2859
+ |.endif
2361
2860
  | move sp, r16
2362
2861
  | jr ra
2363
2862
  |. move r16, TMP2
@@ -2381,82 +2880,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2381
2880
 
2382
2881
  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2383
2882
  | // RA = src1*8, RD = src2*8, JMP with RD = target
2384
- | addu CARG2, BASE, RA
2385
- | addu CARG3, BASE, RD
2386
- | lw TMP0, HI(CARG2)
2387
- | lw TMP1, HI(CARG3)
2388
- | ldc1 f0, 0(CARG2)
2389
- | ldc1 f2, 0(CARG3)
2390
- | sltiu TMP0, TMP0, LJ_TISNUM
2391
- | sltiu TMP1, TMP1, LJ_TISNUM
2883
+ |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2884
+ | addu RA, BASE, RA
2885
+ | addu RD, BASE, RD
2886
+ | lw RAHI, HI(RA)
2887
+ | lw RDHI, HI(RD)
2392
2888
  | lhu TMP2, OFS_RD(PC)
2393
- | and TMP0, TMP0, TMP1
2394
2889
  | addiu PC, PC, 4
2395
- | beqz TMP0, ->vmeta_comp
2396
- |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
2397
- | decode_RD4b TMP2
2398
- | addu TMP2, TMP2, TMP1
2399
- if (op == BC_ISLT || op == BC_ISGE) {
2400
- | c.olt.d f0, f2
2401
- } else {
2402
- | c.ole.d f0, f2
2403
- }
2404
- if (op == BC_ISLT || op == BC_ISLE) {
2405
- | movf TMP2, r0
2406
- } else {
2407
- | movt TMP2, r0
2408
- }
2409
- | addu PC, PC, TMP2
2890
+ | bne RAHI, TISNUM, >2
2891
+ |. lw RALO, LO(RA)
2892
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2893
+ | lw RDLO, LO(RD)
2894
+ | bne RDHI, TISNUM, >5
2895
+ |. decode_RD4b TMP2
2896
+ | slt AT, SFARG1LO, SFARG2LO
2897
+ | addu TMP2, TMP2, TMP3
2898
+ | movop TMP2, r0, AT
2410
2899
  |1:
2900
+ | addu PC, PC, TMP2
2411
2901
  | ins_next
2902
+ |
2903
+ |2: // RA is not an integer.
2904
+ | sltiu AT, RAHI, LJ_TISNUM
2905
+ | beqz AT, ->vmeta_comp
2906
+ |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2907
+ | sltiu AT, RDHI, LJ_TISNUM
2908
+ |.if FPU
2909
+ | ldc1 FRA, 0(RA)
2910
+ | ldc1 FRD, 0(RD)
2911
+ |.else
2912
+ | lw RDLO, LO(RD)
2913
+ |.endif
2914
+ | beqz AT, >4
2915
+ |. decode_RD4b TMP2
2916
+ |3: // RA and RD are both numbers.
2917
+ |.if FPU
2918
+ | fcomp f20, f22
2919
+ | addu TMP2, TMP2, TMP3
2920
+ | b <1
2921
+ |. fmovop TMP2, r0
2922
+ |.else
2923
+ | bal sfcomp
2924
+ |. addu TMP2, TMP2, TMP3
2925
+ | b <1
2926
+ |. movop TMP2, r0, CRET1
2927
+ |.endif
2928
+ |
2929
+ |4: // RA is a number, RD is not a number.
2930
+ | bne RDHI, TISNUM, ->vmeta_comp
2931
+ | // RA is a number, RD is an integer. Convert RD to a number.
2932
+ |.if FPU
2933
+ |. lwc1 FRD, LO(RD)
2934
+ | b <3
2935
+ |. cvt.d.w FRD, FRD
2936
+ |.else
2937
+ |. nop
2938
+ |.if "RDHI" == "SFARG1HI"
2939
+ | bal ->vm_sfi2d_1
2940
+ |.else
2941
+ | bal ->vm_sfi2d_2
2942
+ |.endif
2943
+ |. nop
2944
+ | b <3
2945
+ |. nop
2946
+ |.endif
2947
+ |
2948
+ |5: // RA is an integer, RD is not an integer
2949
+ | sltiu AT, RDHI, LJ_TISNUM
2950
+ | beqz AT, ->vmeta_comp
2951
+ | // RA is an integer, RD is a number. Convert RA to a number.
2952
+ |.if FPU
2953
+ |. mtc1 RALO, FRA
2954
+ | ldc1 FRD, 0(RD)
2955
+ | b <3
2956
+ | cvt.d.w FRA, FRA
2957
+ |.else
2958
+ |. nop
2959
+ |.if "RAHI" == "SFARG1HI"
2960
+ | bal ->vm_sfi2d_1
2961
+ |.else
2962
+ | bal ->vm_sfi2d_2
2963
+ |.endif
2964
+ |. nop
2965
+ | b <3
2966
+ |. nop
2967
+ |.endif
2968
+ |.endmacro
2969
+ |
2970
+ if (op == BC_ISLT) {
2971
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
2972
+ } else if (op == BC_ISGE) {
2973
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
2974
+ } else if (op == BC_ISLE) {
2975
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
2976
+ } else {
2977
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
2978
+ }
2412
2979
  break;
2413
2980
 
2414
2981
  case BC_ISEQV: case BC_ISNEV:
2415
2982
  vk = op == BC_ISEQV;
2416
2983
  | // RA = src1*8, RD = src2*8, JMP with RD = target
2417
2984
  | addu RA, BASE, RA
2418
- | addiu PC, PC, 4
2419
- | lw TMP0, HI(RA)
2420
- | ldc1 f0, 0(RA)
2985
+ | addiu PC, PC, 4
2421
2986
  | addu RD, BASE, RD
2987
+ | lw SFARG1HI, HI(RA)
2422
2988
  | lhu TMP2, -4+OFS_RD(PC)
2423
- | lw TMP1, HI(RD)
2424
- | ldc1 f2, 0(RD)
2989
+ | lw SFARG2HI, HI(RD)
2425
2990
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2426
- | sltiu AT, TMP0, LJ_TISNUM
2427
- | sltiu CARG1, TMP1, LJ_TISNUM
2428
- | decode_RD4b TMP2
2429
- | and AT, AT, CARG1
2430
- | beqz AT, >5
2431
- |. addu TMP2, TMP2, TMP3
2432
- | c.eq.d f0, f2
2991
+ | sltu AT, TISNUM, SFARG1HI
2992
+ | sltu TMP0, TISNUM, SFARG2HI
2993
+ | or AT, AT, TMP0
2433
2994
  if (vk) {
2434
- | movf TMP2, r0
2995
+ | beqz AT, ->BC_ISEQN_Z
2435
2996
  } else {
2436
- | movt TMP2, r0
2997
+ | beqz AT, ->BC_ISNEN_Z
2437
2998
  }
2438
- |1:
2439
- | addu PC, PC, TMP2
2440
- | ins_next
2441
- |5: // Either or both types are not numbers.
2442
- | lw CARG2, LO(RA)
2443
- | lw CARG3, LO(RD)
2999
+ |. decode_RD4b TMP2
3000
+ | // Either or both types are not numbers.
3001
+ | lw SFARG1LO, LO(RA)
3002
+ | lw SFARG2LO, LO(RD)
3003
+ | addu TMP2, TMP2, TMP3
2444
3004
  |.if FFI
2445
3005
  | li TMP3, LJ_TCDATA
2446
- | beq TMP0, TMP3, ->vmeta_equal_cd
3006
+ | beq SFARG1HI, TMP3, ->vmeta_equal_cd
2447
3007
  |.endif
2448
- |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive?
3008
+ |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
2449
3009
  |.if FFI
2450
- | beq TMP1, TMP3, ->vmeta_equal_cd
3010
+ | beq SFARG2HI, TMP3, ->vmeta_equal_cd
2451
3011
  |.endif
2452
- |. xor TMP3, CARG2, CARG3 // Same tv?
2453
- | xor TMP1, TMP1, TMP0 // Same type?
2454
- | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata?
3012
+ |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
3013
+ | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
3014
+ | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
2455
3015
  | movz TMP3, r0, AT // Ignore tv if primitive.
2456
- | movn CARG1, r0, TMP1 // Tab/ud and same type?
2457
- | or AT, TMP1, TMP3 // Same type && (pri||same tv).
2458
- | movz CARG1, r0, AT
2459
- | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv.
3016
+ | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
3017
+ | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
3018
+ | movz TMP0, r0, AT
3019
+ | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
2460
3020
  if (vk) {
2461
3021
  |. movn TMP2, r0, AT
2462
3022
  } else {
@@ -2464,15 +3024,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2464
3024
  }
2465
3025
  | // Different tables or userdatas. Need to check __eq metamethod.
2466
3026
  | // Field metatable must be at same offset for GCtab and GCudata!
2467
- | lw TAB:TMP1, TAB:CARG2->metatable
2468
- | beqz TAB:TMP1, <1 // No metatable?
3027
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
3028
+ | beqz TAB:TMP1, >1 // No metatable?
2469
3029
  |. nop
2470
3030
  | lbu TMP1, TAB:TMP1->nomm
2471
3031
  | andi TMP1, TMP1, 1<<MM_eq
2472
- | bnez TMP1, <1 // Or 'no __eq' flag set?
3032
+ | bnez TMP1, >1 // Or 'no __eq' flag set?
2473
3033
  |. nop
2474
3034
  | b ->vmeta_equal // Handle __eq metamethod.
2475
- |. li CARG4, 1-vk // ne = 0 or 1.
3035
+ |. li TMP0, 1-vk // ne = 0 or 1.
3036
+ |1:
3037
+ | addu PC, PC, TMP2
3038
+ | ins_next
2476
3039
  break;
2477
3040
 
2478
3041
  case BC_ISEQS: case BC_ISNES:
@@ -2509,38 +3072,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2509
3072
  vk = op == BC_ISEQN;
2510
3073
  | // RA = src*8, RD = num_const*8, JMP with RD = target
2511
3074
  | addu RA, BASE, RA
2512
- | addiu PC, PC, 4
2513
- | lw TMP0, HI(RA)
2514
- | ldc1 f0, 0(RA)
2515
- | addu RD, KBASE, RD
2516
- | lhu TMP2, -4+OFS_RD(PC)
2517
- | ldc1 f2, 0(RD)
3075
+ | addu RD, KBASE, RD
3076
+ | lw SFARG1HI, HI(RA)
3077
+ | lw SFARG2HI, HI(RD)
3078
+ | lhu TMP2, OFS_RD(PC)
3079
+ | addiu PC, PC, 4
2518
3080
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2519
- | sltiu AT, TMP0, LJ_TISNUM
2520
3081
  | decode_RD4b TMP2
2521
- |.if FFI
2522
- | beqz AT, >5
2523
- |.else
2524
- | beqz AT, >1
2525
- |.endif
2526
- |. addu TMP2, TMP2, TMP3
2527
- | c.eq.d f0, f2
2528
3082
  if (vk) {
2529
- | movf TMP2, r0
2530
- | addu PC, PC, TMP2
3083
+ |->BC_ISEQN_Z:
3084
+ } else {
3085
+ |->BC_ISNEN_Z:
3086
+ }
3087
+ | bne SFARG1HI, TISNUM, >3
3088
+ |. lw SFARG1LO, LO(RA)
3089
+ | lw SFARG2LO, LO(RD)
3090
+ | addu TMP2, TMP2, TMP3
3091
+ | bne SFARG2HI, TISNUM, >6
3092
+ |. xor AT, SFARG1LO, SFARG2LO
3093
+ if (vk) {
3094
+ | movn TMP2, r0, AT
2531
3095
  |1:
3096
+ | addu PC, PC, TMP2
3097
+ |2:
2532
3098
  } else {
2533
- | movt TMP2, r0
3099
+ | movz TMP2, r0, AT
2534
3100
  |1:
3101
+ |2:
2535
3102
  | addu PC, PC, TMP2
2536
3103
  }
2537
3104
  | ins_next
3105
+ |
3106
+ |3: // RA is not an integer.
3107
+ | sltiu AT, SFARG1HI, LJ_TISNUM
2538
3108
  |.if FFI
2539
- |5:
2540
- | li AT, LJ_TCDATA
2541
- | beq TMP0, AT, ->vmeta_equal_cd
3109
+ | beqz AT, >8
3110
+ |.else
3111
+ | beqz AT, <2
3112
+ |.endif
3113
+ |. addu TMP2, TMP2, TMP3
3114
+ | sltiu AT, SFARG2HI, LJ_TISNUM
3115
+ |.if FPU
3116
+ | ldc1 f20, 0(RA)
3117
+ | ldc1 f22, 0(RD)
3118
+ |.endif
3119
+ | beqz AT, >5
3120
+ |. lw SFARG2LO, LO(RD)
3121
+ |4: // RA and RD are both numbers.
3122
+ |.if FPU
3123
+ | c.eq.d f20, f22
3124
+ | b <1
3125
+ if (vk) {
3126
+ |. movf TMP2, r0
3127
+ } else {
3128
+ |. movt TMP2, r0
3129
+ }
3130
+ |.else
3131
+ | bal ->vm_sfcmpeq
2542
3132
  |. nop
2543
3133
  | b <1
3134
+ if (vk) {
3135
+ |. movz TMP2, r0, CRET1
3136
+ } else {
3137
+ |. movn TMP2, r0, CRET1
3138
+ }
3139
+ |.endif
3140
+ |
3141
+ |5: // RA is a number, RD is not a number.
3142
+ |.if FFI
3143
+ | bne SFARG2HI, TISNUM, >9
3144
+ |.else
3145
+ | bne SFARG2HI, TISNUM, <2
3146
+ |.endif
3147
+ | // RA is a number, RD is an integer. Convert RD to a number.
3148
+ |.if FPU
3149
+ |. lwc1 f22, LO(RD)
3150
+ | b <4
3151
+ |. cvt.d.w f22, f22
3152
+ |.else
3153
+ |. nop
3154
+ | bal ->vm_sfi2d_2
3155
+ |. nop
3156
+ | b <4
3157
+ |. nop
3158
+ |.endif
3159
+ |
3160
+ |6: // RA is an integer, RD is not an integer
3161
+ | sltiu AT, SFARG2HI, LJ_TISNUM
3162
+ |.if FFI
3163
+ | beqz AT, >9
3164
+ |.else
3165
+ | beqz AT, <2
3166
+ |.endif
3167
+ | // RA is an integer, RD is a number. Convert RA to a number.
3168
+ |.if FPU
3169
+ |. mtc1 SFARG1LO, f20
3170
+ | ldc1 f22, 0(RD)
3171
+ | b <4
3172
+ | cvt.d.w f20, f20
3173
+ |.else
3174
+ |. nop
3175
+ | bal ->vm_sfi2d_1
3176
+ |. nop
3177
+ | b <4
3178
+ |. nop
3179
+ |.endif
3180
+ |
3181
+ |.if FFI
3182
+ |8:
3183
+ | li AT, LJ_TCDATA
3184
+ | bne SFARG1HI, AT, <2
3185
+ |. nop
3186
+ | b ->vmeta_equal_cd
3187
+ |. nop
3188
+ |9:
3189
+ | li AT, LJ_TCDATA
3190
+ | bne SFARG2HI, AT, <2
3191
+ |. nop
3192
+ | b ->vmeta_equal_cd
2544
3193
  |. nop
2545
3194
  |.endif
2546
3195
  break;
@@ -2592,7 +3241,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2592
3241
  | addu PC, PC, TMP2
2593
3242
  } else {
2594
3243
  | sltiu TMP0, TMP0, LJ_TISTRUECOND
2595
- | ldc1 f0, 0(RD)
3244
+ | lw SFRETHI, HI(RD)
3245
+ | lw SFRETLO, LO(RD)
2596
3246
  if (op == BC_ISTC) {
2597
3247
  | beqz TMP0, >1
2598
3248
  } else {
@@ -2602,7 +3252,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2602
3252
  | decode_RD4b TMP2
2603
3253
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2604
3254
  | addu TMP2, TMP2, TMP3
2605
- | sdc1 f0, 0(RA)
3255
+ | sw SFRETHI, HI(RA)
3256
+ | sw SFRETLO, LO(RA)
2606
3257
  | addu PC, PC, TMP2
2607
3258
  |1:
2608
3259
  }
@@ -2634,10 +3285,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2634
3285
  case BC_MOV:
2635
3286
  | // RA = dst*8, RD = src*8
2636
3287
  | addu RD, BASE, RD
2637
- | addu RA, BASE, RA
2638
- | ldc1 f0, 0(RD)
3288
+ | addu RA, BASE, RA
3289
+ | lw SFRETHI, HI(RD)
3290
+ | lw SFRETLO, LO(RD)
2639
3291
  | ins_next1
2640
- | sdc1 f0, 0(RA)
3292
+ | sw SFRETHI, HI(RA)
3293
+ | sw SFRETLO, LO(RA)
2641
3294
  | ins_next2
2642
3295
  break;
2643
3296
  case BC_NOT:
@@ -2654,16 +3307,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2654
3307
  break;
2655
3308
  case BC_UNM:
2656
3309
  | // RA = dst*8, RD = src*8
2657
- | addu CARG3, BASE, RD
3310
+ | addu RB, BASE, RD
3311
+ | lw SFARG1HI, HI(RB)
2658
3312
  | addu RA, BASE, RA
2659
- | lw TMP0, HI(CARG3)
2660
- | ldc1 f0, 0(CARG3)
2661
- | sltiu AT, TMP0, LJ_TISNUM
2662
- | beqz AT, ->vmeta_unm
2663
- |. neg.d f0, f0
3313
+ | bne SFARG1HI, TISNUM, >2
3314
+ |. lw SFARG1LO, LO(RB)
3315
+ | lui TMP1, 0x8000
3316
+ | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
3317
+ |. negu SFARG1LO, SFARG1LO
3318
+ |1:
2664
3319
  | ins_next1
2665
- | sdc1 f0, 0(RA)
3320
+ | sw SFARG1HI, HI(RA)
3321
+ | sw SFARG1LO, LO(RA)
2666
3322
  | ins_next2
3323
+ |2:
3324
+ | sltiu AT, SFARG1HI, LJ_TISNUM
3325
+ | beqz AT, ->vmeta_unm
3326
+ |. lui TMP1, 0x8000
3327
+ | b <1
3328
+ |. xor SFARG1HI, SFARG1HI, TMP1
2667
3329
  break;
2668
3330
  case BC_LEN:
2669
3331
  | // RA = dst*8, RD = src*8
@@ -2674,12 +3336,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2674
3336
  | li AT, LJ_TSTR
2675
3337
  | bne TMP0, AT, >2
2676
3338
  |. li AT, LJ_TTAB
2677
- | lw CRET1, STR:CARG1->len
3339
+ | lw CRET1, STR:CARG1->len
2678
3340
  |1:
2679
- | mtc1 CRET1, f0
2680
- | cvt.d.w f0, f0
2681
3341
  | ins_next1
2682
- | sdc1 f0, 0(RA)
3342
+ | sw TISNUM, HI(RA)
3343
+ | sw CRET1, LO(RA)
2683
3344
  | ins_next2
2684
3345
  |2:
2685
3346
  | bne TMP0, AT, ->vmeta_len
@@ -2710,104 +3371,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2710
3371
 
2711
3372
  /* -- Binary ops -------------------------------------------------------- */
2712
3373
 
2713
- |.macro ins_arithpre
3374
+ |.macro fpmod, a, b, c
3375
+ | bal ->vm_floor // floor(b/c)
3376
+ |. div.d FARG1, b, c
3377
+ | mul.d a, FRET1, c
3378
+ | sub.d a, b, a // b - floor(b/c)*c
3379
+ |.endmacro
3380
+
3381
+ |.macro sfpmod
3382
+ | addiu sp, sp, -16
3383
+ |
3384
+ | load_got __divdf3
3385
+ | sw SFARG1HI, HI(sp)
3386
+ | sw SFARG1LO, LO(sp)
3387
+ | sw SFARG2HI, 8+HI(sp)
3388
+ | call_extern
3389
+ |. sw SFARG2LO, 8+LO(sp)
3390
+ |
3391
+ | load_got floor
3392
+ | move SFARG1HI, SFRETHI
3393
+ | call_extern
3394
+ |. move SFARG1LO, SFRETLO
3395
+ |
3396
+ | load_got __muldf3
3397
+ | move SFARG1HI, SFRETHI
3398
+ | move SFARG1LO, SFRETLO
3399
+ | lw SFARG2HI, 8+HI(sp)
3400
+ | call_extern
3401
+ |. lw SFARG2LO, 8+LO(sp)
3402
+ |
3403
+ | load_got __subdf3
3404
+ | lw SFARG1HI, HI(sp)
3405
+ | lw SFARG1LO, LO(sp)
3406
+ | move SFARG2HI, SFRETHI
3407
+ | call_extern
3408
+ |. move SFARG2LO, SFRETLO
3409
+ |
3410
+ | addiu sp, sp, 16
3411
+ |.endmacro
3412
+
3413
+ |.macro ins_arithpre, label
2714
3414
  ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2715
- | decode_RB8a RB, INS
2716
- | decode_RB8b RB
2717
- | decode_RDtoRC8 RC, RD
2718
3415
  | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2719
3416
  ||switch (vk) {
2720
3417
  ||case 0:
2721
- | addu CARG3, BASE, RB
2722
- | addu CARG4, KBASE, RC
2723
- | lw TMP1, HI(CARG3)
2724
- | ldc1 f20, 0(CARG3)
2725
- | ldc1 f22, 0(CARG4)
2726
- | sltiu AT, TMP1, LJ_TISNUM
3418
+ | decode_RB8a RB, INS
3419
+ | decode_RB8b RB
3420
+ | decode_RDtoRC8 RC, RD
3421
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
3422
+ | addu RB, BASE, RB
3423
+ |.if "label" ~= "none"
3424
+ | b label
3425
+ |.endif
3426
+ |. addu RC, KBASE, RC
2727
3427
  || break;
2728
3428
  ||case 1:
2729
- | addu CARG4, BASE, RB
2730
- | addu CARG3, KBASE, RC
2731
- | lw TMP1, HI(CARG4)
2732
- | ldc1 f22, 0(CARG4)
2733
- | ldc1 f20, 0(CARG3)
2734
- | sltiu AT, TMP1, LJ_TISNUM
3429
+ | decode_RB8a RC, INS
3430
+ | decode_RB8b RC
3431
+ | decode_RDtoRC8 RB, RD
3432
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
3433
+ | addu RC, BASE, RC
3434
+ |.if "label" ~= "none"
3435
+ | b label
3436
+ |.endif
3437
+ |. addu RB, KBASE, RB
2735
3438
  || break;
2736
3439
  ||default:
2737
- | addu CARG3, BASE, RB
2738
- | addu CARG4, BASE, RC
2739
- | lw TMP1, HI(CARG3)
2740
- | lw TMP2, HI(CARG4)
2741
- | ldc1 f20, 0(CARG3)
2742
- | ldc1 f22, 0(CARG4)
2743
- | sltiu AT, TMP1, LJ_TISNUM
2744
- | sltiu TMP0, TMP2, LJ_TISNUM
2745
- | and AT, AT, TMP0
3440
+ | decode_RB8a RB, INS
3441
+ | decode_RB8b RB
3442
+ | decode_RDtoRC8 RC, RD
3443
+ | // RA = dst*8, RB = src1*8, RC = src2*8
3444
+ | addu RB, BASE, RB
3445
+ |.if "label" ~= "none"
3446
+ | b label
3447
+ |.endif
3448
+ |. addu RC, BASE, RC
2746
3449
  || break;
2747
3450
  ||}
2748
- | beqz AT, ->vmeta_arith
2749
- |. addu RA, BASE, RA
2750
3451
  |.endmacro
2751
3452
  |
2752
- |.macro fpmod, a, b, c
2753
- |->BC_MODVN_Z:
2754
- | bal ->vm_floor // floor(b/c)
2755
- |. div.d FARG1, b, c
2756
- | mul.d a, FRET1, c
2757
- | sub.d a, b, a // b - floor(b/c)*c
2758
- |.endmacro
3453
+ |.macro ins_arith, intins, fpins, fpcall, label
3454
+ | ins_arithpre none
2759
3455
  |
2760
- |.macro ins_arith, ins
2761
- | ins_arithpre
2762
- |.if "ins" == "fpmod_"
2763
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2764
- |. nop
3456
+ |.if "label" ~= "none"
3457
+ |label:
3458
+ |.endif
3459
+ |
3460
+ | lw SFARG1HI, HI(RB)
3461
+ | lw SFARG2HI, HI(RC)
3462
+ |
3463
+ |.if "intins" ~= "div"
3464
+ |
3465
+ | // Check for two integers.
3466
+ | lw SFARG1LO, LO(RB)
3467
+ | bne SFARG1HI, TISNUM, >5
3468
+ |. lw SFARG2LO, LO(RC)
3469
+ | bne SFARG2HI, TISNUM, >5
3470
+ |
3471
+ |.if "intins" == "addu"
3472
+ |. intins CRET1, SFARG1LO, SFARG2LO
3473
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3474
+ | xor TMP2, CRET1, SFARG2LO
3475
+ | and TMP1, TMP1, TMP2
3476
+ | bltz TMP1, ->vmeta_arith
3477
+ |. addu RA, BASE, RA
3478
+ |.elif "intins" == "subu"
3479
+ |. intins CRET1, SFARG1LO, SFARG2LO
3480
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3481
+ | xor TMP2, SFARG1LO, SFARG2LO
3482
+ | and TMP1, TMP1, TMP2
3483
+ | bltz TMP1, ->vmeta_arith
3484
+ |. addu RA, BASE, RA
3485
+ |.elif "intins" == "mult"
3486
+ |. intins SFARG1LO, SFARG2LO
3487
+ | mflo CRET1
3488
+ | mfhi TMP2
3489
+ | sra TMP1, CRET1, 31
3490
+ | bne TMP1, TMP2, ->vmeta_arith
3491
+ |. addu RA, BASE, RA
2765
3492
  |.else
2766
- | ins f0, f20, f22
3493
+ |. load_got lj_vm_modi
3494
+ | beqz SFARG2LO, ->vmeta_arith
3495
+ |. addu RA, BASE, RA
3496
+ |.if ENDIAN_BE
3497
+ | move CARG1, SFARG1LO
3498
+ |.endif
3499
+ | call_extern
3500
+ |. move CARG2, SFARG2LO
3501
+ |.endif
3502
+ |
2767
3503
  | ins_next1
2768
- | sdc1 f0, 0(RA)
3504
+ | sw TISNUM, HI(RA)
3505
+ | sw CRET1, LO(RA)
3506
+ |3:
2769
3507
  | ins_next2
3508
+ |
3509
+ |.elif not FPU
3510
+ |
3511
+ | lw SFARG1LO, LO(RB)
3512
+ | lw SFARG2LO, LO(RC)
3513
+ |
3514
+ |.endif
3515
+ |
3516
+ |5: // Check for two numbers.
3517
+ | .FPU ldc1 f20, 0(RB)
3518
+ | sltiu AT, SFARG1HI, LJ_TISNUM
3519
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
3520
+ | .FPU ldc1 f22, 0(RC)
3521
+ | and AT, AT, TMP0
3522
+ | beqz AT, ->vmeta_arith
3523
+ |. addu RA, BASE, RA
3524
+ |
3525
+ |.if FPU
3526
+ | fpins FRET1, f20, f22
3527
+ |.elif "fpcall" == "sfpmod"
3528
+ | sfpmod
3529
+ |.else
3530
+ | load_got fpcall
3531
+ | call_extern
3532
+ |. nop
3533
+ |.endif
3534
+ |
3535
+ | ins_next1
3536
+ |.if not FPU
3537
+ | sw SFRETHI, HI(RA)
3538
+ |.endif
3539
+ |.if "intins" ~= "div"
3540
+ | b <3
3541
+ |.endif
3542
+ |.if FPU
3543
+ |. sdc1 FRET1, 0(RA)
3544
+ |.else
3545
+ |. sw SFRETLO, LO(RA)
2770
3546
  |.endif
3547
+ |.if "intins" == "div"
3548
+ | ins_next2
3549
+ |.endif
3550
+ |
2771
3551
  |.endmacro
2772
3552
 
2773
3553
  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2774
- | ins_arith add.d
3554
+ | ins_arith addu, add.d, __adddf3, none
2775
3555
  break;
2776
3556
  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2777
- | ins_arith sub.d
3557
+ | ins_arith subu, sub.d, __subdf3, none
2778
3558
  break;
2779
3559
  case BC_MULVN: case BC_MULNV: case BC_MULVV:
2780
- | ins_arith mul.d
3560
+ | ins_arith mult, mul.d, __muldf3, none
2781
3561
  break;
2782
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2783
- | ins_arith div.d
3562
+ case BC_DIVVN:
3563
+ | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3564
+ break;
3565
+ case BC_DIVNV: case BC_DIVVV:
3566
+ | ins_arithpre ->BC_DIVVN_Z
2784
3567
  break;
2785
3568
  case BC_MODVN:
2786
- | ins_arith fpmod
3569
+ | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
2787
3570
  break;
2788
3571
  case BC_MODNV: case BC_MODVV:
2789
- | ins_arith fpmod_
3572
+ | ins_arithpre ->BC_MODVN_Z
2790
3573
  break;
2791
3574
  case BC_POW:
2792
- | decode_RB8a RB, INS
2793
- | decode_RB8b RB
2794
- | decode_RDtoRC8 RC, RD
2795
- | addu CARG3, BASE, RB
2796
- | addu CARG4, BASE, RC
2797
- | lw TMP1, HI(CARG3)
2798
- | lw TMP2, HI(CARG4)
2799
- | ldc1 FARG1, 0(CARG3)
2800
- | ldc1 FARG2, 0(CARG4)
2801
- | sltiu AT, TMP1, LJ_TISNUM
2802
- | sltiu TMP0, TMP2, LJ_TISNUM
3575
+ | ins_arithpre none
3576
+ | lw SFARG1HI, HI(RB)
3577
+ | lw SFARG2HI, HI(RC)
3578
+ | sltiu AT, SFARG1HI, LJ_TISNUM
3579
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
2803
3580
  | and AT, AT, TMP0
2804
3581
  | load_got pow
2805
3582
  | beqz AT, ->vmeta_arith
2806
3583
  |. addu RA, BASE, RA
3584
+ |.if FPU
3585
+ | ldc1 FARG1, 0(RB)
3586
+ | ldc1 FARG2, 0(RC)
3587
+ |.else
3588
+ | lw SFARG1LO, LO(RB)
3589
+ | lw SFARG2LO, LO(RC)
3590
+ |.endif
2807
3591
  | call_extern
2808
3592
  |. nop
2809
3593
  | ins_next1
3594
+ |.if FPU
2810
3595
  | sdc1 FRET1, 0(RA)
3596
+ |.else
3597
+ | sw SFRETHI, HI(RA)
3598
+ | sw SFRETLO, LO(RA)
3599
+ |.endif
2811
3600
  | ins_next2
2812
3601
  break;
2813
3602
 
@@ -2830,10 +3619,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2830
3619
  | bnez CRET1, ->vmeta_binop
2831
3620
  |. lw BASE, L->base
2832
3621
  | addu RB, BASE, MULTRES
2833
- | ldc1 f0, 0(RB)
3622
+ | lw SFRETHI, HI(RB)
3623
+ | lw SFRETLO, LO(RB)
2834
3624
  | addu RA, BASE, RA
2835
3625
  | ins_next1
2836
- | sdc1 f0, 0(RA) // Copy result from RB to RA.
3626
+ | sw SFRETHI, HI(RA)
3627
+ | sw SFRETLO, LO(RA)
2837
3628
  | ins_next2
2838
3629
  break;
2839
3630
 
@@ -2868,20 +3659,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2868
3659
  case BC_KSHORT:
2869
3660
  | // RA = dst*8, RD = int16_literal*8
2870
3661
  | sra RD, INS, 16
2871
- | mtc1 RD, f0
2872
3662
  | addu RA, BASE, RA
2873
- | cvt.d.w f0, f0
2874
3663
  | ins_next1
2875
- | sdc1 f0, 0(RA)
3664
+ | sw TISNUM, HI(RA)
3665
+ | sw RD, LO(RA)
2876
3666
  | ins_next2
2877
3667
  break;
2878
3668
  case BC_KNUM:
2879
3669
  | // RA = dst*8, RD = num_const*8
2880
3670
  | addu RD, KBASE, RD
2881
3671
  | addu RA, BASE, RA
2882
- | ldc1 f0, 0(RD)
3672
+ | lw SFRETHI, HI(RD)
3673
+ | lw SFRETLO, LO(RD)
2883
3674
  | ins_next1
2884
- | sdc1 f0, 0(RA)
3675
+ | sw SFRETHI, HI(RA)
3676
+ | sw SFRETLO, LO(RA)
2885
3677
  | ins_next2
2886
3678
  break;
2887
3679
  case BC_KPRI:
@@ -2917,9 +3709,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2917
3709
  | lw UPVAL:RB, LFUNC:RD->uvptr
2918
3710
  | ins_next1
2919
3711
  | lw TMP1, UPVAL:RB->v
2920
- | ldc1 f0, 0(TMP1)
3712
+ | lw SFRETHI, HI(TMP1)
3713
+ | lw SFRETLO, LO(TMP1)
2921
3714
  | addu RA, BASE, RA
2922
- | sdc1 f0, 0(RA)
3715
+ | sw SFRETHI, HI(RA)
3716
+ | sw SFRETLO, LO(RA)
2923
3717
  | ins_next2
2924
3718
  break;
2925
3719
  case BC_USETV:
@@ -2928,26 +3722,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2928
3722
  | srl RA, RA, 1
2929
3723
  | addu RD, BASE, RD
2930
3724
  | addu RA, RA, LFUNC:RB
2931
- | ldc1 f0, 0(RD)
2932
3725
  | lw UPVAL:RB, LFUNC:RA->uvptr
3726
+ | lw SFRETHI, HI(RD)
3727
+ | lw SFRETLO, LO(RD)
2933
3728
  | lbu TMP3, UPVAL:RB->marked
2934
3729
  | lw CARG2, UPVAL:RB->v
2935
3730
  | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2936
3731
  | lbu TMP0, UPVAL:RB->closed
2937
- | lw TMP2, HI(RD)
2938
- | sdc1 f0, 0(CARG2)
3732
+ | sw SFRETHI, HI(CARG2)
3733
+ | sw SFRETLO, LO(CARG2)
2939
3734
  | li AT, LJ_GC_BLACK|1
2940
3735
  | or TMP3, TMP3, TMP0
2941
3736
  | beq TMP3, AT, >2 // Upvalue is closed and black?
2942
- |. addiu TMP2, TMP2, -(LJ_TNUMX+1)
3737
+ |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
2943
3738
  |1:
2944
3739
  | ins_next
2945
3740
  |
2946
3741
  |2: // Check if new value is collectable.
2947
3742
  | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
2948
3743
  | beqz AT, <1 // tvisgcv(v)
2949
- |. lw TMP1, LO(RD)
2950
- | lbu TMP3, GCOBJ:TMP1->gch.marked
3744
+ |. nop
3745
+ | lbu TMP3, GCOBJ:SFRETLO->gch.marked
2951
3746
  | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2952
3747
  | beqz TMP3, <1
2953
3748
  |. load_got lj_gc_barrieruv
@@ -2995,11 +3790,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2995
3790
  | srl RA, RA, 1
2996
3791
  | addu RD, KBASE, RD
2997
3792
  | addu RA, RA, LFUNC:RB
2998
- | ldc1 f0, 0(RD)
2999
- | lw UPVAL:RB, LFUNC:RA->uvptr
3793
+ | lw UPVAL:RB, LFUNC:RA->uvptr
3794
+ | lw SFRETHI, HI(RD)
3795
+ | lw SFRETLO, LO(RD)
3796
+ | lw TMP1, UPVAL:RB->v
3000
3797
  | ins_next1
3001
- | lw TMP1, UPVAL:RB->v
3002
- | sdc1 f0, 0(TMP1)
3798
+ | sw SFRETHI, HI(TMP1)
3799
+ | sw SFRETLO, LO(TMP1)
3003
3800
  | ins_next2
3004
3801
  break;
3005
3802
  case BC_USETP:
@@ -3009,10 +3806,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3009
3806
  | srl TMP0, RD, 3
3010
3807
  | addu RA, RA, LFUNC:RB
3011
3808
  | not TMP0, TMP0
3012
- | lw UPVAL:RB, LFUNC:RA->uvptr
3809
+ | lw UPVAL:RB, LFUNC:RA->uvptr
3013
3810
  | ins_next1
3014
- | lw TMP1, UPVAL:RB->v
3015
- | sw TMP0, HI(TMP1)
3811
+ | lw TMP1, UPVAL:RB->v
3812
+ | sw TMP0, HI(TMP1)
3016
3813
  | ins_next2
3017
3814
  break;
3018
3815
 
@@ -3048,8 +3845,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3048
3845
  | li TMP0, LJ_TFUNC
3049
3846
  | ins_next1
3050
3847
  | addu RA, BASE, RA
3051
- | sw TMP0, HI(RA)
3052
3848
  | sw LFUNC:CRET1, LO(RA)
3849
+ | sw TMP0, HI(RA)
3053
3850
  | ins_next2
3054
3851
  break;
3055
3852
 
@@ -3130,31 +3927,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3130
3927
  | lw TMP2, HI(CARG3)
3131
3928
  | lw TAB:RB, LO(CARG2)
3132
3929
  | li AT, LJ_TTAB
3133
- | ldc1 f0, 0(CARG3)
3134
3930
  | bne TMP1, AT, ->vmeta_tgetv
3135
3931
  |. addu RA, BASE, RA
3136
- | sltiu AT, TMP2, LJ_TISNUM
3137
- | beqz AT, >5
3138
- |. li AT, LJ_TSTR
3139
- |
3140
- | // Convert number key to integer, check for integerness and range.
3141
- | cvt.w.d f2, f0
3142
- | lw TMP0, TAB:RB->asize
3143
- | mfc1 TMP2, f2
3144
- | cvt.d.w f4, f2
3932
+ | bne TMP2, TISNUM, >5
3933
+ |. lw RC, LO(CARG3)
3934
+ | lw TMP0, TAB:RB->asize
3145
3935
  | lw TMP1, TAB:RB->array
3146
- | c.eq.d f0, f4
3147
- | sltu AT, TMP2, TMP0
3148
- | movf AT, r0
3149
- | sll TMP2, TMP2, 3
3936
+ | sltu AT, RC, TMP0
3937
+ | sll TMP2, RC, 3
3150
3938
  | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3151
3939
  |. addu TMP2, TMP1, TMP2
3152
- | lw TMP0, HI(TMP2)
3153
- | beq TMP0, TISNIL, >2
3154
- |. ldc1 f0, 0(TMP2)
3940
+ | lw SFRETHI, HI(TMP2)
3941
+ | beq SFRETHI, TISNIL, >2
3942
+ |. lw SFRETLO, LO(TMP2)
3155
3943
  |1:
3156
3944
  | ins_next1
3157
- | sdc1 f0, 0(RA)
3945
+ | sw SFRETHI, HI(RA)
3946
+ | sw SFRETLO, LO(RA)
3158
3947
  | ins_next2
3159
3948
  |
3160
3949
  |2: // Check for __index if table value is nil.
@@ -3169,8 +3958,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3169
3958
  |. nop
3170
3959
  |
3171
3960
  |5:
3961
+ | li AT, LJ_TSTR
3172
3962
  | bne TMP2, AT, ->vmeta_tgetv
3173
- |. lw STR:RC, LO(CARG3)
3963
+ |. nop
3174
3964
  | b ->BC_TGETS_Z // String key?
3175
3965
  |. nop
3176
3966
  break;
@@ -3202,18 +3992,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3202
3992
  | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3203
3993
  | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3204
3994
  | lw NODE:TMP1, NODE:TMP2->next
3205
- | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
3995
+ | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3206
3996
  | addiu CARG1, CARG1, -LJ_TSTR
3207
3997
  | xor TMP0, TMP0, STR:RC
3208
3998
  | or AT, CARG1, TMP0
3209
3999
  | bnez AT, >4
3210
4000
  |. lw TAB:TMP3, TAB:RB->metatable
3211
- | beq CARG2, TISNIL, >5 // Key found, but nil value?
3212
- |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2)
4001
+ | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
4002
+ |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3213
4003
  |3:
3214
4004
  | ins_next1
3215
- | sw CARG2, HI(RA)
3216
- | sw CARG1, LO(RA)
4005
+ | sw SFRETHI, HI(RA)
4006
+ | sw SFRETLO, LO(RA)
3217
4007
  | ins_next2
3218
4008
  |
3219
4009
  |4: // Follow hash chain.
@@ -3223,7 +4013,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3223
4013
  |
3224
4014
  |5: // Check for __index if table value is nil.
3225
4015
  | beqz TAB:TMP3, <3 // No metatable: done.
3226
- |. li CARG2, LJ_TNIL
4016
+ |. li SFRETHI, LJ_TNIL
3227
4017
  | lbu TMP0, TAB:TMP3->nomm
3228
4018
  | andi TMP0, TMP0, 1<<MM_index
3229
4019
  | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3248,12 +4038,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3248
4038
  | sltu AT, TMP0, TMP1
3249
4039
  | beqz AT, ->vmeta_tgetb
3250
4040
  |. addu RC, TMP2, RC
3251
- | lw TMP1, HI(RC)
3252
- | beq TMP1, TISNIL, >5
3253
- |. ldc1 f0, 0(RC)
4041
+ | lw SFRETHI, HI(RC)
4042
+ | beq SFRETHI, TISNIL, >5
4043
+ |. lw SFRETLO, LO(RC)
3254
4044
  |1:
3255
4045
  | ins_next1
3256
- | sdc1 f0, 0(RA)
4046
+ | sw SFRETHI, HI(RA)
4047
+ | sw SFRETLO, LO(RA)
3257
4048
  | ins_next2
3258
4049
  |
3259
4050
  |5: // Check for __index if table value is nil.
@@ -3264,7 +4055,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3264
4055
  | andi TMP1, TMP1, 1<<MM_index
3265
4056
  | bnez TMP1, <1 // 'no __index' flag set: done.
3266
4057
  |. nop
3267
- | b ->vmeta_tgetb // Caveat: preserve TMP0!
4058
+ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3268
4059
  |. nop
3269
4060
  break;
3270
4061
  case BC_TGETR:
@@ -3272,23 +4063,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3272
4063
  | decode_RB8a RB, INS
3273
4064
  | decode_RB8b RB
3274
4065
  | decode_RDtoRC8 RC, RD
3275
- | addu CARG2, BASE, RB
3276
- | addu CARG3, BASE, RC
3277
- | lw TAB:CARG1, LO(CARG2)
3278
- | ldc1 f0, 0(CARG3)
3279
- | trunc.w.d f2, f0
3280
- | lw TMP0, TAB:CARG1->asize
3281
- | mfc1 CARG2, f2
4066
+ | addu RB, BASE, RB
4067
+ | addu RC, BASE, RC
4068
+ | lw TAB:CARG1, LO(RB)
4069
+ | lw CARG2, LO(RC)
4070
+ | addu RA, BASE, RA
4071
+ | lw TMP0, TAB:CARG1->asize
3282
4072
  | lw TMP1, TAB:CARG1->array
3283
4073
  | sltu AT, CARG2, TMP0
3284
4074
  | sll TMP2, CARG2, 3
3285
4075
  | beqz AT, ->vmeta_tgetr // In array part?
3286
- |. addu TMP2, TMP1, TMP2
3287
- | ldc1 f0, 0(TMP2)
4076
+ |. addu CRET1, TMP1, TMP2
4077
+ | lw SFARG2HI, HI(CRET1)
4078
+ | lw SFARG2LO, LO(CRET1)
3288
4079
  |->BC_TGETR_Z:
3289
- | addu RA, BASE, RA
3290
4080
  | ins_next1
3291
- | sdc1 f0, 0(RA)
4081
+ | sw SFARG2HI, HI(RA)
4082
+ | sw SFARG2LO, LO(RA)
3292
4083
  | ins_next2
3293
4084
  break;
3294
4085
 
@@ -3303,33 +4094,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3303
4094
  | lw TMP2, HI(CARG3)
3304
4095
  | lw TAB:RB, LO(CARG2)
3305
4096
  | li AT, LJ_TTAB
3306
- | ldc1 f0, 0(CARG3)
3307
4097
  | bne TMP1, AT, ->vmeta_tsetv
3308
4098
  |. addu RA, BASE, RA
3309
- | sltiu AT, TMP2, LJ_TISNUM
3310
- | beqz AT, >5
3311
- |. li AT, LJ_TSTR
3312
- |
3313
- | // Convert number key to integer, check for integerness and range.
3314
- | cvt.w.d f2, f0
3315
- | lw TMP0, TAB:RB->asize
3316
- | mfc1 TMP2, f2
3317
- | cvt.d.w f4, f2
4099
+ | bne TMP2, TISNUM, >5
4100
+ |. lw RC, LO(CARG3)
4101
+ | lw TMP0, TAB:RB->asize
3318
4102
  | lw TMP1, TAB:RB->array
3319
- | c.eq.d f0, f4
3320
- | sltu AT, TMP2, TMP0
3321
- | movf AT, r0
3322
- | sll TMP2, TMP2, 3
4103
+ | sltu AT, RC, TMP0
4104
+ | sll TMP2, RC, 3
3323
4105
  | beqz AT, ->vmeta_tsetv // Integer key and in array part?
3324
4106
  |. addu TMP1, TMP1, TMP2
3325
- | lbu TMP3, TAB:RB->marked
3326
4107
  | lw TMP0, HI(TMP1)
4108
+ | lbu TMP3, TAB:RB->marked
4109
+ | lw SFRETHI, HI(RA)
3327
4110
  | beq TMP0, TISNIL, >3
3328
- |. ldc1 f0, 0(RA)
4111
+ |. lw SFRETLO, LO(RA)
3329
4112
  |1:
3330
- | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3331
- | bnez AT, >7
3332
- |. sdc1 f0, 0(TMP1)
4113
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4114
+ | sw SFRETHI, HI(TMP1)
4115
+ | bnez AT, >7
4116
+ |. sw SFRETLO, LO(TMP1)
3333
4117
  |2:
3334
4118
  | ins_next
3335
4119
  |
@@ -3345,8 +4129,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3345
4129
  |. nop
3346
4130
  |
3347
4131
  |5:
4132
+ | li AT, LJ_TSTR
3348
4133
  | bne TMP2, AT, ->vmeta_tsetv
3349
- |. lw STR:RC, LO(CARG3)
4134
+ |. nop
3350
4135
  | b ->BC_TSETS_Z // String key?
3351
4136
  |. nop
3352
4137
  |
@@ -3378,7 +4163,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3378
4163
  | sll TMP1, TMP1, 3
3379
4164
  | subu TMP1, TMP0, TMP1
3380
4165
  | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4166
+ |.if FPU
3381
4167
  | ldc1 f20, 0(RA)
4168
+ |.else
4169
+ | lw SFRETHI, HI(RA)
4170
+ | lw SFRETLO, LO(RA)
4171
+ |.endif
3382
4172
  |1:
3383
4173
  | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3384
4174
  | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3392,8 +4182,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3392
4182
  |. lw TAB:TMP0, TAB:RB->metatable
3393
4183
  |2:
3394
4184
  | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4185
+ |.if FPU
3395
4186
  | bnez AT, >7
3396
4187
  |. sdc1 f20, NODE:TMP2->val
4188
+ |.else
4189
+ | sw SFRETHI, NODE:TMP2->val.u32.hi
4190
+ | bnez AT, >7
4191
+ |. sw SFRETLO, NODE:TMP2->val.u32.lo
4192
+ |.endif
3397
4193
  |3:
3398
4194
  | ins_next
3399
4195
  |
@@ -3431,8 +4227,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3431
4227
  |. move CARG1, L
3432
4228
  | // Returns TValue *.
3433
4229
  | lw BASE, L->base
4230
+ |.if FPU
3434
4231
  | b <3 // No 2nd write barrier needed.
3435
4232
  |. sdc1 f20, 0(CRET1)
4233
+ |.else
4234
+ | lw SFARG1HI, HI(RA)
4235
+ | lw SFARG1LO, LO(RA)
4236
+ | sw SFARG1HI, HI(CRET1)
4237
+ | b <3 // No 2nd write barrier needed.
4238
+ |. sw SFARG1LO, LO(CRET1)
4239
+ |.endif
3436
4240
  |
3437
4241
  |7: // Possible table write barrier for the value. Skip valiswhite check.
3438
4242
  | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3457,11 +4261,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3457
4261
  | lw TMP1, HI(RC)
3458
4262
  | lbu TMP3, TAB:RB->marked
3459
4263
  | beq TMP1, TISNIL, >5
3460
- |. ldc1 f0, 0(RA)
3461
4264
  |1:
4265
+ |. lw SFRETHI, HI(RA)
4266
+ | lw SFRETLO, LO(RA)
3462
4267
  | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4268
+ | sw SFRETHI, HI(RC)
3463
4269
  | bnez AT, >7
3464
- |. sdc1 f0, 0(RC)
4270
+ |. sw SFRETLO, LO(RC)
3465
4271
  |2:
3466
4272
  | ins_next
3467
4273
  |
@@ -3473,7 +4279,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3473
4279
  | andi TMP1, TMP1, 1<<MM_newindex
3474
4280
  | bnez TMP1, <1 // 'no __newindex' flag set: done.
3475
4281
  |. nop
3476
- | b ->vmeta_tsetb // Caveat: preserve TMP0!
4282
+ | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
3477
4283
  |. nop
3478
4284
  |
3479
4285
  |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -3486,13 +4292,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3486
4292
  | decode_RDtoRC8 RC, RD
3487
4293
  | addu CARG1, BASE, RB
3488
4294
  | addu CARG3, BASE, RC
3489
- | lw TAB:CARG2, LO(CARG1)
3490
- | ldc1 f0, 0(CARG3)
3491
- | trunc.w.d f2, f0
3492
- | lbu TMP3, TAB:CARG2->marked
4295
+ | lw TAB:CARG2, LO(CARG1)
4296
+ | lw CARG3, LO(CARG3)
4297
+ | lbu TMP3, TAB:CARG2->marked
3493
4298
  | lw TMP0, TAB:CARG2->asize
3494
- | mfc1 CARG3, f2
3495
- | lw TMP1, TAB:CARG2->array
4299
+ | lw TMP1, TAB:CARG2->array
3496
4300
  | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3497
4301
  | bnez AT, >7
3498
4302
  |. addu RA, BASE, RA
@@ -3500,18 +4304,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3500
4304
  | sltu AT, CARG3, TMP0
3501
4305
  | sll TMP2, CARG3, 3
3502
4306
  | beqz AT, ->vmeta_tsetr // In array part?
3503
- |. ldc1 f20, 0(RA)
3504
- | addu CRET1, TMP1, TMP2
4307
+ |. addu CRET1, TMP1, TMP2
3505
4308
  |->BC_TSETR_Z:
4309
+ | lw SFARG1HI, HI(RA)
4310
+ | lw SFARG1LO, LO(RA)
3506
4311
  | ins_next1
3507
- | sdc1 f20, 0(CRET1)
4312
+ | sw SFARG1HI, HI(CRET1)
4313
+ | sw SFARG1LO, LO(CRET1)
3508
4314
  | ins_next2
3509
4315
  |
3510
4316
  |7: // Possible table write barrier for the value. Skip valiswhite check.
3511
4317
  | barrierback TAB:RB, TMP3, TMP0, <2
3512
4318
  break;
3513
4319
 
3514
-
3515
4320
  case BC_TSETM:
3516
4321
  | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
3517
4322
  | addu RA, BASE, RA
@@ -3533,10 +4338,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3533
4338
  | addu TMP1, TMP1, CARG1
3534
4339
  | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3535
4340
  |3: // Copy result slots to table.
3536
- | ldc1 f0, 0(RA)
4341
+ | lw SFRETHI, HI(RA)
4342
+ | lw SFRETLO, LO(RA)
3537
4343
  | addiu RA, RA, 8
3538
4344
  | sltu AT, RA, TMP2
3539
- | sdc1 f0, 0(TMP1)
4345
+ | sw SFRETHI, HI(TMP1)
4346
+ | sw SFRETLO, LO(TMP1)
3540
4347
  | bnez AT, <3
3541
4348
  |. addiu TMP1, TMP1, 8
3542
4349
  | bnez TMP0, >7
@@ -3611,10 +4418,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3611
4418
  | beqz NARGS8:RC, >3
3612
4419
  |. move TMP3, NARGS8:RC
3613
4420
  |2:
3614
- | ldc1 f0, 0(RA)
4421
+ | lw SFRETHI, HI(RA)
4422
+ | lw SFRETLO, LO(RA)
3615
4423
  | addiu RA, RA, 8
3616
4424
  | addiu TMP3, TMP3, -8
3617
- | sdc1 f0, 0(TMP2)
4425
+ | sw SFRETHI, HI(TMP2)
4426
+ | sw SFRETLO, LO(TMP2)
3618
4427
  | bnez TMP3, <2
3619
4428
  |. addiu TMP2, TMP2, 8
3620
4429
  |3:
@@ -3651,12 +4460,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3651
4460
  | li AT, LJ_TFUNC
3652
4461
  | lw TMP1, -24+HI(BASE)
3653
4462
  | lw LFUNC:RB, -24+LO(BASE)
3654
- | ldc1 f2, -8(BASE)
3655
- | ldc1 f0, -16(BASE)
4463
+ | lw SFARG1HI, -16+HI(BASE)
4464
+ | lw SFARG1LO, -16+LO(BASE)
4465
+ | lw SFARG2HI, -8+HI(BASE)
4466
+ | lw SFARG2LO, -8+LO(BASE)
3656
4467
  | sw TMP1, HI(BASE) // Copy callable.
3657
4468
  | sw LFUNC:RB, LO(BASE)
3658
- | sdc1 f2, 16(BASE) // Copy control var.
3659
- | sdc1 f0, 8(BASE) // Copy state.
4469
+ | sw SFARG1HI, 8+HI(BASE) // Copy state.
4470
+ | sw SFARG1LO, 8+LO(BASE)
4471
+ | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4472
+ | sw SFARG2LO, 16+LO(BASE)
3660
4473
  | addiu BASE, BASE, 8
3661
4474
  | bne TMP1, AT, ->vmeta_call
3662
4475
  |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3679,20 +4492,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3679
4492
  | beqz AT, >5 // Index points after array part?
3680
4493
  |. sll TMP3, RC, 3
3681
4494
  | addu TMP3, TMP1, TMP3
3682
- | lw TMP2, HI(TMP3)
3683
- | ldc1 f0, 0(TMP3)
3684
- | mtc1 RC, f2
4495
+ | lw SFARG1HI, HI(TMP3)
4496
+ | lw SFARG1LO, LO(TMP3)
3685
4497
  | lhu RD, -4+OFS_RD(PC)
3686
- | beq TMP2, TISNIL, <1 // Skip holes in array part.
4498
+ | sw TISNUM, HI(RA)
4499
+ | sw RC, LO(RA)
4500
+ | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
3687
4501
  |. addiu RC, RC, 1
3688
- | cvt.d.w f2, f2
4502
+ | sw SFARG1HI, 8+HI(RA)
4503
+ | sw SFARG1LO, 8+LO(RA)
3689
4504
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3690
- | sdc1 f0, 8(RA)
3691
4505
  | decode_RD4b RD
3692
4506
  | addu RD, RD, TMP3
3693
4507
  | sw RC, -8+LO(RA) // Update control var.
3694
4508
  | addu PC, PC, RD
3695
- | sdc1 f2, 0(RA)
3696
4509
  |3:
3697
4510
  | ins_next
3698
4511
  |
@@ -3707,18 +4520,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3707
4520
  | sll RB, RC, 3
3708
4521
  | subu TMP3, TMP3, RB
3709
4522
  | addu NODE:TMP3, TMP3, TMP2
3710
- | lw RB, HI(NODE:TMP3)
3711
- | ldc1 f0, 0(NODE:TMP3)
4523
+ | lw SFARG1HI, NODE:TMP3->val.u32.hi
4524
+ | lw SFARG1LO, NODE:TMP3->val.u32.lo
3712
4525
  | lhu RD, -4+OFS_RD(PC)
3713
- | beq RB, TISNIL, <6 // Skip holes in hash part.
4526
+ | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
3714
4527
  |. addiu RC, RC, 1
3715
- | ldc1 f2, NODE:TMP3->key
4528
+ | lw SFARG2HI, NODE:TMP3->key.u32.hi
4529
+ | lw SFARG2LO, NODE:TMP3->key.u32.lo
3716
4530
  | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3717
- | sdc1 f0, 8(RA)
4531
+ | sw SFARG1HI, 8+HI(RA)
4532
+ | sw SFARG1LO, 8+LO(RA)
3718
4533
  | addu RC, RC, TMP0
3719
4534
  | decode_RD4b RD
3720
4535
  | addu RD, RD, TMP3
3721
- | sdc1 f2, 0(RA)
4536
+ | sw SFARG2HI, HI(RA)
4537
+ | sw SFARG2LO, LO(RA)
3722
4538
  | addu PC, PC, RD
3723
4539
  | b <3
3724
4540
  |. sw RC, -8+LO(RA) // Update control var.
@@ -3798,9 +4614,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3798
4614
  | bnez AT, >7
3799
4615
  |. addiu MULTRES, TMP1, 8
3800
4616
  |6:
3801
- | ldc1 f0, 0(RC)
4617
+ | lw SFRETHI, HI(RC)
4618
+ | lw SFRETLO, LO(RC)
3802
4619
  | addiu RC, RC, 8
3803
- | sdc1 f0, 0(RA)
4620
+ | sw SFRETHI, HI(RA)
4621
+ | sw SFRETLO, LO(RA)
3804
4622
  | sltu AT, RC, TMP3
3805
4623
  | bnez AT, <6 // More vararg slots?
3806
4624
  |. addiu RA, RA, 8
@@ -3856,10 +4674,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3856
4674
  | beqz RC, >3
3857
4675
  |. subu BASE, TMP2, TMP0
3858
4676
  |2:
3859
- | ldc1 f0, 0(RA)
4677
+ | lw SFRETHI, HI(RA)
4678
+ | lw SFRETLO, LO(RA)
3860
4679
  | addiu RA, RA, 8
3861
4680
  | addiu RC, RC, -8
3862
- | sdc1 f0, 0(TMP2)
4681
+ | sw SFRETHI, HI(TMP2)
4682
+ | sw SFRETLO, LO(TMP2)
3863
4683
  | bnez RC, <2
3864
4684
  |. addiu TMP2, TMP2, 8
3865
4685
  |3:
@@ -3900,14 +4720,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3900
4720
  | lw INS, -4(PC)
3901
4721
  | addiu TMP2, BASE, -8
3902
4722
  if (op == BC_RET1) {
3903
- | ldc1 f0, 0(RA)
4723
+ | lw SFRETHI, HI(RA)
4724
+ | lw SFRETLO, LO(RA)
3904
4725
  }
3905
4726
  | decode_RB8a RB, INS
3906
4727
  | decode_RA8a RA, INS
3907
4728
  | decode_RB8b RB
3908
4729
  | decode_RA8b RA
3909
4730
  if (op == BC_RET1) {
3910
- | sdc1 f0, 0(TMP2)
4731
+ | sw SFRETHI, HI(TMP2)
4732
+ | sw SFRETLO, LO(TMP2)
3911
4733
  }
3912
4734
  | subu BASE, TMP2, RA
3913
4735
  |5:
@@ -3949,69 +4771,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3949
4771
  | // RA = base*8, RD = target (after end of loop or start of loop)
3950
4772
  vk = (op == BC_IFORL || op == BC_JFORL);
3951
4773
  | addu RA, BASE, RA
3952
- if (vk) {
3953
- | ldc1 f0, FORL_IDX*8(RA)
3954
- | ldc1 f4, FORL_STEP*8(RA)
3955
- | ldc1 f2, FORL_STOP*8(RA)
3956
- | lw TMP3, FORL_STEP*8+HI(RA)
3957
- | add.d f0, f0, f4
3958
- | sdc1 f0, FORL_IDX*8(RA)
3959
- } else {
3960
- | lw TMP1, FORL_IDX*8+HI(RA)
3961
- | lw TMP3, FORL_STEP*8+HI(RA)
3962
- | lw TMP2, FORL_STOP*8+HI(RA)
3963
- | sltiu TMP1, TMP1, LJ_TISNUM
3964
- | sltiu TMP0, TMP3, LJ_TISNUM
3965
- | sltiu TMP2, TMP2, LJ_TISNUM
3966
- | and TMP1, TMP1, TMP0
3967
- | and TMP1, TMP1, TMP2
3968
- | ldc1 f0, FORL_IDX*8(RA)
3969
- | beqz TMP1, ->vmeta_for
3970
- |. ldc1 f2, FORL_STOP*8(RA)
3971
- }
4774
+ | lw SFARG1HI, FORL_IDX*8+HI(RA)
4775
+ | lw SFARG1LO, FORL_IDX*8+LO(RA)
3972
4776
  if (op != BC_JFORL) {
3973
4777
  | srl RD, RD, 1
3974
- | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535)
4778
+ | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4779
+ | addu TMP2, RD, TMP2
4780
+ }
4781
+ if (!vk) {
4782
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
4783
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
4784
+ | bne SFARG1HI, TISNUM, >5
4785
+ |. lw SFRETHI, FORL_STEP*8+HI(RA)
4786
+ | xor AT, SFARG2HI, TISNUM
4787
+ | lw SFRETLO, FORL_STEP*8+LO(RA)
4788
+ | xor TMP0, SFRETHI, TISNUM
4789
+ | or AT, AT, TMP0
4790
+ | bnez AT, ->vmeta_for
4791
+ |. slt AT, SFRETLO, r0
4792
+ | slt CRET1, SFARG2LO, SFARG1LO
4793
+ | slt TMP1, SFARG1LO, SFARG2LO
4794
+ | movn CRET1, TMP1, AT
4795
+ } else {
4796
+ | bne SFARG1HI, TISNUM, >5
4797
+ |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4798
+ | lw SFRETLO, FORL_STOP*8+LO(RA)
4799
+ | move TMP3, SFARG1LO
4800
+ | addu SFARG1LO, SFARG1LO, SFARG2LO
4801
+ | xor TMP0, SFARG1LO, TMP3
4802
+ | xor TMP1, SFARG1LO, SFARG2LO
4803
+ | and TMP0, TMP0, TMP1
4804
+ | slt TMP1, SFARG1LO, SFRETLO
4805
+ | slt CRET1, SFRETLO, SFARG1LO
4806
+ | slt AT, SFARG2LO, r0
4807
+ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4808
+ | movn CRET1, TMP1, AT
4809
+ | or CRET1, CRET1, TMP0
4810
+ }
4811
+ |1:
4812
+ if (op == BC_FORI) {
4813
+ | movz TMP2, r0, CRET1
4814
+ | addu PC, PC, TMP2
4815
+ } else if (op == BC_JFORI) {
4816
+ | addu PC, PC, TMP2
4817
+ | lhu RD, -4+OFS_RD(PC)
4818
+ } else if (op == BC_IFORL) {
4819
+ | movn TMP2, r0, CRET1
4820
+ | addu PC, PC, TMP2
4821
+ }
4822
+ if (vk) {
4823
+ | sw SFARG1HI, FORL_IDX*8+HI(RA)
4824
+ | sw SFARG1LO, FORL_IDX*8+LO(RA)
3975
4825
  }
3976
- | c.le.d 0, f0, f2
3977
- | c.le.d 1, f2, f0
3978
- | sdc1 f0, FORL_EXT*8(RA)
4826
+ | ins_next1
4827
+ | sw SFARG1HI, FORL_EXT*8+HI(RA)
4828
+ | sw SFARG1LO, FORL_EXT*8+LO(RA)
4829
+ |2:
3979
4830
  if (op == BC_JFORI) {
3980
- | li TMP1, 1
3981
- | li TMP2, 1
3982
- | addu TMP0, RD, TMP0
3983
- | slt TMP3, TMP3, r0
3984
- | movf TMP1, r0, 0
3985
- | addu PC, PC, TMP0
3986
- | movf TMP2, r0, 1
3987
- | lhu RD, -4+OFS_RD(PC)
3988
- | movn TMP1, TMP2, TMP3
3989
- | bnez TMP1, =>BC_JLOOP
4831
+ | beqz CRET1, =>BC_JLOOP
3990
4832
  |. decode_RD8b RD
3991
4833
  } else if (op == BC_JFORL) {
3992
- | li TMP1, 1
3993
- | li TMP2, 1
3994
- | slt TMP3, TMP3, r0
3995
- | movf TMP1, r0, 0
3996
- | movf TMP2, r0, 1
3997
- | movn TMP1, TMP2, TMP3
3998
- | bnez TMP1, =>BC_JLOOP
4834
+ | beqz CRET1, =>BC_JLOOP
4835
+ }
4836
+ | ins_next2
4837
+ |
4838
+ |5: // FP loop.
4839
+ |.if FPU
4840
+ if (!vk) {
4841
+ | ldc1 f0, FORL_IDX*8(RA)
4842
+ | ldc1 f2, FORL_STOP*8(RA)
4843
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
4844
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
4845
+ | sltiu AT, SFRETHI, LJ_TISNUM
4846
+ | and TMP0, TMP0, TMP1
4847
+ | and AT, AT, TMP0
4848
+ | beqz AT, ->vmeta_for
4849
+ |. slt TMP3, SFRETHI, r0
4850
+ | c.ole.d 0, f0, f2
4851
+ | c.ole.d 1, f2, f0
4852
+ | li CRET1, 1
4853
+ | movt CRET1, r0, 0
4854
+ | movt AT, r0, 1
4855
+ | b <1
4856
+ |. movn CRET1, AT, TMP3
4857
+ } else {
4858
+ | ldc1 f0, FORL_IDX*8(RA)
4859
+ | ldc1 f4, FORL_STEP*8(RA)
4860
+ | ldc1 f2, FORL_STOP*8(RA)
4861
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
4862
+ | add.d f0, f0, f4
4863
+ | c.ole.d 0, f0, f2
4864
+ | c.ole.d 1, f2, f0
4865
+ | slt TMP3, SFARG2HI, r0
4866
+ | li CRET1, 1
4867
+ | li AT, 1
4868
+ | movt CRET1, r0, 0
4869
+ | movt AT, r0, 1
4870
+ | movn CRET1, AT, TMP3
4871
+ if (op == BC_IFORL) {
4872
+ | movn TMP2, r0, CRET1
4873
+ | addu PC, PC, TMP2
4874
+ }
4875
+ | sdc1 f0, FORL_IDX*8(RA)
4876
+ | ins_next1
4877
+ | b <2
4878
+ |. sdc1 f0, FORL_EXT*8(RA)
4879
+ }
4880
+ |.else
4881
+ if (!vk) {
4882
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
4883
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
4884
+ | sltiu AT, SFRETHI, LJ_TISNUM
4885
+ | and TMP0, TMP0, TMP1
4886
+ | and AT, AT, TMP0
4887
+ | beqz AT, ->vmeta_for
4888
+ |. nop
4889
+ | bal ->vm_sfcmpolex
4890
+ |. move TMP3, SFRETHI
4891
+ | b <1
3999
4892
  |. nop
4000
4893
  } else {
4001
- | addu TMP1, RD, TMP0
4002
- | slt TMP3, TMP3, r0
4003
- | move TMP2, TMP1
4004
- if (op == BC_FORI) {
4005
- | movt TMP1, r0, 0
4006
- | movt TMP2, r0, 1
4894
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
4895
+ | load_got __adddf3
4896
+ | call_extern
4897
+ |. sw TMP2, ARG5
4898
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
4899
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
4900
+ | move SFARG1HI, SFRETHI
4901
+ | move SFARG1LO, SFRETLO
4902
+ | bal ->vm_sfcmpolex
4903
+ |. lw TMP3, FORL_STEP*8+HI(RA)
4904
+ if ( op == BC_JFORL ) {
4905
+ | lhu RD, -4+OFS_RD(PC)
4906
+ | lw TMP2, ARG5
4907
+ | b <1
4908
+ |. decode_RD8b RD
4007
4909
  } else {
4008
- | movf TMP1, r0, 0
4009
- | movf TMP2, r0, 1
4910
+ | b <1
4911
+ |. lw TMP2, ARG5
4010
4912
  }
4011
- | movn TMP1, TMP2, TMP3
4012
- | addu PC, PC, TMP1
4013
4913
  }
4014
- | ins_next
4914
+ |.endif
4015
4915
  break;
4016
4916
 
4017
4917
  case BC_ITERL:
@@ -4260,8 +5160,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4260
5160
  fcofs, CFRAME_SIZE);
4261
5161
  for (i = 23; i >= 16; i--)
4262
5162
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5163
+ #if !LJ_SOFTFP
4263
5164
  for (i = 30; i >= 20; i -= 2)
4264
5165
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5166
+ #endif
4265
5167
  fprintf(ctx->fp,
4266
5168
  "\t.align 2\n"
4267
5169
  ".LEFDE0:\n\n");
@@ -4279,6 +5181,7 @@ static void emit_asm_debug(BuildCtx *ctx)
4279
5181
  "\t.align 2\n"
4280
5182
  ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4281
5183
  #endif
5184
+ #if !LJ_NO_UNWIND
4282
5185
  fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
4283
5186
  fprintf(ctx->fp,
4284
5187
  "\t.globl lj_err_unwind_dwarf\n"
@@ -4312,8 +5215,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4312
5215
  fcofs, CFRAME_SIZE);
4313
5216
  for (i = 23; i >= 16; i--)
4314
5217
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5218
+ #if !LJ_SOFTFP
4315
5219
  for (i = 30; i >= 20; i -= 2)
4316
5220
  fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5221
+ #endif
4317
5222
  fprintf(ctx->fp,
4318
5223
  "\t.align 2\n"
4319
5224
  ".LEFDE2:\n\n");
@@ -4346,6 +5251,7 @@ static void emit_asm_debug(BuildCtx *ctx)
4346
5251
  "\t.byte 0xd\n\t.uleb128 0x10\n"
4347
5252
  "\t.align 2\n"
4348
5253
  ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
5254
+ #endif
4349
5255
  #endif
4350
5256
  break;
4351
5257
  default: