asmjit 0.2.0 → 0.2.1
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/asmjit.gemspec +1 -1
- data/ext/asmjit/asmjit/.editorconfig +10 -0
- data/ext/asmjit/asmjit/.github/FUNDING.yml +1 -0
- data/ext/asmjit/asmjit/.github/workflows/build-config.json +47 -0
- data/ext/asmjit/asmjit/.github/workflows/build.yml +156 -0
- data/ext/asmjit/asmjit/.gitignore +6 -0
- data/ext/asmjit/asmjit/CMakeLists.txt +611 -0
- data/ext/asmjit/asmjit/LICENSE.md +17 -0
- data/ext/asmjit/asmjit/README.md +69 -0
- data/ext/asmjit/asmjit/src/asmjit/a64.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64archtraits_p.h +81 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.cpp +5115 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64assembler.h +72 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.cpp +51 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64builder.h +57 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.cpp +60 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64compiler.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper.cpp +464 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emithelper_p.h +50 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64emitter.h +1228 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter.cpp +298 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64formatter_p.h +59 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func.cpp +189 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64globals.h +1894 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi.cpp +278 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.cpp +1957 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb.h +74 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64instdb_p.h +876 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.cpp +85 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64operand.h +312 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass.cpp +852 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64rapass_p.h +105 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/a64utils.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter.cpp +143 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armformatter_p.h +44 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armglobals.h +21 -0
- data/ext/asmjit/asmjit/src/asmjit/arm/armoperand.h +621 -0
- data/ext/asmjit/asmjit/src/asmjit/arm.h +62 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-begin.h +17 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit-scope-end.h +9 -0
- data/ext/asmjit/asmjit/src/asmjit/asmjit.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-build_p.h +55 -0
- data/ext/asmjit/asmjit/src/asmjit/core/api-config.h +613 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archcommons.h +229 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.cpp +160 -0
- data/ext/asmjit/asmjit/src/asmjit/core/archtraits.h +290 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.cpp +406 -0
- data/ext/asmjit/asmjit/src/asmjit/core/assembler.h +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.cpp +889 -0
- data/ext/asmjit/asmjit/src/asmjit/core/builder.h +1391 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codebuffer.h +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.cpp +1149 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codeholder.h +1035 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter.cpp +175 -0
- data/ext/asmjit/asmjit/src/asmjit/core/codewriter_p.h +179 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.cpp +582 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compiler.h +737 -0
- data/ext/asmjit/asmjit/src/asmjit/core/compilerdefs.h +173 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.cpp +363 -0
- data/ext/asmjit/asmjit/src/asmjit/core/constpool.h +250 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.cpp +1162 -0
- data/ext/asmjit/asmjit/src/asmjit/core/cpuinfo.h +813 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper.cpp +323 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emithelper_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.cpp +333 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitter.h +741 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils.cpp +129 -0
- data/ext/asmjit/asmjit/src/asmjit/core/emitterutils_p.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.cpp +46 -0
- data/ext/asmjit/asmjit/src/asmjit/core/environment.h +508 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/errorhandler.h +228 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.cpp +584 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter.h +247 -0
- data/ext/asmjit/asmjit/src/asmjit/core/formatter_p.h +34 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.cpp +286 -0
- data/ext/asmjit/asmjit/src/asmjit/core/func.h +1445 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext.cpp +293 -0
- data/ext/asmjit/asmjit/src/asmjit/core/funcargscontext_p.h +199 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.cpp +133 -0
- data/ext/asmjit/asmjit/src/asmjit/core/globals.h +393 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.cpp +113 -0
- data/ext/asmjit/asmjit/src/asmjit/core/inst.h +772 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.cpp +1242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitallocator.h +261 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.cpp +80 -0
- data/ext/asmjit/asmjit/src/asmjit/core/jitruntime.h +89 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.cpp +69 -0
- data/ext/asmjit/asmjit/src/asmjit/core/logger.h +198 -0
- data/ext/asmjit/asmjit/src/asmjit/core/misc_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.cpp +132 -0
- data/ext/asmjit/asmjit/src/asmjit/core/operand.h +1611 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.cpp +84 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils.h +61 -0
- data/ext/asmjit/asmjit/src/asmjit/core/osutils_p.h +68 -0
- data/ext/asmjit/asmjit/src/asmjit/core/raassignment_p.h +418 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rabuilders_p.h +612 -0
- data/ext/asmjit/asmjit/src/asmjit/core/radefs_p.h +1204 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp +1166 -0
- data/ext/asmjit/asmjit/src/asmjit/core/ralocal_p.h +254 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass.cpp +1969 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rapass_p.h +1183 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack.cpp +184 -0
- data/ext/asmjit/asmjit/src/asmjit/core/rastack_p.h +171 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.cpp +559 -0
- data/ext/asmjit/asmjit/src/asmjit/core/string.h +372 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.cpp +494 -0
- data/ext/asmjit/asmjit/src/asmjit/core/support.h +1773 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.cpp +14 -0
- data/ext/asmjit/asmjit/src/asmjit/core/target.h +53 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.cpp +74 -0
- data/ext/asmjit/asmjit/src/asmjit/core/type.h +419 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.cpp +722 -0
- data/ext/asmjit/asmjit/src/asmjit/core/virtmem.h +242 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.cpp +353 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zone.h +615 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.cpp +309 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonehash.h +186 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.cpp +163 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonelist.h +209 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.cpp +176 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestack.h +239 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonestring.h +120 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.cpp +99 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonetree.h +380 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.cpp +356 -0
- data/ext/asmjit/asmjit/src/asmjit/core/zonevector.h +690 -0
- data/ext/asmjit/asmjit/src/asmjit/core.h +1861 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86archtraits_p.h +148 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.cpp +5110 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86assembler.h +685 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.cpp +52 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86builder.h +351 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.cpp +61 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86compiler.h +721 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper.cpp +619 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emithelper_p.h +60 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86emitter.h +4315 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter.cpp +944 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86formatter_p.h +58 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func.cpp +503 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86func_p.h +33 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86globals.h +2169 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi.cpp +1732 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instapi_p.h +41 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.cpp +4427 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb.h +563 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86instdb_p.h +311 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86opcode_p.h +436 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.cpp +231 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86operand.h +1085 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass.cpp +1509 -0
- data/ext/asmjit/asmjit/src/asmjit/x86/x86rapass_p.h +94 -0
- data/ext/asmjit/asmjit/src/asmjit/x86.h +93 -0
- data/ext/asmjit/asmjit/src/asmjit.natvis +245 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.cpp +84 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler.h +85 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_a64.cpp +4006 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x64.cpp +17833 -0
- data/ext/asmjit/asmjit/test/asmjit_test_assembler_x86.cpp +8300 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.cpp +253 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler.h +73 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_a64.cpp +690 -0
- data/ext/asmjit/asmjit/test/asmjit_test_compiler_x86.cpp +4317 -0
- data/ext/asmjit/asmjit/test/asmjit_test_emitters.cpp +197 -0
- data/ext/asmjit/asmjit/test/asmjit_test_instinfo.cpp +181 -0
- data/ext/asmjit/asmjit/test/asmjit_test_misc.h +257 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.cpp +62 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf.h +61 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_a64.cpp +699 -0
- data/ext/asmjit/asmjit/test/asmjit_test_perf_x86.cpp +5032 -0
- data/ext/asmjit/asmjit/test/asmjit_test_unit.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjit_test_x86_sections.cpp +172 -0
- data/ext/asmjit/asmjit/test/asmjitutils.h +38 -0
- data/ext/asmjit/asmjit/test/broken.cpp +312 -0
- data/ext/asmjit/asmjit/test/broken.h +148 -0
- data/ext/asmjit/asmjit/test/cmdline.h +61 -0
- data/ext/asmjit/asmjit/test/performancetimer.h +41 -0
- data/ext/asmjit/asmjit/tools/configure-makefiles.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-ninja.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-sanitizers.sh +13 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2019-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x64.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-vs2022-x86.bat +2 -0
- data/ext/asmjit/asmjit/tools/configure-xcode.sh +8 -0
- data/ext/asmjit/asmjit/tools/enumgen.js +417 -0
- data/ext/asmjit/asmjit/tools/enumgen.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.js +365 -0
- data/ext/asmjit/asmjit/tools/tablegen-arm.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.js +2638 -0
- data/ext/asmjit/asmjit/tools/tablegen-x86.sh +3 -0
- data/ext/asmjit/asmjit/tools/tablegen.js +947 -0
- data/ext/asmjit/asmjit/tools/tablegen.sh +4 -0
- data/ext/asmjit/asmjit.cc +18 -0
- data/lib/asmjit/version.rb +1 -1
- metadata +197 -2
data/ext/asmjit/asmjit/src/asmjit/core/ralocal.cpp (new file)
@@ -0,0 +1,1166 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib

#include "../core/api-build_p.h"
#ifndef ASMJIT_NO_COMPILER

#include "../core/ralocal_p.h"
#include "../core/support.h"

ASMJIT_BEGIN_NAMESPACE

// RALocalAllocator - Utilities
// ============================

static ASMJIT_FORCE_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
  for (size_t i = 0; i < count; i++)
    if (tiedRegs[i].workId() == workId)
      return &tiedRegs[i];
  return nullptr;
}

// RALocalAllocator - Init & Reset
// ===============================

Error RALocalAllocator::init() noexcept {
  PhysToWorkMap* physToWorkMap;
  WorkToPhysMap* workToPhysMap;

  physToWorkMap = _pass->newPhysToWorkMap();
  workToPhysMap = _pass->newWorkToPhysMap();
  if (!physToWorkMap || !workToPhysMap)
    return DebugUtils::errored(kErrorOutOfMemory);

  _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
  _curAssignment.initMaps(physToWorkMap, workToPhysMap);

  physToWorkMap = _pass->newPhysToWorkMap();
  workToPhysMap = _pass->newWorkToPhysMap();
  _tmpWorkToPhysMap = _pass->newWorkToPhysMap();

  if (!physToWorkMap || !workToPhysMap || !_tmpWorkToPhysMap)
    return DebugUtils::errored(kErrorOutOfMemory);

  _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
  _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);

  return kErrorOk;
}

// RALocalAllocator - Assignment
// =============================

Error RALocalAllocator::makeInitialAssignment() noexcept {
  FuncNode* func = _pass->func();
  RABlock* entry = _pass->entryBlock();

  ZoneBitVector& liveIn = entry->liveIn();
  uint32_t argCount = func->argCount();
  uint32_t numIter = 1;

  for (uint32_t iter = 0; iter < numIter; iter++) {
    for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
      for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
        // Unassigned argument.
        const RegOnly& regArg = func->argPack(argIndex)[valueIndex];
        if (!regArg.isReg() || !_cc->isVirtIdValid(regArg.id()))
          continue;

        VirtReg* virtReg = _cc->virtRegById(regArg.id());

        // Unreferenced argument.
        RAWorkReg* workReg = virtReg->workReg();
        if (!workReg)
          continue;

        // Overwritten argument.
        uint32_t workId = workReg->workId();
        if (!liveIn.bitAt(workId))
          continue;

        RegGroup group = workReg->group();
        if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
          continue;

        RegMask allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
        if (iter == 0) {
          // First iteration: Try to allocate to home RegId.
          if (workReg->hasHomeRegId()) {
            uint32_t physId = workReg->homeRegId();
            if (Support::bitTest(allocableRegs, physId)) {
              _curAssignment.assign(group, workId, physId, true);
              _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
              continue;
            }
          }

          numIter = 2;
        }
        else {
          // Second iteration: Pick any other register if there is an unassigned one or assign to stack.
          if (allocableRegs) {
            uint32_t physId = Support::ctz(allocableRegs);
            _curAssignment.assign(group, workId, physId, true);
            _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->type(), physId, workReg->typeId());
          }
          else {
            // This register will definitely need stack, create the slot now and assign also `argIndex`
            // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes.
            RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
            if (ASMJIT_UNLIKELY(!slot))
              return DebugUtils::errored(kErrorOutOfMemory);

            // This means STACK_ARG may be moved to STACK.
            workReg->addFlags(RAWorkRegFlags::kStackArgToStack);
            _pass->_numStackArgsToStackSlots++;
          }
        }
      }
    }
  }

  return kErrorOk;
}

Error RALocalAllocator::replaceAssignment(const PhysToWorkMap* physToWorkMap) noexcept {
  _curAssignment.copyFrom(physToWorkMap);
  return kErrorOk;
}

Error RALocalAllocator::switchToAssignment(PhysToWorkMap* dstPhysToWorkMap, const ZoneBitVector& liveIn, bool dstReadOnly, bool tryMode) noexcept {
  RAAssignment dst;
  RAAssignment& cur = _curAssignment;

  dst.initLayout(_pass->_physRegCount, _pass->workRegs());
  dst.initMaps(dstPhysToWorkMap, _tmpWorkToPhysMap);
  dst.assignWorkIdsFromPhysIds();

  if (tryMode)
    return kErrorOk;

  for (RegGroup group : RegGroupVirtValues{}) {
    // STEP 1
    // ------
    //
    // - KILL all registers that are not live at `dst`,
    // - SPILL all registers that are not assigned at `dst`.

    if (!tryMode) {
      Support::BitWordIterator<RegMask> it(cur.assigned(group));
      while (it.hasNext()) {
        uint32_t physId = it.next();
        uint32_t workId = cur.physToWorkId(group, physId);

        // Must be true as we iterate over assigned registers.
        ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);

        // KILL if it's not live on entry.
        if (!liveIn.bitAt(workId)) {
          onKillReg(group, workId, physId);
          continue;
        }

        // SPILL if it's not assigned on entry.
        uint32_t altId = dst.workToPhysId(group, workId);
        if (altId == RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
        }
      }
    }

    // STEP 2
    // ------
    //
    // - MOVE and SWAP registers from their current assignments into their DST assignments.
    // - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.

    // Current run-id (1 means more aggressive decisions).
    int32_t runId = -1;
    // Remaining registers scheduled for `onLoadReg()`.
    RegMask willLoadRegs = 0;
    // Remaining registers to be allocated in this loop.
    RegMask affectedRegs = dst.assigned(group);

    while (affectedRegs) {
      if (++runId == 2) {
        if (!tryMode)
          return DebugUtils::errored(kErrorInvalidState);

        // Stop in `tryMode` if we haven't done anything in past two rounds.
        break;
      }

      Support::BitWordIterator<RegMask> it(affectedRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();
        RegMask physMask = Support::bitMask<RegMask>(physId);

        uint32_t curWorkId = cur.physToWorkId(group, physId);
        uint32_t dstWorkId = dst.physToWorkId(group, physId);

        // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
        ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);

        if (curWorkId != RAAssignment::kWorkNone) {
          // Both assigned.
          if (curWorkId != dstWorkId) {
            // Wait a bit if this is the first run, we may avoid this if `curWorkId` moves out.
            if (runId <= 0)
              continue;

            uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
            if (altPhysId == RAAssignment::kPhysNone)
              continue;

            // Reset as we will do some changes to the current assignment.
            runId = -1;

            if (_archTraits->hasInstRegSwap(group)) {
              ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
            }
            else {
              // SPILL the reg if it's not dirty in DST, otherwise try to MOVE.
              if (!cur.isPhysDirty(group, physId)) {
                ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
              }
              else {
                RegMask allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);

                // If possible don't conflict with assigned regs at DST.
                if (allocableRegs & ~dst.assigned(group))
                  allocableRegs &= ~dst.assigned(group);

                if (allocableRegs) {
                  // MOVE is possible, thus preferred.
                  uint32_t tmpPhysId = Support::ctz(allocableRegs);

                  ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
                  _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
                }
                else {
                  // MOVE is impossible, must SPILL.
                  ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
                }
              }

              goto Cleared;
            }
          }
        }
        else {
Cleared:
          // DST assigned, CUR unassigned.
          uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
          if (altPhysId == RAAssignment::kPhysNone) {
            if (liveIn.bitAt(dstWorkId))
              willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
            affectedRegs &= ~physMask; // Unaffected from now.
            continue;
          }
          ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
        }

        // Both DST and CUR assigned to the same reg or CUR just moved to DST.
        if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
          if ((dst.dirty(group) & physMask) == 0) {
            // CUR dirty, DST not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));

            // If `dstReadOnly` is true it means that the block was already processed and we cannot change from
            // CLEAN to DIRTY. In that case the register has to be saved as it cannot enter the block DIRTY.
            if (dstReadOnly)
              ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
            else
              dst.makeDirty(group, dstWorkId, physId);
          }
          else {
            // DST dirty, CUR not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));

            cur.makeDirty(group, dstWorkId, physId);
          }
        }

        // Must match now...
        ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
        ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));

        runId = -1;
        affectedRegs &= ~physMask;
      }
    }

    // STEP 3
    // ------
    //
    // - Load registers specified by `willLoadRegs`.

    {
      Support::BitWordIterator<RegMask> it(willLoadRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();

        if (!cur.isPhysAssigned(group, physId)) {
          uint32_t workId = dst.physToWorkId(group, physId);

          // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
          ASMJIT_ASSERT(liveIn.bitAt(workId) == true);

          ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
          if (dst.isPhysDirty(group, physId))
            cur.makeDirty(group, workId, physId);
          ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
        }
        else {
          // Not possible otherwise.
          ASMJIT_ASSERT(tryMode == true);
        }
      }
    }
  }

  if (!tryMode) {
    // Here is a code that dumps the conflicting part if something fails here:
    // if (!dst.equals(cur)) {
    //   uint32_t physTotal = dst._layout.physTotal;
    //   uint32_t workCount = dst._layout.workCount;
    //
    //   fprintf(stderr, "Dirty    DST=0x%08X CUR=0x%08X\n", dst.dirty(RegGroup::kGp), cur.dirty(RegGroup::kGp));
    //   fprintf(stderr, "Assigned DST=0x%08X CUR=0x%08X\n", dst.assigned(RegGroup::kGp), cur.assigned(RegGroup::kGp));
    //
    //   for (uint32_t physId = 0; physId < physTotal; physId++) {
    //     uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
    //     uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
    //     if (dstWorkId != curWorkId)
    //       fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
    //   }
    //
    //   for (uint32_t workId = 0; workId < workCount; workId++) {
    //     uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
    //     uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
    //     if (dstPhysId != curPhysId)
    //       fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
    //   }
    // }
    ASMJIT_ASSERT(dst.equals(cur));
  }

  return kErrorOk;
}

Error RALocalAllocator::spillScratchGpRegsBeforeEntry(RegMask scratchRegs) noexcept {
  RegGroup group = RegGroup::kGp;
  Support::BitWordIterator<RegMask> it(scratchRegs);

  while (it.hasNext()) {
    uint32_t physId = it.next();
    if (_curAssignment.isPhysAssigned(group, physId)) {
      uint32_t workId = _curAssignment.physToWorkId(group, physId);
      ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
    }
  }

  return kErrorOk;
}

// RALocalAllocator - Allocation
// =============================

Error RALocalAllocator::allocInst(InstNode* node) noexcept {
  RAInst* raInst = node->passData<RAInst>();

  RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* consecutiveRegs[kMaxConsecutiveRegs];

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  _node = node;
  _raInst = raInst;
  _tiedTotal = raInst->_tiedTotal;
  _tiedCount = raInst->_tiedCount;

  // Whether we already replaced register operand with memory operand.
  bool rmAllocated = false;

  for (RegGroup group : RegGroupVirtValues{}) {
    uint32_t i, count = this->tiedCount(group);
    RATiedReg* tiedRegs = this->tiedRegs(group);

    RegMask willUse = _raInst->_usedRegs[group];
    RegMask willOut = _raInst->_clobberedRegs[group];
    RegMask willFree = 0;

    uint32_t usePending = count;
    uint32_t outTiedCount = 0;
    uint32_t dupTiedCount = 0;
    uint32_t consecutiveMask = 0;

    // STEP 1
    // ------
    //
    // Calculate `willUse` and `willFree` masks based on tied registers we have. In addition, aggregate information
    // regarding consecutive registers used by this instruction. We need that to make USE/OUT assignments.
    //
    // We don't do any assignment decisions at this stage as we just need to collect some information first. Then,
    // after we populate all masks needed we can finally make some decisions in the second loop. The main reason
    // for this is that we really need `willFree` to make assignment decisions for `willUse`, because if we mark
    // some registers that will be freed, we can consider them in decision making afterwards.

    for (i = 0; i < count; i++) {
      RATiedReg* tiedReg = &tiedRegs[i];

      if (tiedReg->hasAnyConsecutiveFlag()) {
        uint32_t consecutiveOffset = tiedReg->isLeadConsecutive() ? uint32_t(0) : tiedReg->consecutiveData();

        if (ASMJIT_UNLIKELY(Support::bitTest(consecutiveMask, consecutiveOffset)))
          return DebugUtils::errored(kErrorInvalidState);

        consecutiveMask |= Support::bitMask(consecutiveOffset);
        consecutiveRegs[consecutiveOffset] = tiedReg;
      }

      // Add OUT and KILL to `outPending` for CLOBBERing and/or OUT assignment.
      if (tiedReg->isOutOrKill())
        outTiedRegs[outTiedCount++] = tiedReg;

      if (tiedReg->isDuplicate())
        dupTiedRegs[dupTiedCount++] = tiedReg;

      if (!tiedReg->isUse()) {
        tiedReg->markUseDone();
        usePending--;
        continue;
      }

      // Don't assign anything here if this is a consecutive USE - we will handle this in STEP 2 instead.
      if (tiedReg->isUseConsecutive())
        continue;

      uint32_t workId = tiedReg->workId();
      uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

      if (tiedReg->hasUseId()) {
        // If the register has `useId` it means it can only be allocated in that register.
        RegMask useMask = Support::bitMask(tiedReg->useId());

        // RAInstBuilder must have collected `usedRegs` on-the-fly.
        ASMJIT_ASSERT((willUse & useMask) != 0);

        if (assignedId == tiedReg->useId()) {
          // If the register is already allocated in this one, mark it done and continue.
          tiedReg->markUseDone();
          if (tiedReg->isWrite())
            _curAssignment.makeDirty(group, workId, assignedId);
          usePending--;
          willUse |= useMask;
        }
        else {
          willFree |= useMask & _curAssignment.assigned(group);
        }
      }
      else {
        // Check if the register must be moved to `allocableRegs`.
        RegMask allocableRegs = tiedReg->useRegMask();
        if (assignedId != RAAssignment::kPhysNone) {
          RegMask assignedMask = Support::bitMask(assignedId);
          if ((allocableRegs & ~willUse) & assignedMask) {
            tiedReg->setUseId(assignedId);
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= assignedMask;
          }
          else {
            willFree |= assignedMask;
          }
        }
      }
    }

    // STEP 2
    // ------
    //
    // Verify that all the consecutive registers are really consecutive. Terminate if there is a gap. In addition,
    // decide which USE ids will be used in case that this consecutive sequence is USE (OUT registers are allocated
    // in a different step).
    uint32_t consecutiveCount = 0;

    if (consecutiveMask) {
      if ((consecutiveMask & (consecutiveMask + 1u)) != 0)
        return DebugUtils::errored(kErrorInvalidState);

      // Count of trailing ones is the count of consecutive registers. There cannot be gap.
      consecutiveCount = Support::ctz(~consecutiveMask);

      // Prioritize allocation that would result in least moves even when moving registers away from their homes.
      RATiedReg* lead = consecutiveRegs[0];

      // Assign the best possible USE Ids to all consecutives.
      if (lead->isUseConsecutive()) {
        uint32_t bestScore = 0;
        uint32_t bestLeadReg = 0xFFFFFFFF;
        RegMask allocableRegs = (_availableRegs[group] | willFree) & ~willUse;

        uint32_t assignments[kMaxConsecutiveRegs];

        for (i = 0; i < consecutiveCount; i++)
          assignments[i] = _curAssignment.workToPhysId(group, consecutiveRegs[i]->workId());

        Support::BitWordIterator<uint32_t> it(lead->useRegMask());
        while (it.hasNext()) {
          uint32_t regIndex = it.next();
          if (Support::bitTest(lead->useRegMask(), regIndex)) {
            uint32_t score = 15;

            for (i = 0; i < consecutiveCount; i++) {
              uint32_t consecutiveIndex = regIndex + i;
              if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
                score = 0;
                break;
              }

              RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
              score += uint32_t(workReg->homeRegId() == consecutiveIndex);
              score += uint32_t(assignments[i] == consecutiveIndex) * 2;
            }

            if (score > bestScore) {
              bestScore = score;
              bestLeadReg = regIndex;
            }
          }
        }

        if (bestLeadReg == 0xFFFFFFFF)
          return DebugUtils::errored(kErrorConsecutiveRegsAllocation);

        for (i = 0; i < consecutiveCount; i++) {
          uint32_t consecutiveIndex = bestLeadReg + i;

          RATiedReg* tiedReg = consecutiveRegs[i];
          RegMask useMask = Support::bitMask(consecutiveIndex);

          uint32_t workId = tiedReg->workId();
          uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

          tiedReg->setUseId(consecutiveIndex);

          if (assignedId == consecutiveIndex) {
            // If the register is already allocated in this one, mark it done and continue.
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= useMask;
          }
          else {
            willUse |= useMask;
            willFree |= useMask & _curAssignment.assigned(group);
          }
        }
      }
    }

    // STEP 3
    // ------
    //
    // Do some decision making to find the best candidates of registers that need to be assigned, moved, and/or
    // spilled. Only USE registers are considered here, OUT will be decided later after all CLOBBERed and OUT
    // registers are unassigned.

    if (usePending) {
      // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.

      // All registers that are currently alive without registers that will be freed.
      RegMask liveRegs = _curAssignment.assigned(group) & ~willFree;

      for (i = 0; i < count; i++) {
        RATiedReg* tiedReg = &tiedRegs[i];
        if (tiedReg->isUseDone())
          continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        // REG/MEM: Patch register operand to memory operand if not allocated.
        if (!rmAllocated && tiedReg->hasUseRM()) {
          if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
            RAWorkReg* workReg = workRegById(tiedReg->workId());
            uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
            uint32_t rmSize = tiedReg->rmSize();

            if (rmSize <= workReg->virtReg()->virtSize()) {
              Operand& op = node->operands()[opIndex];
              op = _pass->workRegAsMem(workReg);
              op.as<BaseMem>().setSize(rmSize);
              tiedReg->_useRewriteMask = 0;

              tiedReg->markUseDone();
              usePending--;

              rmAllocated = true;
              continue;
            }
          }
        }

        if (!tiedReg->hasUseId()) {
          // DECIDE where to assign the USE register.
          RegMask allocableRegs = tiedReg->useRegMask() & ~(willFree | willUse);
          uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);

          RegMask useMask = Support::bitMask(useId);
          willUse |= useMask;
          willFree |= useMask & liveRegs;
          tiedReg->setUseId(useId);

          if (assignedId != RAAssignment::kPhysNone) {
            RegMask assignedMask = Support::bitMask(assignedId);

            willFree |= assignedMask;
            liveRegs &= ~assignedMask;

            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }
          else {
            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }

          liveRegs |= useMask;
        }
      }
    }

    // Initially all used regs will be marked as clobbered.
    RegMask clobberedByInst = willUse | willOut;

    // STEP 4
    // ------
    //
    // Free all registers that we marked as `willFree`. Only registers that are not USEd by the instruction are
    // considered as we don't want to free regs we need.

    if (willFree) {
      RegMask allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
      Support::BitWordIterator<RegMask> it(willFree);

      do {
        uint32_t assignedId = it.next();
        if (_curAssignment.isPhysAssigned(group, assignedId)) {
          uint32_t workId = _curAssignment.physToWorkId(group, assignedId);

          // DECIDE whether to MOVE or SPILL.
          if (allocableRegs) {
            uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
            if (reassignedId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
              allocableRegs ^= Support::bitMask(reassignedId);
              continue;
            }
          }

          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      } while (it.hasNext());
    }

    // STEP 5
    // ------
    //
    // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't allocated yet. This is a bit
    // complicated as the allocation is iterative. In some cases we have to wait before allocating a particular
    // physical register as it's still occupied by some other one, which we need to move before we can use it.
    // In this case we skip it and allocate some other register instead (making it free for another iteration).
    //
    // NOTE: Iterations are mostly important for complicated allocations like function calls, where there can
    // be up to N registers used at once. Asm instructions won't run the loop more than once in 99.9% of cases
    // as they use 2..3 registers on average.

    if (usePending) {
      bool mustSwap = false;
      do {
        uint32_t oldPending = usePending;

        for (i = 0; i < count; i++) {
          RATiedReg* thisTiedReg = &tiedRegs[i];
          if (thisTiedReg->isUseDone())
            continue;

          uint32_t thisWorkId = thisTiedReg->workId();
          uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);

          // This would be a bug, fatal one!
          uint32_t targetPhysId = thisTiedReg->useId();
          ASMJIT_ASSERT(targetPhysId != thisPhysId);

          uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
          if (targetWorkId != RAAssignment::kWorkNone) {
            RAWorkReg* targetWorkReg = workRegById(targetWorkId);

            // Swapping two registers can solve two allocation tasks by emitting just a single instruction. However,
            // swap is only available on few architectures and it's definitely not available for each register group.
            // Calling `onSwapReg()` before checking these would be fatal.
            if (_archTraits->hasInstRegSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));

              thisTiedReg->markUseDone();
              if (thisTiedReg->isWrite())
                _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
              usePending--;

              // Double-hit.
              RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
              if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
                targetTiedReg->markUseDone();
                if (targetTiedReg->isWrite())
                  _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
                usePending--;
              }
              continue;
            }

            if (!mustSwap)
              continue;

            // Only branched here if the previous iteration did nothing. This is essentially a SWAP operation without
            // having a dedicated instruction for that purpose (vector registers, etc). The simplest way to handle
            // such case is to SPILL the target register.
            ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
          }

          if (thisPhysId != RAAssignment::kPhysNone) {
            ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
          else {
            ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
        }

        mustSwap = (oldPending == usePending);
      } while (usePending);
    }

    // STEP 6
    // ------
    //
    // KILL registers marked as KILL/OUT.

    uint32_t outPending = outTiedCount;
    if (outTiedCount) {
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];

        uint32_t workId = tiedReg->workId();
        uint32_t physId = _curAssignment.workToPhysId(group, workId);

        // Must check if it's allocated as KILL can be related to OUT (like KILL immediately after OUT, which could
        // mean the register is not assigned).
        if (physId != RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
          willOut &= ~Support::bitMask(physId);
        }

        // We still maintain number of pending registers for OUT assignment. So, if this is only KILL, not OUT, we
        // can safely decrement it.
        outPending -= !tiedReg->isOut();
      }
    }

    // STEP 7
    // ------
    //
    // SPILL registers that will be CLOBBERed. Since OUT and KILL were already processed this is used mostly to
    // handle function CALLs.

    if (willOut) {
      Support::BitWordIterator<RegMask> it(willOut);
      do {
        uint32_t physId = it.next();
        uint32_t workId = _curAssignment.physToWorkId(group, physId);

        if (workId == RAAssignment::kWorkNone)
          continue;

        ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
      } while (it.hasNext());
    }

    // STEP 8
    // ------
    //
    // Duplication.

    for (i = 0; i < dupTiedCount; i++) {
      RATiedReg* tiedReg = dupTiedRegs[i];
      uint32_t workId = tiedReg->workId();
      uint32_t srcId = tiedReg->useId();

      Support::BitWordIterator<RegMask> it(tiedReg->useRegMask());
      while (it.hasNext()) {
        uint32_t dstId = it.next();
        if (dstId == srcId)
          continue;
        _pass->emitMove(workId, dstId, srcId);
      }
    }

    // STEP 9
    // ------
    //
    // Vector registers can be clobbered partially by invoke - find if that's the case and clobber when necessary.

    if (node->isInvoke() && group == RegGroup::kVec) {
      const InvokeNode* invokeNode = node->as<InvokeNode>();

      RegMask maybeClobberedRegs = invokeNode->detail().callConv().preservedRegs(group) & _curAssignment.assigned(group);
      if (maybeClobberedRegs) {
        uint32_t saveRestoreVecSize = invokeNode->detail().callConv().saveRestoreRegSize(group);
        Support::BitWordIterator<RegMask> it(maybeClobberedRegs);

        do {
          uint32_t physId = it.next();
          uint32_t workId = _curAssignment.physToWorkId(group, physId);

          RAWorkReg* workReg = workRegById(workId);
          uint32_t virtSize = workReg->virtReg()->virtSize();

          if (virtSize > saveRestoreVecSize) {
            ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
          }

        } while (it.hasNext());
      }
    }

    // STEP 10
    // -------
    //
    // Assign OUT registers.

    if (outPending) {
      // Live registers, we need a separate register (outside of `_curAssignment`) to hold these because of KILLed
      // registers. If we KILL a register here it will go out from `_curAssignment`, but we cannot assign to it in
      // here.
      RegMask liveRegs = _curAssignment.assigned(group);

      // Must avoid as they have been already OUTed (added during the loop).
      RegMask outRegs = 0;

      // Must avoid as they collide with already allocated ones.
      RegMask avoidRegs = willUse & ~clobberedByInst;

      // Assign the best possible OUT ids of all consecutives.
      if (consecutiveCount) {
        RATiedReg* lead = consecutiveRegs[0];
        if (lead->isOutConsecutive()) {
          uint32_t bestScore = 0;
          uint32_t bestLeadReg = 0xFFFFFFFF;
          RegMask allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);

          Support::BitWordIterator<uint32_t> it(lead->outRegMask());
          while (it.hasNext()) {
            uint32_t regIndex = it.next();
            if (Support::bitTest(lead->outRegMask(), regIndex)) {
              uint32_t score = 15;

              for (i = 0; i < consecutiveCount; i++) {
                uint32_t consecutiveIndex = regIndex + i;
                if (!Support::bitTest(allocableRegs, consecutiveIndex)) {
                  score = 0;
                  break;
                }

                RAWorkReg* workReg = workRegById(consecutiveRegs[i]->workId());
                score += uint32_t(workReg->homeRegId() == consecutiveIndex);
              }

              if (score > bestScore) {
                bestScore = score;
                bestLeadReg = regIndex;
              }
            }
          }

          if (bestLeadReg == 0xFFFFFFFF)
            return DebugUtils::errored(kErrorConsecutiveRegsAllocation);

          for (i = 0; i < consecutiveCount; i++) {
            uint32_t consecutiveIndex = bestLeadReg + i;
            RATiedReg* tiedReg = consecutiveRegs[i];
            tiedReg->setOutId(consecutiveIndex);
          }
        }
      }

      // Allocate OUT registers.
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];
        if (!tiedReg->isOut())
          continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        if (assignedId != RAAssignment::kPhysNone)
          ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));

        uint32_t physId = tiedReg->outId();
        if (physId == RAAssignment::kPhysNone) {
          RegMask allocableRegs = tiedReg->outRegMask() & ~(outRegs | avoidRegs);

          if (!(allocableRegs & ~liveRegs)) {
            // There are no more registers, decide which one to spill.
            uint32_t spillWorkId;
            physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
            ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
          }
          else {
            physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
          }
        }

        // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
        ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));

        if (!tiedReg->isKill())
          ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));

        tiedReg->setOutId(physId);
        tiedReg->markOutDone();

        outRegs |= Support::bitMask(physId);
        liveRegs &= ~Support::bitMask(physId);
        outPending--;
      }

      clobberedByInst |= outRegs;
      ASMJIT_ASSERT(outPending == 0);
    }

    _clobberedRegs[group] |= clobberedByInst;
  }

  return kErrorOk;
}

Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
  // This is an experimental feature that would spill registers that don't have home-id and are last in this basic block.
  // This prevents saving these regs in other basic blocks and then restoring them (mostly relevant for loops).
  RAInst* raInst = node->passData<RAInst>();
  uint32_t count = raInst->tiedCount();

  for (uint32_t i = 0; i < count; i++) {
    RATiedReg* tiedReg = raInst->tiedAt(i);
    if (tiedReg->isLast()) {
      uint32_t workId = tiedReg->workId();
      RAWorkReg* workReg = workRegById(workId);
      if (!workReg->hasHomeRegId()) {
        RegGroup group = workReg->group();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
        if (assignedId != RAAssignment::kPhysNone) {
          _cc->_setCursor(node);
          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      }
    }
  }

  return kErrorOk;
}

Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
  // TODO: This should be used to make the branch allocation better.
  DebugUtils::unused(cont);

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  // Use TryMode of `switchToAssignment()` if possible.
  if (target->hasEntryAssignment()) {
    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), true));
  }

  ASMJIT_PROPAGATE(allocInst(node));
  ASMJIT_PROPAGATE(spillRegsBeforeEntry(target));

  if (target->hasEntryAssignment()) {
    BaseNode* injectionPoint = _pass->extraBlock()->prev();
    BaseNode* prevCursor = _cc->setCursor(injectionPoint);

    _tmpAssignment.copyFrom(_curAssignment);
    ASMJIT_PROPAGATE(switchToAssignment(target->entryPhysToWorkMap(), target->liveIn(), target->isAllocated(), false));

    BaseNode* curCursor = _cc->cursor();
    if (curCursor != injectionPoint) {
      // Additional instructions emitted to switch from the current state to the `target` state. This means
      // that we have to move these instructions into an independent code block and patch the jump location.
      Operand& targetOp = node->op(node->opCount() - 1);
      if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
        return DebugUtils::errored(kErrorInvalidState);

      Label trampoline = _cc->newLabel();
      Label savedTarget = targetOp.as<Label>();

      // Patch `target` to point to the `trampoline` we just created.
      targetOp = trampoline;

      // Clear a possible SHORT form as we have no clue now if the SHORT form would be encodable after patching
      // the target to `trampoline` (X86 specific).
      node->clearOptions(InstOptions::kShortForm);

      // Finalize the switch assignment sequence.
      ASMJIT_PROPAGATE(_pass->emitJump(savedTarget));
      _cc->_setCursor(injectionPoint);
      _cc->bind(trampoline);
    }

    _cc->_setCursor(prevCursor);
    _curAssignment.swap(_tmpAssignment);
  }
  else {
    ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment));
  }

  return kErrorOk;
}

Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept {
  // TODO: Do we really need to use `cont`?
  DebugUtils::unused(cont);

  if (targets.empty())
    return DebugUtils::errored(kErrorInvalidState);

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  // All `targets` should have the same sharedAssignmentId, we just read the first.
  RABlock* anyTarget = targets[0];
  if (!anyTarget->hasSharedAssignmentId())
    return DebugUtils::errored(kErrorInvalidState);

  RASharedAssignment& sharedAssignment = _pass->_sharedAssignments[anyTarget->sharedAssignmentId()];

  ASMJIT_PROPAGATE(allocInst(node));

  if (!sharedAssignment.empty()) {
    ASMJIT_PROPAGATE(switchToAssignment(
      sharedAssignment.physToWorkMap(),
      sharedAssignment.liveIn(),
      true,  // Read-only.
      false  // Try-mode.
    ));
  }

  ASMJIT_PROPAGATE(spillRegsBeforeEntry(anyTarget));

  if (sharedAssignment.empty()) {
    ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(anyTarget, block(), _curAssignment));
  }

  return kErrorOk;
}

// RALocalAllocator - Decision Making
// ==================================

uint32_t RALocalAllocator::decideOnAssignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
  ASMJIT_ASSERT(allocableRegs != 0);
  DebugUtils::unused(group, physId);

  RAWorkReg* workReg = workRegById(workId);

  // Prefer home register id, if possible.
  if (workReg->hasHomeRegId()) {
    uint32_t homeId = workReg->homeRegId();
    if (Support::bitTest(allocableRegs, homeId))
      return homeId;
  }

  // Prefer registers used upon block entries.
  RegMask previouslyAssignedRegs = workReg->allocatedMask();
  if (allocableRegs & previouslyAssignedRegs)
    allocableRegs &= previouslyAssignedRegs;

  return Support::ctz(allocableRegs);
}

uint32_t RALocalAllocator::decideOnReassignment(RegGroup group, uint32_t workId, uint32_t physId, RegMask allocableRegs) const noexcept {
  ASMJIT_ASSERT(allocableRegs != 0);
  DebugUtils::unused(group, physId);

  RAWorkReg* workReg = workRegById(workId);

  // Prefer allocating back to HomeId, if possible.
  if (workReg->hasHomeRegId()) {
    if (Support::bitTest(allocableRegs, workReg->homeRegId()))
      return workReg->homeRegId();
  }

  // TODO: [Register Allocator] This could be improved.

  // Decided to SPILL.
  return RAAssignment::kPhysNone;
}

uint32_t RALocalAllocator::decideOnSpillFor(RegGroup group, uint32_t workId, RegMask spillableRegs, uint32_t* spillWorkId) const noexcept {
  // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
  DebugUtils::unused(workId);
  ASMJIT_ASSERT(spillableRegs != 0);

  Support::BitWordIterator<RegMask> it(spillableRegs);
  uint32_t bestPhysId = it.next();
  uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);

  // Avoid calculating the cost model if there is only one spillable register.
  if (it.hasNext()) {
    uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
    do {
      uint32_t localPhysId = it.next();
      uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
      uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);

      if (localCost < bestCost) {
        bestCost = localCost;
        bestPhysId = localPhysId;
        bestWorkId = localWorkId;
      }
    } while (it.hasNext());
  }

  *spillWorkId = bestWorkId;
  return bestPhysId;
}

ASMJIT_END_NAMESPACE

#endif // !ASMJIT_NO_COMPILER
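The consecutive-register check in STEP 2 of RALocalAllocator::allocInst() above relies on a small bit trick: a mask of consecutive-register offsets is valid only if it is a contiguous run of ones starting at bit 0, and the number of consecutive registers equals the count of those trailing ones. The sketch below is a standalone illustration of that trick, not part of the gem; it uses the GCC/Clang __builtin_ctz intrinsic in place of asmjit's Support::ctz, which is an assumption about an equivalent primitive rather than the library's API.

// Minimal sketch of the consecutive-mask validation used in STEP 2 (illustration only).
#include <cstdint>
#include <cassert>

static bool isContiguousFromBit0(uint32_t mask) {
  // Adding 1 carries through a run of trailing ones; any bit left over means a gap,
  // which mirrors the `(consecutiveMask & (consecutiveMask + 1u)) != 0` rejection above.
  return (mask & (mask + 1u)) == 0u;
}

static uint32_t countTrailingOnes(uint32_t mask) {
  // Trailing ones of `mask` are the trailing zeros of `~mask` (the Support::ctz(~mask) idea).
  return static_cast<uint32_t>(__builtin_ctz(~mask));
}

int main() {
  assert(isContiguousFromBit0(0b0111) && countTrailingOnes(0b0111) == 3); // offsets 0..2 -> 3 consecutive regs
  assert(!isContiguousFromBit0(0b0101));                                  // gap at offset 1 -> invalid state
  return 0;
}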