js-confuser-vm 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +281 -147
- package/dist/build-runtime.js +41 -15
- package/dist/compiler.js +714 -265
- package/dist/disassembler.js +367 -0
- package/dist/index.js +7 -2
- package/dist/runtime.js +160 -119
- package/dist/template.js +163 -42
- package/dist/transforms/bytecode/aliasedOpcodes.js +4 -1
- package/dist/transforms/bytecode/concealConstants.js +2 -2
- package/dist/transforms/bytecode/controlFlowFlattening.js +569 -0
- package/dist/transforms/bytecode/dispatcher.js +15 -111
- package/dist/transforms/bytecode/macroOpcodes.js +2 -2
- package/{src/transforms/bytecode/resolveContants.ts → dist/transforms/bytecode/resolveConstants.js} +30 -56
- package/dist/transforms/bytecode/resolveRegisters.js +23 -4
- package/dist/transforms/bytecode/selfModifying.js +88 -21
- package/dist/transforms/bytecode/semanticOpcodes.js +162 -0
- package/dist/transforms/bytecode/specializedOpcodes.js +23 -12
- package/dist/transforms/bytecode/stringConcealing.js +288 -0
- package/dist/transforms/runtime/classObfuscation.js +43 -0
- package/dist/transforms/runtime/handlerTable.js +91 -0
- package/dist/transforms/runtime/semanticOpcodes.js +35 -0
- package/dist/transforms/runtime/specializedOpcodes.js +11 -5
- package/dist/types.js +1 -1
- package/dist/utils/ast-utils.js +75 -0
- package/dist/utils/op-utils.js +1 -2
- package/dist/utils/pass-utils.js +100 -0
- package/dist/utils/profile-utils.js +3 -0
- package/package.json +8 -1
- package/.gitmodules +0 -4
- package/.prettierignore +0 -1
- package/CHANGELOG.md +0 -335
- package/babel-plugin-inline-runtime.cjs +0 -34
- package/babel.config.json +0 -23
- package/index.ts +0 -38
- package/jest-strip-types.js +0 -10
- package/jest.config.js +0 -52
- package/src/build-runtime.ts +0 -78
- package/src/compiler.ts +0 -2593
- package/src/index.ts +0 -14
- package/src/minify.ts +0 -21
- package/src/options.ts +0 -18
- package/src/runtime.ts +0 -923
- package/src/template.ts +0 -141
- package/src/transforms/bytecode/aliasedOpcodes.ts +0 -148
- package/src/transforms/bytecode/concealConstants.ts +0 -52
- package/src/transforms/bytecode/dispatcher.ts +0 -398
- package/src/transforms/bytecode/macroOpcodes.ts +0 -193
- package/src/transforms/bytecode/microOpcodes.ts +0 -291
- package/src/transforms/bytecode/resolveLabels.ts +0 -112
- package/src/transforms/bytecode/resolveRegisters.ts +0 -221
- package/src/transforms/bytecode/selfModifying.ts +0 -121
- package/src/transforms/bytecode/specializedOpcodes.ts +0 -153
- package/src/transforms/runtime/aliasedOpcodes.ts +0 -191
- package/src/transforms/runtime/internalVariables.ts +0 -270
- package/src/transforms/runtime/macroOpcodes.ts +0 -138
- package/src/transforms/runtime/microOpcodes.ts +0 -93
- package/src/transforms/runtime/minify.ts +0 -1
- package/src/transforms/runtime/shuffleOpcodes.ts +0 -24
- package/src/transforms/runtime/specializedOpcodes.ts +0 -156
- package/src/types.ts +0 -93
- package/src/utils/op-utils.ts +0 -48
- package/src/utils/random-utils.ts +0 -31
- package/tsconfig.json +0 -12
|
@@ -1,291 +0,0 @@
|
|
|
1
|
-
import { parse } from "@babel/parser";
|
|
2
|
-
import traverseImport from "@babel/traverse";
|
|
3
|
-
import * as t from "@babel/types";
|
|
4
|
-
import { ok } from "assert";
|
|
5
|
-
import { Compiler, VM_RUNTIME, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
6
|
-
import type { Bytecode, Instruction } from "../../types.ts";
|
|
7
|
-
import { nextFreeSlot } from "../../utils/op-utils.ts";
|
|
8
|
-
import { nSizedOps } from "./specializedOpcodes.ts";
|
|
9
|
-
import generate from "@babel/generator";
|
|
10
|
-
|
|
11
|
-
const traverse = (traverseImport.default ||
|
|
12
|
-
traverseImport) as typeof traverseImport.default;
|
|
13
|
-
|
|
14
|
-
/**
 * Returns the effective statements of a `case` clause.
 *
 * When the clause body is exactly one block statement (`case X: { ... }`),
 * the block's own statements are used; otherwise the clause's consequent list
 * is used directly. `break` and empty statements are dropped from the result.
 */
function extractCaseBody(switchCase: t.SwitchCase): t.Statement[] {
  const { consequent } = switchCase;
  const [soleStatement] = consequent;

  // Only unwrap when the block is the *only* thing in the clause.
  const isWrappedInBlock =
    consequent.length === 1 && t.isBlockStatement(soleStatement);

  const body: t.Statement[] = isWrappedInBlock
    ? (soleStatement as t.BlockStatement).body
    : (consequent as t.Statement[]);

  return body.filter(
    (statement) =>
      !(t.isBreakStatement(statement) || t.isEmptyStatement(statement)),
  );
}
|
|
27
|
-
|
|
28
|
-
/**
 * Counts the IR-level operands consumed by one runtime statement.
 *
 * A `this._operand()` call counts as 1 operand and a `this._constant()` call
 * counts as 2. Nested functions are not inspected.
 *
 * @returns the operand total, or `null` when any counted call sits inside a
 *   loop, `if`, `switch`, conditional expression or logical expression.
 */
function countStatementOperands(stmt: t.Statement): number | null {
  // Operands consumed per recognised `this.<name>()` call.
  const operandsPerCall = new Map<string, number>([
    ["_operand", 1],
    ["_constant", 2],
  ]);

  let total = 0;
  let rejected = false;

  // Traverse a deep clone wrapped in its own File/Program so the statement
  // can be walked on its own.
  const wrapper = t.file(t.program([t.cloneNode(stmt, true) as t.Statement]));

  traverse(wrapper, {
    enter(path) {
      if (rejected) {
        path.stop();
        return;
      }

      const node = path.node;

      switch (node.type) {
        case "FunctionDeclaration":
        case "FunctionExpression":
        case "ArrowFunctionExpression":
          // Nested function bodies are skipped entirely.
          path.skip();
          return;
        case "CallExpression":
          break;
        default:
          return;
      }

      const { callee } = node as t.CallExpression;
      const isThisMethodCall =
        t.isMemberExpression(callee) &&
        t.isThisExpression(callee.object) &&
        t.isIdentifier(callee.property);
      if (!isThisMethodCall) return;

      const methodName = ((callee as t.MemberExpression).property as t.Identifier)
        .name;
      const consumed = operandsPerCall.get(methodName);
      if (!consumed) return;

      // _operand()/_constant() are not allowed inside loops or branches.
      const insideLoopOrBranch = path
        .getAncestry()
        .some(
          (ancestor) =>
            ancestor.isLoop() ||
            ancestor.isIfStatement() ||
            ancestor.isSwitchStatement() ||
            ancestor.isConditionalExpression() ||
            ancestor.isLogicalExpression(),
        );

      if (insideLoopOrBranch) {
        rejected = true;
        path.stop();
        return;
      }

      total += consumed;
    },
  });

  return rejected ? null : total;
}
|
|
97
|
-
|
|
98
|
-
/**
 * Inspects the VM runtime's `@SWITCH`-tagged switch statement and works out
 * which opcode cases can be split statement-by-statement.
 *
 * A case is included only when:
 *   - its test is `OP.<NAME>` and `<NAME>` has a value in `compiler.OP`,
 *   - its body has at least two statements (after break/empty removal),
 *   - the body contains no `return`, and no top-level `debugger`/`throw`,
 *   - every statement is accepted by `countStatementOperands`.
 *
 * @returns a map from opcode value to the per-statement operand counts, in
 *   statement order. Ineligible opcodes are simply absent from the map.
 */
function analyzeRuntimeCases(compiler: Compiler): Map<number, number[]> {
  const runtimeAst = parse(VM_RUNTIME, { sourceType: "unambiguous" });

  // name → opcode value, for every defined entry of compiler.OP
  const opValueByName = new Map<string, number>();
  Object.entries(compiler.OP).forEach(([name, value]) => {
    if (value !== undefined) opValueByName.set(name, value as number);
  });

  // Locate the dispatch switch via its leading "@SWITCH" comment.
  let dispatchSwitch: t.SwitchStatement | null = null;
  traverse(runtimeAst, {
    SwitchStatement(path) {
      const tagged = path.node.leadingComments?.some((comment) =>
        comment.value.includes("@SWITCH"),
      );
      if (tagged) {
        dispatchSwitch = path.node;
        path.stop();
      }
    },
  });

  ok(dispatchSwitch, "Could not find @SWITCH statement for micro opcodes");

  const splittable = new Map<number, number[]>();

  for (const switchCase of (dispatchSwitch as t.SwitchStatement).cases) {
    const label = switchCase.test;
    const isOpMember =
      !!label &&
      t.isMemberExpression(label) &&
      t.isIdentifier(label.object, { name: "OP" }) &&
      t.isIdentifier(label.property);
    if (!isOpMember) continue;

    const opName = ((label as t.MemberExpression).property as t.Identifier).name;
    const opValue = opValueByName.get(opName);
    if (opValue === undefined) continue;

    const body = extractCaseBody(switchCase);
    if (body.length < 2) continue; // nothing to split

    let eligible = true;

    // A `return` anywhere in the body disqualifies the case: control flow is
    // not carried from one micro-op to the next.
    traverse(t.file(t.program(body)), {
      ReturnStatement(path) {
        path.stop();
        eligible = false;
      },
    });

    const perStatement: number[] = [];
    for (const statement of body) {
      const consumed = countStatementOperands(statement);
      if (
        consumed === null ||
        t.isDebuggerStatement(statement) ||
        t.isThrowStatement(statement)
      ) {
        eligible = false;
        break;
      }
      perStatement.push(consumed);
    }

    // The counts are only recorded here; the bytecode pass checks that they
    // add up to each instruction's actual operand count.
    if (eligible) splittable.set(opValue, perStatement);
  }

  return splittable;
}
|
|
177
|
-
|
|
178
|
-
/**
 * Bytecode transform: replaces each occurrence of a splittable opcode with a
 * sequence of per-statement "micro" opcodes, each carrying only the operands
 * its statement consumes.
 *
 * Side effects on `compiler`: for every micro opcode created, an entry is
 * written to `compiler.OP_NAME` (`MICRO_<origName>_<stmtIndex>`) and to
 * `compiler.MICRO_OPS` (`{ originalOp, stmtIndex, irOperandCount }`).
 *
 * @param bc       input bytecode; instructions are `[op, ...operands]`
 * @param compiler compiler state (reads `OP`, reads/writes `OP_NAME`, writes `MICRO_OPS`)
 * @returns `{ bytecode }` — the input array itself when nothing was split,
 *   otherwise a new array
 * @throws Error when a matched instruction's operand count differs from the
 *   sum of the per-statement counts discovered in the runtime
 */
export function microOpcodes(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  // ── Step 1: analyse runtime to discover splittable opcodes ──────────────────
  const opAnalysis = analyzeRuntimeCases(compiler);
  if (opAnalysis.size === 0) return { bytecode: bc };

  // ── Step 2: count opcode frequency in bytecode ────────────────────────────
  // Opcodes listed in nSizedOps, plus RETURN, are never split.
  const disallowedOps = new Set(nSizedOps.map((name) => compiler.OP[name]));
  disallowedOps.add(compiler.OP.RETURN);

  const freqMap = new Map<number, number>();
  for (const instr of bc) {
    const op = instr[0];
    if (op === null || !opAnalysis.has(op) || disallowedOps.has(op)) continue;
    freqMap.set(op, (freqMap.get(op) ?? 0) + 1);
  }

  // ── Step 3: most frequent opcodes first ───────────────────────────────────
  // Every entry in freqMap has a count of at least 1, so no filtering is needed.
  const candidates = Array.from(freqMap.entries())
    .sort(([, a], [, b]) => b - a)
    .map(([op]) => op);

  if (candidates.length === 0) return { bytecode: bc };

  // ── Step 4: assign free opcode slots for each sub-statement ─────────────
  // originalOp → [{ microOp, irOperandCount }, ...] in statement order
  const originalToSubOps = new Map<
    number,
    { microOp: number; irOperandCount: number }[]
  >();

  for (const origOp of candidates) {
    const stmtCounts = opAnalysis.get(origOp)!;

    // Reserve one slot per statement. Writing a provisional OP_NAME entry is
    // what marks the slot as taken for the next nextFreeSlot() call.
    const slots: number[] = [];
    for (let si = 0; si < stmtCounts.length; si++) {
      const slot = nextFreeSlot(compiler);
      if (slot === -1) break;

      compiler.OP_NAME[slot] = `MICRO_${origOp}_${si}`;
      slots.push(slot);
    }

    if (slots.length !== stmtCounts.length) {
      // Not enough slots for this opcode: undo the partial reservation so the
      // provisional names do not linger without MICRO_OPS entries and so a
      // later, smaller candidate can still use those slots.
      // NOTE(review): assumes nextFreeSlot() treats a slot with no OP_NAME
      // entry as free — confirm against utils/op-utils.
      for (const slot of slots) {
        delete compiler.OP_NAME[slot];
      }
      continue;
    }

    const subOps: { microOp: number; irOperandCount: number }[] = [];
    const origName = compiler.OP_NAME[origOp] ?? `OP_${origOp}`;

    for (let si = 0; si < stmtCounts.length; si++) {
      const microOp = slots[si];
      const irOperandCount = stmtCounts[si];
      subOps.push({ microOp, irOperandCount });

      // Replace the provisional name with the final, human-readable one.
      compiler.OP_NAME[microOp] = `MICRO_${origName}_${si}`;
      compiler.MICRO_OPS[microOp] = {
        originalOp: origOp,
        stmtIndex: si,
        irOperandCount,
      };
    }

    originalToSubOps.set(origOp, subOps);
  }

  if (originalToSubOps.size === 0) return { bytecode: bc };

  // ── Step 5: replace each matched instruction with sub-instructions ────────
  const result: Bytecode = [];

  for (const instr of bc) {
    const op = instr[0];
    if (op === null || !originalToSubOps.has(op)) {
      result.push(instr);
      continue;
    }

    const subOps = originalToSubOps.get(op)!;
    const operands = instr.slice(1); // all operands of the original instruction

    // The runtime analysis and the emitted instruction must agree on arity.
    const expectedTotal = subOps.reduce(
      (sum, { irOperandCount }) => sum + irOperandCount,
      0,
    );
    if (operands.length !== expectedTotal) {
      throw new Error(
        `Operand count mismatch for opcode ${compiler.OP_NAME[op]}`,
      );
    }

    // Hand each sub-instruction its consecutive slice of the operands.
    let offset = 0;
    for (let subIndex = 0; subIndex < subOps.length; subIndex++) {
      const { microOp, irOperandCount } = subOps[subIndex];
      const subOperands = operands.slice(offset, offset + irOperandCount);
      offset += irOperandCount;

      const newInstr: Instruction = [microOp, ...subOperands];

      // Carry source-node info on the first sub-instruction only. This is
      // decided by index: comparing operand offsets misfires when leading
      // sub-instructions consume zero operands.
      if (subIndex === 0) {
        (newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
      }

      result.push(newInstr);
    }
  }

  return { bytecode: result };
}
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
// --- Label IR ---
|
|
2
|
-
// During compilation, jump targets are symbolic labels instead of hard-coded
|
|
3
|
-
// PC numbers. Two IR "pseudo operands" carry the label information:
|
|
4
|
-
//
|
|
5
|
-
// defineLabel operand : [null, {type:"defineLabel", label:"FN_ENTRY_1"}]
|
|
6
|
-
// Marks a position in the bytecode array.
|
|
7
|
-
// resolveLabels() records their position but leaves them in its output; the Serializer drops them.
|
|
8
|
-
//
|
|
9
|
-
// label ref operand : [OP.JUMP, {type:"label", label:"FN_ENTRY_1"}]
|
|
10
|
-
// Used as the operand of any jump instruction. resolveLabels() replaces
|
|
11
|
-
// it with the integer PC that the corresponding defineLabel resolves to.
|
|
12
|
-
//
|
|
13
|
-
// The output bytecode is still a nested array of instructions.
|
|
14
|
-
// Flattening (one u16 slot per op, one per operand) happens in the Serializer.
|
|
15
|
-
// PC values computed here reflect the FLAT slot index so that jump targets,
|
|
16
|
-
// startPc, and LOAD_INT label operands are all correct after flattening.
|
|
17
|
-
|
|
18
|
-
import type { Instruction } from "../../types.ts";
|
|
19
|
-
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
20
|
-
|
|
21
|
-
/**
 * Resolves symbolic labels to absolute flat-PC indices.
 *
 * Flat PC: each real instruction `[op, ...operands]` occupies one slot for the
 * opcode plus one per operand, except operands flagged `placeholder: true`,
 * which occupy none. Instructions whose opcode is `null` occupy no slots.
 *
 * Every operand of type "label" is mutated in place: its `type` becomes
 * "number" and `resolvedValue` is set to `pc + (offset ?? 0)`, passed through
 * the operand's `transform` function when one is present.
 *
 * `defineLabel` pseudo-instructions are NOT removed here; they stay in the
 * returned bytecode (the Serializer is responsible for dropping them, and they
 * are useful for comment generation).
 *
 * Also sets `startPc` on every entry of `compiler.fnDescriptors`, from
 * `startLabel`, falling back to `entryLabel`.
 *
 * @throws Error when a label operand names a label that was never defined
 */
export function resolveLabels(
  bc: Instruction[],
  compiler: Compiler,
): {
  bytecode: Instruction[];
} {
  const isObject = (value: unknown): value is Record<string, any> =>
    value !== null && typeof value === "object";

  // Pass 1 – record the flat PC at which each label is defined.
  const pcByLabel = new Map<string, number>();
  let flatPc = 0;
  for (const instr of bc) {
    const opcode = instr[0];
    const first = instr[1];

    if (opcode === null) {
      // Pseudo-instruction: only defineLabel matters, and none take up space.
      if (isObject(first) && first.type === "defineLabel") {
        pcByLabel.set(first.label, flatPc);
      }
      continue;
    }

    // IMPORTANT: 'placeholder' operands do not occupy a flat slot.
    flatPc += instr.filter((slot) => (slot as any)?.placeholder !== true).length;
  }

  // Turns a label-ref operand into a resolved number operand (in place);
  // anything else is passed through untouched.
  const resolveOperand = (operand: any) => {
    if (!isObject(operand) || operand.type !== "label") return operand;

    const target = pcByLabel.get(operand.label);
    if (target === undefined) {
      throw new Error(`Undefined label: ${operand.label}`);
    }

    let value = target + (operand.offset ?? 0);
    if (operand.transform) {
      value = operand.transform(value);
    }

    return Object.assign(operand, { type: "number", resolvedValue: value });
  };

  // Pass 2 – rebuild every instruction; label refs can sit at any operand position.
  const output: any[] = [];
  for (const instr of bc) {
    const rewritten: any[] = [instr[0]];
    for (let pos = 1; pos < instr.length; pos++) {
      rewritten.push(resolveOperand(instr[pos]));
    }
    (rewritten as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
    output.push(rewritten);
  }

  // Labels are known now, so function descriptors can get their start PC.
  for (const descriptor of compiler.fnDescriptors) {
    descriptor.startPc =
      pcByLabel.get(descriptor.startLabel) ??
      pcByLabel.get(descriptor.entryLabel);
  }

  return { bytecode: output };
}
|
|
@@ -1,221 +0,0 @@
|
|
|
1
|
-
// resolveRegisters
|
|
2
|
-
// Converts virtual RegisterOperand objects into concrete slot indices and sets
|
|
3
|
-
// each FnDescriptor's regCount.
|
|
4
|
-
//
|
|
5
|
-
// Two-tier slot assignment:
|
|
6
|
-
//
|
|
7
|
-
// "local::" pool (params, `arguments`, hoisted vars, upvalue-captured vars)
|
|
8
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
9
|
-
// Sorted by virtual-id, slots assigned sequentially with NO reuse.
|
|
10
|
-
// This is required because:
|
|
11
|
-
// • The runtime writes args[i] to regs[base + i] at call time, so params
|
|
12
|
-
// MUST occupy slots 0..paramCount-1 in virtual-id order.
|
|
13
|
-
// • Open upvalues hold an absolute slot index and read regs[base+slot] for
|
|
14
|
-
// the lifetime of the outer frame — reusing a captured slot corrupts reads.
|
|
15
|
-
//
|
|
16
|
-
// All other pools (e.g. "temp::", "canary::", pass-introduced pools)
|
|
17
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
18
|
-
// Linear-scan with a free list: registers are sorted by firstUse, and any
|
|
19
|
-
// slot whose previous occupant's lastUse < current register's firstUse is
|
|
20
|
-
// recycled. An explicit [null, freeRegOperand(reg)] pseudo-instruction clamps
|
|
21
|
-
// lastUse early, enabling reuse before the natural end of the live range.
|
|
22
|
-
//
|
|
23
|
-
// Pools are processed in priority order: "local::" always first (slots
|
|
24
|
-
// 0..N), then remaining pools alphabetically. This keeps temp slots above
|
|
25
|
-
// the reserved param/local region.
|
|
26
|
-
//
|
|
27
|
-
// regCount = max concrete slot used across all pools + 1.
|
|
28
|
-
//
|
|
29
|
-
// Run AFTER all IR-level passes but BEFORE resolveLabels / resolveConstants.
|
|
30
|
-
|
|
31
|
-
import type { Bytecode } from "../../types.ts";
|
|
32
|
-
import { Compiler } from "../../compiler.ts";
|
|
33
|
-
|
|
34
|
-
/**
 * Assigns a concrete slot to every virtual register operand and records the
 * resulting register counts.
 *
 * Mutates in place:
 *   - every `{type: "register"}` operand gets `resolvedValue` (its slot),
 *   - every `{type: "fnRegCount"}` operand gets `resolvedValue` (the regCount
 *     of `compiler.fnDescriptors[fnId]`, or 0),
 *   - every function descriptor gets `regCount` (highest slot used + 1),
 *   - `compiler.mainRegCount` is set from `compiler.mainFn`.
 *
 * @returns `{ bytecode }` — the same array that was passed in
 */
export function resolveRegisters(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  const LOCAL_POOL = "local::";

  type LiveRange = {
    firstUse: number;
    lastUse: number;
    poolKey: string;
    freed: boolean; // set by a freeReg marker; later uses no longer extend lastUse
  };
  type PoolMember = { id: number; firstUse: number; lastUse: number };

  // Pool identity is "<kind>::<scopeId>", defaulting to kind "local" and an
  // empty scope id.
  const poolKeyOf = (operand: {
    kind?: string;
    scopeId?: string | number;
  }): string => `${operand.kind ?? "local"}::${operand.scopeId ?? ""}`;

  // ── Pass 1: live ranges, keyed fnId → virtual id ──────────────────────────
  const rangesByFn = new Map<number, Map<number, LiveRange>>();

  for (let index = 0; index < bc.length; index++) {
    const instr = bc[index];
    for (let pos = 1; pos < instr.length; pos++) {
      const operand = instr[pos] as any;
      if (!operand || typeof operand !== "object") continue;

      if (operand.type === "register") {
        let ranges = rangesByFn.get(operand.fnId);
        if (ranges === undefined) {
          ranges = new Map();
          rangesByFn.set(operand.fnId, ranges);
        }

        const known = ranges.get(operand.id);
        if (known === undefined) {
          ranges.set(operand.id, {
            firstUse: index,
            lastUse: index,
            poolKey: poolKeyOf(operand),
            freed: false,
          });
        } else if (!known.freed) {
          known.lastUse = index;
        }
      } else if (operand.type === "freeReg") {
        // Explicit end-of-life: clamp the range here, once. A marker for a
        // register that has not been seen yet is ignored.
        const known = rangesByFn.get(operand.fnId)?.get(operand.id);
        if (known && !known.freed) {
          known.lastUse = index;
          known.freed = true;
        }
      }
    }
  }

  // ── Pass 2: slot assignment, keyed fnId → virtual id → slot ───────────────
  const slotsByFn = new Map<number, Map<number, number>>();

  for (const [fnId, ranges] of rangesByFn) {
    // Bucket this function's registers by pool.
    const pools = new Map<string, PoolMember[]>();
    for (const [id, { poolKey, firstUse, lastUse }] of ranges) {
      const member: PoolMember = { id, firstUse, lastUse };
      const bucket = pools.get(poolKey);
      if (bucket) {
        bucket.push(member);
      } else {
        pools.set(poolKey, [member]);
      }
    }

    // "local::" goes first, every other pool follows in alphabetical order.
    const orderedKeys = [...pools.keys()].sort((left, right) => {
      const leftRank = left === LOCAL_POOL ? 0 : 1;
      const rightRank = right === LOCAL_POOL ? 0 : 1;
      if (leftRank !== rightRank) return leftRank - rightRank;
      if (leftRank === 0) return 0;
      return left < right ? -1 : left > right ? 1 : 0;
    });

    const slots = new Map<number, number>();
    slotsByFn.set(fnId, slots);

    // High-water mark shared by all pools, so each pool's fresh slots start
    // above everything allocated before it.
    let highWater = 0;

    for (const key of orderedKeys) {
      const members = pools.get(key)!;

      if (key === LOCAL_POOL) {
        // Locals: sequential slots in virtual-id order, never reused.
        members.sort((a, b) => a.id - b.id);
        for (const member of members) {
          slots.set(member.id, highWater++);
        }
        continue;
      }

      // Other pools: process by first use and recycle the lowest-numbered
      // slot whose occupant's last use is strictly before this first use.
      members.sort((a, b) => a.firstUse - b.firstUse);
      const occupied: Array<{ slot: number; freeAt: number }> = [];

      for (const member of members) {
        let reuseIndex = -1;
        occupied.forEach((entry, entryIndex) => {
          if (entry.freeAt >= member.firstUse) return; // still live
          if (reuseIndex === -1 || entry.slot < occupied[reuseIndex].slot) {
            reuseIndex = entryIndex;
          }
        });

        let slot: number;
        if (reuseIndex === -1) {
          slot = highWater++;
        } else {
          slot = occupied[reuseIndex].slot;
          occupied.splice(reuseIndex, 1);
        }

        slots.set(member.id, slot);
        occupied.push({ slot, freeAt: member.lastUse });
      }
    }
  }

  // ── Pass 3: regCount per function descriptor (highest slot + 1) ───────────
  for (const descriptor of compiler.fnDescriptors) {
    const slots = slotsByFn.get(descriptor._fnIdx!);
    let regCount = 0;
    if (slots) {
      for (const slot of slots.values()) {
        regCount = Math.max(regCount, slot + 1);
      }
    }
    descriptor.regCount = regCount;
  }

  compiler.mainRegCount = compiler.mainFn?.regCount ?? 0;

  // ── Pass 4: write resolved values onto the operands ───────────────────────
  for (const instr of bc) {
    for (let pos = 1; pos < instr.length; pos++) {
      const operand = instr[pos] as any;
      if (!operand || typeof operand !== "object") continue;

      switch (operand.type) {
        case "register":
          operand.resolvedValue = slotsByFn.get(operand.fnId)?.get(operand.id);
          break;
        case "fnRegCount":
          // NOTE(review): looked up by array index, whereas regCount above is
          // keyed by descriptor._fnIdx — presumably these always coincide.
          operand.resolvedValue =
            compiler.fnDescriptors[operand.fnId]?.regCount ?? 0;
          break;
      }
    }
  }

  return { bytecode: bc };
}
|