js-confuser-vm 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -0
- package/README.MD +101 -37
- package/dist/build-runtime.js +15 -2
- package/dist/compiler.js +98 -51
- package/dist/runtime.js +5 -1
- package/dist/transforms/bytecode/aliasedOpcodes.js +2 -8
- package/dist/transforms/bytecode/macroOpcodes.js +21 -19
- package/dist/transforms/bytecode/microOpcodes.js +236 -0
- package/dist/transforms/bytecode/resolveContants.js +5 -11
- package/dist/transforms/bytecode/resolveLabels.js +5 -3
- package/dist/transforms/bytecode/specializedOpcodes.js +21 -16
- package/dist/transforms/runtime/internalVariables.js +202 -0
- package/dist/transforms/runtime/macroOpcodes.js +30 -18
- package/dist/transforms/runtime/microOpcodes.js +76 -0
- package/dist/transforms/runtime/specializedOpcodes.js +20 -18
- package/dist/utils/op-utils.js +15 -8
- package/index.ts +3 -2
- package/jest.config.js +2 -0
- package/package.json +1 -1
- package/src/build-runtime.ts +18 -3
- package/src/compiler.ts +152 -65
- package/src/options.ts +1 -0
- package/src/runtime.ts +5 -1
- package/src/transforms/bytecode/aliasedOpcodes.ts +2 -12
- package/src/transforms/bytecode/macroOpcodes.ts +28 -29
- package/src/transforms/bytecode/microOpcodes.ts +291 -0
- package/src/transforms/bytecode/resolveContants.ts +6 -13
- package/src/transforms/bytecode/resolveLabels.ts +5 -4
- package/src/transforms/bytecode/specializedOpcodes.ts +38 -28
- package/src/transforms/runtime/internalVariables.ts +270 -0
- package/src/transforms/runtime/macroOpcodes.ts +47 -20
- package/src/transforms/runtime/microOpcodes.ts +93 -0
- package/src/transforms/runtime/specializedOpcodes.ts +27 -32
- package/src/types.ts +1 -1
- package/src/utils/op-utils.ts +21 -8
- package/src/utilts.ts +0 -3
package/src/runtime.ts
CHANGED
|
@@ -82,6 +82,7 @@ function VM(bytecode, mainStartPc, mainRegCount, constants, globals) {
|
|
|
82
82
|
startPc: mainStartPc, // <- where main begins
|
|
83
83
|
};
|
|
84
84
|
this._currentFrame = new Frame(new Closure(mainFn), null, null, undefined, 0);
|
|
85
|
+
this._internals = {};
|
|
85
86
|
}
|
|
86
87
|
|
|
87
88
|
// Consume the next slot from the flat bytecode stream and advance the PC.
|
|
@@ -820,7 +821,10 @@ for (var k of Object.getOwnPropertyNames(globalThis)) {
|
|
|
820
821
|
// If a window object is in scope (browser or test harness), capture it
|
|
821
822
|
// explicitly so VM code can read/write window.TEST_OUTPUT etc.
|
|
822
823
|
if (typeof window !== "undefined") {
|
|
823
|
-
globals
|
|
824
|
+
globals.window = window;
|
|
825
|
+
for (var k of Object.getOwnPropertyNames(window)) {
|
|
826
|
+
globals[k] = window[k];
|
|
827
|
+
}
|
|
824
828
|
}
|
|
825
829
|
|
|
826
830
|
// Transfer common primitives
|
|
@@ -46,15 +46,6 @@ export function aliasedOpcodes(
|
|
|
46
46
|
baseOpValueToName.set(val as number, name);
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
// Collect all currently used opcode slots (base + any dynamically assigned)
|
|
50
|
-
const usedOpcodes = new Set<number>(
|
|
51
|
-
Object.keys(compiler.OP_NAME)
|
|
52
|
-
.map((k) => parseInt(k, 10))
|
|
53
|
-
.filter((v) => !isNaN(v)),
|
|
54
|
-
);
|
|
55
|
-
|
|
56
|
-
if (usedOpcodes.size > U16_MAX) return { bytecode: bc };
|
|
57
|
-
|
|
58
49
|
// ── Step 1: count frequency and determine arity for each eligible base opcode ─
|
|
59
50
|
// We scan the actual post-transform bytecode so frequency reflects what's
|
|
60
51
|
// really left (specialized/macro ops already consumed their share).
|
|
@@ -92,7 +83,7 @@ export function aliasedOpcodes(
|
|
|
92
83
|
const aliasedOps: Compiler["ALIASED_OPS"] = {};
|
|
93
84
|
|
|
94
85
|
for (const [originalOp, stats] of candidates) {
|
|
95
|
-
const aliasOp = nextFreeSlot(
|
|
86
|
+
const aliasOp = nextFreeSlot(compiler);
|
|
96
87
|
if (aliasOp === -1) break;
|
|
97
88
|
|
|
98
89
|
const arity = stats.arity!;
|
|
@@ -116,8 +107,7 @@ export function aliasedOpcodes(
|
|
|
116
107
|
aliasMap.set(originalOp, aliasOp);
|
|
117
108
|
aliasedOps[aliasOp] = { originalOp, order };
|
|
118
109
|
|
|
119
|
-
const originalName =
|
|
120
|
-
compiler.OP_NAME[originalOp] ?? `OP_${originalOp}`;
|
|
110
|
+
const originalName = compiler.OP_NAME[originalOp] ?? `OP_${originalOp}`;
|
|
121
111
|
compiler.OP_NAME[aliasOp] = `ALIAS_${originalName}_${order.join("_")}`;
|
|
122
112
|
}
|
|
123
113
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { Bytecode, Instruction } from "../../types.ts";
|
|
2
2
|
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
3
|
import { nextFreeSlot, U16_MAX } from "../../utils/op-utils.ts";
|
|
4
|
+
import { ok } from "node:assert";
|
|
4
5
|
|
|
5
6
|
// Opcodes that must not appear in a non-terminal position inside a macro window.
|
|
6
7
|
// Jump ops: modifying frame._pc mid-execution causes the macro handler to
|
|
@@ -21,8 +22,7 @@ import { nextFreeSlot, U16_MAX } from "../../utils/op-utils.ts";
|
|
|
21
22
|
// Algorithm:
|
|
22
23
|
// 1. Count every eligible window of length 2–5 by its op-code signature.
|
|
23
24
|
// 2. Keep sequences that appear >= 2 times; sort by frequency then length.
|
|
24
|
-
// 3.
|
|
25
|
-
// to the most-frequent candidates and store in compiler.MACRO_OPS.
|
|
25
|
+
// 3. Use nextFreeSlot() to assign a new opcode to each of the best candidates
|
|
26
26
|
// 4. Re-scan bytecode, replacing each matched sequence with a single
|
|
27
27
|
// multi-operand instruction:
|
|
28
28
|
// [macroOpCode, operands_of_instr_0..., operands_of_instr_1..., ...]
|
|
@@ -38,43 +38,39 @@ export function macroOpcodes(
|
|
|
38
38
|
originalOpToName.set(opVal, name);
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
// Opcodes are matched by name rather than by numeric code: opcodes generated from these (e.g. specialized variants) carry the base name inside their own name, so the substring check keeps them ineligible as well.
|
|
42
|
+
const alwaysExcluded = [
|
|
43
|
+
"PATCH",
|
|
44
|
+
"TRY_SETUP",
|
|
45
|
+
"TRY_END",
|
|
46
|
+
"DEBUGGER",
|
|
47
|
+
"MAKE_CLOSURE",
|
|
48
|
+
];
|
|
49
|
+
|
|
50
|
+
const nonTerminalExcluded = ["RETURN", "CALL", "CALL_METHOD", "NEW", "THROW"];
|
|
51
|
+
|
|
41
52
|
function isEligible(
|
|
42
53
|
op: number | null,
|
|
43
54
|
compiler: Compiler,
|
|
44
55
|
isLast: boolean = false,
|
|
45
56
|
): boolean {
|
|
46
57
|
if (op === null) return false;
|
|
47
|
-
const { OP, JUMP_OPS } = compiler;
|
|
58
|
+
const { OP, JUMP_OPS, OP_NAME } = compiler;
|
|
48
59
|
// Infrastructure and variable-length ops are never eligible.
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
OP.DEBUGGER,
|
|
54
|
-
OP.MAKE_CLOSURE, // variable-length operands — cannot generate a static handler
|
|
55
|
-
]);
|
|
56
|
-
if (alwaysExcluded.has(op)) return false;
|
|
60
|
+
const opName = OP_NAME[op];
|
|
61
|
+
ok(opName, `Unknown opcode ${op} (not in OP_NAME)`);
|
|
62
|
+
if (alwaysExcluded.find((name) => opName.includes(name))) return false;
|
|
63
|
+
|
|
57
64
|
// Jump and frame-changing ops are only eligible as the terminal instruction.
|
|
58
65
|
if (!isLast) {
|
|
59
66
|
if (JUMP_OPS.has(op)) return false;
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
OP.CALL_METHOD,
|
|
64
|
-
OP.NEW,
|
|
65
|
-
OP.THROW,
|
|
66
|
-
]);
|
|
67
|
-
if (nonTerminalExcluded.has(op)) return false;
|
|
67
|
+
|
|
68
|
+
if (nonTerminalExcluded.find((name) => opName.includes(name)))
|
|
69
|
+
return false;
|
|
68
70
|
}
|
|
69
|
-
return
|
|
71
|
+
return OP_NAME[op] !== undefined;
|
|
70
72
|
}
|
|
71
73
|
|
|
72
|
-
// Collect every opcode value already in use so we can find free slots.
|
|
73
|
-
const usedOpcodes = new Set<number>(
|
|
74
|
-
Object.values(compiler.OP).filter((v) => v !== undefined) as number[],
|
|
75
|
-
);
|
|
76
|
-
if (usedOpcodes.size > U16_MAX) return { bytecode: bc };
|
|
77
|
-
|
|
78
74
|
// ── Step 1: count window frequencies ──────────────────────────────────────
|
|
79
75
|
const freqMap = new Map<string, { ops: number[]; count: number }>();
|
|
80
76
|
|
|
@@ -85,7 +81,8 @@ export function macroOpcodes(
|
|
|
85
81
|
const ops: number[] = [];
|
|
86
82
|
let valid = true;
|
|
87
83
|
for (let j = 0; j < len; j++) {
|
|
88
|
-
const
|
|
84
|
+
const instr = bc[i + j];
|
|
85
|
+
const op = instr[0];
|
|
89
86
|
const isLast = j === len - 1;
|
|
90
87
|
if (!isEligible(op, compiler, isLast)) {
|
|
91
88
|
valid = false;
|
|
@@ -116,7 +113,7 @@ export function macroOpcodes(
|
|
|
116
113
|
|
|
117
114
|
// ── Step 3: assign free opcode slots to the best candidates ───────────────
|
|
118
115
|
for (let i = 0; i < candidates.length; i++) {
|
|
119
|
-
const macroOp = nextFreeSlot(
|
|
116
|
+
const macroOp = nextFreeSlot(compiler);
|
|
120
117
|
if (macroOp === -1) break;
|
|
121
118
|
const ops = candidates[i].ops;
|
|
122
119
|
compiler.MACRO_OPS[macroOp] = ops;
|
|
@@ -169,7 +166,9 @@ export function macroOpcodes(
|
|
|
169
166
|
// Each instruction contributes instr.slice(1) — zero or more operands.
|
|
170
167
|
const allOperands: any[] = [];
|
|
171
168
|
for (let j = 0; j < len; j++) {
|
|
172
|
-
|
|
169
|
+
var instr = bc[i + j];
|
|
170
|
+
var operands = instr.slice(1);
|
|
171
|
+
allOperands.push(...operands);
|
|
173
172
|
}
|
|
174
173
|
|
|
175
174
|
const newInstr: Instruction = [macroOpCode, ...allOperands];
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import { parse } from "@babel/parser";
|
|
2
|
+
import traverseImport from "@babel/traverse";
|
|
3
|
+
import * as t from "@babel/types";
|
|
4
|
+
import { ok } from "assert";
|
|
5
|
+
import { Compiler, VM_RUNTIME, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
6
|
+
import type { Bytecode, Instruction } from "../../types.ts";
|
|
7
|
+
import { nextFreeSlot } from "../../utils/op-utils.ts";
|
|
8
|
+
import { nSizedOps } from "./specializedOpcodes.ts";
|
|
9
|
+
import generate from "@babel/generator";
|
|
10
|
+
|
|
11
|
+
// Use the `.default` export of @babel/traverse when it is truthy, otherwise
// the imported binding itself. Presumably a CJS/ESM interop guard — the two
// module systems expose the callable at different places; confirm against the
// build setup.
const traverse = (
  traverseImport.default ? traverseImport.default : traverseImport
) as typeof traverseImport.default;
|
|
13
|
+
|
|
14
|
+
/**
 * Returns the statements that form the body of a switch case.
 *
 * When the case consists of exactly one block statement (`case X: { ... }`),
 * the block is unwrapped and its inner statements are used; otherwise the
 * consequent list is used directly. `break` statements and empty statements
 * at that level are dropped from the result.
 *
 * @param switchCase - The SwitchCase node to inspect.
 * @returns A new array holding the remaining statements, in source order.
 */
function extractCaseBody(switchCase: t.SwitchCase): t.Statement[] {
  const { consequent } = switchCase;
  const first = consequent[0];

  // A lone `{ ... }` wrapper is transparent: look inside it.
  const body: t.Statement[] =
    consequent.length === 1 && t.isBlockStatement(first)
      ? (first as t.BlockStatement).body
      : (consequent as t.Statement[]);

  const kept: t.Statement[] = [];
  for (const statement of body) {
    if (t.isBreakStatement(statement) || t.isEmptyStatement(statement)) {
      continue;
    }
    kept.push(statement);
  }
  return kept;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Counts how many bytecode operand slots a single handler statement reads.
 *
 * Every `this._operand()` call counts as 1 slot and every `this._constant()`
 * call counts as 2. Nested function declarations, function expressions and
 * arrow functions are not descended into.
 *
 * @param stmt - The statement to analyse; it is deep-cloned before traversal,
 *   so the caller's node is left untouched.
 * @returns The total slot count, or `null` when one of those calls sits
 *   underneath a loop, `if`, `switch`, conditional (`?:`) or logical
 *   (`&&`/`||`/`??`) expression — the number of reads would then depend on
 *   runtime values.
 */
function countStatementOperands(stmt: t.Statement): number | null {
  // Slots consumed per reader method invoked on `this`.
  const readerSlots = new Map<string, number>([
    ["_operand", 1],
    ["_constant", 2],
  ]);

  let total = 0;
  let rejected = false;

  // Wrap a deep copy in File > Program so traverse() has a proper root.
  const root = t.file(t.program([t.cloneNode(stmt, true) as t.Statement]));

  traverse(root, {
    enter(path) {
      if (rejected) {
        path.stop();
        return;
      }

      const node = path.node;

      // Bodies of nested functions are outside this statement's own reads.
      if (
        t.isFunctionDeclaration(node) ||
        t.isFunctionExpression(node) ||
        t.isArrowFunctionExpression(node)
      ) {
        path.skip();
        return;
      }

      if (!t.isCallExpression(node)) return;

      // Only calls of the shape `this.<identifier>(...)` are of interest.
      const callee = node.callee;
      if (
        !t.isMemberExpression(callee) ||
        !t.isThisExpression(callee.object) ||
        !t.isIdentifier(callee.property)
      ) {
        return;
      }

      const consumed = readerSlots.get((callee.property as t.Identifier).name);
      if (consumed === undefined) return;

      // An operand read under a loop or branch makes the statement ineligible.
      const underLoopOrBranch = path
        .getAncestry()
        .some(
          (ancestor) =>
            ancestor.isLoop() ||
            ancestor.isIfStatement() ||
            ancestor.isSwitchStatement() ||
            ancestor.isConditionalExpression() ||
            ancestor.isLogicalExpression(),
        );

      if (underLoopOrBranch) {
        rejected = true;
        path.stop();
        return;
      }

      total += consumed;
    },
  });

  return rejected ? null : total;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Inspects the VM runtime's dispatch switch (the SwitchStatement whose leading
 * comment contains "@SWITCH") and reports which opcode handlers can be split
 * statement by statement.
 *
 * A case is reported only when all of the following hold:
 *  - its label has the form `OP.<NAME>` and NAME is a defined key of
 *    `compiler.OP`;
 *  - its body (see extractCaseBody) has at least two statements;
 *  - no `return` statement appears anywhere inside the body;
 *  - no top-level body statement is a `debugger` or `throw` statement;
 *  - countStatementOperands() yields a number for every body statement.
 *
 * Cases that fail any check are simply absent from the result.
 *
 * @param compiler - Supplies the opcode name → value table (`compiler.OP`).
 * @returns Map from opcode value to the per-statement operand counts of its
 *   handler, in statement order.
 * @throws AssertionError when no "@SWITCH"-tagged switch statement is found.
 */
function analyzeRuntimeCases(compiler: Compiler): Map<number, number[]> {
  const runtimeAst = parse(VM_RUNTIME, { sourceType: "unambiguous" });

  // name → numeric value, restricted to entries that actually have a value.
  const opValueByName = new Map<string, number>();
  Object.entries(compiler.OP).forEach(([name, value]) => {
    if (value !== undefined) opValueByName.set(name, value as number);
  });

  // Locate the tagged dispatch switch; the first match wins.
  let dispatchSwitch: t.SwitchStatement | null = null;
  traverse(runtimeAst, {
    SwitchStatement(path) {
      const tagged = path.node.leadingComments?.some((comment) =>
        comment.value.includes("@SWITCH"),
      );
      if (!tagged) return;
      dispatchSwitch = path.node;
      path.stop();
    },
  });

  ok(dispatchSwitch, "Could not find @SWITCH statement for micro opcodes");

  const analysis = new Map<number, number[]>();

  for (const switchCase of (dispatchSwitch as t.SwitchStatement).cases) {
    const label = switchCase.test;

    // Only `case OP.<identifier>:` labels are considered.
    if (
      !(
        label &&
        t.isMemberExpression(label) &&
        t.isIdentifier(label.object, { name: "OP" }) &&
        t.isIdentifier(label.property)
      )
    ) {
      continue;
    }

    const opValue = opValueByName.get((label.property as t.Identifier).name);
    if (opValue === undefined) continue;

    const body = extractCaseBody(switchCase);
    if (body.length < 2) continue; // nothing to split

    let eligible = true;

    // A `return` anywhere in the handler rules the whole case out.
    traverse(t.file(t.program(body)), {
      ReturnStatement(path) {
        path.stop();
        eligible = false;
      },
    });

    const perStatement: number[] = [];
    for (const statement of body) {
      const consumed = countStatementOperands(statement);
      if (
        consumed === null ||
        t.isDebuggerStatement(statement) ||
        t.isThrowStatement(statement)
      ) {
        eligible = false;
        break;
      }
      perStatement.push(consumed);
    }

    // Totals are not validated here; the bytecode pass checks them against
    // the operands of each real instruction.
    if (eligible) analysis.set(opValue, perStatement);
  }

  return analysis;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Bytecode transform that splits instructions into "micro-opcodes": one new
 * instruction per statement of the original opcode's runtime handler.
 *
 * Steps:
 *  1. analyzeRuntimeCases() determines which opcodes are splittable and how
 *     many operands each handler statement consumes.
 *  2. Occurrences of those opcodes in `bc` are counted, ignoring the opcodes
 *     named in `nSizedOps` and RETURN.
 *  3. Opcodes are processed most-frequent first.
 *  4. Each handler statement gets its own opcode slot from nextFreeSlot();
 *     the slot is registered in `compiler.OP_NAME` and `compiler.MICRO_OPS`.
 *  5. Every matching instruction is replaced by its sub-instructions, with the
 *     original operands distributed in order.
 *
 * @param bc - Bytecode to transform; the array itself is not modified.
 * @param compiler - Provides OP / OP_NAME and receives the new MICRO_OPS and
 *   OP_NAME entries (mutated in place).
 * @returns `{ bytecode }` — a new array when something was split, otherwise
 *   the input array unchanged.
 * @throws Error when an instruction's operand count differs from the total
 *   its handler statements are expected to consume.
 */
export function microOpcodes(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  // ── Step 1: analyse runtime to discover splittable opcodes ────────────────
  const opAnalysis = analyzeRuntimeCases(compiler);
  if (opAnalysis.size === 0) return { bytecode: bc };

  // ── Step 2: count opcode frequency in bytecode ────────────────────────────
  const disallowedOps = new Set(nSizedOps.map((name) => compiler.OP[name]));
  disallowedOps.add(compiler.OP.RETURN);

  const freqMap = new Map<number, number>();
  for (const instr of bc) {
    const op = instr[0];
    if (op === null || !opAnalysis.has(op) || disallowedOps.has(op)) continue;
    freqMap.set(op, (freqMap.get(op) ?? 0) + 1);
  }

  // ── Step 3: most frequent opcodes first ───────────────────────────────────
  // Every key in freqMap was seen at least once, so no count filter is needed.
  const candidates = Array.from(freqMap.entries())
    .sort(([, a], [, b]) => b - a)
    .map(([op]) => op);

  if (candidates.length === 0) return { bytecode: bc };

  // ── Step 4: assign free opcode slots for each sub-statement ───────────────
  // originalOp → [{ microOp, irOperandCount }, ...] in statement order
  const originalToSubOps = new Map<
    number,
    { microOp: number; irOperandCount: number }[]
  >();

  for (const origOp of candidates) {
    const stmtCounts = opAnalysis.get(origOp)!;
    const origName = compiler.OP_NAME[origOp] ?? `OP_${origOp}`;

    // Reserve every slot up front. Writing the name into OP_NAME is what marks
    // a slot as taken for the next nextFreeSlot() call.
    // NOTE(review): assumes nextFreeSlot() derives "in use" from OP_NAME —
    // confirm in utils/op-utils.
    const slots: number[] = [];
    for (let si = 0; si < stmtCounts.length; si++) {
      const slot = nextFreeSlot(compiler);
      if (slot === -1) break;

      compiler.OP_NAME[slot] = `MICRO_${origName}_${si}`;
      slots.push(slot);
    }

    if (slots.length !== stmtCounts.length) {
      // Not enough room for this opcode: release the partial reservation so no
      // orphan names remain and a later candidate needing fewer slots can
      // still be served.
      for (const slot of slots) delete compiler.OP_NAME[slot];
      continue;
    }

    const subOps = slots.map((microOp, si) => {
      const irOperandCount = stmtCounts[si];
      compiler.MICRO_OPS[microOp] = {
        originalOp: origOp,
        stmtIndex: si,
        irOperandCount,
      };
      return { microOp, irOperandCount };
    });

    originalToSubOps.set(origOp, subOps);
  }

  if (originalToSubOps.size === 0) return { bytecode: bc };

  // ── Step 5: replace each matched instruction with sub-instructions ────────
  const result: Bytecode = [];

  for (const instr of bc) {
    const op = instr[0];
    if (op === null || !originalToSubOps.has(op)) {
      result.push(instr);
      continue;
    }

    const subOps = originalToSubOps.get(op)!;
    const operands = instr.slice(1); // all operands of the original instruction

    // The handler analysis and the real instruction must agree on arity.
    const expectedTotal = subOps.reduce(
      (sum, { irOperandCount }) => sum + irOperandCount,
      0,
    );
    if (operands.length !== expectedTotal) {
      throw new Error(
        `Operand count mismatch for opcode ${compiler.OP_NAME[op]}`,
      );
    }

    // Hand out operands in order, irOperandCount at a time.
    let offset = 0;
    subOps.forEach(({ microOp, irOperandCount }, index) => {
      const subOperands = operands.slice(offset, offset + irOperandCount);
      offset += irOperandCount;

      const newInstr: Instruction = [microOp, ...subOperands];

      // Carry source-node info on the first sub-instruction only. The check
      // is by position: comparing offsets would also match every leading
      // sub-instruction that takes zero operands.
      if (index === 0) {
        (newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
      }

      result.push(newInstr);
    });
  }

  return { bytecode: result };
}
|
|
@@ -19,8 +19,7 @@ function concealString(s: string, key: number): string {
|
|
|
19
19
|
return Buffer.from(bytes).toString("base64");
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
// Resolve all {type:"constant", value}
|
|
23
|
-
// [constPoolIndex, concealKey]
|
|
22
|
+
// Resolve all {type:"constant", value} (index) and {type:"constant", value, key: true} (key) operands
|
|
24
23
|
//
|
|
25
24
|
// constPoolIndex — index into the constants array (as before).
|
|
26
25
|
// concealKey — XOR key used to conceal this constant.
|
|
@@ -43,8 +42,6 @@ export function resolveConstants(
|
|
|
43
42
|
const keyMap = new Map<number, number>(); // pool index → conceal key
|
|
44
43
|
|
|
45
44
|
function intern(operand: b.InstrOperand): [b.InstrOperand, number] {
|
|
46
|
-
const operandAsObject =
|
|
47
|
-
typeof operand === "object" && operand ? operand : {};
|
|
48
45
|
const value = (operand as any).value;
|
|
49
46
|
|
|
50
47
|
let idx = constantsMap.get(value);
|
|
@@ -79,13 +76,11 @@ export function resolveConstants(
|
|
|
79
76
|
}
|
|
80
77
|
|
|
81
78
|
const idxOperand: any = {
|
|
82
|
-
...(operandAsObject as object),
|
|
83
79
|
type: "number",
|
|
84
80
|
resolvedValue: idx,
|
|
85
81
|
};
|
|
86
82
|
|
|
87
83
|
const keyOperand: any = {
|
|
88
|
-
...(operandAsObject as object),
|
|
89
84
|
type: "number",
|
|
90
85
|
resolvedValue: key,
|
|
91
86
|
};
|
|
@@ -108,19 +103,17 @@ export function resolveConstants(
|
|
|
108
103
|
|
|
109
104
|
if (hasConstant) {
|
|
110
105
|
// 1-to-2 expansion: each {type:"constant"} becomes [constIdx, concealKey].
|
|
111
|
-
const newOperands: b.InstrOperand[] =
|
|
112
|
-
for (const operand of operands) {
|
|
106
|
+
const newOperands: b.InstrOperand[] = operands.map((operand) => {
|
|
113
107
|
if ((operand as any)?.type === "constant") {
|
|
114
108
|
const [idxOperand, key] = intern(operand);
|
|
115
|
-
|
|
116
109
|
const newOperand = (operand as any)?.key ? key : idxOperand;
|
|
117
110
|
|
|
118
|
-
|
|
119
|
-
// newOperands.push(key); // plain number — serialized as a regular u16 slot
|
|
111
|
+
return Object.assign(operand, newOperand);
|
|
120
112
|
} else {
|
|
121
|
-
|
|
113
|
+
return operand;
|
|
122
114
|
}
|
|
123
|
-
}
|
|
115
|
+
});
|
|
116
|
+
|
|
124
117
|
const newInstr = [op, ...newOperands] as b.Instruction;
|
|
125
118
|
(newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
|
|
126
119
|
resolved.push(newInstr);
|
|
@@ -76,15 +76,16 @@ export function resolveLabels(
|
|
|
76
76
|
if (pc === undefined)
|
|
77
77
|
throw new Error(`Undefined label: ${(operand as any).label}`);
|
|
78
78
|
|
|
79
|
-
var operandAsObject =
|
|
80
|
-
typeof operand === "object" && operand ? operand : {};
|
|
81
|
-
|
|
82
79
|
const newOperand = {
|
|
83
|
-
...operandAsObject, // Preverse original operand properties
|
|
84
80
|
type: "number",
|
|
85
81
|
resolvedValue: pc + ((operand as any).offset ?? 0),
|
|
86
82
|
};
|
|
87
83
|
|
|
84
|
+
// Mutate original object so that references are also updated
|
|
85
|
+
if (typeof operand === "object" && operand !== null) {
|
|
86
|
+
return Object.assign(operand, newOperand);
|
|
87
|
+
}
|
|
88
|
+
|
|
88
89
|
return newOperand;
|
|
89
90
|
}
|
|
90
91
|
return operand;
|
|
@@ -5,6 +5,17 @@ import {
|
|
|
5
5
|
nextFreeSlot,
|
|
6
6
|
U16_MAX,
|
|
7
7
|
} from "../../utils/op-utils.ts";
|
|
8
|
+
import * as t from "@babel/types";
|
|
9
|
+
import * as b from "../../types.ts";
|
|
10
|
+
|
|
11
|
+
/**
 * Opcode names (keys of `compiler.OP`) that the opcode-rewriting passes turn
 * into a set of disallowed opcodes.
 *
 * Presumably these are the "n-sized" instructions whose operand count varies
 * from one occurrence to the next — confirm against the compiler's emitters.
 */
export const nSizedOps: string[] = [
  "MAKE_CLOSURE",
  "BUILD_ARRAY",
  "BUILD_OBJECT",
  "CALL",
  "CALL_METHOD",
  "NEW",
];
|
|
8
19
|
|
|
9
20
|
// Creates specialized opcodes for the most frequent (OPCODE + single_integer_operand) pairs.
|
|
10
21
|
// Example: [OP.LOAD_CONST, 1] becomes [SPECIALIZED_LOAD_CONST_1].
|
|
@@ -15,23 +26,7 @@ export function specializedOpcodes(
|
|
|
15
26
|
bc: Bytecode,
|
|
16
27
|
compiler: Compiler,
|
|
17
28
|
): { bytecode: Bytecode } {
|
|
18
|
-
const disallowedOps = new Set([
|
|
19
|
-
compiler.OP.MAKE_CLOSURE,
|
|
20
|
-
compiler.OP.BUILD_ARRAY,
|
|
21
|
-
compiler.OP.BUILD_OBJECT,
|
|
22
|
-
compiler.OP.CALL,
|
|
23
|
-
compiler.OP.CALL_METHOD,
|
|
24
|
-
compiler.OP.NEW,
|
|
25
|
-
]);
|
|
26
|
-
|
|
27
|
-
// ── Collect used opcodes exactly as specified ─────────────────────────────
|
|
28
|
-
const usedOpcodes = new Set<number>(
|
|
29
|
-
Object.keys(compiler.OP_NAME)
|
|
30
|
-
.map((k) => parseInt(k, 10))
|
|
31
|
-
.filter((v) => !isNaN(v)) as number[],
|
|
32
|
-
);
|
|
33
|
-
|
|
34
|
-
if (usedOpcodes.size > U16_MAX) return { bytecode: bc };
|
|
29
|
+
const disallowedOps = new Set(nSizedOps.map((name) => compiler.OP[name]));
|
|
35
30
|
|
|
36
31
|
// ── Step 1: count frequency of eligible (op, operand) pairs ───────────────
|
|
37
32
|
const freqMap = new Map<
|
|
@@ -52,7 +47,22 @@ export function specializedOpcodes(
|
|
|
52
47
|
const operandCount = getInstructionSize(instr) - 1;
|
|
53
48
|
if (operandCount < 1 || operandCount > 6) continue;
|
|
54
49
|
|
|
55
|
-
|
|
50
|
+
// Convert numbers into operand objects so they can be modified elsewhere and preserved
|
|
51
|
+
const oldOperands = instr.slice(1);
|
|
52
|
+
const operands = oldOperands.map((operand) => {
|
|
53
|
+
if (typeof operand === "number") {
|
|
54
|
+
return {
|
|
55
|
+
type: "number",
|
|
56
|
+
value: operand,
|
|
57
|
+
resolvedValue: operand,
|
|
58
|
+
} as InstrOperand;
|
|
59
|
+
}
|
|
60
|
+
return operand;
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
instr.length = 1;
|
|
64
|
+
instr.push(...operands);
|
|
65
|
+
|
|
56
66
|
const operandsKey = JSON.stringify(operands);
|
|
57
67
|
|
|
58
68
|
const key = `${op},${operandsKey}`;
|
|
@@ -60,7 +70,12 @@ export function specializedOpcodes(
|
|
|
60
70
|
if (entry) {
|
|
61
71
|
entry.occurences++;
|
|
62
72
|
} else {
|
|
63
|
-
freqMap.set(key, {
|
|
73
|
+
freqMap.set(key, {
|
|
74
|
+
op,
|
|
75
|
+
operands,
|
|
76
|
+
operandsKey,
|
|
77
|
+
occurences: 1,
|
|
78
|
+
});
|
|
64
79
|
}
|
|
65
80
|
}
|
|
66
81
|
|
|
@@ -76,7 +91,7 @@ export function specializedOpcodes(
|
|
|
76
91
|
const specializedOps: Compiler["SPECIALIZED_OPS"] = {};
|
|
77
92
|
|
|
78
93
|
for (let i = 0; i < candidates.length; i++) {
|
|
79
|
-
const specialOp = nextFreeSlot(
|
|
94
|
+
const specialOp = nextFreeSlot(compiler);
|
|
80
95
|
if (specialOp === -1) break;
|
|
81
96
|
const { op: originalOp, operands, operandsKey } = candidates[i];
|
|
82
97
|
|
|
@@ -118,21 +133,16 @@ export function specializedOpcodes(
|
|
|
118
133
|
}
|
|
119
134
|
|
|
120
135
|
const newOperands = operands.map((operand) => {
|
|
121
|
-
const operandAsObject =
|
|
136
|
+
const operandAsObject: any =
|
|
122
137
|
typeof operand === "object" && operand
|
|
123
138
|
? operand
|
|
124
139
|
: {
|
|
125
140
|
type: "number",
|
|
126
|
-
value: operand,
|
|
127
141
|
resolvedValue: operand,
|
|
128
142
|
};
|
|
129
143
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
placeholder: true,
|
|
133
|
-
} as any as InstrOperand;
|
|
134
|
-
|
|
135
|
-
return newOperand;
|
|
144
|
+
operandAsObject.placeholder = true;
|
|
145
|
+
return operandAsObject;
|
|
136
146
|
});
|
|
137
147
|
|
|
138
148
|
const newInstr: Instruction = [specialOpCode, ...newOperands];
|