js-confuser-vm 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +125 -0
- package/LICENSE +21 -21
- package/README.MD +370 -190
- package/babel-plugin-inline-runtime.cjs +34 -0
- package/babel.config.json +23 -24
- package/index.ts +34 -28
- package/jest-strip-types.js +10 -10
- package/jest.config.js +35 -18
- package/package.json +50 -48
- package/src/build-runtime.ts +57 -0
- package/src/compiler.ts +2069 -1677
- package/src/index.ts +14 -13
- package/src/minify.ts +21 -21
- package/src/options.ts +14 -10
- package/src/runtime.ts +771 -645
- package/src/transforms/bytecode/macroOpcodes.ts +177 -0
- package/src/transforms/bytecode/resolveContants.ts +62 -0
- package/src/transforms/bytecode/resolveLabels.ts +107 -0
- package/src/transforms/bytecode/selfModifying.ts +121 -0
- package/src/transforms/bytecode/specializedOpcodes.ts +118 -0
- package/src/transforms/runtime/macroOpcodes.ts +111 -0
- package/src/transforms/runtime/minify.ts +1 -0
- package/src/transforms/runtime/shuffleOpcodes.ts +24 -0
- package/src/transforms/runtime/specializedOpcodes.ts +146 -0
- package/src/transforms/utils/op-utils.ts +26 -0
- package/src/{random.ts → transforms/utils/random-utils.ts} +31 -31
- package/src/types.ts +33 -0
- package/src/utilts.ts +3 -3
- package/tsconfig.json +12 -12
- package/dist/compiler.js +0 -1505
- package/dist/index.js +0 -9
- package/dist/minify.js +0 -18
- package/dist/minify_empty_externs.js +0 -4
- package/dist/options.js +0 -1
- package/dist/random.js +0 -27
- package/dist/runtime.js +0 -620
- package/dist/runtimeObf.js +0 -36
- package/dist/utilts.js +0 -3
- package/src/runtimeObf.ts +0 -48
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import type { Bytecode, Instruction } from "../../types.ts";
|
|
2
|
+
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
+
import { nextFreeSlot, U16_MAX } from "../utils/op-utils.ts";
|
|
4
|
+
|
|
5
|
+
// Opcodes that must not appear inside a macro window.
|
|
6
|
+
// Jump ops: modifying frame._pc mid-execution causes the macro handler to
|
|
7
|
+
// run subsequent sub-bodies even after the jump already fired.
|
|
8
|
+
// Frame-changing ops (CALL, CALL_METHOD, NEW, RETURN, THROW): push/pop call
|
|
9
|
+
// frames mid-macro, leaving the `frame` variable stale for later sub-bodies.
|
|
10
|
+
// Variable-operand ops (MAKE_CLOSURE): the number of _operand() calls depends
|
|
11
|
+
// on uvCount at runtime, so a static handler cannot be generated.
|
|
12
|
+
// Infrastructure ops (DATA, PATCH, TRY_SETUP, TRY_END, DEBUGGER):
|
|
13
|
+
// either illegal here or nonsensical to fold.
|
|
14
|
+
|
|
15
|
+
// Scan bytecode for repeating instruction sequences and fold them into
|
|
16
|
+
// macro opcodes. Runs after selfModifying but before resolveLabels so
|
|
17
|
+
// IR-ref operands (label/constant) are carried through transparently.
|
|
18
|
+
//
|
|
19
|
+
// Algorithm:
|
|
20
|
+
// 1. Count every eligible window of length 2–5 by its op-code signature.
|
|
21
|
+
// 2. Keep sequences that appear >= 2 times; sort by frequency then length.
|
|
22
|
+
// 3. Assign unused opcode values (0–255, not already claimed by compiler.OP)
|
|
23
|
+
// to the most-frequent candidates and store in compiler.MACRO_OPS.
|
|
24
|
+
// 4. Re-scan bytecode, replacing each matched sequence with a single
|
|
25
|
+
// multi-operand instruction:
|
|
26
|
+
// [macroOpCode, operands_of_instr_0..., operands_of_instr_1..., ...]
|
|
27
|
+
// The runtime macro handler inlines each sub-instruction body; those
|
|
28
|
+
// bodies call this._operand() themselves to consume the inline operands.
|
|
29
|
+
/**
 * Fold repeated instruction sequences into single "macro" opcodes.
 *
 * 1. Count every window of 2–5 consecutive eligible instructions, keyed by
 *    the comma-joined opcode values of the window.
 * 2. Keep signatures seen at least twice, ordered by frequency, then length.
 * 3. Give each candidate an opcode from `nextFreeSlot` and record it in
 *    `compiler.MACRO_OPS` and `compiler.OP_NAME`.
 * 4. Rewrite the bytecode, longest match first, replacing each matched window
 *    with `[macroOpcode, ...operands of every folded instruction, in order]`.
 *
 * An instruction is eligible when its opcode is a value of `compiler.OP`, is
 * not in `compiler.JUMP_OPS`, and is not one of the explicitly excluded ops
 * below. Pseudo-instructions (opcode `null`) are never eligible, so a window
 * never spans one.
 *
 * @param bc - Input bytecode; it is not modified.
 * @param compiler - Source of `OP`, `JUMP_OPS` and `OP_NAME`. `MACRO_OPS` and
 *   `OP_NAME` are mutated with the newly assigned macro opcodes.
 * @returns The rewritten bytecode, or `bc` itself when nothing can be folded.
 */
export function macroOpcodes(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  const MIN_WINDOW = 2;
  const MAX_WINDOW = 5;
  const { OP, JUMP_OPS } = compiler;

  // Reverse lookup of the compiler's own opcodes. Anything that is not in
  // here (e.g. opcodes registered only in OP_NAME) is never folded.
  const originalOpToName = new Map<number, string>();
  for (const name in OP) {
    originalOpToName.set(OP[name], name);
  }

  // Built once: the original rebuilt this set on every eligibility check.
  const excluded = new Set<number | undefined>([
    OP.RETURN,
    OP.PATCH,
    OP.TRY_SETUP,
    OP.TRY_END,
    OP.DEBUGGER,
    OP.CALL,
    OP.CALL_METHOD,
    OP.NEW,
    OP.THROW,
    OP.MAKE_CLOSURE, // variable-length operands — cannot generate a static handler
  ]);

  const isEligible = (op: number | null): boolean =>
    op !== null &&
    !JUMP_OPS.has(op) &&
    !excluded.has(op) &&
    originalOpToName.has(op);

  // Every opcode value already claimed. Besides compiler.OP this also covers
  // numeric keys of compiler.OP_NAME, because other passes register the
  // opcodes they allocate there without adding them to compiler.OP; without
  // this a macro could be handed an opcode that is already in use.
  const usedOpcodes = new Set<number>(
    Object.values(OP).filter((v) => v !== undefined) as number[],
  );
  for (const key of Object.keys(compiler.OP_NAME ?? {})) {
    const claimed = parseInt(key, 10);
    if (!isNaN(claimed)) usedOpcodes.add(claimed);
  }
  if (usedOpcodes.size > U16_MAX) return { bytecode: bc };

  // Eligibility is a pure function of the opcode, so compute it once per
  // instruction instead of once per (window, position).
  const eligible = bc.map((instr) => isEligible(instr[0]));

  // ── Step 1: count window frequencies ──────────────────────────────────────
  const freqMap = new Map<string, { ops: number[]; count: number }>();

  for (let start = 0; start < bc.length; start++) {
    const ops: number[] = [];
    for (let offset = 0; offset < MAX_WINDOW; offset++) {
      const at = start + offset;
      // An ineligible instruction ends this window and every longer one.
      if (at >= bc.length || !eligible[at]) break;
      ops.push(bc[at][0] as number);
      if (ops.length < MIN_WINDOW) continue;

      const key = ops.join(",");
      const entry = freqMap.get(key);
      if (entry) {
        entry.count++;
      } else {
        freqMap.set(key, { ops: [...ops], count: 1 });
      }
    }
  }

  // ── Step 2: keep repeated candidates, prioritise by frequency then length ─
  const candidates = Array.from(freqMap.values())
    .filter((e) => e.count >= 2)
    .sort((a, b) => b.count - a.count || b.ops.length - a.ops.length);

  if (candidates.length === 0) return { bytecode: bc };

  // ── Step 3: assign free opcode slots to the best candidates ───────────────
  for (const candidate of candidates) {
    const macroOp = nextFreeSlot(usedOpcodes);
    if (macroOp === -1) break; // no free slot left
    compiler.MACRO_OPS[macroOp] = candidate.ops;
    // Register a combined name so OP_NAME and comment generation both work.
    compiler.OP_NAME[macroOp] = candidate.ops
      .map((v) => compiler.OP_NAME[v] ?? `OP_${v}`)
      .join(",");
  }

  // ── Step 4: build signature → macro opcode lookup ─────────────────────────
  const sigToMacro = new Map<string, number>();
  for (const [macroOpStr, ops] of Object.entries(compiler.MACRO_OPS)) {
    sigToMacro.set((ops as number[]).join(","), Number(macroOpStr));
  }

  // ── Step 5: replace sequences with a single multi-operand instruction ─────
  const result: Bytecode = [];
  let i = 0;

  while (i < bc.length) {
    // Length of the eligible run starting at i, capped at the window size.
    let run = 0;
    while (run < MAX_WINDOW && i + run < bc.length && eligible[i + run]) run++;

    // Longest window first.
    let matchedLen = 0;
    let macroOpCode = -1;
    for (let len = run; len >= MIN_WINDOW; len--) {
      const key = bc
        .slice(i, i + len)
        .map((instr) => instr[0])
        .join(",");
      const found = sigToMacro.get(key);
      if (found !== undefined) {
        macroOpCode = found;
        matchedLen = len;
        break;
      }
    }

    if (matchedLen === 0) {
      result.push(bc[i]);
      i++;
      continue;
    }

    // Concatenate the operands of every folded instruction, in order.
    const folded = bc.slice(i, i + matchedLen);
    const newInstr = [
      macroOpCode,
      ...folded.flatMap((instr) => instr.slice(1)),
    ] as Instruction;
    // The macro inherits the source node of the first folded instruction.
    (newInstr as any)[SOURCE_NODE_SYM] = (folded[0] as any)[SOURCE_NODE_SYM];

    result.push(newInstr);
    i += matchedLen;
  }

  return { bytecode: result };
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type * as b from "../../types.ts";
|
|
2
|
+
import { SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
+
|
|
4
|
+
// Resolve all {type:"constant", value} operands to integer indices into the
|
|
5
|
+
// constants pool. Returns both the resolved bytecode and the constants array
|
|
6
|
+
// so the Serializer can use it for comment generation and output.
|
|
7
|
+
// Constant refs may appear at any operand position (index 1, 2, 3, …).
|
|
8
|
+
/**
 * Replace every `{type: "constant", value}` operand with a numeric operand
 * whose `resolvedValue` is the index of `value` in the constants pool.
 *
 * Equal values share one pool slot. The lookup table is a `Map`, which
 * treats `0` and `-0` as the same key, so `-0` is stored under a private
 * sentinel key; otherwise one of the two would be loaded with the wrong sign.
 *
 * @param bc - Bytecode whose operands may contain constant refs at any
 *   operand position. The input instructions are not mutated.
 * @returns The rewritten bytecode plus the constants pool in index order.
 *   Instructions without a constant ref are passed through by reference.
 */
export function resolveConstants(bc: b.Bytecode): {
  bytecode: b.Bytecode;
  constants: any[];
} {
  const constants: any[] = [];
  const constantsMap = new Map<any, number>();
  // Stand-in map key for -0, which Map cannot distinguish from +0.
  const NEGATIVE_ZERO_KEY = Symbol("-0");

  const isConstantRef = (operand: unknown): boolean =>
    typeof operand === "object" &&
    operand !== null &&
    (operand as any).type === "constant";

  // Return the pool index for `operand.value`, adding it on first sight, and
  // wrap it in a copy of the operand retyped as a resolved number.
  function intern(operand: b.InstrOperand): b.Operand {
    const operandAsObject =
      typeof operand === "object" && operand ? operand : {};

    const value = (operand as any).value;
    const key = Object.is(value, -0) ? NEGATIVE_ZERO_KEY : value;

    let idx = constantsMap.get(key);
    if (idx === undefined) {
      idx = constants.length;
      constantsMap.set(key, idx);
      constants.push(value);
    }

    const newOperand = {
      ...operandAsObject, // keep any extra properties carried by the ref
      type: "number",
      resolvedValue: idx,
    };

    return newOperand;
  }

  const resolved: b.Bytecode = [];
  for (const instr of bc) {
    const [op, ...operands] = instr;

    if (!operands.some(isConstantRef)) {
      resolved.push(instr);
      continue;
    }

    const newOperands = operands.map((operand) =>
      isConstantRef(operand) ? intern(operand) : operand,
    );
    const newInstr = [op, ...newOperands] as b.Instruction;
    // Carry the source-node tag over to the rebuilt instruction.
    (newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
    resolved.push(newInstr);
  }

  return { bytecode: resolved, constants };
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
// --- Label IR ---
|
|
2
|
+
// During compilation, jump targets are symbolic labels instead of hard-coded
|
|
3
|
+
// PC numbers. Two IR "pseudo operands" carry the label information:
|
|
4
|
+
//
|
|
5
|
+
// defineLabel operand : [null, {type:"defineLabel", label:"FN_ENTRY_1"}]
|
|
6
|
+
// Marks a position in the bytecode array.
|
|
7
|
+
// resolveLabels() strips these out entirely.
|
|
8
|
+
//
|
|
9
|
+
// label ref operand : [OP.JUMP, {type:"label", label:"FN_ENTRY_1"}]
|
|
10
|
+
// Used as the operand of any jump instruction. resolveLabels() replaces
|
|
11
|
+
// it with the integer PC that the corresponding defineLabel resolves to.
|
|
12
|
+
//
|
|
13
|
+
// The output bytecode is still a nested array of instructions.
|
|
14
|
+
// Flattening (one u16 slot per op, one per operand) happens in the Serializer.
|
|
15
|
+
// PC values computed here reflect the FLAT slot index so that jump targets,
|
|
16
|
+
// startPc, and LOAD_INT label operands are all correct after flattening.
|
|
17
|
+
|
|
18
|
+
import type { Instruction } from "../../types.ts";
|
|
19
|
+
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
20
|
+
|
|
21
|
+
// Resolve symbolic labels to absolute flat-PC indices within a bytecode array.
|
|
22
|
+
// defineLabel pseudo-instructions are stripped; label-ref operands become ints.
|
|
23
|
+
// Each instruction [op, ...operands] occupies (1 + operands.length) flat slots,
|
|
24
|
+
// so realPc advances by instr.length for every non-pseudo instruction.
|
|
25
|
+
/**
 * Resolve symbolic labels to flat PC indices.
 *
 * Pass 1 records the flat PC of every `[null, {type: "defineLabel", label}]`
 * pseudo-instruction. A real instruction advances the PC by one slot per
 * element (opcode + operands), except elements flagged `placeholder: true`,
 * which are not counted.
 *
 * Pass 2 drops the defineLabel pseudo-instructions and replaces every
 * `{type: "label", label, offset?}` operand, at any operand position, with a
 * copy retyped as `"number"` whose `resolvedValue` is the label's PC plus
 * `offset` (0 when absent).
 *
 * Finally each entry of `compiler.fnDescriptors` gets `startPc` set to the PC
 * of its `startLabel`, falling back to its `entryLabel`.
 *
 * @param bc - Bytecode containing label definitions and label refs.
 * @param compiler - Only `fnDescriptors` is read; each descriptor is mutated.
 * @returns The bytecode with labels stripped and label refs resolved.
 * @throws Error when a label ref names a label that was never defined.
 */
export function resolveLabels(
  bc: Instruction[],
  compiler: Compiler,
): {
  bytecode: Instruction[];
} {
  const isDefineLabel = (instr: Instruction): boolean => {
    const [op, first] = instr;
    return (
      op === null &&
      typeof first === "object" &&
      first !== null &&
      (first as any).type === "defineLabel"
    );
  };

  const isLabelRef = (operand: unknown): boolean =>
    typeof operand === "object" &&
    operand !== null &&
    (operand as any).type === "label";

  // Pass 1 – record each label's flat PC.
  const labelToPc = new Map<string, number>();
  let realPc = 0;
  for (const instr of bc) {
    if (isDefineLabel(instr)) {
      labelToPc.set((instr[1] as any).label, realPc);
      continue;
    }
    // One slot for the opcode plus one per operand.
    // IMPORTANT: 'placeholder' operands are not counted.
    realPc += instr.filter((x) => (x as any)?.placeholder !== true).length;
  }

  // Pass 2 – build the resolved instruction list.
  const resolved: Instruction[] = [];
  for (const instr of bc) {
    if (isDefineLabel(instr)) continue; // pseudo-op: occupies no slot

    const [op, ...operands] = instr;
    const newOperands = operands.map((operand) => {
      if (!isLabelRef(operand)) return operand;

      const ref = operand as any;
      const pc = labelToPc.get(ref.label);
      if (pc === undefined) throw new Error(`Undefined label: ${ref.label}`);

      return {
        ...ref, // preserve the original operand's properties
        type: "number",
        resolvedValue: pc + (ref.offset ?? 0),
      };
    });

    const newInstr = [op, ...newOperands] as Instruction;
    // Carry the source-node tag over to the rebuilt instruction.
    (newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
    resolved.push(newInstr);
  }

  // Patch each function descriptor's startPc now that labels are resolved.
  for (const desc of compiler.fnDescriptors) {
    desc.startPc =
      labelToPc.get(desc.startLabel) ?? labelToPc.get(desc.entryLabel);
  }

  return {
    bytecode: resolved,
  };
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import type { Bytecode, Instruction } from "../../types.ts";
|
|
2
|
+
import { Compiler } from "../../compiler.ts";
|
|
3
|
+
import { choice } from "../utils/random-utils.ts";
|
|
4
|
+
|
|
5
|
+
/**
 * Move each labelled block body to the end of the bytecode and leave a PATCH
 * instruction plus garbage placeholder slots in its place.
 *
 * For every `defineLabel` pseudo-op the body is the run of instructions up to
 * (not including) the next `defineLabel`, jump op, or RETURN. A non-empty
 * body is replaced in place by:
 *
 *   defineLabel(originalLabel)              ← kept as-is
 *   PATCH destPc sliceStart sliceEnd        ← opcode + 3 label operands
 *   one single-slot garbage opcode per flat slot of the body
 *
 * where destPc = originalLabel + 4, sliceStart = patchLabel and
 * sliceEnd = patchLabel + bodyFlatSize. The real body is appended after all
 * other instructions under `patch_<originalLabel>_<n>`.
 *
 * @param bc - Input bytecode; it is not modified.
 * @param compiler - Supplies `OP` (PATCH, RETURN and the garbage pool) and
 *   `JUMP_OPS`.
 * @returns The rewritten bytecode followed by the relocated bodies.
 */
export function selfModifying(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  const { OP, JUMP_OPS } = compiler;

  const isDefineLabel = (instr: Instruction): boolean => {
    const [op, operand] = instr;
    return (
      op === null &&
      typeof operand === "object" &&
      operand !== null &&
      (operand as any).type === "defineLabel"
    );
  };

  // Flat slots an instruction occupies: every element except those flagged
  // as placeholders.
  const flatWidth = (instr: Instruction): number =>
    instr.filter((x) => (x as any)?.placeholder !== true).length;

  // Garbage opcodes are drawn from the real opcode values. Computed once, and
  // restricted to actual numbers so an undefined entry in OP can never be
  // emitted as a NaN opcode.
  const garbagePool = Object.values(OP).filter(
    (v): v is number => typeof v === "number" && !Number.isNaN(v),
  );

  const result: Bytecode = [];
  const appended: Bytecode = [];
  let patchCount = 0;

  let i = 0;
  while (i < bc.length) {
    const instr = bc[i];
    i++;
    result.push(instr); // labels and ordinary instructions are both kept
    if (!isDefineLabel(instr)) continue;

    const originalLabel = (instr[1] as any).label as string;

    // Collect body: everything after the label until the next terminator.
    let end = i;
    while (end < bc.length) {
      const next = bc[end];
      if (isDefineLabel(next)) break; // start of the next block
      const nextOp = next[0];
      if (nextOp !== null && (JUMP_OPS.has(nextOp) || nextOp === OP.RETURN)) {
        break; // jumps and RETURN terminate the body
      }
      end++;
    }

    const body = bc.slice(i, end);
    if (body.length === 0) {
      // Nothing to transform — label is immediately followed by a terminator.
      continue;
    }

    const patchLabel = `patch_${originalLabel}_${patchCount++}`;
    const bodyFlatSize = body.reduce((acc, b) => acc + flatWidth(b), 0);

    // PATCH occupies 4 flat slots, hence the +4 for the destination.
    result.push([
      OP.PATCH as number,
      { type: "label", label: originalLabel, offset: 4 },
      { type: "label", label: patchLabel },
      { type: "label", label: patchLabel, offset: bodyFlatSize },
    ] as unknown as Instruction);

    // Placeholder region: one single-slot garbage opcode per body slot.
    for (let p = 0; p < bodyFlatSize; p++) {
      result.push([+choice(garbagePool)]);
    }

    // Relocated body, addressable through patchLabel.
    appended.push([null, { type: "defineLabel", label: patchLabel }]);
    appended.push(...body);

    i = end; // skip over the original body in the input array
  }

  return { bytecode: [...result, ...appended] };
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import type { Bytecode, InstrOperand, Instruction } from "../../types.ts";
|
|
2
|
+
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
+
import { nextFreeSlot, U16_MAX } from "../utils/op-utils.ts";
|
|
4
|
+
|
|
5
|
+
// Creates specialized opcodes for the most frequent (OPCODE + single_integer_operand) pairs.
|
|
6
|
+
// Example: [OP.LOAD_CONST, 1] becomes [SPECIALIZED_LOAD_CONST_1].
|
|
7
|
+
// Only instructions with *exactly one operand* (a plain number or an IR operand object) are considered.
|
|
8
|
+
// MAKE_CLOSURE and any instruction with zero / multiple operands are skipped.
|
|
9
|
+
// Runs after selfModifying but before resolveLabels (operands stay plain numbers).
|
|
10
|
+
/**
 * Build a string that identifies an operand by value.
 *
 * For ordinary operands the result equals `JSON.stringify(operand)`. Values
 * that plain JSON conflates or rejects (`undefined`, `NaN`, `±Infinity`,
 * `-0`, bigint, RegExp) are encoded as tagged objects so two different
 * operands never share a key.
 */
function operandKey(operand: unknown): string {
  return JSON.stringify(operand, (_key, v) => {
    if (v === undefined) return { $undefined: true };
    if (typeof v === "bigint") return { $bigint: v.toString() };
    if (typeof v === "number" && (!Number.isFinite(v) || Object.is(v, -0))) {
      return { $number: Object.is(v, -0) ? "-0" : String(v) };
    }
    if (v instanceof RegExp) return { $regexp: String(v) };
    return v;
  });
}

/**
 * Replace `[op, operand]` instructions with a dedicated opcode that stands
 * for that exact (op, operand) pair.
 *
 * Only instructions with exactly one operand are considered; pseudo-ops
 * (opcode `null`) and MAKE_CLOSURE are skipped. Pairs are served most
 * frequent first until `nextFreeSlot` reports no free opcode. For each new
 * opcode the original pair is stored in `compiler.SPECIALIZED_OPS` and a
 * readable name is added to `compiler.OP_NAME`. The rewritten instruction
 * keeps its operand but flags it `placeholder: true`.
 *
 * Counting and replacement use the same operand key. Previously counting
 * used plain string interpolation, which turned every object operand into
 * "[object Object]", so only the first object operand seen for an opcode was
 * ever specialized.
 *
 * @param bc - Input bytecode; it is not modified.
 * @param compiler - Supplies `OP.MAKE_CLOSURE` and `OP_NAME`; `OP_NAME` is
 *   extended and `SPECIALIZED_OPS` is replaced.
 * @returns The rewritten bytecode, or `bc` itself when nothing qualifies.
 */
export function specializedOpcodes(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  // A pair needs this many occurrences to be specialized. 1 matches the
  // existing behaviour: every eligible pair is a candidate.
  const MIN_OCCURRENCES = 1;

  // Every opcode that already has a name counts as taken.
  const usedOpcodes = new Set<number>(
    Object.keys(compiler.OP_NAME)
      .map((k) => parseInt(k, 10))
      .filter((v) => !isNaN(v)) as number[],
  );

  if (usedOpcodes.size > U16_MAX) return { bytecode: bc };

  const isCandidate = (instr: Instruction): boolean =>
    instr[0] !== null &&
    instr[0] !== compiler.OP.MAKE_CLOSURE &&
    instr.length === 2;

  const signature = (op: number, operand: InstrOperand): string =>
    `${op},${operandKey(operand)}`;

  // ── Step 1: count frequency of eligible (op, operand) pairs ───────────────
  const freqMap = new Map<
    string,
    { op: number; operand: InstrOperand; count: number }
  >();

  for (const instr of bc) {
    if (!isCandidate(instr)) continue;
    const op = instr[0] as number;
    const operand = instr[1];

    const key = signature(op, operand);
    const entry = freqMap.get(key);
    if (entry) {
      entry.count++;
    } else {
      freqMap.set(key, { op, operand, count: 1 });
    }
  }

  // ── Step 2: order candidates by frequency ─────────────────────────────────
  const candidates = Array.from(freqMap.values())
    .filter((e) => e.count >= MIN_OCCURRENCES)
    .sort((a, b) => b.count - a.count);

  if (candidates.length === 0) return { bytecode: bc };

  // ── Step 3: assign free opcode slots to the best candidates ───────────────
  const sigToSpecial = new Map<string, number>();
  const specializedOps: Compiler["SPECIALIZED_OPS"] = {};

  for (const { op: originalOp, operand } of candidates) {
    const specialOp = nextFreeSlot(usedOpcodes);
    if (specialOp === -1) break; // no free slot left

    sigToSpecial.set(signature(originalOp, operand), specialOp);
    specializedOps[specialOp] = { originalOp, operand };

    // Register a human-readable name for disassembly / debugging.
    const originalName = compiler.OP_NAME[originalOp] ?? `OP_${originalOp}`;
    compiler.OP_NAME[specialOp] = `${originalName}_${operandKey(operand)}`;
  }

  // Store the mapping on the compiler for later passes.
  compiler.SPECIALIZED_OPS = specializedOps;

  // ── Step 4: replace matching instructions with the specialized opcode ─────
  const result: Bytecode = [];

  for (const instr of bc) {
    if (!isCandidate(instr)) {
      result.push(instr);
      continue;
    }

    const operand = instr[1];
    const specialOpCode = sigToSpecial.get(signature(instr[0] as number, operand));
    if (specialOpCode === undefined) {
      result.push(instr);
      continue;
    }

    // Keep the operand on the instruction, normalised to object form and
    // flagged as a placeholder.
    const operandAsObject =
      typeof operand === "object" && operand
        ? operand
        : {
            type: "number",
            value: operand,
            resolvedValue: operand,
          };

    const newOperand = {
      ...operandAsObject,
      placeholder: true,
    } as any as InstrOperand;

    const newInstr: Instruction = [specialOpCode, newOperand];

    // Preserve source-node information for error reporting.
    (newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];

    result.push(newInstr);
  }

  return { bytecode: result };
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import * as t from "@babel/types";
|
|
2
|
+
import traverseImport from "@babel/traverse";
|
|
3
|
+
import { ok } from "assert";
|
|
4
|
+
import { Compiler } from "../../compiler.ts";
|
|
5
|
+
// Use the import's `.default` member when it is set, otherwise the imported
// binding itself (presumably to cope with ESM/CJS interop — confirm).
const traverse = (
  traverseImport.default ? traverseImport.default : traverseImport
) as typeof traverseImport.default;
|
|
7
|
+
|
|
8
|
+
// Extract the real statement list from a SwitchCase consequent, normalising
|
|
9
|
+
// the two forms that appear in the runtime:
|
|
10
|
+
// • A single wrapping BlockStatement → use its .body
|
|
11
|
+
// • Statements listed directly → use as-is
|
|
12
|
+
// In both cases every top-level BreakStatement / EmptyStatement is filtered out (not only trailing ones).
|
|
13
|
+
/**
 * Return the statements that make up a switch case's handler.
 *
 * When the consequent is exactly one BlockStatement its inner statements are
 * used; otherwise the consequent list itself is used. Top-level
 * BreakStatement and EmptyStatement nodes are removed from the result.
 * Nodes are returned by reference, not cloned.
 */
function extractCaseBody(switchCase: t.SwitchCase): t.Statement[] {
  const { consequent } = switchCase;
  const isSingleBlock =
    consequent.length === 1 && t.isBlockStatement(consequent[0]);

  const statements = isSingleBlock
    ? (consequent[0] as t.BlockStatement).body
    : (consequent as t.Statement[]);

  return statements.filter(
    (stmt) => !(t.isBreakStatement(stmt) || t.isEmptyStatement(stmt)),
  );
}
|
|
25
|
+
|
|
26
|
+
// Append a generated switch case for every entry in compiler.MACRO_OPS.
|
|
27
|
+
// Each case inlines the constituent case bodies directly — no operand stack,
|
|
28
|
+
// no substitution needed. Because every opcode handler now reads its own
|
|
29
|
+
// operands via this._operand(), those calls naturally consume the inline
|
|
30
|
+
// operands that macroOpcodes.ts embedded on the macro instruction.
|
|
31
|
+
// Must be called BEFORE applyShuffleOpcodes so the new cases get shuffled.
|
|
32
|
+
/**
 * Append one generated switch case per entry of `compiler.MACRO_OPS` to the
 * switch statement marked with an `@SWITCH` leading comment.
 *
 * Each macro case is a single block that contains, in order, a deep clone of
 * every constituent opcode's case body, followed by `break`. Constituent
 * cases are looked up by name: opcode value → `compiler.OP_NAME` → the
 * existing case whose test is `OP.<name>`.
 *
 * Every inlined body is wrapped in its own block so that block-scoped
 * declarations (`let` / `const`) of two constituents, or of the same
 * constituent inlined twice, cannot collide.
 *
 * NOTE(review): a macro with a constituent that has no name or no matching
 * `OP.<name>` case is skipped without any diagnostic, so no handler is
 * generated for that macro opcode — confirm this is intended.
 *
 * @param ast - Runtime AST; the matched switch statement is mutated in place.
 * @param compiler - Supplies `MACRO_OPS` and `OP_NAME`.
 * @throws AssertionError when no `@SWITCH` switch statement is found.
 */
export function applyMacroOpcodes(ast: t.File, compiler: Compiler): void {
  let switchStatement: t.SwitchStatement | null = null;
  traverse(ast, {
    SwitchStatement(path) {
      if (path.node.leadingComments?.some((c) => c.value.includes("@SWITCH"))) {
        switchStatement = path.node;
        path.stop();
      }
    },
  });

  ok(switchStatement, "Could not find @SWITCH statement for macro opcodes");
  const dispatch = switchStatement as t.SwitchStatement;

  // Map opName → SwitchCase for every existing `case OP.xxx:`.
  const nameToCaseMap = new Map<string, t.SwitchCase>();
  for (const sc of dispatch.cases) {
    const test = sc.test;
    if (
      test &&
      t.isMemberExpression(test) &&
      t.isIdentifier(test.object, { name: "OP" }) &&
      t.isIdentifier(test.property)
    ) {
      nameToCaseMap.set((test.property as t.Identifier).name, sc);
    }
  }

  for (const [macroOpStr, constituentOps] of Object.entries(
    compiler.MACRO_OPS,
  )) {
    const macroOpCode = Number(macroOpStr);

    // Resolve each constituent op value → case node via OP_NAME lookup.
    const constituentCases: t.SwitchCase[] = [];
    for (const opVal of constituentOps) {
      const opName = compiler.OP_NAME[opVal];
      const found = opName ? nameToCaseMap.get(opName) : undefined;
      if (!found) break;
      constituentCases.push(found);
    }
    // At least one constituent could not be resolved: skip this macro.
    if (constituentCases.length !== constituentOps.length) continue;

    const opNames = constituentOps.map((v) => compiler.OP_NAME[v] ?? `OP_${v}`);

    // ── Build the macro case body ──────────────────────────────────────────
    const bodyStmts: t.Statement[] = [];

    constituentCases.forEach((constituentCase, index) => {
      // Clone so the original handler case is left untouched.
      const subStmts = extractCaseBody(constituentCase).map(
        (s) => t.cloneNode(s, true) as t.Statement,
      );
      if (subStmts.length === 0) return;

      // Own block scope per constituent; the comment names the source op.
      const scoped = t.blockStatement(subStmts);
      t.addComment(scoped, "leading", ` ${opNames[index]}`, true);
      bodyStmts.push(scoped);
    });

    bodyStmts.push(t.breakStatement());

    dispatch.cases.push(
      t.switchCase(t.numericLiteral(macroOpCode), [
        t.blockStatement(bodyStmts),
      ]),
    );
  }
}
|