js-confuser-vm 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -2
- package/README.MD +186 -107
- package/dist/build-runtime.js +7 -1
- package/dist/compiler.js +801 -785
- package/dist/runtime.js +409 -332
- package/dist/transforms/bytecode/aliasedOpcodes.js +140 -0
- package/dist/transforms/bytecode/concealConstants.js +31 -0
- package/dist/transforms/bytecode/macroOpcodes.js +22 -10
- package/dist/transforms/bytecode/resolveContants.js +73 -10
- package/dist/transforms/bytecode/selfModifying.js +3 -2
- package/dist/transforms/bytecode/specializedOpcodes.js +38 -28
- package/dist/transforms/runtime/aliasedOpcodes.js +134 -0
- package/dist/transforms/runtime/shuffleOpcodes.js +1 -1
- package/dist/transforms/runtime/specializedOpcodes.js +21 -16
- package/dist/utils/op-utils.js +29 -0
- package/dist/utils/random-utils.js +27 -0
- package/index.ts +10 -8
- package/jest.config.js +10 -0
- package/package.json +1 -1
- package/src/build-runtime.ts +7 -1
- package/src/compiler.ts +2395 -2069
- package/src/options.ts +2 -0
- package/src/runtime.ts +838 -771
- package/src/transforms/bytecode/aliasedOpcodes.ts +158 -0
- package/src/transforms/bytecode/concealConstants.ts +52 -0
- package/src/transforms/bytecode/macroOpcodes.ts +32 -15
- package/src/transforms/bytecode/resolveContants.ts +87 -16
- package/src/transforms/bytecode/selfModifying.ts +3 -3
- package/src/transforms/bytecode/specializedOpcodes.ts +58 -29
- package/src/transforms/runtime/aliasedOpcodes.ts +191 -0
- package/src/transforms/runtime/shuffleOpcodes.ts +1 -1
- package/src/transforms/runtime/specializedOpcodes.ts +39 -24
- package/src/{transforms/utils → utils}/op-utils.ts +7 -0
- package/src/{transforms/utils → utils}/random-utils.ts +0 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import type { Bytecode, InstrOperand, Instruction } from "../../types.ts";
|
|
2
|
+
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
+
import { nextFreeSlot, U16_MAX } from "../../utils/op-utils.ts";
|
|
4
|
+
import { shuffle } from "../../utils/random-utils.ts";
|
|
5
|
+
|
|
6
|
+
/**
 * Names of the opcodes that must never receive an alias.
 *
 * Two groups are excluded:
 *  - Variable-length operand opcodes: they cannot be statically aliased
 *    because the number of `this._operand()` calls varies at runtime.
 *  - Infrastructure opcodes: aliasing them would interfere with
 *    self-modifying bytecode and the exception-handling machinery.
 */
const DISALLOWED_OP_NAMES = new Set<string>([
  // Variable-length operand opcodes.
  "MAKE_CLOSURE",
  "BUILD_ARRAY",
  "BUILD_OBJECT",
  "CALL",
  "CALL_METHOD",
  "NEW",

  // Infrastructure opcodes.
  "PATCH",
  "TRY_SETUP",
  "TRY_END",
  "DEBUGGER",
]);
|
|
24
|
+
|
|
25
|
+
/**
 * Creates aliased opcodes: a second opcode value for each frequently used
 * base opcode, optionally with a permuted operand order in the bytecode.
 *
 * For every aliased op an `order` permutation of length `arity` is recorded
 * in `compiler.ALIASED_OPS`. `order[i] = j` means: slot `i` of the rewritten
 * instruction holds what was originally operand `j`.
 *
 * Example: LOAD_GLOBAL [dst, nameIdx] with order = [1, 0] is emitted as
 * [ALIAS_OP, nameIdx, dst]; the handler is expected to map the operands it
 * reads back to [dst, nameIdx].
 *
 * Per the original note, this runs LAST among the bytecode transforms
 * (after selfModifying), before resolveLabels.
 *
 * Side effects: registers a name for every alias in `compiler.OP_NAME` and
 * assigns `compiler.ALIASED_OPS` (only once candidates have been found).
 *
 * @param bc       Bytecode to rewrite; its instructions are not mutated.
 * @param compiler Compiler supplying `OP` / `OP_NAME` and receiving the
 *                 alias table.
 * @returns The rewritten bytecode, or `bc` itself when nothing was aliased.
 */
export function aliasedOpcodes(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  // Base opcodes that are allowed to receive an alias, keyed by value.
  const eligibleOps = new Map<number, string>();
  Object.entries(compiler.OP).forEach(([opName, opValue]) => {
    if (!DISALLOWED_OP_NAMES.has(opName)) {
      eligibleOps.set(opValue as number, opName);
    }
  });

  // Every opcode slot already taken (base ops plus dynamically assigned ones).
  const takenSlots = new Set<number>();
  for (const rawKey of Object.keys(compiler.OP_NAME)) {
    const slot = parseInt(rawKey, 10);
    if (!isNaN(slot)) takenSlots.add(slot);
  }

  if (takenSlots.size > U16_MAX) return { bytecode: bc };

  // ── Step 1: frequency and arity of each eligible opcode ───────────────────
  // The scan runs over the bytecode as it currently is, so the counts reflect
  // what earlier transforms left behind.
  const usage = new Map<number, { freq: number; arity: number | null }>();

  for (const instr of bc) {
    const opcode = instr[0];
    if (opcode === null || !eligibleOps.has(opcode)) continue;

    const operandCount = instr.length - 1;
    if (operandCount < 1) continue; // nothing to permute

    const seen = usage.get(opcode);
    if (seen === undefined) {
      usage.set(opcode, { freq: 1, arity: operandCount });
      continue;
    }

    // A differing operand count marks the op as variable-length (arity null).
    if (seen.arity !== operandCount) seen.arity = null;
    seen.freq += 1;
  }

  // ── Step 2: consistent-arity ops only, most frequent first ────────────────
  const ranked = [...usage]
    .filter(([, stat]) => stat.arity !== null)
    .sort((left, right) => right[1].freq - left[1].freq);

  if (ranked.length === 0) return { bytecode: bc };

  // ── Step 3: hand out free slots and pick an operand order per alias ───────
  const aliasFor = new Map<number, number>(); // original op → alias op
  const aliasTable: Compiler["ALIASED_OPS"] = {};

  for (const [originalOp, stat] of ranked) {
    const aliasOp = nextFreeSlot(takenSlots);
    if (aliasOp === -1) break; // no free opcode slot left

    const arity = stat.arity!;

    // Arity 1 has a single permutation; the alias is still useful as a clone.
    let order: number[] = [0];
    if (arity >= 2) {
      // Re-shuffle (at most 20 times) until the order differs from identity.
      const identity = Array.from({ length: arity }, (_, i) => i);
      for (let attempt = 0; attempt < 20; attempt++) {
        order = shuffle([...identity]);
        if (order.some((value, position) => value !== position)) break;
      }
    }

    aliasFor.set(originalOp, aliasOp);
    aliasTable[aliasOp] = { originalOp, order };

    // Human-readable name for the alias.
    const baseName = compiler.OP_NAME[originalOp] ?? `OP_${originalOp}`;
    compiler.OP_NAME[aliasOp] = `ALIAS_${baseName}_${order.join("_")}`;
  }

  compiler.ALIASED_OPS = aliasTable;

  if (aliasFor.size === 0) return { bytecode: bc };

  // ── Step 4: rewrite the bytecode ──────────────────────────────────────────
  const rewritten: Bytecode = [];

  for (const instr of bc) {
    const opcode = instr[0];
    const aliasOp = opcode === null ? undefined : aliasFor.get(opcode);

    if (aliasOp === undefined) {
      rewritten.push(instr);
      continue;
    }

    const { order } = aliasTable[aliasOp];
    const operands = instr.slice(1) as InstrOperand[];

    // Operand count differs from the recorded arity: keep the instruction.
    if (operands.length !== order.length) {
      rewritten.push(instr);
      continue;
    }

    // New slot i receives original operand order[i].
    const aliased: Instruction = [
      aliasOp,
      ...order.map((source) => operands[source]),
    ];
    (aliased as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
    rewritten.push(aliased);
  }

  return { bytecode: rewritten };
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { Compiler } from "../../compiler.ts";
|
|
2
|
+
import type * as b from "../../types.ts";
|
|
3
|
+
|
|
4
|
+
/**
 * Duplicates every `{ type: "constant" }` operand: the original operand is
 * kept and is immediately followed by a companion operand
 * `{ type: "constant", value, key: true }` carrying the same value.
 *
 * Instructions containing a constant operand are rewritten IN PLACE (the
 * same array object is emptied and refilled); instructions without one are
 * passed through untouched. The returned bytecode is a new outer array
 * holding the same instruction objects in the same order.
 *
 * @param bytecode Instructions to process.
 * @param compiler Not read by this function.
 * @returns The processed instruction list.
 */
export function concealConstants(
  bytecode: b.Bytecode,
  compiler: Compiler,
): {
  bytecode: b.Bytecode;
} {
  const output: b.Bytecode = [];

  for (const instr of bytecode) {
    const [opcode, ...args] = instr;

    // Does any operand look like a constant object?
    let containsConstant = false;
    for (const arg of args) {
      if (
        arg !== undefined &&
        arg !== null &&
        typeof arg === "object" &&
        (arg as any).type === "constant"
      ) {
        containsConstant = true;
        break;
      }
    }

    if (containsConstant) {
      const expanded: any[] = [];
      for (const arg of args) {
        expanded.push(arg);
        if ((arg as any)?.type === "constant") {
          // Companion operand flagged with `key: true`.
          expanded.push({
            type: "constant",
            value: (arg as any).value,
            key: true,
          });
        }
      }

      // Refill the existing array so the instruction object stays the same.
      instr.length = 0;
      instr.push(opcode, ...expanded);
    }

    output.push(instr);
  }

  return { bytecode: output };
}
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import type { Bytecode, Instruction } from "../../types.ts";
|
|
2
2
|
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
-
import { nextFreeSlot, U16_MAX } from "
|
|
3
|
+
import { nextFreeSlot, U16_MAX } from "../../utils/op-utils.ts";
|
|
4
4
|
|
|
5
|
-
// Opcodes that must not appear inside a macro window.
|
|
5
|
+
// Opcodes that must not appear in a non-terminal position inside a macro window.
|
|
6
6
|
// Jump ops: modifying frame._pc mid-execution causes the macro handler to
|
|
7
7
|
// run subsequent sub-bodies even after the jump already fired.
|
|
8
8
|
// Frame-changing ops (CALL, CALL_METHOD, NEW, RETURN, THROW): push/pop call
|
|
9
9
|
// frames mid-macro, leaving the `frame` variable stale for later sub-bodies.
|
|
10
|
+
// When one of these is the LAST instruction in the macro sequence there are no
|
|
11
|
+
// following sub-bodies, so editing _pc or the call frame is safe.
|
|
10
12
|
// Variable-operand ops (MAKE_CLOSURE): the number of _operand() calls depends
|
|
11
13
|
// on uvCount at runtime, so a static handler cannot be generated.
|
|
12
|
-
// Infrastructure ops (
|
|
14
|
+
// Infrastructure ops (PATCH, TRY_SETUP, TRY_END, DEBUGGER):
|
|
13
15
|
// either illegal here or nonsensical to fold.
|
|
14
16
|
|
|
15
17
|
// Scan bytecode for repeating instruction sequences and fold them into
|
|
@@ -36,23 +38,35 @@ export function macroOpcodes(
|
|
|
36
38
|
originalOpToName.set(opVal, name);
|
|
37
39
|
}
|
|
38
40
|
|
|
39
|
-
function isEligible(
|
|
41
|
+
/**
 * Decides whether an opcode may take part in a macro window.
 *
 * - `null` opcodes, infrastructure ops (PATCH, TRY_SETUP, TRY_END, DEBUGGER)
 *   and MAKE_CLOSURE (variable-length operands) are never eligible.
 * - Jump ops and frame-changing ops (RETURN, CALL, CALL_METHOD, NEW, THROW)
 *   are eligible only as the terminal instruction of the window, where no
 *   sub-body follows them.
 * - Anything else is eligible only if it is an original opcode, i.e. present
 *   in `originalOpToName`.
 *
 * @param op       Opcode value of the instruction, or `null`.
 * @param compiler Compiler providing `OP` and `JUMP_OPS`.
 * @param isLast   Whether the instruction is the last one in the window.
 */
function isEligible(
  op: number | null,
  compiler: Compiler,
  isLast: boolean = false,
): boolean {
  if (op === null) return false;

  const { OP, JUMP_OPS } = compiler;

  // Never eligible, regardless of position.
  const neverFoldable = [
    OP.PATCH,
    OP.TRY_SETUP,
    OP.TRY_END,
    OP.DEBUGGER,
    OP.MAKE_CLOSURE, // variable-length operands — cannot generate a static handler
  ];
  if (neverFoldable.includes(op)) return false;

  if (!isLast) {
    // Only allowed in the terminal position.
    const terminalOnly = [OP.RETURN, OP.CALL, OP.CALL_METHOD, OP.NEW, OP.THROW];
    if (JUMP_OPS.has(op) || terminalOnly.includes(op)) return false;
  }

  // Only original ops are eligible (specialized ops are not).
  return originalOpToName.has(op);
}
|
|
57
71
|
|
|
58
72
|
// Collect every opcode value already in use so we can find free slots.
|
|
@@ -72,13 +86,15 @@ export function macroOpcodes(
|
|
|
72
86
|
let valid = true;
|
|
73
87
|
for (let j = 0; j < len; j++) {
|
|
74
88
|
const op = bc[i + j][0];
|
|
75
|
-
|
|
89
|
+
const isLast = j === len - 1;
|
|
90
|
+
if (!isEligible(op, compiler, isLast)) {
|
|
76
91
|
valid = false;
|
|
77
92
|
break;
|
|
78
93
|
}
|
|
79
94
|
ops.push(op as number);
|
|
80
95
|
}
|
|
81
|
-
// If position (i+j) is ineligible, longer windows from
|
|
96
|
+
// If position (i+j) is ineligible even as a terminal, longer windows from
|
|
97
|
+
// i are also invalid (it would be non-terminal there too).
|
|
82
98
|
if (!valid) break;
|
|
83
99
|
|
|
84
100
|
const key = ops.join(",");
|
|
@@ -135,7 +151,8 @@ export function macroOpcodes(
|
|
|
135
151
|
for (let j = 0; j < len; j++) {
|
|
136
152
|
const instr = bc[i + j];
|
|
137
153
|
const op = instr[0];
|
|
138
|
-
|
|
154
|
+
const isLast = j === len - 1;
|
|
155
|
+
if (!isEligible(op, compiler, isLast)) {
|
|
139
156
|
valid = false;
|
|
140
157
|
break;
|
|
141
158
|
}
|
|
@@ -1,37 +1,97 @@
|
|
|
1
1
|
import type * as b from "../../types.ts";
|
|
2
|
-
import { SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
2
|
+
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
+
import { getRandomInt } from "../../utils/random-utils.ts";
|
|
4
|
+
import { U16_MAX } from "../../utils/op-utils.ts";
|
|
3
5
|
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
|
|
6
|
+
/**
 * Conceals a string with a position-dependent 16-bit XOR key and returns the
 * result base64-encoded.
 *
 * The UTF-16 code unit at index `i` is XOR'd with `(key + i) & 0xFFFF`. Each
 * resulting 16-bit value is stored as two bytes, low byte first
 * (little-endian; the original note says this matches decodeBytecode), and
 * the byte sequence is base64-encoded so the stored constant is plain ASCII.
 *
 * @param s   Text to conceal.
 * @param key XOR key applied to the first code unit; later units use key + i.
 * @returns Base64 text of the concealed code units ("" for an empty string).
 */
function concealString(s: string, key: number): string {
  const packed = Buffer.alloc(s.length * 2);

  for (let pos = 0; pos < s.length; pos += 1) {
    const mask = (key + pos) & 0xffff;
    // Both operands fit in 16 bits, so the XOR result is within u16 range.
    packed.writeUInt16LE(s.charCodeAt(pos) ^ mask, pos * 2);
  }

  return packed.toString("base64");
}
|
|
21
|
+
|
|
22
|
+
// Resolve all {type:"constant", value} operands to a PAIR of integer operands:
|
|
23
|
+
// [constPoolIndex, concealKey]
|
|
24
|
+
//
|
|
25
|
+
// constPoolIndex — index into the constants array (as before).
|
|
26
|
+
// concealKey — XOR key used to conceal this constant.
|
|
27
|
+
// 0 means no concealment (concealConstants is off, or the
|
|
28
|
+
// value type is not concealable: null, undefined, bool, float…).
|
|
29
|
+
//
|
|
30
|
+
// The constants array stores the CONCEALED value when key != 0.
|
|
31
|
+
// The runtime's _readConstant(idx, key) reverses the concealment on the fly.
|
|
32
|
+
//
|
|
33
|
+
// Both slots are u16; all existing operand serialization handles them identically.
|
|
34
|
+
export function resolveConstants(
|
|
35
|
+
bc: b.Bytecode,
|
|
36
|
+
compiler: Compiler,
|
|
37
|
+
): {
|
|
9
38
|
bytecode: b.Bytecode;
|
|
10
39
|
constants: any[];
|
|
11
40
|
} {
|
|
12
41
|
const constants: any[] = [];
|
|
13
|
-
const constantsMap = new Map<any, number>();
|
|
42
|
+
const constantsMap = new Map<any, number>(); // original value → pool index
|
|
43
|
+
const keyMap = new Map<number, number>(); // pool index → conceal key
|
|
14
44
|
|
|
15
|
-
function intern(operand: b.InstrOperand): b.
|
|
45
|
+
/**
 * Interns the value of a `{type: "constant"}` operand in the constant pool
 * and returns the pair of operands that refer to it.
 *
 * The first time a value is seen it is appended to `constants` and given a
 * conceal key, which is remembered in `keyMap`:
 *  - strings (when `concealConstants` is on) are stored via `concealString`;
 *  - 32-bit integers (when `concealConstants` is on) are stored XOR'd with
 *    the key;
 *  - everything else is stored as-is with key 0 (no concealment).
 * Later occurrences of the same value reuse the pool index and key.
 *
 * @param operand Operand carrying the constant in its `value` property.
 * @returns `[idxOperand, keyOperand]`: copies of the operand with
 *          `type: "number"` and `resolvedValue` set to the pool index and
 *          to the conceal key respectively.
 */
function intern(operand: b.InstrOperand): [b.InstrOperand, b.InstrOperand] {
  const operandAsObject =
    typeof operand === "object" && operand ? operand : {};
  const value = (operand as any).value;

  let idx = constantsMap.get(value);
  let key = 0;

  if (typeof idx !== "number") {
    idx = constants.length;
    constantsMap.set(value, idx);

    if (compiler.options.concealConstants && typeof value === "string") {
      // Strings: position-dependent XOR. Key must be >= 1.
      key = getRandomInt(1, U16_MAX);
      constants.push(concealString(value, key));
    } else if (
      compiler.options.concealConstants &&
      typeof value === "number" &&
      // `^` converts its operands to 32-bit integers, so only values that
      // survive that conversion can be recovered by XOR-ing with the key
      // again. Larger integers (e.g. 2 ** 32) and -0 would be corrupted, so
      // they fall through to the unconcealed branch.
      (value | 0) === value &&
      !Object.is(value, -0)
    ) {
      // 32-bit integers: simple XOR. Result is still a valid JS integer.
      key = getRandomInt(1, U16_MAX);
      constants.push(value ^ key);
    } else {
      // Not concealable (null, undefined, boolean, float, integers outside
      // the int32 range, RegExp…) or option off.
      key = 0;
      constants.push(value);
    }

    keyMap.set(idx, key);
  } else {
    // Reuse existing pool entry — same key that was assigned on first intern.
    key = keyMap.get(idx)!;
  }

  const idxOperand: any = {
    ...(operandAsObject as object),
    type: "number",
    resolvedValue: idx,
  };

  // The key is wrapped in an operand object just like the index, so both
  // can be pushed into an instruction's operand list the same way.
  const keyOperand: any = {
    ...(operandAsObject as object),
    type: "number",
    resolvedValue: key,
  };

  return [idxOperand, keyOperand];
}
|
|
36
96
|
|
|
37
97
|
const resolved: b.Bytecode = [];
|
|
@@ -47,9 +107,20 @@ export function resolveConstants(bc: b.Bytecode): {
|
|
|
47
107
|
);
|
|
48
108
|
|
|
49
109
|
if (hasConstant) {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
)
|
|
110
|
+
// 1-to-2 expansion: each {type:"constant"} becomes [constIdx, concealKey].
|
|
111
|
+
const newOperands: b.InstrOperand[] = [];
|
|
112
|
+
for (const operand of operands) {
|
|
113
|
+
if ((operand as any)?.type === "constant") {
|
|
114
|
+
const [idxOperand, key] = intern(operand);
|
|
115
|
+
|
|
116
|
+
const newOperand = (operand as any)?.key ? key : idxOperand;
|
|
117
|
+
|
|
118
|
+
newOperands.push(newOperand);
|
|
119
|
+
// newOperands.push(key); // plain number — serialized as a regular u16 slot
|
|
120
|
+
} else {
|
|
121
|
+
newOperands.push(operand);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
53
124
|
const newInstr = [op, ...newOperands] as b.Instruction;
|
|
54
125
|
(newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
|
|
55
126
|
resolved.push(newInstr);
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { Bytecode, Instruction } from "../../types.ts";
|
|
2
2
|
import { Compiler } from "../../compiler.ts";
|
|
3
|
-
import { choice } from "
|
|
3
|
+
import { choice } from "../../utils/random-utils.ts";
|
|
4
|
+
import { getInstructionSize } from "../../utils/op-utils.ts";
|
|
4
5
|
|
|
5
6
|
export function selfModifying(
|
|
6
7
|
bc: Bytecode,
|
|
@@ -80,8 +81,7 @@ export function selfModifying(
|
|
|
80
81
|
|
|
81
82
|
// Flat size of the body (each instruction occupies instr.length slots).
|
|
82
83
|
const bodyFlatSize = body.reduce(
|
|
83
|
-
(acc, instr) =>
|
|
84
|
-
acc + instr.filter((x) => (x as any)?.placeholder !== true).length,
|
|
84
|
+
(acc, instr) => acc + getInstructionSize(instr),
|
|
85
85
|
0,
|
|
86
86
|
);
|
|
87
87
|
|
|
@@ -1,16 +1,29 @@
|
|
|
1
1
|
import type { Bytecode, InstrOperand, Instruction } from "../../types.ts";
|
|
2
2
|
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
getInstructionSize,
|
|
5
|
+
nextFreeSlot,
|
|
6
|
+
U16_MAX,
|
|
7
|
+
} from "../../utils/op-utils.ts";
|
|
4
8
|
|
|
5
9
|
// Creates specialized opcodes for the most frequent (OPCODE + single_integer_operand) pairs.
|
|
6
10
|
// Example: [OP.LOAD_CONST, 1] becomes [SPECIALIZED_LOAD_CONST_1].
|
|
7
11
|
// Only instructions with *exactly one numeric operand* are considered.
|
|
8
|
-
// MAKE_CLOSURE and
|
|
12
|
+
// MAKE_CLOSURE and other N-sized instructions cannot be specialized
|
|
9
13
|
// Runs after selfModifying but before resolveLabels (operands stay plain numbers).
|
|
10
14
|
export function specializedOpcodes(
|
|
11
15
|
bc: Bytecode,
|
|
12
16
|
compiler: Compiler,
|
|
13
17
|
): { bytecode: Bytecode } {
|
|
18
|
+
const disallowedOps = new Set([
|
|
19
|
+
compiler.OP.MAKE_CLOSURE,
|
|
20
|
+
compiler.OP.BUILD_ARRAY,
|
|
21
|
+
compiler.OP.BUILD_OBJECT,
|
|
22
|
+
compiler.OP.CALL,
|
|
23
|
+
compiler.OP.CALL_METHOD,
|
|
24
|
+
compiler.OP.NEW,
|
|
25
|
+
]);
|
|
26
|
+
|
|
14
27
|
// ── Collect used opcodes exactly as specified ─────────────────────────────
|
|
15
28
|
const usedOpcodes = new Set<number>(
|
|
16
29
|
Object.keys(compiler.OP_NAME)
|
|
@@ -23,30 +36,38 @@ export function specializedOpcodes(
|
|
|
23
36
|
// ── Step 1: count frequency of eligible (op, operand) pairs ───────────────
|
|
24
37
|
const freqMap = new Map<
|
|
25
38
|
string,
|
|
26
|
-
{
|
|
39
|
+
{
|
|
40
|
+
op: number;
|
|
41
|
+
operands: InstrOperand[];
|
|
42
|
+
operandsKey: string;
|
|
43
|
+
occurences: number;
|
|
44
|
+
}
|
|
27
45
|
>();
|
|
28
46
|
|
|
29
47
|
for (const instr of bc) {
|
|
30
48
|
const op = instr[0];
|
|
31
|
-
if (op === null || op
|
|
49
|
+
if (op === null || disallowedOps.has(op)) continue;
|
|
50
|
+
|
|
51
|
+
// Only supports between 1-6 operands
|
|
52
|
+
const operandCount = getInstructionSize(instr) - 1;
|
|
53
|
+
if (operandCount < 1 || operandCount > 6) continue;
|
|
32
54
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
const operand = instr[1];
|
|
55
|
+
const operands = instr.slice(1);
|
|
56
|
+
const operandsKey = JSON.stringify(operands);
|
|
36
57
|
|
|
37
|
-
const key = `${op},${
|
|
58
|
+
const key = `${op},${operandsKey}`;
|
|
38
59
|
const entry = freqMap.get(key);
|
|
39
60
|
if (entry) {
|
|
40
|
-
entry.
|
|
61
|
+
entry.occurences++;
|
|
41
62
|
} else {
|
|
42
|
-
freqMap.set(key, { op,
|
|
63
|
+
freqMap.set(key, { op, operands, operandsKey, occurences: 1 });
|
|
43
64
|
}
|
|
44
65
|
}
|
|
45
66
|
|
|
46
67
|
// ── Step 2: keep combinations that appear >= 2 times, sort by frequency ───
|
|
47
68
|
const candidates = Array.from(freqMap.values())
|
|
48
|
-
.filter((e) => e.
|
|
49
|
-
.sort((a, b) => b.
|
|
69
|
+
.filter((e) => e.occurences >= 1)
|
|
70
|
+
.sort((a, b) => b.occurences - a.occurences);
|
|
50
71
|
|
|
51
72
|
if (candidates.length === 0) return { bytecode: bc };
|
|
52
73
|
|
|
@@ -57,16 +78,17 @@ export function specializedOpcodes(
|
|
|
57
78
|
for (let i = 0; i < candidates.length; i++) {
|
|
58
79
|
const specialOp = nextFreeSlot(usedOpcodes);
|
|
59
80
|
if (specialOp === -1) break;
|
|
60
|
-
const { op: originalOp,
|
|
81
|
+
const { op: originalOp, operands, operandsKey } = candidates[i];
|
|
61
82
|
|
|
62
|
-
const key = `${originalOp},${
|
|
83
|
+
const key = `${originalOp},${operandsKey}`;
|
|
63
84
|
sigToSpecial.set(key, specialOp);
|
|
64
85
|
|
|
65
|
-
specializedOps[specialOp] = { originalOp,
|
|
86
|
+
specializedOps[specialOp] = { originalOp, operands };
|
|
66
87
|
|
|
67
88
|
// Register a human-readable name for disassembly / debugging
|
|
68
89
|
const originalName = compiler.OP_NAME[originalOp] ?? `OP_${originalOp}`;
|
|
69
|
-
compiler.OP_NAME[specialOp] =
|
|
90
|
+
compiler.OP_NAME[specialOp] =
|
|
91
|
+
`${originalName}_${JSON.stringify(operandsKey)}`;
|
|
70
92
|
}
|
|
71
93
|
|
|
72
94
|
// Store mapping so the interpreter knows how to dispatch the specialized op
|
|
@@ -77,18 +99,25 @@ export function specializedOpcodes(
|
|
|
77
99
|
|
|
78
100
|
for (const instr of bc) {
|
|
79
101
|
const op = instr[0];
|
|
80
|
-
// Only consider instructions with
|
|
81
|
-
if (op === null || instr.length
|
|
102
|
+
// Only consider instructions with one or more operands
|
|
103
|
+
if (op === null || instr.length <= 1 || op === compiler.OP.MAKE_CLOSURE) {
|
|
82
104
|
result.push(instr);
|
|
83
105
|
continue;
|
|
84
106
|
}
|
|
85
107
|
|
|
86
|
-
const
|
|
87
|
-
const
|
|
108
|
+
const operands = instr.slice(1);
|
|
109
|
+
const operandsKey = JSON.stringify(operands);
|
|
88
110
|
|
|
89
|
-
|
|
90
|
-
|
|
111
|
+
const key = `${op},${operandsKey}`;
|
|
112
|
+
|
|
113
|
+
const specialOpCode = sigToSpecial.get(key)!;
|
|
114
|
+
|
|
115
|
+
if (!specialOpCode) {
|
|
116
|
+
result.push(instr);
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
91
119
|
|
|
120
|
+
const newOperands = operands.map((operand) => {
|
|
92
121
|
const operandAsObject =
|
|
93
122
|
typeof operand === "object" && operand
|
|
94
123
|
? operand
|
|
@@ -103,15 +132,15 @@ export function specializedOpcodes(
|
|
|
103
132
|
placeholder: true,
|
|
104
133
|
} as any as InstrOperand;
|
|
105
134
|
|
|
106
|
-
|
|
135
|
+
return newOperand;
|
|
136
|
+
});
|
|
107
137
|
|
|
108
|
-
|
|
109
|
-
(newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
|
|
138
|
+
const newInstr: Instruction = [specialOpCode, ...newOperands];
|
|
110
139
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
140
|
+
// Preserve source-node information for error reporting
|
|
141
|
+
(newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
|
|
142
|
+
|
|
143
|
+
result.push(newInstr);
|
|
115
144
|
}
|
|
116
145
|
|
|
117
146
|
return { bytecode: result };
|