js-confuser-vm 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CHANGELOG.md +58 -3
  2. package/README.MD +186 -107
  3. package/dist/build-runtime.js +59 -0
  4. package/dist/compiler.js +1777 -0
  5. package/dist/index.js +10 -0
  6. package/dist/minify.js +18 -0
  7. package/dist/options.js +1 -0
  8. package/dist/runtime.js +826 -0
  9. package/dist/transforms/bytecode/aliasedOpcodes.js +140 -0
  10. package/dist/transforms/bytecode/concealConstants.js +31 -0
  11. package/dist/transforms/bytecode/macroOpcodes.js +164 -0
  12. package/dist/transforms/bytecode/resolveContants.js +106 -0
  13. package/dist/transforms/bytecode/resolveLabels.js +80 -0
  14. package/dist/transforms/bytecode/selfModifying.js +108 -0
  15. package/dist/transforms/bytecode/specializedOpcodes.js +113 -0
  16. package/dist/transforms/runtime/aliasedOpcodes.js +134 -0
  17. package/dist/transforms/runtime/macroOpcodes.js +88 -0
  18. package/dist/transforms/runtime/minify.js +1 -0
  19. package/dist/transforms/runtime/shuffleOpcodes.js +20 -0
  20. package/dist/transforms/runtime/specializedOpcodes.js +107 -0
  21. package/{src/transforms/utils/op-utils.ts → dist/transforms/utils/op-utils.js} +25 -26
  22. package/dist/transforms/utils/random-utils.js +27 -0
  23. package/dist/types.js +15 -0
  24. package/dist/utils/op-utils.js +29 -0
  25. package/dist/utils/random-utils.js +27 -0
  26. package/dist/utilts.js +3 -0
  27. package/index.ts +10 -8
  28. package/jest.config.js +10 -0
  29. package/package.json +3 -4
  30. package/src/build-runtime.ts +7 -1
  31. package/src/compiler.ts +2395 -2069
  32. package/src/options.ts +2 -0
  33. package/src/runtime.ts +838 -771
  34. package/src/transforms/bytecode/aliasedOpcodes.ts +158 -0
  35. package/src/transforms/bytecode/concealConstants.ts +52 -0
  36. package/src/transforms/bytecode/macroOpcodes.ts +32 -15
  37. package/src/transforms/bytecode/resolveContants.ts +87 -16
  38. package/src/transforms/bytecode/selfModifying.ts +3 -3
  39. package/src/transforms/bytecode/specializedOpcodes.ts +58 -29
  40. package/src/transforms/runtime/aliasedOpcodes.ts +191 -0
  41. package/src/transforms/runtime/shuffleOpcodes.ts +1 -1
  42. package/src/transforms/runtime/specializedOpcodes.ts +39 -24
  43. package/src/utils/op-utils.ts +33 -0
  44. /package/src/{transforms/utils → utils}/random-utils.ts +0 -0
@@ -0,0 +1,140 @@
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { nextFreeSlot, U16_MAX } from "../../utils/op-utils.js";
3
+ import { shuffle } from "../../utils/random-utils.js";
4
+
5
+ // Opcodes that must not be aliased.
6
+ // Variable-length operand opcodes cannot be statically aliased since the
7
+ // number of this._operand() calls varies at runtime.
8
+ // Infrastructure opcodes (PATCH, TRY_SETUP, TRY_END, DEBUGGER) are excluded
9
+ // because aliasing them would interfere with self-modifying bytecode and
10
+ // exception-handling machinery.
11
/** Names of opcodes that aliasedOpcodes() must never duplicate. */
const DISALLOWED_OP_NAMES = new Set([
  // Variable-length operand lists: the number of operand reads varies at runtime.
  "MAKE_CLOSURE",
  "BUILD_ARRAY",
  "BUILD_OBJECT",
  "CALL",
  "CALL_METHOD",
  "NEW",
  // Infrastructure: self-modifying bytecode and exception-handling machinery.
  "PATCH",
  "TRY_SETUP",
  "TRY_END",
  "DEBUGGER",
]);
12
+
13
+ // Creates aliased opcodes: duplicate handlers for commonly-used opcodes,
14
+ // optionally with a permuted operand read order in the bytecode stream.
15
+ //
16
+ // For each aliased op, we record an `order` permutation of length `arity`.
17
+ // order[i] = j means: bytecode slot i holds what was originally operand j.
18
+ //
19
+ // Example: LOAD_GLOBAL [dst, nameIdx] with order=[1,0]:
20
+ // Bytecode stores: [ALIAS_OP, nameIdx, dst]
21
+ // Handler reads: _unsortedOperands = [nameIdx, dst]
22
+ // _operands = [_unsortedOperands[1], _unsortedOperands[0]]
23
+ // = [dst, nameIdx] ← original order restored
24
+ //
25
+ // Runs LAST among bytecode transforms (after selfModifying), before resolveLabels.
26
/**
 * Duplicates frequently used fixed-arity opcodes under fresh opcode numbers and
 * rewrites the bytecode to use the duplicates, storing each rewritten
 * instruction's operands in a permuted order.
 *
 * Side effects on `compiler` (only once candidates exist): every alias gets a
 * name in `OP_NAME`, and the alias table is stored in `ALIASED_OPS` as
 * `{ [aliasOp]: { originalOp, order } }`, where `order[i] = j` means bytecode
 * slot `i` holds what was originally operand `j`.
 *
 * @param {Array} bc - Instructions shaped `[op, ...operands]`; `op` is `null` for pseudo-instructions.
 * @param {object} compiler - Reads `OP` and `OP_NAME`; writes `OP_NAME` and `ALIASED_OPS`.
 * @returns {{bytecode: Array}} The rewritten list, or `bc` itself when nothing could be aliased.
 */
export function aliasedOpcodes(bc, compiler) {
  const unchanged = () => ({ bytecode: bc });

  // Opcode value → name for every base opcode that is allowed to be aliased.
  const aliasableOps = new Map(
    Object.entries(compiler.OP)
      .filter(([opName]) => !DISALLOWED_OP_NAMES.has(opName))
      .map(([opName, opValue]) => [opValue, opName]),
  );

  // Every numeric key of OP_NAME is an opcode slot that is already taken.
  const takenSlots = new Set();
  for (const rawKey of Object.keys(compiler.OP_NAME)) {
    const slot = parseInt(rawKey, 10);
    if (!isNaN(slot)) takenSlots.add(slot);
  }
  if (takenSlots.size > U16_MAX) return unchanged();

  // Frequency and arity per aliasable opcode, measured on the bytecode as given.
  // An opcode seen with two different operand counts gets arity `null`.
  const statsByOp = new Map();
  for (const instr of bc) {
    const opcode = instr[0];
    const operandCount = instr.length - 1;
    if (opcode === null || !aliasableOps.has(opcode)) continue;
    if (operandCount < 1) continue; // nothing to permute

    const seen = statsByOp.get(opcode);
    if (seen === undefined) {
      statsByOp.set(opcode, { freq: 1, arity: operandCount });
      continue;
    }
    if (seen.arity !== operandCount) seen.arity = null;
    seen.freq += 1;
  }

  // Most frequent first; opcodes with inconsistent arity are dropped.
  const ranked = [...statsByOp]
    .filter(([, info]) => info.arity !== null)
    .sort((left, right) => right[1].freq - left[1].freq);
  if (ranked.length === 0) return unchanged();

  // Picks the operand permutation for one alias. For two or more operands it
  // reshuffles (at most 20 times) while the shuffle yields the identity order.
  const pickOrder = (arity) => {
    if (arity < 2) return [0];
    const identity = Array.from({ length: arity }, (_, slot) => slot);
    let order;
    for (let attempt = 0; attempt < 20; attempt += 1) {
      order = shuffle([...identity]);
      if (order.some((source, slot) => source !== slot)) break;
    }
    return order;
  };

  // One alias per original opcode, until the free slots run out.
  const aliasFor = new Map();
  const aliasTable = {};
  for (const [originalOp, { arity }] of ranked) {
    const aliasOp = nextFreeSlot(takenSlots);
    if (aliasOp === -1) break;

    const order = pickOrder(arity);
    aliasFor.set(originalOp, aliasOp);
    aliasTable[aliasOp] = { originalOp, order };

    const baseName = compiler.OP_NAME[originalOp] ?? `OP_${originalOp}`;
    compiler.OP_NAME[aliasOp] = `ALIAS_${baseName}_${order.join("_")}`;
  }
  compiler.ALIASED_OPS = aliasTable;
  if (aliasFor.size === 0) return unchanged();

  // Rewrite pass: swap the opcode and lay the operands out in aliased order.
  const rewritten = [];
  for (const instr of bc) {
    const opcode = instr[0];
    const aliasOp = opcode === null ? undefined : aliasFor.get(opcode);
    if (aliasOp === undefined) {
      rewritten.push(instr);
      continue;
    }

    const { order } = aliasTable[aliasOp];
    const operands = instr.slice(1);
    // An instruction whose operand count differs from the recorded arity
    // (for example a zero-operand one) is passed through untouched.
    if (operands.length !== order.length) {
      rewritten.push(instr);
      continue;
    }

    const aliased = [aliasOp];
    for (const source of order) aliased.push(operands[source]);
    aliased[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
    rewritten.push(aliased);
  }
  return { bytecode: rewritten };
}
@@ -0,0 +1,31 @@
1
/**
 * Follows every `{type: "constant"}` operand with a companion operand
 * `{type: "constant", value, key: true}` carrying the same value.
 *
 * Instructions that contain a constant are rewritten IN PLACE (the same array
 * object is reused, so any extra properties attached to it survive);
 * instructions without one are left untouched.
 *
 * @param {Array} bytecode - Instructions shaped `[op, ...operands]`.
 * @param {object} compiler - Unused by this transform.
 * @returns {{bytecode: Array}} A new list holding the same instruction arrays.
 */
export function concealConstants(bytecode, compiler) {
  const isConstantRef = (candidate) =>
    typeof candidate === "object" && candidate !== null && candidate.type === "constant";

  for (const instr of bytecode) {
    const operands = instr.slice(1);
    if (!operands.some(isConstantRef)) continue;

    // Each constant becomes the pair [constant, key companion]; everything
    // else is carried over unchanged.
    const expanded = operands.flatMap((operand) =>
      operand?.type === "constant"
        ? [operand, { type: "constant", value: operand.value, key: true }]
        : [operand],
    );
    instr.splice(1, instr.length - 1, ...expanded);
  }
  return { bytecode: [...bytecode] };
}
@@ -0,0 +1,164 @@
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { nextFreeSlot, U16_MAX } from "../../utils/op-utils.js";
3
+
4
+ // Opcodes that must not appear in a non-terminal position inside a macro window.
5
+ // Jump ops: modifying frame._pc mid-execution causes the macro handler to
6
+ // run subsequent sub-bodies even after the jump already fired.
7
+ // Frame-changing ops (CALL, CALL_METHOD, NEW, RETURN, THROW): push/pop call
8
+ // frames mid-macro, leaving the `frame` variable stale for later sub-bodies.
9
+ // When one of these is the LAST instruction in the macro sequence there are no
10
+ // following sub-bodies, so editing _pc or the call frame is safe.
11
+ // Variable-operand ops (MAKE_CLOSURE): the number of _operand() calls depends
12
+ // on uvCount at runtime, so a static handler cannot be generated.
13
+ // Infrastructure ops (PATCH, TRY_SETUP, TRY_END, DEBUGGER):
14
+ // either illegal here or nonsensical to fold.
15
+
16
+ // Scan bytecode for repeating instruction sequences and fold them into
17
+ // macro opcodes. Runs after selfModifying but before resolveLabels so
18
+ // IR-ref operands (label/constant) are carried through transparently.
19
+ //
20
+ // Algorithm:
21
+ // 1. Count every eligible window of length 2–5 by its op-code signature.
22
+ // 2. Keep sequences that appear >= 2 times; sort by frequency then length.
23
+ // 3. Assign unused opcode values (0–255, not already claimed by compiler.OP)
24
+ // to the most-frequent candidates and store in compiler.MACRO_OPS.
25
+ // 4. Re-scan bytecode, replacing each matched sequence with a single
26
+ // multi-operand instruction:
27
+ // [macroOpCode, operands_of_instr_0..., operands_of_instr_1..., ...]
28
+ // The runtime macro handler inlines each sub-instruction body; those
29
+ // bodies call this._operand() themselves to consume the inline operands.
30
/**
 * Folds repeated runs of 2–5 consecutive instructions into single "macro"
 * instructions of the form `[macroOp, ...operands of every folded instruction]`.
 *
 * Side effects on `compiler`: each macro is recorded in `MACRO_OPS`
 * (`macroOp → [op, op, ...]`) and named in `OP_NAME` (constituent names joined
 * with ",").
 *
 * Free macro slots are searched among opcode numbers that appear neither as a
 * value of `compiler.OP` nor as a numeric key of `compiler.OP_NAME`, so a slot
 * that another transform already registered a name for is never reused. This
 * matches how aliasedOpcodes() collects its used slots.
 *
 * @param {Array} bc - Instructions shaped `[op, ...operands]`; `op` is `null` for pseudo-instructions.
 * @param {object} compiler - Reads `OP`, `JUMP_OPS`, `OP_NAME`, `MACRO_OPS`; writes `OP_NAME` and `MACRO_OPS`.
 * @returns {{bytecode: Array}} The folded list, or `bc` itself when no run repeats.
 */
export function macroOpcodes(bc, compiler) {
  const { OP, JUMP_OPS } = compiler;
  const MIN_WINDOW = 2;
  const MAX_WINDOW = 5;

  // Only opcodes declared in compiler.OP may be folded (dynamically assigned
  // opcodes are not eligible).
  const baseOps = new Set(Object.values(OP));

  // Eligibility tables are built once; they do not change while scanning.
  // Infrastructure and variable-length ops are never eligible.
  const neverFoldable = new Set([OP.PATCH, OP.TRY_SETUP, OP.TRY_END, OP.DEBUGGER, OP.MAKE_CLOSURE]);
  // Frame-changing ops (and jumps, via JUMP_OPS) may only close a window.
  const terminalOnly = new Set([OP.RETURN, OP.CALL, OP.CALL_METHOD, OP.NEW, OP.THROW]);

  const isEligible = (op, isLast) => {
    if (op === null || neverFoldable.has(op)) return false;
    if (!isLast && (JUMP_OPS.has(op) || terminalOnly.has(op))) return false;
    return baseOps.has(op);
  };

  // Returns the opcode signature of the window [start, start + len), or null
  // when the window runs past the end or contains an ineligible instruction.
  const windowAt = (start, len) => {
    if (start + len > bc.length) return null;
    const ops = [];
    for (let j = 0; j < len; j++) {
      const op = bc[start + j][0];
      if (!isEligible(op, j === len - 1)) return null;
      ops.push(op);
    }
    return ops;
  };

  // Collect every opcode value already in use so free slots can be found.
  const usedOpcodes = new Set(Object.values(OP).filter((v) => v !== undefined));
  for (const rawKey of Object.keys(compiler.OP_NAME ?? {})) {
    const slot = Number.parseInt(rawKey, 10);
    if (!Number.isNaN(slot)) usedOpcodes.add(slot);
  }
  if (usedOpcodes.size > U16_MAX) return { bytecode: bc };

  // ── Step 1: count window frequencies ──────────────────────────────────────
  const freqMap = new Map();
  for (let i = 0; i < bc.length; i++) {
    for (let len = MIN_WINDOW; len <= MAX_WINDOW; len++) {
      const ops = windowAt(i, len);
      // A window that is invalid at this length stays invalid when extended:
      // the offending instruction would still be inside it, non-terminally.
      if (ops === null) break;

      const key = ops.join(",");
      const entry = freqMap.get(key);
      if (entry) {
        entry.count++;
      } else {
        freqMap.set(key, { ops, count: 1 });
      }
    }
  }

  // ── Step 2: keep repeated candidates, prioritise by frequency then length ─
  const candidates = Array.from(freqMap.values())
    .filter((entry) => entry.count >= 2)
    .sort((a, b) => b.count - a.count || b.ops.length - a.ops.length);
  if (candidates.length === 0) return { bytecode: bc };

  // ── Step 3: assign free opcode slots to the best candidates ───────────────
  for (const { ops } of candidates) {
    const macroOp = nextFreeSlot(usedOpcodes);
    if (macroOp === -1) break;
    compiler.MACRO_OPS[macroOp] = ops;
    // Register a combined name so OP_NAME lookups work for the macro too.
    compiler.OP_NAME[macroOp] = ops.map((v) => compiler.OP_NAME[v] ?? `OP_${v}`).join(",");
  }

  // ── Step 4: build signature → macro opcode lookup ─────────────────────────
  const sigToMacro = new Map();
  for (const [macroOpStr, ops] of Object.entries(compiler.MACRO_OPS)) {
    sigToMacro.set(ops.join(","), Number(macroOpStr));
  }

  // ── Step 5: replace sequences, longest match first ────────────────────────
  const result = [];
  let i = 0;
  while (i < bc.length) {
    let foldedLength = 0;
    for (let len = MAX_WINDOW; len >= MIN_WINDOW; len--) {
      const ops = windowAt(i, len);
      if (ops === null) continue; // a shorter window from i may still be valid
      const macroOpCode = sigToMacro.get(ops.join(","));
      if (macroOpCode === undefined) continue;

      // Concatenate the operands of every constituent instruction, in order.
      const folded = [macroOpCode];
      for (let j = 0; j < len; j++) {
        folded.push(...bc[i + j].slice(1));
      }
      folded[SOURCE_NODE_SYM] = bc[i][SOURCE_NODE_SYM];
      result.push(folded);
      foldedLength = len;
      break;
    }
    if (foldedLength === 0) {
      result.push(bc[i]);
      i++;
    } else {
      i += foldedLength;
    }
  }
  return { bytecode: result };
}
@@ -0,0 +1,106 @@
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { getRandomInt } from "../../utils/random-utils.js";
3
+ import { U16_MAX } from "../../utils/op-utils.js";
4
+
5
+ // Encrypt a string with a position-dependent XOR key (u16) then base64-encode.
6
+ //
7
+ // Each char code is XOR'd with ((key + i) & 0xFFFF), producing a u16 value.
8
+ // The u16 values are packed as little-endian byte pairs (matching decodeBytecode),
9
+ // then base64-encoded so the stored constant is always safe ASCII — no raw Unicode
10
+ // surrogates, control chars, or quote chars that would break JS string literals.
11
/**
 * Conceals a string: UTF-16 code unit `i` is XOR'd with `(key + i) & 0xFFFF`,
 * the resulting 16-bit values are written as little-endian byte pairs, and the
 * bytes are returned base64-encoded.
 *
 * @param {string} s - Text to conceal.
 * @param {number} key - Base XOR key; the per-position mask is `(key + i) & 0xFFFF`.
 * @returns {string} Base64 text encoding `2 * s.length` bytes.
 */
function concealString(s, key) {
  const packed = Buffer.alloc(s.length * 2);
  for (let index = 0; index < s.length; index += 1) {
    const mask = (key + index) & 0xffff;
    packed.writeUInt16LE(s.charCodeAt(index) ^ mask, index * 2);
  }
  return packed.toString("base64");
}
20
+
21
+ // Resolve all {type:"constant", value} operands to a PAIR of integer operands:
22
+ // [constPoolIndex, concealKey]
23
+ //
24
+ // constPoolIndex — index into the constants array (as before).
25
+ // concealKey — XOR key used to conceal this constant.
26
+ // 0 means no concealment (concealConstants is off, or the
27
+ // value type is not concealable: null, undefined, bool, float…).
28
+ //
29
+ // The constants array stores the CONCEALED value when key != 0.
30
+ // The runtime's _readConstant(idx, key) reverses the concealment on the fly.
31
+ //
32
+ // Both slots are u16; all existing operand serialization handles them identically.
33
/**
 * Builds the constant pool and replaces every `{type: "constant"}` operand with
 * a `{type: "number", resolvedValue}` operand (other properties of the original
 * operand are carried over):
 *   - operands flagged `key: true` resolve to the conceal key of their value;
 *   - all other constant operands resolve to the value's pool index.
 *
 * A key of 0 means the pool entry is stored as-is. When
 * `compiler.options.concealConstants` is on, strings are stored via
 * concealString() and int32 integers are stored XOR'd with the key.
 *
 * Only integers that survive a 32-bit XOR round trip are concealed: `^`
 * converts its operands to int32, so an integer outside the int32 range, or
 * `-0`, could not be restored by XOR-ing the stored value with the key again.
 * Those values are stored unconcealed with key 0, like floats.
 *
 * `0` and `-0` get separate pool entries; a Map treats them as the same key.
 *
 * @param {Array} bc - Instructions shaped `[op, ...operands]`.
 * @param {object} compiler - Reads `options.concealConstants`.
 * @returns {{bytecode: Array, constants: Array}} Resolved instructions and the constant pool.
 */
export function resolveConstants(bc, compiler) {
  const constants = [];
  const constantsMap = new Map(); // original value → pool index
  const keyMap = new Map(); // pool index → conceal key

  // Stand-in Map key for -0, which a Map cannot tell apart from 0.
  const NEGATIVE_ZERO = Symbol("-0");

  const isConstantRef = (candidate) =>
    typeof candidate === "object" && candidate !== null && candidate.type === "constant";

  // True when `(v ^ key) ^ key` gives back exactly `v`.
  const isXorReversible = (v) => typeof v === "number" && (v | 0) === v && !Object.is(v, -0);

  // Returns [indexOperand, keyOperand] for a constant operand, adding the
  // value to the pool the first time it is seen.
  function intern(operand) {
    const { value } = operand;
    const poolKey = Object.is(value, -0) ? NEGATIVE_ZERO : value;

    let idx = constantsMap.get(poolKey);
    if (idx === undefined) {
      idx = constants.length;
      constantsMap.set(poolKey, idx);

      let freshKey = 0;
      let stored = value;
      if (compiler.options.concealConstants) {
        if (typeof value === "string") {
          // Strings: position-dependent XOR. Key must be >= 1.
          freshKey = getRandomInt(1, U16_MAX);
          stored = concealString(value, freshKey);
        } else if (isXorReversible(value)) {
          // int32 integers: plain XOR, reversible with the same key.
          freshKey = getRandomInt(1, U16_MAX);
          stored = value ^ freshKey;
        }
        // Anything else (null, undefined, boolean, float, wide integer, -0,
        // objects…) is not concealable and keeps key 0.
      }
      constants.push(stored);
      keyMap.set(idx, freshKey);
    }

    // A reused pool entry keeps the key assigned on its first intern.
    const key = keyMap.get(idx);
    return [
      { ...operand, type: "number", resolvedValue: idx },
      { ...operand, type: "number", resolvedValue: key },
    ];
  }

  const resolved = [];
  for (const instr of bc) {
    const [op, ...operands] = instr;
    if (!operands.some(isConstantRef)) {
      resolved.push(instr);
      continue;
    }

    const newOperands = operands.map((operand) => {
      if (operand?.type !== "constant") return operand;
      const [idxOperand, keyOperand] = intern(operand);
      return operand.key ? keyOperand : idxOperand;
    });
    const newInstr = [op, ...newOperands];
    newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
    resolved.push(newInstr);
  }
  return {
    bytecode: resolved,
    constants,
  };
}
@@ -0,0 +1,80 @@
1
+ // --- Label IR ---
2
+ // During compilation, jump targets are symbolic labels instead of hard-coded
3
+ // PC numbers. Two IR "pseudo operands" carry the label information:
4
+ //
5
+ // defineLabel operand : [null, {type:"defineLabel", label:"FN_ENTRY_1"}]
6
+ // Marks a position in the bytecode array.
7
+ // resolveLabels() strips these out entirely.
8
+ //
9
+ // label ref operand : [OP.JUMP, {type:"label", label:"FN_ENTRY_1"}]
10
+ // Used as the operand of any jump instruction. resolveLabels() replaces
11
+ // it with the integer PC that the corresponding defineLabel resolves to.
12
+ //
13
+ // The output bytecode is still a nested array of instructions.
14
+ // Flattening (one u16 slot per op, one per operand) happens in the Serializer.
15
+ // PC values computed here reflect the FLAT slot index so that jump targets,
16
+ // startPc, and LOAD_INT label operands are all correct after flattening.
17
+
18
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
19
+
20
+ // Resolve symbolic labels to absolute flat-PC indices within a bytecode array.
21
+ // defineLabel pseudo-instructions are stripped; label-ref operands become ints.
22
+ // Each instruction [op, ...operands] occupies (1 + operands.length) flat slots,
23
+ // so realPc advances by instr.length for every non-pseudo instruction.
24
/**
 * Resolves symbolic labels to flat slot indices.
 *
 * `[null, {type: "defineLabel", label}]` pseudo-instructions are dropped, and
 * every `{type: "label", label, offset?}` operand becomes
 * `{...operand, type: "number", resolvedValue: pc + (offset ?? 0)}`.
 * Each function descriptor's `startPc` is then set from its `startLabel`,
 * falling back to its `entryLabel`.
 *
 * @param {Array} bc - Instructions shaped `[op, ...operands]`.
 * @param {object} compiler - Its `fnDescriptors` entries are updated in place.
 * @returns {{bytecode: Array}} Instructions with labels resolved.
 * @throws {Error} When a label operand refers to a label that is never defined.
 */
export function resolveLabels(bc, compiler) {
  const isLabelDefinition = (instr) => {
    const marker = instr[1];
    return (
      instr[0] === null &&
      typeof marker === "object" &&
      marker !== null &&
      marker.type === "defineLabel"
    );
  };
  const isLabelRef = (operand) =>
    typeof operand === "object" && operand !== null && operand.type === "label";

  // Pass 1 – record the flat slot index at which each label is defined.
  // Every element of a real instruction (opcode included) takes one slot,
  // except operands flagged `placeholder: true`, which are not counted.
  const labelToPc = new Map();
  let flatPc = 0;
  for (const instr of bc) {
    if (isLabelDefinition(instr)) {
      labelToPc.set(instr[1].label, flatPc);
      continue;
    }
    flatPc += instr.reduce((slots, part) => (part?.placeholder === true ? slots : slots + 1), 0);
  }

  // Pass 2 – drop the label definitions and resolve label refs, which may sit
  // at any operand position.
  const resolved = [];
  for (const instr of bc) {
    if (isLabelDefinition(instr)) continue;

    const rewritten = [instr[0]];
    for (const operand of instr.slice(1)) {
      if (!isLabelRef(operand)) {
        rewritten.push(operand);
        continue;
      }
      const target = labelToPc.get(operand.label);
      if (target === undefined) throw new Error(`Undefined label: ${operand.label}`);
      // Preserve the original operand's properties alongside the resolved value.
      rewritten.push({
        ...operand,
        type: "number",
        resolvedValue: target + (operand.offset ?? 0),
      });
    }
    rewritten[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
    resolved.push(rewritten);
  }

  // Patch each function descriptor's startPc now that labels are resolved.
  for (const descriptor of compiler.fnDescriptors) {
    descriptor.startPc =
      labelToPc.get(descriptor.startLabel) ?? labelToPc.get(descriptor.entryLabel);
  }
  return { bytecode: resolved };
}
@@ -0,0 +1,108 @@
1
+ import { choice } from "../../utils/random-utils.js";
2
+ import { getInstructionSize } from "../../utils/op-utils.js";
3
/**
 * Relocates the body of each labelled block to the end of the bytecode and
 * leaves a PATCH instruction plus filler in its place.
 *
 * For every `defineLabel` pseudo-instruction, the body is the run of
 * instructions that follows it, up to (not including) the next `defineLabel`,
 * jump opcode (`JUMP_OPS`) or `OP.RETURN`. A non-empty body is replaced by:
 *
 *   defineLabel(originalLabel)                       ← kept
 *   [OP.PATCH, originalLabel+4, patchLabel, patchLabel+bodyFlatSize]
 *   bodyFlatSize × [random opcode]                   ← one-slot filler
 *
 * and `defineLabel(patchLabel)` followed by the original body instructions is
 * appended after all other output. `bodyFlatSize` is the sum of
 * getInstructionSize() over the body; the `+4` is PATCH's own width
 * (opcode + 3 operands).
 *
 * @param {Array} bc - Instructions shaped `[op, ...operands]`.
 * @param {object} compiler - Reads `OP` and `JUMP_OPS`.
 * @returns {{bytecode: Array}} Rewritten instructions followed by the relocated bodies.
 */
export function selfModifying(bc, compiler) {
  const { OP, JUMP_OPS } = compiler;

  const isLabelDefinition = (instr) => {
    const [op, marker] = instr;
    return (
      op === null &&
      typeof marker === "object" &&
      marker !== null &&
      marker.type === "defineLabel"
    );
  };
  // A body stops at the next label or at a jump / RETURN instruction.
  const endsBody = (instr) => {
    if (isLabelDefinition(instr)) return true;
    const op = instr[0];
    return op !== null && (JUMP_OPS.has(op) || op === OP.RETURN);
  };

  const inPlace = [];
  const relocated = [];
  let patchCount = 0;
  let cursor = 0;

  while (cursor < bc.length) {
    const current = bc[cursor];
    cursor += 1;
    inPlace.push(current); // labels and ordinary instructions are both kept
    if (!isLabelDefinition(current)) continue;

    // Find where the body that follows this label ends.
    let bodyEnd = cursor;
    while (bodyEnd < bc.length && !endsBody(bc[bodyEnd])) bodyEnd += 1;
    if (bodyEnd === cursor) continue; // label directly followed by a terminator

    const body = bc.slice(cursor, bodyEnd);
    const originalLabel = current[1].label;
    const patchLabel = `patch_${originalLabel}_${patchCount++}`;
    const bodyFlatSize = body.reduce(
      (total, bodyInstr) => total + getInstructionSize(bodyInstr),
      0,
    );

    // PATCH destPc sliceStart sliceEnd
    inPlace.push([
      OP.PATCH,
      { type: "label", label: originalLabel, offset: 4 },
      { type: "label", label: patchLabel },
      { type: "label", label: patchLabel, offset: bodyFlatSize },
    ]);

    // Filler: one single-slot instruction with a random opcode per body slot.
    for (let slot = 0; slot < bodyFlatSize; slot += 1) {
      inPlace.push([+choice(Object.values(compiler.OP))]);
    }

    // The real body goes to the tail under its own label.
    relocated.push([null, { type: "defineLabel", label: patchLabel }]);
    for (const bodyInstr of body) relocated.push(bodyInstr);

    cursor = bodyEnd; // skip the original body in the input
  }

  return { bytecode: [...inPlace, ...relocated] };
}