js-confuser-vm 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +112 -2
  2. package/README.MD +249 -106
  3. package/dist/build-runtime.js +22 -3
  4. package/dist/compiler.js +864 -801
  5. package/dist/runtime.js +414 -333
  6. package/dist/transforms/bytecode/aliasedOpcodes.js +134 -0
  7. package/dist/transforms/bytecode/concealConstants.js +31 -0
  8. package/dist/transforms/bytecode/macroOpcodes.js +37 -23
  9. package/dist/transforms/bytecode/microOpcodes.js +236 -0
  10. package/dist/transforms/bytecode/resolveContants.js +69 -12
  11. package/dist/transforms/bytecode/resolveLabels.js +5 -3
  12. package/dist/transforms/bytecode/selfModifying.js +3 -2
  13. package/dist/transforms/bytecode/specializedOpcodes.js +54 -39
  14. package/dist/transforms/runtime/aliasedOpcodes.js +134 -0
  15. package/dist/transforms/runtime/internalVariables.js +202 -0
  16. package/dist/transforms/runtime/macroOpcodes.js +30 -18
  17. package/dist/transforms/runtime/microOpcodes.js +76 -0
  18. package/dist/transforms/runtime/shuffleOpcodes.js +1 -1
  19. package/dist/transforms/runtime/specializedOpcodes.js +36 -29
  20. package/dist/utils/op-utils.js +36 -0
  21. package/dist/utils/random-utils.js +27 -0
  22. package/index.ts +11 -8
  23. package/jest.config.js +12 -0
  24. package/package.json +1 -1
  25. package/src/build-runtime.ts +25 -4
  26. package/src/compiler.ts +2482 -2069
  27. package/src/options.ts +3 -0
  28. package/src/runtime.ts +842 -771
  29. package/src/transforms/bytecode/aliasedOpcodes.ts +148 -0
  30. package/src/transforms/bytecode/concealConstants.ts +52 -0
  31. package/src/transforms/bytecode/macroOpcodes.ts +49 -33
  32. package/src/transforms/bytecode/microOpcodes.ts +291 -0
  33. package/src/transforms/bytecode/resolveContants.ts +82 -18
  34. package/src/transforms/bytecode/resolveLabels.ts +5 -4
  35. package/src/transforms/bytecode/selfModifying.ts +3 -3
  36. package/src/transforms/bytecode/specializedOpcodes.ts +85 -46
  37. package/src/transforms/runtime/aliasedOpcodes.ts +191 -0
  38. package/src/transforms/runtime/internalVariables.ts +270 -0
  39. package/src/transforms/runtime/macroOpcodes.ts +47 -20
  40. package/src/transforms/runtime/microOpcodes.ts +93 -0
  41. package/src/transforms/runtime/shuffleOpcodes.ts +1 -1
  42. package/src/transforms/runtime/specializedOpcodes.ts +56 -46
  43. package/src/types.ts +1 -1
  44. package/src/utils/op-utils.ts +46 -0
  45. package/src/transforms/utils/op-utils.ts +0 -26
  46. package/src/utilts.ts +0 -3
  47. /package/src/{transforms/utils → utils}/random-utils.ts +0 -0
@@ -0,0 +1,134 @@
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { nextFreeSlot } from "../../utils/op-utils.js";
3
+ import { shuffle } from "../../utils/random-utils.js";
4
+
5
// Names of the opcodes that never receive an alias.
//  - Variable-length operand opcodes: the number of this._operand() calls
//    varies at runtime, so they cannot be statically aliased.
//  - Infrastructure opcodes (PATCH, TRY_SETUP, TRY_END, DEBUGGER): aliasing
//    them would interfere with self-modifying bytecode and the
//    exception-handling machinery.
const DISALLOWED_OP_NAMES = new Set([
  // Variable-length operand lists.
  "MAKE_CLOSURE",
  "BUILD_ARRAY",
  "BUILD_OBJECT",
  "CALL",
  "CALL_METHOD",
  "NEW",
  // Infrastructure.
  "PATCH",
  "TRY_SETUP",
  "TRY_END",
  "DEBUGGER",
]);
12
+
13
/**
 * Bytecode transform that creates aliased opcodes: duplicate opcode numbers
 * for the base opcodes present in the bytecode, optionally with a permuted
 * operand order in the bytecode stream.
 *
 * For each aliased op an `order` permutation of length `arity` is recorded.
 * order[i] = j means: bytecode slot i holds what was originally operand j.
 *
 * Example: LOAD_GLOBAL [dst, nameIdx] with order = [1, 0]:
 *   Bytecode stores: [ALIAS_OP, nameIdx, dst]
 *   Handler reads:   _unsortedOperands = [nameIdx, dst]
 *                    _operands = [_unsortedOperands[1], _unsortedOperands[0]]
 *                              = [dst, nameIdx]   <- original order restored
 *
 * Runs LAST among bytecode transforms (after selfModifying), before
 * resolveLabels.
 *
 * @param {Array<Array>} bc - Instructions of the form [op, ...operands].
 * @param {object} compiler - Reads compiler.OP and compiler.OP_NAME; writes
 *   new compiler.OP_NAME entries and replaces compiler.ALIASED_OPS with
 *   { [aliasOp]: { originalOp, order } } (only once candidates exist).
 * @returns {{bytecode: Array<Array>}} The rewritten instruction list, or the
 *   input list itself when nothing could be aliased.
 */
export function aliasedOpcodes(bc, compiler) {
  // Numeric values of every base opcode that is allowed to get an alias.
  const aliasable = new Set();
  for (const opName of Object.keys(compiler.OP)) {
    if (!DISALLOWED_OP_NAMES.has(opName)) aliasable.add(compiler.OP[opName]);
  }

  // -- Pass 1: per-opcode frequency and arity -------------------------------
  // The scan runs over the bytecode as it stands now, so the frequencies only
  // reflect instructions that earlier transforms left untouched.
  const usage = new Map();
  for (const instr of bc) {
    const opcode = instr[0];
    if (opcode === null || !aliasable.has(opcode)) continue;

    const operandCount = instr.length - 1;
    if (operandCount < 1) continue; // nothing to permute without operands

    const stats = usage.get(opcode);
    if (stats === undefined) {
      usage.set(opcode, { freq: 1, arity: operandCount });
      continue;
    }
    // Differing operand counts mark the opcode as variable-length.
    if (stats.arity !== operandCount) stats.arity = null;
    stats.freq += 1;
  }

  // -- Pass 2: most frequent first, consistent arity only -------------------
  const ranked = [...usage]
    .filter(([, stats]) => stats.arity !== null)
    .sort(([, left], [, right]) => right.freq - left.freq);
  if (ranked.length === 0) return { bytecode: bc };

  // -- Pass 3: reserve an opcode slot and pick a permutation per candidate --
  const aliasFor = new Map(); // original opcode -> alias opcode
  const aliasTable = {}; // alias opcode -> { originalOp, order }
  for (const [baseOp, { arity }] of ranked) {
    const aliasOp = nextFreeSlot(compiler);
    if (aliasOp === -1) break; // no slot available

    // With a single operand only [0] exists; the alias is then a plain clone.
    let order = [0];
    if (arity >= 2) {
      // Shuffle up to 20 times, stopping at the first non-identity result so
      // the stored operand order really differs from the original.
      const identity = Array.from({ length: arity }, (_, i) => i);
      for (let attempt = 0; attempt < 20; attempt++) {
        order = shuffle([...identity]);
        if (order.some((slot, i) => slot !== i)) break;
      }
    }

    aliasFor.set(baseOp, aliasOp);
    aliasTable[aliasOp] = { originalOp: baseOp, order };
    const baseName = compiler.OP_NAME[baseOp] ?? `OP_${baseOp}`;
    compiler.OP_NAME[aliasOp] = `ALIAS_${baseName}_${order.join("_")}`;
  }
  compiler.ALIASED_OPS = aliasTable;
  if (aliasFor.size === 0) return { bytecode: bc };

  // -- Pass 4: rewrite the bytecode -----------------------------------------
  const rewritten = [];
  for (const instr of bc) {
    const opcode = instr[0];
    if (opcode === null || !aliasFor.has(opcode)) {
      rewritten.push(instr);
      continue;
    }

    const aliasOp = aliasFor.get(opcode);
    const { order } = aliasTable[aliasOp];
    const operands = instr.slice(1);

    // Safety net: an instruction whose operand count does not match the
    // recorded permutation is emitted unchanged.
    if (operands.length !== order.length) {
      rewritten.push(instr);
      continue;
    }

    // New slot i receives original operand order[i].
    const aliased = [aliasOp];
    for (const source of order) aliased.push(operands[source]);
    aliased[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
    rewritten.push(aliased);
  }
  return { bytecode: rewritten };
}
@@ -0,0 +1,31 @@
1
/**
 * Give every constant operand a companion "key" operand.
 *
 * Each operand whose `type` is "constant" is kept and immediately followed by
 * a new operand `{ type: "constant", value, key: true }` carrying the same
 * value. Instructions without any constant operand are passed through
 * untouched.
 *
 * Instructions that do contain constants are rewritten IN PLACE (the same
 * array object is emptied and refilled), so properties attached to the
 * instruction array and outside references to it stay valid.
 *
 * @param {Array<Array>} bytecode - Instructions of the form [op, ...operands].
 * @param {object} compiler - Not used by this transform.
 * @returns {{bytecode: Array<Array>}} A new list holding the same instruction
 *   arrays in the same order.
 */
export function concealConstants(bytecode, compiler) {
  const isConstantRef = (candidate) =>
    candidate !== undefined &&
    candidate !== null &&
    typeof candidate === "object" &&
    candidate.type === "constant";

  const rewritten = [];
  for (const instr of bytecode) {
    const [opcode, ...operands] = instr;

    if (operands.some(isConstantRef)) {
      // Every entry is wrapped in an array so flatMap never flattens an
      // operand that happens to be an array itself.
      const expanded = operands.flatMap((operand) =>
        operand?.type === "constant"
          ? [operand, { type: "constant", value: operand.value, key: true }]
          : [operand],
      );
      // Refill the existing array instead of allocating a new instruction.
      instr.splice(0, instr.length, opcode, ...expanded);
    }

    rewritten.push(instr);
  }
  return { bytecode: rewritten };
}
@@ -1,14 +1,17 @@
1
1
  import { SOURCE_NODE_SYM } from "../../compiler.js";
2
- import { nextFreeSlot, U16_MAX } from "../utils/op-utils.js";
2
+ import { nextFreeSlot } from "../../utils/op-utils.js";
3
+ import { ok } from "node:assert";
3
4
 
4
- // Opcodes that must not appear inside a macro window.
5
+ // Opcodes that must not appear in a non-terminal position inside a macro window.
5
6
  // Jump ops: modifying frame._pc mid-execution causes the macro handler to
6
7
  // run subsequent sub-bodies even after the jump already fired.
7
8
  // Frame-changing ops (CALL, CALL_METHOD, NEW, RETURN, THROW): push/pop call
8
9
  // frames mid-macro, leaving the `frame` variable stale for later sub-bodies.
10
+ // When one of these is the LAST instruction in the macro sequence there are no
11
+ // following sub-bodies, so editing _pc or the call frame is safe.
9
12
  // Variable-operand ops (MAKE_CLOSURE): the number of _operand() calls depends
10
13
  // on uvCount at runtime, so a static handler cannot be generated.
11
- // Infrastructure ops (DATA, PATCH, TRY_SETUP, TRY_END, DEBUGGER):
14
+ // Infrastructure ops (PATCH, TRY_SETUP, TRY_END, DEBUGGER):
12
15
  // either illegal here or nonsensical to fold.
13
16
 
14
17
  // Scan bytecode for repeating instruction sequences and fold them into
@@ -18,8 +21,7 @@ import { nextFreeSlot, U16_MAX } from "../utils/op-utils.js";
18
21
  // Algorithm:
19
22
  // 1. Count every eligible window of length 2–5 by its op-code signature.
20
23
  // 2. Keep sequences that appear >= 2 times; sort by frequency then length.
21
- // 3. Assign unused opcode values (0–255, not already claimed by compiler.OP)
22
- // to the most-frequent candidates and store in compiler.MACRO_OPS.
24
+ // 3. Use nextFreeSlot() to assign a new opcode to each of the best candidates
23
25
  // 4. Re-scan bytecode, replacing each matched sequence with a single
24
26
  // multi-operand instruction:
25
27
  // [macroOpCode, operands_of_instr_0..., operands_of_instr_1..., ...]
@@ -31,23 +33,29 @@ export function macroOpcodes(bc, compiler) {
31
33
  const opVal = compiler.OP[name];
32
34
  originalOpToName.set(opVal, name);
33
35
  }
34
- function isEligible(op, compiler) {
36
+
37
+ // Match on opcode names (by substring) rather than numeric codes: opcodes generated from these base ops (e.g. specialized variants) may embed the base name in their own name, and they must stay ineligible as well.
38
+ const alwaysExcluded = ["PATCH", "TRY_SETUP", "TRY_END", "DEBUGGER", "MAKE_CLOSURE"];
39
+ const nonTerminalExcluded = ["RETURN", "CALL", "CALL_METHOD", "NEW", "THROW"];
40
+ function isEligible(op, compiler, isLast = false) {
35
41
  if (op === null) return false;
36
42
  const {
37
43
  OP,
38
- JUMP_OPS
44
+ JUMP_OPS,
45
+ OP_NAME
39
46
  } = compiler;
40
- if (JUMP_OPS.has(op)) return false;
41
- const excluded = new Set([OP.RETURN, OP.PATCH, OP.TRY_SETUP, OP.TRY_END, OP.DEBUGGER, OP.CALL, OP.CALL_METHOD, OP.NEW, OP.THROW, OP.MAKE_CLOSURE // variable-length operands — cannot generate a static handler
42
- ]);
43
- return !excluded.has(op) && originalOpToName.has(op); // Only original Ops are eligible (specialized disallowed)
44
- }
47
+ // Infrastructure and variable-length ops are never eligible.
48
+ const opName = OP_NAME[op];
49
+ ok(opName, `Unknown opcode ${op} (not in OP_NAME)`);
50
+ if (alwaysExcluded.find(name => opName.includes(name))) return false;
45
51
 
46
- // Collect every opcode value already in use so we can find free slots.
47
- const usedOpcodes = new Set(Object.values(compiler.OP).filter(v => v !== undefined));
48
- if (usedOpcodes.size > U16_MAX) return {
49
- bytecode: bc
50
- };
52
+ // Jump and frame-changing ops are only eligible as the terminal instruction.
53
+ if (!isLast) {
54
+ if (JUMP_OPS.has(op)) return false;
55
+ if (nonTerminalExcluded.find(name => opName.includes(name))) return false;
56
+ }
57
+ return OP_NAME[op] !== undefined;
58
+ }
51
59
 
52
60
  // ── Step 1: count window frequencies ──────────────────────────────────────
53
61
  const freqMap = new Map();
@@ -57,14 +65,17 @@ export function macroOpcodes(bc, compiler) {
57
65
  const ops = [];
58
66
  let valid = true;
59
67
  for (let j = 0; j < len; j++) {
60
- const op = bc[i + j][0];
61
- if (!isEligible(op, compiler)) {
68
+ const instr = bc[i + j];
69
+ const op = instr[0];
70
+ const isLast = j === len - 1;
71
+ if (!isEligible(op, compiler, isLast)) {
62
72
  valid = false;
63
73
  break;
64
74
  }
65
75
  ops.push(op);
66
76
  }
67
- // If position (i+j) is ineligible, longer windows from i are also invalid.
77
+ // If position (i+j) is ineligible even as a terminal, longer windows from
78
+ // i are also invalid (it would be non-terminal there too).
68
79
  if (!valid) break;
69
80
  const key = ops.join(",");
70
81
  const entry = freqMap.get(key);
@@ -87,7 +98,7 @@ export function macroOpcodes(bc, compiler) {
87
98
 
88
99
  // ── Step 3: assign free opcode slots to the best candidates ───────────────
89
100
  for (let i = 0; i < candidates.length; i++) {
90
- const macroOp = nextFreeSlot(usedOpcodes);
101
+ const macroOp = nextFreeSlot(compiler);
91
102
  if (macroOp === -1) break;
92
103
  const ops = candidates[i].ops;
93
104
  compiler.MACRO_OPS[macroOp] = ops;
@@ -117,7 +128,8 @@ export function macroOpcodes(bc, compiler) {
117
128
  for (let j = 0; j < len; j++) {
118
129
  const instr = bc[i + j];
119
130
  const op = instr[0];
120
- if (!isEligible(op, compiler)) {
131
+ const isLast = j === len - 1;
132
+ if (!isEligible(op, compiler, isLast)) {
121
133
  valid = false;
122
134
  break;
123
135
  }
@@ -132,7 +144,9 @@ export function macroOpcodes(bc, compiler) {
132
144
  // Each instruction contributes instr.slice(1) — zero or more operands.
133
145
  const allOperands = [];
134
146
  for (let j = 0; j < len; j++) {
135
- allOperands.push(...bc[i + j].slice(1));
147
+ var instr = bc[i + j];
148
+ var operands = instr.slice(1);
149
+ allOperands.push(...operands);
136
150
  }
137
151
  const newInstr = [macroOpCode, ...allOperands];
138
152
  newInstr[SOURCE_NODE_SYM] = instructions[0][SOURCE_NODE_SYM];
@@ -0,0 +1,236 @@
1
+ import { parse } from "@babel/parser";
2
+ import traverseImport from "@babel/traverse";
3
+ import * as t from "@babel/types";
4
+ import { ok } from "assert";
5
+ import { VM_RUNTIME, SOURCE_NODE_SYM } from "../../compiler.js";
6
+ import { nextFreeSlot } from "../../utils/op-utils.js";
7
+ import { nSizedOps } from "./specializedOpcodes.js";
8
+ const traverse = traverseImport.default || traverseImport;
9
+
10
// Return the effective statements of a SwitchCase: when the case consists of
// exactly one block statement the block's body is used, otherwise the
// consequent list itself. `break` and empty statements are dropped.
function extractCaseBody(switchCase) {
  const { consequent } = switchCase;
  const isSingleBlock =
    consequent.length === 1 && t.isBlockStatement(consequent[0]);
  const body = isSingleBlock ? consequent[0].body : consequent;
  return body.filter(
    (node) => !(t.isBreakStatement(node) || t.isEmptyStatement(node)),
  );
}
20
+
21
// Count the IR-level operands a single handler statement consumes: every
// this._operand() call counts as 1 and every this._constant() call as 2.
// Nested function bodies are not inspected.
// Returns null when the statement is ineligible, i.e. one of those calls has
// a loop, if statement, switch statement, conditional expression or logical
// expression among its ancestors.
function countStatementOperands(stmt) {
  const FUNCTION_TYPES = new Set([
    "FunctionDeclaration",
    "FunctionExpression",
    "ArrowFunctionExpression",
  ]);
  // A Map (not an object literal) so names like "constructor" never match.
  const OPERANDS_PER_CALL = new Map([
    ["_operand", 1],
    ["_constant", 2],
  ]);
  const isBranchOrLoop = (ancestor) =>
    ancestor.isLoop() ||
    ancestor.isIfStatement() ||
    ancestor.isSwitchStatement() ||
    ancestor.isConditionalExpression() ||
    ancestor.isLogicalExpression();

  let total = 0;
  let rejected = false;

  // The statement is deep-cloned and wrapped in File > Program before being
  // handed to traverse.
  const root = t.file(t.program([t.cloneNode(stmt, true)]));
  traverse(root, {
    enter(path) {
      if (rejected) {
        path.stop();
        return;
      }

      const { node } = path;

      // Calls inside nested functions do not belong to this statement.
      if (FUNCTION_TYPES.has(node.type)) {
        path.skip();
        return;
      }
      if (node.type !== "CallExpression") return;

      // Only this.<identifier>(...) calls are of interest.
      const { callee } = node;
      const isThisMember =
        t.isMemberExpression(callee) &&
        t.isThisExpression(callee.object) &&
        t.isIdentifier(callee.property);
      if (!isThisMember) return;

      const consumed = OPERANDS_PER_CALL.get(callee.property.name);
      if (!consumed) return;

      // Operand reads are not allowed inside loops or branches.
      if (path.getAncestry().some(isBranchOrLoop)) {
        rejected = true;
        path.stop();
        return;
      }
      total += consumed;
    },
  });

  return rejected ? null : total;
}
65
+
66
// Analyse the switch statement marked with an "@SWITCH" leading comment in
// the VM runtime source and work out which `case OP.<NAME>:` bodies can be
// split into one micro-opcode per statement.
//
// Returns a Map: opValue -> array with the operand count of each statement of
// that case. Cases that cannot be split are simply absent from the map:
//   - the case test is not `OP.<identifier>` or names an opcode without a
//     value in compiler.OP,
//   - the body has fewer than 2 statements,
//   - the body contains a return statement (control flow isn't remembered),
//   - a top-level statement is a debugger or throw statement,
//   - countStatementOperands() rejects one of the statements.
// Throws (via assert `ok`) when no @SWITCH statement is found.
function analyzeRuntimeCases(compiler) {
  const runtimeAst = parse(VM_RUNTIME, { sourceType: "unambiguous" });

  // name -> opcode value, for every opcode that actually has a value.
  const opValueByName = new Map(
    Object.entries(compiler.OP).filter(([, value]) => value !== undefined),
  );

  // Locate the dispatch switch; the first match wins.
  let dispatchSwitch = null;
  traverse(runtimeAst, {
    SwitchStatement(path) {
      const comments = path.node.leadingComments;
      if (!comments?.some((comment) => comment.value.includes("@SWITCH"))) {
        return;
      }
      dispatchSwitch = path.node;
      path.stop();
    },
  });
  ok(dispatchSwitch, "Could not find @SWITCH statement for micro opcodes");

  const countsByOp = new Map();
  for (const switchCase of dispatchSwitch.cases) {
    const { test } = switchCase;
    const isOpMember =
      test &&
      t.isMemberExpression(test) &&
      t.isIdentifier(test.object, { name: "OP" }) &&
      t.isIdentifier(test.property);
    if (!isOpMember) continue;

    const opValue = opValueByName.get(test.property.name);
    if (opValue === undefined) continue;

    const statements = extractCaseBody(switchCase);
    if (statements.length < 2) continue; // splitting needs at least 2 statements

    // Banned pattern: any return statement inside the case body.
    let hasReturn = false;
    traverse(t.file(t.program(statements)), {
      ReturnStatement(path) {
        path.stop();
        hasReturn = true;
      },
    });

    const counts = [];
    let splittable = !hasReturn;
    for (const statement of statements) {
      const operandCount = countStatementOperands(statement);
      if (
        operandCount === null ||
        t.isDebuggerStatement(statement) ||
        t.isThrowStatement(statement)
      ) {
        splittable = false;
        break;
      }
      counts.push(operandCount);
    }

    // The counts are only recorded here; the bytecode pass checks that they
    // add up to the operand count of each real instruction.
    if (splittable) countsByOp.set(opValue, counts);
  }
  return countsByOp;
}
135
+
136
/**
 * Main bytecode transform: split opcodes into per-statement micro-opcodes so
 * each sub-instruction is as small as possible.
 *
 * An instruction [op, ...operands] whose runtime handler was found splittable
 * by analyzeRuntimeCases() is replaced by one instruction per handler
 * statement; the original operands are distributed left-to-right according to
 * how many operands each statement consumes.
 *
 * @param {Array<Array>} bc - Instructions of the form [op, ...operands].
 * @param {object} compiler - Reads compiler.OP and compiler.OP_NAME; writes
 *   compiler.OP_NAME and compiler.MICRO_OPS entries for every new micro-op.
 * @returns {{bytecode: Array<Array>}} The rewritten instruction list, or the
 *   input list itself when nothing was split.
 * @throws {Error} When an instruction's operand count does not equal the sum
 *   of the per-statement operand counts of its handler.
 */
export function microOpcodes(bc, compiler) {
  // -- Step 1: analyse the runtime to discover splittable opcodes ------------
  const opAnalysis = analyzeRuntimeCases(compiler);
  if (opAnalysis.size === 0) return { bytecode: bc };

  // -- Step 2: count opcode frequency in the bytecode ------------------------
  // Opcodes listed in nSizedOps (imported from specializedOpcodes) and RETURN
  // are never split.
  const disallowedOps = new Set(nSizedOps.map((name) => compiler.OP[name]));
  disallowedOps.add(compiler.OP.RETURN);

  const freqMap = new Map();
  for (const instr of bc) {
    const op = instr[0];
    if (op === null || !opAnalysis.has(op) || disallowedOps.has(op)) continue;
    freqMap.set(op, (freqMap.get(op) ?? 0) + 1);
  }

  // -- Step 3: most frequent opcodes first -----------------------------------
  // Every entry in freqMap has a count of at least 1, so no filtering is needed.
  const candidates = Array.from(freqMap.entries())
    .sort(([, a], [, b]) => b - a)
    .map(([op]) => op);
  if (candidates.length === 0) return { bytecode: bc };

  // -- Step 4: assign a free opcode slot to each sub-statement ---------------
  // originalOp -> [{ microOp, irOperandCount }, ...]
  const originalToSubOps = new Map();
  for (const origOp of candidates) {
    const stmtCounts = opAnalysis.get(origOp);

    // Pre-allocate all needed slots; if any slot is unavailable, skip this op.
    // Each slot gets a placeholder name right away, before the next
    // nextFreeSlot() call.
    // NOTE(review): presumably nextFreeSlot treats named slots as taken, in
    // which case slots reserved before a failure stay reserved — confirm.
    const slots = [];
    for (let si = 0; si < stmtCounts.length; si++) {
      const slot = nextFreeSlot(compiler);
      if (slot === -1) break;
      compiler.OP_NAME[slot] = `MICRO_${origOp}_${si}`;
      slots.push(slot);
    }
    if (slots.length !== stmtCounts.length) continue;

    const origName = compiler.OP_NAME[origOp] ?? `OP_${origOp}`;
    const subOps = slots.map((microOp, si) => {
      const irOperandCount = stmtCounts[si];
      compiler.OP_NAME[microOp] = `MICRO_${origName}_${si}`;
      compiler.MICRO_OPS[microOp] = {
        originalOp: origOp,
        stmtIndex: si,
        irOperandCount,
      };
      return { microOp, irOperandCount };
    });
    originalToSubOps.set(origOp, subOps);
  }
  if (originalToSubOps.size === 0) return { bytecode: bc };

  // -- Step 5: replace each matched instruction with its sub-instructions ----
  const result = [];
  for (const instr of bc) {
    const op = instr[0];
    if (op === null || !originalToSubOps.has(op)) {
      result.push(instr);
      continue;
    }

    const subOps = originalToSubOps.get(op);
    const operands = instr.slice(1); // all operands of the original instruction

    // The operands must be exactly what the handler statements consume.
    const expectedTotal = subOps.reduce(
      (sum, { irOperandCount }) => sum + irOperandCount,
      0,
    );
    if (operands.length !== expectedTotal) {
      throw new Error(`Operand count mismatch for opcode ${compiler.OP_NAME[op]}`);
    }

    // Hand out the operands to the sub-instructions from left to right.
    let offset = 0;
    subOps.forEach(({ microOp, irOperandCount }, index) => {
      const newInstr = [microOp, ...operands.slice(offset, offset + irOperandCount)];
      offset += irOperandCount;

      // Carry source-node info on the first sub-instruction only. The index is
      // checked (rather than the running offset) because an offset comparison
      // also matches later sub-instructions when every earlier one consumed
      // zero operands.
      if (index === 0) {
        newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
      }
      result.push(newInstr);
    });
  }
  return { bytecode: result };
}
@@ -1,34 +1,91 @@
1
1
  import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { getRandomInt } from "../../utils/random-utils.js";
3
+ import { U16_MAX } from "../../utils/op-utils.js";
2
4
 
3
- // Resolve all {type:"constant", value} operands to integer indices into the
4
- // constants pool. Returns both the resolved bytecode and the constants array
5
- // so the Serializer can use it for comment generation and output.
6
- // Constant refs may appear at any operand position (index 1, 2, 3, …).
7
- export function resolveConstants(bc) {
5
// Conceal a string with a position-dependent XOR key (u16), then
// base64-encode the result.
//
// UTF-16 code unit i is XOR'd with ((key + i) & 0xFFFF), which yields a u16.
// The u16 values are written as little-endian byte pairs (matching
// decodeBytecode) and the bytes are base64-encoded, so the stored constant is
// always safe ASCII — no raw Unicode surrogates, control chars, or quote
// chars that would break JS string literals.
function concealString(s, key) {
  const packed = Buffer.alloc(s.length * 2);
  for (let i = 0; i < s.length; i++) {
    const mask = (key + i) & 0xffff;
    // Both XOR inputs are within 0..0xFFFF, so the result always fits a u16.
    packed.writeUInt16LE(s.charCodeAt(i) ^ mask, i * 2);
  }
  return packed.toString("base64");
}
20
+
21
+ // Resolve all {type:"constant", value} (index) and {type:"constant", value, key: true} (key) operands
22
+ //
23
+ // constPoolIndex — index into the constants array (as before).
24
+ // concealKey — XOR key used to conceal this constant.
25
+ // 0 means no concealment (concealConstants is off, or the
26
+ // value type is not concealable: null, undefined, bool, float…).
27
+ //
28
+ // The constants array stores the CONCEALED value when key != 0.
29
+ // The runtime's _readConstant(idx, key) reverses the concealment on the fly.
30
+ //
31
+ // Both slots are u16; all existing operand serialization handles them identically.
32
+ export function resolveConstants(bc, compiler) {
8
33
  const constants = [];
9
- const constantsMap = new Map();
34
+ const constantsMap = new Map(); // original value → pool index
35
+ const keyMap = new Map(); // pool index → conceal key
36
+
10
37
  function intern(operand) {
11
- const operandAsObject = typeof operand === "object" && operand ? operand : {};
12
38
  const value = operand.value;
13
39
  let idx = constantsMap.get(value);
40
+ let key = 0;
14
41
  if (typeof idx !== "number") {
15
42
  idx = constants.length;
16
43
  constantsMap.set(value, idx);
17
- constants.push(value);
44
+ if (compiler.options.concealConstants && typeof value === "string") {
45
+ // Strings: position-dependent XOR. Key must be >= 1.
46
+ key = getRandomInt(1, U16_MAX);
47
+ constants.push(concealString(value, key));
48
+ } else if (compiler.options.concealConstants && typeof value === "number" && Number.isInteger(value)) {
49
+ // Integers: simple XOR. Result is still a valid JS integer.
50
+ key = getRandomInt(1, U16_MAX);
51
+ constants.push(value ^ key);
52
+ } else {
53
+ // Not concealable (null, undefined, boolean, float, RegExp…) or option off.
54
+ key = 0;
55
+ constants.push(value);
56
+ }
57
+ keyMap.set(idx, key);
58
+ } else {
59
+ // Reuse existing pool entry — same key that was assigned on first intern.
60
+ key = keyMap.get(idx);
18
61
  }
19
- const newOperand = {
20
- ...operandAsObject,
62
+ const idxOperand = {
21
63
  type: "number",
22
64
  resolvedValue: idx
23
65
  };
24
- return newOperand;
66
+ const keyOperand = {
67
+ type: "number",
68
+ resolvedValue: key
69
+ };
70
+
71
+ // key is a plain u16 number — no wrapping needed.
72
+ return [idxOperand, keyOperand];
25
73
  }
26
74
  const resolved = [];
27
75
  for (const instr of bc) {
28
76
  const [op, ...operands] = instr;
29
77
  const hasConstant = operands.some(o => o !== undefined && o !== null && typeof o === "object" && o.type === "constant");
30
78
  if (hasConstant) {
31
- const newOperands = operands.map(operand => operand?.type === "constant" ? intern(operand) : operand);
79
+ // 1-to-2 expansion: each {type:"constant"} becomes [constIdx, concealKey].
80
+ const newOperands = operands.map(operand => {
81
+ if (operand?.type === "constant") {
82
+ const [idxOperand, key] = intern(operand);
83
+ const newOperand = operand?.key ? key : idxOperand;
84
+ return Object.assign(operand, newOperand);
85
+ } else {
86
+ return operand;
87
+ }
88
+ });
32
89
  const newInstr = [op, ...newOperands];
33
90
  newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
34
91
  resolved.push(newInstr);
@@ -54,13 +54,15 @@ export function resolveLabels(bc, compiler) {
54
54
  if (operand !== undefined && operand !== null && typeof operand === "object" && operand.type === "label") {
55
55
  const pc = labelToPc.get(operand.label);
56
56
  if (pc === undefined) throw new Error(`Undefined label: ${operand.label}`);
57
- var operandAsObject = typeof operand === "object" && operand ? operand : {};
58
57
  const newOperand = {
59
- ...operandAsObject,
60
- // Preverse original operand properties
61
58
  type: "number",
62
59
  resolvedValue: pc + (operand.offset ?? 0)
63
60
  };
61
+
62
+ // Mutate original object so that references are also updated
63
+ if (typeof operand === "object" && operand !== null) {
64
+ return Object.assign(operand, newOperand);
65
+ }
64
66
  return newOperand;
65
67
  }
66
68
  return operand;
@@ -1,4 +1,5 @@
1
- import { choice } from "../utils/random-utils.js";
1
+ import { choice } from "../../utils/random-utils.js";
2
+ import { getInstructionSize } from "../../utils/op-utils.js";
2
3
  export function selfModifying(bc, compiler) {
3
4
  // Walk the bytecode looking for "defineLabel" pseudo-ops, which start basic
4
5
  // blocks. For each block we collect the body (instructions between the label
@@ -61,7 +62,7 @@ export function selfModifying(bc, compiler) {
61
62
  const patchLabel = `patch_${originalLabel}_${patchCount++}`;
62
63
 
63
64
  // Flat size of the body (each instruction occupies instr.length slots).
64
- const bodyFlatSize = body.reduce((acc, instr) => acc + instr.filter(x => x?.placeholder !== true).length, 0);
65
+ const bodyFlatSize = body.reduce((acc, instr) => acc + getInstructionSize(instr), 0);
65
66
 
66
67
  // ── PATCH instruction (4 flat slots: opcode + 3 operands) ───────────
67
68
  // destPc = originalLabel + 4 (slot right after PATCH's 4 slots)