js-confuser-vm 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/README.md +281 -147
  2. package/dist/build-runtime.js +41 -15
  3. package/dist/compiler.js +714 -265
  4. package/dist/disassembler.js +367 -0
  5. package/dist/index.js +7 -2
  6. package/dist/runtime.js +160 -119
  7. package/dist/template.js +163 -42
  8. package/dist/transforms/bytecode/aliasedOpcodes.js +4 -1
  9. package/dist/transforms/bytecode/concealConstants.js +2 -2
  10. package/dist/transforms/bytecode/controlFlowFlattening.js +569 -0
  11. package/dist/transforms/bytecode/dispatcher.js +15 -111
  12. package/dist/transforms/bytecode/macroOpcodes.js +2 -2
  13. package/{src/transforms/bytecode/resolveContants.ts → dist/transforms/bytecode/resolveConstants.js} +30 -56
  14. package/dist/transforms/bytecode/resolveRegisters.js +23 -4
  15. package/dist/transforms/bytecode/selfModifying.js +88 -21
  16. package/dist/transforms/bytecode/semanticOpcodes.js +162 -0
  17. package/dist/transforms/bytecode/specializedOpcodes.js +23 -12
  18. package/dist/transforms/bytecode/stringConcealing.js +288 -0
  19. package/dist/transforms/runtime/classObfuscation.js +43 -0
  20. package/dist/transforms/runtime/handlerTable.js +91 -0
  21. package/dist/transforms/runtime/semanticOpcodes.js +35 -0
  22. package/dist/transforms/runtime/specializedOpcodes.js +11 -5
  23. package/dist/types.js +1 -1
  24. package/dist/utils/ast-utils.js +75 -0
  25. package/dist/utils/op-utils.js +1 -2
  26. package/dist/utils/pass-utils.js +100 -0
  27. package/dist/utils/profile-utils.js +3 -0
  28. package/package.json +8 -1
  29. package/.gitmodules +0 -4
  30. package/.prettierignore +0 -1
  31. package/CHANGELOG.md +0 -335
  32. package/babel-plugin-inline-runtime.cjs +0 -34
  33. package/babel.config.json +0 -23
  34. package/index.ts +0 -38
  35. package/jest-strip-types.js +0 -10
  36. package/jest.config.js +0 -52
  37. package/src/build-runtime.ts +0 -78
  38. package/src/compiler.ts +0 -2593
  39. package/src/index.ts +0 -14
  40. package/src/minify.ts +0 -21
  41. package/src/options.ts +0 -18
  42. package/src/runtime.ts +0 -923
  43. package/src/template.ts +0 -141
  44. package/src/transforms/bytecode/aliasedOpcodes.ts +0 -148
  45. package/src/transforms/bytecode/concealConstants.ts +0 -52
  46. package/src/transforms/bytecode/dispatcher.ts +0 -398
  47. package/src/transforms/bytecode/macroOpcodes.ts +0 -193
  48. package/src/transforms/bytecode/microOpcodes.ts +0 -291
  49. package/src/transforms/bytecode/resolveLabels.ts +0 -112
  50. package/src/transforms/bytecode/resolveRegisters.ts +0 -221
  51. package/src/transforms/bytecode/selfModifying.ts +0 -121
  52. package/src/transforms/bytecode/specializedOpcodes.ts +0 -153
  53. package/src/transforms/runtime/aliasedOpcodes.ts +0 -191
  54. package/src/transforms/runtime/internalVariables.ts +0 -270
  55. package/src/transforms/runtime/macroOpcodes.ts +0 -138
  56. package/src/transforms/runtime/microOpcodes.ts +0 -93
  57. package/src/transforms/runtime/minify.ts +0 -1
  58. package/src/transforms/runtime/shuffleOpcodes.ts +0 -24
  59. package/src/transforms/runtime/specializedOpcodes.ts +0 -156
  60. package/src/types.ts +0 -93
  61. package/src/utils/op-utils.ts +0 -48
  62. package/src/utils/random-utils.ts +0 -31
  63. package/tsconfig.json +0 -12
@@ -78,27 +78,17 @@
78
78
  // Runs BEFORE resolveRegisters (so injected RegisterOperands are picked up by
79
79
  // liveness analysis) and BEFORE resolveLabels (so label operands with transforms
80
80
  // are resolved as part of the normal label-resolution pass).
81
- //
82
- // Enabled by options.dispatcher = true.
83
81
 
84
82
  import * as b from "../../types.js";
85
83
  import { getRandomInt } from "../../utils/random-utils.js";
86
84
  import { U16_MAX } from "../../utils/op-utils.js";
87
85
  import { Template } from "../../template.js";
88
-
86
+ import { ref, buildMaxIdMap, allocReg, extractLabel, forEachFunction } from "../../utils/pass-utils.js";
89
87
  // VERY IMPORTANT: All object operands should be unique objects for the entire compilation process.
90
88
  // This ensures that other passes that may reference/modify operands (e.g. specializedOpcodes) don't accidentally break behavior by mutating cloned objects.
91
- function ref(r) {
92
- return b.registerOperand(r.id, r.fnId);
93
- }
94
89
 
95
- // Monotonically increasing counter that makes every encoded label operand
96
- // JSON.stringify-distinguishable. specializedOpcodes keys candidates by
97
- // JSON.stringify(operands), which drops the transform function. Without this
98
- // counter, two LOAD_INT instructions for the same label but different siteKeys
99
- // would serialize identically and be coalesced into one specialized opcode
100
- // sharing a single operand object — causing both sites to decode with the
101
- // first site's key rather than their own.
90
+ // VERY IMPORTANT: All "encoded" label operands include a unique "_id" property that survives JSON.stringify.
91
+ // This allows Specialized Opcodes and other passes to correctly distinguish them, as the "transform" function WILL NOT be preserved.
102
92
  let _encodedLabelId = 0;
103
93
  function encodedLabelOperand(label, siteKey, fnSalt) {
104
94
  return {
@@ -120,38 +110,6 @@ function applyEncoding(pc, siteKey, fnSalt) {
120
110
  return pc - fnSalt & U16_MAX ^ siteKey;
121
111
  }
122
112
 
123
- // ── Register allocation helpers ───────────────────────────────────────────────
124
- // At pass time FnContext objects are gone; we allocate new virtual registers by
125
- // scanning the bytecode for the highest existing id per fnId and incrementing.
126
- function buildMaxIdMap(bc) {
127
- const maxId = new Map();
128
- for (const instr of bc) {
129
- for (let j = 1; j < instr.length; j++) {
130
- const op = instr[j];
131
- if (op && op.type === "register") {
132
- const cur = maxId.get(op.fnId) ?? -1;
133
- if (op.id > cur) maxId.set(op.fnId, op.id);
134
- }
135
- }
136
- }
137
- return maxId;
138
- }
139
-
140
- // Allocate a new virtual register for fnId, updating maxId in-place.
141
- function allocReg(fnId, maxId) {
142
- const next = (maxId.get(fnId) ?? -1) + 1;
143
- maxId.set(fnId, next);
144
- return b.registerOperand(next, fnId);
145
- }
146
-
147
- // ── Label operand extraction ──────────────────────────────────────────────────
148
- // Returns the label string if the operand is a { type:"label" } object,
149
- // otherwise returns null. Used to identify routable jump targets.
150
- function extractLabel(op) {
151
- if (op && typeof op === "object" && op.type === "label") return op.label;
152
- return null;
153
- }
154
-
155
113
  // buildDispatcherBlock: emits the dispatcher label + call + indirect jump.
156
114
  // rClosure is already live (created at function entry); this block simply
157
115
  // calls the decode closure and jumps to the result.
@@ -187,7 +145,7 @@ function processFunctionBlock(instrs, fnId, compiler, maxId, labelCounter) {
187
145
  });
188
146
  if (!hasRoutableJump) return {
189
147
  instrs,
190
- templateBytecode: []
148
+ tail: []
191
149
  };
192
150
 
193
151
  // Per-function salt baked into this function's decode Template.
@@ -195,10 +153,8 @@ function processFunctionBlock(instrs, fnId, compiler, maxId, labelCounter) {
195
153
  const fnSalt = getRandomInt(1, U16_MAX);
196
154
 
197
155
  // Compile a unique decode closure for this function.
198
- // The fnSalt literal is inlined into the source so each function's closure
199
- // body is structurally distinct; no single signature covers all functions.
200
- const tmpl = new Template(`function decode(x, k) { return ((x ^ k) + ${fnSalt}) & ${U16_MAX}; }`).compile({}, compiler);
201
- const decodeDesc = tmpl.functions[0];
156
+ const template = new Template(`function decode(x, k) { return ((x ^ k) + ${fnSalt}) & ${U16_MAX}; }`).compile({}, compiler);
157
+ const decodeDesc = template.functions[0];
202
158
  const dispatcherLabel = labelCounter();
203
159
  const rDisp = allocReg(fnId, maxId); // carries encoded PC to dispatcher
204
160
  const rKey = allocReg(fnId, maxId); // carries per-site key to dispatcher
@@ -214,7 +170,9 @@ function processFunctionBlock(instrs, fnId, compiler, maxId, labelCounter) {
214
170
  // 2 (x, k)
215
171
  b.fnRegCountOperand(decodeDesc._fnIdx),
216
172
  // resolved by resolveRegisters()
217
- 0 // no upvalues
173
+ 0,
174
+ // no upvalues
175
+ 0 // hasRest = false
218
176
  ]);
219
177
 
220
178
  // ── Transform each instruction ────────────────────────────────────────────
@@ -292,71 +250,17 @@ function processFunctionBlock(instrs, fnId, compiler, maxId, labelCounter) {
292
250
  out.push(...buildDispatcherBlock(compiler, rDisp, rKey, rClosure, dispatcherLabel));
293
251
  return {
294
252
  instrs: out,
295
- templateBytecode: tmpl.bytecode
253
+ tail: template.bytecode
296
254
  };
297
255
  }
298
256
 
299
257
  // ── Pass entry point ──────────────────────────────────────────────────────────
300
258
  export function dispatcher(bc, compiler) {
301
- // Pre-compute max virtual register id per function across the whole bytecode.
302
259
  const maxId = buildMaxIdMap(bc);
303
-
304
- // Label factory that delegates to the compiler's own counter so labels
305
- // produced here never collide with compiler-generated or pass-generated ones.
260
+ // Label factory delegates to the compiler's counter so labels never collide.
306
261
  const labelCounter = () => compiler._makeLabel("dispatcher");
307
-
308
- // Build a set of entry labels so we can detect function boundaries.
309
- const entryLabels = new Set(compiler.fnDescriptors.map(d => d.entryLabel));
310
- // Build a map from entry label to fnId.
311
- const entryLabelToFnId = new Map(compiler.fnDescriptors.map(d => [d.entryLabel, d._fnIdx]));
312
- const result = [];
313
- // Collect each function's decode Template bytecode; appended at the end so
314
- // all MAKE_CLOSURE instructions can reference their entryLabels regardless
315
- // of where in the bytecode the function appears.
316
- const decodeBytecodes = [];
317
- let i = 0;
318
- while (i < bc.length) {
319
- const instr = bc[i];
320
- const [op, operand0] = instr;
321
- const isEntryLabel = op === null && operand0?.type === "defineLabel" && entryLabels.has(operand0.label);
322
- if (!isEntryLabel) {
323
- result.push(instr);
324
- i++;
325
- continue;
326
- }
327
-
328
- // Found a function entry label. Collect all instructions belonging to
329
- // this function (until the next entry label or end of bytecode).
330
- const entryLabel = operand0.label;
331
- const fnId = entryLabelToFnId.get(entryLabel);
332
- i++; // step past the defineLabel itself
333
-
334
- const fnInstrs = [];
335
- while (i < bc.length) {
336
- const next = bc[i];
337
- const [nextOp, nextOp0] = next;
338
- if (nextOp === null && nextOp0?.type === "defineLabel" && entryLabels.has(nextOp0.label)) break; // next function starts here
339
- fnInstrs.push(next);
340
- i++;
341
- }
342
-
343
- // Emit the entry defineLabel, then the (potentially transformed) body.
344
- result.push(instr); // the defineLabel
345
- const {
346
- instrs: processed,
347
- templateBytecode
348
- } = processFunctionBlock(fnInstrs, fnId, compiler, maxId, labelCounter);
349
- result.push(...processed);
350
- if (templateBytecode.length > 0) decodeBytecodes.push(templateBytecode);
351
- }
352
-
353
- // Append all per-function decode closure bodies at the end of the bytecode.
354
- // Each block defines the entryLabel that the corresponding MAKE_CLOSURE
355
- // instruction references.
356
- for (const tb of decodeBytecodes) {
357
- result.push(...tb);
358
- }
359
- return {
360
- bytecode: result
361
- };
262
+ // forEachFunction collects each function's tail (decode closure bytecode) and
263
+ // appends them all after the last function body, so every MAKE_CLOSURE can
264
+ // reference its entryLabel regardless of where it appears in the bytecode.
265
+ return forEachFunction(bc, compiler, (fnInstrs, fnId) => processFunctionBlock(fnInstrs, fnId, compiler, maxId, labelCounter));
362
266
  }
@@ -1,4 +1,4 @@
1
- import { SOURCE_NODE_SYM } from "../../compiler.js";
1
+ import { OP_ORIGINAL, SOURCE_NODE_SYM } from "../../compiler.js";
2
2
  import { nextFreeSlot } from "../../utils/op-utils.js";
3
3
  import { ok } from "assert";
4
4
 
@@ -54,7 +54,7 @@ export function macroOpcodes(bc, compiler) {
54
54
  if (JUMP_OPS.has(op)) return false;
55
55
  if (nonTerminalExcluded.find(name => opName.includes(name))) return false;
56
56
  }
57
- return OP_NAME[op] !== undefined;
57
+ return OP_NAME[op] !== undefined && OP_ORIGINAL[opName] !== undefined;
58
58
  }
59
59
 
60
60
  // ── Step 1: count window frequencies ──────────────────────────────────────
@@ -1,7 +1,6 @@
1
- import type * as b from "../../types.ts";
2
- import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
3
- import { getRandomInt } from "../../utils/random-utils.ts";
4
- import { U16_MAX } from "../../utils/op-utils.ts";
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { getRandomInt } from "../../utils/random-utils.js";
3
+ import { U16_MAX } from "../../utils/op-utils.js";
5
4
 
6
5
  // Encrypt a string with a position-dependent XOR key (u16) then base64-encode.
7
6
  //
@@ -9,12 +8,12 @@ import { U16_MAX } from "../../utils/op-utils.ts";
9
8
  // The u16 values are packed as little-endian byte pairs (matching decodeBytecode),
10
9
  // then base64-encoded so the stored constant is always safe ASCII — no raw Unicode
11
10
  // surrogates, control chars, or quote chars that would break JS string literals.
12
- function concealString(s: string, key: number): string {
11
+ function concealString(s, key) {
13
12
  const bytes = new Uint8Array(s.length * 2);
14
13
  for (let i = 0; i < s.length; i++) {
15
- const code = s.charCodeAt(i) ^ ((key + i) & 0xffff);
14
+ const code = s.charCodeAt(i) ^ key + i & 0xffff;
16
15
  bytes[i * 2] = code & 0xff;
17
- bytes[i * 2 + 1] = (code >> 8) & 0xff;
16
+ bytes[i * 2 + 1] = code >> 8 & 0xff;
18
17
  }
19
18
  return Buffer.from(bytes).toString("base64");
20
19
  }
@@ -30,36 +29,23 @@ function concealString(s: string, key: number): string {
30
29
  // The runtime's _readConstant(idx, key) reverses the concealment on the fly.
31
30
  //
32
31
  // Both slots are u16; all existing operand serialization handles them identically.
33
- export function resolveConstants(
34
- bc: b.Bytecode,
35
- compiler: Compiler,
36
- ): {
37
- bytecode: b.Bytecode;
38
- constants: any[];
39
- } {
40
- const constants: any[] = [];
41
- const constantsMap = new Map<any, number>(); // original value → pool index
42
- const keyMap = new Map<number, number>(); // pool index → conceal key
43
-
44
- function intern(operand: b.InstrOperand): [b.InstrOperand, number] {
45
- const value = (operand as any).value;
32
+ export function resolveConstants(bc, compiler) {
33
+ const constants = [];
34
+ const constantsMap = new Map(); // original value → pool index
35
+ const keyMap = new Map(); // pool index → conceal key
46
36
 
37
+ function intern(operand) {
38
+ const value = operand.value;
47
39
  let idx = constantsMap.get(value);
48
40
  let key = 0;
49
-
50
41
  if (typeof idx !== "number") {
51
42
  idx = constants.length;
52
43
  constantsMap.set(value, idx);
53
-
54
44
  if (compiler.options.concealConstants && typeof value === "string") {
55
45
  // Strings: position-dependent XOR. Key must be >= 1.
56
46
  key = getRandomInt(1, U16_MAX);
57
47
  constants.push(concealString(value, key));
58
- } else if (
59
- compiler.options.concealConstants &&
60
- typeof value === "number" &&
61
- Number.isInteger(value)
62
- ) {
48
+ } else if (compiler.options.concealConstants && typeof value === "number" && Number.isInteger(value)) {
63
49
  // Integers: simple XOR. Result is still a valid JS integer.
64
50
  key = getRandomInt(1, U16_MAX);
65
51
  constants.push(value ^ key);
@@ -68,59 +54,47 @@ export function resolveConstants(
68
54
  key = 0;
69
55
  constants.push(value);
70
56
  }
71
-
72
57
  keyMap.set(idx, key);
73
58
  } else {
74
59
  // Reuse existing pool entry — same key that was assigned on first intern.
75
- key = keyMap.get(idx)!;
60
+ key = keyMap.get(idx);
76
61
  }
77
-
78
- const idxOperand: any = {
62
+ const idxOperand = {
79
63
  type: "number",
80
- resolvedValue: idx,
64
+ resolvedValue: idx
81
65
  };
82
-
83
- const keyOperand: any = {
66
+ const keyOperand = {
84
67
  type: "number",
85
- resolvedValue: key,
68
+ resolvedValue: key
86
69
  };
87
70
 
88
71
  // key is a plain u16 number — no wrapping needed.
89
72
  return [idxOperand, keyOperand];
90
73
  }
91
-
92
- const resolved: b.Bytecode = [];
74
+ const resolved = [];
93
75
  for (const instr of bc) {
94
76
  const [op, ...operands] = instr;
95
-
96
- const hasConstant = operands.some(
97
- (o) =>
98
- o !== undefined &&
99
- o !== null &&
100
- typeof o === "object" &&
101
- (o as any).type === "constant",
102
- );
103
-
77
+ const hasConstant = operands.some(o => o !== undefined && o !== null && typeof o === "object" && o.type === "constant");
104
78
  if (hasConstant) {
105
79
  // 1-to-2 expansion: each {type:"constant"} becomes [constIdx, concealKey].
106
- const newOperands: b.InstrOperand[] = operands.map((operand) => {
107
- if ((operand as any)?.type === "constant") {
80
+ const newOperands = operands.map(operand => {
81
+ if (operand?.type === "constant") {
108
82
  const [idxOperand, key] = intern(operand);
109
- const newOperand = (operand as any)?.key ? key : idxOperand;
110
-
83
+ const newOperand = operand?.key ? key : idxOperand;
111
84
  return Object.assign(operand, newOperand);
112
85
  } else {
113
86
  return operand;
114
87
  }
115
88
  });
116
-
117
- const newInstr = [op, ...newOperands] as b.Instruction;
118
- (newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
89
+ const newInstr = [op, ...newOperands];
90
+ newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
119
91
  resolved.push(newInstr);
120
92
  } else {
121
93
  resolved.push(instr);
122
94
  }
123
95
  }
124
-
125
- return { bytecode: resolved, constants };
126
- }
96
+ return {
97
+ bytecode: resolved,
98
+ constants
99
+ };
100
+ }
@@ -30,6 +30,10 @@
30
30
 
31
31
  export function resolveRegisters(bc, compiler) {
32
32
  function registerPoolKey(op) {
33
+ // Pinned registers must never share a slot with anything else.
34
+ // Passes set this on registers whose live range crosses a CFF back-edge
35
+ // that the linear-scan liveness analysis cannot see.
36
+ if (op.pinned) return "local::";
33
37
  return `${op.kind ?? "local"}::${op.scopeId ?? ""}`;
34
38
  }
35
39
 
@@ -120,16 +124,31 @@ export function resolveRegisters(bc, compiler) {
120
124
  // nextSlot is the high-water mark: the next fresh slot to allocate.
121
125
  // It is shared across all pools so each pool's slots start above the
122
126
  // previous pool's maximum slot.
123
- let nextSlot = 0;
127
+ //
128
+ // Leading locals (params, `arguments`, `this`) have slots fixed by position
129
+ // and are written by the runtime at call time. Start the cursor above that
130
+ // reserved region so no other register can land in it — even when some
131
+ // reserved locals are unused (and therefore never collected above). This is
132
+ // essential for correctness: e.g. a function with unused named params whose
133
+ // `arguments` is captured by a nested arrow would otherwise slide into a
134
+ // param slot and read a parameter value instead of the arguments object.
135
+ const reserved = compiler.fnDescriptors[fnId]?.reservedRegisters ?? 0;
136
+ let nextSlot = reserved;
124
137
  for (const poolKey of sortedPoolKeys) {
125
138
  const regs = pools.get(poolKey);
126
139
  if (poolKey === "local::") {
127
140
  // ── Local pool: virtual-id order, no reuse ────────────────────────
128
- // Params must be at the lowest slots (written by the runtime at call
129
- // time); upvalue captures must keep their slot for the frame's lifetime.
141
+ // Reserved leading locals keep an identity slot mapping (id N maps to slot N)
142
+ // so the runtime's positional writes always land correctly; upvalue
143
+ // captures must keep their slot for the frame's lifetime. Remaining
144
+ // locals (hoisted vars, captured variables) pack above the reserved region.
130
145
  regs.sort((a, b) => a.id - b.id);
131
146
  for (const reg of regs) {
132
- slotMap.set(reg.id, nextSlot++);
147
+ if (reg.id < reserved) {
148
+ slotMap.set(reg.id, reg.id);
149
+ } else {
150
+ slotMap.set(reg.id, nextSlot++);
151
+ }
133
152
  }
134
153
  } else {
135
154
  // ── Non-local pool: firstUse order, linear-scan reuse ─────────────
@@ -1,23 +1,30 @@
1
- import { choice } from "../../utils/random-utils.js";
1
+ import { choice, getRandomInt } from "../../utils/random-utils.js";
2
2
  import { getInstructionSize } from "../../utils/op-utils.js";
3
3
  export function selfModifying(bc, compiler) {
4
4
  // Walk the bytecode looking for "defineLabel" pseudo-ops, which start basic
5
5
  // blocks. For each block we collect the body (instructions between the label
6
- // and the next label/jump terminator), move it to the end of the bytecode
7
- // under a fresh "patch_LXX" label, and replace it in-place with:
6
+ // and the next label/jump terminator), pick a random-sized, random-offset
7
+ // sub-region within that body, move only that region to the end of the
8
+ // bytecode under a fresh "patch_LXX" label, and replace it in-place with:
8
9
  //
9
10
  // defineLabel ("originalLabel") ← kept as-is (pseudo-op)
11
+ // <prefix instructions> ← body before the region (kept)
10
12
  // PATCH destPc sliceStart sliceEnd ← 4 flat slots total
11
- // Garbage Opcodes × bodyFlatSize ← placeholder slots
13
+ // Garbage Opcodes × regionFlatSize ← placeholder slots
14
+ // <suffix instructions> ← body after the region (kept)
12
15
  //
13
16
  // PATCH reads three inline operands via _operand():
14
- // destPc = originalLabel + 4 (first slot after PATCH's own 4 slots)
15
- // sliceStart = patchLabel (flat PC of appended body)
16
- // sliceEnd = patchLabel + bodyFlatSize
17
+ // destPc = originalLabel + prefixFlatSize + 4 (first placeholder slot)
18
+ // sliceStart = patchLabel (flat PC of appended region)
19
+ // sliceEnd = patchLabel + regionFlatSize
17
20
  //
18
21
  // On first execution PATCH copies bytecode[sliceStart..sliceEnd) over the
19
22
  // placeholder region starting at destPc. Execution then falls through into
20
- // the freshly-patched body. Subsequent calls are idempotent.
23
+ // the freshly-patched region (and onward into the suffix). Subsequent calls
24
+ // are idempotent.
25
+ //
26
+ // A budget caps the extra bytecode this pass adds to ~100% of the input
27
+ // bytecode size. Once exhausted, remaining blocks are emitted untouched.
21
28
 
22
29
  const {
23
30
  OP,
@@ -26,6 +33,18 @@ export function selfModifying(bc, compiler) {
26
33
  const result = [];
27
34
  const appended = [];
28
35
  let patchCount = 0;
36
+
37
+ // Budget: allow this pass to add at most one extra copy (100%) of the input
38
+ // bytecode size. "Size" here is the number of instruction entries, matching
39
+ // the reported `bytecodeSize` (= bytecode.length).
40
+ //
41
+ // Each patch adds, in entry terms:
42
+ // in-place: +1 PATCH entry, +regionFlatSize placeholder entries,
43
+ // −region.length region entries (moved out)
44
+ // appended: +1 defineLabel marker, +region.length region entries
45
+ // net delta = 2 + regionFlatSize
46
+ const budget = bc.length;
47
+ let added = 0;
29
48
  let i = 0;
30
49
  while (i < bc.length) {
31
50
  const instr = bc[i];
@@ -55,46 +74,94 @@ export function selfModifying(bc, compiler) {
55
74
  }
56
75
  const body = bc.slice(i, j);
57
76
  const N = body.length;
58
- if (N === 0) {
59
- // Nothing to transform — label is immediately followed by a terminator.
77
+ const flatSize = chunk => chunk.reduce((acc, instr) => acc + getInstructionSize(instr), 0);
78
+
79
+ // Each patch adds (2 + regionFlatSize) entries (see budget note above).
80
+ // Stop patching once there isn't room for even the smallest patch —
81
+ // remaining blocks (and empty blocks) are emitted untouched.
82
+ const remaining = budget - added;
83
+ if (N === 0 || remaining < 2 + 1) {
84
+ for (const bodyInstr of body) {
85
+ result.push(bodyInstr);
86
+ }
87
+ i = j;
60
88
  continue;
61
89
  }
90
+
91
+ // ── Pick a random-sized, random-offset region within the body ────────
92
+ // prefix = body[0, regionStart) (kept in place, executes normally)
93
+ // region = body[regionStart, regionEnd) (self-modified)
94
+ // suffix = body[regionEnd, N) (kept in place)
95
+ const regionStart = getRandomInt(0, N - 1);
96
+ const regionLen = getRandomInt(1, N - regionStart);
97
+ let region = body.slice(regionStart, regionStart + regionLen);
98
+ let regionFlatSize = flatSize(region);
99
+
100
+ // Trim the region from the end so the patch fits the remaining budget,
101
+ // keeping the cap strict (never overshoot 100% growth).
102
+ while (region.length > 1 && 2 + regionFlatSize > remaining) {
103
+ region = region.slice(0, -1);
104
+ regionFlatSize = flatSize(region);
105
+ }
106
+ if (2 + regionFlatSize > remaining) {
107
+ // Even a single-instruction region doesn't fit — leave block untouched.
108
+ for (const bodyInstr of body) {
109
+ result.push(bodyInstr);
110
+ }
111
+ i = j;
112
+ continue;
113
+ }
114
+ const regionEnd = regionStart + region.length;
115
+ const prefix = body.slice(0, regionStart);
116
+ const suffix = body.slice(regionEnd);
117
+ const prefixFlatSize = flatSize(prefix);
62
118
  const patchLabel = `patch_${originalLabel}_${patchCount++}`;
63
119
 
64
- // Flat size of the body (each instruction occupies instr.length slots).
65
- const bodyFlatSize = body.reduce((acc, instr) => acc + getInstructionSize(instr), 0);
120
+ // Charge the budget (entry count): PATCH entry + defineLabel marker +
121
+ // placeholder entries (region.length cancels between in-place and append).
122
+ added += 2 + regionFlatSize;
123
+
124
+ // ── Prefix instructions (kept as-is) ────────────────────────────────
125
+ for (const prefixInstr of prefix) {
126
+ result.push(prefixInstr);
127
+ }
66
128
 
67
129
  // ── PATCH instruction (4 flat slots: opcode + 3 operands) ───────────
68
- // destPc = originalLabel + 4 (slot right after PATCH's 4 slots)
130
+ // destPc = originalLabel + prefixFlatSize + 4 (first placeholder)
69
131
  // sliceStart = patchLabel
70
- // sliceEnd = patchLabel + bodyFlatSize
132
+ // sliceEnd = patchLabel + regionFlatSize
71
133
  result.push([OP.PATCH, {
72
134
  type: "label",
73
135
  label: originalLabel,
74
- offset: 4
136
+ offset: prefixFlatSize + 4
75
137
  }, {
76
138
  type: "label",
77
139
  label: patchLabel
78
140
  }, {
79
141
  type: "label",
80
142
  label: patchLabel,
81
- offset: bodyFlatSize
143
+ offset: regionFlatSize
82
144
  }]);
83
145
 
84
- // ── Placeholders (Garbage Opcodes * bodyFlatSize, each 1 flat slot) ────────────
146
+ // ── Placeholders (Garbage Opcodes * regionFlatSize, each 1 flat slot) ──
85
147
  // These are overwritten by PATCH on first execution.
86
- for (let p = 0; p < bodyFlatSize; p++) {
148
+ for (let p = 0; p < regionFlatSize; p++) {
87
149
  const randomOpcode = choice(Object.values(compiler.OP));
88
150
  result.push([+randomOpcode]);
89
151
  }
90
152
 
91
- // ── Append real body at end ─────────────────────────────────────────
153
+ // ── Suffix instructions (kept as-is) ────────────────────────────────
154
+ for (const suffixInstr of suffix) {
155
+ result.push(suffixInstr);
156
+ }
157
+
158
+ // ── Append real region at end ───────────────────────────────────────
92
159
  appended.push([null, {
93
160
  type: "defineLabel",
94
161
  label: patchLabel
95
162
  }]);
96
- for (const bodyInstr of body) {
97
- appended.push(bodyInstr);
163
+ for (const regionInstr of region) {
164
+ appended.push(regionInstr);
98
165
  }
99
166
  i = j; // skip over the original body in the input array
100
167
  continue;