js-confuser-vm 0.0.9 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/.gitmodules +4 -0
  2. package/CHANGELOG.md +125 -2
  3. package/README.md +128 -53
  4. package/bench.ts +146 -0
  5. package/disassemble.ts +12 -0
  6. package/dist/build-runtime.js +41 -15
  7. package/dist/compiler.js +328 -181
  8. package/dist/disassembler.js +317 -0
  9. package/dist/index.js +7 -2
  10. package/dist/runtime.js +255 -176
  11. package/dist/template.js +258 -0
  12. package/dist/transforms/bytecode/aliasedOpcodes.js +4 -1
  13. package/dist/transforms/bytecode/controlFlowFlattening.js +451 -0
  14. package/dist/transforms/bytecode/dispatcher.js +266 -0
  15. package/dist/transforms/bytecode/macroOpcodes.js +3 -3
  16. package/dist/transforms/bytecode/resolveConstants.js +100 -0
  17. package/dist/transforms/bytecode/resolveLabels.js +21 -18
  18. package/dist/transforms/bytecode/resolveRegisters.js +216 -0
  19. package/dist/transforms/bytecode/semanticOpcodes.js +162 -0
  20. package/dist/transforms/bytecode/specializedOpcodes.js +22 -12
  21. package/dist/transforms/bytecode/stringConcealing.js +110 -0
  22. package/dist/transforms/runtime/classObfuscation.js +43 -0
  23. package/dist/transforms/runtime/handlerTable.js +91 -0
  24. package/dist/transforms/runtime/semanticOpcodes.js +35 -0
  25. package/dist/transforms/runtime/specializedOpcodes.js +11 -5
  26. package/dist/types.js +42 -1
  27. package/dist/utils/ast-utils.js +14 -0
  28. package/dist/utils/op-utils.js +1 -2
  29. package/dist/utils/pass-utils.js +100 -0
  30. package/dist/utils/profile-utils.js +3 -0
  31. package/index.ts +22 -16
  32. package/jest.config.js +19 -2
  33. package/output.disassembled.js +41 -0
  34. package/package.json +2 -1
  35. package/src/build-runtime.ts +113 -78
  36. package/src/compiler.ts +2703 -2482
  37. package/src/disassembler.ts +329 -0
  38. package/src/index.ts +12 -2
  39. package/src/options.ts +8 -1
  40. package/src/runtime.ts +294 -180
  41. package/src/template.ts +265 -0
  42. package/src/transforms/bytecode/aliasedOpcodes.ts +5 -2
  43. package/src/transforms/bytecode/controlFlowFlattening.ts +566 -0
  44. package/src/transforms/bytecode/dispatcher.ts +292 -0
  45. package/src/transforms/bytecode/macroOpcodes.ts +4 -4
  46. package/src/transforms/bytecode/resolveLabels.ts +31 -27
  47. package/src/transforms/bytecode/resolveRegisters.ts +226 -0
  48. package/src/transforms/bytecode/specializedOpcodes.ts +27 -20
  49. package/src/transforms/bytecode/stringConcealing.ts +130 -0
  50. package/src/transforms/runtime/classObfuscation.ts +59 -0
  51. package/src/transforms/runtime/specializedOpcodes.ts +14 -9
  52. package/src/types.ts +106 -5
  53. package/src/utils/ast-utils.ts +19 -0
  54. package/src/utils/op-utils.ts +2 -2
  55. package/src/utils/pass-utils.ts +126 -0
  56. package/src/utils/profile-utils.ts +3 -0
  57. package/tsconfig.json +1 -1
  58. package/dist/transforms/utils/op-utils.js +0 -25
  59. package/dist/transforms/utils/random-utils.js +0 -27
  60. package/dist/utilts.js +0 -3
  61. package/src/transforms/bytecode/microOpcodes.ts +0 -291
  62. package/src/transforms/runtime/internalVariables.ts +0 -270
  63. package/src/transforms/runtime/microOpcodes.ts +0 -93
  64. /package/src/transforms/bytecode/{resolveContants.ts → resolveConstants.ts} +0 -0
@@ -0,0 +1,266 @@
1
+ // Routes simple unconditional and conditional jumps through a per-function
2
+ // central dispatcher block so that static analysis cannot read jump targets
3
+ // directly from the bytecode operands.
4
+ //
5
+ // ── How it works ─────────────────────────────────────────────────────────────
6
+ //
7
+ // Each function that contains at least one routable jump gets:
8
+ //
9
+ // rDisp — a stable register shared across the whole function.
10
+ // At every jump site, the per-site encoded target PC is written
11
+ // here before jumping to the dispatcher block.
12
+ // rKey — a stable register written at every jump site with that site's
13
+ // unique XOR key. The dispatcher passes it to the decode closure.
14
+ // rClosure — holds the decode closure, created ONCE at function entry
15
+ // (hoisted). All dispatch calls reuse the same closure object.
16
+ //
17
+ // Dispatcher block (appended after the function body, never reached by fall-through):
18
+ //
19
+ // <dispatcher_N>:
20
+ // CALL rDisp, rClosure, 2, rDisp, rKey // rDisp = decode(rDisp, rKey)
21
+ // JUMP_REG rDisp // indirect jump to recovered PC
22
+ //
23
+ // The decode function is compiled ONCE PER FUNCTION from a Template that
24
+ // embeds a per-function constant (fnSalt). Every function gets its own
25
+ // distinct decode closure body, so identifying one does not help with others.
26
+ //
27
+ // function decode(x, k) { return ((x ^ k) + FN_SALT) & 0xFFFF; }
28
+ //
29
+ // Jump site transformations (each site has its own random siteKey):
30
+ //
31
+ // Original: JUMP target_label
32
+ // Becomes: LOAD_INT rDisp, (target_label_pc - fnSalt) ^ siteKey
33
+ // LOAD_INT rKey, siteKey
34
+ // JUMP <dispatcher_N>
35
+ //
36
+ // Original: JUMP_IF_FALSE cond, target_label
37
+ // Becomes: JUMP_IF_TRUE cond, <skip_N>
38
+ // LOAD_INT rDisp, (target_label_pc - fnSalt) ^ siteKey
39
+ // LOAD_INT rKey, siteKey
40
+ // JUMP <dispatcher_N>
41
+ // <skip_N>:
42
+ //
43
+ // Original: JUMP_IF_TRUE cond, target_label
44
+ // Becomes: JUMP_IF_FALSE cond, <skip_N>
45
+ // LOAD_INT rDisp, (target_label_pc - fnSalt) ^ siteKey
46
+ // LOAD_INT rKey, siteKey
47
+ // JUMP <dispatcher_N>
48
+ // <skip_N>:
49
+ //
50
+ // ── Encoding scheme ──────────────────────────────────────────────────────────
51
+ // Two-key mixed encoding: XOR (per-site) + SUB/ADD (per-function).
52
+ //
53
+ // encode(pc, siteKey, fnSalt) = ((pc - fnSalt) & 0xFFFF) ^ siteKey
54
+ // decode(x, k, fnSalt) = ((x ^ k) + fnSalt) & 0xFFFF
55
+ //
56
+ // The siteKey is a random nonzero u16 unique per jump site — stored as a plain
57
+ // integer operand in the bytecode.
58
+ // The fnSalt is a random nonzero u16 unique per function — it is never stored
59
+ // as an operand anywhere; it is compiled as a literal constant inside the
60
+ // function's own decode Template body.
61
+ //
62
+ // Attack resistance:
63
+ // • Brute-forcing a single jump requires enumerating siteKey × fnSalt
64
+ // (~4 billion combinations) rather than just siteKey (65 535).
65
+ // • Assuming pure XOR fails: un-XOR-ing with siteKey yields (pc - fnSalt),
66
+ // not pc. Valid-PC heuristics produce wrong answers.
67
+ // • Each function emits its own decode closure bytecode with a different
68
+ // fnSalt literal baked in. There is no shared signature to fingerprint.
69
+ // • The encode and decode operations differ structurally (SUB vs ADD),
70
+ // removing the self-inverse property that makes XOR-only schemes obvious.
71
+ //
72
+ // To change the scheme:
73
+ // 1. Change the Template source in processFunctionBlock() to match new decode.
74
+ // 2. Change applyEncoding() to return the matching encode transform.
75
+ // Only these two places need updating; everything else is scheme-agnostic.
76
+ //
77
+ // ── Pipeline position ─────────────────────────────────────────────────────────
78
+ // Runs BEFORE resolveRegisters (so injected RegisterOperands are picked up by
79
+ // liveness analysis) and BEFORE resolveLabels (so label operands with transforms
80
+ // are resolved as part of the normal label-resolution pass).
81
+
82
+ import * as b from "../../types.js";
83
+ import { getRandomInt } from "../../utils/random-utils.js";
84
+ import { U16_MAX } from "../../utils/op-utils.js";
85
+ import { Template } from "../../template.js";
86
+ import { ref, buildMaxIdMap, allocReg, extractLabel, forEachFunction } from "../../utils/pass-utils.js";
87
+ // VERY IMPORTANT: All object operands should be unique objects for the entire compilation process.
88
+ // This ensures that other passes that may reference/modify operands (e.g. specializedOpcodes) don't accidentally break behavior by mutating cloned objects.
89
+
90
+ // VERY IMPORTANT: All "encoded" label operands include a unique "_id" property that survives JSON.stringify.
91
+ // This allows Specialized Opcodes and other passes to correctly distinguish them, as the "transform" function WILL NOT be preserved.
92
// Module-wide counter: the next `_id` handed out by encodedLabelOperand().
let _encodedLabelId = 0;

/**
 * Creates a label operand whose resolved PC is run through applyEncoding()
 * before it is emitted.
 *
 * The returned object carries a numeric `_id` taken from a module-wide counter,
 * so every operand created here is distinguishable even after the `transform`
 * function has been dropped by JSON.stringify.
 *
 * @param {*} label - Label the operand refers to.
 * @param {number} siteKey - Per-site key passed to applyEncoding().
 * @param {number} fnSalt - Per-function salt passed to applyEncoding().
 * @returns {{type: string, label: *, _id: number, transform: Function}}
 */
function encodedLabelOperand(label, siteKey, fnSalt) {
  const operand = {
    type: "label",
    label,
    _id: _encodedLabelId,
  };
  _encodedLabelId += 1;
  // Added last so the enumerable key order stays type, label, _id, transform.
  operand.transform = (pc) => applyEncoding(pc, siteKey, fnSalt);
  return operand;
}
102
+
103
+ // ── Encoding scheme (XOR + SUB/ADD, u16 modular) ────────────────────────────
104
+ // applyEncoding(pc, siteKey, fnSalt): the value stored in rDisp at the jump site.
105
+ // Must be the inverse of the decode function compiled by the Template.
106
+ // encode: ((pc - fnSalt) & 0xFFFF) ^ siteKey → always a valid u16
107
+ // decode: ((x ^ siteKey) + fnSalt) & 0xFFFF ← compiled into the per-function Template
108
+ // The & 0xFFFF mask keeps both sides in [0, 65535], preventing negative LOAD_INT operands.
109
/**
 * Encodes a program counter for storage at a jump site.
 *
 * The salt is subtracted first and the difference is masked with U16_MAX;
 * the masked value is then XOR-ed with the per-site key.
 *
 * @param {number} pc - Resolved program counter of the jump target.
 * @param {number} siteKey - Per-site XOR key.
 * @param {number} fnSalt - Per-function salt.
 * @returns {number} The encoded value.
 */
function applyEncoding(pc, siteKey, fnSalt) {
  const salted = (pc - fnSalt) & U16_MAX;
  return salted ^ siteKey;
}
112
+
113
+ // buildDispatcherBlock: emits the dispatcher label + call + indirect jump.
114
+ // rClosure is already live (created at function entry); this block simply
115
+ // calls the decode closure and jumps to the result.
116
/**
 * Builds the three-row dispatcher block for one function:
 * a label definition, a CALL of the decode closure, and an indirect jump.
 *
 * Every register operand is produced by its own ref() call, so no operand
 * object is shared between positions.
 *
 * @param {object} compiler - Provides the opcode table as `compiler.OP`.
 * @param {*} rDisp - Register holding the encoded value; also receives the call result.
 * @param {*} rKey - Register holding the per-site key.
 * @param {*} rClosure - Register holding the decode closure.
 * @param {*} dispatcherLabel - Label that marks the start of the block.
 * @returns {Array<Array>} The instruction rows, in emission order.
 */
function buildDispatcherBlock(compiler, rDisp, rKey, rClosure, dispatcherLabel) {
  const { OP } = compiler;

  const labelRow = [null, { type: "defineLabel", label: dispatcherLabel }];

  // CALL dst, callee, argc, arg0, arg1  =>  rDisp = closure(rDisp, rKey)
  const callRow = [OP.CALL, ref(rDisp), ref(rClosure), 2, ref(rDisp), ref(rKey)];

  // Indirect jump to whatever the call left in rDisp.
  const jumpRow = [OP.JUMP_REG, ref(rDisp)];

  return [labelRow, callRow, jumpRow];
}
134
+
135
+ // ── Per-function transformation ───────────────────────────────────────────────
136
+ // Returns the transformed instruction stream and the template bytecode block
137
+ // for the per-function decode closure (to be appended at the end of the output).
138
/**
 * Rewrites one function's instruction stream so that JUMP, JUMP_IF_FALSE and
 * JUMP_IF_TRUE instructions with an extractable label target go through a
 * per-function dispatcher block.
 *
 * Output layout when at least one jump is routable:
 *   1. MAKE_CLOSURE for the decode closure (first instruction of the output),
 *   2. the original instructions, with each routable jump replaced by
 *      LOAD_INT rDisp / LOAD_INT rKey / JUMP dispatcher (conditional jumps are
 *      preceded by the inverted branch to a fresh skip label, which is defined
 *      right after the sequence),
 *   3. the dispatcher block from buildDispatcherBlock().
 *
 * Jumps whose operand yields no label (extractLabel() returns null) are copied
 * unchanged. If no jump in the function is routable, the input is returned
 * as-is and no closure, registers or dispatcher are created.
 *
 * @param {Array<Array>} instrs - The function's instructions.
 * @param {*} fnId - Function id forwarded to allocReg().
 * @param {object} compiler - Provides `OP` and `_makeLabel()`; also passed to Template#compile.
 * @param {*} maxId - Allocation state forwarded to allocReg().
 * @param {Function} labelCounter - Returns a fresh label for the dispatcher block.
 * @returns {{instrs: Array<Array>, tail: Array}} Rewritten instructions plus the
 *   compiled decode-closure bytecode (`tail` is empty when nothing was rewritten).
 */
function processFunctionBlock(instrs, fnId, compiler, maxId, labelCounter) {
  const OP = compiler.OP;

  // Label targeted by a routable jump, or null when the instruction is not a
  // simple jump or its operand carries no extractable label.
  const routableTarget = (instr) => {
    const op = instr[0];
    if (op === OP.JUMP) return extractLabel(instr[1]);
    if (op === OP.JUMP_IF_FALSE || op === OP.JUMP_IF_TRUE) return extractLabel(instr[2]);
    return null;
  };

  // Use the same criterion as the rewrite loop below, so the closure and the
  // dispatcher are only emitted when at least one jump will be rerouted.
  const hasRoutableJump = instrs.some((instr) => routableTarget(instr) !== null);
  if (!hasRoutableJump) {
    return { instrs, tail: [] };
  }

  // Per-function salt: interpolated into the decode source, never emitted as an operand.
  const fnSalt = getRandomInt(1, U16_MAX);

  // Compile this function's own decode closure.
  const tmpl = new Template(`function decode(x, k) { return ((x ^ k) + ${fnSalt}) & ${U16_MAX}; }`).compile({}, compiler);
  const decodeDesc = tmpl.functions[0];
  const dispatcherLabel = labelCounter();
  const rDisp = allocReg(fnId, maxId); // carries the encoded PC to the dispatcher
  const rKey = allocReg(fnId, maxId); // carries the per-site key to the dispatcher
  const rClosure = allocReg(fnId, maxId); // holds the decode closure

  const out = [];

  // The closure is created once, as the first emitted instruction.
  out.push([
    OP.MAKE_CLOSURE,
    ref(rClosure),
    { type: "label", label: decodeDesc.entryLabel },
    decodeDesc.paramCount,
    b.fnRegCountOperand(decodeDesc._fnIdx), // filled in by a later pass
    0, // no upvalues
    0, // hasRest = false
  ]);

  for (const instr of instrs) {
    const targetLabel = routableTarget(instr);
    if (targetLabel === null) {
      out.push(instr);
      continue;
    }

    const op = instr[0];
    const isConditional = op !== OP.JUMP;
    const siteKey = getRandomInt(1, U16_MAX);

    let skipLabel = null;
    if (isConditional) {
      // Invert the branch: when the original jump would NOT be taken, hop over
      // the dispatch sequence; otherwise fall into it.
      skipLabel = compiler._makeLabel(targetLabel + "_skip");
      const invertedOp = op === OP.JUMP_IF_FALSE ? OP.JUMP_IF_TRUE : OP.JUMP_IF_FALSE;
      out.push([invertedOp, instr[1], { type: "label", label: skipLabel }]);
    }

    out.push([OP.LOAD_INT, ref(rDisp), encodedLabelOperand(targetLabel, siteKey, fnSalt)]);
    out.push([OP.LOAD_INT, ref(rKey), siteKey]);
    out.push([OP.JUMP, { type: "label", label: dispatcherLabel }]);

    if (isConditional) {
      out.push([null, { type: "defineLabel", label: skipLabel }]);
    }
  }

  // Appended after the body; entered only through the JUMPs emitted above.
  out.push(...buildDispatcherBlock(compiler, rDisp, rKey, rClosure, dispatcherLabel));

  return { instrs: out, tail: tmpl.bytecode };
}
256
+
257
+ // ── Pass entry point ──────────────────────────────────────────────────────────
258
/**
 * Pass entry point: applies processFunctionBlock() to every function in the
 * bytecode via forEachFunction() and returns whatever forEachFunction() returns.
 *
 * @param {Array<Array>} bc - Bytecode to transform.
 * @param {object} compiler - Compiler instance; `_makeLabel` supplies dispatcher labels.
 * @returns {*} The result of forEachFunction().
 */
export function dispatcher(bc, compiler) {
  const maxId = buildMaxIdMap(bc);

  // Dispatcher labels come from the compiler's own label factory.
  function labelCounter() {
    return compiler._makeLabel("dispatcher");
  }

  // Per-function callback; only the first two arguments are forwarded.
  function transformFunction(fnInstrs, fnId) {
    return processFunctionBlock(fnInstrs, fnId, compiler, maxId, labelCounter);
  }

  return forEachFunction(bc, compiler, transformFunction);
}
@@ -1,6 +1,6 @@
1
- import { SOURCE_NODE_SYM } from "../../compiler.js";
1
+ import { OP_ORIGINAL, SOURCE_NODE_SYM } from "../../compiler.js";
2
2
  import { nextFreeSlot } from "../../utils/op-utils.js";
3
- import { ok } from "node:assert";
3
+ import { ok } from "assert";
4
4
 
5
5
  // Opcodes that must not appear in a non-terminal position inside a macro window.
6
6
  // Jump ops: modifying frame._pc mid-execution causes the macro handler to
@@ -54,7 +54,7 @@ export function macroOpcodes(bc, compiler) {
54
54
  if (JUMP_OPS.has(op)) return false;
55
55
  if (nonTerminalExcluded.find(name => opName.includes(name))) return false;
56
56
  }
57
- return OP_NAME[op] !== undefined;
57
+ return OP_NAME[op] !== undefined && OP_ORIGINAL[opName] !== undefined;
58
58
  }
59
59
 
60
60
  // ── Step 1: count window frequencies ──────────────────────────────────────
@@ -0,0 +1,100 @@
1
+ import { SOURCE_NODE_SYM } from "../../compiler.js";
2
+ import { getRandomInt } from "../../utils/random-utils.js";
3
+ import { U16_MAX } from "../../utils/op-utils.js";
4
+
5
+ // Encrypt a string with a position-dependent XOR key (u16) then base64-encode.
6
+ //
7
+ // Each char code is XOR'd with ((key + i) & 0xFFFF), producing a u16 value.
8
+ // The u16 values are packed as little-endian byte pairs (matching decodeBytecode),
9
+ // then base64-encoded so the stored constant is always safe ASCII — no raw Unicode
10
+ // surrogates, control chars, or quote chars that would break JS string literals.
11
/**
 * Conceals a string: each UTF-16 code unit is XOR-ed with ((key + i) & 0xffff),
 * the resulting 16-bit values are written as little-endian byte pairs, and the
 * bytes are returned base64-encoded.
 *
 * @param {string} s - String to conceal.
 * @param {number} key - Base key; the position index is added per code unit.
 * @returns {string} Base64 text of the concealed bytes ("" for an empty string).
 */
function concealString(s, key) {
  const packed = Buffer.alloc(s.length * 2);
  for (let pos = 0; pos < s.length; pos += 1) {
    const mask = (key + pos) & 0xffff;
    // Low byte first, then high byte.
    packed.writeUInt16LE(s.charCodeAt(pos) ^ mask, pos * 2);
  }
  return packed.toString("base64");
}
20
+
21
+ // Resolve all {type:"constant", value} (index) and {type:"constant", value, key: true} (key) operands
22
+ //
23
+ // constPoolIndex — index into the constants array (as before).
24
+ // concealKey — XOR key used to conceal this constant.
25
+ // 0 means no concealment (concealConstants is off, or the
26
+ // value type is not concealable: null, undefined, bool, float…).
27
+ //
28
+ // The constants array stores the CONCEALED value when key != 0.
29
+ // The runtime's _readConstant(idx, key) reverses the concealment on the fly.
30
+ //
31
+ // Both slots are u16; all existing operand serialization handles them identically.
32
/**
 * Replaces every `{type: "constant"}` operand with a resolved number operand
 * and builds the constant pool.
 *
 * An operand with a truthy `key` property resolves to the conceal key of its
 * value; any other constant operand resolves to the pool index. Operand
 * objects are mutated in place (via Object.assign) so that other references
 * to the same object see the resolved form.
 *
 * Concealment (only when `compiler.options.concealConstants` is truthy):
 *   - strings are stored as concealString(value, key);
 *   - integers are stored as `value ^ key`, but ONLY when the value is exactly
 *     representable as a signed 32-bit integer. The `^` operator truncates
 *     its operands to int32, so a larger integer (e.g. a millisecond
 *     timestamp) could not be recovered by XOR-ing again; such values, and
 *     `-0`, are stored as-is with key 0.
 *   - everything else is stored as-is with key 0.
 *
 * @param {Array<Array>} bc - Instructions of the form [op, ...operands].
 * @param {object} compiler - Reads `compiler.options.concealConstants`.
 * @returns {{bytecode: Array<Array>, constants: Array}} Resolved instructions
 *   and the (possibly concealed) constant pool.
 */
export function resolveConstants(bc, compiler) {
  const constants = [];
  const constantsMap = new Map(); // original value → pool index
  const keyMap = new Map(); // pool index → conceal key

  // Map keys compare with SameValueZero, which treats -0 and 0 as equal.
  // A private sentinel keeps -0 in its own pool entry.
  const NEGATIVE_ZERO = Symbol("-0");

  // True when `v ^ key` can be undone by XOR-ing with the same key:
  // v must survive the int32 truncation unchanged (this also rejects -0 and NaN).
  const isInt32 = (v) => typeof v === "number" && Object.is(v | 0, v);

  // Returns [poolIndex, concealKey] for the operand's value, adding it to the
  // pool on first sight and reusing the stored index/key afterwards.
  function intern(operand) {
    const value = operand.value;
    const mapKey = Object.is(value, -0) ? NEGATIVE_ZERO : value;

    const known = constantsMap.get(mapKey);
    if (known !== undefined) {
      return [known, keyMap.get(known)];
    }

    const idx = constants.length;
    constantsMap.set(mapKey, idx);

    const conceal = compiler.options.concealConstants;
    let key = 0; // 0 means "stored as-is"
    if (conceal && typeof value === "string") {
      key = getRandomInt(1, U16_MAX);
      constants.push(concealString(value, key));
    } else if (conceal && isInt32(value)) {
      key = getRandomInt(1, U16_MAX);
      constants.push(value ^ key);
    } else {
      constants.push(value);
    }
    keyMap.set(idx, key);
    return [idx, key];
  }

  const isConstant = (operand) =>
    operand !== undefined && operand !== null && typeof operand === "object" && operand.type === "constant";

  const resolved = [];
  for (const instr of bc) {
    const [op, ...operands] = instr;

    if (!operands.some(isConstant)) {
      // Nothing to resolve: keep the original instruction object.
      resolved.push(instr);
      continue;
    }

    const newOperands = operands.map((operand) => {
      if (!isConstant(operand)) return operand;
      const [idx, key] = intern(operand);
      return Object.assign(operand, {
        type: "number",
        resolvedValue: operand.key ? key : idx,
      });
    });

    const newInstr = [op, ...newOperands];
    newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
    resolved.push(newInstr);
  }

  return {
    bytecode: resolved,
    constants,
  };
}
@@ -31,11 +31,13 @@ export function resolveLabels(bc, compiler) {
31
31
  const operand = instr[1];
32
32
  if (op === null && operand !== null && typeof operand === "object" && operand.type === "defineLabel") {
33
33
  labelToPc.set(operand.label, realPc);
34
- } else {
35
- // Each instruction occupies 1 slot for the opcode + 1 per operand.
36
- // IMPORTANT: 'placeholder' operands are not counted
37
- realPc += instr.filter(x => x?.placeholder !== true).length;
34
+ continue;
38
35
  }
36
+ if (op === null) continue; // "null" opcodes are never emitted
37
+
38
+ // Each instruction occupies 1 slot for the opcode + 1 per operand.
39
+ // IMPORTANT: 'placeholder' operands are not counted
40
+ realPc += instr.filter(x => x?.placeholder !== true).length;
39
41
  }
40
42
 
41
43
  // Pass 2 – build the resolved instruction list.
@@ -44,31 +46,32 @@ export function resolveLabels(bc, compiler) {
44
46
  for (const instr of bc) {
45
47
  const [op, ...operands] = instr;
46
48
 
47
- // Strip defineLabel pseudo-ops.
48
- if (op === null && typeof operands[0] === "object" && operands[0]?.type === "defineLabel") {
49
- continue;
50
- }
51
-
52
- // Replace label-ref operands with their resolved flat PC (any position).
49
+ // Replace label-ref and encodedLabel operands with resolved flat PCs.
50
+ // encodedLabel applies an encoding to the PC before emission so that raw
51
+ // jump targets are hidden; the dispatcher block reverses it at runtime.
52
+ // To change the encoding scheme, update both here and in dispatcher.ts.
53
53
  const newOperands = operands.map(operand => {
54
- if (operand !== undefined && operand !== null && typeof operand === "object" && operand.type === "label") {
54
+ if (operand === undefined || operand === null || typeof operand !== "object") return operand;
55
+ const type = operand.type;
56
+ if (type === "label") {
55
57
  const pc = labelToPc.get(operand.label);
56
58
  if (pc === undefined) throw new Error(`Undefined label: ${operand.label}`);
59
+ let resolvedValue = pc + (operand.offset ?? 0);
60
+ if (operand.transform) {
61
+ resolvedValue = operand.transform(resolvedValue);
62
+ }
57
63
  const newOperand = {
58
64
  type: "number",
59
- resolvedValue: pc + (operand.offset ?? 0)
65
+ resolvedValue: resolvedValue
60
66
  };
61
-
62
- // Mutate original object so that references are also updated
63
- if (typeof operand === "object" && operand !== null) {
64
- return Object.assign(operand, newOperand);
65
- }
66
- return newOperand;
67
+ return Object.assign(operand, newOperand);
67
68
  }
68
69
  return operand;
69
70
  });
70
71
  const newInstr = [op, ...newOperands];
71
72
  newInstr[SOURCE_NODE_SYM] = instr[SOURCE_NODE_SYM];
73
+
74
+ // Pseudo-op "defineLabel"s are kept in this bytecode because the Serializer is responsible for dropping them, and they are useful information for comment generation.
72
75
  resolved.push(newInstr);
73
76
  }
74
77
 
@@ -0,0 +1,216 @@
1
+ // resolveRegisters
2
+ // Converts virtual RegisterOperand objects into concrete slot indices and sets
3
+ // each FnDescriptor's regCount.
4
+ //
5
+ // Two-tier slot assignment:
6
+ //
7
+ // "local::" pool (params, `arguments`, hoisted vars, upvalue-captured vars)
8
+ // ─────────────────────────────────────────────────────────────────────────
9
+ // Sorted by virtual-id, slots assigned sequentially with NO reuse.
10
+ // This is required because:
11
+ // • The runtime writes args[i] to regs[base + i] at call time, so params
12
+ // MUST occupy slots 0..paramCount-1 in virtual-id order.
13
+ // • Open upvalues hold an absolute slot index and read regs[base+slot] for
14
+ // the lifetime of the outer frame — reusing a captured slot corrupts reads.
15
+ //
16
+ // All other pools (e.g. "temp::", "canary::", pass-introduced pools)
17
+ // ─────────────────────────────────────────────────────────────────────────
18
+ // Linear-scan with a free list: registers are sorted by firstUse, and any
19
+ // slot whose previous occupant's lastUse < current register's firstUse is
20
+ // recycled. An explicit [null, freeRegOperand(reg)] pseudo-instruction clamps
21
+ // lastUse early, enabling reuse before the natural end of the live range.
22
+ //
23
+ // Pools are processed in priority order: "local::" always first (slots
24
+ // 0..N), then remaining pools alphabetically. This keeps temp slots above
25
+ // the reserved param/local region.
26
+ //
27
+ // regCount = max concrete slot used across all pools + 1.
28
+ //
29
+ // Run AFTER all IR-level passes but BEFORE resolveLabels / resolveConstants.
30
+
31
/**
 * Assigns a concrete slot to every `{type: "register"}` operand, stores it in
 * the operand's `resolvedValue`, records `regCount` on every function
 * descriptor, and fills in `{type: "fnRegCount"}` operands.
 *
 * Steps:
 *   1. Walk the bytecode once and record, per (fnId, id), the first and last
 *      instruction index at which the register appears. A `freeReg` operand
 *      sets lastUse to its own index and freezes it.
 *   2. Per function, group registers by pool key and hand out slots:
 *      the "local::" pool (which also receives every `pinned` register) goes
 *      first, in ascending id order, with no slot reuse; the remaining pools
 *      follow in ascending key order and reuse the lowest slot whose previous
 *      occupant's lastUse is strictly before the new register's firstUse.
 *   3. Write the slot into each register operand.
 *   4. Set `desc.regCount` (highest slot + 1, or 0) on every descriptor in
 *      `compiler.fnDescriptors`, then `compiler.mainRegCount`.
 *   5. Write `regCount` into each fnRegCount operand.
 *
 * @param {Array<Array>} bc - Instructions of the form [op, ...operands]; operands are mutated.
 * @param {object} compiler - Reads `fnDescriptors` and `mainFn`; writes `mainRegCount`.
 * @returns {{bytecode: Array<Array>}} The same `bc` array that was passed in.
 */
export function resolveRegisters(bc, compiler) {
  const LOCAL_POOL = "local::";

  // Pinned registers are forced into the no-reuse local pool.
  const poolKeyOf = (operand) =>
    operand.pinned ? LOCAL_POOL : `${operand.kind ?? "local"}::${operand.scopeId ?? ""}`;

  // Calls visit(operand, instrIndex) for every object operand (position >= 1).
  const forEachOperand = (visit) => {
    bc.forEach((instr, instrIndex) => {
      for (let pos = 1; pos < instr.length; pos += 1) {
        const operand = instr[pos];
        if (operand && typeof operand === "object") visit(operand, instrIndex);
      }
    });
  };

  // ── Step 1: live ranges (fnId → id → range) ───────────────────────────────
  const liveRanges = new Map();
  forEachOperand((operand, at) => {
    if (operand.type === "register") {
      const poolKey = poolKeyOf(operand);
      let ranges = liveRanges.get(operand.fnId);
      if (!ranges) {
        ranges = new Map();
        liveRanges.set(operand.fnId, ranges);
      }
      const range = ranges.get(operand.id);
      if (!range) {
        ranges.set(operand.id, { firstUse: at, lastUse: at, poolKey, freed: false });
      } else if (!range.freed) {
        // A frozen range is never extended again.
        range.lastUse = at;
      }
      return;
    }
    if (operand.type === "freeReg") {
      const range = liveRanges.get(operand.fnId)?.get(operand.id);
      if (range && !range.freed) {
        range.lastUse = at;
        range.freed = true;
      }
    }
  });

  // ── Step 2: slot assignment (fnId → id → slot) ────────────────────────────
  // "local::" sorts before every other key; other keys sort ascending.
  const comparePoolKeys = (left, right) => {
    const leftRank = left === LOCAL_POOL ? 0 : 1;
    const rightRank = right === LOCAL_POOL ? 0 : 1;
    if (leftRank !== rightRank) return leftRank - rightRank;
    if (left < right) return -1;
    if (left > right) return 1;
    return 0;
  };

  const slotsByFn = new Map();
  for (const [fnId, ranges] of liveRanges) {
    const pools = new Map();
    for (const [id, { poolKey, firstUse, lastUse }] of ranges) {
      if (!pools.has(poolKey)) pools.set(poolKey, []);
      pools.get(poolKey).push({ id, firstUse, lastUse });
    }

    const slots = new Map();
    slotsByFn.set(fnId, slots);

    // High-water mark shared by all pools of this function.
    let nextSlot = 0;
    for (const poolKey of [...pools.keys()].sort(comparePoolKeys)) {
      const members = pools.get(poolKey);

      if (poolKey === LOCAL_POOL) {
        members.sort((x, y) => x.id - y.id);
        for (const { id } of members) {
          slots.set(id, nextSlot);
          nextSlot += 1;
        }
        continue;
      }

      members.sort((x, y) => x.firstUse - y.firstUse);
      // slot → lastUse of the register currently holding it (this pool only).
      const busyUntil = new Map();
      for (const member of members) {
        let chosen = -1;
        for (const [slot, freeAt] of busyUntil) {
          const isFree = freeAt < member.firstUse;
          if (isFree && (chosen === -1 || slot < chosen)) chosen = slot;
        }
        if (chosen === -1) {
          chosen = nextSlot;
          nextSlot += 1;
        }
        busyUntil.set(chosen, member.lastUse);
        slots.set(member.id, chosen);
      }
    }
  }

  // ── Step 3: patch register operands ───────────────────────────────────────
  forEachOperand((operand) => {
    if (operand.type === "register") {
      operand.resolvedValue = slotsByFn.get(operand.fnId)?.get(operand.id);
    }
  });

  // ── Step 4: regCount per descriptor ───────────────────────────────────────
  for (const desc of compiler.fnDescriptors) {
    const slots = slotsByFn.get(desc._fnIdx);
    let regCount = 0;
    if (slots) {
      for (const slot of slots.values()) {
        if (slot + 1 > regCount) regCount = slot + 1;
      }
    }
    desc.regCount = regCount;
  }
  compiler.mainRegCount = compiler.mainFn?.regCount ?? 0;

  // ── Step 5: patch fnRegCount operands ─────────────────────────────────────
  forEachOperand((operand) => {
    if (operand.type === "fnRegCount") {
      operand.resolvedValue = compiler.fnDescriptors[operand.fnId]?.regCount ?? 0;
    }
  });

  return { bytecode: bc };
}