js-confuser-vm 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +242 -89
- package/dist/compiler.js +583 -208
- package/dist/disassembler.js +58 -8
- package/dist/runtime.js +93 -74
- package/dist/template.js +81 -76
- package/dist/transforms/bytecode/concealConstants.js +2 -2
- package/dist/transforms/bytecode/controlFlowFlattening.js +143 -25
- package/dist/transforms/bytecode/dispatcher.js +3 -3
- package/dist/transforms/bytecode/resolveRegisters.js +19 -4
- package/dist/transforms/bytecode/selfModifying.js +88 -21
- package/dist/transforms/bytecode/specializedOpcodes.js +6 -3
- package/dist/transforms/bytecode/stringConcealing.js +253 -75
- package/dist/utils/ast-utils.js +61 -0
- package/dist/utils/op-utils.js +1 -0
- package/package.json +7 -1
- package/.gitmodules +0 -4
- package/.prettierignore +0 -1
- package/CHANGELOG.md +0 -358
- package/babel-plugin-inline-runtime.cjs +0 -34
- package/babel.config.json +0 -23
- package/bench.ts +0 -146
- package/disassemble.ts +0 -12
- package/index.ts +0 -43
- package/jest-strip-types.js +0 -10
- package/jest.config.js +0 -64
- package/output.disassembled.js +0 -41
- package/src/build-runtime.ts +0 -113
- package/src/compiler.ts +0 -2703
- package/src/disassembler.ts +0 -329
- package/src/index.ts +0 -24
- package/src/minify.ts +0 -21
- package/src/options.ts +0 -24
- package/src/runtime.ts +0 -956
- package/src/template.ts +0 -265
- package/src/transforms/bytecode/aliasedOpcodes.ts +0 -151
- package/src/transforms/bytecode/concealConstants.ts +0 -52
- package/src/transforms/bytecode/controlFlowFlattening.ts +0 -566
- package/src/transforms/bytecode/dispatcher.ts +0 -292
- package/src/transforms/bytecode/macroOpcodes.ts +0 -193
- package/src/transforms/bytecode/resolveConstants.ts +0 -126
- package/src/transforms/bytecode/resolveLabels.ts +0 -112
- package/src/transforms/bytecode/resolveRegisters.ts +0 -226
- package/src/transforms/bytecode/selfModifying.ts +0 -121
- package/src/transforms/bytecode/specializedOpcodes.ts +0 -164
- package/src/transforms/bytecode/stringConcealing.ts +0 -130
- package/src/transforms/runtime/aliasedOpcodes.ts +0 -191
- package/src/transforms/runtime/classObfuscation.ts +0 -59
- package/src/transforms/runtime/macroOpcodes.ts +0 -138
- package/src/transforms/runtime/minify.ts +0 -1
- package/src/transforms/runtime/shuffleOpcodes.ts +0 -24
- package/src/transforms/runtime/specializedOpcodes.ts +0 -161
- package/src/types.ts +0 -134
- package/src/utils/ast-utils.ts +0 -19
- package/src/utils/op-utils.ts +0 -46
- package/src/utils/pass-utils.ts +0 -126
- package/src/utils/profile-utils.ts +0 -3
- package/src/utils/random-utils.ts +0 -31
- package/tsconfig.json +0 -12
|
@@ -1,292 +0,0 @@
|
|
|
1
|
-
// Routes simple unconditional and conditional jumps through a per-function
|
|
2
|
-
// central dispatcher block so that static analysis cannot read jump targets
|
|
3
|
-
// directly from the bytecode operands.
|
|
4
|
-
//
|
|
5
|
-
// ── How it works ─────────────────────────────────────────────────────────────
|
|
6
|
-
//
|
|
7
|
-
// Each function that contains at least one routable jump gets:
|
|
8
|
-
//
|
|
9
|
-
// rDisp — a stable register shared across the whole function.
|
|
10
|
-
// At every jump site, the per-site encoded target PC is written
|
|
11
|
-
// here before jumping to the dispatcher block.
|
|
12
|
-
// rKey — a stable register written at every jump site with that site's
|
|
13
|
-
// unique XOR key. The dispatcher passes it to the decode closure.
|
|
14
|
-
// rClosure — holds the decode closure, created ONCE at function entry
|
|
15
|
-
// (hoisted). All dispatch calls reuse the same closure object.
|
|
16
|
-
//
|
|
17
|
-
// Dispatcher block (appended after the function body, never reached by fall-through):
|
|
18
|
-
//
|
|
19
|
-
// <dispatcher_N>:
|
|
20
|
-
// CALL rDisp, rClosure, 2, rDisp, rKey // rDisp = decode(rDisp, rKey)
|
|
21
|
-
// JUMP_REG rDisp // indirect jump to recovered PC
|
|
22
|
-
//
|
|
23
|
-
// The decode function is compiled ONCE PER FUNCTION from a Template that
|
|
24
|
-
// embeds a per-function constant (fnSalt). Every function gets its own
|
|
25
|
-
// distinct decode closure body, so identifying one does not help with others.
|
|
26
|
-
//
|
|
27
|
-
// function decode(x, k) { return ((x ^ k) + FN_SALT) & 0xFFFF; }
|
|
28
|
-
//
|
|
29
|
-
// Jump site transformations (each site has its own random siteKey):
|
|
30
|
-
//
|
|
31
|
-
// Original: JUMP target_label
|
|
32
|
-
// Becomes: LOAD_INT rDisp, (target_label_pc - fnSalt) ^ siteKey
|
|
33
|
-
// LOAD_INT rKey, siteKey
|
|
34
|
-
// JUMP <dispatcher_N>
|
|
35
|
-
//
|
|
36
|
-
// Original: JUMP_IF_FALSE cond, target_label
|
|
37
|
-
// Becomes: JUMP_IF_TRUE cond, <skip_N>
|
|
38
|
-
// LOAD_INT rDisp, (target_label_pc - fnSalt) ^ siteKey
|
|
39
|
-
// LOAD_INT rKey, siteKey
|
|
40
|
-
// JUMP <dispatcher_N>
|
|
41
|
-
// <skip_N>:
|
|
42
|
-
//
|
|
43
|
-
// Original: JUMP_IF_TRUE cond, target_label
|
|
44
|
-
// Becomes: JUMP_IF_FALSE cond, <skip_N>
|
|
45
|
-
// LOAD_INT rDisp, (target_label_pc - fnSalt) ^ siteKey
|
|
46
|
-
// LOAD_INT rKey, siteKey
|
|
47
|
-
// JUMP <dispatcher_N>
|
|
48
|
-
// <skip_N>:
|
|
49
|
-
//
|
|
50
|
-
// ── Encoding scheme ──────────────────────────────────────────────────────────
|
|
51
|
-
// Two-key mixed encoding: XOR (per-site) + SUB/ADD (per-function).
|
|
52
|
-
//
|
|
53
|
-
// encode(pc, siteKey, fnSalt) = ((pc - fnSalt) & 0xFFFF) ^ siteKey
|
|
54
|
-
// decode(x, k, fnSalt) = ((x ^ k) + fnSalt) & 0xFFFF
|
|
55
|
-
//
|
|
56
|
-
// The siteKey is a random nonzero u16 unique per jump site — stored as a plain
|
|
57
|
-
// integer operand in the bytecode.
|
|
58
|
-
// The fnSalt is a random nonzero u16 unique per function — it is never stored
|
|
59
|
-
// as an operand anywhere; it is compiled as a literal constant inside the
|
|
60
|
-
// function's own decode Template body.
|
|
61
|
-
//
|
|
62
|
-
// Attack resistance:
|
|
63
|
-
// • Brute-forcing a single jump requires enumerating siteKey × fnSalt
|
|
64
|
-
// (~4 billion combinations) rather than just siteKey (65 535).
|
|
65
|
-
// • Assuming pure XOR fails: un-XOR-ing with siteKey yields (pc - fnSalt),
|
|
66
|
-
// not pc. Valid-PC heuristics produce wrong answers.
|
|
67
|
-
// • Each function emits its own decode closure bytecode with a different
|
|
68
|
-
// fnSalt literal baked in. There is no shared signature to fingerprint.
|
|
69
|
-
// • The encode and decode operations differ structurally (SUB vs ADD),
|
|
70
|
-
// removing the self-inverse property that makes XOR-only schemes obvious.
|
|
71
|
-
//
|
|
72
|
-
// To change the scheme:
|
|
73
|
-
// 1. Change the Template source in processFunctionBlock() to match new decode.
|
|
74
|
-
// 2. Change applyEncoding() to return the matching encode transform.
|
|
75
|
-
// Only these two places need updating; everything else is scheme-agnostic.
|
|
76
|
-
//
|
|
77
|
-
// ── Pipeline position ─────────────────────────────────────────────────────────
|
|
78
|
-
// Runs BEFORE resolveRegisters (so injected RegisterOperands are picked up by
|
|
79
|
-
// liveness analysis) and BEFORE resolveLabels (so label operands with transforms
|
|
80
|
-
// are resolved as part of the normal label-resolution pass).
|
|
81
|
-
|
|
82
|
-
import type {
|
|
83
|
-
Bytecode,
|
|
84
|
-
Instruction,
|
|
85
|
-
RegisterOperand,
|
|
86
|
-
InstrOperand,
|
|
87
|
-
} from "../../types.ts";
|
|
88
|
-
import * as b from "../../types.ts";
|
|
89
|
-
import { Compiler } from "../../compiler.ts";
|
|
90
|
-
import { getRandomInt } from "../../utils/random-utils.ts";
|
|
91
|
-
import { U16_MAX } from "../../utils/op-utils.ts";
|
|
92
|
-
import { Template } from "../../template.ts";
|
|
93
|
-
import {
|
|
94
|
-
ref,
|
|
95
|
-
buildMaxIdMap,
|
|
96
|
-
allocReg,
|
|
97
|
-
extractLabel,
|
|
98
|
-
forEachFunction,
|
|
99
|
-
} from "../../utils/pass-utils.ts";
|
|
100
|
-
// VERY IMPORTANT: All object operands should be unique objects for the entire compilation process.
|
|
101
|
-
// This ensures that other passes that may reference/modify operands (e.g. specializedOpcodes) don't accidentally break behavior by mutating cloned objects.
|
|
102
|
-
|
|
103
|
-
// VERY IMPORTANT: All "encoded" label operands include a unique "_id" property that survives JSON.stringify.
|
|
104
|
-
// This allows Specialized Opcodes and other passes to correctly distinguish them, as the "transform" function WILL NOT be preserved.
|
|
105
|
-
let _encodedLabelId = 0;
|
|
106
|
-
/**
 * Creates a label operand whose resolved PC is encoded for a single jump site.
 *
 * @param label   Name of the jump target label.
 * @param siteKey XOR key of the jump site that will load this operand.
 * @param fnSalt  Salt of the function that owns the jump site.
 * @returns A `"label"` operand carrying a `transform` callback (pc → encoded
 *          value via applyEncoding) and a fresh `_id` taken from the
 *          module-level counter.
 */
function encodedLabelOperand(
  label: string,
  siteKey: number,
  fnSalt: number,
): InstrOperand {
  // Take the id first: it is plain data, so it is still there after a
  // JSON round-trip, whereas the transform closure below is not.
  const uniqueId = _encodedLabelId++;
  const encodePc = (pc: number): number => applyEncoding(pc, siteKey, fnSalt);

  return {
    type: "label",
    label,
    _id: uniqueId,
    transform: encodePc,
  } as InstrOperand;
}
|
|
118
|
-
|
|
119
|
-
// ── Encoding scheme (XOR + SUB/ADD, u16 modular) ────────────────────────────
|
|
120
|
-
// applyEncoding(pc, siteKey, fnSalt): the value stored in rDisp at the jump site.
|
|
121
|
-
// Must be the inverse of the decode function compiled by the Template.
|
|
122
|
-
// encode: ((pc - fnSalt) & 0xFFFF) ^ siteKey → always a valid u16
|
|
123
|
-
// decode: ((x ^ siteKey) + fnSalt) & 0xFFFF ← compiled into the per-function Template
|
|
124
|
-
// The & 0xFFFF mask keeps both sides in [0, 65535], preventing negative LOAD_INT operands.
|
|
125
|
-
/**
 * Encodes a target PC for storage at a jump site.
 *
 * @param pc      Resolved program counter of the jump target.
 * @param siteKey XOR key of the jump site.
 * @param fnSalt  Salt of the enclosing function.
 * @returns `((pc - fnSalt) & U16_MAX) ^ siteKey`.
 */
function applyEncoding(pc: number, siteKey: number, fnSalt: number): number {
  // Mask after the subtraction so a pc smaller than the salt wraps around
  // instead of going negative.
  const unsalted = (pc - fnSalt) & U16_MAX;
  return unsalted ^ siteKey;
}
|
|
128
|
-
|
|
129
|
-
// buildDispatcherBlock: emits the dispatcher label + call + indirect jump.
|
|
130
|
-
// rClosure is already live (created at function entry); this block simply
|
|
131
|
-
// calls the decode closure and jumps to the result.
|
|
132
|
-
/**
 * Builds the three-entry dispatcher block for one function: the label
 * definition, a CALL of the decode closure, and a JUMP_REG on its result.
 *
 * @param compiler        Supplies the opcode table (`compiler.OP`).
 * @param rDisp           Register holding the encoded PC; also receives the CALL result.
 * @param rKey            Register holding the per-site key.
 * @param rClosure        Register holding the decode closure.
 * @param dispatcherLabel Label that jump sites target to enter this block.
 * @returns The instructions, in emission order.
 */
function buildDispatcherBlock(
  compiler: Compiler,
  rDisp: RegisterOperand,
  rKey: RegisterOperand,
  rClosure: RegisterOperand,
  dispatcherLabel: string,
): Instruction[] {
  const OP = compiler.OP;

  const entry: Instruction = [
    null,
    { type: "defineLabel", label: dispatcherLabel },
  ];

  // rDisp = rClosure(rDisp, rKey): operand layout is dst, callee, argc, args...
  const decodeCall: Instruction = [
    OP.CALL!,
    ref(rDisp),
    ref(rClosure),
    2,
    ref(rDisp),
    ref(rKey),
  ];

  // Indirect jump to whatever the decode call left in rDisp.
  const indirectJump: Instruction = [OP.JUMP_REG!, ref(rDisp)];

  return [entry, decodeCall, indirectJump];
}
|
|
156
|
-
|
|
157
|
-
// ── Per-function transformation ───────────────────────────────────────────────
|
|
158
|
-
// Returns the transformed instruction stream and the template bytecode block
|
|
159
|
-
// for the per-function decode closure (to be appended at the end of the output).
|
|
160
|
-
/**
 * Rewrites one function's instruction stream so that JUMP, JUMP_IF_FALSE and
 * JUMP_IF_TRUE instructions with a label target go through a dispatcher block.
 *
 * @param instrs       Instructions of the function being processed.
 * @param fnId         Function id used when allocating registers.
 * @param compiler     Supplies opcodes, label generation and template compilation.
 * @param maxId        Register bookkeeping map passed through to allocReg().
 * @param labelCounter Factory for the dispatcher label.
 * @returns `instrs`: the rewritten stream (the input itself when the function
 *          contains no routable jump); `tail`: the compiled decode-closure
 *          bytecode (empty when nothing was rewritten).
 */
function processFunctionBlock(
  instrs: Bytecode,
  fnId: number,
  compiler: Compiler,
  maxId: Map<number, number>,
  labelCounter: () => string,
): { instrs: Bytecode; tail: Bytecode } {
  const OP = compiler.OP;

  // Bail out early when there is nothing to route.
  let hasRoutableJump = false;
  for (const candidate of instrs) {
    const code = candidate[0];
    if (
      code === OP.JUMP ||
      code === OP.JUMP_IF_FALSE ||
      code === OP.JUMP_IF_TRUE
    ) {
      hasRoutableJump = true;
      break;
    }
  }
  if (!hasRoutableJump) return { instrs, tail: [] };

  // The salt appears only inside the decode template source below.
  const fnSalt = getRandomInt(1, U16_MAX);
  const decodeSource = `function decode(x, k) { return ((x ^ k) + ${fnSalt}) & ${U16_MAX}; }`;
  const tmpl = new Template(decodeSource).compile({}, compiler);
  const decodeDesc = tmpl.functions[0];

  const dispatcherLabel = labelCounter();
  const rDisp = allocReg(fnId, maxId); // encoded PC in, decoded PC out
  const rKey = allocReg(fnId, maxId); // per-site key
  const rClosure = allocReg(fnId, maxId); // decode closure

  const out: Bytecode = [];

  // Create the decode closure as the first emitted instruction.
  out.push([
    OP.MAKE_CLOSURE!,
    ref(rClosure),
    { type: "label", label: decodeDesc.entryLabel },
    decodeDesc.paramCount,
    b.fnRegCountOperand(decodeDesc._fnIdx),
    0, // no upvalues
    0, // hasRest = false
  ]);

  // Emits: LOAD_INT rDisp, <encoded target>; LOAD_INT rKey, siteKey; JUMP dispatcher.
  const routeThroughDispatcher = (targetLabel: string, siteKey: number): void => {
    out.push(
      [
        OP.LOAD_INT!,
        ref(rDisp),
        encodedLabelOperand(targetLabel, siteKey, fnSalt),
      ],
      [OP.LOAD_INT!, ref(rKey), siteKey],
      [OP.JUMP!, { type: "label", label: dispatcherLabel }],
    );
  };

  for (const instr of instrs) {
    const op = instr[0];
    const isPlainJump = op === OP.JUMP;
    const isConditional =
      !isPlainJump && (op === OP.JUMP_IF_FALSE || op === OP.JUMP_IF_TRUE);

    if (!isPlainJump && !isConditional) {
      out.push(instr);
      continue;
    }

    // JUMP keeps its target in operand 1; conditional jumps keep it in operand 2.
    const targetLabel = extractLabel(isPlainJump ? instr[1] : instr[2]);
    if (targetLabel === null) {
      // No label to encode: leave the instruction untouched.
      out.push(instr);
      continue;
    }

    const siteKey = getRandomInt(1, U16_MAX);

    if (isPlainJump) {
      routeThroughDispatcher(targetLabel, siteKey);
      continue;
    }

    // Conditional jump: branch over the dispatch sequence on the inverted
    // condition, so the dispatch sequence runs exactly when the original
    // jump would have been taken.
    const invertedOp =
      op === OP.JUMP_IF_FALSE ? OP.JUMP_IF_TRUE! : OP.JUMP_IF_FALSE!;
    const cond = instr[1] as RegisterOperand;
    const skipLabel = compiler._makeLabel(targetLabel + "_skip");

    out.push([invertedOp, cond, { type: "label", label: skipLabel }]);
    routeThroughDispatcher(targetLabel, siteKey);
    out.push([null, { type: "defineLabel", label: skipLabel }]);
  }

  // The dispatcher block goes after the body; it is entered only through the
  // JUMP instructions emitted above.
  const dispatcherBlock = buildDispatcherBlock(
    compiler,
    rDisp,
    rKey,
    rClosure,
    dispatcherLabel,
  );
  out.push(...dispatcherBlock);

  return { instrs: out, tail: tmpl.bytecode };
}
|
|
277
|
-
|
|
278
|
-
// ── Pass entry point ──────────────────────────────────────────────────────────
|
|
279
|
-
/**
 * Pass entry point: applies processFunctionBlock() to every function in the
 * bytecode via forEachFunction() and returns what forEachFunction() produces.
 *
 * @param bc       Bytecode to transform.
 * @param compiler Active compiler instance (labels, opcodes, templates).
 */
export function dispatcher(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  const maxId = buildMaxIdMap(bc);

  // Dispatcher labels come from the compiler's own label generator.
  function labelCounter(): string {
    return compiler._makeLabel("dispatcher");
  }

  return forEachFunction(bc, compiler, (fnInstrs, fnId) => {
    return processFunctionBlock(fnInstrs, fnId, compiler, maxId, labelCounter);
  });
}
|
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
import type { Bytecode, Instruction } from "../../types.ts";
|
|
2
|
-
import { Compiler, OP_ORIGINAL, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
-
import { nextFreeSlot } from "../../utils/op-utils.ts";
|
|
4
|
-
import { ok } from "assert";
|
|
5
|
-
|
|
6
|
-
// Opcodes that must not appear in a non-terminal position inside a macro window.
|
|
7
|
-
// Jump ops: modifying frame._pc mid-execution causes the macro handler to
|
|
8
|
-
// run subsequent sub-bodies even after the jump already fired.
|
|
9
|
-
// Frame-changing ops (CALL, CALL_METHOD, NEW, RETURN, THROW): push/pop call
|
|
10
|
-
// frames mid-macro, leaving the `frame` variable stale for later sub-bodies.
|
|
11
|
-
// When one of these is the LAST instruction in the macro sequence there are no
|
|
12
|
-
// following sub-bodies, so editing _pc or the call frame is safe.
|
|
13
|
-
// Variable-operand ops (MAKE_CLOSURE): the number of _operand() calls depends
|
|
14
|
-
// on uvCount at runtime, so a static handler cannot be generated.
|
|
15
|
-
// Infrastructure ops (PATCH, TRY_SETUP, TRY_END, DEBUGGER):
|
|
16
|
-
// either illegal here or nonsensical to fold.
|
|
17
|
-
|
|
18
|
-
// Scan bytecode for repeating instruction sequences and fold them into
|
|
19
|
-
// macro opcodes. Runs after selfModifying but before resolveLabels so
|
|
20
|
-
// IR-ref operands (label/constant) are carried through transparently.
|
|
21
|
-
//
|
|
22
|
-
// Algorithm:
|
|
23
|
-
// 1. Count every eligible window of length 2–5 by its op-code signature.
|
|
24
|
-
// 2. Keep sequences that appear >= 2 times; sort by frequency then length.
|
|
25
|
-
// 3. Use nextFreeSlot() to assign a new opcode to each of the best candidates
|
|
26
|
-
// 4. Re-scan bytecode, replacing each matched sequence with a single
|
|
27
|
-
// multi-operand instruction:
|
|
28
|
-
// [macroOpCode, operands_of_instr_0..., operands_of_instr_1..., ...]
|
|
29
|
-
// The runtime macro handler inlines each sub-instruction body; those
|
|
30
|
-
// bodies call this._operand() themselves to consume the inline operands.
|
|
31
|
-
/**
 * Folds repeated opcode sequences (windows of 2–5 instructions) into single
 * multi-operand "macro" instructions.
 *
 * Side effects: registers each new macro in `compiler.MACRO_OPS` (opcode →
 * constituent opcodes) and `compiler.OP_NAME` (opcode → comma-joined names).
 *
 * @param bc       Bytecode to scan; entries whose opcode is `null` are never folded.
 * @param compiler Supplies opcode metadata and receives the macro registrations.
 * @returns The rewritten bytecode, or `bc` itself when no sequence repeats.
 */
export function macroOpcodes(
  bc: Bytecode,
  compiler: Compiler,
): { bytecode: Bytecode } {
  // Matched by NAME SUBSTRING rather than by opcode value, so any opcode whose
  // name contains one of these (e.g. a derived/specialized variant) is
  // excluded too.
  const alwaysExcluded = [
    "PATCH",
    "TRY_SETUP",
    "TRY_END",
    "DEBUGGER",
    "MAKE_CLOSURE",
  ];

  // Allowed only as the final instruction of a window.
  const nonTerminalExcluded = ["RETURN", "CALL", "CALL_METHOD", "NEW", "THROW"];

  /**
   * Decides whether `op` may occupy a slot in a macro window.
   * `isLast` marks the terminal slot, where jumps and frame-changing ops are
   * tolerated. Throws (via assert) when `op` has no registered name.
   */
  function isEligible(
    op: number | null,
    compiler: Compiler,
    isLast: boolean = false,
  ): boolean {
    if (op === null) return false;

    const { JUMP_OPS, OP_NAME } = compiler;
    const opName = OP_NAME[op];
    ok(opName, `Unknown opcode ${op} (not in OP_NAME)`);

    if (alwaysExcluded.some((name) => opName.includes(name))) return false;

    if (!isLast) {
      if (JUMP_OPS.has(op)) return false;
      if (nonTerminalExcluded.some((name) => opName.includes(name))) {
        return false;
      }
    }

    // Only opcodes present in the original opcode table can be folded.
    return OP_ORIGINAL[opName] !== undefined;
  }

  // ── Step 1: count window frequencies ──────────────────────────────────────
  const freqMap = new Map<string, { ops: number[]; count: number }>();

  for (let i = 0; i < bc.length; i++) {
    for (let len = 2; len <= 5; len++) {
      if (i + len > bc.length) break;

      const ops: number[] = [];
      let valid = true;
      for (let j = 0; j < len; j++) {
        const op = bc[i + j][0];
        if (!isEligible(op, compiler, j === len - 1)) {
          valid = false;
          break;
        }
        ops.push(op as number);
      }

      // A slot rejected here is rejected in every longer window from i as
      // well (the terminal rules are the most permissive), so stop growing.
      if (!valid) break;

      const key = ops.join(",");
      const entry = freqMap.get(key);
      if (entry) {
        entry.count++;
      } else {
        freqMap.set(key, { ops, count: 1 });
      }
    }
  }

  // ── Step 2: keep repeated candidates, most frequent first, then longest ───
  const candidates = Array.from(freqMap.values())
    .filter((e) => e.count >= 2)
    .sort((a, b) => b.count - a.count || b.ops.length - a.ops.length);

  if (candidates.length === 0) return { bytecode: bc };

  // ── Step 3: assign free opcode slots to the best candidates ───────────────
  for (const candidate of candidates) {
    const macroOp = nextFreeSlot(compiler);
    if (macroOp === -1) break; // no free slot left

    const ops = candidate.ops;
    compiler.MACRO_OPS[macroOp] = ops;

    // Register a combined name so OP_NAME lookups work for the macro opcode.
    const combinedName = ops
      .map((v) => compiler.OP_NAME[v] ?? `OP_${v}`)
      .join(",");
    compiler.OP_NAME[macroOp] = combinedName;
  }

  // ── Step 4: build signature → macro opcode lookup ─────────────────────────
  const sigToMacro = new Map<string, number>();
  for (const [macroOpStr, ops] of Object.entries(compiler.MACRO_OPS)) {
    sigToMacro.set((ops as number[]).join(","), Number(macroOpStr));
  }

  // ── Step 5: replace sequences with a single multi-operand instruction ─────
  const result: Bytecode = [];
  let i = 0;

  while (i < bc.length) {
    let matched = false;

    // Prefer the longest window that has a registered macro.
    for (let len = 5; len >= 2; len--) {
      if (i + len > bc.length) continue;

      const instructions: Instruction[] = [];
      let valid = true;
      for (let j = 0; j < len; j++) {
        const instr = bc[i + j];
        if (!isEligible(instr[0], compiler, j === len - 1)) {
          valid = false;
          break;
        }
        instructions.push(instr);
      }
      if (!valid) continue;

      const key = instructions.map((instr) => instr[0]).join(",");
      const macroOpCode = sigToMacro.get(key);
      if (macroOpCode === undefined) continue;

      // Concatenate every constituent instruction's operands, in order.
      const allOperands = instructions.flatMap((instr) => instr.slice(1));

      const newInstr = [macroOpCode, ...allOperands] as Instruction;
      // The macro inherits the source-node tag of its first constituent.
      (newInstr as any)[SOURCE_NODE_SYM] = (instructions[0] as any)[
        SOURCE_NODE_SYM
      ];

      result.push(newInstr);
      i += len;
      matched = true;
      break;
    }

    if (!matched) {
      result.push(bc[i]);
      i++;
    }
  }

  return { bytecode: result };
}
|
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
import type * as b from "../../types.ts";
|
|
2
|
-
import { Compiler, SOURCE_NODE_SYM } from "../../compiler.ts";
|
|
3
|
-
import { getRandomInt } from "../../utils/random-utils.ts";
|
|
4
|
-
import { U16_MAX } from "../../utils/op-utils.ts";
|
|
5
|
-
|
|
6
|
-
// Encrypt a string with a position-dependent XOR key (u16) then base64-encode.
|
|
7
|
-
//
|
|
8
|
-
// Each char code is XOR'd with ((key + i) & 0xFFFF), producing a u16 value.
|
|
9
|
-
// The u16 values are packed as little-endian byte pairs (matching decodeBytecode),
|
|
10
|
-
// then base64-encoded so the stored constant is always safe ASCII — no raw Unicode
|
|
11
|
-
// surrogates, control chars, or quote chars that would break JS string literals.
|
|
12
|
-
/**
 * Conceals a string: each UTF-16 code unit is XOR'd with `(key + index) & 0xffff`,
 * the results are written as little-endian 16-bit values, and the byte
 * sequence is base64-encoded.
 *
 * @param s   String to conceal.
 * @param key Base key; the per-position mask is derived from it.
 * @returns Base64 text representing `s.length * 2` bytes.
 */
function concealString(s: string, key: number): string {
  const packed = Buffer.alloc(s.length * 2);
  for (let pos = 0; pos < s.length; pos++) {
    const mask = (key + pos) & 0xffff;
    // Code unit and mask are both within 0..65535, so the XOR is a valid u16.
    packed.writeUInt16LE(s.charCodeAt(pos) ^ mask, pos * 2);
  }
  return packed.toString("base64");
}
|
|
21
|
-
|
|
22
|
-
// Resolve all {type:"constant", value} (index) and {type:"constant", value, key: true} (key) operands
|
|
23
|
-
//
|
|
24
|
-
// constPoolIndex — index into the constants array (as before).
|
|
25
|
-
// concealKey — XOR key used to conceal this constant.
|
|
26
|
-
// 0 means no concealment (concealConstants is off, or the
|
|
27
|
-
// value type is not concealable: null, undefined, bool, float…).
|
|
28
|
-
//
|
|
29
|
-
// The constants array stores the CONCEALED value when key != 0.
|
|
30
|
-
// The runtime's _readConstant(idx, key) reverses the concealment on the fly.
|
|
31
|
-
//
|
|
32
|
-
// Both slots are u16; all existing operand serialization handles them identically.
|
|
33
|
-
/**
 * Resolves every `{type: "constant"}` operand into a numeric operand and
 * builds the constant pool.
 *
 * A constant operand with a truthy `key` property resolves to the conceal key
 * of its value; any other constant operand resolves to the value's pool index.
 * Operand objects are updated in place (`type: "number"`, `resolvedValue`).
 *
 * When `compiler.options.concealConstants` is set, strings are stored via
 * concealString() and integers are stored XOR'd with a random key in
 * [1, U16_MAX]. Only integers that survive 32-bit coercion are XOR-concealed:
 * the `^` operator truncates its operands to int32, so larger integers (and
 * -0) would be stored corrupted. Those, like all other values, are stored
 * as-is with key 0.
 *
 * @param bc       Bytecode whose constant operands are to be resolved.
 * @param compiler Supplies `options.concealConstants`.
 * @returns The rewritten bytecode and the constant pool.
 */
export function resolveConstants(
  bc: b.Bytecode,
  compiler: Compiler,
): {
  bytecode: b.Bytecode;
  constants: any[];
} {
  const constants: any[] = [];
  const constantsMap = new Map<any, number>(); // original value → pool index
  const keyMap = new Map<number, number>(); // pool index → conceal key

  // True only for numbers that an int32 XOR round-trip reproduces exactly.
  // Rejects floats, NaN, ±Infinity, integers outside [-2^31, 2^31 - 1], and -0.
  const survivesInt32Xor = (value: unknown): value is number =>
    typeof value === "number" && Object.is(value | 0, value);

  // Interns the operand's value and returns [index operand, key operand].
  function intern(operand: b.InstrOperand): [b.InstrOperand, b.InstrOperand] {
    const value = (operand as any).value;

    let idx = constantsMap.get(value);
    let key = 0;

    if (typeof idx !== "number") {
      // First occurrence: allocate a pool slot and decide on concealment.
      idx = constants.length;
      constantsMap.set(value, idx);

      if (compiler.options.concealConstants && typeof value === "string") {
        // Strings: position-dependent XOR. Key must be >= 1.
        key = getRandomInt(1, U16_MAX);
        constants.push(concealString(value, key));
      } else if (compiler.options.concealConstants && survivesInt32Xor(value)) {
        // int32-range integers: plain XOR, reversible with the same key.
        key = getRandomInt(1, U16_MAX);
        constants.push(value ^ key);
      } else {
        // Not concealable (null, undefined, boolean, float, out-of-int32-range
        // integer, -0, RegExp…) or option off.
        key = 0;
        constants.push(value);
      }

      keyMap.set(idx, key);
    } else {
      // Reuse existing pool entry — same key that was assigned on first intern.
      key = keyMap.get(idx)!;
    }

    const idxOperand: any = {
      type: "number",
      resolvedValue: idx,
    };

    const keyOperand: any = {
      type: "number",
      resolvedValue: key,
    };

    return [idxOperand, keyOperand];
  }

  const resolved: b.Bytecode = [];
  for (const instr of bc) {
    const [op, ...operands] = instr;

    const hasConstant = operands.some(
      (o) =>
        o !== undefined &&
        o !== null &&
        typeof o === "object" &&
        (o as any).type === "constant",
    );

    if (!hasConstant) {
      resolved.push(instr);
      continue;
    }

    const newOperands: b.InstrOperand[] = operands.map((operand) => {
      if ((operand as any)?.type !== "constant") return operand;

      const [idxOperand, keyOperand] = intern(operand);
      const newOperand = (operand as any)?.key ? keyOperand : idxOperand;

      // Update the existing object rather than substituting a new one, so the
      // operand keeps its identity.
      return Object.assign(operand, newOperand);
    });

    const newInstr = [op, ...newOperands] as b.Instruction;
    (newInstr as any)[SOURCE_NODE_SYM] = (instr as any)[SOURCE_NODE_SYM];
    resolved.push(newInstr);
  }

  return { bytecode: resolved, constants };
}
|