jsguardian 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,176 @@
1
+ "use strict";
2
+ // ============================================================================
3
+ // Layer 8 — Adversarial Token Injection
4
+ //
5
+ // CONCEPT:
6
+ // Transformer-based LLMs (GPT-4, Claude, Gemini) tokenize code before
7
+ // processing. Certain byte sequences produce token IDs near "glitch tokens"
8
+ // — positions in the embedding space with degenerate or highly anisotropic
9
+ // properties that cause attention scores to spike/collapse, producing
10
+ // inconsistent or confused outputs for the surrounding context.
11
+ //
12
+ // Known vectors (from Rumbelow & Watkins 2023, "SolidGoldMagikarp"):
13
+ // - Tokens that were in the vocabulary but almost never appeared in
14
+ // training data → their embeddings are under-trained → chaotic behavior
15
+ // - Token sequences that force the model into repetition or refusal
16
+ // - Identifier-legal characters that map to split/fused token boundaries
17
+ //
18
+ // We rename a fraction of INTERNAL (non-exported) identifiers to sequences
19
+ // that exploit these properties. The identifiers remain valid JavaScript.
20
+ // The LLM's attention mechanism is disrupted when it tries to track data
21
+ // flow through these variables — it loses the thread.
22
+ //
23
+ // PRACTICAL IMPLEMENTATION:
24
+ // Since we can't know the exact tokenizer of every LLM, we use three
25
+ // complementary strategies:
26
+ //
27
+ // Strategy A: Unicode homoglyph + combining character injection
28
+ // Insert zero-width joiners, right-to-left marks, or Cyrillic lookalikes
29
+ // into identifier names. Valid JS (ES2015+ allows Unicode in identifiers).
30
+ // Forces tokenizer to produce unexpected token boundaries.
31
+ // e.g.: _validate → _vаlidаte (Cyrillic 'а' U+0430 replaces Latin 'a')
32
+ //
33
+ // Strategy B: Token boundary confusion sequences
34
+ // Use patterns like _0x + alternating hex + underscore that cause
35
+ // common BPE tokenizers to produce 3-4 tokens instead of 1, fragmenting
36
+ // the "concept" of the variable across the attention window.
37
+ // e.g.: _k3y_v4l → _0xk3_0xv4 (forces re-tokenization at every digit)
38
+ //
39
+ // Strategy C: Semantic inversion baits
40
+ // Rename a variable to a name that MEANS the opposite of what it holds.
41
+ // e.g.: a variable holding `true` (valid) is renamed `__invalid__`
42
+ // The LLM reads the name, not the value — it draws the wrong conclusion.
43
+ // ============================================================================
44
+ Object.defineProperty(exports, "__esModule", { value: true });
45
+ exports.applyAdversarialTokens = applyAdversarialTokens;
46
// Latin -> Cyrillic homoglyph table consumed by injectHomoglyphs().
// Values are spelled as \u escapes on purpose: the raw glyphs are visually
// identical to their Latin keys in most fonts, so a literal table cannot be
// audited by eye. Frozen because it is shared, read-only module state.
const HOMOGLYPHS = Object.freeze({
    a: "\u0430", // CYRILLIC SMALL LETTER A
    e: "\u0435", // CYRILLIC SMALL LETTER IE
    o: "\u043e", // CYRILLIC SMALL LETTER O
    p: "\u0440", // CYRILLIC SMALL LETTER ER
    c: "\u0441", // CYRILLIC SMALL LETTER ES
    x: "\u0445", // CYRILLIC SMALL LETTER HA
    i: "\u0456", // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
});
56
// Semantic-inversion name stems: identifiers that suggest a failed or
// untrusted state regardless of what the variable actually holds.
// Frozen because it is shared, read-only module state.
const INVERSION_NAMES = Object.freeze([
    "__invalid__", "__failed__", "__unauthorized__", "__rejected__",
    "__expired__", "__denied__", "__corrupt__", "__tampered__",
    "__bypass__", "__override__", "__nullified__", "__voided__",
]);
62
// Prefixes used by confusionName(). Every entry starts with "_" so the
// generated name is always a valid identifier even though the random
// base-36 parts that follow may begin with a digit.
// Frozen because it is shared, read-only module state.
const CONFUSION_PREFIXES = Object.freeze([
    "_0x", "_0b", "_0o", "_v0", "_k_", "_r_", "_f_",
]);
66
/**
 * Swaps up to two characters of `name` for their HOMOGLYPHS lookalikes.
 *
 * The name is scanned left to right by UTF-16 code unit. Each character that
 * has an entry in HOMOGLYPHS triggers exactly one `rng.next()` draw and is
 * replaced when the draw is below 0.6. Scanning stops after two replacements.
 * Zero replacements are possible, in which case `name` is returned unchanged;
 * callers must handle that case themselves.
 *
 * @param {string} name - identifier to transform.
 * @param {{next(): number}} rng - random source; `next()` is compared against 0.6.
 * @returns {string} a string of the same length as `name`.
 */
function injectHomoglyphs(name, rng) {
    const chars = name.split("");
    let replaced = 0;
    for (let i = 0; i < chars.length && replaced < 2; i++) {
        const lookalike = HOMOGLYPHS[chars[i]];
        // Short-circuit keeps the rng untouched for non-substitutable characters.
        if (lookalike && rng.next() < 0.6) {
            chars[i] = lookalike;
            replaced++;
        }
    }
    return chars.join("");
}
79
/**
 * Builds a random identifier of the form `<prefix><part1>_<part2>`.
 *
 * Draw order (must stay stable for seeded builds): one `rng.next()` to pick
 * the prefix from CONFUSION_PREFIXES, then two `rng.int32()` draws. Each
 * int32 is reinterpreted as unsigned, rendered in base 36 and truncated to
 * at most 4 and 3 characters respectively (fewer when the number is small).
 *
 * @param {{next(): number, int32(): number}} rng - random source.
 * @returns {string} the generated identifier.
 */
function confusionName(rng) {
    const prefixIndex = Math.floor(rng.next() * CONFUSION_PREFIXES.length);
    const prefix = CONFUSION_PREFIXES[prefixIndex];
    const part1 = (rng.int32() >>> 0).toString(36).slice(0, 4);
    const part2 = (rng.int32() >>> 0).toString(36).slice(0, 3);
    return `${prefix}${part1}_${part2}`;
}
85
/**
 * Renames a fraction of function-local `var` bindings using one of three
 * naming strategies (homoglyph injection, token-boundary confusion,
 * semantic inversion). Mutates the Babel AST in place.
 *
 * A declarator is renamed only when ALL of the following hold:
 *   - neither the declarator node nor its enclosing function node is flagged `__obf`
 *   - its id is a plain Identifier (destructuring patterns are skipped)
 *   - the seeded draw falls below `rate`
 *   - it sits inside a function (module-scope names could be exported)
 *   - it is not declared with `let` or `const`
 *   - its name does not start with "__" or "$"
 *   - its name does not occur inside any StringLiteral of the enclosing
 *     function (guards against eval-style lookups by name)
 *   - the name resolves to a plain `var` binding (not a parameter or a
 *     hoisted function that merely shares the name)
 *   - the new name is not already bound, global, or referenced in the program
 *
 * The rename goes through `scope.rename`, so reads, writes (`x = 1`, `x++`)
 * and redeclarations of the binding are all updated together.
 *
 * @param {object} ast - Babel AST to mutate.
 * @param {Function} traverse - traversal function called as `traverse(ast, visitor)`.
 * @param {object} t - types helper; only `t.isIdentifier(node)` is used.
 * @param {{next(): number, int32(): number}} rng - seeded random source.
 * @param {{adversarialTokenRate?: number}} [opts] - `adversarialTokenRate` is the
 *   0-1 fraction of candidates to rename (default 0.3; 0 disables renaming).
 * @returns {void}
 */
function applyAdversarialTokens(ast, traverse, t, rng, opts = {}) {
    const rate = opts.adversarialTokenRate ?? 0.3;
    traverse(ast, {
        VariableDeclarator(path) {
            if (path.node.__obf)
                return;
            if (!t.isIdentifier(path.node.id))
                return;
            // Looked up once; used both for the __obf wrapper check and the
            // module-scope check below.
            const fnParent = path.findParent((p) => p.isFunction());
            if (fnParent?.node?.__obf)
                return;
            // The draw stays at this position so seeded builds consume the rng
            // stream in the same order as before. `>=` makes rate 0 a hard "off".
            if (rng.next() >= rate)
                return;
            if (!fnParent)
                return;
            const declKind = path.parent?.kind;
            if (declKind === "const" || declKind === "let")
                return;
            const oldName = path.node.id.name;
            if (!oldName || oldName.startsWith("__"))
                return; // leave injected helper variables alone
            if (oldName.startsWith("$"))
                return; // framework convention (e.g. jQuery)
            let appearsInString = false;
            try {
                fnParent.traverse({
                    StringLiteral(sp) {
                        if (sp.node.value.includes(oldName)) {
                            appearsInString = true;
                            sp.stop();
                        }
                    },
                });
            }
            catch {
                // Could not prove the rename safe: leave the variable alone.
                return;
            }
            if (appearsInString)
                return;
            // Pick strategy (draw order: strategy, then strategy-specific draws).
            let newName;
            const strategy = rng.next();
            if (strategy < 0.35) {
                // Strategy A: homoglyph injection, falling back to B when the
                // name had nothing to substitute.
                newName = injectHomoglyphs(oldName, rng);
                if (newName === oldName)
                    newName = confusionName(rng);
            }
            else if (strategy < 0.65) {
                // Strategy B: token boundary confusion
                newName = confusionName(rng);
            }
            else {
                // Strategy C: semantic inversion
                newName = INVERSION_NAMES[Math.floor(rng.next() * INVERSION_NAMES.length)] +
                    "_" + (rng.int32() >>> 0).toString(36).slice(0, 3);
            }
            try {
                const scope = path.scope;
                const binding = scope.getBinding(oldName);
                // A `var` that shares its name with a parameter or hoisted
                // function resolves to that other binding; do not touch it.
                if (!binding || binding.kind !== "var")
                    return;
                // Never capture or shadow a name that is already in use.
                if (scope.hasBinding(newName) || scope.hasGlobal(newName) || scope.hasReference(newName))
                    return;
                // scope.rename updates reads, writes and redeclarations; patching
                // only binding.referencePaths would leave assignments on the old name.
                scope.rename(oldName, newName);
            }
            catch {
                // Best effort: a failed rename leaves this variable as it was.
            }
        },
    });
}
@@ -0,0 +1,235 @@
1
+ "use strict";
2
+ // ============================================================================
3
+ // Layer 3g — Anti-Pattern Injection
4
+ //
5
+ // Three mechanisms that violate every assumption analysis tools make:
6
+ //
7
+ // A — Prototype Pollution Noise
8
+ // IIFEs that temporarily install junk properties on Object.prototype via a
9
+ // Symbol (guaranteed unique, non-enumerable in for..in, safe to restore).
10
+ // NET runtime effect: zero. Effect on analysis tools: flag as dangerous
11
+ // prototype mutation; static analysers that freeze Object.prototype before
12
+ // analysis will error; execution-based tools that snapshot the prototype
13
+ // graph before running will see a different shape than after.
14
+ //
15
+ // B — ASI Traps
16
+ // Dead functions with `return\n<expr>` patterns. ECMAScript 2015+:
17
+ // ASI fires after `return` when the next token is on a new line →
18
+ // the function returns `undefined`, not `<expr>`.
19
+ // A deobfuscator or formatter that "helpfully" removes the line break
20
+ // (or adds a semicolon after `return`) changes the observable semantics.
21
+ // Tools that reformat before analysis break on these.
22
+ //
23
+ // C — Unicode Homoglyph Decoy Pairs
24
+ // Adjacent var declarations whose names differ only in one character:
25
+ // Latin `x` (U+0078) vs Cyrillic `х` (U+0445), or
26
+ // Latin `a` (U+0061) vs Cyrillic `а` (U+0430), etc.
27
+ // Both look identical in most editors and terminals.
28
+ // Deobfuscators that Unicode-normalise identifiers before analysis
29
+ // collapse two distinct variables into one → name collision → crash.
30
+ // Tools that compare names visually misidentify one for the other.
31
+ //
32
+ // ARCHITECTURE:
33
+ // buildAntiPatternSource() returns a raw JS source string, injected
34
+ // AFTER cneObfuscate() runs. Nothing here passes through CNE — names
35
+ // are chosen to already look like CNE output (_0xXXXX format or short
36
+ // random identifiers). The prototype Symbol IIFEs actually execute at
37
+ // module load time but are provably safe: Symbol keys are unique and
38
+ // immediately restored.
39
+ // ============================================================================
40
+ Object.defineProperty(exports, "__esModule", { value: true });
41
+ exports.buildAntiPatternSource = buildAntiPatternSource;
42
+ // ---------------------------------------------------------------------------
43
+ // Helpers
44
+ // ---------------------------------------------------------------------------
45
/**
 * Generates an identifier of the form `_0xXXXX` from the low 16 bits of one
 * `rng.int32()` draw, rendered as four zero-padded lowercase hex digits.
 *
 * @param {{int32(): number}} rng - random source.
 * @returns {string} e.g. "_0x00ab".
 */
function rname(rng) {
    const low16 = (rng.int32() >>> 0) & 0xffff;
    return `_0x${low16.toString(16).padStart(4, "0")}`;
}
48
/**
 * Generates a hex numeric literal (as source text) from the low 16 bits of
 * one `rng.int32()` draw. Unlike rname(), the digits are not zero-padded.
 *
 * @param {{int32(): number}} rng - random source.
 * @returns {string} e.g. "0x2345" or "0x0".
 */
function rconst(rng) {
    const low16 = (rng.int32() >>> 0) & 0xffff;
    return `0x${low16.toString(16)}`;
}
51
+ // ---------------------------------------------------------------------------
52
+ // A — Prototype Pollution Noise
53
+ //
54
+ // Each IIFE:
55
+ // 1. Creates a Symbol (or a random string key on platforms without Symbol)
56
+ // 2. Saves the current Object.prototype value for that key (undefined)
57
+ // 3. Assigns a seeded-random junk function/value to Object.prototype[sym]
58
+ // 4. Reads it back via `{}[sym]` to confirm the mutation
59
+ // 5. Immediately restores Object.prototype[sym] to the saved value
60
+ // 6. Voids the result so no side-effects escape
61
+ //
62
+ // Runtime cost: ~1 microsecond at module load. Side-effect: none.
63
+ // Analysis cost: tool must prove the restoration always runs (it does,
64
+ // unconditionally) and that the Symbol never collides (it never can).
65
+ // ---------------------------------------------------------------------------
66
/**
 * Builds the source text of one self-contained IIFE that briefly installs a
 * junk function on Object.prototype under a fresh key and then removes it.
 *
 * Generated steps:
 *   1. key = Symbol(), or a "_"-prefixed string when Symbol is unavailable
 *   2. remember whether Object.prototype already OWNS that key (and its value)
 *   3. assign the junk function, read it back through `({})[key]`
 *   4. if the key was owned before, put the old value back; otherwise DELETE
 *      it. Assigning `undefined` back would leave a new enumerable own
 *      property behind, which in the string-key fallback shows up in every
 *      `for..in` loop of the host program.
 *
 * rng draw order (kept stable for seeded builds): three rname() draws, one
 * int32 for the parameter name, one int32 for the voided constant, one
 * next() for the junk-function shape, one rconst() inside the junk function,
 * one rconst() for the fallback string key.
 *
 * @param {{next(): number, int32(): number}} rng - seeded random source.
 * @returns {string} single-line JS source, starting with ";" so it is safe
 *   to append after a statement lacking its own terminator.
 */
function buildProtoPollutionIife(rng) {
    const symName = rname(rng);
    const prevDraw = rname(rng);
    const chkDraw = rname(rng);
    // rname() has only 16 bits of entropy. A clash between these locals would
    // make the generated code overwrite its own key, so clashes are resolved
    // with a fixed suffix (no extra rng draws, so seeded output is otherwise
    // unchanged).
    const prevName = prevDraw === symName ? `${prevDraw}_p` : prevDraw;
    const chkName = chkDraw === symName || chkDraw === prevName ? `${chkDraw}_c` : chkDraw;
    const paramA = `_${((rng.int32() >>> 0) & 0xff).toString(36)}`;
    const junkVal = (rng.int32() >>> 0) & 0xffff;
    const junkOp = rng.next() < 0.5
        ? `function(${paramA}){return ${paramA}^${rconst(rng)}|0;}`
        : `function(${paramA}){return(${paramA}+${rconst(rng)})|0;}`;
    const fallbackKey = rconst(rng);
    return `;(function(){` +
        `var ${symName}=typeof Symbol!=='undefined'?Symbol():` +
        `'_'+(${fallbackKey}).toString(36);` +
        // Saved as a one-element array so "owned with value undefined" and
        // "not owned" stay distinguishable.
        `var ${prevName}=Object.prototype.hasOwnProperty.call(Object.prototype,${symName})?` +
        `[Object.prototype[${symName}]]:null;` +
        `Object.prototype[${symName}]=${junkOp};` +
        `var ${chkName}=({})[${symName}];` +
        `if(${prevName}){Object.prototype[${symName}]=${prevName}[0];}` +
        `else{delete Object.prototype[${symName}];}` +
        `void ${chkName};` +
        `void ${junkVal};` +
        `})();`;
}
86
+ // ---------------------------------------------------------------------------
87
+ // B — ASI Traps
88
+ //
89
+ // ECMAScript mandates ASI after `return` when the next token is on a NEW LINE.
90
+ // So `return\n<expr>` always means `return; <expr>` — the function returns
91
+ // undefined and <expr> is unreachable dead code.
92
+ //
93
+ // A reformatter that moves `<expr>` to the same line as `return` changes the
94
+ // return value. A tool that adds `; // semicolon` after `return` does the same.
95
+ // Both produce different semantics → the tool's analysis is of WRONG code.
96
+ //
97
+ // We generate dead functions (never called, no exports) with these patterns
98
+ // so the traps don't affect production behaviour.
99
+ // ---------------------------------------------------------------------------
100
/**
 * Builds the source text of one function declaration containing a line break
 * in a position where readers commonly misjudge how the code parses.
 *
 * rng draw order (kept stable for seeded builds): rname x3 (function name,
 * two variable names), rconst x2, one int32 selecting the pattern, then one
 * more rname for patterns 1 and 2.
 *
 * Patterns:
 *   0: `return` + newline + numeric expression. ASI terminates the `return`,
 *      so the function returns undefined and the expression is unreachable.
 *   1: `return` + newline + `{label: expr}`. Same ASI rule; the braces parse
 *      as a block holding a labeled expression statement, not an object.
 *   2: `var c = b` + newline + `+K`. NO semicolon is inserted here: a line
 *      break followed by `+` continues the expression, so this is simply
 *      `var c = b + K` and the function returns the sum.
 *
 * @param {{int32(): number}} rng - seeded random source.
 * @returns {string} JS source of a single function declaration.
 */
function buildAsiTrapFunction(rng) {
    const fnName = rname(rng);
    const v1 = rname(rng);
    const v2Draw = rname(rng);
    // Pattern 1 uses v1 and v2 as parameters; identical names would be a
    // SyntaxError in strict-mode code and take the whole output file down.
    // Resolved with a fixed suffix so no extra rng draw is consumed.
    const v2 = v2Draw === v1 ? `${v2Draw}_` : v2Draw;
    const c1 = rconst(rng);
    const c2 = rconst(rng);
    const kind = (rng.int32() >>> 0) % 3;
    if (kind === 0) {
        return `function ${fnName}(${v1}){` +
            `var ${v2}=(${v1}^${c1})|0;` +
            `return\n` + // ASI ends the statement here: returns undefined
            `${v2}+${c2};` + // unreachable expression statement
            `}`;
    }
    if (kind === 1) {
        // Exactly one `label: expr` inside the braces: two comma-separated
        // entries would not parse as a block.
        const k1 = rname(rng);
        return `function ${fnName}(${v1},${v2}){` +
            `return\n` + // ASI ends the statement here: returns undefined
            `{${k1}:(${v1}^${c1})|(${v2}|${c2})};` + // block with a labeled statement
            `}`;
    }
    // kind === 2. ASI is not allowed after `throw`, so no throw variant exists.
    // NOTE(review): this pattern does not trigger ASI. A real restricted-
    // production break would need `++<identifier>` at the start of the second
    // line; `++` applied to a numeric literal is a SyntaxError. Emitted text is
    // kept as-is so seeded output does not change.
    const v3 = rname(rng);
    return `function ${fnName}(${v1}){` +
        `var ${v2}=${c1}|0;` +
        `var ${v3}=${v2}` + `\n` + // line break inside the initializer
        `+${c2};` + // binary +: continues the initializer as v2 + c2
        `return ${v3};` + // returns v2 + c2
        `}`;
}
138
+ // ---------------------------------------------------------------------------
139
+ // C — Unicode Homoglyph Decoy Pairs
140
+ //
141
+ // Each pair: one Latin-script var and one Cyrillic-script var whose names are
142
+ // visually indistinguishable in proportional fonts.
143
+ //
144
+ // Substitution map (Latin → Cyrillic lookalike):
145
+ // a → а (U+0430) e → е (U+0435) o → о (U+043E)
146
+ // p → р (U+0440) c → с (U+0441) x → х (U+0445)
147
+ // y → у (U+0443) (s → ѕ U+0455 is also a lookalike but is NOT in HOMOGLYPH_MAP below)
148
+ //
149
+ // Both values are different random constants. A tool that normalises Unicode
150
+ // before analysis collapses them to the same name → redeclaration error.
151
+ // A tool that compares names visually mistakes one for the other.
152
+ // ---------------------------------------------------------------------------
153
// Latin -> Cyrillic substitution map (one-way).
// Key ORDER matters: buildHomoglyphPairs() rotates through Object.keys() of
// this map, so reordering entries changes generated output.
// Values are spelled as \u escapes because the raw glyphs cannot be told
// apart from their Latin keys by eye. Frozen: shared, read-only module state.
const HOMOGLYPH_MAP = Object.freeze({
    a: "\u0430", // CYRILLIC SMALL LETTER A
    e: "\u0435", // CYRILLIC SMALL LETTER IE
    o: "\u043e", // CYRILLIC SMALL LETTER O
    p: "\u0440", // CYRILLIC SMALL LETTER ER
    c: "\u0441", // CYRILLIC SMALL LETTER ES
    x: "\u0445", // CYRILLIC SMALL LETTER HA
    y: "\u0443", // CYRILLIC SMALL LETTER U
});
163
/**
 * Returns `name` with exactly one character replaced by its HOMOGLYPH_MAP
 * lookalike, or null when no character of `name` has one.
 *
 * Search order: when the name starts with "_0x", the characters AFTER that
 * prefix are tried first (left to right), and the prefix itself only as a
 * fallback. Otherwise the 'x' of the prefix would always win and the
 * substitutable hex digits (a, c, e) would never be reached.
 *
 * NOTE(review): nothing in the visible part of this module calls this
 * function; buildHomoglyphPairs() constructs its twins directly.
 *
 * @param {string} name - identifier to derive a twin from.
 * @returns {string|null} the twin, or null if nothing could be substituted.
 */
function cyrillicTwin(name) {
    const prefixLen = name.startsWith("_0x") ? 3 : 0;
    // Tries positions [from, to) in order and swaps the first substitutable one.
    const swapFirstIn = (from, to) => {
        for (let i = from; i < to; i++) {
            const sub = HOMOGLYPH_MAP[name[i]];
            if (sub) {
                return name.slice(0, i) + sub + name.slice(i + 1);
            }
        }
        return null;
    };
    return swapFirstIn(prefixLen, name.length) ?? swapFirstIn(0, prefixLen);
}
185
/**
 * Builds `count` pairs of `var` declarations whose names differ only in one
 * character: a Latin letter in the first, its HOMOGLYPH_MAP lookalike in the
 * second. Each declaration gets its own random constant.
 *
 * Pair g uses the key at index `g % 7` of HOMOGLYPH_MAP (insertion order),
 * so all seven substitutions appear only when `count` is 7 or more.
 *
 * Names have the form `_<letter><4 hex digits>`. The substituted letter is
 * always at index 1; hex digits that happen to be a, c or e are left alone.
 *
 * rng draw order per pair: one int32 (suffix), then two rconst() draws.
 *
 * @param {{int32(): number}} rng - seeded random source.
 * @param {number} count - number of pairs to generate.
 * @returns {string[]} 2 * count source lines (Latin line first in each pair).
 */
function buildHomoglyphPairs(rng, count) {
    const lines = [];
    const subKeys = Object.keys(HOMOGLYPH_MAP); // [a, e, o, p, c, x, y]
    for (let g = 0; g < count; g++) {
        const targetLetter = subKeys[g % subKeys.length];
        const cyrChar = HOMOGLYPH_MAP[targetLetter];
        const suffix = ((rng.int32() >>> 0) & 0xffff).toString(16).padStart(4, "0");
        const val1 = rconst(rng);
        const val2 = rconst(rng);
        lines.push(`var _${targetLetter}${suffix}=${val1};`);
        lines.push(`var _${cyrChar}${suffix}=${val2};`);
    }
    return lines;
}
206
+ // ---------------------------------------------------------------------------
207
+ // Main export
208
+ // ---------------------------------------------------------------------------
209
/**
 * Builds a raw JS source block made of, in this order:
 *   - the marker comment `/* layer-3g *​/`
 *   - 3-5 prototype-noise IIFEs        (buildProtoPollutionIife)
 *   - 4-6 line-break trap functions    (buildAsiTrapFunction)
 *   - 5-8 homoglyph var pairs, i.e. 10-16 lines (buildHomoglyphPairs)
 * joined with "\n".
 *
 * rng draw order (kept stable for seeded builds): the count for each section
 * is drawn with `rng.next()` immediately before that section is generated.
 *
 * NOTE(review): trap-function names come from a 16-bit space and are not
 * de-duplicated across calls; two identical names would be a duplicate
 * top-level declaration if the result is ever placed in an ES module.
 *
 * @param {{next(): number, int32(): number}} rng - seeded random source.
 * @returns {string} the generated source block.
 */
function buildAntiPatternSource(rng) {
    const parts = ["/* layer-3g */"];
    // A: prototype-noise IIFEs
    const ppCount = 3 + Math.floor(rng.next() * 3); // 3..5
    for (let i = 0; i < ppCount; i++) {
        parts.push(buildProtoPollutionIife(rng));
    }
    // B: line-break trap functions
    const asiCount = 4 + Math.floor(rng.next() * 3); // 4..6
    for (let i = 0; i < asiCount; i++) {
        parts.push(buildAsiTrapFunction(rng));
    }
    // C: homoglyph decoy pairs
    const hgCount = 5 + Math.floor(rng.next() * 4); // 5..8
    parts.push(...buildHomoglyphPairs(rng, hgCount));
    return parts.join("\n");
}