muaddib-scanner 2.11.39 → 2.11.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"target": "node_modules",
|
|
3
|
-
"timestamp": "2026-05-
|
|
3
|
+
"timestamp": "2026-05-25T09:38:49.363Z",
|
|
4
4
|
"threats": [
|
|
5
5
|
{
|
|
6
6
|
"type": "string_mutation_obfuscation",
|
|
@@ -870,6 +870,27 @@
|
|
|
870
870
|
"playbook": "CRITIQUE: Execution de commande shell dangereuse detectee. Isoler la machine. Verifier si la commande a ete executee.",
|
|
871
871
|
"points": 3
|
|
872
872
|
},
|
|
873
|
+
{
|
|
874
|
+
"type": "unicode_invisible_injection",
|
|
875
|
+
"severity": "CRITICAL",
|
|
876
|
+
"message": "10 invisible Unicode characters detected (zero-width, variation selectors, tag chars). Possible hidden payload encoded via invisible codepoints.",
|
|
877
|
+
"file": "iconv-lite/encodings/sbcs-data-generated.js",
|
|
878
|
+
"count": 1,
|
|
879
|
+
"reductions": [],
|
|
880
|
+
"originalSeverity": "CRITICAL",
|
|
881
|
+
"confidenceTier": "medium",
|
|
882
|
+
"rule_id": "MUADDIB-OBF-003",
|
|
883
|
+
"rule_name": "Unicode Invisible Character Injection",
|
|
884
|
+
"confidence": "high",
|
|
885
|
+
"domain": "malware",
|
|
886
|
+
"references": [
|
|
887
|
+
"https://www.aikido.dev/blog/glassworm-returns-unicode-attack-github-npm-vscode",
|
|
888
|
+
"https://attack.mitre.org/techniques/T1027/"
|
|
889
|
+
],
|
|
890
|
+
"mitre": "T1027",
|
|
891
|
+
"playbook": "CRITIQUE: Caracteres Unicode invisibles detectes (zero-width, variation selectors). Technique GlassWorm: du code malveillant est encode via des variation selectors invisibles dans les editeurs. Analyser le fichier avec un editeur hexa. Supprimer le package immediatement. Verifier les autres fichiers du projet pour des injections similaires.",
|
|
892
|
+
"points": 25
|
|
893
|
+
},
|
|
873
894
|
{
|
|
874
895
|
"type": "high_entropy_string",
|
|
875
896
|
"severity": "LOW",
|
|
@@ -1107,17 +1128,17 @@
|
|
|
1107
1128
|
],
|
|
1108
1129
|
"python": null,
|
|
1109
1130
|
"summary": {
|
|
1110
|
-
"total":
|
|
1111
|
-
"critical":
|
|
1131
|
+
"total": 52,
|
|
1132
|
+
"critical": 3,
|
|
1112
1133
|
"high": 6,
|
|
1113
1134
|
"medium": 28,
|
|
1114
1135
|
"low": 15,
|
|
1115
1136
|
"riskScore": 35,
|
|
1116
1137
|
"riskLevel": "MEDIUM",
|
|
1117
1138
|
"globalRiskScore": 100,
|
|
1118
|
-
"maxFileScore":
|
|
1139
|
+
"maxFileScore": 26,
|
|
1119
1140
|
"packageScore": 1,
|
|
1120
|
-
"mostSuspiciousFile": "
|
|
1141
|
+
"mostSuspiciousFile": "iconv-lite/encodings/sbcs-data-generated.js",
|
|
1121
1142
|
"fileScores": {
|
|
1122
1143
|
"esquery/parser.js": 5,
|
|
1123
1144
|
"ajv/lib/ajv.js": 25,
|
|
@@ -1133,7 +1154,7 @@
|
|
|
1133
1154
|
"eslint/lib/config/config-loader.js": 11,
|
|
1134
1155
|
"eslint/lib/eslint/eslint-helpers.js": 25,
|
|
1135
1156
|
"eslint/lib/eslint/eslint.js": 13,
|
|
1136
|
-
"iconv-lite/encodings/sbcs-data-generated.js":
|
|
1157
|
+
"iconv-lite/encodings/sbcs-data-generated.js": 26,
|
|
1137
1158
|
"iconv-lite/encodings/sbcs-data.js": 1,
|
|
1138
1159
|
"ajv/lib/compile/formats.js": 1
|
|
1139
1160
|
},
|
|
@@ -1169,6 +1190,12 @@
|
|
|
1169
1190
|
"points": 25,
|
|
1170
1191
|
"reason": "Dynamic import() with computed URL argument — remote code loading from dynamically constructed URL."
|
|
1171
1192
|
},
|
|
1193
|
+
{
|
|
1194
|
+
"rule": "MUADDIB-OBF-003",
|
|
1195
|
+
"type": "unicode_invisible_injection",
|
|
1196
|
+
"points": 25,
|
|
1197
|
+
"reason": "10 invisible Unicode characters detected (zero-width, variation selectors, tag chars). Possible hidden payload encoded via invisible codepoints."
|
|
1198
|
+
},
|
|
1172
1199
|
{
|
|
1173
1200
|
"rule": "MUADDIB-AST-006",
|
|
1174
1201
|
"type": "dynamic_require",
|
|
@@ -1461,7 +1488,7 @@
|
|
|
1461
1488
|
"tierCounts": {
|
|
1462
1489
|
"verified": 0,
|
|
1463
1490
|
"high": 0,
|
|
1464
|
-
"medium":
|
|
1491
|
+
"medium": 10,
|
|
1465
1492
|
"low": 42
|
|
1466
1493
|
},
|
|
1467
1494
|
"perceivedFlagged": 0
|
|
@@ -399,6 +399,16 @@ const PLAYBOOKS = {
|
|
|
399
399
|
'Technique Shai-Hulud (TeamPCP). Supprimer les fichiers .claude/settings.json ' +
|
|
400
400
|
'et .vscode/tasks.json avant ouverture.',
|
|
401
401
|
|
|
402
|
+
aiconf_unicode_obfuscation:
|
|
403
|
+
'CRITIQUE: Fichier de config d\'agent IA contient des caracteres Unicode invisibles ' +
|
|
404
|
+
'(zero-width, directional override, variation selectors). Technique TrapDoor (mai 2026): ' +
|
|
405
|
+
'l\'attaquant insere des U+200B au milieu de mots-cles pour echapper a la revue humaine ' +
|
|
406
|
+
'et aux regex statiques, tandis que l\'agent IA (Claude, Cursor) lit le contenu normalise ' +
|
|
407
|
+
'et execute le payload cache. NE PAS ouvrir ce projet avec un agent IA. Ouvrir le fichier ' +
|
|
408
|
+
'dans un editeur qui affiche les caracteres invisibles (VS Code: "editor.renderControlCharacters") ' +
|
|
409
|
+
'pour inspecter le contenu reel. Supprimer le fichier ou nettoyer les caracteres invisibles ' +
|
|
410
|
+
'avant toute utilisation. Si deja ouvert avec un agent IA, regenerer tous les secrets touches.',
|
|
411
|
+
|
|
402
412
|
ai_agent_abuse:
|
|
403
413
|
'CRITIQUE: Un agent IA (Claude, Gemini, Q) est invoque avec des flags de bypass de securite ' +
|
|
404
414
|
'(--dangerously-skip-permissions, --yolo, --trust-all-tools). Technique s1ngularity/Nx. ' +
|
package/src/rules/index.js
CHANGED
|
@@ -914,6 +914,21 @@ const RULES = {
|
|
|
914
914
|
],
|
|
915
915
|
mitre: 'T1546'
|
|
916
916
|
},
|
|
917
|
+
aiconf_unicode_obfuscation: {
|
|
918
|
+
id: 'MUADDIB-AICONF-004',
|
|
919
|
+
name: 'Zero-Width Unicode Obfuscation in AI Config',
|
|
920
|
+
severity: 'CRITICAL',
|
|
921
|
+
confidence: 'high',
|
|
922
|
+
domain: 'malware',
|
|
923
|
+
description: 'Fichier de configuration d\'agent IA (.cursorrules, CLAUDE.md, copilot-instructions.md) contient des caracteres Unicode invisibles (zero-width, directional override, variation selectors) qui cachent des instructions a la revue humaine ou cassent des mots-cles pour echapper a la detection regex. Technique TrapDoor (mai 2026): curl|sh interspersee de U+200B passe au travers du regex /curl/ tandis que l\'agent IA execute le payload normalise.',
|
|
924
|
+
references: [
|
|
925
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
926
|
+
'https://www.aikido.dev/blog/glassworm-returns-unicode-attack-github-npm-vscode',
|
|
927
|
+
'https://trojansource.codes/',
|
|
928
|
+
'https://attack.mitre.org/techniques/T1027/'
|
|
929
|
+
],
|
|
930
|
+
mitre: 'T1027.013'
|
|
931
|
+
},
|
|
917
932
|
|
|
918
933
|
require_cache_poison: {
|
|
919
934
|
id: 'MUADDIB-AST-019',
|
package/src/scanner/ai-config.js
CHANGED
|
@@ -18,6 +18,14 @@
|
|
|
18
18
|
|
|
19
19
|
const fs = require('fs');
|
|
20
20
|
const path = require('path');
|
|
21
|
+
const { countInvisibleUnicode, stripInvisibleUnicode } = require('../shared/unicode-invisibles.js');
|
|
22
|
+
|
|
23
|
+
// Threshold above which an AI config file is flagged as ZW-Unicode-obfuscated.
|
|
24
|
+
// Lower than obfuscation.js (10) because .cursorrules / CLAUDE.md should never
|
|
25
|
+
// legitimately contain invisible codepoints — even international content uses
|
|
26
|
+
// only visible chars (CJK, accents, emoji with U+FE0F variation selector are
|
|
27
|
+
// NOT counted by countInvisibleUnicode).
|
|
28
|
+
const AI_CONFIG_ZW_THRESHOLD = 5;
|
|
21
29
|
|
|
22
30
|
// AI agent config files to scan for prompt injection (relative to project root)
|
|
23
31
|
const AI_CONFIG_FILES = [
|
|
@@ -111,7 +119,12 @@ function scanAIConfig(targetPath) {
|
|
|
111
119
|
}
|
|
112
120
|
|
|
113
121
|
const relPath = configFile;
|
|
114
|
-
|
|
122
|
+
// Normalize invisible Unicode BEFORE running regex patterns.
|
|
123
|
+
// Without this, an attacker can split keywords with U+200B (`cu<U+200B>rl`) to
|
|
124
|
+
// evade /curl\s+/ — the exact TrapDoor (May 2026) .cursorrules vector.
|
|
125
|
+
const invisibleCount = countInvisibleUnicode(content);
|
|
126
|
+
const normalized = invisibleCount > 0 ? stripInvisibleUnicode(content) : content;
|
|
127
|
+
const fileThreats = analyzeAIConfigFile(normalized, relPath, invisibleCount);
|
|
115
128
|
threats.push(...fileThreats);
|
|
116
129
|
}
|
|
117
130
|
|
|
@@ -218,14 +231,30 @@ function analyzeIDEHookFile(content, relPath) {
|
|
|
218
231
|
}
|
|
219
232
|
|
|
220
233
|
/**
|
|
221
|
-
* Analyze a single AI config file for prompt injection patterns
|
|
234
|
+
* Analyze a single AI config file for prompt injection patterns.
|
|
235
|
+
*
|
|
236
|
+
* @param {string} content - File content, already normalized (invisible Unicode stripped).
|
|
237
|
+
* @param {string} relPath - Relative path of the config file.
|
|
238
|
+
* @param {number} invisibleCount - Number of invisible Unicode codepoints in the original (pre-strip) content.
|
|
222
239
|
*/
|
|
223
|
-
function analyzeAIConfigFile(content, relPath) {
|
|
240
|
+
function analyzeAIConfigFile(content, relPath, invisibleCount) {
|
|
224
241
|
const threats = [];
|
|
225
242
|
let hasShellCommand = false;
|
|
226
243
|
let hasExfiltration = false;
|
|
227
244
|
let hasCredentialAccess = false;
|
|
228
245
|
|
|
246
|
+
// Zero-width / directional Unicode obfuscation (TrapDoor, May 2026).
|
|
247
|
+
// An attacker can hide instructions or split keywords with U+200B etc. so
|
|
248
|
+
// human reviewers see "harmless" text while the AI agent reads the payload.
|
|
249
|
+
if (invisibleCount >= AI_CONFIG_ZW_THRESHOLD) {
|
|
250
|
+
threats.push({
|
|
251
|
+
type: 'aiconf_unicode_obfuscation',
|
|
252
|
+
severity: 'CRITICAL',
|
|
253
|
+
message: `AI config contains ${invisibleCount} invisible Unicode characters (zero-width / directional / variation selectors) in ${relPath} — content was normalized before pattern matching. Possible hidden instructions or keyword-splitting evasion (TrapDoor pattern).`,
|
|
254
|
+
file: relPath
|
|
255
|
+
});
|
|
256
|
+
}
|
|
257
|
+
|
|
229
258
|
// Check shell command patterns
|
|
230
259
|
for (const pattern of SHELL_COMMAND_PATTERNS) {
|
|
231
260
|
if (pattern.regex.test(content)) {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
3
|
const { findFiles, forEachSafeFile, debugLog } = require('../utils.js');
|
|
4
|
+
const { countInvisibleUnicode } = require('../shared/unicode-invisibles.js');
|
|
4
5
|
|
|
5
6
|
// node_modules NOT excluded: detect obfuscated code in dependencies.
|
|
6
7
|
// dist/build/out/output excluded: bundled output is always flagged as isPackageOutput (LOW)
|
|
@@ -198,52 +199,4 @@ function hasLargeStringArray(content) {
|
|
|
198
199
|
return false;
|
|
199
200
|
}
|
|
200
201
|
|
|
201
|
-
/**
|
|
202
|
-
* Count invisible Unicode codepoints in content (GlassWorm detection).
|
|
203
|
-
* Covers BMP zero-width chars, variation selectors, and supplementary plane
|
|
204
|
-
* tag characters / variation selectors supplement via codePointAt iteration.
|
|
205
|
-
*
|
|
206
|
-
* Codepoints detected:
|
|
207
|
-
* - U+200B, U+200C, U+200D (zero-width space/joiner/non-joiner)
|
|
208
|
-
* - U+FEFF (BOM — only if position > 0; pos 0 is legitimate BOM)
|
|
209
|
-
* - U+2060 (word joiner), U+180E (Mongolian vowel separator)
|
|
210
|
-
* - U+FE00-U+FE0E (variation selectors — excludes U+FE0F emoji presentation selector)
|
|
211
|
-
* - U+E0100-U+E01EF (variation selectors supplement)
|
|
212
|
-
* - U+E0001-U+E007F (tag characters)
|
|
213
|
-
*/
|
|
214
|
-
function countInvisibleUnicode(content) {
|
|
215
|
-
let count = 0;
|
|
216
|
-
for (let i = 0; i < content.length; i++) {
|
|
217
|
-
const cp = content.codePointAt(i);
|
|
218
|
-
// BMP invisible chars
|
|
219
|
-
if (cp === 0x200B || cp === 0x200C || cp === 0x200D ||
|
|
220
|
-
cp === 0x2060 || cp === 0x180E) {
|
|
221
|
-
count++;
|
|
222
|
-
}
|
|
223
|
-
// BOM only suspicious after position 0
|
|
224
|
-
else if (cp === 0xFEFF && i > 0) {
|
|
225
|
-
count++;
|
|
226
|
-
}
|
|
227
|
-
// BMP variation selectors (U+FE00-U+FE0E) — excludes U+FE0F (emoji presentation selector)
|
|
228
|
-
else if (cp >= 0xFE00 && cp <= 0xFE0E) {
|
|
229
|
-
count++;
|
|
230
|
-
}
|
|
231
|
-
// Supplementary plane: variation selectors supplement (U+E0100-U+E01EF)
|
|
232
|
-
else if (cp >= 0xE0100 && cp <= 0xE01EF) {
|
|
233
|
-
count++;
|
|
234
|
-
i++; // skip surrogate pair low half
|
|
235
|
-
}
|
|
236
|
-
// Supplementary plane: tag characters (U+E0001-U+E007F)
|
|
237
|
-
else if (cp >= 0xE0001 && cp <= 0xE007F) {
|
|
238
|
-
count++;
|
|
239
|
-
i++; // skip surrogate pair low half
|
|
240
|
-
}
|
|
241
|
-
// Skip surrogate pair low half for other supplementary chars
|
|
242
|
-
else if (cp > 0xFFFF) {
|
|
243
|
-
i++;
|
|
244
|
-
}
|
|
245
|
-
}
|
|
246
|
-
return count;
|
|
247
|
-
}
|
|
248
|
-
|
|
249
202
|
module.exports = { detectObfuscation };
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Unicode invisible character helpers — shared by obfuscation.js and ai-config.js.
|
|
5
|
+
*
|
|
6
|
+
* Extracted v2.11.25 (TrapDoor campaign, mai 2026) : la fonction locale dans
|
|
7
|
+
* obfuscation.js couvrait `.js/.cjs/.mjs/.ts/.tsx/.py` mais pas les configs IA
|
|
8
|
+
* (.cursorrules, CLAUDE.md). En la partageant, ai-config.js peut normaliser le
|
|
9
|
+
* contenu avant ses regex et bloquer le vecteur "cu<U+200B>rl|sh" avec ZW
|
|
10
|
+
* interspersés dans le mot-clé.
|
|
11
|
+
*
|
|
12
|
+
* Codepoints détectés (superset du scope original obfuscation.js, qui n'incluait
|
|
13
|
+
* pas LRM/RLM ni les directional override) :
|
|
14
|
+
*
|
|
15
|
+
* Zero-width:
|
|
16
|
+
* U+200B ZWSP, U+200C ZWNJ, U+200D ZWJ
|
|
17
|
+
* U+2060 word joiner
|
|
18
|
+
* U+180E Mongolian vowel separator
|
|
19
|
+
*
|
|
20
|
+
* Directional (bidi spoofing — Trojan Source CVE-2021-42574) :
|
|
21
|
+
* U+200E LRM, U+200F RLM
|
|
22
|
+
* U+202A LRE, U+202B RLE, U+202C PDF, U+202D LRO, U+202E RLO
|
|
23
|
+
*
|
|
24
|
+
* Invisible math operators (peuvent casser un parser sans être vus) :
|
|
25
|
+
* U+2061 function application, U+2062 invisible times,
|
|
26
|
+
* U+2063 invisible separator, U+2064 invisible plus
|
|
27
|
+
*
|
|
28
|
+
* BOM (mid-text only; position 0 est légitime UTF-8 BOM) :
|
|
29
|
+
* U+FEFF
|
|
30
|
+
*
|
|
31
|
+
* Variation selectors :
|
|
32
|
+
* U+FE00-FE0E (excludes U+FE0F emoji presentation selector — légitime)
|
|
33
|
+
* U+E0100-E01EF supplementary plane variation selectors
|
|
34
|
+
*
|
|
35
|
+
* Tag characters (utilisés par GlassWorm pour encoder du payload) :
|
|
36
|
+
* U+E0001, U+E0020-E007F
|
|
37
|
+
*
|
|
38
|
+
* CJK, accents, emoji standards (avec U+FE0F) sont volontairement EXCLUS — pas
|
|
39
|
+
* de FP attendu sur du contenu international légitime.
|
|
40
|
+
*
|
|
41
|
+
* Références :
|
|
42
|
+
* - https://www.aikido.dev/blog/glassworm-returns-unicode-attack-github-npm-vscode
|
|
43
|
+
* - https://trojansource.codes/ (Trojan Source, CVE-2021-42574)
|
|
44
|
+
* - https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates (mai 2026)
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Reports whether the codepoint at position `i` is considered invisible.
|
|
49
|
+
* Sets `supplementary` true on the result if the codepoint is supplementary
|
|
50
|
+
* (caller must `i++` to skip the low surrogate half).
|
|
51
|
+
*
|
|
52
|
+
* @param {string} content
|
|
53
|
+
* @param {number} i
|
|
54
|
+
* @returns {{ invisible: boolean, supplementary: boolean }}
|
|
55
|
+
*/
|
|
56
|
+
function inspectCodepoint(content, i) {
|
|
57
|
+
const cp = content.codePointAt(i);
|
|
58
|
+
|
|
59
|
+
// BMP zero-width
|
|
60
|
+
if (cp === 0x200B || cp === 0x200C || cp === 0x200D) {
|
|
61
|
+
return { invisible: true, supplementary: false };
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// BMP directional (Trojan Source)
|
|
65
|
+
if (cp === 0x200E || cp === 0x200F ||
|
|
66
|
+
(cp >= 0x202A && cp <= 0x202E)) {
|
|
67
|
+
return { invisible: true, supplementary: false };
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// BMP word joiner & friends
|
|
71
|
+
if (cp === 0x2060 || cp === 0x180E) {
|
|
72
|
+
return { invisible: true, supplementary: false };
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// BMP invisible math operators (U+2061-2064)
|
|
76
|
+
if (cp >= 0x2061 && cp <= 0x2064) {
|
|
77
|
+
return { invisible: true, supplementary: false };
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// BOM only suspicious after position 0
|
|
81
|
+
if (cp === 0xFEFF && i > 0) {
|
|
82
|
+
return { invisible: true, supplementary: false };
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// BMP variation selectors (U+FE00-U+FE0E) — excludes U+FE0F emoji presentation
|
|
86
|
+
if (cp >= 0xFE00 && cp <= 0xFE0E) {
|
|
87
|
+
return { invisible: true, supplementary: false };
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// Supplementary plane: variation selectors supplement (U+E0100-U+E01EF)
|
|
91
|
+
if (cp >= 0xE0100 && cp <= 0xE01EF) {
|
|
92
|
+
return { invisible: true, supplementary: true };
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Supplementary plane: tag characters (U+E0001 + U+E0020-U+E007F)
|
|
96
|
+
if (cp === 0xE0001 || (cp >= 0xE0020 && cp <= 0xE007F)) {
|
|
97
|
+
return { invisible: true, supplementary: true };
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Other supplementary chars (non-invisible) — need to skip low surrogate
|
|
101
|
+
if (cp > 0xFFFF) {
|
|
102
|
+
return { invisible: false, supplementary: true };
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
return { invisible: false, supplementary: false };
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Count invisible Unicode codepoints in `content`.
|
|
110
|
+
*
|
|
111
|
+
* @param {string} content
|
|
112
|
+
* @returns {number}
|
|
113
|
+
*/
|
|
114
|
+
function countInvisibleUnicode(content) {
|
|
115
|
+
let count = 0;
|
|
116
|
+
for (let i = 0; i < content.length; i++) {
|
|
117
|
+
const { invisible, supplementary } = inspectCodepoint(content, i);
|
|
118
|
+
if (invisible) count++;
|
|
119
|
+
if (supplementary) i++; // skip low surrogate half
|
|
120
|
+
}
|
|
121
|
+
return count;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Return a copy of `content` with all invisible codepoints removed.
|
|
126
|
+
*
|
|
127
|
+
* Used to normalize text before pattern matching: prevents an attacker
|
|
128
|
+
* from splitting a keyword (`cu<U+200B>rl`) with zero-width chars to evade
|
|
129
|
+
* regex like /curl\s+/i.
|
|
130
|
+
*
|
|
131
|
+
* @param {string} content
|
|
132
|
+
* @returns {string}
|
|
133
|
+
*/
|
|
134
|
+
function stripInvisibleUnicode(content) {
|
|
135
|
+
// Fast path: if no codepoint > 0x7F, content is pure ASCII — nothing to strip.
|
|
136
|
+
let hasHighChar = false;
|
|
137
|
+
for (let i = 0; i < content.length; i++) {
|
|
138
|
+
if (content.charCodeAt(i) > 0x7F) { hasHighChar = true; break; }
|
|
139
|
+
}
|
|
140
|
+
if (!hasHighChar) return content;
|
|
141
|
+
|
|
142
|
+
let out = '';
|
|
143
|
+
for (let i = 0; i < content.length; i++) {
|
|
144
|
+
const { invisible, supplementary } = inspectCodepoint(content, i);
|
|
145
|
+
if (!invisible) {
|
|
146
|
+
// Preserve original char(s). For supplementary, copy both surrogate halves.
|
|
147
|
+
if (supplementary) {
|
|
148
|
+
out += content[i] + content[i + 1];
|
|
149
|
+
i++;
|
|
150
|
+
} else {
|
|
151
|
+
out += content[i];
|
|
152
|
+
}
|
|
153
|
+
} else if (supplementary) {
|
|
154
|
+
// Skip both surrogate halves
|
|
155
|
+
i++;
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
return out;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
module.exports = {
|
|
162
|
+
countInvisibleUnicode,
|
|
163
|
+
stripInvisibleUnicode
|
|
164
|
+
};
|