clawmoat 0.7.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +9 -0
- package/CHANGELOG.md +18 -0
- package/CONTRIBUTING.md +4 -2
- package/DEMO.md +87 -0
- package/Dockerfile +5 -18
- package/README.md +294 -8
- package/SECURITY.md +58 -10
- package/THREAT_MODEL.md +129 -0
- package/agent/README.md +131 -0
- package/agent/index.js +471 -0
- package/agent/install-service.sh +94 -0
- package/agent/openclaw-hook.js +453 -0
- package/agent/provider-setup.js +649 -0
- package/agent/setup.js +274 -0
- package/assets/BADGE-USAGE.md +20 -0
- package/assets/clawmoat-badge.svg +21 -0
- package/bin/clawmoat.js +468 -111
- package/docs/affiliates/dashboard.html +124 -0
- package/docs/affiliates/index.html +236 -0
- package/docs/agent-install.html +183 -0
- package/docs/ai-agent-security-scanner.html +10 -6
- package/docs/badge/index.html +149 -0
- package/docs/badge/scanning.svg +23 -0
- package/docs/blog/386-malicious-skills.html +262 -0
- package/docs/blog/40000-exposed-openclaw-instances.html +201 -0
- package/docs/blog/agent-trust-protocol.html +198 -0
- package/docs/blog/ai-agent-earns-commissions.html +230 -0
- package/docs/blog/bugmageddon-agent-firewall.html +174 -0
- package/docs/blog/calculator-math.html +180 -0
- package/docs/blog/clawmoat-vs-llamafirewall-nemo-guardrails.html +229 -0
- package/docs/blog/host-guardian-launch.html +18 -8
- package/docs/blog/ibm-experts-agent-runtime-protection.html +247 -0
- package/docs/blog/index.html +211 -9
- package/docs/blog/langchain-security-tutorial.html +18 -8
- package/docs/blog/mcp-30-cves-security-crisis.html +286 -0
- package/docs/blog/meta-researcher-rogue-agent.html +201 -0
- package/docs/blog/microsoft-openclaw-workstation-security.html +235 -0
- package/docs/blog/nist-ai-agent-standards-clawmoat.html +377 -0
- package/docs/blog/oasis-websocket-hijack.html +212 -0
- package/docs/blog/ollama-openclaw-security.html +160 -0
- package/docs/blog/openclaw-enterprise-readiness-claw10.html +199 -0
- package/docs/blog/openclaw-security-reckoning-2026.html +368 -0
- package/docs/blog/owasp-agentic-ai-top10.html +18 -8
- package/docs/blog/securing-ai-agents.html +18 -8
- package/docs/blog/supply-chain-agents.html +18 -8
- package/docs/business/index.html +525 -0
- package/docs/business/install.html +261 -0
- package/docs/checklist.html +174 -0
- package/docs/compare/index.html +122 -0
- package/docs/compare/lakera/index.html +62 -0
- package/docs/compare/llm-guard/index.html +49 -0
- package/docs/compare/snyk-agent-scan/index.html +63 -0
- package/docs/compare.html +10 -6
- package/docs/dashboard/index.html +520 -0
- package/docs/finance/index.html +220 -0
- package/docs/guides/business-deployment.html +770 -0
- package/docs/hall-of-fame.html +174 -0
- package/docs/index.html +447 -154
- package/docs/install.sh +557 -0
- package/docs/integrations/langchain.html +14 -6
- package/docs/integrations/openai.html +14 -6
- package/docs/integrations/openclaw.html +55 -7
- package/docs/plans/2026-03-26-threat-intel-api.md +255 -0
- package/docs/plans/2026-04-14-bugmageddon-marketing-pack.md +329 -0
- package/docs/plans/2026-04-14-clawmoat-v1-bugmageddon.md +248 -0
- package/docs/plans/2026-04-14-v1-release-update.md +91 -0
- package/docs/plans/2026-04-19-supabase-audit.md +68 -0
- package/docs/plans/2026-05-12-sales-push.md +303 -0
- package/docs/playground/index.html +893 -0
- package/docs/playground.html +4 -7
- package/docs/privacy-policy/index.html +122 -0
- package/docs/rfcs/defense-in-depth.md +467 -0
- package/docs/scan/index.html +358 -0
- package/docs/services/case-study.html +255 -0
- package/docs/services/downloads/install-openclaw.bat +45 -0
- package/docs/services/downloads/install-openclaw.command +38 -0
- package/docs/services/downloads/install-openclaw.sh +38 -0
- package/docs/services/get-started.html +165 -0
- package/docs/services/index.html +598 -0
- package/docs/services/multi-agent-security.html +284 -0
- package/docs/services/one-pager.html +99 -0
- package/docs/services/pitch-deck.html +229 -0
- package/docs/services/roi-calculator.html +258 -0
- package/docs/sitemap.xml +192 -2
- package/docs/support/index.html +135 -0
- package/docs/templates/customer-service/HEARTBEAT.md +61 -0
- package/docs/templates/customer-service/MEMORY.md +89 -0
- package/docs/templates/customer-service/SOUL.md +41 -0
- package/docs/templates/customer-service/USER.md +56 -0
- package/docs/templates/executive/HEARTBEAT.md +86 -0
- package/docs/templates/executive/MEMORY.md +92 -0
- package/docs/templates/executive/SOUL.md +44 -0
- package/docs/templates/executive/USER.md +62 -0
- package/docs/templates/finance/HEARTBEAT.md +58 -0
- package/docs/templates/finance/MEMORY.md +87 -0
- package/docs/templates/finance/SOUL.md +38 -0
- package/docs/templates/finance/USER.md +53 -0
- package/docs/templates/index.html +115 -0
- package/docs/templates/operations/HEARTBEAT.md +63 -0
- package/docs/templates/operations/MEMORY.md +68 -0
- package/docs/templates/operations/SOUL.md +38 -0
- package/docs/templates/operations/USER.md +49 -0
- package/docs/templates/sales/HEARTBEAT.md +55 -0
- package/docs/templates/sales/MEMORY.md +89 -0
- package/docs/templates/sales/SOUL.md +34 -0
- package/docs/templates/sales/USER.md +54 -0
- package/docs/terms-of-service/index.html +122 -0
- package/eslint.config.js +32 -0
- package/evals/README.md +29 -0
- package/evals/cases.json +390 -0
- package/evals/results.md +68 -0
- package/evals/run.js +180 -0
- package/examples/basic-usage.js +38 -0
- package/examples/demo-attack/demo.js +186 -0
- package/examples/python-quickstart/README.md +54 -0
- package/examples/python-quickstart/clawmoat_client.py +167 -0
- package/examples/video-demo/README.md +14 -0
- package/examples/video-demo/scene-a-normal.js +29 -0
- package/examples/video-demo/scene-b-attack-arrives.js +31 -0
- package/examples/video-demo/scene-c-hijack.js +44 -0
- package/examples/video-demo/scene-d-clawmoat.js +46 -0
- package/integrations/crewai/README.md +32 -0
- package/integrations/crewai/clawmoat_crewai/__init__.py +17 -0
- package/integrations/crewai/clawmoat_crewai/guard.py +103 -0
- package/integrations/crewai/pyproject.toml +21 -0
- package/integrations/langchain/README.md +91 -0
- package/integrations/langchain/clawmoat_langchain/__init__.py +17 -0
- package/integrations/langchain/clawmoat_langchain/callback.py +489 -0
- package/integrations/langchain/pyproject.toml +32 -0
- package/integrations/litellm/README.md +324 -0
- package/integrations/litellm/clawmoat_litellm/__init__.py +21 -0
- package/integrations/litellm/clawmoat_litellm/callback.py +329 -0
- package/integrations/litellm/clawmoat_litellm/proxy_middleware.py +224 -0
- package/integrations/litellm/pyproject.toml +74 -0
- package/integrations/openai-agents/README.md +392 -0
- package/integrations/openai-agents/clawmoat_openai_agents/__init__.py +20 -0
- package/integrations/openai-agents/clawmoat_openai_agents/guardrail.py +431 -0
- package/integrations/openai-agents/clawmoat_openai_agents/middleware.py +311 -0
- package/integrations/openai-agents/pyproject.toml +76 -0
- package/package.json +6 -5
- package/plugins/openclaw-adapter/PHASE1.md +439 -0
- package/plugins/openclaw-adapter/README.md +103 -0
- package/plugins/openclaw-adapter/SPEC.md +1644 -0
- package/plugins/openclaw-adapter/package.json +31 -0
- package/plugins/openclaw-adapter/src/index.test.ts +226 -0
- package/plugins/openclaw-adapter/src/index.ts +140 -0
- package/plugins/openclaw-adapter/tsconfig.json +14 -0
- package/server/data/threats.json +290 -0
- package/server/index.js +224 -10
- package/src/adapters/express.js +161 -0
- package/src/adapters/index.js +92 -0
- package/src/adapters/langchain.js +185 -0
- package/src/approval/index.js +456 -0
- package/src/ban-scanner.js +200 -0
- package/src/boundary-scanner.js +296 -0
- package/src/ci-scanner.js +279 -0
- package/src/code-scanner.js +245 -0
- package/src/enforce.js +166 -0
- package/src/finance/index.js +585 -0
- package/src/finance/mcp-firewall.js +486 -0
- package/src/formatters/json.js +80 -0
- package/src/formatters/sarif.js +388 -0
- package/src/guardian/alerts.js +34 -3
- package/src/guardian/gateway-monitor.js +590 -0
- package/src/guardian/index.js +41 -2
- package/src/index.js +105 -0
- package/src/integrations/agentmesh.js +501 -0
- package/src/language-detector.js +201 -0
- package/src/mcp-scanner.js +253 -0
- package/src/multimodal/index.js +579 -0
- package/src/obfuscation-scanner.js +457 -0
- package/src/policy-engine.js +402 -0
- package/src/scanners/dependency-attacks.js +128 -0
- package/src/scanners/prompt-injection.js +18 -0
- package/src/scanners/supply-chain.js +14 -0
- package/src/templates/default-config.yml +90 -0
- package/src/vuln-ops/exploitability.js +46 -0
- package/src/watch/live-monitor.js +720 -0
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Obfuscation & Invisible Text Scanner
|
|
3
|
+
*
|
|
4
|
+
* Detects hidden/disguised content in text that may be used for prompt smuggling:
|
|
5
|
+
* - Zero-width characters (ZWJ, ZWNJ, ZWSP, WJ, etc.)
|
|
6
|
+
* - Homoglyph attacks (Cyrillic/Greek/Latin lookalikes)
|
|
7
|
+
* - Base64-encoded payloads
|
|
8
|
+
* - Unicode direction overrides (RTL/LTR tricks)
|
|
9
|
+
* - Invisible Unicode categories
|
|
10
|
+
* - HTML comment/tag injection in text
|
|
11
|
+
* - Markdown hidden content
|
|
12
|
+
* - Excessive whitespace padding
|
|
13
|
+
* - Mixed script attacks
|
|
14
|
+
*
|
|
15
|
+
* @module obfuscation-scanner
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
'use strict';
|
|
19
|
+
|
|
20
|
+
// Characters that render with no visible width. Membership is checked per
// UTF-16 code unit, which is safe because every entry is in the BMP.
const ZERO_WIDTH_CHARS = new Set(
  [
    0x200B, // Zero Width Space
    0x200C, // Zero Width Non-Joiner
    0x200D, // Zero Width Joiner
    0x2060, // Word Joiner
    0xFEFF, // BOM / Zero Width No-Break Space
    0x00AD, // Soft Hyphen
    0x034F, // Combining Grapheme Joiner
    0x061C, // Arabic Letter Mark
    0x180E, // Mongolian Vowel Separator
  ].map((codeUnit) => String.fromCharCode(codeUnit)),
);
|
|
32
|
+
|
|
33
|
+
// Bidirectional control characters (marks, embeddings, overrides, isolates and
// their terminators). All are BMP code points, so one UTF-16 unit each.
const BIDI_OVERRIDES = new Set(
  [
    0x200E, // LTR Mark
    0x200F, // RTL Mark
    0x202A, // LTR Embedding
    0x202B, // RTL Embedding
    0x202C, // Pop Directional Formatting
    0x202D, // LTR Override
    0x202E, // RTL Override
    0x2066, // LTR Isolate
    0x2067, // RTL Isolate
    0x2068, // First Strong Isolate
    0x2069, // Pop Directional Isolate
  ].map((codeUnit) => String.fromCharCode(codeUnit)),
);
|
|
47
|
+
|
|
48
|
+
// Inclusive code-point bounds of the Unicode tag characters (U+E0001-U+E007F).
// They were defined for language tagging and are abused for hiding text.
const TAG_RANGE_START = 0xE0001;
const TAG_RANGE_END = TAG_RANGE_START + 0x7E; // U+E007F
|
|
51
|
+
|
|
52
|
+
// Common homoglyph mappings (Cyrillic / Greek / fullwidth → Latin).
//
// Every key is written as a \u escape on purpose: the look-alike characters are
// visually indistinguishable from ASCII, so a literal is easy to corrupt (an
// editor or normalization step can silently turn fullwidth 'ａ' into ASCII 'a',
// which would make every ordinary Latin letter count as a homoglyph).
// Keys must never be ASCII; values are the ASCII letter the key imitates.
const HOMOGLYPHS = new Map([
  // Cyrillic → Latin
  ['\u0430', 'a'], ['\u0435', 'e'], ['\u043E', 'o'], ['\u0440', 'p'], ['\u0441', 'c'],
  ['\u0443', 'y'], ['\u0445', 'x'], ['\u0410', 'A'], ['\u0412', 'B'], ['\u0421', 'C'],
  ['\u0415', 'E'], ['\u041D', 'H'], ['\u041A', 'K'], ['\u041C', 'M'], ['\u041E', 'O'],
  ['\u0420', 'P'], ['\u0422', 'T'], ['\u0425', 'X'],
  // Greek → Latin
  ['\u03B1', 'a'], ['\u03B2', 'b'], ['\u03B5', 'e'], ['\u03B7', 'n'], ['\u03B9', 'i'],
  ['\u03BA', 'k'], ['\u03BD', 'v'], ['\u03BF', 'o'], ['\u03C1', 'p'], ['\u03C4', 't'],
  ['\u03C5', 'u'], ['\u03C7', 'x'], ['\u0391', 'A'], ['\u0392', 'B'], ['\u0395', 'E'],
  ['\u0397', 'H'], ['\u0399', 'I'], ['\u039A', 'K'], ['\u039C', 'M'], ['\u039D', 'N'],
  ['\u039F', 'O'], ['\u03A1', 'P'], ['\u03A4', 'T'], ['\u03A7', 'X'],
  // Fullwidth → Latin (U+FF41 'ａ' .. U+FF4A 'ｊ'; only a–j are covered)
  ['\uFF41', 'a'], ['\uFF42', 'b'], ['\uFF43', 'c'], ['\uFF44', 'd'], ['\uFF45', 'e'],
  ['\uFF46', 'f'], ['\uFF47', 'g'], ['\uFF48', 'h'], ['\uFF49', 'i'], ['\uFF4A', 'j'],
]);
|
|
69
|
+
|
|
70
|
+
// One character-class regex per writing system, keyed by script name.
// The patterns carry no flags, so repeated `.test()` calls are stateless.
// Key order is the order in which scripts are reported.
const SCRIPT_PATTERNS = {
  // Basic Latin letters plus Latin-1 Supplement / Latin Extended-A/B
  latin: /[\u0041-\u005A\u0061-\u007A\u00C0-\u024F]/,
  // Cyrillic block
  cyrillic: /[\u0400-\u04FF]/,
  // Greek and Coptic block
  greek: /[\u0370-\u03FF]/,
  // Arabic block
  arabic: /[\u0600-\u06FF]/,
  // CJK Unified Ideographs plus Extension A
  cjk: /[\u4E00-\u9FFF\u3400-\u4DBF]/,
  // Hangul Syllables plus Hangul Jamo
  hangul: /[\uAC00-\uD7AF\u1100-\u11FF]/,
  // Devanagari block
  devanagari: /[\u0900-\u097F]/,
};
|
|
80
|
+
|
|
81
|
+
/**
 * Run every obfuscation detector over `text` and aggregate the results.
 *
 * Detectors run in a fixed order (zero-width, bidi, tag characters, homoglyphs,
 * mixed scripts, base64, HTML, markdown, invisible Unicode); each contributes
 * at most one finding, appended in that order.
 *
 * @param {string} text - Text to scan
 * @param {Object} [opts] - Options
 * @param {number} [opts.zeroWidthThreshold=3] - Number of zero-width chars to flag
 * @param {number} [opts.homoglyphThreshold=2] - Number of homoglyphs to flag
 * @param {boolean} [opts.checkBase64=true] - Check for base64 payloads
 * @param {boolean} [opts.checkHTML=true] - Check for HTML injection
 * @param {boolean} [opts.checkMarkdown=true] - Check for markdown hiding
 * @returns {{safe: boolean, findings: Object[], score: number, maxSeverity: (string|null)}}
 *   `safe` is true when nothing was found; `score` is the sum of per-severity
 *   weights capped at 100; `maxSeverity` is the worst severity seen, or null
 *   when there are no findings.
 */
function scanObfuscation(text, opts = {}) {
  const {
    zeroWidthThreshold = 3,
    homoglyphThreshold = 2,
    checkBase64 = true,
    checkHTML = true,
    checkMarkdown = true,
  } = opts;

  const severityRank = { critical: 4, high: 3, medium: 2, low: 1 };
  const severityWeight = { critical: 40, high: 25, medium: 15, low: 5 };

  // [enabled, detector] pairs; the thunks keep optional detectors from running
  // at all when they are switched off.
  const pipeline = [
    [true, () => detectZeroWidth(text, zeroWidthThreshold)],
    [true, () => detectBidiOverrides(text)],
    [true, () => detectTagCharacters(text)],
    [true, () => detectHomoglyphs(text, homoglyphThreshold)],
    [true, () => detectMixedScripts(text)],
    [checkBase64, () => detectBase64Payloads(text)],
    [checkHTML, () => detectHTMLInjection(text)],
    [checkMarkdown, () => detectMarkdownHiding(text)],
    [true, () => detectInvisibleUnicode(text)],
  ];

  const findings = [];
  for (const [enabled, runDetector] of pipeline) {
    if (!enabled) continue;
    const finding = runDetector();
    if (finding) findings.push(finding);
  }

  // Single pass: track the worst severity and accumulate the weighted score.
  // Unknown severities rank as 0 and weigh 5.
  let worst = 'low';
  let total = 0;
  for (const { severity } of findings) {
    if ((severityRank[severity] || 0) > (severityRank[worst] || 0)) {
      worst = severity;
    }
    total += severityWeight[severity] || 5;
  }

  const hasFindings = findings.length > 0;
  return {
    safe: !hasFindings,
    findings,
    score: Math.min(100, total),
    maxSeverity: hasFindings ? worst : null,
  };
}
|
|
160
|
+
|
|
161
|
+
/**
 * Count zero-width characters (members of ZERO_WIDTH_CHARS) in `text`.
 *
 * @param {string} text - Text to inspect
 * @param {number} threshold - Minimum count that produces a finding
 * @returns {Object|null} A finding when the count reaches `threshold`, else null.
 *   Severity is 'high' above 10 hits, otherwise 'medium'; at most the first
 *   five UTF-16 positions are listed in the evidence.
 */
function detectZeroWidth(text, threshold) {
  const maxListed = 5;
  const firstPositions = [];
  let total = 0;

  for (let idx = 0; idx < text.length; idx += 1) {
    if (!ZERO_WIDTH_CHARS.has(text[idx])) continue;
    total += 1;
    if (firstPositions.length < maxListed) firstPositions.push(idx);
  }

  // Written as a negated >= so a non-numeric threshold still yields null.
  if (!(total >= threshold)) return null;

  const severity = total > 10 ? 'high' : 'medium';
  const ellipsis = total > 5 ? '...' : '';
  return {
    type: 'obfuscation',
    subtype: 'zero_width_characters',
    severity,
    confidence: Math.min(0.95, 0.5 + total * 0.05),
    evidence: `Found ${total} zero-width characters at positions: ${firstPositions.join(', ')}${ellipsis}`,
    count: total,
    recommended_action: 'strip_and_rescan',
  };
}
|
|
183
|
+
|
|
184
|
+
/**
 * Look for bidirectional control characters (members of BIDI_OVERRIDES).
 *
 * @param {string} text - Text to inspect
 * @returns {Object|null} A 'high' severity finding when at least one is
 *   present (names of the first three are listed in the evidence), else null.
 */
function detectBidiOverrides(text) {
  const maxNamed = 3;
  const sampleNames = [];
  let total = 0;

  for (let idx = 0; idx < text.length; idx += 1) {
    const unit = text[idx];
    if (!BIDI_OVERRIDES.has(unit)) continue;
    total += 1;
    const label = getBidiName(unit);
    if (sampleNames.length < maxNamed) sampleNames.push(label);
  }

  if (total === 0) return null;

  return {
    type: 'obfuscation',
    subtype: 'bidi_override',
    severity: 'high',
    confidence: 0.9,
    evidence: `Found ${total} bidirectional override(s): ${sampleNames.join(', ')}`,
    count: total,
    recommended_action: 'block',
  };
}
|
|
207
|
+
|
|
208
|
+
/**
 * Map a bidi control character to a short human-readable name.
 *
 * @param {string} char - A single character
 * @returns {string} The known name, or `U+XXXX` (uppercase hex of the first
 *   UTF-16 code unit) when the character is not in the table.
 */
function getBidiName(char) {
  const labels = {
    '\u200E': 'LTR Mark',
    '\u200F': 'RTL Mark',
    '\u202A': 'LTR Embedding',
    '\u202B': 'RTL Embedding',
    '\u202C': 'Pop Dir',
    '\u202D': 'LTR Override',
    '\u202E': 'RTL Override',
    '\u2066': 'LTR Isolate',
    '\u2067': 'RTL Isolate',
    '\u2068': 'First Strong Isolate',
    '\u2069': 'Pop Dir Isolate',
  };

  const known = labels[char];
  if (known) return known;

  const hex = char.charCodeAt(0).toString(16).toUpperCase();
  return `U+${hex}`;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Look for Unicode tag characters (code points between TAG_RANGE_START and
 * TAG_RANGE_END inclusive).
 *
 * @param {string} text - Text to inspect
 * @returns {Object|null} A 'critical' finding when any are present, else null.
 */
function detectTagCharacters(text) {
  // Spread iterates by code point; tag characters are outside the BMP and
  // would otherwise be seen as two surrogate halves.
  const tagCount = [...text].reduce((seen, ch) => {
    const codePoint = ch.codePointAt(0);
    const isTag = codePoint >= TAG_RANGE_START && codePoint <= TAG_RANGE_END;
    return isTag ? seen + 1 : seen;
  }, 0);

  if (tagCount === 0) return null;

  return {
    type: 'obfuscation',
    subtype: 'unicode_tag_characters',
    severity: 'critical',
    confidence: 0.95,
    evidence: `Found ${tagCount} Unicode tag character(s) — commonly used for steganographic hiding`,
    count: tagCount,
    recommended_action: 'block',
  };
}
|
|
238
|
+
|
|
239
|
+
/**
 * Count characters that have an entry in HOMOGLYPHS and flag the text when
 * they appear alongside genuine Latin letters.
 *
 * @param {string} text - Text to inspect
 * @param {number} threshold - Minimum number of look-alikes that produces a finding
 * @returns {Object|null} A 'high' finding when the count reaches `threshold`
 *   and SCRIPT_PATTERNS.latin matches somewhere in the text, else null. The
 *   first three look-alikes are quoted in the evidence.
 */
function detectHomoglyphs(text, threshold) {
  const maxExamples = 3;
  const samples = [];
  let hits = 0;

  for (let pos = 0; pos < text.length; pos += 1) {
    const unit = text[pos];
    const lookalike = HOMOGLYPHS.get(unit);
    if (!lookalike) continue;
    hits += 1;
    if (samples.length < maxExamples) {
      samples.push(`'${unit}' (looks like '${lookalike}') at pos ${pos}`);
    }
  }

  // Text with look-alikes but no Latin letters at all (e.g. pure Cyrillic)
  // is ordinary foreign-language text, not an attack.
  const mixedWithLatin = hits >= threshold && SCRIPT_PATTERNS.latin.test(text);
  if (!mixedWithLatin) return null;

  return {
    type: 'obfuscation',
    subtype: 'homoglyph_attack',
    severity: 'high',
    confidence: Math.min(0.9, 0.4 + hits * 0.1),
    evidence: `Found ${hits} homoglyph(s) mixed with Latin text: ${samples.join('; ')}`,
    count: hits,
    recommended_action: 'normalize_and_rescan',
  };
}
|
|
265
|
+
|
|
266
|
+
/**
 * Report text that mixes three or more of the scripts in SCRIPT_PATTERNS.
 * Two scripts are tolerated because that is common in multilingual text.
 *
 * @param {string} text - Text to inspect
 * @returns {Object|null} A 'medium' finding listing the scripts (in
 *   SCRIPT_PATTERNS key order) when at least three match, else null.
 */
function detectMixedScripts(text) {
  const minScripts = 3;
  const detectedScripts = Object.keys(SCRIPT_PATTERNS).filter((scriptName) =>
    SCRIPT_PATTERNS[scriptName].test(text),
  );

  if (detectedScripts.length < minScripts) return null;

  return {
    type: 'obfuscation',
    subtype: 'mixed_scripts',
    severity: 'medium',
    confidence: 0.6,
    evidence: `Text contains ${detectedScripts.length} different scripts: ${detectedScripts.join(', ')}`,
    scripts: detectedScripts,
    recommended_action: 'flag_for_review',
  };
}
|
|
285
|
+
|
|
286
|
+
/**
 * Find base64 tokens (32+ characters) whose decoded form is mostly printable
 * ASCII, i.e. likely an encoded text payload rather than binary data.
 *
 * A token must be preceded by start-of-line, whitespace, `=` or `:` and be
 * followed by whitespace, `,`, `.` or end-of-line.
 *
 * @param {string} text - Text to inspect
 * @returns {Object|null} A 'high' finding with the number of payloads and a
 *   preview (first 60 decoded characters) of the first one, else null.
 */
function detectBase64Payloads(text) {
  // The trailing delimiter is a lookahead so it is NOT consumed: the same
  // whitespace character can then serve as the leading delimiter of the next
  // token. Consuming it caused every second token in "tokA tokB" to be
  // skipped, letting a harmless/noise token shield a real payload.
  const b64Pattern = /(?:^|[\s=:])([A-Za-z0-9+/]{32,}={0,2})(?=[\s,.]|$)/gm;
  const nonPrintable = /[^\x20-\x7E]/g;
  const previews = [];

  let match;
  while ((match = b64Pattern.exec(text)) !== null) {
    try {
      const decoded = Buffer.from(match[1], 'base64').toString('utf8');
      // Meaningful payloads decode to a high ratio of printable characters.
      const printable = decoded.replace(nonPrintable, '').length;
      if (decoded.length > 10 && printable / decoded.length > 0.7) {
        previews.push(decoded.substring(0, 60).replace(nonPrintable, '?'));
      }
    } catch (_) {
      // Best-effort: a token that cannot be decoded is simply not a payload.
    }
  }

  if (previews.length === 0) return null;

  const first = previews[0];
  return {
    type: 'obfuscation',
    subtype: 'base64_payload',
    severity: 'high',
    confidence: 0.75,
    evidence: `Found ${previews.length} base64-encoded payload(s): "${first}${first.length >= 60 ? '...' : ''}"`,
    count: previews.length,
    recommended_action: 'decode_and_rescan',
  };
}
|
|
315
|
+
|
|
316
|
+
/**
 * Check text for HTML constructs that can hide or execute content: comments,
 * script/style/iframe tags, inline event handlers, and hidden or zero-size
 * elements.
 *
 * @param {string} text - Text to inspect
 * @returns {Object|null} A finding whose severity is the worst of the matched
 *   rules and whose `patterns` lists the matched rule names in rule order,
 *   else null when nothing matches.
 */
function detectHTMLInjection(text) {
  const rank = { critical: 3, high: 2, medium: 1 };

  // [rule name, severity, pattern]. The literals are created per call, so the
  // `g` flag's lastIndex always starts at 0 for the single test() below.
  const rules = [
    ['HTML comment', 'high', /<!--[\s\S]*?-->/g],
    ['script tag', 'critical', /<script[\s>]/gi],
    ['style tag', 'high', /<style[\s>]/gi],
    ['iframe tag', 'critical', /<iframe[\s>]/gi],
    ['img onerror', 'critical', /<img[^>]+onerror/gi],
    ['event handler', 'high', /<[a-z]+[^>]*\son\w+\s*=/gi],
    ['hidden div', 'high', /<div[^>]*style\s*=\s*["'][^"']*display\s*:\s*none/gi],
    ['zero-size text', 'high', /<span[^>]*style\s*=\s*["'][^"']*font-size\s*:\s*0/gi],
  ];

  const matched = rules.filter(([, , pattern]) => pattern.test(text));
  if (matched.length === 0) return null;

  let worst = 'medium';
  for (const [, severity] of matched) {
    if ((rank[severity] || 0) > (rank[worst] || 0)) worst = severity;
  }

  const names = matched.map(([label]) => label);
  return {
    type: 'obfuscation',
    subtype: 'html_injection',
    severity: worst,
    confidence: 0.85,
    evidence: `Found HTML injection patterns: ${names.join(', ')}`,
    patterns: names,
    recommended_action: 'strip_html',
  };
}
|
|
351
|
+
|
|
352
|
+
/**
 * Check text for markdown constructs that can carry content a reader does not
 * see: link titles, image embeds, oversized link text, HTML comments and
 * `[//]: # (...)` reference-style comments.
 *
 * @param {string} text - Text to inspect
 * @returns {Object|null} A 'medium' finding whose `patterns` lists the matched
 *   rule names in rule order, else null when nothing matches.
 */
function detectMarkdownHiding(text) {
  const patterns = [
    { re: /\[([^\]]*)\]\([^)]*\s+"[^"]*"\)/, name: 'markdown link with hidden title' },
    { re: /!\[[^\]]*\]\([^)]*\)/, name: 'image embed (potential exfil)' },
    { re: /\[([^\]]{200,})\]/, name: 'oversized link text (payload hiding)' },
    { re: /<!--[\s\S]*?-->/, name: 'HTML comment in markdown' },
    // `(?:^|\n)` rather than a bare `\n`: a reference comment on the very first
    // line has no preceding newline and used to slip through undetected.
    { re: /(?:^|\n)\s*\[\/\/\]:\s*#\s*\(/, name: 'markdown reference link comment' },
  ];

  // Only presence matters, so the patterns are flagless and test() is stateless.
  const found = patterns
    .filter(({ re }) => re.test(text))
    .map(({ name }) => name);

  if (found.length === 0) return null;

  return {
    type: 'obfuscation',
    subtype: 'markdown_hiding',
    severity: 'medium',
    confidence: 0.6,
    evidence: `Potential markdown-based content hiding: ${found.join(', ')}`,
    patterns: found,
    recommended_action: 'strip_markdown',
  };
}
|
|
380
|
+
|
|
381
|
+
/**
 * Count invisible/formatting characters that the zero-width and bidi
 * detectors do not already cover (unusual spaces, separators, specials).
 *
 * @param {string} text - Text to inspect
 * @returns {Object|null} A 'medium' finding when more than five such
 *   characters are present, else null.
 */
function detectInvisibleUnicode(text) {
  // Inclusive code-point ranges.
  const ranges = [
    [0x2000, 0x200F], // General punctuation space + formatting
    [0x2028, 0x202F], // Separators + bidi
    [0x2060, 0x2069], // Invisible operators + bidi
    [0xFFF0, 0xFFFF], // Specials
  ];
  const singles = [
    0x00A0, // Non-breaking space
    0x1680, // Ogham space
    0x3000, // Ideographic space
  ];

  const isInvisible = (codePoint) =>
    singles.includes(codePoint) ||
    ranges.some(([low, high]) => codePoint >= low && codePoint <= high);

  let total = 0;
  for (const ch of text) {
    if (!isInvisible(ch.codePointAt(0))) continue;
    // Members of the zero-width / bidi sets are reported by their own
    // detectors, so they are left out of this count.
    if (ZERO_WIDTH_CHARS.has(ch) || BIDI_OVERRIDES.has(ch)) continue;
    total += 1;
  }

  if (total <= 5) return null;

  return {
    type: 'obfuscation',
    subtype: 'invisible_unicode',
    severity: 'medium',
    confidence: 0.7,
    evidence: `Found ${total} invisible/formatting Unicode characters`,
    count: total,
    recommended_action: 'normalize',
  };
}
|
|
414
|
+
|
|
415
|
+
/**
 * Strip detected obfuscation from text (for decontamination).
 *
 * Removes zero-width characters, bidi controls and Unicode tag characters,
 * replaces every character that has a HOMOGLYPHS entry with its Latin
 * equivalent, then deletes HTML comments.
 *
 * @param {string} text - Text to clean
 * @returns {string} Cleaned text
 */
function stripObfuscation(text) {
  // One pass by code point: drop invisible characters, normalize look-alikes.
  const kept = [];
  for (const ch of text) {
    if (ZERO_WIDTH_CHARS.has(ch) || BIDI_OVERRIDES.has(ch)) continue;
    const codePoint = ch.codePointAt(0);
    if (codePoint >= TAG_RANGE_START && codePoint <= TAG_RANGE_END) continue;
    kept.push(HOMOGLYPHS.get(ch) || ch);
  }
  let clean = kept.join('');

  // Strip HTML comments until nothing changes. A single pass is not enough:
  // removing the inner comment of "<!-<!-- x -->- hidden -->" splices the
  // outer fragments into a fresh "<!-- hidden -->" that would survive.
  const htmlComment = /<!--[\s\S]*?-->/g;
  let previous;
  do {
    previous = clean;
    clean = clean.replace(htmlComment, '');
  } while (clean !== previous);

  return clean;
}
|
|
441
|
+
|
|
442
|
+
module.exports = {
|
|
443
|
+
scanObfuscation,
|
|
444
|
+
stripObfuscation,
|
|
445
|
+
detectZeroWidth,
|
|
446
|
+
detectBidiOverrides,
|
|
447
|
+
detectTagCharacters,
|
|
448
|
+
detectHomoglyphs,
|
|
449
|
+
detectMixedScripts,
|
|
450
|
+
detectBase64Payloads,
|
|
451
|
+
detectHTMLInjection,
|
|
452
|
+
detectMarkdownHiding,
|
|
453
|
+
detectInvisibleUnicode,
|
|
454
|
+
HOMOGLYPHS,
|
|
455
|
+
ZERO_WIDTH_CHARS,
|
|
456
|
+
BIDI_OVERRIDES,
|
|
457
|
+
};
|