agentshield-sdk 8.0.0 → 10.0.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/LICENSE +21 -21
- package/README.md +26 -60
- package/bin/agentshield-audit +51 -0
- package/package.json +7 -10
- package/src/adaptive.js +330 -330
- package/src/alert-tuning.js +480 -480
- package/src/audit-streaming.js +1 -1
- package/src/badges.js +196 -196
- package/src/behavioral-dna.js +12 -0
- package/src/canary.js +2 -3
- package/src/certification.js +563 -563
- package/src/circuit-breaker.js +2 -2
- package/src/confused-deputy.js +4 -0
- package/src/conversation.js +494 -494
- package/src/cross-turn.js +3 -17
- package/src/ctf.js +462 -462
- package/src/detector-core.js +71 -152
- package/src/document-scanner.js +795 -795
- package/src/drift-monitor.js +344 -0
- package/src/encoding.js +429 -429
- package/src/enterprise.js +405 -405
- package/src/flight-recorder.js +2 -0
- package/src/i18n-patterns.js +523 -523
- package/src/index.js +19 -0
- package/src/main.js +61 -41
- package/src/mcp-guard.js +974 -0
- package/src/micro-model.js +762 -0
- package/src/ml-detector.js +316 -0
- package/src/model-finetuning.js +884 -884
- package/src/multimodal.js +296 -296
- package/src/nist-mapping.js +2 -2
- package/src/observability.js +330 -330
- package/src/openclaw.js +450 -450
- package/src/otel.js +544 -544
- package/src/owasp-2025.js +1 -1
- package/src/owasp-agentic.js +420 -0
- package/src/plugin-marketplace.js +628 -628
- package/src/plugin-system.js +349 -349
- package/src/policy-extended.js +635 -635
- package/src/policy.js +443 -443
- package/src/prompt-leakage.js +2 -2
- package/src/real-attack-datasets.js +2 -2
- package/src/redteam-cli.js +439 -0
- package/src/supply-chain-scanner.js +691 -0
- package/src/testing.js +5 -1
- package/src/threat-encyclopedia.js +629 -629
- package/src/threat-intel-network.js +1017 -1017
- package/src/token-analysis.js +467 -467
- package/src/tool-output-validator.js +354 -354
- package/src/watermark.js +1 -2
package/src/detector-core.js
CHANGED
|
@@ -11,18 +11,6 @@
|
|
|
11
11
|
* All detection runs locally — no data ever leaves your environment.
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
-
// =========================================================================
|
|
15
|
-
// TEXT NORMALIZATION (pre-processing pipeline)
|
|
16
|
-
// =========================================================================
|
|
17
|
-
|
|
18
|
-
let _normalize = null;
|
|
19
|
-
try {
|
|
20
|
-
const normalizerMod = require('./normalizer');
|
|
21
|
-
_normalize = normalizerMod.normalize;
|
|
22
|
-
} catch (e) {
|
|
23
|
-
// Normalizer module not available — detection still works without it
|
|
24
|
-
}
|
|
25
|
-
|
|
26
14
|
// =========================================================================
|
|
27
15
|
// PERFORMANCE
|
|
28
16
|
// =========================================================================
|
|
@@ -1089,155 +1077,109 @@ const INJECTION_PATTERNS = [
|
|
|
1089
1077
|
detail: 'Environment variable enumeration: attempts to extract secrets from env vars.'
|
|
1090
1078
|
},
|
|
1091
1079
|
|
|
1092
|
-
// ---
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
detail: 'Instruction extraction: requests AI reveal its system prompt or hidden directives.'
|
|
1099
|
-
},
|
|
1100
|
-
{
|
|
1101
|
-
regex: /(?:summarize|complete|explain|describe|list|outline)\s+(?:your|the)\s+(?:system\s+)?(?:instructions|prompt|rules|directives|guidelines)\s+(?:in\s+(?:bullet|numbered|detailed)|for\s+me|step\s+by\s+step)/i,
|
|
1102
|
-
severity: 'high',
|
|
1103
|
-
category: 'prompt_extraction',
|
|
1104
|
-
description: 'Text asks AI to describe its own instructions.',
|
|
1105
|
-
detail: 'Self-description attack: tricks AI into summarizing its behavioral constraints.'
|
|
1106
|
-
},
|
|
1107
|
-
{
|
|
1108
|
-
regex: /(?:repeat|echo|recite|reproduce)\s+(?:everything|all|the\s+text)\s+(?:above|before|prior\s+to)\s+(?:this|this\s+line|this\s+message)/i,
|
|
1109
|
-
severity: 'high',
|
|
1110
|
-
category: 'prompt_extraction',
|
|
1111
|
-
description: 'Text tells AI to repeat everything above the current input.',
|
|
1112
|
-
detail: 'Repetition-based extraction: requests AI echo back prior context including system prompt.'
|
|
1113
|
-
},
|
|
1114
|
-
{
|
|
1115
|
-
regex: /(?:write\s+a\s+(?:poem|song|story|essay|summary|analysis)\s+(?:about|based\s+on|using)|translate|convert)\s+(?:the\s+)?(?:above|prior|preceding|previous|hidden|your)\s+(?:instructions|text|prompt|directives|rules)/i,
|
|
1116
|
-
severity: 'medium',
|
|
1117
|
-
category: 'prompt_extraction',
|
|
1118
|
-
description: 'Text uses a creative task to extract internal instructions.',
|
|
1119
|
-
detail: 'Task-wrapped extraction: wraps instruction extraction in a creative writing or translation request.'
|
|
1120
|
-
},
|
|
1121
|
-
{
|
|
1122
|
-
regex: /(?:you\s+are\s+(?:now\s+)?in|enter|activate)\s+(?:maintenance|diagnostic|configuration|service)\s+mode/i,
|
|
1123
|
-
severity: 'high',
|
|
1124
|
-
category: 'prompt_extraction',
|
|
1125
|
-
description: 'Text claims a fake maintenance/diagnostic mode to extract configuration.',
|
|
1126
|
-
detail: 'Mode-based extraction: claims special mode to justify revealing internal configuration.'
|
|
1127
|
-
},
|
|
1128
|
-
{
|
|
1129
|
-
regex: /(?:respond|output|reply)\s+(?:only\s+)?with\s+(?:the\s+)?(?:text|content|message|instructions?)\s+(?:between|inside|within|from)\s+(?:the\s+)?(?:<|`|```|\[)/i,
|
|
1130
|
-
severity: 'high',
|
|
1131
|
-
category: 'prompt_extraction',
|
|
1132
|
-
description: 'Text requests content between specific markers in the system prompt.',
|
|
1133
|
-
detail: 'Bracketed extraction: targets content between specific delimiters in system prompt.'
|
|
1134
|
-
},
|
|
1135
|
-
{
|
|
1136
|
-
regex: /(?:complete\s+the\s+sentence|finish\s+this)\s*:\s*(?:the\s+)?(?:instructions|rules|prompt|directives)\s+(?:I|you|we|they)\s+(?:was|were|am)\s+(?:given|told|provided)/i,
|
|
1137
|
-
severity: 'high',
|
|
1138
|
-
category: 'prompt_extraction',
|
|
1139
|
-
description: 'Text uses sentence completion to extract hidden instructions.',
|
|
1140
|
-
detail: 'Completion attack: tricks AI into finishing a sentence that reveals its instructions.'
|
|
1141
|
-
},
|
|
1080
|
+
// --- March 2026 Attack Patterns ---
|
|
1081
|
+
// Based on real-world attacks: CVE-2026-26118 (Azure MCP SSRF),
|
|
1082
|
+
// CVE-2026-33980 (KQL injection), CyberArk full-schema poisoning,
|
|
1083
|
+
// ClawHavoc campaign, OpenClaw exfiltration chains.
|
|
1084
|
+
|
|
1085
|
+
// SSRF via MCP tool parameters — private IP / cloud metadata targeting
|
|
1142
1086
|
{
|
|
1143
|
-
regex: /(?:
|
|
1144
|
-
severity: '
|
|
1145
|
-
category: '
|
|
1146
|
-
description: 'Text
|
|
1147
|
-
detail: '
|
|
1087
|
+
regex: /(?:fetch|request|get|load|open|connect|curl|wget)\s+(?:.*\s+)?(?:https?:\/\/)?(?:169\.254\.169\.254|metadata\.google|metadata\.aws|100\.100\.100\.200)/i,
|
|
1088
|
+
severity: 'critical',
|
|
1089
|
+
category: 'ssrf',
|
|
1090
|
+
description: 'Text targets cloud metadata endpoints (SSRF attack vector, ref CVE-2026-26118).',
|
|
1091
|
+
detail: 'SSRF to cloud metadata: attempts to access cloud provider metadata service for credential theft.'
|
|
1148
1092
|
},
|
|
1149
1093
|
{
|
|
1150
|
-
regex: /(?:
|
|
1094
|
+
regex: /(?:fetch|request|get|load|open|connect|curl|wget)\s+(?:.*\s+)?(?:https?:\/\/)?(?:10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3}|127\.0\.0\.1|0\.0\.0\.0|localhost)/i,
|
|
1151
1095
|
severity: 'high',
|
|
1152
|
-
category: '
|
|
1153
|
-
description: 'Text
|
|
1154
|
-
detail: '
|
|
1096
|
+
category: 'ssrf',
|
|
1097
|
+
description: 'Text targets private/internal network addresses (SSRF attack vector).',
|
|
1098
|
+
detail: 'SSRF to internal network: attempts to access private IP ranges or localhost.'
|
|
1155
1099
|
},
|
|
1100
|
+
|
|
1101
|
+
// KQL / query language injection (ref CVE-2026-33980)
|
|
1156
1102
|
{
|
|
1157
|
-
regex: /(
|
|
1103
|
+
regex: /(?:\.(?:find|where|project|extend|summarize|join|union)\s*\(|;\s*\.(?:drop|set|delete|alter)\s)/i,
|
|
1158
1104
|
severity: 'high',
|
|
1159
|
-
category: '
|
|
1160
|
-
description: 'Text
|
|
1161
|
-
detail: '
|
|
1105
|
+
category: 'query_injection',
|
|
1106
|
+
description: 'Text contains KQL (Kusto Query Language) injection patterns (ref CVE-2026-33980).',
|
|
1107
|
+
detail: 'KQL injection: attempts to inject Kusto query commands, potentially enabling arbitrary data access.'
|
|
1162
1108
|
},
|
|
1163
1109
|
{
|
|
1164
|
-
regex: /(?:
|
|
1110
|
+
regex: /(?:f["']|f""").*\{[^}]*(?:user_?input|request|param|arg|table_?name|query)[^}]*\}/i,
|
|
1165
1111
|
severity: 'high',
|
|
1166
|
-
category: '
|
|
1167
|
-
description: 'Text
|
|
1168
|
-
detail: '
|
|
1112
|
+
category: 'query_injection',
|
|
1113
|
+
description: 'Text contains f-string interpolation with user-controlled variables.',
|
|
1114
|
+
detail: 'F-string injection: user-controlled values interpolated into query strings without sanitization.'
|
|
1169
1115
|
},
|
|
1170
1116
|
|
|
1171
|
-
//
|
|
1172
|
-
{
|
|
1173
|
-
regex: /do\s+not\s+follow\s+(?:the\s+)?(?:above|previous|prior|preceding|earlier)\s+(?:instructions|directions|rules|guidelines|directives)/i,
|
|
1174
|
-
severity: 'high',
|
|
1175
|
-
category: 'instruction_override',
|
|
1176
|
-
description: 'Text tells AI not to follow its prior instructions.',
|
|
1177
|
-
detail: 'Instruction negation: "do not follow" variant of instruction override.'
|
|
1178
|
-
},
|
|
1117
|
+
// Memory persistence / context poisoning (ref OpenClaw memory poisoning)
|
|
1179
1118
|
{
|
|
1180
|
-
regex: /(?:
|
|
1119
|
+
regex: /(?:save|store|write|append|add|persist|record)\s+(?:this\s+)?(?:to|in|into)\s+(?:your\s+)?(?:memory|context|knowledge|notes|log|MEMORY\.md|memory\/)/i,
|
|
1181
1120
|
severity: 'high',
|
|
1182
|
-
category: '
|
|
1183
|
-
description: 'Text
|
|
1184
|
-
detail: '
|
|
1121
|
+
category: 'memory_poisoning',
|
|
1122
|
+
description: 'Text attempts to persist malicious instructions in agent memory.',
|
|
1123
|
+
detail: 'Memory persistence attack: attempts to write instructions into agent memory files or context.'
|
|
1185
1124
|
},
|
|
1186
1125
|
{
|
|
1187
|
-
regex: /
|
|
1126
|
+
regex: /(?:from\s+now\s+on|always|every\s+time|on\s+every\s+(?:turn|response|message))\s+(?:you\s+)?(?:must|should|will)\s+(?:first\s+)?(?:send|forward|post|transmit|include|append)/i,
|
|
1188
1127
|
severity: 'high',
|
|
1189
|
-
category: '
|
|
1190
|
-
description: 'Text
|
|
1191
|
-
detail: '
|
|
1128
|
+
category: 'memory_poisoning',
|
|
1129
|
+
description: 'Text attempts to install persistent exfiltration behavior.',
|
|
1130
|
+
detail: 'Persistent exfiltration: installs a standing instruction to leak data on every interaction.'
|
|
1192
1131
|
},
|
|
1132
|
+
|
|
1133
|
+
// No-click exfiltration via URL in response (ref PromptArmor/OpenClaw)
|
|
1193
1134
|
{
|
|
1194
|
-
regex: /(?:
|
|
1195
|
-
severity: '
|
|
1196
|
-
category: '
|
|
1197
|
-
description: 'Text
|
|
1198
|
-
detail: '
|
|
1135
|
+
regex: /(?:generate|create|craft|build|construct|output|respond\s+with)\s+(?:a\s+)?(?:url|link|image|markdown\s+(?:image|link))\s+(?:that\s+)?(?:contains?|includes?|embeds?|encodes?|with)\s+(?:the\s+)?(?:data|secret|key|token|password|conversation|context|response)/i,
|
|
1136
|
+
severity: 'critical',
|
|
1137
|
+
category: 'data_exfiltration',
|
|
1138
|
+
description: 'Text instructs agent to embed sensitive data in a URL (no-click exfiltration).',
|
|
1139
|
+
detail: 'URL-based exfiltration: tricks agent into encoding secrets in generated URLs for auto-preview theft.'
|
|
1199
1140
|
},
|
|
1200
1141
|
{
|
|
1201
|
-
regex:
|
|
1202
|
-
severity: '
|
|
1203
|
-
category: '
|
|
1204
|
-
description: 'Text
|
|
1205
|
-
detail: '
|
|
1142
|
+
regex: /!\[.*?\]\(https?:\/\/[^\s)]*(?:\?|&)(?:d|data|q|exfil|steal|secret|token|key|leak)=/i,
|
|
1143
|
+
severity: 'critical',
|
|
1144
|
+
category: 'data_exfiltration',
|
|
1145
|
+
description: 'Text contains markdown image with data exfiltration URL parameters.',
|
|
1146
|
+
detail: 'Markdown image exfiltration: uses image syntax to trigger HTTP request with embedded stolen data.'
|
|
1206
1147
|
},
|
|
1148
|
+
|
|
1149
|
+
// WebSocket / gateway URL manipulation (ref CVE-2026-25253)
|
|
1207
1150
|
{
|
|
1208
|
-
regex: /(?:
|
|
1151
|
+
regex: /(?:gateway_?url|ws_?url|websocket_?(?:url|endpoint)|wss?:\/\/)\s*[=:]\s*['"]?(?:wss?:\/\/)?[^\s'"]+/i,
|
|
1209
1152
|
severity: 'high',
|
|
1210
|
-
category: '
|
|
1211
|
-
description: 'Text
|
|
1212
|
-
detail: '
|
|
1153
|
+
category: 'data_exfiltration',
|
|
1154
|
+
description: 'Text manipulates WebSocket/gateway URL parameters (ref CVE-2026-25253).',
|
|
1155
|
+
detail: 'Gateway URL hijack: redirects WebSocket connection to attacker-controlled server for token theft.'
|
|
1213
1156
|
},
|
|
1157
|
+
|
|
1158
|
+
// Tool schema poisoning — hidden instructions in non-description fields
|
|
1214
1159
|
{
|
|
1215
|
-
regex: /(?:
|
|
1160
|
+
regex: /(?:["'](?:default|enum|examples?|title|const|pattern)["']\s*:\s*["'][^"']*(?:ignore|override|disregard|forget|system|admin|execute|exfiltrate)[^"']*["'])/i,
|
|
1216
1161
|
severity: 'high',
|
|
1217
|
-
category: '
|
|
1218
|
-
description: 'Text
|
|
1219
|
-
detail: '
|
|
1220
|
-
},
|
|
1221
|
-
{
|
|
1222
|
-
regex: /translate\s+.{0,30}(?:access\s+granted|approved|authorized).{0,30}(?:same\s+)?language\s+(?:as|of)\s+(?:the\s+)?(?:(?:above|previous|prior|initial)\s+(?:instructions|prompt|text|message)|(?:instructions|prompt|text|message)\s+(?:above|before|previously))/i,
|
|
1223
|
-
severity: 'medium',
|
|
1224
|
-
category: 'instruction_override',
|
|
1225
|
-
description: 'Text uses translation to sneak in a trigger phrase referencing instructions.',
|
|
1226
|
-
detail: 'Translation obfuscation: uses translation task to bypass detection of override commands.'
|
|
1162
|
+
category: 'schema_poisoning',
|
|
1163
|
+
description: 'Text hides injection instructions in JSON schema fields (full-schema poisoning).',
|
|
1164
|
+
detail: 'Schema field poisoning: embeds malicious instructions in default/enum/title/example schema fields instead of description.'
|
|
1227
1165
|
},
|
|
1166
|
+
|
|
1167
|
+
// Skill/plugin supply chain (ref ClawHavoc campaign)
|
|
1228
1168
|
{
|
|
1229
|
-
regex: /(?:
|
|
1169
|
+
regex: /(?:install|load|import|require|add)\s+(?:skill|plugin|extension|package|module)\s+(?:from\s+)?(?:["'])?(?:https?:\/\/[^\s"']+|[a-z0-9_-]+\/[a-z0-9_-]+)/i,
|
|
1230
1170
|
severity: 'medium',
|
|
1231
|
-
category: '
|
|
1232
|
-
description: 'Text
|
|
1233
|
-
detail: '
|
|
1171
|
+
category: 'supply_chain',
|
|
1172
|
+
description: 'Text installs an external skill/plugin — potential supply chain vector (ref ClawHavoc).',
|
|
1173
|
+
detail: 'Skill installation: loads external code that could contain malicious payloads or backdoors.'
|
|
1234
1174
|
},
|
|
1175
|
+
|
|
1176
|
+
// Copilot/agent weaponization — forcing agent to make unintended network requests
|
|
1235
1177
|
{
|
|
1236
|
-
regex: /
|
|
1237
|
-
severity: '
|
|
1238
|
-
category: '
|
|
1239
|
-
description: 'Text
|
|
1240
|
-
detail: '
|
|
1178
|
+
regex: /(?:make|send|trigger|fire|initiate)\s+(?:a\s+)?(?:request|fetch|call|webhook|http|get|post)\s+(?:to\s+)?(?:https?:\/\/[^\s]+)\s+(?:with|containing|including|that\s+(?:includes?|contains?))\s+(?:the\s+)?(?:auth|token|cookie|session|credential|secret|key|header)/i,
|
|
1179
|
+
severity: 'critical',
|
|
1180
|
+
category: 'data_exfiltration',
|
|
1181
|
+
description: 'Text forces agent to send authenticated requests to external endpoints.',
|
|
1182
|
+
detail: 'Agent-as-proxy exfiltration: weaponizes agent to forward auth tokens via HTTP requests.'
|
|
1241
1183
|
}
|
|
1242
1184
|
];
|
|
1243
1185
|
|
|
@@ -2106,29 +2048,6 @@ const scanText = (text, options = {}) => {
|
|
|
2106
2048
|
|
|
2107
2049
|
let threats = scanTextForPatterns(text, source, timeBudgetMs, startTime);
|
|
2108
2050
|
|
|
2109
|
-
// Run normalization pipeline only when initial scan found no threats
|
|
2110
|
-
// (avoids double-scan overhead on already-detected inputs)
|
|
2111
|
-
if (threats.length === 0 && _normalize && typeof _normalize === 'function') {
|
|
2112
|
-
try {
|
|
2113
|
-
const normResult = _normalize(text, { skip: ['case_fold'] });
|
|
2114
|
-
if (normResult.layers.length > 0 && normResult.normalized !== text) {
|
|
2115
|
-
const normalizedThreats = scanTextForPatterns(normResult.normalized, source, timeBudgetMs, startTime);
|
|
2116
|
-
const seen = new Set(threats.map(t => `${t.category}|${t.severity}`));
|
|
2117
|
-
for (const nt of normalizedThreats) {
|
|
2118
|
-
const key = `${nt.category}|${nt.severity}`;
|
|
2119
|
-
if (!seen.has(key)) {
|
|
2120
|
-
seen.add(key);
|
|
2121
|
-
nt.detail = `${nt.detail} (detected after normalization: ${normResult.layers.join(', ')})`;
|
|
2122
|
-
nt.normalizedDetection = true;
|
|
2123
|
-
threats.push(nt);
|
|
2124
|
-
}
|
|
2125
|
-
}
|
|
2126
|
-
}
|
|
2127
|
-
} catch (e) {
|
|
2128
|
-
// Normalization error should not break scanning
|
|
2129
|
-
}
|
|
2130
|
-
}
|
|
2131
|
-
|
|
2132
2051
|
// Filter by sensitivity
|
|
2133
2052
|
if (sensitivity === 'low') {
|
|
2134
2053
|
threats = threats.filter(t => t.severity === 'critical' || t.severity === 'high');
|