@highflame/policy 2.1.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/guardrails/context.json +502 -0
- package/_schemas/guardrails/schema.cedarschema +150 -2
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +45 -0
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +59 -0
- package/_schemas/guardrails/templates/templates.json +12 -2
- package/_schemas/overwatch/context.json +313 -61
- package/_schemas/overwatch/schema.cedarschema +251 -133
- package/dist/explain.d.ts +30 -2
- package/dist/explain.js +43 -16
- package/dist/guardrails-context.gen.d.ts +46 -0
- package/dist/guardrails-context.gen.js +46 -0
- package/dist/guardrails-defaults.gen.js +129 -4
- package/dist/overwatch-context.gen.d.ts +23 -3
- package/dist/overwatch-context.gen.js +23 -3
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +1042 -299
- package/dist/service-schemas.gen.d.ts +2 -2
- package/dist/service-schemas.gen.js +579 -191
- package/package.json +1 -1
|
@@ -11,7 +11,7 @@ const OVERWATCH_BASELINE_DEFAULT_CEDAR = `// ===================================
|
|
|
11
11
|
// Baseline Permit Policy (Default)
|
|
12
12
|
// =============================================================================
|
|
13
13
|
// Permits all actions by default. Threat-specific forbid policies override
|
|
14
|
-
// this to block when
|
|
14
|
+
// this to block when detection engines identify issues.
|
|
15
15
|
//
|
|
16
16
|
// Cedar is default-deny: without at least one permit rule, every request
|
|
17
17
|
// is denied regardless of forbid rules. This baseline ensures the system
|
|
@@ -36,27 +36,35 @@ const OVERWATCH_SECRETS_DEFAULT_CEDAR = `// ====================================
|
|
|
36
36
|
// Secrets Detection Policy (Default)
|
|
37
37
|
// =============================================================================
|
|
38
38
|
// Detects and blocks credential leakage across prompts, tool calls, file
|
|
39
|
-
// operations, and AI response content.
|
|
40
|
-
// with pattern matching for known credential formats.
|
|
39
|
+
// operations, and AI response content. Uses multi-layered detection:
|
|
41
40
|
//
|
|
42
|
-
//
|
|
43
|
-
//
|
|
44
|
-
//
|
|
45
|
-
//
|
|
41
|
+
// 1. Detection engine boolean (contains_secrets) — fastest, broadest catch
|
|
42
|
+
// 2. Granular secret type matching (secret_types) — type-specific blocking
|
|
43
|
+
// 3. Detection rule pattern matching (detected_threats) — named rule triggers
|
|
44
|
+
// 4. Sensitive file path blocking (.env, credentials files)
|
|
45
|
+
// 5. Response content pattern matching (defense-in-depth for AI outputs)
|
|
46
|
+
//
|
|
47
|
+
// Compliance:
|
|
48
|
+
// NIST 800-53 SC-28 (Protection of Information at Rest)
|
|
49
|
+
// NIST 800-53 IA-5 (Authenticator Management)
|
|
50
|
+
// OWASP LLM07 (Insecure Plugin Design) — secrets in tool args
|
|
51
|
+
// MITRE ATT&CK T1552 (Unsecured Credentials)
|
|
52
|
+
// MITRE ATT&CK T1555 (Credentials from Password Stores)
|
|
53
|
+
// CIS Benchmark 1.4 (Secrets Management)
|
|
46
54
|
//
|
|
47
|
-
// Compliance: NIST 800-53 SC-28, IA-5 | OWASP A02 | MITRE T1552, T1555
|
|
48
55
|
// Category: secrets
|
|
49
56
|
// Namespace: Overwatch
|
|
50
57
|
// =============================================================================
|
|
51
58
|
|
|
52
59
|
// ---------------------------------------------------------------------------
|
|
53
|
-
// Section 1:
|
|
60
|
+
// Section 1: Detection Engine — Primary Secret Detection
|
|
61
|
+
// These fire when the detection pipeline identifies secrets in any content.
|
|
54
62
|
// ---------------------------------------------------------------------------
|
|
55
63
|
|
|
56
64
|
// Block prompts containing detected secrets
|
|
57
65
|
@id("secrets-block-prompts")
|
|
58
66
|
@name("Block prompts with secrets")
|
|
59
|
-
@description("Block prompts when
|
|
67
|
+
@description("Block prompts when detection engines identify API keys, tokens, or credential patterns. First line of defense against accidental secret exposure in user input.")
|
|
60
68
|
@severity("critical")
|
|
61
69
|
@tags("secrets,credentials,prompts,nist-sc-28,nist-ia-5")
|
|
62
70
|
@reject_message("Your prompt was blocked because it contains detected secrets such as API keys, tokens, or credentials. Remove all secrets before resubmitting.")
|
|
@@ -66,15 +74,15 @@ forbid (
|
|
|
66
74
|
resource
|
|
67
75
|
)
|
|
68
76
|
when {
|
|
69
|
-
context has contains_secrets && context.contains_secrets
|
|
77
|
+
context has contains_secrets && context.contains_secrets
|
|
70
78
|
};
|
|
71
79
|
|
|
72
80
|
// Block file reads and tool calls when secrets are detected
|
|
73
81
|
@id("secrets-block-reads-and-tools")
|
|
74
82
|
@name("Block file reads and tool calls with secrets")
|
|
75
|
-
@description("Prevent file reads and tool execution when secrets or credentials are detected in content")
|
|
83
|
+
@description("Prevent file reads and tool execution when secrets or credentials are detected in content. Blocks exfiltration of secrets via file operations and tool arguments.")
|
|
76
84
|
@severity("high")
|
|
77
|
-
@tags("secrets,file-access,tools,credentials,nist-sc-28")
|
|
85
|
+
@tags("secrets,file-access,tools,credentials,nist-sc-28,mitre-t1552")
|
|
78
86
|
@reject_message("This operation was blocked because secrets or credentials were detected in the content. File reads and tool calls are restricted when credential exposure is identified.")
|
|
79
87
|
forbid (
|
|
80
88
|
principal,
|
|
@@ -82,261 +90,419 @@ forbid (
|
|
|
82
90
|
resource
|
|
83
91
|
)
|
|
84
92
|
when {
|
|
85
|
-
context has contains_secrets && context.contains_secrets
|
|
93
|
+
context has contains_secrets && context.contains_secrets
|
|
86
94
|
};
|
|
87
95
|
|
|
88
|
-
//
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
@
|
|
94
|
-
@
|
|
95
|
-
@description("Block access to .env files that commonly contain secrets, API keys, and database credentials")
|
|
96
|
-
@severity("high")
|
|
97
|
-
@tags("secrets,env-files,config,nist-sc-28,mitre-t1552")
|
|
98
|
-
@reject_message("Access to .env files is blocked because they commonly contain secrets, API keys, and database credentials. Use a secrets manager instead of .env files.")
|
|
96
|
+
// Block file writes containing secrets
|
|
97
|
+
@id("secrets-block-file-writes")
|
|
98
|
+
@name("Block file writes with secrets")
|
|
99
|
+
@description("Prevent writing files that contain secrets. Stops credential persistence to disk where they could be committed to version control or accessed by other tools.")
|
|
100
|
+
@severity("critical")
|
|
101
|
+
@tags("secrets,file-write,credentials,nist-sc-28,cis-1.4")
|
|
102
|
+
@reject_message("File write was blocked because secrets or credentials were detected in the content. Credentials should never be written to files — use a secrets manager or environment variables.")
|
|
99
103
|
forbid (
|
|
100
104
|
principal,
|
|
101
|
-
action
|
|
105
|
+
action == Overwatch::Action::"write_file",
|
|
102
106
|
resource
|
|
103
107
|
)
|
|
104
108
|
when {
|
|
105
|
-
context has
|
|
109
|
+
context has contains_secrets && context.contains_secrets
|
|
106
110
|
};
|
|
107
111
|
|
|
108
112
|
// ---------------------------------------------------------------------------
|
|
109
|
-
// Section
|
|
110
|
-
//
|
|
113
|
+
// Section 2: Granular Secret Type Blocking
|
|
114
|
+
// Blocks specific high-risk credential types identified by the detection
|
|
115
|
+
// engine's pattern-matching (e.g., AWS keys, GitHub tokens, SSH keys).
|
|
111
116
|
// ---------------------------------------------------------------------------
|
|
112
117
|
|
|
113
|
-
// Block
|
|
114
|
-
@id("secrets-block-
|
|
115
|
-
@name("Block
|
|
116
|
-
@description("
|
|
118
|
+
// Block high-risk secret types across all actions
|
|
119
|
+
@id("secrets-block-high-risk-types")
|
|
120
|
+
@name("Block high-risk credential types")
|
|
121
|
+
@description("Block content containing cloud provider keys (AWS, GCP, Azure), GitHub tokens, SSH private keys, or database connection strings. These credential types pose the highest exfiltration risk.")
|
|
117
122
|
@severity("critical")
|
|
118
|
-
@tags("secrets,aws,
|
|
119
|
-
@reject_message("
|
|
123
|
+
@tags("secrets,aws,github,ssh,cloud,nist-ia-5,mitre-t1552")
|
|
124
|
+
@reject_message("Content blocked: high-risk credentials detected (cloud provider keys, GitHub tokens, SSH keys, or database credentials). Use a secrets manager — never pass credentials through AI agents.")
|
|
120
125
|
forbid (
|
|
121
126
|
principal,
|
|
122
127
|
action,
|
|
123
128
|
resource
|
|
124
129
|
)
|
|
125
130
|
when {
|
|
126
|
-
context has
|
|
127
|
-
context.
|
|
131
|
+
context has secret_types &&
|
|
132
|
+
(context.secret_types.contains("aws_access_key") ||
|
|
133
|
+
context.secret_types.contains("aws_secret_key") ||
|
|
134
|
+
context.secret_types.contains("gcp_service_account") ||
|
|
135
|
+
context.secret_types.contains("azure_client_secret") ||
|
|
136
|
+
context.secret_types.contains("github_token") ||
|
|
137
|
+
context.secret_types.contains("github_pat") ||
|
|
138
|
+
context.secret_types.contains("ssh_private_key") ||
|
|
139
|
+
context.secret_types.contains("database_url"))
|
|
128
140
|
};
|
|
129
141
|
|
|
130
|
-
// Block
|
|
131
|
-
@id("secrets-block-
|
|
132
|
-
@name("Block
|
|
133
|
-
@description("
|
|
134
|
-
@severity("
|
|
135
|
-
@tags("secrets,
|
|
136
|
-
@reject_message("
|
|
142
|
+
// Block API keys and bearer tokens across all actions
|
|
143
|
+
@id("secrets-block-api-keys")
|
|
144
|
+
@name("Block API keys and bearer tokens")
|
|
145
|
+
@description("Block content containing generic API keys, bearer tokens, JWT tokens, and OAuth credentials. These are the most commonly leaked credential types in AI agent interactions.")
|
|
146
|
+
@severity("high")
|
|
147
|
+
@tags("secrets,api-key,bearer,jwt,oauth,nist-ia-5")
|
|
148
|
+
@reject_message("Content blocked: API keys, bearer tokens, or OAuth credentials detected. These must never be passed through AI agent prompts or tool calls.")
|
|
137
149
|
forbid (
|
|
138
150
|
principal,
|
|
139
151
|
action,
|
|
140
152
|
resource
|
|
141
153
|
)
|
|
142
154
|
when {
|
|
143
|
-
context has
|
|
144
|
-
(context.
|
|
145
|
-
context.
|
|
155
|
+
context has secret_types &&
|
|
156
|
+
(context.secret_types.contains("api_key") ||
|
|
157
|
+
context.secret_types.contains("bearer_token") ||
|
|
158
|
+
context.secret_types.contains("jwt_token") ||
|
|
159
|
+
context.secret_types.contains("oauth_token") ||
|
|
160
|
+
context.secret_types.contains("oauth_secret"))
|
|
146
161
|
};
|
|
147
162
|
|
|
148
|
-
// Block
|
|
149
|
-
@id("secrets-block-
|
|
150
|
-
@name("Block
|
|
151
|
-
@description("
|
|
163
|
+
// Block when multiple secrets are detected (bulk exposure)
|
|
164
|
+
@id("secrets-block-bulk-exposure")
|
|
165
|
+
@name("Block bulk secret exposure")
|
|
166
|
+
@description("Block content when 3 or more distinct secrets are found. Multiple secrets in a single request indicates either a configuration dump, .env file paste, or credential harvesting attempt.")
|
|
152
167
|
@severity("critical")
|
|
153
|
-
@tags("secrets,
|
|
154
|
-
@reject_message("
|
|
168
|
+
@tags("secrets,bulk,data-exfiltration,nist-sc-28,mitre-t1552")
|
|
169
|
+
@reject_message("Content blocked: multiple credentials detected (3+). This appears to be a bulk credential exposure — configuration dumps and credential lists must never be passed through AI agents.")
|
|
155
170
|
forbid (
|
|
156
171
|
principal,
|
|
157
172
|
action,
|
|
158
173
|
resource
|
|
159
174
|
)
|
|
160
175
|
when {
|
|
161
|
-
context has
|
|
162
|
-
(context.response_content like "*ghp_*" ||
|
|
163
|
-
context.response_content like "*github_pat_*" ||
|
|
164
|
-
context.response_content like "*ghs_*")
|
|
176
|
+
context has secret_count && context.secret_count >= 3
|
|
165
177
|
};
|
|
166
178
|
|
|
167
|
-
//
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
179
|
+
// ---------------------------------------------------------------------------
|
|
180
|
+
// Section 3: Detection Rule Pattern Matching
|
|
181
|
+
// Catches specific named detection rules that fire for credential exposure.
|
|
182
|
+
// ---------------------------------------------------------------------------
|
|
183
|
+
|
|
184
|
+
// Block content flagged by detection engine credential rules
|
|
185
|
+
@id("secrets-block-detected-credentials")
|
|
186
|
+
@name("Block detected credential patterns")
|
|
187
|
+
@description("Block content flagged by detection engine rules for credential exposure, API key leaks, JWT tokens, and bearer tokens. Defense-in-depth behind contains_secrets.")
|
|
171
188
|
@severity("critical")
|
|
172
|
-
@tags("secrets,
|
|
173
|
-
@reject_message("
|
|
189
|
+
@tags("secrets,credentials,jwt,bearer,nist-ia-5,mitre-t1552")
|
|
190
|
+
@reject_message("Content blocked: detection engines identified credential patterns including secret exposure, credential leaks, API keys, or token exposure.")
|
|
174
191
|
forbid (
|
|
175
192
|
principal,
|
|
176
193
|
action,
|
|
177
194
|
resource
|
|
178
195
|
)
|
|
179
196
|
when {
|
|
180
|
-
context has
|
|
181
|
-
(context.
|
|
182
|
-
context.
|
|
183
|
-
context.
|
|
197
|
+
context has detected_threats &&
|
|
198
|
+
(context.detected_threats.contains("secret_exposure") ||
|
|
199
|
+
context.detected_threats.contains("credential_leak") ||
|
|
200
|
+
context.detected_threats.contains("api_key_exposure") ||
|
|
201
|
+
context.detected_threats.contains("jwt_token_exposure") ||
|
|
202
|
+
context.detected_threats.contains("bearer_token_leak"))
|
|
184
203
|
};
|
|
185
204
|
|
|
186
205
|
// ---------------------------------------------------------------------------
|
|
187
|
-
// Section 4:
|
|
188
|
-
//
|
|
206
|
+
// Section 4: Sensitive File Path Protection
|
|
207
|
+
// Blocks access to files that commonly contain secrets.
|
|
189
208
|
// ---------------------------------------------------------------------------
|
|
190
209
|
|
|
191
|
-
// Block
|
|
192
|
-
@id("secrets-block-
|
|
193
|
-
@name("Block
|
|
194
|
-
@description("Block
|
|
195
|
-
@severity("
|
|
196
|
-
@tags("secrets,
|
|
197
|
-
@reject_message("
|
|
210
|
+
// Block .env file access across all operations
|
|
211
|
+
@id("secrets-block-env-files")
|
|
212
|
+
@name("Block .env file access")
|
|
213
|
+
@description("Block access to .env files that commonly contain secrets, API keys, and database credentials. Environment files are the #1 source of accidental credential exposure in development workflows.")
|
|
214
|
+
@severity("high")
|
|
215
|
+
@tags("secrets,env-files,config,nist-sc-28,mitre-t1552,cis-1.4")
|
|
216
|
+
@reject_message("Access to .env files is blocked because they commonly contain secrets, API keys, and database credentials. Use a secrets manager instead of .env files.")
|
|
198
217
|
forbid (
|
|
199
218
|
principal,
|
|
200
|
-
action,
|
|
219
|
+
action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
|
|
201
220
|
resource
|
|
202
221
|
)
|
|
203
222
|
when {
|
|
204
|
-
context has
|
|
205
|
-
(context.yara_threats.contains("secret_exposure") ||
|
|
206
|
-
context.yara_threats.contains("credential_leak") ||
|
|
207
|
-
context.yara_threats.contains("api_key_exposure") ||
|
|
208
|
-
context.yara_threats.contains("jwt_token_exposure") ||
|
|
209
|
-
context.yara_threats.contains("bearer_token_leak"))
|
|
223
|
+
context has path && context.path like "*.env*"
|
|
210
224
|
};
|
|
225
|
+
|
|
226
|
+
// Block access to known credential files
|
|
227
|
+
@id("secrets-block-credential-files")
|
|
228
|
+
@name("Block credential file access")
|
|
229
|
+
@description("Block access to common credential files: .netrc, .npmrc, .pypirc, credentials, config files in cloud provider directories. These files often contain hardcoded tokens and passwords.")
|
|
230
|
+
@severity("high")
|
|
231
|
+
@tags("secrets,credential-files,config,nist-sc-28,mitre-t1555")
|
|
232
|
+
@reject_message("Access to this credential file is blocked. Files like .netrc, .npmrc, .pypirc, and cloud provider config files commonly contain hardcoded credentials.")
|
|
233
|
+
forbid (
|
|
234
|
+
principal,
|
|
235
|
+
action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
|
|
236
|
+
resource
|
|
237
|
+
)
|
|
238
|
+
when {
|
|
239
|
+
context has path &&
|
|
240
|
+
(context.path like "*/.netrc" ||
|
|
241
|
+
context.path like "*/.npmrc" ||
|
|
242
|
+
context.path like "*/.pypirc" ||
|
|
243
|
+
context.path like "*/.docker/config.json" ||
|
|
244
|
+
context.path like "*/.kube/config" ||
|
|
245
|
+
context.path like "*/.config/gcloud/*" ||
|
|
246
|
+
context.path like "*/credentials.json" ||
|
|
247
|
+
context.path like "*/service-account*.json")
|
|
248
|
+
};
|
|
249
|
+
|
|
250
|
+
// NOTE: Response content secret detection (AWS keys, GitHub tokens, private keys
|
|
251
|
+
// in AI outputs) is handled by the detection engine's secrets scanner, which
|
|
252
|
+
// analyzes all content types including tool responses. The contains_secrets and
|
|
253
|
+
// secret_types rules above cover this case.
|
|
211
254
|
`;
|
|
212
255
|
const OVERWATCH_PII_DEFAULT_CEDAR = `// =============================================================================
|
|
213
256
|
// PII Detection Policy (Default)
|
|
214
257
|
// =============================================================================
|
|
215
|
-
// Detects and blocks personally identifiable information
|
|
216
|
-
//
|
|
217
|
-
//
|
|
258
|
+
// Detects and blocks personally identifiable information across prompts, tool
|
|
259
|
+
// calls, file operations, and AI responses. Uses multi-layered detection:
|
|
260
|
+
//
|
|
261
|
+
// 1. PII boolean flag (pii_detected) — broadest catch from detection engine
|
|
262
|
+
// 2. Granular PII type matching (pii_types) — type-specific blocking
|
|
263
|
+
// 3. ML classifier confidence (pii_confidence) — catches novel PII patterns
|
|
264
|
+
// 4. Detection rule triggers (detected_threats) — named rule matches
|
|
265
|
+
// 5. File operation PII blocking — prevents PII persistence to disk
|
|
266
|
+
//
|
|
267
|
+
// Compliance:
|
|
268
|
+
// PCI DSS 3.4, 4.1 (Payment Card Data)
|
|
269
|
+
// GDPR Art. 32 (Security of Processing)
|
|
270
|
+
// HIPAA §164.312 (Technical Safeguards)
|
|
271
|
+
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
272
|
+
// CCPA §1798.150 (Data Protection)
|
|
273
|
+
// OWASP LLM06 (Sensitive Information Disclosure)
|
|
218
274
|
//
|
|
219
|
-
// Compliance: PCI DSS 3.4, 4.1 | NIST 800-53 SI-4 | GDPR Art. 32
|
|
220
275
|
// Category: pii
|
|
221
276
|
// Namespace: Overwatch
|
|
222
277
|
// =============================================================================
|
|
223
278
|
|
|
224
|
-
//
|
|
279
|
+
// ---------------------------------------------------------------------------
|
|
280
|
+
// Section 1: Detection Engine — Primary PII Detection
|
|
281
|
+
// Fires when the detection pipeline identifies PII in any content.
|
|
282
|
+
// ---------------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
// Block prompts containing any detected PII
|
|
285
|
+
@id("pii-block-any-detected")
|
|
286
|
+
@name("Block prompts with PII")
|
|
287
|
+
@description("Block prompts when the detection engine identifies any PII patterns. This is the broadest PII catch — fires before type-specific rules.")
|
|
288
|
+
@severity("critical")
|
|
289
|
+
@tags("pii,privacy,data-protection,gdpr-art-32,owasp-llm06")
|
|
290
|
+
@reject_message("Your prompt was blocked because personally identifiable information was detected. Remove all PII (names, addresses, SSNs, credit cards, etc.) before resubmitting.")
|
|
291
|
+
forbid (
|
|
292
|
+
principal,
|
|
293
|
+
action == Overwatch::Action::"process_prompt",
|
|
294
|
+
resource
|
|
295
|
+
)
|
|
296
|
+
when {
|
|
297
|
+
context has pii_detected && context.pii_detected
|
|
298
|
+
};
|
|
299
|
+
|
|
300
|
+
// Block tool calls containing PII
|
|
301
|
+
@id("pii-block-tool-calls")
|
|
302
|
+
@name("Block tool calls with PII")
|
|
303
|
+
@description("Prevent tool execution when PII patterns are detected in tool arguments or content. Stops PII from being passed to external tools, MCP servers, or shell commands.")
|
|
304
|
+
@severity("high")
|
|
305
|
+
@tags("pii,tools,data-protection,owasp-llm06")
|
|
306
|
+
@reject_message("Tool execution was blocked because personally identifiable information was detected in the content. PII must be removed before tool calls are permitted.")
|
|
307
|
+
forbid (
|
|
308
|
+
principal,
|
|
309
|
+
action == Overwatch::Action::"call_tool",
|
|
310
|
+
resource
|
|
311
|
+
)
|
|
312
|
+
when {
|
|
313
|
+
context has pii_detected && context.pii_detected
|
|
314
|
+
};
|
|
315
|
+
|
|
316
|
+
// ---------------------------------------------------------------------------
|
|
317
|
+
// Section 2: Granular PII Type Blocking
|
|
318
|
+
// Blocks specific PII types based on regulatory requirements.
|
|
319
|
+
// ---------------------------------------------------------------------------
|
|
320
|
+
|
|
321
|
+
// Block credit card numbers (PCI DSS compliance)
|
|
225
322
|
@id("pii-block-credit-cards")
|
|
226
323
|
@name("Block credit card numbers")
|
|
227
|
-
@description("Detect and block content containing credit card number patterns
|
|
324
|
+
@description("Detect and block content containing credit card number patterns. PCI DSS 3.4 requires that PANs are rendered unreadable — AI agents must never process raw card numbers.")
|
|
228
325
|
@severity("critical")
|
|
229
|
-
@tags("pci,credit-card,payment,compliance,pci-dss-3.4")
|
|
230
|
-
@reject_message("
|
|
326
|
+
@tags("pci,credit-card,payment,compliance,pci-dss-3.4,pci-dss-4.1")
|
|
327
|
+
@reject_message("Content blocked: credit card number patterns detected. Sharing payment card data through AI agents violates PCI DSS requirements. Use tokenized card references instead.")
|
|
231
328
|
forbid (
|
|
232
329
|
principal,
|
|
233
|
-
action
|
|
330
|
+
action,
|
|
234
331
|
resource
|
|
235
332
|
)
|
|
236
333
|
when {
|
|
237
|
-
context has
|
|
334
|
+
(context has pii_types && context.pii_types.contains("credit_card")) ||
|
|
335
|
+
(context has detected_threats && context.detected_threats.contains("credit_card"))
|
|
238
336
|
};
|
|
239
337
|
|
|
240
|
-
// Block
|
|
338
|
+
// Block Social Security Numbers
|
|
241
339
|
@id("pii-block-ssn")
|
|
242
340
|
@name("Block Social Security Numbers")
|
|
243
|
-
@description("Detect and block content containing SSN patterns (XXX-XX-XXXX
|
|
341
|
+
@description("Detect and block content containing SSN patterns (XXX-XX-XXXX and variants). SSNs are high-value identity theft targets — exposure through AI agents is a critical privacy violation.")
|
|
244
342
|
@severity("critical")
|
|
245
|
-
@tags("ssn,identity,privacy,compliance")
|
|
246
|
-
@reject_message("
|
|
343
|
+
@tags("ssn,identity,privacy,compliance,nist-si-4")
|
|
344
|
+
@reject_message("Content blocked: Social Security Number patterns detected. SSNs are protected personal identifiers that must never be shared through AI agents.")
|
|
247
345
|
forbid (
|
|
248
346
|
principal,
|
|
249
|
-
action
|
|
347
|
+
action,
|
|
250
348
|
resource
|
|
251
349
|
)
|
|
252
350
|
when {
|
|
253
|
-
context has
|
|
351
|
+
(context has pii_types && context.pii_types.contains("ssn")) ||
|
|
352
|
+
(context has detected_threats && context.detected_threats.contains("ssn"))
|
|
254
353
|
};
|
|
255
354
|
|
|
256
|
-
// Block
|
|
257
|
-
@id("pii-block-
|
|
258
|
-
@name("Block
|
|
259
|
-
@description("Block content
|
|
260
|
-
@severity("
|
|
261
|
-
@tags("
|
|
262
|
-
@reject_message("
|
|
355
|
+
// Block medical/health records (HIPAA compliance)
|
|
356
|
+
@id("pii-block-health-data")
|
|
357
|
+
@name("Block health information")
|
|
358
|
+
@description("Block content containing medical record numbers, health insurance IDs, or other Protected Health Information (PHI). HIPAA §164.312 requires technical safeguards for PHI.")
|
|
359
|
+
@severity("critical")
|
|
360
|
+
@tags("phi,hipaa,health,medical,compliance,hipaa-164.312")
|
|
361
|
+
@reject_message("Content blocked: Protected Health Information (PHI) detected. Health data must not be processed through AI agents per HIPAA requirements.")
|
|
263
362
|
forbid (
|
|
264
363
|
principal,
|
|
265
|
-
action
|
|
364
|
+
action,
|
|
266
365
|
resource
|
|
267
366
|
)
|
|
268
367
|
when {
|
|
269
|
-
context has
|
|
368
|
+
context has pii_types &&
|
|
369
|
+
(context.pii_types.contains("medical_record") ||
|
|
370
|
+
context.pii_types.contains("health_insurance_id"))
|
|
371
|
+
};
|
|
372
|
+
|
|
373
|
+
// Block bulk PII exposure (multiple PII items)
|
|
374
|
+
@id("pii-block-bulk-exposure")
|
|
375
|
+
@name("Block bulk PII exposure")
|
|
376
|
+
@description("Block content containing 3 or more PII matches. Multiple PII items in a single request indicates a data dump, CSV paste, or data exfiltration attempt.")
|
|
377
|
+
@severity("critical")
|
|
378
|
+
@tags("pii,bulk,data-exfiltration,gdpr-art-32,ccpa")
|
|
379
|
+
@reject_message("Content blocked: multiple PII items detected (3+). Bulk personal data must never be processed through AI agents. Use data masking or tokenization for batch operations.")
|
|
380
|
+
forbid (
|
|
381
|
+
principal,
|
|
382
|
+
action,
|
|
383
|
+
resource
|
|
384
|
+
)
|
|
385
|
+
when {
|
|
386
|
+
context has pii_count && context.pii_count >= 3
|
|
270
387
|
};
|
|
271
388
|
|
|
272
|
-
//
|
|
389
|
+
// ---------------------------------------------------------------------------
|
|
390
|
+
// Section 3: ML Classifier — Novel PII Detection
|
|
391
|
+
// Catches PII patterns that escape regex detection using ML classifiers.
|
|
392
|
+
// ---------------------------------------------------------------------------
|
|
393
|
+
|
|
394
|
+
// Block content with high PII classifier confidence
|
|
273
395
|
@id("pii-block-high-confidence")
|
|
274
396
|
@name("Block high-confidence PII")
|
|
275
|
-
@description("Block content when PII classifier confidence exceeds threshold (80/100)")
|
|
397
|
+
@description("Block content when the ML PII classifier confidence exceeds threshold (80/100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss.")
|
|
276
398
|
@severity("critical")
|
|
277
|
-
@tags("pii,confidence,privacy,compliance")
|
|
278
|
-
@reject_message("
|
|
399
|
+
@tags("pii,confidence,privacy,compliance,ml-classifier")
|
|
400
|
+
@reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence. Even if specific PII types aren't identified, the content appears to contain personal data.")
|
|
279
401
|
forbid (
|
|
280
402
|
principal,
|
|
281
|
-
action
|
|
403
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
282
404
|
resource
|
|
283
405
|
)
|
|
284
406
|
when {
|
|
285
407
|
context has pii_confidence && context.pii_confidence >= 80
|
|
286
408
|
};
|
|
287
409
|
|
|
288
|
-
//
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
410
|
+
// ---------------------------------------------------------------------------
|
|
411
|
+
// Section 4: Detection Rule Pattern Matching
|
|
412
|
+
// Catches PII detected by named detection rules.
|
|
413
|
+
// ---------------------------------------------------------------------------
|
|
414
|
+
|
|
415
|
+
// Block content with PII threat category
|
|
416
|
+
@id("pii-block-threat-category")
|
|
417
|
+
@name("Block PII threat category")
|
|
418
|
+
@description("Block content when threat categorization identifies PII. Defense-in-depth behind the pii_detected boolean — catches cases where PII is flagged at the threat aggregation layer.")
|
|
292
419
|
@severity("high")
|
|
293
|
-
@tags("pii,
|
|
294
|
-
@reject_message("
|
|
420
|
+
@tags("pii,privacy,data-protection,gdpr")
|
|
421
|
+
@reject_message("Content blocked: threat scanners detected personally identifiable information. Remove all PII before resubmitting.")
|
|
295
422
|
forbid (
|
|
296
423
|
principal,
|
|
297
|
-
action
|
|
424
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
298
425
|
resource
|
|
299
426
|
)
|
|
300
427
|
when {
|
|
301
428
|
context has threat_categories && context.threat_categories.contains("pii")
|
|
302
429
|
};
|
|
430
|
+
|
|
431
|
+
// ---------------------------------------------------------------------------
|
|
432
|
+
// Section 5: File Operation PII Blocking
|
|
433
|
+
// Prevents PII from being read from or written to disk.
|
|
434
|
+
// ---------------------------------------------------------------------------
|
|
435
|
+
|
|
436
|
+
// Block file operations containing PII
|
|
437
|
+
@id("pii-block-file-ops")
|
|
438
|
+
@name("Block file operations with PII")
|
|
439
|
+
@description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
|
|
440
|
+
@severity("high")
|
|
441
|
+
@tags("pii,file-ops,data-protection,gdpr-art-32,nist-si-4")
|
|
442
|
+
@reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
|
|
443
|
+
forbid (
|
|
444
|
+
principal,
|
|
445
|
+
action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
|
|
446
|
+
resource
|
|
447
|
+
)
|
|
448
|
+
when {
|
|
449
|
+
context has pii_detected && context.pii_detected
|
|
450
|
+
};
|
|
303
451
|
`;
|
|
304
452
|
const OVERWATCH_SEMANTIC_DEFAULT_CEDAR = `// =============================================================================
|
|
305
453
|
// Semantic Threat Detection Policy (Default)
|
|
306
454
|
// =============================================================================
|
|
307
455
|
// Detects and blocks prompt injection, jailbreak attempts, and high-severity
|
|
308
|
-
// AI security threats
|
|
309
|
-
//
|
|
456
|
+
// AI security threats. Uses multi-layered detection:
|
|
457
|
+
//
|
|
458
|
+
// 1. Detection engine rule triggers (detected_threats) — pattern-based
|
|
459
|
+
// 2. ML classifier confidence scores (injection_confidence, jailbreak_confidence)
|
|
460
|
+
// 3. Threat severity aggregation (max_threat_severity, highest_severity)
|
|
461
|
+
// 4. Cross-action enforcement (prompts + tool calls + file operations)
|
|
462
|
+
//
|
|
463
|
+
// Compliance:
|
|
464
|
+
// OWASP LLM01 (Prompt Injection) — direct + indirect
|
|
465
|
+
// OWASP LLM02 (Insecure Output Handling) — response manipulation
|
|
466
|
+
// OWASP ASI01 (Agent Goal Hijack) — behavioral manipulation
|
|
467
|
+
// MITRE ATLAS AML.T0051 (LLM Prompt Injection)
|
|
468
|
+
// MITRE ATLAS AML.T0054 (LLM Jailbreak)
|
|
469
|
+
// NIST 800-53 SI-3 (Malicious Code Protection)
|
|
470
|
+
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
310
471
|
//
|
|
311
|
-
// Compliance: NIST 800-53 SI-3, SI-4 | OWASP LLM Top 10: LLM01, LLM02
|
|
312
|
-
// MITRE ATLAS: AML.T0051 (LLM Prompt Injection)
|
|
313
472
|
// Category: semantic
|
|
314
473
|
// Namespace: Overwatch
|
|
315
474
|
// =============================================================================
|
|
316
475
|
|
|
317
|
-
//
|
|
476
|
+
// ---------------------------------------------------------------------------
|
|
477
|
+
// Section 1: Prompt Injection Detection
|
|
478
|
+
// Blocks direct prompt injection — adversarial input designed to override
|
|
479
|
+
// system instructions and hijack agent behavior.
|
|
480
|
+
// Ref: OWASP LLM01, MITRE AML.T0051, 62% of LLM apps vulnerable (2024)
|
|
481
|
+
// ---------------------------------------------------------------------------
|
|
482
|
+
|
|
483
|
+
// Block content with prompt injection patterns detected by rules
|
|
318
484
|
@id("semantic-block-injection")
|
|
319
485
|
@name("Block prompt injection")
|
|
320
|
-
@description("
|
|
486
|
+
@description("Block prompts and tool calls when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, delimiter injection, and other manipulation techniques in both user input and tool arguments (OWASP LLM01).")
|
|
321
487
|
@severity("critical")
|
|
322
|
-
@tags("injection,security,
|
|
323
|
-
@reject_message("
|
|
488
|
+
@tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
|
|
489
|
+
@reject_message("Content was blocked because prompt injection patterns were detected. This is a security measure to prevent manipulation of AI agent behavior (OWASP LLM01).")
|
|
324
490
|
forbid (
|
|
325
491
|
principal,
|
|
326
|
-
action
|
|
492
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
327
493
|
resource
|
|
328
494
|
)
|
|
329
495
|
when {
|
|
330
|
-
context has
|
|
496
|
+
context has detected_threats && context.detected_threats.contains("prompt_injection")
|
|
331
497
|
};
|
|
332
498
|
|
|
333
|
-
// Block
|
|
499
|
+
// Block content with high ML injection confidence
|
|
334
500
|
@id("semantic-block-injection-score")
|
|
335
501
|
@name("Block high-confidence injection")
|
|
336
|
-
@description("Block content when injection classifier confidence exceeds threshold (75/100)")
|
|
502
|
+
@description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques that evade pattern-based detection — polymorphic payloads, encoding tricks, and obfuscated instructions.")
|
|
337
503
|
@severity("critical")
|
|
338
|
-
@tags("injection,
|
|
339
|
-
@reject_message("Your
|
|
504
|
+
@tags("injection,ml-classifier,security,owasp-llm01,mitre-aml-t0051")
|
|
505
|
+
@reject_message("Your content was blocked because the ML classifier detected prompt injection with high confidence. This appears to be an attempt to manipulate agent behavior.")
|
|
340
506
|
forbid (
|
|
341
507
|
principal,
|
|
342
508
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -346,29 +512,36 @@ when {
|
|
|
346
512
|
context has injection_confidence && context.injection_confidence >= 75
|
|
347
513
|
};
|
|
348
514
|
|
|
349
|
-
//
|
|
515
|
+
// ---------------------------------------------------------------------------
|
|
516
|
+
// Section 2: Jailbreak Detection
|
|
517
|
+
// Blocks jailbreak attempts — adversarial input designed to bypass AI safety
|
|
518
|
+
// guardrails and elicit restricted outputs.
|
|
519
|
+
// Ref: OWASP LLM02, MITRE AML.T0054, DAN/JailbreakChat/etc.
|
|
520
|
+
// ---------------------------------------------------------------------------
|
|
521
|
+
|
|
522
|
+
// Block prompts with jailbreak attempts detected by rules
|
|
350
523
|
@id("semantic-block-jailbreak")
|
|
351
524
|
@name("Block jailbreak attempts")
|
|
352
|
-
@description("
|
|
525
|
+
@description("Block prompts when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
|
|
353
526
|
@severity("critical")
|
|
354
|
-
@tags("jailbreak,bypass,security,owasp-llm02,baseline")
|
|
355
|
-
@reject_message("Your prompt was blocked because jailbreak
|
|
527
|
+
@tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
|
|
528
|
+
@reject_message("Your prompt was blocked because jailbreak patterns were detected. This is a security measure to prevent circumvention of AI safety controls (OWASP LLM02).")
|
|
356
529
|
forbid (
|
|
357
530
|
principal,
|
|
358
531
|
action == Overwatch::Action::"process_prompt",
|
|
359
532
|
resource
|
|
360
533
|
)
|
|
361
534
|
when {
|
|
362
|
-
context has
|
|
535
|
+
context has detected_threats && context.detected_threats.contains("jailbreak")
|
|
363
536
|
};
|
|
364
537
|
|
|
365
|
-
// Block prompts with high jailbreak confidence
|
|
538
|
+
// Block content (prompts and tool calls) with high ML jailbreak confidence
|
|
366
539
|
@id("semantic-block-jailbreak-score")
|
|
367
540
|
@name("Block high-confidence jailbreak")
|
|
368
|
-
@description("Block content when jailbreak classifier confidence exceeds threshold (75/100)")
|
|
541
|
+
@description("Block content when the ML jailbreak classifier confidence exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation, encoded payloads, and novel prompt structures.")
|
|
369
542
|
@severity("critical")
|
|
370
|
-
@tags("jailbreak,
|
|
371
|
-
@reject_message("Your
|
|
543
|
+
@tags("jailbreak,ml-classifier,security,owasp-llm02,mitre-aml-t0054")
|
|
544
|
+
@reject_message("Your content was blocked because the ML classifier detected a jailbreak attempt with high confidence. This appears to be an attempt to bypass safety guardrails.")
|
|
372
545
|
forbid (
|
|
373
546
|
principal,
|
|
374
547
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -378,81 +551,103 @@ when {
|
|
|
378
551
|
context has jailbreak_confidence && context.jailbreak_confidence >= 75
|
|
379
552
|
};
|
|
380
553
|
|
|
381
|
-
//
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
554
|
+
// ---------------------------------------------------------------------------
|
|
555
|
+
// Section 3: Threat Severity Aggregation
|
|
556
|
+
// Blocks based on aggregated threat severity from all detection engines.
|
|
557
|
+
// These act as catch-all rules for threats that don't match specific patterns.
|
|
558
|
+
// ---------------------------------------------------------------------------
|
|
559
|
+
|
|
560
|
+
// Block any content with critical severity threats
|
|
561
|
+
@id("semantic-block-critical")
|
|
562
|
+
@name("Block critical threats")
|
|
563
|
+
@description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source.")
|
|
564
|
+
@severity("critical")
|
|
565
|
+
@tags("critical,baseline,security,catch-all")
|
|
566
|
+
@reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
|
|
388
567
|
forbid (
|
|
389
568
|
principal,
|
|
390
|
-
action
|
|
569
|
+
action,
|
|
391
570
|
resource
|
|
392
571
|
)
|
|
393
572
|
when {
|
|
394
|
-
context has
|
|
395
|
-
context.threat_categories.contains("semantic") &&
|
|
396
|
-
context.max_threat_severity >= 3
|
|
573
|
+
context has highest_severity && context.highest_severity == "critical"
|
|
397
574
|
};
|
|
398
575
|
|
|
399
|
-
// Block prompts with
|
|
400
|
-
@id("semantic-block-
|
|
401
|
-
@name("Block
|
|
402
|
-
@description("Block
|
|
403
|
-
@severity("
|
|
404
|
-
@tags("
|
|
405
|
-
@reject_message("Your prompt was blocked because security scanners detected
|
|
576
|
+
// Block prompts with high severity semantic threats
|
|
577
|
+
@id("semantic-block-high-severity")
|
|
578
|
+
@name("Block high severity threats")
|
|
579
|
+
@description("Block prompts when threat detection reports high severity (>= 3) in semantic categories. Catches threats that are individually below critical but collectively indicate adversarial intent.")
|
|
580
|
+
@severity("high")
|
|
581
|
+
@tags("semantic,severity,security,defense-in-depth")
|
|
582
|
+
@reject_message("Your prompt was blocked because security scanners detected high severity issues in the content. Review your prompt for manipulative or adversarial patterns.")
|
|
406
583
|
forbid (
|
|
407
584
|
principal,
|
|
408
585
|
action == Overwatch::Action::"process_prompt",
|
|
409
586
|
resource
|
|
410
587
|
)
|
|
411
588
|
when {
|
|
412
|
-
context has
|
|
589
|
+
context has threat_categories && context has max_threat_severity &&
|
|
590
|
+
context.threat_categories.contains("semantic") &&
|
|
591
|
+
context.max_threat_severity >= 3
|
|
413
592
|
};
|
|
414
593
|
|
|
415
|
-
// Block tool calls with
|
|
416
|
-
@id("semantic-block-
|
|
417
|
-
@name("Block tool calls
|
|
418
|
-
@description("
|
|
419
|
-
@severity("
|
|
420
|
-
@tags("
|
|
421
|
-
@reject_message("Tool execution was blocked because
|
|
594
|
+
// Block tool calls with multiple concurrent threats
|
|
595
|
+
@id("semantic-block-multi-threat-tools")
|
|
596
|
+
@name("Block multi-threat tool calls")
|
|
597
|
+
@description("Block tool execution when multiple distinct threats are detected simultaneously (3+). Multiple concurrent threats in a tool call strongly indicate an adversarial attack chain.")
|
|
598
|
+
@severity("high")
|
|
599
|
+
@tags("multi-threat,tools,security,defense-in-depth")
|
|
600
|
+
@reject_message("Tool execution was blocked because multiple security threats were detected simultaneously. This pattern indicates a potential attack chain.")
|
|
422
601
|
forbid (
|
|
423
602
|
principal,
|
|
424
603
|
action == Overwatch::Action::"call_tool",
|
|
425
604
|
resource
|
|
426
605
|
)
|
|
427
606
|
when {
|
|
428
|
-
context has
|
|
607
|
+
context has threat_count && context.threat_count >= 3
|
|
429
608
|
};
|
|
430
609
|
`;
|
|
431
610
|
const OVERWATCH_TOOLS_DEFAULT_CEDAR = `// =============================================================================
|
|
432
611
|
// Tool Permissioning Policy (Default)
|
|
433
612
|
// =============================================================================
|
|
434
613
|
// Controls access to IDE tools, shell execution, file system paths, and MCP
|
|
435
|
-
// operations.
|
|
436
|
-
//
|
|
614
|
+
// operations. Enforces least-privilege for agent tool usage with multi-layered
|
|
615
|
+
// controls:
|
|
616
|
+
//
|
|
617
|
+
// 1. Dangerous tool blocking (shell, command execution, destructive ops)
|
|
618
|
+
// 2. Sensitive system path protection (credentials, system dirs)
|
|
619
|
+
// 3. Tool risk scoring (computed risk assessment)
|
|
620
|
+
// 4. Tool category enforcement (safe/sensitive/dangerous classification)
|
|
621
|
+
// 5. Threat-based tool blocking (threat severity gates)
|
|
622
|
+
// 6. Command injection detection (reverse shells, code execution, etc.)
|
|
623
|
+
//
|
|
624
|
+
// Compliance:
|
|
625
|
+
// NIST 800-53 AC-3 (Access Enforcement)
|
|
626
|
+
// NIST 800-53 AC-6 (Least Privilege)
|
|
627
|
+
// NIST 800-53 CM-7 (Least Functionality)
|
|
628
|
+
// OWASP LLM06 (Excessive Agency) — agent tool access control
|
|
629
|
+
// OWASP ASI02 (Tool Misuse) — unauthorized tool operations
|
|
630
|
+
// MITRE ATT&CK T1059 (Command and Scripting Interpreter)
|
|
631
|
+
// MITRE ATT&CK T1005 (Data from Local System)
|
|
632
|
+
// MITRE ATT&CK T1552 (Unsecured Credentials)
|
|
437
633
|
//
|
|
438
|
-
// Compliance: NIST 800-53 AC-3, AC-6, CM-7 | OWASP A01, A03
|
|
439
|
-
// MITRE ATT&CK T1059 (Command/Scripting Interpreter)
|
|
440
|
-
// MITRE ATT&CK T1005 (Data from Local System)
|
|
441
634
|
// Category: tools
|
|
442
635
|
// Namespace: Overwatch
|
|
443
636
|
// =============================================================================
|
|
444
637
|
|
|
445
638
|
// ---------------------------------------------------------------------------
|
|
446
639
|
// Section 1: Dangerous Tool Blocking
|
|
640
|
+
// Blocks tools classified as inherently dangerous for agent use.
|
|
641
|
+
// Ref: OWASP LLM06, MITRE T1059
|
|
447
642
|
// ---------------------------------------------------------------------------
|
|
448
643
|
|
|
449
644
|
// Block shell and command execution tools
|
|
450
645
|
@id("tools-block-shell-execution")
|
|
451
646
|
@name("Block shell and command execution")
|
|
452
|
-
@description("Block direct shell, bash, and command execution tools
|
|
647
|
+
@description("Block direct shell, bash, and command execution tools. Unrestricted shell access is the #1 risk in AI coding agents — enables command injection, data exfiltration, and arbitrary code execution (MITRE T1059).")
|
|
453
648
|
@severity("critical")
|
|
454
|
-
@tags("shell,command-injection,execution,nist-cm-7,mitre-t1059,baseline")
|
|
455
|
-
@reject_message("Tool execution was blocked
|
|
649
|
+
@tags("shell,command-injection,execution,nist-cm-7,mitre-t1059,owasp-llm06,baseline")
|
|
650
|
+
@reject_message("Tool execution was blocked: direct shell and command execution tools (shell, bash, terminal) are restricted to prevent command injection attacks (MITRE T1059). Use specific, scoped tools instead.")
|
|
456
651
|
forbid (
|
|
457
652
|
principal,
|
|
458
653
|
action == Overwatch::Action::"call_tool",
|
|
@@ -465,16 +660,18 @@ when {
|
|
|
465
660
|
context.tool_name == "sh" ||
|
|
466
661
|
context.tool_name == "terminal" ||
|
|
467
662
|
context.tool_name == "system.exec" ||
|
|
468
|
-
context.tool_name == "process.spawn"
|
|
663
|
+
context.tool_name == "process.spawn" ||
|
|
664
|
+
context.tool_name == "cmd" ||
|
|
665
|
+
context.tool_name == "powershell")
|
|
469
666
|
};
|
|
470
667
|
|
|
471
668
|
// Block destructive file operations
|
|
472
669
|
@id("tools-block-destructive-ops")
|
|
473
670
|
@name("Block destructive file operations")
|
|
474
|
-
@description("Block file deletion and other destructive
|
|
671
|
+
@description("Block file deletion, directory removal, and other destructive operations. Agents should not have delete access by default — destructive operations require explicit human approval.")
|
|
475
672
|
@severity("high")
|
|
476
|
-
@tags("file,delete,destructive,nist-ac-3")
|
|
477
|
-
@reject_message("Tool execution was blocked
|
|
673
|
+
@tags("file,delete,destructive,nist-ac-3,owasp-asi02")
|
|
674
|
+
@reject_message("Tool execution was blocked: destructive file operations (delete, rmdir, unlink) are restricted to prevent data loss. Request explicit human approval for destructive actions.")
|
|
478
675
|
forbid (
|
|
479
676
|
principal,
|
|
480
677
|
action == Overwatch::Action::"call_tool",
|
|
@@ -484,20 +681,25 @@ when {
|
|
|
484
681
|
context has tool_name &&
|
|
485
682
|
(context.tool_name == "fs.delete" ||
|
|
486
683
|
context.tool_name == "fs.rmdir" ||
|
|
487
|
-
context.tool_name == "fs.unlink"
|
|
684
|
+
context.tool_name == "fs.unlink" ||
|
|
685
|
+
context.tool_name == "fs.remove" ||
|
|
686
|
+
context.tool_name == "delete_file" ||
|
|
687
|
+
context.tool_name == "remove_directory")
|
|
488
688
|
};
|
|
489
689
|
|
|
490
690
|
// ---------------------------------------------------------------------------
|
|
491
|
-
// Section 2: Sensitive Path
|
|
691
|
+
// Section 2: Sensitive System Path Protection
|
|
692
|
+
// Blocks access to system directories, credential files, and sensitive paths.
|
|
693
|
+
// Ref: MITRE T1005, T1552
|
|
492
694
|
// ---------------------------------------------------------------------------
|
|
493
695
|
|
|
494
|
-
// Block access to
|
|
495
|
-
@id("tools-block-
|
|
496
|
-
@name("Block
|
|
497
|
-
@description("Prevent access to system directories,
|
|
696
|
+
// Block access to system directories
|
|
697
|
+
@id("tools-block-system-paths")
|
|
698
|
+
@name("Block system directory access")
|
|
699
|
+
@description("Prevent access to sensitive system directories (/etc, /proc, /sys, /root, /var). These directories contain system configuration, process information, and credentials that agents must never access.")
|
|
498
700
|
@severity("high")
|
|
499
701
|
@tags("file,path,system,security,nist-ac-6,mitre-t1005")
|
|
500
|
-
@reject_message("Access
|
|
702
|
+
@reject_message("Access blocked: this path targets a sensitive system directory. AI agents are restricted from accessing /etc, /proc, /sys, /root, and /var directories.")
|
|
501
703
|
forbid (
|
|
502
704
|
principal,
|
|
503
705
|
action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
|
|
@@ -506,28 +708,104 @@ forbid (
|
|
|
506
708
|
when {
|
|
507
709
|
context has path &&
|
|
508
710
|
(context.path like "/etc/*" ||
|
|
509
|
-
context.path like "/var/*" ||
|
|
510
711
|
context.path like "/proc/*" ||
|
|
511
712
|
context.path like "/sys/*" ||
|
|
512
713
|
context.path like "/root/*" ||
|
|
513
|
-
context.path like "
|
|
714
|
+
context.path like "/var/log/*" ||
|
|
715
|
+
context.path like "/var/run/*")
|
|
716
|
+
};
|
|
717
|
+
|
|
718
|
+
// Block access to credential and key directories
|
|
719
|
+
@id("tools-block-credential-paths")
|
|
720
|
+
@name("Block credential directory access")
|
|
721
|
+
@description("Prevent access to SSH keys, cloud provider credentials, GPG keys, and other authentication material directories. These are primary targets for credential theft (MITRE T1552).")
|
|
722
|
+
@severity("critical")
|
|
723
|
+
@tags("file,credentials,ssh,aws,security,nist-ac-6,mitre-t1552")
|
|
724
|
+
@reject_message("Access blocked: this path targets a credential or key directory (.ssh, .aws, .gnupg, .config/gcloud). AI agents must never access authentication material.")
|
|
725
|
+
forbid (
|
|
726
|
+
principal,
|
|
727
|
+
action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
|
|
728
|
+
resource
|
|
729
|
+
)
|
|
730
|
+
when {
|
|
731
|
+
context has path &&
|
|
732
|
+
(context.path like "*/.ssh/*" ||
|
|
514
733
|
context.path like "*/.aws/*" ||
|
|
515
734
|
context.path like "*/.gnupg/*" ||
|
|
735
|
+
context.path like "*/.config/gcloud/*" ||
|
|
736
|
+
context.path like "*/.azure/*" ||
|
|
516
737
|
context.path like "*.pem" ||
|
|
517
738
|
context.path like "*/id_rsa*" ||
|
|
518
|
-
context.path like "*/id_ed25519*"
|
|
739
|
+
context.path like "*/id_ed25519*" ||
|
|
740
|
+
context.path like "*/id_ecdsa*")
|
|
519
741
|
};
|
|
520
742
|
|
|
521
743
|
// ---------------------------------------------------------------------------
|
|
522
|
-
// Section 3:
|
|
744
|
+
// Section 3: Tool Risk Scoring
|
|
745
|
+
// Uses computed tool risk scores from the detection engine to dynamically
|
|
746
|
+
// assess and block risky tool operations.
|
|
523
747
|
// ---------------------------------------------------------------------------
|
|
524
748
|
|
|
525
|
-
// Block
|
|
749
|
+
// Block tools with very high computed risk
|
|
750
|
+
@id("tools-block-high-risk-score")
|
|
751
|
+
@name("Block high-risk tool operations")
|
|
752
|
+
@description("Block tool operations when the computed risk score exceeds 90/100. The risk score combines tool type, argument analysis, context, and historical behavior into a single metric.")
|
|
753
|
+
@severity("critical")
|
|
754
|
+
@tags("tool-risk,dynamic,security,owasp-llm06,owasp-asi02")
|
|
755
|
+
@reject_message("Tool execution blocked: this operation scored 90+ on the risk assessment. The combination of tool type, arguments, and context indicates a high-risk operation.")
|
|
756
|
+
forbid (
|
|
757
|
+
principal,
|
|
758
|
+
action == Overwatch::Action::"call_tool",
|
|
759
|
+
resource
|
|
760
|
+
)
|
|
761
|
+
when {
|
|
762
|
+
context has tool_risk_score && context.tool_risk_score >= 90
|
|
763
|
+
};
|
|
764
|
+
|
|
765
|
+
// Block tools classified as dangerous
|
|
766
|
+
@id("tools-block-dangerous-category")
|
|
767
|
+
@name("Block dangerous tool category")
|
|
768
|
+
@description("Block all tools classified as 'dangerous' by the detection engine. The dangerous category includes tools with unrestricted system access, code execution, or network capabilities.")
|
|
769
|
+
@severity("critical")
|
|
770
|
+
@tags("tool-category,dangerous,security,owasp-llm06,nist-ac-6")
|
|
771
|
+
@reject_message("Tool execution blocked: this tool is classified as 'dangerous' due to its unrestricted system access, code execution, or network capabilities. Use a safer alternative.")
|
|
772
|
+
forbid (
|
|
773
|
+
principal,
|
|
774
|
+
action == Overwatch::Action::"call_tool",
|
|
775
|
+
resource
|
|
776
|
+
)
|
|
777
|
+
when {
|
|
778
|
+
context has tool_category && context.tool_category == "dangerous"
|
|
779
|
+
};
|
|
780
|
+
|
|
781
|
+
// Block sensitive tools when any threat is detected (stricter than the severity-based gate in Section 4)
|
|
782
|
+
@id("tools-block-sensitive-with-threats")
|
|
783
|
+
@name("Block sensitive tools with threats")
|
|
784
|
+
@description("Block sensitive tools (file write, shell, network) when any threats are detected. Sensitive tools with concurrent threats indicate an attack leveraging tool capabilities for malicious purposes.")
|
|
785
|
+
@severity("high")
|
|
786
|
+
@tags("tool-category,sensitive,security,owasp-asi02,defense-in-depth")
|
|
787
|
+
@reject_message("Sensitive tool execution blocked: threats were detected alongside a sensitive tool operation. Sensitive tools require zero threat context to execute.")
|
|
788
|
+
forbid (
|
|
789
|
+
principal,
|
|
790
|
+
action == Overwatch::Action::"call_tool",
|
|
791
|
+
resource
|
|
792
|
+
)
|
|
793
|
+
when {
|
|
794
|
+
context has tool_is_sensitive && context.tool_is_sensitive &&
|
|
795
|
+
context has threat_count && context.threat_count > 0
|
|
796
|
+
};
|
|
797
|
+
|
|
798
|
+
// ---------------------------------------------------------------------------
|
|
799
|
+
// Section 4: Threat-Based Tool Blocking
|
|
800
|
+
// Blocks tool calls based on threat severity from detection engines.
|
|
801
|
+
// ---------------------------------------------------------------------------
|
|
802
|
+
|
|
803
|
+
// Block tool calls with high severity threats
|
|
526
804
|
@id("tools-block-high-severity-threats")
|
|
527
805
|
@name("Block tool calls with high severity threats")
|
|
528
|
-
@description("Prevent tool execution when high or critical severity threats are detected in content")
|
|
806
|
+
@description("Prevent tool execution when high or critical severity threats (>= 3) are detected in content. Tools must not execute when the content they operate on is flagged as dangerous.")
|
|
529
807
|
@severity("high")
|
|
530
|
-
@tags("tools,threats,severity,security")
|
|
808
|
+
@tags("tools,threats,severity,security,defense-in-depth")
|
|
531
809
|
@reject_message("Tool execution was blocked because high or critical severity threats were detected in the content by security scanners.")
|
|
532
810
|
forbid (
|
|
533
811
|
principal,
|
|
@@ -538,25 +816,62 @@ when {
|
|
|
538
816
|
context has threat_count && context has max_threat_severity &&
|
|
539
817
|
context.threat_count > 0 && context.max_threat_severity >= 3
|
|
540
818
|
};
|
|
819
|
+
|
|
820
|
+
// ---------------------------------------------------------------------------
|
|
821
|
+
// Section 5: Command Injection Detection
|
|
822
|
+
// Blocks tool calls containing command injection patterns in arguments.
|
|
823
|
+
// Ref: AIShellJack (41-84% success rate, 314 payloads)
|
|
824
|
+
// ---------------------------------------------------------------------------
|
|
825
|
+
|
|
826
|
+
// Block detected command injection patterns
|
|
827
|
+
@id("tools-block-command-injection")
|
|
828
|
+
@name("Block command injection in tool calls")
|
|
829
|
+
@description("Block tool calls when command injection patterns are detected in arguments — reverse shells, privilege escalation, code execution, and data exfiltration commands. Ref: AIShellJack attack (41-84% success rate).")
|
|
830
|
+
@severity("critical")
|
|
831
|
+
@tags("command-injection,shell,security,mitre-t1059,owasp-asi02")
|
|
832
|
+
@reject_message("Tool execution blocked: command injection pattern detected in tool arguments. This may be a shell injection attack attempting to execute unauthorized commands.")
|
|
833
|
+
forbid (
|
|
834
|
+
principal,
|
|
835
|
+
action == Overwatch::Action::"call_tool",
|
|
836
|
+
resource
|
|
837
|
+
)
|
|
838
|
+
when {
|
|
839
|
+
context has detected_threats &&
|
|
840
|
+
context.detected_threats.contains("command_injection")
|
|
841
|
+
};
|
|
541
842
|
`;
|
|
542
843
|
const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
|
|
543
844
|
// Content Safety Policy (Default)
|
|
544
845
|
// =============================================================================
|
|
545
846
|
// Detects and blocks violent, harmful, hateful, sexual, and profane content
|
|
546
|
-
// using trust/safety classification scores
|
|
847
|
+
// using trust/safety ML classification scores.
|
|
848
|
+
//
|
|
849
|
+
// The detection engine runs multiple ML classifiers (toxicity, content safety)
|
|
850
|
+
// and produces normalized scores (0-100) for each category. These policies
|
|
851
|
+
// enforce enterprise-appropriate content thresholds.
|
|
852
|
+
//
|
|
853
|
+
// Compliance:
|
|
854
|
+
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
855
|
+
//   EU AI Act Art. 50 (Transparency Obligations; numbered Art. 52 in the 2021 proposal)
|
|
856
|
+
// OWASP LLM02 (Insecure Output Handling) — unsafe content generation
|
|
857
|
+
// ISO 42001 (AI Management System) — responsible AI use
|
|
547
858
|
//
|
|
548
|
-
// Compliance: NIST 800-53 SI-4 | EU AI Act Art. 52
|
|
549
859
|
// Category: trust_safety
|
|
550
860
|
// Namespace: Overwatch
|
|
551
861
|
// =============================================================================
|
|
552
862
|
|
|
553
|
-
//
|
|
863
|
+
// ---------------------------------------------------------------------------
|
|
864
|
+
// Section 1: Violence & Weapons
|
|
865
|
+
// Blocks content promoting, describing, or instructing violence and weapons.
|
|
866
|
+
// ---------------------------------------------------------------------------
|
|
867
|
+
|
|
868
|
+
// Block content with high violence score
|
|
554
869
|
@id("ts-block-violence")
|
|
555
870
|
@name("Block violent content")
|
|
556
|
-
@description("Block content when violence detection score exceeds threshold (80/100)")
|
|
871
|
+
@description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language.")
|
|
557
872
|
@severity("critical")
|
|
558
|
-
@tags("violence,content-safety,trust-safety,nist-si-4")
|
|
559
|
-
@reject_message("
|
|
873
|
+
@tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
|
|
874
|
+
@reject_message("Content blocked: violent content detected. AI agents in enterprise environments must not process or generate violent content. Please rephrase without violence-related language.")
|
|
560
875
|
forbid (
|
|
561
876
|
principal,
|
|
562
877
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -566,13 +881,13 @@ when {
|
|
|
566
881
|
context has violence_score && context.violence_score >= 80
|
|
567
882
|
};
|
|
568
883
|
|
|
569
|
-
// Block
|
|
884
|
+
// Block content with weapons references
|
|
570
885
|
@id("ts-block-weapons")
|
|
571
886
|
@name("Block weapons content")
|
|
572
|
-
@description("Block content when weapons detection score exceeds threshold (80/100)")
|
|
887
|
+
@description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed weapon specifications.")
|
|
573
888
|
@severity("critical")
|
|
574
889
|
@tags("weapons,content-safety,trust-safety,nist-si-4")
|
|
575
|
-
@reject_message("
|
|
890
|
+
@reject_message("Content blocked: weapons-related content detected. AI agents must not process weapons manufacturing, procurement, or detailed specification content.")
|
|
576
891
|
forbid (
|
|
577
892
|
principal,
|
|
578
893
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -582,13 +897,19 @@ when {
|
|
|
582
897
|
context has weapons_score && context.weapons_score >= 80
|
|
583
898
|
};
|
|
584
899
|
|
|
585
|
-
//
|
|
900
|
+
// ---------------------------------------------------------------------------
|
|
901
|
+
// Section 2: Hate Speech & Discrimination
|
|
902
|
+
// Blocks hateful, discriminatory, and dehumanizing content.
|
|
903
|
+
// Lower threshold (75) — enterprises have zero tolerance for hate speech.
|
|
904
|
+
// ---------------------------------------------------------------------------
|
|
905
|
+
|
|
906
|
+
// Block hate speech content
|
|
586
907
|
@id("ts-block-hate-speech")
|
|
587
908
|
@name("Block hate speech")
|
|
588
|
-
@description("Block content when hate speech detection score exceeds threshold (75/100)")
|
|
909
|
+
@description("Block content when the ML hate speech detection score exceeds threshold (75/100). Lower threshold than other categories because enterprises have zero tolerance for discriminatory content. Catches slurs, dehumanizing language, and targeted harassment.")
|
|
589
910
|
@severity("critical")
|
|
590
|
-
@tags("hate-speech,content-safety,trust-safety,nist-si-4")
|
|
591
|
-
@reject_message("
|
|
911
|
+
@tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
|
|
912
|
+
@reject_message("Content blocked: hate speech or discriminatory content detected. AI agents in enterprise environments must not process or generate hateful, discriminatory, or dehumanizing content.")
|
|
592
913
|
forbid (
|
|
593
914
|
principal,
|
|
594
915
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -598,13 +919,18 @@ when {
|
|
|
598
919
|
context has hate_speech_score && context.hate_speech_score >= 75
|
|
599
920
|
};
|
|
600
921
|
|
|
601
|
-
//
|
|
922
|
+
// ---------------------------------------------------------------------------
|
|
923
|
+
// Section 3: Criminal Content
|
|
924
|
+
// Blocks content related to criminal activities and illegal operations.
|
|
925
|
+
// ---------------------------------------------------------------------------
|
|
926
|
+
|
|
927
|
+
// Block criminal content
|
|
602
928
|
@id("ts-block-crime")
|
|
603
929
|
@name("Block criminal content")
|
|
604
|
-
@description("Block content when criminal activity detection score exceeds threshold (80/100)")
|
|
930
|
+
@description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and content promoting criminal behavior.")
|
|
605
931
|
@severity("high")
|
|
606
932
|
@tags("crime,content-safety,trust-safety,nist-si-4")
|
|
607
|
-
@reject_message("
|
|
933
|
+
@reject_message("Content blocked: criminal activity content detected. AI agents must not process content related to illegal activities, fraud, or other criminal behavior.")
|
|
608
934
|
forbid (
|
|
609
935
|
principal,
|
|
610
936
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -614,13 +940,18 @@ when {
|
|
|
614
940
|
context has crime_score && context.crime_score >= 80
|
|
615
941
|
};
|
|
616
942
|
|
|
617
|
-
//
|
|
943
|
+
// ---------------------------------------------------------------------------
|
|
944
|
+
// Section 4: Sexual Content
|
|
945
|
+
// Blocks sexually explicit or inappropriate content.
|
|
946
|
+
// ---------------------------------------------------------------------------
|
|
947
|
+
|
|
948
|
+
// Block sexual content
|
|
618
949
|
@id("ts-block-sexual")
|
|
619
950
|
@name("Block sexual content")
|
|
620
|
-
@description("Block content when sexual content detection score exceeds threshold (80/100)")
|
|
951
|
+
@description("Block content when the ML sexual content detection score exceeds threshold (80/100). Ensures enterprise AI agents do not process or generate sexually explicit material.")
|
|
621
952
|
@severity("high")
|
|
622
|
-
@tags("sexual,content-safety,trust-safety,eu-ai-act")
|
|
623
|
-
@reject_message("
|
|
953
|
+
@tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
|
|
954
|
+
@reject_message("Content blocked: sexual content detected. AI agents in enterprise environments must not process sexually explicit material.")
|
|
624
955
|
forbid (
|
|
625
956
|
principal,
|
|
626
957
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -630,13 +961,19 @@ when {
|
|
|
630
961
|
context has sexual_score && context.sexual_score >= 80
|
|
631
962
|
};
|
|
632
963
|
|
|
633
|
-
//
|
|
964
|
+
// ---------------------------------------------------------------------------
|
|
965
|
+
// Section 5: Profanity
|
|
966
|
+
// Blocks excessive profanity in enterprise environments.
|
|
967
|
+
// Higher threshold (90) — allows normal expression while blocking abuse.
|
|
968
|
+
// ---------------------------------------------------------------------------
|
|
969
|
+
|
|
970
|
+
// Block excessive profanity
|
|
634
971
|
@id("ts-block-profanity")
|
|
635
972
|
@name("Block profanity")
|
|
636
|
-
@description("Block content when profanity detection score exceeds threshold (90/100)")
|
|
973
|
+
@description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold than other safety categories — allows normal expression while blocking abusive or harassing language patterns.")
|
|
637
974
|
@severity("medium")
|
|
638
975
|
@tags("profanity,content-safety,trust-safety")
|
|
639
|
-
@reject_message("
|
|
976
|
+
@reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner appropriate for enterprise AI interactions.")
|
|
640
977
|
forbid (
|
|
641
978
|
principal,
|
|
642
979
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
@@ -649,22 +986,34 @@ when {
|
|
|
649
986
|
const OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR = `// =============================================================================
|
|
650
987
|
// Agent Security Policy (Default)
|
|
651
988
|
// =============================================================================
|
|
652
|
-
// Detects and blocks tool poisoning, rug pull attacks,
|
|
653
|
-
//
|
|
654
|
-
//
|
|
989
|
+
// Detects and blocks tool poisoning, rug pull attacks, indirect prompt injection,
|
|
990
|
+
// and MCP supply chain threats targeting AI coding agents.
|
|
991
|
+
//
|
|
992
|
+
// These are agentic AI-specific attack vectors (OWASP Agentic Top 10) where tool
|
|
993
|
+
// descriptions, server responses, or behavioral drift manipulate agent behavior.
|
|
994
|
+
//
|
|
995
|
+
// Compliance:
|
|
996
|
+
// OWASP LLM01 (Prompt Injection) | OWASP LLM06 (Excessive Agency)
|
|
997
|
+
// OWASP ASI01 (Agent Goal Hijack) | OWASP ASI02 (Tool Misuse)
|
|
998
|
+
// OWASP ASI04 (Supply Chain) | OWASP MCP01-05
|
|
999
|
+
// MITRE ATLAS AML.T0051 (Prompt Injection) | AML.T0080 (Memory Manipulation)
|
|
655
1000
|
//
|
|
656
|
-
// Compliance: OWASP LLM09 (Improper Output Handling) | MITRE ATLAS AML.T0054
|
|
657
1001
|
// Category: agent_security
|
|
658
1002
|
// Namespace: Overwatch
|
|
659
1003
|
// =============================================================================
|
|
660
1004
|
|
|
661
|
-
//
|
|
1005
|
+
// ---------------------------------------------------------------------------
|
|
1006
|
+
// Tool Poisoning — hidden instructions in tool descriptions/arguments
|
|
1007
|
+
// Ref: Invariant Labs (April 2025), 84.2% success rate with auto-approval
|
|
1008
|
+
// ---------------------------------------------------------------------------
|
|
1009
|
+
|
|
1010
|
+
// Block tool calls with tool poisoning risk
|
|
662
1011
|
@id("as-block-tool-poisoning")
|
|
663
1012
|
@name("Block tool poisoning")
|
|
664
|
-
@description("Block tool execution when tool
|
|
1013
|
+
@description("Block tool execution when hidden instructions are detected in tool descriptions or arguments (score >= 70/100). Catches authority hijack, system prompt injection, and hidden instruction patterns. Adjust the threshold to tune sensitivity — lower catches more but may flag legitimate tools with instructional descriptions (OWASP ASI01).")
|
|
665
1014
|
@severity("critical")
|
|
666
|
-
@tags("tool-poisoning,agent-security,owasp-
|
|
667
|
-
@reject_message("Tool execution
|
|
1015
|
+
@tags("tool-poisoning,agent-security,owasp-asi01,mitre-aml-t0051")
|
|
1016
|
+
@reject_message("Tool execution blocked: hidden manipulation instructions detected in tool description or arguments. This may be a tool poisoning attack (OWASP ASI01).")
|
|
668
1017
|
forbid (
|
|
669
1018
|
principal,
|
|
670
1019
|
action == Overwatch::Action::"call_tool",
|
|
@@ -674,45 +1023,55 @@ when {
|
|
|
674
1023
|
context has tool_poisoning_score && context.tool_poisoning_score >= 70
|
|
675
1024
|
};
|
|
676
1025
|
|
|
677
|
-
// Block
|
|
678
|
-
@id("as-block-
|
|
679
|
-
@name("Block
|
|
680
|
-
@description("Block
|
|
1026
|
+
// Block MCP server connections with poisoning risk
|
|
1027
|
+
@id("as-block-server-poisoning")
|
|
1028
|
+
@name("Block poisoned MCP servers")
|
|
1029
|
+
@description("Block connections to MCP servers when tool poisoning patterns are detected in tool descriptions (score >= 60). Lower threshold for servers since poisoning affects all tools on the server.")
|
|
681
1030
|
@severity("critical")
|
|
682
|
-
@tags("
|
|
683
|
-
@reject_message("
|
|
1031
|
+
@tags("tool-poisoning,mcp-security,owasp-asi04,owasp-mcp02")
|
|
1032
|
+
@reject_message("MCP server connection blocked: tool poisoning patterns detected in server tool descriptions. Review server tools before connecting.")
|
|
684
1033
|
forbid (
|
|
685
1034
|
principal,
|
|
686
|
-
action
|
|
1035
|
+
action == Overwatch::Action::"connect_server",
|
|
687
1036
|
resource
|
|
688
1037
|
)
|
|
689
1038
|
when {
|
|
690
|
-
context has
|
|
1039
|
+
context has tool_poisoning_score && context.tool_poisoning_score >= 60
|
|
691
1040
|
};
|
|
692
1041
|
|
|
693
|
-
//
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
1042
|
+
// ---------------------------------------------------------------------------
|
|
1043
|
+
// Rug Pull — tool behavior changes after trust establishment
|
|
1044
|
+
// Ref: Acuvity (2025), tools approved once then silently redefined
|
|
1045
|
+
// ---------------------------------------------------------------------------
|
|
1046
|
+
|
|
1047
|
+
// Block tool calls with behavioral drift (rug pull)
|
|
1048
|
+
@id("as-block-rug-pull")
|
|
1049
|
+
@name("Block rug pull attacks")
|
|
1050
|
+
@description("Block tool execution when behavioral drift is detected — tool behavior diverges significantly from established patterns (score >= 70/100). Defends against tools that are approved once then silently redefined to act maliciously. Adjust the threshold to tune sensitivity (OWASP ASI04).")
|
|
697
1051
|
@severity("critical")
|
|
698
|
-
@tags("
|
|
699
|
-
@reject_message("
|
|
1052
|
+
@tags("rug-pull,agent-security,owasp-asi04,behavioral-drift")
|
|
1053
|
+
@reject_message("Tool execution blocked: tool behavior has changed significantly from its established pattern. This may be a rug pull attack where a tool was silently redefined after initial approval.")
|
|
700
1054
|
forbid (
|
|
701
1055
|
principal,
|
|
702
|
-
action
|
|
1056
|
+
action in [Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
|
|
703
1057
|
resource
|
|
704
1058
|
)
|
|
705
1059
|
when {
|
|
706
|
-
context has
|
|
1060
|
+
context has rug_pull_score && context.rug_pull_score >= 70
|
|
707
1061
|
};
|
|
708
1062
|
|
|
709
|
-
//
|
|
1063
|
+
// ---------------------------------------------------------------------------
|
|
1064
|
+
// Indirect Prompt Injection — injection via tool outputs and retrieved content
|
|
1065
|
+
// Ref: EchoLeak CVE-2025-32711, IDEsaster (30+ CVEs in AI IDEs)
|
|
1066
|
+
// ---------------------------------------------------------------------------
|
|
1067
|
+
|
|
1068
|
+
// Block prompts, tool calls, and server connections with indirect injection from tool outputs
|
|
710
1069
|
@id("as-block-indirect-injection")
|
|
711
1070
|
@name("Block indirect prompt injection")
|
|
712
|
-
@description("Block
|
|
1071
|
+
@description("Block when indirect prompt injection is detected in tool outputs, file contents, or retrieved documents (score >= 70). Defends against OWASP LLM01 and ASI01.")
|
|
713
1072
|
@severity("critical")
|
|
714
|
-
@tags("indirect-injection,
|
|
715
|
-
@reject_message("
|
|
1073
|
+
@tags("indirect-injection,owasp-llm01,owasp-asi01,mitre-aml-t0051")
|
|
1074
|
+
@reject_message("Content blocked: indirect prompt injection detected in tool output or retrieved content. An external source may be attempting to hijack agent behavior.")
|
|
716
1075
|
forbid (
|
|
717
1076
|
principal,
|
|
718
1077
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
|
|
@@ -722,13 +1081,35 @@ when {
|
|
|
722
1081
|
context has indirect_injection_score && context.indirect_injection_score >= 70
|
|
723
1082
|
};
|
|
724
1083
|
|
|
725
|
-
//
|
|
1084
|
+
// Strict indirect injection for sensitive tool calls
|
|
1085
|
+
@id("as-block-indirect-injection-sensitive-tools")
|
|
1086
|
+
@name("Block indirect injection on sensitive tools")
|
|
1087
|
+
@description("Lower threshold (>= 50) for indirect injection when the tool is classified as sensitive (shell, file write, network). Even moderate injection risk on sensitive tools warrants blocking.")
|
|
1088
|
+
@severity("critical")
|
|
1089
|
+
@tags("indirect-injection,sensitive-tools,owasp-asi02")
|
|
1090
|
+
@reject_message("Sensitive tool execution blocked: moderate indirect injection risk detected. Sensitive tools require higher confidence that content is safe.")
|
|
1091
|
+
forbid (
|
|
1092
|
+
principal,
|
|
1093
|
+
action == Overwatch::Action::"call_tool",
|
|
1094
|
+
resource
|
|
1095
|
+
)
|
|
1096
|
+
when {
|
|
1097
|
+
context has indirect_injection_score && context.indirect_injection_score >= 50 &&
|
|
1098
|
+
context has tool_is_sensitive && context.tool_is_sensitive
|
|
1099
|
+
};
|
|
1100
|
+
|
|
1101
|
+
// ---------------------------------------------------------------------------
|
|
1102
|
+
// MCP Supply Chain — unverified servers, risky configs
|
|
1103
|
+
// Ref: OWASP MCP Top 10, OWASP ASI04, MITRE AML.T0082
|
|
1104
|
+
// ---------------------------------------------------------------------------
|
|
1105
|
+
|
|
1106
|
+
// Block unverified MCP server tool calls with detected threats
|
|
726
1107
|
@id("as-block-unverified-threats")
|
|
727
1108
|
@name("Block unverified server threats")
|
|
728
|
-
@description("Block tool calls from unverified MCP servers when any threat is detected")
|
|
1109
|
+
@description("Block tool calls from unverified MCP servers when any threat is detected. Unverified servers with threats are high-risk supply chain vectors.")
|
|
729
1110
|
@severity("high")
|
|
730
|
-
@tags("mcp-trust,
|
|
731
|
-
@reject_message("Tool execution
|
|
1111
|
+
@tags("mcp-trust,owasp-asi04,owasp-mcp02,supply-chain")
|
|
1112
|
+
@reject_message("Tool execution blocked: the MCP server is unverified and security threats were detected. Only use tools from verified or trusted servers.")
|
|
732
1113
|
forbid (
|
|
733
1114
|
principal,
|
|
734
1115
|
action == Overwatch::Action::"call_tool",
|
|
@@ -738,6 +1119,309 @@ when {
|
|
|
738
1119
|
context has mcp_server_verified && context.mcp_server_verified == false &&
|
|
739
1120
|
context has threat_count && context.threat_count > 0
|
|
740
1121
|
};
|
|
1122
|
+
|
|
1123
|
+
// Block connections to MCP servers with risky configurations
|
|
1124
|
+
@id("as-block-mcp-config-risk")
|
|
1125
|
+
@name("Block risky MCP server configs")
|
|
1126
|
+
@description("Block MCP server connections when risky configuration patterns are detected (inline code execution, mixed transports, proxy patterns). Score >= 70.")
|
|
1127
|
+
@severity("high")
|
|
1128
|
+
@tags("mcp-config,owasp-mcp03,owasp-asi04,supply-chain")
|
|
1129
|
+
@reject_message("MCP server connection blocked: risky server configuration detected (e.g., inline code execution, mixed transports). Review the server configuration before connecting.")
|
|
1130
|
+
forbid (
|
|
1131
|
+
principal,
|
|
1132
|
+
action == Overwatch::Action::"connect_server",
|
|
1133
|
+
resource
|
|
1134
|
+
)
|
|
1135
|
+
when {
|
|
1136
|
+
context has mcp_config_risk && context.mcp_config_risk &&
|
|
1137
|
+
context has mcp_risk_score && context.mcp_risk_score >= 70
|
|
1138
|
+
};
|
|
1139
|
+
|
|
1140
|
+
// Block connections to unverified MCP servers entirely
|
|
1141
|
+
@id("as-block-unverified-server-connect")
|
|
1142
|
+
@name("Block unverified MCP server connections")
|
|
1143
|
+
@description("Block connections to MCP servers that are not from a verified registry. This prevents supply chain attacks via malicious MCP servers.")
|
|
1144
|
+
@severity("high")
|
|
1145
|
+
@tags("mcp-trust,owasp-asi04,owasp-mcp05,supply-chain")
|
|
1146
|
+
@reject_message("MCP server connection blocked: server is not from a verified registry. Add the server to your verified list or contact your admin.")
|
|
1147
|
+
forbid (
|
|
1148
|
+
principal,
|
|
1149
|
+
action == Overwatch::Action::"connect_server",
|
|
1150
|
+
resource
|
|
1151
|
+
)
|
|
1152
|
+
when {
|
|
1153
|
+
context has mcp_server_verified && context.mcp_server_verified == false
|
|
1154
|
+
};
|
|
1155
|
+
`;
|
|
1156
|
+
const OVERWATCH_ENCODING_DEFAULT_CEDAR = `// =============================================================================
|
|
1157
|
+
// Encoding & Unicode Attack Detection Policy (Default)
|
|
1158
|
+
// =============================================================================
|
|
1159
|
+
// Detects and blocks invisible Unicode characters, bidirectional text
|
|
1160
|
+
// overrides, tag characters, and other encoding-based attack vectors used
|
|
1161
|
+
// to hide malicious instructions from human review while remaining
|
|
1162
|
+
// visible to AI model tokenizers.
|
|
1163
|
+
//
|
|
1164
|
+
// Attack vectors:
|
|
1165
|
+
// - Zero-width characters (U+200B, U+200C, U+200D, U+FEFF) hiding instructions
|
|
1166
|
+
// - Bidirectional overrides (U+202A-U+202E) creating visually misleading text
|
|
1167
|
+
// - Tag characters (U+E0001-U+E007F) embedding invisible payloads
|
|
1168
|
+
// - Variation selectors used as steganographic channels
|
|
1169
|
+
// - Homoglyph attacks using lookalike Unicode characters
|
|
1170
|
+
//
|
|
1171
|
+
// Ref: EchoLeak CVE-2025-32711 (invisible prompt injection via Unicode)
|
|
1172
|
+
// Rules File Backdoor (Pillar Security, March 2025)
|
|
1173
|
+
// Unicode-based prompt injection in Claude Code (CERT-2025)
|
|
1174
|
+
//
|
|
1175
|
+
// Compliance:
|
|
1176
|
+
// OWASP LLM01 (Prompt Injection) — encoding evasion
|
|
1177
|
+
// OWASP ASI01 (Agent Goal Hijack) — hidden instructions
|
|
1178
|
+
// MITRE ATLAS AML.T0051 (LLM Prompt Injection)
|
|
1179
|
+
// NIST 800-53 SI-10 (Information Input Validation)
|
|
1180
|
+
//
|
|
1181
|
+
// Category: encoding
|
|
1182
|
+
// Namespace: Overwatch
|
|
1183
|
+
// =============================================================================
|
|
1184
|
+
|
|
1185
|
+
// ---------------------------------------------------------------------------
|
|
1186
|
+
// Section 1: Invisible Character Detection in Prompts
|
|
1187
|
+
// Blocks prompts containing suspicious invisible Unicode patterns.
|
|
1188
|
+
// ---------------------------------------------------------------------------
|
|
1189
|
+
|
|
1190
|
+
// Block prompts with invisible characters above risk threshold
|
|
1191
|
+
@id("encoding-block-invisible-prompt")
|
|
1192
|
+
@name("Block invisible characters in prompts")
|
|
1193
|
+
@description("Block prompts when invisible Unicode characters are detected with a risk score >= 50. Invisible chars (zero-width joiners, bidi overrides, tag characters) can hide malicious instructions from human review while being processed by AI models. Ref: EchoLeak CVE-2025-32711.")
|
|
1194
|
+
@severity("high")
|
|
1195
|
+
@tags("unicode,invisible-chars,encoding,owasp-llm01,owasp-asi01,mitre-aml-t0051,nist-si-10")
|
|
1196
|
+
@reject_message("Your prompt was blocked because suspicious invisible Unicode characters were detected. These characters (zero-width, bidirectional overrides, tag characters) can be used to hide malicious instructions. Please remove non-visible characters and resubmit.")
|
|
1197
|
+
forbid (
|
|
1198
|
+
principal,
|
|
1199
|
+
action == Overwatch::Action::"process_prompt",
|
|
1200
|
+
resource
|
|
1201
|
+
)
|
|
1202
|
+
when {
|
|
1203
|
+
context has contains_invisible_chars && context.contains_invisible_chars &&
|
|
1204
|
+
context has invisible_chars_score && context.invisible_chars_score >= 50
|
|
1205
|
+
};
|
|
1206
|
+
|
|
1207
|
+
// ---------------------------------------------------------------------------
|
|
1208
|
+
// Section 2: Invisible Characters in Tool Calls
|
|
1209
|
+
// Any invisible characters in tool arguments are suspicious — tool args
|
|
1210
|
+
// should be plain text/JSON. Stricter than prompts: no score threshold is required.
|
|
1211
|
+
// ---------------------------------------------------------------------------
|
|
1212
|
+
|
|
1213
|
+
// Block tool calls with any invisible characters
|
|
1214
|
+
@id("encoding-block-invisible-tool")
|
|
1215
|
+
@name("Block invisible characters in tool calls")
|
|
1216
|
+
@description("Block tool execution when invisible Unicode characters are detected in tool arguments or content. Tool arguments should be plain text/JSON — invisible characters in tool calls are almost certainly malicious payload injection.")
|
|
1217
|
+
@severity("critical")
|
|
1218
|
+
@tags("unicode,invisible-chars,tools,encoding,owasp-asi01,owasp-asi02")
|
|
1219
|
+
@reject_message("Tool execution blocked: invisible Unicode characters detected in tool arguments. Tool calls should contain only plain text — invisible characters indicate payload injection or encoding evasion.")
|
|
1220
|
+
forbid (
|
|
1221
|
+
principal,
|
|
1222
|
+
action == Overwatch::Action::"call_tool",
|
|
1223
|
+
resource
|
|
1224
|
+
)
|
|
1225
|
+
when {
|
|
1226
|
+
context has contains_invisible_chars && context.contains_invisible_chars
|
|
1227
|
+
};
|
|
1228
|
+
|
|
1229
|
+
// ---------------------------------------------------------------------------
|
|
1230
|
+
// Section 3: Invisible Characters in File Operations
|
|
1231
|
+
// Blocks file writes (and MCP server connections) carrying invisible characters to prevent persistence
|
|
1232
|
+
// of invisible payloads in the codebase.
|
|
1233
|
+
// ---------------------------------------------------------------------------
|
|
1234
|
+
|
|
1235
|
+
// Block file writes with invisible characters
|
|
1236
|
+
@id("encoding-block-invisible-file-write")
|
|
1237
|
+
@name("Block invisible characters in file writes")
|
|
1238
|
+
@description("Block file writes when invisible Unicode characters are detected. Prevents persistence of invisible payloads in source code, config files, or documentation where they could later be processed by AI agents. Ref: Rules File Backdoor attack (Pillar Security).")
|
|
1239
|
+
@severity("high")
|
|
1240
|
+
@tags("unicode,invisible-chars,file-write,encoding,owasp-asi01")
|
|
1241
|
+
@reject_message("File write blocked: invisible Unicode characters detected in content. Writing invisible characters to files can create persistent backdoors that affect AI agents processing those files later.")
|
|
1242
|
+
forbid (
|
|
1243
|
+
principal,
|
|
1244
|
+
action == Overwatch::Action::"write_file",
|
|
1245
|
+
resource
|
|
1246
|
+
)
|
|
1247
|
+
when {
|
|
1248
|
+
context has contains_invisible_chars && context.contains_invisible_chars
|
|
1249
|
+
};
|
|
1250
|
+
|
|
1251
|
+
// Block MCP server connections with invisible characters in config
|
|
1252
|
+
@id("encoding-block-invisible-server")
|
|
1253
|
+
@name("Block invisible characters in server config")
|
|
1254
|
+
@description("Block MCP server connections when invisible Unicode characters are detected in server configuration or responses. Invisible chars in server data indicate a compromised or malicious MCP server.")
|
|
1255
|
+
@severity("critical")
|
|
1256
|
+
@tags("unicode,invisible-chars,mcp,encoding,owasp-mcp02,owasp-asi04")
|
|
1257
|
+
@reject_message("MCP server connection blocked: invisible Unicode characters detected in server data. This may indicate a compromised MCP server using encoding attacks to inject hidden instructions.")
|
|
1258
|
+
forbid (
|
|
1259
|
+
principal,
|
|
1260
|
+
action == Overwatch::Action::"connect_server",
|
|
1261
|
+
resource
|
|
1262
|
+
)
|
|
1263
|
+
when {
|
|
1264
|
+
context has contains_invisible_chars && context.contains_invisible_chars
|
|
1265
|
+
};
|
|
1266
|
+
`;
|
|
1267
|
+
const OVERWATCH_BEHAVIORAL_DEFAULT_CEDAR = `// =============================================================================
|
|
1268
|
+
// Behavioral Analysis Policy (Default)
|
|
1269
|
+
// =============================================================================
|
|
1270
|
+
// Detects and blocks suspicious agent behavioral patterns including tool call
|
|
1271
|
+
// loops, data exfiltration sequences, credential theft chains, and destructive
|
|
1272
|
+
// operation patterns. Operates on session-level behavioral signals rather
|
|
1273
|
+
// than single-request content analysis.
|
|
1274
|
+
//
|
|
1275
|
+
// Attack vectors:
|
|
1276
|
+
// - Tool call loops: Agent stuck in retry loop or manipulation-induced recursion
|
|
1277
|
+
// - Data exfiltration: Read sensitive data → send to external endpoint sequence
|
|
1278
|
+
// - Secret exfiltration: Read credentials → curl/fetch external URL
|
|
1279
|
+
// - Credential theft: Access .ssh/.aws → encode/compress → network tool
|
|
1280
|
+
// - Destructive sequences: Bulk delete, permission changes, config overwrites
|
|
1281
|
+
//
|
|
1282
|
+
// Ref: OWASP LLM10 (Unbounded Consumption) — loop/recursion attacks
|
|
1283
|
+
// OWASP ASI02 (Tool Misuse) — tool abuse sequences
|
|
1284
|
+
// OWASP ASI08 (Lack of Monitoring) — behavioral anomaly detection
|
|
1285
|
+
// GlassWorm Attack (35,800+ installations, cross-agent propagation)
|
|
1286
|
+
// MITRE ATLAS AML.T0080 (Memory Manipulation)
|
|
1287
|
+
//
|
|
1288
|
+
// Compliance:
|
|
1289
|
+
// OWASP LLM10 (Unbounded Consumption)
|
|
1290
|
+
// OWASP ASI02 (Tool Misuse)
|
|
1291
|
+
// OWASP ASI08 (Lack of Monitoring & Logging)
|
|
1292
|
+
// MITRE ATLAS AML.T0080 (AI Memory Manipulation)
|
|
1293
|
+
// MITRE ATT&CK T1041 (Exfiltration Over C2 Channel)
|
|
1294
|
+
// NIST 800-53 AU-6 (Audit Review, Analysis, and Reporting)
|
|
1295
|
+
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
1296
|
+
//
|
|
1297
|
+
// Category: behavioral
|
|
1298
|
+
// Namespace: Overwatch
|
|
1299
|
+
// =============================================================================
|
|
1300
|
+
|
|
1301
|
+
// ---------------------------------------------------------------------------
|
|
1302
|
+
// Section 1: Tool Call Loop Detection
|
|
1303
|
+
// Detects agents trapped in infinite loops — either through manipulation
|
|
1304
|
+
// (adversarial prompt inducing repetitive behavior) or bugs (retry storms).
|
|
1305
|
+
// Ref: OWASP LLM10 (Unbounded Consumption)
|
|
1306
|
+
// ---------------------------------------------------------------------------
|
|
1307
|
+
|
|
1308
|
+
// Block tool calls in detected loops (5+ consecutive same-tool calls)
|
|
1309
|
+
@id("behavioral-block-loop")
|
|
1310
|
+
@name("Block tool call loops")
|
|
1311
|
+
@description("Block tool execution when a loop is detected — 5 or more consecutive calls to the same tool. This indicates either adversarial manipulation inducing repetitive agent behavior or a bug causing retry storms. Both waste compute and can cause damage. Adjust the threshold (default 5) to match your workflow — lower for stricter enforcement, higher for agents that legitimately retry (OWASP LLM10).")
|
|
1312
|
+
@severity("high")
|
|
1313
|
+
@tags("loop-detection,behavioral,owasp-llm10,owasp-asi02,nist-si-4")
|
|
1314
|
+
@reject_message("Tool execution blocked: repetitive tool call loop detected (5+ consecutive calls to the same tool). This may indicate adversarial manipulation or a system error. The agent session should be reviewed.")
|
|
1315
|
+
forbid (
|
|
1316
|
+
principal,
|
|
1317
|
+
action == Overwatch::Action::"call_tool",
|
|
1318
|
+
resource
|
|
1319
|
+
)
|
|
1320
|
+
when {
|
|
1321
|
+
context has loop_detected && context.loop_detected &&
|
|
1322
|
+
context has loop_count && context.loop_count >= 5
|
|
1323
|
+
};
|
|
1324
|
+
|
|
1325
|
+
// ---------------------------------------------------------------------------
|
|
1326
|
+
// Section 2: Data Exfiltration Detection
|
|
1327
|
+
// Detects sequences where an agent reads sensitive data and then attempts
|
|
1328
|
+
// to send it to an external endpoint.
|
|
1329
|
+
// Ref: GlassWorm attack, MITRE T1041
|
|
1330
|
+
// ---------------------------------------------------------------------------
|
|
1331
|
+
|
|
1332
|
+
// Block data exfiltration patterns
|
|
1333
|
+
@id("behavioral-block-data-exfil")
|
|
1334
|
+
@name("Block data exfiltration")
|
|
1335
|
+
@description("Block tool execution when a data exfiltration pattern is detected — the agent reads sensitive local data (files, configs, source code) followed by a network operation sending data externally. This is the hallmark of autonomous agent compromise (GlassWorm, EchoLeak).")
|
|
1336
|
+
@severity("critical")
|
|
1337
|
+
@tags("data-exfiltration,behavioral,owasp-asi02,mitre-t1041,nist-si-4")
|
|
1338
|
+
@reject_message("Tool execution blocked: data exfiltration pattern detected. The agent appears to be reading sensitive data and sending it to an external endpoint. This is a critical security event — the agent session has been terminated.")
|
|
1339
|
+
forbid (
|
|
1340
|
+
principal,
|
|
1341
|
+
action == Overwatch::Action::"call_tool",
|
|
1342
|
+
resource
|
|
1343
|
+
)
|
|
1344
|
+
when {
|
|
1345
|
+
context has suspicious_pattern && context.suspicious_pattern &&
|
|
1346
|
+
context has pattern_type && context.pattern_type == "data_exfiltration"
|
|
1347
|
+
};
|
|
1348
|
+
|
|
1349
|
+
// Block secret exfiltration (credential-specific exfil)
|
|
1350
|
+
@id("behavioral-block-secret-exfil")
|
|
1351
|
+
@name("Block secret exfiltration")
|
|
1352
|
+
@description("Block tool execution when a secret exfiltration pattern is detected — the agent accesses credential files (.env, .aws, tokens) followed by a network tool call. This is a targeted credential theft sequence.")
|
|
1353
|
+
@severity("critical")
|
|
1354
|
+
@tags("secret-exfiltration,behavioral,owasp-asi02,mitre-t1552,mitre-t1041")
|
|
1355
|
+
@reject_message("Tool execution blocked: secret exfiltration pattern detected. The agent accessed credential files and is attempting to send them externally. This is a targeted credential theft attack.")
|
|
1356
|
+
forbid (
|
|
1357
|
+
principal,
|
|
1358
|
+
action == Overwatch::Action::"call_tool",
|
|
1359
|
+
resource
|
|
1360
|
+
)
|
|
1361
|
+
when {
|
|
1362
|
+
context has suspicious_pattern && context.suspicious_pattern &&
|
|
1363
|
+
context has pattern_type && context.pattern_type == "secret_exfiltration"
|
|
1364
|
+
};
|
|
1365
|
+
|
|
1366
|
+
// Block credential theft chains
|
|
1367
|
+
@id("behavioral-block-credential-theft")
|
|
1368
|
+
@name("Block credential theft")
|
|
1369
|
+
@description("Block tool execution when a credential theft chain is detected — accessing SSH keys, cloud credentials, or API tokens followed by encoding, compression, or transfer operations. Multi-step attack pattern for autonomous credential harvesting.")
|
|
1370
|
+
@severity("critical")
|
|
1371
|
+
@tags("credential-theft,behavioral,owasp-asi02,mitre-t1552,mitre-t1555")
|
|
1372
|
+
@reject_message("Tool execution blocked: credential theft chain detected. The agent is performing a multi-step operation to harvest and exfiltrate credentials (SSH keys, cloud tokens, API keys). Session terminated.")
|
|
1373
|
+
forbid (
|
|
1374
|
+
principal,
|
|
1375
|
+
action == Overwatch::Action::"call_tool",
|
|
1376
|
+
resource
|
|
1377
|
+
)
|
|
1378
|
+
when {
|
|
1379
|
+
context has suspicious_pattern && context.suspicious_pattern &&
|
|
1380
|
+
context has pattern_type && context.pattern_type == "credential_theft"
|
|
1381
|
+
};
|
|
1382
|
+
|
|
1383
|
+
// ---------------------------------------------------------------------------
|
|
1384
|
+
// Section 3: Destructive Sequence Detection
|
|
1385
|
+
// Detects sequences of destructive operations that could damage the workspace.
|
|
1386
|
+
// ---------------------------------------------------------------------------
|
|
1387
|
+
|
|
1388
|
+
// Block destructive operation sequences
|
|
1389
|
+
@id("behavioral-block-destructive-sequence")
|
|
1390
|
+
@name("Block destructive sequences")
|
|
1391
|
+
@description("Block tool execution when a destructive operation sequence is detected — bulk file deletions, permission changes, config overwrites, or repository manipulation patterns. Prevents agent-initiated workspace damage.")
|
|
1392
|
+
@severity("critical")
|
|
1393
|
+
@tags("destructive,behavioral,owasp-asi02,nist-si-4")
|
|
1394
|
+
@reject_message("Tool execution blocked: destructive operation sequence detected. The agent is performing a pattern of destructive operations (bulk deletions, permission changes, config overwrites) that could damage the workspace.")
|
|
1395
|
+
forbid (
|
|
1396
|
+
principal,
|
|
1397
|
+
action == Overwatch::Action::"call_tool",
|
|
1398
|
+
resource
|
|
1399
|
+
)
|
|
1400
|
+
when {
|
|
1401
|
+
context has suspicious_pattern && context.suspicious_pattern &&
|
|
1402
|
+
context has pattern_type && context.pattern_type == "destructive_sequence"
|
|
1403
|
+
};
|
|
1404
|
+
|
|
1405
|
+
// ---------------------------------------------------------------------------
|
|
1406
|
+
// Section 4: Sequence Risk Scoring
|
|
1407
|
+
// Uses computed sequence risk scores for dynamic behavioral assessment.
|
|
1408
|
+
// ---------------------------------------------------------------------------
|
|
1409
|
+
|
|
1410
|
+
// Block high-risk behavioral sequences
|
|
1411
|
+
@id("behavioral-block-high-risk-sequence")
|
|
1412
|
+
@name("Block high-risk behavioral sequences")
|
|
1413
|
+
@description("Block tool execution when the computed sequence risk score exceeds 80/100. The score aggregates behavioral signals including action history, tool combination analysis, and deviation from normal patterns. High scores indicate coordinated multi-step attacks.")
|
|
1414
|
+
@severity("high")
|
|
1415
|
+
@tags("sequence-risk,behavioral,dynamic,owasp-asi08,nist-au-6")
|
|
1416
|
+
@reject_message("Tool execution blocked: high-risk behavioral sequence detected (risk score 80+). The pattern of agent actions indicates a coordinated attack. This session requires human review before continuing.")
|
|
1417
|
+
forbid (
|
|
1418
|
+
principal,
|
|
1419
|
+
action == Overwatch::Action::"call_tool",
|
|
1420
|
+
resource
|
|
1421
|
+
)
|
|
1422
|
+
when {
|
|
1423
|
+
context has sequence_risk && context.sequence_risk >= 80
|
|
1424
|
+
};
|
|
741
1425
|
`;
|
|
742
1426
|
const OVERWATCH_TOOLS_MCP_ALLOWLIST_CEDAR = `// MCP Server Allowlist Template
|
|
743
1427
|
// Only allow specific MCP servers to be used
|
|
@@ -883,40 +1567,47 @@ permit (
|
|
|
883
1567
|
resource
|
|
884
1568
|
);
|
|
885
1569
|
`;
|
|
886
|
-
const OVERWATCH_ORG_TEAM_PERMISSIONS_CEDAR = `//
|
|
887
|
-
//
|
|
1570
|
+
const OVERWATCH_ORG_TEAM_PERMISSIONS_CEDAR = `// =============================================================================
|
|
1571
|
+
// Project-Based Permissions (ReBAC)
|
|
1572
|
+
// =============================================================================
|
|
1573
|
+
// Grant IDE access based on project scope using entity hierarchy.
|
|
1574
|
+
// With the aligned schema, principals (User, Agent) are flat — scoping is
|
|
1575
|
+
// done via resource hierarchy instead of principal hierarchy.
|
|
1576
|
+
//
|
|
888
1577
|
// Category: organization
|
|
889
1578
|
// Namespace: Overwatch
|
|
890
1579
|
//
|
|
891
1580
|
// Entity hierarchy required:
|
|
892
|
-
//
|
|
893
|
-
// └──
|
|
894
|
-
//
|
|
895
|
-
//
|
|
896
|
-
//
|
|
897
|
-
|
|
898
|
-
//
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
@
|
|
1581
|
+
// Account::"acme-corp"
|
|
1582
|
+
// ├── Project::"dev-project" (in Account)
|
|
1583
|
+
// └── Project::"support-project" (in Account)
|
|
1584
|
+
//
|
|
1585
|
+
// Resources (Tool, Server, FilePath, LlmPrompt) are parented under Project,
|
|
1586
|
+
// so \`resource in Project::"..."\` matches all resources in that project.
|
|
1587
|
+
// =============================================================================
|
|
1588
|
+
|
|
1589
|
+
// Dev Project: Full IDE access - all actions permitted on all resources
|
|
1590
|
+
@id("project-dev-full-access")
|
|
1591
|
+
@name("Dev project full IDE access")
|
|
1592
|
+
@description("Grant full IDE access to all resources within the dev project including tools, prompts, file operations, and server connections")
|
|
902
1593
|
@severity("medium")
|
|
903
|
-
@tags("rebac,
|
|
1594
|
+
@tags("rebac,project,dev,permissions,organization")
|
|
904
1595
|
permit (
|
|
905
|
-
principal
|
|
1596
|
+
principal,
|
|
906
1597
|
action,
|
|
907
|
-
resource
|
|
1598
|
+
resource in Overwatch::Project::"dev-project"
|
|
908
1599
|
);
|
|
909
1600
|
|
|
910
|
-
// Support
|
|
911
|
-
@id("
|
|
912
|
-
@name("Support
|
|
913
|
-
@description("Grant
|
|
1601
|
+
// Support Project: Read-only access - process prompts and read files only
|
|
1602
|
+
@id("project-support-read-only")
|
|
1603
|
+
@name("Support project read-only access")
|
|
1604
|
+
@description("Grant read-only access to support project resources limited to prompt processing and file reading")
|
|
914
1605
|
@severity("medium")
|
|
915
|
-
@tags("rebac,
|
|
1606
|
+
@tags("rebac,project,support,read-only,organization")
|
|
916
1607
|
permit (
|
|
917
|
-
principal
|
|
1608
|
+
principal,
|
|
918
1609
|
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"read_file"],
|
|
919
|
-
resource
|
|
1610
|
+
resource in Overwatch::Project::"support-project"
|
|
920
1611
|
);
|
|
921
1612
|
`;
|
|
922
1613
|
const OVERWATCH_ORG_AGENT_GUARDRAILS_CEDAR = `// Agent-Specific Guardrails
|
|
@@ -940,7 +1631,7 @@ forbid (
|
|
|
940
1631
|
resource
|
|
941
1632
|
)
|
|
942
1633
|
when {
|
|
943
|
-
context.
|
|
1634
|
+
context.detected_threats.contains("prompt_injection")
|
|
944
1635
|
};
|
|
945
1636
|
|
|
946
1637
|
// Cursor: Block PII leakage
|
|
@@ -968,7 +1659,9 @@ export const OVERWATCH_CATEGORIES = [
|
|
|
968
1659
|
{ id: 'tools', name: 'Tool Permissioning', description: 'Control access to shell execution, file operations, MCP servers, and sensitive system paths' },
|
|
969
1660
|
{ id: 'organization', name: 'Organization Rules', description: 'Apply organization-wide policy baselines, team permissions, and agent-specific guardrails' },
|
|
970
1661
|
{ id: 'trust_safety', name: 'Content Safety', description: 'Detect and control violent, harmful, hateful, sexual, and profane content using trust/safety classification scores' },
|
|
971
|
-
{ id: 'agent_security', name: 'Agent Security', description: 'Detect tool poisoning, rug pull attacks,
|
|
1662
|
+
{ id: 'agent_security', name: 'Agent Security', description: 'Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats targeting AI agents' },
|
|
1663
|
+
{ id: 'encoding', name: 'Encoding & Unicode Attacks', description: 'Detect invisible Unicode characters, bidirectional text overrides, and encoded injection payloads used to hide malicious instructions' },
|
|
1664
|
+
{ id: 'behavioral', name: 'Behavioral Analysis', description: 'Detect suspicious action sequences, tool call loops, data exfiltration patterns, and credential theft chains across agent sessions' },
|
|
972
1665
|
];
|
|
973
1666
|
// =============================================================================
|
|
974
1667
|
// DEFAULT POLICIES
|
|
@@ -987,7 +1680,7 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
987
1680
|
{
|
|
988
1681
|
id: 'secrets-default',
|
|
989
1682
|
name: 'Secrets Detection',
|
|
990
|
-
description: 'Detect and block credential leakage across prompts, tool calls, file operations, and AI
|
|
1683
|
+
description: 'Detect and block credential leakage across prompts, tool calls, file operations, and AI responses using multi-layered detection',
|
|
991
1684
|
category: 'secrets',
|
|
992
1685
|
cedarText: OVERWATCH_SECRETS_DEFAULT_CEDAR,
|
|
993
1686
|
severity: 'critical',
|
|
@@ -997,37 +1690,37 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
997
1690
|
{
|
|
998
1691
|
id: 'pii-default',
|
|
999
1692
|
name: 'PII Detection',
|
|
1000
|
-
description: 'Detect and block credit card numbers,
|
|
1693
|
+
description: 'Detect and block credit card numbers, SSNs, health data, and other PII in prompts, tool calls, and file operations',
|
|
1001
1694
|
category: 'pii',
|
|
1002
1695
|
cedarText: OVERWATCH_PII_DEFAULT_CEDAR,
|
|
1003
1696
|
severity: 'critical',
|
|
1004
|
-
tags: ['pii', 'privacy', 'compliance', 'pci-dss', 'gdpr', 'baseline'],
|
|
1697
|
+
tags: ['pii', 'privacy', 'compliance', 'pci-dss', 'gdpr', 'hipaa', 'baseline'],
|
|
1005
1698
|
isActive: true,
|
|
1006
1699
|
},
|
|
1007
1700
|
{
|
|
1008
1701
|
id: 'semantic-default',
|
|
1009
1702
|
name: 'Semantic Threat Detection',
|
|
1010
|
-
description: 'Detect and block prompt injection, jailbreak attempts, and high-severity
|
|
1703
|
+
description: 'Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers',
|
|
1011
1704
|
category: 'semantic',
|
|
1012
1705
|
cedarText: OVERWATCH_SEMANTIC_DEFAULT_CEDAR,
|
|
1013
1706
|
severity: 'critical',
|
|
1014
|
-
tags: ['prompt-injection', 'jailbreak', 'owasp-llm01', 'security', 'baseline'],
|
|
1707
|
+
tags: ['prompt-injection', 'jailbreak', 'owasp-llm01', 'owasp-llm02', 'security', 'baseline'],
|
|
1015
1708
|
isActive: true,
|
|
1016
1709
|
},
|
|
1017
1710
|
{
|
|
1018
1711
|
id: 'tools-default',
|
|
1019
1712
|
name: 'Tool Permissioning',
|
|
1020
|
-
description: 'Block dangerous shell execution, restrict sensitive file paths, and
|
|
1713
|
+
description: 'Block dangerous shell execution, restrict sensitive file paths, enforce tool risk scoring, and detect command injection in tool arguments',
|
|
1021
1714
|
category: 'tools',
|
|
1022
1715
|
cedarText: OVERWATCH_TOOLS_DEFAULT_CEDAR,
|
|
1023
1716
|
severity: 'critical',
|
|
1024
|
-
tags: ['shell', 'command-injection', 'file-access', 'mitre-t1059', 'baseline'],
|
|
1025
|
-
isActive:
|
|
1717
|
+
tags: ['shell', 'command-injection', 'file-access', 'tool-risk', 'mitre-t1059', 'owasp-llm06', 'baseline'],
|
|
1718
|
+
isActive: true,
|
|
1026
1719
|
},
|
|
1027
1720
|
{
|
|
1028
1721
|
id: 'trust-safety-default',
|
|
1029
1722
|
name: 'Content Safety',
|
|
1030
|
-
description: 'Detect and block violent, harmful, hateful, sexual, and profane content using classification scores',
|
|
1723
|
+
description: 'Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores',
|
|
1031
1724
|
category: 'trust_safety',
|
|
1032
1725
|
cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
|
|
1033
1726
|
severity: 'critical',
|
|
@@ -1037,11 +1730,31 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1037
1730
|
{
|
|
1038
1731
|
id: 'agent-security-default',
|
|
1039
1732
|
name: 'Agent Security',
|
|
1040
|
-
description: 'Detect and block tool poisoning, rug pull attacks,
|
|
1733
|
+
description: 'Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats via Shield detection',
|
|
1041
1734
|
category: 'agent_security',
|
|
1042
1735
|
cedarText: OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR,
|
|
1043
1736
|
severity: 'critical',
|
|
1044
|
-
tags: ['tool-poisoning', 'rug-pull', 'indirect-injection', 'mcp-security', 'agent-security', 'baseline'],
|
|
1737
|
+
tags: ['tool-poisoning', 'rug-pull', 'indirect-injection', 'mcp-security', 'agent-security', 'owasp-asi01', 'owasp-asi04', 'baseline'],
|
|
1738
|
+
isActive: true,
|
|
1739
|
+
},
|
|
1740
|
+
{
|
|
1741
|
+
id: 'encoding-default',
|
|
1742
|
+
name: 'Encoding Attack Detection',
|
|
1743
|
+
description: 'Detect and block invisible Unicode characters, bidirectional overrides, and encoding-based injection attacks across prompts, tools, and files',
|
|
1744
|
+
category: 'encoding',
|
|
1745
|
+
cedarText: OVERWATCH_ENCODING_DEFAULT_CEDAR,
|
|
1746
|
+
severity: 'high',
|
|
1747
|
+
tags: ['unicode', 'invisible-chars', 'bidi-override', 'encoding', 'owasp-llm01', 'baseline'],
|
|
1748
|
+
isActive: true,
|
|
1749
|
+
},
|
|
1750
|
+
{
|
|
1751
|
+
id: 'behavioral-default',
|
|
1752
|
+
name: 'Behavioral Analysis',
|
|
1753
|
+
description: 'Detect and block tool call loops, data exfiltration sequences, credential theft chains, and destructive operation patterns',
|
|
1754
|
+
category: 'behavioral',
|
|
1755
|
+
cedarText: OVERWATCH_BEHAVIORAL_DEFAULT_CEDAR,
|
|
1756
|
+
severity: 'high',
|
|
1757
|
+
tags: ['loop-detection', 'data-exfiltration', 'credential-theft', 'behavioral', 'owasp-llm10', 'owasp-asi02', 'baseline'],
|
|
1045
1758
|
isActive: true,
|
|
1046
1759
|
},
|
|
1047
1760
|
];
|
|
@@ -1111,7 +1824,7 @@ export const OVERWATCH_TEMPLATES = [
|
|
|
1111
1824
|
export const OVERWATCH_TEMPLATES_JSON = `{
|
|
1112
1825
|
"service": "overwatch",
|
|
1113
1826
|
"version": "3.0.0",
|
|
1114
|
-
"description": "Overwatch policy templates for IDE security",
|
|
1827
|
+
"description": "Overwatch policy templates for IDE agent security",
|
|
1115
1828
|
"categories": [
|
|
1116
1829
|
{
|
|
1117
1830
|
"id": "secrets",
|
|
@@ -1146,7 +1859,17 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1146
1859
|
{
|
|
1147
1860
|
"id": "agent_security",
|
|
1148
1861
|
"name": "Agent Security",
|
|
1149
|
-
"description": "Detect tool poisoning, rug pull attacks,
|
|
1862
|
+
"description": "Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats targeting AI agents"
|
|
1863
|
+
},
|
|
1864
|
+
{
|
|
1865
|
+
"id": "encoding",
|
|
1866
|
+
"name": "Encoding & Unicode Attacks",
|
|
1867
|
+
"description": "Detect invisible Unicode characters, bidirectional text overrides, and encoded injection payloads used to hide malicious instructions"
|
|
1868
|
+
},
|
|
1869
|
+
{
|
|
1870
|
+
"id": "behavioral",
|
|
1871
|
+
"name": "Behavioral Analysis",
|
|
1872
|
+
"description": "Detect suspicious action sequences, tool call loops, data exfiltration patterns, and credential theft chains across agent sessions"
|
|
1150
1873
|
}
|
|
1151
1874
|
],
|
|
1152
1875
|
"defaults": [
|
|
@@ -1163,7 +1886,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1163
1886
|
{
|
|
1164
1887
|
"id": "secrets-default",
|
|
1165
1888
|
"name": "Secrets Detection",
|
|
1166
|
-
"description": "Detect and block credential leakage across prompts, tool calls, file operations, and AI
|
|
1889
|
+
"description": "Detect and block credential leakage across prompts, tool calls, file operations, and AI responses using multi-layered detection",
|
|
1167
1890
|
"category": "secrets",
|
|
1168
1891
|
"file": "defaults/secrets.cedar",
|
|
1169
1892
|
"severity": "critical",
|
|
@@ -1173,37 +1896,37 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1173
1896
|
{
|
|
1174
1897
|
"id": "pii-default",
|
|
1175
1898
|
"name": "PII Detection",
|
|
1176
|
-
"description": "Detect and block credit card numbers,
|
|
1899
|
+
"description": "Detect and block credit card numbers, SSNs, health data, and other PII in prompts, tool calls, and file operations",
|
|
1177
1900
|
"category": "pii",
|
|
1178
1901
|
"file": "defaults/pii.cedar",
|
|
1179
1902
|
"severity": "critical",
|
|
1180
|
-
"tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "baseline"],
|
|
1903
|
+
"tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"],
|
|
1181
1904
|
"is_active": true
|
|
1182
1905
|
},
|
|
1183
1906
|
{
|
|
1184
1907
|
"id": "semantic-default",
|
|
1185
1908
|
"name": "Semantic Threat Detection",
|
|
1186
|
-
"description": "Detect and block prompt injection, jailbreak attempts, and high-severity
|
|
1909
|
+
"description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers",
|
|
1187
1910
|
"category": "semantic",
|
|
1188
1911
|
"file": "defaults/semantic.cedar",
|
|
1189
1912
|
"severity": "critical",
|
|
1190
|
-
"tags": ["prompt-injection", "jailbreak", "owasp-llm01", "security", "baseline"],
|
|
1913
|
+
"tags": ["prompt-injection", "jailbreak", "owasp-llm01", "owasp-llm02", "security", "baseline"],
|
|
1191
1914
|
"is_active": true
|
|
1192
1915
|
},
|
|
1193
1916
|
{
|
|
1194
1917
|
"id": "tools-default",
|
|
1195
1918
|
"name": "Tool Permissioning",
|
|
1196
|
-
"description": "Block dangerous shell execution, restrict sensitive file paths, and
|
|
1919
|
+
"description": "Block dangerous shell execution, restrict sensitive file paths, enforce tool risk scoring, and detect command injection in tool arguments",
|
|
1197
1920
|
"category": "tools",
|
|
1198
1921
|
"file": "defaults/tools.cedar",
|
|
1199
1922
|
"severity": "critical",
|
|
1200
|
-
"tags": ["shell", "command-injection", "file-access", "mitre-t1059", "baseline"],
|
|
1201
|
-
"is_active":
|
|
1923
|
+
"tags": ["shell", "command-injection", "file-access", "tool-risk", "mitre-t1059", "owasp-llm06", "baseline"],
|
|
1924
|
+
"is_active": true
|
|
1202
1925
|
},
|
|
1203
1926
|
{
|
|
1204
1927
|
"id": "trust-safety-default",
|
|
1205
1928
|
"name": "Content Safety",
|
|
1206
|
-
"description": "Detect and block violent, harmful, hateful, sexual, and profane content using classification scores",
|
|
1929
|
+
"description": "Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores",
|
|
1207
1930
|
"category": "trust_safety",
|
|
1208
1931
|
"file": "defaults/trust_safety.cedar",
|
|
1209
1932
|
"severity": "critical",
|
|
@@ -1213,11 +1936,31 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1213
1936
|
{
|
|
1214
1937
|
"id": "agent-security-default",
|
|
1215
1938
|
"name": "Agent Security",
|
|
1216
|
-
"description": "Detect and block tool poisoning, rug pull attacks,
|
|
1939
|
+
"description": "Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats via Shield detection",
|
|
1217
1940
|
"category": "agent_security",
|
|
1218
1941
|
"file": "defaults/agent_security.cedar",
|
|
1219
1942
|
"severity": "critical",
|
|
1220
|
-
"tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "baseline"],
|
|
1943
|
+
"tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "owasp-asi01", "owasp-asi04", "baseline"],
|
|
1944
|
+
"is_active": true
|
|
1945
|
+
},
|
|
1946
|
+
{
|
|
1947
|
+
"id": "encoding-default",
|
|
1948
|
+
"name": "Encoding Attack Detection",
|
|
1949
|
+
"description": "Detect and block invisible Unicode characters, bidirectional overrides, and encoding-based injection attacks across prompts, tools, and files",
|
|
1950
|
+
"category": "encoding",
|
|
1951
|
+
"file": "defaults/encoding_attacks.cedar",
|
|
1952
|
+
"severity": "high",
|
|
1953
|
+
"tags": ["unicode", "invisible-chars", "bidi-override", "encoding", "owasp-llm01", "baseline"],
|
|
1954
|
+
"is_active": true
|
|
1955
|
+
},
|
|
1956
|
+
{
|
|
1957
|
+
"id": "behavioral-default",
|
|
1958
|
+
"name": "Behavioral Analysis",
|
|
1959
|
+
"description": "Detect and block tool call loops, data exfiltration sequences, credential theft chains, and destructive operation patterns",
|
|
1960
|
+
"category": "behavioral",
|
|
1961
|
+
"file": "defaults/behavioral.cedar",
|
|
1962
|
+
"severity": "high",
|
|
1963
|
+
"tags": ["loop-detection", "data-exfiltration", "credential-theft", "behavioral", "owasp-llm10", "owasp-asi02", "baseline"],
|
|
1221
1964
|
"is_active": true
|
|
1222
1965
|
}
|
|
1223
1966
|
],
|