@highflame/policy 2.1.2 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/overwatch-defaults.gen.js +358 -370
- package/package.json +1 -1
|
@@ -32,6 +32,307 @@ permit (
|
|
|
32
32
|
resource
|
|
33
33
|
);
|
|
34
34
|
`;
|
|
35
|
+
const OVERWATCH_SEMANTIC_DEFAULT_CEDAR = `// =============================================================================
|
|
36
|
+
// Semantic Threat Detection Policy (Default)
|
|
37
|
+
// =============================================================================
|
|
38
|
+
// Detects and blocks prompt injection, jailbreak attempts, and high-severity
|
|
39
|
+
// AI security threats. Uses multi-layered detection:
|
|
40
|
+
//
|
|
41
|
+
// 1. Detection engine rule triggers (detected_threats) — pattern-based
|
|
42
|
+
// 2. ML classifier confidence scores (injection_confidence, jailbreak_confidence)
|
|
43
|
+
// 3. Threat severity aggregation (max_threat_severity, highest_severity)
|
|
44
|
+
// 4. Cross-action enforcement (prompts + tool calls + file operations)
|
|
45
|
+
//
|
|
46
|
+
// Compliance:
|
|
47
|
+
// OWASP LLM01 (Prompt Injection) — direct + indirect
|
|
48
|
+
// OWASP LLM02 (Insecure Output Handling) — response manipulation
|
|
49
|
+
// OWASP ASI01 (Agent Goal Hijack) — behavioral manipulation
|
|
50
|
+
// MITRE ATLAS AML.T0051 (LLM Prompt Injection)
|
|
51
|
+
// MITRE ATLAS AML.T0054 (LLM Jailbreak)
|
|
52
|
+
// NIST 800-53 SI-3 (Malicious Code Protection)
|
|
53
|
+
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
54
|
+
//
|
|
55
|
+
// Category: semantic
|
|
56
|
+
// Namespace: Overwatch
|
|
57
|
+
// =============================================================================
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Section 1: Prompt Injection Detection
|
|
61
|
+
// Blocks direct prompt injection — adversarial input designed to override
|
|
62
|
+
// system instructions and hijack agent behavior.
|
|
63
|
+
// Ref: OWASP LLM01, MITRE AML.T0051, 62% of LLM apps vulnerable (2024)
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
// Block content with prompt injection patterns detected by rules
|
|
67
|
+
@id("semantic-block-injection")
|
|
68
|
+
@name("Block prompt injection")
|
|
69
|
+
@description("Block prompts and tool calls when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, delimiter injection, and other manipulation techniques in both user input and tool arguments (OWASP LLM01).")
|
|
70
|
+
@severity("critical")
|
|
71
|
+
@tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
|
|
72
|
+
@reject_message("Content was blocked because prompt injection patterns were detected. This is a security measure to prevent manipulation of AI agent behavior (OWASP LLM01).")
|
|
73
|
+
forbid (
|
|
74
|
+
principal,
|
|
75
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
76
|
+
resource
|
|
77
|
+
)
|
|
78
|
+
when {
|
|
79
|
+
context has detected_threats && context.detected_threats.contains("prompt_injection")
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
// Block content with high ML injection confidence
|
|
83
|
+
@id("semantic-block-injection-score")
|
|
84
|
+
@name("Block high-confidence injection")
|
|
85
|
+
@description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques that evade pattern-based detection — polymorphic payloads, encoding tricks, and obfuscated instructions.")
|
|
86
|
+
@severity("critical")
|
|
87
|
+
@tags("injection,ml-classifier,security,owasp-llm01,mitre-aml-t0051")
|
|
88
|
+
@reject_message("Your content was blocked because the ML classifier detected prompt injection with high confidence. This appears to be an attempt to manipulate agent behavior.")
|
|
89
|
+
forbid (
|
|
90
|
+
principal,
|
|
91
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
92
|
+
resource
|
|
93
|
+
)
|
|
94
|
+
when {
|
|
95
|
+
context has injection_confidence && context.injection_confidence >= 75
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
// Section 2: Jailbreak Detection
|
|
100
|
+
// Blocks jailbreak attempts — adversarial input designed to bypass AI safety
|
|
101
|
+
// guardrails and elicit restricted outputs.
|
|
102
|
+
// Ref: OWASP LLM02, MITRE AML.T0054, DAN/JailbreakChat/etc.
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
// Block prompts with jailbreak attempts detected by rules
|
|
106
|
+
@id("semantic-block-jailbreak")
|
|
107
|
+
@name("Block jailbreak attempts")
|
|
108
|
+
@description("Block prompts when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
|
|
109
|
+
@severity("critical")
|
|
110
|
+
@tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
|
|
111
|
+
@reject_message("Your prompt was blocked because jailbreak patterns were detected. This is a security measure to prevent circumvention of AI safety controls (OWASP LLM02).")
|
|
112
|
+
forbid (
|
|
113
|
+
principal,
|
|
114
|
+
action == Overwatch::Action::"process_prompt",
|
|
115
|
+
resource
|
|
116
|
+
)
|
|
117
|
+
when {
|
|
118
|
+
context has detected_threats && context.detected_threats.contains("jailbreak")
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
// Block prompts with high ML jailbreak confidence
|
|
122
|
+
@id("semantic-block-jailbreak-score")
|
|
123
|
+
@name("Block high-confidence jailbreak")
|
|
124
|
+
@description("Block content when the ML jailbreak classifier confidence exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation, encoded payloads, and novel prompt structures.")
|
|
125
|
+
@severity("critical")
|
|
126
|
+
@tags("jailbreak,ml-classifier,security,owasp-llm02,mitre-aml-t0054")
|
|
127
|
+
@reject_message("Your content was blocked because the ML classifier detected a jailbreak attempt with high confidence. This appears to be an attempt to bypass safety guardrails.")
|
|
128
|
+
forbid (
|
|
129
|
+
principal,
|
|
130
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
131
|
+
resource
|
|
132
|
+
)
|
|
133
|
+
when {
|
|
134
|
+
context has jailbreak_confidence && context.jailbreak_confidence >= 75
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
// Section 3: Threat Severity Aggregation
|
|
139
|
+
// Blocks based on aggregated threat severity from all detection engines.
|
|
140
|
+
// These act as catch-all rules for threats that don't match specific patterns.
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
// Block any content with critical severity threats
|
|
144
|
+
@id("semantic-block-critical")
|
|
145
|
+
@name("Block critical threats")
|
|
146
|
+
@description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source.")
|
|
147
|
+
@severity("critical")
|
|
148
|
+
@tags("critical,baseline,security,catch-all")
|
|
149
|
+
@reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
|
|
150
|
+
forbid (
|
|
151
|
+
principal,
|
|
152
|
+
action,
|
|
153
|
+
resource
|
|
154
|
+
)
|
|
155
|
+
when {
|
|
156
|
+
context has highest_severity && context.highest_severity == "critical"
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
// Block prompts with high severity semantic threats
|
|
160
|
+
@id("semantic-block-high-severity")
|
|
161
|
+
@name("Block high severity threats")
|
|
162
|
+
@description("Block prompts when threat detection reports high severity (>= 3) in semantic categories. Catches threats that are individually below critical but collectively indicate adversarial intent.")
|
|
163
|
+
@severity("high")
|
|
164
|
+
@tags("semantic,severity,security,defense-in-depth")
|
|
165
|
+
@reject_message("Your prompt was blocked because security scanners detected high severity issues in the content. Review your prompt for manipulative or adversarial patterns.")
|
|
166
|
+
forbid (
|
|
167
|
+
principal,
|
|
168
|
+
action == Overwatch::Action::"process_prompt",
|
|
169
|
+
resource
|
|
170
|
+
)
|
|
171
|
+
when {
|
|
172
|
+
context has threat_categories && context has max_threat_severity &&
|
|
173
|
+
context.threat_categories.contains("semantic") &&
|
|
174
|
+
context.max_threat_severity >= 3
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
// Block tool calls with multiple concurrent threats
|
|
178
|
+
@id("semantic-block-multi-threat-tools")
|
|
179
|
+
@name("Block multi-threat tool calls")
|
|
180
|
+
@description("Block tool execution when multiple distinct threats are detected simultaneously (3+). Multiple concurrent threats in a tool call strongly indicate an adversarial attack chain.")
|
|
181
|
+
@severity("high")
|
|
182
|
+
@tags("multi-threat,tools,security,defense-in-depth")
|
|
183
|
+
@reject_message("Tool execution was blocked because multiple security threats were detected simultaneously. This pattern indicates a potential attack chain.")
|
|
184
|
+
forbid (
|
|
185
|
+
principal,
|
|
186
|
+
action == Overwatch::Action::"call_tool",
|
|
187
|
+
resource
|
|
188
|
+
)
|
|
189
|
+
when {
|
|
190
|
+
context has threat_count && context.threat_count >= 3
|
|
191
|
+
};
|
|
192
|
+
`;
|
|
193
|
+
const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
|
|
194
|
+
// Content Safety Policy (Default)
|
|
195
|
+
// =============================================================================
|
|
196
|
+
// Detects and blocks violent, harmful, hateful, sexual, and profane content
|
|
197
|
+
// using trust/safety ML classification scores.
|
|
198
|
+
//
|
|
199
|
+
// The detection engine runs multiple ML classifiers (toxicity, content safety)
|
|
200
|
+
// and produces normalized scores (0-100) for each category. These policies
|
|
201
|
+
// enforce enterprise-appropriate content thresholds.
|
|
202
|
+
//
|
|
203
|
+
// Compliance:
|
|
204
|
+
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
205
|
+
// EU AI Act Art. 52 (Transparency for AI Systems)
|
|
206
|
+
// OWASP LLM02 (Insecure Output Handling) — unsafe content generation
|
|
207
|
+
// ISO 42001 (AI Management System) — responsible AI use
|
|
208
|
+
//
|
|
209
|
+
// Category: trust_safety
|
|
210
|
+
// Namespace: Overwatch
|
|
211
|
+
// =============================================================================
|
|
212
|
+
|
|
213
|
+
// ---------------------------------------------------------------------------
|
|
214
|
+
// Section 1: Violence & Weapons
|
|
215
|
+
// Blocks content promoting, describing, or instructing violence and weapons.
|
|
216
|
+
// ---------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
// Block content with high violence score
|
|
219
|
+
@id("ts-block-violence")
|
|
220
|
+
@name("Block violent content")
|
|
221
|
+
@description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language.")
|
|
222
|
+
@severity("critical")
|
|
223
|
+
@tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
|
|
224
|
+
@reject_message("Content blocked: violent content detected. AI agents in enterprise environments must not process or generate violent content. Please rephrase without violence-related language.")
|
|
225
|
+
forbid (
|
|
226
|
+
principal,
|
|
227
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
228
|
+
resource
|
|
229
|
+
)
|
|
230
|
+
when {
|
|
231
|
+
context has violence_score && context.violence_score >= 80
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
// Block content with weapons references
|
|
235
|
+
@id("ts-block-weapons")
|
|
236
|
+
@name("Block weapons content")
|
|
237
|
+
@description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed weapon specifications.")
|
|
238
|
+
@severity("critical")
|
|
239
|
+
@tags("weapons,content-safety,trust-safety,nist-si-4")
|
|
240
|
+
@reject_message("Content blocked: weapons-related content detected. AI agents must not process weapons manufacturing, procurement, or detailed specification content.")
|
|
241
|
+
forbid (
|
|
242
|
+
principal,
|
|
243
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
244
|
+
resource
|
|
245
|
+
)
|
|
246
|
+
when {
|
|
247
|
+
context has weapons_score && context.weapons_score >= 80
|
|
248
|
+
};
|
|
249
|
+
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
// Section 2: Hate Speech & Discrimination
|
|
252
|
+
// Blocks hateful, discriminatory, and dehumanizing content.
|
|
253
|
+
// Lower threshold (75) — enterprises have zero tolerance for hate speech.
|
|
254
|
+
// ---------------------------------------------------------------------------
|
|
255
|
+
|
|
256
|
+
// Block hate speech content
|
|
257
|
+
@id("ts-block-hate-speech")
|
|
258
|
+
@name("Block hate speech")
|
|
259
|
+
@description("Block content when the ML hate speech detection score exceeds threshold (75/100). Lower threshold than other categories because enterprises have zero tolerance for discriminatory content. Catches slurs, dehumanizing language, and targeted harassment.")
|
|
260
|
+
@severity("critical")
|
|
261
|
+
@tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
|
|
262
|
+
@reject_message("Content blocked: hate speech or discriminatory content detected. AI agents in enterprise environments must not process or generate hateful, discriminatory, or dehumanizing content.")
|
|
263
|
+
forbid (
|
|
264
|
+
principal,
|
|
265
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
266
|
+
resource
|
|
267
|
+
)
|
|
268
|
+
when {
|
|
269
|
+
context has hate_speech_score && context.hate_speech_score >= 75
|
|
270
|
+
};
|
|
271
|
+
|
|
272
|
+
// ---------------------------------------------------------------------------
|
|
273
|
+
// Section 3: Criminal Content
|
|
274
|
+
// Blocks content related to criminal activities and illegal operations.
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
|
|
277
|
+
// Block criminal content
|
|
278
|
+
@id("ts-block-crime")
|
|
279
|
+
@name("Block criminal content")
|
|
280
|
+
@description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and content promoting criminal behavior.")
|
|
281
|
+
@severity("high")
|
|
282
|
+
@tags("crime,content-safety,trust-safety,nist-si-4")
|
|
283
|
+
@reject_message("Content blocked: criminal activity content detected. AI agents must not process content related to illegal activities, fraud, or other criminal behavior.")
|
|
284
|
+
forbid (
|
|
285
|
+
principal,
|
|
286
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
287
|
+
resource
|
|
288
|
+
)
|
|
289
|
+
when {
|
|
290
|
+
context has crime_score && context.crime_score >= 80
|
|
291
|
+
};
|
|
292
|
+
|
|
293
|
+
// ---------------------------------------------------------------------------
|
|
294
|
+
// Section 4: Sexual Content
|
|
295
|
+
// Blocks sexually explicit or inappropriate content.
|
|
296
|
+
// ---------------------------------------------------------------------------
|
|
297
|
+
|
|
298
|
+
// Block sexual content
|
|
299
|
+
@id("ts-block-sexual")
|
|
300
|
+
@name("Block sexual content")
|
|
301
|
+
@description("Block content when the ML sexual content detection score exceeds threshold (80/100). Ensures enterprise AI agents do not process or generate sexually explicit material.")
|
|
302
|
+
@severity("high")
|
|
303
|
+
@tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
|
|
304
|
+
@reject_message("Content blocked: sexual content detected. AI agents in enterprise environments must not process sexually explicit material.")
|
|
305
|
+
forbid (
|
|
306
|
+
principal,
|
|
307
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
308
|
+
resource
|
|
309
|
+
)
|
|
310
|
+
when {
|
|
311
|
+
context has sexual_score && context.sexual_score >= 80
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
// ---------------------------------------------------------------------------
|
|
315
|
+
// Section 5: Profanity
|
|
316
|
+
// Blocks excessive profanity in enterprise environments.
|
|
317
|
+
// Higher threshold (90) — allows normal expression while blocking abuse.
|
|
318
|
+
// ---------------------------------------------------------------------------
|
|
319
|
+
|
|
320
|
+
// Block excessive profanity
|
|
321
|
+
@id("ts-block-profanity")
|
|
322
|
+
@name("Block profanity")
|
|
323
|
+
@description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold than other safety categories — allows normal expression while blocking abusive or harassing language patterns.")
|
|
324
|
+
@severity("medium")
|
|
325
|
+
@tags("profanity,content-safety,trust-safety")
|
|
326
|
+
@reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner appropriate for enterprise AI interactions.")
|
|
327
|
+
forbid (
|
|
328
|
+
principal,
|
|
329
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
330
|
+
resource
|
|
331
|
+
)
|
|
332
|
+
when {
|
|
333
|
+
context has profanity_score && context.profanity_score >= 90
|
|
334
|
+
};
|
|
335
|
+
`;
|
|
35
336
|
const OVERWATCH_SECRETS_DEFAULT_CEDAR = `// =============================================================================
|
|
36
337
|
// Secrets Detection Policy (Default)
|
|
37
338
|
// =============================================================================
|
|
@@ -439,172 +740,14 @@ when {
|
|
|
439
740
|
@description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
|
|
440
741
|
@severity("high")
|
|
441
742
|
@tags("pii,file-ops,data-protection,gdpr-art-32,nist-si-4")
|
|
442
|
-
@reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
|
|
443
|
-
forbid (
|
|
444
|
-
principal,
|
|
445
|
-
action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
|
|
446
|
-
resource
|
|
447
|
-
)
|
|
448
|
-
when {
|
|
449
|
-
context has pii_detected && context.pii_detected
|
|
450
|
-
};
|
|
451
|
-
`;
|
|
452
|
-
const OVERWATCH_SEMANTIC_DEFAULT_CEDAR = `// =============================================================================
|
|
453
|
-
// Semantic Threat Detection Policy (Default)
|
|
454
|
-
// =============================================================================
|
|
455
|
-
// Detects and blocks prompt injection, jailbreak attempts, and high-severity
|
|
456
|
-
// AI security threats. Uses multi-layered detection:
|
|
457
|
-
//
|
|
458
|
-
// 1. Detection engine rule triggers (detected_threats) — pattern-based
|
|
459
|
-
// 2. ML classifier confidence scores (injection_confidence, jailbreak_confidence)
|
|
460
|
-
// 3. Threat severity aggregation (max_threat_severity, highest_severity)
|
|
461
|
-
// 4. Cross-action enforcement (prompts + tool calls + file operations)
|
|
462
|
-
//
|
|
463
|
-
// Compliance:
|
|
464
|
-
// OWASP LLM01 (Prompt Injection) — direct + indirect
|
|
465
|
-
// OWASP LLM02 (Insecure Output Handling) — response manipulation
|
|
466
|
-
// OWASP ASI01 (Agent Goal Hijack) — behavioral manipulation
|
|
467
|
-
// MITRE ATLAS AML.T0051 (LLM Prompt Injection)
|
|
468
|
-
// MITRE ATLAS AML.T0054 (LLM Jailbreak)
|
|
469
|
-
// NIST 800-53 SI-3 (Malicious Code Protection)
|
|
470
|
-
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
471
|
-
//
|
|
472
|
-
// Category: semantic
|
|
473
|
-
// Namespace: Overwatch
|
|
474
|
-
// =============================================================================
|
|
475
|
-
|
|
476
|
-
// ---------------------------------------------------------------------------
|
|
477
|
-
// Section 1: Prompt Injection Detection
|
|
478
|
-
// Blocks direct prompt injection — adversarial input designed to override
|
|
479
|
-
// system instructions and hijack agent behavior.
|
|
480
|
-
// Ref: OWASP LLM01, MITRE AML.T0051, 62% of LLM apps vulnerable (2024)
|
|
481
|
-
// ---------------------------------------------------------------------------
|
|
482
|
-
|
|
483
|
-
// Block content with prompt injection patterns detected by rules
|
|
484
|
-
@id("semantic-block-injection")
|
|
485
|
-
@name("Block prompt injection")
|
|
486
|
-
@description("Block prompts and tool calls when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, delimiter injection, and other manipulation techniques in both user input and tool arguments (OWASP LLM01).")
|
|
487
|
-
@severity("critical")
|
|
488
|
-
@tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
|
|
489
|
-
@reject_message("Content was blocked because prompt injection patterns were detected. This is a security measure to prevent manipulation of AI agent behavior (OWASP LLM01).")
|
|
490
|
-
forbid (
|
|
491
|
-
principal,
|
|
492
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
493
|
-
resource
|
|
494
|
-
)
|
|
495
|
-
when {
|
|
496
|
-
context has detected_threats && context.detected_threats.contains("prompt_injection")
|
|
497
|
-
};
|
|
498
|
-
|
|
499
|
-
// Block content with high ML injection confidence
|
|
500
|
-
@id("semantic-block-injection-score")
|
|
501
|
-
@name("Block high-confidence injection")
|
|
502
|
-
@description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques that evade pattern-based detection — polymorphic payloads, encoding tricks, and obfuscated instructions.")
|
|
503
|
-
@severity("critical")
|
|
504
|
-
@tags("injection,ml-classifier,security,owasp-llm01,mitre-aml-t0051")
|
|
505
|
-
@reject_message("Your content was blocked because the ML classifier detected prompt injection with high confidence. This appears to be an attempt to manipulate agent behavior.")
|
|
506
|
-
forbid (
|
|
507
|
-
principal,
|
|
508
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
509
|
-
resource
|
|
510
|
-
)
|
|
511
|
-
when {
|
|
512
|
-
context has injection_confidence && context.injection_confidence >= 75
|
|
513
|
-
};
|
|
514
|
-
|
|
515
|
-
// ---------------------------------------------------------------------------
|
|
516
|
-
// Section 2: Jailbreak Detection
|
|
517
|
-
// Blocks jailbreak attempts — adversarial input designed to bypass AI safety
|
|
518
|
-
// guardrails and elicit restricted outputs.
|
|
519
|
-
// Ref: OWASP LLM02, MITRE AML.T0054, DAN/JailbreakChat/etc.
|
|
520
|
-
// ---------------------------------------------------------------------------
|
|
521
|
-
|
|
522
|
-
// Block prompts with jailbreak attempts detected by rules
|
|
523
|
-
@id("semantic-block-jailbreak")
|
|
524
|
-
@name("Block jailbreak attempts")
|
|
525
|
-
@description("Block prompts when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
|
|
526
|
-
@severity("critical")
|
|
527
|
-
@tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
|
|
528
|
-
@reject_message("Your prompt was blocked because jailbreak patterns were detected. This is a security measure to prevent circumvention of AI safety controls (OWASP LLM02).")
|
|
529
|
-
forbid (
|
|
530
|
-
principal,
|
|
531
|
-
action == Overwatch::Action::"process_prompt",
|
|
532
|
-
resource
|
|
533
|
-
)
|
|
534
|
-
when {
|
|
535
|
-
context has detected_threats && context.detected_threats.contains("jailbreak")
|
|
536
|
-
};
|
|
537
|
-
|
|
538
|
-
// Block prompts with high ML jailbreak confidence
|
|
539
|
-
@id("semantic-block-jailbreak-score")
|
|
540
|
-
@name("Block high-confidence jailbreak")
|
|
541
|
-
@description("Block content when the ML jailbreak classifier confidence exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation, encoded payloads, and novel prompt structures.")
|
|
542
|
-
@severity("critical")
|
|
543
|
-
@tags("jailbreak,ml-classifier,security,owasp-llm02,mitre-aml-t0054")
|
|
544
|
-
@reject_message("Your content was blocked because the ML classifier detected a jailbreak attempt with high confidence. This appears to be an attempt to bypass safety guardrails.")
|
|
545
|
-
forbid (
|
|
546
|
-
principal,
|
|
547
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
548
|
-
resource
|
|
549
|
-
)
|
|
550
|
-
when {
|
|
551
|
-
context has jailbreak_confidence && context.jailbreak_confidence >= 75
|
|
552
|
-
};
|
|
553
|
-
|
|
554
|
-
// ---------------------------------------------------------------------------
|
|
555
|
-
// Section 3: Threat Severity Aggregation
|
|
556
|
-
// Blocks based on aggregated threat severity from all detection engines.
|
|
557
|
-
// These act as catch-all rules for threats that don't match specific patterns.
|
|
558
|
-
// ---------------------------------------------------------------------------
|
|
559
|
-
|
|
560
|
-
// Block any content with critical severity threats
|
|
561
|
-
@id("semantic-block-critical")
|
|
562
|
-
@name("Block critical threats")
|
|
563
|
-
@description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source.")
|
|
564
|
-
@severity("critical")
|
|
565
|
-
@tags("critical,baseline,security,catch-all")
|
|
566
|
-
@reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
|
|
567
|
-
forbid (
|
|
568
|
-
principal,
|
|
569
|
-
action,
|
|
570
|
-
resource
|
|
571
|
-
)
|
|
572
|
-
when {
|
|
573
|
-
context has highest_severity && context.highest_severity == "critical"
|
|
574
|
-
};
|
|
575
|
-
|
|
576
|
-
// Block prompts with high severity semantic threats
|
|
577
|
-
@id("semantic-block-high-severity")
|
|
578
|
-
@name("Block high severity threats")
|
|
579
|
-
@description("Block prompts when threat detection reports high severity (>= 3) in semantic categories. Catches threats that are individually below critical but collectively indicate adversarial intent.")
|
|
580
|
-
@severity("high")
|
|
581
|
-
@tags("semantic,severity,security,defense-in-depth")
|
|
582
|
-
@reject_message("Your prompt was blocked because security scanners detected high severity issues in the content. Review your prompt for manipulative or adversarial patterns.")
|
|
583
|
-
forbid (
|
|
584
|
-
principal,
|
|
585
|
-
action == Overwatch::Action::"process_prompt",
|
|
586
|
-
resource
|
|
587
|
-
)
|
|
588
|
-
when {
|
|
589
|
-
context has threat_categories && context has max_threat_severity &&
|
|
590
|
-
context.threat_categories.contains("semantic") &&
|
|
591
|
-
context.max_threat_severity >= 3
|
|
592
|
-
};
|
|
593
|
-
|
|
594
|
-
// Block tool calls with multiple concurrent threats
|
|
595
|
-
@id("semantic-block-multi-threat-tools")
|
|
596
|
-
@name("Block multi-threat tool calls")
|
|
597
|
-
@description("Block tool execution when multiple distinct threats are detected simultaneously (3+). Multiple concurrent threats in a tool call strongly indicate an adversarial attack chain.")
|
|
598
|
-
@severity("high")
|
|
599
|
-
@tags("multi-threat,tools,security,defense-in-depth")
|
|
600
|
-
@reject_message("Tool execution was blocked because multiple security threats were detected simultaneously. This pattern indicates a potential attack chain.")
|
|
743
|
+
@reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
|
|
601
744
|
forbid (
|
|
602
745
|
principal,
|
|
603
|
-
action
|
|
746
|
+
action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
|
|
604
747
|
resource
|
|
605
748
|
)
|
|
606
749
|
when {
|
|
607
|
-
context has
|
|
750
|
+
context has pii_detected && context.pii_detected
|
|
608
751
|
};
|
|
609
752
|
`;
|
|
610
753
|
const OVERWATCH_TOOLS_DEFAULT_CEDAR = `// =============================================================================
|
|
@@ -840,149 +983,6 @@ when {
|
|
|
840
983
|
context.detected_threats.contains("command_injection")
|
|
841
984
|
};
|
|
842
985
|
`;
|
|
843
|
-
const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
|
|
844
|
-
// Content Safety Policy (Default)
|
|
845
|
-
// =============================================================================
|
|
846
|
-
// Detects and blocks violent, harmful, hateful, sexual, and profane content
|
|
847
|
-
// using trust/safety ML classification scores.
|
|
848
|
-
//
|
|
849
|
-
// The detection engine runs multiple ML classifiers (toxicity, content safety)
|
|
850
|
-
// and produces normalized scores (0-100) for each category. These policies
|
|
851
|
-
// enforce enterprise-appropriate content thresholds.
|
|
852
|
-
//
|
|
853
|
-
// Compliance:
|
|
854
|
-
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
855
|
-
// EU AI Act Art. 52 (Transparency for AI Systems)
|
|
856
|
-
// OWASP LLM02 (Insecure Output Handling) — unsafe content generation
|
|
857
|
-
// ISO 42001 (AI Management System) — responsible AI use
|
|
858
|
-
//
|
|
859
|
-
// Category: trust_safety
|
|
860
|
-
// Namespace: Overwatch
|
|
861
|
-
// =============================================================================
|
|
862
|
-
|
|
863
|
-
// ---------------------------------------------------------------------------
|
|
864
|
-
// Section 1: Violence & Weapons
|
|
865
|
-
// Blocks content promoting, describing, or instructing violence and weapons.
|
|
866
|
-
// ---------------------------------------------------------------------------
|
|
867
|
-
|
|
868
|
-
// Block content with high violence score
|
|
869
|
-
@id("ts-block-violence")
|
|
870
|
-
@name("Block violent content")
|
|
871
|
-
@description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language.")
|
|
872
|
-
@severity("critical")
|
|
873
|
-
@tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
|
|
874
|
-
@reject_message("Content blocked: violent content detected. AI agents in enterprise environments must not process or generate violent content. Please rephrase without violence-related language.")
|
|
875
|
-
forbid (
|
|
876
|
-
principal,
|
|
877
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
878
|
-
resource
|
|
879
|
-
)
|
|
880
|
-
when {
|
|
881
|
-
context has violence_score && context.violence_score >= 80
|
|
882
|
-
};
|
|
883
|
-
|
|
884
|
-
// Block content with weapons references
|
|
885
|
-
@id("ts-block-weapons")
|
|
886
|
-
@name("Block weapons content")
|
|
887
|
-
@description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed weapon specifications.")
|
|
888
|
-
@severity("critical")
|
|
889
|
-
@tags("weapons,content-safety,trust-safety,nist-si-4")
|
|
890
|
-
@reject_message("Content blocked: weapons-related content detected. AI agents must not process weapons manufacturing, procurement, or detailed specification content.")
|
|
891
|
-
forbid (
|
|
892
|
-
principal,
|
|
893
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
894
|
-
resource
|
|
895
|
-
)
|
|
896
|
-
when {
|
|
897
|
-
context has weapons_score && context.weapons_score >= 80
|
|
898
|
-
};
|
|
899
|
-
|
|
900
|
-
// ---------------------------------------------------------------------------
|
|
901
|
-
// Section 2: Hate Speech & Discrimination
|
|
902
|
-
// Blocks hateful, discriminatory, and dehumanizing content.
|
|
903
|
-
// Lower threshold (75) — enterprises have zero tolerance for hate speech.
|
|
904
|
-
// ---------------------------------------------------------------------------
|
|
905
|
-
|
|
906
|
-
// Block hate speech content
|
|
907
|
-
@id("ts-block-hate-speech")
|
|
908
|
-
@name("Block hate speech")
|
|
909
|
-
@description("Block content when the ML hate speech detection score exceeds threshold (75/100). Lower threshold than other categories because enterprises have zero tolerance for discriminatory content. Catches slurs, dehumanizing language, and targeted harassment.")
|
|
910
|
-
@severity("critical")
|
|
911
|
-
@tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
|
|
912
|
-
@reject_message("Content blocked: hate speech or discriminatory content detected. AI agents in enterprise environments must not process or generate hateful, discriminatory, or dehumanizing content.")
|
|
913
|
-
forbid (
|
|
914
|
-
principal,
|
|
915
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
916
|
-
resource
|
|
917
|
-
)
|
|
918
|
-
when {
|
|
919
|
-
context has hate_speech_score && context.hate_speech_score >= 75
|
|
920
|
-
};
|
|
921
|
-
|
|
922
|
-
// ---------------------------------------------------------------------------
|
|
923
|
-
// Section 3: Criminal Content
|
|
924
|
-
// Blocks content related to criminal activities and illegal operations.
|
|
925
|
-
// ---------------------------------------------------------------------------
|
|
926
|
-
|
|
927
|
-
// Block criminal content
|
|
928
|
-
@id("ts-block-crime")
|
|
929
|
-
@name("Block criminal content")
|
|
930
|
-
@description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and content promoting criminal behavior.")
|
|
931
|
-
@severity("high")
|
|
932
|
-
@tags("crime,content-safety,trust-safety,nist-si-4")
|
|
933
|
-
@reject_message("Content blocked: criminal activity content detected. AI agents must not process content related to illegal activities, fraud, or other criminal behavior.")
|
|
934
|
-
forbid (
|
|
935
|
-
principal,
|
|
936
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
937
|
-
resource
|
|
938
|
-
)
|
|
939
|
-
when {
|
|
940
|
-
context has crime_score && context.crime_score >= 80
|
|
941
|
-
};
|
|
942
|
-
|
|
943
|
-
// ---------------------------------------------------------------------------
|
|
944
|
-
// Section 4: Sexual Content
|
|
945
|
-
// Blocks sexually explicit or inappropriate content.
|
|
946
|
-
// ---------------------------------------------------------------------------
|
|
947
|
-
|
|
948
|
-
// Block sexual content
|
|
949
|
-
@id("ts-block-sexual")
|
|
950
|
-
@name("Block sexual content")
|
|
951
|
-
@description("Block content when the ML sexual content detection score exceeds threshold (80/100). Ensures enterprise AI agents do not process or generate sexually explicit material.")
|
|
952
|
-
@severity("high")
|
|
953
|
-
@tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
|
|
954
|
-
@reject_message("Content blocked: sexual content detected. AI agents in enterprise environments must not process sexually explicit material.")
|
|
955
|
-
forbid (
|
|
956
|
-
principal,
|
|
957
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
958
|
-
resource
|
|
959
|
-
)
|
|
960
|
-
when {
|
|
961
|
-
context has sexual_score && context.sexual_score >= 80
|
|
962
|
-
};
|
|
963
|
-
|
|
964
|
-
// ---------------------------------------------------------------------------
|
|
965
|
-
// Section 5: Profanity
|
|
966
|
-
// Blocks excessive profanity in enterprise environments.
|
|
967
|
-
// Higher threshold (90) — allows normal expression while blocking abuse.
|
|
968
|
-
// ---------------------------------------------------------------------------
|
|
969
|
-
|
|
970
|
-
// Block excessive profanity
|
|
971
|
-
@id("ts-block-profanity")
|
|
972
|
-
@name("Block profanity")
|
|
973
|
-
@description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold than other safety categories — allows normal expression while blocking abusive or harassing language patterns.")
|
|
974
|
-
@severity("medium")
|
|
975
|
-
@tags("profanity,content-safety,trust-safety")
|
|
976
|
-
@reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner appropriate for enterprise AI interactions.")
|
|
977
|
-
forbid (
|
|
978
|
-
principal,
|
|
979
|
-
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
980
|
-
resource
|
|
981
|
-
)
|
|
982
|
-
when {
|
|
983
|
-
context has profanity_score && context.profanity_score >= 90
|
|
984
|
-
};
|
|
985
|
-
`;
|
|
986
986
|
const OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR = `// =============================================================================
|
|
987
987
|
// Agent Security Policy (Default)
|
|
988
988
|
// =============================================================================
|
|
@@ -1677,6 +1677,31 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1677
1677
|
tags: ['baseline', 'permit-default', 'organization'],
|
|
1678
1678
|
isActive: true,
|
|
1679
1679
|
},
|
|
1680
|
+
{
|
|
1681
|
+
id: 'semantic-default',
|
|
1682
|
+
name: 'Semantic Threat Detection',
|
|
1683
|
+
description: 'Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers',
|
|
1684
|
+
category: 'semantic',
|
|
1685
|
+
cedarText: OVERWATCH_SEMANTIC_DEFAULT_CEDAR,
|
|
1686
|
+
severity: 'critical',
|
|
1687
|
+
tags: ['prompt-injection', 'jailbreak', 'owasp-llm01', 'owasp-llm02', 'security', 'baseline'],
|
|
1688
|
+
isActive: true,
|
|
1689
|
+
},
|
|
1690
|
+
{
|
|
1691
|
+
id: 'trust-safety-default',
|
|
1692
|
+
name: 'Content Safety',
|
|
1693
|
+
description: 'Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores',
|
|
1694
|
+
category: 'trust_safety',
|
|
1695
|
+
cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
|
|
1696
|
+
severity: 'critical',
|
|
1697
|
+
tags: ['violence', 'weapons', 'hate-speech', 'crime', 'sexual', 'profanity', 'content-safety', 'baseline'],
|
|
1698
|
+
isActive: true,
|
|
1699
|
+
},
|
|
1700
|
+
];
|
|
1701
|
+
// =============================================================================
|
|
1702
|
+
// ALL TEMPLATES
|
|
1703
|
+
// =============================================================================
|
|
1704
|
+
export const OVERWATCH_TEMPLATES = [
|
|
1680
1705
|
{
|
|
1681
1706
|
id: 'secrets-default',
|
|
1682
1707
|
name: 'Secrets Detection',
|
|
@@ -1685,7 +1710,6 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1685
1710
|
cedarText: OVERWATCH_SECRETS_DEFAULT_CEDAR,
|
|
1686
1711
|
severity: 'critical',
|
|
1687
1712
|
tags: ['api-keys', 'tokens', 'credentials', 'aws', 'github', 'ssh', 'baseline'],
|
|
1688
|
-
isActive: true,
|
|
1689
1713
|
},
|
|
1690
1714
|
{
|
|
1691
1715
|
id: 'pii-default',
|
|
@@ -1695,17 +1719,6 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1695
1719
|
cedarText: OVERWATCH_PII_DEFAULT_CEDAR,
|
|
1696
1720
|
severity: 'critical',
|
|
1697
1721
|
tags: ['pii', 'privacy', 'compliance', 'pci-dss', 'gdpr', 'hipaa', 'baseline'],
|
|
1698
|
-
isActive: true,
|
|
1699
|
-
},
|
|
1700
|
-
{
|
|
1701
|
-
id: 'semantic-default',
|
|
1702
|
-
name: 'Semantic Threat Detection',
|
|
1703
|
-
description: 'Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers',
|
|
1704
|
-
category: 'semantic',
|
|
1705
|
-
cedarText: OVERWATCH_SEMANTIC_DEFAULT_CEDAR,
|
|
1706
|
-
severity: 'critical',
|
|
1707
|
-
tags: ['prompt-injection', 'jailbreak', 'owasp-llm01', 'owasp-llm02', 'security', 'baseline'],
|
|
1708
|
-
isActive: true,
|
|
1709
1722
|
},
|
|
1710
1723
|
{
|
|
1711
1724
|
id: 'tools-default',
|
|
@@ -1715,17 +1728,6 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1715
1728
|
cedarText: OVERWATCH_TOOLS_DEFAULT_CEDAR,
|
|
1716
1729
|
severity: 'critical',
|
|
1717
1730
|
tags: ['shell', 'command-injection', 'file-access', 'tool-risk', 'mitre-t1059', 'owasp-llm06', 'baseline'],
|
|
1718
|
-
isActive: true,
|
|
1719
|
-
},
|
|
1720
|
-
{
|
|
1721
|
-
id: 'trust-safety-default',
|
|
1722
|
-
name: 'Content Safety',
|
|
1723
|
-
description: 'Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores',
|
|
1724
|
-
category: 'trust_safety',
|
|
1725
|
-
cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
|
|
1726
|
-
severity: 'critical',
|
|
1727
|
-
tags: ['violence', 'weapons', 'hate-speech', 'crime', 'sexual', 'profanity', 'content-safety', 'baseline'],
|
|
1728
|
-
isActive: true,
|
|
1729
1731
|
},
|
|
1730
1732
|
{
|
|
1731
1733
|
id: 'agent-security-default',
|
|
@@ -1735,7 +1737,6 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1735
1737
|
cedarText: OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR,
|
|
1736
1738
|
severity: 'critical',
|
|
1737
1739
|
tags: ['tool-poisoning', 'rug-pull', 'indirect-injection', 'mcp-security', 'agent-security', 'owasp-asi01', 'owasp-asi04', 'baseline'],
|
|
1738
|
-
isActive: true,
|
|
1739
1740
|
},
|
|
1740
1741
|
{
|
|
1741
1742
|
id: 'encoding-default',
|
|
@@ -1745,7 +1746,6 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1745
1746
|
cedarText: OVERWATCH_ENCODING_DEFAULT_CEDAR,
|
|
1746
1747
|
severity: 'high',
|
|
1747
1748
|
tags: ['unicode', 'invisible-chars', 'bidi-override', 'encoding', 'owasp-llm01', 'baseline'],
|
|
1748
|
-
isActive: true,
|
|
1749
1749
|
},
|
|
1750
1750
|
{
|
|
1751
1751
|
id: 'behavioral-default',
|
|
@@ -1755,13 +1755,7 @@ export const OVERWATCH_DEFAULTS = [
|
|
|
1755
1755
|
cedarText: OVERWATCH_BEHAVIORAL_DEFAULT_CEDAR,
|
|
1756
1756
|
severity: 'high',
|
|
1757
1757
|
tags: ['loop-detection', 'data-exfiltration', 'credential-theft', 'behavioral', 'owasp-llm10', 'owasp-asi02', 'baseline'],
|
|
1758
|
-
isActive: true,
|
|
1759
1758
|
},
|
|
1760
|
-
];
|
|
1761
|
-
// =============================================================================
|
|
1762
|
-
// ALL TEMPLATES
|
|
1763
|
-
// =============================================================================
|
|
1764
|
-
export const OVERWATCH_TEMPLATES = [
|
|
1765
1759
|
{
|
|
1766
1760
|
id: 'tools-mcp-allowlist',
|
|
1767
1761
|
name: 'MCP Server Allowlist',
|
|
@@ -1883,6 +1877,28 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1883
1877
|
"tags": ["baseline", "permit-default", "organization"],
|
|
1884
1878
|
"is_active": true
|
|
1885
1879
|
},
|
|
1880
|
+
{
|
|
1881
|
+
"id": "semantic-default",
|
|
1882
|
+
"name": "Semantic Threat Detection",
|
|
1883
|
+
"description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers",
|
|
1884
|
+
"category": "semantic",
|
|
1885
|
+
"file": "defaults/semantic.cedar",
|
|
1886
|
+
"severity": "critical",
|
|
1887
|
+
"tags": ["prompt-injection", "jailbreak", "owasp-llm01", "owasp-llm02", "security", "baseline"],
|
|
1888
|
+
"is_active": true
|
|
1889
|
+
},
|
|
1890
|
+
{
|
|
1891
|
+
"id": "trust-safety-default",
|
|
1892
|
+
"name": "Content Safety",
|
|
1893
|
+
"description": "Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores",
|
|
1894
|
+
"category": "trust_safety",
|
|
1895
|
+
"file": "defaults/trust_safety.cedar",
|
|
1896
|
+
"severity": "critical",
|
|
1897
|
+
"tags": ["violence", "weapons", "hate-speech", "crime", "sexual", "profanity", "content-safety", "baseline"],
|
|
1898
|
+
"is_active": true
|
|
1899
|
+
}
|
|
1900
|
+
],
|
|
1901
|
+
"templates": [
|
|
1886
1902
|
{
|
|
1887
1903
|
"id": "secrets-default",
|
|
1888
1904
|
"name": "Secrets Detection",
|
|
@@ -1890,8 +1906,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1890
1906
|
"category": "secrets",
|
|
1891
1907
|
"file": "defaults/secrets.cedar",
|
|
1892
1908
|
"severity": "critical",
|
|
1893
|
-
"tags": ["api-keys", "tokens", "credentials", "aws", "github", "ssh", "baseline"]
|
|
1894
|
-
"is_active": true
|
|
1909
|
+
"tags": ["api-keys", "tokens", "credentials", "aws", "github", "ssh", "baseline"]
|
|
1895
1910
|
},
|
|
1896
1911
|
{
|
|
1897
1912
|
"id": "pii-default",
|
|
@@ -1900,18 +1915,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1900
1915
|
"category": "pii",
|
|
1901
1916
|
"file": "defaults/pii.cedar",
|
|
1902
1917
|
"severity": "critical",
|
|
1903
|
-
"tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"]
|
|
1904
|
-
"is_active": true
|
|
1905
|
-
},
|
|
1906
|
-
{
|
|
1907
|
-
"id": "semantic-default",
|
|
1908
|
-
"name": "Semantic Threat Detection",
|
|
1909
|
-
"description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers",
|
|
1910
|
-
"category": "semantic",
|
|
1911
|
-
"file": "defaults/semantic.cedar",
|
|
1912
|
-
"severity": "critical",
|
|
1913
|
-
"tags": ["prompt-injection", "jailbreak", "owasp-llm01", "owasp-llm02", "security", "baseline"],
|
|
1914
|
-
"is_active": true
|
|
1918
|
+
"tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"]
|
|
1915
1919
|
},
|
|
1916
1920
|
{
|
|
1917
1921
|
"id": "tools-default",
|
|
@@ -1920,18 +1924,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1920
1924
|
"category": "tools",
|
|
1921
1925
|
"file": "defaults/tools.cedar",
|
|
1922
1926
|
"severity": "critical",
|
|
1923
|
-
"tags": ["shell", "command-injection", "file-access", "tool-risk", "mitre-t1059", "owasp-llm06", "baseline"]
|
|
1924
|
-
"is_active": true
|
|
1925
|
-
},
|
|
1926
|
-
{
|
|
1927
|
-
"id": "trust-safety-default",
|
|
1928
|
-
"name": "Content Safety",
|
|
1929
|
-
"description": "Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores",
|
|
1930
|
-
"category": "trust_safety",
|
|
1931
|
-
"file": "defaults/trust_safety.cedar",
|
|
1932
|
-
"severity": "critical",
|
|
1933
|
-
"tags": ["violence", "weapons", "hate-speech", "crime", "sexual", "profanity", "content-safety", "baseline"],
|
|
1934
|
-
"is_active": true
|
|
1927
|
+
"tags": ["shell", "command-injection", "file-access", "tool-risk", "mitre-t1059", "owasp-llm06", "baseline"]
|
|
1935
1928
|
},
|
|
1936
1929
|
{
|
|
1937
1930
|
"id": "agent-security-default",
|
|
@@ -1940,8 +1933,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1940
1933
|
"category": "agent_security",
|
|
1941
1934
|
"file": "defaults/agent_security.cedar",
|
|
1942
1935
|
"severity": "critical",
|
|
1943
|
-
"tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "owasp-asi01", "owasp-asi04", "baseline"]
|
|
1944
|
-
"is_active": true
|
|
1936
|
+
"tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "owasp-asi01", "owasp-asi04", "baseline"]
|
|
1945
1937
|
},
|
|
1946
1938
|
{
|
|
1947
1939
|
"id": "encoding-default",
|
|
@@ -1950,8 +1942,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1950
1942
|
"category": "encoding",
|
|
1951
1943
|
"file": "defaults/encoding_attacks.cedar",
|
|
1952
1944
|
"severity": "high",
|
|
1953
|
-
"tags": ["unicode", "invisible-chars", "bidi-override", "encoding", "owasp-llm01", "baseline"]
|
|
1954
|
-
"is_active": true
|
|
1945
|
+
"tags": ["unicode", "invisible-chars", "bidi-override", "encoding", "owasp-llm01", "baseline"]
|
|
1955
1946
|
},
|
|
1956
1947
|
{
|
|
1957
1948
|
"id": "behavioral-default",
|
|
@@ -1960,11 +1951,8 @@ export const OVERWATCH_TEMPLATES_JSON = `{
|
|
|
1960
1951
|
"category": "behavioral",
|
|
1961
1952
|
"file": "defaults/behavioral.cedar",
|
|
1962
1953
|
"severity": "high",
|
|
1963
|
-
"tags": ["loop-detection", "data-exfiltration", "credential-theft", "behavioral", "owasp-llm10", "owasp-asi02", "baseline"]
|
|
1964
|
-
|
|
1965
|
-
}
|
|
1966
|
-
],
|
|
1967
|
-
"templates": [
|
|
1954
|
+
"tags": ["loop-detection", "data-exfiltration", "credential-theft", "behavioral", "owasp-llm10", "owasp-asi02", "baseline"]
|
|
1955
|
+
},
|
|
1968
1956
|
{
|
|
1969
1957
|
"id": "tools-mcp-allowlist",
|
|
1970
1958
|
"name": "MCP Server Allowlist",
|