@highflame/policy 2.1.4 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/_schemas/guardrails/context.json +600 -6
- package/_schemas/guardrails/schema.cedarschema +79 -3
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +118 -0
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +4 -4
- package/_schemas/guardrails/templates/defaults/injection.cedar +10 -10
- package/_schemas/guardrails/templates/defaults/secrets.cedar +2 -2
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +1 -1
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +4 -4
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +1 -1
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +157 -0
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +140 -0
- package/_schemas/guardrails/templates/templates.json +41 -0
- package/_schemas/overwatch/context.json +232 -4
- package/_schemas/overwatch/schema.cedarschema +42 -4
- package/dist/guardrails-context.gen.d.ts +24 -2
- package/dist/guardrails-context.gen.js +24 -2
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +524 -36
- package/dist/overwatch-context.gen.d.ts +10 -0
- package/dist/overwatch-context.gen.js +10 -0
- package/dist/service-schemas.gen.d.ts +2 -2
- package/dist/service-schemas.gen.js +275 -24
- package/package.json +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
- package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
- package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
|
@@ -96,8 +96,12 @@ namespace Guardrails {
|
|
|
96
96
|
"detector_count": Long,
|
|
97
97
|
|
|
98
98
|
// Security - Injection & Jailbreak (optional)
|
|
99
|
-
"
|
|
100
|
-
"
|
|
99
|
+
"injection_confidence"?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
100
|
+
"jailbreak_confidence"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
101
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
102
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
103
|
+
"jailbreak_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
104
|
+
"jailbreak_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
101
105
|
"injection_type"?: String, // "prompt" | "sql" | "command" | "none"
|
|
102
106
|
|
|
103
107
|
// Privacy - Secrets (optional)
|
|
@@ -183,6 +187,21 @@ namespace Guardrails {
|
|
|
183
187
|
"session_injection_detected"?: Bool,
|
|
184
188
|
"session_command_injection"?: Bool,
|
|
185
189
|
"session_threat_turns"?: Long,
|
|
190
|
+
"session_max_injection_score"?: Long,
|
|
191
|
+
"session_max_jailbreak_score"?: Long,
|
|
192
|
+
"session_max_command_injection_score"?: Long,
|
|
193
|
+
"session_max_pii_score"?: Long,
|
|
194
|
+
"session_max_secret_score"?: Long,
|
|
195
|
+
"session_cumulative_risk_score"?: Long,
|
|
196
|
+
|
|
197
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
198
|
+
// Present when the request is made by an AI agent (API key or JWT with agent claims).
|
|
199
|
+
// Empty strings for human user requests. Use these to write agent-specific policies.
|
|
200
|
+
"agent_id"?: String, // Unique agent identifier (e.g., "agent_research_v3")
|
|
201
|
+
"agent_type"?: String, // "orchestrator" | "autonomous" | "tool_agent" | "human_proxy"
|
|
202
|
+
"agent_trust_level"?: String, // "first_party" | "verified_third_party" | "unverified"
|
|
203
|
+
"agent_framework"?: String, // Agent framework (e.g., "claude-code", "langchain", "crewai")
|
|
204
|
+
"agent_publisher"?: String, // Organization that published the agent
|
|
186
205
|
|
|
187
206
|
};
|
|
188
207
|
|
|
@@ -224,10 +243,13 @@ namespace Guardrails {
|
|
|
224
243
|
|
|
225
244
|
// Security checks on tool arguments (optional)
|
|
226
245
|
"contains_secrets"?: Bool,
|
|
246
|
+
"secret_count"?: Long,
|
|
227
247
|
"secret_types"?: Set<String>,
|
|
228
248
|
"pii_detected"?: Bool,
|
|
229
249
|
"pii_types"?: Set<String>,
|
|
230
|
-
"
|
|
250
|
+
"injection_confidence"?: Long,
|
|
251
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
252
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
231
253
|
|
|
232
254
|
// Security - Pattern Detection (optional)
|
|
233
255
|
"command_injection_detected"?: Bool,
|
|
@@ -276,6 +298,19 @@ namespace Guardrails {
|
|
|
276
298
|
"session_injection_detected"?: Bool,
|
|
277
299
|
"session_command_injection"?: Bool,
|
|
278
300
|
"session_threat_turns"?: Long,
|
|
301
|
+
"session_max_injection_score"?: Long,
|
|
302
|
+
"session_max_jailbreak_score"?: Long,
|
|
303
|
+
"session_max_command_injection_score"?: Long,
|
|
304
|
+
"session_max_pii_score"?: Long,
|
|
305
|
+
"session_max_secret_score"?: Long,
|
|
306
|
+
"session_cumulative_risk_score"?: Long,
|
|
307
|
+
|
|
308
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
309
|
+
"agent_id"?: String,
|
|
310
|
+
"agent_type"?: String,
|
|
311
|
+
"agent_trust_level"?: String,
|
|
312
|
+
"agent_framework"?: String,
|
|
313
|
+
"agent_publisher"?: String,
|
|
279
314
|
|
|
280
315
|
};
|
|
281
316
|
|
|
@@ -287,6 +322,7 @@ namespace Guardrails {
|
|
|
287
322
|
|
|
288
323
|
// Security checks on file content (optional)
|
|
289
324
|
"contains_secrets"?: Bool,
|
|
325
|
+
"secret_count"?: Long,
|
|
290
326
|
"secret_types"?: Set<String>,
|
|
291
327
|
"pii_detected"?: Bool,
|
|
292
328
|
"pii_types"?: Set<String>,
|
|
@@ -304,6 +340,19 @@ namespace Guardrails {
|
|
|
304
340
|
"session_injection_detected"?: Bool,
|
|
305
341
|
"session_command_injection"?: Bool,
|
|
306
342
|
"session_threat_turns"?: Long,
|
|
343
|
+
"session_max_injection_score"?: Long,
|
|
344
|
+
"session_max_jailbreak_score"?: Long,
|
|
345
|
+
"session_max_command_injection_score"?: Long,
|
|
346
|
+
"session_max_pii_score"?: Long,
|
|
347
|
+
"session_max_secret_score"?: Long,
|
|
348
|
+
"session_cumulative_risk_score"?: Long,
|
|
349
|
+
|
|
350
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
351
|
+
"agent_id"?: String,
|
|
352
|
+
"agent_type"?: String,
|
|
353
|
+
"agent_trust_level"?: String,
|
|
354
|
+
"agent_framework"?: String,
|
|
355
|
+
"agent_publisher"?: String,
|
|
307
356
|
|
|
308
357
|
};
|
|
309
358
|
|
|
@@ -315,6 +364,7 @@ namespace Guardrails {
|
|
|
315
364
|
|
|
316
365
|
// Security checks on content being written (optional)
|
|
317
366
|
"contains_secrets"?: Bool,
|
|
367
|
+
"secret_count"?: Long,
|
|
318
368
|
"secret_types"?: Set<String>,
|
|
319
369
|
"pii_detected"?: Bool,
|
|
320
370
|
"pii_types"?: Set<String>,
|
|
@@ -332,6 +382,19 @@ namespace Guardrails {
|
|
|
332
382
|
"session_injection_detected"?: Bool,
|
|
333
383
|
"session_command_injection"?: Bool,
|
|
334
384
|
"session_threat_turns"?: Long,
|
|
385
|
+
"session_max_injection_score"?: Long,
|
|
386
|
+
"session_max_jailbreak_score"?: Long,
|
|
387
|
+
"session_max_command_injection_score"?: Long,
|
|
388
|
+
"session_max_pii_score"?: Long,
|
|
389
|
+
"session_max_secret_score"?: Long,
|
|
390
|
+
"session_cumulative_risk_score"?: Long,
|
|
391
|
+
|
|
392
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
393
|
+
"agent_id"?: String,
|
|
394
|
+
"agent_type"?: String,
|
|
395
|
+
"agent_trust_level"?: String,
|
|
396
|
+
"agent_framework"?: String,
|
|
397
|
+
"agent_publisher"?: String,
|
|
335
398
|
|
|
336
399
|
};
|
|
337
400
|
|
|
@@ -368,6 +431,19 @@ namespace Guardrails {
|
|
|
368
431
|
"session_injection_detected"?: Bool,
|
|
369
432
|
"session_command_injection"?: Bool,
|
|
370
433
|
"session_threat_turns"?: Long,
|
|
434
|
+
"session_max_injection_score"?: Long,
|
|
435
|
+
"session_max_jailbreak_score"?: Long,
|
|
436
|
+
"session_max_command_injection_score"?: Long,
|
|
437
|
+
"session_max_pii_score"?: Long,
|
|
438
|
+
"session_max_secret_score"?: Long,
|
|
439
|
+
"session_cumulative_risk_score"?: Long,
|
|
440
|
+
|
|
441
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
442
|
+
"agent_id"?: String,
|
|
443
|
+
"agent_type"?: String,
|
|
444
|
+
"agent_trust_level"?: String,
|
|
445
|
+
"agent_framework"?: String,
|
|
446
|
+
"agent_publisher"?: String,
|
|
371
447
|
|
|
372
448
|
};
|
|
373
449
|
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// =============================================================================
|
|
2
|
+
// Agent Identity Policy — Agent-to-Agent Security Defaults
|
|
3
|
+
// =============================================================================
|
|
4
|
+
// Enforces trust-based access control for AI agents authenticated via API key
|
|
5
|
+
// or JWT with agent claims. These policies use agent identity context keys
|
|
6
|
+
// populated from Shield's authentication layer.
|
|
7
|
+
//
|
|
8
|
+
// Agent trust levels:
|
|
9
|
+
// - first_party: Your own agents (highest trust)
|
|
10
|
+
// - verified_third_party: Audited external agents (medium trust)
|
|
11
|
+
// - unverified: Unknown/untrusted agents (lowest trust)
|
|
12
|
+
//
|
|
13
|
+
// Agent types:
|
|
14
|
+
// - orchestrator: Coordinates sub-agents
|
|
15
|
+
// - autonomous: Self-directed, no human in the loop
|
|
16
|
+
// - tool_agent: Single-purpose tool execution
|
|
17
|
+
// - human_proxy: Acts on behalf of a human
|
|
18
|
+
//
|
|
19
|
+
// Context keys used:
|
|
20
|
+
// - agent_id: String - Unique agent identifier
|
|
21
|
+
// - agent_type: String - Agent classification
|
|
22
|
+
// - agent_trust_level: String - Trust tier
|
|
23
|
+
// - agent_framework: String - Agent framework/SDK
|
|
24
|
+
// - agent_publisher: String - Publishing organization
|
|
25
|
+
// - tool_name: String - Tool being called
|
|
26
|
+
// - tool_category: String - "safe" | "sensitive" | "dangerous"
|
|
27
|
+
// - tool_risk_score: Long (0-100) - Computed risk score
|
|
28
|
+
// - injection_confidence: Long (0-100) - Injection detection score
|
|
29
|
+
// - tool_is_sensitive: Bool, session_threat_turns: Long - Also read by the policies below
|
|
30
|
+
// Category: agent_identity
|
|
31
|
+
// Namespace: Guardrails
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
// -----------------------------------------------------------------------------
|
|
35
|
+
// Trust-Based Tool Access
|
|
36
|
+
// -----------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
@id("agent-block-unverified-dangerous-tools")
|
|
39
|
+
@name("Block unverified agents from dangerous tools")
|
|
40
|
+
@description("Unverified agents cannot execute tools classified as dangerous. Require first_party or verified_third_party trust level for high-risk operations")
|
|
41
|
+
@severity("critical")
|
|
42
|
+
@tags("agent-identity,trust,tools,a2a")
|
|
43
|
+
forbid (
|
|
44
|
+
principal is Guardrails::Agent,
|
|
45
|
+
action == Guardrails::Action::"call_tool",
|
|
46
|
+
resource
|
|
47
|
+
) when {
|
|
48
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" && // agent_trust_level is optional in the schema; guard so a missing key cannot make this forbid error out and be skipped
|
|
49
|
+
context has tool_category && context.tool_category == "dangerous"
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
@id("agent-block-unverified-shell")
|
|
53
|
+
@name("Block unverified agents from shell execution")
|
|
54
|
+
@description("Shell and command execution are restricted to first_party and verified_third_party agents")
|
|
55
|
+
@severity("critical")
|
|
56
|
+
@tags("agent-identity,trust,shell,a2a")
|
|
57
|
+
forbid (
|
|
58
|
+
principal is Guardrails::Agent,
|
|
59
|
+
action == Guardrails::Action::"call_tool",
|
|
60
|
+
resource
|
|
61
|
+
) when {
|
|
62
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" && // agent_trust_level is optional in the schema; guard so a missing key cannot make this forbid error out and be skipped
|
|
63
|
+
context has tool_name &&
|
|
64
|
+
(context.tool_name == "shell" ||
|
|
65
|
+
context.tool_name == "execute_command" ||
|
|
66
|
+
context.tool_name == "bash")
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
@id("agent-block-unverified-sensitive-tools")
|
|
70
|
+
@name("Block unverified agents from sensitive tools with elevated risk")
|
|
71
|
+
@description("Unverified agents cannot execute sensitive tools with risk score above 60")
|
|
72
|
+
@severity("high")
|
|
73
|
+
@tags("agent-identity,trust,tools,a2a")
|
|
74
|
+
forbid (
|
|
75
|
+
principal is Guardrails::Agent,
|
|
76
|
+
action == Guardrails::Action::"call_tool",
|
|
77
|
+
resource
|
|
78
|
+
) when {
|
|
79
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" && // agent_trust_level is optional in the schema; guard so a missing key cannot make this forbid error out and be skipped
|
|
80
|
+
context has tool_is_sensitive && context.tool_is_sensitive == true &&
|
|
81
|
+
context has tool_risk_score && context.tool_risk_score > 60
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
// -----------------------------------------------------------------------------
|
|
85
|
+
// Autonomous Agent Restrictions
|
|
86
|
+
// -----------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
@id("agent-block-autonomous-injection")
|
|
89
|
+
@name("Stricter injection threshold for autonomous agents")
|
|
90
|
+
@description("Autonomous agents operate without human oversight. Apply lower injection confidence threshold (50 vs standard 80) to compensate for lack of human review") // NOTE(review): the default injection policy in defaults/injection.cedar blocks at injection_confidence > 85, not 80 — confirm which baseline this text refers to
|
|
91
|
+
@severity("high")
|
|
92
|
+
@tags("agent-identity,autonomous,injection,a2a")
|
|
93
|
+
forbid (
|
|
94
|
+
principal is Guardrails::Agent,
|
|
95
|
+
action == Guardrails::Action::"process_prompt",
|
|
96
|
+
resource
|
|
97
|
+
) when {
|
|
98
|
+
context has agent_type && context.agent_type == "autonomous" && // agent_type is optional in the schema; guard so a missing key cannot make this forbid error out and be skipped
|
|
99
|
+
context has injection_confidence && context.injection_confidence > 50
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
// -----------------------------------------------------------------------------
|
|
103
|
+
// Cross-Turn Agent Trust Enforcement
|
|
104
|
+
// -----------------------------------------------------------------------------
|
|
105
|
+
|
|
106
|
+
@id("agent-block-unverified-after-threats")
|
|
107
|
+
@name("Block unverified agents after session threats")
|
|
108
|
+
@description("If any prior turn in the session detected threats, block unverified agents from further tool calls. Prevents compromised sessions from being exploited by untrusted agents")
|
|
109
|
+
@severity("critical")
|
|
110
|
+
@tags("agent-identity,trust,session,cross-turn,a2a")
|
|
111
|
+
forbid (
|
|
112
|
+
principal is Guardrails::Agent,
|
|
113
|
+
action == Guardrails::Action::"call_tool",
|
|
114
|
+
resource
|
|
115
|
+
) when {
|
|
116
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" && // agent_trust_level is optional in the schema; guard so a missing key cannot make this forbid error out and be skipped
|
|
117
|
+
context has session_threat_turns && context.session_threat_turns > 0
|
|
118
|
+
};
|
|
@@ -40,7 +40,7 @@ forbid (
|
|
|
40
40
|
@tags("agentic,exfiltration,security")
|
|
41
41
|
forbid (
|
|
42
42
|
principal,
|
|
43
|
-
action,
|
|
43
|
+
action == Guardrails::Action::"call_tool",
|
|
44
44
|
resource
|
|
45
45
|
) when {
|
|
46
46
|
context has suspicious_pattern && context.suspicious_pattern == true &&
|
|
@@ -59,7 +59,7 @@ forbid (
|
|
|
59
59
|
@tags("agentic,patterns,security")
|
|
60
60
|
forbid (
|
|
61
61
|
principal,
|
|
62
|
-
action,
|
|
62
|
+
action == Guardrails::Action::"call_tool",
|
|
63
63
|
resource
|
|
64
64
|
) when {
|
|
65
65
|
context has sequence_risk && context.sequence_risk > 80
|
|
@@ -72,7 +72,7 @@ forbid (
|
|
|
72
72
|
@tags("agentic,budget,cost-control")
|
|
73
73
|
forbid (
|
|
74
74
|
principal,
|
|
75
|
-
action,
|
|
75
|
+
action == Guardrails::Action::"call_tool",
|
|
76
76
|
resource
|
|
77
77
|
) when {
|
|
78
78
|
context has budget_exceeded && context.budget_exceeded == true
|
|
@@ -85,7 +85,7 @@ forbid (
|
|
|
85
85
|
@tags("agentic,budget,cost-control")
|
|
86
86
|
forbid (
|
|
87
87
|
principal,
|
|
88
|
-
action,
|
|
88
|
+
action == Guardrails::Action::"call_tool",
|
|
89
89
|
resource
|
|
90
90
|
) when {
|
|
91
91
|
context has budget_remaining_pct &&
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
// Uses ML-based confidence scores from normalized context.
|
|
6
6
|
//
|
|
7
7
|
// Context keys used (normalized by projection layer):
|
|
8
|
-
// -
|
|
9
|
-
// -
|
|
8
|
+
// - injection_confidence: Long (0-100) - Overall injection confidence
|
|
9
|
+
// - jailbreak_confidence: Long (0-100) - Jailbreak attempt confidence
|
|
10
10
|
// - injection_type: String - Type of injection detected
|
|
11
11
|
// - contains_invisible_chars: Bool - Invisible Unicode characters detected
|
|
12
12
|
// - invisible_chars_score: Long (0-100) - Invisible character density
|
|
@@ -22,10 +22,10 @@
|
|
|
22
22
|
@tags("injection,jailbreak,security")
|
|
23
23
|
forbid (
|
|
24
24
|
principal,
|
|
25
|
-
action,
|
|
25
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
26
26
|
resource
|
|
27
27
|
) when {
|
|
28
|
-
context has
|
|
28
|
+
context has injection_confidence && context.injection_confidence > 85
|
|
29
29
|
};
|
|
30
30
|
|
|
31
31
|
@id("jailbreak-block-high-confidence")
|
|
@@ -35,10 +35,10 @@ forbid (
|
|
|
35
35
|
@tags("jailbreak,security")
|
|
36
36
|
forbid (
|
|
37
37
|
principal,
|
|
38
|
-
action,
|
|
38
|
+
action == Guardrails::Action::"process_prompt",
|
|
39
39
|
resource
|
|
40
40
|
) when {
|
|
41
|
-
context has
|
|
41
|
+
context has jailbreak_confidence && context.jailbreak_confidence > 80
|
|
42
42
|
};
|
|
43
43
|
|
|
44
44
|
@id("injection-combined-threshold")
|
|
@@ -48,11 +48,11 @@ forbid (
|
|
|
48
48
|
@tags("injection,jailbreak,security")
|
|
49
49
|
forbid (
|
|
50
50
|
principal,
|
|
51
|
-
action,
|
|
51
|
+
action == Guardrails::Action::"process_prompt",
|
|
52
52
|
resource
|
|
53
53
|
) when {
|
|
54
|
-
context has
|
|
55
|
-
context.
|
|
54
|
+
context has injection_confidence && context has jailbreak_confidence &&
|
|
55
|
+
context.injection_confidence > 60 && context.jailbreak_confidence > 60
|
|
56
56
|
};
|
|
57
57
|
|
|
58
58
|
@id("injection-invisible-chars")
|
|
@@ -62,7 +62,7 @@ forbid (
|
|
|
62
62
|
@tags("injection,unicode,security")
|
|
63
63
|
forbid (
|
|
64
64
|
principal,
|
|
65
|
-
action,
|
|
65
|
+
action == Guardrails::Action::"process_prompt",
|
|
66
66
|
resource
|
|
67
67
|
) when {
|
|
68
68
|
context has contains_invisible_chars && context.contains_invisible_chars == true &&
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
@tags("secrets,security,data-leak")
|
|
21
21
|
forbid (
|
|
22
22
|
principal,
|
|
23
|
-
action,
|
|
23
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
24
24
|
resource
|
|
25
25
|
) when {
|
|
26
26
|
context has contains_secrets && context.contains_secrets == true
|
|
@@ -33,7 +33,7 @@ forbid (
|
|
|
33
33
|
@tags("secrets,security,data-leak")
|
|
34
34
|
forbid (
|
|
35
35
|
principal,
|
|
36
|
-
action,
|
|
36
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
37
37
|
resource
|
|
38
38
|
) when {
|
|
39
39
|
context has secret_count && context.secret_count > 2
|
|
@@ -36,7 +36,7 @@ forbid (
|
|
|
36
36
|
@tags("path-traversal,security")
|
|
37
37
|
forbid (
|
|
38
38
|
principal,
|
|
39
|
-
action,
|
|
39
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
40
40
|
resource
|
|
41
41
|
) when {
|
|
42
42
|
context has path_traversal_detected && context.path_traversal_detected == true &&
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
@tags("toxicity,trust-safety")
|
|
24
24
|
forbid (
|
|
25
25
|
principal,
|
|
26
|
-
action,
|
|
26
|
+
action == Guardrails::Action::"process_prompt",
|
|
27
27
|
resource
|
|
28
28
|
) when {
|
|
29
29
|
(context has violence_score && context.violence_score > 90) ||
|
|
@@ -41,7 +41,7 @@ forbid (
|
|
|
41
41
|
@tags("hate-speech,trust-safety")
|
|
42
42
|
forbid (
|
|
43
43
|
principal,
|
|
44
|
-
action,
|
|
44
|
+
action == Guardrails::Action::"process_prompt",
|
|
45
45
|
resource
|
|
46
46
|
) when {
|
|
47
47
|
context has hate_speech_score && context.hate_speech_score > 75
|
|
@@ -54,7 +54,7 @@ forbid (
|
|
|
54
54
|
@tags("toxicity,trust-safety")
|
|
55
55
|
forbid (
|
|
56
56
|
principal,
|
|
57
|
-
action,
|
|
57
|
+
action == Guardrails::Action::"process_prompt",
|
|
58
58
|
resource
|
|
59
59
|
) when {
|
|
60
60
|
// Any 2 of 6 toxicity scores above 60 triggers a block
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,chat-assistant,pii,privacy")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
context has pii_detected && context.pii_detected == true
|
|
@@ -15,10 +15,10 @@
|
|
|
15
15
|
@tags("profile,chat-assistant,injection,security")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
|
-
context has
|
|
21
|
+
context has injection_confidence && context.injection_confidence > 70
|
|
22
22
|
};
|
|
23
23
|
|
|
24
24
|
@id("chat-jailbreak-lower-threshold")
|
|
@@ -28,8 +28,8 @@ forbid (
|
|
|
28
28
|
@tags("profile,chat-assistant,jailbreak,security")
|
|
29
29
|
forbid (
|
|
30
30
|
principal,
|
|
31
|
-
action,
|
|
31
|
+
action == Guardrails::Action::"process_prompt",
|
|
32
32
|
resource
|
|
33
33
|
) when {
|
|
34
|
-
context has
|
|
34
|
+
context has jailbreak_confidence && context.jailbreak_confidence > 65
|
|
35
35
|
};
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,chat-assistant,toxicity,trust-safety")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action == Guardrails::Action::"process_prompt",
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
(context has violence_score && context.violence_score > 70) ||
|
|
@@ -31,7 +31,7 @@ forbid (
|
|
|
31
31
|
@tags("profile,chat-assistant,semantic,compliance")
|
|
32
32
|
forbid (
|
|
33
33
|
principal,
|
|
34
|
-
action,
|
|
34
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
35
35
|
resource
|
|
36
36
|
) when {
|
|
37
37
|
context has topic_confidence && context.topic_confidence > 70 &&
|
|
@@ -73,7 +73,7 @@ forbid (
|
|
|
73
73
|
@tags("profile,code-agent,agentic,exfiltration")
|
|
74
74
|
forbid (
|
|
75
75
|
principal,
|
|
76
|
-
action,
|
|
76
|
+
action == Guardrails::Action::"call_tool",
|
|
77
77
|
resource
|
|
78
78
|
) when {
|
|
79
79
|
context has suspicious_pattern && context.suspicious_pattern == true &&
|
|
@@ -89,7 +89,7 @@ forbid (
|
|
|
89
89
|
@tags("profile,code-agent,agentic,patterns")
|
|
90
90
|
forbid (
|
|
91
91
|
principal,
|
|
92
|
-
action,
|
|
92
|
+
action == Guardrails::Action::"call_tool",
|
|
93
93
|
resource
|
|
94
94
|
) when {
|
|
95
95
|
context has sequence_risk && context.sequence_risk > 75
|
|
@@ -102,7 +102,7 @@ forbid (
|
|
|
102
102
|
@tags("profile,code-agent,budget,cost-control")
|
|
103
103
|
forbid (
|
|
104
104
|
principal,
|
|
105
|
-
action,
|
|
105
|
+
action == Guardrails::Action::"call_tool",
|
|
106
106
|
resource
|
|
107
107
|
) when {
|
|
108
108
|
context has budget_exceeded && context.budget_exceeded == true
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,data-pipeline,pii,privacy")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
context has pii_detected && context.pii_detected == true
|
|
@@ -28,7 +28,7 @@ forbid (
|
|
|
28
28
|
@tags("profile,data-pipeline,pii,compliance")
|
|
29
29
|
forbid (
|
|
30
30
|
principal,
|
|
31
|
-
action,
|
|
31
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
32
32
|
resource
|
|
33
33
|
) when {
|
|
34
34
|
context has pii_types &&
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,data-pipeline,secrets,security")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
context has contains_secrets && context.contains_secrets == true
|
|
@@ -42,8 +42,8 @@ forbid (
|
|
|
42
42
|
@tags("profile,data-pipeline,injection,security")
|
|
43
43
|
forbid (
|
|
44
44
|
principal,
|
|
45
|
-
action,
|
|
45
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
46
46
|
resource
|
|
47
47
|
) when {
|
|
48
|
-
context has
|
|
48
|
+
context has injection_confidence && context.injection_confidence > 65
|
|
49
49
|
};
|