@highflame/policy 2.1.36 → 2.1.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/ai_gateway/context.json +431 -11
- package/_schemas/ai_gateway/schema.cedarschema +91 -11
- package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
- package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
- package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
- package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
- package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
- package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
- package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
- package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
- package/_schemas/ai_gateway/templates/templates.json +42 -47
- package/_schemas/guardrails/context.json +12 -12
- package/_schemas/guardrails/schema.cedarschema +12 -12
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
- package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
- package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
- package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
- package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
- package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
- package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
- package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
- package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
- package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
- package/_schemas/guardrails/templates/templates.json +188 -210
- package/_schemas/overwatch/context.json +14 -14
- package/_schemas/overwatch/schema.cedarschema +12 -12
- package/_schemas/sentry/context.json +11 -11
- package/_schemas/sentry/schema.cedarschema +11 -11
- package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
- package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
- package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
- package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
- package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
- package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
- package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
- package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
- package/_schemas/sentry/templates/templates.json +46 -46
- package/dist/ai_gateway-context.gen.d.ts +18 -4
- package/dist/ai_gateway-context.gen.js +18 -4
- package/dist/ai_gateway-defaults.gen.d.ts +1 -1
- package/dist/ai_gateway-defaults.gen.js +377 -313
- package/dist/guardrails-context.gen.d.ts +5 -5
- package/dist/guardrails-context.gen.js +5 -5
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +2070 -1849
- package/dist/overwatch-context.gen.d.ts +5 -5
- package/dist/overwatch-context.gen.js +5 -5
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +547 -573
- package/dist/sentry-context.gen.d.ts +3 -3
- package/dist/sentry-context.gen.js +3 -3
- package/dist/sentry-defaults.gen.d.ts +1 -1
- package/dist/sentry-defaults.gen.js +379 -460
- package/dist/service-schemas.gen.d.ts +4 -4
- package/dist/service-schemas.gen.js +249 -99
- package/package.json +1 -1
|
@@ -38,13 +38,13 @@
|
|
|
38
38
|
"description": "Number of detectors that were executed for this request"
|
|
39
39
|
},
|
|
40
40
|
{
|
|
41
|
-
"key": "
|
|
41
|
+
"key": "injection_score",
|
|
42
42
|
"type": "number",
|
|
43
43
|
"required": false,
|
|
44
44
|
"description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
|
|
45
45
|
},
|
|
46
46
|
{
|
|
47
|
-
"key": "
|
|
47
|
+
"key": "jailbreak_score",
|
|
48
48
|
"type": "number",
|
|
49
49
|
"required": false,
|
|
50
50
|
"description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control"
|
|
@@ -80,7 +80,7 @@
|
|
|
80
80
|
"description": "Type of injection detected: 'prompt', 'sql', 'command', or 'none'. Use this to apply different policies per injection type"
|
|
81
81
|
},
|
|
82
82
|
{
|
|
83
|
-
"key": "
|
|
83
|
+
"key": "secrets_detected",
|
|
84
84
|
"type": "boolean",
|
|
85
85
|
"required": false,
|
|
86
86
|
"description": "Whether any API keys, tokens, passwords, or credentials were detected in the content. True indicates presence of secrets"
|
|
@@ -116,7 +116,7 @@
|
|
|
116
116
|
"description": "Array of PII types detected (e.g., ['email', 'phone', 'ssn', 'credit_card']). Use .contains() to block specific sensitive types"
|
|
117
117
|
},
|
|
118
118
|
{
|
|
119
|
-
"key": "
|
|
119
|
+
"key": "pii_score",
|
|
120
120
|
"type": "number",
|
|
121
121
|
"required": false,
|
|
122
122
|
"description": "PII detection ML classifier confidence (0-100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss. Typical threshold: >=80 for high-confidence blocking"
|
|
@@ -176,7 +176,7 @@
|
|
|
176
176
|
"description": "Confidence score from topic classifier (0-100). Use with content_topics to tune sensitivity — higher thresholds reduce false positives"
|
|
177
177
|
},
|
|
178
178
|
{
|
|
179
|
-
"key": "
|
|
179
|
+
"key": "invisible_chars_detected",
|
|
180
180
|
"type": "boolean",
|
|
181
181
|
"required": false,
|
|
182
182
|
"description": "Whether invisible Unicode characters (zero-width joiners, RTL marks, etc.) were detected in the content. Commonly used for prompt injection evasion"
|
|
@@ -644,7 +644,7 @@
|
|
|
644
644
|
"description": "Confidence score from topic classifier for tool content (0-100)"
|
|
645
645
|
},
|
|
646
646
|
{
|
|
647
|
-
"key": "
|
|
647
|
+
"key": "secrets_detected",
|
|
648
648
|
"type": "boolean",
|
|
649
649
|
"required": false,
|
|
650
650
|
"description": "Whether secrets were detected in tool arguments or content"
|
|
@@ -680,13 +680,13 @@
|
|
|
680
680
|
"description": "Number of PII pattern matches in tool arguments or content. Use >=3 to detect bulk PII exposure indicating data dumps or CSV pastes"
|
|
681
681
|
},
|
|
682
682
|
{
|
|
683
|
-
"key": "
|
|
683
|
+
"key": "pii_score",
|
|
684
684
|
"type": "number",
|
|
685
685
|
"required": false,
|
|
686
686
|
"description": "PII detection ML classifier confidence for tool content (0-100). Catches novel PII patterns that escape regex detection"
|
|
687
687
|
},
|
|
688
688
|
{
|
|
689
|
-
"key": "
|
|
689
|
+
"key": "injection_score",
|
|
690
690
|
"type": "number",
|
|
691
691
|
"required": false,
|
|
692
692
|
"description": "Combined prompt injection confidence in tool arguments (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
|
|
@@ -854,7 +854,7 @@
|
|
|
854
854
|
"description": "File path targeted by the tool call (if file operation). Use for path-based blocking of .env files, credential files, system directories, and credential directories"
|
|
855
855
|
},
|
|
856
856
|
{
|
|
857
|
-
"key": "
|
|
857
|
+
"key": "invisible_chars_detected",
|
|
858
858
|
"type": "boolean",
|
|
859
859
|
"required": false,
|
|
860
860
|
"description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in tool arguments. Tool args should be plain text — invisible chars indicate payload injection"
|
|
@@ -1028,7 +1028,7 @@
|
|
|
1028
1028
|
"description": "File path being read. Use for path-based access control policies (e.g., block .env files, system directories, credential directories)"
|
|
1029
1029
|
},
|
|
1030
1030
|
{
|
|
1031
|
-
"key": "
|
|
1031
|
+
"key": "secrets_detected",
|
|
1032
1032
|
"type": "boolean",
|
|
1033
1033
|
"required": false,
|
|
1034
1034
|
"description": "Whether secrets were detected in the file content being read"
|
|
@@ -1208,7 +1208,7 @@
|
|
|
1208
1208
|
"description": "File path being written. Use for path-based blocking policies (e.g., block writes to .env files, credential directories)"
|
|
1209
1209
|
},
|
|
1210
1210
|
{
|
|
1211
|
-
"key": "
|
|
1211
|
+
"key": "invisible_chars_detected",
|
|
1212
1212
|
"type": "boolean",
|
|
1213
1213
|
"required": false,
|
|
1214
1214
|
"description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in the content being written. Prevents persistence of invisible payloads in source code"
|
|
@@ -1220,7 +1220,7 @@
|
|
|
1220
1220
|
"description": "Invisible character attack severity score (0-100) in write content"
|
|
1221
1221
|
},
|
|
1222
1222
|
{
|
|
1223
|
-
"key": "
|
|
1223
|
+
"key": "secrets_detected",
|
|
1224
1224
|
"type": "boolean",
|
|
1225
1225
|
"required": false,
|
|
1226
1226
|
"description": "Whether secrets were detected in the content being written. Block writes containing credentials"
|
|
@@ -104,8 +104,8 @@ namespace Guardrails {
|
|
|
104
104
|
"detector_count": Long,
|
|
105
105
|
|
|
106
106
|
// Security - Injection & Jailbreak (optional)
|
|
107
|
-
"
|
|
108
|
-
"
|
|
107
|
+
"injection_score"?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
108
|
+
"jailbreak_score"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
109
109
|
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
110
110
|
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
111
111
|
"jailbreak_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
@@ -113,7 +113,7 @@ namespace Guardrails {
|
|
|
113
113
|
"injection_type"?: String, // "prompt" | "sql" | "command" | "none"
|
|
114
114
|
|
|
115
115
|
// Privacy - Secrets (optional)
|
|
116
|
-
"
|
|
116
|
+
"secrets_detected"?: Bool,
|
|
117
117
|
"secret_count"?: Long,
|
|
118
118
|
"secret_types"?: Set<String>, // ["aws_access_key", "github_token", ...]
|
|
119
119
|
|
|
@@ -121,7 +121,7 @@ namespace Guardrails {
|
|
|
121
121
|
"pii_detected"?: Bool,
|
|
122
122
|
"pii_count"?: Long,
|
|
123
123
|
"pii_types"?: Set<String>, // ["email", "phone", "ssn", "credit_card", ...]
|
|
124
|
-
"
|
|
124
|
+
"pii_score"?: Long, // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
|
|
125
125
|
|
|
126
126
|
// Threat Severity Aggregation (optional)
|
|
127
127
|
"highest_severity"?: String, // Highest severity across all detectors: "critical" | "high" | "medium" | "low" | "none"
|
|
@@ -139,7 +139,7 @@ namespace Guardrails {
|
|
|
139
139
|
"topic_confidence"?: Long, // 0-100
|
|
140
140
|
|
|
141
141
|
// Security - Invisible Character Detection (optional)
|
|
142
|
-
"
|
|
142
|
+
"invisible_chars_detected"?: Bool,
|
|
143
143
|
"invisible_chars_score"?: Long, // 0-100
|
|
144
144
|
|
|
145
145
|
// Security - Pattern Detection (optional)
|
|
@@ -281,14 +281,14 @@ namespace Guardrails {
|
|
|
281
281
|
"topic_confidence"?: Long, // 0-100
|
|
282
282
|
|
|
283
283
|
// Security checks on tool arguments (optional)
|
|
284
|
-
"
|
|
284
|
+
"secrets_detected"?: Bool,
|
|
285
285
|
"secret_count"?: Long,
|
|
286
286
|
"secret_types"?: Set<String>,
|
|
287
287
|
"pii_detected"?: Bool,
|
|
288
288
|
"pii_types"?: Set<String>,
|
|
289
289
|
"pii_count"?: Long, // Number of PII pattern matches in tool content
|
|
290
|
-
"
|
|
291
|
-
"
|
|
290
|
+
"pii_score"?: Long, // PII ML classifier confidence (0-100)
|
|
291
|
+
"injection_score"?: Long,
|
|
292
292
|
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
293
293
|
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
294
294
|
|
|
@@ -312,7 +312,7 @@ namespace Guardrails {
|
|
|
312
312
|
"path"?: String, // File path when tool operates on files
|
|
313
313
|
|
|
314
314
|
// Security - Invisible Character Detection in tool args (optional)
|
|
315
|
-
"
|
|
315
|
+
"invisible_chars_detected"?: Bool, // Whether invisible Unicode chars detected in tool args
|
|
316
316
|
"invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
|
|
317
317
|
|
|
318
318
|
// Security - Encoded Injection (optional)
|
|
@@ -376,7 +376,7 @@ namespace Guardrails {
|
|
|
376
376
|
"path"?: String, // File path being read
|
|
377
377
|
|
|
378
378
|
// Security checks on file content (optional)
|
|
379
|
-
"
|
|
379
|
+
"secrets_detected"?: Bool,
|
|
380
380
|
"secret_count"?: Long,
|
|
381
381
|
"secret_types"?: Set<String>,
|
|
382
382
|
"pii_detected"?: Bool,
|
|
@@ -436,11 +436,11 @@ namespace Guardrails {
|
|
|
436
436
|
"path"?: String, // File path being written
|
|
437
437
|
|
|
438
438
|
// Security - Invisible Character Detection in write content (optional)
|
|
439
|
-
"
|
|
439
|
+
"invisible_chars_detected"?: Bool, // Whether invisible Unicode chars detected in write content
|
|
440
440
|
"invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
|
|
441
441
|
|
|
442
442
|
// Security checks on content being written (optional)
|
|
443
|
-
"
|
|
443
|
+
"secrets_detected"?: Bool,
|
|
444
444
|
"secret_count"?: Long,
|
|
445
445
|
"secret_types"?: Set<String>,
|
|
446
446
|
"pii_detected"?: Bool,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Agent Identity
|
|
2
|
+
// Agent Identity & Trust (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
4
|
+
// Trust-based access control for AI agents authenticated via API key or JWT
|
|
5
|
+
// with agent claims. Uses identity context keys populated from Shield's
|
|
6
|
+
// authentication layer.
|
|
7
7
|
//
|
|
8
8
|
// Agent trust levels:
|
|
9
9
|
// - first_party: Your own agents (highest trust)
|
|
@@ -11,108 +11,112 @@
|
|
|
11
11
|
// - unverified: Unknown/untrusted agents (lowest trust)
|
|
12
12
|
//
|
|
13
13
|
// Agent types:
|
|
14
|
-
// - orchestrator
|
|
15
|
-
// - autonomous: Self-directed, no human in the loop
|
|
16
|
-
// - tool_agent: Single-purpose tool execution
|
|
17
|
-
// - human_proxy: Acts on behalf of a human
|
|
14
|
+
// - orchestrator, autonomous, tool_agent, human_proxy
|
|
18
15
|
//
|
|
19
|
-
// Context keys
|
|
20
|
-
//
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
// - agent_publisher: String - Publishing organization
|
|
25
|
-
// - tool_name: String - Tool being called
|
|
26
|
-
// - tool_category: String - "safe" | "sensitive" | "dangerous"
|
|
27
|
-
// - tool_risk_score: Long (0-100) - Computed risk score
|
|
28
|
-
// - injection_confidence: Long (0-100) - Injection detection score
|
|
16
|
+
// Context keys consumed:
|
|
17
|
+
// - agent_id, agent_type, agent_trust_level, agent_framework, agent_publisher
|
|
18
|
+
// - tool_name, tool_category, tool_is_sensitive, tool_risk_score
|
|
19
|
+
// - injection_score
|
|
20
|
+
// - session_threat_turns
|
|
29
21
|
//
|
|
30
|
-
// Category:
|
|
22
|
+
// Category: agent-identity
|
|
31
23
|
// Namespace: Guardrails
|
|
32
24
|
// =============================================================================
|
|
33
25
|
|
|
34
|
-
//
|
|
35
|
-
// Trust-
|
|
36
|
-
//
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Section 1: Trust-based tool access
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
37
29
|
|
|
38
|
-
@id("agent-block-unverified-dangerous-tools")
|
|
30
|
+
@id("agent-identity.block-unverified-dangerous-tools")
|
|
39
31
|
@name("Block unverified agents from dangerous tools")
|
|
40
|
-
@description("
|
|
32
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_category is \"dangerous\".")
|
|
41
33
|
@severity("critical")
|
|
42
|
-
@tags("agent-identity,
|
|
34
|
+
@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
|
|
35
|
+
@reject_message("Tool execution blocked: unverified agents cannot execute dangerous tools.")
|
|
43
36
|
forbid (
|
|
44
37
|
principal is Guardrails::Agent,
|
|
45
38
|
action == Guardrails::Action::"call_tool",
|
|
46
39
|
resource
|
|
47
|
-
)
|
|
40
|
+
)
|
|
41
|
+
when {
|
|
48
42
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
49
43
|
context has tool_category && context.tool_category == "dangerous"
|
|
50
44
|
};
|
|
51
45
|
|
|
52
|
-
@id("agent-block-unverified-shell")
|
|
46
|
+
@id("agent-identity.block-unverified-shell")
|
|
53
47
|
@name("Block unverified agents from shell execution")
|
|
54
|
-
@description("
|
|
48
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_name is a shell tool.")
|
|
55
49
|
@severity("critical")
|
|
56
|
-
@tags("agent-identity,
|
|
50
|
+
@tags("category:agent-identity,threat:command-injection,scope:per-agent,detection:rule,surface:call-tool")
|
|
51
|
+
@reject_message("Tool execution blocked: shell and command execution require a verified agent.")
|
|
57
52
|
forbid (
|
|
58
53
|
principal is Guardrails::Agent,
|
|
59
54
|
action == Guardrails::Action::"call_tool",
|
|
60
55
|
resource
|
|
61
|
-
)
|
|
56
|
+
)
|
|
57
|
+
when {
|
|
62
58
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
63
59
|
context has tool_name &&
|
|
64
|
-
(
|
|
65
|
-
|
|
66
|
-
|
|
60
|
+
(
|
|
61
|
+
context.tool_name == "shell" ||
|
|
62
|
+
context.tool_name == "execute_command" ||
|
|
63
|
+
context.tool_name == "bash"
|
|
64
|
+
)
|
|
67
65
|
};
|
|
68
66
|
|
|
69
|
-
@id("agent-block-unverified-sensitive-tools")
|
|
70
|
-
@name("Block unverified agents from sensitive tools
|
|
71
|
-
@description("
|
|
67
|
+
@id("agent-identity.block-unverified-sensitive-tools")
|
|
68
|
+
@name("Block unverified agents from sensitive tools")
|
|
69
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\", tool_is_sensitive is true, and tool_risk_score >= 61.")
|
|
72
70
|
@severity("high")
|
|
73
|
-
@tags("agent-identity,
|
|
71
|
+
@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
|
|
72
|
+
@reject_message("Tool execution blocked: unverified agent attempted a sensitive tool with elevated risk.")
|
|
74
73
|
forbid (
|
|
75
74
|
principal is Guardrails::Agent,
|
|
76
75
|
action == Guardrails::Action::"call_tool",
|
|
77
76
|
resource
|
|
78
|
-
)
|
|
77
|
+
)
|
|
78
|
+
when {
|
|
79
79
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
80
80
|
context has tool_is_sensitive && context.tool_is_sensitive == true &&
|
|
81
|
-
context has tool_risk_score && context.tool_risk_score
|
|
81
|
+
context has tool_risk_score && context.tool_risk_score >= 61
|
|
82
82
|
};
|
|
83
83
|
|
|
84
|
-
//
|
|
85
|
-
// Autonomous
|
|
86
|
-
//
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
// Section 2: Autonomous-agent restrictions
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
87
|
|
|
88
|
-
@id("agent-block-autonomous-injection")
|
|
89
|
-
@name("
|
|
90
|
-
@description("
|
|
88
|
+
@id("agent-identity.block-autonomous-injection")
|
|
89
|
+
@name("Block injection on autonomous agents")
|
|
90
|
+
@description("Blocks process_prompt when agent_type is \"autonomous\" and injection_score >= 51.")
|
|
91
91
|
@severity("high")
|
|
92
|
-
@tags("agent-identity,
|
|
92
|
+
@tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
|
|
93
|
+
@reject_message("Prompt blocked: lower injection threshold applies to autonomous agents and was exceeded.")
|
|
93
94
|
forbid (
|
|
94
95
|
principal is Guardrails::Agent,
|
|
95
96
|
action == Guardrails::Action::"process_prompt",
|
|
96
97
|
resource
|
|
97
|
-
)
|
|
98
|
+
)
|
|
99
|
+
when {
|
|
98
100
|
context has agent_type && context.agent_type == "autonomous" &&
|
|
99
|
-
context has
|
|
101
|
+
context has injection_score && context.injection_score >= 51
|
|
100
102
|
};
|
|
101
103
|
|
|
102
|
-
//
|
|
103
|
-
// Cross-
|
|
104
|
-
//
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
// Section 3: Cross-turn enforcement
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
105
107
|
|
|
106
|
-
@id("agent-block-unverified-after-threats")
|
|
108
|
+
@id("agent-identity.block-unverified-after-threats")
|
|
107
109
|
@name("Block unverified agents after session threats")
|
|
108
|
-
@description("
|
|
110
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and session_threat_turns >= 1.")
|
|
109
111
|
@severity("critical")
|
|
110
|
-
@tags("agent-identity,
|
|
112
|
+
@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
|
|
113
|
+
@reject_message("Tool execution blocked: prior turns in this session detected threats; unverified agents cannot continue.")
|
|
111
114
|
forbid (
|
|
112
115
|
principal is Guardrails::Agent,
|
|
113
116
|
action == Guardrails::Action::"call_tool",
|
|
114
117
|
resource
|
|
115
|
-
)
|
|
118
|
+
)
|
|
119
|
+
when {
|
|
116
120
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
117
|
-
context has session_threat_turns && context.session_threat_turns
|
|
121
|
+
context has session_threat_turns && context.session_threat_turns >= 1
|
|
118
122
|
};
|
|
@@ -1,48 +1,63 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Agentic Safety
|
|
2
|
+
// Agentic Safety (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks suspicious agentic
|
|
5
|
-
//
|
|
6
|
-
//
|
|
7
|
-
// - Budget violations (token/cost limits)
|
|
4
|
+
// Blocks suspicious agentic behaviour patterns: tool-call loops, data
|
|
5
|
+
// exfiltration sequences, budget overruns, plus MCP supply-chain risks
|
|
6
|
+
// (tool poisoning, rug pull, risky MCP configs).
|
|
8
7
|
//
|
|
9
|
-
// Context keys
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
8
|
+
// Context keys consumed:
|
|
9
|
+
// - loop_detected: Bool
|
|
10
|
+
// - loop_count: Long
|
|
11
|
+
// - suspicious_pattern: Bool
|
|
12
|
+
// - pattern_type: String
|
|
13
|
+
// - sequence_risk: Long (0-100)
|
|
14
|
+
// - budget_exceeded: Bool
|
|
15
|
+
// - budget_remaining_pct: Long (0-100)
|
|
16
|
+
// - tool_poisoning_score: Long (0-100)
|
|
17
|
+
// - rug_pull_detected: Bool
|
|
18
|
+
// - rug_pull_score: Long (0-100)
|
|
19
|
+
// - mcp_config_risk: Bool
|
|
20
|
+
// - mcp_risk_score: Long (0-100)
|
|
17
21
|
//
|
|
18
|
-
//
|
|
22
|
+
// Compliance:
|
|
23
|
+
// - OWASP LLM06, OWASP ASI01, OWASP ASI04
|
|
24
|
+
//
|
|
25
|
+
// Category: agent-security
|
|
19
26
|
// Namespace: Guardrails
|
|
20
27
|
// =============================================================================
|
|
21
28
|
|
|
22
|
-
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Section 1: Loops, exfiltration, budget
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
@id("agent-security.block-loops")
|
|
23
34
|
@name("Block tool call loops")
|
|
24
|
-
@description("
|
|
35
|
+
@description("Blocks call_tool when loop_detected is true and loop_count >= 4.")
|
|
25
36
|
@severity("high")
|
|
26
|
-
@tags("
|
|
37
|
+
@tags("category:agent-security,threat:loop,detection:aggregate,surface:call-tool")
|
|
38
|
+
@reject_message("Tool execution blocked: an infinite tool-call loop was detected.")
|
|
27
39
|
forbid (
|
|
28
40
|
principal,
|
|
29
41
|
action == Guardrails::Action::"call_tool",
|
|
30
42
|
resource
|
|
31
|
-
)
|
|
43
|
+
)
|
|
44
|
+
when {
|
|
32
45
|
context has loop_detected && context.loop_detected == true &&
|
|
33
|
-
context has loop_count && context.loop_count
|
|
46
|
+
context has loop_count && context.loop_count >= 4
|
|
34
47
|
};
|
|
35
48
|
|
|
36
|
-
@id("
|
|
49
|
+
@id("agent-security.block-exfiltration")
|
|
37
50
|
@name("Block data exfiltration patterns")
|
|
38
|
-
@description("
|
|
51
|
+
@description("Blocks call_tool when suspicious_pattern is true and pattern_type matches an exfiltration class.")
|
|
39
52
|
@severity("critical")
|
|
40
|
-
@tags("
|
|
53
|
+
@tags("category:agent-security,threat:exfiltration,detection:rule,surface:call-tool")
|
|
54
|
+
@reject_message("Tool execution blocked: suspicious action sequence consistent with data exfiltration.")
|
|
41
55
|
forbid (
|
|
42
56
|
principal,
|
|
43
57
|
action == Guardrails::Action::"call_tool",
|
|
44
58
|
resource
|
|
45
|
-
)
|
|
59
|
+
)
|
|
60
|
+
when {
|
|
46
61
|
context has suspicious_pattern && context.suspicious_pattern == true &&
|
|
47
62
|
context has pattern_type &&
|
|
48
63
|
(
|
|
@@ -52,88 +67,98 @@ forbid (
|
|
|
52
67
|
)
|
|
53
68
|
};
|
|
54
69
|
|
|
55
|
-
@id("
|
|
70
|
+
@id("agent-security.block-sequence-risk")
|
|
56
71
|
@name("Block high-risk action sequences")
|
|
57
|
-
@description("
|
|
72
|
+
@description("Blocks call_tool when sequence_risk >= 81.")
|
|
58
73
|
@severity("high")
|
|
59
|
-
@tags("
|
|
74
|
+
@tags("category:agent-security,detection:aggregate,surface:call-tool")
|
|
75
|
+
@reject_message("Tool execution blocked: action sequence scored a high cumulative risk.")
|
|
60
76
|
forbid (
|
|
61
77
|
principal,
|
|
62
78
|
action == Guardrails::Action::"call_tool",
|
|
63
79
|
resource
|
|
64
|
-
)
|
|
65
|
-
|
|
80
|
+
)
|
|
81
|
+
when {
|
|
82
|
+
context has sequence_risk && context.sequence_risk >= 81
|
|
66
83
|
};
|
|
67
84
|
|
|
68
|
-
@id("
|
|
69
|
-
@name("Block budget
|
|
70
|
-
@description("
|
|
85
|
+
@id("agent-security.block-budget-exceeded")
|
|
86
|
+
@name("Block on budget exceeded")
|
|
87
|
+
@description("Blocks call_tool when budget_exceeded is true.")
|
|
71
88
|
@severity("medium")
|
|
72
|
-
@tags("
|
|
89
|
+
@tags("category:agent-security,threat:budget-violation,detection:rule,surface:call-tool")
|
|
90
|
+
@reject_message("Tool execution blocked: the agent has exhausted its token or cost budget.")
|
|
73
91
|
forbid (
|
|
74
92
|
principal,
|
|
75
93
|
action == Guardrails::Action::"call_tool",
|
|
76
94
|
resource
|
|
77
|
-
)
|
|
95
|
+
)
|
|
96
|
+
when {
|
|
78
97
|
context has budget_exceeded && context.budget_exceeded == true
|
|
79
98
|
};
|
|
80
99
|
|
|
81
|
-
@id("
|
|
82
|
-
@name("Block
|
|
83
|
-
@description("
|
|
100
|
+
@id("agent-security.block-budget-low")
|
|
101
|
+
@name("Block when budget critically low")
|
|
102
|
+
@description("Blocks call_tool when budget_remaining_pct <= 4.")
|
|
84
103
|
@severity("medium")
|
|
85
|
-
@tags("
|
|
104
|
+
@tags("category:agent-security,threat:budget-violation,detection:rule,surface:call-tool")
|
|
105
|
+
@reject_message("Tool execution blocked: remaining budget is below the critical threshold.")
|
|
86
106
|
forbid (
|
|
87
107
|
principal,
|
|
88
108
|
action == Guardrails::Action::"call_tool",
|
|
89
109
|
resource
|
|
90
|
-
)
|
|
91
|
-
|
|
92
|
-
context.budget_remaining_pct
|
|
93
|
-
context.budget_remaining_pct > 0
|
|
110
|
+
)
|
|
111
|
+
when {
|
|
112
|
+
context has budget_remaining_pct && context.budget_remaining_pct <= 4
|
|
94
113
|
};
|
|
95
114
|
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
//
|
|
115
|
+
// ---------------------------------------------------------------------------
|
|
116
|
+
// Section 2: MCP supply chain
|
|
117
|
+
// ---------------------------------------------------------------------------
|
|
99
118
|
|
|
100
|
-
@id("
|
|
101
|
-
@name("Block tool poisoning
|
|
102
|
-
@description("
|
|
119
|
+
@id("agent-security.block-tool-poisoning")
|
|
120
|
+
@name("Block tool poisoning")
|
|
121
|
+
@description("Blocks call_tool and connect_server when tool_poisoning_score >= 70.")
|
|
103
122
|
@severity("critical")
|
|
104
|
-
@tags("
|
|
123
|
+
@tags("category:agent-security,threat:tool-poisoning,detection:ml,owasp:asi01")
|
|
124
|
+
@reject_message("Tool execution blocked: hidden manipulation instructions detected in tool description or arguments (OWASP ASI01).")
|
|
105
125
|
forbid (
|
|
106
126
|
principal,
|
|
107
127
|
action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
|
|
108
128
|
resource
|
|
109
|
-
)
|
|
129
|
+
)
|
|
130
|
+
when {
|
|
110
131
|
context has tool_poisoning_score && context.tool_poisoning_score >= 70
|
|
111
132
|
};
|
|
112
133
|
|
|
113
|
-
@id("
|
|
134
|
+
@id("agent-security.block-rug-pull")
|
|
114
135
|
@name("Block rug pull attacks")
|
|
115
|
-
@description("
|
|
136
|
+
@description("Blocks call_tool when rug_pull_detected is true and rug_pull_score >= 70.")
|
|
116
137
|
@severity("high")
|
|
117
|
-
@tags("
|
|
138
|
+
@tags("category:agent-security,threat:rug-pull,detection:ml,owasp:asi04")
|
|
139
|
+
@reject_message("Tool execution blocked: tool behavior has drifted significantly from its established pattern.")
|
|
118
140
|
forbid (
|
|
119
141
|
principal,
|
|
120
142
|
action == Guardrails::Action::"call_tool",
|
|
121
143
|
resource
|
|
122
|
-
)
|
|
144
|
+
)
|
|
145
|
+
when {
|
|
123
146
|
context has rug_pull_detected && context.rug_pull_detected == true &&
|
|
124
147
|
context has rug_pull_score && context.rug_pull_score >= 70
|
|
125
148
|
};
|
|
126
149
|
|
|
127
|
-
@id("
|
|
128
|
-
@name("Block risky MCP
|
|
129
|
-
@description("
|
|
150
|
+
@id("agent-security.block-mcp-config-risk")
|
|
151
|
+
@name("Block risky MCP server configs")
|
|
152
|
+
@description("Blocks call_tool and connect_server when mcp_config_risk is true and mcp_risk_score >= 70.")
|
|
130
153
|
@severity("high")
|
|
131
|
-
@tags("
|
|
154
|
+
@tags("category:agent-security,threat:mcp-config-risk,detection:ml,owasp:asi04")
|
|
155
|
+
@reject_message("MCP operation blocked: risky server configuration detected.")
|
|
132
156
|
forbid (
|
|
133
157
|
principal,
|
|
134
158
|
action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
|
|
135
159
|
resource
|
|
136
|
-
)
|
|
160
|
+
)
|
|
161
|
+
when {
|
|
137
162
|
context has mcp_config_risk && context.mcp_config_risk == true &&
|
|
138
163
|
context has mcp_risk_score && context.mcp_risk_score >= 70
|
|
139
164
|
};
|
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Baseline Permit
|
|
2
|
+
// Baseline Permit (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Permits all actions by default. Threat-specific forbid policies
|
|
5
|
-
// this
|
|
4
|
+
// Permits all Guardrails actions by default. Threat-specific forbid policies
|
|
5
|
+
// override this when detectors fire. Cedar is default-deny: without at least
|
|
6
|
+
// one permit rule, every request is denied regardless of forbid rules.
|
|
6
7
|
//
|
|
7
|
-
//
|
|
8
|
-
// is denied regardless of forbid rules. This baseline ensures the system
|
|
9
|
-
// is "allow unless blocked" rather than "block everything".
|
|
10
|
-
//
|
|
11
|
-
// Category: organization
|
|
8
|
+
// Category: organization
|
|
12
9
|
// Namespace: Guardrails
|
|
13
10
|
// =============================================================================
|
|
14
11
|
|
|
15
|
-
@id("
|
|
16
|
-
@name("Permit
|
|
17
|
-
@description("
|
|
12
|
+
@id("organization.permit-baseline")
|
|
13
|
+
@name("Permit baseline")
|
|
14
|
+
@description("Permits all Guardrails actions.")
|
|
18
15
|
@severity("low")
|
|
19
|
-
@tags("
|
|
16
|
+
@tags("category:organization,posture:permit-default")
|
|
20
17
|
permit (
|
|
21
18
|
principal,
|
|
22
19
|
action,
|