@highflame/policy 2.1.36 → 2.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/ai_gateway/context.json +431 -11
- package/_schemas/ai_gateway/schema.cedarschema +91 -11
- package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
- package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
- package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
- package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
- package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
- package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
- package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
- package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
- package/_schemas/ai_gateway/templates/templates.json +42 -47
- package/_schemas/guardrails/context.json +12 -12
- package/_schemas/guardrails/schema.cedarschema +12 -12
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
- package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
- package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
- package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
- package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
- package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
- package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
- package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
- package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
- package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
- package/_schemas/guardrails/templates/templates.json +188 -210
- package/_schemas/overwatch/context.json +14 -14
- package/_schemas/overwatch/schema.cedarschema +12 -12
- package/_schemas/sentry/context.json +11 -11
- package/_schemas/sentry/schema.cedarschema +11 -11
- package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
- package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
- package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
- package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
- package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
- package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
- package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
- package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
- package/_schemas/sentry/templates/templates.json +46 -46
- package/dist/ai_gateway-context.gen.d.ts +18 -4
- package/dist/ai_gateway-context.gen.js +18 -4
- package/dist/ai_gateway-defaults.gen.d.ts +1 -1
- package/dist/ai_gateway-defaults.gen.js +377 -313
- package/dist/guardrails-context.gen.d.ts +5 -5
- package/dist/guardrails-context.gen.js +5 -5
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +2070 -1849
- package/dist/overwatch-context.gen.d.ts +5 -5
- package/dist/overwatch-context.gen.js +5 -5
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +635 -595
- package/dist/sentry-context.gen.d.ts +3 -3
- package/dist/sentry-context.gen.js +3 -3
- package/dist/sentry-defaults.gen.d.ts +1 -1
- package/dist/sentry-defaults.gen.js +379 -460
- package/dist/service-schemas.gen.d.ts +4 -4
- package/dist/service-schemas.gen.js +249 -99
- package/package.json +1 -1
|
@@ -1,49 +1,31 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
2
|
// Data Pipeline — Security
|
|
3
3
|
// =============================================================================
|
|
4
|
-
//
|
|
5
|
-
//
|
|
4
|
+
// Aggressive injection defence for RAG inputs and data processing — these
|
|
5
|
+
// pipelines consume external content as trusted input and have the highest
|
|
6
|
+
// adversarial surface for indirect injection.
|
|
6
7
|
//
|
|
7
|
-
//
|
|
8
|
+
// Context keys consumed:
|
|
9
|
+
// - injection_score: Long (0-100)
|
|
10
|
+
//
|
|
11
|
+
// Compliance:
|
|
12
|
+
// - OWASP LLM01
|
|
13
|
+
//
|
|
14
|
+
// Category: security
|
|
8
15
|
// Namespace: Guardrails
|
|
9
16
|
// =============================================================================
|
|
10
17
|
|
|
11
|
-
@id("data-
|
|
12
|
-
@name("
|
|
13
|
-
@description("Blocks
|
|
14
|
-
@severity("critical")
|
|
15
|
-
@tags("profile,data-pipeline,secrets,security")
|
|
16
|
-
forbid (
|
|
17
|
-
principal,
|
|
18
|
-
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
19
|
-
resource
|
|
20
|
-
) when {
|
|
21
|
-
context has contains_secrets && context.contains_secrets == true
|
|
22
|
-
};
|
|
23
|
-
|
|
24
|
-
@id("data-block-output-secrets")
|
|
25
|
-
@name("Block secrets in pipeline outputs")
|
|
26
|
-
@description("Prevents data pipeline from writing secrets to any output")
|
|
27
|
-
@severity("critical")
|
|
28
|
-
@tags("profile,data-pipeline,secrets,output")
|
|
29
|
-
forbid (
|
|
30
|
-
principal,
|
|
31
|
-
action == Guardrails::Action::"write_file",
|
|
32
|
-
resource
|
|
33
|
-
) when {
|
|
34
|
-
(context has contains_secrets && context.contains_secrets == true) ||
|
|
35
|
-
(context has secret_count && context.secret_count > 0)
|
|
36
|
-
};
|
|
37
|
-
|
|
38
|
-
@id("data-injection-defense")
|
|
39
|
-
@name("Pipeline injection defense")
|
|
40
|
-
@description("Lower injection threshold for data pipelines — RAG inputs are high-risk for injection")
|
|
18
|
+
@id("security.data-pipeline-block-injection")
|
|
19
|
+
@name("Block pipeline injection (data-pipeline profile)")
|
|
20
|
+
@description("Blocks process_prompt and call_tool when injection_score >= 66 (lower threshold for data pipelines).")
|
|
41
21
|
@severity("high")
|
|
42
|
-
@tags("
|
|
22
|
+
@tags("category:security,threat:injection,detection:ml,owasp:llm01")
|
|
23
|
+
@reject_message("Request blocked: prompt injection detected at the data-pipeline threshold — RAG inputs are high-risk for indirect injection.")
|
|
43
24
|
forbid (
|
|
44
25
|
principal,
|
|
45
26
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
46
27
|
resource
|
|
47
|
-
)
|
|
48
|
-
|
|
28
|
+
)
|
|
29
|
+
when {
|
|
30
|
+
context has injection_score && context.injection_score >= 66
|
|
49
31
|
};
|
|
@@ -1,157 +1,164 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
2
|
// Multi-Agent Orchestration — Cross-Turn Agent Safety
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Session-aware policies that
|
|
5
|
-
// agent identity for defense-in-depth.
|
|
6
|
-
//
|
|
7
|
-
//
|
|
4
|
+
// Session-aware policies that combine cross-turn detection history with
|
|
5
|
+
// agent identity for defense-in-depth. When one agent in a multi-agent
|
|
6
|
+
// session detects a threat, subsequent agents in the same session are
|
|
7
|
+
// restricted accordingly.
|
|
8
8
|
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
9
|
+
// Context keys consumed:
|
|
10
|
+
// - agent_trust_level, agent_type, tool_name, tool_is_sensitive
|
|
11
|
+
// - session_pii_detected, session_pii_types
|
|
12
|
+
// - session_secrets_detected, session_injection_detected
|
|
13
|
+
// - session_command_injection
|
|
14
|
+
// - session_threat_turns: Long
|
|
15
|
+
// - session_cumulative_risk_score: Long
|
|
16
|
+
// - suspicious_pattern: Bool
|
|
13
17
|
//
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
// - agent_type: String - Agent classification
|
|
17
|
-
// - session_pii_detected: Bool - PII seen in any prior turn
|
|
18
|
-
// - session_pii_types: Set<String> - PII types accumulated
|
|
19
|
-
// - session_secrets_detected: Bool - Secrets seen in any prior turn
|
|
20
|
-
// - session_injection_detected: Bool - Injection seen in any prior turn
|
|
21
|
-
// - session_command_injection: Bool - Command injection in any prior turn
|
|
22
|
-
// - session_threat_turns: Long - Count of turns with threats
|
|
23
|
-
// - session_cumulative_risk_score: Long - Total accumulated risk
|
|
24
|
-
// - tool_name: String - Tool being called
|
|
25
|
-
// - tool_is_sensitive: Bool - Whether tool is sensitive
|
|
26
|
-
// - suspicious_pattern: Bool - Whether exfiltration pattern detected
|
|
18
|
+
// Compliance:
|
|
19
|
+
// - OWASP LLM01, LLM08
|
|
27
20
|
//
|
|
28
|
-
// Category:
|
|
21
|
+
// Category: agent-identity
|
|
29
22
|
// Namespace: Guardrails
|
|
30
23
|
// =============================================================================
|
|
31
24
|
|
|
32
|
-
//
|
|
33
|
-
//
|
|
34
|
-
//
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// Section 1: PII containment
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
35
28
|
|
|
36
|
-
@id("multi-agent-pii-block-network-tools")
|
|
37
|
-
@name("Block network tools
|
|
38
|
-
@description("
|
|
29
|
+
@id("agent-identity.multi-agent-pii-block-network-tools")
|
|
30
|
+
@name("Block network tools for non-first-party agents after PII")
|
|
31
|
+
@description("Blocks call_tool when agent_trust_level is not \"first_party\", session_pii_detected is true, and tool_name is a network tool.")
|
|
39
32
|
@severity("critical")
|
|
40
|
-
@tags("
|
|
33
|
+
@tags("category:agent-identity,threat:exfiltration,scope:per-agent,detection:aggregate,surface:call-tool")
|
|
34
|
+
@reject_message("Tool execution blocked: PII was detected earlier in this session and non-first-party agents cannot use network tools afterwards.")
|
|
41
35
|
forbid (
|
|
42
36
|
principal is Guardrails::Agent,
|
|
43
37
|
action == Guardrails::Action::"call_tool",
|
|
44
38
|
resource
|
|
45
|
-
)
|
|
39
|
+
)
|
|
40
|
+
when {
|
|
46
41
|
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
47
42
|
context has session_pii_detected && context.session_pii_detected == true &&
|
|
48
43
|
context has tool_name &&
|
|
49
|
-
(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
44
|
+
(
|
|
45
|
+
context.tool_name == "http_post" ||
|
|
46
|
+
context.tool_name == "send_email" ||
|
|
47
|
+
context.tool_name == "http_request" ||
|
|
48
|
+
context.tool_name == "webhook"
|
|
49
|
+
)
|
|
53
50
|
};
|
|
54
51
|
|
|
55
|
-
@id("multi-agent-pii-block-unverified-
|
|
56
|
-
@name("Block unverified agents from writing files after PII
|
|
57
|
-
@description("
|
|
52
|
+
@id("agent-identity.multi-agent-pii-block-unverified-write")
|
|
53
|
+
@name("Block unverified agents from writing files after PII")
|
|
54
|
+
@description("Blocks write_file when agent_trust_level is \"unverified\" and session_pii_detected is true.")
|
|
58
55
|
@severity("high")
|
|
59
|
-
@tags("
|
|
56
|
+
@tags("category:agent-identity,threat:pii,scope:per-agent,detection:aggregate,surface:write-file")
|
|
57
|
+
@reject_message("File write blocked: PII was detected earlier in this session and unverified agents cannot persist files.")
|
|
60
58
|
forbid (
|
|
61
59
|
principal is Guardrails::Agent,
|
|
62
60
|
action == Guardrails::Action::"write_file",
|
|
63
61
|
resource
|
|
64
|
-
)
|
|
62
|
+
)
|
|
63
|
+
when {
|
|
65
64
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
66
65
|
context has session_pii_detected && context.session_pii_detected == true
|
|
67
66
|
};
|
|
68
67
|
|
|
69
|
-
//
|
|
70
|
-
//
|
|
71
|
-
//
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Section 2: Secrets containment
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
72
71
|
|
|
73
|
-
@id("multi-agent-secrets-lockdown")
|
|
74
|
-
@name("
|
|
75
|
-
@description("
|
|
72
|
+
@id("agent-identity.multi-agent-secrets-lockdown")
|
|
73
|
+
@name("Block sensitive tools after secrets in session")
|
|
74
|
+
@description("Blocks call_tool when agent_trust_level is not \"first_party\", session_secrets_detected is true, and tool_is_sensitive is true.")
|
|
76
75
|
@severity("critical")
|
|
77
|
-
@tags("
|
|
76
|
+
@tags("category:agent-identity,threat:secrets,scope:per-agent,detection:aggregate,surface:call-tool")
|
|
77
|
+
@reject_message("Tool execution blocked: secrets were detected earlier in this session and non-first-party agents cannot call sensitive tools afterwards.")
|
|
78
78
|
forbid (
|
|
79
79
|
principal is Guardrails::Agent,
|
|
80
80
|
action == Guardrails::Action::"call_tool",
|
|
81
81
|
resource
|
|
82
|
-
)
|
|
82
|
+
)
|
|
83
|
+
when {
|
|
83
84
|
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
84
85
|
context has session_secrets_detected && context.session_secrets_detected == true &&
|
|
85
86
|
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
86
87
|
};
|
|
87
88
|
|
|
88
|
-
//
|
|
89
|
-
//
|
|
90
|
-
//
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// Section 3: Injection escalation
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
91
92
|
|
|
92
|
-
@id("multi-agent-post-injection-lockdown")
|
|
93
|
-
@name("
|
|
94
|
-
@description("
|
|
93
|
+
@id("agent-identity.multi-agent-post-injection-lockdown")
|
|
94
|
+
@name("Block unverified agents after injection in session")
|
|
95
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and session_injection_detected is true.")
|
|
95
96
|
@severity("critical")
|
|
96
|
-
@tags("
|
|
97
|
+
@tags("category:agent-identity,threat:injection,scope:per-agent,detection:aggregate,surface:call-tool")
|
|
98
|
+
@reject_message("Tool execution blocked: injection was detected earlier in this session — unverified agents cannot continue.")
|
|
97
99
|
forbid (
|
|
98
100
|
principal is Guardrails::Agent,
|
|
99
101
|
action == Guardrails::Action::"call_tool",
|
|
100
102
|
resource
|
|
101
|
-
)
|
|
103
|
+
)
|
|
104
|
+
when {
|
|
102
105
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
103
106
|
context has session_injection_detected && context.session_injection_detected == true
|
|
104
107
|
};
|
|
105
108
|
|
|
106
|
-
@id("multi-agent-post-command-injection-
|
|
107
|
-
@name("Block
|
|
108
|
-
@description("
|
|
109
|
+
@id("agent-identity.multi-agent-post-command-injection-shell")
|
|
110
|
+
@name("Block shell after command injection in session")
|
|
111
|
+
@description("Blocks call_tool when session_command_injection is true and tool_name is a shell tool.")
|
|
109
112
|
@severity("critical")
|
|
110
|
-
@tags("
|
|
113
|
+
@tags("category:agent-identity,threat:command-injection,scope:per-agent,detection:aggregate,surface:call-tool,mitre:t1059")
|
|
114
|
+
@reject_message("Tool execution blocked: command injection was detected earlier in this session — no agent may execute shell commands afterwards.")
|
|
111
115
|
forbid (
|
|
112
116
|
principal is Guardrails::Agent,
|
|
113
117
|
action == Guardrails::Action::"call_tool",
|
|
114
118
|
resource
|
|
115
|
-
)
|
|
119
|
+
)
|
|
120
|
+
when {
|
|
116
121
|
context has session_command_injection && context.session_command_injection == true &&
|
|
117
122
|
context has tool_name &&
|
|
118
|
-
(context.tool_name == "shell" ||
|
|
119
|
-
context.tool_name == "execute_command" ||
|
|
120
|
-
context.tool_name == "bash")
|
|
123
|
+
(context.tool_name == "shell" || context.tool_name == "execute_command" || context.tool_name == "bash")
|
|
121
124
|
};
|
|
122
125
|
|
|
123
|
-
//
|
|
124
|
-
//
|
|
125
|
-
//
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Section 4: Cumulative risk circuit breakers
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
126
129
|
|
|
127
|
-
@id("multi-agent-high-cumulative-risk
|
|
128
|
-
@name("
|
|
129
|
-
@description("
|
|
130
|
+
@id("agent-identity.multi-agent-high-cumulative-risk")
|
|
131
|
+
@name("Block non-first-party sensitive tools at cumulative risk")
|
|
132
|
+
@description("Blocks call_tool when agent_trust_level is not \"first_party\", session_cumulative_risk_score >= 201, and tool_is_sensitive is true.")
|
|
130
133
|
@severity("high")
|
|
131
|
-
@tags("
|
|
134
|
+
@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
|
|
135
|
+
@reject_message("Tool execution blocked: cumulative session risk exceeded the multi-agent threshold for sensitive tools.")
|
|
132
136
|
forbid (
|
|
133
137
|
principal is Guardrails::Agent,
|
|
134
138
|
action == Guardrails::Action::"call_tool",
|
|
135
139
|
resource
|
|
136
|
-
)
|
|
140
|
+
)
|
|
141
|
+
when {
|
|
137
142
|
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
138
|
-
context has session_cumulative_risk_score && context.session_cumulative_risk_score
|
|
143
|
+
context has session_cumulative_risk_score && context.session_cumulative_risk_score >= 201 &&
|
|
139
144
|
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
140
145
|
};
|
|
141
146
|
|
|
142
|
-
@id("multi-agent-extreme-risk-
|
|
143
|
-
@name("
|
|
144
|
-
@description("
|
|
147
|
+
@id("agent-identity.multi-agent-extreme-risk-lockdown")
|
|
148
|
+
@name("Block unverified agents at extreme cumulative risk")
|
|
149
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and session_cumulative_risk_score >= 501 or session_threat_turns >= 6.")
|
|
145
150
|
@severity("critical")
|
|
146
|
-
@tags("
|
|
151
|
+
@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
|
|
152
|
+
@reject_message("Tool execution blocked: this session has accumulated extreme risk — unverified agents are locked out.")
|
|
147
153
|
forbid (
|
|
148
154
|
principal is Guardrails::Agent,
|
|
149
155
|
action == Guardrails::Action::"call_tool",
|
|
150
156
|
resource
|
|
151
|
-
)
|
|
157
|
+
)
|
|
158
|
+
when {
|
|
152
159
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
153
160
|
(
|
|
154
|
-
(context has session_cumulative_risk_score && context.session_cumulative_risk_score
|
|
155
|
-
(context has session_threat_turns && context.session_threat_turns
|
|
161
|
+
(context has session_cumulative_risk_score && context.session_cumulative_risk_score >= 501) ||
|
|
162
|
+
(context has session_threat_turns && context.session_threat_turns >= 6)
|
|
156
163
|
)
|
|
157
164
|
};
|
|
@@ -1,140 +1,143 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Multi-Agent Orchestration — Agent Trust
|
|
2
|
+
// Multi-Agent Orchestration — Agent Trust
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Production-grade trust policies for multi-agent systems where an
|
|
5
|
-
// coordinates sub-agents with varying trust levels.
|
|
6
|
-
// least-privilege access: each agent gets only the permissions its trust
|
|
7
|
-
// and type warrant.
|
|
4
|
+
// Production-grade trust policies for multi-agent systems where an
|
|
5
|
+
// orchestrator coordinates sub-agents with varying trust levels. Enforces
|
|
6
|
+
// least-privilege access: each agent gets only the permissions its trust
|
|
7
|
+
// level and type warrant.
|
|
8
8
|
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
9
|
+
// Context keys consumed:
|
|
10
|
+
// - agent_id, agent_type, agent_trust_level, agent_framework, agent_publisher
|
|
11
|
+
// - tool_name, tool_category, tool_is_sensitive, tool_risk_score
|
|
12
|
+
// - mcp_server_verified
|
|
13
|
+
// - injection_score, jailbreak_score
|
|
14
14
|
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
// - agent_type: String - orchestrator | autonomous | tool_agent | human_proxy
|
|
18
|
-
// - agent_trust_level: String - first_party | verified_third_party | unverified
|
|
19
|
-
// - agent_framework: String - Agent framework/SDK
|
|
20
|
-
// - agent_publisher: String - Publishing organization
|
|
21
|
-
// - tool_name: String - Tool being called
|
|
22
|
-
// - tool_category: String - safe | sensitive | dangerous
|
|
23
|
-
// - tool_risk_score: Long (0-100) - Computed risk score
|
|
24
|
-
// - tool_is_sensitive: Bool - Whether tool is sensitive
|
|
25
|
-
// - mcp_server_verified: Bool - Whether MCP server is verified
|
|
26
|
-
// - injection_confidence: Long (0-100) - Injection detection score
|
|
27
|
-
// - jailbreak_confidence: Long (0-100) - Jailbreak detection score
|
|
15
|
+
// Compliance:
|
|
16
|
+
// - OWASP LLM01, LLM08; OWASP ASI03, ASI05
|
|
28
17
|
//
|
|
29
|
-
// Category:
|
|
18
|
+
// Category: agent-identity
|
|
30
19
|
// Namespace: Guardrails
|
|
31
20
|
// =============================================================================
|
|
32
21
|
|
|
33
|
-
//
|
|
34
|
-
//
|
|
35
|
-
//
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Section 1: Tiered tool access
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
36
25
|
|
|
37
|
-
@id("multi-agent-
|
|
38
|
-
@name("
|
|
39
|
-
@description("
|
|
26
|
+
@id("agent-identity.multi-agent-block-non-first-party-dangerous")
|
|
27
|
+
@name("Block non-first-party agents from dangerous tools")
|
|
28
|
+
@description("Blocks call_tool when agent_trust_level is not \"first_party\" and tool_category is \"dangerous\".")
|
|
40
29
|
@severity("critical")
|
|
41
|
-
@tags("
|
|
30
|
+
@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
|
|
31
|
+
@reject_message("Tool execution blocked: dangerous tools are restricted to first-party agents.")
|
|
42
32
|
forbid (
|
|
43
33
|
principal is Guardrails::Agent,
|
|
44
34
|
action == Guardrails::Action::"call_tool",
|
|
45
35
|
resource
|
|
46
|
-
)
|
|
36
|
+
)
|
|
37
|
+
when {
|
|
47
38
|
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
48
39
|
context has tool_category && context.tool_category == "dangerous"
|
|
49
40
|
};
|
|
50
41
|
|
|
51
|
-
@id("multi-agent-block-unverified-sensitive")
|
|
52
|
-
@name("Block unverified agents from
|
|
53
|
-
@description("
|
|
42
|
+
@id("agent-identity.multi-agent-block-unverified-sensitive")
|
|
43
|
+
@name("Block unverified agents from sensitive tools")
|
|
44
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_is_sensitive is true.")
|
|
54
45
|
@severity("high")
|
|
55
|
-
@tags("
|
|
46
|
+
@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
|
|
47
|
+
@reject_message("Tool execution blocked: unverified agents may only use tools classified as safe.")
|
|
56
48
|
forbid (
|
|
57
49
|
principal is Guardrails::Agent,
|
|
58
50
|
action == Guardrails::Action::"call_tool",
|
|
59
51
|
resource
|
|
60
|
-
)
|
|
52
|
+
)
|
|
53
|
+
when {
|
|
61
54
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
62
55
|
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
63
56
|
};
|
|
64
57
|
|
|
65
|
-
@id("multi-agent-block-unverified-mcp")
|
|
66
|
-
@name("Block unverified agents
|
|
67
|
-
@description("
|
|
58
|
+
@id("agent-identity.multi-agent-block-unverified-mcp")
|
|
59
|
+
@name("Block unverified agents on unverified MCP servers")
|
|
60
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and mcp_server_verified is false.")
|
|
68
61
|
@severity("critical")
|
|
69
|
-
@tags("
|
|
62
|
+
@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:rule,surface:call-tool")
|
|
63
|
+
@reject_message("Tool execution blocked: unverified agents cannot use unverified MCP servers — supply-chain risk too high.")
|
|
70
64
|
forbid (
|
|
71
65
|
principal is Guardrails::Agent,
|
|
72
66
|
action == Guardrails::Action::"call_tool",
|
|
73
67
|
resource
|
|
74
|
-
)
|
|
68
|
+
)
|
|
69
|
+
when {
|
|
75
70
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
76
71
|
context has mcp_server_verified && context.mcp_server_verified == false
|
|
77
72
|
};
|
|
78
73
|
|
|
79
|
-
//
|
|
80
|
-
// Autonomous
|
|
81
|
-
//
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// Section 2: Autonomous-agent safeguards
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
82
77
|
|
|
83
|
-
@id("multi-agent-autonomous-tool-risk
|
|
84
|
-
@name("
|
|
85
|
-
@description("
|
|
78
|
+
@id("agent-identity.multi-agent-cap-autonomous-tool-risk")
|
|
79
|
+
@name("Block high-risk tools for autonomous agents")
|
|
80
|
+
@description("Blocks call_tool when agent_type is \"autonomous\" and tool_risk_score >= 71.")
|
|
86
81
|
@severity("high")
|
|
87
|
-
@tags("
|
|
82
|
+
@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
|
|
83
|
+
@reject_message("Tool execution blocked: autonomous agents require human oversight for high-risk tools.")
|
|
88
84
|
forbid (
|
|
89
85
|
principal is Guardrails::Agent,
|
|
90
86
|
action == Guardrails::Action::"call_tool",
|
|
91
87
|
resource
|
|
92
|
-
)
|
|
88
|
+
)
|
|
89
|
+
when {
|
|
93
90
|
context has agent_type && context.agent_type == "autonomous" &&
|
|
94
|
-
context has tool_risk_score && context.tool_risk_score
|
|
91
|
+
context has tool_risk_score && context.tool_risk_score >= 71
|
|
95
92
|
};
|
|
96
93
|
|
|
97
|
-
@id("multi-agent-autonomous-injection
|
|
98
|
-
@name("
|
|
99
|
-
@description("
|
|
94
|
+
@id("agent-identity.multi-agent-autonomous-injection")
|
|
95
|
+
@name("Block injection on autonomous agents (stricter)")
|
|
96
|
+
@description("Blocks process_prompt when agent_type is \"autonomous\" and injection_score >= 51.")
|
|
100
97
|
@severity("high")
|
|
101
|
-
@tags("
|
|
98
|
+
@tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
|
|
99
|
+
@reject_message("Prompt blocked: lower injection threshold applies to autonomous agents and was exceeded.")
|
|
102
100
|
forbid (
|
|
103
101
|
principal is Guardrails::Agent,
|
|
104
102
|
action == Guardrails::Action::"process_prompt",
|
|
105
103
|
resource
|
|
106
|
-
)
|
|
104
|
+
)
|
|
105
|
+
when {
|
|
107
106
|
context has agent_type && context.agent_type == "autonomous" &&
|
|
108
|
-
context has
|
|
107
|
+
context has injection_score && context.injection_score >= 51
|
|
109
108
|
};
|
|
110
109
|
|
|
111
|
-
@id("multi-agent-autonomous-jailbreak
|
|
112
|
-
@name("
|
|
113
|
-
@description("
|
|
110
|
+
@id("agent-identity.multi-agent-autonomous-jailbreak")
|
|
111
|
+
@name("Block jailbreak on autonomous agents (stricter)")
|
|
112
|
+
@description("Blocks process_prompt when agent_type is \"autonomous\" and jailbreak_score >= 51.")
|
|
114
113
|
@severity("high")
|
|
115
|
-
@tags("
|
|
114
|
+
@tags("category:agent-identity,threat:jailbreak,detection:ml,surface:process-prompt,owasp:llm01")
|
|
115
|
+
@reject_message("Prompt blocked: lower jailbreak threshold applies to autonomous agents and was exceeded.")
|
|
116
116
|
forbid (
|
|
117
117
|
principal is Guardrails::Agent,
|
|
118
118
|
action == Guardrails::Action::"process_prompt",
|
|
119
119
|
resource
|
|
120
|
-
)
|
|
120
|
+
)
|
|
121
|
+
when {
|
|
121
122
|
context has agent_type && context.agent_type == "autonomous" &&
|
|
122
|
-
context has
|
|
123
|
+
context has jailbreak_score && context.jailbreak_score >= 51
|
|
123
124
|
};
|
|
124
125
|
|
|
125
|
-
//
|
|
126
|
-
// MCP
|
|
127
|
-
//
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Section 3: MCP server-connection trust
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
128
129
|
|
|
129
|
-
@id("multi-agent-block-unverified-server-connect")
|
|
130
|
+
@id("agent-identity.multi-agent-block-unverified-server-connect")
|
|
130
131
|
@name("Block unverified agents from connecting to MCP servers")
|
|
131
|
-
@description("
|
|
132
|
+
@description("Blocks connect_server when agent_trust_level is \"unverified\".")
|
|
132
133
|
@severity("high")
|
|
133
|
-
@tags("
|
|
134
|
+
@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:rule,surface:connect-server")
|
|
135
|
+
@reject_message("MCP server connection blocked: unverified agents cannot establish new server connections.")
|
|
134
136
|
forbid (
|
|
135
137
|
principal is Guardrails::Agent,
|
|
136
138
|
action == Guardrails::Action::"connect_server",
|
|
137
139
|
resource
|
|
138
|
-
)
|
|
140
|
+
)
|
|
141
|
+
when {
|
|
139
142
|
context has agent_trust_level && context.agent_trust_level == "unverified"
|
|
140
143
|
};
|