@highflame/policy 2.1.35 → 2.1.37

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (81)
  1. package/_schemas/ai_gateway/context.json +431 -11
  2. package/_schemas/ai_gateway/schema.cedarschema +91 -11
  3. package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
  4. package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
  5. package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
  6. package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
  7. package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
  8. package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
  9. package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
  10. package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
  11. package/_schemas/ai_gateway/templates/templates.json +52 -50
  12. package/_schemas/guardrails/context.json +12 -12
  13. package/_schemas/guardrails/schema.cedarschema +12 -12
  14. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
  15. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
  16. package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
  17. package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
  18. package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
  19. package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
  20. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
  21. package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
  22. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
  23. package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
  24. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
  25. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
  26. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
  27. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
  28. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
  29. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
  30. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
  31. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
  32. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
  33. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
  34. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
  35. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
  36. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
  37. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
  38. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
  39. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
  40. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
  41. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
  42. package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
  43. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
  44. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
  45. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
  46. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
  47. package/_schemas/guardrails/templates/templates.json +198 -217
  48. package/_schemas/overwatch/context.json +14 -14
  49. package/_schemas/overwatch/schema.cedarschema +12 -12
  50. package/_schemas/sentry/context.json +11 -11
  51. package/_schemas/sentry/schema.cedarschema +11 -11
  52. package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
  53. package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
  54. package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
  55. package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
  56. package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
  57. package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
  58. package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
  59. package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
  60. package/_schemas/sentry/templates/templates.json +53 -43
  61. package/dist/ai_gateway-context.gen.d.ts +18 -4
  62. package/dist/ai_gateway-context.gen.js +18 -4
  63. package/dist/ai_gateway-defaults.gen.d.ts +4 -1
  64. package/dist/ai_gateway-defaults.gen.js +398 -320
  65. package/dist/builder.d.ts +2 -1
  66. package/dist/builder.js +5 -2
  67. package/dist/guardrails-context.gen.d.ts +5 -5
  68. package/dist/guardrails-context.gen.js +5 -5
  69. package/dist/guardrails-defaults.gen.d.ts +4 -1
  70. package/dist/guardrails-defaults.gen.js +2130 -1903
  71. package/dist/overwatch-context.gen.d.ts +5 -5
  72. package/dist/overwatch-context.gen.js +5 -5
  73. package/dist/overwatch-defaults.gen.d.ts +4 -1
  74. package/dist/overwatch-defaults.gen.js +560 -566
  75. package/dist/sentry-context.gen.d.ts +3 -3
  76. package/dist/sentry-context.gen.js +3 -3
  77. package/dist/sentry-defaults.gen.d.ts +4 -1
  78. package/dist/sentry-defaults.gen.js +392 -453
  79. package/dist/service-schemas.gen.d.ts +4 -4
  80. package/dist/service-schemas.gen.js +249 -99
  81. package/package.json +1 -1
@@ -1,49 +1,31 @@
1
1
  // =============================================================================
2
2
  // Data Pipeline — Security
3
3
  // =============================================================================
4
- // Strict secrets detection and injection defense for data pipelines.
5
- // RAG inputs are high-risk for injection — lower thresholds than defaults.
4
+ // Aggressive injection defence for RAG inputs and data processing — these
5
+ // pipelines consume external content as trusted input and have the highest
6
+ // adversarial surface for indirect injection.
6
7
  //
7
- // Category: security
8
+ // Context keys consumed:
9
+ // - injection_score: Long (0-100)
10
+ //
11
+ // Compliance:
12
+ // - OWASP LLM01
13
+ //
14
+ // Category: security
8
15
  // Namespace: Guardrails
9
16
  // =============================================================================
10
17
 
11
- @id("data-secrets-strict")
12
- @name("Strict secrets detection for data pipeline")
13
- @description("Blocks any content containing secrets even a single match")
14
- @severity("critical")
15
- @tags("profile,data-pipeline,secrets,security")
16
- forbid (
17
- principal,
18
- action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
19
- resource
20
- ) when {
21
- context has contains_secrets && context.contains_secrets == true
22
- };
23
-
24
- @id("data-block-output-secrets")
25
- @name("Block secrets in pipeline outputs")
26
- @description("Prevents data pipeline from writing secrets to any output")
27
- @severity("critical")
28
- @tags("profile,data-pipeline,secrets,output")
29
- forbid (
30
- principal,
31
- action == Guardrails::Action::"write_file",
32
- resource
33
- ) when {
34
- (context has contains_secrets && context.contains_secrets == true) ||
35
- (context has secret_count && context.secret_count > 0)
36
- };
37
-
38
- @id("data-injection-defense")
39
- @name("Pipeline injection defense")
40
- @description("Lower injection threshold for data pipelines — RAG inputs are high-risk for injection")
18
+ @id("security.data-pipeline-block-injection")
19
+ @name("Block pipeline injection (data-pipeline profile)")
20
+ @description("Blocks process_prompt and call_tool when injection_score >= 66 (lower threshold for data pipelines).")
41
21
  @severity("high")
42
- @tags("profile,data-pipeline,injection,security")
22
+ @tags("category:security,threat:injection,detection:ml,owasp:llm01")
23
+ @reject_message("Request blocked: prompt injection detected at the data-pipeline threshold — RAG inputs are high-risk for indirect injection.")
43
24
  forbid (
44
25
  principal,
45
26
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
46
27
  resource
47
- ) when {
48
- context has injection_confidence && context.injection_confidence > 65
28
+ )
29
+ when {
30
+ context has injection_score && context.injection_score >= 66
49
31
  };
@@ -1,157 +1,164 @@
1
1
  // =============================================================================
2
2
  // Multi-Agent Orchestration — Cross-Turn Agent Safety
3
3
  // =============================================================================
4
- // Session-aware policies that use cross-turn detection history combined with
5
- // agent identity for defense-in-depth. These policies handle the scenario
6
- // where one agent in a multi-agent session detects a threat, and subsequent
7
- // agents in the same session must be restricted accordingly.
4
+ // Session-aware policies that combine cross-turn detection history with
5
+ // agent identity for defense-in-depth. When one agent in a multi-agent
6
+ // session detects a threat, subsequent agents in the same session are
7
+ // restricted accordingly.
8
8
  //
9
- // Key insight: In multi-agent orchestration, Agent A may detect PII in turn 3,
10
- // and Agent B (a different agent) arrives in turn 5 wanting to call http_post.
11
- // Cross-turn session flags + agent trust level enable this policy:
12
- // "If PII was seen AND this agent is unverified → block network tools."
9
+ // Context keys consumed:
10
+ // - agent_trust_level, agent_type, tool_name, tool_is_sensitive
11
+ // - session_pii_detected, session_pii_types
12
+ // - session_secrets_detected, session_injection_detected
13
+ // - session_command_injection
14
+ // - session_threat_turns: Long
15
+ // - session_cumulative_risk_score: Long
16
+ // - suspicious_pattern: Bool
13
17
  //
14
- // Context keys used:
15
- // - agent_trust_level: String - Trust tier of the current agent
16
- // - agent_type: String - Agent classification
17
- // - session_pii_detected: Bool - PII seen in any prior turn
18
- // - session_pii_types: Set<String> - PII types accumulated
19
- // - session_secrets_detected: Bool - Secrets seen in any prior turn
20
- // - session_injection_detected: Bool - Injection seen in any prior turn
21
- // - session_command_injection: Bool - Command injection in any prior turn
22
- // - session_threat_turns: Long - Count of turns with threats
23
- // - session_cumulative_risk_score: Long - Total accumulated risk
24
- // - tool_name: String - Tool being called
25
- // - tool_is_sensitive: Bool - Whether tool is sensitive
26
- // - suspicious_pattern: Bool - Whether exfiltration pattern detected
18
+ // Compliance:
19
+ // - OWASP LLM01, LLM08
27
20
  //
28
- // Category: agent_identity
21
+ // Category: agent-identity
29
22
  // Namespace: Guardrails
30
23
  // =============================================================================
31
24
 
32
- // -----------------------------------------------------------------------------
33
- // PII Containment — Prevent Agent Data Leakage
34
- // -----------------------------------------------------------------------------
25
+ // ---------------------------------------------------------------------------
26
+ // Section 1: PII containment
27
+ // ---------------------------------------------------------------------------
35
28
 
36
- @id("multi-agent-pii-block-network-tools")
37
- @name("Block network tools after PII detection for non-first-party agents")
38
- @description("If PII was detected in any prior turn, block non-first-party agents from calling network-facing tools. Prevents data exfiltration by untrusted agents in sessions containing sensitive data")
29
+ @id("agent-identity.multi-agent-pii-block-network-tools")
30
+ @name("Block network tools for non-first-party agents after PII")
31
+ @description("Blocks call_tool when agent_trust_level is not \"first_party\", session_pii_detected is true, and tool_name is a network tool.")
39
32
  @severity("critical")
40
- @tags("profile,multi-agent,pii,exfiltration,cross-turn,a2a")
33
+ @tags("category:agent-identity,threat:exfiltration,scope:per-agent,detection:aggregate,surface:call-tool")
34
+ @reject_message("Tool execution blocked: PII was detected earlier in this session and non-first-party agents cannot use network tools afterwards.")
41
35
  forbid (
42
36
  principal is Guardrails::Agent,
43
37
  action == Guardrails::Action::"call_tool",
44
38
  resource
45
- ) when {
39
+ )
40
+ when {
46
41
  context has agent_trust_level && context.agent_trust_level != "first_party" &&
47
42
  context has session_pii_detected && context.session_pii_detected == true &&
48
43
  context has tool_name &&
49
- (context.tool_name == "http_post" ||
50
- context.tool_name == "send_email" ||
51
- context.tool_name == "http_request" ||
52
- context.tool_name == "webhook")
44
+ (
45
+ context.tool_name == "http_post" ||
46
+ context.tool_name == "send_email" ||
47
+ context.tool_name == "http_request" ||
48
+ context.tool_name == "webhook"
49
+ )
53
50
  };
54
51
 
55
- @id("multi-agent-pii-block-unverified-file-write")
56
- @name("Block unverified agents from writing files after PII detection")
57
- @description("If PII was detected in the session, unverified agents cannot write files. Prevents PII persistence by untrusted agents")
52
+ @id("agent-identity.multi-agent-pii-block-unverified-write")
53
+ @name("Block unverified agents from writing files after PII")
54
+ @description("Blocks write_file when agent_trust_level is \"unverified\" and session_pii_detected is true.")
58
55
  @severity("high")
59
- @tags("profile,multi-agent,pii,file-write,cross-turn,a2a")
56
+ @tags("category:agent-identity,threat:pii,scope:per-agent,detection:aggregate,surface:write-file")
57
+ @reject_message("File write blocked: PII was detected earlier in this session and unverified agents cannot persist files.")
60
58
  forbid (
61
59
  principal is Guardrails::Agent,
62
60
  action == Guardrails::Action::"write_file",
63
61
  resource
64
- ) when {
62
+ )
63
+ when {
65
64
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
66
65
  context has session_pii_detected && context.session_pii_detected == true
67
66
  };
68
67
 
69
- // -----------------------------------------------------------------------------
70
- // Secrets Containment — Lock Down After Credential Exposure
71
- // -----------------------------------------------------------------------------
68
+ // ---------------------------------------------------------------------------
69
+ // Section 2: Secrets containment
70
+ // ---------------------------------------------------------------------------
72
71
 
73
- @id("multi-agent-secrets-lockdown")
74
- @name("Lock down sensitive tools after secrets detection")
75
- @description("If secrets (API keys, tokens) were detected in any prior turn, block all non-first-party agents from sensitive tool calls. Prevents credential exfiltration in compromised sessions")
72
+ @id("agent-identity.multi-agent-secrets-lockdown")
73
+ @name("Block sensitive tools after secrets in session")
74
+ @description("Blocks call_tool when agent_trust_level is not \"first_party\", session_secrets_detected is true, and tool_is_sensitive is true.")
76
75
  @severity("critical")
77
- @tags("profile,multi-agent,secrets,lockdown,cross-turn,a2a")
76
+ @tags("category:agent-identity,threat:secrets,scope:per-agent,detection:aggregate,surface:call-tool")
77
+ @reject_message("Tool execution blocked: secrets were detected earlier in this session and non-first-party agents cannot call sensitive tools afterwards.")
78
78
  forbid (
79
79
  principal is Guardrails::Agent,
80
80
  action == Guardrails::Action::"call_tool",
81
81
  resource
82
- ) when {
82
+ )
83
+ when {
83
84
  context has agent_trust_level && context.agent_trust_level != "first_party" &&
84
85
  context has session_secrets_detected && context.session_secrets_detected == true &&
85
86
  context has tool_is_sensitive && context.tool_is_sensitive == true
86
87
  };
87
88
 
88
- // -----------------------------------------------------------------------------
89
- // Injection Escalation — Tighten After Prior Attacks
90
- // -----------------------------------------------------------------------------
89
+ // ---------------------------------------------------------------------------
90
+ // Section 3: Injection escalation
91
+ // ---------------------------------------------------------------------------
91
92
 
92
- @id("multi-agent-post-injection-lockdown")
93
- @name("Restrict unverified agents after injection detection")
94
- @description("If injection was detected in any prior turn, block unverified agents from all tool calls. An injection in a prior turn may have poisoned the context, making subsequent unverified agent actions high risk")
93
+ @id("agent-identity.multi-agent-post-injection-lockdown")
94
+ @name("Block unverified agents after injection in session")
95
+ @description("Blocks call_tool when agent_trust_level is \"unverified\" and session_injection_detected is true.")
95
96
  @severity("critical")
96
- @tags("profile,multi-agent,injection,lockdown,cross-turn,a2a")
97
+ @tags("category:agent-identity,threat:injection,scope:per-agent,detection:aggregate,surface:call-tool")
98
+ @reject_message("Tool execution blocked: injection was detected earlier in this session — unverified agents cannot continue.")
97
99
  forbid (
98
100
  principal is Guardrails::Agent,
99
101
  action == Guardrails::Action::"call_tool",
100
102
  resource
101
- ) when {
103
+ )
104
+ when {
102
105
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
103
106
  context has session_injection_detected && context.session_injection_detected == true
104
107
  };
105
108
 
106
- @id("multi-agent-post-command-injection-block-shell")
107
- @name("Block all agent shell access after command injection")
108
- @description("If command injection was detected in any prior turn, no agent (regardless of trust) can execute shell commands. Defense against persistent shell compromise")
109
+ @id("agent-identity.multi-agent-post-command-injection-shell")
110
+ @name("Block shell after command injection in session")
111
+ @description("Blocks call_tool when session_command_injection is true and tool_name is a shell tool.")
109
112
  @severity("critical")
110
- @tags("profile,multi-agent,command-injection,shell,cross-turn,a2a")
113
+ @tags("category:agent-identity,threat:command-injection,scope:per-agent,detection:aggregate,surface:call-tool,mitre:t1059")
114
+ @reject_message("Tool execution blocked: command injection was detected earlier in this session — no agent may execute shell commands afterwards.")
111
115
  forbid (
112
116
  principal is Guardrails::Agent,
113
117
  action == Guardrails::Action::"call_tool",
114
118
  resource
115
- ) when {
119
+ )
120
+ when {
116
121
  context has session_command_injection && context.session_command_injection == true &&
117
122
  context has tool_name &&
118
- (context.tool_name == "shell" ||
119
- context.tool_name == "execute_command" ||
120
- context.tool_name == "bash")
123
+ (context.tool_name == "shell" || context.tool_name == "execute_command" || context.tool_name == "bash")
121
124
  };
122
125
 
123
- // -----------------------------------------------------------------------------
124
- // Cumulative Risk — Session-Level Circuit Breaker
125
- // -----------------------------------------------------------------------------
126
+ // ---------------------------------------------------------------------------
127
+ // Section 4: Cumulative risk circuit breakers
128
+ // ---------------------------------------------------------------------------
126
129
 
127
- @id("multi-agent-high-cumulative-risk-restrict")
128
- @name("Restrict non-first-party agents in high-risk sessions")
129
- @description("When cumulative session risk exceeds 200, restrict non-first-party agents to safe tools only. Acts as a circuit breaker for sessions that have accumulated multiple risk signals across turns")
130
+ @id("agent-identity.multi-agent-high-cumulative-risk")
131
+ @name("Block non-first-party sensitive tools at cumulative risk")
132
+ @description("Blocks call_tool when agent_trust_level is not \"first_party\", session_cumulative_risk_score >= 201, and tool_is_sensitive is true.")
130
133
  @severity("high")
131
- @tags("profile,multi-agent,cumulative-risk,circuit-breaker,a2a")
134
+ @tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
135
+ @reject_message("Tool execution blocked: cumulative session risk exceeded the multi-agent threshold for sensitive tools.")
132
136
  forbid (
133
137
  principal is Guardrails::Agent,
134
138
  action == Guardrails::Action::"call_tool",
135
139
  resource
136
- ) when {
140
+ )
141
+ when {
137
142
  context has agent_trust_level && context.agent_trust_level != "first_party" &&
138
- context has session_cumulative_risk_score && context.session_cumulative_risk_score > 200 &&
143
+ context has session_cumulative_risk_score && context.session_cumulative_risk_score >= 201 &&
139
144
  context has tool_is_sensitive && context.tool_is_sensitive == true
140
145
  };
141
146
 
142
- @id("multi-agent-extreme-risk-full-lockdown")
143
- @name("Full lockdown for unverified agents in extreme-risk sessions")
144
- @description("When cumulative session risk exceeds 500 or more than 5 threat turns are detected, block ALL tool calls from unverified agents. Emergency circuit breaker for compromised sessions")
147
+ @id("agent-identity.multi-agent-extreme-risk-lockdown")
148
+ @name("Block unverified agents at extreme cumulative risk")
149
+ @description("Blocks call_tool when agent_trust_level is \"unverified\" and session_cumulative_risk_score >= 501 or session_threat_turns >= 6.")
145
150
  @severity("critical")
146
- @tags("profile,multi-agent,extreme-risk,lockdown,a2a")
151
+ @tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
152
+ @reject_message("Tool execution blocked: this session has accumulated extreme risk — unverified agents are locked out.")
147
153
  forbid (
148
154
  principal is Guardrails::Agent,
149
155
  action == Guardrails::Action::"call_tool",
150
156
  resource
151
- ) when {
157
+ )
158
+ when {
152
159
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
153
160
  (
154
- (context has session_cumulative_risk_score && context.session_cumulative_risk_score > 500) ||
155
- (context has session_threat_turns && context.session_threat_turns > 5)
161
+ (context has session_cumulative_risk_score && context.session_cumulative_risk_score >= 501) ||
162
+ (context has session_threat_turns && context.session_threat_turns >= 6)
156
163
  )
157
164
  };
@@ -1,140 +1,143 @@
1
1
  // =============================================================================
2
- // Multi-Agent Orchestration — Agent Trust Policies
2
+ // Multi-Agent Orchestration — Agent Trust
3
3
  // =============================================================================
4
- // Production-grade trust policies for multi-agent systems where an orchestrator
5
- // coordinates sub-agents with varying trust levels. These policies enforce
6
- // least-privilege access: each agent gets only the permissions its trust level
7
- // and type warrant.
4
+ // Production-grade trust policies for multi-agent systems where an
5
+ // orchestrator coordinates sub-agents with varying trust levels. Enforces
6
+ // least-privilege access: each agent gets only the permissions its trust
7
+ // level and type warrant.
8
8
  //
9
- // Architecture supported:
10
- // Orchestrator (first_party)
11
- // ├── Research Agent (verified_third_party, autonomous)
12
- // ├── Code Agent (first_party, tool_agent)
13
- // └── External Plugin (unverified, tool_agent)
9
+ // Context keys consumed:
10
+ // - agent_id, agent_type, agent_trust_level, agent_framework, agent_publisher
11
+ // - tool_name, tool_category, tool_is_sensitive, tool_risk_score
12
+ // - mcp_server_verified
13
+ // - injection_score, jailbreak_score
14
14
  //
15
- // Context keys used:
16
- // - agent_id: String - Unique agent identifier
17
- // - agent_type: String - orchestrator | autonomous | tool_agent | human_proxy
18
- // - agent_trust_level: String - first_party | verified_third_party | unverified
19
- // - agent_framework: String - Agent framework/SDK
20
- // - agent_publisher: String - Publishing organization
21
- // - tool_name: String - Tool being called
22
- // - tool_category: String - safe | sensitive | dangerous
23
- // - tool_risk_score: Long (0-100) - Computed risk score
24
- // - tool_is_sensitive: Bool - Whether tool is sensitive
25
- // - mcp_server_verified: Bool - Whether MCP server is verified
26
- // - injection_confidence: Long (0-100) - Injection detection score
27
- // - jailbreak_confidence: Long (0-100) - Jailbreak detection score
15
+ // Compliance:
16
+ // - OWASP LLM01, LLM08; OWASP ASI03, ASI05
28
17
  //
29
- // Category: agent_identity
18
+ // Category: agent-identity
30
19
  // Namespace: Guardrails
31
20
  // =============================================================================
32
21
 
33
- // -----------------------------------------------------------------------------
34
- // Tiered Tool Access — The Core A2A Trust Model
35
- // -----------------------------------------------------------------------------
22
+ // ---------------------------------------------------------------------------
23
+ // Section 1: Tiered tool access
24
+ // ---------------------------------------------------------------------------
36
25
 
37
- @id("multi-agent-only-first-party-dangerous")
38
- @name("Only first-party agents can use dangerous tools")
39
- @description("Dangerous tools (shell, delete_file, run_sql) are restricted to first-party agents only. Third-party and unverified agents are blocked regardless of other signals")
26
+ @id("agent-identity.multi-agent-block-non-first-party-dangerous")
27
+ @name("Block non-first-party agents from dangerous tools")
28
+ @description("Blocks call_tool when agent_trust_level is not \"first_party\" and tool_category is \"dangerous\".")
40
29
  @severity("critical")
41
- @tags("profile,multi-agent,trust,tools,a2a")
30
+ @tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
31
+ @reject_message("Tool execution blocked: dangerous tools are restricted to first-party agents.")
42
32
  forbid (
43
33
  principal is Guardrails::Agent,
44
34
  action == Guardrails::Action::"call_tool",
45
35
  resource
46
- ) when {
36
+ )
37
+ when {
47
38
  context has agent_trust_level && context.agent_trust_level != "first_party" &&
48
39
  context has tool_category && context.tool_category == "dangerous"
49
40
  };
50
41
 
51
- @id("multi-agent-block-unverified-sensitive")
52
- @name("Block unverified agents from all sensitive tools")
53
- @description("Unverified agents can only use tools classified as safe. Sensitive tools (write_file, http_post, send_email) require at least verified_third_party trust")
42
+ @id("agent-identity.multi-agent-block-unverified-sensitive")
43
+ @name("Block unverified agents from sensitive tools")
44
+ @description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_is_sensitive is true.")
54
45
  @severity("high")
55
- @tags("profile,multi-agent,trust,tools,a2a")
46
+ @tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
47
+ @reject_message("Tool execution blocked: unverified agents may only use tools classified as safe.")
56
48
  forbid (
57
49
  principal is Guardrails::Agent,
58
50
  action == Guardrails::Action::"call_tool",
59
51
  resource
60
- ) when {
52
+ )
53
+ when {
61
54
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
62
55
  context has tool_is_sensitive && context.tool_is_sensitive == true
63
56
  };
64
57
 
65
- @id("multi-agent-block-unverified-mcp")
66
- @name("Block unverified agents from unverified MCP servers")
67
- @description("Unverified agents cannot call tools from unverified MCP servers. Double-unverified (agent + server) presents unacceptable supply chain risk")
58
+ @id("agent-identity.multi-agent-block-unverified-mcp")
59
+ @name("Block unverified agents on unverified MCP servers")
60
+ @description("Blocks call_tool when agent_trust_level is \"unverified\" and mcp_server_verified is false.")
68
61
  @severity("critical")
69
- @tags("profile,multi-agent,trust,mcp,a2a")
62
+ @tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:rule,surface:call-tool")
63
+ @reject_message("Tool execution blocked: unverified agents cannot use unverified MCP servers — supply-chain risk too high.")
70
64
  forbid (
71
65
  principal is Guardrails::Agent,
72
66
  action == Guardrails::Action::"call_tool",
73
67
  resource
74
- ) when {
68
+ )
69
+ when {
75
70
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
76
71
  context has mcp_server_verified && context.mcp_server_verified == false
77
72
  };
78
73
 
79
- // -----------------------------------------------------------------------------
80
- // Autonomous Agent Safeguards
81
- // -----------------------------------------------------------------------------
74
+ // ---------------------------------------------------------------------------
75
+ // Section 2: Autonomous-agent safeguards
76
+ // ---------------------------------------------------------------------------
82
77
 
83
- @id("multi-agent-autonomous-tool-risk-cap")
84
- @name("Cap tool risk for autonomous agents")
85
- @description("Autonomous agents (no human in the loop) have a lower tool risk ceiling. Tools with risk > 70 require human oversight that autonomous agents lack")
78
+ @id("agent-identity.multi-agent-cap-autonomous-tool-risk")
79
+ @name("Block high-risk tools for autonomous agents")
80
+ @description("Blocks call_tool when agent_type is \"autonomous\" and tool_risk_score >= 71.")
86
81
  @severity("high")
87
- @tags("profile,multi-agent,autonomous,tools,a2a")
82
+ @tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
83
+ @reject_message("Tool execution blocked: autonomous agents require human oversight for high-risk tools.")
88
84
  forbid (
89
85
  principal is Guardrails::Agent,
90
86
  action == Guardrails::Action::"call_tool",
91
87
  resource
92
- ) when {
88
+ )
89
+ when {
93
90
  context has agent_type && context.agent_type == "autonomous" &&
94
- context has tool_risk_score && context.tool_risk_score > 70
91
+ context has tool_risk_score && context.tool_risk_score >= 71
95
92
  };
96
93
 
97
- @id("multi-agent-autonomous-injection-defense")
98
- @name("Enhanced injection defense for autonomous agents")
99
- @description("Autonomous agents are high-value targets for injection. Lower the threshold to 50 (vs 80 standard) since there is no human to catch false negatives")
94
+ @id("agent-identity.multi-agent-autonomous-injection")
95
+ @name("Block injection on autonomous agents (stricter)")
96
+ @description("Blocks process_prompt when agent_type is \"autonomous\" and injection_score >= 51.")
100
97
  @severity("high")
101
- @tags("profile,multi-agent,autonomous,injection,a2a")
98
+ @tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
99
+ @reject_message("Prompt blocked: lower injection threshold applies to autonomous agents and was exceeded.")
102
100
  forbid (
103
101
  principal is Guardrails::Agent,
104
102
  action == Guardrails::Action::"process_prompt",
105
103
  resource
106
- ) when {
104
+ )
105
+ when {
107
106
  context has agent_type && context.agent_type == "autonomous" &&
108
- context has injection_confidence && context.injection_confidence > 50
107
+ context has injection_score && context.injection_score >= 51
109
108
  };
110
109
 
111
- @id("multi-agent-autonomous-jailbreak-defense")
112
- @name("Enhanced jailbreak defense for autonomous agents")
113
- @description("Lower jailbreak threshold for autonomous agents. Without human review, we must be more conservative")
110
+ @id("agent-identity.multi-agent-autonomous-jailbreak")
111
+ @name("Block jailbreak on autonomous agents (stricter)")
112
+ @description("Blocks process_prompt when agent_type is \"autonomous\" and jailbreak_score >= 51.")
114
113
  @severity("high")
115
- @tags("profile,multi-agent,autonomous,jailbreak,a2a")
114
+ @tags("category:agent-identity,threat:jailbreak,detection:ml,surface:process-prompt,owasp:llm02")
115
+ @reject_message("Prompt blocked: lower jailbreak threshold applies to autonomous agents and was exceeded.")
116
116
  forbid (
117
117
  principal is Guardrails::Agent,
118
118
  action == Guardrails::Action::"process_prompt",
119
119
  resource
120
- ) when {
120
+ )
121
+ when {
121
122
  context has agent_type && context.agent_type == "autonomous" &&
122
- context has jailbreak_confidence && context.jailbreak_confidence > 50
123
+ context has jailbreak_score && context.jailbreak_score >= 51
123
124
  };
124
125
 
125
- // -----------------------------------------------------------------------------
126
- // MCP Server Connection Trust
127
- // -----------------------------------------------------------------------------
126
+ // ---------------------------------------------------------------------------
127
+ // Section 3: MCP server-connection trust
128
+ // ---------------------------------------------------------------------------
128
129
 
129
- @id("multi-agent-block-unverified-server-connect")
130
+ @id("agent-identity.multi-agent-block-unverified-server-connect")
130
131
  @name("Block unverified agents from connecting to MCP servers")
131
- @description("Unverified agents cannot establish new MCP server connections. Limits blast radius of compromised or rogue agents")
132
+ @description("Blocks connect_server when agent_trust_level is \"unverified\".")
132
133
  @severity("high")
133
- @tags("profile,multi-agent,trust,mcp,connect,a2a")
134
+ @tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:rule,surface:connect-server")
135
+ @reject_message("MCP server connection blocked: unverified agents cannot establish new server connections.")
134
136
  forbid (
135
137
  principal is Guardrails::Agent,
136
138
  action == Guardrails::Action::"connect_server",
137
139
  resource
138
- ) when {
140
+ )
141
+ when {
139
142
  context has agent_trust_level && context.agent_trust_level == "unverified"
140
143
  };