@highflame/policy 2.1.7 → 2.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. package/_schemas/guardrails/context.json +78 -0
  2. package/_schemas/guardrails/schema.cedarschema +28 -1
  3. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +105 -0
  4. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +113 -0
  5. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +118 -0
  6. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +134 -0
  7. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +117 -0
  8. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +73 -0
  9. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +66 -0
  10. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +35 -0
  11. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +55 -0
  12. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +148 -0
  13. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +120 -0
  14. package/_schemas/guardrails/templates/templates.json +121 -6
  15. package/_schemas/overwatch/schema.cedarschema +6 -0
  16. package/dist/guardrails-context.gen.d.ts +5 -0
  17. package/dist/guardrails-context.gen.js +5 -0
  18. package/dist/guardrails-defaults.gen.js +1315 -6
  19. package/dist/overwatch-defaults.gen.d.ts +1 -1
  20. package/dist/overwatch-defaults.gen.js +508 -1206
  21. package/dist/service-schemas.gen.d.ts +2 -2
  22. package/dist/service-schemas.gen.js +47 -1
  23. package/package.json +1 -1
@@ -115,6 +115,18 @@
115
115
  "required": false,
116
116
  "description": "Array of PII types detected (e.g., ['email', 'phone', 'ssn', 'credit_card']). Use .contains() to block specific sensitive types"
117
117
  },
118
+ {
119
+ "key": "pii_confidence",
120
+ "type": "number",
121
+ "required": false,
122
+ "description": "PII detection ML classifier confidence (0-100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss. Typical threshold: >=80 for high-confidence blocking"
123
+ },
124
+ {
125
+ "key": "highest_severity",
126
+ "type": "string",
127
+ "required": false,
128
+ "description": "Highest severity level across all detection engines: 'critical', 'high', 'medium', 'low', or 'none'. Use for severity-based catch-all policies (e.g., block any content flagged as critical)"
129
+ },
118
130
  {
119
131
  "key": "violence_score",
120
132
  "type": "number",
@@ -661,6 +673,18 @@
661
673
  "required": false,
662
674
  "description": "Array of PII types found in tool arguments"
663
675
  },
676
+ {
677
+ "key": "pii_count",
678
+ "type": "number",
679
+ "required": false,
680
+ "description": "Number of PII pattern matches in tool arguments or content. Use >=3 to detect bulk PII exposure indicating data dumps or CSV pastes"
681
+ },
682
+ {
683
+ "key": "pii_confidence",
684
+ "type": "number",
685
+ "required": false,
686
+ "description": "PII detection ML classifier confidence for tool content (0-100). Catches novel PII patterns that escape regex detection"
687
+ },
664
688
  {
665
689
  "key": "injection_confidence",
666
690
  "type": "number",
@@ -823,6 +847,36 @@
823
847
  "required": false,
824
848
  "description": "Risk score for encoded injection in tool arguments (0-100)"
825
849
  },
850
+ {
851
+ "key": "path",
852
+ "type": "string",
853
+ "required": false,
854
+ "description": "File path targeted by the tool call (if file operation). Use for path-based blocking of .env files, credential files, system directories, and credential directories"
855
+ },
856
+ {
857
+ "key": "contains_invisible_chars",
858
+ "type": "boolean",
859
+ "required": false,
860
+ "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in tool arguments. Tool args should be plain text — invisible chars indicate payload injection"
861
+ },
862
+ {
863
+ "key": "invisible_chars_score",
864
+ "type": "number",
865
+ "required": false,
866
+ "description": "Invisible character attack severity score in tool arguments (0-100)"
867
+ },
868
+ {
869
+ "key": "indirect_injection_score",
870
+ "type": "number",
871
+ "required": false,
872
+ "description": "Indirect prompt injection risk score (0-100) — injection via tool outputs, retrieved documents, or external content. Use >=70 for general blocking, >=50 for sensitive tools"
873
+ },
874
+ {
875
+ "key": "indirect_injection_type",
876
+ "type": "string",
877
+ "required": false,
878
+ "description": "Type of indirect injection detected in tool content (e.g., instruction override, authority hijack)"
879
+ },
826
880
  {
827
881
  "key": "rug_pull_type",
828
882
  "type": "string",
@@ -967,6 +1021,12 @@
967
1021
  "required": true,
968
1022
  "description": "Unix timestamp in milliseconds"
969
1023
  },
1024
+ {
1025
+ "key": "path",
1026
+ "type": "string",
1027
+ "required": false,
1028
+ "description": "File path being read. Use for path-based access control policies (e.g., block .env files, system directories, credential directories)"
1029
+ },
970
1030
  {
971
1031
  "key": "contains_secrets",
972
1032
  "type": "boolean",
@@ -1141,6 +1201,24 @@
1141
1201
  "required": true,
1142
1202
  "description": "Unix timestamp in milliseconds"
1143
1203
  },
1204
+ {
1205
+ "key": "path",
1206
+ "type": "string",
1207
+ "required": false,
1208
+ "description": "File path being written. Use for path-based blocking policies (e.g., block writes to .env files, credential directories)"
1209
+ },
1210
+ {
1211
+ "key": "contains_invisible_chars",
1212
+ "type": "boolean",
1213
+ "required": false,
1214
+ "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in the content being written. Prevents persistence of invisible payloads in source code"
1215
+ },
1216
+ {
1217
+ "key": "invisible_chars_score",
1218
+ "type": "number",
1219
+ "required": false,
1220
+ "description": "Invisible character attack severity score (0-100) in write content"
1221
+ },
1144
1222
  {
1145
1223
  "key": "contains_secrets",
1146
1224
  "type": "boolean",
@@ -113,6 +113,10 @@ namespace Guardrails {
113
113
  "pii_detected"?: Bool,
114
114
  "pii_count"?: Long,
115
115
  "pii_types"?: Set<String>, // ["email", "phone", "ssn", "credit_card", ...]
116
+ "pii_confidence"?: Long, // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
117
+
118
+ // Threat Severity Aggregation (optional)
119
+ "highest_severity"?: String, // Highest severity across all detectors: "critical" | "high" | "medium" | "low" | "none"
116
120
 
117
121
  // Trust & Safety - Toxicity (optional)
118
122
  "violence_score"?: Long, // 0-100
@@ -225,7 +229,7 @@ namespace Guardrails {
225
229
 
226
230
  // Agentic - Behavioral Patterns (optional)
227
231
  "suspicious_pattern"?: Bool,
228
- "pattern_type"?: String, // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "none"
232
+ "pattern_type"?: String, // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "credential_theft" | "destructive_sequence" | "none"
229
233
  "sequence_risk"?: Long, // 0-100
230
234
 
231
235
  // Agentic - Loop Detection (optional)
@@ -247,6 +251,8 @@ namespace Guardrails {
247
251
  "secret_types"?: Set<String>,
248
252
  "pii_detected"?: Bool,
249
253
  "pii_types"?: Set<String>,
254
+ "pii_count"?: Long, // Number of PII pattern matches in tool content
255
+ "pii_confidence"?: Long, // PII ML classifier confidence (0-100)
250
256
  "injection_confidence"?: Long,
251
257
  "injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
252
258
  "injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
@@ -267,6 +273,13 @@ namespace Guardrails {
267
273
  "cross_origin_type"?: String,
268
274
  "cross_origin_score"?: Long, // 0-100
269
275
 
276
+ // File & Path (optional — for path-based access control policies)
277
+ "path"?: String, // File path when tool operates on files
278
+
279
+ // Security - Invisible Character Detection in tool args (optional)
280
+ "contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in tool args
281
+ "invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
282
+
270
283
  // Security - Encoded Injection (optional)
271
284
  "encoded_content_detected"?: Bool,
272
285
  "encoded_types"?: Set<String>,
@@ -281,6 +294,10 @@ namespace Guardrails {
281
294
  "rug_pull_score"?: Long, // 0-100
282
295
  "rug_pull_type"?: String, // "risk_spike" | "pattern_change" | "combined" | "none"
283
296
 
297
+ // Agentic - Indirect Prompt Injection (optional — injection via tool outputs/retrieved content)
298
+ "indirect_injection_score"?: Long, // Indirect injection risk score (0-100)
299
+ "indirect_injection_type"?: String, // Type of indirect injection detected
300
+
284
301
  // Agentic - MCP Risk (optional)
285
302
  "mcp_config_risk"?: Bool,
286
303
  "mcp_risk_type"?: String, // "inline_execution" | "suspicious_url" | "cross_origin"
@@ -320,6 +337,9 @@ namespace Guardrails {
320
337
  "request_id": String,
321
338
  "timestamp": Long,
322
339
 
340
+ // File path (optional — for path-based access control policies)
341
+ "path"?: String, // File path being read
342
+
323
343
  // Security checks on file content (optional)
324
344
  "contains_secrets"?: Bool,
325
345
  "secret_count"?: Long,
@@ -362,6 +382,13 @@ namespace Guardrails {
362
382
  "request_id": String,
363
383
  "timestamp": Long,
364
384
 
385
+ // File path (optional — for path-based access control policies)
386
+ "path"?: String, // File path being written
387
+
388
+ // Security - Invisible Character Detection in write content (optional)
389
+ "contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in write content
390
+ "invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
391
+
365
392
  // Security checks on content being written (optional)
366
393
  "contains_secrets"?: Bool,
367
394
  "secret_count"?: Long,
@@ -0,0 +1,105 @@
1
+ // =============================================================================
2
+ // A2A Security — Cross-Origin Trust Boundary Enforcement
3
+ // =============================================================================
4
+ // Detects and blocks confused deputy attacks where an agent from one trust
5
+ // domain attempts to operate in another. Cross-origin violations occur when:
6
+ // - An agent proxies requests across security domains
7
+ // - Mixed-security tool chains span trust boundaries
8
+ // - URL injection redirects agent communication to untrusted origins
9
+ //
10
+ // Key A2A distinction from MAS: In multi-agent systems (shared orchestrator),
11
+ // cross-origin is unlikely because all agents share a trust context. In A2A
12
+ // (independent agents, separate trust domains), cross-origin is the PRIMARY
13
+ // signal that trust boundaries are being violated.
14
+ //
15
+ // Shield cross-origin detector outputs discrete scores:
16
+ // 90 — mixed localhost + external domain
17
+ // 85 — URL injection in parameters
18
+ // 80 — proxy/redirect patterns
19
+ // 75 — multi-origin tool configs / JSON origin fields
20
+ // 70 — mixed HTTP/HTTPS or ws/wss schemes
21
+ // 65 — JSON arrays with multiple URLs
22
+ // 60 — generic multi-domain patterns
23
+ //
24
+ // Compliance:
25
+ // OWASP LLM08 (Excessive Agency)
26
+ // OWASP ASI03 (Excessive Permissions)
27
+ // MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection via delegation)
28
+ // NIST 800-53 AC-4 (Information Flow Enforcement)
29
+ //
30
+ // Category: agent_identity
31
+ // Namespace: Guardrails
32
+ // =============================================================================
33
+
34
+ // Forbid process_prompt and call_tool for any identified agent (non-empty agent_id) when cross-origin is detected with score >= 80
35
+ @id("a2a-cross-origin-block-critical")
36
+ @name("Block critical cross-origin from any agent")
37
+ @description("Block all agent requests when cross-origin trust boundary violation score is 80 or higher. High-confidence cross-origin signals (mixed localhost/external, URL injection, proxy redirects) indicate confused deputy attacks regardless of agent trust level.")
38
+ @severity("critical")
39
+ @tags("profile,a2a-security,cross-origin,confused-deputy,trust-boundary,owasp-llm08")
40
+ @reject_message("Request blocked: high-confidence cross-origin trust boundary violation detected (score >= 80). An external agent or service is attempting to operate across trust domains. Review the origin chain before retrying.")
41
+ forbid (
42
+ principal is Guardrails::Agent,
43
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
44
+ resource
45
+ )
46
+ when {
47
+ context has agent_id && context.agent_id != "" &&
48
+ context has cross_origin_detected && context.cross_origin_detected == true &&
49
+ context has cross_origin_score && context.cross_origin_score >= 80
50
+ };
51
+
52
+ // Forbid process_prompt and call_tool when agent_trust_level is "unverified" and cross-origin is detected with score >= 60; attributes absent from context never match
53
+ @id("a2a-cross-origin-block-unverified")
54
+ @name("Block cross-origin from unverified agents")
55
+ @description("Unverified agents are blocked from any cross-origin activity at the lowest meaningful detection threshold (score >= 60). Cross-origin from an unverified source is a strong confused deputy indicator — the agent has no attestation AND is crossing trust boundaries.")
56
+ @severity("high")
57
+ @tags("profile,a2a-security,cross-origin,unverified,trust-boundary,owasp-asi03")
58
+ @reject_message("Request blocked: cross-origin activity detected from an unverified agent. Unverified agents cannot operate across trust boundaries. Register the agent or use a verified agent.")
59
+ forbid (
60
+ principal is Guardrails::Agent,
61
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
62
+ resource
63
+ )
64
+ when {
65
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
66
+ context has cross_origin_detected && context.cross_origin_detected == true &&
67
+ context has cross_origin_score && context.cross_origin_score >= 60
68
+ };
69
+
70
+ // Forbid connect_server when agent_trust_level is present and not "first_party" and cross-origin is detected with score >= 65
71
+ @id("a2a-cross-origin-block-server-connect")
72
+ @name("Block cross-origin MCP server connections")
73
+ @description("Non-first-party agents cannot connect to MCP servers when cross-origin signals are present (score >= 65). Server-level cross-origin has wide blast radius — a single compromised connection exposes all tools on that server.")
74
+ @severity("critical")
75
+ @tags("profile,a2a-security,cross-origin,mcp,server,trust-boundary,nist-ac-4")
76
+ @reject_message("MCP server connection blocked: cross-origin trust violation detected. Non-first-party agents cannot connect to MCP servers when cross-origin signals are present.")
77
+ forbid (
78
+ principal is Guardrails::Agent,
79
+ action == Guardrails::Action::"connect_server",
80
+ resource
81
+ )
82
+ when {
83
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
84
+ context has cross_origin_detected && context.cross_origin_detected == true &&
85
+ context has cross_origin_score && context.cross_origin_score >= 65
86
+ };
87
+
88
+ // Forbid call_tool on sensitive tools (tool_is_sensitive == true) for any request with a non-empty agent_id when cross-origin is detected with score >= 60
89
+ @id("a2a-cross-origin-block-sensitive-tools")
90
+ @name("Block cross-origin on sensitive tools")
91
+ @description("Any agent attempting to call sensitive tools while cross-origin signals are present (score >= 60) is blocked. Sensitive tools (write_file, http_post, send_email) amplify the impact of confused deputy attacks — an agent crossing trust boundaries should not have access to high-impact operations.")
92
+ @severity("high")
93
+ @tags("profile,a2a-security,cross-origin,sensitive-tools,confused-deputy,owasp-llm08")
94
+ @reject_message("Sensitive tool execution blocked: cross-origin trust violation detected. Tool calls to sensitive tools are blocked when cross-origin signals are present from agent requests.")
95
+ forbid (
96
+ principal is Guardrails::Agent,
97
+ action == Guardrails::Action::"call_tool",
98
+ resource
99
+ )
100
+ when {
101
+ context has agent_id && context.agent_id != "" &&
102
+ context has cross_origin_detected && context.cross_origin_detected == true &&
103
+ context has cross_origin_score && context.cross_origin_score >= 60 &&
104
+ context has tool_is_sensitive && context.tool_is_sensitive == true
105
+ };
@@ -0,0 +1,113 @@
1
+ // =============================================================================
2
+ // A2A Security — Escalation Detection & Circuit Breakers
3
+ // =============================================================================
4
+ // Detects progressive capability escalation across agent turns and applies
5
+ // session-level circuit breakers tuned for adversarial A2A communication:
6
+ //
7
+ // 1. SESSION PEAK SCORES: When the maximum injection or jailbreak score
8
+ // across all prior turns is 70 or higher, non-first-party agents are restricted.
9
+ // Unlike MAS boolean flags, A2A uses numeric peaks for graduated response.
10
+ //
11
+ // 2. CUMULATIVE RISK CIRCUIT BREAKER: When accumulated risk across turns
12
+ // exceeds 150, sensitive tools are blocked for non-first-party agents.
13
+ // Lower threshold than MAS (200) because cross-system communication has
14
+ // higher adversarial surface area.
15
+ //
16
+ // 3. THREAT TURN ESCALATION: After 3+ turns with detected threats, unverified
17
+ // agents are fully locked out. This catches turn-by-turn probing attacks
18
+ // where an attacker incrementally tests boundaries.
19
+ //
20
+ // Key A2A distinction: In MAS, the orchestrator can reset or contain sessions.
21
+ // In A2A, independent agents have no shared circuit breaker, so policy must
22
+ // enforce escalation detection at the evaluation layer.
23
+ //
24
+ // Compliance:
25
+ // OWASP LLM01 (Prompt Injection) — multi-turn variant
26
+ // MITRE ATLAS AML.T0051 (LLM Prompt Injection)
27
+ // NIST 800-53 SI-4 (System Monitoring)
28
+ // NIST 800-53 IR-4 (Incident Handling)
29
+ //
30
+ // Category: agent_identity
31
+ // Namespace: Guardrails
32
+ // =============================================================================
33
+
34
+ // -----------------------------------------------------------------------------
35
+ // Session Peak Score Monitoring
36
+ // -----------------------------------------------------------------------------
37
+
38
+ // Forbid call_tool and process_prompt for non-first-party agents when session_max_injection_score >= 70
39
+ @id("a2a-session-injection-peak-block")
40
+ @name("Block agents when session injection peak is high")
41
+ @description("Block non-first-party agents from tool calls and prompt processing when the maximum injection score across all prior turns is 70 or higher. Unlike MAS policies that use boolean session_injection_detected, A2A uses the numeric peak score for graduated response — a session with a prior score of 75 is more dangerous than one with 45, even though both set the boolean flag.")
42
+ @severity("critical")
43
+ @tags("profile,a2a-security,escalation,session-peak,injection,owasp-llm01,nist-si-4")
44
+ @reject_message("Request blocked: this session has a prior injection detection peak of 70+. Non-first-party agents are restricted after high injection signals to prevent progressive capability escalation.")
45
+ forbid (
46
+ principal is Guardrails::Agent,
47
+ action in [Guardrails::Action::"call_tool", Guardrails::Action::"process_prompt"],
48
+ resource
49
+ )
50
+ when {
51
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
52
+ context has session_max_injection_score && context.session_max_injection_score >= 70
53
+ };
54
+
55
+ // Forbid process_prompt for non-first-party agents when session_max_jailbreak_score >= 70
56
+ @id("a2a-session-jailbreak-peak-block")
57
+ @name("Block agents when session jailbreak peak is high")
58
+ @description("Block non-first-party agents from prompt processing when the maximum jailbreak score across all prior turns is 70 or higher. A high jailbreak peak indicates the session's guardrails may have been partially eroded — subsequent agent requests in this session should be treated as potentially manipulated.")
59
+ @severity("critical")
60
+ @tags("profile,a2a-security,escalation,session-peak,jailbreak,owasp-llm01,nist-si-4")
61
+ @reject_message("Request blocked: this session has a prior jailbreak detection peak of 70+. Non-first-party agents are restricted after high jailbreak signals to prevent progressive manipulation.")
62
+ forbid (
63
+ principal is Guardrails::Agent,
64
+ action == Guardrails::Action::"process_prompt",
65
+ resource
66
+ )
67
+ when {
68
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
69
+ context has session_max_jailbreak_score && context.session_max_jailbreak_score >= 70
70
+ };
71
+
72
+ // -----------------------------------------------------------------------------
73
+ // Cumulative Risk Circuit Breaker
74
+ // -----------------------------------------------------------------------------
75
+
76
+ // Forbid call_tool on sensitive tools for non-first-party agents once session_cumulative_risk_score is strictly greater than 150
77
+ @id("a2a-cumulative-risk-agent-lockdown")
78
+ @name("Lock down non-first-party agents at high cumulative risk")
79
+ @description("Block non-first-party agents from sensitive tools when cumulative session risk exceeds 150. Lower threshold than MAS (200) because A2A sessions with cross-system agents should trip the circuit breaker earlier — independent agents cannot coordinate session recovery the way an orchestrator can.")
80
+ @severity("high")
81
+ @tags("profile,a2a-security,escalation,cumulative-risk,circuit-breaker,nist-ir-4")
82
+ @reject_message("Sensitive tool blocked: session cumulative risk exceeds 150. Non-first-party agents are restricted from sensitive tools in elevated-risk sessions to prevent progressive capability gain.")
83
+ forbid (
84
+ principal is Guardrails::Agent,
85
+ action == Guardrails::Action::"call_tool",
86
+ resource
87
+ )
88
+ when {
89
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
90
+ context has session_cumulative_risk_score && context.session_cumulative_risk_score > 150 &&
91
+ context has tool_is_sensitive && context.tool_is_sensitive == true
92
+ };
93
+
94
+ // -----------------------------------------------------------------------------
95
+ // Threat Turn Escalation — Probing Detection
96
+ // -----------------------------------------------------------------------------
97
+
98
+ // Forbid call_tool for unverified agents once session_threat_turns is greater than 2 (i.e., 3 or more threat turns)
99
+ @id("a2a-threat-turn-escalation-block")
100
+ @name("Block unverified agents after repeated threat turns")
101
+ @description("Block unverified agents from all tool calls after 3+ threat turns are detected in the session. Lower threshold than MAS (5) because repeated threats from an unverified agent's session indicate adversarial probing — the attacker is incrementally testing boundaries. Three threat turns is sufficient evidence of active reconnaissance.")
102
+ @severity("critical")
103
+ @tags("profile,a2a-security,escalation,threat-turns,probing,unverified,nist-ir-4")
104
+ @reject_message("Tool execution blocked: 3+ threat turns detected in this session. Unverified agents are locked out after repeated threat signals to prevent adversarial escalation via turn-by-turn probing.")
105
+ forbid (
106
+ principal is Guardrails::Agent,
107
+ action == Guardrails::Action::"call_tool",
108
+ resource
109
+ )
110
+ when {
111
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
112
+ context has session_threat_turns && context.session_threat_turns > 2
113
+ };
@@ -0,0 +1,118 @@
1
+ // =============================================================================
2
+ // A2A Security — Agent Identity Enforcement
3
+ // =============================================================================
4
+ // Enforces strict identity requirements for cross-system agent communication:
5
+ //
6
+ // 1. ANONYMOUS AGENT BLOCKING: Agents that claim agent_type but provide no
7
+ // agent_id are likely spoofed or misconfigured — blocked from tool calls.
8
+ //
9
+ // 2. FRAMEWORK REGISTRATION: Unverified agents must declare their framework
10
+ // (claude-code, langchain, crewai, etc.) for sensitive operations. Missing
11
+ // framework on unverified agents indicates an ad-hoc or rogue integration.
12
+ //
13
+ // 3. SERVER CONNECTION RESTRICTIONS: Unverified agents cannot establish new
14
+ // MCP server connections — limits blast radius of unknown agents.
15
+ //
16
+ // 4. AUTONOMOUS + UNVERIFIED = BLOCKED: The most dangerous combination is
17
+ // an autonomous agent with no verification. No human oversight AND no
18
+ // trust attestation means zero recovery if the agent is compromised.
19
+ //
20
+ // Key A2A distinction: In MAS, the orchestrator validates all sub-agents.
21
+ // In A2A, each agent self-reports identity, so we must enforce identity
22
+ // completeness and consistency at the policy layer.
23
+ //
24
+ // Compliance:
25
+ // OWASP ASI05 (Identity Spoofing)
26
+ // NIST 800-63 (Digital Identity Guidelines)
27
+ // NIST 800-53 IA-2 (Identification and Authentication)
28
+ // NIST 800-53 IA-8 (Identification and Authentication — Non-Organizational Users)
29
+ //
30
+ // Category: agent_identity
31
+ // Namespace: Guardrails
32
+ // =============================================================================
33
+
34
+ // -----------------------------------------------------------------------------
35
+ // Anonymous Agent Detection — Incomplete Identity
36
+ // -----------------------------------------------------------------------------
37
+
38
+ // Forbid call_tool when agent_type is set (and not human_proxy) but agent_id is missing OR empty; the negated has-guard prevents a fail-open when agent_id is omitted from context
39
+ @id("a2a-block-anonymous-agent-tools")
40
+ @name("Block anonymous agents from tool execution")
41
+ @description("Block tool calls from agents that declare an agent_type but have no agent_id. This pattern (type present, ID absent) indicates a spoofed or misconfigured agent identity — legitimate agents always have both. Human proxies are exempt because they represent authenticated users, not independent agents.")
42
+ @severity("critical")
43
+ @tags("profile,a2a-security,identity,anonymous,spoofing,owasp-asi05,nist-ia-2")
44
+ @reject_message("Tool execution blocked: agent identity is required for A2A tool calls. This request has an agent type but no agent ID, indicating an improperly configured or spoofed agent identity.")
45
+ forbid (
46
+ principal is Guardrails::Agent,
47
+ action == Guardrails::Action::"call_tool",
48
+ resource
49
+ )
50
+ when {
51
+ !(context has agent_id && context.agent_id != "") &&
52
+ context has agent_type && context.agent_type != "" &&
53
+ context.agent_type != "human_proxy"
54
+ };
55
+
56
+ // -----------------------------------------------------------------------------
57
+ // Framework Registration — Unverified Agent Restrictions
58
+ // -----------------------------------------------------------------------------
59
+
60
+ // Forbid call_tool on sensitive tools for unverified agents whose agent_framework is missing OR empty; the negated has-guard prevents a fail-open when agent_framework is omitted from context
61
+ @id("a2a-block-unregistered-framework")
62
+ @name("Block unregistered frameworks from sensitive tools")
63
+ @description("Block unverified agents with no declared framework from calling sensitive tools. In A2A, agent_framework identifies the SDK/runtime (claude-code, langchain, crewai, autogen). An unverified agent with no framework declaration is a black-box integration — it cannot be audited, patched, or trusted with sensitive operations.")
64
+ @severity("high")
65
+ @tags("profile,a2a-security,identity,framework,unverified,sensitive-tools,nist-ia-8")
66
+ @reject_message("Sensitive tool blocked: unverified agent with no registered framework attempted to call a sensitive tool. Agents must declare their framework (e.g., claude-code, langchain, crewai) for A2A sensitive operations.")
67
+ forbid (
68
+ principal is Guardrails::Agent,
69
+ action == Guardrails::Action::"call_tool",
70
+ resource
71
+ )
72
+ when {
73
+ !(context has agent_framework && context.agent_framework != "") &&
74
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
75
+ context has tool_is_sensitive && context.tool_is_sensitive == true
76
+ };
77
+
78
+ // -----------------------------------------------------------------------------
79
+ // Server Connection Restrictions
80
+ // -----------------------------------------------------------------------------
81
+
82
+ // Forbid connect_server for unverified agents with a non-empty agent_id. NOTE(review): unverified requests with an empty or missing agent_id are not matched — confirm this is intended
83
+ @id("a2a-block-unverified-server-connect")
84
+ @name("Block unverified agents from MCP server connections")
85
+ @description("Unverified agents cannot establish new MCP server connections in A2A mode. Each server connection expands the agent's capability surface — unverified agents should use only pre-established connections from the orchestrator or host application.")
86
+ @severity("high")
87
+ @tags("profile,a2a-security,identity,unverified,mcp,server,nist-ia-8")
88
+ @reject_message("MCP server connection blocked: unverified agents cannot establish new MCP server connections in A2A mode. Register the agent as verified_third_party or first_party to enable server connections.")
89
+ forbid (
90
+ principal is Guardrails::Agent,
91
+ action == Guardrails::Action::"connect_server",
92
+ resource
93
+ )
94
+ when {
95
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
96
+ context has agent_id && context.agent_id != ""
97
+ };
98
+
99
+ // -----------------------------------------------------------------------------
100
+ // Dangerous Combinations — Maximum Risk
101
+ // -----------------------------------------------------------------------------
102
+
103
+ // Forbid call_tool when agent_type is "autonomous" and agent_trust_level is "unverified"; both attributes must be present in context for the rule to match
104
+ @id("a2a-block-autonomous-unverified")
105
+ @name("Block unverified autonomous agents from all tool calls")
106
+ @description("The combination of autonomous (no human oversight) and unverified (no trust attestation) is the most dangerous agent configuration. If compromised, there is no human to catch anomalies and no verification to limit blast radius. These agents are unconditionally blocked from all tool execution in A2A workflows.")
107
+ @severity("critical")
108
+ @tags("profile,a2a-security,identity,autonomous,unverified,owasp-asi05,nist-ia-2")
109
+ @reject_message("Tool execution blocked: unverified autonomous agents are not permitted in A2A workflows. Autonomous agents operating without human oversight must be at least verified_third_party trust level.")
110
+ forbid (
111
+ principal is Guardrails::Agent,
112
+ action == Guardrails::Action::"call_tool",
113
+ resource
114
+ )
115
+ when {
116
+ context has agent_type && context.agent_type == "autonomous" &&
117
+ context has agent_trust_level && context.agent_trust_level == "unverified"
118
+ };