@highflame/policy 2.1.7 → 2.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/guardrails/context.json +78 -0
- package/_schemas/guardrails/schema.cedarschema +28 -1
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +105 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +113 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +118 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +134 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +117 -0
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +73 -0
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +66 -0
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +35 -0
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +55 -0
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +148 -0
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +120 -0
- package/_schemas/guardrails/templates/templates.json +121 -6
- package/dist/guardrails-context.gen.d.ts +5 -0
- package/dist/guardrails-context.gen.js +5 -0
- package/dist/guardrails-defaults.gen.js +1315 -6
- package/dist/service-schemas.gen.d.ts +1 -1
- package/dist/service-schemas.gen.js +41 -1
- package/package.json +1 -1
|
@@ -115,6 +115,18 @@
|
|
|
115
115
|
"required": false,
|
|
116
116
|
"description": "Array of PII types detected (e.g., ['email', 'phone', 'ssn', 'credit_card']). Use .contains() to block specific sensitive types"
|
|
117
117
|
},
|
|
118
|
+
{
|
|
119
|
+
"key": "pii_confidence",
|
|
120
|
+
"type": "number",
|
|
121
|
+
"required": false,
|
|
122
|
+
"description": "PII detection ML classifier confidence (0-100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss. Typical threshold: >=80 for high-confidence blocking"
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
"key": "highest_severity",
|
|
126
|
+
"type": "string",
|
|
127
|
+
"required": false,
|
|
128
|
+
"description": "Highest severity level across all detection engines: 'critical', 'high', 'medium', 'low', or 'none'. Use for severity-based catch-all policies (e.g., block any content flagged as critical)"
|
|
129
|
+
},
|
|
118
130
|
{
|
|
119
131
|
"key": "violence_score",
|
|
120
132
|
"type": "number",
|
|
@@ -661,6 +673,18 @@
|
|
|
661
673
|
"required": false,
|
|
662
674
|
"description": "Array of PII types found in tool arguments"
|
|
663
675
|
},
|
|
676
|
+
{
|
|
677
|
+
"key": "pii_count",
|
|
678
|
+
"type": "number",
|
|
679
|
+
"required": false,
|
|
680
|
+
"description": "Number of PII pattern matches in tool arguments or content. Use >=3 to detect bulk PII exposure indicating data dumps or CSV pastes"
|
|
681
|
+
},
|
|
682
|
+
{
|
|
683
|
+
"key": "pii_confidence",
|
|
684
|
+
"type": "number",
|
|
685
|
+
"required": false,
|
|
686
|
+
"description": "PII detection ML classifier confidence for tool content (0-100). Catches novel PII patterns that escape regex detection"
|
|
687
|
+
},
|
|
664
688
|
{
|
|
665
689
|
"key": "injection_confidence",
|
|
666
690
|
"type": "number",
|
|
@@ -823,6 +847,36 @@
|
|
|
823
847
|
"required": false,
|
|
824
848
|
"description": "Risk score for encoded injection in tool arguments (0-100)"
|
|
825
849
|
},
|
|
850
|
+
{
|
|
851
|
+
"key": "path",
|
|
852
|
+
"type": "string",
|
|
853
|
+
"required": false,
|
|
854
|
+
"description": "File path targeted by the tool call (if file operation). Use for path-based blocking of .env files, credential files, system directories, and credential directories"
|
|
855
|
+
},
|
|
856
|
+
{
|
|
857
|
+
"key": "contains_invisible_chars",
|
|
858
|
+
"type": "boolean",
|
|
859
|
+
"required": false,
|
|
860
|
+
"description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in tool arguments. Tool args should be plain text — invisible chars indicate payload injection"
|
|
861
|
+
},
|
|
862
|
+
{
|
|
863
|
+
"key": "invisible_chars_score",
|
|
864
|
+
"type": "number",
|
|
865
|
+
"required": false,
|
|
866
|
+
"description": "Invisible character attack severity score in tool arguments (0-100)"
|
|
867
|
+
},
|
|
868
|
+
{
|
|
869
|
+
"key": "indirect_injection_score",
|
|
870
|
+
"type": "number",
|
|
871
|
+
"required": false,
|
|
872
|
+
"description": "Indirect prompt injection risk score (0-100) — injection via tool outputs, retrieved documents, or external content. Use >=70 for general blocking, >=50 for sensitive tools"
|
|
873
|
+
},
|
|
874
|
+
{
|
|
875
|
+
"key": "indirect_injection_type",
|
|
876
|
+
"type": "string",
|
|
877
|
+
"required": false,
|
|
878
|
+
"description": "Type of indirect injection detected in tool content (e.g., instruction override, authority hijack)"
|
|
879
|
+
},
|
|
826
880
|
{
|
|
827
881
|
"key": "rug_pull_type",
|
|
828
882
|
"type": "string",
|
|
@@ -967,6 +1021,12 @@
|
|
|
967
1021
|
"required": true,
|
|
968
1022
|
"description": "Unix timestamp in milliseconds"
|
|
969
1023
|
},
|
|
1024
|
+
{
|
|
1025
|
+
"key": "path",
|
|
1026
|
+
"type": "string",
|
|
1027
|
+
"required": false,
|
|
1028
|
+
"description": "File path being read. Use for path-based access control policies (e.g., block .env files, system directories, credential directories)"
|
|
1029
|
+
},
|
|
970
1030
|
{
|
|
971
1031
|
"key": "contains_secrets",
|
|
972
1032
|
"type": "boolean",
|
|
@@ -1141,6 +1201,24 @@
|
|
|
1141
1201
|
"required": true,
|
|
1142
1202
|
"description": "Unix timestamp in milliseconds"
|
|
1143
1203
|
},
|
|
1204
|
+
{
|
|
1205
|
+
"key": "path",
|
|
1206
|
+
"type": "string",
|
|
1207
|
+
"required": false,
|
|
1208
|
+
"description": "File path being written. Use for path-based blocking policies (e.g., block writes to .env files, credential directories)"
|
|
1209
|
+
},
|
|
1210
|
+
{
|
|
1211
|
+
"key": "contains_invisible_chars",
|
|
1212
|
+
"type": "boolean",
|
|
1213
|
+
"required": false,
|
|
1214
|
+
"description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in the content being written. Prevents persistence of invisible payloads in source code"
|
|
1215
|
+
},
|
|
1216
|
+
{
|
|
1217
|
+
"key": "invisible_chars_score",
|
|
1218
|
+
"type": "number",
|
|
1219
|
+
"required": false,
|
|
1220
|
+
"description": "Invisible character attack severity score (0-100) in write content"
|
|
1221
|
+
},
|
|
1144
1222
|
{
|
|
1145
1223
|
"key": "contains_secrets",
|
|
1146
1224
|
"type": "boolean",
|
|
@@ -113,6 +113,10 @@ namespace Guardrails {
|
|
|
113
113
|
"pii_detected"?: Bool,
|
|
114
114
|
"pii_count"?: Long,
|
|
115
115
|
"pii_types"?: Set<String>, // ["email", "phone", "ssn", "credit_card", ...]
|
|
116
|
+
"pii_confidence"?: Long, // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
|
|
117
|
+
|
|
118
|
+
// Threat Severity Aggregation (optional)
|
|
119
|
+
"highest_severity"?: String, // Highest severity across all detectors: "critical" | "high" | "medium" | "low" | "none"
|
|
116
120
|
|
|
117
121
|
// Trust & Safety - Toxicity (optional)
|
|
118
122
|
"violence_score"?: Long, // 0-100
|
|
@@ -225,7 +229,7 @@ namespace Guardrails {
|
|
|
225
229
|
|
|
226
230
|
// Agentic - Behavioral Patterns (optional)
|
|
227
231
|
"suspicious_pattern"?: Bool,
|
|
228
|
-
"pattern_type"?: String, // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "none"
|
|
232
|
+
"pattern_type"?: String, // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "credential_theft" | "destructive_sequence" | "none"
|
|
229
233
|
"sequence_risk"?: Long, // 0-100
|
|
230
234
|
|
|
231
235
|
// Agentic - Loop Detection (optional)
|
|
@@ -247,6 +251,8 @@ namespace Guardrails {
|
|
|
247
251
|
"secret_types"?: Set<String>,
|
|
248
252
|
"pii_detected"?: Bool,
|
|
249
253
|
"pii_types"?: Set<String>,
|
|
254
|
+
"pii_count"?: Long, // Number of PII pattern matches in tool content
|
|
255
|
+
"pii_confidence"?: Long, // PII ML classifier confidence (0-100)
|
|
250
256
|
"injection_confidence"?: Long,
|
|
251
257
|
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
252
258
|
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
@@ -267,6 +273,13 @@ namespace Guardrails {
|
|
|
267
273
|
"cross_origin_type"?: String,
|
|
268
274
|
"cross_origin_score"?: Long, // 0-100
|
|
269
275
|
|
|
276
|
+
// File & Path (optional — for path-based access control policies)
|
|
277
|
+
"path"?: String, // File path when tool operates on files
|
|
278
|
+
|
|
279
|
+
// Security - Invisible Character Detection in tool args (optional)
|
|
280
|
+
"contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in tool args
|
|
281
|
+
"invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
|
|
282
|
+
|
|
270
283
|
// Security - Encoded Injection (optional)
|
|
271
284
|
"encoded_content_detected"?: Bool,
|
|
272
285
|
"encoded_types"?: Set<String>,
|
|
@@ -281,6 +294,10 @@ namespace Guardrails {
|
|
|
281
294
|
"rug_pull_score"?: Long, // 0-100
|
|
282
295
|
"rug_pull_type"?: String, // "risk_spike" | "pattern_change" | "combined" | "none"
|
|
283
296
|
|
|
297
|
+
// Agentic - Indirect Prompt Injection (optional — injection via tool outputs/retrieved content)
|
|
298
|
+
"indirect_injection_score"?: Long, // Indirect injection risk score (0-100)
|
|
299
|
+
"indirect_injection_type"?: String, // Type of indirect injection detected
|
|
300
|
+
|
|
284
301
|
// Agentic - MCP Risk (optional)
|
|
285
302
|
"mcp_config_risk"?: Bool,
|
|
286
303
|
"mcp_risk_type"?: String, // "inline_execution" | "suspicious_url" | "cross_origin"
|
|
@@ -320,6 +337,9 @@ namespace Guardrails {
|
|
|
320
337
|
"request_id": String,
|
|
321
338
|
"timestamp": Long,
|
|
322
339
|
|
|
340
|
+
// File path (optional — for path-based access control policies)
|
|
341
|
+
"path"?: String, // File path being read
|
|
342
|
+
|
|
323
343
|
// Security checks on file content (optional)
|
|
324
344
|
"contains_secrets"?: Bool,
|
|
325
345
|
"secret_count"?: Long,
|
|
@@ -362,6 +382,13 @@ namespace Guardrails {
|
|
|
362
382
|
"request_id": String,
|
|
363
383
|
"timestamp": Long,
|
|
364
384
|
|
|
385
|
+
// File path (optional — for path-based access control policies)
|
|
386
|
+
"path"?: String, // File path being written
|
|
387
|
+
|
|
388
|
+
// Security - Invisible Character Detection in write content (optional)
|
|
389
|
+
"contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in write content
|
|
390
|
+
"invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
|
|
391
|
+
|
|
365
392
|
// Security checks on content being written (optional)
|
|
366
393
|
"contains_secrets"?: Bool,
|
|
367
394
|
"secret_count"?: Long,
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// =============================================================================
|
|
2
|
+
// A2A Security — Cross-Origin Trust Boundary Enforcement
|
|
3
|
+
// =============================================================================
|
|
4
|
+
// Detects and blocks confused deputy attacks where an agent from one trust
|
|
5
|
+
// domain attempts to operate in another. Cross-origin violations occur when:
|
|
6
|
+
// - An agent proxies requests across security domains
|
|
7
|
+
// - Mixed-security tool chains span trust boundaries
|
|
8
|
+
// - URL injection redirects agent communication to untrusted origins
|
|
9
|
+
//
|
|
10
|
+
// Key A2A distinction from MAS: In multi-agent systems (shared orchestrator),
|
|
11
|
+
// cross-origin is unlikely because all agents share a trust context. In A2A
|
|
12
|
+
// (independent agents, separate trust domains), cross-origin is the PRIMARY
|
|
13
|
+
// signal that trust boundaries are being violated.
|
|
14
|
+
//
|
|
15
|
+
// Shield cross-origin detector outputs discrete scores:
|
|
16
|
+
// 90 — mixed localhost + external domain
|
|
17
|
+
// 85 — URL injection in parameters
|
|
18
|
+
// 80 — proxy/redirect patterns
|
|
19
|
+
// 75 — multi-origin tool configs / JSON origin fields
|
|
20
|
+
// 70 — mixed HTTP/HTTPS or ws/wss schemes
|
|
21
|
+
// 65 — JSON arrays with multiple URLs
|
|
22
|
+
// 60 — generic multi-domain patterns
|
|
23
|
+
//
|
|
24
|
+
// Compliance:
|
|
25
|
+
// OWASP LLM08 (Excessive Agency)
|
|
26
|
+
// OWASP ASI03 (Excessive Permissions)
|
|
27
|
+
// MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection via delegation)
|
|
28
|
+
// NIST 800-53 AC-4 (Information Flow Enforcement)
|
|
29
|
+
//
|
|
30
|
+
// Category: agent_identity
|
|
31
|
+
// Namespace: Guardrails
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
// Block high-confidence cross-origin for any agent
|
|
35
|
+
@id("a2a-cross-origin-block-critical")
|
|
36
|
+
@name("Block critical cross-origin from any agent")
|
|
37
|
+
@description("Block all agent requests when cross-origin trust boundary violation score is 80 or higher. High-confidence cross-origin signals (mixed localhost/external, URL injection, proxy redirects) indicate confused deputy attacks regardless of agent trust level.")
|
|
38
|
+
@severity("critical")
|
|
39
|
+
@tags("profile,a2a-security,cross-origin,confused-deputy,trust-boundary,owasp-llm08")
|
|
40
|
+
@reject_message("Request blocked: high-confidence cross-origin trust boundary violation detected (score >= 80). An external agent or service is attempting to operate across trust domains. Review the origin chain before retrying.")
|
|
41
|
+
forbid (
|
|
42
|
+
principal is Guardrails::Agent,
|
|
43
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
44
|
+
resource
|
|
45
|
+
)
|
|
46
|
+
when {
|
|
47
|
+
context has agent_id && context.agent_id != "" &&
|
|
48
|
+
context has cross_origin_detected && context.cross_origin_detected == true &&
|
|
49
|
+
context has cross_origin_score && context.cross_origin_score >= 80
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
// Block cross-origin from unverified agents at any signal level
|
|
53
|
+
@id("a2a-cross-origin-block-unverified")
|
|
54
|
+
@name("Block cross-origin from unverified agents")
|
|
55
|
+
@description("Unverified agents are blocked from any cross-origin activity at the lowest meaningful detection threshold (score >= 60). Cross-origin from an unverified source is a strong confused deputy indicator — the agent has no attestation AND is crossing trust boundaries.")
|
|
56
|
+
@severity("high")
|
|
57
|
+
@tags("profile,a2a-security,cross-origin,unverified,trust-boundary,owasp-asi03")
|
|
58
|
+
@reject_message("Request blocked: cross-origin activity detected from an unverified agent. Unverified agents cannot operate across trust boundaries. Register the agent or use a verified agent.")
|
|
59
|
+
forbid (
|
|
60
|
+
principal is Guardrails::Agent,
|
|
61
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
62
|
+
resource
|
|
63
|
+
)
|
|
64
|
+
when {
|
|
65
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
66
|
+
context has cross_origin_detected && context.cross_origin_detected == true &&
|
|
67
|
+
context has cross_origin_score && context.cross_origin_score >= 60
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
// Block cross-origin MCP server connections from non-first-party agents
|
|
71
|
+
@id("a2a-cross-origin-block-server-connect")
|
|
72
|
+
@name("Block cross-origin MCP server connections")
|
|
73
|
+
@description("Non-first-party agents cannot connect to MCP servers when cross-origin signals are present (score >= 65). Server-level cross-origin has wide blast radius — a single compromised connection exposes all tools on that server.")
|
|
74
|
+
@severity("critical")
|
|
75
|
+
@tags("profile,a2a-security,cross-origin,mcp,server,trust-boundary,nist-ac-4")
|
|
76
|
+
@reject_message("MCP server connection blocked: cross-origin trust violation detected. Non-first-party agents cannot connect to MCP servers when cross-origin signals are present.")
|
|
77
|
+
forbid (
|
|
78
|
+
principal is Guardrails::Agent,
|
|
79
|
+
action == Guardrails::Action::"connect_server",
|
|
80
|
+
resource
|
|
81
|
+
)
|
|
82
|
+
when {
|
|
83
|
+
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
84
|
+
context has cross_origin_detected && context.cross_origin_detected == true &&
|
|
85
|
+
context has cross_origin_score && context.cross_origin_score >= 65
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
// Block cross-origin tool calls on sensitive tools from any agent
|
|
89
|
+
@id("a2a-cross-origin-block-sensitive-tools")
|
|
90
|
+
@name("Block cross-origin on sensitive tools")
|
|
91
|
+
@description("Any agent attempting to call sensitive tools while cross-origin signals are present (score >= 60) is blocked. Sensitive tools (write_file, http_post, send_email) amplify the impact of confused deputy attacks — an agent crossing trust boundaries should not have access to high-impact operations.")
|
|
92
|
+
@severity("high")
|
|
93
|
+
@tags("profile,a2a-security,cross-origin,sensitive-tools,confused-deputy,owasp-llm08")
|
|
94
|
+
@reject_message("Sensitive tool execution blocked: cross-origin trust violation detected. Tool calls to sensitive tools are blocked when cross-origin signals are present from agent requests.")
|
|
95
|
+
forbid (
|
|
96
|
+
principal is Guardrails::Agent,
|
|
97
|
+
action == Guardrails::Action::"call_tool",
|
|
98
|
+
resource
|
|
99
|
+
)
|
|
100
|
+
when {
|
|
101
|
+
context has agent_id && context.agent_id != "" &&
|
|
102
|
+
context has cross_origin_detected && context.cross_origin_detected == true &&
|
|
103
|
+
context has cross_origin_score && context.cross_origin_score >= 60 &&
|
|
104
|
+
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
105
|
+
};
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// =============================================================================
|
|
2
|
+
// A2A Security — Escalation Detection & Circuit Breakers
|
|
3
|
+
// =============================================================================
|
|
4
|
+
// Detects progressive capability escalation across agent turns and applies
|
|
5
|
+
// session-level circuit breakers tuned for adversarial A2A communication:
|
|
6
|
+
//
|
|
7
|
+
// 1. SESSION PEAK SCORES: When the maximum injection or jailbreak score
|
|
8
|
+
//      across all prior turns reaches 70 or higher, non-first-party agents are restricted.
|
|
9
|
+
// Unlike MAS boolean flags, A2A uses numeric peaks for graduated response.
|
|
10
|
+
//
|
|
11
|
+
// 2. CUMULATIVE RISK CIRCUIT BREAKER: When accumulated risk across turns
|
|
12
|
+
// exceeds 150, sensitive tools are blocked for non-first-party agents.
|
|
13
|
+
// Lower threshold than MAS (200) because cross-system communication has
|
|
14
|
+
// higher adversarial surface area.
|
|
15
|
+
//
|
|
16
|
+
// 3. THREAT TURN ESCALATION: After 3+ turns with detected threats, unverified
|
|
17
|
+
//      agents are blocked from all tool calls. This catches turn-by-turn probing attacks
|
|
18
|
+
// where an attacker incrementally tests boundaries.
|
|
19
|
+
//
|
|
20
|
+
// Key A2A distinction: In MAS, the orchestrator can reset or contain sessions.
|
|
21
|
+
// In A2A, independent agents have no shared circuit breaker, so policy must
|
|
22
|
+
// enforce escalation detection at the evaluation layer.
|
|
23
|
+
//
|
|
24
|
+
// Compliance:
|
|
25
|
+
// OWASP LLM01 (Prompt Injection) — multi-turn variant
|
|
26
|
+
// MITRE ATLAS AML.T0051 (LLM Prompt Injection)
|
|
27
|
+
// NIST 800-53 SI-4 (System Monitoring)
|
|
28
|
+
// NIST 800-53 IR-4 (Incident Handling)
|
|
29
|
+
//
|
|
30
|
+
// Category: agent_identity
|
|
31
|
+
// Namespace: Guardrails
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
// -----------------------------------------------------------------------------
|
|
35
|
+
// Session Peak Score Monitoring
|
|
36
|
+
// -----------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
// Block non-first-party agents when session injection peak is high
|
|
39
|
+
@id("a2a-session-injection-peak-block")
|
|
40
|
+
@name("Block agents when session injection peak is high")
|
|
41
|
+
@description("Block non-first-party agents from tool calls and prompt processing when the maximum injection score across all prior turns is 70 or higher. Unlike MAS policies that use boolean session_injection_detected, A2A uses the numeric peak score for graduated response — a session with a prior score of 75 is more dangerous than one with 45, even though both set the boolean flag.")
|
|
42
|
+
@severity("critical")
|
|
43
|
+
@tags("profile,a2a-security,escalation,session-peak,injection,owasp-llm01,nist-si-4")
|
|
44
|
+
@reject_message("Request blocked: this session has a prior injection detection peak of 70+. Non-first-party agents are restricted after high injection signals to prevent progressive capability escalation.")
|
|
45
|
+
forbid (
|
|
46
|
+
principal is Guardrails::Agent,
|
|
47
|
+
action in [Guardrails::Action::"call_tool", Guardrails::Action::"process_prompt"],
|
|
48
|
+
resource
|
|
49
|
+
)
|
|
50
|
+
when {
|
|
51
|
+
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
52
|
+
context has session_max_injection_score && context.session_max_injection_score >= 70
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
// Block non-first-party agents when session jailbreak peak is high
|
|
56
|
+
@id("a2a-session-jailbreak-peak-block")
|
|
57
|
+
@name("Block agents when session jailbreak peak is high")
|
|
58
|
+
@description("Block non-first-party agents from prompt processing when the maximum jailbreak score across all prior turns is 70 or higher. A high jailbreak peak indicates the session's guardrails may have been partially eroded — subsequent agent requests in this session should be treated as potentially manipulated.")
|
|
59
|
+
@severity("critical")
|
|
60
|
+
@tags("profile,a2a-security,escalation,session-peak,jailbreak,owasp-llm01,nist-si-4")
|
|
61
|
+
@reject_message("Request blocked: this session has a prior jailbreak detection peak of 70+. Non-first-party agents are restricted after high jailbreak signals to prevent progressive manipulation.")
|
|
62
|
+
forbid (
|
|
63
|
+
principal is Guardrails::Agent,
|
|
64
|
+
action == Guardrails::Action::"process_prompt",
|
|
65
|
+
resource
|
|
66
|
+
)
|
|
67
|
+
when {
|
|
68
|
+
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
69
|
+
context has session_max_jailbreak_score && context.session_max_jailbreak_score >= 70
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
// -----------------------------------------------------------------------------
|
|
73
|
+
// Cumulative Risk Circuit Breaker
|
|
74
|
+
// -----------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
// Lock down non-first-party agents at high cumulative risk
|
|
77
|
+
@id("a2a-cumulative-risk-agent-lockdown")
|
|
78
|
+
@name("Lock down non-first-party agents at high cumulative risk")
|
|
79
|
+
@description("Block non-first-party agents from sensitive tools when cumulative session risk exceeds 150. Lower threshold than MAS (200) because A2A sessions with cross-system agents should trip the circuit breaker earlier — independent agents cannot coordinate session recovery the way an orchestrator can.")
|
|
80
|
+
@severity("high")
|
|
81
|
+
@tags("profile,a2a-security,escalation,cumulative-risk,circuit-breaker,nist-ir-4")
|
|
82
|
+
@reject_message("Sensitive tool blocked: session cumulative risk exceeds 150. Non-first-party agents are restricted from sensitive tools in elevated-risk sessions to prevent progressive capability gain.")
|
|
83
|
+
forbid (
|
|
84
|
+
principal is Guardrails::Agent,
|
|
85
|
+
action == Guardrails::Action::"call_tool",
|
|
86
|
+
resource
|
|
87
|
+
)
|
|
88
|
+
when {
|
|
89
|
+
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
90
|
+
context has session_cumulative_risk_score && context.session_cumulative_risk_score > 150 &&
|
|
91
|
+
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
// -----------------------------------------------------------------------------
|
|
95
|
+
// Threat Turn Escalation — Probing Detection
|
|
96
|
+
// -----------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
// Block unverified agents after repeated threat turns
|
|
99
|
+
@id("a2a-threat-turn-escalation-block")
|
|
100
|
+
@name("Block unverified agents after repeated threat turns")
|
|
101
|
+
@description("Block unverified agents from all tool calls after 3+ threat turns are detected in the session. Lower threshold than MAS (5) because repeated threats from an unverified agent's session indicate adversarial probing — the attacker is incrementally testing boundaries. Three threat turns is sufficient evidence of active reconnaissance.")
|
|
102
|
+
@severity("critical")
|
|
103
|
+
@tags("profile,a2a-security,escalation,threat-turns,probing,unverified,nist-ir-4")
|
|
104
|
+
@reject_message("Tool execution blocked: 3+ threat turns detected in this session. Unverified agents are locked out after repeated threat signals to prevent adversarial escalation via turn-by-turn probing.")
|
|
105
|
+
forbid (
|
|
106
|
+
principal is Guardrails::Agent,
|
|
107
|
+
action == Guardrails::Action::"call_tool",
|
|
108
|
+
resource
|
|
109
|
+
)
|
|
110
|
+
when {
|
|
111
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
112
|
+
context has session_threat_turns && context.session_threat_turns > 2
|
|
113
|
+
};
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// =============================================================================
|
|
2
|
+
// A2A Security — Agent Identity Enforcement
|
|
3
|
+
// =============================================================================
|
|
4
|
+
// Enforces strict identity requirements for cross-system agent communication:
|
|
5
|
+
//
|
|
6
|
+
// 1. ANONYMOUS AGENT BLOCKING: Agents that claim agent_type but provide no
|
|
7
|
+
// agent_id are likely spoofed or misconfigured — blocked from tool calls.
|
|
8
|
+
//
|
|
9
|
+
// 2. FRAMEWORK REGISTRATION: Unverified agents must declare their framework
|
|
10
|
+
// (claude-code, langchain, crewai, etc.) for sensitive operations. Missing
|
|
11
|
+
// framework on unverified agents indicates an ad-hoc or rogue integration.
|
|
12
|
+
//
|
|
13
|
+
// 3. SERVER CONNECTION RESTRICTIONS: Unverified agents cannot establish new
|
|
14
|
+
// MCP server connections — limits blast radius of unknown agents.
|
|
15
|
+
//
|
|
16
|
+
// 4. AUTONOMOUS + UNVERIFIED = BLOCKED: The most dangerous combination is
|
|
17
|
+
// an autonomous agent with no verification. No human oversight AND no
|
|
18
|
+
// trust attestation means zero recovery if the agent is compromised.
|
|
19
|
+
//
|
|
20
|
+
// Key A2A distinction: In MAS, the orchestrator validates all sub-agents.
|
|
21
|
+
// In A2A, each agent self-reports identity, so we must enforce identity
|
|
22
|
+
// completeness and consistency at the policy layer.
|
|
23
|
+
//
|
|
24
|
+
// Compliance:
|
|
25
|
+
// OWASP ASI05 (Identity Spoofing)
|
|
26
|
+
// NIST 800-63 (Digital Identity Guidelines)
|
|
27
|
+
// NIST 800-53 IA-2 (Identification and Authentication)
|
|
28
|
+
// NIST 800-53 IA-8 (Identification and Authentication — Non-Organizational Users)
|
|
29
|
+
//
|
|
30
|
+
// Category: agent_identity
|
|
31
|
+
// Namespace: Guardrails
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
// -----------------------------------------------------------------------------
|
|
35
|
+
// Anonymous Agent Detection — Incomplete Identity
|
|
36
|
+
// -----------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
// Block agents with type but no ID from tool execution
|
|
39
|
+
@id("a2a-block-anonymous-agent-tools")
|
|
40
|
+
@name("Block anonymous agents from tool execution")
|
|
41
|
+
@description("Block tool calls from agents that declare an agent_type but have no agent_id. This pattern (type present, ID absent) indicates a spoofed or misconfigured agent identity — legitimate agents always have both. Human proxies are exempt because they represent authenticated users, not independent agents.")
|
|
42
|
+
@severity("critical")
|
|
43
|
+
@tags("profile,a2a-security,identity,anonymous,spoofing,owasp-asi05,nist-ia-2")
|
|
44
|
+
@reject_message("Tool execution blocked: agent identity is required for A2A tool calls. This request has an agent type but no agent ID, indicating an improperly configured or spoofed agent identity.")
|
|
45
|
+
forbid (
|
|
46
|
+
principal is Guardrails::Agent,
|
|
47
|
+
action == Guardrails::Action::"call_tool",
|
|
48
|
+
resource
|
|
49
|
+
)
|
|
50
|
+
when {
|
|
51
|
+
!(context has agent_id && context.agent_id != "") &&
|
|
52
|
+
context has agent_type && context.agent_type != "" &&
|
|
53
|
+
context.agent_type != "human_proxy"
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
// -----------------------------------------------------------------------------
|
|
57
|
+
// Framework Registration — Unverified Agent Restrictions
|
|
58
|
+
// -----------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
// Block unregistered framework unverified agents from sensitive tools
|
|
61
|
+
@id("a2a-block-unregistered-framework")
|
|
62
|
+
@name("Block unregistered frameworks from sensitive tools")
|
|
63
|
+
@description("Block unverified agents with no declared framework from calling sensitive tools. In A2A, agent_framework identifies the SDK/runtime (claude-code, langchain, crewai, autogen). An unverified agent with no framework declaration is a black-box integration — it cannot be audited, patched, or trusted with sensitive operations.")
|
|
64
|
+
@severity("high")
|
|
65
|
+
@tags("profile,a2a-security,identity,framework,unverified,sensitive-tools,nist-ia-8")
|
|
66
|
+
@reject_message("Sensitive tool blocked: unverified agent with no registered framework attempted to call a sensitive tool. Agents must declare their framework (e.g., claude-code, langchain, crewai) for A2A sensitive operations.")
|
|
67
|
+
forbid (
|
|
68
|
+
principal is Guardrails::Agent,
|
|
69
|
+
action == Guardrails::Action::"call_tool",
|
|
70
|
+
resource
|
|
71
|
+
)
|
|
72
|
+
when {
|
|
73
|
+
!(context has agent_framework && context.agent_framework != "") &&
|
|
74
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
75
|
+
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
// -----------------------------------------------------------------------------
|
|
79
|
+
// Server Connection Restrictions
|
|
80
|
+
// -----------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
// Block unverified agents from establishing MCP server connections
|
|
83
|
+
@id("a2a-block-unverified-server-connect")
|
|
84
|
+
@name("Block unverified agents from MCP server connections")
|
|
85
|
+
@description("Unverified agents cannot establish new MCP server connections in A2A mode. Each server connection expands the agent's capability surface — unverified agents should use only pre-established connections from the orchestrator or host application.")
|
|
86
|
+
@severity("high")
|
|
87
|
+
@tags("profile,a2a-security,identity,unverified,mcp,server,nist-ia-8")
|
|
88
|
+
@reject_message("MCP server connection blocked: unverified agents cannot establish new MCP server connections in A2A mode. Register the agent as verified_third_party or first_party to enable server connections.")
|
|
89
|
+
forbid (
|
|
90
|
+
principal is Guardrails::Agent,
|
|
91
|
+
action == Guardrails::Action::"connect_server",
|
|
92
|
+
resource
|
|
93
|
+
)
|
|
94
|
+
when {
|
|
95
|
+
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
96
|
+
context has agent_id && context.agent_id != ""
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
// -----------------------------------------------------------------------------
|
|
100
|
+
// Dangerous Combinations — Maximum Risk
|
|
101
|
+
// -----------------------------------------------------------------------------
|
|
102
|
+
|
|
103
|
+
// Block unverified autonomous agents from all tool calls
|
|
104
|
+
@id("a2a-block-autonomous-unverified")
|
|
105
|
+
@name("Block unverified autonomous agents from all tool calls")
|
|
106
|
+
@description("The combination of autonomous (no human oversight) and unverified (no trust attestation) is the most dangerous agent configuration. If compromised, there is no human to catch anomalies and no verification to limit blast radius. These agents are unconditionally blocked from all tool execution in A2A workflows.")
|
|
107
|
+
@severity("critical")
|
|
108
|
+
@tags("profile,a2a-security,identity,autonomous,unverified,owasp-asi05,nist-ia-2")
|
|
109
|
+
@reject_message("Tool execution blocked: unverified autonomous agents are not permitted in A2A workflows. Autonomous agents operating without human oversight must be at least verified_third_party trust level.")
|
|
110
|
+
forbid (
|
|
111
|
+
principal is Guardrails::Agent,
|
|
112
|
+
action == Guardrails::Action::"call_tool",
|
|
113
|
+
resource
|
|
114
|
+
)
|
|
115
|
+
when {
|
|
116
|
+
context has agent_type && context.agent_type == "autonomous" &&
|
|
117
|
+
context has agent_trust_level && context.agent_trust_level == "unverified"
|
|
118
|
+
};
|