@highflame/policy 2.1.36 → 2.1.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/ai_gateway/context.json +431 -11
- package/_schemas/ai_gateway/schema.cedarschema +91 -11
- package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
- package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
- package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
- package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
- package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
- package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
- package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
- package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
- package/_schemas/ai_gateway/templates/templates.json +42 -47
- package/_schemas/guardrails/context.json +12 -12
- package/_schemas/guardrails/schema.cedarschema +12 -12
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
- package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
- package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
- package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
- package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
- package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
- package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
- package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
- package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
- package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
- package/_schemas/guardrails/templates/templates.json +188 -210
- package/_schemas/overwatch/context.json +14 -14
- package/_schemas/overwatch/schema.cedarschema +12 -12
- package/_schemas/sentry/context.json +11 -11
- package/_schemas/sentry/schema.cedarschema +11 -11
- package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
- package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
- package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
- package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
- package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
- package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
- package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
- package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
- package/_schemas/sentry/templates/templates.json +46 -46
- package/dist/ai_gateway-context.gen.d.ts +18 -4
- package/dist/ai_gateway-context.gen.js +18 -4
- package/dist/ai_gateway-defaults.gen.d.ts +1 -1
- package/dist/ai_gateway-defaults.gen.js +377 -313
- package/dist/guardrails-context.gen.d.ts +5 -5
- package/dist/guardrails-context.gen.js +5 -5
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +2070 -1849
- package/dist/overwatch-context.gen.d.ts +5 -5
- package/dist/overwatch-context.gen.js +5 -5
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +547 -573
- package/dist/sentry-context.gen.d.ts +3 -3
- package/dist/sentry-context.gen.js +3 -3
- package/dist/sentry-defaults.gen.d.ts +1 -1
- package/dist/sentry-defaults.gen.js +379 -460
- package/dist/service-schemas.gen.d.ts +4 -4
- package/dist/service-schemas.gen.js +249 -99
- package/package.json +1 -1
|
@@ -2,46 +2,32 @@
|
|
|
2
2
|
// A2A Security — Agent Identity Enforcement
|
|
3
3
|
// =============================================================================
|
|
4
4
|
// Enforces strict identity requirements for cross-system agent communication:
|
|
5
|
+
// blocks anonymous agents, requires framework registration for sensitive
|
|
6
|
+
// operations, restricts server connections, and unconditionally blocks the
|
|
7
|
+
// most dangerous combination — autonomous + unverified.
|
|
5
8
|
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
// 2. FRAMEWORK REGISTRATION: Unverified agents must declare their framework
|
|
10
|
-
// (claude-code, langchain, crewai, etc.) for sensitive operations. Missing
|
|
11
|
-
// framework on unverified agents indicates an ad-hoc or rogue integration.
|
|
12
|
-
//
|
|
13
|
-
// 3. SERVER CONNECTION RESTRICTIONS: Unverified agents cannot establish new
|
|
14
|
-
// MCP server connections — limits blast radius of unknown agents.
|
|
15
|
-
//
|
|
16
|
-
// 4. AUTONOMOUS + UNVERIFIED = BLOCKED: The most dangerous combination is
|
|
17
|
-
// an autonomous agent with no verification. No human oversight AND no
|
|
18
|
-
// trust attestation means zero recovery if the agent is compromised.
|
|
19
|
-
//
|
|
20
|
-
// Key A2A distinction: In MAS, the orchestrator validates all sub-agents.
|
|
21
|
-
// In A2A, each agent self-reports identity, so we must enforce identity
|
|
22
|
-
// completeness and consistency at the policy layer.
|
|
9
|
+
// Context keys consumed:
|
|
10
|
+
// - agent_id, agent_type, agent_trust_level, agent_framework
|
|
11
|
+
// - tool_is_sensitive: Bool
|
|
23
12
|
//
|
|
24
13
|
// Compliance:
|
|
25
|
-
// OWASP ASI05 (Identity Spoofing)
|
|
26
|
-
// NIST 800-
|
|
27
|
-
// NIST 800-53 IA-2 (Identification and Authentication)
|
|
28
|
-
// NIST 800-53 IA-8 (Identification and Authentication — Non-Organizational Users)
|
|
14
|
+
// - OWASP ASI05 (Identity Spoofing); NIST 800-63
|
|
15
|
+
// - NIST 800-53 IA-2, IA-8
|
|
29
16
|
//
|
|
30
|
-
// Category:
|
|
17
|
+
// Category: agent-identity
|
|
31
18
|
// Namespace: Guardrails
|
|
32
19
|
// =============================================================================
|
|
33
20
|
|
|
34
|
-
//
|
|
35
|
-
//
|
|
36
|
-
//
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Section 1: Anonymous agents
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
37
24
|
|
|
38
|
-
|
|
39
|
-
@
|
|
40
|
-
@
|
|
41
|
-
@description("Block tool calls from agents that declare an agent_type but have no agent_id. This pattern (type present, ID absent) indicates a spoofed or misconfigured agent identity — legitimate agents always have both. Human proxies are exempt because they represent authenticated users, not independent agents.")
|
|
25
|
+
@id("agent-identity.a2a-block-anonymous-agents")
|
|
26
|
+
@name("Block anonymous agents from tools")
|
|
27
|
+
@description("Blocks call_tool when agent_id is empty, agent_type is non-empty, and agent_type is not \"human_proxy\".")
|
|
42
28
|
@severity("critical")
|
|
43
|
-
@tags("
|
|
44
|
-
@reject_message("Tool execution blocked: agent
|
|
29
|
+
@tags("category:agent-identity,threat:spoofing,scope:per-agent,detection:rule,surface:call-tool,owasp:asi04")
|
|
30
|
+
@reject_message("Tool execution blocked: agent declares a type but no agent ID — improperly configured or spoofed identity.")
|
|
45
31
|
forbid (
|
|
46
32
|
principal is Guardrails::Agent,
|
|
47
33
|
action == Guardrails::Action::"call_tool",
|
|
@@ -53,17 +39,16 @@ when {
|
|
|
53
39
|
context.agent_type != "human_proxy"
|
|
54
40
|
};
|
|
55
41
|
|
|
56
|
-
//
|
|
57
|
-
//
|
|
58
|
-
//
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Section 2: Framework registration
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
59
45
|
|
|
60
|
-
|
|
61
|
-
@id("a2a-block-unregistered-framework")
|
|
46
|
+
@id("agent-identity.a2a-block-unregistered-framework")
|
|
62
47
|
@name("Block unregistered frameworks from sensitive tools")
|
|
63
|
-
@description("
|
|
48
|
+
@description("Blocks call_tool when agent_framework is empty, agent_trust_level is \"unverified\", and tool_is_sensitive is true.")
|
|
64
49
|
@severity("high")
|
|
65
|
-
@tags("
|
|
66
|
-
@reject_message("Sensitive tool blocked: unverified agent with no registered framework
|
|
50
|
+
@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
|
|
51
|
+
@reject_message("Sensitive tool execution blocked: unverified agent with no registered framework — black-box integration cannot be trusted.")
|
|
67
52
|
forbid (
|
|
68
53
|
principal is Guardrails::Agent,
|
|
69
54
|
action == Guardrails::Action::"call_tool",
|
|
@@ -75,17 +60,16 @@ when {
|
|
|
75
60
|
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
76
61
|
};
|
|
77
62
|
|
|
78
|
-
//
|
|
79
|
-
//
|
|
80
|
-
//
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Section 3: MCP server connections
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
81
66
|
|
|
82
|
-
|
|
83
|
-
@
|
|
84
|
-
@
|
|
85
|
-
@description("Unverified agents cannot establish new MCP server connections in A2A mode. Each server connection expands the agent's capability surface — unverified agents should use only pre-established connections from the orchestrator or host application.")
|
|
67
|
+
@id("agent-identity.a2a-block-unverified-server-connect")
|
|
68
|
+
@name("Block unverified agents from server connections")
|
|
69
|
+
@description("Blocks connect_server when agent_trust_level is \"unverified\" and agent_id is non-empty.")
|
|
86
70
|
@severity("high")
|
|
87
|
-
@tags("
|
|
88
|
-
@reject_message("MCP server connection blocked: unverified agents cannot establish new
|
|
71
|
+
@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:rule,surface:connect-server")
|
|
72
|
+
@reject_message("MCP server connection blocked: unverified agents cannot establish new server connections in A2A mode.")
|
|
89
73
|
forbid (
|
|
90
74
|
principal is Guardrails::Agent,
|
|
91
75
|
action == Guardrails::Action::"connect_server",
|
|
@@ -96,17 +80,16 @@ when {
|
|
|
96
80
|
context has agent_id && context.agent_id != ""
|
|
97
81
|
};
|
|
98
82
|
|
|
99
|
-
//
|
|
100
|
-
//
|
|
101
|
-
//
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
// Section 4: Maximum-risk combination
|
|
85
|
+
// ---------------------------------------------------------------------------
|
|
102
86
|
|
|
103
|
-
|
|
104
|
-
@
|
|
105
|
-
@
|
|
106
|
-
@description("The combination of autonomous (no human oversight) and unverified (no trust attestation) is the most dangerous agent configuration. If compromised, there is no human to catch anomalies and no verification to limit blast radius. These agents are unconditionally blocked from all tool execution in A2A workflows.")
|
|
87
|
+
@id("agent-identity.a2a-block-autonomous-unverified")
|
|
88
|
+
@name("Block autonomous unverified agents")
|
|
89
|
+
@description("Blocks call_tool when agent_type is \"autonomous\" and agent_trust_level is \"unverified\".")
|
|
107
90
|
@severity("critical")
|
|
108
|
-
@tags("
|
|
109
|
-
@reject_message("Tool execution blocked:
|
|
91
|
+
@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool,owasp:asi04")
|
|
92
|
+
@reject_message("Tool execution blocked: autonomous + unverified is the maximum-risk agent configuration and is not permitted.")
|
|
110
93
|
forbid (
|
|
111
94
|
principal is Guardrails::Agent,
|
|
112
95
|
action == Guardrails::Action::"call_tool",
|
|
@@ -1,48 +1,38 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// A2A Security — Inter-Agent Injection
|
|
2
|
+
// A2A Security — Inter-Agent Injection Defence
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Detects and blocks prompt injection that travels between independent
|
|
4
|
+
// Detects and blocks prompt injection that travels between independent
|
|
5
|
+
// agents: indirect injection via tool outputs, multi-turn progressive
|
|
6
|
+
// attacks tracked by a GRU deep-context detector, and encoded-payload
|
|
7
|
+
// delivery (base64, hex, hash content).
|
|
5
8
|
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
// conversation state across turns.
|
|
15
|
-
//
|
|
16
|
-
// 3. ENCODED PAYLOAD DELIVERY: Base64, hex, or hash-encoded instructions
|
|
17
|
-
// designed to bypass single-turn classifiers. In A2A communication,
|
|
18
|
-
// encoded content is a strong indicator of injection evasion.
|
|
19
|
-
//
|
|
20
|
-
// Key A2A distinction: In MAS, the orchestrator controls all communication.
|
|
21
|
-
// In A2A, each agent independently receives content from external agents,
|
|
22
|
-
// making indirect injection the dominant threat vector.
|
|
9
|
+
// Context keys consumed:
|
|
10
|
+
// - agent_id, agent_trust_level
|
|
11
|
+
// - indirect_injection_score: Long (0-100)
|
|
12
|
+
// - tool_is_sensitive: Bool
|
|
13
|
+
// - multi_turn_detection: Bool
|
|
14
|
+
// - injection_deep_context_score, jailbreak_deep_context_score: Long
|
|
15
|
+
// - encoded_content_detected: Bool
|
|
16
|
+
// - encoded_score: Long
|
|
23
17
|
//
|
|
24
18
|
// Compliance:
|
|
25
|
-
// OWASP LLM01 (
|
|
26
|
-
//
|
|
27
|
-
// MITRE ATLAS AML.T0051 (LLM Prompt Injection)
|
|
28
|
-
// MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection)
|
|
29
|
-
// NIST 800-53 SI-10 (Information Input Validation)
|
|
19
|
+
// - OWASP LLM01 (indirect), OWASP ASI01
|
|
20
|
+
// - MITRE ATLAS AML.T0051; NIST 800-53 SI-10
|
|
30
21
|
//
|
|
31
|
-
// Category:
|
|
22
|
+
// Category: agent-identity
|
|
32
23
|
// Namespace: Guardrails
|
|
33
24
|
// =============================================================================
|
|
34
25
|
|
|
35
|
-
//
|
|
36
|
-
//
|
|
37
|
-
//
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Section 1: Indirect injection
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
38
29
|
|
|
39
|
-
|
|
40
|
-
@id("a2a-indirect-injection-agent")
|
|
30
|
+
@id("agent-identity.a2a-block-indirect-injection")
|
|
41
31
|
@name("Block indirect injection from agent tool calls")
|
|
42
|
-
@description("
|
|
32
|
+
@description("Blocks call_tool when agent_id is non-empty and indirect_injection_score >= 60.")
|
|
43
33
|
@severity("critical")
|
|
44
|
-
@tags("
|
|
45
|
-
@reject_message("Tool execution blocked: indirect prompt injection detected in
|
|
34
|
+
@tags("category:agent-identity,threat:indirect-injection,detection:ml,surface:call-tool,owasp:llm01")
|
|
35
|
+
@reject_message("Tool execution blocked: indirect prompt injection detected in agent-received content — primary A2A attack vector.")
|
|
46
36
|
forbid (
|
|
47
37
|
principal is Guardrails::Agent,
|
|
48
38
|
action == Guardrails::Action::"call_tool",
|
|
@@ -53,13 +43,12 @@ when {
|
|
|
53
43
|
context has indirect_injection_score && context.indirect_injection_score >= 60
|
|
54
44
|
};
|
|
55
45
|
|
|
56
|
-
|
|
57
|
-
@
|
|
58
|
-
@
|
|
59
|
-
@description("Block sensitive tool execution from agents when indirect injection score exceeds 40. Sensitive tools (write_file, http_post, send_email) amplify the damage of indirect injection — a lower threshold compensates for the higher blast radius of sensitive operations in cross-system communication.")
|
|
46
|
+
@id("agent-identity.a2a-block-indirect-injection-sensitive")
|
|
47
|
+
@name("Block indirect injection on sensitive tools (A2A)")
|
|
48
|
+
@description("Blocks call_tool when agent_id is non-empty, indirect_injection_score >= 40, and tool_is_sensitive is true.")
|
|
60
49
|
@severity("critical")
|
|
61
|
-
@tags("
|
|
62
|
-
@reject_message("Sensitive tool blocked: moderate indirect
|
|
50
|
+
@tags("category:agent-identity,threat:indirect-injection,detection:ml,surface:call-tool,owasp:asi01")
|
|
51
|
+
@reject_message("Sensitive tool execution blocked: moderate indirect-injection risk in agent content — sensitive tools amplify damage.")
|
|
63
52
|
forbid (
|
|
64
53
|
principal is Guardrails::Agent,
|
|
65
54
|
action == Guardrails::Action::"call_tool",
|
|
@@ -71,17 +60,16 @@ when {
|
|
|
71
60
|
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
72
61
|
};
|
|
73
62
|
|
|
74
|
-
//
|
|
75
|
-
// Multi-
|
|
76
|
-
//
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Section 2: Multi-turn progressive attacks
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
77
66
|
|
|
78
|
-
|
|
79
|
-
@
|
|
80
|
-
@
|
|
81
|
-
@description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive injection (score >= 60). Each turn may be benign individually, but the GRU model tracks hidden state across the full conversation to detect gradual goal hijacking.")
|
|
67
|
+
@id("agent-identity.a2a-block-deep-context-injection")
|
|
68
|
+
@name("Block multi-turn progressive injection")
|
|
69
|
+
@description("Blocks process_prompt when agent_trust_level is not \"first_party\", multi_turn_detection is true, and injection_deep_context_score >= 60.")
|
|
82
70
|
@severity("high")
|
|
83
|
-
@tags("
|
|
84
|
-
@reject_message("
|
|
71
|
+
@tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
|
|
72
|
+
@reject_message("Prompt blocked: multi-turn progressive injection detected by the deep-context model across conversation history.")
|
|
85
73
|
forbid (
|
|
86
74
|
principal is Guardrails::Agent,
|
|
87
75
|
action == Guardrails::Action::"process_prompt",
|
|
@@ -93,13 +81,12 @@ when {
|
|
|
93
81
|
context has injection_deep_context_score && context.injection_deep_context_score >= 60
|
|
94
82
|
};
|
|
95
83
|
|
|
96
|
-
|
|
97
|
-
@
|
|
98
|
-
@
|
|
99
|
-
@description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive jailbreak (score >= 60). Jailbreak attempts spread across turns are harder to detect with single-turn classifiers — the deep context model maintains conversation state to catch these patterns.")
|
|
84
|
+
@id("agent-identity.a2a-block-deep-context-jailbreak")
|
|
85
|
+
@name("Block multi-turn progressive jailbreak")
|
|
86
|
+
@description("Blocks process_prompt when agent_trust_level is not \"first_party\", multi_turn_detection is true, and jailbreak_deep_context_score >= 60.")
|
|
100
87
|
@severity("high")
|
|
101
|
-
@tags("
|
|
102
|
-
@reject_message("
|
|
88
|
+
@tags("category:agent-identity,threat:jailbreak,detection:ml,surface:process-prompt,owasp:llm02")
|
|
89
|
+
@reject_message("Prompt blocked: multi-turn progressive jailbreak detected by the deep-context model across conversation history.")
|
|
103
90
|
forbid (
|
|
104
91
|
principal is Guardrails::Agent,
|
|
105
92
|
action == Guardrails::Action::"process_prompt",
|
|
@@ -111,17 +98,16 @@ when {
|
|
|
111
98
|
context has jailbreak_deep_context_score && context.jailbreak_deep_context_score >= 60
|
|
112
99
|
};
|
|
113
100
|
|
|
114
|
-
//
|
|
115
|
-
//
|
|
116
|
-
//
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
// Section 3: Encoded payload delivery
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
117
104
|
|
|
118
|
-
|
|
119
|
-
@id("a2a-encoded-injection-agent")
|
|
105
|
+
@id("agent-identity.a2a-block-encoded-injection")
|
|
120
106
|
@name("Block encoded payload delivery between agents")
|
|
121
|
-
@description("
|
|
107
|
+
@description("Blocks process_prompt and call_tool when agent_id is non-empty, encoded_content_detected is true, and encoded_score >= 60.")
|
|
122
108
|
@severity("high")
|
|
123
|
-
@tags("
|
|
124
|
-
@reject_message("Request blocked: encoded payload detected in agent communication
|
|
109
|
+
@tags("category:agent-identity,threat:encoded-payload,detection:pattern,owasp:llm01")
|
|
110
|
+
@reject_message("Request blocked: encoded payload detected in agent communication — agents should exchange plaintext, not encoded content.")
|
|
125
111
|
forbid (
|
|
126
112
|
principal is Guardrails::Agent,
|
|
127
113
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
@@ -1,48 +1,35 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// A2A Security — Supply Chain &
|
|
2
|
+
// A2A Security — Supply Chain & Behavioural Drift
|
|
3
3
|
// =============================================================================
|
|
4
|
-
//
|
|
4
|
+
// Blocks supply-chain attacks specific to agent-to-agent ecosystems: tool
|
|
5
|
+
// poisoning from external agent tool chains, rug pull behavioural drift,
|
|
6
|
+
// and multi-step credential theft.
|
|
5
7
|
//
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
// but changes behavior after trust is established — "risk_spike" (sudden
|
|
12
|
-
// risk increase) or "pattern_change" (behavioral deviation).
|
|
13
|
-
//
|
|
14
|
-
// 3. CREDENTIAL THEFT CHAINS: Multi-step attack where an agent reads
|
|
15
|
-
// credential files, encodes them, and exfiltrates via network tools.
|
|
16
|
-
// In A2A, this is higher risk because external agents have legitimate
|
|
17
|
-
// reasons to call multiple tools in sequence.
|
|
18
|
-
//
|
|
19
|
-
// Key A2A distinction: In MAS, the orchestrator vets all tools centrally.
|
|
20
|
-
// In A2A, each agent brings its own tool ecosystem, creating a supply chain
|
|
21
|
-
// attack surface at every agent boundary.
|
|
8
|
+
// Context keys consumed:
|
|
9
|
+
// - agent_id, agent_trust_level
|
|
10
|
+
// - tool_poisoning_detected, tool_poisoning_score
|
|
11
|
+
// - rug_pull_detected, rug_pull_score
|
|
12
|
+
// - suspicious_pattern, pattern_type
|
|
22
13
|
//
|
|
23
14
|
// Compliance:
|
|
24
|
-
// OWASP ASI04
|
|
25
|
-
//
|
|
26
|
-
//
|
|
27
|
-
// MITRE ATT&CK T1552 (Unsecured Credentials)
|
|
28
|
-
// MITRE ATLAS AML.T0049 (Backdoor ML Model) — analogous to agent drift
|
|
29
|
-
// NIST 800-53 SI-7 (Software, Firmware, and Information Integrity)
|
|
15
|
+
// - OWASP ASI04, OWASP MCP01/MCP03
|
|
16
|
+
// - MITRE ATT&CK T1552; MITRE ATLAS AML.T0049
|
|
17
|
+
// - NIST 800-53 SI-7
|
|
30
18
|
//
|
|
31
|
-
// Category:
|
|
19
|
+
// Category: agent-identity
|
|
32
20
|
// Namespace: Guardrails
|
|
33
21
|
// =============================================================================
|
|
34
22
|
|
|
35
|
-
//
|
|
36
|
-
//
|
|
37
|
-
//
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Section 1: Tool poisoning
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
38
26
|
|
|
39
|
-
|
|
40
|
-
@id("a2a-tool-poisoning-agent")
|
|
27
|
+
@id("agent-identity.a2a-block-tool-poisoning")
|
|
41
28
|
@name("Block tool poisoning from non-first-party agents")
|
|
42
|
-
@description("
|
|
29
|
+
@description("Blocks call_tool when agent_trust_level is not \"first_party\", tool_poisoning_detected is true, and tool_poisoning_score >= 60.")
|
|
43
30
|
@severity("critical")
|
|
44
|
-
@tags("
|
|
45
|
-
@reject_message("Tool execution blocked: tool poisoning detected in content from a non-first-party agent
|
|
31
|
+
@tags("category:agent-identity,threat:tool-poisoning,scope:per-agent,detection:ml,surface:call-tool,owasp:asi04")
|
|
32
|
+
@reject_message("Tool execution blocked: tool poisoning detected in content from a non-first-party agent — supply-chain risk.")
|
|
46
33
|
forbid (
|
|
47
34
|
principal is Guardrails::Agent,
|
|
48
35
|
action == Guardrails::Action::"call_tool",
|
|
@@ -54,13 +41,12 @@ when {
|
|
|
54
41
|
context has tool_poisoning_score && context.tool_poisoning_score >= 60
|
|
55
42
|
};
|
|
56
43
|
|
|
57
|
-
|
|
58
|
-
@id("a2a-server-poisoning-agent")
|
|
44
|
+
@id("agent-identity.a2a-block-server-poisoning")
|
|
59
45
|
@name("Block poisoned server connections from agents")
|
|
60
|
-
@description("
|
|
46
|
+
@description("Blocks connect_server when agent_id is non-empty, tool_poisoning_detected is true, and tool_poisoning_score >= 55.")
|
|
61
47
|
@severity("critical")
|
|
62
|
-
@tags("
|
|
63
|
-
@reject_message("MCP server connection blocked: tool poisoning
|
|
48
|
+
@tags("category:agent-identity,threat:tool-poisoning,scope:per-agent,detection:ml,surface:connect-server,owasp:asi04")
|
|
49
|
+
@reject_message("MCP server connection blocked: tool poisoning detected from agent request — server-level poisoning affects all tools.")
|
|
64
50
|
forbid (
|
|
65
51
|
principal is Guardrails::Agent,
|
|
66
52
|
action == Guardrails::Action::"connect_server",
|
|
@@ -72,17 +58,16 @@ when {
|
|
|
72
58
|
context has tool_poisoning_score && context.tool_poisoning_score >= 55
|
|
73
59
|
};
|
|
74
60
|
|
|
75
|
-
//
|
|
76
|
-
//
|
|
77
|
-
//
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Section 2: Rug pull behavioural drift
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
78
64
|
|
|
79
|
-
|
|
80
|
-
@
|
|
81
|
-
@
|
|
82
|
-
@description("Block tool execution when behavioral drift is detected in agent tool usage (score >= 70). The rug pull detector compares current tool behavior against established baselines — a sudden risk spike or tool alternation pattern change after 3+ normal calls indicates the agent or tool has been compromised mid-session.")
|
|
65
|
+
@id("agent-identity.a2a-block-rug-pull")
|
|
66
|
+
@name("Block rug pull behavioural drift from agents")
|
|
67
|
+
@description("Blocks call_tool when agent_id is non-empty, rug_pull_detected is true, and rug_pull_score >= 70.")
|
|
83
68
|
@severity("critical")
|
|
84
|
-
@tags("
|
|
85
|
-
@reject_message("Tool execution blocked:
|
|
69
|
+
@tags("category:agent-identity,threat:rug-pull,scope:per-agent,detection:ml,surface:call-tool,owasp:asi04")
|
|
70
|
+
@reject_message("Tool execution blocked: behavioural drift detected — tool behaviour has deviated from established patterns.")
|
|
86
71
|
forbid (
|
|
87
72
|
principal is Guardrails::Agent,
|
|
88
73
|
action == Guardrails::Action::"call_tool",
|
|
@@ -94,17 +79,16 @@ when {
|
|
|
94
79
|
context has rug_pull_score && context.rug_pull_score >= 70
|
|
95
80
|
};
|
|
96
81
|
|
|
97
|
-
//
|
|
98
|
-
//
|
|
99
|
-
//
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
// Section 3: Credential theft chains
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
100
85
|
|
|
101
|
-
|
|
102
|
-
@
|
|
103
|
-
@
|
|
104
|
-
@description("Block tool execution when credential theft patterns are detected from non-first-party agents. The pattern detector identifies multi-step sequences (read credentials → encode → exfiltrate) that indicate coordinated credential harvesting. In A2A, external agents have legitimate multi-tool workflows, making pattern-based detection essential to distinguish theft from normal usage.")
|
|
86
|
+
@id("agent-identity.a2a-block-credential-theft")
|
|
87
|
+
@name("Block credential theft chains from agents")
|
|
88
|
+
@description("Blocks call_tool when agent_trust_level is not \"first_party\", suspicious_pattern is true, and pattern_type equals \"credential_theft\".")
|
|
105
89
|
@severity("critical")
|
|
106
|
-
@tags("
|
|
107
|
-
@reject_message("Tool execution blocked: credential theft chain detected from a non-first-party agent
|
|
90
|
+
@tags("category:agent-identity,threat:exfiltration,scope:per-agent,detection:rule,surface:call-tool")
|
|
91
|
+
@reject_message("Tool execution blocked: credential theft chain detected from a non-first-party agent — multi-step credential harvesting.")
|
|
108
92
|
forbid (
|
|
109
93
|
principal is Guardrails::Agent,
|
|
110
94
|
action == Guardrails::Action::"call_tool",
|
|
@@ -1,36 +1,28 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
2
|
// Advanced Detection — PII
|
|
3
3
|
// =============================================================================
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
// boolean pii_detected flag with layered detection:
|
|
4
|
+
// Layered PII detection: bulk exposure threshold, ML classifier confidence,
|
|
5
|
+
// and file-operation blocking. Goes beyond the boolean pii_detected flag.
|
|
7
6
|
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
// These policies benefit any Guardrails deployment — not just coding agents.
|
|
13
|
-
//
|
|
14
|
-
// Adapted from Overwatch PII detection policies for Guardrails namespace.
|
|
7
|
+
// Context keys consumed:
|
|
8
|
+
// - pii_count: Long
|
|
9
|
+
// - pii_score: Long (0-100)
|
|
10
|
+
// - pii_detected: Bool
|
|
15
11
|
//
|
|
16
12
|
// Compliance:
|
|
17
|
-
// PCI DSS 3.4
|
|
18
|
-
//
|
|
19
|
-
// HIPAA §164.312 (Technical Safeguards)
|
|
20
|
-
// CCPA §1798.150 (Data Protection)
|
|
21
|
-
// NIST 800-53 SI-4 (Information System Monitoring)
|
|
13
|
+
// - PCI DSS 3.4, GDPR Art. 32, HIPAA §164.312, CCPA §1798.150
|
|
14
|
+
// - NIST 800-53 SI-4
|
|
22
15
|
//
|
|
23
|
-
// Category:
|
|
16
|
+
// Category: privacy
|
|
24
17
|
// Namespace: Guardrails
|
|
25
18
|
// =============================================================================
|
|
26
19
|
|
|
27
|
-
|
|
28
|
-
@id("detection-block-bulk-pii")
|
|
20
|
+
@id("privacy.advanced-block-pii-bulk")
|
|
29
21
|
@name("Block bulk PII exposure")
|
|
30
|
-
@description("
|
|
22
|
+
@description("Blocks process_prompt and call_tool when pii_count >= 3.")
|
|
31
23
|
@severity("critical")
|
|
32
|
-
@tags("
|
|
33
|
-
@reject_message("Content blocked:
|
|
24
|
+
@tags("category:privacy,threat:pii,threat:exfiltration,detection:aggregate,compliance:gdpr,compliance:pci-dss")
|
|
25
|
+
@reject_message("Content blocked: 3+ PII items detected — bulk exposure indicates data dump or exfiltration.")
|
|
34
26
|
forbid (
|
|
35
27
|
principal,
|
|
36
28
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
@@ -40,29 +32,27 @@ when {
|
|
|
40
32
|
context has pii_count && context.pii_count >= 3
|
|
41
33
|
};
|
|
42
34
|
|
|
43
|
-
|
|
44
|
-
@
|
|
45
|
-
@
|
|
46
|
-
@description("Block content when the ML PII classifier confidence exceeds threshold (80/100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss — defense-in-depth behind the pii_detected boolean.")
|
|
35
|
+
@id("privacy.advanced-block-pii-ml")
|
|
36
|
+
@name("Block high-confidence PII (ML)")
|
|
37
|
+
@description("Blocks process_prompt and call_tool when pii_score >= 80.")
|
|
47
38
|
@severity("critical")
|
|
48
|
-
@tags("
|
|
49
|
-
@reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence.
|
|
39
|
+
@tags("category:privacy,threat:pii,detection:ml,compliance:gdpr,compliance:hipaa")
|
|
40
|
+
@reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence.")
|
|
50
41
|
forbid (
|
|
51
42
|
principal,
|
|
52
43
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
53
44
|
resource
|
|
54
45
|
)
|
|
55
46
|
when {
|
|
56
|
-
context has
|
|
47
|
+
context has pii_score && context.pii_score >= 80
|
|
57
48
|
};
|
|
58
49
|
|
|
59
|
-
|
|
60
|
-
@
|
|
61
|
-
@
|
|
62
|
-
@description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
|
|
50
|
+
@id("privacy.advanced-block-pii-file-ops")
|
|
51
|
+
@name("Block PII in file operations")
|
|
52
|
+
@description("Blocks read_file and write_file when pii_detected is true.")
|
|
63
53
|
@severity("high")
|
|
64
|
-
@tags("
|
|
65
|
-
@reject_message("File operation blocked: personally identifiable information
|
|
54
|
+
@tags("category:privacy,threat:pii,detection:rule,compliance:gdpr")
|
|
55
|
+
@reject_message("File operation blocked: personally identifiable information detected in file content.")
|
|
66
56
|
forbid (
|
|
67
57
|
principal,
|
|
68
58
|
action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|