@highflame/policy 2.1.36 → 2.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/_schemas/ai_gateway/context.json +431 -11
  2. package/_schemas/ai_gateway/schema.cedarschema +91 -11
  3. package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
  4. package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
  5. package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
  6. package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
  7. package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
  8. package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
  9. package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
  10. package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
  11. package/_schemas/ai_gateway/templates/templates.json +42 -47
  12. package/_schemas/guardrails/context.json +12 -12
  13. package/_schemas/guardrails/schema.cedarschema +12 -12
  14. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
  15. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
  16. package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
  17. package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
  18. package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
  19. package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
  20. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
  21. package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
  22. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
  23. package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
  24. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
  25. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
  26. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
  27. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
  28. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
  29. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
  30. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
  31. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
  32. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
  33. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
  34. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
  35. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
  36. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
  37. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
  38. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
  39. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
  40. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
  41. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
  42. package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
  43. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
  44. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
  45. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
  46. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
  47. package/_schemas/guardrails/templates/templates.json +188 -210
  48. package/_schemas/overwatch/context.json +14 -14
  49. package/_schemas/overwatch/schema.cedarschema +12 -12
  50. package/_schemas/sentry/context.json +11 -11
  51. package/_schemas/sentry/schema.cedarschema +11 -11
  52. package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
  53. package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
  54. package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
  55. package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
  56. package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
  57. package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
  58. package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
  59. package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
  60. package/_schemas/sentry/templates/templates.json +46 -46
  61. package/dist/ai_gateway-context.gen.d.ts +18 -4
  62. package/dist/ai_gateway-context.gen.js +18 -4
  63. package/dist/ai_gateway-defaults.gen.d.ts +1 -1
  64. package/dist/ai_gateway-defaults.gen.js +377 -313
  65. package/dist/guardrails-context.gen.d.ts +5 -5
  66. package/dist/guardrails-context.gen.js +5 -5
  67. package/dist/guardrails-defaults.gen.d.ts +1 -1
  68. package/dist/guardrails-defaults.gen.js +2070 -1849
  69. package/dist/overwatch-context.gen.d.ts +5 -5
  70. package/dist/overwatch-context.gen.js +5 -5
  71. package/dist/overwatch-defaults.gen.d.ts +1 -1
  72. package/dist/overwatch-defaults.gen.js +547 -573
  73. package/dist/sentry-context.gen.d.ts +3 -3
  74. package/dist/sentry-context.gen.js +3 -3
  75. package/dist/sentry-defaults.gen.d.ts +1 -1
  76. package/dist/sentry-defaults.gen.js +379 -460
  77. package/dist/service-schemas.gen.d.ts +4 -4
  78. package/dist/service-schemas.gen.js +249 -99
  79. package/package.json +1 -1
@@ -2,46 +2,32 @@
2
2
  // A2A Security — Agent Identity Enforcement
3
3
  // =============================================================================
4
4
  // Enforces strict identity requirements for cross-system agent communication:
5
+ // blocks anonymous agents, requires framework registration for sensitive
6
+ // operations, restricts server connections, and unconditionally blocks the
7
+ // most dangerous combination — autonomous + unverified.
5
8
  //
6
- // 1. ANONYMOUS AGENT BLOCKING: Agents that claim agent_type but provide no
7
- // agent_id are likely spoofed or misconfigured — blocked from tool calls.
8
- //
9
- // 2. FRAMEWORK REGISTRATION: Unverified agents must declare their framework
10
- // (claude-code, langchain, crewai, etc.) for sensitive operations. Missing
11
- // framework on unverified agents indicates an ad-hoc or rogue integration.
12
- //
13
- // 3. SERVER CONNECTION RESTRICTIONS: Unverified agents cannot establish new
14
- // MCP server connections — limits blast radius of unknown agents.
15
- //
16
- // 4. AUTONOMOUS + UNVERIFIED = BLOCKED: The most dangerous combination is
17
- // an autonomous agent with no verification. No human oversight AND no
18
- // trust attestation means zero recovery if the agent is compromised.
19
- //
20
- // Key A2A distinction: In MAS, the orchestrator validates all sub-agents.
21
- // In A2A, each agent self-reports identity, so we must enforce identity
22
- // completeness and consistency at the policy layer.
9
+ // Context keys consumed:
10
+ // - agent_id, agent_type, agent_trust_level, agent_framework
11
+ // - tool_is_sensitive: Bool
23
12
  //
24
13
  // Compliance:
25
- // OWASP ASI05 (Identity Spoofing)
26
- // NIST 800-63 (Digital Identity Guidelines)
27
- // NIST 800-53 IA-2 (Identification and Authentication)
28
- // NIST 800-53 IA-8 (Identification and Authentication — Non-Organizational Users)
14
+ // - OWASP ASI05 (Identity Spoofing); NIST 800-63
15
+ // - NIST 800-53 IA-2, IA-8
29
16
  //
30
- // Category: agent_identity
17
+ // Category: agent-identity
31
18
  // Namespace: Guardrails
32
19
  // =============================================================================
33
20
 
34
- // -----------------------------------------------------------------------------
35
- // Anonymous Agent Detection — Incomplete Identity
36
- // -----------------------------------------------------------------------------
21
+ // ---------------------------------------------------------------------------
22
+ // Section 1: Anonymous agents
23
+ // ---------------------------------------------------------------------------
37
24
 
38
- // Block agents with type but no ID from tool execution
39
- @id("a2a-block-anonymous-agent-tools")
40
- @name("Block anonymous agents from tool execution")
41
- @description("Block tool calls from agents that declare an agent_type but have no agent_id. This pattern (type present, ID absent) indicates a spoofed or misconfigured agent identity — legitimate agents always have both. Human proxies are exempt because they represent authenticated users, not independent agents.")
25
+ @id("agent-identity.a2a-block-anonymous-agents")
26
+ @name("Block anonymous agents from tools")
27
+ @description("Blocks call_tool when agent_id is empty, agent_type is non-empty, and agent_type is not \"human_proxy\".")
42
28
  @severity("critical")
43
- @tags("profile,a2a-security,identity,anonymous,spoofing,owasp-asi05,nist-ia-2")
44
- @reject_message("Tool execution blocked: agent identity is required for A2A tool calls. This request has an agent type but no agent ID, indicating an improperly configured or spoofed agent identity.")
29
+ @tags("category:agent-identity,threat:spoofing,scope:per-agent,detection:rule,surface:call-tool,owasp:asi04")
30
+ @reject_message("Tool execution blocked: agent declares a type but no agent ID improperly configured or spoofed identity.")
45
31
  forbid (
46
32
  principal is Guardrails::Agent,
47
33
  action == Guardrails::Action::"call_tool",
@@ -53,17 +39,16 @@ when {
53
39
  context.agent_type != "human_proxy"
54
40
  };
55
41
 
56
- // -----------------------------------------------------------------------------
57
- // Framework Registration — Unverified Agent Restrictions
58
- // -----------------------------------------------------------------------------
42
+ // ---------------------------------------------------------------------------
43
+ // Section 2: Framework registration
44
+ // ---------------------------------------------------------------------------
59
45
 
60
- // Block unregistered framework unverified agents from sensitive tools
61
- @id("a2a-block-unregistered-framework")
46
+ @id("agent-identity.a2a-block-unregistered-framework")
62
47
  @name("Block unregistered frameworks from sensitive tools")
63
- @description("Block unverified agents with no declared framework from calling sensitive tools. In A2A, agent_framework identifies the SDK/runtime (claude-code, langchain, crewai, autogen). An unverified agent with no framework declaration is a black-box integration — it cannot be audited, patched, or trusted with sensitive operations.")
48
+ @description("Blocks call_tool when agent_framework is empty, agent_trust_level is \"unverified\", and tool_is_sensitive is true.")
64
49
  @severity("high")
65
- @tags("profile,a2a-security,identity,framework,unverified,sensitive-tools,nist-ia-8")
66
- @reject_message("Sensitive tool blocked: unverified agent with no registered framework attempted to call a sensitive tool. Agents must declare their framework (e.g., claude-code, langchain, crewai) for A2A sensitive operations.")
50
+ @tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
51
+ @reject_message("Sensitive tool execution blocked: unverified agent with no registered framework black-box integration cannot be trusted.")
67
52
  forbid (
68
53
  principal is Guardrails::Agent,
69
54
  action == Guardrails::Action::"call_tool",
@@ -75,17 +60,16 @@ when {
75
60
  context has tool_is_sensitive && context.tool_is_sensitive == true
76
61
  };
77
62
 
78
- // -----------------------------------------------------------------------------
79
- // Server Connection Restrictions
80
- // -----------------------------------------------------------------------------
63
+ // ---------------------------------------------------------------------------
64
+ // Section 3: MCP server connections
65
+ // ---------------------------------------------------------------------------
81
66
 
82
- // Block unverified agents from establishing MCP server connections
83
- @id("a2a-block-unverified-server-connect")
84
- @name("Block unverified agents from MCP server connections")
85
- @description("Unverified agents cannot establish new MCP server connections in A2A mode. Each server connection expands the agent's capability surface — unverified agents should use only pre-established connections from the orchestrator or host application.")
67
+ @id("agent-identity.a2a-block-unverified-server-connect")
68
+ @name("Block unverified agents from server connections")
69
+ @description("Blocks connect_server when agent_trust_level is \"unverified\" and agent_id is non-empty.")
86
70
  @severity("high")
87
- @tags("profile,a2a-security,identity,unverified,mcp,server,nist-ia-8")
88
- @reject_message("MCP server connection blocked: unverified agents cannot establish new MCP server connections in A2A mode. Register the agent as verified_third_party or first_party to enable server connections.")
71
+ @tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:rule,surface:connect-server")
72
+ @reject_message("MCP server connection blocked: unverified agents cannot establish new server connections in A2A mode.")
89
73
  forbid (
90
74
  principal is Guardrails::Agent,
91
75
  action == Guardrails::Action::"connect_server",
@@ -96,17 +80,16 @@ when {
96
80
  context has agent_id && context.agent_id != ""
97
81
  };
98
82
 
99
- // -----------------------------------------------------------------------------
100
- // Dangerous Combinations — Maximum Risk
101
- // -----------------------------------------------------------------------------
83
+ // ---------------------------------------------------------------------------
84
+ // Section 4: Maximum-risk combination
85
+ // ---------------------------------------------------------------------------
102
86
 
103
- // Block unverified autonomous agents from all tool calls
104
- @id("a2a-block-autonomous-unverified")
105
- @name("Block unverified autonomous agents from all tool calls")
106
- @description("The combination of autonomous (no human oversight) and unverified (no trust attestation) is the most dangerous agent configuration. If compromised, there is no human to catch anomalies and no verification to limit blast radius. These agents are unconditionally blocked from all tool execution in A2A workflows.")
87
+ @id("agent-identity.a2a-block-autonomous-unverified")
88
+ @name("Block autonomous unverified agents")
89
+ @description("Blocks call_tool when agent_type is \"autonomous\" and agent_trust_level is \"unverified\".")
107
90
  @severity("critical")
108
- @tags("profile,a2a-security,identity,autonomous,unverified,owasp-asi05,nist-ia-2")
109
- @reject_message("Tool execution blocked: unverified autonomous agents are not permitted in A2A workflows. Autonomous agents operating without human oversight must be at least verified_third_party trust level.")
91
+ @tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool,owasp:asi04")
92
+ @reject_message("Tool execution blocked: autonomous + unverified is the maximum-risk agent configuration and is not permitted.")
110
93
  forbid (
111
94
  principal is Guardrails::Agent,
112
95
  action == Guardrails::Action::"call_tool",
@@ -1,48 +1,38 @@
1
1
  // =============================================================================
2
- // A2A Security — Inter-Agent Injection Defense
2
+ // A2A Security — Inter-Agent Injection Defence
3
3
  // =============================================================================
4
- // Detects and blocks prompt injection that travels between independent agents:
4
+ // Detects and blocks prompt injection that travels between independent
5
+ // agents: indirect injection via tool outputs, multi-turn progressive
6
+ // attacks tracked by a GRU deep-context detector, and encoded-payload
7
+ // delivery (base64, hex, hash content).
5
8
  //
6
- // 1. INDIRECT INJECTION: Malicious content injected via tool outputs, RAG
7
- // retrieval, or API responses from one agent that manipulates another.
8
- // In A2A, this is the primary attack vector because agents consume each
9
- // other's outputs as trusted input.
10
- //
11
- // 2. MULTI-TURN PROGRESSIVE ATTACKS: Gradual context manipulation across
12
- // turns where each turn is benign individually but collectively builds
13
- // toward goal hijacking. The GRU-based deep context detector tracks
14
- // conversation state across turns.
15
- //
16
- // 3. ENCODED PAYLOAD DELIVERY: Base64, hex, or hash-encoded instructions
17
- // designed to bypass single-turn classifiers. In A2A communication,
18
- // encoded content is a strong indicator of injection evasion.
19
- //
20
- // Key A2A distinction: In MAS, the orchestrator controls all communication.
21
- // In A2A, each agent independently receives content from external agents,
22
- // making indirect injection the dominant threat vector.
9
+ // Context keys consumed:
10
+ // - agent_id, agent_trust_level
11
+ // - indirect_injection_score: Long (0-100)
12
+ // - tool_is_sensitive: Bool
13
+ // - multi_turn_detection: Bool
14
+ // - injection_deep_context_score, jailbreak_deep_context_score: Long
15
+ // - encoded_content_detected: Bool
16
+ // - encoded_score: Long
23
17
  //
24
18
  // Compliance:
25
- // OWASP LLM01 (Prompt Injection) — indirect variant
26
- // OWASP ASI01 (Agent Goal Hijack)
27
- // MITRE ATLAS AML.T0051 (LLM Prompt Injection)
28
- // MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection)
29
- // NIST 800-53 SI-10 (Information Input Validation)
19
+ // - OWASP LLM01 (indirect), OWASP ASI01
20
+ // - MITRE ATLAS AML.T0051; NIST 800-53 SI-10
30
21
  //
31
- // Category: agent_identity
22
+ // Category: agent-identity
32
23
  // Namespace: Guardrails
33
24
  // =============================================================================
34
25
 
35
- // -----------------------------------------------------------------------------
36
- // Indirect Injection — Tool Output Poisoning
37
- // -----------------------------------------------------------------------------
26
+ // ---------------------------------------------------------------------------
27
+ // Section 1: Indirect injection
28
+ // ---------------------------------------------------------------------------
38
29
 
39
- // Block indirect injection from agent tool calls
40
- @id("a2a-indirect-injection-agent")
30
+ @id("agent-identity.a2a-block-indirect-injection")
41
31
  @name("Block indirect injection from agent tool calls")
42
- @description("Block tool execution when indirect injection is detected in content received by an agent (score >= 60). Indirect injection travels through tool outputs, RAG retrieval, and API responses — the primary A2A attack vector because agents consume each other's outputs as trusted input. Lower threshold than code-agent (70) because cross-system content has higher adversarial surface.")
32
+ @description("Blocks call_tool when agent_id is non-empty and indirect_injection_score >= 60.")
43
33
  @severity("critical")
44
- @tags("profile,a2a-security,indirect-injection,tool-output,owasp-lml01,mitre-t0051-002")
45
- @reject_message("Tool execution blocked: indirect prompt injection detected in content received by this agent (score >= 60). An external source may be attempting to hijack agent behavior through tool outputs or retrieved content.")
34
+ @tags("category:agent-identity,threat:indirect-injection,detection:ml,surface:call-tool,owasp:llm01")
35
+ @reject_message("Tool execution blocked: indirect prompt injection detected in agent-received content primary A2A attack vector.")
46
36
  forbid (
47
37
  principal is Guardrails::Agent,
48
38
  action == Guardrails::Action::"call_tool",
@@ -53,13 +43,12 @@ when {
53
43
  context has indirect_injection_score && context.indirect_injection_score >= 60
54
44
  };
55
45
 
56
- // Stricter threshold for sensitive tools
57
- @id("a2a-indirect-injection-sensitive-agent")
58
- @name("Block moderate indirect injection on sensitive tools from agents")
59
- @description("Block sensitive tool execution from agents when indirect injection score exceeds 40. Sensitive tools (write_file, http_post, send_email) amplify the damage of indirect injection — a lower threshold compensates for the higher blast radius of sensitive operations in cross-system communication.")
46
+ @id("agent-identity.a2a-block-indirect-injection-sensitive")
47
+ @name("Block indirect injection on sensitive tools (A2A)")
48
+ @description("Blocks call_tool when agent_id is non-empty, indirect_injection_score >= 40, and tool_is_sensitive is true.")
60
49
  @severity("critical")
61
- @tags("profile,a2a-security,indirect-injection,sensitive-tools,owasp-asi01")
62
- @reject_message("Sensitive tool blocked: moderate indirect injection risk detected in agent-to-agent content (score >= 40). Sensitive tools require higher confidence that inter-agent content is safe.")
50
+ @tags("category:agent-identity,threat:indirect-injection,detection:ml,surface:call-tool,owasp:asi01")
51
+ @reject_message("Sensitive tool execution blocked: moderate indirect-injection risk in agent content sensitive tools amplify damage.")
63
52
  forbid (
64
53
  principal is Guardrails::Agent,
65
54
  action == Guardrails::Action::"call_tool",
@@ -71,17 +60,16 @@ when {
71
60
  context has tool_is_sensitive && context.tool_is_sensitive == true
72
61
  };
73
62
 
74
- // -----------------------------------------------------------------------------
75
- // Multi-Turn Progressive Attacks — Deep Context GRU Detection
76
- // -----------------------------------------------------------------------------
63
+ // ---------------------------------------------------------------------------
64
+ // Section 2: Multi-turn progressive attacks
65
+ // ---------------------------------------------------------------------------
77
66
 
78
- // Block multi-turn progressive injection for non-first-party agents
79
- @id("a2a-deep-context-injection-agent")
80
- @name("Block multi-turn progressive injection for agents")
81
- @description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive injection (score >= 60). Each turn may be benign individually, but the GRU model tracks hidden state across the full conversation to detect gradual goal hijacking.")
67
+ @id("agent-identity.a2a-block-deep-context-injection")
68
+ @name("Block multi-turn progressive injection")
69
+ @description("Blocks process_prompt when agent_trust_level is not \"first_party\", multi_turn_detection is true, and injection_deep_context_score >= 60.")
82
70
  @severity("high")
83
- @tags("profile,a2a-security,multi-turn,deep-context,injection,owasp-lml01")
84
- @reject_message("Request blocked: multi-turn progressive injection detected across conversation history (deep context score >= 60). A gradual attack may be building context manipulation over multiple agent turns.")
71
+ @tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
72
+ @reject_message("Prompt blocked: multi-turn progressive injection detected by the deep-context model across conversation history.")
85
73
  forbid (
86
74
  principal is Guardrails::Agent,
87
75
  action == Guardrails::Action::"process_prompt",
@@ -93,13 +81,12 @@ when {
93
81
  context has injection_deep_context_score && context.injection_deep_context_score >= 60
94
82
  };
95
83
 
96
- // Block multi-turn progressive jailbreak for non-first-party agents
97
- @id("a2a-deep-context-jailbreak-agent")
98
- @name("Block multi-turn progressive jailbreak for agents")
99
- @description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive jailbreak (score >= 60). Jailbreak attempts spread across turns are harder to detect with single-turn classifiers — the deep context model maintains conversation state to catch these patterns.")
84
+ @id("agent-identity.a2a-block-deep-context-jailbreak")
85
+ @name("Block multi-turn progressive jailbreak")
86
+ @description("Blocks process_prompt when agent_trust_level is not \"first_party\", multi_turn_detection is true, and jailbreak_deep_context_score >= 60.")
100
87
  @severity("high")
101
- @tags("profile,a2a-security,multi-turn,deep-context,jailbreak,owasp-asi01")
102
- @reject_message("Request blocked: multi-turn progressive jailbreak detected across conversation history (deep context score >= 60). A gradual jailbreak attempt is building across multiple agent turns.")
88
+ @tags("category:agent-identity,threat:jailbreak,detection:ml,surface:process-prompt,owasp:llm02")
89
+ @reject_message("Prompt blocked: multi-turn progressive jailbreak detected by the deep-context model across conversation history.")
103
90
  forbid (
104
91
  principal is Guardrails::Agent,
105
92
  action == Guardrails::Action::"process_prompt",
@@ -111,17 +98,16 @@ when {
111
98
  context has jailbreak_deep_context_score && context.jailbreak_deep_context_score >= 60
112
99
  };
113
100
 
114
- // -----------------------------------------------------------------------------
115
- // Encoded Payload Delivery — Injection Evasion
116
- // -----------------------------------------------------------------------------
101
+ // ---------------------------------------------------------------------------
102
+ // Section 3: Encoded payload delivery
103
+ // ---------------------------------------------------------------------------
117
104
 
118
- // Block encoded injection payloads between agents
119
- @id("a2a-encoded-injection-agent")
105
+ @id("agent-identity.a2a-block-encoded-injection")
120
106
  @name("Block encoded payload delivery between agents")
121
- @description("Block agent requests when encoded payloads are detected (base64, hex, or suspicious hash content with score >= 60). In A2A communication, encoded content is a strong indicator of injection evasion — agents should communicate in plaintext, not encoded payloads.")
107
+ @description("Blocks process_prompt and call_tool when agent_id is non-empty, encoded_content_detected is true, and encoded_score >= 60.")
122
108
  @severity("high")
123
- @tags("profile,a2a-security,encoded-injection,evasion,base64,owasp-lml01")
124
- @reject_message("Request blocked: encoded payload detected in agent communication (base64, hex, or suspicious hash content). Encoded content in agent-to-agent messages indicates injection evasion or payload delivery.")
109
+ @tags("category:agent-identity,threat:encoded-payload,detection:pattern,owasp:llm01")
110
+ @reject_message("Request blocked: encoded payload detected in agent communication agents should exchange plaintext, not encoded content.")
125
111
  forbid (
126
112
  principal is Guardrails::Agent,
127
113
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
@@ -1,48 +1,35 @@
1
1
  // =============================================================================
2
- // A2A Security — Supply Chain & Behavioral Drift
2
+ // A2A Security — Supply Chain & Behavioural Drift
3
3
  // =============================================================================
4
- // Detects and blocks supply chain attacks specific to agent-to-agent ecosystems:
4
+ // Blocks supply-chain attacks specific to agent-to-agent ecosystems: tool
5
+ // poisoning from external agent tool chains, rug pull behavioural drift,
6
+ // and multi-step credential theft.
5
7
  //
6
- // 1. TOOL POISONING: External agents install or recommend MCP tools with
7
- // hidden instructions, system prompt injection, authority hijack, or
8
- // information suppression patterns in tool descriptions.
9
- //
10
- // 2. RUG PULL: An agent or tool that behaves normally during evaluation
11
- // but changes behavior after trust is established — "risk_spike" (sudden
12
- // risk increase) or "pattern_change" (behavioral deviation).
13
- //
14
- // 3. CREDENTIAL THEFT CHAINS: Multi-step attack where an agent reads
15
- // credential files, encodes them, and exfiltrates via network tools.
16
- // In A2A, this is higher risk because external agents have legitimate
17
- // reasons to call multiple tools in sequence.
18
- //
19
- // Key A2A distinction: In MAS, the orchestrator vets all tools centrally.
20
- // In A2A, each agent brings its own tool ecosystem, creating a supply chain
21
- // attack surface at every agent boundary.
8
+ // Context keys consumed:
9
+ // - agent_id, agent_trust_level
10
+ // - tool_poisoning_detected, tool_poisoning_score
11
+ // - rug_pull_detected, rug_pull_score
12
+ // - suspicious_pattern, pattern_type
22
13
  //
23
14
  // Compliance:
24
- // OWASP ASI04 (Supply Chain Vulnerabilities)
25
- // OWASP MCP01 (Tool Poisoning)
26
- // OWASP MCP03 (Tool Shadowing)
27
- // MITRE ATT&CK T1552 (Unsecured Credentials)
28
- // MITRE ATLAS AML.T0049 (Backdoor ML Model) — analogous to agent drift
29
- // NIST 800-53 SI-7 (Software, Firmware, and Information Integrity)
15
+ // - OWASP ASI04, OWASP MCP01/MCP03
16
+ // - MITRE ATT&CK T1552; MITRE ATLAS AML.T0049
17
+ // - NIST 800-53 SI-7
30
18
  //
31
- // Category: agent_identity
19
+ // Category: agent-identity
32
20
  // Namespace: Guardrails
33
21
  // =============================================================================
34
22
 
35
- // -----------------------------------------------------------------------------
36
- // Tool Poisoning — Hidden Instructions in External Agent Tools
37
- // -----------------------------------------------------------------------------
23
+ // ---------------------------------------------------------------------------
24
+ // Section 1: Tool poisoning
25
+ // ---------------------------------------------------------------------------
38
26
 
39
- // Block tool poisoning from non-first-party agents
40
- @id("a2a-tool-poisoning-agent")
27
+ @id("agent-identity.a2a-block-tool-poisoning")
41
28
  @name("Block tool poisoning from non-first-party agents")
42
- @description("Block tool execution when poisoning is detected from non-first-party agents (score >= 60). Tool poisoning includes hidden instructions, system prompt injection, authority hijack, and information suppression patterns in tool descriptions or arguments. Lower threshold than default (70) because external agent tool chains have higher supply chain risk.")
29
+ @description("Blocks call_tool when agent_trust_level is not \"first_party\", tool_poisoning_detected is true, and tool_poisoning_score >= 60.")
43
30
  @severity("critical")
44
- @tags("profile,a2a-security,supply-chain,tool-poisoning,owasp-mcp01,owasp-asi04")
45
- @reject_message("Tool execution blocked: tool poisoning detected in content from a non-first-party agent (score >= 60). Hidden instructions, authority hijack, or system prompt injection patterns found in tool descriptions or arguments.")
31
+ @tags("category:agent-identity,threat:tool-poisoning,scope:per-agent,detection:ml,surface:call-tool,owasp:asi04")
32
+ @reject_message("Tool execution blocked: tool poisoning detected in content from a non-first-party agent supply-chain risk.")
46
33
  forbid (
47
34
  principal is Guardrails::Agent,
48
35
  action == Guardrails::Action::"call_tool",
@@ -54,13 +41,12 @@ when {
54
41
  context has tool_poisoning_score && context.tool_poisoning_score >= 60
55
42
  };
56
43
 
57
- // Block poisoned MCP server connections from agents
58
- @id("a2a-server-poisoning-agent")
44
+ @id("agent-identity.a2a-block-server-poisoning")
59
45
  @name("Block poisoned server connections from agents")
60
- @description("Block MCP server connections when poisoning is detected from agent requests (score >= 55). Server-level poisoning has wider blast radius than individual tool poisoning — a single compromised server connection exposes all tools on that server. Lower threshold (55 vs 60) compensates for the amplified impact.")
46
+ @description("Blocks connect_server when agent_id is non-empty, tool_poisoning_detected is true, and tool_poisoning_score >= 55.")
61
47
  @severity("critical")
62
- @tags("profile,a2a-security,supply-chain,server-poisoning,mcp,owasp-mcp01")
63
- @reject_message("MCP server connection blocked: tool poisoning patterns detected in server from agent request (score >= 55). Server-level poisoning affects all tools and warrants a lower threshold than individual tool poisoning.")
48
+ @tags("category:agent-identity,threat:tool-poisoning,scope:per-agent,detection:ml,surface:connect-server,owasp:asi04")
49
+ @reject_message("MCP server connection blocked: tool poisoning detected from agent request server-level poisoning affects all tools.")
64
50
  forbid (
65
51
  principal is Guardrails::Agent,
66
52
  action == Guardrails::Action::"connect_server",
@@ -72,17 +58,16 @@ when {
72
58
  context has tool_poisoning_score && context.tool_poisoning_score >= 55
73
59
  };
74
60
 
75
- // -----------------------------------------------------------------------------
76
- // Rug Pull — Behavioral Drift After Trust Establishment
77
- // -----------------------------------------------------------------------------
61
+ // ---------------------------------------------------------------------------
62
+ // Section 2: Rug pull behavioural drift
63
+ // ---------------------------------------------------------------------------
78
64
 
79
- // Block after rug pull behavioral drift detection
80
- @id("a2a-rug-pull-agent")
81
- @name("Block rug pull behavioral drift from agents")
82
- @description("Block tool execution when behavioral drift is detected in agent tool usage (score >= 70). The rug pull detector compares current tool behavior against established baselines — a sudden risk spike or tool alternation pattern change after 3+ normal calls indicates the agent or tool has been compromised mid-session.")
65
+ @id("agent-identity.a2a-block-rug-pull")
66
+ @name("Block rug pull behavioural drift from agents")
67
+ @description("Blocks call_tool when agent_id is non-empty, rug_pull_detected is true, and rug_pull_score >= 70.")
83
68
  @severity("critical")
84
- @tags("profile,a2a-security,supply-chain,rug-pull,behavioral-drift,mitre-t0049")
85
- @reject_message("Tool execution blocked: behavioral drift detected in agent tool usage (rug pull score >= 70). The tool's behavior has deviated significantly from established patterns, indicating potential supply chain compromise.")
69
+ @tags("category:agent-identity,threat:rug-pull,scope:per-agent,detection:ml,surface:call-tool,owasp:asi04")
70
+ @reject_message("Tool execution blocked: behavioural drift detected tool behaviour has deviated from established patterns.")
86
71
  forbid (
87
72
  principal is Guardrails::Agent,
88
73
  action == Guardrails::Action::"call_tool",
@@ -94,17 +79,16 @@ when {
94
79
  context has rug_pull_score && context.rug_pull_score >= 70
95
80
  };
96
81
 
97
- // -----------------------------------------------------------------------------
98
- // Credential Theft Chains — Multi-Step Exfiltration
99
- // -----------------------------------------------------------------------------
82
+ // ---------------------------------------------------------------------------
83
+ // Section 3: Credential theft chains
84
+ // ---------------------------------------------------------------------------
100
85
 
101
- // Block agent-initiated credential theft chains
102
- @id("a2a-credential-theft-agent")
103
- @name("Block agent-initiated credential theft chains")
104
- @description("Block tool execution when credential theft patterns are detected from non-first-party agents. The pattern detector identifies multi-step sequences (read credentials → encode → exfiltrate) that indicate coordinated credential harvesting. In A2A, external agents have legitimate multi-tool workflows, making pattern-based detection essential to distinguish theft from normal usage.")
86
+ @id("agent-identity.a2a-block-credential-theft")
87
+ @name("Block credential theft chains from agents")
88
+ @description("Blocks call_tool when agent_trust_level is not \"first_party\", suspicious_pattern is true, and pattern_type equals \"credential_theft\".")
105
89
  @severity("critical")
106
- @tags("profile,a2a-security,supply-chain,credential-theft,exfiltration,mitre-t1552")
107
- @reject_message("Tool execution blocked: credential theft chain detected from a non-first-party agent. The agent is performing a multi-step operation to harvest and exfiltrate credentials. Only first-party agents may access credential-adjacent resources.")
90
+ @tags("category:agent-identity,threat:exfiltration,scope:per-agent,detection:rule,surface:call-tool")
91
+ @reject_message("Tool execution blocked: credential theft chain detected from a non-first-party agent multi-step credential harvesting.")
108
92
  forbid (
109
93
  principal is Guardrails::Agent,
110
94
  action == Guardrails::Action::"call_tool",
@@ -1,36 +1,28 @@
1
1
  // =============================================================================
2
2
  // Advanced Detection — PII
3
3
  // =============================================================================
4
- // Advanced PII detection policies using ML classifier confidence scoring,
5
- // bulk exposure thresholds, and file operation blocking. Goes beyond the
6
- // boolean pii_detected flag with layered detection:
4
+ // Layered PII detection: bulk exposure threshold, ML classifier confidence,
5
+ // and file-operation blocking. Goes beyond the boolean pii_detected flag.
7
6
  //
8
- // 1. Bulk PII exposure — 3+ PII matches indicates data dumps or CSV pastes
9
- // 2. ML classifier confidence — catches novel PII patterns that regex misses
10
- // 3. File operation PII — prevents PII persistence to disk
11
- //
12
- // These policies benefit any Guardrails deployment — not just coding agents.
13
- //
14
- // Adapted from Overwatch PII detection policies for Guardrails namespace.
7
+ // Context keys consumed:
8
+ // - pii_count: Long
9
+ // - pii_score: Long (0-100)
10
+ // - pii_detected: Bool
15
11
  //
16
12
  // Compliance:
17
- // PCI DSS 3.4 (Payment Card Data)
18
- // GDPR Art. 32 (Security of Processing)
19
- // HIPAA §164.312 (Technical Safeguards)
20
- // CCPA §1798.150 (Data Protection)
21
- // NIST 800-53 SI-4 (Information System Monitoring)
13
+ // - PCI DSS 3.4, GDPR Art. 32, HIPAA §164.312, CCPA §1798.150
14
+ // - NIST 800-53 SI-4
22
15
  //
23
- // Category: privacy
16
+ // Category: privacy
24
17
  // Namespace: Guardrails
25
18
  // =============================================================================
26
19
 
27
- // Block bulk PII exposure (3+ PII matches)
28
- @id("detection-block-bulk-pii")
20
+ @id("privacy.advanced-block-pii-bulk")
29
21
  @name("Block bulk PII exposure")
30
- @description("Block content containing 3 or more PII matches. Multiple PII items in a single request indicates a data dump, CSV paste, or data exfiltration attempt. Single PII occurrences may be incidental — bulk exposure is always intentional or negligent.")
22
+ @description("Blocks process_prompt and call_tool when pii_count >= 3.")
31
23
  @severity("critical")
32
- @tags("profile,advanced-detection,pii,bulk,data-exfiltration,gdpr-art-32,ccpa")
33
- @reject_message("Content blocked: multiple PII items detected (3+). Bulk personal data must never be processed through AI agents. Use data masking or tokenization for batch operations.")
24
+ @tags("category:privacy,threat:pii,threat:exfiltration,detection:aggregate,compliance:gdpr,compliance:pci-dss")
25
+ @reject_message("Content blocked: 3+ PII items detected bulk exposure indicates data dump or exfiltration.")
34
26
  forbid (
35
27
  principal,
36
28
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
@@ -40,29 +32,27 @@ when {
40
32
  context has pii_count && context.pii_count >= 3
41
33
  };
42
34
 
43
- // Block content with high ML PII classifier confidence
44
- @id("detection-block-pii-high-confidence")
45
- @name("Block high-confidence PII")
46
- @description("Block content when the ML PII classifier confidence exceeds threshold (80/100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss — defense-in-depth behind the pii_detected boolean.")
35
+ @id("privacy.advanced-block-pii-ml")
36
+ @name("Block high-confidence PII (ML)")
37
+ @description("Blocks process_prompt and call_tool when pii_score >= 80.")
47
38
  @severity("critical")
48
- @tags("profile,advanced-detection,pii,ml-classifier,privacy,compliance")
49
- @reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence. Even if specific PII types aren't identified, the content appears to contain personal data.")
39
+ @tags("category:privacy,threat:pii,detection:ml,compliance:gdpr,compliance:hipaa")
40
+ @reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence.")
50
41
  forbid (
51
42
  principal,
52
43
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
53
44
  resource
54
45
  )
55
46
  when {
56
- context has pii_confidence && context.pii_confidence >= 80
47
+ context has pii_score && context.pii_score >= 80
57
48
  };
58
49
 
59
- // Block file operations containing PII
60
- @id("detection-block-pii-file-ops")
61
- @name("Block file operations with PII")
62
- @description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
50
+ @id("privacy.advanced-block-pii-file-ops")
51
+ @name("Block PII in file operations")
52
+ @description("Blocks read_file and write_file when pii_detected is true.")
63
53
  @severity("high")
64
- @tags("profile,advanced-detection,pii,file-ops,data-protection,gdpr-art-32")
65
- @reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
54
+ @tags("category:privacy,threat:pii,detection:rule,compliance:gdpr")
55
+ @reject_message("File operation blocked: personally identifiable information detected in file content.")
66
56
  forbid (
67
57
  principal,
68
58
  action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file"],