npm - @highflame/policy - Versions diffs - 2.1.36 → 2.1.37 - Mend

@highflame/policy 2.1.36 → 2.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar CHANGED

@@ -2,46 +2,32 @@
 // A2A Security — Agent Identity Enforcement
 // =============================================================================
 // Enforces strict identity requirements for cross-system agent communication:
+// blocks anonymous agents, requires framework registration for sensitive
+// operations, restricts server connections, and unconditionally blocks the
+// most dangerous combination — autonomous + unverified.
 //
-// 1. ANONYMOUS AGENT BLOCKING: Agents that claim agent_type but provide no
-//    agent_id are likely spoofed or misconfigured — blocked from tool calls.
-//
-// 2. FRAMEWORK REGISTRATION: Unverified agents must declare their framework
-//    (claude-code, langchain, crewai, etc.) for sensitive operations. Missing
-//    framework on unverified agents indicates an ad-hoc or rogue integration.
-//
-// 3. SERVER CONNECTION RESTRICTIONS: Unverified agents cannot establish new
-//    MCP server connections — limits blast radius of unknown agents.
-//
-// 4. AUTONOMOUS + UNVERIFIED = BLOCKED: The most dangerous combination is
-//    an autonomous agent with no verification. No human oversight AND no
-//    trust attestation means zero recovery if the agent is compromised.
-//
-// Key A2A distinction: In MAS, the orchestrator validates all sub-agents.
-// In A2A, each agent self-reports identity, so we must enforce identity
-// completeness and consistency at the policy layer.
+// Context keys consumed:
+//   - agent_id, agent_type, agent_trust_level, agent_framework
+//   - tool_is_sensitive: Bool
 //
 // Compliance:
-//   OWASP ASI05 (Identity Spoofing)
-//   NIST 800-63 (Digital Identity Guidelines)
-//   NIST 800-53 IA-2 (Identification and Authentication)
-//   NIST 800-53 IA-8 (Identification and Authentication — Non-Organizational Users)
+//   - OWASP ASI05 (Identity Spoofing); NIST 800-63
+//   - NIST 800-53 IA-2, IA-8
 //
-// Category: agent_identity
+// Category:  agent-identity
 // Namespace: Guardrails
 // =============================================================================
-// -----------------------------------------------------------------------------
-// Anonymous Agent Detection — Incomplete Identity
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 1: Anonymous agents
+// ---------------------------------------------------------------------------
-// Block agents with type but no ID from tool execution
-@id("a2a-block-anonymous-agent-tools")
-@name("Block anonymous agents from tool execution")
-@description("Block tool calls from agents that declare an agent_type but have no agent_id. This pattern (type present, ID absent) indicates a spoofed or misconfigured agent identity — legitimate agents always have both. Human proxies are exempt because they represent authenticated users, not independent agents.")
+@id("agent-identity.a2a-block-anonymous-agents")
+@name("Block anonymous agents from tools")
+@description("Blocks call_tool when agent_id is empty, agent_type is non-empty, and agent_type is not \"human_proxy\".")
 @severity("critical")
-@tags("profile,a2a-security,identity,anonymous,spoofing,owasp-asi05,nist-ia-2")
-@reject_message("Tool execution blocked: agent identity is required for A2A tool calls. This request has an agent type but no agent ID, indicating an improperly configured or spoofed agent identity.")
+@tags("category:agent-identity,threat:spoofing,scope:per-agent,detection:rule,surface:call-tool,owasp:asi04")
+@reject_message("Tool execution blocked: agent declares a type but no agent ID — improperly configured or spoofed identity.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -53,17 +39,16 @@ when {
     context.agent_type != "human_proxy"
 };
-// -----------------------------------------------------------------------------
-// Framework Registration — Unverified Agent Restrictions
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 2: Framework registration
+// ---------------------------------------------------------------------------
-// Block unregistered framework unverified agents from sensitive tools
-@id("a2a-block-unregistered-framework")
+@id("agent-identity.a2a-block-unregistered-framework")
 @name("Block unregistered frameworks from sensitive tools")
-@description("Block unverified agents with no declared framework from calling sensitive tools. In A2A, agent_framework identifies the SDK/runtime (claude-code, langchain, crewai, autogen). An unverified agent with no framework declaration is a black-box integration — it cannot be audited, patched, or trusted with sensitive operations.")
+@description("Blocks call_tool when agent_framework is empty, agent_trust_level is \"unverified\", and tool_is_sensitive is true.")
 @severity("high")
-@tags("profile,a2a-security,identity,framework,unverified,sensitive-tools,nist-ia-8")
-@reject_message("Sensitive tool blocked: unverified agent with no registered framework attempted to call a sensitive tool. Agents must declare their framework (e.g., claude-code, langchain, crewai) for A2A sensitive operations.")
+@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
+@reject_message("Sensitive tool execution blocked: unverified agent with no registered framework — black-box integration cannot be trusted.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -75,17 +60,16 @@ when {
     context has tool_is_sensitive && context.tool_is_sensitive == true
 };
-// -----------------------------------------------------------------------------
-// Server Connection Restrictions
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 3: MCP server connections
+// ---------------------------------------------------------------------------
-// Block unverified agents from establishing MCP server connections
-@id("a2a-block-unverified-server-connect")
-@name("Block unverified agents from MCP server connections")
-@description("Unverified agents cannot establish new MCP server connections in A2A mode. Each server connection expands the agent's capability surface — unverified agents should use only pre-established connections from the orchestrator or host application.")
+@id("agent-identity.a2a-block-unverified-server-connect")
+@name("Block unverified agents from server connections")
+@description("Blocks connect_server when agent_trust_level is \"unverified\" and agent_id is non-empty.")
 @severity("high")
-@tags("profile,a2a-security,identity,unverified,mcp,server,nist-ia-8")
-@reject_message("MCP server connection blocked: unverified agents cannot establish new MCP server connections in A2A mode. Register the agent as verified_third_party or first_party to enable server connections.")
+@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:rule,surface:connect-server")
+@reject_message("MCP server connection blocked: unverified agents cannot establish new server connections in A2A mode.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"connect_server",
@@ -96,17 +80,16 @@ when {
     context has agent_id && context.agent_id != ""
 };
-// -----------------------------------------------------------------------------
-// Dangerous Combinations — Maximum Risk
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 4: Maximum-risk combination
+// ---------------------------------------------------------------------------
-// Block unverified autonomous agents from all tool calls
-@id("a2a-block-autonomous-unverified")
-@name("Block unverified autonomous agents from all tool calls")
-@description("The combination of autonomous (no human oversight) and unverified (no trust attestation) is the most dangerous agent configuration. If compromised, there is no human to catch anomalies and no verification to limit blast radius. These agents are unconditionally blocked from all tool execution in A2A workflows.")
+@id("agent-identity.a2a-block-autonomous-unverified")
+@name("Block autonomous unverified agents")
+@description("Blocks call_tool when agent_type is \"autonomous\" and agent_trust_level is \"unverified\".")
 @severity("critical")
-@tags("profile,a2a-security,identity,autonomous,unverified,owasp-asi05,nist-ia-2")
-@reject_message("Tool execution blocked: unverified autonomous agents are not permitted in A2A workflows. Autonomous agents operating without human oversight must be at least verified_third_party trust level.")
+@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool,owasp:asi04")
+@reject_message("Tool execution blocked: autonomous + unverified is the maximum-risk agent configuration and is not permitted.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",

package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar CHANGED

@@ -1,48 +1,38 @@
 // =============================================================================
-// A2A Security — Inter-Agent Injection Defense
+// A2A Security — Inter-Agent Injection Defence
 // =============================================================================
-// Detects and blocks prompt injection that travels between independent agents:
+// Detects and blocks prompt injection that travels between independent
+// agents: indirect injection via tool outputs, multi-turn progressive
+// attacks tracked by a GRU deep-context detector, and encoded-payload
+// delivery (base64, hex, hash content).
 //
-// 1. INDIRECT INJECTION: Malicious content injected via tool outputs, RAG
-//    retrieval, or API responses from one agent that manipulates another.
-//    In A2A, this is the primary attack vector because agents consume each
-//    other's outputs as trusted input.
-//
-// 2. MULTI-TURN PROGRESSIVE ATTACKS: Gradual context manipulation across
-//    turns where each turn is benign individually but collectively builds
-//    toward goal hijacking. The GRU-based deep context detector tracks
-//    conversation state across turns.
-//
-// 3. ENCODED PAYLOAD DELIVERY: Base64, hex, or hash-encoded instructions
-//    designed to bypass single-turn classifiers. In A2A communication,
-//    encoded content is a strong indicator of injection evasion.
-//
-// Key A2A distinction: In MAS, the orchestrator controls all communication.
-// In A2A, each agent independently receives content from external agents,
-// making indirect injection the dominant threat vector.
+// Context keys consumed:
+//   - agent_id, agent_trust_level
+//   - indirect_injection_score: Long (0-100)
+//   - tool_is_sensitive:        Bool
+//   - multi_turn_detection:     Bool
+//   - injection_deep_context_score, jailbreak_deep_context_score: Long
+//   - encoded_content_detected: Bool
+//   - encoded_score:            Long
 //
 // Compliance:
-//   OWASP LLM01 (Prompt Injection) — indirect variant
-//   OWASP ASI01 (Agent Goal Hijack)
-//   MITRE ATLAS AML.T0051 (LLM Prompt Injection)
-//   MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection)
-//   NIST 800-53 SI-10 (Information Input Validation)
+//   - OWASP LLM01 (indirect), OWASP ASI01
+//   - MITRE ATLAS AML.T0051; NIST 800-53 SI-10
 //
-// Category: agent_identity
+// Category:  agent-identity
 // Namespace: Guardrails
 // =============================================================================
-// -----------------------------------------------------------------------------
-// Indirect Injection — Tool Output Poisoning
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 1: Indirect injection
+// ---------------------------------------------------------------------------
-// Block indirect injection from agent tool calls
-@id("a2a-indirect-injection-agent")
+@id("agent-identity.a2a-block-indirect-injection")
 @name("Block indirect injection from agent tool calls")
-@description("Block tool execution when indirect injection is detected in content received by an agent (score >= 60). Indirect injection travels through tool outputs, RAG retrieval, and API responses — the primary A2A attack vector because agents consume each other's outputs as trusted input. Lower threshold than code-agent (70) because cross-system content has higher adversarial surface.")
+@description("Blocks call_tool when agent_id is non-empty and indirect_injection_score >= 60.")
 @severity("critical")
-@tags("profile,a2a-security,indirect-injection,tool-output,owasp-lml01,mitre-t0051-002")
-@reject_message("Tool execution blocked: indirect prompt injection detected in content received by this agent (score >= 60). An external source may be attempting to hijack agent behavior through tool outputs or retrieved content.")
+@tags("category:agent-identity,threat:indirect-injection,detection:ml,surface:call-tool,owasp:llm01")
+@reject_message("Tool execution blocked: indirect prompt injection detected in agent-received content — primary A2A attack vector.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -53,13 +43,12 @@ when {
     context has indirect_injection_score && context.indirect_injection_score >= 60
 };
-// Stricter threshold for sensitive tools
-@id("a2a-indirect-injection-sensitive-agent")
-@name("Block moderate indirect injection on sensitive tools from agents")
-@description("Block sensitive tool execution from agents when indirect injection score exceeds 40. Sensitive tools (write_file, http_post, send_email) amplify the damage of indirect injection — a lower threshold compensates for the higher blast radius of sensitive operations in cross-system communication.")
+@id("agent-identity.a2a-block-indirect-injection-sensitive")
+@name("Block indirect injection on sensitive tools (A2A)")
+@description("Blocks call_tool when agent_id is non-empty, indirect_injection_score >= 40, and tool_is_sensitive is true.")
 @severity("critical")
-@tags("profile,a2a-security,indirect-injection,sensitive-tools,owasp-asi01")
-@reject_message("Sensitive tool blocked: moderate indirect injection risk detected in agent-to-agent content (score >= 40). Sensitive tools require higher confidence that inter-agent content is safe.")
+@tags("category:agent-identity,threat:indirect-injection,detection:ml,surface:call-tool,owasp:asi01")
+@reject_message("Sensitive tool execution blocked: moderate indirect-injection risk in agent content — sensitive tools amplify damage.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -71,17 +60,16 @@ when {
     context has tool_is_sensitive && context.tool_is_sensitive == true
 };
-// -----------------------------------------------------------------------------
-// Multi-Turn Progressive Attacks — Deep Context GRU Detection
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 2: Multi-turn progressive attacks
+// ---------------------------------------------------------------------------
-// Block multi-turn progressive injection for non-first-party agents
-@id("a2a-deep-context-injection-agent")
-@name("Block multi-turn progressive injection for agents")
-@description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive injection (score >= 60). Each turn may be benign individually, but the GRU model tracks hidden state across the full conversation to detect gradual goal hijacking.")
+@id("agent-identity.a2a-block-deep-context-injection")
+@name("Block multi-turn progressive injection")
+@description("Blocks process_prompt when agent_trust_level is not \"first_party\", multi_turn_detection is true, and injection_deep_context_score >= 60.")
 @severity("high")
-@tags("profile,a2a-security,multi-turn,deep-context,injection,owasp-lml01")
-@reject_message("Request blocked: multi-turn progressive injection detected across conversation history (deep context score >= 60). A gradual attack may be building context manipulation over multiple agent turns.")
+@tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
+@reject_message("Prompt blocked: multi-turn progressive injection detected by the deep-context model across conversation history.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"process_prompt",
@@ -93,13 +81,12 @@ when {
     context has injection_deep_context_score && context.injection_deep_context_score >= 60
 };
-// Block multi-turn progressive jailbreak for non-first-party agents
-@id("a2a-deep-context-jailbreak-agent")
-@name("Block multi-turn progressive jailbreak for agents")
-@description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive jailbreak (score >= 60). Jailbreak attempts spread across turns are harder to detect with single-turn classifiers — the deep context model maintains conversation state to catch these patterns.")
+@id("agent-identity.a2a-block-deep-context-jailbreak")
+@name("Block multi-turn progressive jailbreak")
+@description("Blocks process_prompt when agent_trust_level is not \"first_party\", multi_turn_detection is true, and jailbreak_deep_context_score >= 60.")
 @severity("high")
-@tags("profile,a2a-security,multi-turn,deep-context,jailbreak,owasp-asi01")
-@reject_message("Request blocked: multi-turn progressive jailbreak detected across conversation history (deep context score >= 60). A gradual jailbreak attempt is building across multiple agent turns.")
+@tags("category:agent-identity,threat:jailbreak,detection:ml,surface:process-prompt,owasp:llm02")
+@reject_message("Prompt blocked: multi-turn progressive jailbreak detected by the deep-context model across conversation history.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"process_prompt",
@@ -111,17 +98,16 @@ when {
     context has jailbreak_deep_context_score && context.jailbreak_deep_context_score >= 60
 };
-// -----------------------------------------------------------------------------
-// Encoded Payload Delivery — Injection Evasion
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 3: Encoded payload delivery
+// ---------------------------------------------------------------------------
-// Block encoded injection payloads between agents
-@id("a2a-encoded-injection-agent")
+@id("agent-identity.a2a-block-encoded-injection")
 @name("Block encoded payload delivery between agents")
-@description("Block agent requests when encoded payloads are detected (base64, hex, or suspicious hash content with score >= 60). In A2A communication, encoded content is a strong indicator of injection evasion — agents should communicate in plaintext, not encoded payloads.")
+@description("Blocks process_prompt and call_tool when agent_id is non-empty, encoded_content_detected is true, and encoded_score >= 60.")
 @severity("high")
-@tags("profile,a2a-security,encoded-injection,evasion,base64,owasp-lml01")
-@reject_message("Request blocked: encoded payload detected in agent communication (base64, hex, or suspicious hash content). Encoded content in agent-to-agent messages indicates injection evasion or payload delivery.")
+@tags("category:agent-identity,threat:encoded-payload,detection:pattern,owasp:llm01")
+@reject_message("Request blocked: encoded payload detected in agent communication — agents should exchange plaintext, not encoded content.")
 forbid (
     principal is Guardrails::Agent,
     action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],

package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar CHANGED

@@ -1,48 +1,35 @@
 // =============================================================================
-// A2A Security — Supply Chain & Behavioral Drift
+// A2A Security — Supply Chain & Behavioural Drift
 // =============================================================================
-// Detects and blocks supply chain attacks specific to agent-to-agent ecosystems:
+// Blocks supply-chain attacks specific to agent-to-agent ecosystems: tool
+// poisoning from external agent tool chains, rug pull behavioural drift,
+// and multi-step credential theft.
 //
-// 1. TOOL POISONING: External agents install or recommend MCP tools with
-//    hidden instructions, system prompt injection, authority hijack, or
-//    information suppression patterns in tool descriptions.
-//
-// 2. RUG PULL: An agent or tool that behaves normally during evaluation
-//    but changes behavior after trust is established — "risk_spike" (sudden
-//    risk increase) or "pattern_change" (behavioral deviation).
-//
-// 3. CREDENTIAL THEFT CHAINS: Multi-step attack where an agent reads
-//    credential files, encodes them, and exfiltrates via network tools.
-//    In A2A, this is higher risk because external agents have legitimate
-//    reasons to call multiple tools in sequence.
-//
-// Key A2A distinction: In MAS, the orchestrator vets all tools centrally.
-// In A2A, each agent brings its own tool ecosystem, creating a supply chain
-// attack surface at every agent boundary.
+// Context keys consumed:
+//   - agent_id, agent_trust_level
+//   - tool_poisoning_detected, tool_poisoning_score
+//   - rug_pull_detected, rug_pull_score
+//   - suspicious_pattern, pattern_type
 //
 // Compliance:
-//   OWASP ASI04 (Supply Chain Vulnerabilities)
-//   OWASP MCP01 (Tool Poisoning)
-//   OWASP MCP03 (Tool Shadowing)
-//   MITRE ATT&CK T1552 (Unsecured Credentials)
-//   MITRE ATLAS AML.T0049 (Backdoor ML Model) — analogous to agent drift
-//   NIST 800-53 SI-7 (Software, Firmware, and Information Integrity)
+//   - OWASP ASI04, OWASP MCP01/MCP03
+//   - MITRE ATT&CK T1552; MITRE ATLAS AML.T0049
+//   - NIST 800-53 SI-7
 //
-// Category: agent_identity
+// Category:  agent-identity
 // Namespace: Guardrails
 // =============================================================================
-// -----------------------------------------------------------------------------
-// Tool Poisoning — Hidden Instructions in External Agent Tools
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 1: Tool poisoning
+// ---------------------------------------------------------------------------
-// Block tool poisoning from non-first-party agents
-@id("a2a-tool-poisoning-agent")
+@id("agent-identity.a2a-block-tool-poisoning")
 @name("Block tool poisoning from non-first-party agents")
-@description("Block tool execution when poisoning is detected from non-first-party agents (score >= 60). Tool poisoning includes hidden instructions, system prompt injection, authority hijack, and information suppression patterns in tool descriptions or arguments. Lower threshold than default (70) because external agent tool chains have higher supply chain risk.")
+@description("Blocks call_tool when agent_trust_level is not \"first_party\", tool_poisoning_detected is true, and tool_poisoning_score >= 60.")
 @severity("critical")
-@tags("profile,a2a-security,supply-chain,tool-poisoning,owasp-mcp01,owasp-asi04")
-@reject_message("Tool execution blocked: tool poisoning detected in content from a non-first-party agent (score >= 60). Hidden instructions, authority hijack, or system prompt injection patterns found in tool descriptions or arguments.")
+@tags("category:agent-identity,threat:tool-poisoning,scope:per-agent,detection:ml,surface:call-tool,owasp:asi04")
+@reject_message("Tool execution blocked: tool poisoning detected in content from a non-first-party agent — supply-chain risk.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -54,13 +41,12 @@ when {
     context has tool_poisoning_score && context.tool_poisoning_score >= 60
 };
-// Block poisoned MCP server connections from agents
-@id("a2a-server-poisoning-agent")
+@id("agent-identity.a2a-block-server-poisoning")
 @name("Block poisoned server connections from agents")
-@description("Block MCP server connections when poisoning is detected from agent requests (score >= 55). Server-level poisoning has wider blast radius than individual tool poisoning — a single compromised server connection exposes all tools on that server. Lower threshold (55 vs 60) compensates for the amplified impact.")
+@description("Blocks connect_server when agent_id is non-empty, tool_poisoning_detected is true, and tool_poisoning_score >= 55.")
 @severity("critical")
-@tags("profile,a2a-security,supply-chain,server-poisoning,mcp,owasp-mcp01")
-@reject_message("MCP server connection blocked: tool poisoning patterns detected in server from agent request (score >= 55). Server-level poisoning affects all tools and warrants a lower threshold than individual tool poisoning.")
+@tags("category:agent-identity,threat:tool-poisoning,scope:per-agent,detection:ml,surface:connect-server,owasp:asi04")
+@reject_message("MCP server connection blocked: tool poisoning detected from agent request — server-level poisoning affects all tools.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"connect_server",
@@ -72,17 +58,16 @@ when {
     context has tool_poisoning_score && context.tool_poisoning_score >= 55
 };
-// -----------------------------------------------------------------------------
-// Rug Pull — Behavioral Drift After Trust Establishment
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 2: Rug pull behavioural drift
+// ---------------------------------------------------------------------------
-// Block after rug pull behavioral drift detection
-@id("a2a-rug-pull-agent")
-@name("Block rug pull behavioral drift from agents")
-@description("Block tool execution when behavioral drift is detected in agent tool usage (score >= 70). The rug pull detector compares current tool behavior against established baselines — a sudden risk spike or tool alternation pattern change after 3+ normal calls indicates the agent or tool has been compromised mid-session.")
+@id("agent-identity.a2a-block-rug-pull")
+@name("Block rug pull behavioural drift from agents")
+@description("Blocks call_tool when agent_id is non-empty, rug_pull_detected is true, and rug_pull_score >= 70.")
 @severity("critical")
-@tags("profile,a2a-security,supply-chain,rug-pull,behavioral-drift,mitre-t0049")
-@reject_message("Tool execution blocked: behavioral drift detected in agent tool usage (rug pull score >= 70). The tool's behavior has deviated significantly from established patterns, indicating potential supply chain compromise.")
+@tags("category:agent-identity,threat:rug-pull,scope:per-agent,detection:ml,surface:call-tool,owasp:asi04")
+@reject_message("Tool execution blocked: behavioural drift detected — tool behaviour has deviated from established patterns.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -94,17 +79,16 @@ when {
     context has rug_pull_score && context.rug_pull_score >= 70
 };
-// -----------------------------------------------------------------------------
-// Credential Theft Chains — Multi-Step Exfiltration
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 3: Credential theft chains
+// ---------------------------------------------------------------------------
-// Block agent-initiated credential theft chains
-@id("a2a-credential-theft-agent")
-@name("Block agent-initiated credential theft chains")
-@description("Block tool execution when credential theft patterns are detected from non-first-party agents. The pattern detector identifies multi-step sequences (read credentials → encode → exfiltrate) that indicate coordinated credential harvesting. In A2A, external agents have legitimate multi-tool workflows, making pattern-based detection essential to distinguish theft from normal usage.")
+@id("agent-identity.a2a-block-credential-theft")
+@name("Block credential theft chains from agents")
+@description("Blocks call_tool when agent_trust_level is not \"first_party\", suspicious_pattern is true, and pattern_type equals \"credential_theft\".")
 @severity("critical")
-@tags("profile,a2a-security,supply-chain,credential-theft,exfiltration,mitre-t1552")
-@reject_message("Tool execution blocked: credential theft chain detected from a non-first-party agent. The agent is performing a multi-step operation to harvest and exfiltrate credentials. Only first-party agents may access credential-adjacent resources.")
+@tags("category:agent-identity,threat:exfiltration,scope:per-agent,detection:rule,surface:call-tool")
+@reject_message("Tool execution blocked: credential theft chain detected from a non-first-party agent — multi-step credential harvesting.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",

package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar CHANGED

@@ -1,36 +1,28 @@
 // =============================================================================
 // Advanced Detection — PII
 // =============================================================================
-// Advanced PII detection policies using ML classifier confidence scoring,
-// bulk exposure thresholds, and file operation blocking. Goes beyond the
-// boolean pii_detected flag with layered detection:
+// Layered PII detection: bulk exposure threshold, ML classifier confidence,
+// and file-operation blocking. Goes beyond the boolean pii_detected flag.
 //
-//   1. Bulk PII exposure — 3+ PII matches indicates data dumps or CSV pastes
-//   2. ML classifier confidence — catches novel PII patterns that regex misses
-//   3. File operation PII — prevents PII persistence to disk
-//
-// These policies benefit any Guardrails deployment — not just coding agents.
-//
-// Adapted from Overwatch PII detection policies for Guardrails namespace.
+// Context keys consumed:
+//   - pii_count:      Long
+//   - pii_score:      Long (0-100)
+//   - pii_detected:   Bool
 //
 // Compliance:
-//   PCI DSS 3.4 (Payment Card Data)
-//   GDPR Art. 32 (Security of Processing)
-//   HIPAA §164.312 (Technical Safeguards)
-//   CCPA §1798.150 (Data Protection)
-//   NIST 800-53 SI-4 (Information System Monitoring)
+//   - PCI DSS 3.4, GDPR Art. 32, HIPAA §164.312, CCPA §1798.150
+//   - NIST 800-53 SI-4
 //
-// Category: privacy
+// Category:  privacy
 // Namespace: Guardrails
 // =============================================================================
-// Block bulk PII exposure (3+ PII matches)
-@id("detection-block-bulk-pii")
+@id("privacy.advanced-block-pii-bulk")
 @name("Block bulk PII exposure")
-@description("Block content containing 3 or more PII matches. Multiple PII items in a single request indicates a data dump, CSV paste, or data exfiltration attempt. Single PII occurrences may be incidental — bulk exposure is always intentional or negligent.")
+@description("Blocks process_prompt and call_tool when pii_count >= 3.")
 @severity("critical")
-@tags("profile,advanced-detection,pii,bulk,data-exfiltration,gdpr-art-32,ccpa")
-@reject_message("Content blocked: multiple PII items detected (3+). Bulk personal data must never be processed through AI agents. Use data masking or tokenization for batch operations.")
+@tags("category:privacy,threat:pii,threat:exfiltration,detection:aggregate,compliance:gdpr,compliance:pci-dss")
+@reject_message("Content blocked: 3+ PII items detected — bulk exposure indicates data dump or exfiltration.")
 forbid (
     principal,
     action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
@@ -40,29 +32,27 @@ when {
     context has pii_count && context.pii_count >= 3
 };
-// Block content with high ML PII classifier confidence
-@id("detection-block-pii-high-confidence")
-@name("Block high-confidence PII")
-@description("Block content when the ML PII classifier confidence exceeds threshold (80/100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss — defense-in-depth behind the pii_detected boolean.")
+@id("privacy.advanced-block-pii-ml")
+@name("Block high-confidence PII (ML)")
+@description("Blocks process_prompt and call_tool when pii_score >= 80.")
 @severity("critical")
-@tags("profile,advanced-detection,pii,ml-classifier,privacy,compliance")
-@reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence. Even if specific PII types aren't identified, the content appears to contain personal data.")
+@tags("category:privacy,threat:pii,detection:ml,compliance:gdpr,compliance:hipaa")
+@reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence.")
 forbid (
     principal,
     action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
     resource
 )
 when {
-    context has pii_confidence && context.pii_confidence >= 80
+    context has pii_score && context.pii_score >= 80
 };
-// Block file operations containing PII
-@id("detection-block-pii-file-ops")
-@name("Block file operations with PII")
-@description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
+@id("privacy.advanced-block-pii-file-ops")
+@name("Block PII in file operations")
+@description("Blocks read_file and write_file when pii_detected is true.")
 @severity("high")
-@tags("profile,advanced-detection,pii,file-ops,data-protection,gdpr-art-32")
-@reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
+@tags("category:privacy,threat:pii,detection:rule,compliance:gdpr")
+@reject_message("File operation blocked: personally identifiable information detected in file content.")
 forbid (
     principal,
     action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file"],