npm - @highflame/policy - Versions diffs - 2.1.5 → 2.1.6 - Mend

@highflame/policy 2.1.5 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar CHANGED Viewed

@@ -15,7 +15,7 @@
 @tags("profile,data-pipeline,secrets,security")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
     resource
 ) when {
     context has contains_secrets && context.contains_secrets == true
@@ -42,7 +42,7 @@ forbid (
 @tags("profile,data-pipeline,injection,security")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
     resource
 ) when {
     context has injection_confidence && context.injection_confidence > 65

package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar ADDED Viewed

@@ -0,0 +1,157 @@
+// =============================================================================
+// Multi-Agent Orchestration — Cross-Turn Agent Safety
+// =============================================================================
+// Session-aware policies that use cross-turn detection history combined with
+// agent identity for defense-in-depth. These policies handle the scenario
+// where one agent in a multi-agent session detects a threat, and subsequent
+// agents in the same session must be restricted accordingly.
+//
+// Key insight: In multi-agent orchestration, Agent A may detect PII in turn 3,
+// and Agent B (a different agent) arrives in turn 5 wanting to call http_post.
+// Cross-turn session flags + agent trust level enable this policy:
+//   "If PII was seen AND this agent is not first-party → block network tools."
+//
+// Context keys relevant to this profile (not every key is referenced by the policies in this file):
+// - agent_trust_level: String - Trust tier of the current agent
+// - agent_type: String - Agent classification
+// - session_pii_detected: Bool - PII seen in any prior turn
+// - session_pii_types: Set<String> - PII types accumulated
+// - session_secrets_detected: Bool - Secrets seen in any prior turn
+// - session_injection_detected: Bool - Injection seen in any prior turn
+// - session_command_injection: Bool - Command injection in any prior turn
+// - session_threat_turns: Long - Count of turns with threats
+// - session_cumulative_risk_score: Long - Total accumulated risk
+// - tool_name: String - Tool being called
+// - tool_is_sensitive: Bool - Whether tool is sensitive
+// - suspicious_pattern: Bool - Whether exfiltration pattern detected
+//
+// Category: agent_identity
+// Namespace: Guardrails
+// =============================================================================
+// -----------------------------------------------------------------------------
+// PII Containment — Prevent Agent Data Leakage
+// -----------------------------------------------------------------------------
+// Forbids call_tool for an Agent principal whose trust level is present and is
+// not "first_party", once the session has recorded PII and the requested tool
+// is one of the listed network-facing tools.
+// agent_trust_level is guarded with `has`, like every other context attribute
+// here, so a request that omits it is a clean non-match instead of an
+// evaluation error that causes the policy to be skipped.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as untrusted.
+@id("multi-agent-pii-block-network-tools")
+@name("Block network tools after PII detection for non-first-party agents")
+@description("If PII was detected in any prior turn, block non-first-party agents from calling network-facing tools. Prevents data exfiltration by untrusted agents in sessions containing sensitive data")
+@severity("critical")
+@tags("profile,multi-agent,pii,exfiltration,cross-turn,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level != "first_party" &&
+    context has session_pii_detected && context.session_pii_detected == true &&
+    context has tool_name &&
+    (context.tool_name == "http_post" ||
+     context.tool_name == "send_email" ||
+     context.tool_name == "http_request" ||
+     context.tool_name == "webhook")
+};
+// Forbids write_file for an Agent principal whose trust level is "unverified"
+// when the session has recorded PII.
+// agent_trust_level is guarded with `has` so that a request omitting it is a
+// clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as unverified.
+@id("multi-agent-pii-block-unverified-file-write")
+@name("Block unverified agents from writing files after PII detection")
+@description("If PII was detected in the session, unverified agents cannot write files. Prevents PII persistence by untrusted agents")
+@severity("high")
+@tags("profile,multi-agent,pii,file-write,cross-turn,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"write_file",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has session_pii_detected && context.session_pii_detected == true
+};
+// -----------------------------------------------------------------------------
+// Secrets Containment — Lock Down After Credential Exposure
+// -----------------------------------------------------------------------------
+// Forbids call_tool for an Agent principal whose trust level is present and is
+// not "first_party", when the session has recorded secrets and the tool is
+// flagged sensitive.
+// agent_trust_level is guarded with `has`, matching the other attributes, so a
+// request omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as untrusted.
+@id("multi-agent-secrets-lockdown")
+@name("Lock down sensitive tools after secrets detection")
+@description("If secrets (API keys, tokens) were detected in any prior turn, block all non-first-party agents from sensitive tool calls. Prevents credential exfiltration in compromised sessions")
+@severity("critical")
+@tags("profile,multi-agent,secrets,lockdown,cross-turn,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level != "first_party" &&
+    context has session_secrets_detected && context.session_secrets_detected == true &&
+    context has tool_is_sensitive && context.tool_is_sensitive == true
+};
+// -----------------------------------------------------------------------------
+// Injection Escalation — Tighten After Prior Attacks
+// -----------------------------------------------------------------------------
+// Forbids every call_tool request from an Agent principal whose trust level is
+// "unverified" once the session has recorded an injection.
+// agent_trust_level is guarded with `has` so that a request omitting it is a
+// clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as unverified.
+@id("multi-agent-post-injection-lockdown")
+@name("Restrict unverified agents after injection detection")
+@description("If injection was detected in any prior turn, block unverified agents from all tool calls. An injection in a prior turn may have poisoned the context, making subsequent unverified agent actions high risk")
+@severity("critical")
+@tags("profile,multi-agent,injection,lockdown,cross-turn,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has session_injection_detected && context.session_injection_detected == true
+};
+// Applies to every Agent principal regardless of trust level: once the session
+// has recorded a command injection, call_tool is forbidden for the shell-style
+// tools named in the set below.
+@id("multi-agent-post-command-injection-block-shell")
+@name("Block all agent shell access after command injection")
+@description("If command injection was detected in any prior turn, no agent (regardless of trust) can execute shell commands. Defense against persistent shell compromise")
+@severity("critical")
+@tags("profile,multi-agent,command-injection,shell,cross-turn,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has session_command_injection &&
+    context.session_command_injection == true &&
+    context has tool_name &&
+    ["shell", "execute_command", "bash"].contains(context.tool_name)
+};
+// -----------------------------------------------------------------------------
+// Cumulative Risk — Session-Level Circuit Breaker
+// -----------------------------------------------------------------------------
+// Forbids call_tool for an Agent principal whose trust level is present and is
+// not "first_party", when the session's cumulative risk score is above 200 and
+// the tool is flagged sensitive.
+// agent_trust_level is guarded with `has`, matching the other attributes, so a
+// request omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as untrusted.
+@id("multi-agent-high-cumulative-risk-restrict")
+@name("Restrict non-first-party agents in high-risk sessions")
+@description("When cumulative session risk exceeds 200, restrict non-first-party agents to safe tools only. Acts as a circuit breaker for sessions that have accumulated multiple risk signals across turns")
+@severity("high")
+@tags("profile,multi-agent,cumulative-risk,circuit-breaker,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level != "first_party" &&
+    context has session_cumulative_risk_score && context.session_cumulative_risk_score > 200 &&
+    context has tool_is_sensitive && context.tool_is_sensitive == true
+};
+// Forbids every call_tool request from an Agent principal whose trust level is
+// "unverified" when either the cumulative risk score is above 500 or the
+// number of threat turns is above 5.
+// agent_trust_level is guarded with `has` so that a request omitting it is a
+// clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as unverified.
+@id("multi-agent-extreme-risk-full-lockdown")
+@name("Full lockdown for unverified agents in extreme-risk sessions")
+@description("When cumulative session risk exceeds 500 or more than 5 threat turns are detected, block ALL tool calls from unverified agents. Emergency circuit breaker for compromised sessions")
+@severity("critical")
+@tags("profile,multi-agent,extreme-risk,lockdown,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    (
+        (context has session_cumulative_risk_score && context.session_cumulative_risk_score > 500) ||
+        (context has session_threat_turns && context.session_threat_turns > 5)
+    )
+};

package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar ADDED Viewed

@@ -0,0 +1,140 @@
+// =============================================================================
+// Multi-Agent Orchestration — Agent Trust Policies
+// =============================================================================
+// Production-grade trust policies for multi-agent systems where an orchestrator
+// coordinates sub-agents with varying trust levels. These policies enforce
+// least-privilege access: each agent gets only the permissions its trust level
+// and type warrant.
+//
+// Architecture supported:
+//   Orchestrator (first_party)
+//     ├── Research Agent (verified_third_party, autonomous)
+//     ├── Code Agent (first_party, tool_agent)
+//     └── External Plugin (unverified, tool_agent)
+//
+// Context keys relevant to this profile (not every key is referenced by the policies in this file):
+// - agent_id: String - Unique agent identifier
+// - agent_type: String - orchestrator | autonomous | tool_agent | human_proxy
+// - agent_trust_level: String - first_party | verified_third_party | unverified
+// - agent_framework: String - Agent framework/SDK
+// - agent_publisher: String - Publishing organization
+// - tool_name: String - Tool being called
+// - tool_category: String - safe | sensitive | dangerous
+// - tool_risk_score: Long (0-100) - Computed risk score
+// - tool_is_sensitive: Bool - Whether tool is sensitive
+// - mcp_server_verified: Bool - Whether MCP server is verified
+// - injection_confidence: Long (0-100) - Injection detection score
+// - jailbreak_confidence: Long (0-100) - Jailbreak detection score
+//
+// Category: agent_identity
+// Namespace: Guardrails
+// =============================================================================
+// -----------------------------------------------------------------------------
+// Tiered Tool Access — The Core A2A Trust Model
+// -----------------------------------------------------------------------------
+// Forbids call_tool for an Agent principal whose trust level is present and is
+// not "first_party" when the tool's category is "dangerous".
+// agent_trust_level is guarded with `has`, like tool_category, so a request
+// omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as untrusted.
+@id("multi-agent-only-first-party-dangerous")
+@name("Only first-party agents can use dangerous tools")
+@description("Dangerous tools (shell, delete_file, run_sql) are restricted to first-party agents only. Third-party and unverified agents are blocked regardless of other signals")
+@severity("critical")
+@tags("profile,multi-agent,trust,tools,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level != "first_party" &&
+    context has tool_category && context.tool_category == "dangerous"
+};
+// Forbids call_tool for an Agent principal whose trust level is "unverified"
+// when the tool is flagged sensitive.
+// agent_trust_level is guarded with `has`, like tool_is_sensitive, so a request
+// omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as unverified.
+@id("multi-agent-block-unverified-sensitive")
+@name("Block unverified agents from all sensitive tools")
+@description("Unverified agents can only use tools classified as safe. Sensitive tools (write_file, http_post, send_email) require at least verified_third_party trust")
+@severity("high")
+@tags("profile,multi-agent,trust,tools,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has tool_is_sensitive && context.tool_is_sensitive == true
+};
+// Forbids call_tool for an Agent principal whose trust level is "unverified"
+// when the request context marks the MCP server as not verified.
+// agent_trust_level is guarded with `has`, like mcp_server_verified, so a
+// request omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as unverified.
+@id("multi-agent-block-unverified-mcp")
+@name("Block unverified agents from unverified MCP servers")
+@description("Unverified agents cannot call tools from unverified MCP servers. Double-unverified (agent + server) presents unacceptable supply chain risk")
+@severity("critical")
+@tags("profile,multi-agent,trust,mcp,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has mcp_server_verified && context.mcp_server_verified == false
+};
+// -----------------------------------------------------------------------------
+// Autonomous Agent Safeguards
+// -----------------------------------------------------------------------------
+// Forbids call_tool for an Agent principal whose agent_type is "autonomous"
+// when the tool's risk score is above 70.
+// agent_type is guarded with `has`, like tool_risk_score, so a request
+// omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_type is not matched — confirm that
+// untyped agents should be exempt from the autonomous risk cap.
+@id("multi-agent-autonomous-tool-risk-cap")
+@name("Cap tool risk for autonomous agents")
+@description("Autonomous agents (no human in the loop) have a lower tool risk ceiling. Tools with risk > 70 require human oversight that autonomous agents lack")
+@severity("high")
+@tags("profile,multi-agent,autonomous,tools,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_type && context.agent_type == "autonomous" &&
+    context has tool_risk_score && context.tool_risk_score > 70
+};
+// Forbids process_prompt for an Agent principal whose agent_type is
+// "autonomous" when the injection confidence is above 50.
+// agent_type is guarded with `has`, like injection_confidence, so a request
+// omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_type is not matched — confirm that
+// untyped agents should fall back to the standard injection threshold.
+@id("multi-agent-autonomous-injection-defense")
+@name("Enhanced injection defense for autonomous agents")
+@description("Autonomous agents are high-value targets for injection. Lower the threshold to 50 (vs 80 standard) since there is no human to catch false negatives")
+@severity("high")
+@tags("profile,multi-agent,autonomous,injection,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"process_prompt",
+    resource
+) when {
+    context has agent_type && context.agent_type == "autonomous" &&
+    context has injection_confidence && context.injection_confidence > 50
+};
+// Forbids process_prompt for an Agent principal whose agent_type is
+// "autonomous" when the jailbreak confidence is above 50.
+// agent_type is guarded with `has`, like jailbreak_confidence, so a request
+// omitting it is a clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_type is not matched — confirm that
+// untyped agents should fall back to the standard jailbreak threshold.
+@id("multi-agent-autonomous-jailbreak-defense")
+@name("Enhanced jailbreak defense for autonomous agents")
+@description("Lower jailbreak threshold for autonomous agents. Without human review, we must be more conservative")
+@severity("high")
+@tags("profile,multi-agent,autonomous,jailbreak,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"process_prompt",
+    resource
+) when {
+    context has agent_type && context.agent_type == "autonomous" &&
+    context has jailbreak_confidence && context.jailbreak_confidence > 50
+};
+// -----------------------------------------------------------------------------
+// MCP Server Connection Trust
+// -----------------------------------------------------------------------------
+// Forbids connect_server for an Agent principal whose trust level is
+// "unverified".
+// agent_trust_level is guarded with `has` so that a request omitting it is a
+// clean non-match rather than an evaluation error.
+// NOTE(review): an agent with no agent_trust_level is not matched — confirm
+// whether a missing trust level should instead be treated as unverified.
+@id("multi-agent-block-unverified-server-connect")
+@name("Block unverified agents from connecting to MCP servers")
+@description("Unverified agents cannot establish new MCP server connections. Limits blast radius of compromised or rogue agents")
+@severity("high")
+@tags("profile,multi-agent,trust,mcp,connect,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"connect_server",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified"
+};

package/_schemas/guardrails/templates/templates.json CHANGED Viewed

@@ -23,6 +23,11 @@
       "name": "Agentic Security",
       "description": "Detect tool abuse, data exfiltration patterns, infinite loops, and budget violations"
     },
+    {
+      "id": "agent_identity",
+      "name": "Agent-to-Agent Security",
+      "description": "Trust-based access control for AI agents — tiered permissions by trust level, agent type restrictions, cross-turn session lockdowns for multi-agent orchestration"
+    },
     {
       "id": "organization",
       "name": "Organization",
@@ -109,6 +114,16 @@
       "severity": "critical",
       "tags": ["command-injection", "path-traversal", "sql-injection", "security"],
       "is_active": true
+    },
+    {
+      "id": "agent-identity-default",
+      "name": "Agent Identity & Trust",
+      "description": "Trust-based access control for AI agents: block unverified agents from dangerous/sensitive tools, apply stricter thresholds for autonomous agents, restrict unverified agents after session threats",
+      "category": "agent_identity",
+      "file": "defaults/agent_identity.cedar",
+      "severity": "critical",
+      "tags": ["agent-identity", "trust", "a2a", "autonomous", "cross-turn"],
+      "is_active": true
     }
   ],
   "templates": [
@@ -192,6 +207,24 @@
       "file": "profiles/data_pipeline/agentic_security.cedar",
       "severity": "critical",
       "tags": ["profile", "data-pipeline", "exfiltration", "tools"]
+    },
+    {
+      "id": "multi-agent-trust",
+      "name": "Multi-Agent Orchestration — Agent Trust",
+      "description": "Tiered trust policies for multi-agent systems: only first-party agents can use dangerous tools, unverified agents restricted to safe tools, autonomous agents have lower risk ceilings, MCP server connection trust enforcement",
+      "category": "agent_identity",
+      "file": "profiles/multi_agent/agent_trust.cedar",
+      "severity": "critical",
+      "tags": ["profile", "multi-agent", "trust", "a2a", "autonomous", "mcp"]
+    },
+    {
+      "id": "multi-agent-safety",
+      "name": "Multi-Agent Orchestration — Cross-Turn Safety",
+      "description": "Session-aware agent safety policies: PII containment across agents, secrets lockdown, injection escalation response, cumulative risk circuit breakers for multi-agent sessions",
+      "category": "agent_identity",
+      "file": "profiles/multi_agent/agent_safety.cedar",
+      "severity": "critical",
+      "tags": ["profile", "multi-agent", "cross-turn", "a2a", "pii", "secrets", "injection", "circuit-breaker"]
     }
   ],
   "profiles": [
@@ -218,6 +251,14 @@
       "severity": "critical",
       "tags": ["data-pipeline", "pii", "secrets", "exfiltration"],
       "template_ids": ["data-pipeline-privacy", "data-pipeline-security", "data-pipeline-agentic-security"]
+    },
+    {
+      "id": "multi-agent",
+      "name": "Multi-Agent Orchestration",
+      "description": "Production-grade A2A guardrails for multi-agent systems — tiered trust access control, autonomous agent safeguards, cross-turn PII/secrets containment, injection escalation response, cumulative risk circuit breakers",
+      "severity": "critical",
+      "tags": ["multi-agent", "a2a", "trust", "cross-turn", "circuit-breaker"],
+      "template_ids": ["multi-agent-trust", "multi-agent-safety"]
     }
   ]
 }

package/dist/guardrails-context.gen.d.ts CHANGED Viewed

@@ -5,6 +5,11 @@
  * Guardrails Cedar schema and are used at policy evaluation time.
  */
 export declare const GuardrailsContextKey: {
+    readonly AgentFramework: "agent_framework";
+    readonly AgentId: "agent_id";
+    readonly AgentPublisher: "agent_publisher";
+    readonly AgentTrustLevel: "agent_trust_level";
+    readonly AgentType: "agent_type";
     readonly BudgetExceeded: "budget_exceeded";
     readonly BudgetRemainingPct: "budget_remaining_pct";
     readonly CodeLanguages: "code_languages";

package/dist/guardrails-context.gen.js CHANGED Viewed

@@ -7,6 +7,11 @@
  * Guardrails Cedar schema and are used at policy evaluation time.
  */
 export const GuardrailsContextKey = {
+    AgentFramework: 'agent_framework',
+    AgentId: 'agent_id',
+    AgentPublisher: 'agent_publisher',
+    AgentTrustLevel: 'agent_trust_level',
+    AgentType: 'agent_type',
     BudgetExceeded: 'budget_exceeded',
     BudgetRemainingPct: 'budget_remaining_pct',
     CodeLanguages: 'code_languages',

package/dist/guardrails-defaults.gen.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * Guardrails policy category identifiers.
  * Maps to UI tab names in Studio.
  */
-export type GuardrailsCategory = 'security' | 'privacy' | 'trust_safety' | 'agentic_security' | 'organization';
+export type GuardrailsCategory = 'security' | 'privacy' | 'trust_safety' | 'agentic_security' | 'agent_identity' | 'organization';
 /**
  * Category metadata for UI display.
  */