npm - @highflame/policy - Versions diffs - 2.1.7 → 2.1.8 - Mend

@highflame/policy 2.1.7 → 2.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar ADDED Viewed

@@ -0,0 +1,120 @@
+// =============================================================================
+// Code Agent — Supply Chain Security
+// =============================================================================
+// Detects and blocks MCP server poisoning, indirect prompt injection from tool
+// outputs, credential theft chains, and destructive operation sequences.
+//
+// These are agentic AI-specific attack vectors where tool descriptions, server
+// responses, or behavioral drift manipulate agent behavior.
+//
+// Adapted from Overwatch agent security and behavioral analysis policies for
+// the Guardrails namespace.
+//
+// Compliance:
+//   OWASP ASI01 (Agent Goal Hijack)
+//   OWASP ASI02 (Tool Misuse)
+//   OWASP ASI04 (Supply Chain)
+//   OWASP LLM01 (Prompt Injection) — indirect variant
+//   OWASP MCP01-05
+//   MITRE ATLAS AML.T0051 (Prompt Injection)
+//   MITRE ATT&CK T1552 (Unsecured Credentials)
+//
+// Category: agentic_security
+// Namespace: Guardrails
+// =============================================================================
+// ---------------------------------------------------------------------------
+// Section 1: MCP Server Poisoning
+// Blocks connections to MCP servers with poisoned tool descriptions.
+// Lower threshold than tool-level poisoning since it affects all tools.
+// ---------------------------------------------------------------------------
+// Policy: forbid the connect_server action when the tool-poisoning score is 60
+// or higher. principal and resource are unconstrained, so the rule applies to
+// every caller and every target; only the action and the context are checked.
+// NOTE(review): @reject_message is presumably surfaced by the Guardrails
+// runtime when this policy fires — confirm against the consumer.
+@id("code-block-server-poisoning")
+@name("Block poisoned MCP servers")
+@description("Block connections to MCP servers when tool poisoning patterns are detected in tool descriptions (score >= 60). Lower threshold than tool-level poisoning since server-level poisoning affects all tools on the server.")
+@severity("critical")
+@tags("profile,code-agent,supply-chain,tool-poisoning,mcp-security,owasp-asi04")
+@reject_message("MCP server connection blocked: tool poisoning patterns detected in server tool descriptions. Review server tools before connecting.")
+forbid (
+    principal,
+    action == Guardrails::Action::"connect_server",
+    resource
+)
+when {
+    // `has` guard first: the score is only compared when the attribute is
+    // present. If it is absent the condition is false and nothing is blocked.
+    context has tool_poisoning_score && context.tool_poisoning_score >= 60
+};
+// ---------------------------------------------------------------------------
+// Section 2: Indirect Prompt Injection
+// Blocks injection from tool outputs and retrieved content — not direct
+// user input but external content that manipulates agent behavior.
+// Ref: EchoLeak CVE-2025-32711, IDEsaster (30+ CVEs)
+// ---------------------------------------------------------------------------
+// Policy: forbid any call_tool request once the indirect-injection score
+// reaches 70. No principal, resource, or tool-sensitivity constraint is
+// applied, so this is the general threshold for every tool call.
+@id("code-block-indirect-injection")
+@name("Block indirect prompt injection")
+@description("Block tool execution when indirect prompt injection is detected in tool outputs, file contents, or retrieved documents (score >= 70). Defends against injection via external content that manipulates agent behavior.")
+@severity("critical")
+@tags("profile,code-agent,supply-chain,indirect-injection,owasp-llm01,owasp-asi01")
+@reject_message("Content blocked: indirect prompt injection detected in tool output or retrieved content. An external source may be attempting to hijack agent behavior.")
+forbid (
+    principal,
+    action == Guardrails::Action::"call_tool",
+    resource
+)
+when {
+    // Presence check before the comparison: a request without the score
+    // attribute makes the condition false, so the policy does not apply.
+    context has indirect_injection_score && context.indirect_injection_score >= 70
+};
+// Policy: stricter indirect-injection threshold (>= 50) for call_tool requests
+// whose context marks the tool as sensitive. The general policy in this file
+// already forbids every call_tool at >= 70, so in practice this rule adds the
+// 50-69 band for sensitive tools only.
+@id("code-block-indirect-injection-sensitive")
+@name("Block indirect injection on sensitive tools")
+@description("Lower threshold (>= 50) for indirect injection when the tool is classified as sensitive. Even moderate injection risk on sensitive tools (shell, file write, network) warrants blocking.")
+@severity("critical")
+@tags("profile,code-agent,supply-chain,indirect-injection,sensitive-tools,owasp-asi02")
+@reject_message("Sensitive tool execution blocked: moderate indirect injection risk detected. Sensitive tools require higher confidence that content is safe.")
+forbid (
+    principal,
+    action == Guardrails::Action::"call_tool",
+    resource
+)
+when {
+    // All four conjuncts must hold. Each attribute is tested with `has`
+    // before it is read, so a request missing either attribute is not blocked
+    // by this policy.
+    context has indirect_injection_score && context.indirect_injection_score >= 50 &&
+    context has tool_is_sensitive && context.tool_is_sensitive == true
+};
+// ---------------------------------------------------------------------------
+// Section 3: Behavioral Attack Patterns
+// Detects multi-step attack chains targeting credentials and workspace integrity.
+// ---------------------------------------------------------------------------
+// Policy: forbid call_tool when the context reports a suspicious pattern of
+// type "credential_theft". The policy performs no sequence analysis itself; it
+// only reads the two context attributes below.
+// NOTE(review): presumably an upstream behavioral detector sets these
+// attributes — confirm where suspicious_pattern / pattern_type are populated.
+@id("code-block-credential-theft")
+@name("Block credential theft chains")
+@description("Block tool execution when a credential theft chain is detected — accessing SSH keys, cloud credentials, or API tokens followed by encoding, compression, or transfer operations. Multi-step attack pattern for autonomous credential harvesting.")
+@severity("critical")
+@tags("profile,code-agent,supply-chain,credential-theft,behavioral,mitre-t1552")
+@reject_message("Tool execution blocked: credential theft chain detected. The agent is performing a multi-step operation to harvest and exfiltrate credentials.")
+forbid (
+    principal,
+    action == Guardrails::Action::"call_tool",
+    resource
+)
+when {
+    // Both the boolean flag and the exact pattern_type string are required.
+    // A flagged pattern of any other type does not trigger this policy.
+    context has suspicious_pattern && context.suspicious_pattern == true &&
+    context has pattern_type && context.pattern_type == "credential_theft"
+};
+// Policy: forbid call_tool when the context reports a suspicious pattern of
+// type "destructive_sequence". It reads the same suspicious_pattern flag as
+// the credential-theft policy in this file; pattern_type selects which of the
+// two applies.
+@id("code-block-destructive-sequence")
+@name("Block destructive operation sequences")
+@description("Block tool execution when a destructive operation sequence is detected — bulk file deletions, permission changes, config overwrites, or repository manipulation patterns. Prevents agent-initiated workspace damage.")
+@severity("critical")
+@tags("profile,code-agent,supply-chain,destructive,behavioral,owasp-asi02")
+@reject_message("Tool execution blocked: destructive operation sequence detected. The agent is performing a pattern of destructive operations that could damage the workspace.")
+forbid (
+    principal,
+    action == Guardrails::Action::"call_tool",
+    resource
+)
+when {
+    // Presence is checked before each read; the string comparison is an exact
+    // match on "destructive_sequence".
+    context has suspicious_pattern && context.suspicious_pattern == true &&
+    context has pattern_type && context.pattern_type == "destructive_sequence"
+};

package/_schemas/guardrails/templates/templates.json CHANGED Viewed

@@ -224,6 +224,105 @@
       "file": "profiles/multi_agent/agent_safety.cedar",
       "severity": "critical",
       "tags": ["profile", "multi-agent", "cross-turn", "a2a", "pii", "secrets", "injection", "circuit-breaker"]
+    },
+    {
+      "id": "code-agent-path-security",
+      "name": "Code Agent — Path Security",
+      "description": "Block access to .env files, credential files, system directories, credential directories, and destructive file operations for coding agents",
+      "category": "security",
+      "file": "profiles/code_agent/path_security.cedar",
+      "severity": "high",
+      "tags": ["profile", "code-agent", "path-security", "credentials", "system-paths"]
+    },
+    {
+      "id": "code-agent-supply-chain",
+      "name": "Code Agent — Supply Chain Security",
+      "description": "Block MCP server poisoning, indirect prompt injection from tool outputs, credential theft patterns, and destructive operation sequences for coding agents",
+      "category": "agentic_security",
+      "file": "profiles/code_agent/supply_chain.cedar",
+      "severity": "critical",
+      "tags": ["profile", "code-agent", "supply-chain", "tool-poisoning", "indirect-injection"]
+    },
+    {
+      "id": "code-agent-encoding",
+      "name": "Code Agent — Encoding Attacks",
+      "description": "Block invisible Unicode characters in tool arguments and file writes to prevent encoding-based prompt injection for coding agents",
+      "category": "security",
+      "file": "profiles/code_agent/encoding.cedar",
+      "severity": "high",
+      "tags": ["profile", "code-agent", "encoding", "unicode", "invisible-chars"]
+    },
+    {
+      "id": "advanced-detection-secrets",
+      "name": "Advanced Detection — Granular Secrets",
+      "description": "Granular secret type blocking for high-risk credentials (cloud provider keys, GitHub tokens, SSH keys, database URLs) and API keys/tokens",
+      "category": "security",
+      "file": "profiles/advanced_detection/secrets.cedar",
+      "severity": "critical",
+      "tags": ["profile", "advanced-detection", "secrets", "credentials", "cloud-keys"]
+    },
+    {
+      "id": "advanced-detection-pii",
+      "name": "Advanced Detection — PII",
+      "description": "Bulk PII exposure blocking, high-confidence ML PII detection, and PII in file operations for advanced threat detection",
+      "category": "privacy",
+      "file": "profiles/advanced_detection/pii.cedar",
+      "severity": "critical",
+      "tags": ["profile", "advanced-detection", "pii", "privacy", "ml-classifier"]
+    },
+    {
+      "id": "advanced-detection-threat-severity",
+      "name": "Advanced Detection — Threat Severity",
+      "description": "Block any content flagged with critical severity by detection engines as a catch-all safety net",
+      "category": "security",
+      "file": "profiles/advanced_detection/threat_severity.cedar",
+      "severity": "critical",
+      "tags": ["profile", "advanced-detection", "severity", "critical", "catch-all"]
+    },
+    {
+      "id": "a2a-cross-origin",
+      "name": "A2A Security — Cross-Origin Trust Boundaries",
+      "description": "Block confused deputy attacks and trust boundary violations from cross-system agent communication — critical cross-origin blocking, unverified agent restrictions, sensitive tool protection",
+      "category": "agent_identity",
+      "file": "profiles/a2a_security/cross_origin.cedar",
+      "severity": "critical",
+      "tags": ["profile", "a2a-security", "cross-origin", "confused-deputy", "trust-boundary"]
+    },
+    {
+      "id": "a2a-inter-agent-injection",
+      "name": "A2A Security — Inter-Agent Injection Defense",
+      "description": "Block indirect prompt injection via tool outputs, multi-turn progressive attacks using deep context models, and encoded payload delivery between independent agents",
+      "category": "agent_identity",
+      "file": "profiles/a2a_security/inter_agent_injection.cedar",
+      "severity": "critical",
+      "tags": ["profile", "a2a-security", "indirect-injection", "multi-turn", "encoded-injection", "deep-context"]
+    },
+    {
+      "id": "a2a-supply-chain",
+      "name": "A2A Security — Supply Chain & Behavioral Drift",
+      "description": "Block tool poisoning from external agent ecosystems, rug pull behavioral drift, and credential theft chains initiated by compromised agents",
+      "category": "agent_identity",
+      "file": "profiles/a2a_security/supply_chain.cedar",
+      "severity": "critical",
+      "tags": ["profile", "a2a-security", "supply-chain", "tool-poisoning", "rug-pull", "credential-theft"]
+    },
+    {
+      "id": "a2a-identity-enforcement",
+      "name": "A2A Security — Agent Identity Enforcement",
+      "description": "Enforce strict identity requirements for cross-system agents — block anonymous agents, require framework registration, prevent unverified autonomous agents",
+      "category": "agent_identity",
+      "file": "profiles/a2a_security/identity_enforcement.cedar",
+      "severity": "critical",
+      "tags": ["profile", "a2a-security", "identity", "spoofing", "framework", "autonomous"]
+    },
+    {
+      "id": "a2a-escalation-detection",
+      "name": "A2A Security — Escalation Detection & Circuit Breakers",
+      "description": "Detect progressive capability escalation across turns with session peak score monitoring and cumulative risk circuit breakers tuned for adversarial A2A communication",
+      "category": "agent_identity",
+      "file": "profiles/a2a_security/escalation_detection.cedar",
+      "severity": "critical",
+      "tags": ["profile", "a2a-security", "escalation", "circuit-breaker", "session-peak", "cumulative-risk"]
     }
   ],
   "profiles": [
@@ -238,10 +337,10 @@
     {
       "id": "code-agent",
       "name": "Code Agent",
-      "description": "Optimized for coding assistants — tool risk controls, shell blocking, loop detection, exfiltration prevention, budget enforcement",
+      "description": "Optimized for coding assistants — tool risk controls, shell blocking, loop detection, exfiltration prevention, budget enforcement, path security, supply chain defense, and encoding attack protection",
       "severity": "high",
-      "tags": ["code-agent", "tools", "agentic", "exfiltration"],
-      "template_ids": ["code-agent-agentic-security", "code-agent-security"]
+      "tags": ["code-agent", "tools", "agentic", "exfiltration", "path-security", "supply-chain", "encoding"],
+      "template_ids": ["code-agent-agentic-security", "code-agent-security", "code-agent-path-security", "code-agent-supply-chain", "code-agent-encoding"]
     },
     {
       "id": "data-pipeline",
@@ -253,11 +352,27 @@
     },
     {
       "id": "multi-agent",
-      "name": "Multi-Agent Orchestration",
-      "description": "Production-grade A2A guardrails for multi-agent systems — tiered trust access control, autonomous agent safeguards, cross-turn PII/secrets containment, injection escalation response, cumulative risk circuit breakers",
+      "name": "Multi-Agent Orchestration (MAS)",
+      "description": "Production-grade guardrails for multi-agent systems with shared orchestration — tiered trust access control, autonomous agent safeguards, cross-turn PII/secrets containment, injection escalation response, cumulative risk circuit breakers. For independent agent-to-agent communication across separate trust domains, use the A2A Security profile",
       "severity": "critical",
-      "tags": ["multi-agent", "a2a", "trust", "cross-turn", "circuit-breaker"],
+      "tags": ["multi-agent", "mas", "trust", "cross-turn", "circuit-breaker"],
       "template_ids": ["agent-identity-trust", "multi-agent-trust", "multi-agent-safety"]
+    },
+    {
+      "id": "a2a-security",
+      "name": "A2A Security",
+      "description": "Production-grade security for independent agent-to-agent communication across separate trust domains — cross-origin trust enforcement, inter-agent injection defense (indirect, multi-turn, encoded), supply chain protection (tool poisoning, rug pull), identity enforcement, and escalation circuit breakers",
+      "severity": "critical",
+      "tags": ["a2a-security", "cross-origin", "injection", "supply-chain", "identity", "escalation"],
+      "template_ids": ["a2a-cross-origin", "a2a-inter-agent-injection", "a2a-supply-chain", "a2a-identity-enforcement", "a2a-escalation-detection"]
+    },
+    {
+      "id": "advanced-detection",
+      "name": "Advanced Detection",
+      "description": "Production-grade advanced threat detection — granular secret type blocking, ML-based PII detection, bulk exposure prevention, and critical severity catch-all for high-security environments",
+      "severity": "critical",
+      "tags": ["advanced-detection", "secrets", "pii", "severity", "ml-detection"],
+      "template_ids": ["advanced-detection-secrets", "advanced-detection-pii", "advanced-detection-threat-severity"]
     }
   ]
 }

package/dist/guardrails-context.gen.d.ts CHANGED Viewed

@@ -41,6 +41,9 @@ export declare const GuardrailsContextKey: {
     readonly FactualityScore: "factuality_score";
     readonly HallucinationScore: "hallucination_score";
     readonly HateSpeechScore: "hate_speech_score";
+    readonly HighestSeverity: "highest_severity";
+    readonly IndirectInjectionScore: "indirect_injection_score";
+    readonly IndirectInjectionType: "indirect_injection_type";
     readonly InjectionConfidence: "injection_confidence";
     readonly InjectionDeepContextScore: "injection_deep_context_score";
     readonly InjectionPulseScore: "injection_pulse_score";
@@ -65,11 +68,13 @@ export declare const GuardrailsContextKey: {
     readonly McpServerVerified: "mcp_server_verified";
     readonly McpTool: "mcp_tool";
     readonly MultiTurnDetection: "multi_turn_detection";
+    readonly Path: "path";
     readonly PathTraversalDetected: "path_traversal_detected";
     readonly PathTraversalSeverity: "path_traversal_severity";
     readonly PathTraversalType: "path_traversal_type";
     readonly PatternType: "pattern_type";
     readonly PhishingDetected: "phishing_detected";
+    readonly PiiConfidence: "pii_confidence";
     readonly PiiCount: "pii_count";
     readonly PiiDetected: "pii_detected";
     readonly PiiTypes: "pii_types";

package/dist/guardrails-context.gen.js CHANGED Viewed

@@ -43,6 +43,9 @@ export const GuardrailsContextKey = {
     FactualityScore: 'factuality_score',
     HallucinationScore: 'hallucination_score',
     HateSpeechScore: 'hate_speech_score',
+    HighestSeverity: 'highest_severity',
+    IndirectInjectionScore: 'indirect_injection_score',
+    IndirectInjectionType: 'indirect_injection_type',
     InjectionConfidence: 'injection_confidence',
     InjectionDeepContextScore: 'injection_deep_context_score',
     InjectionPulseScore: 'injection_pulse_score',
@@ -67,11 +70,13 @@ export const GuardrailsContextKey = {
     McpServerVerified: 'mcp_server_verified',
     McpTool: 'mcp_tool',
     MultiTurnDetection: 'multi_turn_detection',
+    Path: 'path',
     PathTraversalDetected: 'path_traversal_detected',
     PathTraversalSeverity: 'path_traversal_severity',
     PathTraversalType: 'path_traversal_type',
     PatternType: 'pattern_type',
     PhishingDetected: 'phishing_detected',
+    PiiConfidence: 'pii_confidence',
     PiiCount: 'pii_count',
     PiiDetected: 'pii_detected',
     PiiTypes: 'pii_types',