npm - @highflame/policy - Versions diffs - 2.1.5 → 2.1.7 - Mend

@highflame/policy 2.1.5 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/_schemas/guardrails/context.json CHANGED Viewed

@@ -474,6 +474,36 @@
           "type": "number",
           "required": false,
           "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
+        },
+        {
+          "key": "agent_id",
+          "type": "string",
+          "required": false,
+          "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
+        },
+        {
+          "key": "agent_type",
+          "type": "string",
+          "required": false,
+          "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
+        },
+        {
+          "key": "agent_trust_level",
+          "type": "string",
+          "required": false,
+          "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
+        },
+        {
+          "key": "agent_framework",
+          "type": "string",
+          "required": false,
+          "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
+        },
+        {
+          "key": "agent_publisher",
+          "type": "string",
+          "required": false,
+          "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
         }
       ]
     },
@@ -607,6 +637,12 @@
           "required": false,
           "description": "Whether secrets were detected in tool arguments or content"
         },
+        {
+          "key": "secret_count",
+          "type": "number",
+          "required": false,
+          "description": "Total number of secret matches found in tool arguments or content. Multiple matches may indicate data dumps or accidental credential exposure"
+        },
         {
           "key": "secret_types",
           "type": "array",
@@ -882,6 +918,36 @@
           "type": "number",
           "required": false,
           "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
+        },
+        {
+          "key": "agent_id",
+          "type": "string",
+          "required": false,
+          "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
+        },
+        {
+          "key": "agent_type",
+          "type": "string",
+          "required": false,
+          "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
+        },
+        {
+          "key": "agent_trust_level",
+          "type": "string",
+          "required": false,
+          "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
+        },
+        {
+          "key": "agent_framework",
+          "type": "string",
+          "required": false,
+          "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
+        },
+        {
+          "key": "agent_publisher",
+          "type": "string",
+          "required": false,
+          "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
         }
       ]
     },
@@ -907,6 +973,12 @@
           "required": false,
           "description": "Whether secrets were detected in the file content being read"
         },
+        {
+          "key": "secret_count",
+          "type": "number",
+          "required": false,
+          "description": "Total number of secret matches found in file content. Multiple matches may indicate data dumps or accidental credential exposure"
+        },
         {
           "key": "secret_types",
           "type": "array",
@@ -1020,6 +1092,36 @@
           "type": "number",
           "required": false,
           "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
+        },
+        {
+          "key": "agent_id",
+          "type": "string",
+          "required": false,
+          "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
+        },
+        {
+          "key": "agent_type",
+          "type": "string",
+          "required": false,
+          "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
+        },
+        {
+          "key": "agent_trust_level",
+          "type": "string",
+          "required": false,
+          "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
+        },
+        {
+          "key": "agent_framework",
+          "type": "string",
+          "required": false,
+          "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
+        },
+        {
+          "key": "agent_publisher",
+          "type": "string",
+          "required": false,
+          "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
         }
       ]
     },
@@ -1045,6 +1147,12 @@
           "required": false,
           "description": "Whether secrets were detected in the content being written. Block writes containing credentials"
         },
+        {
+          "key": "secret_count",
+          "type": "number",
+          "required": false,
+          "description": "Total number of secret matches found in write content. Multiple matches may indicate data dumps or accidental credential exposure"
+        },
         {
           "key": "secret_types",
           "type": "array",
@@ -1158,6 +1266,36 @@
           "type": "number",
           "required": false,
           "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
+        },
+        {
+          "key": "agent_id",
+          "type": "string",
+          "required": false,
+          "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
+        },
+        {
+          "key": "agent_type",
+          "type": "string",
+          "required": false,
+          "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
+        },
+        {
+          "key": "agent_trust_level",
+          "type": "string",
+          "required": false,
+          "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
+        },
+        {
+          "key": "agent_framework",
+          "type": "string",
+          "required": false,
+          "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
+        },
+        {
+          "key": "agent_publisher",
+          "type": "string",
+          "required": false,
+          "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
         }
       ]
     },
@@ -1320,6 +1458,36 @@
           "type": "number",
           "required": false,
           "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
+        },
+        {
+          "key": "agent_id",
+          "type": "string",
+          "required": false,
+          "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
+        },
+        {
+          "key": "agent_type",
+          "type": "string",
+          "required": false,
+          "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
+        },
+        {
+          "key": "agent_trust_level",
+          "type": "string",
+          "required": false,
+          "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
+        },
+        {
+          "key": "agent_framework",
+          "type": "string",
+          "required": false,
+          "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
+        },
+        {
+          "key": "agent_publisher",
+          "type": "string",
+          "required": false,
+          "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
         }
       ]
     }

package/_schemas/guardrails/schema.cedarschema CHANGED Viewed

@@ -194,6 +194,15 @@ namespace Guardrails {
         "session_max_secret_score"?: Long,
         "session_cumulative_risk_score"?: Long,
+        // Agent Identity — authenticated agent principal metadata (optional)
+        // Present when the request is made by an AI agent (API key or JWT with agent claims).
+        // Empty strings for human user requests. Use these to write agent-specific policies.
+        "agent_id"?: String,             // Unique agent identifier (e.g., "agent_research_v3")
+        "agent_type"?: String,           // "orchestrator" | "autonomous" | "tool_agent" | "human_proxy"
+        "agent_trust_level"?: String,    // "first_party" | "verified_third_party" | "unverified"
+        "agent_framework"?: String,      // Agent framework (e.g., "claude-code", "langchain", "crewai")
+        "agent_publisher"?: String,      // Organization that published the agent
     };
     /// Context for call_tool action (agentic tool execution)
@@ -234,6 +243,7 @@ namespace Guardrails {
         // Security checks on tool arguments (optional)
         "contains_secrets"?: Bool,
+        "secret_count"?: Long,
         "secret_types"?: Set<String>,
         "pii_detected"?: Bool,
         "pii_types"?: Set<String>,
@@ -295,6 +305,13 @@ namespace Guardrails {
         "session_max_secret_score"?: Long,
         "session_cumulative_risk_score"?: Long,
+        // Agent Identity — authenticated agent principal metadata (optional)
+        "agent_id"?: String,
+        "agent_type"?: String,
+        "agent_trust_level"?: String,
+        "agent_framework"?: String,
+        "agent_publisher"?: String,
     };
     /// Context for read_file action
@@ -305,6 +322,7 @@ namespace Guardrails {
         // Security checks on file content (optional)
         "contains_secrets"?: Bool,
+        "secret_count"?: Long,
         "secret_types"?: Set<String>,
         "pii_detected"?: Bool,
         "pii_types"?: Set<String>,
@@ -329,6 +347,13 @@ namespace Guardrails {
         "session_max_secret_score"?: Long,
         "session_cumulative_risk_score"?: Long,
+        // Agent Identity — authenticated agent principal metadata (optional)
+        "agent_id"?: String,
+        "agent_type"?: String,
+        "agent_trust_level"?: String,
+        "agent_framework"?: String,
+        "agent_publisher"?: String,
     };
     /// Context for write_file action
@@ -339,6 +364,7 @@ namespace Guardrails {
         // Security checks on content being written (optional)
         "contains_secrets"?: Bool,
+        "secret_count"?: Long,
         "secret_types"?: Set<String>,
         "pii_detected"?: Bool,
         "pii_types"?: Set<String>,
@@ -363,6 +389,13 @@ namespace Guardrails {
         "session_max_secret_score"?: Long,
         "session_cumulative_risk_score"?: Long,
+        // Agent Identity — authenticated agent principal metadata (optional)
+        "agent_id"?: String,
+        "agent_type"?: String,
+        "agent_trust_level"?: String,
+        "agent_framework"?: String,
+        "agent_publisher"?: String,
     };
     /// Context for connect_server action (MCP server connections)
@@ -405,5 +438,12 @@ namespace Guardrails {
         "session_max_secret_score"?: Long,
         "session_cumulative_risk_score"?: Long,
+        // Agent Identity — authenticated agent principal metadata (optional)
+        "agent_id"?: String,
+        "agent_type"?: String,
+        "agent_trust_level"?: String,
+        "agent_framework"?: String,
+        "agent_publisher"?: String,
     };
 }

package/_schemas/guardrails/templates/defaults/agent_identity.cedar ADDED Viewed

@@ -0,0 +1,118 @@
+// =============================================================================
+// Agent Identity Policy — Agent-to-Agent Security Defaults
+// =============================================================================
+// Enforces trust-based access control for AI agents authenticated via API key
+// or JWT with agent claims. These policies use agent identity context keys
+// populated from Shield's authentication layer.
+//
+// Agent trust levels:
+//   - first_party:          Your own agents (highest trust)
+//   - verified_third_party: Audited external agents (medium trust)
+//   - unverified:           Unknown/untrusted agents (lowest trust)
+//
+// Agent types:
+//   - orchestrator:   Coordinates sub-agents
+//   - autonomous:     Self-directed, no human in the loop
+//   - tool_agent:     Single-purpose tool execution
+//   - human_proxy:    Acts on behalf of a human
+//
+// Context keys used:
+// - agent_id: String - Unique agent identifier
+// - agent_type: String - Agent classification
+// - agent_trust_level: String - Trust tier
+// - agent_framework: String - Agent framework/SDK
+// - agent_publisher: String - Publishing organization
+// - tool_name: String - Tool being called
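+// - tool_category: String - "safe" | "sensitive" | "dangerous"
+// - tool_is_sensitive: Bool - Whether the tool is flagged as sensitive
+// - tool_risk_score: Long (0-100) - Computed risk score
+// - injection_confidence: Long (0-100) - Injection detection score
+// - session_threat_turns: Long - Count of prior session turns with detected threats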
+//
+// Category: agent_identity
+// Namespace: Guardrails
+// =============================================================================
+// -----------------------------------------------------------------------------
+// Trust-Based Tool Access
+// -----------------------------------------------------------------------------
+@id("agent-block-unverified-dangerous-tools")
+@name("Block unverified agents from dangerous tools")
+@description("Unverified agents cannot execute tools classified as dangerous. Require first_party or verified_third_party trust level for high-risk operations")
+@severity("critical")
+@tags("agent-identity,trust,tools,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has tool_category && context.tool_category == "dangerous"
+};
+@id("agent-block-unverified-shell")
+@name("Block unverified agents from shell execution")
+@description("Shell and command execution are restricted to first_party and verified_third_party agents")
+@severity("critical")
+@tags("agent-identity,trust,shell,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has tool_name &&
+    (context.tool_name == "shell" ||
+     context.tool_name == "execute_command" ||
+     context.tool_name == "bash")
+};
+@id("agent-block-unverified-sensitive-tools")
+@name("Block unverified agents from sensitive tools with elevated risk")
+@description("Unverified agents cannot execute sensitive tools with risk score above 60")
+@severity("high")
+@tags("agent-identity,trust,tools,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has tool_is_sensitive && context.tool_is_sensitive == true &&
+    context has tool_risk_score && context.tool_risk_score > 60
+};
+// -----------------------------------------------------------------------------
+// Autonomous Agent Restrictions
+// -----------------------------------------------------------------------------
+@id("agent-block-autonomous-injection")
+@name("Stricter injection threshold for autonomous agents")
+@description("Autonomous agents operate without human oversight. Apply lower injection confidence threshold (50 vs standard 80) to compensate for lack of human review")
+@severity("high")
+@tags("agent-identity,autonomous,injection,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"process_prompt",
+    resource
+) when {
+    context has agent_type && context.agent_type == "autonomous" &&
+    context has injection_confidence && context.injection_confidence > 50
+};
+// -----------------------------------------------------------------------------
+// Cross-Turn Agent Trust Enforcement
+// -----------------------------------------------------------------------------
+@id("agent-block-unverified-after-threats")
+@name("Block unverified agents after session threats")
+@description("If any prior turn in the session detected threats, block unverified agents from further tool calls. Prevents compromised sessions from being exploited by untrusted agents")
+@severity("critical")
+@tags("agent-identity,trust,session,cross-turn,a2a")
+forbid (
+    principal is Guardrails::Agent,
+    action == Guardrails::Action::"call_tool",
+    resource
+) when {
+    context has agent_trust_level && context.agent_trust_level == "unverified" &&
+    context has session_threat_turns && context.session_threat_turns > 0
+};

package/_schemas/guardrails/templates/defaults/agentic_safety.cedar CHANGED Viewed

@@ -40,7 +40,7 @@ forbid (
 @tags("agentic,exfiltration,security")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"call_tool",
     resource
 ) when {
     context has suspicious_pattern && context.suspicious_pattern == true &&
@@ -59,7 +59,7 @@ forbid (
 @tags("agentic,patterns,security")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"call_tool",
     resource
 ) when {
     context has sequence_risk && context.sequence_risk > 80
@@ -72,7 +72,7 @@ forbid (
 @tags("agentic,budget,cost-control")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"call_tool",
     resource
 ) when {
     context has budget_exceeded && context.budget_exceeded == true
@@ -85,7 +85,7 @@ forbid (
 @tags("agentic,budget,cost-control")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"call_tool",
     resource
 ) when {
     context has budget_remaining_pct &&

package/_schemas/guardrails/templates/defaults/injection.cedar CHANGED Viewed

@@ -22,7 +22,7 @@
 @tags("injection,jailbreak,security")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
     resource
 ) when {
     context has injection_confidence && context.injection_confidence > 85
@@ -35,7 +35,7 @@ forbid (
 @tags("jailbreak,security")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"process_prompt",
     resource
 ) when {
     context has jailbreak_confidence && context.jailbreak_confidence > 80
@@ -48,7 +48,7 @@ forbid (
 @tags("injection,jailbreak,security")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"process_prompt",
     resource
 ) when {
     context has injection_confidence && context has jailbreak_confidence &&
@@ -62,7 +62,7 @@ forbid (
 @tags("injection,unicode,security")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"process_prompt",
     resource
 ) when {
     context has contains_invisible_chars && context.contains_invisible_chars == true &&

package/_schemas/guardrails/templates/defaults/secrets.cedar CHANGED Viewed

@@ -20,7 +20,7 @@
 @tags("secrets,security,data-leak")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
     resource
 ) when {
     context has contains_secrets && context.contains_secrets == true
@@ -33,7 +33,7 @@ forbid (
 @tags("secrets,security,data-leak")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
     resource
 ) when {
     context has secret_count && context.secret_count > 2

package/_schemas/guardrails/templates/defaults/security_patterns.cedar CHANGED Viewed

@@ -36,7 +36,7 @@ forbid (
 @tags("path-traversal,security")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
     resource
 ) when {
     context has path_traversal_detected && context.path_traversal_detected == true &&

package/_schemas/guardrails/templates/defaults/semantic.cedar CHANGED Viewed

@@ -22,9 +22,10 @@ forbid (
     action == Guardrails::Action::"process_prompt",
     resource
 ) when {
-    context.content_topics.contains("weapons_manufacturing") ||
-    context.content_topics.contains("explosive_materials") ||
-    context.content_topics.contains("illegal_synthesis")
+    context has content_topics &&
+    (context.content_topics.contains("weapons_manufacturing") ||
+     context.content_topics.contains("explosive_materials") ||
+     context.content_topics.contains("illegal_synthesis"))
 };
 @id("semantic-block-controlled-substances")
@@ -37,9 +38,10 @@ forbid (
     action == Guardrails::Action::"process_prompt",
     resource
 ) when {
-    context.content_topics.contains("controlled_substances") ||
-    context.content_topics.contains("drug_manufacturing") ||
-    context.content_topics.contains("precursor_chemicals")
+    context has content_topics &&
+    (context.content_topics.contains("controlled_substances") ||
+     context.content_topics.contains("drug_manufacturing") ||
+     context.content_topics.contains("precursor_chemicals"))
 };
 @id("semantic-block-high-confidence-restricted")
@@ -52,7 +54,8 @@ forbid (
     action == Guardrails::Action::"process_prompt",
     resource
 ) when {
-    context.topic_confidence > 80 &&
+    context has topic_confidence && context.topic_confidence > 80 &&
+    context has content_topics &&
     (context.content_topics.contains("illegal_activity") ||
      context.content_topics.contains("financial_fraud") ||
      context.content_topics.contains("social_engineering"))

package/_schemas/guardrails/templates/defaults/toxicity.cedar CHANGED Viewed

@@ -23,7 +23,7 @@
 @tags("toxicity,trust-safety")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"process_prompt",
     resource
 ) when {
     (context has violence_score && context.violence_score > 90) ||
@@ -41,7 +41,7 @@ forbid (
 @tags("hate-speech,trust-safety")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"process_prompt",
     resource
 ) when {
     context has hate_speech_score && context.hate_speech_score > 75
@@ -54,7 +54,7 @@ forbid (
 @tags("toxicity,trust-safety")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"process_prompt",
     resource
 ) when {
     // Any 2 of 6 toxicity scores above 60 triggers a block

package/_schemas/guardrails/templates/mcp_tool_permissions.cedar CHANGED Viewed

@@ -28,7 +28,8 @@ permit (
     action == Guardrails::Action::"call_tool",
     resource
 ) when {
-    context.mcp_server == "github" &&
+    context has mcp_server && context.mcp_server == "github" &&
+    context has tool_name &&
     (context.tool_name == "read_issues" ||
      context.tool_name == "get_issue" ||
      context.tool_name == "list_repos" ||
@@ -47,7 +48,7 @@ forbid (
     action == Guardrails::Action::"call_tool",
     resource
 ) when {
-    context.mcp_server == "github"
+    context has mcp_server && context.mcp_server == "github"
 };
 // -- Organization-wide MCP server exclusions --------------------------------
@@ -64,8 +65,9 @@ forbid (
 ) when {
     // Add server names to block across the organization.
     // Modify this list to match your exclusion requirements.
-    context.mcp_server == "untrusted-server" ||
-    context.mcp_server == "deprecated-server"
+    context has mcp_server &&
+    (context.mcp_server == "untrusted-server" ||
+     context.mcp_server == "deprecated-server")
 };
 // -- Block unverified MCP servers -------------------------------------------
@@ -80,5 +82,5 @@ forbid (
     action == Guardrails::Action::"call_tool",
     resource
 ) when {
-    context.mcp_server_verified == false
+    context has mcp_server_verified && context.mcp_server_verified == false
 };

package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar CHANGED Viewed

@@ -15,7 +15,7 @@
 @tags("profile,chat-assistant,pii,privacy")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
     resource
 ) when {
     context has pii_detected && context.pii_detected == true

package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar CHANGED Viewed

@@ -15,7 +15,7 @@
 @tags("profile,chat-assistant,injection,security")
 forbid (
     principal,
-    action,
+    action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
     resource
 ) when {
     context has injection_confidence && context.injection_confidence > 70
@@ -28,7 +28,7 @@ forbid (
 @tags("profile,chat-assistant,jailbreak,security")
 forbid (
     principal,
-    action,
+    action == Guardrails::Action::"process_prompt",
     resource
 ) when {
     context has jailbreak_confidence && context.jailbreak_confidence > 65