npm - @highflame/policy - Versions diffs - 2.1.36 → 2.1.38 - Mend

@highflame/policy 2.1.36 → 2.1.38

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (79)

package/_schemas/guardrails/context.json CHANGED Viewed

@@ -38,13 +38,13 @@
           "description": "Number of detectors that were executed for this request"
         },
         {
-          "key": "injection_confidence",
+          "key": "injection_score",
           "type": "number",
           "required": false,
           "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
         },
         {
-          "key": "jailbreak_confidence",
+          "key": "jailbreak_score",
           "type": "number",
           "required": false,
           "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control"
@@ -80,7 +80,7 @@
           "description": "Type of injection detected: 'prompt', 'sql', 'command', or 'none'. Use this to apply different policies per injection type"
         },
         {
-          "key": "contains_secrets",
+          "key": "secrets_detected",
           "type": "boolean",
           "required": false,
           "description": "Whether any API keys, tokens, passwords, or credentials were detected in the content. True indicates presence of secrets"
@@ -116,7 +116,7 @@
           "description": "Array of PII types detected (e.g., ['email', 'phone', 'ssn', 'credit_card']). Use .contains() to block specific sensitive types"
         },
         {
-          "key": "pii_confidence",
+          "key": "pii_score",
           "type": "number",
           "required": false,
           "description": "PII detection ML classifier confidence (0-100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss. Typical threshold: >=80 for high-confidence blocking"
@@ -176,7 +176,7 @@
           "description": "Confidence score from topic classifier (0-100). Use with content_topics to tune sensitivity — higher thresholds reduce false positives"
         },
         {
-          "key": "contains_invisible_chars",
+          "key": "invisible_chars_detected",
           "type": "boolean",
           "required": false,
           "description": "Whether invisible Unicode characters (zero-width joiners, RTL marks, etc.) were detected in the content. Commonly used for prompt injection evasion"
@@ -644,7 +644,7 @@
           "description": "Confidence score from topic classifier for tool content (0-100)"
         },
         {
-          "key": "contains_secrets",
+          "key": "secrets_detected",
           "type": "boolean",
           "required": false,
           "description": "Whether secrets were detected in tool arguments or content"
@@ -680,13 +680,13 @@
           "description": "Number of PII pattern matches in tool arguments or content. Use >=3 to detect bulk PII exposure indicating data dumps or CSV pastes"
         },
         {
-          "key": "pii_confidence",
+          "key": "pii_score",
           "type": "number",
           "required": false,
           "description": "PII detection ML classifier confidence for tool content (0-100). Catches novel PII patterns that escape regex detection"
         },
         {
-          "key": "injection_confidence",
+          "key": "injection_score",
           "type": "number",
           "required": false,
           "description": "Combined prompt injection confidence in tool arguments (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
@@ -854,7 +854,7 @@
           "description": "File path targeted by the tool call (if file operation). Use for path-based blocking of .env files, credential files, system directories, and credential directories"
         },
         {
-          "key": "contains_invisible_chars",
+          "key": "invisible_chars_detected",
           "type": "boolean",
           "required": false,
           "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in tool arguments. Tool args should be plain text — invisible chars indicate payload injection"
@@ -1028,7 +1028,7 @@
           "description": "File path being read. Use for path-based access control policies (e.g., block .env files, system directories, credential directories)"
         },
         {
-          "key": "contains_secrets",
+          "key": "secrets_detected",
           "type": "boolean",
           "required": false,
           "description": "Whether secrets were detected in the file content being read"
@@ -1208,7 +1208,7 @@
           "description": "File path being written. Use for path-based blocking policies (e.g., block writes to .env files, credential directories)"
         },
         {
-          "key": "contains_invisible_chars",
+          "key": "invisible_chars_detected",
           "type": "boolean",
           "required": false,
           "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in the content being written. Prevents persistence of invisible payloads in source code"
@@ -1220,7 +1220,7 @@
           "description": "Invisible character attack severity score (0-100) in write content"
         },
         {
-          "key": "contains_secrets",
+          "key": "secrets_detected",
           "type": "boolean",
           "required": false,
           "description": "Whether secrets were detected in the content being written. Block writes containing credentials"

package/_schemas/guardrails/schema.cedarschema CHANGED Viewed

@@ -104,8 +104,8 @@ namespace Guardrails {
         "detector_count": Long,
         // Security - Injection & Jailbreak (optional)
-        "injection_confidence"?: Long,  // Combined injection confidence: MAX(pulse, deep_context)
-        "jailbreak_confidence"?: Long,  // Combined jailbreak confidence: MAX(pulse, deep_context)
+        "injection_score"?: Long,  // Combined injection confidence: MAX(pulse, deep_context)
+        "jailbreak_score"?: Long,  // Combined jailbreak confidence: MAX(pulse, deep_context)
         "injection_pulse_score"?: Long,  // 0-100 Pulse single-turn classifier
         "injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
         "jailbreak_pulse_score"?: Long,  // 0-100 Pulse single-turn classifier
@@ -113,7 +113,7 @@ namespace Guardrails {
         "injection_type"?: String,       // "prompt" | "sql" | "command" | "none"
         // Privacy - Secrets (optional)
-        "contains_secrets"?: Bool,
+        "secrets_detected"?: Bool,
         "secret_count"?: Long,
         "secret_types"?: Set<String>,    // ["aws_access_key", "github_token", ...]
@@ -121,7 +121,7 @@ namespace Guardrails {
         "pii_detected"?: Bool,
         "pii_count"?: Long,
         "pii_types"?: Set<String>,       // ["email", "phone", "ssn", "credit_card", ...]
-        "pii_confidence"?: Long,         // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
+        "pii_score"?: Long,         // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
         // Threat Severity Aggregation (optional)
         "highest_severity"?: String,     // Highest severity across all detectors: "critical" | "high" | "medium" | "low" | "none"
@@ -139,7 +139,7 @@ namespace Guardrails {
         "topic_confidence"?: Long,           // 0-100
         // Security - Invisible Character Detection (optional)
-        "contains_invisible_chars"?: Bool,
+        "invisible_chars_detected"?: Bool,
         "invisible_chars_score"?: Long,      // 0-100
         // Security - Pattern Detection (optional)
@@ -281,14 +281,14 @@ namespace Guardrails {
         "topic_confidence"?: Long,           // 0-100
         // Security checks on tool arguments (optional)
-        "contains_secrets"?: Bool,
+        "secrets_detected"?: Bool,
         "secret_count"?: Long,
         "secret_types"?: Set<String>,
         "pii_detected"?: Bool,
         "pii_types"?: Set<String>,
         "pii_count"?: Long,              // Number of PII pattern matches in tool content
-        "pii_confidence"?: Long,         // PII ML classifier confidence (0-100)
-        "injection_confidence"?: Long,
+        "pii_score"?: Long,         // PII ML classifier confidence (0-100)
+        "injection_score"?: Long,
         "injection_pulse_score"?: Long,  // 0-100 Pulse single-turn classifier
         "injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
@@ -312,7 +312,7 @@ namespace Guardrails {
         "path"?: String,                         // File path when tool operates on files
         // Security - Invisible Character Detection in tool args (optional)
-        "contains_invisible_chars"?: Bool,       // Whether invisible Unicode chars detected in tool args
+        "invisible_chars_detected"?: Bool,       // Whether invisible Unicode chars detected in tool args
         "invisible_chars_score"?: Long,          // Invisible character attack severity (0-100)
         // Security - Encoded Injection (optional)
@@ -376,7 +376,7 @@ namespace Guardrails {
         "path"?: String,                 // File path being read
         // Security checks on file content (optional)
-        "contains_secrets"?: Bool,
+        "secrets_detected"?: Bool,
         "secret_count"?: Long,
         "secret_types"?: Set<String>,
         "pii_detected"?: Bool,
@@ -436,11 +436,11 @@ namespace Guardrails {
         "path"?: String,                         // File path being written
         // Security - Invisible Character Detection in write content (optional)
-        "contains_invisible_chars"?: Bool,       // Whether invisible Unicode chars detected in write content
+        "invisible_chars_detected"?: Bool,       // Whether invisible Unicode chars detected in write content
         "invisible_chars_score"?: Long,          // Invisible character attack severity (0-100)
         // Security checks on content being written (optional)
-        "contains_secrets"?: Bool,
+        "secrets_detected"?: Bool,
         "secret_count"?: Long,
         "secret_types"?: Set<String>,
         "pii_detected"?: Bool,

package/_schemas/guardrails/templates/defaults/agent_identity.cedar CHANGED Viewed

@@ -1,9 +1,9 @@
 // =============================================================================
-// Agent Identity Policy — Agent-to-Agent Security Defaults
+// Agent Identity & Trust (Default)
 // =============================================================================
-// Enforces trust-based access control for AI agents authenticated via API key
-// or JWT with agent claims. These policies use agent identity context keys
-// populated from Shield's authentication layer.
+// Trust-based access control for AI agents authenticated via API key or JWT
+// with agent claims. Uses identity context keys populated from Shield's
+// authentication layer.
 //
 // Agent trust levels:
 //   - first_party:          Your own agents (highest trust)
@@ -11,108 +11,112 @@
 //   - unverified:           Unknown/untrusted agents (lowest trust)
 //
 // Agent types:
-//   - orchestrator:   Coordinates sub-agents
-//   - autonomous:     Self-directed, no human in the loop
-//   - tool_agent:     Single-purpose tool execution
-//   - human_proxy:    Acts on behalf of a human
+//   - orchestrator, autonomous, tool_agent, human_proxy
 //
-// Context keys used:
-// - agent_id: String - Unique agent identifier
-// - agent_type: String - Agent classification
-// - agent_trust_level: String - Trust tier
-// - agent_framework: String - Agent framework/SDK
-// - agent_publisher: String - Publishing organization
-// - tool_name: String - Tool being called
-// - tool_category: String - "safe" | "sensitive" | "dangerous"
-// - tool_risk_score: Long (0-100) - Computed risk score
-// - injection_confidence: Long (0-100) - Injection detection score
+// Context keys consumed:
+//   - agent_id, agent_type, agent_trust_level, agent_framework, agent_publisher
+//   - tool_name, tool_category, tool_is_sensitive, tool_risk_score
+//   - injection_score
+//   - session_threat_turns
 //
-// Category: agent_identity
+// Category:  agent-identity
 // Namespace: Guardrails
 // =============================================================================
-// -----------------------------------------------------------------------------
-// Trust-Based Tool Access
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 1: Trust-based tool access
+// ---------------------------------------------------------------------------
-@id("agent-block-unverified-dangerous-tools")
+@id("agent-identity.block-unverified-dangerous-tools")
 @name("Block unverified agents from dangerous tools")
-@description("Unverified agents cannot execute tools classified as dangerous. Require first_party or verified_third_party trust level for high-risk operations")
+@description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_category is \"dangerous\".")
 @severity("critical")
-@tags("agent-identity,trust,tools,a2a")
+@tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
+@reject_message("Tool execution blocked: unverified agents cannot execute dangerous tools.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has agent_trust_level && context.agent_trust_level == "unverified" &&
     context has tool_category && context.tool_category == "dangerous"
 };
-@id("agent-block-unverified-shell")
+@id("agent-identity.block-unverified-shell")
 @name("Block unverified agents from shell execution")
-@description("Shell and command execution are restricted to first_party and verified_third_party agents")
+@description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_name is a shell tool.")
 @severity("critical")
-@tags("agent-identity,trust,shell,a2a")
+@tags("category:agent-identity,threat:command-injection,scope:per-agent,detection:rule,surface:call-tool")
+@reject_message("Tool execution blocked: shell and command execution require a verified agent.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has agent_trust_level && context.agent_trust_level == "unverified" &&
     context has tool_name &&
-    (context.tool_name == "shell" ||
-     context.tool_name == "execute_command" ||
-     context.tool_name == "bash")
+    (
+        context.tool_name == "shell" ||
+        context.tool_name == "execute_command" ||
+        context.tool_name == "bash"
+    )
 };
-@id("agent-block-unverified-sensitive-tools")
-@name("Block unverified agents from sensitive tools with elevated risk")
-@description("Unverified agents cannot execute sensitive tools with risk score above 60")
+@id("agent-identity.block-unverified-sensitive-tools")
+@name("Block unverified agents from sensitive tools")
+@description("Blocks call_tool when agent_trust_level is \"unverified\", tool_is_sensitive is true, and tool_risk_score >= 61.")
 @severity("high")
-@tags("agent-identity,trust,tools,a2a")
+@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
+@reject_message("Tool execution blocked: unverified agent attempted a sensitive tool with elevated risk.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has agent_trust_level && context.agent_trust_level == "unverified" &&
     context has tool_is_sensitive && context.tool_is_sensitive == true &&
-    context has tool_risk_score && context.tool_risk_score > 60
+    context has tool_risk_score && context.tool_risk_score >= 61
 };
-// -----------------------------------------------------------------------------
-// Autonomous Agent Restrictions
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 2: Autonomous-agent restrictions
+// ---------------------------------------------------------------------------
-@id("agent-block-autonomous-injection")
-@name("Stricter injection threshold for autonomous agents")
-@description("Autonomous agents operate without human oversight. Apply lower injection confidence threshold (50 vs standard 80) to compensate for lack of human review")
+@id("agent-identity.block-autonomous-injection")
+@name("Block injection on autonomous agents")
+@description("Blocks process_prompt when agent_type is \"autonomous\" and injection_score >= 51.")
 @severity("high")
-@tags("agent-identity,autonomous,injection,a2a")
+@tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
+@reject_message("Prompt blocked: lower injection threshold applies to autonomous agents and was exceeded.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"process_prompt",
     resource
-) when {
+)
+when {
     context has agent_type && context.agent_type == "autonomous" &&
-    context has injection_confidence && context.injection_confidence > 50
+    context has injection_score && context.injection_score >= 51
 };
-// -----------------------------------------------------------------------------
-// Cross-Turn Agent Trust Enforcement
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 3: Cross-turn enforcement
+// ---------------------------------------------------------------------------
-@id("agent-block-unverified-after-threats")
+@id("agent-identity.block-unverified-after-threats")
 @name("Block unverified agents after session threats")
-@description("If any prior turn in the session detected threats, block unverified agents from further tool calls. Prevents compromised sessions from being exploited by untrusted agents")
+@description("Blocks call_tool when agent_trust_level is \"unverified\" and session_threat_turns >= 1.")
 @severity("critical")
-@tags("agent-identity,trust,session,cross-turn,a2a")
+@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
+@reject_message("Tool execution blocked: prior turns in this session detected threats; unverified agents cannot continue.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has agent_trust_level && context.agent_trust_level == "unverified" &&
-    context has session_threat_turns && context.session_threat_turns > 0
+    context has session_threat_turns && context.session_threat_turns >= 1
 };

package/_schemas/guardrails/templates/defaults/agentic_safety.cedar CHANGED Viewed

@@ -1,48 +1,63 @@
 // =============================================================================
-// Agentic Safety Policy
+// Agentic Safety (Default)
 // =============================================================================
-// Blocks suspicious agentic behavior patterns:
-// - Infinite loops (repeated tool calls)
-// - Suspicious action sequences (data exfiltration patterns)
-// - Budget violations (token/cost limits)
+// Blocks suspicious agentic behaviour patterns: tool-call loops, data
+// exfiltration sequences, budget overruns, plus MCP supply-chain risks
+// (tool poisoning, rug pull, risky MCP configs).
 //
-// Context keys used (normalized by projection layer):
-// - loop_detected: bool - Whether tool call loop detected
-// - loop_count: Long - Number of consecutive repeated calls
-// - suspicious_pattern: bool - Whether suspicious sequence detected
-// - pattern_type: String - Type of pattern (e.g., "data_exfiltration")
-// - sequence_risk: Long (0-100) - Risk score from sequence analysis
-// - budget_exceeded: bool - Whether token budget exceeded
-// - budget_remaining_pct: Long (0-100) - Remaining budget percentage
+// Context keys consumed:
+//   - loop_detected:         Bool
+//   - loop_count:            Long
+//   - suspicious_pattern:    Bool
+//   - pattern_type:          String
+//   - sequence_risk:         Long (0-100)
+//   - budget_exceeded:       Bool
+//   - budget_remaining_pct:  Long (0-100)
+//   - tool_poisoning_score:  Long (0-100)
+//   - rug_pull_detected:     Bool
+//   - rug_pull_score:        Long (0-100)
+//   - mcp_config_risk:       Bool
+//   - mcp_risk_score:        Long (0-100)
 //
-// Category: agentic-security
+// Compliance:
+//   - OWASP LLM06, OWASP ASI01, OWASP ASI04
+//
+// Category:  agent-security
 // Namespace: Guardrails
 // =============================================================================
-@id("agentic-block-loops")
+// ---------------------------------------------------------------------------
+// Section 1: Loops, exfiltration, budget
+// ---------------------------------------------------------------------------
+@id("agent-security.block-loops")
 @name("Block tool call loops")
-@description("Forbids repeated tool calls indicating infinite loop")
+@description("Blocks call_tool when loop_detected is true and loop_count >= 4.")
 @severity("high")
-@tags("agentic,loops,safety")
+@tags("category:agent-security,threat:loop,detection:aggregate,surface:call-tool")
+@reject_message("Tool execution blocked: an infinite tool-call loop was detected.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has loop_detected && context.loop_detected == true &&
-    context has loop_count && context.loop_count > 3
+    context has loop_count && context.loop_count >= 4
 };
-@id("agentic-block-exfiltration")
+@id("agent-security.block-exfiltration")
 @name("Block data exfiltration patterns")
-@description("Forbids suspicious action sequences like read → http_post")
+@description("Blocks call_tool when suspicious_pattern is true and pattern_type matches an exfiltration class.")
 @severity("critical")
-@tags("agentic,exfiltration,security")
+@tags("category:agent-security,threat:exfiltration,detection:rule,surface:call-tool")
+@reject_message("Tool execution blocked: suspicious action sequence consistent with data exfiltration.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has suspicious_pattern && context.suspicious_pattern == true &&
     context has pattern_type &&
     (
@@ -52,88 +67,98 @@ forbid (
     )
 };
-@id("agentic-block-high-sequence-risk")
+@id("agent-security.block-sequence-risk")
 @name("Block high-risk action sequences")
-@description("Forbids action sequences with risk score above 80")
+@description("Blocks call_tool when sequence_risk >= 81.")
 @severity("high")
-@tags("agentic,patterns,security")
+@tags("category:agent-security,detection:aggregate,surface:call-tool")
+@reject_message("Tool execution blocked: action sequence scored a high cumulative risk.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
-    context has sequence_risk && context.sequence_risk > 80
+)
+when {
+    context has sequence_risk && context.sequence_risk >= 81
 };
-@id("agentic-block-budget-exceeded")
-@name("Block budget violations")
-@description("Forbids requests when token budget is exceeded")
+@id("agent-security.block-budget-exceeded")
+@name("Block on budget exceeded")
+@description("Blocks call_tool when budget_exceeded is true.")
 @severity("medium")
-@tags("agentic,budget,cost-control")
+@tags("category:agent-security,threat:budget-violation,detection:rule,surface:call-tool")
+@reject_message("Tool execution blocked: the agent has exhausted its token or cost budget.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has budget_exceeded && context.budget_exceeded == true
 };
-@id("agentic-block-low-budget")
-@name("Block requests when budget critically low")
-@description("Forbids requests when remaining budget falls below 5%")
+@id("agent-security.block-budget-low")
+@name("Block when budget critically low")
+@description("Blocks call_tool when budget_remaining_pct <= 4.")
 @severity("medium")
-@tags("agentic,budget,cost-control")
+@tags("category:agent-security,threat:budget-violation,detection:rule,surface:call-tool")
+@reject_message("Tool execution blocked: remaining budget is below the critical threshold.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
-    context has budget_remaining_pct &&
-    context.budget_remaining_pct < 5 &&
-    context.budget_remaining_pct > 0
+)
+when {
+    context has budget_remaining_pct && context.budget_remaining_pct <= 4
 };
-// =============================================================================
-// Agent Security — Supply Chain & Behavioral Drift
-// =============================================================================
+// ---------------------------------------------------------------------------
+// Section 2: MCP supply chain
+// ---------------------------------------------------------------------------
-@id("agentic-block-tool-poisoning")
-@name("Block tool poisoning attacks")
-@description("Forbids tool calls or server connections when hidden instructions or authority hijack patterns are detected in tool descriptions or arguments")
+@id("agent-security.block-tool-poisoning")
+@name("Block tool poisoning")
+@description("Blocks call_tool and connect_server when tool_poisoning_score >= 70.")
 @severity("critical")
-@tags("agentic,tool-poisoning,supply-chain")
+@tags("category:agent-security,threat:tool-poisoning,detection:ml,owasp:asi01")
+@reject_message("Tool execution blocked: hidden manipulation instructions detected in tool description or arguments (OWASP ASI01).")
 forbid (
     principal,
     action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
     resource
-) when {
+)
+when {
     context has tool_poisoning_score && context.tool_poisoning_score >= 70
 };
-@id("agentic-block-rug-pull")
+@id("agent-security.block-rug-pull")
 @name("Block rug pull attacks")
-@description("Forbids tool calls when significant behavioral drift is detected (tool output deviates from established patterns)")
+@description("Blocks call_tool when rug_pull_detected is true and rug_pull_score >= 70.")
 @severity("high")
-@tags("agentic,rug-pull,behavioral-drift")
+@tags("category:agent-security,threat:rug-pull,detection:ml,owasp:asi04")
+@reject_message("Tool execution blocked: tool behavior has drifted significantly from its established pattern.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has rug_pull_detected && context.rug_pull_detected == true &&
     context has rug_pull_score && context.rug_pull_score >= 70
 };
-@id("agentic-block-mcp-config-risk")
-@name("Block risky MCP configurations")
-@description("Forbids tool calls or server connections when MCP configuration risks are detected (inline execution, suspicious URLs, cross-origin issues)")
+@id("agent-security.block-mcp-config-risk")
+@name("Block risky MCP server configs")
+@description("Blocks call_tool and connect_server when mcp_config_risk is true and mcp_risk_score >= 70.")
 @severity("high")
-@tags("agentic,mcp-risk,supply-chain")
+@tags("category:agent-security,threat:mcp-config-risk,detection:ml,owasp:asi04")
+@reject_message("MCP operation blocked: risky server configuration detected.")
 forbid (
     principal,
     action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
     resource
-) when {
+)
+when {
     context has mcp_config_risk && context.mcp_config_risk == true &&
     context has mcp_risk_score && context.mcp_risk_score >= 70
 };

package/_schemas/guardrails/templates/defaults/baseline.cedar CHANGED Viewed

@@ -1,22 +1,19 @@
 // =============================================================================
-// Baseline Permit Policy (Default)
+// Baseline Permit (Default)
 // =============================================================================
-// Permits all actions by default. Threat-specific forbid policies override
-// this to block when detectors find security issues.
+// Permits all Guardrails actions by default. Threat-specific forbid policies
+// override this when detectors fire. Cedar is default-deny: without at least
+// one permit rule, every request is denied regardless of forbid rules.
 //
-// Cedar is default-deny: without at least one permit rule, every request
-// is denied regardless of forbid rules. This baseline ensures the system
-// is "allow unless blocked" rather than "block everything".
-//
-// Category: organization
+// Category:  organization
 // Namespace: Guardrails
 // =============================================================================
-@id("baseline-permit-all")
-@name("Permit all actions by default")
-@description("Baseline permit for all actions — threat-specific forbid policies override this when threats are detected")
+@id("organization.permit-baseline")
+@name("Permit baseline")
+@description("Permits all Guardrails actions.")
 @severity("low")
-@tags("baseline,permit-default,organization")
+@tags("category:organization,posture:permit-default")
 permit (
     principal,
     action,