npm - @highflame/policy - Versions diffs - 2.1.36 → 2.1.37 - Mend

@highflame/policy 2.1.36 → 2.1.37

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (79)

package/_schemas/ai_gateway/schema.cedarschema CHANGED Viewed

@@ -87,7 +87,7 @@ action call_tool appliesTo {
     threat_categories?: Set<String>,  // Threat category names
     detected_threats?: Set<String>,   // Detection rule names that matched
     max_threat_severity?: Long,       // Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)
-    contains_secrets?: Bool,          // Whether secrets/credentials detected
+    secrets_detected?: Bool,          // Whether secrets/credentials detected
     // --- Secrets (granular) ---
     secret_types?: Set<String>,
@@ -97,10 +97,11 @@ action call_tool appliesTo {
     pii_detected?: Bool,
     pii_types?: Set<String>,
     pii_count?: Long,
+    pii_score?: Long,
     // --- ML Detector Confidence Scores (0-100) ---
-    injection_confidence?: Long,      // Prompt injection classifier confidence
-    jailbreak_confidence?: Long,      // Jailbreak detection classifier confidence
+    injection_score?: Long,      // Prompt injection classifier confidence
+    jailbreak_score?: Long,      // Jailbreak detection classifier confidence
     // --- Agent Security (0-100) ---
     tool_poisoning_score?: Long,      // Hidden instructions in tool description/args
@@ -127,7 +128,7 @@ action call_tool appliesTo {
     profanity_score?: Long,
     // --- Encoding & Unicode Attacks ---
-    contains_invisible_chars?: Bool,
+    invisible_chars_detected?: Bool,
     invisible_chars_score?: Long,
     // --- Behavioral Analysis ---
@@ -138,6 +139,21 @@ action call_tool appliesTo {
     pattern_type?: String,
     sequence_risk?: Long,
+    // --- Session-Aware Threat Aggregation ---
+    session_threat_turns?: Long,                  // Count of turns with detected threats
+    session_cumulative_risk_score?: Long,         // Accumulated risk across turns
+    session_injection_detected?: Bool,            // Injection seen in any prior turn
+    session_command_injection?: Bool,             // Command injection seen in any prior turn
+    session_pii_detected?: Bool,                  // PII seen in any prior turn
+    session_pii_types?: Set<String>,              // PII types accumulated across session
+    session_secrets_detected?: Bool,              // Secrets seen in any prior turn
+    session_secret_types?: Set<String>,           // Secret types accumulated across session
+    session_max_injection_score?: Long,           // Max injection score across session turns
+    session_max_jailbreak_score?: Long,           // Max jailbreak score across session turns
+    session_max_command_injection_score?: Long,   // Max command-injection score across session turns
+    session_max_pii_score?: Long,                 // Max pii score across session turns
+    session_max_secret_score?: Long,              // Max secret score across session turns
     // --- Usage Budget (multi-window, multi-dimension) ---
     budget_remaining_pct?: Long,
     budget_exceeded?: Bool,
@@ -177,7 +193,7 @@ action connect_server appliesTo {
     indirect_injection_score?: Long,
     // --- Secrets ---
-    contains_secrets?: Bool,
+    secrets_detected?: Bool,
     secret_types?: Set<String>,
     secret_count?: Long,
@@ -185,12 +201,28 @@ action connect_server appliesTo {
     pii_detected?: Bool,
     pii_types?: Set<String>,
     pii_count?: Long,
+    pii_score?: Long,
     // --- MCP Trust & Config Risk ---
     mcp_server_verified?: Bool,
     mcp_config_risk?: Bool,
     mcp_risk_score?: Long,
+    // --- Session-Aware Threat Aggregation ---
+    session_threat_turns?: Long,                  // Count of turns with detected threats
+    session_cumulative_risk_score?: Long,         // Accumulated risk across turns
+    session_injection_detected?: Bool,            // Injection seen in any prior turn
+    session_command_injection?: Bool,             // Command injection seen in any prior turn
+    session_pii_detected?: Bool,                  // PII seen in any prior turn
+    session_pii_types?: Set<String>,              // PII types accumulated across session
+    session_secrets_detected?: Bool,              // Secrets seen in any prior turn
+    session_secret_types?: Set<String>,           // Secret types accumulated across session
+    session_max_injection_score?: Long,           // Max injection score across session turns
+    session_max_jailbreak_score?: Long,           // Max jailbreak score across session turns
+    session_max_command_injection_score?: Long,   // Max command-injection score across session turns
+    session_max_pii_score?: Long,                 // Max pii score across session turns
+    session_max_secret_score?: Long,              // Max secret score across session turns
     // --- Usage Budget (multi-window, multi-dimension) ---
     budget_remaining_pct?: Long,
     budget_exceeded?: Bool,
@@ -222,7 +254,7 @@ action process_prompt appliesTo {
     threat_categories?: Set<String>,
     detected_threats?: Set<String>,
     max_threat_severity?: Long,
-    contains_secrets?: Bool,
+    secrets_detected?: Bool,
     // --- Secrets ---
     secret_types?: Set<String>,
@@ -232,10 +264,11 @@ action process_prompt appliesTo {
     pii_detected?: Bool,
     pii_types?: Set<String>,
     pii_count?: Long,
+    pii_score?: Long,
     // --- ML Detector Confidence Scores (0-100) ---
-    injection_confidence?: Long,
-    jailbreak_confidence?: Long,
+    injection_score?: Long,
+    jailbreak_score?: Long,
     // --- Content Safety Scores (0-100) ---
     violence_score?: Long,
@@ -246,13 +279,28 @@ action process_prompt appliesTo {
     profanity_score?: Long,
     // --- Encoding ---
-    contains_invisible_chars?: Bool,
+    invisible_chars_detected?: Bool,
     invisible_chars_score?: Long,
     // --- LLM-specific ---
     model_name?: String,              // Target model name (e.g., "gpt-4", "claude-3-opus")
     model_provider?: String,          // Provider name (e.g., "openai", "anthropic", "bedrock")
+    // --- Session-Aware Threat Aggregation ---
+    session_threat_turns?: Long,                  // Count of turns with detected threats
+    session_cumulative_risk_score?: Long,         // Accumulated risk across turns
+    session_injection_detected?: Bool,            // Injection seen in any prior turn
+    session_command_injection?: Bool,             // Command injection seen in any prior turn
+    session_pii_detected?: Bool,                  // PII seen in any prior turn
+    session_pii_types?: Set<String>,              // PII types accumulated across session
+    session_secrets_detected?: Bool,              // Secrets seen in any prior turn
+    session_secret_types?: Set<String>,           // Secret types accumulated across session
+    session_max_injection_score?: Long,           // Max injection score across session turns
+    session_max_jailbreak_score?: Long,           // Max jailbreak score across session turns
+    session_max_command_injection_score?: Long,   // Max command-injection score across session turns
+    session_max_pii_score?: Long,                 // Max pii score across session turns
+    session_max_secret_score?: Long,              // Max secret score across session turns
     // --- Usage Budget (multi-window, multi-dimension) ---
     budget_remaining_pct?: Long,
     budget_exceeded?: Bool,
@@ -284,7 +332,7 @@ action read_file appliesTo {
     threat_categories?: Set<String>,
     detected_threats?: Set<String>,
     max_threat_severity?: Long,
-    contains_secrets?: Bool,
+    secrets_detected?: Bool,
     // --- Secrets ---
     secret_types?: Set<String>,
@@ -294,6 +342,22 @@ action read_file appliesTo {
     pii_detected?: Bool,
     pii_types?: Set<String>,
     pii_count?: Long,
+    pii_score?: Long,
+    // --- Session-Aware Threat Aggregation ---
+    session_threat_turns?: Long,                  // Count of turns with detected threats
+    session_cumulative_risk_score?: Long,         // Accumulated risk across turns
+    session_injection_detected?: Bool,            // Injection seen in any prior turn
+    session_command_injection?: Bool,             // Command injection seen in any prior turn
+    session_pii_detected?: Bool,                  // PII seen in any prior turn
+    session_pii_types?: Set<String>,              // PII types accumulated across session
+    session_secrets_detected?: Bool,              // Secrets seen in any prior turn
+    session_secret_types?: Set<String>,           // Secret types accumulated across session
+    session_max_injection_score?: Long,           // Max injection score across session turns
+    session_max_jailbreak_score?: Long,           // Max jailbreak score across session turns
+    session_max_command_injection_score?: Long,   // Max command-injection score across session turns
+    session_max_pii_score?: Long,                 // Max pii score across session turns
+    session_max_secret_score?: Long,              // Max secret score across session turns
     // --- Usage Budget (multi-window, multi-dimension) ---
     budget_remaining_pct?: Long,
@@ -326,7 +390,7 @@ action write_file appliesTo {
     threat_categories?: Set<String>,
     detected_threats?: Set<String>,
     max_threat_severity?: Long,
-    contains_secrets?: Bool,
+    secrets_detected?: Bool,
     // --- Secrets ---
     secret_types?: Set<String>,
@@ -336,6 +400,22 @@ action write_file appliesTo {
     pii_detected?: Bool,
     pii_types?: Set<String>,
     pii_count?: Long,
+    pii_score?: Long,
+    // --- Session-Aware Threat Aggregation ---
+    session_threat_turns?: Long,                  // Count of turns with detected threats
+    session_cumulative_risk_score?: Long,         // Accumulated risk across turns
+    session_injection_detected?: Bool,            // Injection seen in any prior turn
+    session_command_injection?: Bool,             // Command injection seen in any prior turn
+    session_pii_detected?: Bool,                  // PII seen in any prior turn
+    session_pii_types?: Set<String>,              // PII types accumulated across session
+    session_secrets_detected?: Bool,              // Secrets seen in any prior turn
+    session_secret_types?: Set<String>,           // Secret types accumulated across session
+    session_max_injection_score?: Long,           // Max injection score across session turns
+    session_max_jailbreak_score?: Long,           // Max jailbreak score across session turns
+    session_max_command_injection_score?: Long,   // Max command-injection score across session turns
+    session_max_pii_score?: Long,                 // Max pii score across session turns
+    session_max_secret_score?: Long,              // Max secret score across session turns
     // --- Usage Budget (multi-window, multi-dimension) ---
     budget_remaining_pct?: Long,

package/_schemas/ai_gateway/templates/defaults/agent_security.cedar CHANGED Viewed

@@ -1,20 +1,38 @@
 // =============================================================================
-// Agent Security Policy (Default)
+// Agent Security (Default)
 // =============================================================================
-// Detects and blocks tool poisoning, rug pull attacks, indirect prompt injection,
-// and MCP supply chain threats.
+// Blocks tool poisoning, rug pull behavioral drift, indirect prompt injection
+// from tool outputs, and MCP supply-chain risks (unverified servers, risky
+// configurations).
 //
-// Category: agent_security
+// Context keys consumed:
+//   - tool_poisoning_score:      Long (0-100)
+//   - rug_pull_score:            Long (0-100)
+//   - indirect_injection_score:  Long (0-100)
+//   - tool_is_sensitive:         Bool
+//   - mcp_server_verified:       Bool
+//   - mcp_config_risk:           Bool
+//   - mcp_risk_score:            Long (0-100)
+//   - threat_count:              Long
+//
+// Compliance:
+//   - OWASP LLM01, OWASP ASI01, OWASP ASI02, OWASP ASI04
+//   - OWASP MCP02, MCP03, MCP05
+//
+// Category:  agent-security
 // Namespace: AIGateway
 // =============================================================================
-// Block tool calls with tool poisoning risk
-@id("as-block-tool-poisoning")
+// ---------------------------------------------------------------------------
+// Section 1: Tool poisoning
+// ---------------------------------------------------------------------------
+@id("agent-security.block-tool-poisoning")
 @name("Block tool poisoning")
-@description("Block tool execution when hidden instructions are detected in tool descriptions or arguments (score >= 70)")
+@description("Blocks call_tool when tool_poisoning_score >= 70.")
 @severity("critical")
-@tags("tool-poisoning,agent-security,owasp-asi01")
-@reject_message("Tool execution blocked: hidden manipulation instructions detected in tool description or arguments (OWASP ASI01).")
+@tags("category:agent-security,threat:tool-poisoning,detection:ml,surface:call-tool,owasp:asi01")
+@reject_message("Tool execution blocked: hidden manipulation instructions detected (OWASP ASI01).")
 forbid (
     principal,
     action == AIGateway::Action::"call_tool",
@@ -24,12 +42,11 @@ when {
     context has tool_poisoning_score && context.tool_poisoning_score >= 70
 };
-// Block MCP server connections with poisoning risk
-@id("as-block-server-poisoning")
+@id("agent-security.block-server-poisoning")
 @name("Block poisoned MCP servers")
-@description("Block connections to MCP servers when tool poisoning patterns are detected (score >= 60)")
+@description("Blocks connect_server when tool_poisoning_score >= 60.")
 @severity("critical")
-@tags("tool-poisoning,mcp-security,owasp-asi04,owasp-mcp02")
+@tags("category:agent-security,threat:tool-poisoning,detection:ml,surface:connect-server,owasp:asi04,owasp:mcp02")
 @reject_message("MCP server connection blocked: tool poisoning patterns detected in server tool descriptions.")
 forbid (
     principal,
@@ -40,13 +57,16 @@ when {
     context has tool_poisoning_score && context.tool_poisoning_score >= 60
 };
-// Block tool calls with behavioral drift (rug pull)
-@id("as-block-rug-pull")
+// ---------------------------------------------------------------------------
+// Section 2: Rug pull (behavioral drift)
+// ---------------------------------------------------------------------------
+@id("agent-security.block-rug-pull")
 @name("Block rug pull attacks")
-@description("Block tool execution when behavioral drift is detected (score >= 70)")
+@description("Blocks call_tool and connect_server when rug_pull_score >= 70.")
 @severity("critical")
-@tags("rug-pull,agent-security,owasp-asi04")
-@reject_message("Tool execution blocked: tool behavior has changed significantly from its established pattern.")
+@tags("category:agent-security,threat:rug-pull,detection:ml,owasp:asi04")
+@reject_message("Tool execution blocked: tool behavior has drifted significantly from its established pattern.")
 forbid (
     principal,
     action in [AIGateway::Action::"call_tool", AIGateway::Action::"connect_server"],
@@ -56,12 +76,15 @@ when {
     context has rug_pull_score && context.rug_pull_score >= 70
 };
-// Block with indirect injection from tool outputs
-@id("as-block-indirect-injection")
-@name("Block indirect prompt injection")
-@description("Block when indirect prompt injection is detected in tool outputs (score >= 70)")
+// ---------------------------------------------------------------------------
+// Section 3: Indirect prompt injection
+// ---------------------------------------------------------------------------
+@id("agent-security.block-indirect-injection")
+@name("Block indirect injection")
+@description("Blocks call_tool and connect_server when indirect_injection_score >= 70.")
 @severity("critical")
-@tags("indirect-injection,owasp-llm01,owasp-asi01")
+@tags("category:agent-security,threat:indirect-injection,detection:ml,owasp:llm01,owasp:asi01")
 @reject_message("Content blocked: indirect prompt injection detected in tool output or retrieved content.")
 forbid (
     principal,
@@ -72,13 +95,12 @@ when {
     context has indirect_injection_score && context.indirect_injection_score >= 70
 };
-// Strict indirect injection for sensitive tool calls
-@id("as-block-indirect-injection-sensitive-tools")
+@id("agent-security.block-indirect-injection-sensitive")
 @name("Block indirect injection on sensitive tools")
-@description("Lower threshold (>= 50) for indirect injection when the tool is classified as sensitive")
+@description("Blocks call_tool when tool_is_sensitive is true and indirect_injection_score >= 50.")
 @severity("critical")
-@tags("indirect-injection,sensitive-tools,owasp-asi02")
-@reject_message("Sensitive tool execution blocked: moderate indirect injection risk detected.")
+@tags("category:agent-security,threat:indirect-injection,detection:ml,surface:call-tool,owasp:asi02")
+@reject_message("Tool execution blocked: a sensitive tool was called with moderate indirect-injection risk.")
 forbid (
     principal,
     action == AIGateway::Action::"call_tool",
@@ -86,15 +108,18 @@ forbid (
 )
 when {
     context has indirect_injection_score && context.indirect_injection_score >= 50 &&
-    context has tool_is_sensitive && context.tool_is_sensitive
+    context has tool_is_sensitive && context.tool_is_sensitive == true
 };
-// Block unverified MCP server tool calls with detected threats
-@id("as-block-unverified-threats")
+// ---------------------------------------------------------------------------
+// Section 4: MCP supply chain & trust
+// ---------------------------------------------------------------------------
+@id("agent-security.block-unverified-threats")
 @name("Block unverified server threats")
-@description("Block tool calls from unverified MCP servers when any threat is detected")
+@description("Blocks call_tool when mcp_server_verified is false and threat_count >= 1.")
 @severity("high")
-@tags("mcp-trust,owasp-asi04,supply-chain")
+@tags("category:agent-security,threat:supply-chain,detection:aggregate,surface:call-tool,owasp:asi04")
 @reject_message("Tool execution blocked: the MCP server is unverified and security threats were detected.")
 forbid (
     principal,
@@ -103,15 +128,14 @@ forbid (
 )
 when {
     context has mcp_server_verified && context.mcp_server_verified == false &&
-    context has threat_count && context.threat_count > 0
+    context has threat_count && context.threat_count >= 1
 };
-// Block connections to MCP servers with risky configurations
-@id("as-block-mcp-config-risk")
+@id("agent-security.block-mcp-config-risk")
 @name("Block risky MCP server configs")
-@description("Block MCP server connections when risky configuration patterns are detected (score >= 70)")
+@description("Blocks connect_server when mcp_config_risk is true and mcp_risk_score >= 70.")
 @severity("high")
-@tags("mcp-config,owasp-mcp03,supply-chain")
+@tags("category:agent-security,threat:mcp-config-risk,detection:ml,surface:connect-server,owasp:mcp03")
 @reject_message("MCP server connection blocked: risky server configuration detected.")
 forbid (
     principal,
@@ -119,16 +143,15 @@ forbid (
     resource
 )
 when {
-    context has mcp_config_risk && context.mcp_config_risk &&
+    context has mcp_config_risk && context.mcp_config_risk == true &&
     context has mcp_risk_score && context.mcp_risk_score >= 70
 };
-// Block connections to unverified MCP servers
-@id("as-block-unverified-server-connect")
+@id("agent-security.block-unverified-server")
 @name("Block unverified MCP server connections")
-@description("Block connections to MCP servers that are not from a verified registry")
+@description("Blocks connect_server when mcp_server_verified is false.")
 @severity("high")
-@tags("mcp-trust,owasp-asi04,owasp-mcp05,supply-chain")
+@tags("category:agent-security,threat:supply-chain,detection:rule,surface:connect-server,owasp:asi04,owasp:mcp05")
 @reject_message("MCP server connection blocked: server is not from a verified registry.")
 forbid (
     principal,

package/_schemas/ai_gateway/templates/defaults/baseline.cedar CHANGED Viewed

@@ -1,21 +1,19 @@
 // =============================================================================
-// Baseline Permit Policy (Default)
+// Baseline Permit (Default)
 // =============================================================================
-// Permits all actions by default. Threat-specific forbid policies override
-// this to block when detection engines identify issues.
+// Permits all AI Gateway actions by default. Threat-specific forbid policies
+// override this when detectors fire. Cedar is default-deny: without at least
+// one permit rule, every request is denied regardless of forbid rules.
 //
-// Cedar is default-deny: without at least one permit rule, every request
-// is denied regardless of forbid rules.
-//
-// Category: organization
+// Category:  organization
 // Namespace: AIGateway
 // =============================================================================
-@id("baseline-permit-all")
-@name("Permit all actions by default")
-@description("Baseline permit for all actions -- threat-specific forbid policies override this when threats are detected")
+@id("organization.permit-baseline")
+@name("Permit baseline")
+@description("Permits all AI Gateway actions.")
 @severity("low")
-@tags("baseline,permit-default,organization")
+@tags("category:organization,posture:permit-default")
 permit (
     principal,
     action,

package/_schemas/ai_gateway/templates/defaults/semantic.cedar CHANGED Viewed

@@ -1,20 +1,40 @@
 // =============================================================================
-// Semantic Threat Detection Policy (Default)
+// Semantic Threat Detection (Default)
 // =============================================================================
-// Detects and blocks prompt injection, jailbreak attempts, and high-severity
-// threats in MCP tool calls and server connections.
+// Blocks prompt injection, jailbreak attempts, and high-severity threats in
+// MCP tool calls and LLM prompts. Uses both detection-engine rule triggers and
+// ML classifier confidence scores.
 //
-// Category: semantic
+// Detection layers:
+//   - Rule triggers (detected_threats), always available
+//   - ML classifier scores (injection_score, jailbreak_score), require API token
+//   - Severity aggregation (highest_severity, threat_count), catch-all rules
+//
+// Context keys consumed:
+//   - detected_threats:  Set<String>
+//   - injection_score:   Long (0-100)
+//   - jailbreak_score:   Long (0-100)
+//   - highest_severity:  String
+//   - threat_count:      Long
+//
+// Compliance:
+//   - OWASP LLM01, OWASP LLM02
+//   - MITRE ATLAS AML.T0051, AML.T0054
+//
+// Category:  semantic
 // Namespace: AIGateway
 // =============================================================================
-// Block content with prompt injection patterns detected by rules
-@id("semantic-block-injection")
-@name("Block prompt injection")
-@description("Block tool calls when detection engine rules identify prompt injection patterns in tool arguments or content")
+// ---------------------------------------------------------------------------
+// Section 1: Prompt injection
+// ---------------------------------------------------------------------------
+@id("semantic.block-injection-rule")
+@name("Block injection (rule)")
+@description("Blocks call_tool when detected_threats contains \"prompt_injection\".")
 @severity("critical")
-@tags("injection,security,owasp-llm01,baseline")
-@reject_message("Tool call was blocked because prompt injection patterns were detected in the content (OWASP LLM01).")
+@tags("category:semantic,threat:injection,detection:rule,surface:call-tool,owasp:llm01,mitre:atlas-t0051,posture:baseline")
+@reject_message("Tool execution blocked: prompt injection patterns detected (OWASP LLM01).")
 forbid (
     principal,
     action == AIGateway::Action::"call_tool",
@@ -24,29 +44,31 @@ when {
     context has detected_threats && context.detected_threats.contains("prompt_injection")
 };
-// Block content with high ML injection confidence
-@id("semantic-block-injection-score")
-@name("Block high-confidence injection")
-@description("Block tool calls when the ML injection classifier confidence exceeds 75/100")
+@id("semantic.block-injection-ml")
+@name("Block injection (ML)")
+@description("Blocks call_tool when injection_score >= 75.")
 @severity("critical")
-@tags("injection,ml-classifier,security,owasp-llm01")
-@reject_message("Tool call was blocked because the ML classifier detected prompt injection with high confidence.")
+@tags("category:semantic,threat:injection,detection:ml,surface:call-tool,owasp:llm01,mitre:atlas-t0051")
+@reject_message("Tool execution blocked: the ML classifier detected prompt injection with high confidence.")
 forbid (
     principal,
     action == AIGateway::Action::"call_tool",
     resource
 )
 when {
-    context has injection_confidence && context.injection_confidence >= 75
+    context has injection_score && context.injection_score >= 75
 };
-// Block content with jailbreak patterns
-@id("semantic-block-jailbreak")
-@name("Block jailbreak attempts")
-@description("Block tool calls when jailbreak patterns are detected in content")
+// ---------------------------------------------------------------------------
+// Section 2: Jailbreak
+// ---------------------------------------------------------------------------
+@id("semantic.block-jailbreak-rule")
+@name("Block jailbreak (rule)")
+@description("Blocks call_tool when detected_threats contains \"jailbreak\".")
 @severity("critical")
-@tags("jailbreak,security,owasp-llm02,baseline")
-@reject_message("Tool call was blocked because jailbreak patterns were detected.")
+@tags("category:semantic,threat:jailbreak,detection:rule,surface:call-tool,owasp:llm02,mitre:atlas-t0054,posture:baseline")
+@reject_message("Tool execution blocked: jailbreak patterns detected (OWASP LLM02).")
 forbid (
     principal,
     action == AIGateway::Action::"call_tool",
@@ -56,29 +78,31 @@ when {
     context has detected_threats && context.detected_threats.contains("jailbreak")
 };
-// Block content with high ML jailbreak confidence
-@id("semantic-block-jailbreak-score")
-@name("Block high-confidence jailbreak")
-@description("Block tool calls when the ML jailbreak classifier confidence exceeds 75/100")
+@id("semantic.block-jailbreak-ml")
+@name("Block jailbreak (ML)")
+@description("Blocks call_tool when jailbreak_score >= 75.")
 @severity("critical")
-@tags("jailbreak,ml-classifier,security,owasp-llm02")
-@reject_message("Tool call was blocked because the ML classifier detected a jailbreak attempt with high confidence.")
+@tags("category:semantic,threat:jailbreak,detection:ml,surface:call-tool,owasp:llm02,mitre:atlas-t0054")
+@reject_message("Tool execution blocked: the ML classifier detected a jailbreak attempt with high confidence.")
 forbid (
     principal,
     action == AIGateway::Action::"call_tool",
     resource
 )
 when {
-    context has jailbreak_confidence && context.jailbreak_confidence >= 75
+    context has jailbreak_score && context.jailbreak_score >= 75
 };
-// Block any content with critical severity threats
-@id("semantic-block-critical")
+// ---------------------------------------------------------------------------
+// Section 3: Severity aggregation
+// ---------------------------------------------------------------------------
+@id("semantic.block-critical")
 @name("Block critical threats")
-@description("Block all MCP operations when any detection engine reports critical severity")
+@description("Blocks all MCP operations when highest_severity equals \"critical\".")
 @severity("critical")
-@tags("critical,baseline,security,catch-all")
-@reject_message("MCP operation was blocked because security scanners detected a critical-severity threat.")
+@tags("category:semantic,detection:aggregate,posture:catch-all")
+@reject_message("MCP operation blocked: a critical-severity threat was detected.")
 forbid (
     principal,
     action,
@@ -88,13 +112,12 @@ when {
     context has highest_severity && context.highest_severity == "critical"
 };
-// Block tool calls with multiple concurrent threats
-@id("semantic-block-multi-threat-tools")
+@id("semantic.block-multi-threat")
 @name("Block multi-threat tool calls")
-@description("Block tool execution when 3+ distinct threats are detected simultaneously")
+@description("Blocks call_tool when threat_count >= 3.")
 @severity("high")
-@tags("multi-threat,tools,security,defense-in-depth")
-@reject_message("Tool execution was blocked because multiple security threats were detected simultaneously.")
+@tags("category:semantic,detection:aggregate,surface:call-tool,posture:catch-all")
+@reject_message("Tool execution blocked: multiple concurrent security threats were detected.")
 forbid (
     principal,
     action == AIGateway::Action::"call_tool",