npm - @highflame/policy - Versions diffs - 2.1.36 → 2.1.37 - Mend

@highflame/policy 2.1.36 → 2.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

package/_schemas/guardrails/templates/defaults/toxicity.cedar CHANGED Viewed

@@ -1,76 +1,86 @@
 // =============================================================================
-// Toxicity & Content Moderation Policy
+// Toxicity & Content Moderation (Default)
 // =============================================================================
-// Blocks toxic, harmful, or inappropriate content.
-// Covers violence, hate speech, sexual content, weapons, crime, and profanity.
+// Blocks toxic, harmful, or inappropriate content across the six standard
+// toxicity dimensions. Three rules: a strict critical-threshold catch-all,
+// a hate-speech-specific lower threshold, and an "any two moderate signals"
+// combined-toxicity rule.
 //
-// Context keys used (normalized by projection layer):
-// - violence_score: Long (0-100) - Violence reference score
-// - hate_speech_score: Long (0-100) - Hate speech score
-// - sexual_score: Long (0-100) - Sexual content score
-// - weapons_score: Long (0-100) - Weapons reference score
-// - crime_score: Long (0-100) - Criminal activity score
-// - profanity_score: Long (0-100) - Profanity score
+// Context keys consumed:
+//   - violence_score:     Long (0-100)
+//   - hate_speech_score:  Long (0-100)
+//   - sexual_score:       Long (0-100)
+//   - weapons_score:      Long (0-100)
+//   - crime_score:        Long (0-100)
+//   - profanity_score:    Long (0-100)
 //
-// Category: trust-safety
+// Compliance:
+//   - EU AI Act, ISO 42001, NIST SI-4
+//
+// Category:  trust-safety
 // Namespace: Guardrails
 // =============================================================================
-@id("toxicity-block-critical")
+@id("trust-safety.block-toxicity-critical")
 @name("Block critical toxicity")
-@description("Forbids content with any toxicity score above 90")
+@description("Blocks process_prompt when any toxicity score is >= 91.")
 @severity("critical")
-@tags("toxicity,trust-safety")
+@tags("category:trust-safety,threat:harmful,detection:ml,compliance:eu-ai-act,compliance:iso-42001")
+@reject_message("Prompt blocked: critical-severity toxic content detected.")
 forbid (
     principal,
     action == Guardrails::Action::"process_prompt",
     resource
-) when {
-    (context has violence_score && context.violence_score > 90) ||
-    (context has hate_speech_score && context.hate_speech_score > 90) ||
-    (context has sexual_score && context.sexual_score > 90) ||
-    (context has weapons_score && context.weapons_score > 90) ||
-    (context has crime_score && context.crime_score > 90) ||
-    (context has profanity_score && context.profanity_score > 90)
+)
+when {
+    (context has violence_score    && context.violence_score    >= 91) ||
+    (context has hate_speech_score && context.hate_speech_score >= 91) ||
+    (context has sexual_score      && context.sexual_score      >= 91) ||
+    (context has weapons_score     && context.weapons_score     >= 91) ||
+    (context has crime_score       && context.crime_score       >= 91) ||
+    (context has profanity_score   && context.profanity_score   >= 91)
 };
-@id("toxicity-block-hate-speech")
+@id("trust-safety.block-hate-speech")
 @name("Block hate speech")
-@description("Forbids content with hate speech above 75")
+@description("Blocks process_prompt when hate_speech_score >= 76.")
 @severity("high")
-@tags("hate-speech,trust-safety")
+@tags("category:trust-safety,threat:hate-speech,detection:ml,compliance:eu-ai-act")
+@reject_message("Prompt blocked: hate speech detected.")
 forbid (
     principal,
     action == Guardrails::Action::"process_prompt",
     resource
-) when {
-    context has hate_speech_score && context.hate_speech_score > 75
+)
+when {
+    context has hate_speech_score && context.hate_speech_score >= 76
 };
-@id("toxicity-block-combined")
-@name("Block combined moderate toxicity")
-@description("Forbids content with two or more moderate toxicity scores above 60")
+@id("trust-safety.block-toxicity-combined")
+@name("Block combined toxicity")
+@description("Blocks process_prompt when any two toxicity scores are >= 61.")
 @severity("high")
-@tags("toxicity,trust-safety")
+@tags("category:trust-safety,threat:harmful,detection:aggregate")
+@reject_message("Prompt blocked: multiple moderate toxicity signals were detected together.")
 forbid (
     principal,
     action == Guardrails::Action::"process_prompt",
     resource
-) when {
-    // Any 2 of 6 toxicity scores above 60 triggers a block
-    (context has violence_score && context has hate_speech_score && context.violence_score > 60 && context.hate_speech_score > 60) ||
-    (context has violence_score && context has sexual_score && context.violence_score > 60 && context.sexual_score > 60) ||
-    (context has violence_score && context has weapons_score && context.violence_score > 60 && context.weapons_score > 60) ||
-    (context has violence_score && context has crime_score && context.violence_score > 60 && context.crime_score > 60) ||
-    (context has violence_score && context has profanity_score && context.violence_score > 60 && context.profanity_score > 60) ||
-    (context has hate_speech_score && context has sexual_score && context.hate_speech_score > 60 && context.sexual_score > 60) ||
-    (context has hate_speech_score && context has weapons_score && context.hate_speech_score > 60 && context.weapons_score > 60) ||
-    (context has hate_speech_score && context has crime_score && context.hate_speech_score > 60 && context.crime_score > 60) ||
-    (context has hate_speech_score && context has profanity_score && context.hate_speech_score > 60 && context.profanity_score > 60) ||
-    (context has sexual_score && context has weapons_score && context.sexual_score > 60 && context.weapons_score > 60) ||
-    (context has sexual_score && context has crime_score && context.sexual_score > 60 && context.crime_score > 60) ||
-    (context has sexual_score && context has profanity_score && context.sexual_score > 60 && context.profanity_score > 60) ||
-    (context has weapons_score && context has crime_score && context.weapons_score > 60 && context.crime_score > 60) ||
-    (context has weapons_score && context has profanity_score && context.weapons_score > 60 && context.profanity_score > 60) ||
-    (context has crime_score && context has profanity_score && context.crime_score > 60 && context.profanity_score > 60)
+)
+when {
+    (context has violence_score    && context has hate_speech_score && context.violence_score    >= 61 && context.hate_speech_score >= 61) ||
+    (context has violence_score    && context has sexual_score      && context.violence_score    >= 61 && context.sexual_score      >= 61) ||
+    (context has violence_score    && context has weapons_score     && context.violence_score    >= 61 && context.weapons_score     >= 61) ||
+    (context has violence_score    && context has crime_score       && context.violence_score    >= 61 && context.crime_score       >= 61) ||
+    (context has violence_score    && context has profanity_score   && context.violence_score    >= 61 && context.profanity_score   >= 61) ||
+    (context has hate_speech_score && context has sexual_score      && context.hate_speech_score >= 61 && context.sexual_score      >= 61) ||
+    (context has hate_speech_score && context has weapons_score     && context.hate_speech_score >= 61 && context.weapons_score     >= 61) ||
+    (context has hate_speech_score && context has crime_score       && context.hate_speech_score >= 61 && context.crime_score       >= 61) ||
+    (context has hate_speech_score && context has profanity_score   && context.hate_speech_score >= 61 && context.profanity_score   >= 61) ||
+    (context has sexual_score      && context has weapons_score     && context.sexual_score      >= 61 && context.weapons_score     >= 61) ||
+    (context has sexual_score      && context has crime_score       && context.sexual_score      >= 61 && context.crime_score       >= 61) ||
+    (context has sexual_score      && context has profanity_score   && context.sexual_score      >= 61 && context.profanity_score   >= 61) ||
+    (context has weapons_score     && context has crime_score       && context.weapons_score     >= 61 && context.crime_score       >= 61) ||
+    (context has weapons_score     && context has profanity_score   && context.weapons_score     >= 61 && context.profanity_score   >= 61) ||
+    (context has crime_score       && context has profanity_score   && context.crime_score       >= 61 && context.profanity_score   >= 61)
 };

package/_schemas/guardrails/templates/mcp_tool_permissions.cedar CHANGED Viewed

@@ -1,86 +1,103 @@
 // =============================================================================
-// MCP Tool Permissions Template
+// MCP Tool Permissions
 // =============================================================================
-// Per-tool access control for MCP servers. Allows fine-grained permissioning:
-// allow specific tools from a server while denying others.
+// Per-tool access control for MCP servers — fine-grained permissioning that
+// allows specific tools from a server while denying others.
 //
-// Example: Allow read_issues from GitHub but deny create_issues.
+// Bundled rules cover:
+//   - GitHub MCP example: read-tool permit, plus a forbid on all github calls
+//   - Org-wide exclusion list (untrusted / deprecated servers)
+//   - Unverified MCP server block
 //
-// Context keys used:
-// - mcp_server: String - MCP server name
-// - mcp_tool: String - Tool name within the server
-// - tool_name: String - Normalized tool name
-// - mcp_server_verified: Bool - Whether server is verified
+// Context keys consumed:
+//   - mcp_server:           String
+//   - tool_name:            String
+//   - mcp_server_verified:  Bool
 //
-// Category: agentic_security
+// Category:  tools
 // Namespace: Guardrails
 // =============================================================================
-// -- GitHub MCP: Read-only access -------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 1: Example — GitHub MCP read/write split
+// ---------------------------------------------------------------------------
-@id("mcp-github-allow-read")
-@name("Allow read-only GitHub tools")
-@description("Permit read operations from GitHub MCP server (issues, PRs, code search)")
+@id("tools.allow-github-mcp-read")
+@name("Allow GitHub MCP read tools")
+@description("Permits call_tool when mcp_server is \"github\" and tool_name is a read-only operation.")
 @severity("medium")
-@tags("mcp,github,read-only,least-privilege")
+@tags("category:tools,scope:per-tool,surface:call-tool,posture:deny-default")
 permit (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has mcp_server && context.mcp_server == "github" &&
     context has tool_name &&
-    (context.tool_name == "read_issues" ||
-     context.tool_name == "get_issue" ||
-     context.tool_name == "list_repos" ||
-     context.tool_name == "get_pull_request" ||
-     context.tool_name == "search_code" ||
-     context.tool_name == "get_file_contents")
+    (
+        context.tool_name == "read_issues" ||
+        context.tool_name == "get_issue" ||
+        context.tool_name == "list_repos" ||
+        context.tool_name == "get_pull_request" ||
+        context.tool_name == "search_code" ||
+        context.tool_name == "get_file_contents"
+    )
 };
-@id("mcp-github-deny-write")
-@name("Deny write GitHub tools")
-@description("Block create/update/delete operations on GitHub MCP server")
+@id("tools.block-github-mcp-write")
+@name("Block GitHub MCP write tools")
+@description("Blocks call_tool when mcp_server is \"github\"; combine with the read-allow permit above.")
 @severity("high")
-@tags("mcp,github,write-block,least-privilege")
+@tags("category:tools,scope:per-tool,surface:call-tool,posture:deny-default")
+@reject_message("Tool execution blocked: write operations against the GitHub MCP server are not permitted.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has mcp_server && context.mcp_server == "github"
 };
-// -- Organization-wide MCP server exclusions --------------------------------
+// ---------------------------------------------------------------------------
+// Section 2: Org-wide exclusion list
+// ---------------------------------------------------------------------------
-@id("mcp-exclude-server")
-@name("Exclude specific MCP servers")
-@description("Block all tool calls from excluded MCP servers (org-wide exclusion list)")
+@id("tools.exclude-mcp-servers")
+@name("Block excluded MCP servers")
+@description("Blocks call_tool when mcp_server is in the org-wide exclusion list.")
 @severity("critical")
-@tags("mcp,exclusion,org-wide,block")
+@tags("category:tools,scope:org-wide,surface:call-tool,posture:deny-default")
+@reject_message("Tool execution blocked: MCP server is on the org-wide exclusion list.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
-    // Add server names to block across the organization.
-    // Modify this list to match your exclusion requirements.
+)
+when {
     context has mcp_server &&
-    (context.mcp_server == "untrusted-server" ||
-     context.mcp_server == "deprecated-server")
+    (
+        context.mcp_server == "untrusted-server" ||
+        context.mcp_server == "deprecated-server"
+    )
 };
-// -- Block unverified MCP servers -------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 3: Unverified server block
+// ---------------------------------------------------------------------------
-@id("mcp-block-unverified")
-@name("Block unverified MCP servers")
-@description("Deny all tool calls from MCP servers not in the verified registry")
+@id("tools.block-unverified-mcp-tools")
+@name("Block unverified MCP server tools")
+@description("Blocks call_tool when mcp_server_verified is false.")
 @severity("high")
-@tags("mcp,trust,verification")
+@tags("category:tools,threat:supply-chain,detection:rule,surface:call-tool")
+@reject_message("Tool execution blocked: MCP server is not from a verified registry.")
 forbid (
     principal,
     action == Guardrails::Action::"call_tool",
     resource
-) when {
+)
+when {
     context has mcp_server_verified && context.mcp_server_verified == false
 };

package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar CHANGED Viewed

@@ -1,43 +1,33 @@
 // =============================================================================
 // A2A Security — Cross-Origin Trust Boundary Enforcement
 // =============================================================================
-// Detects and blocks confused deputy attacks where an agent from one trust
-// domain attempts to operate in another. Cross-origin violations occur when:
-//   - An agent proxies requests across security domains
-//   - Mixed-security tool chains span trust boundaries
-//   - URL injection redirects agent communication to untrusted origins
+// Detects and blocks confused-deputy attacks where an agent from one trust
+// domain attempts to operate in another. Cross-origin is the primary signal
+// of boundary violations in A2A, where independent agents cross trust domains.
 //
-// Key A2A distinction from MAS: In multi-agent systems (shared orchestrator),
-// cross-origin is unlikely because all agents share a trust context. In A2A
-// (independent agents, separate trust domains), cross-origin is the PRIMARY
-// signal that trust boundaries are being violated.
+// Shield cross-origin detector scores (discrete):
+//   90 mixed localhost + external | 85 URL injection | 80 proxy/redirect
+//   75 multi-origin tool configs  | 70 mixed HTTP/HTTPS | 65 JSON multi-URL
+//   60 generic multi-domain patterns
 //
-// Shield cross-origin detector outputs discrete scores:
-//   90 — mixed localhost + external domain
-//   85 — URL injection in parameters
-//   80 — proxy/redirect patterns
-//   75 — multi-origin tool configs / JSON origin fields
-//   70 — mixed HTTP/HTTPS or ws/wss schemes
-//   65 — JSON arrays with multiple URLs
-//   60 — generic multi-domain patterns
+// Context keys consumed:
+//   - cross_origin_detected, cross_origin_score
+//   - agent_id, agent_trust_level
+//   - tool_is_sensitive
 //
 // Compliance:
-//   OWASP LLM08 (Excessive Agency)
-//   OWASP ASI03 (Excessive Permissions)
-//   MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection via delegation)
-//   NIST 800-53 AC-4 (Information Flow Enforcement)
+//   - OWASP LLM08, OWASP ASI03; NIST 800-53 AC-4
 //
-// Category: agent_identity
+// Category:  agent-identity
 // Namespace: Guardrails
 // =============================================================================
-// Block high-confidence cross-origin for any agent
-@id("a2a-cross-origin-block-critical")
+@id("agent-identity.a2a-block-cross-origin-critical")
 @name("Block critical cross-origin from any agent")
-@description("Block all agent requests when cross-origin trust boundary violation score exceeds 80. High-confidence cross-origin signals (mixed localhost/external, URL injection, proxy redirects) indicate confused deputy attacks regardless of agent trust level.")
+@description("Blocks process_prompt and call_tool when cross_origin_detected is true and cross_origin_score >= 80.")
 @severity("critical")
-@tags("profile,a2a-security,cross-origin,confused-deputy,trust-boundary,owasp-llm08")
-@reject_message("Request blocked: high-confidence cross-origin trust boundary violation detected (score >= 80). An external agent or service is attempting to operate across trust domains. Review the origin chain before retrying.")
+@tags("category:agent-identity,threat:supply-chain,detection:ml,owasp:llm08")
+@reject_message("Request blocked: high-confidence cross-origin trust-boundary violation detected — confused-deputy attack.")
 forbid (
     principal is Guardrails::Agent,
     action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
@@ -49,13 +39,12 @@ when {
     context has cross_origin_score && context.cross_origin_score >= 80
 };
-// Block cross-origin from unverified agents at any signal level
-@id("a2a-cross-origin-block-unverified")
+@id("agent-identity.a2a-block-cross-origin-unverified")
 @name("Block cross-origin from unverified agents")
-@description("Unverified agents are blocked from any cross-origin activity at the lowest meaningful detection threshold (score >= 60). Cross-origin from an unverified source is a strong confused deputy indicator — the agent has no attestation AND is crossing trust boundaries.")
+@description("Blocks process_prompt and call_tool when agent_trust_level is \"unverified\", cross_origin_detected is true, and cross_origin_score >= 60.")
 @severity("high")
-@tags("profile,a2a-security,cross-origin,unverified,trust-boundary,owasp-asi03")
-@reject_message("Request blocked: cross-origin activity detected from an unverified agent. Unverified agents cannot operate across trust boundaries. Register the agent or use a verified agent.")
+@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:ml,owasp:asi03")
+@reject_message("Request blocked: unverified agent attempted cross-origin activity — no trust attestation across trust domains.")
 forbid (
     principal is Guardrails::Agent,
     action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
@@ -67,13 +56,12 @@ when {
     context has cross_origin_score && context.cross_origin_score >= 60
 };
-// Block cross-origin MCP server connections from non-first-party agents
-@id("a2a-cross-origin-block-server-connect")
+@id("agent-identity.a2a-block-cross-origin-server-connect")
 @name("Block cross-origin MCP server connections")
-@description("Non-first-party agents cannot connect to MCP servers when cross-origin signals are present (score >= 65). Server-level cross-origin has wide blast radius — a single compromised connection exposes all tools on that server.")
+@description("Blocks connect_server when agent_trust_level is not \"first_party\", cross_origin_detected is true, and cross_origin_score >= 65.")
 @severity("critical")
-@tags("profile,a2a-security,cross-origin,mcp,server,trust-boundary,nist-ac-4")
-@reject_message("MCP server connection blocked: cross-origin trust violation detected. Non-first-party agents cannot connect to MCP servers when cross-origin signals are present.")
+@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:ml,surface:connect-server")
+@reject_message("MCP server connection blocked: non-first-party agent crossed trust boundaries during connection — supply-chain risk.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"connect_server",
@@ -85,13 +73,12 @@ when {
     context has cross_origin_score && context.cross_origin_score >= 65
 };
-// Block cross-origin tool calls on sensitive tools from any agent
-@id("a2a-cross-origin-block-sensitive-tools")
+@id("agent-identity.a2a-block-cross-origin-sensitive-tools")
 @name("Block cross-origin on sensitive tools")
-@description("Any agent attempting to call sensitive tools while cross-origin signals are present (score >= 60) is blocked. Sensitive tools (write_file, http_post, send_email) amplify the impact of confused deputy attacks — an agent crossing trust boundaries should not have access to high-impact operations.")
+@description("Blocks call_tool when cross_origin_detected is true, cross_origin_score >= 60, and tool_is_sensitive is true.")
 @severity("high")
-@tags("profile,a2a-security,cross-origin,sensitive-tools,confused-deputy,owasp-llm08")
-@reject_message("Sensitive tool execution blocked: cross-origin trust violation detected. Tool calls to sensitive tools are blocked when cross-origin signals are present from agent requests.")
+@tags("category:agent-identity,threat:supply-chain,detection:ml,surface:call-tool,owasp:llm08")
+@reject_message("Sensitive tool execution blocked: cross-origin signal present — sensitive tools amplify confused-deputy impact.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",

package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar CHANGED Viewed

@@ -1,47 +1,36 @@
 // =============================================================================
 // A2A Security — Escalation Detection & Circuit Breakers
 // =============================================================================
-// Detects progressive capability escalation across agent turns and applies
-// session-level circuit breakers tuned for adversarial A2A communication:
+// Detects progressive capability escalation across agent turns. Uses lower
+// circuit-breaker thresholds than the multi-agent profile because A2A
+// communication spans separate trust domains with no shared orchestrator
+// to reset session state.
 //
-// 1. SESSION PEAK SCORES: When the maximum injection or jailbreak score
-//    across all prior turns exceeds 70, non-first-party agents are restricted.
-//    Unlike MAS boolean flags, A2A uses numeric peaks for graduated response.
-//
-// 2. CUMULATIVE RISK CIRCUIT BREAKER: When accumulated risk across turns
-//    exceeds 150, sensitive tools are blocked for non-first-party agents.
-//    Lower threshold than MAS (200) because cross-system communication has
-//    higher adversarial surface area.
-//
-// 3. THREAT TURN ESCALATION: After 3+ turns with detected threats, unverified
-//    agents are fully locked out. This catches turn-by-turn probing attacks
-//    where an attacker incrementally tests boundaries.
-//
-// Key A2A distinction: In MAS, the orchestrator can reset or contain sessions.
-// In A2A, independent agents have no shared circuit breaker, so policy must
-// enforce escalation detection at the evaluation layer.
+// Context keys consumed:
+//   - agent_trust_level
+//   - session_max_injection_score, session_max_jailbreak_score: Long (0-100)
+//   - session_cumulative_risk_score: Long
+//   - session_threat_turns:          Long
+//   - tool_is_sensitive:             Bool
 //
 // Compliance:
-//   OWASP LLM01 (Prompt Injection) — multi-turn variant
-//   MITRE ATLAS AML.T0051 (LLM Prompt Injection)
-//   NIST 800-53 SI-4 (System Monitoring)
-//   NIST 800-53 IR-4 (Incident Handling)
+//   - OWASP LLM01 (multi-turn); MITRE ATLAS AML.T0051
+//   - NIST 800-53 SI-4, IR-4
 //
-// Category: agent_identity
+// Category:  agent-identity
 // Namespace: Guardrails
 // =============================================================================
-// -----------------------------------------------------------------------------
-// Session Peak Score Monitoring
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 1: Session peak score monitoring
+// ---------------------------------------------------------------------------
-// Block non-first-party agents when session injection peak is high
-@id("a2a-session-injection-peak-block")
-@name("Block agents when session injection peak is high")
-@description("Block non-first-party agents from tool calls and prompt processing when the maximum injection score across all prior turns exceeds 70. Unlike MAS policies that use boolean session_injection_detected, A2A uses the numeric peak score for graduated response — a session with a prior score of 75 is more dangerous than one with 45, even though both set the boolean flag.")
+@id("agent-identity.a2a-block-session-injection-peak")
+@name("Block agents at high session injection peak")
+@description("Blocks call_tool and process_prompt when agent_trust_level is not \"first_party\" and session_max_injection_score >= 70.")
 @severity("critical")
-@tags("profile,a2a-security,escalation,session-peak,injection,owasp-lml01,nist-si-4")
-@reject_message("Request blocked: this session has a prior injection detection peak of 70+. Non-first-party agents are restricted after high injection signals to prevent progressive capability escalation.")
+@tags("category:agent-identity,threat:injection,scope:per-agent,detection:aggregate,owasp:llm01")
+@reject_message("Request blocked: this session previously crossed the injection peak threshold; non-first-party agents are restricted.")
 forbid (
     principal is Guardrails::Agent,
     action in [Guardrails::Action::"call_tool", Guardrails::Action::"process_prompt"],
@@ -52,13 +41,12 @@ when {
     context has session_max_injection_score && context.session_max_injection_score >= 70
 };
-// Block non-first-party agents when session jailbreak peak is high
-@id("a2a-session-jailbreak-peak-block")
-@name("Block agents when session jailbreak peak is high")
-@description("Block non-first-party agents from prompt processing when the maximum jailbreak score across all prior turns exceeds 70. A high jailbreak peak indicates the session's guardrails may have been partially eroded — subsequent agent requests in this session should be treated as potentially manipulated.")
+@id("agent-identity.a2a-block-session-jailbreak-peak")
+@name("Block agents at high session jailbreak peak")
+@description("Blocks process_prompt when agent_trust_level is not \"first_party\" and session_max_jailbreak_score >= 70.")
 @severity("critical")
-@tags("profile,a2a-security,escalation,session-peak,jailbreak,owasp-lml01,nist-si-4")
-@reject_message("Request blocked: this session has a prior jailbreak detection peak of 70+. Non-first-party agents are restricted after high jailbreak signals to prevent progressive manipulation.")
+@tags("category:agent-identity,threat:jailbreak,scope:per-agent,detection:aggregate,owasp:llm02")
+@reject_message("Request blocked: this session previously crossed the jailbreak peak threshold; non-first-party agents are restricted.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"process_prompt",
@@ -69,17 +57,16 @@ when {
     context has session_max_jailbreak_score && context.session_max_jailbreak_score >= 70
 };
-// -----------------------------------------------------------------------------
-// Cumulative Risk Circuit Breaker
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 2: Cumulative risk circuit breaker
+// ---------------------------------------------------------------------------
-// Lock down non-first-party agents at high cumulative risk
-@id("a2a-cumulative-risk-agent-lockdown")
-@name("Lock down non-first-party agents at high cumulative risk")
-@description("Block non-first-party agents from sensitive tools when cumulative session risk exceeds 150. Lower threshold than MAS (200) because A2A sessions with cross-system agents should trip the circuit breaker earlier — independent agents cannot coordinate session recovery the way an orchestrator can.")
+@id("agent-identity.a2a-cumulative-risk-lockdown")
+@name("Block non-first-party agents at cumulative risk")
+@description("Blocks call_tool when agent_trust_level is not \"first_party\", session_cumulative_risk_score >= 151, and tool_is_sensitive is true.")
 @severity("high")
-@tags("profile,a2a-security,escalation,cumulative-risk,circuit-breaker,nist-ir-4")
-@reject_message("Sensitive tool blocked: session cumulative risk exceeds 150. Non-first-party agents are restricted from sensitive tools in elevated-risk sessions to prevent progressive capability gain.")
+@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
+@reject_message("Tool execution blocked: cumulative session risk exceeded the A2A circuit-breaker threshold for sensitive tools.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -87,21 +74,20 @@ forbid (
 )
 when {
     context has agent_trust_level && context.agent_trust_level != "first_party" &&
-    context has session_cumulative_risk_score && context.session_cumulative_risk_score > 150 &&
+    context has session_cumulative_risk_score && context.session_cumulative_risk_score >= 151 &&
     context has tool_is_sensitive && context.tool_is_sensitive == true
 };
-// -----------------------------------------------------------------------------
-// Threat Turn Escalation — Probing Detection
-// -----------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// Section 3: Threat turn escalation
+// ---------------------------------------------------------------------------
-// Block unverified agents after repeated threat turns
-@id("a2a-threat-turn-escalation-block")
+@id("agent-identity.a2a-threat-turn-escalation")
 @name("Block unverified agents after repeated threat turns")
-@description("Block unverified agents from all tool calls after 3+ threat turns are detected in the session. Lower threshold than MAS (5) because repeated threats from an unverified agent's session indicate adversarial probing — the attacker is incrementally testing boundaries. Three threat turns is sufficient evidence of active reconnaissance.")
+@description("Blocks call_tool when agent_trust_level is \"unverified\" and session_threat_turns >= 3.")
 @severity("critical")
-@tags("profile,a2a-security,escalation,threat-turns,probing,unverified,nist-ir-4")
-@reject_message("Tool execution blocked: 3+ threat turns detected in this session. Unverified agents are locked out after repeated threat signals to prevent adversarial escalation via turn-by-turn probing.")
+@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
+@reject_message("Tool execution blocked: 3+ threat turns in this session — unverified agents are locked out to stop adversarial probing.")
 forbid (
     principal is Guardrails::Agent,
     action == Guardrails::Action::"call_tool",
@@ -109,5 +95,5 @@ forbid (
 )
 when {
     context has agent_trust_level && context.agent_trust_level == "unverified" &&
-    context has session_threat_turns && context.session_threat_turns > 2
+    context has session_threat_turns && context.session_threat_turns >= 3
 };