npm - @highflame/policy - Versions diffs - 2.1.2 → 2.1.3 - Mend

@highflame/policy 2.1.2 → 2.1.3

This diff shows the differences between two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.

Files changed (2)

package/dist/overwatch-defaults.gen.js +358 -370
package/package.json +1 -1

package/dist/overwatch-defaults.gen.js (changed)

@@ -32,6 +32,307 @@ permit (
     resource
 );
 `;
+const OVERWATCH_SEMANTIC_DEFAULT_CEDAR = `// =============================================================================
+// Semantic Threat Detection Policy (Default)
+// =============================================================================
+// Detects and blocks prompt injection, jailbreak attempts, and high-severity
+// AI security threats. Uses multi-layered detection:
+//
+//   1. Detection engine rule triggers (detected_threats) — pattern-based
+//   2. ML classifier confidence scores (injection_confidence, jailbreak_confidence)
+//   3. Threat severity aggregation (max_threat_severity, highest_severity)
+//   4. Cross-action enforcement (prompts + tool calls + file operations)
+//
+// Compliance:
+//   OWASP LLM01 (Prompt Injection) — direct + indirect
+//   OWASP LLM02 (Insecure Output Handling) — response manipulation
+//   OWASP ASI01 (Agent Goal Hijack) — behavioral manipulation
+//   MITRE ATLAS AML.T0051 (LLM Prompt Injection)
+//   MITRE ATLAS AML.T0054 (LLM Jailbreak)
+//   NIST 800-53 SI-3 (Malicious Code Protection)
+//   NIST 800-53 SI-4 (Information System Monitoring)
+//
+// Category: semantic
+// Namespace: Overwatch
+// =============================================================================
+// ---------------------------------------------------------------------------
+// Section 1: Prompt Injection Detection
+// Blocks direct prompt injection — adversarial input designed to override
+// system instructions and hijack agent behavior.
+// Ref: OWASP LLM01, MITRE AML.T0051, 62% of LLM apps vulnerable (2024)
+// ---------------------------------------------------------------------------
+// Block content with prompt injection patterns detected by rules
+@id("semantic-block-injection")
+@name("Block prompt injection")
+@description("Block prompts and tool calls when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, delimiter injection, and other manipulation techniques in both user input and tool arguments (OWASP LLM01).")
+@severity("critical")
+@tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
+@reject_message("Content was blocked because prompt injection patterns were detected. This is a security measure to prevent manipulation of AI agent behavior (OWASP LLM01).")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has detected_threats && context.detected_threats.contains("prompt_injection")
+};
+// Block content with high ML injection confidence
+@id("semantic-block-injection-score")
+@name("Block high-confidence injection")
+@description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques that evade pattern-based detection — polymorphic payloads, encoding tricks, and obfuscated instructions.")
+@severity("critical")
+@tags("injection,ml-classifier,security,owasp-llm01,mitre-aml-t0051")
+@reject_message("Your content was blocked because the ML classifier detected prompt injection with high confidence. This appears to be an attempt to manipulate agent behavior.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has injection_confidence && context.injection_confidence >= 75
+};
+// ---------------------------------------------------------------------------
+// Section 2: Jailbreak Detection
+// Blocks jailbreak attempts — adversarial input designed to bypass AI safety
+// guardrails and elicit restricted outputs.
+// Ref: OWASP LLM02, MITRE AML.T0054, DAN/JailbreakChat/etc.
+// ---------------------------------------------------------------------------
+// Block prompts with jailbreak attempts detected by rules
+@id("semantic-block-jailbreak")
+@name("Block jailbreak attempts")
+@description("Block prompts when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
+@severity("critical")
+@tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
+@reject_message("Your prompt was blocked because jailbreak patterns were detected. This is a security measure to prevent circumvention of AI safety controls (OWASP LLM02).")
+forbid (
+    principal,
+    action == Overwatch::Action::"process_prompt",
+    resource
+)
+when {
+    context has detected_threats && context.detected_threats.contains("jailbreak")
+};
+// Block prompts with high ML jailbreak confidence
+@id("semantic-block-jailbreak-score")
+@name("Block high-confidence jailbreak")
+@description("Block content when the ML jailbreak classifier confidence exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation, encoded payloads, and novel prompt structures.")
+@severity("critical")
+@tags("jailbreak,ml-classifier,security,owasp-llm02,mitre-aml-t0054")
+@reject_message("Your content was blocked because the ML classifier detected a jailbreak attempt with high confidence. This appears to be an attempt to bypass safety guardrails.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has jailbreak_confidence && context.jailbreak_confidence >= 75
+};
+// ---------------------------------------------------------------------------
+// Section 3: Threat Severity Aggregation
+// Blocks based on aggregated threat severity from all detection engines.
+// These act as catch-all rules for threats that don't match specific patterns.
+// ---------------------------------------------------------------------------
+// Block any content with critical severity threats
+@id("semantic-block-critical")
+@name("Block critical threats")
+@description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source.")
+@severity("critical")
+@tags("critical,baseline,security,catch-all")
+@reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
+forbid (
+    principal,
+    action,
+    resource
+)
+when {
+    context has highest_severity && context.highest_severity == "critical"
+};
+// Block prompts with high severity semantic threats
+@id("semantic-block-high-severity")
+@name("Block high severity threats")
+@description("Block prompts when threat detection reports high severity (>= 3) in semantic categories. Catches threats that are individually below critical but collectively indicate adversarial intent.")
+@severity("high")
+@tags("semantic,severity,security,defense-in-depth")
+@reject_message("Your prompt was blocked because security scanners detected high severity issues in the content. Review your prompt for manipulative or adversarial patterns.")
+forbid (
+    principal,
+    action == Overwatch::Action::"process_prompt",
+    resource
+)
+when {
+    context has threat_categories && context has max_threat_severity &&
+    context.threat_categories.contains("semantic") &&
+    context.max_threat_severity >= 3
+};
+// Block tool calls with multiple concurrent threats
+@id("semantic-block-multi-threat-tools")
+@name("Block multi-threat tool calls")
+@description("Block tool execution when multiple distinct threats are detected simultaneously (3+). Multiple concurrent threats in a tool call strongly indicate an adversarial attack chain.")
+@severity("high")
+@tags("multi-threat,tools,security,defense-in-depth")
+@reject_message("Tool execution was blocked because multiple security threats were detected simultaneously. This pattern indicates a potential attack chain.")
+forbid (
+    principal,
+    action == Overwatch::Action::"call_tool",
+    resource
+)
+when {
+    context has threat_count && context.threat_count >= 3
+};
+`;
+const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
+// Content Safety Policy (Default)
+// =============================================================================
+// Detects and blocks violent, harmful, hateful, sexual, and profane content
+// using trust/safety ML classification scores.
+//
+// The detection engine runs multiple ML classifiers (toxicity, content safety)
+// and produces normalized scores (0-100) for each category. These policies
+// enforce enterprise-appropriate content thresholds.
+//
+// Compliance:
+//   NIST 800-53 SI-4 (Information System Monitoring)
+//   EU AI Act Art. 52 (Transparency for AI Systems)
+//   OWASP LLM02 (Insecure Output Handling) — unsafe content generation
+//   ISO 42001 (AI Management System) — responsible AI use
+//
+// Category: trust_safety
+// Namespace: Overwatch
+// =============================================================================
+// ---------------------------------------------------------------------------
+// Section 1: Violence & Weapons
+// Blocks content promoting, describing, or instructing violence and weapons.
+// ---------------------------------------------------------------------------
+// Block content with high violence score
+@id("ts-block-violence")
+@name("Block violent content")
+@description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language.")
+@severity("critical")
+@tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
+@reject_message("Content blocked: violent content detected. AI agents in enterprise environments must not process or generate violent content. Please rephrase without violence-related language.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has violence_score && context.violence_score >= 80
+};
+// Block content with weapons references
+@id("ts-block-weapons")
+@name("Block weapons content")
+@description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed weapon specifications.")
+@severity("critical")
+@tags("weapons,content-safety,trust-safety,nist-si-4")
+@reject_message("Content blocked: weapons-related content detected. AI agents must not process weapons manufacturing, procurement, or detailed specification content.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has weapons_score && context.weapons_score >= 80
+};
+// ---------------------------------------------------------------------------
+// Section 2: Hate Speech & Discrimination
+// Blocks hateful, discriminatory, and dehumanizing content.
+// Lower threshold (75) — enterprises have zero tolerance for hate speech.
+// ---------------------------------------------------------------------------
+// Block hate speech content
+@id("ts-block-hate-speech")
+@name("Block hate speech")
+@description("Block content when the ML hate speech detection score exceeds threshold (75/100). Lower threshold than other categories because enterprises have zero tolerance for discriminatory content. Catches slurs, dehumanizing language, and targeted harassment.")
+@severity("critical")
+@tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
+@reject_message("Content blocked: hate speech or discriminatory content detected. AI agents in enterprise environments must not process or generate hateful, discriminatory, or dehumanizing content.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has hate_speech_score && context.hate_speech_score >= 75
+};
+// ---------------------------------------------------------------------------
+// Section 3: Criminal Content
+// Blocks content related to criminal activities and illegal operations.
+// ---------------------------------------------------------------------------
+// Block criminal content
+@id("ts-block-crime")
+@name("Block criminal content")
+@description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and content promoting criminal behavior.")
+@severity("high")
+@tags("crime,content-safety,trust-safety,nist-si-4")
+@reject_message("Content blocked: criminal activity content detected. AI agents must not process content related to illegal activities, fraud, or other criminal behavior.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has crime_score && context.crime_score >= 80
+};
+// ---------------------------------------------------------------------------
+// Section 4: Sexual Content
+// Blocks sexually explicit or inappropriate content.
+// ---------------------------------------------------------------------------
+// Block sexual content
+@id("ts-block-sexual")
+@name("Block sexual content")
+@description("Block content when the ML sexual content detection score exceeds threshold (80/100). Ensures enterprise AI agents do not process or generate sexually explicit material.")
+@severity("high")
+@tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
+@reject_message("Content blocked: sexual content detected. AI agents in enterprise environments must not process sexually explicit material.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has sexual_score && context.sexual_score >= 80
+};
+// ---------------------------------------------------------------------------
+// Section 5: Profanity
+// Blocks excessive profanity in enterprise environments.
+// Higher threshold (90) — allows normal expression while blocking abuse.
+// ---------------------------------------------------------------------------
+// Block excessive profanity
+@id("ts-block-profanity")
+@name("Block profanity")
+@description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold than other safety categories — allows normal expression while blocking abusive or harassing language patterns.")
+@severity("medium")
+@tags("profanity,content-safety,trust-safety")
+@reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner appropriate for enterprise AI interactions.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has profanity_score && context.profanity_score >= 90
+};
+`;
 const OVERWATCH_SECRETS_DEFAULT_CEDAR = `// =============================================================================
 // Secrets Detection Policy (Default)
 // =============================================================================
@@ -439,172 +740,14 @@ when {
 @description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
 @severity("high")
 @tags("pii,file-ops,data-protection,gdpr-art-32,nist-si-4")
-@reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
-    resource
-)
-when {
-    context has pii_detected && context.pii_detected
-};
-`;
-const OVERWATCH_SEMANTIC_DEFAULT_CEDAR = `// =============================================================================
-// Semantic Threat Detection Policy (Default)
-// =============================================================================
-// Detects and blocks prompt injection, jailbreak attempts, and high-severity
-// AI security threats. Uses multi-layered detection:
-//
-//   1. Detection engine rule triggers (detected_threats) — pattern-based
-//   2. ML classifier confidence scores (injection_confidence, jailbreak_confidence)
-//   3. Threat severity aggregation (max_threat_severity, highest_severity)
-//   4. Cross-action enforcement (prompts + tool calls + file operations)
-//
-// Compliance:
-//   OWASP LLM01 (Prompt Injection) — direct + indirect
-//   OWASP LLM02 (Insecure Output Handling) — response manipulation
-//   OWASP ASI01 (Agent Goal Hijack) — behavioral manipulation
-//   MITRE ATLAS AML.T0051 (LLM Prompt Injection)
-//   MITRE ATLAS AML.T0054 (LLM Jailbreak)
-//   NIST 800-53 SI-3 (Malicious Code Protection)
-//   NIST 800-53 SI-4 (Information System Monitoring)
-//
-// Category: semantic
-// Namespace: Overwatch
-// =============================================================================
-// ---------------------------------------------------------------------------
-// Section 1: Prompt Injection Detection
-// Blocks direct prompt injection — adversarial input designed to override
-// system instructions and hijack agent behavior.
-// Ref: OWASP LLM01, MITRE AML.T0051, 62% of LLM apps vulnerable (2024)
-// ---------------------------------------------------------------------------
-// Block content with prompt injection patterns detected by rules
-@id("semantic-block-injection")
-@name("Block prompt injection")
-@description("Block prompts and tool calls when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, delimiter injection, and other manipulation techniques in both user input and tool arguments (OWASP LLM01).")
-@severity("critical")
-@tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
-@reject_message("Content was blocked because prompt injection patterns were detected. This is a security measure to prevent manipulation of AI agent behavior (OWASP LLM01).")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has detected_threats && context.detected_threats.contains("prompt_injection")
-};
-// Block content with high ML injection confidence
-@id("semantic-block-injection-score")
-@name("Block high-confidence injection")
-@description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques that evade pattern-based detection — polymorphic payloads, encoding tricks, and obfuscated instructions.")
-@severity("critical")
-@tags("injection,ml-classifier,security,owasp-llm01,mitre-aml-t0051")
-@reject_message("Your content was blocked because the ML classifier detected prompt injection with high confidence. This appears to be an attempt to manipulate agent behavior.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has injection_confidence && context.injection_confidence >= 75
-};
-// ---------------------------------------------------------------------------
-// Section 2: Jailbreak Detection
-// Blocks jailbreak attempts — adversarial input designed to bypass AI safety
-// guardrails and elicit restricted outputs.
-// Ref: OWASP LLM02, MITRE AML.T0054, DAN/JailbreakChat/etc.
-// ---------------------------------------------------------------------------
-// Block prompts with jailbreak attempts detected by rules
-@id("semantic-block-jailbreak")
-@name("Block jailbreak attempts")
-@description("Block prompts when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
-@severity("critical")
-@tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
-@reject_message("Your prompt was blocked because jailbreak patterns were detected. This is a security measure to prevent circumvention of AI safety controls (OWASP LLM02).")
-forbid (
-    principal,
-    action == Overwatch::Action::"process_prompt",
-    resource
-)
-when {
-    context has detected_threats && context.detected_threats.contains("jailbreak")
-};
-// Block prompts with high ML jailbreak confidence
-@id("semantic-block-jailbreak-score")
-@name("Block high-confidence jailbreak")
-@description("Block content when the ML jailbreak classifier confidence exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation, encoded payloads, and novel prompt structures.")
-@severity("critical")
-@tags("jailbreak,ml-classifier,security,owasp-llm02,mitre-aml-t0054")
-@reject_message("Your content was blocked because the ML classifier detected a jailbreak attempt with high confidence. This appears to be an attempt to bypass safety guardrails.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has jailbreak_confidence && context.jailbreak_confidence >= 75
-};
-// ---------------------------------------------------------------------------
-// Section 3: Threat Severity Aggregation
-// Blocks based on aggregated threat severity from all detection engines.
-// These act as catch-all rules for threats that don't match specific patterns.
-// ---------------------------------------------------------------------------
-// Block any content with critical severity threats
-@id("semantic-block-critical")
-@name("Block critical threats")
-@description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source.")
-@severity("critical")
-@tags("critical,baseline,security,catch-all")
-@reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    context has highest_severity && context.highest_severity == "critical"
-};
-// Block prompts with high severity semantic threats
-@id("semantic-block-high-severity")
-@name("Block high severity threats")
-@description("Block prompts when threat detection reports high severity (>= 3) in semantic categories. Catches threats that are individually below critical but collectively indicate adversarial intent.")
-@severity("high")
-@tags("semantic,severity,security,defense-in-depth")
-@reject_message("Your prompt was blocked because security scanners detected high severity issues in the content. Review your prompt for manipulative or adversarial patterns.")
-forbid (
-    principal,
-    action == Overwatch::Action::"process_prompt",
-    resource
-)
-when {
-    context has threat_categories && context has max_threat_severity &&
-    context.threat_categories.contains("semantic") &&
-    context.max_threat_severity >= 3
-};
-// Block tool calls with multiple concurrent threats
-@id("semantic-block-multi-threat-tools")
-@name("Block multi-threat tool calls")
-@description("Block tool execution when multiple distinct threats are detected simultaneously (3+). Multiple concurrent threats in a tool call strongly indicate an adversarial attack chain.")
-@severity("high")
-@tags("multi-threat,tools,security,defense-in-depth")
-@reject_message("Tool execution was blocked because multiple security threats were detected simultaneously. This pattern indicates a potential attack chain.")
+@reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has threat_count && context.threat_count >= 3
+    context has pii_detected && context.pii_detected
 };
 `;
 const OVERWATCH_TOOLS_DEFAULT_CEDAR = `// =============================================================================
@@ -840,149 +983,6 @@ when {
     context.detected_threats.contains("command_injection")
 };
 `;
-const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
-// Content Safety Policy (Default)
-// =============================================================================
-// Detects and blocks violent, harmful, hateful, sexual, and profane content
-// using trust/safety ML classification scores.
-//
-// The detection engine runs multiple ML classifiers (toxicity, content safety)
-// and produces normalized scores (0-100) for each category. These policies
-// enforce enterprise-appropriate content thresholds.
-//
-// Compliance:
-//   NIST 800-53 SI-4 (Information System Monitoring)
-//   EU AI Act Art. 52 (Transparency for AI Systems)
-//   OWASP LLM02 (Insecure Output Handling) — unsafe content generation
-//   ISO 42001 (AI Management System) — responsible AI use
-//
-// Category: trust_safety
-// Namespace: Overwatch
-// =============================================================================
-// ---------------------------------------------------------------------------
-// Section 1: Violence & Weapons
-// Blocks content promoting, describing, or instructing violence and weapons.
-// ---------------------------------------------------------------------------
-// Block content with high violence score
-@id("ts-block-violence")
-@name("Block violent content")
-@description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language.")
-@severity("critical")
-@tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
-@reject_message("Content blocked: violent content detected. AI agents in enterprise environments must not process or generate violent content. Please rephrase without violence-related language.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has violence_score && context.violence_score >= 80
-};
-// Block content with weapons references
-@id("ts-block-weapons")
-@name("Block weapons content")
-@description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed weapon specifications.")
-@severity("critical")
-@tags("weapons,content-safety,trust-safety,nist-si-4")
-@reject_message("Content blocked: weapons-related content detected. AI agents must not process weapons manufacturing, procurement, or detailed specification content.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has weapons_score && context.weapons_score >= 80
-};
-// ---------------------------------------------------------------------------
-// Section 2: Hate Speech & Discrimination
-// Blocks hateful, discriminatory, and dehumanizing content.
-// Lower threshold (75) — enterprises have zero tolerance for hate speech.
-// ---------------------------------------------------------------------------
-// Block hate speech content
-@id("ts-block-hate-speech")
-@name("Block hate speech")
-@description("Block content when the ML hate speech detection score exceeds threshold (75/100). Lower threshold than other categories because enterprises have zero tolerance for discriminatory content. Catches slurs, dehumanizing language, and targeted harassment.")
-@severity("critical")
-@tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
-@reject_message("Content blocked: hate speech or discriminatory content detected. AI agents in enterprise environments must not process or generate hateful, discriminatory, or dehumanizing content.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has hate_speech_score && context.hate_speech_score >= 75
-};
-// ---------------------------------------------------------------------------
-// Section 3: Criminal Content
-// Blocks content related to criminal activities and illegal operations.
-// ---------------------------------------------------------------------------
-// Block criminal content
-@id("ts-block-crime")
-@name("Block criminal content")
-@description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and content promoting criminal behavior.")
-@severity("high")
-@tags("crime,content-safety,trust-safety,nist-si-4")
-@reject_message("Content blocked: criminal activity content detected. AI agents must not process content related to illegal activities, fraud, or other criminal behavior.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has crime_score && context.crime_score >= 80
-};
-// ---------------------------------------------------------------------------
-// Section 4: Sexual Content
-// Blocks sexually explicit or inappropriate content.
-// ---------------------------------------------------------------------------
-// Block sexual content
-@id("ts-block-sexual")
-@name("Block sexual content")
-@description("Block content when the ML sexual content detection score exceeds threshold (80/100). Ensures enterprise AI agents do not process or generate sexually explicit material.")
-@severity("high")
-@tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
-@reject_message("Content blocked: sexual content detected. AI agents in enterprise environments must not process sexually explicit material.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has sexual_score && context.sexual_score >= 80
-};
-// ---------------------------------------------------------------------------
-// Section 5: Profanity
-// Blocks excessive profanity in enterprise environments.
-// Higher threshold (90) — allows normal expression while blocking abuse.
-// ---------------------------------------------------------------------------
-// Block excessive profanity
-@id("ts-block-profanity")
-@name("Block profanity")
-@description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold than other safety categories — allows normal expression while blocking abusive or harassing language patterns.")
-@severity("medium")
-@tags("profanity,content-safety,trust-safety")
-@reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner appropriate for enterprise AI interactions.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has profanity_score && context.profanity_score >= 90
-};
-`;
 const OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR = `// =============================================================================
 // Agent Security Policy (Default)
 // =============================================================================
@@ -1677,6 +1677,31 @@ export const OVERWATCH_DEFAULTS = [
         tags: ['baseline', 'permit-default', 'organization'],
         isActive: true,
     },
+    {
+        id: 'semantic-default',
+        name: 'Semantic Threat Detection',
+        description: 'Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers',
+        category: 'semantic',
+        cedarText: OVERWATCH_SEMANTIC_DEFAULT_CEDAR,
+        severity: 'critical',
+        tags: ['prompt-injection', 'jailbreak', 'owasp-llm01', 'owasp-llm02', 'security', 'baseline'],
+        isActive: true,
+    },
+    {
+        id: 'trust-safety-default',
+        name: 'Content Safety',
+        description: 'Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores',
+        category: 'trust_safety',
+        cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
+        severity: 'critical',
+        tags: ['violence', 'weapons', 'hate-speech', 'crime', 'sexual', 'profanity', 'content-safety', 'baseline'],
+        isActive: true,
+    },
+];
+// =============================================================================
+// ALL TEMPLATES
+// =============================================================================
+export const OVERWATCH_TEMPLATES = [
     {
         id: 'secrets-default',
         name: 'Secrets Detection',
@@ -1685,7 +1710,6 @@ export const OVERWATCH_DEFAULTS = [
         cedarText: OVERWATCH_SECRETS_DEFAULT_CEDAR,
         severity: 'critical',
         tags: ['api-keys', 'tokens', 'credentials', 'aws', 'github', 'ssh', 'baseline'],
-        isActive: true,
     },
     {
         id: 'pii-default',
@@ -1695,17 +1719,6 @@ export const OVERWATCH_DEFAULTS = [
         cedarText: OVERWATCH_PII_DEFAULT_CEDAR,
         severity: 'critical',
         tags: ['pii', 'privacy', 'compliance', 'pci-dss', 'gdpr', 'hipaa', 'baseline'],
-        isActive: true,
-    },
-    {
-        id: 'semantic-default',
-        name: 'Semantic Threat Detection',
-        description: 'Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers',
-        category: 'semantic',
-        cedarText: OVERWATCH_SEMANTIC_DEFAULT_CEDAR,
-        severity: 'critical',
-        tags: ['prompt-injection', 'jailbreak', 'owasp-llm01', 'owasp-llm02', 'security', 'baseline'],
-        isActive: true,
     },
     {
         id: 'tools-default',
@@ -1715,17 +1728,6 @@ export const OVERWATCH_DEFAULTS = [
         cedarText: OVERWATCH_TOOLS_DEFAULT_CEDAR,
         severity: 'critical',
         tags: ['shell', 'command-injection', 'file-access', 'tool-risk', 'mitre-t1059', 'owasp-llm06', 'baseline'],
-        isActive: true,
-    },
-    {
-        id: 'trust-safety-default',
-        name: 'Content Safety',
-        description: 'Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores',
-        category: 'trust_safety',
-        cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
-        severity: 'critical',
-        tags: ['violence', 'weapons', 'hate-speech', 'crime', 'sexual', 'profanity', 'content-safety', 'baseline'],
-        isActive: true,
     },
     {
         id: 'agent-security-default',
@@ -1735,7 +1737,6 @@ export const OVERWATCH_DEFAULTS = [
         cedarText: OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR,
         severity: 'critical',
         tags: ['tool-poisoning', 'rug-pull', 'indirect-injection', 'mcp-security', 'agent-security', 'owasp-asi01', 'owasp-asi04', 'baseline'],
-        isActive: true,
     },
     {
         id: 'encoding-default',
@@ -1745,7 +1746,6 @@ export const OVERWATCH_DEFAULTS = [
         cedarText: OVERWATCH_ENCODING_DEFAULT_CEDAR,
         severity: 'high',
         tags: ['unicode', 'invisible-chars', 'bidi-override', 'encoding', 'owasp-llm01', 'baseline'],
-        isActive: true,
     },
     {
         id: 'behavioral-default',
@@ -1755,13 +1755,7 @@ export const OVERWATCH_DEFAULTS = [
         cedarText: OVERWATCH_BEHAVIORAL_DEFAULT_CEDAR,
         severity: 'high',
         tags: ['loop-detection', 'data-exfiltration', 'credential-theft', 'behavioral', 'owasp-llm10', 'owasp-asi02', 'baseline'],
-        isActive: true,
     },
-];
-// =============================================================================
-// ALL TEMPLATES
-// =============================================================================
-export const OVERWATCH_TEMPLATES = [
     {
         id: 'tools-mcp-allowlist',
         name: 'MCP Server Allowlist',
@@ -1883,6 +1877,28 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "tags": ["baseline", "permit-default", "organization"],
       "is_active": true
     },
+    {
+      "id": "semantic-default",
+      "name": "Semantic Threat Detection",
+      "description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers",
+      "category": "semantic",
+      "file": "defaults/semantic.cedar",
+      "severity": "critical",
+      "tags": ["prompt-injection", "jailbreak", "owasp-llm01", "owasp-llm02", "security", "baseline"],
+      "is_active": true
+    },
+    {
+      "id": "trust-safety-default",
+      "name": "Content Safety",
+      "description": "Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores",
+      "category": "trust_safety",
+      "file": "defaults/trust_safety.cedar",
+      "severity": "critical",
+      "tags": ["violence", "weapons", "hate-speech", "crime", "sexual", "profanity", "content-safety", "baseline"],
+      "is_active": true
+    }
+  ],
+  "templates": [
     {
       "id": "secrets-default",
       "name": "Secrets Detection",
@@ -1890,8 +1906,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "category": "secrets",
       "file": "defaults/secrets.cedar",
       "severity": "critical",
-      "tags": ["api-keys", "tokens", "credentials", "aws", "github", "ssh", "baseline"],
-      "is_active": true
+      "tags": ["api-keys", "tokens", "credentials", "aws", "github", "ssh", "baseline"]
     },
     {
       "id": "pii-default",
@@ -1900,18 +1915,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "category": "pii",
       "file": "defaults/pii.cedar",
       "severity": "critical",
-      "tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"],
-      "is_active": true
-    },
-    {
-      "id": "semantic-default",
-      "name": "Semantic Threat Detection",
-      "description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers",
-      "category": "semantic",
-      "file": "defaults/semantic.cedar",
-      "severity": "critical",
-      "tags": ["prompt-injection", "jailbreak", "owasp-llm01", "owasp-llm02", "security", "baseline"],
-      "is_active": true
+      "tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"]
     },
     {
       "id": "tools-default",
@@ -1920,18 +1924,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "category": "tools",
       "file": "defaults/tools.cedar",
       "severity": "critical",
-      "tags": ["shell", "command-injection", "file-access", "tool-risk", "mitre-t1059", "owasp-llm06", "baseline"],
-      "is_active": true
-    },
-    {
-      "id": "trust-safety-default",
-      "name": "Content Safety",
-      "description": "Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores",
-      "category": "trust_safety",
-      "file": "defaults/trust_safety.cedar",
-      "severity": "critical",
-      "tags": ["violence", "weapons", "hate-speech", "crime", "sexual", "profanity", "content-safety", "baseline"],
-      "is_active": true
+      "tags": ["shell", "command-injection", "file-access", "tool-risk", "mitre-t1059", "owasp-llm06", "baseline"]
     },
     {
       "id": "agent-security-default",
@@ -1940,8 +1933,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "category": "agent_security",
       "file": "defaults/agent_security.cedar",
       "severity": "critical",
-      "tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "owasp-asi01", "owasp-asi04", "baseline"],
-      "is_active": true
+      "tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "owasp-asi01", "owasp-asi04", "baseline"]
     },
     {
       "id": "encoding-default",
@@ -1950,8 +1942,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "category": "encoding",
       "file": "defaults/encoding_attacks.cedar",
       "severity": "high",
-      "tags": ["unicode", "invisible-chars", "bidi-override", "encoding", "owasp-llm01", "baseline"],
-      "is_active": true
+      "tags": ["unicode", "invisible-chars", "bidi-override", "encoding", "owasp-llm01", "baseline"]
     },
     {
       "id": "behavioral-default",
@@ -1960,11 +1951,8 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "category": "behavioral",
       "file": "defaults/behavioral.cedar",
       "severity": "high",
-      "tags": ["loop-detection", "data-exfiltration", "credential-theft", "behavioral", "owasp-llm10", "owasp-asi02", "baseline"],
-      "is_active": true
-    }
-  ],
-  "templates": [
+      "tags": ["loop-detection", "data-exfiltration", "credential-theft", "behavioral", "owasp-llm10", "owasp-asi02", "baseline"]
+    },
     {
       "id": "tools-mcp-allowlist",
       "name": "MCP Server Allowlist",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@highflame/policy",
-  "version": "2.1.2",
+  "version": "2.1.3",
   "description": "Highflame Cedar policy types and engine wrapper",
   "readme": "README.md",
   "main": "dist/index.js",