npm - @highflame/policy - Versions diffs - 2.1.8 → 2.1.9 - Mend

@highflame/policy 2.1.8 → 2.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/_schemas/overwatch/schema.cedarschema +6 -0
package/dist/overwatch-defaults.gen.d.ts +1 -1
package/dist/overwatch-defaults.gen.js +508 -1206
package/dist/service-schemas.gen.d.ts +1 -1
package/dist/service-schemas.gen.js +6 -0
package/package.json +1 -1

package/dist/overwatch-defaults.gen.js CHANGED

@@ -32,325 +32,27 @@ permit (
     resource
 );
 `;
-const OVERWATCH_SEMANTIC_DEFAULT_CEDAR = `// =============================================================================
-// Semantic Threat Detection Policy (Default)
-// =============================================================================
-// Detects and blocks prompt injection, jailbreak attempts, and high-severity
-// AI security threats. Uses multi-layered detection:
-//
-//   1. Detection engine rule triggers (detected_threats) — pattern-based
-//   2. ML classifier confidence scores (injection_confidence, jailbreak_confidence)
-//   3. Threat severity aggregation (max_threat_severity, highest_severity)
-//   4. Cross-action enforcement (prompts + tool calls + file operations)
-//
-// Compliance:
-//   OWASP LLM01 (Prompt Injection) — direct + indirect
-//   OWASP LLM02 (Insecure Output Handling) — response manipulation
-//   OWASP ASI01 (Agent Goal Hijack) — behavioral manipulation
-//   MITRE ATLAS AML.T0051 (LLM Prompt Injection)
-//   MITRE ATLAS AML.T0054 (LLM Jailbreak)
-//   NIST 800-53 SI-3 (Malicious Code Protection)
-//   NIST 800-53 SI-4 (Information System Monitoring)
-//
-// Category: semantic
-// Namespace: Overwatch
-// =============================================================================
-// ---------------------------------------------------------------------------
-// Section 1: Prompt Injection Detection
-// Blocks direct prompt injection — adversarial input designed to override
-// system instructions and hijack agent behavior.
-// Ref: OWASP LLM01, MITRE AML.T0051, 62% of LLM apps vulnerable (2024)
-// ---------------------------------------------------------------------------
-// Block content with prompt injection patterns detected by rules
-@id("semantic-block-injection")
-@name("Block prompt injection")
-@description("Block prompts and tool calls when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, delimiter injection, and other manipulation techniques in both user input and tool arguments (OWASP LLM01).")
-@severity("critical")
-@tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
-@reject_message("Content was blocked because prompt injection patterns were detected. This is a security measure to prevent manipulation of AI agent behavior (OWASP LLM01).")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has detected_threats && context.detected_threats.contains("prompt_injection")
-};
-// Block content with high ML injection confidence
-@id("semantic-block-injection-score")
-@name("Block high-confidence injection")
-@description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques that evade pattern-based detection — polymorphic payloads, encoding tricks, and obfuscated instructions.")
-@severity("critical")
-@tags("injection,ml-classifier,security,owasp-llm01,mitre-aml-t0051")
-@reject_message("Your content was blocked because the ML classifier detected prompt injection with high confidence. This appears to be an attempt to manipulate agent behavior.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has injection_confidence && context.injection_confidence >= 75
-};
-// ---------------------------------------------------------------------------
-// Section 2: Jailbreak Detection
-// Blocks jailbreak attempts — adversarial input designed to bypass AI safety
-// guardrails and elicit restricted outputs.
-// Ref: OWASP LLM02, MITRE AML.T0054, DAN/JailbreakChat/etc.
-// ---------------------------------------------------------------------------
-// Block prompts with jailbreak attempts detected by rules
-@id("semantic-block-jailbreak")
-@name("Block jailbreak attempts")
-@description("Block prompts when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
-@severity("critical")
-@tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
-@reject_message("Your prompt was blocked because jailbreak patterns were detected. This is a security measure to prevent circumvention of AI safety controls (OWASP LLM02).")
-forbid (
-    principal,
-    action == Overwatch::Action::"process_prompt",
-    resource
-)
-when {
-    context has detected_threats && context.detected_threats.contains("jailbreak")
-};
-// Block prompts with high ML jailbreak confidence
-@id("semantic-block-jailbreak-score")
-@name("Block high-confidence jailbreak")
-@description("Block content when the ML jailbreak classifier confidence exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation, encoded payloads, and novel prompt structures.")
-@severity("critical")
-@tags("jailbreak,ml-classifier,security,owasp-llm02,mitre-aml-t0054")
-@reject_message("Your content was blocked because the ML classifier detected a jailbreak attempt with high confidence. This appears to be an attempt to bypass safety guardrails.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has jailbreak_confidence && context.jailbreak_confidence >= 75
-};
-// ---------------------------------------------------------------------------
-// Section 3: Threat Severity Aggregation
-// Blocks based on aggregated threat severity from all detection engines.
-// These act as catch-all rules for threats that don't match specific patterns.
-// ---------------------------------------------------------------------------
-// Block any content with critical severity threats
-@id("semantic-block-critical")
-@name("Block critical threats")
-@description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source.")
-@severity("critical")
-@tags("critical,baseline,security,catch-all")
-@reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    context has highest_severity && context.highest_severity == "critical"
-};
-// Block prompts with high severity semantic threats
-@id("semantic-block-high-severity")
-@name("Block high severity threats")
-@description("Block prompts when threat detection reports high severity (>= 3) in semantic categories. Catches threats that are individually below critical but collectively indicate adversarial intent.")
-@severity("high")
-@tags("semantic,severity,security,defense-in-depth")
-@reject_message("Your prompt was blocked because security scanners detected high severity issues in the content. Review your prompt for manipulative or adversarial patterns.")
-forbid (
-    principal,
-    action == Overwatch::Action::"process_prompt",
-    resource
-)
-when {
-    context has threat_categories && context has max_threat_severity &&
-    context.threat_categories.contains("semantic") &&
-    context.max_threat_severity >= 3
-};
-// Block tool calls with multiple concurrent threats
-@id("semantic-block-multi-threat-tools")
-@name("Block multi-threat tool calls")
-@description("Block tool execution when multiple distinct threats are detected simultaneously (3+). Multiple concurrent threats in a tool call strongly indicate an adversarial attack chain.")
-@severity("high")
-@tags("multi-threat,tools,security,defense-in-depth")
-@reject_message("Tool execution was blocked because multiple security threats were detected simultaneously. This pattern indicates a potential attack chain.")
-forbid (
-    principal,
-    action == Overwatch::Action::"call_tool",
-    resource
-)
-when {
-    context has threat_count && context.threat_count >= 3
-};
-`;
-const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
-// Content Safety Policy (Default)
-// =============================================================================
-// Detects and blocks violent, harmful, hateful, sexual, and profane content
-// using trust/safety ML classification scores.
-//
-// The detection engine runs multiple ML classifiers (toxicity, content safety)
-// and produces normalized scores (0-100) for each category. These policies
-// enforce enterprise-appropriate content thresholds.
-//
-// Compliance:
-//   NIST 800-53 SI-4 (Information System Monitoring)
-//   EU AI Act Art. 52 (Transparency for AI Systems)
-//   OWASP LLM02 (Insecure Output Handling) — unsafe content generation
-//   ISO 42001 (AI Management System) — responsible AI use
-//
-// Category: trust_safety
-// Namespace: Overwatch
-// =============================================================================
-// ---------------------------------------------------------------------------
-// Section 1: Violence & Weapons
-// Blocks content promoting, describing, or instructing violence and weapons.
-// ---------------------------------------------------------------------------
-// Block content with high violence score
-@id("ts-block-violence")
-@name("Block violent content")
-@description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language.")
-@severity("critical")
-@tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
-@reject_message("Content blocked: violent content detected. AI agents in enterprise environments must not process or generate violent content. Please rephrase without violence-related language.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has violence_score && context.violence_score >= 80
-};
-// Block content with weapons references
-@id("ts-block-weapons")
-@name("Block weapons content")
-@description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed weapon specifications.")
-@severity("critical")
-@tags("weapons,content-safety,trust-safety,nist-si-4")
-@reject_message("Content blocked: weapons-related content detected. AI agents must not process weapons manufacturing, procurement, or detailed specification content.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has weapons_score && context.weapons_score >= 80
-};
-// ---------------------------------------------------------------------------
-// Section 2: Hate Speech & Discrimination
-// Blocks hateful, discriminatory, and dehumanizing content.
-// Lower threshold (75) — enterprises have zero tolerance for hate speech.
-// ---------------------------------------------------------------------------
-// Block hate speech content
-@id("ts-block-hate-speech")
-@name("Block hate speech")
-@description("Block content when the ML hate speech detection score exceeds threshold (75/100). Lower threshold than other categories because enterprises have zero tolerance for discriminatory content. Catches slurs, dehumanizing language, and targeted harassment.")
-@severity("critical")
-@tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
-@reject_message("Content blocked: hate speech or discriminatory content detected. AI agents in enterprise environments must not process or generate hateful, discriminatory, or dehumanizing content.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has hate_speech_score && context.hate_speech_score >= 75
-};
-// ---------------------------------------------------------------------------
-// Section 3: Criminal Content
-// Blocks content related to criminal activities and illegal operations.
-// ---------------------------------------------------------------------------
-// Block criminal content
-@id("ts-block-crime")
-@name("Block criminal content")
-@description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and content promoting criminal behavior.")
-@severity("high")
-@tags("crime,content-safety,trust-safety,nist-si-4")
-@reject_message("Content blocked: criminal activity content detected. AI agents must not process content related to illegal activities, fraud, or other criminal behavior.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has crime_score && context.crime_score >= 80
-};
-// ---------------------------------------------------------------------------
-// Section 4: Sexual Content
-// Blocks sexually explicit or inappropriate content.
-// ---------------------------------------------------------------------------
-// Block sexual content
-@id("ts-block-sexual")
-@name("Block sexual content")
-@description("Block content when the ML sexual content detection score exceeds threshold (80/100). Ensures enterprise AI agents do not process or generate sexually explicit material.")
-@severity("high")
-@tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
-@reject_message("Content blocked: sexual content detected. AI agents in enterprise environments must not process sexually explicit material.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has sexual_score && context.sexual_score >= 80
-};
-// ---------------------------------------------------------------------------
-// Section 5: Profanity
-// Blocks excessive profanity in enterprise environments.
-// Higher threshold (90) — allows normal expression while blocking abuse.
-// ---------------------------------------------------------------------------
-// Block excessive profanity
-@id("ts-block-profanity")
-@name("Block profanity")
-@description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold than other safety categories — allows normal expression while blocking abusive or harassing language patterns.")
-@severity("medium")
-@tags("profanity,content-safety,trust-safety")
-@reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner appropriate for enterprise AI interactions.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has profanity_score && context.profanity_score >= 90
-};
-`;
 const OVERWATCH_SECRETS_DEFAULT_CEDAR = `// =============================================================================
 // Secrets Detection Policy (Default)
 // =============================================================================
-// Detects and blocks credential leakage across prompts, tool calls, file
-// operations, and AI response content. Uses multi-layered detection:
+// Detects and blocks credential leakage across prompts, tool calls, and file
+// operations using YARA rule name matching and file path patterns.
+//
+// Detection layers:
+//   1. YARA rule triggers (detected_threats) — pattern-based, always available
+//   2. Sensitive file path blocking (.env, credential directories)
 //
-//   1. Detection engine boolean (contains_secrets) — fastest, broadest catch
-//   2. Granular secret type matching (secret_types) — type-specific blocking
-//   3. Detection rule pattern matching (detected_threats) — named rule triggers
-//   4. Sensitive file path blocking (.env, credentials files)
-//   5. Response content pattern matching (defense-in-depth for AI outputs)
+// YARA rules matched:
+//   secrets_leakage    — AWS keys, GitHub tokens, private key headers, API keys
+//   ssh_key_exposure   — SSH private key content and key file paths
+//   pem_file_access    — PEM/certificate private keys and key files
+//   environment_variable_leakage — OPENAI_API_KEY, HF_TOKEN, generic *_API_KEY
 //
 // Compliance:
 //   NIST 800-53 SC-28 (Protection of Information at Rest)
 //   NIST 800-53 IA-5 (Authenticator Management)
 //   OWASP LLM07 (Insecure Plugin Design) — secrets in tool args
 //   MITRE ATT&CK T1552 (Unsecured Credentials)
-//   MITRE ATT&CK T1555 (Credentials from Password Stores)
 //   CIS Benchmark 1.4 (Secrets Management)
 //
 // Category: secrets
@@ -358,1070 +60,764 @@ const OVERWATCH_SECRETS_DEFAULT_CEDAR = `// ====================================
 // =============================================================================
 // ---------------------------------------------------------------------------
-// Section 1: Detection Engine — Primary Secret Detection
-// These fire when the detection pipeline identifies secrets in any content.
+// Section 1: YARA Rule — secrets_leakage
+// Detects AWS access keys, GitHub tokens, private key headers, API key
+// assignments, and bearer tokens in content.
 // ---------------------------------------------------------------------------
 // Block prompts containing detected secrets
-@id("secrets-block-prompts")
-@name("Block prompts with secrets")
-@description("Block prompts when detection engines identify API keys, tokens, or credential patterns. First line of defense against accidental secret exposure in user input.")
-@severity("critical")
-@tags("secrets,credentials,prompts,nist-sc-28,nist-ia-5")
-@reject_message("Your prompt was blocked because it contains detected secrets such as API keys, tokens, or credentials. Remove all secrets before resubmitting.")
+@id("secrets-block-leakage-prompt")
+@name("Block secrets in prompts")
+@description("Block prompts when YARA detects exposed secrets — AWS access keys (AKIA...), GitHub tokens (ghp_...), private key headers, bearer tokens, or API key assignments (api_key=sk-proj-...).")
+@severity("high")
+@tags("secrets,credentials,yara,process-prompt,nist-sc-28,nist-ia-5")
+@reject_message("Prompt blocked: exposed secrets detected (AWS keys, tokens, private keys, or API key assignments). Remove sensitive credentials before submitting.")
 forbid (
     principal,
     action == Overwatch::Action::"process_prompt",
     resource
 )
 when {
-    context has contains_secrets && context.contains_secrets
+    context.detected_threats.contains("secrets_leakage")
 };
-// Block file reads and tool calls when secrets are detected
-@id("secrets-block-reads-and-tools")
-@name("Block file reads and tool calls with secrets")
-@description("Prevent file reads and tool execution when secrets or credentials are detected in content. Blocks exfiltration of secrets via file operations and tool arguments.")
+// Block tool calls containing detected secrets
+@id("secrets-block-leakage-tool")
+@name("Block secrets in tool calls")
+@description("Block tool execution when YARA detects exposed secrets in tool arguments or command content.")
 @severity("high")
-@tags("secrets,file-access,tools,credentials,nist-sc-28,mitre-t1552")
-@reject_message("This operation was blocked because secrets or credentials were detected in the content. File reads and tool calls are restricted when credential exposure is identified.")
+@tags("secrets,credentials,yara,call-tool,nist-sc-28,mitre-t1552")
+@reject_message("Tool execution blocked: exposed secrets detected in command or arguments. Remove sensitive credentials before executing.")
 forbid (
     principal,
-    action in [Overwatch::Action::"read_file", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has contains_secrets && context.contains_secrets
-};
-// Block file writes containing secrets
-@id("secrets-block-file-writes")
-@name("Block file writes with secrets")
-@description("Prevent writing files that contain secrets. Stops credential persistence to disk where they could be committed to version control or accessed by other tools.")
-@severity("critical")
-@tags("secrets,file-write,credentials,nist-sc-28,cis-1.4")
-@reject_message("File write was blocked because secrets or credentials were detected in the content. Credentials should never be written to files — use a secrets manager or environment variables.")
-forbid (
-    principal,
-    action == Overwatch::Action::"write_file",
+    action == Overwatch::Action::"call_tool",
     resource
 )
 when {
-    context has contains_secrets && context.contains_secrets
+    context has detected_threats && context.detected_threats.contains("secrets_leakage")
 };
 // ---------------------------------------------------------------------------
-// Section 2: Granular Secret Type Blocking
-// Blocks specific high-risk credential types identified by the detection
-// engine's pattern-matching (e.g., AWS keys, GitHub tokens, SSH keys).
+// Section 2: YARA Rule — ssh_key_exposure
+// Detects SSH private key content (BEGIN OPENSSH/RSA PRIVATE KEY) and
+// SSH key file paths (/.ssh/id_rsa, id_ed25519).
 // ---------------------------------------------------------------------------
-// Block high-risk secret types across all actions
-@id("secrets-block-high-risk-types")
-@name("Block high-risk credential types")
-@description("Block content containing cloud provider keys (AWS, GCP, Azure), GitHub tokens, SSH private keys, or database connection strings. These credential types pose the highest exfiltration risk.")
-@severity("critical")
-@tags("secrets,aws,github,ssh,cloud,nist-ia-5,mitre-t1552")
-@reject_message("Content blocked: high-risk credentials detected (cloud provider keys, GitHub tokens, SSH keys, or database credentials). Use a secrets manager — never pass credentials through AI agents.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    context has secret_types &&
-    (context.secret_types.contains("aws_access_key") ||
-     context.secret_types.contains("aws_secret_key") ||
-     context.secret_types.contains("gcp_service_account") ||
-     context.secret_types.contains("azure_client_secret") ||
-     context.secret_types.contains("github_token") ||
-     context.secret_types.contains("github_pat") ||
-     context.secret_types.contains("ssh_private_key") ||
-     context.secret_types.contains("database_url"))
-};
-// Block API keys and bearer tokens across all actions
-@id("secrets-block-api-keys")
-@name("Block API keys and bearer tokens")
-@description("Block content containing generic API keys, bearer tokens, JWT tokens, and OAuth credentials. These are the most commonly leaked credential types in AI agent interactions.")
-@severity("high")
-@tags("secrets,api-key,bearer,jwt,oauth,nist-ia-5")
-@reject_message("Content blocked: API keys, bearer tokens, or OAuth credentials detected. These must never be passed through AI agent prompts or tool calls.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    context has secret_types &&
-    (context.secret_types.contains("api_key") ||
-     context.secret_types.contains("bearer_token") ||
-     context.secret_types.contains("jwt_token") ||
-     context.secret_types.contains("oauth_token") ||
-     context.secret_types.contains("oauth_secret"))
-};
-// Block when multiple secrets are detected (bulk exposure)
-@id("secrets-block-bulk-exposure")
-@name("Block bulk secret exposure")
-@description("Block content when 3 or more distinct secrets are found. Multiple secrets in a single request indicates either a configuration dump, .env file paste, or credential harvesting attempt.")
+// Block SSH key exposure across tool calls and file operations
+@id("secrets-block-ssh-keys")
+@name("Block SSH key exposure")
+@description("Block when YARA detects SSH private key content or SSH key file paths. Covers tool calls, file reads, and file writes. AI agents must not access SSH credentials.")
 @severity("critical")
-@tags("secrets,bulk,data-exfiltration,nist-sc-28,mitre-t1552")
-@reject_message("Content blocked: multiple credentials detected (3+). This appears to be a bulk credential exposure — configuration dumps and credential lists must never be passed through AI agents.")
+@tags("secrets,ssh,credentials,yara,nist-ia-5,mitre-t1552")
+@reject_message("Blocked: SSH private key content or key file path detected. AI agents must not access SSH credentials.")
 forbid (
     principal,
-    action,
+    action in [Overwatch::Action::"call_tool", Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has secret_count && context.secret_count >= 3
+    context has detected_threats && context.detected_threats.contains("ssh_key_exposure")
 };
 // ---------------------------------------------------------------------------
-// Section 3: Detection Rule Pattern Matching
-// Catches specific named detection rules that fire for credential exposure.
+// Section 3: YARA Rule — pem_file_access
+// Detects PEM private key content (BEGIN ENCRYPTED/RSA/EC/DSA PRIVATE KEY)
+// and key file paths (.pem, .key, .p12, .pfx).
 // ---------------------------------------------------------------------------
-// Block content flagged by detection engine credential rules
-@id("secrets-block-detected-credentials")
-@name("Block detected credential patterns")
-@description("Block content flagged by detection engine rules for credential exposure, API key leaks, JWT tokens, and bearer tokens. Defense-in-depth behind contains_secrets.")
+// Block PEM/certificate key exposure
+@id("secrets-block-pem-keys")
+@name("Block PEM/certificate key exposure")
+@description("Block when YARA detects PEM private key content or certificate key file paths (.pem, .key, .p12, .pfx). AI agents must not access certificate credentials.")
 @severity("critical")
-@tags("secrets,credentials,jwt,bearer,nist-ia-5,mitre-t1552")
-@reject_message("Content blocked: detection engines identified credential patterns including secret exposure, credential leaks, API keys, or token exposure.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    context has detected_threats &&
-    (context.detected_threats.contains("secret_exposure") ||
-     context.detected_threats.contains("credential_leak") ||
-     context.detected_threats.contains("api_key_exposure") ||
-     context.detected_threats.contains("jwt_token_exposure") ||
-     context.detected_threats.contains("bearer_token_leak"))
-};
-// ---------------------------------------------------------------------------
-// Section 4: Sensitive File Path Protection
-// Blocks access to files that commonly contain secrets.
-// ---------------------------------------------------------------------------
-// Block .env file access across all operations
-@id("secrets-block-env-files")
-@name("Block .env file access")
-@description("Block access to .env files that commonly contain secrets, API keys, and database credentials. Environment files are the #1 source of accidental credential exposure in development workflows.")
-@severity("high")
-@tags("secrets,env-files,config,nist-sc-28,mitre-t1552,cis-1.4")
-@reject_message("Access to .env files is blocked because they commonly contain secrets, API keys, and database credentials. Use a secrets manager instead of .env files.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has path && context.path like "*.env*"
-};
-// Block access to known credential files
-@id("secrets-block-credential-files")
-@name("Block credential file access")
-@description("Block access to common credential files: .netrc, .npmrc, .pypirc, credentials, config files in cloud provider directories. These files often contain hardcoded tokens and passwords.")
-@severity("high")
-@tags("secrets,credential-files,config,nist-sc-28,mitre-t1555")
-@reject_message("Access to this credential file is blocked. Files like .netrc, .npmrc, .pypirc, and cloud provider config files commonly contain hardcoded credentials.")
+@tags("secrets,certificates,pem,yara,nist-ia-5,mitre-t1552")
+@reject_message("Blocked: PEM private key or certificate key file detected. AI agents must not access certificate credentials.")
 forbid (
     principal,
-    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
+    action in [Overwatch::Action::"call_tool", Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has path &&
-    (context.path like "*/.netrc" ||
-     context.path like "*/.npmrc" ||
-     context.path like "*/.pypirc" ||
-     context.path like "*/.docker/config.json" ||
-     context.path like "*/.kube/config" ||
-     context.path like "*/.config/gcloud/*" ||
-     context.path like "*/credentials.json" ||
-     context.path like "*/service-account*.json")
+    context has detected_threats && context.detected_threats.contains("pem_file_access")
 };
-// NOTE: Response content secret detection (AWS keys, GitHub tokens, private keys
-// in AI outputs) is handled by the detection engine's secrets scanner, which
-// analyzes all content types including tool responses. The contains_secrets and
-// secret_types rules above cover this case.
-`;
-const OVERWATCH_PII_DEFAULT_CEDAR = `// =============================================================================
-// PII Detection Policy (Default)
-// =============================================================================
-// Detects and blocks personally identifiable information across prompts, tool
-// calls, file operations, and AI responses. Uses multi-layered detection:
-//
-//   1. PII boolean flag (pii_detected) — broadest catch from detection engine
-//   2. Granular PII type matching (pii_types) — type-specific blocking
-//   3. ML classifier confidence (pii_confidence) — catches novel PII patterns
-//   4. Detection rule triggers (detected_threats) — named rule matches
-//   5. File operation PII blocking — prevents PII persistence to disk
-//
-// Compliance:
-//   PCI DSS 3.4, 4.1 (Payment Card Data)
-//   GDPR Art. 32 (Security of Processing)
-//   HIPAA §164.312 (Technical Safeguards)
-//   NIST 800-53 SI-4 (Information System Monitoring)
-//   CCPA §1798.150 (Data Protection)
-//   OWASP LLM06 (Sensitive Information Disclosure)
-//
-// Category: pii
-// Namespace: Overwatch
-// =============================================================================
 // ---------------------------------------------------------------------------
-// Section 1: Detection Engine — Primary PII Detection
-// Fires when the detection pipeline identifies PII in any content.
+// Section 4: YARA Rule — environment_variable_leakage
+// Detects OPENAI_API_KEY=sk-..., HF_TOKEN=hf_..., and generic
+// <NAME>_API_KEY=<value> (16+ chars) patterns.
 // ---------------------------------------------------------------------------
-// Block prompts containing any detected PII
-@id("pii-block-any-detected")
-@name("Block prompts with PII")
-@description("Block prompts when the detection engine identifies any PII patterns. This is the broadest PII catch — fires before type-specific rules.")
-@severity("critical")
-@tags("pii,privacy,data-protection,gdpr-art-32,owasp-llm06")
-@reject_message("Your prompt was blocked because personally identifiable information was detected. Remove all PII (names, addresses, SSNs, credit cards, etc.) before resubmitting.")
-forbid (
-    principal,
-    action == Overwatch::Action::"process_prompt",
-    resource
-)
-when {
-    context has pii_detected && context.pii_detected
-};
-// Block tool calls containing PII
-@id("pii-block-tool-calls")
-@name("Block tool calls with PII")
-@description("Prevent tool execution when PII patterns are detected in tool arguments or content. Stops PII from being passed to external tools, MCP servers, or shell commands.")
+// Block environment variable secret exposure
+@id("secrets-block-env-vars")
+@name("Block environment variable leakage")
+@description("Block when YARA detects environment variable secret assignments — OPENAI_API_KEY, HF_TOKEN, or generic <NAME>_API_KEY=<value> patterns with 16+ character values.")
 @severity("high")
-@tags("pii,tools,data-protection,owasp-llm06")
-@reject_message("Tool execution was blocked because personally identifiable information was detected in the content. PII must be removed before tool calls are permitted.")
-forbid (
-    principal,
-    action == Overwatch::Action::"call_tool",
-    resource
-)
-when {
-    context has pii_detected && context.pii_detected
-};
-// ---------------------------------------------------------------------------
-// Section 2: Granular PII Type Blocking
-// Blocks specific PII types based on regulatory requirements.
-// ---------------------------------------------------------------------------
-// Block credit card numbers (PCI DSS compliance)
-@id("pii-block-credit-cards")
-@name("Block credit card numbers")
-@description("Detect and block content containing credit card number patterns. PCI DSS 3.4 requires that PANs are rendered unreadable — AI agents must never process raw card numbers.")
-@severity("critical")
-@tags("pci,credit-card,payment,compliance,pci-dss-3.4,pci-dss-4.1")
-@reject_message("Content blocked: credit card number patterns detected. Sharing payment card data through AI agents violates PCI DSS requirements. Use tokenized card references instead.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    (context has pii_types && context.pii_types.contains("credit_card")) ||
-    (context has detected_threats && context.detected_threats.contains("credit_card"))
-};
-// Block Social Security Numbers
-@id("pii-block-ssn")
-@name("Block Social Security Numbers")
-@description("Detect and block content containing SSN patterns (XXX-XX-XXXX and variants). SSNs are high-value identity theft targets — exposure through AI agents is a critical privacy violation.")
-@severity("critical")
-@tags("ssn,identity,privacy,compliance,nist-si-4")
-@reject_message("Content blocked: Social Security Number patterns detected. SSNs are protected personal identifiers that must never be shared through AI agents.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    (context has pii_types && context.pii_types.contains("ssn")) ||
-    (context has detected_threats && context.detected_threats.contains("ssn"))
-};
-// Block medical/health records (HIPAA compliance)
-@id("pii-block-health-data")
-@name("Block health information")
-@description("Block content containing medical record numbers, health insurance IDs, or other Protected Health Information (PHI). HIPAA §164.312 requires technical safeguards for PHI.")
-@severity("critical")
-@tags("phi,hipaa,health,medical,compliance,hipaa-164.312")
-@reject_message("Content blocked: Protected Health Information (PHI) detected. Health data must not be processed through AI agents per HIPAA requirements.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    context has pii_types &&
-    (context.pii_types.contains("medical_record") ||
-     context.pii_types.contains("health_insurance_id"))
-};
-// Block bulk PII exposure (multiple PII items)
-@id("pii-block-bulk-exposure")
-@name("Block bulk PII exposure")
-@description("Block content containing 3 or more PII matches. Multiple PII items in a single request indicates a data dump, CSV paste, or data exfiltration attempt.")
-@severity("critical")
-@tags("pii,bulk,data-exfiltration,gdpr-art-32,ccpa")
-@reject_message("Content blocked: multiple PII items detected (3+). Bulk personal data must never be processed through AI agents. Use data masking or tokenization for batch operations.")
-forbid (
-    principal,
-    action,
-    resource
-)
-when {
-    context has pii_count && context.pii_count >= 3
-};
-// ---------------------------------------------------------------------------
-// Section 3: ML Classifier — Novel PII Detection
-// Catches PII patterns that escape regex detection using ML classifiers.
-// ---------------------------------------------------------------------------
-// Block content with high PII classifier confidence
-@id("pii-block-high-confidence")
-@name("Block high-confidence PII")
-@description("Block content when the ML PII classifier confidence exceeds threshold (80/100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss.")
-@severity("critical")
-@tags("pii,confidence,privacy,compliance,ml-classifier")
-@reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence. Even if specific PII types aren't identified, the content appears to contain personal data.")
+@tags("secrets,environment,yara,nist-ia-5")
+@reject_message("Blocked: environment variable secret detected (API keys, tokens). Remove sensitive values before proceeding.")
 forbid (
     principal,
     action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has pii_confidence && context.pii_confidence >= 80
+    context has detected_threats && context.detected_threats.contains("environment_variable_leakage")
 };
 // ---------------------------------------------------------------------------
-// Section 4: Detection Rule Pattern Matching
-// Catches PII detected by named detection rules.
+// Section 5: Sensitive File Path Protection
+// Blocks file read/write access to credential directories and .env files.
+// NOTE: Targets read_file/write_file only — NOT call_tool. The path field
+// is empty for Bash commands (extractor reads tool_input.file_path which is
+// undefined for Bash). SSH key access via Bash is caught by YARA rules above.
 // ---------------------------------------------------------------------------
-// Block content with PII threat category
-@id("pii-block-threat-category")
-@name("Block PII threat category")
-@description("Block content when threat categorization identifies PII. Defense-in-depth behind the pii_detected boolean — catches cases where PII is flagged at the threat aggregation layer.")
-@severity("high")
-@tags("pii,privacy,data-protection,gdpr")
-@reject_message("Content blocked: threat scanners detected personally identifiable information. Remove all PII before resubmitting.")
+// Block access to credential directories
+@id("secrets-block-credential-paths")
+@name("Block credential directory access")
+@description("Block file read/write to SSH keys, cloud credentials, GPG keys, and certificate files. Only applies to Read/Write/Edit tools (path is not populated for Bash commands).")
+@severity("critical")
+@tags("secrets,credentials,path,file-access,nist-sc-28,mitre-t1552")
+@reject_message("Blocked: access to credential directory or key file. AI agents must not access .ssh, .aws, .gnupg, .gcloud, or .azure directories.")
 forbid (
     principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has threat_categories && context.threat_categories.contains("pii")
+    context has path &&
+    (context.path like "*/.ssh/*" ||
+     context.path like "*/.aws/*" ||
+     context.path like "*/.gnupg/*" ||
+     context.path like "*/.config/gcloud/*" ||
+     context.path like "*/.azure/*" ||
+     context.path like "*/id_rsa*" ||
+     context.path like "*/id_ed25519*" ||
+     context.path like "*/id_ecdsa*")
 };
-// ---------------------------------------------------------------------------
-// Section 5: File Operation PII Blocking
-// Prevents PII from being read from or written to disk.
-// ---------------------------------------------------------------------------
-// Block file operations containing PII
-@id("pii-block-file-ops")
-@name("Block file operations with PII")
-@description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
+// Block .env file access
+@id("secrets-block-env-file-paths")
+@name("Block .env file access")
+@description("Block file read/write to .env files which typically contain secrets, API keys, and database credentials.")
 @severity("high")
-@tags("pii,file-ops,data-protection,gdpr-art-32,nist-si-4")
-@reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
+@tags("secrets,environment,path,file-access,nist-sc-28,cis-1.4")
+@reject_message("Blocked: .env file access. These files typically contain secrets and should not be accessed by AI agents.")
 forbid (
     principal,
     action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has pii_detected && context.pii_detected
+    context has path && context.path like "*.env*"
 };
 `;
-const OVERWATCH_TOOLS_DEFAULT_CEDAR = `// =============================================================================
-// Tool Permissioning Policy (Default)
+const OVERWATCH_SEMANTIC_DEFAULT_CEDAR = `// =============================================================================
+// Semantic Threat Detection Policy (Default)
 // =============================================================================
-// Controls access to IDE tools, shell execution, file system paths, and MCP
-// operations. Enforces least-privilege for agent tool usage with multi-layered
-// controls:
+// Detects and blocks injection attacks, prompt injection, jailbreak attempts,
+// and unsafe content using multi-layered detection:
+//
+//   Tier 1 — YARA rules (always available, no external dependency):
+//     command_injection, sql_injection, path_traversal, detect_encoded
+//
+//   Tier 2 — Javelin ML classifiers (require Highflame API token):
+//     injection_confidence, jailbreak_confidence
+//
+//   Tier 3 — Content safety ML scores (require Highflame API token):
+//     violence, weapons, hate_speech, crime, sexual, profanity
 //
-//   1. Dangerous tool blocking (shell, command execution, destructive ops)
-//   2. Sensitive system path protection (credentials, system dirs)
-//   3. Tool risk scoring (computed risk assessment)
-//   4. Tool category enforcement (safe/sensitive/dangerous classification)
-//   5. Threat-based tool blocking (threat severity gates)
-//   6. Command injection detection (reverse shells, code execution, etc.)
 //
 // Compliance:
-//   NIST 800-53 AC-3 (Access Enforcement)
-//   NIST 800-53 AC-6 (Least Privilege)
-//   NIST 800-53 CM-7 (Least Functionality)
-//   OWASP LLM06 (Excessive Agency) — agent tool access control
-//   OWASP ASI02 (Tool Misuse) — unauthorized tool operations
-//   MITRE ATT&CK T1059 (Command and Scripting Interpreter)
-//   MITRE ATT&CK T1005 (Data from Local System)
-//   MITRE ATT&CK T1552 (Unsecured Credentials)
+//   OWASP LLM01 (Prompt Injection) — direct + indirect
+//   OWASP LLM02 (Insecure Output Handling) — response manipulation
+//   OWASP ASI01 (Agent Goal Hijack) — behavioral manipulation
+//   MITRE ATLAS AML.T0051 (LLM Prompt Injection)
+//   MITRE ATLAS AML.T0054 (LLM Jailbreak)
+//   NIST 800-53 SI-3 (Malicious Code Protection)
+//   NIST 800-53 SI-4 (Information System Monitoring)
+//   EU AI Act Art. 52 (Transparency for AI Systems)
+//   ISO 42001 (AI Management System)
 //
-// Category: tools
+// Category: semantic
 // Namespace: Overwatch
 // =============================================================================
-// ---------------------------------------------------------------------------
-// Section 1: Dangerous Tool Blocking
-// Blocks tools classified as inherently dangerous for agent use.
-// Ref: OWASP LLM06, MITRE T1059
-// ---------------------------------------------------------------------------
-// Block shell and command execution tools
-@id("tools-block-shell-execution")
-@name("Block shell and command execution")
-@description("Block direct shell, bash, and command execution tools. Unrestricted shell access is the #1 risk in AI coding agents — enables command injection, data exfiltration, and arbitrary code execution (MITRE T1059).")
-@severity("critical")
-@tags("shell,command-injection,execution,nist-cm-7,mitre-t1059,owasp-llm06,baseline")
-@reject_message("Tool execution was blocked: direct shell and command execution tools (shell, bash, terminal) are restricted to prevent command injection attacks (MITRE T1059). Use specific, scoped tools instead.")
-forbid (
-    principal,
-    action == Overwatch::Action::"call_tool",
-    resource
-)
-when {
-    context has tool_name &&
-    (context.tool_name == "shell" ||
-     context.tool_name == "bash" ||
-     context.tool_name == "sh" ||
-     context.tool_name == "terminal" ||
-     context.tool_name == "system.exec" ||
-     context.tool_name == "process.spawn" ||
-     context.tool_name == "cmd" ||
-     context.tool_name == "powershell")
-};
-// Block destructive file operations
-@id("tools-block-destructive-ops")
-@name("Block destructive file operations")
-@description("Block file deletion, directory removal, and other destructive operations. Agents should not have delete access by default — destructive operations require explicit human approval.")
-@severity("high")
-@tags("file,delete,destructive,nist-ac-3,owasp-asi02")
-@reject_message("Tool execution was blocked: destructive file operations (delete, rmdir, unlink) are restricted to prevent data loss. Request explicit human approval for destructive actions.")
-forbid (
-    principal,
-    action == Overwatch::Action::"call_tool",
-    resource
-)
-when {
-    context has tool_name &&
-    (context.tool_name == "fs.delete" ||
-     context.tool_name == "fs.rmdir" ||
-     context.tool_name == "fs.unlink" ||
-     context.tool_name == "fs.remove" ||
-     context.tool_name == "delete_file" ||
-     context.tool_name == "remove_directory")
-};
 // ---------------------------------------------------------------------------
-// Section 2: Sensitive System Path Protection
-// Blocks access to system directories, credential files, and sensitive paths.
-// Ref: MITRE T1005, T1552
+// Tier 1: YARA-Based Injection Detection (always available)
+// These fire on actual YARA rule names from the detection engine.
+// No external API dependency — works offline with local YARA scanning.
 // ---------------------------------------------------------------------------
-// Block access to system directories
-@id("tools-block-system-paths")
-@name("Block system directory access")
-@description("Prevent access to sensitive system directories (/etc, /proc, /sys, /root, /var). These directories contain system configuration, process information, and credentials that agents must never access.")
-@severity("high")
-@tags("file,path,system,security,nist-ac-6,mitre-t1005")
-@reject_message("Access blocked: this path targets a sensitive system directory. AI agents are restricted from accessing /etc, /proc, /sys, /root, and /var directories.")
-forbid (
-    principal,
-    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
-    resource
-)
-when {
-    context has path &&
-    (context.path like "/etc/*" ||
-     context.path like "/proc/*" ||
-     context.path like "/sys/*" ||
-     context.path like "/root/*" ||
-     context.path like "/var/log/*" ||
-     context.path like "/var/run/*")
-};
-// Block access to credential and key directories
-@id("tools-block-credential-paths")
-@name("Block credential directory access")
-@description("Prevent access to SSH keys, cloud provider credentials, GPG keys, and other authentication material directories. These are primary targets for credential theft (MITRE T1552).")
+// Block command injection in tool calls
+@id("semantic-block-command-injection-tool")
+@name("Block command injection in tool calls")
+@description("Block tool execution when YARA detects command injection — reverse shells, rm -rf, privilege escalation, code execution, pipe-to-shell, or encoding evasion. Ref: AIShellJack (41-84% success rate).")
 @severity("critical")
-@tags("file,credentials,ssh,aws,security,nist-ac-6,mitre-t1552")
-@reject_message("Access blocked: this path targets a credential or key directory (.ssh, .aws, .gnupg, .config/gcloud). AI agents must never access authentication material.")
+@tags("command-injection,yara,call-tool,mitre-t1059,owasp-asi02")
+@reject_message("Tool execution blocked: command injection pattern detected — reverse shell, destructive command, privilege escalation, or code execution attempt.")
 forbid (
     principal,
-    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file", Overwatch::Action::"call_tool"],
+    action == Overwatch::Action::"call_tool",
     resource
 )
 when {
-    context has path &&
-    (context.path like "*/.ssh/*" ||
-     context.path like "*/.aws/*" ||
-     context.path like "*/.gnupg/*" ||
-     context.path like "*/.config/gcloud/*" ||
-     context.path like "*/.azure/*" ||
-     context.path like "*.pem" ||
-     context.path like "*/id_rsa*" ||
-     context.path like "*/id_ed25519*" ||
-     context.path like "*/id_ecdsa*")
+    context has detected_threats && context.detected_threats.contains("command_injection")
 };
-// ---------------------------------------------------------------------------
-// Section 3: Tool Risk Scoring
-// Uses computed tool risk scores from the detection engine to dynamically
-// assess and block risky tool operations.
-// ---------------------------------------------------------------------------
-// Block tools with very high computed risk
-@id("tools-block-high-risk-score")
-@name("Block high-risk tool operations")
-@description("Block tool operations when the computed risk score exceeds 90/100. The risk score combines tool type, argument analysis, context, and historical behavior into a single metric.")
+// Block command injection in prompts
+@id("semantic-block-command-injection-prompt")
+@name("Block command injection in prompts")
+@description("Block prompts when YARA detects command injection patterns. Catches prompt-level injection where the user or injected content includes shell commands.")
 @severity("critical")
-@tags("tool-risk,dynamic,security,owasp-llm06,owasp-asi02")
-@reject_message("Tool execution blocked: this operation scored 90+ on the risk assessment. The combination of tool type, arguments, and context indicates a high-risk operation.")
+@tags("command-injection,yara,process-prompt,mitre-t1059")
+@reject_message("Prompt blocked: command injection pattern detected. The prompt contains shell commands, reverse shells, or code execution patterns.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action == Overwatch::Action::"process_prompt",
     resource
 )
 when {
-    context has tool_risk_score && context.tool_risk_score >= 90
+    // Guard the attribute read: accessing an absent context attribute is a
+    // Cedar evaluation error, which causes this forbid to be skipped with an
+    // error diagnostic. Matches the guard used by the tool-call variant.
+    context has detected_threats && context.detected_threats.contains("command_injection")
 };
-// Block tools classified as dangerous
-@id("tools-block-dangerous-category")
-@name("Block dangerous tool category")
-@description("Block all tools classified as 'dangerous' by the detection engine. The dangerous category includes tools with unrestricted system access, code execution, or network capabilities.")
-@severity("critical")
-@tags("tool-category,dangerous,security,owasp-llm06,nist-ac-6")
-@reject_message("Tool execution blocked: this tool is classified as 'dangerous' due to its unrestricted system access, code execution, or network capabilities. Use a safer alternative.")
+// Block SQL injection in tool calls
+@id("semantic-block-sql-injection-tool")
+@name("Block SQL injection in tool calls")
+@description("Block tool execution when YARA detects SQL injection — tautologies (OR 1=1), UNION SELECT, DROP TABLE, time-based attacks (SLEEP, WAITFOR), or system object access (information_schema).")
+@severity("high")
+@tags("sql-injection,yara,call-tool,database")
+@reject_message("Tool execution blocked: SQL injection pattern detected — tautology, UNION attack, destructive SQL, or system object access.")
 forbid (
     principal,
     action == Overwatch::Action::"call_tool",
     resource
 )
 when {
-    context has tool_category && context.tool_category == "dangerous"
+    context has detected_threats && context.detected_threats.contains("sql_injection")
 };
-// Stricter threshold for sensitive tools
-@id("tools-block-sensitive-with-threats")
-@name("Block sensitive tools with threats")
-@description("Block sensitive tools (file write, shell, network) when any threats are detected. Sensitive tools with concurrent threats indicate an attack leveraging tool capabilities for malicious purposes.")
+// Block SQL injection in prompts
+@id("semantic-block-sql-injection-prompt")
+@name("Block SQL injection in prompts")
+@description("Block prompts when YARA detects SQL injection patterns.")
 @severity("high")
-@tags("tool-category,sensitive,security,owasp-asi02,defense-in-depth")
-@reject_message("Sensitive tool execution blocked: threats were detected alongside a sensitive tool operation. Sensitive tools require zero threat context to execute.")
+@tags("sql-injection,yara,process-prompt,database")
+@reject_message("Prompt blocked: SQL injection pattern detected.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action == Overwatch::Action::"process_prompt",
     resource
 )
 when {
-    context has tool_is_sensitive && context.tool_is_sensitive &&
-    context has threat_count && context.threat_count > 0
+    // Guard the attribute read: accessing an absent context attribute is a
+    // Cedar evaluation error, which causes this forbid to be skipped with an
+    // error diagnostic. Matches the guard used by the tool-call variant.
+    context has detected_threats && context.detected_threats.contains("sql_injection")
 };
-// ---------------------------------------------------------------------------
-// Section 4: Threat-Based Tool Blocking
-// Blocks tool calls based on threat severity from detection engines.
-// ---------------------------------------------------------------------------
-// Block tool calls with high severity threats
-@id("tools-block-high-severity-threats")
-@name("Block tool calls with high severity threats")
-@description("Prevent tool execution when high or critical severity threats (>= 3) are detected in content. Tools must not execute when the content they operate on is flagged as dangerous.")
+// Block path traversal attacks
+@id("semantic-block-path-traversal")
+@name("Block path traversal attacks")
+@description("Block when YARA detects path traversal — 2+ levels of ../ combined with sensitive file targets (/etc/passwd, /etc/shadow) or file read/include operations with traversal.")
 @severity("high")
-@tags("tools,threats,severity,security,defense-in-depth")
-@reject_message("Tool execution was blocked because high or critical severity threats were detected in the content by security scanners.")
+@tags("path-traversal,yara,file-access,mitre-t1005")
+@reject_message("Blocked: path traversal attack detected — directory traversal sequences targeting sensitive system files.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"call_tool", Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has threat_count && context has max_threat_severity &&
-    context.threat_count > 0 && context.max_threat_severity >= 3
+    context has detected_threats && context.detected_threats.contains("path_traversal")
 };
-// ---------------------------------------------------------------------------
-// Section 5: Command Injection Detection
-// Blocks tool calls containing command injection patterns in arguments.
-// Ref: AIShellJack (41-84% success rate, 314 payloads)
-// ---------------------------------------------------------------------------
-// Block detected command injection patterns
-@id("tools-block-command-injection")
-@name("Block command injection in tool calls")
-@description("Block tool calls when command injection patterns are detected in arguments — reverse shells, privilege escalation, code execution, and data exfiltration commands. Ref: AIShellJack attack (41-84% success rate).")
-@severity("critical")
-@tags("command-injection,shell,security,mitre-t1059,owasp-asi02")
-@reject_message("Tool execution blocked: command injection pattern detected in tool arguments. This may be a shell injection attack attempting to execute unauthorized commands.")
+// Block encoded/obfuscated payloads in tool calls
+@id("semantic-block-encoded")
+@name("Block encoded payloads in tool calls")
+@description("Block tool calls when YARA detects base64-encoded payloads (30+ chars) or hash IOCs. Base64 detection excludes npm package paths to reduce false positives.")
+@severity("medium")
+@tags("encoded,obfuscation,yara,call-tool")
+@reject_message("Tool execution blocked: encoded or obfuscated payload detected. Base64-encoded content or hash IOCs found in tool arguments.")
 forbid (
-    principal,
-    action == Overwatch::Action::"call_tool",
-    resource
-)
-when {
-    context has detected_threats &&
-    context.detected_threats.contains("command_injection")
-};
-`;
-const OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR = `// =============================================================================
-// Agent Security Policy (Default)
-// =============================================================================
-// Detects and blocks tool poisoning, rug pull attacks, indirect prompt injection,
-// and MCP supply chain threats targeting AI coding agents.
-//
-// These are agentic AI-specific attack vectors (OWASP Agentic Top 10) where tool
-// descriptions, server responses, or behavioral drift manipulate agent behavior.
-//
-// Compliance:
-//   OWASP LLM01 (Prompt Injection) | OWASP LLM06 (Excessive Agency)
-//   OWASP ASI01 (Agent Goal Hijack) | OWASP ASI02 (Tool Misuse)
-//   OWASP ASI04 (Supply Chain) | OWASP MCP01-05
-//   MITRE ATLAS AML.T0051 (Prompt Injection) | AML.T0080 (Memory Manipulation)
-//
-// Category: agent_security
-// Namespace: Overwatch
-// =============================================================================
+    principal,
+    action == Overwatch::Action::"call_tool",
+    resource
+)
+when {
+    context has detected_threats && context.detected_threats.contains("detect_encoded")
+};
 // ---------------------------------------------------------------------------
-// Tool Poisoning — hidden instructions in tool descriptions/arguments
-// Ref: Invariant Labs (April 2025), 84.2% success rate with auto-approval
+// Tier 2: Javelin ML-Based Detection (require Highflame API token)
+// Prompt injection and jailbreak classifiers. These are inert without
+// the API token — scores default to 0 (prompts) or are absent (tools).
 // ---------------------------------------------------------------------------
-// Block tool calls with tool poisoning risk
-@id("as-block-tool-poisoning")
-@name("Block tool poisoning")
-@description("Block tool execution when hidden instructions are detected in tool descriptions or arguments (score >= 70/100). Catches authority hijack, system prompt injection, and hidden instruction patterns. Adjust the threshold to tune sensitivity — lower catches more but may flag legitimate tools with instructional descriptions (OWASP ASI01).")
+// Block ML-detected prompt injection
+@id("semantic-block-injection-ml")
+@name("Block ML-detected prompt injection")
+@description("Block when Javelin's ML classifier scores prompt injection confidence >= 75/100. Catches sophisticated injection that pattern matching misses — authority hijack, instruction override, polymorphic payloads.")
 @severity("critical")
-@tags("tool-poisoning,agent-security,owasp-asi01,mitre-aml-t0051")
-@reject_message("Tool execution blocked: hidden manipulation instructions detected in tool description or arguments. This may be a tool poisoning attack (OWASP ASI01).")
+@tags("prompt-injection,ml,javelin,owasp-llm01,mitre-aml-t0051")
+@reject_message("Blocked: prompt injection detected by ML classifier (confidence 75+). The content attempts to override agent instructions.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has tool_poisoning_score && context.tool_poisoning_score >= 70
+    context has injection_confidence && context.injection_confidence >= 75
 };
-// Block MCP server connections with poisoning risk
-@id("as-block-server-poisoning")
-@name("Block poisoned MCP servers")
-@description("Block connections to MCP servers when tool poisoning patterns are detected in tool descriptions (score >= 60). Lower threshold for servers since poisoning affects all tools on the server.")
+// Block ML-detected jailbreak attempts
+@id("semantic-block-jailbreak-ml")
+@name("Block ML-detected jailbreak attempts")
+@description("Block when Javelin's ML classifier scores jailbreak confidence >= 75/100. Catches DAN prompts, role-playing bypasses, and safety guideline override attempts.")
 @severity("critical")
-@tags("tool-poisoning,mcp-security,owasp-asi04,owasp-mcp02")
-@reject_message("MCP server connection blocked: tool poisoning patterns detected in server tool descriptions. Review server tools before connecting.")
+@tags("jailbreak,ml,javelin,owasp-llm02,mitre-aml-t0054")
+@reject_message("Blocked: jailbreak attempt detected by ML classifier (confidence 75+). The content attempts to bypass agent safety guidelines.")
 forbid (
     principal,
-    action == Overwatch::Action::"connect_server",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has tool_poisoning_score && context.tool_poisoning_score >= 60
+    context has jailbreak_confidence && context.jailbreak_confidence >= 75
 };
 // ---------------------------------------------------------------------------
-// Rug Pull — tool behavior changes after trust establishment
-// Ref: Acuvity (2025), tools approved once then silently redefined
+// Tier 3: Content Safety (Javelin ML scores, require API token)
+// Trust & safety classification scores for violence, weapons, hate speech,
+// crime, sexual content, and profanity. Ensures enterprise-appropriate
+// content thresholds.
 // ---------------------------------------------------------------------------
-// Block tool calls with behavioral drift (rug pull)
-@id("as-block-rug-pull")
-@name("Block rug pull attacks")
-@description("Block tool execution when behavioral drift is detected — tool behavior diverges significantly from established patterns (score >= 70/100). Defends against tools that are approved once then silently redefined to act maliciously. Adjust the threshold to tune sensitivity (OWASP ASI04).")
+// Block violent content
+@id("semantic-block-violence")
+@name("Block violent content")
+@description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language.")
 @severity("critical")
-@tags("rug-pull,agent-security,owasp-asi04,behavioral-drift")
-@reject_message("Tool execution blocked: tool behavior has changed significantly from its established pattern. This may be a rug pull attack where a tool was silently redefined after initial approval.")
+@tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
+@reject_message("Content blocked: violent content detected. AI agents in enterprise environments must not process or generate violent content.")
 forbid (
     principal,
-    action in [Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has rug_pull_score && context.rug_pull_score >= 70
+    context has violence_score && context.violence_score >= 80
 };
-// ---------------------------------------------------------------------------
-// Indirect Prompt Injection — injection via tool outputs and retrieved content
-// Ref: EchoLeak CVE-2025-32711, IDEsaster (30+ CVEs in AI IDEs)
-// ---------------------------------------------------------------------------
-// Block prompts with indirect injection from tool outputs
-@id("as-block-indirect-injection")
-@name("Block indirect prompt injection")
-@description("Block when indirect prompt injection is detected in tool outputs, file contents, or retrieved documents (score >= 70). Defends against OWASP LLM01 and ASI01.")
+// Block weapons content
+@id("semantic-block-weapons")
+@name("Block weapons content")
+@description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed specifications.")
 @severity("critical")
-@tags("indirect-injection,owasp-llm01,owasp-asi01,mitre-aml-t0051")
-@reject_message("Content blocked: indirect prompt injection detected in tool output or retrieved content. An external source may be attempting to hijack agent behavior.")
+@tags("weapons,content-safety,trust-safety,nist-si-4")
+@reject_message("Content blocked: weapons-related content detected. AI agents must not process weapons manufacturing, procurement, or detailed specification content.")
 forbid (
     principal,
-    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has indirect_injection_score && context.indirect_injection_score >= 70
+    context has weapons_score && context.weapons_score >= 80
 };
-// Strict indirect injection for sensitive tool calls
-@id("as-block-indirect-injection-sensitive-tools")
-@name("Block indirect injection on sensitive tools")
-@description("Lower threshold (>= 50) for indirect injection when the tool is classified as sensitive (shell, file write, network). Even moderate injection risk on sensitive tools warrants blocking.")
+// Block hate speech (lower threshold — zero tolerance)
+@id("semantic-block-hate-speech")
+@name("Block hate speech")
+@description("Block content when the ML hate speech detection score exceeds threshold (75/100). Lower threshold than other categories — enterprises have zero tolerance for discriminatory content.")
 @severity("critical")
-@tags("indirect-injection,sensitive-tools,owasp-asi02")
-@reject_message("Sensitive tool execution blocked: moderate indirect injection risk detected. Sensitive tools require higher confidence that content is safe.")
+@tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
+@reject_message("Content blocked: hate speech or discriminatory content detected. AI agents must not process or generate hateful, discriminatory, or dehumanizing content.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has indirect_injection_score && context.indirect_injection_score >= 50 &&
-    context has tool_is_sensitive && context.tool_is_sensitive
+    context has hate_speech_score && context.hate_speech_score >= 75
 };
-// ---------------------------------------------------------------------------
-// MCP Supply Chain — unverified servers, risky configs
-// Ref: OWASP MCP Top 10, OWASP ASI04, MITRE AML.T0082
-// ---------------------------------------------------------------------------
-// Block unverified MCP server tool calls with detected threats
-@id("as-block-unverified-threats")
-@name("Block unverified server threats")
-@description("Block tool calls from unverified MCP servers when any threat is detected. Unverified servers with threats are high-risk supply chain vectors.")
+// Block criminal content
+@id("semantic-block-crime")
+@name("Block criminal content")
+@description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and content promoting criminal behavior.")
 @severity("high")
-@tags("mcp-trust,owasp-asi04,owasp-mcp02,supply-chain")
-@reject_message("Tool execution blocked: the MCP server is unverified and security threats were detected. Only use tools from verified or trusted servers.")
+@tags("crime,content-safety,trust-safety,nist-si-4")
+@reject_message("Content blocked: criminal activity content detected. AI agents must not process content related to illegal activities, fraud, or other criminal behavior.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has mcp_server_verified && context.mcp_server_verified == false &&
-    context has threat_count && context.threat_count > 0
+    context has crime_score && context.crime_score >= 80
 };
-// Block connections to MCP servers with risky configurations
-@id("as-block-mcp-config-risk")
-@name("Block risky MCP server configs")
-@description("Block MCP server connections when risky configuration patterns are detected (inline code execution, mixed transports, proxy patterns). Score >= 70.")
+// Block sexual content
+@id("semantic-block-sexual")
+@name("Block sexual content")
+@description("Block content when the ML sexual content detection score exceeds threshold (80/100). Ensures enterprise AI agents do not process sexually explicit material.")
 @severity("high")
-@tags("mcp-config,owasp-mcp03,owasp-asi04,supply-chain")
-@reject_message("MCP server connection blocked: risky server configuration detected (e.g., inline code execution, mixed transports). Review the server configuration before connecting.")
+@tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
+@reject_message("Content blocked: sexual content detected. AI agents in enterprise environments must not process sexually explicit material.")
 forbid (
     principal,
-    action == Overwatch::Action::"connect_server",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has mcp_config_risk && context.mcp_config_risk &&
-    context has mcp_risk_score && context.mcp_risk_score >= 70
+    context has sexual_score && context.sexual_score >= 80
 };
-// Block connections to unverified MCP servers entirely
-@id("as-block-unverified-server-connect")
-@name("Block unverified MCP server connections")
-@description("Block connections to MCP servers that are not from a verified registry. This prevents supply chain attacks via malicious MCP servers.")
-@severity("high")
-@tags("mcp-trust,owasp-asi04,owasp-mcp05,supply-chain")
-@reject_message("MCP server connection blocked: server is not from a verified registry. Add the server to your verified list or contact your admin.")
+// Block excessive profanity (higher threshold — allows normal expression)
+@id("semantic-block-profanity")
+@name("Block profanity")
+@description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold — allows normal expression while blocking abusive or harassing language patterns.")
+@severity("medium")
+@tags("profanity,content-safety,trust-safety")
+@reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner appropriate for enterprise AI interactions.")
 forbid (
     principal,
-    action == Overwatch::Action::"connect_server",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has mcp_server_verified && context.mcp_server_verified == false
+    context has profanity_score && context.profanity_score >= 90
 };
 `;
-const OVERWATCH_ENCODING_DEFAULT_CEDAR = `// =============================================================================
-// Encoding & Unicode Attack Detection Policy (Default)
+const OVERWATCH_TOOLS_DEFAULT_CEDAR = `// =============================================================================
+// Tool Permissioning Policy (Default)
 // =============================================================================
-// Detects and blocks invisible Unicode characters, bidirectional text
-// overrides, tag characters, and other encoding-based attack vectors used
-// to hide malicious instructions from human review while remaining
-// visible to AI model tokenizers.
+// Controls access to IDE tools, shell execution, file system paths, and MCP
+// operations. Enforces least-privilege for agent tool usage with:
 //
-// Attack vectors:
-//   - Zero-width characters (U+200B, U+200C, U+200D, U+FEFF) hiding instructions
-//   - Bidirectional overrides (U+202A-U+202E) creating visually misleading text
-//   - Tag characters (U+E0001-U+E007F) embedding invisible payloads
-//   - Variation selectors used as steganographic channels
-//   - Homoglyph attacks using lookalike Unicode characters
+//   1. Shell/command execution blocking (opt-in — inactive by default)
+//   2. Destructive file operation blocking (opt-in — inactive by default)
+//   3. Sensitive system path protection (active)
+//   4. Threat-severity-based tool blocking (active)
 //
-// Ref: EchoLeak CVE-2025-32711 (invisible prompt injection via Unicode)
-//      Rules File Backdoor (Pillar Security, March 2025)
-//      Unicode-based prompt injection in Claude Code (CERT-2025)
+// NOTE: Policies 1-2 are included in this file but classified as opt-in
+// templates in templates.json (not in the defaults array). They are shipped
+// as part of the tool permissioning category but must be explicitly enabled.
 //
 // Compliance:
-//   OWASP LLM01 (Prompt Injection) — encoding evasion
-//   OWASP ASI01 (Agent Goal Hijack) — hidden instructions
-//   MITRE ATLAS AML.T0051 (LLM Prompt Injection)
-//   NIST 800-53 SI-10 (Information Input Validation)
+//   NIST 800-53 AC-3 (Access Enforcement)
+//   NIST 800-53 AC-6 (Least Privilege)
+//   NIST 800-53 CM-7 (Least Functionality)
+//   OWASP LLM06 (Excessive Agency)
+//   OWASP ASI02 (Tool Misuse)
+//   MITRE ATT&CK T1059 (Command and Scripting Interpreter)
+//   MITRE ATT&CK T1005 (Data from Local System)
 //
-// Category: encoding
+// Category: tools
 // Namespace: Overwatch
 // =============================================================================
 // ---------------------------------------------------------------------------
-// Section 1: Invisible Character Detection in Prompts
-// Blocks prompts containing suspicious invisible Unicode patterns.
+// Section 1: Shell Blocking (opt-in — inactive by default)
+// Blocks all shell/command execution tools. Enable for high-security
+// environments where shell access is explicitly prohibited.
+// WARNING: Blocks ALL shell use including safe commands (git, npm, echo).
+// Ref: OWASP LLM06, MITRE T1059
 // ---------------------------------------------------------------------------
-// Block prompts with invisible characters above risk threshold
-@id("encoding-block-invisible-prompt")
-@name("Block invisible characters in prompts")
-@description("Block prompts when invisible Unicode characters are detected with a risk score >= 50. Invisible chars (zero-width joiners, bidi overrides, tag characters) can hide malicious instructions from human review while being processed by AI models. Ref: EchoLeak CVE-2025-32711.")
-@severity("high")
-@tags("unicode,invisible-chars,encoding,owasp-llm01,owasp-asi01,mitre-aml-t0051,nist-si-10")
-@reject_message("Your prompt was blocked because suspicious invisible Unicode characters were detected. These characters (zero-width, bidirectional overrides, tag characters) can be used to hide malicious instructions. Please remove non-visible characters and resubmit.")
+// Block shell and command execution tools
+@id("tools-block-shell")
+@name("Block shell and command execution")
+@description("Block direct shell, bash, and command execution tools. Unrestricted shell access enables command injection, data exfiltration, and arbitrary code execution. INACTIVE BY DEFAULT — enable for high-security environments where all shell access is prohibited.")
+@severity("critical")
+@tags("shell,execution,nist-cm-7,mitre-t1059,owasp-llm06,opt-in")
+@reject_message("Tool blocked: shell/command execution is restricted in this environment. Use specific, scoped tools instead.")
 forbid (
     principal,
-    action == Overwatch::Action::"process_prompt",
+    action == Overwatch::Action::"call_tool",
     resource
 )
 when {
-    context has contains_invisible_chars && context.contains_invisible_chars &&
-    context has invisible_chars_score && context.invisible_chars_score >= 50
+    context has tool_name &&
+    (context.tool_name == "shell" ||
+     context.tool_name == "bash" ||
+     context.tool_name == "sh" ||
+     context.tool_name == "terminal" ||
+     context.tool_name == "cmd" ||
+     context.tool_name == "powershell")
 };
 // ---------------------------------------------------------------------------
-// Section 2: Invisible Characters in Tool Calls
-// Any invisible characters in tool arguments are suspicious — tool args
-// should be plain text/JSON. Lower threshold than prompts.
+// Section 2: Destructive Operations (opt-in — inactive by default)
+// Blocks file deletion tools. Enable when agents should not have delete access.
+// NOTE: Only matches MCP tool names, not Bash rm commands (which use
+// tool_name "shell"). Bash destructive commands are caught by YARA
+// command_injection rule in semantic.cedar.
 // ---------------------------------------------------------------------------
-// Block tool calls with any invisible characters
-@id("encoding-block-invisible-tool")
-@name("Block invisible characters in tool calls")
-@description("Block tool execution when invisible Unicode characters are detected in tool arguments or content. Tool arguments should be plain text/JSON — invisible characters in tool calls are almost certainly malicious payload injection.")
-@severity("critical")
-@tags("unicode,invisible-chars,tools,encoding,owasp-asi01,owasp-asi02")
-@reject_message("Tool execution blocked: invisible Unicode characters detected in tool arguments. Tool calls should contain only plain text — invisible characters indicate payload injection or encoding evasion.")
+// Block destructive file operations
+@id("tools-block-destructive-ops")
+@name("Block destructive file operations")
+@description("Block file deletion and directory removal tools. INACTIVE BY DEFAULT — enable when agents should not have delete access. Only matches MCP tool names; Bash rm commands go through 'shell' tool name.")
+@severity("high")
+@tags("file,delete,destructive,nist-ac-3,owasp-asi02,opt-in")
+@reject_message("Tool blocked: destructive file operations (delete, rmdir) are restricted. Request human approval for destructive actions.")
 forbid (
     principal,
     action == Overwatch::Action::"call_tool",
     resource
 )
 when {
-    context has contains_invisible_chars && context.contains_invisible_chars
+    context has tool_name &&
+    (context.tool_name == "fs.delete" ||
+     context.tool_name == "fs.rmdir" ||
+     context.tool_name == "fs.unlink" ||
+     context.tool_name == "fs.remove" ||
+     context.tool_name == "delete_file" ||
+     context.tool_name == "remove_directory")
 };
 // ---------------------------------------------------------------------------
-// Section 3: Invisible Characters in File Operations
-// Blocks file reads/writes with encoding attacks to prevent persistence
-// of invisible payloads in the codebase.
+// Section 3: Sensitive System Path Protection (active)
+// Blocks file read/write access to system directories.
+// NOTE: Targets read_file/write_file only — NOT call_tool. The path field
+// is empty for Bash commands. Bash access to system files is caught by
+// YARA rules in semantic.cedar (command_injection, path_traversal).
+// Ref: MITRE T1005, T1552
 // ---------------------------------------------------------------------------
-// Block file writes with invisible characters
-@id("encoding-block-invisible-file-write")
-@name("Block invisible characters in file writes")
-@description("Block file writes when invisible Unicode characters are detected. Prevents persistence of invisible payloads in source code, config files, or documentation where they could later be processed by AI agents. Ref: Rules File Backdoor attack (Pillar Security).")
+// Block access to system directories
+@id("tools-block-system-paths")
+@name("Block system directory access")
+@description("Block file read/write to sensitive system directories. Includes Linux (/etc, /proc, /sys, /root, /var) and macOS (/private/etc, /Library, /System) paths. Only applies to Read/Write/Edit tools.")
 @severity("high")
-@tags("unicode,invisible-chars,file-write,encoding,owasp-asi01")
-@reject_message("File write blocked: invisible Unicode characters detected in content. Writing invisible characters to files can create persistent backdoors that affect AI agents processing those files later.")
+@tags("file,path,system,nist-ac-6,mitre-t1005")
+@reject_message("Blocked: access to sensitive system directory. AI agents are restricted from system configuration and process directories.")
 forbid (
     principal,
-    action == Overwatch::Action::"write_file",
+    action in [Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has contains_invisible_chars && context.contains_invisible_chars
+    context has path &&
+    (context.path like "/etc/*" ||
+     context.path like "/proc/*" ||
+     context.path like "/sys/*" ||
+     context.path like "/root/*" ||
+     context.path like "/var/log/*" ||
+     context.path like "/var/run/*" ||
+     context.path like "/private/etc/*" ||
+     context.path like "/private/var/*" ||
+     context.path like "/Library/*" ||
+     context.path like "/System/*")
 };
-// Block MCP server connections with invisible characters in config
-@id("encoding-block-invisible-server")
-@name("Block invisible characters in server config")
-@description("Block MCP server connections when invisible Unicode characters are detected in server configuration or responses. Invisible chars in server data indicate a compromised or malicious MCP server.")
-@severity("critical")
-@tags("unicode,invisible-chars,mcp,encoding,owasp-mcp02,owasp-asi04")
-@reject_message("MCP server connection blocked: invisible Unicode characters detected in server data. This may indicate a compromised MCP server using encoding attacks to inject hidden instructions.")
+// ---------------------------------------------------------------------------
+// Section 4: Threat-Based Tool Blocking (active)
+// Blocks tool calls based on threat severity from detection engines.
+// This is the primary catch-all — any YARA rule with severity HIGH (3)
+// or CRITICAL (4) triggers this. Provides defense-in-depth behind
+// specific YARA rule policies in semantic.cedar and secrets.cedar.
+// ---------------------------------------------------------------------------
+// Block tool calls with high/critical severity threats
+@id("tools-block-high-severity")
+@name("Block tool calls with high severity threats")
+@description("Block tool execution when YARA or Javelin detects threats with severity >= HIGH (3). Primary catch-all defense — any YARA rule with severity HIGH or CRITICAL triggers this.")
+@severity("high")
+@tags("tools,threats,severity,defense-in-depth")
+@reject_message("Tool execution blocked: high or critical severity threats detected in content by security scanners.")
 forbid (
     principal,
-    action == Overwatch::Action::"connect_server",
+    action == Overwatch::Action::"call_tool",
     resource
 )
 when {
-    context has contains_invisible_chars && context.contains_invisible_chars
+    context has max_threat_severity && context.max_threat_severity >= 3
 };
 `;
-const OVERWATCH_BEHAVIORAL_DEFAULT_CEDAR = `// =============================================================================
-// Behavioral Analysis Policy (Default)
+const OVERWATCH_PII_DEFAULT_CEDAR = `// =============================================================================
+// PII Detection Policy (Default)
 // =============================================================================
-// Detects and blocks suspicious agent behavioral patterns including tool call
-// loops, data exfiltration sequences, credential theft chains, and destructive
-// operation patterns. Operates on session-level behavioral signals rather
-// than single-request content analysis.
+// Detects and blocks personally identifiable information across prompts, tool
+// calls, and file operations using YARA rule name matching and ML classifiers.
 //
-// Attack vectors:
-//   - Tool call loops: Agent stuck in retry loop or manipulation-induced recursion
-//   - Data exfiltration: Read sensitive data → send to external endpoint sequence
-//   - Secret exfiltration: Read credentials → curl/fetch external URL
-//   - Credential theft: Access .ssh/.aws → encode/compress → network tool
-//   - Destructive sequences: Bulk delete, permission changes, config overwrites
+// Detection layers:
+//   1. YARA rule triggers (detected_threats) — pattern-based, always available
 //
-// Ref: OWASP LLM10 (Unbounded Consumption) — loop/recursion attacks
-//      OWASP ASI02 (Tool Misuse) — tool abuse sequences
-//      OWASP ASI08 (Lack of Monitoring) — behavioral anomaly detection
-//      GlassWorm Attack (35,800+ installations, cross-agent propagation)
-//      MITRE ATLAS AML.T0080 (Memory Manipulation)
+// YARA rules matched:
+//   pii_ssn            — US Social Security Numbers (XXX-XX-XXXX)
+//   pii_credit_card    — Credit card numbers (13-19 digits)
+//   pii_passport       — Passport numbers (1-2 letters + 6-9 digits)
+//   pii_iban           — International Bank Account Numbers
+//   pii_email          — Email addresses
+//   pii_phone_us       — US phone numbers
+//   pii_date_of_birth  — Dates in MM/DD/YYYY format
+//   pii_ip_address     — IPv4 addresses
 //
 // Compliance:
-//   OWASP LLM10 (Unbounded Consumption)
-//   OWASP ASI02 (Tool Misuse)
-//   OWASP ASI08 (Lack of Monitoring & Logging)
-//   MITRE ATLAS AML.T0080 (AI Memory Manipulation)
-//   MITRE ATT&CK T1041 (Exfiltration Over C2 Channel)
-//   NIST 800-53 AU-6 (Audit Review, Analysis, and Reporting)
+//   PCI DSS 3.4, 4.1 (Payment Card Data)
+//   GDPR Art. 32 (Security of Processing)
+//   HIPAA §164.312 (Technical Safeguards)
 //   NIST 800-53 SI-4 (Information System Monitoring)
+//   CCPA §1798.150 (Data Protection)
+//   OWASP LLM06 (Sensitive Information Disclosure)
 //
-// Category: behavioral
+// Category: pii
 // Namespace: Overwatch
 // =============================================================================
 // ---------------------------------------------------------------------------
-// Section 1: Tool Call Loop Detection
-// Detects agents trapped in infinite loops — either through manipulation
-// (adversarial prompt inducing repetitive behavior) or bugs (retry storms).
-// Ref: OWASP LLM10 (Unbounded Consumption)
+// Section 1: Critical PII — Identity Theft Risk
+// SSNs and credit card numbers are the highest-risk PII types.
+// Blocked across all actions (prompts, tool calls, file reads/writes).
 // ---------------------------------------------------------------------------
-// Block tool calls in detected loops (5+ consecutive same-tool calls)
-@id("behavioral-block-loop")
-@name("Block tool call loops")
-@description("Block tool execution when a loop is detected — 5 or more consecutive calls to the same tool. This indicates either adversarial manipulation inducing repetitive agent behavior or a bug causing retry storms. Both waste compute and can cause damage. Adjust the threshold (default 5) to match your workflow — lower for stricter enforcement, higher for agents that legitimately retry (OWASP LLM10).")
-@severity("high")
-@tags("loop-detection,behavioral,owasp-llm10,owasp-asi02,nist-si-4")
-@reject_message("Tool execution blocked: repetitive tool call loop detected (5+ consecutive calls to the same tool). This may indicate adversarial manipulation or a system error. The agent session should be reviewed.")
+// Block Social Security Numbers
+@id("pii-block-ssn")
+@name("Block Social Security Numbers")
+@description("Block content containing SSN patterns (XXX-XX-XXXX). SSNs are high-value identity theft targets — exposure through AI agents is a critical privacy violation.")
+@severity("critical")
+@tags("pii,ssn,identity,yara,pci-dss,nist-si-4")
+@reject_message("Content blocked: Social Security Number patterns detected. SSNs must never be processed through AI agents.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has loop_detected && context.loop_detected &&
-    context has loop_count && context.loop_count >= 5
+    context has detected_threats && context.detected_threats.contains("pii_ssn")
 };
-// ---------------------------------------------------------------------------
-// Section 2: Data Exfiltration Detection
-// Detects sequences where an agent reads sensitive data and then attempts
-// to send it to an external endpoint.
-// Ref: GlassWorm attack, MITRE T1041
-// ---------------------------------------------------------------------------
-// Block data exfiltration patterns
-@id("behavioral-block-data-exfil")
-@name("Block data exfiltration")
-@description("Block tool execution when a data exfiltration pattern is detected — the agent reads sensitive local data (files, configs, source code) followed by a network operation sending data externally. This is the hallmark of autonomous agent compromise (GlassWorm, EchoLeak).")
+// Block credit card numbers (PCI DSS compliance)
+@id("pii-block-credit-card")
+@name("Block credit card numbers")
+@description("Block content containing credit card number patterns (13-19 digits). PCI DSS 3.4 requires PANs are rendered unreadable — AI agents must never process raw card numbers.")
 @severity("critical")
-@tags("data-exfiltration,behavioral,owasp-asi02,mitre-t1041,nist-si-4")
-@reject_message("Tool execution blocked: data exfiltration pattern detected. The agent appears to be reading sensitive data and sending it to an external endpoint. This is a critical security event — the agent session has been terminated.")
+@tags("pii,credit-card,payment,yara,pci-dss-3.4,pci-dss-4.1")
+@reject_message("Content blocked: credit card number patterns detected. Sharing payment card data through AI agents violates PCI DSS. Use tokenized references instead.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has suspicious_pattern && context.suspicious_pattern &&
-    context has pattern_type && context.pattern_type == "data_exfiltration"
+    context has detected_threats && context.detected_threats.contains("pii_credit_card")
 };
-// Block secret exfiltration (credential-specific exfil)
-@id("behavioral-block-secret-exfil")
-@name("Block secret exfiltration")
-@description("Block tool execution when a secret exfiltration pattern is detected — the agent accesses credential files (.env, .aws, tokens) followed by a network tool call. This is a targeted credential theft sequence.")
-@severity("critical")
-@tags("secret-exfiltration,behavioral,owasp-asi02,mitre-t1552,mitre-t1041")
-@reject_message("Tool execution blocked: secret exfiltration pattern detected. The agent accessed credential files and is attempting to send them externally. This is a targeted credential theft attack.")
+// ---------------------------------------------------------------------------
+// Section 2: High PII — Government & Financial Identifiers
+// Passport numbers and IBANs are regulated identifiers with high
+// identity theft and financial fraud risk.
+// ---------------------------------------------------------------------------
+// Block passport numbers
+@id("pii-block-passport")
+@name("Block passport numbers")
+@description("Block content containing passport number patterns (1-2 letters + 6-9 digits). Passport numbers are government-issued identifiers with high identity theft risk.")
+@severity("high")
+@tags("pii,passport,identity,yara,gdpr-art-32")
+@reject_message("Content blocked: passport number patterns detected. Government-issued identifiers must not be processed through AI agents.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has suspicious_pattern && context.suspicious_pattern &&
-    context has pattern_type && context.pattern_type == "secret_exfiltration"
+    context has detected_threats && context.detected_threats.contains("pii_passport")
 };
-// Block credential theft chains
-@id("behavioral-block-credential-theft")
-@name("Block credential theft")
-@description("Block tool execution when a credential theft chain is detected — accessing SSH keys, cloud credentials, or API tokens followed by encoding, compression, or transfer operations. Multi-step attack pattern for autonomous credential harvesting.")
-@severity("critical")
-@tags("credential-theft,behavioral,owasp-asi02,mitre-t1552,mitre-t1555")
-@reject_message("Tool execution blocked: credential theft chain detected. The agent is performing a multi-step operation to harvest and exfiltrate credentials (SSH keys, cloud tokens, API keys). Session terminated.")
+// Block IBAN (International Bank Account Numbers)
+@id("pii-block-iban")
+@name("Block bank account numbers")
+@description("Block content containing IBAN patterns. Bank account numbers are sensitive financial identifiers that must not be exposed through AI agents.")
+@severity("high")
+@tags("pii,iban,financial,yara,gdpr-art-32,pci-dss")
+@reject_message("Content blocked: bank account number (IBAN) patterns detected. Financial account numbers must not be processed through AI agents.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"read_file", Overwatch::Action::"write_file"],
     resource
 )
 when {
-    context has suspicious_pattern && context.suspicious_pattern &&
-    context has pattern_type && context.pattern_type == "credential_theft"
+    context has detected_threats && context.detected_threats.contains("pii_iban")
 };
 // ---------------------------------------------------------------------------
-// Section 3: Destructive Sequence Detection
-// Detects sequences of destructive operations that could damage the workspace.
+// Section 3: Medium PII — Contact Information
+// Email addresses and phone numbers. Only blocked in prompts and tool calls
+// (not file ops — too common in source code, configs, and test fixtures).
 // ---------------------------------------------------------------------------
-// Block destructive operation sequences
-@id("behavioral-block-destructive-sequence")
-@name("Block destructive sequences")
-@description("Block tool execution when a destructive operation sequence is detected — bulk file deletions, permission changes, config overwrites, or repository manipulation patterns. Prevents agent-initiated workspace damage.")
-@severity("critical")
-@tags("destructive,behavioral,owasp-asi02,nist-si-4")
-@reject_message("Tool execution blocked: destructive operation sequence detected. The agent is performing a pattern of destructive operations (bulk deletions, permission changes, config overwrites) that could damage the workspace.")
+// Block email addresses
+@id("pii-block-email")
+@name("Block email addresses")
+@description("Block prompts and tool calls containing email address patterns. Prevents accidental sharing of personal email addresses with AI agents.")
+@severity("medium")
+@tags("pii,email,contact,yara,gdpr-art-32")
+@reject_message("Content blocked: email address patterns detected. Remove personal email addresses before submitting to AI agents.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has detected_threats && context.detected_threats.contains("pii_email")
+};
+// Block US phone numbers
+@id("pii-block-phone")
+@name("Block phone numbers")
+@description("Block prompts and tool calls containing US phone number patterns. Prevents accidental sharing of personal phone numbers with AI agents.")
+@severity("medium")
+@tags("pii,phone,contact,yara,ccpa")
+@reject_message("Content blocked: phone number patterns detected. Remove personal phone numbers before submitting to AI agents.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
     resource
 )
 when {
-    context has suspicious_pattern && context.suspicious_pattern &&
-    context has pattern_type && context.pattern_type == "destructive_sequence"
+    context has detected_threats && context.detected_threats.contains("pii_phone_us")
+};
+// Block dates of birth
+@id("pii-block-dob")
+@name("Block dates of birth")
+@description("Block prompts and tool calls containing date of birth patterns (MM/DD/YYYY). Date of birth combined with other identifiers enables identity theft.")
+@severity("medium")
+@tags("pii,dob,identity,yara,hipaa-164.312")
+@reject_message("Content blocked: date of birth patterns detected. Remove personal dates before submitting to AI agents.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has detected_threats && context.detected_threats.contains("pii_date_of_birth")
 };
 // ---------------------------------------------------------------------------
-// Section 4: Sequence Risk Scoring
-// Uses computed sequence risk scores for dynamic behavioral assessment.
+// Section 4: Low PII — Network Identifiers
+// IP addresses are extremely common in code, configs, and logs.
+// Only blocked in prompts to catch accidental data dumps.
 // ---------------------------------------------------------------------------
-// Block high-risk behavioral sequences
-@id("behavioral-block-high-risk-sequence")
-@name("Block high-risk behavioral sequences")
-@description("Block tool execution when the computed sequence risk score exceeds 80/100. The score aggregates behavioral signals including action history, tool combination analysis, and deviation from normal patterns. High scores indicate coordinated multi-step attacks.")
-@severity("high")
-@tags("sequence-risk,behavioral,dynamic,owasp-asi08,nist-au-6")
-@reject_message("Tool execution blocked: high-risk behavioral sequence detected (risk score 80+). The pattern of agent actions indicates a coordinated attack. This session requires human review before continuing.")
+// Block IP addresses in prompts
+@id("pii-block-ip-address")
+@name("Block IP addresses in prompts")
+@description("Block prompts containing IPv4 address patterns. Only targets prompts — IP addresses are too common in source code and config files to block in tool calls or file operations.")
+@severity("low")
+@tags("pii,ip-address,network,yara")
+@reject_message("Content blocked: IP address patterns detected in prompt. Remove network identifiers before submitting.")
 forbid (
     principal,
-    action == Overwatch::Action::"call_tool",
+    action == Overwatch::Action::"process_prompt",
     resource
 )
 when {
-    context has sequence_risk && context.sequence_risk >= 80
+    context has detected_threats && context.detected_threats.contains("pii_ip_address")
 };
 `;
 const OVERWATCH_TOOLS_MCP_ALLOWLIST_CEDAR = `// MCP Server Allowlist Template
 // Only allow specific MCP servers to be used
@@ -1653,15 +1049,11 @@ when {
 // CATEGORIES
 // =============================================================================
 export const OVERWATCH_CATEGORIES = [
-    { id: 'secrets', name: 'Secrets Detection', description: 'Detect and block credentials, tokens, API keys, and sensitive key patterns in prompts, tool calls, and AI responses' },
+    { id: 'secrets', name: 'Secrets Detection', description: 'Detect and block credentials, tokens, API keys, and sensitive key patterns in prompts, tool calls, and file operations' },
     { id: 'pii', name: 'PII Detection', description: 'Detect and block personally identifiable information (PII) such as credit card numbers, SSNs, and other sensitive data' },
-    { id: 'semantic', name: 'Semantic Threat Detection', description: 'Detect and block prompt injection, jailbreak attempts, and high-severity AI security threats' },
+    { id: 'semantic', name: 'Semantic Threat Detection', description: 'Detect and block injection attacks, prompt injection, jailbreak attempts, and unsafe content using YARA rules and ML classifiers' },
     { id: 'tools', name: 'Tool Permissioning', description: 'Control access to shell execution, file operations, MCP servers, and sensitive system paths' },
     { id: 'organization', name: 'Organization Rules', description: 'Apply organization-wide policy baselines, team permissions, and agent-specific guardrails' },
-    { id: 'trust_safety', name: 'Content Safety', description: 'Detect and control violent, harmful, hateful, sexual, and profane content using trust/safety classification scores' },
-    { id: 'agent_security', name: 'Agent Security', description: 'Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats targeting AI agents' },
-    { id: 'encoding', name: 'Encoding & Unicode Attacks', description: 'Detect invisible Unicode characters, bidirectional text overrides, and encoded injection payloads used to hide malicious instructions' },
-    { id: 'behavioral', name: 'Behavioral Analysis', description: 'Detect suspicious action sequences, tool call loops, data exfiltration patterns, and credential theft chains across agent sessions' },
 ];
 // =============================================================================
 // DEFAULT POLICIES
@@ -1677,24 +1069,34 @@ export const OVERWATCH_DEFAULTS = [
         tags: ['baseline', 'permit-default', 'organization'],
         isActive: true,
     },
+    {
+        id: 'secrets-default',
+        name: 'Secrets Detection',
+        description: 'Detect and block credential leakage using YARA rule matching (secrets_leakage, ssh_key_exposure, pem_file_access, environment_variable_leakage) and sensitive file path protection',
+        category: 'secrets',
+        cedarText: OVERWATCH_SECRETS_DEFAULT_CEDAR,
+        severity: 'critical',
+        tags: ['secrets', 'credentials', 'aws', 'github', 'ssh', 'pem', 'yara', 'baseline'],
+        isActive: true,
+    },
     {
         id: 'semantic-default',
         name: 'Semantic Threat Detection',
-        description: 'Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers',
+        description: 'Detect and block injection attacks (command, SQL, path traversal), prompt injection, jailbreak, and unsafe content (violence, hate speech, etc.) using YARA rules and ML classifiers',
         category: 'semantic',
         cedarText: OVERWATCH_SEMANTIC_DEFAULT_CEDAR,
         severity: 'critical',
-        tags: ['prompt-injection', 'jailbreak', 'owasp-llm01', 'owasp-llm02', 'security', 'baseline'],
+        tags: ['injection', 'jailbreak', 'content-safety', 'yara', 'ml', 'owasp-llm01', 'owasp-llm02', 'baseline'],
         isActive: true,
     },
     {
-        id: 'trust-safety-default',
-        name: 'Content Safety',
-        description: 'Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores',
-        category: 'trust_safety',
-        cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
-        severity: 'critical',
-        tags: ['violence', 'weapons', 'hate-speech', 'crime', 'sexual', 'profanity', 'content-safety', 'baseline'],
+        id: 'tools-default',
+        name: 'Tool Permissioning',
+        description: 'Block access to sensitive system paths and tool calls with high-severity threats. Includes opt-in shell blocking and destructive operation blocking.',
+        category: 'tools',
+        cedarText: OVERWATCH_TOOLS_DEFAULT_CEDAR,
+        severity: 'high',
+        tags: ['tools', 'file-access', 'system-paths', 'severity', 'baseline'],
         isActive: true,
     },
 ];
@@ -1702,15 +1104,6 @@ export const OVERWATCH_DEFAULTS = [
 // ALL TEMPLATES
 // =============================================================================
 export const OVERWATCH_TEMPLATES = [
-    {
-        id: 'secrets-default',
-        name: 'Secrets Detection',
-        description: 'Detect and block credential leakage across prompts, tool calls, file operations, and AI responses using multi-layered detection',
-        category: 'secrets',
-        cedarText: OVERWATCH_SECRETS_DEFAULT_CEDAR,
-        severity: 'critical',
-        tags: ['api-keys', 'tokens', 'credentials', 'aws', 'github', 'ssh', 'baseline'],
-    },
     {
         id: 'pii-default',
         name: 'PII Detection',
@@ -1720,42 +1113,6 @@ export const OVERWATCH_TEMPLATES = [
         severity: 'critical',
         tags: ['pii', 'privacy', 'compliance', 'pci-dss', 'gdpr', 'hipaa', 'baseline'],
     },
-    {
-        id: 'tools-default',
-        name: 'Tool Permissioning',
-        description: 'Block dangerous shell execution, restrict sensitive file paths, enforce tool risk scoring, and detect command injection in tool arguments',
-        category: 'tools',
-        cedarText: OVERWATCH_TOOLS_DEFAULT_CEDAR,
-        severity: 'critical',
-        tags: ['shell', 'command-injection', 'file-access', 'tool-risk', 'mitre-t1059', 'owasp-llm06', 'baseline'],
-    },
-    {
-        id: 'agent-security-default',
-        name: 'Agent Security',
-        description: 'Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats via Shield detection',
-        category: 'agent_security',
-        cedarText: OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR,
-        severity: 'critical',
-        tags: ['tool-poisoning', 'rug-pull', 'indirect-injection', 'mcp-security', 'agent-security', 'owasp-asi01', 'owasp-asi04', 'baseline'],
-    },
-    {
-        id: 'encoding-default',
-        name: 'Encoding Attack Detection',
-        description: 'Detect and block invisible Unicode characters, bidirectional overrides, and encoding-based injection attacks across prompts, tools, and files',
-        category: 'encoding',
-        cedarText: OVERWATCH_ENCODING_DEFAULT_CEDAR,
-        severity: 'high',
-        tags: ['unicode', 'invisible-chars', 'bidi-override', 'encoding', 'owasp-llm01', 'baseline'],
-    },
-    {
-        id: 'behavioral-default',
-        name: 'Behavioral Analysis',
-        description: 'Detect and block tool call loops, data exfiltration sequences, credential theft chains, and destructive operation patterns',
-        category: 'behavioral',
-        cedarText: OVERWATCH_BEHAVIORAL_DEFAULT_CEDAR,
-        severity: 'high',
-        tags: ['loop-detection', 'data-exfiltration', 'credential-theft', 'behavioral', 'owasp-llm10', 'owasp-asi02', 'baseline'],
-    },
     {
         id: 'tools-mcp-allowlist',
         name: 'MCP Server Allowlist',
@@ -1817,13 +1174,13 @@ export const OVERWATCH_TEMPLATES = [
 /** Raw templates.json metadata for the Overwatch service. */
 export const OVERWATCH_TEMPLATES_JSON = `{
   "service": "overwatch",
-  "version": "3.0.0",
+  "version": "4.0.0",
   "description": "Overwatch policy templates for IDE agent security",
   "categories": [
     {
       "id": "secrets",
       "name": "Secrets Detection",
-      "description": "Detect and block credentials, tokens, API keys, and sensitive key patterns in prompts, tool calls, and AI responses"
+      "description": "Detect and block credentials, tokens, API keys, and sensitive key patterns in prompts, tool calls, and file operations"
     },
     {
       "id": "pii",
@@ -1833,7 +1190,7 @@ export const OVERWATCH_TEMPLATES_JSON = `{
     {
       "id": "semantic",
       "name": "Semantic Threat Detection",
-      "description": "Detect and block prompt injection, jailbreak attempts, and high-severity AI security threats"
+      "description": "Detect and block injection attacks, prompt injection, jailbreak attempts, and unsafe content using YARA rules and ML classifiers"
     },
     {
       "id": "tools",
@@ -1844,26 +1201,6 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "id": "organization",
       "name": "Organization Rules",
       "description": "Apply organization-wide policy baselines, team permissions, and agent-specific guardrails"
-    },
-    {
-      "id": "trust_safety",
-      "name": "Content Safety",
-      "description": "Detect and control violent, harmful, hateful, sexual, and profane content using trust/safety classification scores"
-    },
-    {
-      "id": "agent_security",
-      "name": "Agent Security",
-      "description": "Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats targeting AI agents"
-    },
-    {
-      "id": "encoding",
-      "name": "Encoding & Unicode Attacks",
-      "description": "Detect invisible Unicode characters, bidirectional text overrides, and encoded injection payloads used to hide malicious instructions"
-    },
-    {
-      "id": "behavioral",
-      "name": "Behavioral Analysis",
-      "description": "Detect suspicious action sequences, tool call loops, data exfiltration patterns, and credential theft chains across agent sessions"
     }
   ],
   "defaults": [
@@ -1877,37 +1214,38 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "tags": ["baseline", "permit-default", "organization"],
       "is_active": true
     },
+    {
+      "id": "secrets-default",
+      "name": "Secrets Detection",
+      "description": "Detect and block credential leakage using YARA rule matching (secrets_leakage, ssh_key_exposure, pem_file_access, environment_variable_leakage) and sensitive file path protection",
+      "category": "secrets",
+      "file": "defaults/secrets.cedar",
+      "severity": "critical",
+      "tags": ["secrets", "credentials", "aws", "github", "ssh", "pem", "yara", "baseline"],
+      "is_active": true
+    },
     {
       "id": "semantic-default",
       "name": "Semantic Threat Detection",
-      "description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats using detection rules and ML classifiers",
+      "description": "Detect and block injection attacks (command, SQL, path traversal), prompt injection, jailbreak, and unsafe content (violence, hate speech, etc.) using YARA rules and ML classifiers",
       "category": "semantic",
       "file": "defaults/semantic.cedar",
       "severity": "critical",
-      "tags": ["prompt-injection", "jailbreak", "owasp-llm01", "owasp-llm02", "security", "baseline"],
+      "tags": ["injection", "jailbreak", "content-safety", "yara", "ml", "owasp-llm01", "owasp-llm02", "baseline"],
       "is_active": true
     },
     {
-      "id": "trust-safety-default",
-      "name": "Content Safety",
-      "description": "Detect and block violent, harmful, hateful, sexual, and profane content using ML classification scores",
-      "category": "trust_safety",
-      "file": "defaults/trust_safety.cedar",
-      "severity": "critical",
-      "tags": ["violence", "weapons", "hate-speech", "crime", "sexual", "profanity", "content-safety", "baseline"],
+      "id": "tools-default",
+      "name": "Tool Permissioning",
+      "description": "Block access to sensitive system paths and tool calls with high-severity threats. Includes opt-in shell blocking and destructive operation blocking.",
+      "category": "tools",
+      "file": "defaults/tools.cedar",
+      "severity": "high",
+      "tags": ["tools", "file-access", "system-paths", "severity", "baseline"],
       "is_active": true
     }
   ],
   "templates": [
-    {
-      "id": "secrets-default",
-      "name": "Secrets Detection",
-      "description": "Detect and block credential leakage across prompts, tool calls, file operations, and AI responses using multi-layered detection",
-      "category": "secrets",
-      "file": "defaults/secrets.cedar",
-      "severity": "critical",
-      "tags": ["api-keys", "tokens", "credentials", "aws", "github", "ssh", "baseline"]
-    },
     {
       "id": "pii-default",
       "name": "PII Detection",
@@ -1917,42 +1255,6 @@ export const OVERWATCH_TEMPLATES_JSON = `{
       "severity": "critical",
       "tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"]
     },
-    {
-      "id": "tools-default",
-      "name": "Tool Permissioning",
-      "description": "Block dangerous shell execution, restrict sensitive file paths, enforce tool risk scoring, and detect command injection in tool arguments",
-      "category": "tools",
-      "file": "defaults/tools.cedar",
-      "severity": "critical",
-      "tags": ["shell", "command-injection", "file-access", "tool-risk", "mitre-t1059", "owasp-llm06", "baseline"]
-    },
-    {
-      "id": "agent-security-default",
-      "name": "Agent Security",
-      "description": "Detect and block tool poisoning, rug pull attacks, indirect prompt injection, and MCP supply chain threats via Shield detection",
-      "category": "agent_security",
-      "file": "defaults/agent_security.cedar",
-      "severity": "critical",
-      "tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "owasp-asi01", "owasp-asi04", "baseline"]
-    },
-    {
-      "id": "encoding-default",
-      "name": "Encoding Attack Detection",
-      "description": "Detect and block invisible Unicode characters, bidirectional overrides, and encoding-based injection attacks across prompts, tools, and files",
-      "category": "encoding",
-      "file": "defaults/encoding_attacks.cedar",
-      "severity": "high",
-      "tags": ["unicode", "invisible-chars", "bidi-override", "encoding", "owasp-llm01", "baseline"]
-    },
-    {
-      "id": "behavioral-default",
-      "name": "Behavioral Analysis",
-      "description": "Detect and block tool call loops, data exfiltration sequences, credential theft chains, and destructive operation patterns",
-      "category": "behavioral",
-      "file": "defaults/behavioral.cedar",
-      "severity": "high",
-      "tags": ["loop-detection", "data-exfiltration", "credential-theft", "behavioral", "owasp-llm10", "owasp-asi02", "baseline"]
-    },
     {
       "id": "tools-mcp-allowlist",
       "name": "MCP Server Allowlist",