npm - @highflame/policy - Version diff - 2.0.7 → 2.0.8 - Mend

@highflame/policy 2.0.7 → 2.0.8

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (19)

package/_schemas/overwatch/context.json +163 -1
package/_schemas/overwatch/schema.cedarschema +45 -0
package/dist/overwatch-context.gen.d.ts +13 -0
package/dist/overwatch-context.gen.d.ts.map +1 -1
package/dist/overwatch-context.gen.js +13 -0
package/dist/overwatch-context.gen.js.map +1 -1
package/dist/overwatch-defaults.gen.d.ts +1 -1
package/dist/overwatch-defaults.gen.d.ts.map +1 -1
package/dist/overwatch-defaults.gen.js +346 -1
package/dist/overwatch-defaults.gen.js.map +1 -1
package/dist/overwatch-defaults.test.js +5 -5
package/dist/overwatch-defaults.test.js.map +1 -1
package/dist/schemas.test.js +32 -0
package/dist/schemas.test.js.map +1 -1
package/package.json +1 -1
package/src/overwatch-context.gen.ts +13 -0
package/src/overwatch-defaults.gen.ts +350 -2
package/src/overwatch-defaults.test.ts +5 -5
package/src/schemas.test.ts +32 -0

package/src/overwatch-defaults.gen.ts CHANGED

@@ -9,7 +9,7 @@
  * Overwatch policy category identifiers.
  * Maps to UI tab names in Studio.
  */
-export type OverwatchCategory = 'secrets' | 'pii' | 'semantic' | 'tools' | 'organization';
+export type OverwatchCategory = 'secrets' | 'pii' | 'semantic' | 'tools' | 'organization' | 'trust_safety' | 'agent_security';
 /**
  * Category metadata for UI display.
@@ -66,6 +66,32 @@ export interface OverwatchTemplate {
 // EMBEDDED CEDAR POLICY TEXT
 // =============================================================================
+const OVERWATCH_BASELINE_DEFAULT_CEDAR = `// =============================================================================
+// Baseline Permit Policy (Default)
+// =============================================================================
+// Permits all actions by default. Threat-specific forbid policies override
+// this to block when YARA, Javelin, or other scanners detect issues.
+//
+// Cedar is default-deny: without at least one permit rule, every request
+// is denied regardless of forbid rules. This baseline ensures the system
+// is "allow unless blocked" rather than "block everything".
+//
+// Category: organization
+// Namespace: Overwatch
+// =============================================================================
+@id("baseline-permit-all")
+@name("Permit all actions by default")
+@description("Baseline permit for all actions — threat-specific forbid policies override this when threats are detected")
+@severity("low")
+@tags("baseline,permit-default,organization")
+permit (
+    principal,
+    action,
+    resource
+);
+`;
 const OVERWATCH_SECRETS_DEFAULT_CEDAR = `// =============================================================================
 // Secrets Detection Policy (Default)
 // =============================================================================
@@ -293,6 +319,22 @@ when {
     context has threat_categories && context.threat_categories.contains("pii")
 };
+// Block prompts with high PII confidence score
+@id("pii-block-high-confidence")
+@name("Block high-confidence PII")
+@description("Block content when PII classifier confidence exceeds threshold (80/100)")
+@severity("critical")
+@tags("pii,confidence,privacy,compliance")
+@reject_message("Your content was blocked because personally identifiable information was detected with high confidence.")
+forbid (
+    principal,
+    action == Overwatch::Action::"process_prompt",
+    resource
+)
+when {
+    context has pii_confidence && context.pii_confidence >= 80
+};
 // Block PII leakage via tool calls
 @id("pii-block-tool-calls")
 @name("Block tool calls with PII")
@@ -337,6 +379,22 @@ when {
     context has yara_threats && context.yara_threats.contains("prompt_injection")
 };
+// Block prompts with high injection confidence score
+@id("semantic-block-injection-score")
+@name("Block high-confidence injection")
+@description("Block content when injection classifier confidence exceeds threshold (75/100)")
+@severity("critical")
+@tags("injection,confidence,security,owasp-llm01")
+@reject_message("Your prompt was blocked because a high-confidence prompt injection pattern was detected.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has injection_confidence && context.injection_confidence >= 75
+};
 // Block prompts with jailbreak attempts
 @id("semantic-block-jailbreak")
 @name("Block jailbreak attempts")
@@ -352,6 +410,22 @@ when {
     context has yara_threats && context.yara_threats.contains("jailbreak")
 };
+// Block prompts with high jailbreak confidence score
+@id("semantic-block-jailbreak-score")
+@name("Block high-confidence jailbreak")
+@description("Block content when jailbreak classifier confidence exceeds threshold (75/100)")
+@severity("critical")
+@tags("jailbreak,confidence,security,owasp-llm02")
+@reject_message("Your prompt was blocked because a high-confidence jailbreak attempt was detected.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has jailbreak_confidence && context.jailbreak_confidence >= 75
+};
 // Block prompts with high severity semantic threats
 @id("semantic-block-high-severity")
 @name("Block high severity threats")
@@ -508,6 +582,208 @@ when {
 };
 `;
+const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
+// Content Safety Policy (Default)
+// =============================================================================
+// Detects and blocks violent, harmful, hateful, sexual, and profane content
+// using trust/safety classification scores from Javelin/Lakera/LlamaGuard.
+//
+// Compliance: NIST 800-53 SI-4 | EU AI Act Art. 52
+// Category: trust_safety
+// Namespace: Overwatch
+// =============================================================================
+// Block prompts with high violence score
+@id("ts-block-violence")
+@name("Block violent content")
+@description("Block content when violence detection score exceeds threshold (80/100)")
+@severity("critical")
+@tags("violence,content-safety,trust-safety,nist-si-4")
+@reject_message("Your prompt was blocked because it contains violent content. Please rephrase without violence-related language.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has violence_score && context.violence_score >= 80
+};
+// Block prompts with weapons content
+@id("ts-block-weapons")
+@name("Block weapons content")
+@description("Block content when weapons detection score exceeds threshold (80/100)")
+@severity("critical")
+@tags("weapons,content-safety,trust-safety,nist-si-4")
+@reject_message("Your prompt was blocked because it contains weapons-related content. Please rephrase without weapons references.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has weapons_score && context.weapons_score >= 80
+};
+// Block prompts with hate speech
+@id("ts-block-hate-speech")
+@name("Block hate speech")
+@description("Block content when hate speech detection score exceeds threshold (75/100)")
+@severity("critical")
+@tags("hate-speech,content-safety,trust-safety,nist-si-4")
+@reject_message("Your prompt was blocked because it contains hate speech. Please rephrase without hateful or discriminatory language.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has hate_speech_score && context.hate_speech_score >= 75
+};
+// Block prompts with criminal content
+@id("ts-block-crime")
+@name("Block criminal content")
+@description("Block content when criminal activity detection score exceeds threshold (80/100)")
+@severity("high")
+@tags("crime,content-safety,trust-safety,nist-si-4")
+@reject_message("Your prompt was blocked because it contains content related to criminal activity.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has crime_score && context.crime_score >= 80
+};
+// Block prompts with sexual content
+@id("ts-block-sexual")
+@name("Block sexual content")
+@description("Block content when sexual content detection score exceeds threshold (80/100)")
+@severity("high")
+@tags("sexual,content-safety,trust-safety,eu-ai-act")
+@reject_message("Your prompt was blocked because it contains sexual content.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has sexual_score && context.sexual_score >= 80
+};
+// Block prompts with excessive profanity
+@id("ts-block-profanity")
+@name("Block profanity")
+@description("Block content when profanity detection score exceeds threshold (90/100)")
+@severity("medium")
+@tags("profanity,content-safety,trust-safety")
+@reject_message("Your prompt was blocked due to excessive profanity. Please rephrase in a professional manner.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
+    resource
+)
+when {
+    context has profanity_score && context.profanity_score >= 90
+};
+`;
+const OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR = `// =============================================================================
+// Agent Security Policy (Default)
+// =============================================================================
+// Detects and blocks tool poisoning, rug pull attacks, and indirect prompt
+// injection targeting AI coding agents. These are agentic AI-specific attack
+// vectors where tool descriptions or server responses manipulate agent behavior.
+//
+// Compliance: OWASP LLM09 (Improper Output Handling) | MITRE ATLAS AML.T0054
+// Category: agent_security
+// Namespace: Overwatch
+// =============================================================================
+// Block tool calls with high tool poisoning risk
+@id("as-block-tool-poisoning")
+@name("Block tool poisoning")
+@description("Block tool execution when tool description contains manipulation patterns (score >= 70/100)")
+@severity("critical")
+@tags("tool-poisoning,agent-security,owasp-llm09")
+@reject_message("Tool execution was blocked because the tool description contains manipulation patterns that could compromise agent behavior.")
+forbid (
+    principal,
+    action == Overwatch::Action::"call_tool",
+    resource
+)
+when {
+    context has tool_poisoning_score && context.tool_poisoning_score >= 70
+};
+// Block tool calls with rug pull detection
+@id("as-block-rug-pull")
+@name("Block rug pull attacks")
+@description("Block tool execution when tool behavior diverges from advertised capabilities (score >= 70/100)")
+@severity("critical")
+@tags("rug-pull,agent-security,mcp-security")
+@reject_message("Tool execution was blocked because the tool's actual behavior diverges from its advertised capabilities.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
+    resource
+)
+when {
+    context has rug_pull_score && context.rug_pull_score >= 70
+};
+// Block MCP server connections with high poisoning risk
+@id("as-block-server-poisoning")
+@name("Block poisoned MCP servers")
+@description("Block connections to MCP servers when tool poisoning patterns are detected (score >= 60/100)")
+@severity("critical")
+@tags("tool-poisoning,mcp-security,agent-security")
+@reject_message("Connection to this MCP server was blocked because tool poisoning patterns were detected in its tool descriptions.")
+forbid (
+    principal,
+    action == Overwatch::Action::"connect_server",
+    resource
+)
+when {
+    context has tool_poisoning_score && context.tool_poisoning_score >= 60
+};
+// Block prompts with indirect injection patterns
+@id("as-block-indirect-injection")
+@name("Block indirect prompt injection")
+@description("Block content when indirect prompt injection is detected in tool outputs or retrieved documents (score >= 70/100)")
+@severity("critical")
+@tags("indirect-injection,agent-security,owasp-llm01")
+@reject_message("This content was blocked because indirect prompt injection patterns were detected in tool outputs or retrieved documents.")
+forbid (
+    principal,
+    action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
+    resource
+)
+when {
+    context has indirect_injection_score && context.indirect_injection_score >= 70
+};
+// Block unverified MCP server tool calls with any detected threats
+@id("as-block-unverified-threats")
+@name("Block unverified server threats")
+@description("Block tool calls from unverified MCP servers when any threat is detected")
+@severity("high")
+@tags("mcp-trust,agent-security,unverified")
+@reject_message("Tool execution was blocked because the MCP server is unverified and threats were detected in the content.")
+forbid (
+    principal,
+    action == Overwatch::Action::"call_tool",
+    resource
+)
+when {
+    context has mcp_server_verified && context.mcp_server_verified == false &&
+    context has threat_count && context.threat_count > 0
+};
+`;
 const OVERWATCH_TOOLS_MCP_ALLOWLIST_CEDAR = `// MCP Server Allowlist Template
 // Only allow specific MCP servers to be used
 // Category: tools
@@ -661,6 +937,8 @@ export const OVERWATCH_CATEGORIES: OverwatchCategoryInfo[] = [
   { id: 'semantic', name: 'Semantic Threat Detection', description: 'Detect and block prompt injection, jailbreak attempts, and high-severity AI security threats' },
   { id: 'tools', name: 'Tool Permissioning', description: 'Control access to shell execution, file operations, MCP servers, and sensitive system paths' },
   { id: 'organization', name: 'Organization Rules', description: 'Apply organization-wide policy baselines, team permissions, and agent-specific guardrails' },
+  { id: 'trust_safety', name: 'Content Safety', description: 'Detect and control violent, harmful, hateful, sexual, and profane content using trust/safety classification scores' },
+  { id: 'agent_security', name: 'Agent Security', description: 'Detect tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents' },
 ];
 // =============================================================================
@@ -668,6 +946,16 @@ export const OVERWATCH_CATEGORIES: OverwatchCategoryInfo[] = [
 // =============================================================================
 export const OVERWATCH_DEFAULTS: OverwatchDefaultPolicy[] = [
+  {
+    id: 'baseline-default',
+    name: 'Baseline Permit',
+    description: 'Permits all actions by default — threat-specific forbid policies override this when threats are detected',
+    category: 'organization',
+    cedarText: OVERWATCH_BASELINE_DEFAULT_CEDAR,
+    severity: 'low',
+    tags: ['baseline', 'permit-default', 'organization'],
+    isActive: true,
+  },
   {
     id: 'secrets-default',
     name: 'Secrets Detection',
@@ -708,6 +996,26 @@ export const OVERWATCH_DEFAULTS: OverwatchDefaultPolicy[] = [
     tags: ['shell', 'command-injection', 'file-access', 'mitre-t1059', 'baseline'],
     isActive: false,
   },
+  {
+    id: 'trust-safety-default',
+    name: 'Content Safety',
+    description: 'Detect and block violent, harmful, hateful, sexual, and profane content using classification scores',
+    category: 'trust_safety',
+    cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
+    severity: 'critical',
+    tags: ['violence', 'weapons', 'hate-speech', 'crime', 'sexual', 'profanity', 'content-safety', 'baseline'],
+    isActive: true,
+  },
+  {
+    id: 'agent-security-default',
+    name: 'Agent Security',
+    description: 'Detect and block tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents',
+    category: 'agent_security',
+    cedarText: OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR,
+    severity: 'critical',
+    tags: ['tool-poisoning', 'rug-pull', 'indirect-injection', 'mcp-security', 'agent-security', 'baseline'],
+    isActive: true,
+  },
 ];
 // =============================================================================
@@ -769,7 +1077,7 @@ export const OVERWATCH_TEMPLATES: OverwatchTemplate[] = [
 /** Raw templates.json metadata for the Overwatch service. */
 export const OVERWATCH_TEMPLATES_JSON: string = `{
   "service": "overwatch",
-  "version": "2.0.0",
+  "version": "3.0.0",
   "description": "Overwatch policy templates for IDE security",
   "categories": [
     {
@@ -796,9 +1104,29 @@ export const OVERWATCH_TEMPLATES_JSON: string = `{
       "id": "organization",
       "name": "Organization Rules",
       "description": "Apply organization-wide policy baselines, team permissions, and agent-specific guardrails"
+    },
+    {
+      "id": "trust_safety",
+      "name": "Content Safety",
+      "description": "Detect and control violent, harmful, hateful, sexual, and profane content using trust/safety classification scores"
+    },
+    {
+      "id": "agent_security",
+      "name": "Agent Security",
+      "description": "Detect tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents"
     }
   ],
   "defaults": [
+    {
+      "id": "baseline-default",
+      "name": "Baseline Permit",
+      "description": "Permits all actions by default — threat-specific forbid policies override this when threats are detected",
+      "category": "organization",
+      "file": "defaults/baseline.cedar",
+      "severity": "low",
+      "tags": ["baseline", "permit-default", "organization"],
+      "is_active": true
+    },
     {
       "id": "secrets-default",
       "name": "Secrets Detection",
@@ -838,6 +1166,26 @@ export const OVERWATCH_TEMPLATES_JSON: string = `{
       "severity": "critical",
       "tags": ["shell", "command-injection", "file-access", "mitre-t1059", "baseline"],
       "is_active": false
+    },
+    {
+      "id": "trust-safety-default",
+      "name": "Content Safety",
+      "description": "Detect and block violent, harmful, hateful, sexual, and profane content using classification scores",
+      "category": "trust_safety",
+      "file": "defaults/trust_safety.cedar",
+      "severity": "critical",
+      "tags": ["violence", "weapons", "hate-speech", "crime", "sexual", "profanity", "content-safety", "baseline"],
+      "is_active": true
+    },
+    {
+      "id": "agent-security-default",
+      "name": "Agent Security",
+      "description": "Detect and block tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents",
+      "category": "agent_security",
+      "file": "defaults/agent_security.cedar",
+      "severity": "critical",
+      "tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "baseline"],
+      "is_active": true
     }
   ],
   "templates": [

package/src/overwatch-defaults.test.ts CHANGED

@@ -21,14 +21,14 @@ import {
 // =============================================================================
 describe("Overwatch defaults data", () => {
-  test("should have 5 categories", () => {
-    expect(OVERWATCH_CATEGORIES).toHaveLength(5);
+  test("should have 7 categories", () => {
+    expect(OVERWATCH_CATEGORIES).toHaveLength(7);
     const ids = OVERWATCH_CATEGORIES.map((c) => c.id);
-    expect(ids).toEqual(["secrets", "pii", "semantic", "tools", "organization"]);
+    expect(ids).toEqual(["secrets", "pii", "semantic", "tools", "organization", "trust_safety", "agent_security"]);
   });
-  test("should have 4 default policies", () => {
-    expect(OVERWATCH_DEFAULTS).toHaveLength(4);
+  test("should have 7 default policies", () => {
+    expect(OVERWATCH_DEFAULTS).toHaveLength(7);
   });
   test("should have 5 templates", () => {

package/src/schemas.test.ts CHANGED

@@ -216,6 +216,22 @@ describe('Service-Specific Schemas', () => {
           max_threat_severity: 1,
           contains_secrets: false,
           response_content: '',
+          // Trust/Safety scores
+          violence_score: 0,
+          weapons_score: 0,
+          hate_speech_score: 0,
+          crime_score: 0,
+          sexual_score: 0,
+          profanity_score: 0,
+          // Detector confidence
+          pii_confidence: 0,
+          injection_confidence: 0,
+          jailbreak_confidence: 0,
+          // Agent security
+          tool_poisoning_score: 0,
+          rug_pull_score: 0,
+          indirect_injection_score: 0,
+          mcp_server_verified: false,
         },
         entities,
       });
@@ -397,6 +413,22 @@ describe('Service-Specific Schemas', () => {
           max_threat_severity: 2,
           contains_secrets: false,
           response_content: '',
+          // Trust/Safety scores
+          violence_score: 0,
+          weapons_score: 0,
+          hate_speech_score: 0,
+          crime_score: 0,
+          sexual_score: 0,
+          profanity_score: 0,
+          // Detector confidence
+          pii_confidence: 0,
+          injection_confidence: 0,
+          jailbreak_confidence: 0,
+          // Agent security
+          tool_poisoning_score: 0,
+          rug_pull_score: 0,
+          indirect_injection_score: 0,
+          mcp_server_verified: false,
         },
         entities,
       });