@highflame/policy 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/overwatch/context.json +163 -1
- package/_schemas/overwatch/schema.cedarschema +45 -0
- package/dist/overwatch-context.gen.d.ts +13 -0
- package/dist/overwatch-context.gen.d.ts.map +1 -1
- package/dist/overwatch-context.gen.js +13 -0
- package/dist/overwatch-context.gen.js.map +1 -1
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.d.ts.map +1 -1
- package/dist/overwatch-defaults.gen.js +346 -1
- package/dist/overwatch-defaults.gen.js.map +1 -1
- package/dist/overwatch-defaults.test.js +5 -5
- package/dist/overwatch-defaults.test.js.map +1 -1
- package/dist/schemas.test.js +32 -0
- package/dist/schemas.test.js.map +1 -1
- package/package.json +1 -1
- package/src/overwatch-context.gen.ts +13 -0
- package/src/overwatch-defaults.gen.ts +350 -2
- package/src/overwatch-defaults.test.ts +5 -5
- package/src/schemas.test.ts +32 -0
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* Overwatch policy category identifiers.
|
|
10
10
|
* Maps to UI tab names in Studio.
|
|
11
11
|
*/
|
|
12
|
-
export type OverwatchCategory = 'secrets' | 'pii' | 'semantic' | 'tools' | 'organization';
|
|
12
|
+
export type OverwatchCategory = 'secrets' | 'pii' | 'semantic' | 'tools' | 'organization' | 'trust_safety' | 'agent_security';
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* Category metadata for UI display.
|
|
@@ -66,6 +66,32 @@ export interface OverwatchTemplate {
|
|
|
66
66
|
// EMBEDDED CEDAR POLICY TEXT
|
|
67
67
|
// =============================================================================
|
|
68
68
|
|
|
69
|
+
const OVERWATCH_BASELINE_DEFAULT_CEDAR = `// =============================================================================
|
|
70
|
+
// Baseline Permit Policy (Default)
|
|
71
|
+
// =============================================================================
|
|
72
|
+
// Permits all actions by default. Threat-specific forbid policies override
|
|
73
|
+
// this to block when YARA, Javelin, or other scanners detect issues.
|
|
74
|
+
//
|
|
75
|
+
// Cedar is default-deny: without at least one permit rule, every request
|
|
76
|
+
// is denied regardless of forbid rules. This baseline ensures the system
|
|
77
|
+
// is "allow unless blocked" rather than "block everything".
|
|
78
|
+
//
|
|
79
|
+
// Category: organization
|
|
80
|
+
// Namespace: Overwatch
|
|
81
|
+
// =============================================================================
|
|
82
|
+
|
|
83
|
+
@id("baseline-permit-all")
|
|
84
|
+
@name("Permit all actions by default")
|
|
85
|
+
@description("Baseline permit for all actions — threat-specific forbid policies override this when threats are detected")
|
|
86
|
+
@severity("low")
|
|
87
|
+
@tags("baseline,permit-default,organization")
|
|
88
|
+
permit (
|
|
89
|
+
principal,
|
|
90
|
+
action,
|
|
91
|
+
resource
|
|
92
|
+
);
|
|
93
|
+
`;
|
|
94
|
+
|
|
69
95
|
const OVERWATCH_SECRETS_DEFAULT_CEDAR = `// =============================================================================
|
|
70
96
|
// Secrets Detection Policy (Default)
|
|
71
97
|
// =============================================================================
|
|
@@ -293,6 +319,22 @@ when {
|
|
|
293
319
|
context has threat_categories && context.threat_categories.contains("pii")
|
|
294
320
|
};
|
|
295
321
|
|
|
322
|
+
// Block prompts with high PII confidence score
|
|
323
|
+
@id("pii-block-high-confidence")
|
|
324
|
+
@name("Block high-confidence PII")
|
|
325
|
+
@description("Block content when PII classifier confidence exceeds threshold (80/100)")
|
|
326
|
+
@severity("critical")
|
|
327
|
+
@tags("pii,confidence,privacy,compliance")
|
|
328
|
+
@reject_message("Your content was blocked because personally identifiable information was detected with high confidence.")
|
|
329
|
+
forbid (
|
|
330
|
+
principal,
|
|
331
|
+
action == Overwatch::Action::"process_prompt",
|
|
332
|
+
resource
|
|
333
|
+
)
|
|
334
|
+
when {
|
|
335
|
+
context has pii_confidence && context.pii_confidence >= 80
|
|
336
|
+
};
|
|
337
|
+
|
|
296
338
|
// Block PII leakage via tool calls
|
|
297
339
|
@id("pii-block-tool-calls")
|
|
298
340
|
@name("Block tool calls with PII")
|
|
@@ -337,6 +379,22 @@ when {
|
|
|
337
379
|
context has yara_threats && context.yara_threats.contains("prompt_injection")
|
|
338
380
|
};
|
|
339
381
|
|
|
382
|
+
// Block prompts with high injection confidence score
|
|
383
|
+
@id("semantic-block-injection-score")
|
|
384
|
+
@name("Block high-confidence injection")
|
|
385
|
+
@description("Block content when injection classifier confidence exceeds threshold (75/100)")
|
|
386
|
+
@severity("critical")
|
|
387
|
+
@tags("injection,confidence,security,owasp-llm01")
|
|
388
|
+
@reject_message("Your prompt was blocked because a high-confidence prompt injection pattern was detected.")
|
|
389
|
+
forbid (
|
|
390
|
+
principal,
|
|
391
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
392
|
+
resource
|
|
393
|
+
)
|
|
394
|
+
when {
|
|
395
|
+
context has injection_confidence && context.injection_confidence >= 75
|
|
396
|
+
};
|
|
397
|
+
|
|
340
398
|
// Block prompts with jailbreak attempts
|
|
341
399
|
@id("semantic-block-jailbreak")
|
|
342
400
|
@name("Block jailbreak attempts")
|
|
@@ -352,6 +410,22 @@ when {
|
|
|
352
410
|
context has yara_threats && context.yara_threats.contains("jailbreak")
|
|
353
411
|
};
|
|
354
412
|
|
|
413
|
+
// Block prompts with high jailbreak confidence score
|
|
414
|
+
@id("semantic-block-jailbreak-score")
|
|
415
|
+
@name("Block high-confidence jailbreak")
|
|
416
|
+
@description("Block content when jailbreak classifier confidence exceeds threshold (75/100)")
|
|
417
|
+
@severity("critical")
|
|
418
|
+
@tags("jailbreak,confidence,security,owasp-llm02")
|
|
419
|
+
@reject_message("Your prompt was blocked because a high-confidence jailbreak attempt was detected.")
|
|
420
|
+
forbid (
|
|
421
|
+
principal,
|
|
422
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
423
|
+
resource
|
|
424
|
+
)
|
|
425
|
+
when {
|
|
426
|
+
context has jailbreak_confidence && context.jailbreak_confidence >= 75
|
|
427
|
+
};
|
|
428
|
+
|
|
355
429
|
// Block prompts with high severity semantic threats
|
|
356
430
|
@id("semantic-block-high-severity")
|
|
357
431
|
@name("Block high severity threats")
|
|
@@ -508,6 +582,208 @@ when {
|
|
|
508
582
|
};
|
|
509
583
|
`;
|
|
510
584
|
|
|
585
|
+
const OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR = `// =============================================================================
|
|
586
|
+
// Content Safety Policy (Default)
|
|
587
|
+
// =============================================================================
|
|
588
|
+
// Detects and blocks violent, harmful, hateful, sexual, and profane content
|
|
589
|
+
// using trust/safety classification scores from Javelin/Lakera/LlamaGuard.
|
|
590
|
+
//
|
|
591
|
+
// Compliance: NIST 800-53 SI-4 | EU AI Act Art. 52
|
|
592
|
+
// Category: trust_safety
|
|
593
|
+
// Namespace: Overwatch
|
|
594
|
+
// =============================================================================
|
|
595
|
+
|
|
596
|
+
// Block prompts with high violence score
|
|
597
|
+
@id("ts-block-violence")
|
|
598
|
+
@name("Block violent content")
|
|
599
|
+
@description("Block content when violence detection score exceeds threshold (80/100)")
|
|
600
|
+
@severity("critical")
|
|
601
|
+
@tags("violence,content-safety,trust-safety,nist-si-4")
|
|
602
|
+
@reject_message("Your prompt was blocked because it contains violent content. Please rephrase without violence-related language.")
|
|
603
|
+
forbid (
|
|
604
|
+
principal,
|
|
605
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
606
|
+
resource
|
|
607
|
+
)
|
|
608
|
+
when {
|
|
609
|
+
context has violence_score && context.violence_score >= 80
|
|
610
|
+
};
|
|
611
|
+
|
|
612
|
+
// Block prompts with weapons content
|
|
613
|
+
@id("ts-block-weapons")
|
|
614
|
+
@name("Block weapons content")
|
|
615
|
+
@description("Block content when weapons detection score exceeds threshold (80/100)")
|
|
616
|
+
@severity("critical")
|
|
617
|
+
@tags("weapons,content-safety,trust-safety,nist-si-4")
|
|
618
|
+
@reject_message("Your prompt was blocked because it contains weapons-related content. Please rephrase without weapons references.")
|
|
619
|
+
forbid (
|
|
620
|
+
principal,
|
|
621
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
622
|
+
resource
|
|
623
|
+
)
|
|
624
|
+
when {
|
|
625
|
+
context has weapons_score && context.weapons_score >= 80
|
|
626
|
+
};
|
|
627
|
+
|
|
628
|
+
// Block prompts with hate speech
|
|
629
|
+
@id("ts-block-hate-speech")
|
|
630
|
+
@name("Block hate speech")
|
|
631
|
+
@description("Block content when hate speech detection score exceeds threshold (75/100)")
|
|
632
|
+
@severity("critical")
|
|
633
|
+
@tags("hate-speech,content-safety,trust-safety,nist-si-4")
|
|
634
|
+
@reject_message("Your prompt was blocked because it contains hate speech. Please rephrase without hateful or discriminatory language.")
|
|
635
|
+
forbid (
|
|
636
|
+
principal,
|
|
637
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
638
|
+
resource
|
|
639
|
+
)
|
|
640
|
+
when {
|
|
641
|
+
context has hate_speech_score && context.hate_speech_score >= 75
|
|
642
|
+
};
|
|
643
|
+
|
|
644
|
+
// Block prompts with criminal content
|
|
645
|
+
@id("ts-block-crime")
|
|
646
|
+
@name("Block criminal content")
|
|
647
|
+
@description("Block content when criminal activity detection score exceeds threshold (80/100)")
|
|
648
|
+
@severity("high")
|
|
649
|
+
@tags("crime,content-safety,trust-safety,nist-si-4")
|
|
650
|
+
@reject_message("Your prompt was blocked because it contains content related to criminal activity.")
|
|
651
|
+
forbid (
|
|
652
|
+
principal,
|
|
653
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
654
|
+
resource
|
|
655
|
+
)
|
|
656
|
+
when {
|
|
657
|
+
context has crime_score && context.crime_score >= 80
|
|
658
|
+
};
|
|
659
|
+
|
|
660
|
+
// Block prompts with sexual content
|
|
661
|
+
@id("ts-block-sexual")
|
|
662
|
+
@name("Block sexual content")
|
|
663
|
+
@description("Block content when sexual content detection score exceeds threshold (80/100)")
|
|
664
|
+
@severity("high")
|
|
665
|
+
@tags("sexual,content-safety,trust-safety,eu-ai-act")
|
|
666
|
+
@reject_message("Your prompt was blocked because it contains sexual content.")
|
|
667
|
+
forbid (
|
|
668
|
+
principal,
|
|
669
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
670
|
+
resource
|
|
671
|
+
)
|
|
672
|
+
when {
|
|
673
|
+
context has sexual_score && context.sexual_score >= 80
|
|
674
|
+
};
|
|
675
|
+
|
|
676
|
+
// Block prompts with excessive profanity
|
|
677
|
+
@id("ts-block-profanity")
|
|
678
|
+
@name("Block profanity")
|
|
679
|
+
@description("Block content when profanity detection score exceeds threshold (90/100)")
|
|
680
|
+
@severity("medium")
|
|
681
|
+
@tags("profanity,content-safety,trust-safety")
|
|
682
|
+
@reject_message("Your prompt was blocked due to excessive profanity. Please rephrase in a professional manner.")
|
|
683
|
+
forbid (
|
|
684
|
+
principal,
|
|
685
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool"],
|
|
686
|
+
resource
|
|
687
|
+
)
|
|
688
|
+
when {
|
|
689
|
+
context has profanity_score && context.profanity_score >= 90
|
|
690
|
+
};
|
|
691
|
+
`;
|
|
692
|
+
|
|
693
|
+
const OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR = `// =============================================================================
|
|
694
|
+
// Agent Security Policy (Default)
|
|
695
|
+
// =============================================================================
|
|
696
|
+
// Detects and blocks tool poisoning, rug pull attacks, and indirect prompt
|
|
697
|
+
// injection targeting AI coding agents. These are agentic AI-specific attack
|
|
698
|
+
// vectors where tool descriptions or server responses manipulate agent behavior.
|
|
699
|
+
//
|
|
700
|
+
// Compliance: OWASP LLM09 (Improper Output Handling) | MITRE ATLAS AML.T0054
|
|
701
|
+
// Category: agent_security
|
|
702
|
+
// Namespace: Overwatch
|
|
703
|
+
// =============================================================================
|
|
704
|
+
|
|
705
|
+
// Block tool calls with high tool poisoning risk
|
|
706
|
+
@id("as-block-tool-poisoning")
|
|
707
|
+
@name("Block tool poisoning")
|
|
708
|
+
@description("Block tool execution when tool description contains manipulation patterns (score >= 70/100)")
|
|
709
|
+
@severity("critical")
|
|
710
|
+
@tags("tool-poisoning,agent-security,owasp-llm09")
|
|
711
|
+
@reject_message("Tool execution was blocked because the tool description contains manipulation patterns that could compromise agent behavior.")
|
|
712
|
+
forbid (
|
|
713
|
+
principal,
|
|
714
|
+
action == Overwatch::Action::"call_tool",
|
|
715
|
+
resource
|
|
716
|
+
)
|
|
717
|
+
when {
|
|
718
|
+
context has tool_poisoning_score && context.tool_poisoning_score >= 70
|
|
719
|
+
};
|
|
720
|
+
|
|
721
|
+
// Block tool calls with rug pull detection
|
|
722
|
+
@id("as-block-rug-pull")
|
|
723
|
+
@name("Block rug pull attacks")
|
|
724
|
+
@description("Block tool execution when tool behavior diverges from advertised capabilities (score >= 70/100)")
|
|
725
|
+
@severity("critical")
|
|
726
|
+
@tags("rug-pull,agent-security,mcp-security")
|
|
727
|
+
@reject_message("Tool execution was blocked because the tool's actual behavior diverges from its advertised capabilities.")
|
|
728
|
+
forbid (
|
|
729
|
+
principal,
|
|
730
|
+
action in [Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
|
|
731
|
+
resource
|
|
732
|
+
)
|
|
733
|
+
when {
|
|
734
|
+
context has rug_pull_score && context.rug_pull_score >= 70
|
|
735
|
+
};
|
|
736
|
+
|
|
737
|
+
// Block MCP server connections with high poisoning risk
|
|
738
|
+
@id("as-block-server-poisoning")
|
|
739
|
+
@name("Block poisoned MCP servers")
|
|
740
|
+
@description("Block connections to MCP servers when tool poisoning patterns are detected (score >= 60/100)")
|
|
741
|
+
@severity("critical")
|
|
742
|
+
@tags("tool-poisoning,mcp-security,agent-security")
|
|
743
|
+
@reject_message("Connection to this MCP server was blocked because tool poisoning patterns were detected in its tool descriptions.")
|
|
744
|
+
forbid (
|
|
745
|
+
principal,
|
|
746
|
+
action == Overwatch::Action::"connect_server",
|
|
747
|
+
resource
|
|
748
|
+
)
|
|
749
|
+
when {
|
|
750
|
+
context has tool_poisoning_score && context.tool_poisoning_score >= 60
|
|
751
|
+
};
|
|
752
|
+
|
|
753
|
+
// Block prompts with indirect injection patterns
|
|
754
|
+
@id("as-block-indirect-injection")
|
|
755
|
+
@name("Block indirect prompt injection")
|
|
756
|
+
@description("Block content when indirect prompt injection is detected in tool outputs or retrieved documents (score >= 70/100)")
|
|
757
|
+
@severity("critical")
|
|
758
|
+
@tags("indirect-injection,agent-security,owasp-llm01")
|
|
759
|
+
@reject_message("This content was blocked because indirect prompt injection patterns were detected in tool outputs or retrieved documents.")
|
|
760
|
+
forbid (
|
|
761
|
+
principal,
|
|
762
|
+
action in [Overwatch::Action::"process_prompt", Overwatch::Action::"call_tool", Overwatch::Action::"connect_server"],
|
|
763
|
+
resource
|
|
764
|
+
)
|
|
765
|
+
when {
|
|
766
|
+
context has indirect_injection_score && context.indirect_injection_score >= 70
|
|
767
|
+
};
|
|
768
|
+
|
|
769
|
+
// Block unverified MCP server tool calls with any detected threats
|
|
770
|
+
@id("as-block-unverified-threats")
|
|
771
|
+
@name("Block unverified server threats")
|
|
772
|
+
@description("Block tool calls from unverified MCP servers when any threat is detected")
|
|
773
|
+
@severity("high")
|
|
774
|
+
@tags("mcp-trust,agent-security,unverified")
|
|
775
|
+
@reject_message("Tool execution was blocked because the MCP server is unverified and threats were detected in the content.")
|
|
776
|
+
forbid (
|
|
777
|
+
principal,
|
|
778
|
+
action == Overwatch::Action::"call_tool",
|
|
779
|
+
resource
|
|
780
|
+
)
|
|
781
|
+
when {
|
|
782
|
+
context has mcp_server_verified && context.mcp_server_verified == false &&
|
|
783
|
+
context has threat_count && context.threat_count > 0
|
|
784
|
+
};
|
|
785
|
+
`;
|
|
786
|
+
|
|
511
787
|
const OVERWATCH_TOOLS_MCP_ALLOWLIST_CEDAR = `// MCP Server Allowlist Template
|
|
512
788
|
// Only allow specific MCP servers to be used
|
|
513
789
|
// Category: tools
|
|
@@ -661,6 +937,8 @@ export const OVERWATCH_CATEGORIES: OverwatchCategoryInfo[] = [
|
|
|
661
937
|
{ id: 'semantic', name: 'Semantic Threat Detection', description: 'Detect and block prompt injection, jailbreak attempts, and high-severity AI security threats' },
|
|
662
938
|
{ id: 'tools', name: 'Tool Permissioning', description: 'Control access to shell execution, file operations, MCP servers, and sensitive system paths' },
|
|
663
939
|
{ id: 'organization', name: 'Organization Rules', description: 'Apply organization-wide policy baselines, team permissions, and agent-specific guardrails' },
|
|
940
|
+
{ id: 'trust_safety', name: 'Content Safety', description: 'Detect and control violent, harmful, hateful, sexual, and profane content using trust/safety classification scores' },
|
|
941
|
+
{ id: 'agent_security', name: 'Agent Security', description: 'Detect tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents' },
|
|
664
942
|
];
|
|
665
943
|
|
|
666
944
|
// =============================================================================
|
|
@@ -668,6 +946,16 @@ export const OVERWATCH_CATEGORIES: OverwatchCategoryInfo[] = [
|
|
|
668
946
|
// =============================================================================
|
|
669
947
|
|
|
670
948
|
export const OVERWATCH_DEFAULTS: OverwatchDefaultPolicy[] = [
|
|
949
|
+
{
|
|
950
|
+
id: 'baseline-default',
|
|
951
|
+
name: 'Baseline Permit',
|
|
952
|
+
description: 'Permits all actions by default — threat-specific forbid policies override this when threats are detected',
|
|
953
|
+
category: 'organization',
|
|
954
|
+
cedarText: OVERWATCH_BASELINE_DEFAULT_CEDAR,
|
|
955
|
+
severity: 'low',
|
|
956
|
+
tags: ['baseline', 'permit-default', 'organization'],
|
|
957
|
+
isActive: true,
|
|
958
|
+
},
|
|
671
959
|
{
|
|
672
960
|
id: 'secrets-default',
|
|
673
961
|
name: 'Secrets Detection',
|
|
@@ -708,6 +996,26 @@ export const OVERWATCH_DEFAULTS: OverwatchDefaultPolicy[] = [
|
|
|
708
996
|
tags: ['shell', 'command-injection', 'file-access', 'mitre-t1059', 'baseline'],
|
|
709
997
|
isActive: false,
|
|
710
998
|
},
|
|
999
|
+
{
|
|
1000
|
+
id: 'trust-safety-default',
|
|
1001
|
+
name: 'Content Safety',
|
|
1002
|
+
description: 'Detect and block violent, harmful, hateful, sexual, and profane content using classification scores',
|
|
1003
|
+
category: 'trust_safety',
|
|
1004
|
+
cedarText: OVERWATCH_TRUST_SAFETY_DEFAULT_CEDAR,
|
|
1005
|
+
severity: 'critical',
|
|
1006
|
+
tags: ['violence', 'weapons', 'hate-speech', 'crime', 'sexual', 'profanity', 'content-safety', 'baseline'],
|
|
1007
|
+
isActive: true,
|
|
1008
|
+
},
|
|
1009
|
+
{
|
|
1010
|
+
id: 'agent-security-default',
|
|
1011
|
+
name: 'Agent Security',
|
|
1012
|
+
description: 'Detect and block tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents',
|
|
1013
|
+
category: 'agent_security',
|
|
1014
|
+
cedarText: OVERWATCH_AGENT_SECURITY_DEFAULT_CEDAR,
|
|
1015
|
+
severity: 'critical',
|
|
1016
|
+
tags: ['tool-poisoning', 'rug-pull', 'indirect-injection', 'mcp-security', 'agent-security', 'baseline'],
|
|
1017
|
+
isActive: true,
|
|
1018
|
+
},
|
|
711
1019
|
];
|
|
712
1020
|
|
|
713
1021
|
// =============================================================================
|
|
@@ -769,7 +1077,7 @@ export const OVERWATCH_TEMPLATES: OverwatchTemplate[] = [
|
|
|
769
1077
|
/** Raw templates.json metadata for the Overwatch service. */
|
|
770
1078
|
export const OVERWATCH_TEMPLATES_JSON: string = `{
|
|
771
1079
|
"service": "overwatch",
|
|
772
|
-
"version": "
|
|
1080
|
+
"version": "3.0.0",
|
|
773
1081
|
"description": "Overwatch policy templates for IDE security",
|
|
774
1082
|
"categories": [
|
|
775
1083
|
{
|
|
@@ -796,9 +1104,29 @@ export const OVERWATCH_TEMPLATES_JSON: string = `{
|
|
|
796
1104
|
"id": "organization",
|
|
797
1105
|
"name": "Organization Rules",
|
|
798
1106
|
"description": "Apply organization-wide policy baselines, team permissions, and agent-specific guardrails"
|
|
1107
|
+
},
|
|
1108
|
+
{
|
|
1109
|
+
"id": "trust_safety",
|
|
1110
|
+
"name": "Content Safety",
|
|
1111
|
+
"description": "Detect and control violent, harmful, hateful, sexual, and profane content using trust/safety classification scores"
|
|
1112
|
+
},
|
|
1113
|
+
{
|
|
1114
|
+
"id": "agent_security",
|
|
1115
|
+
"name": "Agent Security",
|
|
1116
|
+
"description": "Detect tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents"
|
|
799
1117
|
}
|
|
800
1118
|
],
|
|
801
1119
|
"defaults": [
|
|
1120
|
+
{
|
|
1121
|
+
"id": "baseline-default",
|
|
1122
|
+
"name": "Baseline Permit",
|
|
1123
|
+
"description": "Permits all actions by default — threat-specific forbid policies override this when threats are detected",
|
|
1124
|
+
"category": "organization",
|
|
1125
|
+
"file": "defaults/baseline.cedar",
|
|
1126
|
+
"severity": "low",
|
|
1127
|
+
"tags": ["baseline", "permit-default", "organization"],
|
|
1128
|
+
"is_active": true
|
|
1129
|
+
},
|
|
802
1130
|
{
|
|
803
1131
|
"id": "secrets-default",
|
|
804
1132
|
"name": "Secrets Detection",
|
|
@@ -838,6 +1166,26 @@ export const OVERWATCH_TEMPLATES_JSON: string = `{
|
|
|
838
1166
|
"severity": "critical",
|
|
839
1167
|
"tags": ["shell", "command-injection", "file-access", "mitre-t1059", "baseline"],
|
|
840
1168
|
"is_active": false
|
|
1169
|
+
},
|
|
1170
|
+
{
|
|
1171
|
+
"id": "trust-safety-default",
|
|
1172
|
+
"name": "Content Safety",
|
|
1173
|
+
"description": "Detect and block violent, harmful, hateful, sexual, and profane content using classification scores",
|
|
1174
|
+
"category": "trust_safety",
|
|
1175
|
+
"file": "defaults/trust_safety.cedar",
|
|
1176
|
+
"severity": "critical",
|
|
1177
|
+
"tags": ["violence", "weapons", "hate-speech", "crime", "sexual", "profanity", "content-safety", "baseline"],
|
|
1178
|
+
"is_active": true
|
|
1179
|
+
},
|
|
1180
|
+
{
|
|
1181
|
+
"id": "agent-security-default",
|
|
1182
|
+
"name": "Agent Security",
|
|
1183
|
+
"description": "Detect and block tool poisoning, rug pull attacks, and indirect prompt injection targeting AI agents",
|
|
1184
|
+
"category": "agent_security",
|
|
1185
|
+
"file": "defaults/agent_security.cedar",
|
|
1186
|
+
"severity": "critical",
|
|
1187
|
+
"tags": ["tool-poisoning", "rug-pull", "indirect-injection", "mcp-security", "agent-security", "baseline"],
|
|
1188
|
+
"is_active": true
|
|
841
1189
|
}
|
|
842
1190
|
],
|
|
843
1191
|
"templates": [
|
|
@@ -21,14 +21,14 @@ import {
|
|
|
21
21
|
// =============================================================================
|
|
22
22
|
|
|
23
23
|
describe("Overwatch defaults data", () => {
|
|
24
|
-
test("should have 5 categories", () => {
|
|
25
|
-
expect(OVERWATCH_CATEGORIES).toHaveLength(5);
|
|
24
|
+
test("should have 7 categories", () => {
|
|
25
|
+
expect(OVERWATCH_CATEGORIES).toHaveLength(7);
|
|
26
26
|
const ids = OVERWATCH_CATEGORIES.map((c) => c.id);
|
|
27
|
-
expect(ids).toEqual(["secrets", "pii", "semantic", "tools", "organization"]);
|
|
27
|
+
expect(ids).toEqual(["secrets", "pii", "semantic", "tools", "organization", "trust_safety", "agent_security"]);
|
|
28
28
|
});
|
|
29
29
|
|
|
30
|
-
test("should have 4 default policies", () => {
|
|
31
|
-
expect(OVERWATCH_DEFAULTS).toHaveLength(4);
|
|
30
|
+
test("should have 7 default policies", () => {
|
|
31
|
+
expect(OVERWATCH_DEFAULTS).toHaveLength(7);
|
|
32
32
|
});
|
|
33
33
|
|
|
34
34
|
test("should have 5 templates", () => {
|
package/src/schemas.test.ts
CHANGED
|
@@ -216,6 +216,22 @@ describe('Service-Specific Schemas', () => {
|
|
|
216
216
|
max_threat_severity: 1,
|
|
217
217
|
contains_secrets: false,
|
|
218
218
|
response_content: '',
|
|
219
|
+
// Trust/Safety scores
|
|
220
|
+
violence_score: 0,
|
|
221
|
+
weapons_score: 0,
|
|
222
|
+
hate_speech_score: 0,
|
|
223
|
+
crime_score: 0,
|
|
224
|
+
sexual_score: 0,
|
|
225
|
+
profanity_score: 0,
|
|
226
|
+
// Detector confidence
|
|
227
|
+
pii_confidence: 0,
|
|
228
|
+
injection_confidence: 0,
|
|
229
|
+
jailbreak_confidence: 0,
|
|
230
|
+
// Agent security
|
|
231
|
+
tool_poisoning_score: 0,
|
|
232
|
+
rug_pull_score: 0,
|
|
233
|
+
indirect_injection_score: 0,
|
|
234
|
+
mcp_server_verified: false,
|
|
219
235
|
},
|
|
220
236
|
entities,
|
|
221
237
|
});
|
|
@@ -397,6 +413,22 @@ describe('Service-Specific Schemas', () => {
|
|
|
397
413
|
max_threat_severity: 2,
|
|
398
414
|
contains_secrets: false,
|
|
399
415
|
response_content: '',
|
|
416
|
+
// Trust/Safety scores
|
|
417
|
+
violence_score: 0,
|
|
418
|
+
weapons_score: 0,
|
|
419
|
+
hate_speech_score: 0,
|
|
420
|
+
crime_score: 0,
|
|
421
|
+
sexual_score: 0,
|
|
422
|
+
profanity_score: 0,
|
|
423
|
+
// Detector confidence
|
|
424
|
+
pii_confidence: 0,
|
|
425
|
+
injection_confidence: 0,
|
|
426
|
+
jailbreak_confidence: 0,
|
|
427
|
+
// Agent security
|
|
428
|
+
tool_poisoning_score: 0,
|
|
429
|
+
rug_pull_score: 0,
|
|
430
|
+
indirect_injection_score: 0,
|
|
431
|
+
mcp_server_verified: false,
|
|
400
432
|
},
|
|
401
433
|
entities,
|
|
402
434
|
});
|