@highflame/policy 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,40 +9,86 @@
9
9
  export const GuardrailsContextKey = {
10
10
  BudgetExceeded: 'budget_exceeded',
11
11
  BudgetRemainingPct: 'budget_remaining_pct',
12
+ CodeLanguages: 'code_languages',
13
+ CodeRatio: 'code_ratio',
14
+ CommandInjectionDetected: 'command_injection_detected',
15
+ CommandInjectionScore: 'command_injection_score',
16
+ CommandInjectionType: 'command_injection_type',
17
+ ContainsCode: 'contains_code',
12
18
  ContainsInvisibleChars: 'contains_invisible_chars',
19
+ ContainsNonAscii: 'contains_non_ascii',
13
20
  ContainsSecrets: 'contains_secrets',
21
+ ContentSafetyBlocked: 'content_safety_blocked',
22
+ ContentSafetyScore: 'content_safety_score',
14
23
  ContentTopics: 'content_topics',
15
24
  ContentType: 'content_type',
25
+ ConversationTurn: 'conversation_turn',
16
26
  CrimeScore: 'crime_score',
27
+ CrossOriginDetected: 'cross_origin_detected',
28
+ CrossOriginScore: 'cross_origin_score',
29
+ CrossOriginType: 'cross_origin_type',
30
+ DetectedLanguage: 'detected_language',
31
+ DetectedScript: 'detected_script',
17
32
  DetectorCount: 'detector_count',
18
33
  Direction: 'direction',
34
+ EncodedContentDetected: 'encoded_content_detected',
35
+ EncodedCount: 'encoded_count',
36
+ EncodedScore: 'encoded_score',
37
+ EncodedTypes: 'encoded_types',
38
+ FactualityScore: 'factuality_score',
39
+ HallucinationScore: 'hallucination_score',
19
40
  HateSpeechScore: 'hate_speech_score',
20
41
  InjectionScore: 'injection_score',
21
42
  InjectionType: 'injection_type',
22
43
  InvisibleCharsScore: 'invisible_chars_score',
44
+ IsEnglish: 'is_english',
45
+ IsLatinScript: 'is_latin_script',
23
46
  JailbreakScore: 'jailbreak_score',
47
+ KeywordCategories: 'keyword_categories',
48
+ KeywordCount: 'keyword_count',
49
+ KeywordMatched: 'keyword_matched',
50
+ LanguageConfidence: 'language_confidence',
24
51
  LoopCount: 'loop_count',
25
52
  LoopDetected: 'loop_detected',
26
53
  LoopTool: 'loop_tool',
54
+ McpConfigRisk: 'mcp_config_risk',
55
+ McpRiskScore: 'mcp_risk_score',
56
+ McpRiskType: 'mcp_risk_type',
27
57
  McpServer: 'mcp_server',
28
58
  McpServerVerified: 'mcp_server_verified',
29
59
  McpTool: 'mcp_tool',
60
+ MultiTurnDetection: 'multi_turn_detection',
61
+ PathTraversalDetected: 'path_traversal_detected',
62
+ PathTraversalSeverity: 'path_traversal_severity',
63
+ PathTraversalType: 'path_traversal_type',
30
64
  PatternType: 'pattern_type',
65
+ PhishingDetected: 'phishing_detected',
31
66
  PiiCount: 'pii_count',
32
67
  PiiDetected: 'pii_detected',
33
68
  PiiTypes: 'pii_types',
34
69
  ProfanityScore: 'profanity_score',
35
70
  RequestId: 'request_id',
71
+ RugPullDetected: 'rug_pull_detected',
72
+ RugPullScore: 'rug_pull_score',
73
+ RugPullType: 'rug_pull_type',
74
+ ScriptConfidence: 'script_confidence',
36
75
  SecretCount: 'secret_count',
37
76
  SecretTypes: 'secret_types',
77
+ SentimentScore: 'sentiment_score',
38
78
  SequenceRisk: 'sequence_risk',
39
79
  SexualScore: 'sexual_score',
80
+ SqlInjectionDetected: 'sql_injection_detected',
81
+ SqlInjectionScore: 'sql_injection_score',
82
+ SqlInjectionType: 'sql_injection_type',
40
83
  SuspiciousPattern: 'suspicious_pattern',
41
84
  Timestamp: 'timestamp',
42
85
  ToolCategory: 'tool_category',
43
86
  ToolIsBuiltin: 'tool_is_builtin',
44
87
  ToolIsSensitive: 'tool_is_sensitive',
45
88
  ToolName: 'tool_name',
89
+ ToolPoisoningDetected: 'tool_poisoning_detected',
90
+ ToolPoisoningScore: 'tool_poisoning_score',
91
+ ToolPoisoningType: 'tool_poisoning_type',
46
92
  ToolRiskScore: 'tool_risk_score',
47
93
  TopicConfidence: 'topic_confidence',
48
94
  ViolenceScore: 'violence_score',
@@ -423,6 +423,111 @@ forbid (
423
423
  context.budget_remaining_pct < 5 &&
424
424
  context.budget_remaining_pct > 0
425
425
  };
426
+
427
+ // =============================================================================
428
+ // Agent Security — Supply Chain & Behavioral Drift
429
+ // =============================================================================
430
+
431
+ @id("agentic-block-tool-poisoning")
432
+ @name("Block tool poisoning attacks")
433
+ @description("Forbids tool calls or server connections when hidden instructions or authority hijack patterns are detected in tool descriptions or arguments")
434
+ @severity("critical")
435
+ @tags("agentic,tool-poisoning,supply-chain")
436
+ forbid (
437
+ principal,
438
+ action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
439
+ resource
440
+ ) when {
441
+ context has tool_poisoning_score && context.tool_poisoning_score >= 70
442
+ };
443
+
444
+ @id("agentic-block-rug-pull")
445
+ @name("Block rug pull attacks")
446
+ @description("Forbids tool calls when significant behavioral drift is detected (tool output deviates from established patterns)")
447
+ @severity("high")
448
+ @tags("agentic,rug-pull,behavioral-drift")
449
+ forbid (
450
+ principal,
451
+ action == Guardrails::Action::"call_tool",
452
+ resource
453
+ ) when {
454
+ context has rug_pull_detected && context.rug_pull_detected == true &&
455
+ context has rug_pull_score && context.rug_pull_score >= 70
456
+ };
457
+
458
+ @id("agentic-block-mcp-config-risk")
459
+ @name("Block risky MCP configurations")
460
+ @description("Forbids tool calls or server connections when MCP configuration risks are detected (inline execution, suspicious URLs, cross-origin issues)")
461
+ @severity("high")
462
+ @tags("agentic,mcp-risk,supply-chain")
463
+ forbid (
464
+ principal,
465
+ action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
466
+ resource
467
+ ) when {
468
+ context has mcp_config_risk && context.mcp_config_risk == true &&
469
+ context has mcp_risk_score && context.mcp_risk_score >= 70
470
+ };
471
+ `;
472
+ const GUARDRAILS_SECURITY_PATTERNS_DEFAULT_CEDAR = `// =============================================================================
473
+ // Security Pattern Detection Policy
474
+ // =============================================================================
475
+ // Blocks command injection, path traversal, and SQL injection attacks using
476
+ // regex-based pattern detection from Shield's security detectors.
477
+ //
478
+ // Context keys used (normalized by projection layer):
479
+ // - command_injection_detected: Bool - Command injection pattern found
480
+ // - command_injection_score: Long (0-100) - Detection confidence
481
+ // - path_traversal_detected: Bool - Path traversal pattern found
482
+ // - path_traversal_severity: String - Severity level (critical/high/medium/low)
483
+ // - sql_injection_detected: Bool - SQL injection pattern found
484
+ // - sql_injection_score: Long (0-100) - Detection confidence
485
+ //
486
+ // Category: security
487
+ // Namespace: Guardrails
488
+ // =============================================================================
489
+
490
+ @id("security-block-command-injection")
491
+ @name("Block command injection")
492
+ @description("Forbids requests containing command injection patterns such as reverse shells, privilege escalation, or destructive commands")
493
+ @severity("critical")
494
+ @tags("command-injection,security")
495
+ forbid (
496
+ principal,
497
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
498
+ resource
499
+ ) when {
500
+ context has command_injection_detected && context.command_injection_detected == true
501
+ };
502
+
503
+ @id("security-block-path-traversal")
504
+ @name("Block high-severity path traversal")
505
+ @description("Forbids requests containing path traversal patterns targeting sensitive system files or using deep directory traversal")
506
+ @severity("high")
507
+ @tags("path-traversal,security")
508
+ forbid (
509
+ principal,
510
+ action,
511
+ resource
512
+ ) when {
513
+ context has path_traversal_detected && context.path_traversal_detected == true &&
514
+ context has path_traversal_severity &&
515
+ (context.path_traversal_severity == "critical" || context.path_traversal_severity == "high")
516
+ };
517
+
518
+ @id("security-block-sql-injection")
519
+ @name("Block high-confidence SQL injection")
520
+ @description("Forbids requests with SQL injection confidence above 75% (tautologies, UNION-based, destructive queries)")
521
+ @severity("high")
522
+ @tags("sql-injection,security")
523
+ forbid (
524
+ principal,
525
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
526
+ resource
527
+ ) when {
528
+ context has sql_injection_detected && context.sql_injection_detected == true &&
529
+ context has sql_injection_score && context.sql_injection_score >= 75
530
+ };
426
531
  `;
427
532
  const GUARDRAILS_MCP_TOOL_PERMISSIONS_CEDAR = `// =============================================================================
428
533
  // MCP Tool Permissions Template
@@ -952,11 +1057,21 @@ export const GUARDRAILS_DEFAULTS = [
952
1057
  {
953
1058
  id: 'agentic-safety-default',
954
1059
  name: 'Agentic Safety',
955
- description: 'Block tool call loops, data exfiltration patterns, high-risk sequences, and budget violations',
1060
+ description: 'Block tool call loops, data exfiltration patterns, high-risk sequences, budget violations, tool poisoning, rug pull attacks, and MCP configuration risks',
956
1061
  category: 'agentic_security',
957
1062
  cedarText: GUARDRAILS_AGENTIC_SAFETY_DEFAULT_CEDAR,
958
1063
  severity: 'high',
959
- tags: ['agentic', 'safety', 'loops', 'exfiltration', 'budget'],
1064
+ tags: ['agentic', 'safety', 'loops', 'exfiltration', 'budget', 'tool-poisoning', 'rug-pull', 'mcp-risk'],
1065
+ isActive: true,
1066
+ },
1067
+ {
1068
+ id: 'security-patterns-default',
1069
+ name: 'Security Pattern Detection',
1070
+ description: 'Block command injection, path traversal, and SQL injection attacks using regex-based pattern detection',
1071
+ category: 'security',
1072
+ cedarText: GUARDRAILS_SECURITY_PATTERNS_DEFAULT_CEDAR,
1073
+ severity: 'critical',
1074
+ tags: ['command-injection', 'path-traversal', 'sql-injection', 'security'],
960
1075
  isActive: true,
961
1076
  },
962
1077
  ];
@@ -1145,11 +1260,21 @@ export const GUARDRAILS_TEMPLATES_JSON = `{
1145
1260
  {
1146
1261
  "id": "agentic-safety-default",
1147
1262
  "name": "Agentic Safety",
1148
- "description": "Block tool call loops, data exfiltration patterns, high-risk sequences, and budget violations",
1263
+ "description": "Block tool call loops, data exfiltration patterns, high-risk sequences, budget violations, tool poisoning, rug pull attacks, and MCP configuration risks",
1149
1264
  "category": "agentic_security",
1150
1265
  "file": "defaults/agentic_safety.cedar",
1151
1266
  "severity": "high",
1152
- "tags": ["agentic", "safety", "loops", "exfiltration", "budget"],
1267
+ "tags": ["agentic", "safety", "loops", "exfiltration", "budget", "tool-poisoning", "rug-pull", "mcp-risk"],
1268
+ "is_active": true
1269
+ },
1270
+ {
1271
+ "id": "security-patterns-default",
1272
+ "name": "Security Pattern Detection",
1273
+ "description": "Block command injection, path traversal, and SQL injection attacks using regex-based pattern detection",
1274
+ "category": "security",
1275
+ "file": "defaults/security_patterns.cedar",
1276
+ "severity": "critical",
1277
+ "tags": ["command-injection", "path-traversal", "sql-injection", "security"],
1153
1278
  "is_active": true
1154
1279
  }
1155
1280
  ],
@@ -1,41 +1,61 @@
1
1
  /**
2
- * Context attribute keys for Overwatch Overwatch (Guardian) IDE security & policy enforcement.
2
+ * Context attribute keys for Overwatch IDE agent security & policy enforcement.
3
3
  *
4
4
  * These constants correspond to the context attributes defined in the
5
5
  * Overwatch Cedar schema and are used at policy evaluation time.
6
6
  */
7
7
  export declare const OverwatchContextKey: {
8
+ readonly ContainsInvisibleChars: "contains_invisible_chars";
8
9
  readonly ContainsSecrets: "contains_secrets";
9
10
  readonly Content: "content";
10
11
  readonly CrimeScore: "crime_score";
11
12
  readonly Cwd: "cwd";
13
+ readonly DetectedThreats: "detected_threats";
12
14
  readonly Event: "event";
13
15
  readonly HateSpeechScore: "hate_speech_score";
14
16
  readonly HighestSeverity: "highest_severity";
15
17
  readonly IndirectInjectionScore: "indirect_injection_score";
16
18
  readonly InjectionConfidence: "injection_confidence";
19
+ readonly InvisibleCharsScore: "invisible_chars_score";
17
20
  readonly JailbreakConfidence: "jailbreak_confidence";
21
+ readonly LoopCount: "loop_count";
22
+ readonly LoopDetected: "loop_detected";
23
+ readonly LoopTool: "loop_tool";
18
24
  readonly MaxThreatSeverity: "max_threat_severity";
25
+ readonly McpConfigRisk: "mcp_config_risk";
26
+ readonly McpRiskScore: "mcp_risk_score";
19
27
  readonly McpServer: "mcp_server";
20
28
  readonly McpServerVerified: "mcp_server_verified";
21
29
  readonly McpTool: "mcp_tool";
22
30
  readonly Path: "path";
31
+ readonly PatternType: "pattern_type";
23
32
  readonly PiiConfidence: "pii_confidence";
33
+ readonly PiiCount: "pii_count";
34
+ readonly PiiDetected: "pii_detected";
35
+ readonly PiiTypes: "pii_types";
24
36
  readonly ProfanityScore: "profanity_score";
25
37
  readonly PromptText: "prompt_text";
26
38
  readonly ResponseContent: "response_content";
39
+ readonly RugPullDetected: "rug_pull_detected";
27
40
  readonly RugPullScore: "rug_pull_score";
41
+ readonly SecretCount: "secret_count";
42
+ readonly SecretTypes: "secret_types";
43
+ readonly SequenceRisk: "sequence_risk";
28
44
  readonly SexualScore: "sexual_score";
29
45
  readonly Source: "source";
46
+ readonly SuspiciousPattern: "suspicious_pattern";
30
47
  readonly ThreatCategories: "threat_categories";
31
48
  readonly ThreatCount: "threat_count";
32
- readonly ThreatTypes: "threat_types";
49
+ readonly ToolCategory: "tool_category";
50
+ readonly ToolIsBuiltin: "tool_is_builtin";
51
+ readonly ToolIsSensitive: "tool_is_sensitive";
33
52
  readonly ToolName: "tool_name";
53
+ readonly ToolPoisoningDetected: "tool_poisoning_detected";
34
54
  readonly ToolPoisoningScore: "tool_poisoning_score";
55
+ readonly ToolRiskScore: "tool_risk_score";
35
56
  readonly UserEmail: "user_email";
36
57
  readonly ViolenceScore: "violence_score";
37
58
  readonly WeaponsScore: "weapons_score";
38
59
  readonly WorkspaceRoot: "workspace_root";
39
- readonly YaraThreats: "yara_threats";
40
60
  };
41
61
  export type OverwatchContextKey = (typeof OverwatchContextKey)[keyof typeof OverwatchContextKey];
@@ -1,42 +1,62 @@
1
1
  // Code generated by highflame-policy-codegen. DO NOT EDIT.
2
2
  // Source: schemas/overwatch/context.json
3
3
  /**
4
- * Context attribute keys for Overwatch Overwatch (Guardian) IDE security & policy enforcement.
4
+ * Context attribute keys for Overwatch IDE agent security & policy enforcement.
5
5
  *
6
6
  * These constants correspond to the context attributes defined in the
7
7
  * Overwatch Cedar schema and are used at policy evaluation time.
8
8
  */
9
9
  export const OverwatchContextKey = {
10
+ ContainsInvisibleChars: 'contains_invisible_chars',
10
11
  ContainsSecrets: 'contains_secrets',
11
12
  Content: 'content',
12
13
  CrimeScore: 'crime_score',
13
14
  Cwd: 'cwd',
15
+ DetectedThreats: 'detected_threats',
14
16
  Event: 'event',
15
17
  HateSpeechScore: 'hate_speech_score',
16
18
  HighestSeverity: 'highest_severity',
17
19
  IndirectInjectionScore: 'indirect_injection_score',
18
20
  InjectionConfidence: 'injection_confidence',
21
+ InvisibleCharsScore: 'invisible_chars_score',
19
22
  JailbreakConfidence: 'jailbreak_confidence',
23
+ LoopCount: 'loop_count',
24
+ LoopDetected: 'loop_detected',
25
+ LoopTool: 'loop_tool',
20
26
  MaxThreatSeverity: 'max_threat_severity',
27
+ McpConfigRisk: 'mcp_config_risk',
28
+ McpRiskScore: 'mcp_risk_score',
21
29
  McpServer: 'mcp_server',
22
30
  McpServerVerified: 'mcp_server_verified',
23
31
  McpTool: 'mcp_tool',
24
32
  Path: 'path',
33
+ PatternType: 'pattern_type',
25
34
  PiiConfidence: 'pii_confidence',
35
+ PiiCount: 'pii_count',
36
+ PiiDetected: 'pii_detected',
37
+ PiiTypes: 'pii_types',
26
38
  ProfanityScore: 'profanity_score',
27
39
  PromptText: 'prompt_text',
28
40
  ResponseContent: 'response_content',
41
+ RugPullDetected: 'rug_pull_detected',
29
42
  RugPullScore: 'rug_pull_score',
43
+ SecretCount: 'secret_count',
44
+ SecretTypes: 'secret_types',
45
+ SequenceRisk: 'sequence_risk',
30
46
  SexualScore: 'sexual_score',
31
47
  Source: 'source',
48
+ SuspiciousPattern: 'suspicious_pattern',
32
49
  ThreatCategories: 'threat_categories',
33
50
  ThreatCount: 'threat_count',
34
- ThreatTypes: 'threat_types',
51
+ ToolCategory: 'tool_category',
52
+ ToolIsBuiltin: 'tool_is_builtin',
53
+ ToolIsSensitive: 'tool_is_sensitive',
35
54
  ToolName: 'tool_name',
55
+ ToolPoisoningDetected: 'tool_poisoning_detected',
36
56
  ToolPoisoningScore: 'tool_poisoning_score',
57
+ ToolRiskScore: 'tool_risk_score',
37
58
  UserEmail: 'user_email',
38
59
  ViolenceScore: 'violence_score',
39
60
  WeaponsScore: 'weapons_score',
40
61
  WorkspaceRoot: 'workspace_root',
41
- YaraThreats: 'yara_threats',
42
62
  };
@@ -2,7 +2,7 @@
2
2
  * Overwatch policy category identifiers.
3
3
  * Maps to UI tab names in Studio.
4
4
  */
5
- export type OverwatchCategory = 'secrets' | 'pii' | 'semantic' | 'tools' | 'organization' | 'trust_safety' | 'agent_security';
5
+ export type OverwatchCategory = 'secrets' | 'pii' | 'semantic' | 'tools' | 'organization' | 'trust_safety' | 'agent_security' | 'encoding' | 'behavioral';
6
6
  /**
7
7
  * Category metadata for UI display.
8
8
  */