@highflame/policy 2.0.9 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/_schemas/guardrails/context.json +435 -0
  2. package/_schemas/guardrails/schema.cedarschema +225 -0
  3. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +94 -0
  4. package/_schemas/guardrails/templates/defaults/baseline.cedar +24 -0
  5. package/_schemas/guardrails/templates/defaults/injection.cedar +70 -0
  6. package/_schemas/guardrails/templates/defaults/pii.cedar +48 -0
  7. package/_schemas/guardrails/templates/defaults/secrets.cedar +40 -0
  8. package/_schemas/guardrails/templates/defaults/semantic.cedar +59 -0
  9. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +58 -0
  10. package/_schemas/guardrails/templates/defaults/toxicity.cedar +76 -0
  11. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +84 -0
  12. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -0
  13. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +35 -0
  14. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +43 -0
  15. package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +85 -0
  16. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +109 -0
  17. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +22 -0
  18. package/_schemas/guardrails/templates/profiles/code_agent.cedar +125 -0
  19. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +38 -0
  20. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +40 -0
  21. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +49 -0
  22. package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +111 -0
  23. package/_schemas/guardrails/templates/templates.json +213 -0
  24. package/_schemas/overwatch/context.json +54 -54
  25. package/_schemas/overwatch/schema.cedarschema +77 -68
  26. package/dist/builder.d.ts +106 -13
  27. package/dist/builder.js +103 -34
  28. package/dist/engine.d.ts +20 -2
  29. package/dist/engine.js +50 -20
  30. package/dist/entities.gen.d.ts +4 -0
  31. package/dist/entities.gen.js +4 -0
  32. package/dist/explain.d.ts +150 -0
  33. package/dist/explain.js +363 -0
  34. package/dist/guardrails-context.gen.d.ts +49 -0
  35. package/dist/guardrails-context.gen.js +50 -0
  36. package/dist/guardrails-defaults.gen.d.ts +61 -0
  37. package/dist/guardrails-defaults.gen.js +1278 -0
  38. package/dist/guardrails-entities.gen.d.ts +11 -0
  39. package/dist/guardrails-entities.gen.js +37 -0
  40. package/dist/index.d.ts +6 -1
  41. package/dist/index.js +6 -1
  42. package/dist/overwatch-defaults.gen.js +122 -2
  43. package/dist/parser.js +136 -4
  44. package/dist/schema.gen.d.ts +1 -1
  45. package/dist/schema.gen.js +6 -0
  46. package/dist/service-schemas.gen.d.ts +15 -11
  47. package/dist/service-schemas.gen.js +509 -84
  48. package/dist/types.d.ts +6 -1
  49. package/dist/types.js +6 -1
  50. package/package.json +5 -1
@@ -1,19 +1,248 @@
1
1
  // Code generated by highflame-policy-codegen. DO NOT EDIT.
2
- // Source: schemas/overwatch/schema.cedarschema, schemas/palisade/schema.cedarschema
2
+ // Source: schemas/guardrails/schema.cedarschema, schemas/overwatch/schema.cedarschema, schemas/palisade/schema.cedarschema
3
3
  //
4
4
  // Service-specific Cedar schemas and context metadata.
5
5
  // Works in both browser and Node.js environments.
6
6
  //
7
7
  // Usage:
8
- // import { OVERWATCH_SCHEMA, PALISADE_SCHEMA } from '@highflame/policy/types';
9
- // import { OVERWATCH_CONTEXT, PALISADE_CONTEXT } from '@highflame/policy/types';
8
+ // import { GUARDRAILS_SCHEMA, GUARDRAILS_CONTEXT } from '@highflame/policy/types';
9
+ // import { OVERWATCH_SCHEMA, OVERWATCH_CONTEXT } from '@highflame/policy/types';
10
+ // import { PALISADE_SCHEMA, PALISADE_CONTEXT } from '@highflame/policy/types';
10
11
  /**
11
- * Overwatch (Guardian) Cedar schema
12
+ * Guardrails Cedar schema
12
13
  *
13
- * Full Cedar schema for IDE security, including:
14
- * - Actions: process_prompt, call_tool, connect_server, read_file, write_file
15
- * - Entities: User, Agent, LlmPrompt, Tool, Server, FilePath
16
- * - Context attributes for threat detection and workspace security
14
+ * Full Cedar schema for guardrails, embedded at codegen time.
15
+ */
16
+ export const GUARDRAILS_SCHEMA = `// =============================================================================
17
+ // Guardrails Cedar Schema
18
+ // =============================================================================
19
+ // Defines entity types, actions, and context attributes for the highflame-shield
20
+ // guardrails service. This schema enables type-safe policy authoring and
21
+ // validation in both Studio UI and backend.
22
+ //
23
+ // Service: highflame-shield (guardrails)
24
+ // Namespace: Guardrails
25
+ // =============================================================================
26
+
27
+ namespace Guardrails {
28
+ // =========================================================================
29
+ // Entity Types — ReBAC Hierarchy
30
+ // =========================================================================
31
+ // Entity hierarchy enables Cedar's \`in\` operator for policy scoping:
32
+ // Account (org root)
33
+ // └── Project in [Account]
34
+ // └── App in [Project]
35
+ // └── Session in [App]
36
+ //
37
+ // Policy scoping examples:
38
+ // resource == Guardrails::App::"<uuid>" → app-scoped
39
+ // resource in Guardrails::Project::"<uuid>" → project-wide
40
+ // resource in Guardrails::Account::"<uuid>" → org-wide
41
+ // =========================================================================
42
+
43
+ /// Account represents an organization (top-level tenant)
44
+ entity Account;
45
+
46
+ /// Project represents a project within an account
47
+ entity Project in [Account];
48
+
49
+ /// User represents a principal (human or service) making requests
50
+ entity User;
51
+
52
+ /// Agent represents an AI agent (Claude, Cursor, Copilot, etc.) making requests
53
+ entity Agent;
54
+
55
+ /// App represents a protected application (guardrails-enabled LLM app)
56
+ entity App in [Project];
57
+
58
+ /// Session represents an agentic conversation session with state tracking
59
+ entity Session in [App];
60
+
61
+ // =========================================================================
62
+ // Actions
63
+ // =========================================================================
64
+
65
+ /// Process user prompts and AI responses for security threats and content violations
66
+ action "process_prompt" appliesTo {
67
+ principal: [User, Agent],
68
+ resource: [App, Session],
69
+ context: ProcessPromptContext
70
+ };
71
+
72
+ /// Execute tool calls (shell, file operations, MCP tools)
73
+ action "call_tool" appliesTo {
74
+ principal: [User, Agent],
75
+ resource: [Session],
76
+ context: CallToolContext
77
+ };
78
+
79
+ /// Read file operations
80
+ action "read_file" appliesTo {
81
+ principal: [User, Agent],
82
+ resource: [Session],
83
+ context: FileReadContext
84
+ };
85
+
86
+ /// Write file operations
87
+ action "write_file" appliesTo {
88
+ principal: [User, Agent],
89
+ resource: [Session],
90
+ context: FileWriteContext
91
+ };
92
+
93
+ /// Connect to an MCP server
94
+ action "connect_server" appliesTo {
95
+ principal: [User, Agent],
96
+ resource: [Session],
97
+ context: ConnectServerContext
98
+ };
99
+
100
+ // =========================================================================
101
+ // Context Types (Action-Specific)
102
+ // =========================================================================
103
+
104
+ /// Context for process_prompt action (user prompts & AI responses)
105
+ type ProcessPromptContext = {
106
+ // Core metadata (required)
107
+ "request_id": String,
108
+ "timestamp": Long,
109
+ "direction": String, // "input" | "output"
110
+ "content_type": String, // "prompt" | "response" | "tool_call" | "file"
111
+ "detector_count": Long,
112
+
113
+ // Security - Injection & Jailbreak (optional)
114
+ "injection_score"?: Long, // 0-100
115
+ "jailbreak_score"?: Long, // 0-100
116
+ "injection_type"?: String, // "prompt" | "sql" | "command" | "none"
117
+
118
+ // Privacy - Secrets (optional)
119
+ "contains_secrets"?: Bool,
120
+ "secret_count"?: Long,
121
+ "secret_types"?: Set<String>, // ["aws_access_key", "github_token", ...]
122
+
123
+ // Privacy - PII (optional)
124
+ "pii_detected"?: Bool,
125
+ "pii_count"?: Long,
126
+ "pii_types"?: Set<String>, // ["email", "phone", "ssn", "credit_card", ...]
127
+
128
+ // Trust & Safety - Toxicity (optional)
129
+ "violence_score"?: Long, // 0-100
130
+ "hate_speech_score"?: Long, // 0-100
131
+ "sexual_score"?: Long, // 0-100
132
+ "weapons_score"?: Long, // 0-100
133
+ "crime_score"?: Long, // 0-100
134
+ "profanity_score"?: Long, // 0-100
135
+
136
+ // Semantic - Topic Classification (optional)
137
+ "content_topics"?: Set<String>, // ["controlled_substances", "weapons_manufacturing", ...]
138
+ "topic_confidence"?: Long, // 0-100
139
+
140
+ // Security - Invisible Character Detection (optional)
141
+ "contains_invisible_chars"?: Bool,
142
+ "invisible_chars_score"?: Long, // 0-100
143
+
144
+ // Additional detectors (optional)
145
+ "hallucination_score"?: Long,
146
+ "sentiment_score"?: Long,
147
+ "contains_code"?: Bool,
148
+ "code_languages"?: Set<String>,
149
+ "keyword_matched"?: Bool,
150
+ "keyword_categories"?: Set<String>,
151
+ "detected_language"?: String,
152
+ "phishing_detected"?: Bool,
153
+
154
+ };
155
+
156
+ /// Context for call_tool action (agentic tool execution)
157
+ type CallToolContext = {
158
+ // Core metadata (required)
159
+ "request_id": String,
160
+ "timestamp": Long,
161
+
162
+ // Tool Risk (optional)
163
+ "tool_name"?: String, // "shell", "write_file", "http_post", etc.
164
+ "tool_risk_score"?: Long, // 0-100
165
+ "tool_is_sensitive"?: Bool,
166
+ "tool_category"?: String, // "safe" | "sensitive" | "dangerous"
167
+ "tool_is_builtin"?: Bool,
168
+
169
+ // MCP context (optional — only present for MCP tool calls)
170
+ "mcp_server"?: String, // MCP server name (e.g., "github", "filesystem")
171
+ "mcp_tool"?: String, // MCP tool name within the server
172
+ "mcp_server_verified"?: Bool, // Whether server is from verified registry
173
+
174
+ // Agentic - Behavioral Patterns (optional)
175
+ "suspicious_pattern"?: Bool,
176
+ "pattern_type"?: String, // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "none"
177
+ "sequence_risk"?: Long, // 0-100
178
+
179
+ // Agentic - Loop Detection (optional)
180
+ "loop_detected"?: Bool,
181
+ "loop_count"?: Long,
182
+ "loop_tool"?: String,
183
+
184
+ // Agentic - Budget Control (optional)
185
+ "budget_remaining_pct"?: Long, // 0-100
186
+ "budget_exceeded"?: Bool,
187
+
188
+ // Semantic - Topic Classification (optional)
189
+ "content_topics"?: Set<String>, // ["controlled_substances", "weapons_manufacturing", ...]
190
+ "topic_confidence"?: Long, // 0-100
191
+
192
+ // Security checks on tool arguments (optional)
193
+ "contains_secrets"?: Bool,
194
+ "secret_types"?: Set<String>,
195
+ "pii_detected"?: Bool,
196
+ "pii_types"?: Set<String>,
197
+ "injection_score"?: Long,
198
+
199
+ };
200
+
201
+ /// Context for read_file action
202
+ type FileReadContext = {
203
+ // Core metadata (required)
204
+ "request_id": String,
205
+ "timestamp": Long,
206
+
207
+ // Security checks on file content (optional)
208
+ "contains_secrets"?: Bool,
209
+ "secret_types"?: Set<String>,
210
+ "pii_detected"?: Bool,
211
+ "pii_types"?: Set<String>,
212
+
213
+ };
214
+
215
+ /// Context for write_file action
216
+ type FileWriteContext = {
217
+ // Core metadata (required)
218
+ "request_id": String,
219
+ "timestamp": Long,
220
+
221
+ // Security checks on content being written (optional)
222
+ "contains_secrets"?: Bool,
223
+ "secret_types"?: Set<String>,
224
+ "pii_detected"?: Bool,
225
+ "pii_types"?: Set<String>,
226
+
227
+ };
228
+
229
+ /// Context for connect_server action (MCP server connections)
230
+ type ConnectServerContext = {
231
+ // Core metadata (required)
232
+ "request_id": String,
233
+ "timestamp": Long,
234
+
235
+ // MCP context (optional)
236
+ "mcp_server"?: String, // MCP server name (e.g., "github", "filesystem")
237
+ "mcp_server_verified"?: Bool, // Whether server is from verified registry
238
+
239
+ };
240
+ }
241
+ `;
242
+ /**
243
+ * Overwatch Cedar schema
244
+ *
245
+ * Full Cedar schema for overwatch, embedded at codegen time.
17
246
  */
18
247
  export const OVERWATCH_SCHEMA = `// Overwatch (Guardian) Cedar Schema
19
248
  // ===================================
@@ -33,17 +262,33 @@ export const OVERWATCH_SCHEMA = `// Overwatch (Guardian) Cedar Schema
33
262
  namespace Overwatch {
34
263
 
35
264
  // =============================================================================
36
- // ENTITIES
265
+ // ENTITIES - Organization Hierarchy (ReBAC)
266
+ // =============================================================================
267
+
268
+ // Top-level organization for multi-tenant policy enforcement
269
+ // Enables policies like: principal in Overwatch::Organization::"acme-corp"
270
+ entity Organization {
271
+ name: String, // "Acme Corp", "Highflame"
272
+ };
273
+
274
+ // Team within an organization
275
+ // Enables policies like: principal in Overwatch::Team::"security-team"
276
+ entity Team in [Organization] {
277
+ name: String, // "security", "engineering", "devops"
278
+ };
279
+
280
+ // =============================================================================
281
+ // ENTITIES - Principals
37
282
  // =============================================================================
38
283
 
39
284
  // Human user or service account making requests to the IDE
40
- entity User {
285
+ entity User in [Team] {
41
286
  user_type: String, // "external" or "internal"
42
287
  email: String, // User email (optional)
43
288
  };
44
289
 
45
290
  // AI agent (Claude, GitHub Copilot, etc.)
46
- entity Agent {
291
+ entity Agent in [Team] {
47
292
  agent_type: String, // "claude", "copilot", etc.
48
293
  };
49
294
 
@@ -85,19 +330,37 @@ action process_prompt appliesTo {
85
330
  user_email: String, // User identifier
86
331
 
87
332
  // Workspace
88
- cwd: String, // Current working directory
89
- workspace_root: String, // Workspace/repository root
333
+ cwd?: String, // Current working directory
334
+ workspace_root?: String, // Workspace/repository root
90
335
 
91
336
  // Threat Detection
92
337
  threat_count: Long, // Total threats detected
93
338
  highest_severity: String, // "critical", "high", "medium", "low"
94
339
  threat_categories: Set<String>, // Threat category names
95
-
96
340
  yara_threats: Set<String>, // YARA rule names
97
341
  max_threat_severity: Long, // Numeric severity (0-4)
98
342
  contains_secrets: Bool, // Whether secrets detected
99
- prompt_text: String, // Same as content (legacy)
100
- response_content: String, // Response content (if available)
343
+ prompt_text?: String, // Same as content (legacy)
344
+ response_content?: String, // Response content (if available)
345
+
346
+ // Trust/Safety Scores (0-100, from Javelin/Lakera/LlamaGuard classifiers)
347
+ // Required: content safety classifiers always run for prompt processing
348
+ violence_score: Long, // Violence content detection score
349
+ weapons_score: Long, // Weapons content detection score
350
+ hate_speech_score: Long, // Hate speech detection score
351
+ crime_score: Long, // Criminal content detection score
352
+ sexual_score: Long, // Sexual content detection score
353
+ profanity_score: Long, // Profanity detection score
354
+
355
+ // Detector Confidence Scores (0-100, ML classifier confidence)
356
+ // Required: ML classifiers always run for prompt processing
357
+ pii_confidence: Long, // PII detection confidence
358
+ injection_confidence: Long, // Prompt injection confidence
359
+ jailbreak_confidence: Long, // Jailbreak detection confidence
360
+
361
+ // Agent Security (0-100)
362
+ // Required: agent security scanners always run for prompt processing
363
+ indirect_injection_score: Long, // Indirect prompt injection risk
101
364
  },
102
365
  };
103
366
 
@@ -113,26 +376,50 @@ action call_tool appliesTo {
113
376
  user_email: String, // User identifier
114
377
 
115
378
  // Tool & MCP
116
- tool_name: String, // Normalized tool name ("shell", "read_file", etc.)
117
- mcp_server: String, // MCP server name
118
- mcp_tool: String, // MCP tool name
379
+ tool_name?: String, // Normalized tool name ("shell", "read_file", etc.)
380
+ mcp_server?: String, // MCP server name
381
+ mcp_tool?: String, // MCP tool name
119
382
 
120
383
  // File & Path
121
- path: String, // File path (if file operation)
384
+ path?: String, // File path (if file operation)
122
385
 
123
386
  // Workspace
124
- cwd: String,
125
- workspace_root: String,
126
-
127
- // Threat Detection
128
- threat_count: Long,
129
- highest_severity: String,
130
- threat_categories: Set<String>,
131
-
132
- yara_threats: Set<String>,
133
- max_threat_severity: Long,
134
- contains_secrets: Bool,
135
- response_content: String,
387
+ cwd?: String,
388
+ workspace_root?: String,
389
+
390
+ // Threat Detection (optional: scanning may not have run before tool call)
391
+ threat_count?: Long,
392
+ highest_severity?: String,
393
+ threat_categories?: Set<String>,
394
+ yara_threats?: Set<String>,
395
+ max_threat_severity?: Long,
396
+ contains_secrets?: Bool,
397
+ response_content?: String,
398
+
399
+ // Trust/Safety Scores (0-100, from Javelin/Lakera/LlamaGuard classifiers)
400
+ // Optional: only present when trust/safety classifiers have run
401
+ violence_score?: Long, // Violence content detection score
402
+ weapons_score?: Long, // Weapons content detection score
403
+ hate_speech_score?: Long, // Hate speech detection score
404
+ crime_score?: Long, // Criminal content detection score
405
+ sexual_score?: Long, // Sexual content detection score
406
+ profanity_score?: Long, // Profanity detection score
407
+
408
+ // Detector Confidence Scores (0-100, ML classifier confidence)
409
+ // Optional: only present when ML classifiers have run
410
+ pii_confidence?: Long, // PII detection confidence
411
+ injection_confidence?: Long, // Prompt injection confidence
412
+ jailbreak_confidence?: Long, // Jailbreak detection confidence
413
+
414
+ // Agent Security (0-100)
415
+ // Optional: only present when agent security scanners have run
416
+ tool_poisoning_score?: Long, // Tool description manipulation risk
417
+ rug_pull_score?: Long, // Tool behavior mismatch risk
418
+ indirect_injection_score?: Long, // Indirect prompt injection risk
419
+
420
+ // MCP Trust
421
+ // Optional: only present when MCP server verification has run
422
+ mcp_server_verified?: Bool, // Whether server is from verified registry
136
423
  },
137
424
  };
138
425
 
@@ -141,15 +428,25 @@ action connect_server appliesTo {
141
428
  principal: [User, Agent],
142
429
  resource: [Server],
143
430
  context: {
144
- content: String,
431
+ content?: String, // No content to scan when connecting
145
432
  source: String,
146
433
  event: String,
147
434
  user_email: String,
148
- mcp_server: String,
149
- threat_count: Long,
150
- highest_severity: String,
151
- threat_categories: Set<String>,
152
- max_threat_severity: Long,
435
+ mcp_server?: String,
436
+ threat_count?: Long, // Threat scanning may not run for connections
437
+ highest_severity?: String,
438
+ threat_categories?: Set<String>,
439
+ max_threat_severity?: Long,
440
+
441
+ // Agent Security (0-100)
442
+ // Optional: only present when agent security scanners have run
443
+ tool_poisoning_score?: Long, // Tool description manipulation risk
444
+ rug_pull_score?: Long, // Tool behavior mismatch risk
445
+ indirect_injection_score?: Long, // Indirect prompt injection risk
446
+
447
+ // MCP Trust
448
+ // Optional: only present when MCP server verification has run
449
+ mcp_server_verified?: Bool, // Whether server is from verified registry
153
450
  },
154
451
  };
155
452
 
@@ -162,14 +459,14 @@ action read_file appliesTo {
162
459
  source: String,
163
460
  event: String,
164
461
  user_email: String,
165
- path: String,
166
- cwd: String,
167
- workspace_root: String,
168
- threat_count: Long,
169
- highest_severity: String,
170
- threat_categories: Set<String>,
171
- max_threat_severity: Long,
172
- contains_secrets: Bool,
462
+ path?: String,
463
+ cwd?: String,
464
+ workspace_root?: String,
465
+ threat_count?: Long, // Threat scanning may not have run
466
+ highest_severity?: String,
467
+ threat_categories?: Set<String>,
468
+ max_threat_severity?: Long,
469
+ contains_secrets?: Bool,
173
470
  },
174
471
  };
175
472
 
@@ -182,14 +479,14 @@ action write_file appliesTo {
182
479
  source: String,
183
480
  event: String,
184
481
  user_email: String,
185
- path: String,
186
- cwd: String,
187
- workspace_root: String,
188
- threat_count: Long,
189
- highest_severity: String,
190
- threat_categories: Set<String>,
191
- max_threat_severity: Long,
192
- contains_secrets: Bool,
482
+ path?: String,
483
+ cwd?: String,
484
+ workspace_root?: String,
485
+ threat_count?: Long, // Threat scanning may not have run
486
+ highest_severity?: String,
487
+ threat_categories?: Set<String>,
488
+ max_threat_severity?: Long,
489
+ contains_secrets?: Bool,
193
490
  },
194
491
  };
195
492
 
@@ -198,10 +495,7 @@ action write_file appliesTo {
198
495
  /**
199
496
  * Palisade Cedar schema
200
497
  *
201
- * Full Cedar schema for ML supply chain security, including:
202
- * - Actions: scan_artifact, validate_integrity, validate_provenance, quarantine_artifact, load_model, deploy_model
203
- * - Entities: Scanner, Artifact, Package
204
- * - Context attributes for ML security findings
498
+ * Full Cedar schema for palisade, embedded at codegen time.
205
499
  */
206
500
  export const PALISADE_SCHEMA = `// Palisade Cedar Schema
207
501
  // =====================
@@ -372,12 +666,116 @@ action scan_package appliesTo {
372
666
 
373
667
  }
374
668
  `;
669
+ /**
670
+ * Guardrails context metadata (parsed JSON)
671
+ */
672
+ export const GUARDRAILS_CONTEXT = {
673
+ "service": "guardrails",
674
+ "version": "1.0.0",
675
+ "description": "Guardrails (Shield) content security & policy enforcement for LLM applications",
676
+ "actions": [
677
+ {
678
+ "name": "process_prompt",
679
+ "description": "Analyze user prompts and AI responses for security threats, PII, and content violations",
680
+ "context_attributes": [
681
+ { "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request, useful for audit trails and debugging" },
682
+ { "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds when the request was processed" },
683
+ { "key": "direction", "type": "string", "required": true, "description": "Content flow direction: \'input\' for user prompts, \'output\' for AI responses. Use this to apply different policies to inputs vs outputs (e.g., block PII only in outputs)" },
684
+ { "key": "content_type", "type": "string", "required": true, "description": "Type of content being analyzed: \'prompt\', \'response\', \'tool_call\', or \'file\'" },
685
+ { "key": "detector_count", "type": "number", "required": true, "description": "Number of detectors that were executed for this request" },
686
+ { "key": "injection_score", "type": "number", "required": false, "description": "ML-based confidence score for prompt injection attacks (0-100). Higher scores indicate higher confidence. Typical threshold: >85 for high-confidence blocks" },
687
+ { "key": "jailbreak_score", "type": "number", "required": false, "description": "ML-based confidence score for jailbreak attempts (0-100). Detects attempts to bypass safety guardrails. Typical threshold: >80 for blocks" },
688
+ { "key": "injection_type", "type": "string", "required": false, "description": "Type of injection detected: \'prompt\', \'sql\', \'command\', or \'none\'. Use this to apply different policies per injection type" },
689
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether any API keys, tokens, passwords, or credentials were detected in the content. True indicates presence of secrets" },
690
+ { "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found. Multiple matches may indicate data dumps or accidental credential exposure" },
691
+ { "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found (e.g., [\'aws_access_key\', \'github_token\']). Use set operations like .contains() to check for specific types" },
692
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether personally identifiable information (PII) was found in the content. Commonly used to block PII in outputs to prevent data leakage" },
693
+ { "key": "pii_count", "type": "number", "required": false, "description": "Total number of PII matches found (emails, phone numbers, SSNs, etc.)" },
694
+ { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types detected (e.g., [\'email\', \'phone\', \'ssn\', \'credit_card\']). Use .contains() to block specific sensitive types" },
695
+ { "key": "violence_score", "type": "number", "required": false, "description": "ML-based score for violent content references (0-100). Typical threshold: >90 for critical blocks, >60 for warnings" },
696
+ { "key": "hate_speech_score", "type": "number", "required": false, "description": "ML-based score for hate speech, discriminatory language, or targeted harassment (0-100). Typical threshold: >75 for blocks" },
697
+ { "key": "sexual_score", "type": "number", "required": false, "description": "ML-based score for sexual or adult content (0-100). Adjust thresholds based on your application\'s audience" },
698
+ { "key": "weapons_score", "type": "number", "required": false, "description": "ML-based score for weapons references or violent imagery (0-100)" },
699
+ { "key": "crime_score", "type": "number", "required": false, "description": "ML-based score for criminal activity discussions (0-100)" },
700
+ { "key": "profanity_score", "type": "number", "required": false, "description": "ML-based score for profanity and vulgar language (0-100)" },
701
+ { "key": "content_topics", "type": "array", "required": false, "description": "Semantic topics detected in content (e.g., [\'controlled_substances\', \'weapons_manufacturing\']). Use .contains() to block specific topics per application" },
702
+ { "key": "topic_confidence", "type": "number", "required": false, "description": "Confidence score from topic classifier (0-100). Use with content_topics to tune sensitivity — higher thresholds reduce false positives" },
703
+ { "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters (zero-width joiners, RTL marks, etc.) were detected in the content. Commonly used for prompt injection evasion" },
704
+ { "key": "invisible_chars_score", "type": "number", "required": false, "description": "Density score for invisible characters in the content (0-100). Higher scores indicate more invisible characters, suggesting evasion attempts" }
705
+ ]
706
+ },
707
+ {
708
+ "name": "call_tool",
709
+ "description": "Execute agentic tool calls, including shell commands, file operations, and MCP tools",
710
+ "context_attributes": [
711
+ { "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
712
+ { "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
713
+ { "key": "tool_name", "type": "string", "required": false, "description": "Name of the tool being called (e.g., \'shell\', \'write_file\', \'http_post\'). Use this to block specific dangerous tools" },
714
+ { "key": "tool_risk_score", "type": "number", "required": false, "description": "Computed risk score for this tool call (0-100). Considers tool sensitivity, argument patterns, and MCP verification status. Typical threshold: >85 for dangerous tools" },
715
+ { "key": "tool_is_sensitive", "type": "boolean", "required": false, "description": "Whether the tool is classified as sensitive (shell, file operations, network access, etc.)" },
716
+ { "key": "tool_category", "type": "string", "required": false, "description": "Tool classification: \'safe\', \'sensitive\', or \'dangerous\'. Based on tool type and argument patterns" },
717
+ { "key": "tool_is_builtin", "type": "boolean", "required": false, "description": "Whether the tool is a built-in tool (vs MCP external tool). Built-in tools are generally more trusted" },
718
+ { "key": "mcp_server", "type": "string", "required": false, "description": "Name of the MCP server providing this tool (e.g., \'github\', \'filesystem\', \'slack\'). Empty for built-in tools. Use this to control which tools are allowed per MCP server" },
719
+ { "key": "mcp_tool", "type": "string", "required": false, "description": "Name of the specific tool within the MCP server (e.g., \'read_issues\', \'create_file\'). Use with mcp_server for fine-grained per-tool permissioning" },
720
+ { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether the MCP server is from a verified/trusted registry. Unverified servers have elevated risk scores. Use this to block tool calls from unverified sources" },
721
+ { "key": "suspicious_pattern", "type": "boolean", "required": false, "description": "Whether a suspicious action sequence was detected (e.g., read_file → http_post indicating data exfiltration). Requires session tracking" },
722
+ { "key": "pattern_type", "type": "string", "required": false, "description": "Type of suspicious pattern: \'data_exfiltration\', \'secret_exfiltration\', \'db_exfiltration\', or \'none\'. Use this to apply different policies per threat type" },
723
+ { "key": "sequence_risk", "type": "number", "required": false, "description": "Risk score from action sequence analysis (0-100). Analyzes history of tool calls to detect attack patterns. Typical threshold: >80 for blocks" },
724
+ { "key": "loop_detected", "type": "boolean", "required": false, "description": "Whether a tool call loop was detected (same tool called repeatedly). May indicate infinite loops or runaway agents. Requires session tracking" },
725
+ { "key": "loop_count", "type": "number", "required": false, "description": "Number of consecutive repeated tool calls. Typical threshold: >3 for loop detection" },
726
+ { "key": "loop_tool", "type": "string", "required": false, "description": "Name of the tool involved in the loop" },
727
+ { "key": "budget_remaining_pct", "type": "number", "required": false, "description": "Remaining token budget as percentage (0-100). Use this to warn or block when budget is low. Requires session with token budget configuration" },
728
+ { "key": "budget_exceeded", "type": "boolean", "required": false, "description": "Whether the token budget has been exceeded. Use this to enforce cost controls on agentic sessions" },
729
+ { "key": "content_topics", "type": "array", "required": false, "description": "Semantic topics detected in tool arguments or content (e.g., [\'controlled_substances\']). Use .contains() to restrict tool calls involving specific topics" },
730
+ { "key": "topic_confidence", "type": "number", "required": false, "description": "Confidence score from topic classifier for tool content (0-100)" },
731
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in tool arguments or content" },
732
+ { "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in tool arguments" },
733
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in tool arguments or content" },
734
+ { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in tool arguments" }
735
+ ]
736
+ },
737
+ {
738
+ "name": "read_file",
739
+ "description": "Read file operations for analyzing file content before allowing access",
740
+ "context_attributes": [
741
+ { "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
742
+ { "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
743
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the file content being read" },
744
+ { "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in file content" },
745
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in the file content" },
746
+ { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in file content" }
747
+ ]
748
+ },
749
+ {
750
+ "name": "write_file",
751
+ "description": "Write file operations for preventing writes of sensitive content",
752
+ "context_attributes": [
753
+ { "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
754
+ { "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
755
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the content being written. Block writes containing credentials" },
756
+ { "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in write content" },
757
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in the content being written" },
758
+ { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in write content" }
759
+ ]
760
+ },
761
+ {
762
+ "name": "connect_server",
763
+ "description": "Connect to an MCP server, used to control which MCP servers are allowed",
764
+ "context_attributes": [
765
+ { "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
766
+ { "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
767
+ { "key": "mcp_server", "type": "string", "required": false, "description": "Name of the MCP server being connected to (e.g., \'github\', \'filesystem\', \'slack\'). Use this to allow or block specific MCP servers" },
768
+ { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether the MCP server is from a verified/trusted registry. Use this to block connections to unverified servers" }
769
+ ]
770
+ }
771
+ ]
772
+ };
375
773
  /**
376
774
  * Overwatch context metadata (parsed JSON)
377
775
  */
378
776
  export const OVERWATCH_CONTEXT = {
379
777
  "service": "overwatch",
380
- "version": "1.0.0",
778
+ "version": "2.0.0",
381
779
  "description": "Overwatch (Guardian) IDE security & policy enforcement",
382
780
  "actions": [
383
781
  {
@@ -393,11 +791,22 @@ export const OVERWATCH_CONTEXT = {
393
791
  { "key": "threat_count", "type": "number", "required": true, "description": "Total number of threats detected by YARA/Javelin" },
394
792
  { "key": "highest_severity", "type": "string", "required": true, "description": "Highest severity level: critical, high, medium, low" },
395
793
  { "key": "threat_categories", "type": "array", "required": true, "description": "Threat category names from aggregator" },
794
+ { "key": "threat_types", "type": "array", "required": true, "description": "YARA threat category names" },
396
795
  { "key": "yara_threats", "type": "array", "required": true, "description": "YARA rule names that matched" },
397
796
  { "key": "max_threat_severity", "type": "number", "required": true, "description": "Numeric severity (0-4, where 4=CRITICAL)" },
398
797
  { "key": "contains_secrets", "type": "boolean", "required": true, "description": "Whether secrets or credentials were detected" },
399
798
  { "key": "prompt_text", "type": "string", "required": false, "description": "Same as content (legacy field)" },
400
- { "key": "response_content", "type": "string", "required": false, "description": "Response content from AI (if available)" }
799
+ { "key": "response_content", "type": "string", "required": false, "description": "Response content from AI (if available)" },
800
+ { "key": "violence_score", "type": "number", "required": true, "description": "Violence content detection score (0-100)" },
801
+ { "key": "weapons_score", "type": "number", "required": true, "description": "Weapons content detection score (0-100)" },
802
+ { "key": "hate_speech_score", "type": "number", "required": true, "description": "Hate speech detection score (0-100)" },
803
+ { "key": "crime_score", "type": "number", "required": true, "description": "Criminal content detection score (0-100)" },
804
+ { "key": "sexual_score", "type": "number", "required": true, "description": "Sexual content detection score (0-100)" },
805
+ { "key": "profanity_score", "type": "number", "required": true, "description": "Profanity detection score (0-100)" },
806
+ { "key": "pii_confidence", "type": "number", "required": true, "description": "PII detection classifier confidence (0-100)" },
807
+ { "key": "injection_confidence", "type": "number", "required": true, "description": "Prompt injection classifier confidence (0-100)" },
808
+ { "key": "jailbreak_confidence", "type": "number", "required": true, "description": "Jailbreak detection classifier confidence (0-100)" },
809
+ { "key": "indirect_injection_score", "type": "number", "required": true, "description": "Indirect prompt injection risk score (0-100)" }
401
810
  ]
402
811
  },
403
812
  {
@@ -414,28 +823,46 @@ export const OVERWATCH_CONTEXT = {
414
823
  { "key": "path", "type": "string", "required": false, "description": "File path (if file operation)" },
415
824
  { "key": "cwd", "type": "string", "required": false, "description": "Current working directory" },
416
825
  { "key": "workspace_root", "type": "string", "required": false, "description": "Workspace/repository root path" },
417
- { "key": "threat_count", "type": "number", "required": true, "description": "Total threats detected" },
418
- { "key": "highest_severity", "type": "string", "required": true, "description": "Highest severity: critical, high, medium, low" },
419
- { "key": "threat_categories", "type": "array", "required": true, "description": "Threat category names" },
420
- { "key": "yara_threats", "type": "array", "required": true, "description": "YARA rule names" },
421
- { "key": "max_threat_severity", "type": "number", "required": true, "description": "Numeric severity (0-4)" },
422
- { "key": "contains_secrets", "type": "boolean", "required": true, "description": "Whether secrets detected" },
423
- { "key": "response_content", "type": "string", "required": false, "description": "Response content (if available)" }
826
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
827
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity (if scanning ran)" },
828
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names (if scanning ran)" },
829
+ { "key": "threat_types", "type": "array", "required": false, "description": "YARA threat categories (if scanning ran)" },
830
+ { "key": "yara_threats", "type": "array", "required": false, "description": "YARA rule names (if scanning ran)" },
831
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
832
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets detected (if scanning ran)" },
833
+ { "key": "response_content", "type": "string", "required": false, "description": "Response content (if available)" },
834
+ { "key": "violence_score", "type": "number", "required": false, "description": "Violence content detection score (0-100)" },
835
+ { "key": "weapons_score", "type": "number", "required": false, "description": "Weapons content detection score (0-100)" },
836
+ { "key": "hate_speech_score", "type": "number", "required": false, "description": "Hate speech detection score (0-100)" },
837
+ { "key": "crime_score", "type": "number", "required": false, "description": "Criminal content detection score (0-100)" },
838
+ { "key": "sexual_score", "type": "number", "required": false, "description": "Sexual content detection score (0-100)" },
839
+ { "key": "profanity_score", "type": "number", "required": false, "description": "Profanity detection score (0-100)" },
840
+ { "key": "pii_confidence", "type": "number", "required": false, "description": "PII detection classifier confidence (0-100)" },
841
+ { "key": "injection_confidence", "type": "number", "required": false, "description": "Prompt injection classifier confidence (0-100)" },
842
+ { "key": "jailbreak_confidence", "type": "number", "required": false, "description": "Jailbreak detection classifier confidence (0-100)" },
843
+ { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100)" },
844
+ { "key": "rug_pull_score", "type": "number", "required": false, "description": "Tool behavior mismatch risk score (0-100)" },
845
+ { "key": "indirect_injection_score", "type": "number", "required": false, "description": "Indirect prompt injection risk score (0-100)" },
846
+ { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether MCP server is from a verified registry" }
424
847
  ]
425
848
  },
426
849
  {
427
850
  "name": "connect_server",
428
851
  "description": "Connect to an MCP server",
429
852
  "context_attributes": [
430
- { "key": "content", "type": "string", "required": true, "description": "Raw content being scanned" },
853
+ { "key": "content", "type": "string", "required": false, "description": "Raw content being scanned (if available)" },
431
854
  { "key": "source", "type": "string", "required": true, "description": "IDE source" },
432
855
  { "key": "event", "type": "string", "required": true, "description": "Hook event name" },
433
856
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier" },
434
857
  { "key": "mcp_server", "type": "string", "required": false, "description": "MCP server name" },
435
- { "key": "threat_count", "type": "number", "required": true, "description": "Total threats detected" },
436
- { "key": "highest_severity", "type": "string", "required": true, "description": "Highest severity level" },
437
- { "key": "threat_categories", "type": "array", "required": true, "description": "Threat category names" },
438
- { "key": "max_threat_severity", "type": "number", "required": true, "description": "Numeric severity (0-4)" }
858
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
859
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level (if scanning ran)" },
860
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names (if scanning ran)" },
861
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
862
+ { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100)" },
863
+ { "key": "rug_pull_score", "type": "number", "required": false, "description": "Tool behavior mismatch risk score (0-100)" },
864
+ { "key": "indirect_injection_score", "type": "number", "required": false, "description": "Indirect prompt injection risk score (0-100)" },
865
+ { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether MCP server is from a verified registry" }
439
866
  ]
440
867
  },
441
868
  {
@@ -447,14 +874,13 @@ export const OVERWATCH_CONTEXT = {
447
874
  { "key": "event", "type": "string", "required": true, "description": "Hook event name (e.g., beforeReadFile)" },
448
875
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier" },
449
876
  { "key": "path", "type": "string", "required": false, "description": "File path being read" },
450
- { "key": "file_path", "type": "string", "required": false, "description": "Duplicate of path field" },
451
877
  { "key": "cwd", "type": "string", "required": false, "description": "Current working directory" },
452
878
  { "key": "workspace_root", "type": "string", "required": false, "description": "Workspace root path" },
453
- { "key": "threat_count", "type": "number", "required": true, "description": "Total threats detected" },
454
- { "key": "highest_severity", "type": "string", "required": true, "description": "Highest severity level" },
455
- { "key": "threat_categories", "type": "array", "required": true, "description": "Threat categories" },
456
- { "key": "max_threat_severity", "type": "number", "required": true, "description": "Numeric severity (0-4)" },
457
- { "key": "contains_secrets", "type": "boolean", "required": true, "description": "Whether secrets detected" }
879
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
880
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level (if scanning ran)" },
881
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat categories (if scanning ran)" },
882
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
883
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets detected (if scanning ran)" }
458
884
  ]
459
885
  },
460
886
  {
@@ -466,14 +892,13 @@ export const OVERWATCH_CONTEXT = {
466
892
  { "key": "event", "type": "string", "required": true, "description": "Hook event name" },
467
893
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier" },
468
894
  { "key": "path", "type": "string", "required": false, "description": "File path being written" },
469
- { "key": "file_path", "type": "string", "required": false, "description": "Duplicate of path field" },
470
895
  { "key": "cwd", "type": "string", "required": false, "description": "Current working directory" },
471
896
  { "key": "workspace_root", "type": "string", "required": false, "description": "Workspace root path" },
472
- { "key": "threat_count", "type": "number", "required": true, "description": "Total threats detected" },
473
- { "key": "highest_severity", "type": "string", "required": true, "description": "Highest severity level" },
474
- { "key": "threat_categories", "type": "array", "required": true, "description": "Threat categories" },
475
- { "key": "max_threat_severity", "type": "number", "required": true, "description": "Numeric severity (0-4)" },
476
- { "key": "contains_secrets", "type": "boolean", "required": true, "description": "Whether secrets detected" }
897
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
898
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level (if scanning ran)" },
899
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat categories (if scanning ran)" },
900
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
901
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets detected (if scanning ran)" }
477
902
  ]
478
903
  }
479
904
  ]