@highflame/policy 2.1.1 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -141,15 +141,63 @@ namespace Guardrails {
141
141
  "contains_invisible_chars"?: Bool,
142
142
  "invisible_chars_score"?: Long, // 0-100
143
143
 
144
- // Additional detectors (optional)
144
+ // Security - Pattern Detection (optional)
145
+ "command_injection_detected"?: Bool,
146
+ "command_injection_type"?: String, // "reverse_shell" | "privilege_escalation" | "code_execution" | "destructive_command" | "data_exfiltration"
147
+ "command_injection_score"?: Long, // 0-100
148
+ "path_traversal_detected"?: Bool,
149
+ "path_traversal_severity"?: String, // "critical" | "high" | "medium" | "low" | "none"
150
+ "path_traversal_type"?: String,
151
+ "sql_injection_detected"?: Bool,
152
+ "sql_injection_type"?: String, // "tautology" | "union_based" | "destructive" | "blind" | "error_based"
153
+ "sql_injection_score"?: Long, // 0-100
154
+
155
+ // Security - Cross-Origin Escalation (optional)
156
+ "cross_origin_detected"?: Bool,
157
+ "cross_origin_type"?: String, // "cross_origin_tool" | "cross_origin_server" | "none"
158
+ "cross_origin_score"?: Long, // 0-100
159
+
160
+ // Security - Encoded Injection (optional)
161
+ "encoded_content_detected"?: Bool,
162
+ "encoded_types"?: Set<String>, // ["base64", "hex", "unicode", "url", ...]
163
+ "encoded_count"?: Long,
164
+ "encoded_score"?: Long, // 0-100
165
+
166
+ // Language & Script Detection (optional)
167
+ "detected_language"?: String, // ISO language code
168
+ "is_english"?: Bool,
169
+ "language_confidence"?: Long, // 0-100
170
+ "detected_script"?: String, // "latin" | "cyrillic" | "arabic" | "unknown" | ...
171
+ "is_latin_script"?: Bool,
172
+ "script_confidence"?: Long, // 0-100
173
+
174
+ // Content Analysis (optional)
145
175
  "hallucination_score"?: Long,
176
+ "factuality_score"?: Long, // 0-100
146
177
  "sentiment_score"?: Long,
147
178
  "contains_code"?: Bool,
148
179
  "code_languages"?: Set<String>,
180
+ "code_ratio"?: Long, // 0-100, percentage of content that is code
149
181
  "keyword_matched"?: Bool,
150
182
  "keyword_categories"?: Set<String>,
151
- "detected_language"?: String,
183
+ "keyword_count"?: Long,
184
+ "contains_non_ascii"?: Bool,
152
185
  "phishing_detected"?: Bool,
186
+ "content_safety_score"?: Long, // 0-100
187
+ "content_safety_blocked"?: Bool,
188
+
189
+ // Agentic - Multi-Turn Context (optional)
190
+ "conversation_turn"?: Long,
191
+ "multi_turn_detection"?: Bool,
192
+
193
+ // Session Detection History — cross-turn sticky flags (optional)
194
+ "session_pii_detected"?: Bool,
195
+ "session_pii_types"?: Set<String>,
196
+ "session_secrets_detected"?: Bool,
197
+ "session_secret_types"?: Set<String>,
198
+ "session_injection_detected"?: Bool,
199
+ "session_command_injection"?: Bool,
200
+ "session_threat_turns"?: Long,
153
201
 
154
202
  };
155
203
 
@@ -196,6 +244,54 @@ namespace Guardrails {
196
244
  "pii_types"?: Set<String>,
197
245
  "injection_score"?: Long,
198
246
 
247
+ // Security - Pattern Detection (optional)
248
+ "command_injection_detected"?: Bool,
249
+ "command_injection_type"?: String,
250
+ "command_injection_score"?: Long, // 0-100
251
+ "path_traversal_detected"?: Bool,
252
+ "path_traversal_severity"?: String,
253
+ "path_traversal_type"?: String,
254
+ "sql_injection_detected"?: Bool,
255
+ "sql_injection_type"?: String,
256
+ "sql_injection_score"?: Long, // 0-100
257
+
258
+ // Security - Cross-Origin Escalation (optional)
259
+ "cross_origin_detected"?: Bool,
260
+ "cross_origin_type"?: String,
261
+ "cross_origin_score"?: Long, // 0-100
262
+
263
+ // Security - Encoded Injection (optional)
264
+ "encoded_content_detected"?: Bool,
265
+ "encoded_types"?: Set<String>,
266
+ "encoded_count"?: Long,
267
+ "encoded_score"?: Long, // 0-100
268
+
269
+ // Agentic - Agent Security (optional)
270
+ "tool_poisoning_detected"?: Bool,
271
+ "tool_poisoning_score"?: Long, // 0-100
272
+ "tool_poisoning_type"?: String, // "hidden_instructions" | "system_prompt_injection" | "authority_hijack"
273
+ "rug_pull_detected"?: Bool,
274
+ "rug_pull_score"?: Long, // 0-100
275
+ "rug_pull_type"?: String, // "risk_spike" | "pattern_change" | "combined" | "none"
276
+
277
+ // Agentic - MCP Risk (optional)
278
+ "mcp_config_risk"?: Bool,
279
+ "mcp_risk_type"?: String, // "inline_execution" | "suspicious_url" | "cross_origin"
280
+ "mcp_risk_score"?: Long, // 0-100
281
+
282
+ // Agentic - Multi-Turn Context (optional)
283
+ "conversation_turn"?: Long,
284
+ "multi_turn_detection"?: Bool,
285
+
286
+ // Session Detection History — cross-turn sticky flags (optional)
287
+ "session_pii_detected"?: Bool,
288
+ "session_pii_types"?: Set<String>,
289
+ "session_secrets_detected"?: Bool,
290
+ "session_secret_types"?: Set<String>,
291
+ "session_injection_detected"?: Bool,
292
+ "session_command_injection"?: Bool,
293
+ "session_threat_turns"?: Long,
294
+
199
295
  };
200
296
 
201
297
  /// Context for read_file action
@@ -210,6 +306,20 @@ namespace Guardrails {
210
306
  "pii_detected"?: Bool,
211
307
  "pii_types"?: Set<String>,
212
308
 
309
+ // Security - Path Traversal (optional)
310
+ "path_traversal_detected"?: Bool,
311
+ "path_traversal_severity"?: String,
312
+ "path_traversal_type"?: String,
313
+
314
+ // Session Detection History — cross-turn sticky flags (optional)
315
+ "session_pii_detected"?: Bool,
316
+ "session_pii_types"?: Set<String>,
317
+ "session_secrets_detected"?: Bool,
318
+ "session_secret_types"?: Set<String>,
319
+ "session_injection_detected"?: Bool,
320
+ "session_command_injection"?: Bool,
321
+ "session_threat_turns"?: Long,
322
+
213
323
  };
214
324
 
215
325
  /// Context for write_file action
@@ -224,6 +334,20 @@ namespace Guardrails {
224
334
  "pii_detected"?: Bool,
225
335
  "pii_types"?: Set<String>,
226
336
 
337
+ // Security - Path Traversal (optional)
338
+ "path_traversal_detected"?: Bool,
339
+ "path_traversal_severity"?: String,
340
+ "path_traversal_type"?: String,
341
+
342
+ // Session Detection History — cross-turn sticky flags (optional)
343
+ "session_pii_detected"?: Bool,
344
+ "session_pii_types"?: Set<String>,
345
+ "session_secrets_detected"?: Bool,
346
+ "session_secret_types"?: Set<String>,
347
+ "session_injection_detected"?: Bool,
348
+ "session_command_injection"?: Bool,
349
+ "session_threat_turns"?: Long,
350
+
227
351
  };
228
352
 
229
353
  /// Context for connect_server action (MCP server connections)
@@ -236,6 +360,30 @@ namespace Guardrails {
236
360
  "mcp_server"?: String, // MCP server name (e.g., "github", "filesystem")
237
361
  "mcp_server_verified"?: Bool, // Whether server is from verified registry
238
362
 
363
+ // Agentic - Agent Security (optional)
364
+ "tool_poisoning_detected"?: Bool,
365
+ "tool_poisoning_score"?: Long,
366
+ "tool_poisoning_type"?: String,
367
+
368
+ // Agentic - MCP Risk (optional)
369
+ "mcp_config_risk"?: Bool,
370
+ "mcp_risk_type"?: String,
371
+ "mcp_risk_score"?: Long,
372
+
373
+ // Security - Cross-Origin Escalation (optional)
374
+ "cross_origin_detected"?: Bool,
375
+ "cross_origin_type"?: String,
376
+ "cross_origin_score"?: Long,
377
+
378
+ // Session Detection History — cross-turn sticky flags (optional)
379
+ "session_pii_detected"?: Bool,
380
+ "session_pii_types"?: Set<String>,
381
+ "session_secrets_detected"?: Bool,
382
+ "session_secret_types"?: Set<String>,
383
+ "session_injection_detected"?: Bool,
384
+ "session_command_injection"?: Bool,
385
+ "session_threat_turns"?: Long,
386
+
239
387
  };
240
388
  }
241
389
  `;
@@ -244,213 +392,288 @@ namespace Guardrails {
244
392
  *
245
393
  * Full Cedar schema for overwatch, embedded at codegen time.
246
394
  */
247
- export const OVERWATCH_SCHEMA = `// Overwatch (Guardian) Cedar Schema
395
+ export const OVERWATCH_SCHEMA = `// Overwatch Cedar Schema
248
396
  // ===================================
249
- // IDE Security & Policy Enforcement
397
+ // IDE Agent Security & Policy Enforcement
250
398
  //
251
- // Overwatch protects IDE operations (prompts, tool calls, file access) by evaluating
252
- // threats detected by YARA and Javelin scanners against Cedar policies.
399
+ // Overwatch protects IDE agent operations (prompts, tool calls, file access, MCP connections)
400
+ // by evaluating threats detected by the detection engine pipeline against Cedar policies.
253
401
  //
254
402
  // Architecture:
255
- // User/Agent → IDE Hook → YARA/Javelin → Cedar Policy → Allow/Deny
403
+ // User/Agent → IDE Hook → Detection Engine → Cedar Policy → Allow/Deny
256
404
  //
257
405
  // Supported IDEs:
258
406
  // - Cursor (beforeSubmitPrompt, beforeShellExecution, beforeMCPExecution, etc.)
259
407
  // - Claude Code (UserPromptSubmit, PreToolUse)
260
408
  // - GitHub Copilot (userPromptSubmitted, preToolUse)
409
+ //
410
+ // Threat Coverage:
411
+ // - OWASP Top 10 for LLM Applications 2025 (LLM01-LLM10)
412
+ // - OWASP Top 10 for Agentic Applications (ASI01-ASI10)
413
+ // - OWASP MCP Top 10 (MCP01-MCP05)
414
+ // - MITRE ATLAS Agent Techniques (AML.T0051, AML.T0080-T0082)
261
415
 
262
416
  namespace Overwatch {
263
417
 
264
418
  // =============================================================================
265
- // ENTITIES - Organization Hierarchy (ReBAC)
419
+ // ENTITIES - Tenant Hierarchy (ReBAC)
266
420
  // =============================================================================
421
+ // Aligned with Guardrails entity hierarchy (Account → Project).
422
+ // Overwatch does not have app-specific policies, so App is omitted.
423
+ //
424
+ // Entity hierarchy enables Cedar's \`in\` operator for policy scoping:
425
+ // Account (org root)
426
+ // └── Project in [Account]
427
+ // └── Tool/Server/FilePath/LlmPrompt in [Project]
428
+ //
429
+ // Policy scoping examples:
430
+ // resource == Overwatch::Tool::"shell" → specific tool
431
+ // resource in Overwatch::Project::"<uuid>" → project-wide
432
+ // resource in Overwatch::Account::"<uuid>" → org-wide
267
433
 
268
- // Top-level organization for multi-tenant policy enforcement
269
- // Enables policies like: principal in Overwatch::Organization::"acme-corp"
270
- entity Organization {
271
- name: String, // "Acme Corp", "Highflame"
272
- };
434
+ /// Account represents an organization (top-level tenant)
435
+ entity Account;
273
436
 
274
- // Team within an organization
275
- // Enables policies like: principal in Overwatch::Team::"security-team"
276
- entity Team in [Organization] {
277
- name: String, // "security", "engineering", "devops"
278
- };
437
+ /// Project represents a project within an account
438
+ entity Project in [Account];
279
439
 
280
440
  // =============================================================================
281
441
  // ENTITIES - Principals
282
442
  // =============================================================================
283
443
 
284
- // Human user or service account making requests to the IDE
285
- entity User in [Team] {
286
- user_type: String, // "external" or "internal"
287
- email: String, // User email (optional)
288
- };
444
+ /// Human user or service account making requests to the IDE
445
+ entity User;
289
446
 
290
- // AI agent (Claude, GitHub Copilot, etc.)
291
- entity Agent in [Team] {
292
- agent_type: String, // "claude", "copilot", etc.
293
- };
447
+ /// AI agent (Claude, GitHub Copilot, etc.)
448
+ entity Agent;
294
449
 
295
- // LLM prompt or session
296
- entity LlmPrompt {
297
- prompt_type: String, // "user_prompt", "session"
298
- };
450
+ // =============================================================================
451
+ // ENTITIES - Resources (scoped under Project)
452
+ // =============================================================================
299
453
 
300
- // MCP tool or native IDE tool
301
- entity Tool {
302
- tool_name: String, // "shell", "read_file", "playwright", etc.
303
- risk_level: String, // "low", "medium", "high"
304
- };
454
+ /// LLM prompt or session resource for process_prompt action
455
+ entity LlmPrompt in [Project];
305
456
 
306
- // MCP server
307
- entity Server {
308
- server_name: String, // "filesystem", "playwright", etc.
309
- };
457
+ /// MCP tool or native IDE tool — resource for call_tool action
458
+ entity Tool in [Project];
310
459
 
311
- // File system path
312
- entity FilePath {
313
- path: String,
314
- is_within_workspace: Bool,
315
- };
460
+ /// MCP server — resource for connect_server action
461
+ entity Server in [Project];
462
+
463
+ /// File system path — resource for read_file/write_file/call_tool actions
464
+ entity FilePath in [Project];
316
465
 
317
466
  // =============================================================================
318
467
  // ACTIONS
319
468
  // =============================================================================
320
469
 
321
470
  // User or agent submits a prompt or receives an AI response
471
+ // Threat focus: injection, jailbreak, secrets, PII, content safety, invisible chars
322
472
  action process_prompt appliesTo {
323
473
  principal: [User, Agent],
324
474
  resource: [LlmPrompt],
325
475
  context: {
326
- // Event & Source
327
- content: String, // Raw content being scanned
328
- source: String, // IDE source: "cursor", "claudecode", "github_copilot"
329
- event: String, // Hook event name
330
- user_email: String, // User identifier
331
-
332
- // Workspace
333
- cwd?: String, // Current working directory
334
- workspace_root?: String, // Workspace/repository root
335
-
336
- // Threat Detection
337
- threat_count: Long, // Total threats detected
338
- highest_severity: String, // "critical", "high", "medium", "low"
339
- threat_categories: Set<String>, // Threat category names
340
- yara_threats: Set<String>, // YARA rule names
341
- max_threat_severity: Long, // Numeric severity (0-4)
342
- contains_secrets: Bool, // Whether secrets detected
343
- prompt_text?: String, // Same as content (legacy)
344
- response_content?: String, // Response content (if available)
345
-
346
- // Trust/Safety Scores (0-100, from Javelin/Lakera/LlamaGuard classifiers)
347
- // Required: content safety classifiers always run for prompt processing
348
- violence_score: Long, // Violence content detection score
349
- weapons_score: Long, // Weapons content detection score
350
- hate_speech_score: Long, // Hate speech detection score
351
- crime_score: Long, // Criminal content detection score
352
- sexual_score: Long, // Sexual content detection score
353
- profanity_score: Long, // Profanity detection score
354
-
355
- // Detector Confidence Scores (0-100, ML classifier confidence)
356
- // Required: ML classifiers always run for prompt processing
357
- pii_confidence: Long, // PII detection confidence
358
- injection_confidence: Long, // Prompt injection confidence
359
- jailbreak_confidence: Long, // Jailbreak detection confidence
360
-
361
- // Agent Security (0-100)
362
- // Required: agent security scanners always run for prompt processing
363
- indirect_injection_score: Long, // Indirect prompt injection risk
476
+ // --- Event & Source ---
477
+ content: String, // Raw content being scanned
478
+ source: String, // IDE source: "cursor", "claudecode", "github_copilot"
479
+ event: String, // Hook event name
480
+ user_email: String, // User identifier
481
+
482
+ // --- Workspace ---
483
+ cwd?: String, // Current working directory
484
+ workspace_root?: String, // Workspace/repository root
485
+
486
+ // --- Threat Detection (from detection engine pipeline) ---
487
+ threat_count: Long, // Total threats detected
488
+ highest_severity: String, // "critical", "high", "medium", "low", "none"
489
+ threat_categories: Set<String>, // Threat category names
490
+ detected_threats: Set<String>, // Detection rule names that matched
491
+ max_threat_severity: Long, // Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)
492
+ contains_secrets: Bool, // Whether secrets/credentials detected
493
+
494
+ // --- Secrets (granular) ---
495
+ secret_types?: Set<String>, // Types: "aws_access_key", "github_token", "ssh_private_key", etc.
496
+ secret_count?: Long, // Number of distinct secrets found
497
+
498
+ // --- PII Detection ---
499
+ pii_detected?: Bool, // Whether any PII patterns matched
500
+ pii_types?: Set<String>, // Types: "ssn", "credit_card", "email", "phone", etc.
501
+ pii_count?: Long, // Number of PII matches
502
+
503
+ // --- Encoding & Unicode Attacks ---
504
+ contains_invisible_chars?: Bool, // Zero-width chars, bidi overrides, tag chars detected
505
+ invisible_chars_score?: Long, // Unicode attack severity (0-100)
506
+
507
+ // --- Content Safety Scores (0-100, from ML classifiers) ---
508
+ violence_score: Long,
509
+ weapons_score: Long,
510
+ hate_speech_score: Long,
511
+ crime_score: Long,
512
+ sexual_score: Long,
513
+ profanity_score: Long,
514
+
515
+ // --- ML Detector Confidence Scores (0-100) ---
516
+ pii_confidence: Long, // PII detection classifier confidence
517
+ injection_confidence: Long, // Prompt injection classifier confidence
518
+ jailbreak_confidence: Long, // Jailbreak detection classifier confidence
519
+
520
+ // --- Agent Security (0-100) ---
521
+ indirect_injection_score: Long, // Indirect prompt injection risk (OWASP LLM01, ASI01)
522
+
523
+ // --- Session Detection History (cross-turn sticky flags) ---
524
+ session_pii_detected?: Bool,
525
+ session_pii_types?: Set<String>,
526
+ session_secrets_detected?: Bool,
527
+ session_secret_types?: Set<String>,
528
+ session_injection_detected?: Bool,
529
+ session_command_injection?: Bool,
530
+ session_threat_turns?: Long,
531
+
532
+ // --- Legacy ---
533
+ prompt_text?: String, // Same as content (backward compatibility)
534
+ response_content?: String, // Response content (if available)
364
535
  },
365
536
  };
366
537
 
367
538
  // User or agent calls a tool (native IDE tool or MCP tool)
539
+ // Threat focus: command injection, tool poisoning, rug pull, data exfiltration, loops
368
540
  action call_tool appliesTo {
369
541
  principal: [User, Agent],
370
542
  resource: [Tool, FilePath],
371
543
  context: {
372
- // Event & Source
373
- content: String, // Raw content being scanned (e.g., shell command)
374
- source: String, // IDE source
375
- event: String, // Hook event name
376
- user_email: String, // User identifier
544
+ // --- Event & Source ---
545
+ content: String, // Raw content being scanned (e.g., shell command, tool args)
546
+ source: String, // IDE source
547
+ event: String, // Hook event name
548
+ user_email: String, // User identifier
377
549
 
378
- // Tool & MCP
379
- tool_name?: String, // Normalized tool name ("shell", "read_file", etc.)
380
- mcp_server?: String, // MCP server name
381
- mcp_tool?: String, // MCP tool name
550
+ // --- Tool & MCP ---
551
+ tool_name?: String, // Normalized tool name ("shell", "read_file", etc.)
552
+ mcp_server?: String, // MCP server name
553
+ mcp_tool?: String, // MCP tool name
382
554
 
383
- // File & Path
384
- path?: String, // File path (if file operation)
555
+ // --- File & Path ---
556
+ path?: String, // File path (if file operation)
385
557
 
386
- // Workspace
558
+ // --- Workspace ---
387
559
  cwd?: String,
388
560
  workspace_root?: String,
389
561
 
390
- // Threat Detection (optional: scanning may not have run before tool call)
562
+ // --- Threat Detection ---
391
563
  threat_count?: Long,
392
564
  highest_severity?: String,
393
565
  threat_categories?: Set<String>,
394
- yara_threats?: Set<String>,
566
+ detected_threats?: Set<String>,
395
567
  max_threat_severity?: Long,
396
568
  contains_secrets?: Bool,
397
- response_content?: String,
398
569
 
399
- // Trust/Safety Scores (0-100, from Javelin/Lakera/LlamaGuard classifiers)
400
- // Optional: only present when trust/safety classifiers have run
401
- violence_score?: Long, // Violence content detection score
402
- weapons_score?: Long, // Weapons content detection score
403
- hate_speech_score?: Long, // Hate speech detection score
404
- crime_score?: Long, // Criminal content detection score
405
- sexual_score?: Long, // Sexual content detection score
406
- profanity_score?: Long, // Profanity detection score
407
-
408
- // Detector Confidence Scores (0-100, ML classifier confidence)
409
- // Optional: only present when ML classifiers have run
410
- pii_confidence?: Long, // PII detection confidence
411
- injection_confidence?: Long, // Prompt injection confidence
412
- jailbreak_confidence?: Long, // Jailbreak detection confidence
413
-
414
- // Agent Security (0-100)
415
- // Optional: only present when agent security scanners have run
416
- tool_poisoning_score?: Long, // Tool description manipulation risk
417
- rug_pull_score?: Long, // Tool behavior mismatch risk
418
- indirect_injection_score?: Long, // Indirect prompt injection risk
419
-
420
- // MCP Trust
421
- // Optional: only present when MCP server verification has run
422
- mcp_server_verified?: Bool, // Whether server is from verified registry
570
+ // --- Secrets (granular) ---
571
+ secret_types?: Set<String>,
572
+ secret_count?: Long,
573
+
574
+ // --- PII Detection ---
575
+ pii_detected?: Bool,
576
+ pii_types?: Set<String>,
577
+ pii_count?: Long,
578
+
579
+ // --- Encoding & Unicode Attacks ---
580
+ contains_invisible_chars?: Bool,
581
+ invisible_chars_score?: Long,
582
+
583
+ // --- Content Safety Scores (0-100) ---
584
+ violence_score?: Long,
585
+ weapons_score?: Long,
586
+ hate_speech_score?: Long,
587
+ crime_score?: Long,
588
+ sexual_score?: Long,
589
+ profanity_score?: Long,
590
+
591
+ // --- ML Detector Confidence Scores (0-100) ---
592
+ pii_confidence?: Long,
593
+ injection_confidence?: Long,
594
+ jailbreak_confidence?: Long,
595
+
596
+ // --- Agent Security (scores 0-100; *_detected fields are Bool flags) --- (OWASP ASI01, ASI02, ASI04; MITRE AML.T0051)
597
+ tool_poisoning_score?: Long, // Hidden instructions in tool description/args
598
+ tool_poisoning_detected?: Bool, // Boolean flag for tool poisoning
599
+ rug_pull_score?: Long, // Tool behavior drift after trust establishment
600
+ rug_pull_detected?: Bool, // Boolean flag for rug pull
601
+ indirect_injection_score?: Long, // Indirect injection via tool output
602
+
603
+ // --- Tool Risk Assessment ---
604
+ tool_risk_score?: Long, // Computed tool risk (0-100)
605
+ tool_category?: String, // "safe", "sensitive", "dangerous"
606
+ tool_is_sensitive?: Bool, // Sensitivity classification
607
+ tool_is_builtin?: Bool, // Built-in IDE tool vs MCP tool
608
+
609
+ // --- Behavioral Analysis --- (OWASP LLM10, ASI02, ASI08)
610
+ loop_detected?: Bool, // Consecutive same-tool call loop
611
+ loop_count?: Long, // Number of consecutive repeat calls
612
+ loop_tool?: String, // Tool name in loop
613
+ suspicious_pattern?: Bool, // Data exfiltration or attack sequence detected
614
+ pattern_type?: String, // "data_exfiltration", "secret_exfiltration", "credential_theft", "destructive_sequence"
615
+ sequence_risk?: Long, // Sequence risk score (0-100)
616
+
617
+ // --- MCP Trust ---
618
+ mcp_server_verified?: Bool, // Whether server is from verified registry
619
+
620
+ // --- Session Detection History (cross-turn sticky flags) ---
621
+ session_pii_detected?: Bool,
622
+ session_pii_types?: Set<String>,
623
+ session_secrets_detected?: Bool,
624
+ session_secret_types?: Set<String>,
625
+ session_injection_detected?: Bool,
626
+ session_command_injection?: Bool,
627
+ session_threat_turns?: Long,
628
+
629
+ // --- Legacy ---
630
+ response_content?: String,
423
631
  },
424
632
  };
425
633
 
426
634
  // Connect to an MCP server
635
+ // Threat focus: supply chain, tool poisoning, rug pull, config risk
427
636
  action connect_server appliesTo {
428
637
  principal: [User, Agent],
429
638
  resource: [Server],
430
639
  context: {
431
- content?: String, // No content to scan when connecting
640
+ content?: String, // Server config content (if available)
432
641
  source: String,
433
642
  event: String,
434
643
  user_email: String,
435
644
  mcp_server?: String,
436
- threat_count?: Long, // Threat scanning may not run for connections
645
+
646
+ // --- Threat Detection ---
647
+ threat_count?: Long,
437
648
  highest_severity?: String,
438
649
  threat_categories?: Set<String>,
439
650
  max_threat_severity?: Long,
440
651
 
441
- // Agent Security (0-100)
442
- // Optional: only present when agent security scanners have run
443
- tool_poisoning_score?: Long, // Tool description manipulation risk
444
- rug_pull_score?: Long, // Tool behavior mismatch risk
445
- indirect_injection_score?: Long, // Indirect prompt injection risk
446
-
447
- // MCP Trust
448
- // Optional: only present when MCP server verification has run
449
- mcp_server_verified?: Bool, // Whether server is from verified registry
652
+ // --- Agent Security (scores 0-100; *_detected fields are Bool flags) --- (OWASP ASI04, MCP01-MCP05)
653
+ tool_poisoning_score?: Long, // Poisoned tool descriptions in server
654
+ tool_poisoning_detected?: Bool,
655
+ rug_pull_score?: Long, // Server behavior change after approval
656
+ rug_pull_detected?: Bool,
657
+ indirect_injection_score?: Long, // Injection payloads in server responses
658
+
659
+ // --- MCP Trust & Config Risk ---
660
+ mcp_server_verified?: Bool, // Verified registry status
661
+ mcp_config_risk?: Bool, // Risky server config detected (inline code exec, etc.)
662
+ mcp_risk_score?: Long, // Config risk severity (0-100)
663
+
664
+ // --- Session Detection History (cross-turn sticky flags) ---
665
+ session_pii_detected?: Bool,
666
+ session_pii_types?: Set<String>,
667
+ session_secrets_detected?: Bool,
668
+ session_secret_types?: Set<String>,
669
+ session_injection_detected?: Bool,
670
+ session_command_injection?: Bool,
671
+ session_threat_turns?: Long,
450
672
  },
451
673
  };
452
674
 
453
675
  // Read a file from disk
676
+ // Threat focus: secrets exposure, PII exposure, path traversal, sensitive paths
454
677
  action read_file appliesTo {
455
678
  principal: [User, Agent],
456
679
  resource: [FilePath],
@@ -462,15 +685,37 @@ action read_file appliesTo {
462
685
  path?: String,
463
686
  cwd?: String,
464
687
  workspace_root?: String,
465
- threat_count?: Long, // Threat scanning may not have run
688
+
689
+ // --- Threat Detection ---
690
+ threat_count?: Long,
466
691
  highest_severity?: String,
467
692
  threat_categories?: Set<String>,
693
+ detected_threats?: Set<String>,
468
694
  max_threat_severity?: Long,
469
695
  contains_secrets?: Bool,
696
+
697
+ // --- Secrets (granular) ---
698
+ secret_types?: Set<String>,
699
+ secret_count?: Long,
700
+
701
+ // --- PII Detection ---
702
+ pii_detected?: Bool,
703
+ pii_types?: Set<String>,
704
+ pii_count?: Long,
705
+
706
+ // --- Session Detection History (cross-turn sticky flags) ---
707
+ session_pii_detected?: Bool,
708
+ session_pii_types?: Set<String>,
709
+ session_secrets_detected?: Bool,
710
+ session_secret_types?: Set<String>,
711
+ session_injection_detected?: Bool,
712
+ session_command_injection?: Bool,
713
+ session_threat_turns?: Long,
470
714
  },
471
715
  };
472
716
 
473
717
  // Write a file to disk
718
+ // Threat focus: secrets in output, PII in output, sensitive paths, malicious code
474
719
  action write_file appliesTo {
475
720
  principal: [User, Agent],
476
721
  resource: [FilePath],
@@ -482,11 +727,32 @@ action write_file appliesTo {
482
727
  path?: String,
483
728
  cwd?: String,
484
729
  workspace_root?: String,
485
- threat_count?: Long, // Threat scanning may not have run
730
+
731
+ // --- Threat Detection ---
732
+ threat_count?: Long,
486
733
  highest_severity?: String,
487
734
  threat_categories?: Set<String>,
735
+ detected_threats?: Set<String>,
488
736
  max_threat_severity?: Long,
489
737
  contains_secrets?: Bool,
738
+
739
+ // --- Secrets (granular) ---
740
+ secret_types?: Set<String>,
741
+ secret_count?: Long,
742
+
743
+ // --- PII Detection ---
744
+ pii_detected?: Bool,
745
+ pii_types?: Set<String>,
746
+ pii_count?: Long,
747
+
748
+ // --- Session Detection History (cross-turn sticky flags) ---
749
+ session_pii_detected?: Bool,
750
+ session_pii_types?: Set<String>,
751
+ session_secrets_detected?: Bool,
752
+ session_secret_types?: Set<String>,
753
+ session_injection_detected?: Bool,
754
+ session_command_injection?: Bool,
755
+ session_threat_turns?: Long,
490
756
  },
491
757
  };
492
758
 
@@ -701,7 +967,44 @@ export const GUARDRAILS_CONTEXT = {
701
967
  { "key": "content_topics", "type": "array", "required": false, "description": "Semantic topics detected in content (e.g., [\'controlled_substances\', \'weapons_manufacturing\']). Use .contains() to block specific topics per application" },
702
968
  { "key": "topic_confidence", "type": "number", "required": false, "description": "Confidence score from topic classifier (0-100). Use with content_topics to tune sensitivity — higher thresholds reduce false positives" },
703
969
  { "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters (zero-width joiners, RTL marks, etc.) were detected in the content. Commonly used for prompt injection evasion" },
704
- { "key": "invisible_chars_score", "type": "number", "required": false, "description": "Density score for invisible characters in the content (0-100). Higher scores indicate more invisible characters, suggesting evasion attempts" }
970
+ { "key": "invisible_chars_score", "type": "number", "required": false, "description": "Density score for invisible characters in the content (0-100). Higher scores indicate more invisible characters, suggesting evasion attempts" },
971
+ { "key": "command_injection_detected", "type": "boolean", "required": false, "description": "Whether command injection patterns were detected in the content (reverse shells, privilege escalation, destructive commands)" },
972
+ { "key": "command_injection_type", "type": "string", "required": false, "description": "Type of command injection: \'reverse_shell\', \'privilege_escalation\', \'code_execution\', \'destructive_command\', or \'data_exfiltration\'" },
973
+ { "key": "command_injection_score", "type": "number", "required": false, "description": "Confidence score for command injection detection (0-100). Higher scores indicate stronger pattern matches" },
974
+ { "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected (e.g., ../../../etc/passwd, URL-encoded variants)" },
975
+ { "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal: \'critical\' (sensitive system files), \'high\' (deep traversal), \'medium\' (moderate traversal), \'low\', or \'none\'" },
976
+ { "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected (e.g., \'sensitive_file\', \'deep_traversal\', \'encoded_traversal\')" },
977
+ { "key": "sql_injection_detected", "type": "boolean", "required": false, "description": "Whether SQL injection patterns were detected (tautologies, UNION-based, destructive queries)" },
978
+ { "key": "sql_injection_type", "type": "string", "required": false, "description": "Type of SQL injection: \'tautology\', \'union_based\', \'destructive\', \'blind\', or \'error_based\'" },
979
+ { "key": "sql_injection_score", "type": "number", "required": false, "description": "Confidence score for SQL injection detection (0-100). Typical threshold: >=75 for blocks" },
980
+ { "key": "cross_origin_detected", "type": "boolean", "required": false, "description": "Whether cross-origin escalation patterns were detected (tool calls or references crossing trust boundaries)" },
981
+ { "key": "cross_origin_type", "type": "string", "required": false, "description": "Type of cross-origin escalation: \'cross_origin_tool\', \'cross_origin_server\', or \'none\'" },
982
+ { "key": "cross_origin_score", "type": "number", "required": false, "description": "Risk score for cross-origin escalation (0-100). Higher scores indicate more suspicious cross-boundary activity" },
983
+ { "key": "encoded_content_detected", "type": "boolean", "required": false, "description": "Whether encoded content was detected (base64, hex, unicode, URL encoding) that may hide malicious payloads" },
984
+ { "key": "encoded_types", "type": "array", "required": false, "description": "Array of encoding types detected (e.g., [\'base64\', \'hex\', \'unicode\', \'url\']). Use .contains() to check for specific encoding types" },
985
+ { "key": "encoded_count", "type": "number", "required": false, "description": "Number of encoded segments found in the content" },
986
+ { "key": "encoded_score", "type": "number", "required": false, "description": "Risk score for encoded injection attempts (0-100). Considers encoding density and decoded content patterns" },
987
+ { "key": "detected_language", "type": "string", "required": false, "description": "ISO language code of the detected language (e.g., \'en\', \'fr\', \'zh\'). Use this for language-specific policy enforcement" },
988
+ { "key": "is_english", "type": "boolean", "required": false, "description": "Whether the content is in English. Useful for applying English-only policies or flagging non-English content" },
989
+ { "key": "language_confidence", "type": "number", "required": false, "description": "Confidence score for language detection (0-100). Use with detected_language to tune sensitivity" },
990
+ { "key": "detected_script", "type": "string", "required": false, "description": "Writing script detected in content: \'latin\', \'cyrillic\', \'arabic\', \'cjk\', \'unknown\', etc. Use to detect script-switching attacks" },
991
+ { "key": "is_latin_script", "type": "boolean", "required": false, "description": "Whether the content uses Latin script. Useful for detecting non-Latin script injection attempts" },
992
+ { "key": "script_confidence", "type": "number", "required": false, "description": "Confidence score for script detection (0-100)" },
993
+ { "key": "hallucination_score", "type": "number", "required": false, "description": "ML-based score for hallucinated or fabricated content (0-100). Higher scores indicate higher likelihood of non-factual claims" },
994
+ { "key": "factuality_score", "type": "number", "required": false, "description": "ML-based factuality assessment score (0-100). Higher scores indicate more factually grounded content" },
995
+ { "key": "sentiment_score", "type": "number", "required": false, "description": "Sentiment analysis score. Use to detect overly negative or manipulative tone in prompts or responses" },
996
+ { "key": "contains_code", "type": "boolean", "required": false, "description": "Whether code snippets were detected in the content" },
997
+ { "key": "code_languages", "type": "array", "required": false, "description": "Array of programming languages detected in code snippets (e.g., [\'python\', \'javascript\'])" },
998
+ { "key": "code_ratio", "type": "number", "required": false, "description": "Percentage of content that consists of code (0-100). High values may indicate code dumps or automated content" },
999
+ { "key": "keyword_matched", "type": "boolean", "required": false, "description": "Whether any configured keywords or phrases were matched in the content" },
1000
+ { "key": "keyword_categories", "type": "array", "required": false, "description": "Array of keyword categories matched (e.g., [\'competitor_names\', \'restricted_topics\']). Use .contains() for category-specific policies" },
1001
+ { "key": "keyword_count", "type": "number", "required": false, "description": "Number of keyword matches found in the content" },
1002
+ { "key": "contains_non_ascii", "type": "boolean", "required": false, "description": "Whether non-ASCII characters were detected. May indicate obfuscation or encoding-based evasion attempts" },
1003
+ { "key": "phishing_detected", "type": "boolean", "required": false, "description": "Whether phishing patterns were detected in the content (suspicious URLs, impersonation, credential requests)" },
1004
+ { "key": "content_safety_score", "type": "number", "required": false, "description": "Aggregate content safety score (0-100). Combines multiple safety signals into a single risk indicator" },
1005
+ { "key": "content_safety_blocked", "type": "boolean", "required": false, "description": "Whether content was flagged for blocking by the content safety system" },
1006
+ { "key": "conversation_turn", "type": "number", "required": false, "description": "Current conversation turn number. Use for policies that escalate strictness over long conversations" },
1007
+ { "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected (attack spread across multiple conversation turns)" }
705
1008
  ]
706
1009
  },
707
1010
  {
@@ -731,7 +1034,35 @@ export const GUARDRAILS_CONTEXT = {
731
1034
  { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in tool arguments or content" },
732
1035
  { "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in tool arguments" },
733
1036
  { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in tool arguments or content" },
734
- { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in tool arguments" }
1037
+ { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in tool arguments" },
1038
+ { "key": "injection_score", "type": "number", "required": false, "description": "ML-based confidence score for prompt injection in tool arguments (0-100)" },
1039
+ { "key": "command_injection_detected", "type": "boolean", "required": false, "description": "Whether command injection patterns were detected in tool arguments" },
1040
+ { "key": "command_injection_type", "type": "string", "required": false, "description": "Type of command injection detected in tool arguments" },
1041
+ { "key": "command_injection_score", "type": "number", "required": false, "description": "Confidence score for command injection in tool arguments (0-100)" },
1042
+ { "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected in tool arguments" },
1043
+ { "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal in tool arguments" },
1044
+ { "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in tool arguments" },
1045
+ { "key": "sql_injection_detected", "type": "boolean", "required": false, "description": "Whether SQL injection patterns were detected in tool arguments" },
1046
+ { "key": "sql_injection_type", "type": "string", "required": false, "description": "Type of SQL injection detected in tool arguments" },
1047
+ { "key": "sql_injection_score", "type": "number", "required": false, "description": "Confidence score for SQL injection in tool arguments (0-100)" },
1048
+ { "key": "tool_poisoning_detected", "type": "boolean", "required": false, "description": "Whether tool poisoning was detected (hidden instructions in tool descriptions or arguments designed to manipulate agent behavior)" },
1049
+ { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Confidence score for tool poisoning detection (0-100). Typical threshold: >=70 for blocks" },
1050
+ { "key": "tool_poisoning_type", "type": "string", "required": false, "description": "Type of tool poisoning: \'hidden_instructions\', \'system_prompt_injection\', or \'authority_hijack\'" },
1051
+ { "key": "rug_pull_detected", "type": "boolean", "required": false, "description": "Whether a rug pull was detected (tool behavior changed significantly after establishing trust)" },
1052
+ { "key": "rug_pull_score", "type": "number", "required": false, "description": "Confidence score for rug pull detection based on behavioral drift analysis (0-100)" },
1053
+ { "key": "mcp_config_risk", "type": "boolean", "required": false, "description": "Whether MCP configuration risk was detected (inline code execution, suspicious URLs, cross-origin issues)" },
1054
+ { "key": "mcp_risk_type", "type": "string", "required": false, "description": "Type of MCP risk: \'inline_execution\', \'suspicious_url\', or \'cross_origin\'" },
1055
+ { "key": "mcp_risk_score", "type": "number", "required": false, "description": "Risk score for MCP configuration issues (0-100). Typical threshold: >=70 for blocks" },
1056
+ { "key": "cross_origin_detected", "type": "boolean", "required": false, "description": "Whether cross-origin escalation patterns were detected in tool calls crossing trust boundaries" },
1057
+ { "key": "cross_origin_type", "type": "string", "required": false, "description": "Type of cross-origin escalation detected in tool call" },
1058
+ { "key": "cross_origin_score", "type": "number", "required": false, "description": "Risk score for cross-origin escalation in tool calls (0-100)" },
1059
+ { "key": "encoded_content_detected", "type": "boolean", "required": false, "description": "Whether encoded content was detected in tool arguments (base64, hex, unicode encoding hiding payloads)" },
1060
+ { "key": "encoded_types", "type": "array", "required": false, "description": "Array of encoding types detected in tool arguments (e.g., [\'base64\', \'hex\'])" },
1061
+ { "key": "encoded_count", "type": "number", "required": false, "description": "Number of encoded segments found in tool arguments" },
1062
+ { "key": "encoded_score", "type": "number", "required": false, "description": "Risk score for encoded injection in tool arguments (0-100)" },
1063
+ { "key": "rug_pull_type", "type": "string", "required": false, "description": "Type of rug pull detected: \'risk_spike\' (sudden risk increase), \'pattern_change\' (behavioral shift), \'combined\', or \'none\'" },
1064
+ { "key": "conversation_turn", "type": "number", "required": false, "description": "Current conversation turn number for the agentic session" },
1065
+ { "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected across tool calls in the session" }
735
1066
  ]
736
1067
  },
737
1068
  {
@@ -743,7 +1074,10 @@ export const GUARDRAILS_CONTEXT = {
743
1074
  { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the file content being read" },
744
1075
  { "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in file content" },
745
1076
  { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in the file content" },
746
- { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in file content" }
1077
+ { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in file content" },
1078
+ { "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected in the file path being read" },
1079
+ { "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal in the file read path" },
1080
+ { "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file read path" }
747
1081
  ]
748
1082
  },
749
1083
  {
@@ -755,7 +1089,10 @@ export const GUARDRAILS_CONTEXT = {
755
1089
  { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the content being written. Block writes containing credentials" },
756
1090
  { "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in write content" },
757
1091
  { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in the content being written" },
758
- { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in write content" }
1092
+ { "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in write content" },
1093
+ { "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected in the file path being written" },
1094
+ { "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal in the file write path" },
1095
+ { "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file write path" }
759
1096
  ]
760
1097
  },
761
1098
  {
@@ -765,7 +1102,16 @@ export const GUARDRAILS_CONTEXT = {
765
1102
  { "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
766
1103
  { "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
767
1104
  { "key": "mcp_server", "type": "string", "required": false, "description": "Name of the MCP server being connected to (e.g., \'github\', \'filesystem\', \'slack\'). Use this to allow or block specific MCP servers" },
768
- { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether the MCP server is from a verified/trusted registry. Use this to block connections to unverified servers" }
1105
+ { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether the MCP server is from a verified/trusted registry. Use this to block connections to unverified servers" },
1106
+ { "key": "tool_poisoning_detected", "type": "boolean", "required": false, "description": "Whether tool poisoning was detected in the MCP server\'s tool descriptions (hidden instructions designed to manipulate agent behavior)" },
1107
+ { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Confidence score for tool poisoning in MCP server tools (0-100)" },
1108
+ { "key": "tool_poisoning_type", "type": "string", "required": false, "description": "Type of tool poisoning detected in MCP server: \'hidden_instructions\', \'system_prompt_injection\', or \'authority_hijack\'" },
1109
+ { "key": "mcp_config_risk", "type": "boolean", "required": false, "description": "Whether MCP configuration risk was detected (inline code execution in server configs, suspicious URLs, cross-origin issues)" },
1110
+ { "key": "mcp_risk_type", "type": "string", "required": false, "description": "Type of MCP configuration risk: \'inline_execution\', \'suspicious_url\', or \'cross_origin\'" },
1111
+ { "key": "mcp_risk_score", "type": "number", "required": false, "description": "Risk score for MCP configuration issues (0-100)" },
1112
+ { "key": "cross_origin_detected", "type": "boolean", "required": false, "description": "Whether cross-origin escalation patterns were detected in the MCP server connection" },
1113
+ { "key": "cross_origin_type", "type": "string", "required": false, "description": "Type of cross-origin escalation detected in server connection" },
1114
+ { "key": "cross_origin_score", "type": "number", "required": false, "description": "Risk score for cross-origin escalation in server connection (0-100)" }
769
1115
  ]
770
1116
  }
771
1117
  ]
@@ -775,8 +1121,8 @@ export const GUARDRAILS_CONTEXT = {
775
1121
  */
776
1122
  export const OVERWATCH_CONTEXT = {
777
1123
  "service": "overwatch",
778
- "version": "2.0.0",
779
- "description": "Overwatch (Guardian) IDE security & policy enforcement",
1124
+ "version": "3.0.0",
1125
+ "description": "Overwatch IDE agent security & policy enforcement",
780
1126
  "actions": [
781
1127
  {
782
1128
  "name": "process_prompt",
@@ -788,14 +1134,20 @@ export const OVERWATCH_CONTEXT = {
788
1134
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier (OAuth verified or fallback)" },
789
1135
  { "key": "cwd", "type": "string", "required": false, "description": "Current working directory" },
790
1136
  { "key": "workspace_root", "type": "string", "required": false, "description": "Workspace/repository root path" },
791
- { "key": "threat_count", "type": "number", "required": true, "description": "Total number of threats detected by YARA/Javelin" },
792
- { "key": "highest_severity", "type": "string", "required": true, "description": "Highest severity level: critical, high, medium, low" },
793
- { "key": "threat_categories", "type": "array", "required": true, "description": "Threat category names from aggregator" },
794
- { "key": "threat_types", "type": "array", "required": true, "description": "YARA threat category names" },
795
- { "key": "yara_threats", "type": "array", "required": true, "description": "YARA rule names that matched" },
796
- { "key": "max_threat_severity", "type": "number", "required": true, "description": "Numeric severity (0-4, where 4=CRITICAL)" },
1137
+ { "key": "threat_count", "type": "number", "required": true, "description": "Total number of threats detected by the detection engine pipeline" },
1138
+ { "key": "highest_severity", "type": "string", "required": true, "description": "Highest severity level: critical, high, medium, low, none" },
1139
+ { "key": "threat_categories", "type": "array", "required": true, "description": "Threat category names from the detection aggregator" },
1140
+ { "key": "detected_threats", "type": "array", "required": true, "description": "Detection rule names that matched (e.g., prompt_injection, jailbreak, credit_card, secret_exposure)" },
1141
+ { "key": "max_threat_severity", "type": "number", "required": true, "description": "Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)" },
797
1142
  { "key": "contains_secrets", "type": "boolean", "required": true, "description": "Whether secrets or credentials were detected" },
798
- { "key": "prompt_text", "type": "string", "required": false, "description": "Same as content (legacy field)" },
1143
+ { "key": "secret_types", "type": "array", "required": false, "description": "Specific secret types found: aws_access_key, github_token, ssh_private_key, api_key, etc." },
1144
+ { "key": "secret_count", "type": "number", "required": false, "description": "Number of distinct secrets detected" },
1145
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether any PII patterns were matched" },
1146
+ { "key": "pii_types", "type": "array", "required": false, "description": "Specific PII types found: ssn, credit_card, email, phone, medical_record, etc." },
1147
+ { "key": "pii_count", "type": "number", "required": false, "description": "Number of PII pattern matches" },
1148
+ { "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag chars) were detected" },
1149
+ { "key": "invisible_chars_score", "type": "number", "required": false, "description": "Invisible character attack severity score (0-100)" },
1150
+ { "key": "prompt_text", "type": "string", "required": false, "description": "Same as content (backward compatibility)" },
799
1151
  { "key": "response_content", "type": "string", "required": false, "description": "Response content from AI (if available)" },
800
1152
  { "key": "violence_score", "type": "number", "required": true, "description": "Violence content detection score (0-100)" },
801
1153
  { "key": "weapons_score", "type": "number", "required": true, "description": "Weapons content detection score (0-100)" },
@@ -803,17 +1155,17 @@ export const OVERWATCH_CONTEXT = {
803
1155
  { "key": "crime_score", "type": "number", "required": true, "description": "Criminal content detection score (0-100)" },
804
1156
  { "key": "sexual_score", "type": "number", "required": true, "description": "Sexual content detection score (0-100)" },
805
1157
  { "key": "profanity_score", "type": "number", "required": true, "description": "Profanity detection score (0-100)" },
806
- { "key": "pii_confidence", "type": "number", "required": true, "description": "PII detection classifier confidence (0-100)" },
807
- { "key": "injection_confidence", "type": "number", "required": true, "description": "Prompt injection classifier confidence (0-100)" },
808
- { "key": "jailbreak_confidence", "type": "number", "required": true, "description": "Jailbreak detection classifier confidence (0-100)" },
809
- { "key": "indirect_injection_score", "type": "number", "required": true, "description": "Indirect prompt injection risk score (0-100)" }
1158
+ { "key": "pii_confidence", "type": "number", "required": true, "description": "PII detection ML classifier confidence (0-100)" },
1159
+ { "key": "injection_confidence", "type": "number", "required": true, "description": "Prompt injection ML classifier confidence (0-100)" },
1160
+ { "key": "jailbreak_confidence", "type": "number", "required": true, "description": "Jailbreak detection ML classifier confidence (0-100)" },
1161
+ { "key": "indirect_injection_score", "type": "number", "required": true, "description": "Indirect prompt injection risk score (0-100) — injection via tool outputs or retrieved content" }
810
1162
  ]
811
1163
  },
812
1164
  {
813
1165
  "name": "call_tool",
814
1166
  "description": "User calls a tool (native IDE tool or MCP tool)",
815
1167
  "context_attributes": [
816
- { "key": "content", "type": "string", "required": true, "description": "Raw content being scanned (e.g., shell command)" },
1168
+ { "key": "content", "type": "string", "required": true, "description": "Raw content being scanned (e.g., shell command, tool arguments)" },
817
1169
  { "key": "source", "type": "string", "required": true, "description": "IDE source: cursor, claudecode, github_copilot" },
818
1170
  { "key": "event", "type": "string", "required": true, "description": "Hook event name (e.g., beforeShellExecution, PreToolUse)" },
819
1171
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier" },
@@ -823,46 +1175,69 @@ export const OVERWATCH_CONTEXT = {
823
1175
  { "key": "path", "type": "string", "required": false, "description": "File path (if file operation)" },
824
1176
  { "key": "cwd", "type": "string", "required": false, "description": "Current working directory" },
825
1177
  { "key": "workspace_root", "type": "string", "required": false, "description": "Workspace/repository root path" },
826
- { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
827
- { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity (if scanning ran)" },
828
- { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names (if scanning ran)" },
829
- { "key": "threat_types", "type": "array", "required": false, "description": "YARA threat categories (if scanning ran)" },
830
- { "key": "yara_threats", "type": "array", "required": false, "description": "YARA rule names (if scanning ran)" },
831
- { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
832
- { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets detected (if scanning ran)" },
833
- { "key": "response_content", "type": "string", "required": false, "description": "Response content (if available)" },
1178
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected by the detection engine pipeline" },
1179
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level: critical, high, medium, low, none" },
1180
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names from the detection aggregator" },
1181
+ { "key": "detected_threats", "type": "array", "required": false, "description": "Detection rule names that matched" },
1182
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)" },
1183
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets or credentials were detected" },
1184
+ { "key": "secret_types", "type": "array", "required": false, "description": "Specific secret types found" },
1185
+ { "key": "secret_count", "type": "number", "required": false, "description": "Number of distinct secrets detected" },
1186
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether any PII patterns were matched" },
1187
+ { "key": "pii_types", "type": "array", "required": false, "description": "Specific PII types found" },
1188
+ { "key": "pii_count", "type": "number", "required": false, "description": "Number of PII pattern matches" },
1189
+ { "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters were detected" },
1190
+ { "key": "invisible_chars_score", "type": "number", "required": false, "description": "Invisible character attack severity score (0-100)" },
1191
+ { "key": "response_content", "type": "string", "required": false, "description": "Response content from AI (if available)" },
834
1192
  { "key": "violence_score", "type": "number", "required": false, "description": "Violence content detection score (0-100)" },
835
1193
  { "key": "weapons_score", "type": "number", "required": false, "description": "Weapons content detection score (0-100)" },
836
1194
  { "key": "hate_speech_score", "type": "number", "required": false, "description": "Hate speech detection score (0-100)" },
837
1195
  { "key": "crime_score", "type": "number", "required": false, "description": "Criminal content detection score (0-100)" },
838
1196
  { "key": "sexual_score", "type": "number", "required": false, "description": "Sexual content detection score (0-100)" },
839
1197
  { "key": "profanity_score", "type": "number", "required": false, "description": "Profanity detection score (0-100)" },
840
- { "key": "pii_confidence", "type": "number", "required": false, "description": "PII detection classifier confidence (0-100)" },
841
- { "key": "injection_confidence", "type": "number", "required": false, "description": "Prompt injection classifier confidence (0-100)" },
842
- { "key": "jailbreak_confidence", "type": "number", "required": false, "description": "Jailbreak detection classifier confidence (0-100)" },
843
- { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100)" },
844
- { "key": "rug_pull_score", "type": "number", "required": false, "description": "Tool behavior mismatch risk score (0-100)" },
845
- { "key": "indirect_injection_score", "type": "number", "required": false, "description": "Indirect prompt injection risk score (0-100)" },
846
- { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether MCP server is from a verified registry" }
1198
+ { "key": "pii_confidence", "type": "number", "required": false, "description": "PII detection ML classifier confidence (0-100)" },
1199
+ { "key": "injection_confidence", "type": "number", "required": false, "description": "Prompt injection ML classifier confidence (0-100)" },
1200
+ { "key": "jailbreak_confidence", "type": "number", "required": false, "description": "Jailbreak detection ML classifier confidence (0-100)" },
1201
+ { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100) — hidden instructions in tool descriptions or arguments" },
1202
+ { "key": "tool_poisoning_detected", "type": "boolean", "required": false, "description": "Whether tool poisoning patterns were explicitly detected" },
1203
+ { "key": "rug_pull_score", "type": "number", "required": false, "description": "Tool behavioral drift score (0-100) — deviation from established tool behavior patterns" },
1204
+ { "key": "rug_pull_detected", "type": "boolean", "required": false, "description": "Whether a rug pull attack was explicitly detected" },
1205
+ { "key": "indirect_injection_score", "type": "number", "required": false, "description": "Indirect prompt injection risk score (0-100) — injection via tool outputs" },
1206
+ { "key": "tool_risk_score", "type": "number", "required": false, "description": "Computed tool risk score (0-100) combining tool type, arguments, and context" },
1207
+ { "key": "tool_category", "type": "string", "required": false, "description": "Tool risk classification: safe, sensitive, dangerous" },
1208
+ { "key": "tool_is_sensitive", "type": "boolean", "required": false, "description": "Whether the tool is classified as sensitive (shell, file write, network)" },
1209
+ { "key": "tool_is_builtin", "type": "boolean", "required": false, "description": "Whether the tool is a built-in IDE tool (true) or MCP tool (false)" },
1210
+ { "key": "loop_detected", "type": "boolean", "required": false, "description": "Whether a consecutive same-tool call loop was detected" },
1211
+ { "key": "loop_count", "type": "number", "required": false, "description": "Number of consecutive calls to the same tool" },
1212
+ { "key": "loop_tool", "type": "string", "required": false, "description": "The tool name involved in the detected loop" },
1213
+ { "key": "suspicious_pattern", "type": "boolean", "required": false, "description": "Whether a suspicious action sequence was detected (exfiltration, theft, destructive)" },
1214
+ { "key": "pattern_type", "type": "string", "required": false, "description": "Type of suspicious pattern: data_exfiltration, secret_exfiltration, credential_theft, destructive_sequence" },
1215
+ { "key": "sequence_risk", "type": "number", "required": false, "description": "Behavioral sequence risk score (0-100)" },
1216
+ { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether the MCP server is from a verified registry" }
847
1217
  ]
848
1218
  },
849
1219
  {
850
1220
  "name": "connect_server",
851
1221
  "description": "Connect to an MCP server",
852
1222
  "context_attributes": [
853
- { "key": "content", "type": "string", "required": false, "description": "Raw content being scanned (if available)" },
854
- { "key": "source", "type": "string", "required": true, "description": "IDE source" },
1223
+ { "key": "content", "type": "string", "required": false, "description": "Server configuration content (if available)" },
1224
+ { "key": "source", "type": "string", "required": true, "description": "IDE source: cursor, claudecode, github_copilot" },
855
1225
  { "key": "event", "type": "string", "required": true, "description": "Hook event name" },
856
1226
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier" },
857
1227
  { "key": "mcp_server", "type": "string", "required": false, "description": "MCP server name" },
858
- { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
859
- { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level (if scanning ran)" },
860
- { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names (if scanning ran)" },
861
- { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
862
- { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100)" },
863
- { "key": "rug_pull_score", "type": "number", "required": false, "description": "Tool behavior mismatch risk score (0-100)" },
864
- { "key": "indirect_injection_score", "type": "number", "required": false, "description": "Indirect prompt injection risk score (0-100)" },
865
- { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether MCP server is from a verified registry" }
1228
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected by the detection engine pipeline" },
1229
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level: critical, high, medium, low, none" },
1230
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names from the detection aggregator" },
1231
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)" },
1232
+ { "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters were detected in server data" },
1233
+ { "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100) — poisoned tool descriptions in server" },
1234
+ { "key": "tool_poisoning_detected", "type": "boolean", "required": false, "description": "Whether tool poisoning patterns were explicitly detected" },
1235
+ { "key": "rug_pull_score", "type": "number", "required": false, "description": "Server behavioral drift score (0-100) — behavior change after approval" },
1236
+ { "key": "rug_pull_detected", "type": "boolean", "required": false, "description": "Whether a rug pull attack was explicitly detected" },
1237
+ { "key": "indirect_injection_score", "type": "number", "required": false, "description": "Indirect injection risk score (0-100) — injection payloads in server responses" },
1238
+ { "key": "mcp_server_verified", "type": "boolean", "required": false, "description": "Whether the MCP server is from a verified registry" },
1239
+ { "key": "mcp_config_risk", "type": "boolean", "required": false, "description": "Whether risky server configuration was detected (inline code exec, mixed transports)" },
1240
+ { "key": "mcp_risk_score", "type": "number", "required": false, "description": "MCP configuration risk severity score (0-100)" }
866
1241
  ]
867
1242
  },
868
1243
  {
@@ -870,17 +1245,23 @@ export const OVERWATCH_CONTEXT = {
870
1245
  "description": "Read a file from disk",
871
1246
  "context_attributes": [
872
1247
  { "key": "content", "type": "string", "required": true, "description": "File content or operation details" },
873
- { "key": "source", "type": "string", "required": true, "description": "IDE source" },
1248
+ { "key": "source", "type": "string", "required": true, "description": "IDE source: cursor, claudecode, github_copilot" },
874
1249
  { "key": "event", "type": "string", "required": true, "description": "Hook event name (e.g., beforeReadFile)" },
875
1250
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier" },
876
1251
  { "key": "path", "type": "string", "required": false, "description": "File path being read" },
877
1252
  { "key": "cwd", "type": "string", "required": false, "description": "Current working directory" },
878
1253
  { "key": "workspace_root", "type": "string", "required": false, "description": "Workspace root path" },
879
- { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
880
- { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level (if scanning ran)" },
881
- { "key": "threat_categories", "type": "array", "required": false, "description": "Threat categories (if scanning ran)" },
882
- { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
883
- { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets detected (if scanning ran)" }
1254
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected by the detection engine pipeline" },
1255
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level: critical, high, medium, low, none" },
1256
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names from the detection aggregator" },
1257
+ { "key": "detected_threats", "type": "array", "required": false, "description": "Detection rule names that matched" },
1258
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)" },
1259
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets or credentials were detected in file content" },
1260
+ { "key": "secret_types", "type": "array", "required": false, "description": "Specific secret types found in file" },
1261
+ { "key": "secret_count", "type": "number", "required": false, "description": "Number of distinct secrets detected in file" },
1262
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether any PII patterns were matched in file content" },
1263
+ { "key": "pii_types", "type": "array", "required": false, "description": "Specific PII types found in file" },
1264
+ { "key": "pii_count", "type": "number", "required": false, "description": "Number of PII pattern matches in file" }
884
1265
  ]
885
1266
  },
886
1267
  {
@@ -888,17 +1269,24 @@ export const OVERWATCH_CONTEXT = {
888
1269
  "description": "Write a file to disk",
889
1270
  "context_attributes": [
890
1271
  { "key": "content", "type": "string", "required": true, "description": "File content being written" },
891
- { "key": "source", "type": "string", "required": true, "description": "IDE source" },
1272
+ { "key": "source", "type": "string", "required": true, "description": "IDE source: cursor, claudecode, github_copilot" },
892
1273
  { "key": "event", "type": "string", "required": true, "description": "Hook event name" },
893
1274
  { "key": "user_email", "type": "string", "required": true, "description": "User identifier" },
894
1275
  { "key": "path", "type": "string", "required": false, "description": "File path being written" },
895
1276
  { "key": "cwd", "type": "string", "required": false, "description": "Current working directory" },
896
1277
  { "key": "workspace_root", "type": "string", "required": false, "description": "Workspace root path" },
897
- { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected (if scanning ran)" },
898
- { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level (if scanning ran)" },
899
- { "key": "threat_categories", "type": "array", "required": false, "description": "Threat categories (if scanning ran)" },
900
- { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity 0-4 (if scanning ran)" },
901
- { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets detected (if scanning ran)" }
1278
+ { "key": "threat_count", "type": "number", "required": false, "description": "Total threats detected by the detection engine pipeline" },
1279
+ { "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level: critical, high, medium, low, none" },
1280
+ { "key": "threat_categories", "type": "array", "required": false, "description": "Threat category names from the detection aggregator" },
1281
+ { "key": "detected_threats", "type": "array", "required": false, "description": "Detection rule names that matched" },
1282
+ { "key": "max_threat_severity", "type": "number", "required": false, "description": "Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)" },
1283
+ { "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets or credentials were detected in content being written" },
1284
+ { "key": "secret_types", "type": "array", "required": false, "description": "Specific secret types found" },
1285
+ { "key": "secret_count", "type": "number", "required": false, "description": "Number of distinct secrets detected" },
1286
+ { "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether any PII patterns were matched in content being written" },
1287
+ { "key": "pii_types", "type": "array", "required": false, "description": "Specific PII types found" },
1288
+ { "key": "pii_count", "type": "number", "required": false, "description": "Number of PII pattern matches" },
1289
+ { "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters were detected in content being written" }
902
1290
  ]
903
1291
  }
904
1292
  ]