@highflame/policy 2.1.4 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/_schemas/guardrails/context.json +600 -6
- package/_schemas/guardrails/schema.cedarschema +79 -3
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +118 -0
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +4 -4
- package/_schemas/guardrails/templates/defaults/injection.cedar +10 -10
- package/_schemas/guardrails/templates/defaults/secrets.cedar +2 -2
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +1 -1
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +4 -4
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +1 -1
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +157 -0
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +140 -0
- package/_schemas/guardrails/templates/templates.json +41 -0
- package/_schemas/overwatch/context.json +232 -4
- package/_schemas/overwatch/schema.cedarschema +42 -4
- package/dist/guardrails-context.gen.d.ts +24 -2
- package/dist/guardrails-context.gen.js +24 -2
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +524 -36
- package/dist/overwatch-context.gen.d.ts +10 -0
- package/dist/overwatch-context.gen.js +10 -0
- package/dist/service-schemas.gen.d.ts +2 -2
- package/dist/service-schemas.gen.js +275 -24
- package/package.json +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
- package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
- package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
|
@@ -112,8 +112,12 @@ namespace Guardrails {
|
|
|
112
112
|
"detector_count": Long,
|
|
113
113
|
|
|
114
114
|
// Security - Injection & Jailbreak (optional)
|
|
115
|
-
"
|
|
116
|
-
"
|
|
115
|
+
"injection_confidence"?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
116
|
+
"jailbreak_confidence"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
117
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
118
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
119
|
+
"jailbreak_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
120
|
+
"jailbreak_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
117
121
|
"injection_type"?: String, // "prompt" | "sql" | "command" | "none"
|
|
118
122
|
|
|
119
123
|
// Privacy - Secrets (optional)
|
|
@@ -199,6 +203,21 @@ namespace Guardrails {
|
|
|
199
203
|
"session_injection_detected"?: Bool,
|
|
200
204
|
"session_command_injection"?: Bool,
|
|
201
205
|
"session_threat_turns"?: Long,
|
|
206
|
+
"session_max_injection_score"?: Long,
|
|
207
|
+
"session_max_jailbreak_score"?: Long,
|
|
208
|
+
"session_max_command_injection_score"?: Long,
|
|
209
|
+
"session_max_pii_score"?: Long,
|
|
210
|
+
"session_max_secret_score"?: Long,
|
|
211
|
+
"session_cumulative_risk_score"?: Long,
|
|
212
|
+
|
|
213
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
214
|
+
// Present when the request is made by an AI agent (API key or JWT with agent claims).
|
|
215
|
+
// Empty strings for human user requests. Use these to write agent-specific policies.
|
|
216
|
+
"agent_id"?: String, // Unique agent identifier (e.g., "agent_research_v3")
|
|
217
|
+
"agent_type"?: String, // "orchestrator" | "autonomous" | "tool_agent" | "human_proxy"
|
|
218
|
+
"agent_trust_level"?: String, // "first_party" | "verified_third_party" | "unverified"
|
|
219
|
+
"agent_framework"?: String, // Agent framework (e.g., "claude-code", "langchain", "crewai")
|
|
220
|
+
"agent_publisher"?: String, // Organization that published the agent
|
|
202
221
|
|
|
203
222
|
};
|
|
204
223
|
|
|
@@ -240,10 +259,13 @@ namespace Guardrails {
|
|
|
240
259
|
|
|
241
260
|
// Security checks on tool arguments (optional)
|
|
242
261
|
"contains_secrets"?: Bool,
|
|
262
|
+
"secret_count"?: Long,
|
|
243
263
|
"secret_types"?: Set<String>,
|
|
244
264
|
"pii_detected"?: Bool,
|
|
245
265
|
"pii_types"?: Set<String>,
|
|
246
|
-
"
|
|
266
|
+
"injection_confidence"?: Long,
|
|
267
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
268
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
247
269
|
|
|
248
270
|
// Security - Pattern Detection (optional)
|
|
249
271
|
"command_injection_detected"?: Bool,
|
|
@@ -292,6 +314,19 @@ namespace Guardrails {
|
|
|
292
314
|
"session_injection_detected"?: Bool,
|
|
293
315
|
"session_command_injection"?: Bool,
|
|
294
316
|
"session_threat_turns"?: Long,
|
|
317
|
+
"session_max_injection_score"?: Long,
|
|
318
|
+
"session_max_jailbreak_score"?: Long,
|
|
319
|
+
"session_max_command_injection_score"?: Long,
|
|
320
|
+
"session_max_pii_score"?: Long,
|
|
321
|
+
"session_max_secret_score"?: Long,
|
|
322
|
+
"session_cumulative_risk_score"?: Long,
|
|
323
|
+
|
|
324
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
325
|
+
"agent_id"?: String,
|
|
326
|
+
"agent_type"?: String,
|
|
327
|
+
"agent_trust_level"?: String,
|
|
328
|
+
"agent_framework"?: String,
|
|
329
|
+
"agent_publisher"?: String,
|
|
295
330
|
|
|
296
331
|
};
|
|
297
332
|
|
|
@@ -303,6 +338,7 @@ namespace Guardrails {
|
|
|
303
338
|
|
|
304
339
|
// Security checks on file content (optional)
|
|
305
340
|
"contains_secrets"?: Bool,
|
|
341
|
+
"secret_count"?: Long,
|
|
306
342
|
"secret_types"?: Set<String>,
|
|
307
343
|
"pii_detected"?: Bool,
|
|
308
344
|
"pii_types"?: Set<String>,
|
|
@@ -320,6 +356,19 @@ namespace Guardrails {
|
|
|
320
356
|
"session_injection_detected"?: Bool,
|
|
321
357
|
"session_command_injection"?: Bool,
|
|
322
358
|
"session_threat_turns"?: Long,
|
|
359
|
+
"session_max_injection_score"?: Long,
|
|
360
|
+
"session_max_jailbreak_score"?: Long,
|
|
361
|
+
"session_max_command_injection_score"?: Long,
|
|
362
|
+
"session_max_pii_score"?: Long,
|
|
363
|
+
"session_max_secret_score"?: Long,
|
|
364
|
+
"session_cumulative_risk_score"?: Long,
|
|
365
|
+
|
|
366
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
367
|
+
"agent_id"?: String,
|
|
368
|
+
"agent_type"?: String,
|
|
369
|
+
"agent_trust_level"?: String,
|
|
370
|
+
"agent_framework"?: String,
|
|
371
|
+
"agent_publisher"?: String,
|
|
323
372
|
|
|
324
373
|
};
|
|
325
374
|
|
|
@@ -331,6 +380,7 @@ namespace Guardrails {
|
|
|
331
380
|
|
|
332
381
|
// Security checks on content being written (optional)
|
|
333
382
|
"contains_secrets"?: Bool,
|
|
383
|
+
"secret_count"?: Long,
|
|
334
384
|
"secret_types"?: Set<String>,
|
|
335
385
|
"pii_detected"?: Bool,
|
|
336
386
|
"pii_types"?: Set<String>,
|
|
@@ -348,6 +398,19 @@ namespace Guardrails {
|
|
|
348
398
|
"session_injection_detected"?: Bool,
|
|
349
399
|
"session_command_injection"?: Bool,
|
|
350
400
|
"session_threat_turns"?: Long,
|
|
401
|
+
"session_max_injection_score"?: Long,
|
|
402
|
+
"session_max_jailbreak_score"?: Long,
|
|
403
|
+
"session_max_command_injection_score"?: Long,
|
|
404
|
+
"session_max_pii_score"?: Long,
|
|
405
|
+
"session_max_secret_score"?: Long,
|
|
406
|
+
"session_cumulative_risk_score"?: Long,
|
|
407
|
+
|
|
408
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
409
|
+
"agent_id"?: String,
|
|
410
|
+
"agent_type"?: String,
|
|
411
|
+
"agent_trust_level"?: String,
|
|
412
|
+
"agent_framework"?: String,
|
|
413
|
+
"agent_publisher"?: String,
|
|
351
414
|
|
|
352
415
|
};
|
|
353
416
|
|
|
@@ -384,6 +447,19 @@ namespace Guardrails {
|
|
|
384
447
|
"session_injection_detected"?: Bool,
|
|
385
448
|
"session_command_injection"?: Bool,
|
|
386
449
|
"session_threat_turns"?: Long,
|
|
450
|
+
"session_max_injection_score"?: Long,
|
|
451
|
+
"session_max_jailbreak_score"?: Long,
|
|
452
|
+
"session_max_command_injection_score"?: Long,
|
|
453
|
+
"session_max_pii_score"?: Long,
|
|
454
|
+
"session_max_secret_score"?: Long,
|
|
455
|
+
"session_cumulative_risk_score"?: Long,
|
|
456
|
+
|
|
457
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
458
|
+
"agent_id"?: String,
|
|
459
|
+
"agent_type"?: String,
|
|
460
|
+
"agent_trust_level"?: String,
|
|
461
|
+
"agent_framework"?: String,
|
|
462
|
+
"agent_publisher"?: String,
|
|
387
463
|
|
|
388
464
|
};
|
|
389
465
|
}
|
|
@@ -515,8 +591,12 @@ action process_prompt appliesTo {
|
|
|
515
591
|
|
|
516
592
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
517
593
|
pii_confidence: Long, // PII detection classifier confidence
|
|
518
|
-
injection_confidence: Long, //
|
|
519
|
-
jailbreak_confidence: Long, //
|
|
594
|
+
injection_confidence: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
595
|
+
jailbreak_confidence: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
596
|
+
injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
597
|
+
injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
598
|
+
jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
599
|
+
jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
520
600
|
|
|
521
601
|
// --- Agent Security (0-100) ---
|
|
522
602
|
indirect_injection_score: Long, // Indirect prompt injection risk (OWASP LLM01, ASI01)
|
|
@@ -529,6 +609,12 @@ action process_prompt appliesTo {
|
|
|
529
609
|
session_injection_detected?: Bool,
|
|
530
610
|
session_command_injection?: Bool,
|
|
531
611
|
session_threat_turns?: Long,
|
|
612
|
+
session_max_injection_score?: Long,
|
|
613
|
+
session_max_jailbreak_score?: Long,
|
|
614
|
+
session_max_command_injection_score?: Long,
|
|
615
|
+
session_max_pii_score?: Long,
|
|
616
|
+
session_max_secret_score?: Long,
|
|
617
|
+
session_cumulative_risk_score?: Long,
|
|
532
618
|
|
|
533
619
|
// --- Legacy ---
|
|
534
620
|
prompt_text?: String, // Same as content (backward compatibility)
|
|
@@ -591,8 +677,12 @@ action call_tool appliesTo {
|
|
|
591
677
|
|
|
592
678
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
593
679
|
pii_confidence?: Long,
|
|
594
|
-
injection_confidence?: Long,
|
|
595
|
-
jailbreak_confidence?: Long,
|
|
680
|
+
injection_confidence?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
681
|
+
jailbreak_confidence?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
682
|
+
injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
683
|
+
injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
684
|
+
jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
685
|
+
jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
596
686
|
|
|
597
687
|
// --- Agent Security (0-100) --- (OWASP ASI01, ASI02, ASI04; MITRE AML.T0051)
|
|
598
688
|
tool_poisoning_score?: Long, // Hidden instructions in tool description/args
|
|
@@ -626,6 +716,12 @@ action call_tool appliesTo {
|
|
|
626
716
|
session_injection_detected?: Bool,
|
|
627
717
|
session_command_injection?: Bool,
|
|
628
718
|
session_threat_turns?: Long,
|
|
719
|
+
session_max_injection_score?: Long,
|
|
720
|
+
session_max_jailbreak_score?: Long,
|
|
721
|
+
session_max_command_injection_score?: Long,
|
|
722
|
+
session_max_pii_score?: Long,
|
|
723
|
+
session_max_secret_score?: Long,
|
|
724
|
+
session_cumulative_risk_score?: Long,
|
|
629
725
|
|
|
630
726
|
// --- Legacy ---
|
|
631
727
|
response_content?: String,
|
|
@@ -670,6 +766,12 @@ action connect_server appliesTo {
|
|
|
670
766
|
session_injection_detected?: Bool,
|
|
671
767
|
session_command_injection?: Bool,
|
|
672
768
|
session_threat_turns?: Long,
|
|
769
|
+
session_max_injection_score?: Long,
|
|
770
|
+
session_max_jailbreak_score?: Long,
|
|
771
|
+
session_max_command_injection_score?: Long,
|
|
772
|
+
session_max_pii_score?: Long,
|
|
773
|
+
session_max_secret_score?: Long,
|
|
774
|
+
session_cumulative_risk_score?: Long,
|
|
673
775
|
},
|
|
674
776
|
};
|
|
675
777
|
|
|
@@ -712,6 +814,12 @@ action read_file appliesTo {
|
|
|
712
814
|
session_injection_detected?: Bool,
|
|
713
815
|
session_command_injection?: Bool,
|
|
714
816
|
session_threat_turns?: Long,
|
|
817
|
+
session_max_injection_score?: Long,
|
|
818
|
+
session_max_jailbreak_score?: Long,
|
|
819
|
+
session_max_command_injection_score?: Long,
|
|
820
|
+
session_max_pii_score?: Long,
|
|
821
|
+
session_max_secret_score?: Long,
|
|
822
|
+
session_cumulative_risk_score?: Long,
|
|
715
823
|
},
|
|
716
824
|
};
|
|
717
825
|
|
|
@@ -754,6 +862,12 @@ action write_file appliesTo {
|
|
|
754
862
|
session_injection_detected?: Bool,
|
|
755
863
|
session_command_injection?: Bool,
|
|
756
864
|
session_threat_turns?: Long,
|
|
865
|
+
session_max_injection_score?: Long,
|
|
866
|
+
session_max_jailbreak_score?: Long,
|
|
867
|
+
session_max_command_injection_score?: Long,
|
|
868
|
+
session_max_pii_score?: Long,
|
|
869
|
+
session_max_secret_score?: Long,
|
|
870
|
+
session_cumulative_risk_score?: Long,
|
|
757
871
|
},
|
|
758
872
|
};
|
|
759
873
|
|
|
@@ -1344,8 +1458,12 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1344
1458
|
{ "key": "direction", "type": "string", "required": true, "description": "Content flow direction: \'input\' for user prompts, \'output\' for AI responses. Use this to apply different policies to inputs vs outputs (e.g., block PII only in outputs)" },
|
|
1345
1459
|
{ "key": "content_type", "type": "string", "required": true, "description": "Type of content being analyzed: \'prompt\', \'response\', \'tool_call\', or \'file\'" },
|
|
1346
1460
|
{ "key": "detector_count", "type": "number", "required": true, "description": "Number of detectors that were executed for this request" },
|
|
1347
|
-
{ "key": "
|
|
1348
|
-
{ "key": "
|
|
1461
|
+
{ "key": "injection_confidence", "type": "number", "required": false, "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1462
|
+
{ "key": "jailbreak_confidence", "type": "number", "required": false, "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control" },
|
|
1463
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection (0-100). Raw score from Pulse detector before combination with deep-context. Use for per-detector policy control" },
|
|
1464
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection (0-100). Tracks injection patterns across conversation history. Generally higher confidence than single-turn" },
|
|
1465
|
+
{ "key": "jailbreak_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for jailbreak attempts (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1466
|
+
{ "key": "jailbreak_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for jailbreak attempts (0-100). Detects jailbreak escalation patterns across conversation turns" },
|
|
1349
1467
|
{ "key": "injection_type", "type": "string", "required": false, "description": "Type of injection detected: \'prompt\', \'sql\', \'command\', or \'none\'. Use this to apply different policies per injection type" },
|
|
1350
1468
|
{ "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether any API keys, tokens, passwords, or credentials were detected in the content. True indicates presence of secrets" },
|
|
1351
1469
|
{ "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found. Multiple matches may indicate data dumps or accidental credential exposure" },
|
|
@@ -1399,7 +1517,25 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1399
1517
|
{ "key": "content_safety_score", "type": "number", "required": false, "description": "Aggregate content safety score (0-100). Combines multiple safety signals into a single risk indicator" },
|
|
1400
1518
|
{ "key": "content_safety_blocked", "type": "boolean", "required": false, "description": "Whether content was flagged for blocking by the content safety system" },
|
|
1401
1519
|
{ "key": "conversation_turn", "type": "number", "required": false, "description": "Current conversation turn number. Use for policies that escalate strictness over long conversations" },
|
|
1402
|
-
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected (attack spread across multiple conversation turns)" }
|
|
1520
|
+
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected (attack spread across multiple conversation turns)" },
|
|
1521
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1522
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1523
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1524
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1525
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1526
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1527
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1528
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1529
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1530
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1531
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1532
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1533
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" },
|
|
1534
|
+
{ "key": "agent_id", "type": "string", "required": false, "description": "Unique identifier of the authenticated AI agent making this request (e.g., \'agent_research_v3\'). Empty string for human user requests. Use this to write agent-specific policies" },
|
|
1535
|
+
{ "key": "agent_type", "type": "string", "required": false, "description": "Type of the authenticated agent: \'orchestrator\' (coordinates sub-agents), \'autonomous\' (self-directed), \'tool_agent\' (single-purpose tool), or \'human_proxy\' (acts on behalf of a human). Empty string for human users" },
|
|
1536
|
+
{ "key": "agent_trust_level", "type": "string", "required": false, "description": "Trust level of the authenticated agent: \'first_party\' (your own agents), \'verified_third_party\' (audited external agents), or \'unverified\' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools" },
|
|
1537
|
+
{ "key": "agent_framework", "type": "string", "required": false, "description": "Framework or SDK the agent is built with (e.g., \'claude-code\', \'langchain\', \'crewai\', \'autogen\'). Use this to apply framework-specific policies" },
|
|
1538
|
+
{ "key": "agent_publisher", "type": "string", "required": false, "description": "Organization that published the agent (e.g., \'anthropic\', \'internal\', \'acme-corp\'). Use this for organization-level trust policies" }
|
|
1403
1539
|
]
|
|
1404
1540
|
},
|
|
1405
1541
|
{
|
|
@@ -1427,10 +1563,13 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1427
1563
|
{ "key": "content_topics", "type": "array", "required": false, "description": "Semantic topics detected in tool arguments or content (e.g., [\'controlled_substances\']). Use .contains() to restrict tool calls involving specific topics" },
|
|
1428
1564
|
{ "key": "topic_confidence", "type": "number", "required": false, "description": "Confidence score from topic classifier for tool content (0-100)" },
|
|
1429
1565
|
{ "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in tool arguments or content" },
|
|
1566
|
+
{ "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found in tool arguments or content. Multiple matches may indicate data dumps or accidental credential exposure" },
|
|
1430
1567
|
{ "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in tool arguments" },
|
|
1431
1568
|
{ "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in tool arguments or content" },
|
|
1432
1569
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in tool arguments" },
|
|
1433
|
-
{ "key": "
|
|
1570
|
+
{ "key": "injection_confidence", "type": "number", "required": false, "description": "Combined prompt injection confidence in tool arguments (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1571
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1572
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection in tool arguments (0-100). Tracks injection patterns across tool call history" },
|
|
1434
1573
|
{ "key": "command_injection_detected", "type": "boolean", "required": false, "description": "Whether command injection patterns were detected in tool arguments" },
|
|
1435
1574
|
{ "key": "command_injection_type", "type": "string", "required": false, "description": "Type of command injection detected in tool arguments" },
|
|
1436
1575
|
{ "key": "command_injection_score", "type": "number", "required": false, "description": "Confidence score for command injection in tool arguments (0-100)" },
|
|
@@ -1457,7 +1596,25 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1457
1596
|
{ "key": "encoded_score", "type": "number", "required": false, "description": "Risk score for encoded injection in tool arguments (0-100)" },
|
|
1458
1597
|
{ "key": "rug_pull_type", "type": "string", "required": false, "description": "Type of rug pull detected: \'risk_spike\' (sudden risk increase), \'pattern_change\' (behavioral shift), \'combined\', or \'none\'" },
|
|
1459
1598
|
{ "key": "conversation_turn", "type": "number", "required": false, "description": "Current conversation turn number for the agentic session" },
|
|
1460
|
-
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected across tool calls in the session" }
|
|
1599
|
+
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected across tool calls in the session" },
|
|
1600
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1601
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1602
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1603
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1604
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1605
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1606
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1607
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1608
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1609
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1610
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1611
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1612
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" },
|
|
1613
|
+
{ "key": "agent_id", "type": "string", "required": false, "description": "Unique identifier of the authenticated AI agent making this request (e.g., \'agent_research_v3\'). Empty string for human user requests. Use this to write agent-specific policies" },
|
|
1614
|
+
{ "key": "agent_type", "type": "string", "required": false, "description": "Type of the authenticated agent: \'orchestrator\' (coordinates sub-agents), \'autonomous\' (self-directed), \'tool_agent\' (single-purpose tool), or \'human_proxy\' (acts on behalf of a human). Empty string for human users" },
|
|
1615
|
+
{ "key": "agent_trust_level", "type": "string", "required": false, "description": "Trust level of the authenticated agent: \'first_party\' (your own agents), \'verified_third_party\' (audited external agents), or \'unverified\' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools" },
|
|
1616
|
+
{ "key": "agent_framework", "type": "string", "required": false, "description": "Framework or SDK the agent is built with (e.g., \'claude-code\', \'langchain\', \'crewai\', \'autogen\'). Use this to apply framework-specific policies" },
|
|
1617
|
+
{ "key": "agent_publisher", "type": "string", "required": false, "description": "Organization that published the agent (e.g., \'anthropic\', \'internal\', \'acme-corp\'). Use this for organization-level trust policies" }
|
|
1461
1618
|
]
|
|
1462
1619
|
},
|
|
1463
1620
|
{
|
|
@@ -1467,12 +1624,31 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1467
1624
|
{ "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
|
|
1468
1625
|
{ "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
|
|
1469
1626
|
{ "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the file content being read" },
|
|
1627
|
+
{ "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found in file content. Multiple matches may indicate data dumps or accidental credential exposure" },
|
|
1470
1628
|
{ "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in file content" },
|
|
1471
1629
|
{ "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in the file content" },
|
|
1472
1630
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in file content" },
|
|
1473
1631
|
{ "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected in the file path being read" },
|
|
1474
1632
|
{ "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal in the file read path" },
|
|
1475
|
-
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file read path" }
|
|
1633
|
+
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file read path" },
|
|
1634
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1635
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1636
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1637
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1638
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1639
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1640
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1641
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1642
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1643
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1644
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1645
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1646
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" },
|
|
1647
|
+
{ "key": "agent_id", "type": "string", "required": false, "description": "Unique identifier of the authenticated AI agent making this request (e.g., \'agent_research_v3\'). Empty string for human user requests. Use this to write agent-specific policies" },
|
|
1648
|
+
{ "key": "agent_type", "type": "string", "required": false, "description": "Type of the authenticated agent: \'orchestrator\' (coordinates sub-agents), \'autonomous\' (self-directed), \'tool_agent\' (single-purpose tool), or \'human_proxy\' (acts on behalf of a human). Empty string for human users" },
|
|
1649
|
+
{ "key": "agent_trust_level", "type": "string", "required": false, "description": "Trust level of the authenticated agent: \'first_party\' (your own agents), \'verified_third_party\' (audited external agents), or \'unverified\' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools" },
|
|
1650
|
+
{ "key": "agent_framework", "type": "string", "required": false, "description": "Framework or SDK the agent is built with (e.g., \'claude-code\', \'langchain\', \'crewai\', \'autogen\'). Use this to apply framework-specific policies" },
|
|
1651
|
+
{ "key": "agent_publisher", "type": "string", "required": false, "description": "Organization that published the agent (e.g., \'anthropic\', \'internal\', \'acme-corp\'). Use this for organization-level trust policies" }
|
|
1476
1652
|
]
|
|
1477
1653
|
},
|
|
1478
1654
|
{
|
|
@@ -1482,12 +1658,31 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1482
1658
|
{ "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
|
|
1483
1659
|
{ "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
|
|
1484
1660
|
{ "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the content being written. Block writes containing credentials" },
|
|
1661
|
+
{ "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found in write content. Multiple matches may indicate data dumps or accidental credential exposure" },
|
|
1485
1662
|
{ "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in write content" },
|
|
1486
1663
|
{ "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in the content being written" },
|
|
1487
1664
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in write content" },
|
|
1488
1665
|
{ "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected in the file path being written" },
|
|
1489
1666
|
{ "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal in the file write path" },
|
|
1490
|
-
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file write path" }
|
|
1667
|
+
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file write path" },
|
|
1668
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1669
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1670
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1671
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1672
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1673
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1674
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1675
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1676
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1677
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1678
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1679
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1680
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" },
|
|
1681
|
+
{ "key": "agent_id", "type": "string", "required": false, "description": "Unique identifier of the authenticated AI agent making this request (e.g., \'agent_research_v3\'). Empty string for human user requests. Use this to write agent-specific policies" },
|
|
1682
|
+
{ "key": "agent_type", "type": "string", "required": false, "description": "Type of the authenticated agent: \'orchestrator\' (coordinates sub-agents), \'autonomous\' (self-directed), \'tool_agent\' (single-purpose tool), or \'human_proxy\' (acts on behalf of a human). Empty string for human users" },
|
|
1683
|
+
{ "key": "agent_trust_level", "type": "string", "required": false, "description": "Trust level of the authenticated agent: \'first_party\' (your own agents), \'verified_third_party\' (audited external agents), or \'unverified\' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools" },
|
|
1684
|
+
{ "key": "agent_framework", "type": "string", "required": false, "description": "Framework or SDK the agent is built with (e.g., \'claude-code\', \'langchain\', \'crewai\', \'autogen\'). Use this to apply framework-specific policies" },
|
|
1685
|
+
{ "key": "agent_publisher", "type": "string", "required": false, "description": "Organization that published the agent (e.g., \'anthropic\', \'internal\', \'acme-corp\'). Use this for organization-level trust policies" }
|
|
1491
1686
|
]
|
|
1492
1687
|
},
|
|
1493
1688
|
{
|
|
@@ -1506,7 +1701,25 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1506
1701
|
{ "key": "mcp_risk_score", "type": "number", "required": false, "description": "Risk score for MCP configuration issues (0-100)" },
|
|
1507
1702
|
{ "key": "cross_origin_detected", "type": "boolean", "required": false, "description": "Whether cross-origin escalation patterns were detected in the MCP server connection" },
|
|
1508
1703
|
{ "key": "cross_origin_type", "type": "string", "required": false, "description": "Type of cross-origin escalation detected in server connection" },
|
|
1509
|
-
{ "key": "cross_origin_score", "type": "number", "required": false, "description": "Risk score for cross-origin escalation in server connection (0-100)" }
|
|
1704
|
+
{ "key": "cross_origin_score", "type": "number", "required": false, "description": "Risk score for cross-origin escalation in server connection (0-100)" },
|
|
1705
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1706
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1707
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1708
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1709
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1710
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1711
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1712
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1713
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1714
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1715
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1716
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1717
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" },
|
|
1718
|
+
{ "key": "agent_id", "type": "string", "required": false, "description": "Unique identifier of the authenticated AI agent making this request (e.g., \'agent_research_v3\'). Empty string for human user requests. Use this to write agent-specific policies" },
|
|
1719
|
+
{ "key": "agent_type", "type": "string", "required": false, "description": "Type of the authenticated agent: \'orchestrator\' (coordinates sub-agents), \'autonomous\' (self-directed), \'tool_agent\' (single-purpose tool), or \'human_proxy\' (acts on behalf of a human). Empty string for human users" },
|
|
1720
|
+
{ "key": "agent_trust_level", "type": "string", "required": false, "description": "Trust level of the authenticated agent: \'first_party\' (your own agents), \'verified_third_party\' (audited external agents), or \'unverified\' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools" },
|
|
1721
|
+
{ "key": "agent_framework", "type": "string", "required": false, "description": "Framework or SDK the agent is built with (e.g., \'claude-code\', \'langchain\', \'crewai\', \'autogen\'). Use this to apply framework-specific policies" },
|
|
1722
|
+
{ "key": "agent_publisher", "type": "string", "required": false, "description": "Organization that published the agent (e.g., \'anthropic\', \'internal\', \'acme-corp\'). Use this for organization-level trust policies" }
|
|
1510
1723
|
]
|
|
1511
1724
|
}
|
|
1512
1725
|
]
|
|
@@ -1551,8 +1764,12 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1551
1764
|
{ "key": "sexual_score", "type": "number", "required": true, "description": "Sexual content detection score (0-100)" },
|
|
1552
1765
|
{ "key": "profanity_score", "type": "number", "required": true, "description": "Profanity detection score (0-100)" },
|
|
1553
1766
|
{ "key": "pii_confidence", "type": "number", "required": true, "description": "PII detection ML classifier confidence (0-100)" },
|
|
1554
|
-
{ "key": "injection_confidence", "type": "number", "required": true, "description": "
|
|
1555
|
-
{ "key": "jailbreak_confidence", "type": "number", "required": true, "description": "
|
|
1767
|
+
{ "key": "injection_confidence", "type": "number", "required": true, "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1768
|
+
{ "key": "jailbreak_confidence", "type": "number", "required": true, "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control" },
|
|
1769
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection (0-100). Raw score from Pulse detector before combination with deep-context. Use for per-detector policy control" },
|
|
1770
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection (0-100). Tracks injection patterns across conversation history. Generally higher confidence than single-turn" },
|
|
1771
|
+
{ "key": "jailbreak_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for jailbreak attempts (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1772
|
+
{ "key": "jailbreak_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for jailbreak attempts (0-100). Detects jailbreak escalation patterns across conversation turns" },
|
|
1556
1773
|
{ "key": "indirect_injection_score", "type": "number", "required": true, "description": "Indirect prompt injection risk score (0-100) — injection via tool outputs or retrieved content" },
|
|
1557
1774
|
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1558
1775
|
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
@@ -1560,7 +1777,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1560
1777
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1561
1778
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1562
1779
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1563
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1780
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1781
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1782
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1783
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1784
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1785
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1786
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1564
1787
|
]
|
|
1565
1788
|
},
|
|
1566
1789
|
{
|
|
@@ -1598,8 +1821,12 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1598
1821
|
{ "key": "sexual_score", "type": "number", "required": false, "description": "Sexual content detection score (0-100)" },
|
|
1599
1822
|
{ "key": "profanity_score", "type": "number", "required": false, "description": "Profanity detection score (0-100)" },
|
|
1600
1823
|
{ "key": "pii_confidence", "type": "number", "required": false, "description": "PII detection ML classifier confidence (0-100)" },
|
|
1601
|
-
{ "key": "injection_confidence", "type": "number", "required": false, "description": "
|
|
1602
|
-
{ "key": "jailbreak_confidence", "type": "number", "required": false, "description": "
|
|
1824
|
+
{ "key": "injection_confidence", "type": "number", "required": false, "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1825
|
+
{ "key": "jailbreak_confidence", "type": "number", "required": false, "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control" },
|
|
1826
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1827
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection in tool arguments (0-100). Tracks injection patterns across tool call history" },
|
|
1828
|
+
{ "key": "jailbreak_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for jailbreak in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1829
|
+
{ "key": "jailbreak_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for jailbreak in tool arguments (0-100). Detects jailbreak escalation patterns across tool call turns" },
|
|
1603
1830
|
{ "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100) — hidden instructions in tool descriptions or arguments" },
|
|
1604
1831
|
{ "key": "tool_poisoning_detected", "type": "boolean", "required": false, "description": "Whether tool poisoning patterns were explicitly detected" },
|
|
1605
1832
|
{ "key": "rug_pull_score", "type": "number", "required": false, "description": "Tool behavioral drift score (0-100) — deviation from established tool behavior patterns" },
|
|
@@ -1622,7 +1849,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1622
1849
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1623
1850
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1624
1851
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1625
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1852
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1853
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1854
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1855
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1856
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1857
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1858
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1626
1859
|
]
|
|
1627
1860
|
},
|
|
1628
1861
|
{
|
|
@@ -1653,7 +1886,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1653
1886
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1654
1887
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1655
1888
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1656
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1889
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1890
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1891
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1892
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1893
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1894
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1895
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1657
1896
|
]
|
|
1658
1897
|
},
|
|
1659
1898
|
{
|
|
@@ -1684,7 +1923,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1684
1923
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1685
1924
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1686
1925
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1687
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1926
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1927
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1928
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1929
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1930
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1931
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1932
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1688
1933
|
]
|
|
1689
1934
|
},
|
|
1690
1935
|
{
|
|
@@ -1716,7 +1961,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1716
1961
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1717
1962
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1718
1963
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1719
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1964
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1965
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1966
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1967
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1968
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1969
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1970
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1720
1971
|
]
|
|
1721
1972
|
}
|
|
1722
1973
|
]
|