@highflame/policy 2.1.4 → 2.1.5
This diff compares the contents of two publicly released versions of this package, exactly as they appear in the supported public registry they were published to. It is provided for informational purposes only.
- package/README.md +41 -0
- package/_schemas/guardrails/context.json +432 -6
- package/_schemas/guardrails/schema.cedarschema +39 -3
- package/_schemas/guardrails/templates/defaults/injection.cedar +6 -6
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +1 -1
- package/_schemas/overwatch/context.json +232 -4
- package/_schemas/overwatch/schema.cedarschema +42 -4
- package/dist/guardrails-context.gen.d.ts +19 -2
- package/dist/guardrails-context.gen.js +19 -2
- package/dist/guardrails-defaults.gen.js +9 -9
- package/dist/overwatch-context.gen.d.ts +10 -0
- package/dist/overwatch-context.gen.js +10 -0
- package/dist/service-schemas.gen.d.ts +2 -2
- package/dist/service-schemas.gen.js +207 -24
- package/package.json +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
- package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
- package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
|
@@ -112,8 +112,12 @@ namespace Guardrails {
|
|
|
112
112
|
"detector_count": Long,
|
|
113
113
|
|
|
114
114
|
// Security - Injection & Jailbreak (optional)
|
|
115
|
-
"
|
|
116
|
-
"
|
|
115
|
+
"injection_confidence"?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
116
|
+
"jailbreak_confidence"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
117
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
118
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
119
|
+
"jailbreak_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
120
|
+
"jailbreak_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
117
121
|
"injection_type"?: String, // "prompt" | "sql" | "command" | "none"
|
|
118
122
|
|
|
119
123
|
// Privacy - Secrets (optional)
|
|
@@ -199,6 +203,12 @@ namespace Guardrails {
|
|
|
199
203
|
"session_injection_detected"?: Bool,
|
|
200
204
|
"session_command_injection"?: Bool,
|
|
201
205
|
"session_threat_turns"?: Long,
|
|
206
|
+
"session_max_injection_score"?: Long,
|
|
207
|
+
"session_max_jailbreak_score"?: Long,
|
|
208
|
+
"session_max_command_injection_score"?: Long,
|
|
209
|
+
"session_max_pii_score"?: Long,
|
|
210
|
+
"session_max_secret_score"?: Long,
|
|
211
|
+
"session_cumulative_risk_score"?: Long,
|
|
202
212
|
|
|
203
213
|
};
|
|
204
214
|
|
|
@@ -243,7 +253,9 @@ namespace Guardrails {
|
|
|
243
253
|
"secret_types"?: Set<String>,
|
|
244
254
|
"pii_detected"?: Bool,
|
|
245
255
|
"pii_types"?: Set<String>,
|
|
246
|
-
"
|
|
256
|
+
"injection_confidence"?: Long,
|
|
257
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
258
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
247
259
|
|
|
248
260
|
// Security - Pattern Detection (optional)
|
|
249
261
|
"command_injection_detected"?: Bool,
|
|
@@ -292,6 +304,12 @@ namespace Guardrails {
|
|
|
292
304
|
"session_injection_detected"?: Bool,
|
|
293
305
|
"session_command_injection"?: Bool,
|
|
294
306
|
"session_threat_turns"?: Long,
|
|
307
|
+
"session_max_injection_score"?: Long,
|
|
308
|
+
"session_max_jailbreak_score"?: Long,
|
|
309
|
+
"session_max_command_injection_score"?: Long,
|
|
310
|
+
"session_max_pii_score"?: Long,
|
|
311
|
+
"session_max_secret_score"?: Long,
|
|
312
|
+
"session_cumulative_risk_score"?: Long,
|
|
295
313
|
|
|
296
314
|
};
|
|
297
315
|
|
|
@@ -320,6 +338,12 @@ namespace Guardrails {
|
|
|
320
338
|
"session_injection_detected"?: Bool,
|
|
321
339
|
"session_command_injection"?: Bool,
|
|
322
340
|
"session_threat_turns"?: Long,
|
|
341
|
+
"session_max_injection_score"?: Long,
|
|
342
|
+
"session_max_jailbreak_score"?: Long,
|
|
343
|
+
"session_max_command_injection_score"?: Long,
|
|
344
|
+
"session_max_pii_score"?: Long,
|
|
345
|
+
"session_max_secret_score"?: Long,
|
|
346
|
+
"session_cumulative_risk_score"?: Long,
|
|
323
347
|
|
|
324
348
|
};
|
|
325
349
|
|
|
@@ -348,6 +372,12 @@ namespace Guardrails {
|
|
|
348
372
|
"session_injection_detected"?: Bool,
|
|
349
373
|
"session_command_injection"?: Bool,
|
|
350
374
|
"session_threat_turns"?: Long,
|
|
375
|
+
"session_max_injection_score"?: Long,
|
|
376
|
+
"session_max_jailbreak_score"?: Long,
|
|
377
|
+
"session_max_command_injection_score"?: Long,
|
|
378
|
+
"session_max_pii_score"?: Long,
|
|
379
|
+
"session_max_secret_score"?: Long,
|
|
380
|
+
"session_cumulative_risk_score"?: Long,
|
|
351
381
|
|
|
352
382
|
};
|
|
353
383
|
|
|
@@ -384,6 +414,12 @@ namespace Guardrails {
|
|
|
384
414
|
"session_injection_detected"?: Bool,
|
|
385
415
|
"session_command_injection"?: Bool,
|
|
386
416
|
"session_threat_turns"?: Long,
|
|
417
|
+
"session_max_injection_score"?: Long,
|
|
418
|
+
"session_max_jailbreak_score"?: Long,
|
|
419
|
+
"session_max_command_injection_score"?: Long,
|
|
420
|
+
"session_max_pii_score"?: Long,
|
|
421
|
+
"session_max_secret_score"?: Long,
|
|
422
|
+
"session_cumulative_risk_score"?: Long,
|
|
387
423
|
|
|
388
424
|
};
|
|
389
425
|
}
|
|
@@ -515,8 +551,12 @@ action process_prompt appliesTo {
|
|
|
515
551
|
|
|
516
552
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
517
553
|
pii_confidence: Long, // PII detection classifier confidence
|
|
518
|
-
injection_confidence: Long, //
|
|
519
|
-
jailbreak_confidence: Long, //
|
|
554
|
+
injection_confidence: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
555
|
+
jailbreak_confidence: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
556
|
+
injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
557
|
+
injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
558
|
+
jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
559
|
+
jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
520
560
|
|
|
521
561
|
// --- Agent Security (0-100) ---
|
|
522
562
|
indirect_injection_score: Long, // Indirect prompt injection risk (OWASP LLM01, ASI01)
|
|
@@ -529,6 +569,12 @@ action process_prompt appliesTo {
|
|
|
529
569
|
session_injection_detected?: Bool,
|
|
530
570
|
session_command_injection?: Bool,
|
|
531
571
|
session_threat_turns?: Long,
|
|
572
|
+
session_max_injection_score?: Long,
|
|
573
|
+
session_max_jailbreak_score?: Long,
|
|
574
|
+
session_max_command_injection_score?: Long,
|
|
575
|
+
session_max_pii_score?: Long,
|
|
576
|
+
session_max_secret_score?: Long,
|
|
577
|
+
session_cumulative_risk_score?: Long,
|
|
532
578
|
|
|
533
579
|
// --- Legacy ---
|
|
534
580
|
prompt_text?: String, // Same as content (backward compatibility)
|
|
@@ -591,8 +637,12 @@ action call_tool appliesTo {
|
|
|
591
637
|
|
|
592
638
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
593
639
|
pii_confidence?: Long,
|
|
594
|
-
injection_confidence?: Long,
|
|
595
|
-
jailbreak_confidence?: Long,
|
|
640
|
+
injection_confidence?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
641
|
+
jailbreak_confidence?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
642
|
+
injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
643
|
+
injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
644
|
+
jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
645
|
+
jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
596
646
|
|
|
597
647
|
// --- Agent Security (0-100) --- (OWASP ASI01, ASI02, ASI04; MITRE AML.T0051)
|
|
598
648
|
tool_poisoning_score?: Long, // Hidden instructions in tool description/args
|
|
@@ -626,6 +676,12 @@ action call_tool appliesTo {
|
|
|
626
676
|
session_injection_detected?: Bool,
|
|
627
677
|
session_command_injection?: Bool,
|
|
628
678
|
session_threat_turns?: Long,
|
|
679
|
+
session_max_injection_score?: Long,
|
|
680
|
+
session_max_jailbreak_score?: Long,
|
|
681
|
+
session_max_command_injection_score?: Long,
|
|
682
|
+
session_max_pii_score?: Long,
|
|
683
|
+
session_max_secret_score?: Long,
|
|
684
|
+
session_cumulative_risk_score?: Long,
|
|
629
685
|
|
|
630
686
|
// --- Legacy ---
|
|
631
687
|
response_content?: String,
|
|
@@ -670,6 +726,12 @@ action connect_server appliesTo {
|
|
|
670
726
|
session_injection_detected?: Bool,
|
|
671
727
|
session_command_injection?: Bool,
|
|
672
728
|
session_threat_turns?: Long,
|
|
729
|
+
session_max_injection_score?: Long,
|
|
730
|
+
session_max_jailbreak_score?: Long,
|
|
731
|
+
session_max_command_injection_score?: Long,
|
|
732
|
+
session_max_pii_score?: Long,
|
|
733
|
+
session_max_secret_score?: Long,
|
|
734
|
+
session_cumulative_risk_score?: Long,
|
|
673
735
|
},
|
|
674
736
|
};
|
|
675
737
|
|
|
@@ -712,6 +774,12 @@ action read_file appliesTo {
|
|
|
712
774
|
session_injection_detected?: Bool,
|
|
713
775
|
session_command_injection?: Bool,
|
|
714
776
|
session_threat_turns?: Long,
|
|
777
|
+
session_max_injection_score?: Long,
|
|
778
|
+
session_max_jailbreak_score?: Long,
|
|
779
|
+
session_max_command_injection_score?: Long,
|
|
780
|
+
session_max_pii_score?: Long,
|
|
781
|
+
session_max_secret_score?: Long,
|
|
782
|
+
session_cumulative_risk_score?: Long,
|
|
715
783
|
},
|
|
716
784
|
};
|
|
717
785
|
|
|
@@ -754,6 +822,12 @@ action write_file appliesTo {
|
|
|
754
822
|
session_injection_detected?: Bool,
|
|
755
823
|
session_command_injection?: Bool,
|
|
756
824
|
session_threat_turns?: Long,
|
|
825
|
+
session_max_injection_score?: Long,
|
|
826
|
+
session_max_jailbreak_score?: Long,
|
|
827
|
+
session_max_command_injection_score?: Long,
|
|
828
|
+
session_max_pii_score?: Long,
|
|
829
|
+
session_max_secret_score?: Long,
|
|
830
|
+
session_cumulative_risk_score?: Long,
|
|
757
831
|
},
|
|
758
832
|
};
|
|
759
833
|
|
|
@@ -1344,8 +1418,12 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1344
1418
|
{ "key": "direction", "type": "string", "required": true, "description": "Content flow direction: \'input\' for user prompts, \'output\' for AI responses. Use this to apply different policies to inputs vs outputs (e.g., block PII only in outputs)" },
|
|
1345
1419
|
{ "key": "content_type", "type": "string", "required": true, "description": "Type of content being analyzed: \'prompt\', \'response\', \'tool_call\', or \'file\'" },
|
|
1346
1420
|
{ "key": "detector_count", "type": "number", "required": true, "description": "Number of detectors that were executed for this request" },
|
|
1347
|
-
{ "key": "
|
|
1348
|
-
{ "key": "
|
|
1421
|
+
{ "key": "injection_confidence", "type": "number", "required": false, "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1422
|
+
{ "key": "jailbreak_confidence", "type": "number", "required": false, "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control" },
|
|
1423
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection (0-100). Raw score from Pulse detector before combination with deep-context. Use for per-detector policy control" },
|
|
1424
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection (0-100). Tracks injection patterns across conversation history. Generally higher confidence than single-turn" },
|
|
1425
|
+
{ "key": "jailbreak_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for jailbreak attempts (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1426
|
+
{ "key": "jailbreak_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for jailbreak attempts (0-100). Detects jailbreak escalation patterns across conversation turns" },
|
|
1349
1427
|
{ "key": "injection_type", "type": "string", "required": false, "description": "Type of injection detected: \'prompt\', \'sql\', \'command\', or \'none\'. Use this to apply different policies per injection type" },
|
|
1350
1428
|
{ "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether any API keys, tokens, passwords, or credentials were detected in the content. True indicates presence of secrets" },
|
|
1351
1429
|
{ "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found. Multiple matches may indicate data dumps or accidental credential exposure" },
|
|
@@ -1399,7 +1477,20 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1399
1477
|
{ "key": "content_safety_score", "type": "number", "required": false, "description": "Aggregate content safety score (0-100). Combines multiple safety signals into a single risk indicator" },
|
|
1400
1478
|
{ "key": "content_safety_blocked", "type": "boolean", "required": false, "description": "Whether content was flagged for blocking by the content safety system" },
|
|
1401
1479
|
{ "key": "conversation_turn", "type": "number", "required": false, "description": "Current conversation turn number. Use for policies that escalate strictness over long conversations" },
|
|
1402
|
-
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected (attack spread across multiple conversation turns)" }
|
|
1480
|
+
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected (attack spread across multiple conversation turns)" },
|
|
1481
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1482
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1483
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1484
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1485
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1486
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1487
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1488
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1489
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1490
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1491
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1492
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1493
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1403
1494
|
]
|
|
1404
1495
|
},
|
|
1405
1496
|
{
|
|
@@ -1430,7 +1521,9 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1430
1521
|
{ "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in tool arguments" },
|
|
1431
1522
|
{ "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in tool arguments or content" },
|
|
1432
1523
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in tool arguments" },
|
|
1433
|
-
{ "key": "
|
|
1524
|
+
{ "key": "injection_confidence", "type": "number", "required": false, "description": "Combined prompt injection confidence in tool arguments (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1525
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1526
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection in tool arguments (0-100). Tracks injection patterns across tool call history" },
|
|
1434
1527
|
{ "key": "command_injection_detected", "type": "boolean", "required": false, "description": "Whether command injection patterns were detected in tool arguments" },
|
|
1435
1528
|
{ "key": "command_injection_type", "type": "string", "required": false, "description": "Type of command injection detected in tool arguments" },
|
|
1436
1529
|
{ "key": "command_injection_score", "type": "number", "required": false, "description": "Confidence score for command injection in tool arguments (0-100)" },
|
|
@@ -1457,7 +1550,20 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1457
1550
|
{ "key": "encoded_score", "type": "number", "required": false, "description": "Risk score for encoded injection in tool arguments (0-100)" },
|
|
1458
1551
|
{ "key": "rug_pull_type", "type": "string", "required": false, "description": "Type of rug pull detected: \'risk_spike\' (sudden risk increase), \'pattern_change\' (behavioral shift), \'combined\', or \'none\'" },
|
|
1459
1552
|
{ "key": "conversation_turn", "type": "number", "required": false, "description": "Current conversation turn number for the agentic session" },
|
|
1460
|
-
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected across tool calls in the session" }
|
|
1553
|
+
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected across tool calls in the session" },
|
|
1554
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1555
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1556
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1557
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1558
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1559
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1560
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1561
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1562
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1563
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1564
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1565
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1566
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1461
1567
|
]
|
|
1462
1568
|
},
|
|
1463
1569
|
{
|
|
@@ -1472,7 +1578,20 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1472
1578
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in file content" },
|
|
1473
1579
|
{ "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected in the file path being read" },
|
|
1474
1580
|
{ "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal in the file read path" },
|
|
1475
|
-
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file read path" }
|
|
1581
|
+
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file read path" },
|
|
1582
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1583
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1584
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1585
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1586
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1587
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1588
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1589
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1590
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1591
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1592
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1593
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1594
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1476
1595
|
]
|
|
1477
1596
|
},
|
|
1478
1597
|
{
|
|
@@ -1487,7 +1606,20 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1487
1606
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in write content" },
|
|
1488
1607
|
{ "key": "path_traversal_detected", "type": "boolean", "required": false, "description": "Whether path traversal patterns were detected in the file path being written" },
|
|
1489
1608
|
{ "key": "path_traversal_severity", "type": "string", "required": false, "description": "Severity of path traversal in the file write path" },
|
|
1490
|
-
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file write path" }
|
|
1609
|
+
{ "key": "path_traversal_type", "type": "string", "required": false, "description": "Type of path traversal detected in the file write path" },
|
|
1610
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1611
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1612
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1613
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1614
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1615
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1616
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1617
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1618
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1619
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1620
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1621
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1622
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1491
1623
|
]
|
|
1492
1624
|
},
|
|
1493
1625
|
{
|
|
@@ -1506,7 +1638,20 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1506
1638
|
{ "key": "mcp_risk_score", "type": "number", "required": false, "description": "Risk score for MCP configuration issues (0-100)" },
|
|
1507
1639
|
{ "key": "cross_origin_detected", "type": "boolean", "required": false, "description": "Whether cross-origin escalation patterns were detected in the MCP server connection" },
|
|
1508
1640
|
{ "key": "cross_origin_type", "type": "string", "required": false, "description": "Type of cross-origin escalation detected in server connection" },
|
|
1509
|
-
{ "key": "cross_origin_score", "type": "number", "required": false, "description": "Risk score for cross-origin escalation in server connection (0-100)" }
|
|
1641
|
+
{ "key": "cross_origin_score", "type": "number", "required": false, "description": "Risk score for cross-origin escalation in server connection (0-100)" },
|
|
1642
|
+
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1643
|
+
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
1644
|
+
{ "key": "session_secrets_detected", "type": "boolean", "required": false, "description": "Whether secrets were detected in any previous turn of the session" },
|
|
1645
|
+
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1646
|
+
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1647
|
+
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1648
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1649
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1650
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1651
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1652
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1653
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1654
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1510
1655
|
]
|
|
1511
1656
|
}
|
|
1512
1657
|
]
|
|
@@ -1551,8 +1696,12 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1551
1696
|
{ "key": "sexual_score", "type": "number", "required": true, "description": "Sexual content detection score (0-100)" },
|
|
1552
1697
|
{ "key": "profanity_score", "type": "number", "required": true, "description": "Profanity detection score (0-100)" },
|
|
1553
1698
|
{ "key": "pii_confidence", "type": "number", "required": true, "description": "PII detection ML classifier confidence (0-100)" },
|
|
1554
|
-
{ "key": "injection_confidence", "type": "number", "required": true, "description": "
|
|
1555
|
-
{ "key": "jailbreak_confidence", "type": "number", "required": true, "description": "
|
|
1699
|
+
{ "key": "injection_confidence", "type": "number", "required": true, "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1700
|
+
{ "key": "jailbreak_confidence", "type": "number", "required": true, "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control" },
|
|
1701
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection (0-100). Raw score from Pulse detector before combination with deep-context. Use for per-detector policy control" },
|
|
1702
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection (0-100). Tracks injection patterns across conversation history. Generally higher confidence than single-turn" },
|
|
1703
|
+
{ "key": "jailbreak_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for jailbreak attempts (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1704
|
+
{ "key": "jailbreak_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for jailbreak attempts (0-100). Detects jailbreak escalation patterns across conversation turns" },
|
|
1556
1705
|
{ "key": "indirect_injection_score", "type": "number", "required": true, "description": "Indirect prompt injection risk score (0-100) — injection via tool outputs or retrieved content" },
|
|
1557
1706
|
{ "key": "session_pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in any previous turn of the session" },
|
|
1558
1707
|
{ "key": "session_pii_types", "type": "array", "required": false, "description": "PII types detected across the session (accumulated)" },
|
|
@@ -1560,7 +1709,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1560
1709
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1561
1710
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1562
1711
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1563
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1712
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1713
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1714
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1715
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1716
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1717
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1718
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1564
1719
|
]
|
|
1565
1720
|
},
|
|
1566
1721
|
{
|
|
@@ -1598,8 +1753,12 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1598
1753
|
{ "key": "sexual_score", "type": "number", "required": false, "description": "Sexual content detection score (0-100)" },
|
|
1599
1754
|
{ "key": "profanity_score", "type": "number", "required": false, "description": "Profanity detection score (0-100)" },
|
|
1600
1755
|
{ "key": "pii_confidence", "type": "number", "required": false, "description": "PII detection ML classifier confidence (0-100)" },
|
|
1601
|
-
{ "key": "injection_confidence", "type": "number", "required": false, "description": "
|
|
1602
|
-
{ "key": "jailbreak_confidence", "type": "number", "required": false, "description": "
|
|
1756
|
+
{ "key": "injection_confidence", "type": "number", "required": false, "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1757
|
+
{ "key": "jailbreak_confidence", "type": "number", "required": false, "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control" },
|
|
1758
|
+
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1759
|
+
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection in tool arguments (0-100). Tracks injection patterns across tool call history" },
|
|
1760
|
+
{ "key": "jailbreak_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for jailbreak in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1761
|
+
{ "key": "jailbreak_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for jailbreak in tool arguments (0-100). Detects jailbreak escalation patterns across tool call turns" },
|
|
1603
1762
|
{ "key": "tool_poisoning_score", "type": "number", "required": false, "description": "Tool description manipulation risk score (0-100) — hidden instructions in tool descriptions or arguments" },
|
|
1604
1763
|
{ "key": "tool_poisoning_detected", "type": "boolean", "required": false, "description": "Whether tool poisoning patterns were explicitly detected" },
|
|
1605
1764
|
{ "key": "rug_pull_score", "type": "number", "required": false, "description": "Tool behavioral drift score (0-100) — deviation from established tool behavior patterns" },
|
|
@@ -1622,7 +1781,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1622
1781
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1623
1782
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1624
1783
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1625
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1784
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1785
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1786
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1787
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1788
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1789
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1790
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1626
1791
|
]
|
|
1627
1792
|
},
|
|
1628
1793
|
{
|
|
@@ -1653,7 +1818,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1653
1818
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1654
1819
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1655
1820
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1656
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1821
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1822
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1823
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1824
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1825
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1826
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1827
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1657
1828
|
]
|
|
1658
1829
|
},
|
|
1659
1830
|
{
|
|
@@ -1684,7 +1855,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1684
1855
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1685
1856
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1686
1857
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1687
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1858
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1859
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1860
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1861
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1862
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1863
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1864
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1688
1865
|
]
|
|
1689
1866
|
},
|
|
1690
1867
|
{
|
|
@@ -1716,7 +1893,13 @@ export const OVERWATCH_CONTEXT = {
|
|
|
1716
1893
|
{ "key": "session_secret_types", "type": "array", "required": false, "description": "Secret types detected across the session (accumulated)" },
|
|
1717
1894
|
{ "key": "session_injection_detected", "type": "boolean", "required": false, "description": "Whether prompt injection was detected in any previous turn of the session" },
|
|
1718
1895
|
{ "key": "session_command_injection", "type": "boolean", "required": false, "description": "Whether command injection was detected in any previous turn of the session" },
|
|
1719
|
-
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" }
|
|
1896
|
+
{ "key": "session_threat_turns", "type": "number", "required": false, "description": "Number of turns in the session where threats were detected" },
|
|
1897
|
+
{ "key": "session_max_injection_score", "type": "number", "required": false, "description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions" },
|
|
1898
|
+
{ "key": "session_max_jailbreak_score", "type": "number", "required": false, "description": "Highest jailbreak detection score seen in any turn of the session (0-100)" },
|
|
1899
|
+
{ "key": "session_max_command_injection_score", "type": "number", "required": false, "description": "Highest command injection score seen in any turn of the session (0-100)" },
|
|
1900
|
+
{ "key": "session_max_pii_score", "type": "number", "required": false, "description": "Highest PII risk score seen in any turn of the session (0-100)" },
|
|
1901
|
+
{ "key": "session_max_secret_score", "type": "number", "required": false, "description": "Highest secret detection score seen in any turn of the session (0-100)" },
|
|
1902
|
+
{ "key": "session_cumulative_risk_score", "type": "number", "required": false, "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant" }
|
|
1720
1903
|
]
|
|
1721
1904
|
}
|
|
1722
1905
|
]
|
package/package.json
CHANGED
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
// =============================================================================
|
|
2
|
-
// Chat Assistant Profile
|
|
3
|
-
// =============================================================================
|
|
4
|
-
// Optimized for customer-facing chatbots and conversational AI.
|
|
5
|
-
// Strict content moderation, PII protection, and injection defense.
|
|
6
|
-
// No tool-related policies (chat assistants don't call tools).
|
|
7
|
-
//
|
|
8
|
-
// Deploy: One-click deploy scopes all rules to a specific application via
|
|
9
|
-
// resource == Guardrails::App::"<app-uuid>" (injected at deploy time).
|
|
10
|
-
//
|
|
11
|
-
// Category: organization
|
|
12
|
-
// Namespace: Guardrails
|
|
13
|
-
// =============================================================================
|
|
14
|
-
|
|
15
|
-
@id("chat-toxicity-strict")
|
|
16
|
-
@name("Strict toxicity for chat")
|
|
17
|
-
@description("Lower thresholds for customer-facing content — blocks toxicity above 70")
|
|
18
|
-
@severity("critical")
|
|
19
|
-
@tags("profile,chat-assistant,toxicity,trust-safety")
|
|
20
|
-
forbid (
|
|
21
|
-
principal,
|
|
22
|
-
action == Guardrails::Action::"process_prompt",
|
|
23
|
-
resource
|
|
24
|
-
) when {
|
|
25
|
-
context.violence_score > 70 ||
|
|
26
|
-
context.hate_speech_score > 70 ||
|
|
27
|
-
context.sexual_score > 70 ||
|
|
28
|
-
context.profanity_score > 70
|
|
29
|
-
};
|
|
30
|
-
|
|
31
|
-
@id("chat-pii-block-input-output")
|
|
32
|
-
@name("Block PII in both directions")
|
|
33
|
-
@description("Blocks PII in both user inputs and assistant outputs for chat applications")
|
|
34
|
-
@severity("high")
|
|
35
|
-
@tags("profile,chat-assistant,pii,privacy")
|
|
36
|
-
forbid (
|
|
37
|
-
principal,
|
|
38
|
-
action == Guardrails::Action::"process_prompt",
|
|
39
|
-
resource
|
|
40
|
-
) when {
|
|
41
|
-
context.pii_detected == true
|
|
42
|
-
};
|
|
43
|
-
|
|
44
|
-
@id("chat-injection-lower-threshold")
|
|
45
|
-
@name("Aggressive injection defense for chat")
|
|
46
|
-
@description("Lower injection threshold for public-facing chat — blocks above 70")
|
|
47
|
-
@severity("high")
|
|
48
|
-
@tags("profile,chat-assistant,injection,security")
|
|
49
|
-
forbid (
|
|
50
|
-
principal,
|
|
51
|
-
action == Guardrails::Action::"process_prompt",
|
|
52
|
-
resource
|
|
53
|
-
) when {
|
|
54
|
-
context.injection_score > 70
|
|
55
|
-
};
|
|
56
|
-
|
|
57
|
-
@id("chat-jailbreak-lower-threshold")
|
|
58
|
-
@name("Aggressive jailbreak defense for chat")
|
|
59
|
-
@description("Lower jailbreak threshold for public-facing chat — blocks above 65")
|
|
60
|
-
@severity("high")
|
|
61
|
-
@tags("profile,chat-assistant,jailbreak,security")
|
|
62
|
-
forbid (
|
|
63
|
-
principal,
|
|
64
|
-
action == Guardrails::Action::"process_prompt",
|
|
65
|
-
resource
|
|
66
|
-
) when {
|
|
67
|
-
context.jailbreak_score > 65
|
|
68
|
-
};
|
|
69
|
-
|
|
70
|
-
@id("chat-topic-restriction")
|
|
71
|
-
@name("Block restricted topics in chat")
|
|
72
|
-
@description("Prevents chat assistants from discussing dangerous or regulated topics")
|
|
73
|
-
@severity("high")
|
|
74
|
-
@tags("profile,chat-assistant,semantic,compliance")
|
|
75
|
-
forbid (
|
|
76
|
-
principal,
|
|
77
|
-
action == Guardrails::Action::"process_prompt",
|
|
78
|
-
resource
|
|
79
|
-
) when {
|
|
80
|
-
context.topic_confidence > 70 &&
|
|
81
|
-
(context.content_topics.contains("weapons_manufacturing") ||
|
|
82
|
-
context.content_topics.contains("illegal_activity") ||
|
|
83
|
-
context.content_topics.contains("controlled_substances") ||
|
|
84
|
-
context.content_topics.contains("financial_fraud"))
|
|
85
|
-
};
|