@highflame/policy 2.1.4 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/_schemas/guardrails/context.json +432 -6
- package/_schemas/guardrails/schema.cedarschema +39 -3
- package/_schemas/guardrails/templates/defaults/injection.cedar +6 -6
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +1 -1
- package/_schemas/overwatch/context.json +232 -4
- package/_schemas/overwatch/schema.cedarschema +42 -4
- package/dist/guardrails-context.gen.d.ts +19 -2
- package/dist/guardrails-context.gen.js +19 -2
- package/dist/guardrails-defaults.gen.js +9 -9
- package/dist/overwatch-context.gen.d.ts +10 -0
- package/dist/overwatch-context.gen.js +10 -0
- package/dist/service-schemas.gen.d.ts +2 -2
- package/dist/service-schemas.gen.js +207 -24
- package/package.json +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
- package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
- package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
|
@@ -18,7 +18,7 @@ forbid (
|
|
|
18
18
|
action,
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
|
-
context has
|
|
21
|
+
context has injection_confidence && context.injection_confidence > 70
|
|
22
22
|
};
|
|
23
23
|
|
|
24
24
|
@id("chat-jailbreak-lower-threshold")
|
|
@@ -31,5 +31,5 @@ forbid (
|
|
|
31
31
|
action,
|
|
32
32
|
resource
|
|
33
33
|
) when {
|
|
34
|
-
context has
|
|
34
|
+
context has jailbreak_confidence && context.jailbreak_confidence > 65
|
|
35
35
|
};
|
|
@@ -179,13 +179,37 @@
|
|
|
179
179
|
"key": "injection_confidence",
|
|
180
180
|
"type": "number",
|
|
181
181
|
"required": true,
|
|
182
|
-
"description": "
|
|
182
|
+
"description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
|
|
183
183
|
},
|
|
184
184
|
{
|
|
185
185
|
"key": "jailbreak_confidence",
|
|
186
186
|
"type": "number",
|
|
187
187
|
"required": true,
|
|
188
|
-
"description": "
|
|
188
|
+
"description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control"
|
|
189
|
+
},
|
|
190
|
+
{
|
|
191
|
+
"key": "injection_pulse_score",
|
|
192
|
+
"type": "number",
|
|
193
|
+
"required": false,
|
|
194
|
+
"description": "Highflame single-turn classifier score for prompt injection (0-100). Raw score from Pulse detector before combination with deep-context. Use for per-detector policy control"
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
"key": "injection_deep_context_score",
|
|
198
|
+
"type": "number",
|
|
199
|
+
"required": false,
|
|
200
|
+
"description": "DeepContext multi-turn analyzer score for prompt injection (0-100). Tracks injection patterns across conversation history. Generally higher confidence than single-turn"
|
|
201
|
+
},
|
|
202
|
+
{
|
|
203
|
+
"key": "jailbreak_pulse_score",
|
|
204
|
+
"type": "number",
|
|
205
|
+
"required": false,
|
|
206
|
+
"description": "Highflame single-turn classifier score for jailbreak attempts (0-100). Raw score from Pulse detector before combination with deep-context"
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
"key": "jailbreak_deep_context_score",
|
|
210
|
+
"type": "number",
|
|
211
|
+
"required": false,
|
|
212
|
+
"description": "DeepContext multi-turn analyzer score for jailbreak attempts (0-100). Detects jailbreak escalation patterns across conversation turns"
|
|
189
213
|
},
|
|
190
214
|
{
|
|
191
215
|
"key": "indirect_injection_score",
|
|
@@ -234,6 +258,42 @@
|
|
|
234
258
|
"type": "number",
|
|
235
259
|
"required": false,
|
|
236
260
|
"description": "Number of turns in the session where threats were detected"
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
"key": "session_max_injection_score",
|
|
264
|
+
"type": "number",
|
|
265
|
+
"required": false,
|
|
266
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
267
|
+
},
|
|
268
|
+
{
|
|
269
|
+
"key": "session_max_jailbreak_score",
|
|
270
|
+
"type": "number",
|
|
271
|
+
"required": false,
|
|
272
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
"key": "session_max_command_injection_score",
|
|
276
|
+
"type": "number",
|
|
277
|
+
"required": false,
|
|
278
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
279
|
+
},
|
|
280
|
+
{
|
|
281
|
+
"key": "session_max_pii_score",
|
|
282
|
+
"type": "number",
|
|
283
|
+
"required": false,
|
|
284
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
285
|
+
},
|
|
286
|
+
{
|
|
287
|
+
"key": "session_max_secret_score",
|
|
288
|
+
"type": "number",
|
|
289
|
+
"required": false,
|
|
290
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
291
|
+
},
|
|
292
|
+
{
|
|
293
|
+
"key": "session_cumulative_risk_score",
|
|
294
|
+
"type": "number",
|
|
295
|
+
"required": false,
|
|
296
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
237
297
|
}
|
|
238
298
|
]
|
|
239
299
|
},
|
|
@@ -431,13 +491,37 @@
|
|
|
431
491
|
"key": "injection_confidence",
|
|
432
492
|
"type": "number",
|
|
433
493
|
"required": false,
|
|
434
|
-
"description": "
|
|
494
|
+
"description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
|
|
435
495
|
},
|
|
436
496
|
{
|
|
437
497
|
"key": "jailbreak_confidence",
|
|
438
498
|
"type": "number",
|
|
439
499
|
"required": false,
|
|
440
|
-
"description": "
|
|
500
|
+
"description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control"
|
|
501
|
+
},
|
|
502
|
+
{
|
|
503
|
+
"key": "injection_pulse_score",
|
|
504
|
+
"type": "number",
|
|
505
|
+
"required": false,
|
|
506
|
+
"description": "Highflame single-turn classifier score for prompt injection in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context"
|
|
507
|
+
},
|
|
508
|
+
{
|
|
509
|
+
"key": "injection_deep_context_score",
|
|
510
|
+
"type": "number",
|
|
511
|
+
"required": false,
|
|
512
|
+
"description": "DeepContext multi-turn analyzer score for prompt injection in tool arguments (0-100). Tracks injection patterns across tool call history"
|
|
513
|
+
},
|
|
514
|
+
{
|
|
515
|
+
"key": "jailbreak_pulse_score",
|
|
516
|
+
"type": "number",
|
|
517
|
+
"required": false,
|
|
518
|
+
"description": "Highflame single-turn classifier score for jailbreak in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context"
|
|
519
|
+
},
|
|
520
|
+
{
|
|
521
|
+
"key": "jailbreak_deep_context_score",
|
|
522
|
+
"type": "number",
|
|
523
|
+
"required": false,
|
|
524
|
+
"description": "DeepContext multi-turn analyzer score for jailbreak in tool arguments (0-100). Detects jailbreak escalation patterns across tool call turns"
|
|
441
525
|
},
|
|
442
526
|
{
|
|
443
527
|
"key": "tool_poisoning_score",
|
|
@@ -576,6 +660,42 @@
|
|
|
576
660
|
"type": "number",
|
|
577
661
|
"required": false,
|
|
578
662
|
"description": "Number of turns in the session where threats were detected"
|
|
663
|
+
},
|
|
664
|
+
{
|
|
665
|
+
"key": "session_max_injection_score",
|
|
666
|
+
"type": "number",
|
|
667
|
+
"required": false,
|
|
668
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
669
|
+
},
|
|
670
|
+
{
|
|
671
|
+
"key": "session_max_jailbreak_score",
|
|
672
|
+
"type": "number",
|
|
673
|
+
"required": false,
|
|
674
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
675
|
+
},
|
|
676
|
+
{
|
|
677
|
+
"key": "session_max_command_injection_score",
|
|
678
|
+
"type": "number",
|
|
679
|
+
"required": false,
|
|
680
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
681
|
+
},
|
|
682
|
+
{
|
|
683
|
+
"key": "session_max_pii_score",
|
|
684
|
+
"type": "number",
|
|
685
|
+
"required": false,
|
|
686
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
687
|
+
},
|
|
688
|
+
{
|
|
689
|
+
"key": "session_max_secret_score",
|
|
690
|
+
"type": "number",
|
|
691
|
+
"required": false,
|
|
692
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
693
|
+
},
|
|
694
|
+
{
|
|
695
|
+
"key": "session_cumulative_risk_score",
|
|
696
|
+
"type": "number",
|
|
697
|
+
"required": false,
|
|
698
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
579
699
|
}
|
|
580
700
|
]
|
|
581
701
|
},
|
|
@@ -732,6 +852,42 @@
|
|
|
732
852
|
"type": "number",
|
|
733
853
|
"required": false,
|
|
734
854
|
"description": "Number of turns in the session where threats were detected"
|
|
855
|
+
},
|
|
856
|
+
{
|
|
857
|
+
"key": "session_max_injection_score",
|
|
858
|
+
"type": "number",
|
|
859
|
+
"required": false,
|
|
860
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
861
|
+
},
|
|
862
|
+
{
|
|
863
|
+
"key": "session_max_jailbreak_score",
|
|
864
|
+
"type": "number",
|
|
865
|
+
"required": false,
|
|
866
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
867
|
+
},
|
|
868
|
+
{
|
|
869
|
+
"key": "session_max_command_injection_score",
|
|
870
|
+
"type": "number",
|
|
871
|
+
"required": false,
|
|
872
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
873
|
+
},
|
|
874
|
+
{
|
|
875
|
+
"key": "session_max_pii_score",
|
|
876
|
+
"type": "number",
|
|
877
|
+
"required": false,
|
|
878
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
879
|
+
},
|
|
880
|
+
{
|
|
881
|
+
"key": "session_max_secret_score",
|
|
882
|
+
"type": "number",
|
|
883
|
+
"required": false,
|
|
884
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
885
|
+
},
|
|
886
|
+
{
|
|
887
|
+
"key": "session_cumulative_risk_score",
|
|
888
|
+
"type": "number",
|
|
889
|
+
"required": false,
|
|
890
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
735
891
|
}
|
|
736
892
|
]
|
|
737
893
|
},
|
|
@@ -888,6 +1044,42 @@
|
|
|
888
1044
|
"type": "number",
|
|
889
1045
|
"required": false,
|
|
890
1046
|
"description": "Number of turns in the session where threats were detected"
|
|
1047
|
+
},
|
|
1048
|
+
{
|
|
1049
|
+
"key": "session_max_injection_score",
|
|
1050
|
+
"type": "number",
|
|
1051
|
+
"required": false,
|
|
1052
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
1053
|
+
},
|
|
1054
|
+
{
|
|
1055
|
+
"key": "session_max_jailbreak_score",
|
|
1056
|
+
"type": "number",
|
|
1057
|
+
"required": false,
|
|
1058
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
1059
|
+
},
|
|
1060
|
+
{
|
|
1061
|
+
"key": "session_max_command_injection_score",
|
|
1062
|
+
"type": "number",
|
|
1063
|
+
"required": false,
|
|
1064
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
1065
|
+
},
|
|
1066
|
+
{
|
|
1067
|
+
"key": "session_max_pii_score",
|
|
1068
|
+
"type": "number",
|
|
1069
|
+
"required": false,
|
|
1070
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
1071
|
+
},
|
|
1072
|
+
{
|
|
1073
|
+
"key": "session_max_secret_score",
|
|
1074
|
+
"type": "number",
|
|
1075
|
+
"required": false,
|
|
1076
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
1077
|
+
},
|
|
1078
|
+
{
|
|
1079
|
+
"key": "session_cumulative_risk_score",
|
|
1080
|
+
"type": "number",
|
|
1081
|
+
"required": false,
|
|
1082
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
891
1083
|
}
|
|
892
1084
|
]
|
|
893
1085
|
},
|
|
@@ -1050,6 +1242,42 @@
|
|
|
1050
1242
|
"type": "number",
|
|
1051
1243
|
"required": false,
|
|
1052
1244
|
"description": "Number of turns in the session where threats were detected"
|
|
1245
|
+
},
|
|
1246
|
+
{
|
|
1247
|
+
"key": "session_max_injection_score",
|
|
1248
|
+
"type": "number",
|
|
1249
|
+
"required": false,
|
|
1250
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
1251
|
+
},
|
|
1252
|
+
{
|
|
1253
|
+
"key": "session_max_jailbreak_score",
|
|
1254
|
+
"type": "number",
|
|
1255
|
+
"required": false,
|
|
1256
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
1257
|
+
},
|
|
1258
|
+
{
|
|
1259
|
+
"key": "session_max_command_injection_score",
|
|
1260
|
+
"type": "number",
|
|
1261
|
+
"required": false,
|
|
1262
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
1263
|
+
},
|
|
1264
|
+
{
|
|
1265
|
+
"key": "session_max_pii_score",
|
|
1266
|
+
"type": "number",
|
|
1267
|
+
"required": false,
|
|
1268
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
1269
|
+
},
|
|
1270
|
+
{
|
|
1271
|
+
"key": "session_max_secret_score",
|
|
1272
|
+
"type": "number",
|
|
1273
|
+
"required": false,
|
|
1274
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
1275
|
+
},
|
|
1276
|
+
{
|
|
1277
|
+
"key": "session_cumulative_risk_score",
|
|
1278
|
+
"type": "number",
|
|
1279
|
+
"required": false,
|
|
1280
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
1053
1281
|
}
|
|
1054
1282
|
]
|
|
1055
1283
|
}
|
|
@@ -120,8 +120,12 @@ action process_prompt appliesTo {
|
|
|
120
120
|
|
|
121
121
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
122
122
|
pii_confidence: Long, // PII detection classifier confidence
|
|
123
|
-
injection_confidence: Long, //
|
|
124
|
-
jailbreak_confidence: Long, //
|
|
123
|
+
injection_confidence: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
124
|
+
jailbreak_confidence: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
125
|
+
injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
126
|
+
injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
127
|
+
jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
128
|
+
jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
125
129
|
|
|
126
130
|
// --- Agent Security (0-100) ---
|
|
127
131
|
indirect_injection_score: Long, // Indirect prompt injection risk (OWASP LLM01, ASI01)
|
|
@@ -134,6 +138,12 @@ action process_prompt appliesTo {
|
|
|
134
138
|
session_injection_detected?: Bool,
|
|
135
139
|
session_command_injection?: Bool,
|
|
136
140
|
session_threat_turns?: Long,
|
|
141
|
+
session_max_injection_score?: Long,
|
|
142
|
+
session_max_jailbreak_score?: Long,
|
|
143
|
+
session_max_command_injection_score?: Long,
|
|
144
|
+
session_max_pii_score?: Long,
|
|
145
|
+
session_max_secret_score?: Long,
|
|
146
|
+
session_cumulative_risk_score?: Long,
|
|
137
147
|
|
|
138
148
|
// --- Legacy ---
|
|
139
149
|
prompt_text?: String, // Same as content (backward compatibility)
|
|
@@ -196,8 +206,12 @@ action call_tool appliesTo {
|
|
|
196
206
|
|
|
197
207
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
198
208
|
pii_confidence?: Long,
|
|
199
|
-
injection_confidence?: Long,
|
|
200
|
-
jailbreak_confidence?: Long,
|
|
209
|
+
injection_confidence?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
210
|
+
jailbreak_confidence?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
211
|
+
injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
212
|
+
injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
213
|
+
jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
|
|
214
|
+
jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
|
|
201
215
|
|
|
202
216
|
// --- Agent Security (0-100) --- (OWASP ASI01, ASI02, ASI04; MITRE AML.T0051)
|
|
203
217
|
tool_poisoning_score?: Long, // Hidden instructions in tool description/args
|
|
@@ -231,6 +245,12 @@ action call_tool appliesTo {
|
|
|
231
245
|
session_injection_detected?: Bool,
|
|
232
246
|
session_command_injection?: Bool,
|
|
233
247
|
session_threat_turns?: Long,
|
|
248
|
+
session_max_injection_score?: Long,
|
|
249
|
+
session_max_jailbreak_score?: Long,
|
|
250
|
+
session_max_command_injection_score?: Long,
|
|
251
|
+
session_max_pii_score?: Long,
|
|
252
|
+
session_max_secret_score?: Long,
|
|
253
|
+
session_cumulative_risk_score?: Long,
|
|
234
254
|
|
|
235
255
|
// --- Legacy ---
|
|
236
256
|
response_content?: String,
|
|
@@ -275,6 +295,12 @@ action connect_server appliesTo {
|
|
|
275
295
|
session_injection_detected?: Bool,
|
|
276
296
|
session_command_injection?: Bool,
|
|
277
297
|
session_threat_turns?: Long,
|
|
298
|
+
session_max_injection_score?: Long,
|
|
299
|
+
session_max_jailbreak_score?: Long,
|
|
300
|
+
session_max_command_injection_score?: Long,
|
|
301
|
+
session_max_pii_score?: Long,
|
|
302
|
+
session_max_secret_score?: Long,
|
|
303
|
+
session_cumulative_risk_score?: Long,
|
|
278
304
|
},
|
|
279
305
|
};
|
|
280
306
|
|
|
@@ -317,6 +343,12 @@ action read_file appliesTo {
|
|
|
317
343
|
session_injection_detected?: Bool,
|
|
318
344
|
session_command_injection?: Bool,
|
|
319
345
|
session_threat_turns?: Long,
|
|
346
|
+
session_max_injection_score?: Long,
|
|
347
|
+
session_max_jailbreak_score?: Long,
|
|
348
|
+
session_max_command_injection_score?: Long,
|
|
349
|
+
session_max_pii_score?: Long,
|
|
350
|
+
session_max_secret_score?: Long,
|
|
351
|
+
session_cumulative_risk_score?: Long,
|
|
320
352
|
},
|
|
321
353
|
};
|
|
322
354
|
|
|
@@ -359,6 +391,12 @@ action write_file appliesTo {
|
|
|
359
391
|
session_injection_detected?: Bool,
|
|
360
392
|
session_command_injection?: Bool,
|
|
361
393
|
session_threat_turns?: Long,
|
|
394
|
+
session_max_injection_score?: Long,
|
|
395
|
+
session_max_jailbreak_score?: Long,
|
|
396
|
+
session_max_command_injection_score?: Long,
|
|
397
|
+
session_max_pii_score?: Long,
|
|
398
|
+
session_max_secret_score?: Long,
|
|
399
|
+
session_cumulative_risk_score?: Long,
|
|
362
400
|
},
|
|
363
401
|
};
|
|
364
402
|
|
|
@@ -36,12 +36,16 @@ export declare const GuardrailsContextKey: {
|
|
|
36
36
|
readonly FactualityScore: "factuality_score";
|
|
37
37
|
readonly HallucinationScore: "hallucination_score";
|
|
38
38
|
readonly HateSpeechScore: "hate_speech_score";
|
|
39
|
-
readonly
|
|
39
|
+
readonly InjectionConfidence: "injection_confidence";
|
|
40
|
+
readonly InjectionDeepContextScore: "injection_deep_context_score";
|
|
41
|
+
readonly InjectionPulseScore: "injection_pulse_score";
|
|
40
42
|
readonly InjectionType: "injection_type";
|
|
41
43
|
readonly InvisibleCharsScore: "invisible_chars_score";
|
|
42
44
|
readonly IsEnglish: "is_english";
|
|
43
45
|
readonly IsLatinScript: "is_latin_script";
|
|
44
|
-
readonly
|
|
46
|
+
readonly JailbreakConfidence: "jailbreak_confidence";
|
|
47
|
+
readonly JailbreakDeepContextScore: "jailbreak_deep_context_score";
|
|
48
|
+
readonly JailbreakPulseScore: "jailbreak_pulse_score";
|
|
45
49
|
readonly KeywordCategories: "keyword_categories";
|
|
46
50
|
readonly KeywordCount: "keyword_count";
|
|
47
51
|
readonly KeywordMatched: "keyword_matched";
|
|
@@ -74,6 +78,19 @@ export declare const GuardrailsContextKey: {
|
|
|
74
78
|
readonly SecretTypes: "secret_types";
|
|
75
79
|
readonly SentimentScore: "sentiment_score";
|
|
76
80
|
readonly SequenceRisk: "sequence_risk";
|
|
81
|
+
readonly SessionCommandInjection: "session_command_injection";
|
|
82
|
+
readonly SessionCumulativeRiskScore: "session_cumulative_risk_score";
|
|
83
|
+
readonly SessionInjectionDetected: "session_injection_detected";
|
|
84
|
+
readonly SessionMaxCommandInjectionScore: "session_max_command_injection_score";
|
|
85
|
+
readonly SessionMaxInjectionScore: "session_max_injection_score";
|
|
86
|
+
readonly SessionMaxJailbreakScore: "session_max_jailbreak_score";
|
|
87
|
+
readonly SessionMaxPiiScore: "session_max_pii_score";
|
|
88
|
+
readonly SessionMaxSecretScore: "session_max_secret_score";
|
|
89
|
+
readonly SessionPiiDetected: "session_pii_detected";
|
|
90
|
+
readonly SessionPiiTypes: "session_pii_types";
|
|
91
|
+
readonly SessionSecretTypes: "session_secret_types";
|
|
92
|
+
readonly SessionSecretsDetected: "session_secrets_detected";
|
|
93
|
+
readonly SessionThreatTurns: "session_threat_turns";
|
|
77
94
|
readonly SexualScore: "sexual_score";
|
|
78
95
|
readonly SqlInjectionDetected: "sql_injection_detected";
|
|
79
96
|
readonly SqlInjectionScore: "sql_injection_score";
|
|
@@ -38,12 +38,16 @@ export const GuardrailsContextKey = {
|
|
|
38
38
|
FactualityScore: 'factuality_score',
|
|
39
39
|
HallucinationScore: 'hallucination_score',
|
|
40
40
|
HateSpeechScore: 'hate_speech_score',
|
|
41
|
-
|
|
41
|
+
InjectionConfidence: 'injection_confidence',
|
|
42
|
+
InjectionDeepContextScore: 'injection_deep_context_score',
|
|
43
|
+
InjectionPulseScore: 'injection_pulse_score',
|
|
42
44
|
InjectionType: 'injection_type',
|
|
43
45
|
InvisibleCharsScore: 'invisible_chars_score',
|
|
44
46
|
IsEnglish: 'is_english',
|
|
45
47
|
IsLatinScript: 'is_latin_script',
|
|
46
|
-
|
|
48
|
+
JailbreakConfidence: 'jailbreak_confidence',
|
|
49
|
+
JailbreakDeepContextScore: 'jailbreak_deep_context_score',
|
|
50
|
+
JailbreakPulseScore: 'jailbreak_pulse_score',
|
|
47
51
|
KeywordCategories: 'keyword_categories',
|
|
48
52
|
KeywordCount: 'keyword_count',
|
|
49
53
|
KeywordMatched: 'keyword_matched',
|
|
@@ -76,6 +80,19 @@ export const GuardrailsContextKey = {
|
|
|
76
80
|
SecretTypes: 'secret_types',
|
|
77
81
|
SentimentScore: 'sentiment_score',
|
|
78
82
|
SequenceRisk: 'sequence_risk',
|
|
83
|
+
SessionCommandInjection: 'session_command_injection',
|
|
84
|
+
SessionCumulativeRiskScore: 'session_cumulative_risk_score',
|
|
85
|
+
SessionInjectionDetected: 'session_injection_detected',
|
|
86
|
+
SessionMaxCommandInjectionScore: 'session_max_command_injection_score',
|
|
87
|
+
SessionMaxInjectionScore: 'session_max_injection_score',
|
|
88
|
+
SessionMaxJailbreakScore: 'session_max_jailbreak_score',
|
|
89
|
+
SessionMaxPiiScore: 'session_max_pii_score',
|
|
90
|
+
SessionMaxSecretScore: 'session_max_secret_score',
|
|
91
|
+
SessionPiiDetected: 'session_pii_detected',
|
|
92
|
+
SessionPiiTypes: 'session_pii_types',
|
|
93
|
+
SessionSecretTypes: 'session_secret_types',
|
|
94
|
+
SessionSecretsDetected: 'session_secrets_detected',
|
|
95
|
+
SessionThreatTurns: 'session_threat_turns',
|
|
79
96
|
SexualScore: 'sexual_score',
|
|
80
97
|
SqlInjectionDetected: 'sql_injection_detected',
|
|
81
98
|
SqlInjectionScore: 'sql_injection_score',
|
|
@@ -80,8 +80,8 @@ const GUARDRAILS_INJECTION_DEFAULT_CEDAR = `// =================================
|
|
|
80
80
|
// Uses ML-based confidence scores from normalized context.
|
|
81
81
|
//
|
|
82
82
|
// Context keys used (normalized by projection layer):
|
|
83
|
-
// -
|
|
84
|
-
// -
|
|
83
|
+
// - injection_confidence: Long (0-100) - Overall injection confidence
|
|
84
|
+
// - jailbreak_confidence: Long (0-100) - Jailbreak attempt confidence
|
|
85
85
|
// - injection_type: String - Type of injection detected
|
|
86
86
|
// - contains_invisible_chars: Bool - Invisible Unicode characters detected
|
|
87
87
|
// - invisible_chars_score: Long (0-100) - Invisible character density
|
|
@@ -100,7 +100,7 @@ forbid (
|
|
|
100
100
|
action,
|
|
101
101
|
resource
|
|
102
102
|
) when {
|
|
103
|
-
context has
|
|
103
|
+
context has injection_confidence && context.injection_confidence > 85
|
|
104
104
|
};
|
|
105
105
|
|
|
106
106
|
@id("jailbreak-block-high-confidence")
|
|
@@ -113,7 +113,7 @@ forbid (
|
|
|
113
113
|
action,
|
|
114
114
|
resource
|
|
115
115
|
) when {
|
|
116
|
-
context has
|
|
116
|
+
context has jailbreak_confidence && context.jailbreak_confidence > 80
|
|
117
117
|
};
|
|
118
118
|
|
|
119
119
|
@id("injection-combined-threshold")
|
|
@@ -126,8 +126,8 @@ forbid (
|
|
|
126
126
|
action,
|
|
127
127
|
resource
|
|
128
128
|
) when {
|
|
129
|
-
context has
|
|
130
|
-
context.
|
|
129
|
+
context has injection_confidence && context has jailbreak_confidence &&
|
|
130
|
+
context.injection_confidence > 60 && context.jailbreak_confidence > 60
|
|
131
131
|
};
|
|
132
132
|
|
|
133
133
|
@id("injection-invisible-chars")
|
|
@@ -634,7 +634,7 @@ forbid (
|
|
|
634
634
|
action,
|
|
635
635
|
resource
|
|
636
636
|
) when {
|
|
637
|
-
context has
|
|
637
|
+
context has injection_confidence && context.injection_confidence > 70
|
|
638
638
|
};
|
|
639
639
|
|
|
640
640
|
@id("chat-jailbreak-lower-threshold")
|
|
@@ -647,7 +647,7 @@ forbid (
|
|
|
647
647
|
action,
|
|
648
648
|
resource
|
|
649
649
|
) when {
|
|
650
|
-
context has
|
|
650
|
+
context has jailbreak_confidence && context.jailbreak_confidence > 65
|
|
651
651
|
};
|
|
652
652
|
`;
|
|
653
653
|
const GUARDRAILS_CHAT_ASSISTANT_PRIVACY_CEDAR = `// =============================================================================
|
|
@@ -938,7 +938,7 @@ forbid (
|
|
|
938
938
|
action,
|
|
939
939
|
resource
|
|
940
940
|
) when {
|
|
941
|
-
context has
|
|
941
|
+
context has injection_confidence && context.injection_confidence > 65
|
|
942
942
|
};
|
|
943
943
|
`;
|
|
944
944
|
const GUARDRAILS_DATA_PIPELINE_AGENTIC_SECURITY_CEDAR = `// =============================================================================
|
|
@@ -16,8 +16,12 @@ export declare const OverwatchContextKey: {
|
|
|
16
16
|
readonly HighestSeverity: "highest_severity";
|
|
17
17
|
readonly IndirectInjectionScore: "indirect_injection_score";
|
|
18
18
|
readonly InjectionConfidence: "injection_confidence";
|
|
19
|
+
readonly InjectionDeepContextScore: "injection_deep_context_score";
|
|
20
|
+
readonly InjectionPulseScore: "injection_pulse_score";
|
|
19
21
|
readonly InvisibleCharsScore: "invisible_chars_score";
|
|
20
22
|
readonly JailbreakConfidence: "jailbreak_confidence";
|
|
23
|
+
readonly JailbreakDeepContextScore: "jailbreak_deep_context_score";
|
|
24
|
+
readonly JailbreakPulseScore: "jailbreak_pulse_score";
|
|
21
25
|
readonly LoopCount: "loop_count";
|
|
22
26
|
readonly LoopDetected: "loop_detected";
|
|
23
27
|
readonly LoopTool: "loop_tool";
|
|
@@ -42,7 +46,13 @@ export declare const OverwatchContextKey: {
|
|
|
42
46
|
readonly SecretTypes: "secret_types";
|
|
43
47
|
readonly SequenceRisk: "sequence_risk";
|
|
44
48
|
readonly SessionCommandInjection: "session_command_injection";
|
|
49
|
+
readonly SessionCumulativeRiskScore: "session_cumulative_risk_score";
|
|
45
50
|
readonly SessionInjectionDetected: "session_injection_detected";
|
|
51
|
+
readonly SessionMaxCommandInjectionScore: "session_max_command_injection_score";
|
|
52
|
+
readonly SessionMaxInjectionScore: "session_max_injection_score";
|
|
53
|
+
readonly SessionMaxJailbreakScore: "session_max_jailbreak_score";
|
|
54
|
+
readonly SessionMaxPiiScore: "session_max_pii_score";
|
|
55
|
+
readonly SessionMaxSecretScore: "session_max_secret_score";
|
|
46
56
|
readonly SessionPiiDetected: "session_pii_detected";
|
|
47
57
|
readonly SessionPiiTypes: "session_pii_types";
|
|
48
58
|
readonly SessionSecretTypes: "session_secret_types";
|
|
@@ -18,8 +18,12 @@ export const OverwatchContextKey = {
|
|
|
18
18
|
HighestSeverity: 'highest_severity',
|
|
19
19
|
IndirectInjectionScore: 'indirect_injection_score',
|
|
20
20
|
InjectionConfidence: 'injection_confidence',
|
|
21
|
+
InjectionDeepContextScore: 'injection_deep_context_score',
|
|
22
|
+
InjectionPulseScore: 'injection_pulse_score',
|
|
21
23
|
InvisibleCharsScore: 'invisible_chars_score',
|
|
22
24
|
JailbreakConfidence: 'jailbreak_confidence',
|
|
25
|
+
JailbreakDeepContextScore: 'jailbreak_deep_context_score',
|
|
26
|
+
JailbreakPulseScore: 'jailbreak_pulse_score',
|
|
23
27
|
LoopCount: 'loop_count',
|
|
24
28
|
LoopDetected: 'loop_detected',
|
|
25
29
|
LoopTool: 'loop_tool',
|
|
@@ -44,7 +48,13 @@ export const OverwatchContextKey = {
|
|
|
44
48
|
SecretTypes: 'secret_types',
|
|
45
49
|
SequenceRisk: 'sequence_risk',
|
|
46
50
|
SessionCommandInjection: 'session_command_injection',
|
|
51
|
+
SessionCumulativeRiskScore: 'session_cumulative_risk_score',
|
|
47
52
|
SessionInjectionDetected: 'session_injection_detected',
|
|
53
|
+
SessionMaxCommandInjectionScore: 'session_max_command_injection_score',
|
|
54
|
+
SessionMaxInjectionScore: 'session_max_injection_score',
|
|
55
|
+
SessionMaxJailbreakScore: 'session_max_jailbreak_score',
|
|
56
|
+
SessionMaxPiiScore: 'session_max_pii_score',
|
|
57
|
+
SessionMaxSecretScore: 'session_max_secret_score',
|
|
48
58
|
SessionPiiDetected: 'session_pii_detected',
|
|
49
59
|
SessionPiiTypes: 'session_pii_types',
|
|
50
60
|
SessionSecretTypes: 'session_secret_types',
|