@highflame/policy 2.1.4 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/_schemas/guardrails/context.json +432 -6
- package/_schemas/guardrails/schema.cedarschema +39 -3
- package/_schemas/guardrails/templates/defaults/injection.cedar +6 -6
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +1 -1
- package/_schemas/overwatch/context.json +232 -4
- package/_schemas/overwatch/schema.cedarschema +42 -4
- package/dist/guardrails-context.gen.d.ts +19 -2
- package/dist/guardrails-context.gen.js +19 -2
- package/dist/guardrails-defaults.gen.js +9 -9
- package/dist/overwatch-context.gen.d.ts +10 -0
- package/dist/overwatch-context.gen.js +10 -0
- package/dist/service-schemas.gen.d.ts +2 -2
- package/dist/service-schemas.gen.js +207 -24
- package/package.json +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
- package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
- package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
package/README.md
CHANGED
|
@@ -168,6 +168,47 @@ result.unstructured.forEach(policy => {
|
|
|
168
168
|
});
|
|
169
169
|
```
|
|
170
170
|
|
|
171
|
+
## Condition Groups (Visual Builder Support)
|
|
172
|
+
|
|
173
|
+
Bidirectional conversion between recursive `ConditionExpression` ASTs and flat `ConditionGroup` arrays for visual condition builder UIs.
|
|
174
|
+
|
|
175
|
+
```typescript
|
|
176
|
+
import {
|
|
177
|
+
expressionToGroups,
|
|
178
|
+
groupsToExpression,
|
|
179
|
+
expressionToCedar,
|
|
180
|
+
extractContextFields,
|
|
181
|
+
} from '@highflame/policy/types';
|
|
182
|
+
|
|
183
|
+
// Parse Cedar → edit in UI → generate Cedar
|
|
184
|
+
const result = parseCedarToRules(cedarText);
|
|
185
|
+
const rule = result.rules[0];
|
|
186
|
+
|
|
187
|
+
if (rule.conditionExpression) {
|
|
188
|
+
// Convert AST to flat groups for visual builder
|
|
189
|
+
const groups = expressionToGroups(rule.conditionExpression);
|
|
190
|
+
|
|
191
|
+
// User edits groups in UI...
|
|
192
|
+
|
|
193
|
+
// Convert back to AST
|
|
194
|
+
const expr = groupsToExpression(groups);
|
|
195
|
+
|
|
196
|
+
// Render to Cedar text
|
|
197
|
+
const cedarCondition = expressionToCedar(expr);
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Why Top-Level AND Between Groups?
|
|
202
|
+
|
|
203
|
+
Groups are always combined with **AND** at the top level. This reflects Cedar's authorization model:
|
|
204
|
+
|
|
205
|
+
- **Cedar provides OR between policies for free** — if ANY `forbid` matches, the request is denied
|
|
206
|
+
- **AND within a rule**: "block if injection > 70 AND jailbreak > 65" → conditions in one AND group
|
|
207
|
+
- **OR within a rule**: "block if violence > 70 OR hate > 70" → conditions in one OR group
|
|
208
|
+
- **OR between rules**: separate `forbid` rules — Cedar ORs them automatically
|
|
209
|
+
|
|
210
|
+
This means `(A && B) || (C && D)` is expressed as two separate rules, which is cleaner, more auditable, and idiomatic Cedar.
|
|
211
|
+
|
|
171
212
|
## Available Constants
|
|
172
213
|
|
|
173
214
|
- **17 Entity Types**: `EntityType.User`, `Scanner`, `Artifact`, `Tool`, etc.
|
|
@@ -38,16 +38,40 @@
|
|
|
38
38
|
"description": "Number of detectors that were executed for this request"
|
|
39
39
|
},
|
|
40
40
|
{
|
|
41
|
-
"key": "
|
|
41
|
+
"key": "injection_confidence",
|
|
42
42
|
"type": "number",
|
|
43
43
|
"required": false,
|
|
44
|
-
"description": "
|
|
44
|
+
"description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
|
|
45
45
|
},
|
|
46
46
|
{
|
|
47
|
-
"key": "
|
|
47
|
+
"key": "jailbreak_confidence",
|
|
48
48
|
"type": "number",
|
|
49
49
|
"required": false,
|
|
50
|
-
"description": "
|
|
50
|
+
"description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"key": "injection_pulse_score",
|
|
54
|
+
"type": "number",
|
|
55
|
+
"required": false,
|
|
56
|
+
"description": "Highflame single-turn classifier score for prompt injection (0-100). Raw score from Pulse detector before combination with deep-context. Use for per-detector policy control"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"key": "injection_deep_context_score",
|
|
60
|
+
"type": "number",
|
|
61
|
+
"required": false,
|
|
62
|
+
"description": "DeepContext multi-turn analyzer score for prompt injection (0-100). Tracks injection patterns across conversation history. Generally higher confidence than single-turn"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"key": "jailbreak_pulse_score",
|
|
66
|
+
"type": "number",
|
|
67
|
+
"required": false,
|
|
68
|
+
"description": "Highflame single-turn classifier score for jailbreak attempts (0-100). Raw score from Pulse detector before combination with deep-context"
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"key": "jailbreak_deep_context_score",
|
|
72
|
+
"type": "number",
|
|
73
|
+
"required": false,
|
|
74
|
+
"description": "DeepContext multi-turn analyzer score for jailbreak attempts (0-100). Detects jailbreak escalation patterns across conversation turns"
|
|
51
75
|
},
|
|
52
76
|
{
|
|
53
77
|
"key": "injection_type",
|
|
@@ -372,6 +396,84 @@
|
|
|
372
396
|
"type": "boolean",
|
|
373
397
|
"required": false,
|
|
374
398
|
"description": "Whether multi-turn injection patterns were detected (attack spread across multiple conversation turns)"
|
|
399
|
+
},
|
|
400
|
+
{
|
|
401
|
+
"key": "session_pii_detected",
|
|
402
|
+
"type": "boolean",
|
|
403
|
+
"required": false,
|
|
404
|
+
"description": "Whether PII was detected in any previous turn of the session"
|
|
405
|
+
},
|
|
406
|
+
{
|
|
407
|
+
"key": "session_pii_types",
|
|
408
|
+
"type": "array",
|
|
409
|
+
"required": false,
|
|
410
|
+
"description": "PII types detected across the session (accumulated)"
|
|
411
|
+
},
|
|
412
|
+
{
|
|
413
|
+
"key": "session_secrets_detected",
|
|
414
|
+
"type": "boolean",
|
|
415
|
+
"required": false,
|
|
416
|
+
"description": "Whether secrets were detected in any previous turn of the session"
|
|
417
|
+
},
|
|
418
|
+
{
|
|
419
|
+
"key": "session_secret_types",
|
|
420
|
+
"type": "array",
|
|
421
|
+
"required": false,
|
|
422
|
+
"description": "Secret types detected across the session (accumulated)"
|
|
423
|
+
},
|
|
424
|
+
{
|
|
425
|
+
"key": "session_injection_detected",
|
|
426
|
+
"type": "boolean",
|
|
427
|
+
"required": false,
|
|
428
|
+
"description": "Whether prompt injection was detected in any previous turn of the session"
|
|
429
|
+
},
|
|
430
|
+
{
|
|
431
|
+
"key": "session_command_injection",
|
|
432
|
+
"type": "boolean",
|
|
433
|
+
"required": false,
|
|
434
|
+
"description": "Whether command injection was detected in any previous turn of the session"
|
|
435
|
+
},
|
|
436
|
+
{
|
|
437
|
+
"key": "session_threat_turns",
|
|
438
|
+
"type": "number",
|
|
439
|
+
"required": false,
|
|
440
|
+
"description": "Number of turns in the session where threats were detected"
|
|
441
|
+
},
|
|
442
|
+
{
|
|
443
|
+
"key": "session_max_injection_score",
|
|
444
|
+
"type": "number",
|
|
445
|
+
"required": false,
|
|
446
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
447
|
+
},
|
|
448
|
+
{
|
|
449
|
+
"key": "session_max_jailbreak_score",
|
|
450
|
+
"type": "number",
|
|
451
|
+
"required": false,
|
|
452
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
453
|
+
},
|
|
454
|
+
{
|
|
455
|
+
"key": "session_max_command_injection_score",
|
|
456
|
+
"type": "number",
|
|
457
|
+
"required": false,
|
|
458
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
459
|
+
},
|
|
460
|
+
{
|
|
461
|
+
"key": "session_max_pii_score",
|
|
462
|
+
"type": "number",
|
|
463
|
+
"required": false,
|
|
464
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
465
|
+
},
|
|
466
|
+
{
|
|
467
|
+
"key": "session_max_secret_score",
|
|
468
|
+
"type": "number",
|
|
469
|
+
"required": false,
|
|
470
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
471
|
+
},
|
|
472
|
+
{
|
|
473
|
+
"key": "session_cumulative_risk_score",
|
|
474
|
+
"type": "number",
|
|
475
|
+
"required": false,
|
|
476
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
375
477
|
}
|
|
376
478
|
]
|
|
377
479
|
},
|
|
@@ -524,10 +626,22 @@
|
|
|
524
626
|
"description": "Array of PII types found in tool arguments"
|
|
525
627
|
},
|
|
526
628
|
{
|
|
527
|
-
"key": "
|
|
629
|
+
"key": "injection_confidence",
|
|
630
|
+
"type": "number",
|
|
631
|
+
"required": false,
|
|
632
|
+
"description": "Combined prompt injection confidence in tool arguments (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
|
|
633
|
+
},
|
|
634
|
+
{
|
|
635
|
+
"key": "injection_pulse_score",
|
|
636
|
+
"type": "number",
|
|
637
|
+
"required": false,
|
|
638
|
+
"description": "Highflame single-turn classifier score for prompt injection in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context"
|
|
639
|
+
},
|
|
640
|
+
{
|
|
641
|
+
"key": "injection_deep_context_score",
|
|
528
642
|
"type": "number",
|
|
529
643
|
"required": false,
|
|
530
|
-
"description": "
|
|
644
|
+
"description": "DeepContext multi-turn analyzer score for prompt injection in tool arguments (0-100). Tracks injection patterns across tool call history"
|
|
531
645
|
},
|
|
532
646
|
{
|
|
533
647
|
"key": "command_injection_detected",
|
|
@@ -690,6 +804,84 @@
|
|
|
690
804
|
"type": "boolean",
|
|
691
805
|
"required": false,
|
|
692
806
|
"description": "Whether multi-turn injection patterns were detected across tool calls in the session"
|
|
807
|
+
},
|
|
808
|
+
{
|
|
809
|
+
"key": "session_pii_detected",
|
|
810
|
+
"type": "boolean",
|
|
811
|
+
"required": false,
|
|
812
|
+
"description": "Whether PII was detected in any previous turn of the session"
|
|
813
|
+
},
|
|
814
|
+
{
|
|
815
|
+
"key": "session_pii_types",
|
|
816
|
+
"type": "array",
|
|
817
|
+
"required": false,
|
|
818
|
+
"description": "PII types detected across the session (accumulated)"
|
|
819
|
+
},
|
|
820
|
+
{
|
|
821
|
+
"key": "session_secrets_detected",
|
|
822
|
+
"type": "boolean",
|
|
823
|
+
"required": false,
|
|
824
|
+
"description": "Whether secrets were detected in any previous turn of the session"
|
|
825
|
+
},
|
|
826
|
+
{
|
|
827
|
+
"key": "session_secret_types",
|
|
828
|
+
"type": "array",
|
|
829
|
+
"required": false,
|
|
830
|
+
"description": "Secret types detected across the session (accumulated)"
|
|
831
|
+
},
|
|
832
|
+
{
|
|
833
|
+
"key": "session_injection_detected",
|
|
834
|
+
"type": "boolean",
|
|
835
|
+
"required": false,
|
|
836
|
+
"description": "Whether prompt injection was detected in any previous turn of the session"
|
|
837
|
+
},
|
|
838
|
+
{
|
|
839
|
+
"key": "session_command_injection",
|
|
840
|
+
"type": "boolean",
|
|
841
|
+
"required": false,
|
|
842
|
+
"description": "Whether command injection was detected in any previous turn of the session"
|
|
843
|
+
},
|
|
844
|
+
{
|
|
845
|
+
"key": "session_threat_turns",
|
|
846
|
+
"type": "number",
|
|
847
|
+
"required": false,
|
|
848
|
+
"description": "Number of turns in the session where threats were detected"
|
|
849
|
+
},
|
|
850
|
+
{
|
|
851
|
+
"key": "session_max_injection_score",
|
|
852
|
+
"type": "number",
|
|
853
|
+
"required": false,
|
|
854
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
855
|
+
},
|
|
856
|
+
{
|
|
857
|
+
"key": "session_max_jailbreak_score",
|
|
858
|
+
"type": "number",
|
|
859
|
+
"required": false,
|
|
860
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
861
|
+
},
|
|
862
|
+
{
|
|
863
|
+
"key": "session_max_command_injection_score",
|
|
864
|
+
"type": "number",
|
|
865
|
+
"required": false,
|
|
866
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
867
|
+
},
|
|
868
|
+
{
|
|
869
|
+
"key": "session_max_pii_score",
|
|
870
|
+
"type": "number",
|
|
871
|
+
"required": false,
|
|
872
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
873
|
+
},
|
|
874
|
+
{
|
|
875
|
+
"key": "session_max_secret_score",
|
|
876
|
+
"type": "number",
|
|
877
|
+
"required": false,
|
|
878
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
879
|
+
},
|
|
880
|
+
{
|
|
881
|
+
"key": "session_cumulative_risk_score",
|
|
882
|
+
"type": "number",
|
|
883
|
+
"required": false,
|
|
884
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
693
885
|
}
|
|
694
886
|
]
|
|
695
887
|
},
|
|
@@ -750,6 +942,84 @@
|
|
|
750
942
|
"type": "string",
|
|
751
943
|
"required": false,
|
|
752
944
|
"description": "Type of path traversal detected in the file read path"
|
|
945
|
+
},
|
|
946
|
+
{
|
|
947
|
+
"key": "session_pii_detected",
|
|
948
|
+
"type": "boolean",
|
|
949
|
+
"required": false,
|
|
950
|
+
"description": "Whether PII was detected in any previous turn of the session"
|
|
951
|
+
},
|
|
952
|
+
{
|
|
953
|
+
"key": "session_pii_types",
|
|
954
|
+
"type": "array",
|
|
955
|
+
"required": false,
|
|
956
|
+
"description": "PII types detected across the session (accumulated)"
|
|
957
|
+
},
|
|
958
|
+
{
|
|
959
|
+
"key": "session_secrets_detected",
|
|
960
|
+
"type": "boolean",
|
|
961
|
+
"required": false,
|
|
962
|
+
"description": "Whether secrets were detected in any previous turn of the session"
|
|
963
|
+
},
|
|
964
|
+
{
|
|
965
|
+
"key": "session_secret_types",
|
|
966
|
+
"type": "array",
|
|
967
|
+
"required": false,
|
|
968
|
+
"description": "Secret types detected across the session (accumulated)"
|
|
969
|
+
},
|
|
970
|
+
{
|
|
971
|
+
"key": "session_injection_detected",
|
|
972
|
+
"type": "boolean",
|
|
973
|
+
"required": false,
|
|
974
|
+
"description": "Whether prompt injection was detected in any previous turn of the session"
|
|
975
|
+
},
|
|
976
|
+
{
|
|
977
|
+
"key": "session_command_injection",
|
|
978
|
+
"type": "boolean",
|
|
979
|
+
"required": false,
|
|
980
|
+
"description": "Whether command injection was detected in any previous turn of the session"
|
|
981
|
+
},
|
|
982
|
+
{
|
|
983
|
+
"key": "session_threat_turns",
|
|
984
|
+
"type": "number",
|
|
985
|
+
"required": false,
|
|
986
|
+
"description": "Number of turns in the session where threats were detected"
|
|
987
|
+
},
|
|
988
|
+
{
|
|
989
|
+
"key": "session_max_injection_score",
|
|
990
|
+
"type": "number",
|
|
991
|
+
"required": false,
|
|
992
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
993
|
+
},
|
|
994
|
+
{
|
|
995
|
+
"key": "session_max_jailbreak_score",
|
|
996
|
+
"type": "number",
|
|
997
|
+
"required": false,
|
|
998
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
999
|
+
},
|
|
1000
|
+
{
|
|
1001
|
+
"key": "session_max_command_injection_score",
|
|
1002
|
+
"type": "number",
|
|
1003
|
+
"required": false,
|
|
1004
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
1005
|
+
},
|
|
1006
|
+
{
|
|
1007
|
+
"key": "session_max_pii_score",
|
|
1008
|
+
"type": "number",
|
|
1009
|
+
"required": false,
|
|
1010
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
1011
|
+
},
|
|
1012
|
+
{
|
|
1013
|
+
"key": "session_max_secret_score",
|
|
1014
|
+
"type": "number",
|
|
1015
|
+
"required": false,
|
|
1016
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
1017
|
+
},
|
|
1018
|
+
{
|
|
1019
|
+
"key": "session_cumulative_risk_score",
|
|
1020
|
+
"type": "number",
|
|
1021
|
+
"required": false,
|
|
1022
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
753
1023
|
}
|
|
754
1024
|
]
|
|
755
1025
|
},
|
|
@@ -810,6 +1080,84 @@
|
|
|
810
1080
|
"type": "string",
|
|
811
1081
|
"required": false,
|
|
812
1082
|
"description": "Type of path traversal detected in the file write path"
|
|
1083
|
+
},
|
|
1084
|
+
{
|
|
1085
|
+
"key": "session_pii_detected",
|
|
1086
|
+
"type": "boolean",
|
|
1087
|
+
"required": false,
|
|
1088
|
+
"description": "Whether PII was detected in any previous turn of the session"
|
|
1089
|
+
},
|
|
1090
|
+
{
|
|
1091
|
+
"key": "session_pii_types",
|
|
1092
|
+
"type": "array",
|
|
1093
|
+
"required": false,
|
|
1094
|
+
"description": "PII types detected across the session (accumulated)"
|
|
1095
|
+
},
|
|
1096
|
+
{
|
|
1097
|
+
"key": "session_secrets_detected",
|
|
1098
|
+
"type": "boolean",
|
|
1099
|
+
"required": false,
|
|
1100
|
+
"description": "Whether secrets were detected in any previous turn of the session"
|
|
1101
|
+
},
|
|
1102
|
+
{
|
|
1103
|
+
"key": "session_secret_types",
|
|
1104
|
+
"type": "array",
|
|
1105
|
+
"required": false,
|
|
1106
|
+
"description": "Secret types detected across the session (accumulated)"
|
|
1107
|
+
},
|
|
1108
|
+
{
|
|
1109
|
+
"key": "session_injection_detected",
|
|
1110
|
+
"type": "boolean",
|
|
1111
|
+
"required": false,
|
|
1112
|
+
"description": "Whether prompt injection was detected in any previous turn of the session"
|
|
1113
|
+
},
|
|
1114
|
+
{
|
|
1115
|
+
"key": "session_command_injection",
|
|
1116
|
+
"type": "boolean",
|
|
1117
|
+
"required": false,
|
|
1118
|
+
"description": "Whether command injection was detected in any previous turn of the session"
|
|
1119
|
+
},
|
|
1120
|
+
{
|
|
1121
|
+
"key": "session_threat_turns",
|
|
1122
|
+
"type": "number",
|
|
1123
|
+
"required": false,
|
|
1124
|
+
"description": "Number of turns in the session where threats were detected"
|
|
1125
|
+
},
|
|
1126
|
+
{
|
|
1127
|
+
"key": "session_max_injection_score",
|
|
1128
|
+
"type": "number",
|
|
1129
|
+
"required": false,
|
|
1130
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
1131
|
+
},
|
|
1132
|
+
{
|
|
1133
|
+
"key": "session_max_jailbreak_score",
|
|
1134
|
+
"type": "number",
|
|
1135
|
+
"required": false,
|
|
1136
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
1137
|
+
},
|
|
1138
|
+
{
|
|
1139
|
+
"key": "session_max_command_injection_score",
|
|
1140
|
+
"type": "number",
|
|
1141
|
+
"required": false,
|
|
1142
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
1143
|
+
},
|
|
1144
|
+
{
|
|
1145
|
+
"key": "session_max_pii_score",
|
|
1146
|
+
"type": "number",
|
|
1147
|
+
"required": false,
|
|
1148
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
1149
|
+
},
|
|
1150
|
+
{
|
|
1151
|
+
"key": "session_max_secret_score",
|
|
1152
|
+
"type": "number",
|
|
1153
|
+
"required": false,
|
|
1154
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
1155
|
+
},
|
|
1156
|
+
{
|
|
1157
|
+
"key": "session_cumulative_risk_score",
|
|
1158
|
+
"type": "number",
|
|
1159
|
+
"required": false,
|
|
1160
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
813
1161
|
}
|
|
814
1162
|
]
|
|
815
1163
|
},
|
|
@@ -894,6 +1242,84 @@
|
|
|
894
1242
|
"type": "number",
|
|
895
1243
|
"required": false,
|
|
896
1244
|
"description": "Risk score for cross-origin escalation in server connection (0-100)"
|
|
1245
|
+
},
|
|
1246
|
+
{
|
|
1247
|
+
"key": "session_pii_detected",
|
|
1248
|
+
"type": "boolean",
|
|
1249
|
+
"required": false,
|
|
1250
|
+
"description": "Whether PII was detected in any previous turn of the session"
|
|
1251
|
+
},
|
|
1252
|
+
{
|
|
1253
|
+
"key": "session_pii_types",
|
|
1254
|
+
"type": "array",
|
|
1255
|
+
"required": false,
|
|
1256
|
+
"description": "PII types detected across the session (accumulated)"
|
|
1257
|
+
},
|
|
1258
|
+
{
|
|
1259
|
+
"key": "session_secrets_detected",
|
|
1260
|
+
"type": "boolean",
|
|
1261
|
+
"required": false,
|
|
1262
|
+
"description": "Whether secrets were detected in any previous turn of the session"
|
|
1263
|
+
},
|
|
1264
|
+
{
|
|
1265
|
+
"key": "session_secret_types",
|
|
1266
|
+
"type": "array",
|
|
1267
|
+
"required": false,
|
|
1268
|
+
"description": "Secret types detected across the session (accumulated)"
|
|
1269
|
+
},
|
|
1270
|
+
{
|
|
1271
|
+
"key": "session_injection_detected",
|
|
1272
|
+
"type": "boolean",
|
|
1273
|
+
"required": false,
|
|
1274
|
+
"description": "Whether prompt injection was detected in any previous turn of the session"
|
|
1275
|
+
},
|
|
1276
|
+
{
|
|
1277
|
+
"key": "session_command_injection",
|
|
1278
|
+
"type": "boolean",
|
|
1279
|
+
"required": false,
|
|
1280
|
+
"description": "Whether command injection was detected in any previous turn of the session"
|
|
1281
|
+
},
|
|
1282
|
+
{
|
|
1283
|
+
"key": "session_threat_turns",
|
|
1284
|
+
"type": "number",
|
|
1285
|
+
"required": false,
|
|
1286
|
+
"description": "Number of turns in the session where threats were detected"
|
|
1287
|
+
},
|
|
1288
|
+
{
|
|
1289
|
+
"key": "session_max_injection_score",
|
|
1290
|
+
"type": "number",
|
|
1291
|
+
"required": false,
|
|
1292
|
+
"description": "Highest prompt injection score seen in any turn of the session (0-100). Use for tiered responses: >75 hard block, >50 restrict permissions"
|
|
1293
|
+
},
|
|
1294
|
+
{
|
|
1295
|
+
"key": "session_max_jailbreak_score",
|
|
1296
|
+
"type": "number",
|
|
1297
|
+
"required": false,
|
|
1298
|
+
"description": "Highest jailbreak detection score seen in any turn of the session (0-100)"
|
|
1299
|
+
},
|
|
1300
|
+
{
|
|
1301
|
+
"key": "session_max_command_injection_score",
|
|
1302
|
+
"type": "number",
|
|
1303
|
+
"required": false,
|
|
1304
|
+
"description": "Highest command injection score seen in any turn of the session (0-100)"
|
|
1305
|
+
},
|
|
1306
|
+
{
|
|
1307
|
+
"key": "session_max_pii_score",
|
|
1308
|
+
"type": "number",
|
|
1309
|
+
"required": false,
|
|
1310
|
+
"description": "Highest PII risk score seen in any turn of the session (0-100)"
|
|
1311
|
+
},
|
|
1312
|
+
{
|
|
1313
|
+
"key": "session_max_secret_score",
|
|
1314
|
+
"type": "number",
|
|
1315
|
+
"required": false,
|
|
1316
|
+
"description": "Highest secret detection score seen in any turn of the session (0-100)"
|
|
1317
|
+
},
|
|
1318
|
+
{
|
|
1319
|
+
"key": "session_cumulative_risk_score",
|
|
1320
|
+
"type": "number",
|
|
1321
|
+
"required": false,
|
|
1322
|
+
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
897
1323
|
}
|
|
898
1324
|
]
|
|
899
1325
|
}
|
|
@@ -96,8 +96,12 @@ namespace Guardrails {
|
|
|
96
96
|
"detector_count": Long,
|
|
97
97
|
|
|
98
98
|
// Security - Injection & Jailbreak (optional)
|
|
99
|
-
"
|
|
100
|
-
"
|
|
99
|
+
"injection_confidence"?: Long, // Combined injection confidence: MAX(pulse, deep_context)
|
|
100
|
+
"jailbreak_confidence"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
|
|
101
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
102
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
103
|
+
"jailbreak_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
104
|
+
"jailbreak_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
101
105
|
"injection_type"?: String, // "prompt" | "sql" | "command" | "none"
|
|
102
106
|
|
|
103
107
|
// Privacy - Secrets (optional)
|
|
@@ -183,6 +187,12 @@ namespace Guardrails {
|
|
|
183
187
|
"session_injection_detected"?: Bool,
|
|
184
188
|
"session_command_injection"?: Bool,
|
|
185
189
|
"session_threat_turns"?: Long,
|
|
190
|
+
"session_max_injection_score"?: Long,
|
|
191
|
+
"session_max_jailbreak_score"?: Long,
|
|
192
|
+
"session_max_command_injection_score"?: Long,
|
|
193
|
+
"session_max_pii_score"?: Long,
|
|
194
|
+
"session_max_secret_score"?: Long,
|
|
195
|
+
"session_cumulative_risk_score"?: Long,
|
|
186
196
|
|
|
187
197
|
};
|
|
188
198
|
|
|
@@ -227,7 +237,9 @@ namespace Guardrails {
|
|
|
227
237
|
"secret_types"?: Set<String>,
|
|
228
238
|
"pii_detected"?: Bool,
|
|
229
239
|
"pii_types"?: Set<String>,
|
|
230
|
-
"
|
|
240
|
+
"injection_confidence"?: Long,
|
|
241
|
+
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
242
|
+
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
231
243
|
|
|
232
244
|
// Security - Pattern Detection (optional)
|
|
233
245
|
"command_injection_detected"?: Bool,
|
|
@@ -276,6 +288,12 @@ namespace Guardrails {
|
|
|
276
288
|
"session_injection_detected"?: Bool,
|
|
277
289
|
"session_command_injection"?: Bool,
|
|
278
290
|
"session_threat_turns"?: Long,
|
|
291
|
+
"session_max_injection_score"?: Long,
|
|
292
|
+
"session_max_jailbreak_score"?: Long,
|
|
293
|
+
"session_max_command_injection_score"?: Long,
|
|
294
|
+
"session_max_pii_score"?: Long,
|
|
295
|
+
"session_max_secret_score"?: Long,
|
|
296
|
+
"session_cumulative_risk_score"?: Long,
|
|
279
297
|
|
|
280
298
|
};
|
|
281
299
|
|
|
@@ -304,6 +322,12 @@ namespace Guardrails {
|
|
|
304
322
|
"session_injection_detected"?: Bool,
|
|
305
323
|
"session_command_injection"?: Bool,
|
|
306
324
|
"session_threat_turns"?: Long,
|
|
325
|
+
"session_max_injection_score"?: Long,
|
|
326
|
+
"session_max_jailbreak_score"?: Long,
|
|
327
|
+
"session_max_command_injection_score"?: Long,
|
|
328
|
+
"session_max_pii_score"?: Long,
|
|
329
|
+
"session_max_secret_score"?: Long,
|
|
330
|
+
"session_cumulative_risk_score"?: Long,
|
|
307
331
|
|
|
308
332
|
};
|
|
309
333
|
|
|
@@ -332,6 +356,12 @@ namespace Guardrails {
|
|
|
332
356
|
"session_injection_detected"?: Bool,
|
|
333
357
|
"session_command_injection"?: Bool,
|
|
334
358
|
"session_threat_turns"?: Long,
|
|
359
|
+
"session_max_injection_score"?: Long,
|
|
360
|
+
"session_max_jailbreak_score"?: Long,
|
|
361
|
+
"session_max_command_injection_score"?: Long,
|
|
362
|
+
"session_max_pii_score"?: Long,
|
|
363
|
+
"session_max_secret_score"?: Long,
|
|
364
|
+
"session_cumulative_risk_score"?: Long,
|
|
335
365
|
|
|
336
366
|
};
|
|
337
367
|
|
|
@@ -368,6 +398,12 @@ namespace Guardrails {
|
|
|
368
398
|
"session_injection_detected"?: Bool,
|
|
369
399
|
"session_command_injection"?: Bool,
|
|
370
400
|
"session_threat_turns"?: Long,
|
|
401
|
+
"session_max_injection_score"?: Long,
|
|
402
|
+
"session_max_jailbreak_score"?: Long,
|
|
403
|
+
"session_max_command_injection_score"?: Long,
|
|
404
|
+
"session_max_pii_score"?: Long,
|
|
405
|
+
"session_max_secret_score"?: Long,
|
|
406
|
+
"session_cumulative_risk_score"?: Long,
|
|
371
407
|
|
|
372
408
|
};
|
|
373
409
|
}
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
// Uses ML-based confidence scores from normalized context.
|
|
6
6
|
//
|
|
7
7
|
// Context keys used (normalized by projection layer):
|
|
8
|
-
// -
|
|
9
|
-
// -
|
|
8
|
+
// - injection_confidence: Long (0-100) - Overall injection confidence
|
|
9
|
+
// - jailbreak_confidence: Long (0-100) - Jailbreak attempt confidence
|
|
10
10
|
// - injection_type: String - Type of injection detected
|
|
11
11
|
// - contains_invisible_chars: Bool - Invisible Unicode characters detected
|
|
12
12
|
// - invisible_chars_score: Long (0-100) - Invisible character density
|
|
@@ -25,7 +25,7 @@ forbid (
|
|
|
25
25
|
action,
|
|
26
26
|
resource
|
|
27
27
|
) when {
|
|
28
|
-
context has
|
|
28
|
+
context has injection_confidence && context.injection_confidence > 85
|
|
29
29
|
};
|
|
30
30
|
|
|
31
31
|
@id("jailbreak-block-high-confidence")
|
|
@@ -38,7 +38,7 @@ forbid (
|
|
|
38
38
|
action,
|
|
39
39
|
resource
|
|
40
40
|
) when {
|
|
41
|
-
context has
|
|
41
|
+
context has jailbreak_confidence && context.jailbreak_confidence > 80
|
|
42
42
|
};
|
|
43
43
|
|
|
44
44
|
@id("injection-combined-threshold")
|
|
@@ -51,8 +51,8 @@ forbid (
|
|
|
51
51
|
action,
|
|
52
52
|
resource
|
|
53
53
|
) when {
|
|
54
|
-
context has
|
|
55
|
-
context.
|
|
54
|
+
context has injection_confidence && context has jailbreak_confidence &&
|
|
55
|
+
context.injection_confidence > 60 && context.jailbreak_confidence > 60
|
|
56
56
|
};
|
|
57
57
|
|
|
58
58
|
@id("injection-invisible-chars")
|