@highflame/policy 2.1.4 → 2.1.6

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (33):
  1. package/README.md +41 -0
  2. package/_schemas/guardrails/context.json +600 -6
  3. package/_schemas/guardrails/schema.cedarschema +79 -3
  4. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +118 -0
  5. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +4 -4
  6. package/_schemas/guardrails/templates/defaults/injection.cedar +10 -10
  7. package/_schemas/guardrails/templates/defaults/secrets.cedar +2 -2
  8. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +1 -1
  9. package/_schemas/guardrails/templates/defaults/toxicity.cedar +3 -3
  10. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +1 -1
  11. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +4 -4
  12. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +2 -2
  13. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +3 -3
  14. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +1 -1
  15. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +2 -2
  16. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +3 -3
  17. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +157 -0
  18. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +140 -0
  19. package/_schemas/guardrails/templates/templates.json +41 -0
  20. package/_schemas/overwatch/context.json +232 -4
  21. package/_schemas/overwatch/schema.cedarschema +42 -4
  22. package/dist/guardrails-context.gen.d.ts +24 -2
  23. package/dist/guardrails-context.gen.js +24 -2
  24. package/dist/guardrails-defaults.gen.d.ts +1 -1
  25. package/dist/guardrails-defaults.gen.js +524 -36
  26. package/dist/overwatch-context.gen.d.ts +10 -0
  27. package/dist/overwatch-context.gen.js +10 -0
  28. package/dist/service-schemas.gen.d.ts +2 -2
  29. package/dist/service-schemas.gen.js +275 -24
  30. package/package.json +1 -1
  31. package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
  32. package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
  33. package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
@@ -120,8 +120,12 @@ action process_prompt appliesTo {
120
120
 
121
121
  // --- ML Detector Confidence Scores (0-100) ---
122
122
  pii_confidence: Long, // PII detection classifier confidence
123
- injection_confidence: Long, // Prompt injection classifier confidence
124
- jailbreak_confidence: Long, // Jailbreak detection classifier confidence
123
+ injection_confidence: Long, // Combined injection confidence: MAX(pulse, deep_context)
124
+ jailbreak_confidence: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
125
+ injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
126
+ injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
127
+ jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
128
+ jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
125
129
 
126
130
  // --- Agent Security (0-100) ---
127
131
  indirect_injection_score: Long, // Indirect prompt injection risk (OWASP LLM01, ASI01)
@@ -134,6 +138,12 @@ action process_prompt appliesTo {
134
138
  session_injection_detected?: Bool,
135
139
  session_command_injection?: Bool,
136
140
  session_threat_turns?: Long,
141
+ session_max_injection_score?: Long,
142
+ session_max_jailbreak_score?: Long,
143
+ session_max_command_injection_score?: Long,
144
+ session_max_pii_score?: Long,
145
+ session_max_secret_score?: Long,
146
+ session_cumulative_risk_score?: Long,
137
147
 
138
148
  // --- Legacy ---
139
149
  prompt_text?: String, // Same as content (backward compatibility)
@@ -196,8 +206,12 @@ action call_tool appliesTo {
196
206
 
197
207
  // --- ML Detector Confidence Scores (0-100) ---
198
208
  pii_confidence?: Long,
199
- injection_confidence?: Long,
200
- jailbreak_confidence?: Long,
209
+ injection_confidence?: Long, // Combined injection confidence: MAX(pulse, deep_context)
210
+ jailbreak_confidence?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
211
+ injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier
212
+ injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn
213
+ jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier
214
+ jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn
201
215
 
202
216
  // --- Agent Security (0-100) --- (OWASP ASI01, ASI02, ASI04; MITRE AML.T0051)
203
217
  tool_poisoning_score?: Long, // Hidden instructions in tool description/args
@@ -231,6 +245,12 @@ action call_tool appliesTo {
231
245
  session_injection_detected?: Bool,
232
246
  session_command_injection?: Bool,
233
247
  session_threat_turns?: Long,
248
+ session_max_injection_score?: Long,
249
+ session_max_jailbreak_score?: Long,
250
+ session_max_command_injection_score?: Long,
251
+ session_max_pii_score?: Long,
252
+ session_max_secret_score?: Long,
253
+ session_cumulative_risk_score?: Long,
234
254
 
235
255
  // --- Legacy ---
236
256
  response_content?: String,
@@ -275,6 +295,12 @@ action connect_server appliesTo {
275
295
  session_injection_detected?: Bool,
276
296
  session_command_injection?: Bool,
277
297
  session_threat_turns?: Long,
298
+ session_max_injection_score?: Long,
299
+ session_max_jailbreak_score?: Long,
300
+ session_max_command_injection_score?: Long,
301
+ session_max_pii_score?: Long,
302
+ session_max_secret_score?: Long,
303
+ session_cumulative_risk_score?: Long,
278
304
  },
279
305
  };
280
306
 
@@ -317,6 +343,12 @@ action read_file appliesTo {
317
343
  session_injection_detected?: Bool,
318
344
  session_command_injection?: Bool,
319
345
  session_threat_turns?: Long,
346
+ session_max_injection_score?: Long,
347
+ session_max_jailbreak_score?: Long,
348
+ session_max_command_injection_score?: Long,
349
+ session_max_pii_score?: Long,
350
+ session_max_secret_score?: Long,
351
+ session_cumulative_risk_score?: Long,
320
352
  },
321
353
  };
322
354
 
@@ -359,6 +391,12 @@ action write_file appliesTo {
359
391
  session_injection_detected?: Bool,
360
392
  session_command_injection?: Bool,
361
393
  session_threat_turns?: Long,
394
+ session_max_injection_score?: Long,
395
+ session_max_jailbreak_score?: Long,
396
+ session_max_command_injection_score?: Long,
397
+ session_max_pii_score?: Long,
398
+ session_max_secret_score?: Long,
399
+ session_cumulative_risk_score?: Long,
362
400
  },
363
401
  };
364
402
 
@@ -5,6 +5,11 @@
5
5
  * Guardrails Cedar schema and are used at policy evaluation time.
6
6
  */
7
7
  export declare const GuardrailsContextKey: {
8
+ readonly AgentFramework: "agent_framework";
9
+ readonly AgentId: "agent_id";
10
+ readonly AgentPublisher: "agent_publisher";
11
+ readonly AgentTrustLevel: "agent_trust_level";
12
+ readonly AgentType: "agent_type";
8
13
  readonly BudgetExceeded: "budget_exceeded";
9
14
  readonly BudgetRemainingPct: "budget_remaining_pct";
10
15
  readonly CodeLanguages: "code_languages";
@@ -36,12 +41,16 @@ export declare const GuardrailsContextKey: {
36
41
  readonly FactualityScore: "factuality_score";
37
42
  readonly HallucinationScore: "hallucination_score";
38
43
  readonly HateSpeechScore: "hate_speech_score";
39
- readonly InjectionScore: "injection_score";
44
+ readonly InjectionConfidence: "injection_confidence";
45
+ readonly InjectionDeepContextScore: "injection_deep_context_score";
46
+ readonly InjectionPulseScore: "injection_pulse_score";
40
47
  readonly InjectionType: "injection_type";
41
48
  readonly InvisibleCharsScore: "invisible_chars_score";
42
49
  readonly IsEnglish: "is_english";
43
50
  readonly IsLatinScript: "is_latin_script";
44
- readonly JailbreakScore: "jailbreak_score";
51
+ readonly JailbreakConfidence: "jailbreak_confidence";
52
+ readonly JailbreakDeepContextScore: "jailbreak_deep_context_score";
53
+ readonly JailbreakPulseScore: "jailbreak_pulse_score";
45
54
  readonly KeywordCategories: "keyword_categories";
46
55
  readonly KeywordCount: "keyword_count";
47
56
  readonly KeywordMatched: "keyword_matched";
@@ -74,6 +83,19 @@ export declare const GuardrailsContextKey: {
74
83
  readonly SecretTypes: "secret_types";
75
84
  readonly SentimentScore: "sentiment_score";
76
85
  readonly SequenceRisk: "sequence_risk";
86
+ readonly SessionCommandInjection: "session_command_injection";
87
+ readonly SessionCumulativeRiskScore: "session_cumulative_risk_score";
88
+ readonly SessionInjectionDetected: "session_injection_detected";
89
+ readonly SessionMaxCommandInjectionScore: "session_max_command_injection_score";
90
+ readonly SessionMaxInjectionScore: "session_max_injection_score";
91
+ readonly SessionMaxJailbreakScore: "session_max_jailbreak_score";
92
+ readonly SessionMaxPiiScore: "session_max_pii_score";
93
+ readonly SessionMaxSecretScore: "session_max_secret_score";
94
+ readonly SessionPiiDetected: "session_pii_detected";
95
+ readonly SessionPiiTypes: "session_pii_types";
96
+ readonly SessionSecretTypes: "session_secret_types";
97
+ readonly SessionSecretsDetected: "session_secrets_detected";
98
+ readonly SessionThreatTurns: "session_threat_turns";
77
99
  readonly SexualScore: "sexual_score";
78
100
  readonly SqlInjectionDetected: "sql_injection_detected";
79
101
  readonly SqlInjectionScore: "sql_injection_score";
@@ -7,6 +7,11 @@
7
7
  * Guardrails Cedar schema and are used at policy evaluation time.
8
8
  */
9
9
  export const GuardrailsContextKey = {
10
+ AgentFramework: 'agent_framework',
11
+ AgentId: 'agent_id',
12
+ AgentPublisher: 'agent_publisher',
13
+ AgentTrustLevel: 'agent_trust_level',
14
+ AgentType: 'agent_type',
10
15
  BudgetExceeded: 'budget_exceeded',
11
16
  BudgetRemainingPct: 'budget_remaining_pct',
12
17
  CodeLanguages: 'code_languages',
@@ -38,12 +43,16 @@ export const GuardrailsContextKey = {
38
43
  FactualityScore: 'factuality_score',
39
44
  HallucinationScore: 'hallucination_score',
40
45
  HateSpeechScore: 'hate_speech_score',
41
- InjectionScore: 'injection_score',
46
+ InjectionConfidence: 'injection_confidence',
47
+ InjectionDeepContextScore: 'injection_deep_context_score',
48
+ InjectionPulseScore: 'injection_pulse_score',
42
49
  InjectionType: 'injection_type',
43
50
  InvisibleCharsScore: 'invisible_chars_score',
44
51
  IsEnglish: 'is_english',
45
52
  IsLatinScript: 'is_latin_script',
46
- JailbreakScore: 'jailbreak_score',
53
+ JailbreakConfidence: 'jailbreak_confidence',
54
+ JailbreakDeepContextScore: 'jailbreak_deep_context_score',
55
+ JailbreakPulseScore: 'jailbreak_pulse_score',
47
56
  KeywordCategories: 'keyword_categories',
48
57
  KeywordCount: 'keyword_count',
49
58
  KeywordMatched: 'keyword_matched',
@@ -76,6 +85,19 @@ export const GuardrailsContextKey = {
76
85
  SecretTypes: 'secret_types',
77
86
  SentimentScore: 'sentiment_score',
78
87
  SequenceRisk: 'sequence_risk',
88
+ SessionCommandInjection: 'session_command_injection',
89
+ SessionCumulativeRiskScore: 'session_cumulative_risk_score',
90
+ SessionInjectionDetected: 'session_injection_detected',
91
+ SessionMaxCommandInjectionScore: 'session_max_command_injection_score',
92
+ SessionMaxInjectionScore: 'session_max_injection_score',
93
+ SessionMaxJailbreakScore: 'session_max_jailbreak_score',
94
+ SessionMaxPiiScore: 'session_max_pii_score',
95
+ SessionMaxSecretScore: 'session_max_secret_score',
96
+ SessionPiiDetected: 'session_pii_detected',
97
+ SessionPiiTypes: 'session_pii_types',
98
+ SessionSecretTypes: 'session_secret_types',
99
+ SessionSecretsDetected: 'session_secrets_detected',
100
+ SessionThreatTurns: 'session_threat_turns',
79
101
  SexualScore: 'sexual_score',
80
102
  SqlInjectionDetected: 'sql_injection_detected',
81
103
  SqlInjectionScore: 'sql_injection_score',
@@ -2,7 +2,7 @@
2
2
  * Guardrails policy category identifiers.
3
3
  * Maps to UI tab names in Studio.
4
4
  */
5
- export type GuardrailsCategory = 'security' | 'privacy' | 'trust_safety' | 'agentic_security' | 'organization';
5
+ export type GuardrailsCategory = 'security' | 'privacy' | 'trust_safety' | 'agentic_security' | 'agent_identity' | 'organization';
6
6
  /**
7
7
  * Category metadata for UI display.
8
8
  */