@highflame/policy 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -126,15 +126,63 @@ namespace Guardrails {
126
126
  "contains_invisible_chars"?: Bool,
127
127
  "invisible_chars_score"?: Long, // 0-100
128
128
 
129
- // Additional detectors (optional)
129
+ // Security - Pattern Detection (optional)
130
+ "command_injection_detected"?: Bool,
131
+ "command_injection_type"?: String, // "reverse_shell" | "privilege_escalation" | "code_execution" | "destructive_command" | "data_exfiltration"
132
+ "command_injection_score"?: Long, // 0-100
133
+ "path_traversal_detected"?: Bool,
134
+ "path_traversal_severity"?: String, // "critical" | "high" | "medium" | "low" | "none"
135
+ "path_traversal_type"?: String,
136
+ "sql_injection_detected"?: Bool,
137
+ "sql_injection_type"?: String, // "tautology" | "union_based" | "destructive" | "blind" | "error_based"
138
+ "sql_injection_score"?: Long, // 0-100
139
+
140
+ // Security - Cross-Origin Escalation (optional)
141
+ "cross_origin_detected"?: Bool,
142
+ "cross_origin_type"?: String, // "cross_origin_tool" | "cross_origin_server" | "none"
143
+ "cross_origin_score"?: Long, // 0-100
144
+
145
+ // Security - Encoded Injection (optional)
146
+ "encoded_content_detected"?: Bool,
147
+ "encoded_types"?: Set<String>, // ["base64", "hex", "unicode", "url", ...]
148
+ "encoded_count"?: Long,
149
+ "encoded_score"?: Long, // 0-100
150
+
151
+ // Language & Script Detection (optional)
152
+ "detected_language"?: String, // ISO language code
153
+ "is_english"?: Bool,
154
+ "language_confidence"?: Long, // 0-100
155
+ "detected_script"?: String, // "latin" | "cyrillic" | "arabic" | "unknown" | ...
156
+ "is_latin_script"?: Bool,
157
+ "script_confidence"?: Long, // 0-100
158
+
159
+ // Content Analysis (optional)
130
160
  "hallucination_score"?: Long,
161
+ "factuality_score"?: Long, // 0-100
131
162
  "sentiment_score"?: Long,
132
163
  "contains_code"?: Bool,
133
164
  "code_languages"?: Set<String>,
165
+ "code_ratio"?: Long, // 0-100, percentage of content that is code
134
166
  "keyword_matched"?: Bool,
135
167
  "keyword_categories"?: Set<String>,
136
- "detected_language"?: String,
168
+ "keyword_count"?: Long,
169
+ "contains_non_ascii"?: Bool,
137
170
  "phishing_detected"?: Bool,
171
+ "content_safety_score"?: Long, // 0-100
172
+ "content_safety_blocked"?: Bool,
173
+
174
+ // Agentic - Multi-Turn Context (optional)
175
+ "conversation_turn"?: Long,
176
+ "multi_turn_detection"?: Bool,
177
+
178
+ // Session Detection History — cross-turn sticky flags (optional)
179
+ "session_pii_detected"?: Bool,
180
+ "session_pii_types"?: Set<String>,
181
+ "session_secrets_detected"?: Bool,
182
+ "session_secret_types"?: Set<String>,
183
+ "session_injection_detected"?: Bool,
184
+ "session_command_injection"?: Bool,
185
+ "session_threat_turns"?: Long,
138
186
 
139
187
  };
140
188
 
@@ -181,6 +229,54 @@ namespace Guardrails {
181
229
  "pii_types"?: Set<String>,
182
230
  "injection_score"?: Long,
183
231
 
232
+ // Security - Pattern Detection (optional)
233
+ "command_injection_detected"?: Bool,
234
+ "command_injection_type"?: String,
235
+ "command_injection_score"?: Long, // 0-100
236
+ "path_traversal_detected"?: Bool,
237
+ "path_traversal_severity"?: String,
238
+ "path_traversal_type"?: String,
239
+ "sql_injection_detected"?: Bool,
240
+ "sql_injection_type"?: String,
241
+ "sql_injection_score"?: Long, // 0-100
242
+
243
+ // Security - Cross-Origin Escalation (optional)
244
+ "cross_origin_detected"?: Bool,
245
+ "cross_origin_type"?: String,
246
+ "cross_origin_score"?: Long, // 0-100
247
+
248
+ // Security - Encoded Injection (optional)
249
+ "encoded_content_detected"?: Bool,
250
+ "encoded_types"?: Set<String>,
251
+ "encoded_count"?: Long,
252
+ "encoded_score"?: Long, // 0-100
253
+
254
+ // Agentic - Agent Security (optional)
255
+ "tool_poisoning_detected"?: Bool,
256
+ "tool_poisoning_score"?: Long, // 0-100
257
+ "tool_poisoning_type"?: String, // "hidden_instructions" | "system_prompt_injection" | "authority_hijack"
258
+ "rug_pull_detected"?: Bool,
259
+ "rug_pull_score"?: Long, // 0-100
260
+ "rug_pull_type"?: String, // "risk_spike" | "pattern_change" | "combined" | "none"
261
+
262
+ // Agentic - MCP Risk (optional)
263
+ "mcp_config_risk"?: Bool,
264
+ "mcp_risk_type"?: String, // "inline_execution" | "suspicious_url" | "cross_origin"
265
+ "mcp_risk_score"?: Long, // 0-100
266
+
267
+ // Agentic - Multi-Turn Context (optional)
268
+ "conversation_turn"?: Long,
269
+ "multi_turn_detection"?: Bool,
270
+
271
+ // Session Detection History — cross-turn sticky flags (optional)
272
+ "session_pii_detected"?: Bool,
273
+ "session_pii_types"?: Set<String>,
274
+ "session_secrets_detected"?: Bool,
275
+ "session_secret_types"?: Set<String>,
276
+ "session_injection_detected"?: Bool,
277
+ "session_command_injection"?: Bool,
278
+ "session_threat_turns"?: Long,
279
+
184
280
  };
185
281
 
186
282
  /// Context for read_file action
@@ -195,6 +291,20 @@ namespace Guardrails {
195
291
  "pii_detected"?: Bool,
196
292
  "pii_types"?: Set<String>,
197
293
 
294
+ // Security - Path Traversal (optional)
295
+ "path_traversal_detected"?: Bool,
296
+ "path_traversal_severity"?: String,
297
+ "path_traversal_type"?: String,
298
+
299
+ // Session Detection History — cross-turn sticky flags (optional)
300
+ "session_pii_detected"?: Bool,
301
+ "session_pii_types"?: Set<String>,
302
+ "session_secrets_detected"?: Bool,
303
+ "session_secret_types"?: Set<String>,
304
+ "session_injection_detected"?: Bool,
305
+ "session_command_injection"?: Bool,
306
+ "session_threat_turns"?: Long,
307
+
198
308
  };
199
309
 
200
310
  /// Context for write_file action
@@ -209,6 +319,20 @@ namespace Guardrails {
209
319
  "pii_detected"?: Bool,
210
320
  "pii_types"?: Set<String>,
211
321
 
322
+ // Security - Path Traversal (optional)
323
+ "path_traversal_detected"?: Bool,
324
+ "path_traversal_severity"?: String,
325
+ "path_traversal_type"?: String,
326
+
327
+ // Session Detection History — cross-turn sticky flags (optional)
328
+ "session_pii_detected"?: Bool,
329
+ "session_pii_types"?: Set<String>,
330
+ "session_secrets_detected"?: Bool,
331
+ "session_secret_types"?: Set<String>,
332
+ "session_injection_detected"?: Bool,
333
+ "session_command_injection"?: Bool,
334
+ "session_threat_turns"?: Long,
335
+
212
336
  };
213
337
 
214
338
  /// Context for connect_server action (MCP server connections)
@@ -221,5 +345,29 @@ namespace Guardrails {
221
345
  "mcp_server"?: String, // MCP server name (e.g., "github", "filesystem")
222
346
  "mcp_server_verified"?: Bool, // Whether server is from verified registry
223
347
 
348
+ // Agentic - Agent Security (optional)
349
+ "tool_poisoning_detected"?: Bool,
350
+ "tool_poisoning_score"?: Long,
351
+ "tool_poisoning_type"?: String,
352
+
353
+ // Agentic - MCP Risk (optional)
354
+ "mcp_config_risk"?: Bool,
355
+ "mcp_risk_type"?: String,
356
+ "mcp_risk_score"?: Long,
357
+
358
+ // Security - Cross-Origin Escalation (optional)
359
+ "cross_origin_detected"?: Bool,
360
+ "cross_origin_type"?: String,
361
+ "cross_origin_score"?: Long,
362
+
363
+ // Session Detection History — cross-turn sticky flags (optional)
364
+ "session_pii_detected"?: Bool,
365
+ "session_pii_types"?: Set<String>,
366
+ "session_secrets_detected"?: Bool,
367
+ "session_secret_types"?: Set<String>,
368
+ "session_injection_detected"?: Bool,
369
+ "session_command_injection"?: Bool,
370
+ "session_threat_turns"?: Long,
371
+
224
372
  };
225
373
  }
@@ -92,3 +92,48 @@ forbid (
92
92
  context.budget_remaining_pct < 5 &&
93
93
  context.budget_remaining_pct > 0
94
94
  };
95
+
96
+ // =============================================================================
97
+ // Agent Security — Supply Chain & Behavioral Drift
98
+ // =============================================================================
99
+
100
+ @id("agentic-block-tool-poisoning")
101
+ @name("Block tool poisoning attacks")
102
+ @description("Forbids tool calls or server connections when hidden instructions or authority hijack patterns are detected in tool descriptions or arguments")
103
+ @severity("critical")
104
+ @tags("agentic,tool-poisoning,supply-chain")
105
+ forbid (
106
+ principal,
107
+ action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
108
+ resource
109
+ ) when {
110
+ context has tool_poisoning_score && context.tool_poisoning_score >= 70
111
+ };
112
+
113
+ @id("agentic-block-rug-pull")
114
+ @name("Block rug pull attacks")
115
+ @description("Forbids tool calls when significant behavioral drift is detected (tool output deviates from established patterns)")
116
+ @severity("high")
117
+ @tags("agentic,rug-pull,behavioral-drift")
118
+ forbid (
119
+ principal,
120
+ action == Guardrails::Action::"call_tool",
121
+ resource
122
+ ) when {
123
+ context has rug_pull_detected && context.rug_pull_detected == true &&
124
+ context has rug_pull_score && context.rug_pull_score >= 70
125
+ };
126
+
127
+ @id("agentic-block-mcp-config-risk")
128
+ @name("Block risky MCP configurations")
129
+ @description("Forbids tool calls or server connections when MCP configuration risks are detected (inline execution, suspicious URLs, cross-origin issues)")
130
+ @severity("high")
131
+ @tags("agentic,mcp-risk,supply-chain")
132
+ forbid (
133
+ principal,
134
+ action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
135
+ resource
136
+ ) when {
137
+ context has mcp_config_risk && context.mcp_config_risk == true &&
138
+ context has mcp_risk_score && context.mcp_risk_score >= 70
139
+ };
@@ -0,0 +1,59 @@
1
+ // =============================================================================
2
+ // Security Pattern Detection Policy
3
+ // =============================================================================
4
+ // Blocks command injection, path traversal, and SQL injection attacks using
5
+ // regex-based pattern detection from Shield's security detectors.
6
+ //
7
+ // Context keys used (normalized by projection layer):
8
+ // - command_injection_detected: Bool - Command injection pattern found
9
+ // - command_injection_score: Long (0-100) - Detection confidence
10
+ // - path_traversal_detected: Bool - Path traversal pattern found
11
+ // - path_traversal_severity: String - Severity level (critical/high/medium/low/none)
12
+ // - sql_injection_detected: Bool - SQL injection pattern found
13
+ // - sql_injection_score: Long (0-100) - Detection confidence
14
+ //
15
+ // Category: security
16
+ // Namespace: Guardrails
17
+ // =============================================================================
18
+
19
+ @id("security-block-command-injection")
20
+ @name("Block command injection")
21
+ @description("Forbids requests containing command injection patterns such as reverse shells, privilege escalation, or destructive commands")
22
+ @severity("critical")
23
+ @tags("command-injection,security")
24
+ forbid (
25
+ principal,
26
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
27
+ resource
28
+ ) when {
29
+ context has command_injection_detected && context.command_injection_detected == true
30
+ };
31
+
32
+ @id("security-block-path-traversal")
33
+ @name("Block high-severity path traversal")
34
+ @description("Forbids requests containing path traversal patterns targeting sensitive system files or using deep directory traversal")
35
+ @severity("high")
36
+ @tags("path-traversal,security")
37
+ forbid (
38
+ principal,
39
+ action,
40
+ resource
41
+ ) when {
42
+ context has path_traversal_detected && context.path_traversal_detected == true &&
43
+ context has path_traversal_severity &&
44
+ (context.path_traversal_severity == "critical" || context.path_traversal_severity == "high")
45
+ };
46
+
47
+ @id("security-block-sql-injection")
48
+ @name("Block high-confidence SQL injection")
49
+ @description("Forbids requests with SQL injection confidence of 75% or higher (tautologies, UNION-based, destructive queries)")
50
+ @severity("high")
51
+ @tags("sql-injection,security")
52
+ forbid (
53
+ principal,
54
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
55
+ resource
56
+ ) when {
57
+ context has sql_injection_detected && context.sql_injection_detected == true &&
58
+ context has sql_injection_score && context.sql_injection_score >= 75
59
+ };
@@ -93,11 +93,21 @@
93
93
  {
94
94
  "id": "agentic-safety-default",
95
95
  "name": "Agentic Safety",
96
- "description": "Block tool call loops, data exfiltration patterns, high-risk sequences, and budget violations",
96
+ "description": "Block tool call loops, data exfiltration patterns, high-risk sequences, budget violations, tool poisoning, rug pull attacks, and MCP configuration risks",
97
97
  "category": "agentic_security",
98
98
  "file": "defaults/agentic_safety.cedar",
99
99
  "severity": "high",
100
- "tags": ["agentic", "safety", "loops", "exfiltration", "budget"],
100
+ "tags": ["agentic", "safety", "loops", "exfiltration", "budget", "tool-poisoning", "rug-pull", "mcp-risk"],
101
+ "is_active": true
102
+ },
103
+ {
104
+ "id": "security-patterns-default",
105
+ "name": "Security Pattern Detection",
106
+ "description": "Block command injection, path traversal, and SQL injection attacks using regex-based pattern detection",
107
+ "category": "security",
108
+ "file": "defaults/security_patterns.cedar",
109
+ "severity": "critical",
110
+ "tags": ["command-injection", "path-traversal", "sql-injection", "security"],
101
111
  "is_active": true
102
112
  }
103
113
  ],