@highflame/policy 2.1.36 → 2.1.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/ai_gateway/context.json +431 -11
- package/_schemas/ai_gateway/schema.cedarschema +91 -11
- package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
- package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
- package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
- package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
- package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
- package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
- package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
- package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
- package/_schemas/ai_gateway/templates/templates.json +42 -47
- package/_schemas/guardrails/context.json +12 -12
- package/_schemas/guardrails/schema.cedarschema +12 -12
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
- package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
- package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
- package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
- package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
- package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
- package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
- package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
- package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
- package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
- package/_schemas/guardrails/templates/templates.json +188 -210
- package/_schemas/overwatch/context.json +14 -14
- package/_schemas/overwatch/schema.cedarschema +12 -12
- package/_schemas/sentry/context.json +11 -11
- package/_schemas/sentry/schema.cedarschema +11 -11
- package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
- package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
- package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
- package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
- package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
- package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
- package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
- package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
- package/_schemas/sentry/templates/templates.json +46 -46
- package/dist/ai_gateway-context.gen.d.ts +18 -4
- package/dist/ai_gateway-context.gen.js +18 -4
- package/dist/ai_gateway-defaults.gen.d.ts +1 -1
- package/dist/ai_gateway-defaults.gen.js +377 -313
- package/dist/guardrails-context.gen.d.ts +5 -5
- package/dist/guardrails-context.gen.js +5 -5
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +2070 -1849
- package/dist/overwatch-context.gen.d.ts +5 -5
- package/dist/overwatch-context.gen.js +5 -5
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +547 -573
- package/dist/sentry-context.gen.d.ts +3 -3
- package/dist/sentry-context.gen.js +3 -3
- package/dist/sentry-defaults.gen.d.ts +1 -1
- package/dist/sentry-defaults.gen.js +379 -460
- package/dist/service-schemas.gen.d.ts +4 -4
- package/dist/service-schemas.gen.js +249 -99
- package/package.json +1 -1
|
@@ -87,7 +87,7 @@ action call_tool appliesTo {
|
|
|
87
87
|
threat_categories?: Set<String>, // Threat category names
|
|
88
88
|
detected_threats?: Set<String>, // Detection rule names that matched
|
|
89
89
|
max_threat_severity?: Long, // Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)
|
|
90
|
-
|
|
90
|
+
secrets_detected?: Bool, // Whether secrets/credentials detected
|
|
91
91
|
|
|
92
92
|
// --- Secrets (granular) ---
|
|
93
93
|
secret_types?: Set<String>,
|
|
@@ -97,10 +97,11 @@ action call_tool appliesTo {
|
|
|
97
97
|
pii_detected?: Bool,
|
|
98
98
|
pii_types?: Set<String>,
|
|
99
99
|
pii_count?: Long,
|
|
100
|
+
pii_score?: Long,
|
|
100
101
|
|
|
101
102
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
102
|
-
|
|
103
|
-
|
|
103
|
+
injection_score?: Long, // Prompt injection classifier confidence
|
|
104
|
+
jailbreak_score?: Long, // Jailbreak detection classifier confidence
|
|
104
105
|
|
|
105
106
|
// --- Agent Security (0-100) ---
|
|
106
107
|
tool_poisoning_score?: Long, // Hidden instructions in tool description/args
|
|
@@ -127,7 +128,7 @@ action call_tool appliesTo {
|
|
|
127
128
|
profanity_score?: Long,
|
|
128
129
|
|
|
129
130
|
// --- Encoding & Unicode Attacks ---
|
|
130
|
-
|
|
131
|
+
invisible_chars_detected?: Bool,
|
|
131
132
|
invisible_chars_score?: Long,
|
|
132
133
|
|
|
133
134
|
// --- Behavioral Analysis ---
|
|
@@ -138,6 +139,21 @@ action call_tool appliesTo {
|
|
|
138
139
|
pattern_type?: String,
|
|
139
140
|
sequence_risk?: Long,
|
|
140
141
|
|
|
142
|
+
// --- Session-Aware Threat Aggregation ---
|
|
143
|
+
session_threat_turns?: Long, // Count of turns with detected threats
|
|
144
|
+
session_cumulative_risk_score?: Long, // Accumulated risk across turns
|
|
145
|
+
session_injection_detected?: Bool, // Injection seen in any prior turn
|
|
146
|
+
session_command_injection?: Bool, // Command injection seen in any prior turn
|
|
147
|
+
session_pii_detected?: Bool, // PII seen in any prior turn
|
|
148
|
+
session_pii_types?: Set<String>, // PII types accumulated across session
|
|
149
|
+
session_secrets_detected?: Bool, // Secrets seen in any prior turn
|
|
150
|
+
session_secret_types?: Set<String>, // Secret types accumulated across session
|
|
151
|
+
session_max_injection_score?: Long, // Max injection score across session turns
|
|
152
|
+
session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
|
|
153
|
+
session_max_command_injection_score?: Long, // Max command-injection score across session turns
|
|
154
|
+
session_max_pii_score?: Long, // Max pii score across session turns
|
|
155
|
+
session_max_secret_score?: Long, // Max secret score across session turns
|
|
156
|
+
|
|
141
157
|
// --- Usage Budget (multi-window, multi-dimension) ---
|
|
142
158
|
budget_remaining_pct?: Long,
|
|
143
159
|
budget_exceeded?: Bool,
|
|
@@ -177,7 +193,7 @@ action connect_server appliesTo {
|
|
|
177
193
|
indirect_injection_score?: Long,
|
|
178
194
|
|
|
179
195
|
// --- Secrets ---
|
|
180
|
-
|
|
196
|
+
secrets_detected?: Bool,
|
|
181
197
|
secret_types?: Set<String>,
|
|
182
198
|
secret_count?: Long,
|
|
183
199
|
|
|
@@ -185,12 +201,28 @@ action connect_server appliesTo {
|
|
|
185
201
|
pii_detected?: Bool,
|
|
186
202
|
pii_types?: Set<String>,
|
|
187
203
|
pii_count?: Long,
|
|
204
|
+
pii_score?: Long,
|
|
188
205
|
|
|
189
206
|
// --- MCP Trust & Config Risk ---
|
|
190
207
|
mcp_server_verified?: Bool,
|
|
191
208
|
mcp_config_risk?: Bool,
|
|
192
209
|
mcp_risk_score?: Long,
|
|
193
210
|
|
|
211
|
+
// --- Session-Aware Threat Aggregation ---
|
|
212
|
+
session_threat_turns?: Long, // Count of turns with detected threats
|
|
213
|
+
session_cumulative_risk_score?: Long, // Accumulated risk across turns
|
|
214
|
+
session_injection_detected?: Bool, // Injection seen in any prior turn
|
|
215
|
+
session_command_injection?: Bool, // Command injection seen in any prior turn
|
|
216
|
+
session_pii_detected?: Bool, // PII seen in any prior turn
|
|
217
|
+
session_pii_types?: Set<String>, // PII types accumulated across session
|
|
218
|
+
session_secrets_detected?: Bool, // Secrets seen in any prior turn
|
|
219
|
+
session_secret_types?: Set<String>, // Secret types accumulated across session
|
|
220
|
+
session_max_injection_score?: Long, // Max injection score across session turns
|
|
221
|
+
session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
|
|
222
|
+
session_max_command_injection_score?: Long, // Max command-injection score across session turns
|
|
223
|
+
session_max_pii_score?: Long, // Max pii score across session turns
|
|
224
|
+
session_max_secret_score?: Long, // Max secret score across session turns
|
|
225
|
+
|
|
194
226
|
// --- Usage Budget (multi-window, multi-dimension) ---
|
|
195
227
|
budget_remaining_pct?: Long,
|
|
196
228
|
budget_exceeded?: Bool,
|
|
@@ -222,7 +254,7 @@ action process_prompt appliesTo {
|
|
|
222
254
|
threat_categories?: Set<String>,
|
|
223
255
|
detected_threats?: Set<String>,
|
|
224
256
|
max_threat_severity?: Long,
|
|
225
|
-
|
|
257
|
+
secrets_detected?: Bool,
|
|
226
258
|
|
|
227
259
|
// --- Secrets ---
|
|
228
260
|
secret_types?: Set<String>,
|
|
@@ -232,10 +264,11 @@ action process_prompt appliesTo {
|
|
|
232
264
|
pii_detected?: Bool,
|
|
233
265
|
pii_types?: Set<String>,
|
|
234
266
|
pii_count?: Long,
|
|
267
|
+
pii_score?: Long,
|
|
235
268
|
|
|
236
269
|
// --- ML Detector Confidence Scores (0-100) ---
|
|
237
|
-
|
|
238
|
-
|
|
270
|
+
injection_score?: Long,
|
|
271
|
+
jailbreak_score?: Long,
|
|
239
272
|
|
|
240
273
|
// --- Content Safety Scores (0-100) ---
|
|
241
274
|
violence_score?: Long,
|
|
@@ -246,13 +279,28 @@ action process_prompt appliesTo {
|
|
|
246
279
|
profanity_score?: Long,
|
|
247
280
|
|
|
248
281
|
// --- Encoding ---
|
|
249
|
-
|
|
282
|
+
invisible_chars_detected?: Bool,
|
|
250
283
|
invisible_chars_score?: Long,
|
|
251
284
|
|
|
252
285
|
// --- LLM-specific ---
|
|
253
286
|
model_name?: String, // Target model name (e.g., "gpt-4", "claude-3-opus")
|
|
254
287
|
model_provider?: String, // Provider name (e.g., "openai", "anthropic", "bedrock")
|
|
255
288
|
|
|
289
|
+
// --- Session-Aware Threat Aggregation ---
|
|
290
|
+
session_threat_turns?: Long, // Count of turns with detected threats
|
|
291
|
+
session_cumulative_risk_score?: Long, // Accumulated risk across turns
|
|
292
|
+
session_injection_detected?: Bool, // Injection seen in any prior turn
|
|
293
|
+
session_command_injection?: Bool, // Command injection seen in any prior turn
|
|
294
|
+
session_pii_detected?: Bool, // PII seen in any prior turn
|
|
295
|
+
session_pii_types?: Set<String>, // PII types accumulated across session
|
|
296
|
+
session_secrets_detected?: Bool, // Secrets seen in any prior turn
|
|
297
|
+
session_secret_types?: Set<String>, // Secret types accumulated across session
|
|
298
|
+
session_max_injection_score?: Long, // Max injection score across session turns
|
|
299
|
+
session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
|
|
300
|
+
session_max_command_injection_score?: Long, // Max command-injection score across session turns
|
|
301
|
+
session_max_pii_score?: Long, // Max pii score across session turns
|
|
302
|
+
session_max_secret_score?: Long, // Max secret score across session turns
|
|
303
|
+
|
|
256
304
|
// --- Usage Budget (multi-window, multi-dimension) ---
|
|
257
305
|
budget_remaining_pct?: Long,
|
|
258
306
|
budget_exceeded?: Bool,
|
|
@@ -284,7 +332,7 @@ action read_file appliesTo {
|
|
|
284
332
|
threat_categories?: Set<String>,
|
|
285
333
|
detected_threats?: Set<String>,
|
|
286
334
|
max_threat_severity?: Long,
|
|
287
|
-
|
|
335
|
+
secrets_detected?: Bool,
|
|
288
336
|
|
|
289
337
|
// --- Secrets ---
|
|
290
338
|
secret_types?: Set<String>,
|
|
@@ -294,6 +342,22 @@ action read_file appliesTo {
|
|
|
294
342
|
pii_detected?: Bool,
|
|
295
343
|
pii_types?: Set<String>,
|
|
296
344
|
pii_count?: Long,
|
|
345
|
+
pii_score?: Long,
|
|
346
|
+
|
|
347
|
+
// --- Session-Aware Threat Aggregation ---
|
|
348
|
+
session_threat_turns?: Long, // Count of turns with detected threats
|
|
349
|
+
session_cumulative_risk_score?: Long, // Accumulated risk across turns
|
|
350
|
+
session_injection_detected?: Bool, // Injection seen in any prior turn
|
|
351
|
+
session_command_injection?: Bool, // Command injection seen in any prior turn
|
|
352
|
+
session_pii_detected?: Bool, // PII seen in any prior turn
|
|
353
|
+
session_pii_types?: Set<String>, // PII types accumulated across session
|
|
354
|
+
session_secrets_detected?: Bool, // Secrets seen in any prior turn
|
|
355
|
+
session_secret_types?: Set<String>, // Secret types accumulated across session
|
|
356
|
+
session_max_injection_score?: Long, // Max injection score across session turns
|
|
357
|
+
session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
|
|
358
|
+
session_max_command_injection_score?: Long, // Max command-injection score across session turns
|
|
359
|
+
session_max_pii_score?: Long, // Max pii score across session turns
|
|
360
|
+
session_max_secret_score?: Long, // Max secret score across session turns
|
|
297
361
|
|
|
298
362
|
// --- Usage Budget (multi-window, multi-dimension) ---
|
|
299
363
|
budget_remaining_pct?: Long,
|
|
@@ -326,7 +390,7 @@ action write_file appliesTo {
|
|
|
326
390
|
threat_categories?: Set<String>,
|
|
327
391
|
detected_threats?: Set<String>,
|
|
328
392
|
max_threat_severity?: Long,
|
|
329
|
-
|
|
393
|
+
secrets_detected?: Bool,
|
|
330
394
|
|
|
331
395
|
// --- Secrets ---
|
|
332
396
|
secret_types?: Set<String>,
|
|
@@ -336,6 +400,22 @@ action write_file appliesTo {
|
|
|
336
400
|
pii_detected?: Bool,
|
|
337
401
|
pii_types?: Set<String>,
|
|
338
402
|
pii_count?: Long,
|
|
403
|
+
pii_score?: Long,
|
|
404
|
+
|
|
405
|
+
// --- Session-Aware Threat Aggregation ---
|
|
406
|
+
session_threat_turns?: Long, // Count of turns with detected threats
|
|
407
|
+
session_cumulative_risk_score?: Long, // Accumulated risk across turns
|
|
408
|
+
session_injection_detected?: Bool, // Injection seen in any prior turn
|
|
409
|
+
session_command_injection?: Bool, // Command injection seen in any prior turn
|
|
410
|
+
session_pii_detected?: Bool, // PII seen in any prior turn
|
|
411
|
+
session_pii_types?: Set<String>, // PII types accumulated across session
|
|
412
|
+
session_secrets_detected?: Bool, // Secrets seen in any prior turn
|
|
413
|
+
session_secret_types?: Set<String>, // Secret types accumulated across session
|
|
414
|
+
session_max_injection_score?: Long, // Max injection score across session turns
|
|
415
|
+
session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
|
|
416
|
+
session_max_command_injection_score?: Long, // Max command-injection score across session turns
|
|
417
|
+
session_max_pii_score?: Long, // Max pii score across session turns
|
|
418
|
+
session_max_secret_score?: Long, // Max secret score across session turns
|
|
339
419
|
|
|
340
420
|
// --- Usage Budget (multi-window, multi-dimension) ---
|
|
341
421
|
budget_remaining_pct?: Long,
|
|
@@ -1,20 +1,38 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Agent Security
|
|
2
|
+
// Agent Security (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
//
|
|
5
|
-
// and MCP supply
|
|
4
|
+
// Blocks tool poisoning, rug pull behavioral drift, indirect prompt injection
|
|
5
|
+
// from tool outputs, and MCP supply-chain risks (unverified servers, risky
|
|
6
|
+
// configurations).
|
|
6
7
|
//
|
|
7
|
-
//
|
|
8
|
+
// Context keys consumed:
|
|
9
|
+
// - tool_poisoning_score: Long (0-100)
|
|
10
|
+
// - rug_pull_score: Long (0-100)
|
|
11
|
+
// - indirect_injection_score: Long (0-100)
|
|
12
|
+
// - tool_is_sensitive: Bool
|
|
13
|
+
// - mcp_server_verified: Bool
|
|
14
|
+
// - mcp_config_risk: Bool
|
|
15
|
+
// - mcp_risk_score: Long (0-100)
|
|
16
|
+
// - threat_count: Long
|
|
17
|
+
//
|
|
18
|
+
// Compliance:
|
|
19
|
+
// - OWASP LLM01, OWASP ASI01, OWASP ASI02, OWASP ASI04
|
|
20
|
+
// - OWASP MCP02, MCP03, MCP05
|
|
21
|
+
//
|
|
22
|
+
// Category: agent-security
|
|
8
23
|
// Namespace: AIGateway
|
|
9
24
|
// =============================================================================
|
|
10
25
|
|
|
11
|
-
//
|
|
12
|
-
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Section 1: Tool poisoning
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
@id("agent-security.block-tool-poisoning")
|
|
13
31
|
@name("Block tool poisoning")
|
|
14
|
-
@description("
|
|
32
|
+
@description("Blocks call_tool when tool_poisoning_score >= 70.")
|
|
15
33
|
@severity("critical")
|
|
16
|
-
@tags("tool-poisoning,
|
|
17
|
-
@reject_message("Tool execution blocked: hidden manipulation instructions detected
|
|
34
|
+
@tags("category:agent-security,threat:tool-poisoning,detection:ml,surface:call-tool,owasp:asi01")
|
|
35
|
+
@reject_message("Tool execution blocked: hidden manipulation instructions detected (OWASP ASI01).")
|
|
18
36
|
forbid (
|
|
19
37
|
principal,
|
|
20
38
|
action == AIGateway::Action::"call_tool",
|
|
@@ -24,12 +42,11 @@ when {
|
|
|
24
42
|
context has tool_poisoning_score && context.tool_poisoning_score >= 70
|
|
25
43
|
};
|
|
26
44
|
|
|
27
|
-
|
|
28
|
-
@id("as-block-server-poisoning")
|
|
45
|
+
@id("agent-security.block-server-poisoning")
|
|
29
46
|
@name("Block poisoned MCP servers")
|
|
30
|
-
@description("
|
|
47
|
+
@description("Blocks connect_server when tool_poisoning_score >= 60.")
|
|
31
48
|
@severity("critical")
|
|
32
|
-
@tags("tool-poisoning,
|
|
49
|
+
@tags("category:agent-security,threat:tool-poisoning,detection:ml,surface:connect-server,owasp:asi04,owasp:mcp02")
|
|
33
50
|
@reject_message("MCP server connection blocked: tool poisoning patterns detected in server tool descriptions.")
|
|
34
51
|
forbid (
|
|
35
52
|
principal,
|
|
@@ -40,13 +57,16 @@ when {
|
|
|
40
57
|
context has tool_poisoning_score && context.tool_poisoning_score >= 60
|
|
41
58
|
};
|
|
42
59
|
|
|
43
|
-
//
|
|
44
|
-
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// Section 2: Rug pull (behavioral drift)
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
@id("agent-security.block-rug-pull")
|
|
45
65
|
@name("Block rug pull attacks")
|
|
46
|
-
@description("
|
|
66
|
+
@description("Blocks call_tool and connect_server when rug_pull_score >= 70.")
|
|
47
67
|
@severity("critical")
|
|
48
|
-
@tags("rug-pull,
|
|
49
|
-
@reject_message("Tool execution blocked: tool behavior has
|
|
68
|
+
@tags("category:agent-security,threat:rug-pull,detection:ml,owasp:asi04")
|
|
69
|
+
@reject_message("Tool execution blocked: tool behavior has drifted significantly from its established pattern.")
|
|
50
70
|
forbid (
|
|
51
71
|
principal,
|
|
52
72
|
action in [AIGateway::Action::"call_tool", AIGateway::Action::"connect_server"],
|
|
@@ -56,12 +76,15 @@ when {
|
|
|
56
76
|
context has rug_pull_score && context.rug_pull_score >= 70
|
|
57
77
|
};
|
|
58
78
|
|
|
59
|
-
//
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Section 3: Indirect prompt injection
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
@id("agent-security.block-indirect-injection")
|
|
84
|
+
@name("Block indirect injection")
|
|
85
|
+
@description("Blocks call_tool and connect_server when indirect_injection_score >= 70.")
|
|
63
86
|
@severity("critical")
|
|
64
|
-
@tags("indirect-injection,owasp
|
|
87
|
+
@tags("category:agent-security,threat:indirect-injection,detection:ml,owasp:llm01,owasp:asi01")
|
|
65
88
|
@reject_message("Content blocked: indirect prompt injection detected in tool output or retrieved content.")
|
|
66
89
|
forbid (
|
|
67
90
|
principal,
|
|
@@ -72,13 +95,12 @@ when {
|
|
|
72
95
|
context has indirect_injection_score && context.indirect_injection_score >= 70
|
|
73
96
|
};
|
|
74
97
|
|
|
75
|
-
|
|
76
|
-
@id("as-block-indirect-injection-sensitive-tools")
|
|
98
|
+
@id("agent-security.block-indirect-injection-sensitive")
|
|
77
99
|
@name("Block indirect injection on sensitive tools")
|
|
78
|
-
@description("
|
|
100
|
+
@description("Blocks call_tool when tool_is_sensitive is true and indirect_injection_score >= 50.")
|
|
79
101
|
@severity("critical")
|
|
80
|
-
@tags("indirect-injection,
|
|
81
|
-
@reject_message("
|
|
102
|
+
@tags("category:agent-security,threat:indirect-injection,detection:ml,surface:call-tool,owasp:asi02")
|
|
103
|
+
@reject_message("Tool execution blocked: a sensitive tool was called with moderate indirect-injection risk.")
|
|
82
104
|
forbid (
|
|
83
105
|
principal,
|
|
84
106
|
action == AIGateway::Action::"call_tool",
|
|
@@ -86,15 +108,18 @@ forbid (
|
|
|
86
108
|
)
|
|
87
109
|
when {
|
|
88
110
|
context has indirect_injection_score && context.indirect_injection_score >= 50 &&
|
|
89
|
-
context has tool_is_sensitive && context.tool_is_sensitive
|
|
111
|
+
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
90
112
|
};
|
|
91
113
|
|
|
92
|
-
//
|
|
93
|
-
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// Section 4: MCP supply chain & trust
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
@id("agent-security.block-unverified-threats")
|
|
94
119
|
@name("Block unverified server threats")
|
|
95
|
-
@description("
|
|
120
|
+
@description("Blocks call_tool when mcp_server_verified is false and threat_count >= 1.")
|
|
96
121
|
@severity("high")
|
|
97
|
-
@tags("
|
|
122
|
+
@tags("category:agent-security,threat:supply-chain,detection:aggregate,surface:call-tool,owasp:asi04")
|
|
98
123
|
@reject_message("Tool execution blocked: the MCP server is unverified and security threats were detected.")
|
|
99
124
|
forbid (
|
|
100
125
|
principal,
|
|
@@ -103,15 +128,14 @@ forbid (
|
|
|
103
128
|
)
|
|
104
129
|
when {
|
|
105
130
|
context has mcp_server_verified && context.mcp_server_verified == false &&
|
|
106
|
-
context has threat_count && context.threat_count
|
|
131
|
+
context has threat_count && context.threat_count >= 1
|
|
107
132
|
};
|
|
108
133
|
|
|
109
|
-
|
|
110
|
-
@id("as-block-mcp-config-risk")
|
|
134
|
+
@id("agent-security.block-mcp-config-risk")
|
|
111
135
|
@name("Block risky MCP server configs")
|
|
112
|
-
@description("
|
|
136
|
+
@description("Blocks connect_server when mcp_config_risk is true and mcp_risk_score >= 70.")
|
|
113
137
|
@severity("high")
|
|
114
|
-
@tags("mcp-config,owasp
|
|
138
|
+
@tags("category:agent-security,threat:mcp-config-risk,detection:ml,surface:connect-server,owasp:mcp03")
|
|
115
139
|
@reject_message("MCP server connection blocked: risky server configuration detected.")
|
|
116
140
|
forbid (
|
|
117
141
|
principal,
|
|
@@ -119,16 +143,15 @@ forbid (
|
|
|
119
143
|
resource
|
|
120
144
|
)
|
|
121
145
|
when {
|
|
122
|
-
context has mcp_config_risk && context.mcp_config_risk &&
|
|
146
|
+
context has mcp_config_risk && context.mcp_config_risk == true &&
|
|
123
147
|
context has mcp_risk_score && context.mcp_risk_score >= 70
|
|
124
148
|
};
|
|
125
149
|
|
|
126
|
-
|
|
127
|
-
@id("as-block-unverified-server-connect")
|
|
150
|
+
@id("agent-security.block-unverified-server")
|
|
128
151
|
@name("Block unverified MCP server connections")
|
|
129
|
-
@description("
|
|
152
|
+
@description("Blocks connect_server when mcp_server_verified is false.")
|
|
130
153
|
@severity("high")
|
|
131
|
-
@tags("
|
|
154
|
+
@tags("category:agent-security,threat:supply-chain,detection:rule,surface:connect-server,owasp:asi04,owasp:mcp05")
|
|
132
155
|
@reject_message("MCP server connection blocked: server is not from a verified registry.")
|
|
133
156
|
forbid (
|
|
134
157
|
principal,
|
|
@@ -1,21 +1,19 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Baseline Permit
|
|
2
|
+
// Baseline Permit (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Permits all actions by default. Threat-specific forbid policies
|
|
5
|
-
// this
|
|
4
|
+
// Permits all AI Gateway actions by default. Threat-specific forbid policies
|
|
5
|
+
// override this when detectors fire. Cedar is default-deny: without at least
|
|
6
|
+
// one permit rule, every request is denied regardless of forbid rules.
|
|
6
7
|
//
|
|
7
|
-
//
|
|
8
|
-
// is denied regardless of forbid rules.
|
|
9
|
-
//
|
|
10
|
-
// Category: organization
|
|
8
|
+
// Category: organization
|
|
11
9
|
// Namespace: AIGateway
|
|
12
10
|
// =============================================================================
|
|
13
11
|
|
|
14
|
-
@id("
|
|
15
|
-
@name("Permit
|
|
16
|
-
@description("
|
|
12
|
+
@id("organization.permit-baseline")
|
|
13
|
+
@name("Permit baseline")
|
|
14
|
+
@description("Permits all AI Gateway actions.")
|
|
17
15
|
@severity("low")
|
|
18
|
-
@tags("
|
|
16
|
+
@tags("category:organization,posture:permit-default")
|
|
19
17
|
permit (
|
|
20
18
|
principal,
|
|
21
19
|
action,
|
|
@@ -1,20 +1,40 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Semantic Threat Detection
|
|
2
|
+
// Semantic Threat Detection (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
//
|
|
5
|
-
//
|
|
4
|
+
// Blocks prompt injection, jailbreak attempts, and high-severity threats in
|
|
5
|
+
// MCP tool calls and LLM prompts. Uses both detection-engine rule triggers and
|
|
6
|
+
// ML classifier confidence scores.
|
|
6
7
|
//
|
|
7
|
-
//
|
|
8
|
+
// Detection layers:
|
|
9
|
+
// - Rule triggers (detected_threats), always available
|
|
10
|
+
// - ML classifier scores (injection_score, jailbreak_score), require API token
|
|
11
|
+
// - Severity aggregation (highest_severity, threat_count), catch-all rules
|
|
12
|
+
//
|
|
13
|
+
// Context keys consumed:
|
|
14
|
+
// - detected_threats: Set<String>
|
|
15
|
+
// - injection_score: Long (0-100)
|
|
16
|
+
// - jailbreak_score: Long (0-100)
|
|
17
|
+
// - highest_severity: String
|
|
18
|
+
// - threat_count: Long
|
|
19
|
+
//
|
|
20
|
+
// Compliance:
|
|
21
|
+
// - OWASP LLM01, OWASP LLM02
|
|
22
|
+
// - MITRE ATLAS AML.T0051, AML.T0054
|
|
23
|
+
//
|
|
24
|
+
// Category: semantic
|
|
8
25
|
// Namespace: AIGateway
|
|
9
26
|
// =============================================================================
|
|
10
27
|
|
|
11
|
-
//
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Section 1: Prompt injection
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
@id("semantic.block-injection-rule")
|
|
33
|
+
@name("Block injection (rule)")
|
|
34
|
+
@description("Blocks call_tool when detected_threats contains \"prompt_injection\".")
|
|
15
35
|
@severity("critical")
|
|
16
|
-
@tags("injection,
|
|
17
|
-
@reject_message("Tool
|
|
36
|
+
@tags("category:semantic,threat:injection,detection:rule,surface:call-tool,owasp:llm01,mitre:atlas-t0051,posture:baseline")
|
|
37
|
+
@reject_message("Tool execution blocked: prompt injection patterns detected (OWASP LLM01).")
|
|
18
38
|
forbid (
|
|
19
39
|
principal,
|
|
20
40
|
action == AIGateway::Action::"call_tool",
|
|
@@ -24,29 +44,31 @@ when {
|
|
|
24
44
|
context has detected_threats && context.detected_threats.contains("prompt_injection")
|
|
25
45
|
};
|
|
26
46
|
|
|
27
|
-
|
|
28
|
-
@
|
|
29
|
-
@
|
|
30
|
-
@description("Block tool calls when the ML injection classifier confidence exceeds 75/100")
|
|
47
|
+
@id("semantic.block-injection-ml")
|
|
48
|
+
@name("Block injection (ML)")
|
|
49
|
+
@description("Blocks call_tool when injection_score >= 75.")
|
|
31
50
|
@severity("critical")
|
|
32
|
-
@tags("injection,ml-
|
|
33
|
-
@reject_message("Tool
|
|
51
|
+
@tags("category:semantic,threat:injection,detection:ml,surface:call-tool,owasp:llm01,mitre:atlas-t0051")
|
|
52
|
+
@reject_message("Tool execution blocked: the ML classifier detected prompt injection with high confidence.")
|
|
34
53
|
forbid (
|
|
35
54
|
principal,
|
|
36
55
|
action == AIGateway::Action::"call_tool",
|
|
37
56
|
resource
|
|
38
57
|
)
|
|
39
58
|
when {
|
|
40
|
-
context has
|
|
59
|
+
context has injection_score && context.injection_score >= 75
|
|
41
60
|
};
|
|
42
61
|
|
|
43
|
-
//
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// Section 2: Jailbreak
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
@id("semantic.block-jailbreak-rule")
|
|
67
|
+
@name("Block jailbreak (rule)")
|
|
68
|
+
@description("Blocks call_tool when detected_threats contains \"jailbreak\".")
|
|
47
69
|
@severity("critical")
|
|
48
|
-
@tags("jailbreak,
|
|
49
|
-
@reject_message("Tool
|
|
70
|
+
@tags("category:semantic,threat:jailbreak,detection:rule,surface:call-tool,owasp:llm02,mitre:atlas-t0054,posture:baseline")
|
|
71
|
+
@reject_message("Tool execution blocked: jailbreak patterns detected (OWASP LLM02).")
|
|
50
72
|
forbid (
|
|
51
73
|
principal,
|
|
52
74
|
action == AIGateway::Action::"call_tool",
|
|
@@ -56,29 +78,31 @@ when {
|
|
|
56
78
|
context has detected_threats && context.detected_threats.contains("jailbreak")
|
|
57
79
|
};
|
|
58
80
|
|
|
59
|
-
|
|
60
|
-
@
|
|
61
|
-
@
|
|
62
|
-
@description("Block tool calls when the ML jailbreak classifier confidence exceeds 75/100")
|
|
81
|
+
@id("semantic.block-jailbreak-ml")
|
|
82
|
+
@name("Block jailbreak (ML)")
|
|
83
|
+
@description("Blocks call_tool when jailbreak_score >= 75.")
|
|
63
84
|
@severity("critical")
|
|
64
|
-
@tags("jailbreak,ml-
|
|
65
|
-
@reject_message("Tool
|
|
85
|
+
@tags("category:semantic,threat:jailbreak,detection:ml,surface:call-tool,owasp:llm02,mitre:atlas-t0054")
|
|
86
|
+
@reject_message("Tool execution blocked: the ML classifier detected a jailbreak attempt with high confidence.")
|
|
66
87
|
forbid (
|
|
67
88
|
principal,
|
|
68
89
|
action == AIGateway::Action::"call_tool",
|
|
69
90
|
resource
|
|
70
91
|
)
|
|
71
92
|
when {
|
|
72
|
-
context has
|
|
93
|
+
context has jailbreak_score && context.jailbreak_score >= 75
|
|
73
94
|
};
|
|
74
95
|
|
|
75
|
-
//
|
|
76
|
-
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// Section 3: Severity aggregation
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
@id("semantic.block-critical")
|
|
77
101
|
@name("Block critical threats")
|
|
78
|
-
@description("
|
|
102
|
+
@description("Blocks all MCP operations when highest_severity equals \"critical\".")
|
|
79
103
|
@severity("critical")
|
|
80
|
-
@tags("
|
|
81
|
-
@reject_message("MCP operation
|
|
104
|
+
@tags("category:semantic,detection:aggregate,posture:catch-all")
|
|
105
|
+
@reject_message("MCP operation blocked: a critical-severity threat was detected.")
|
|
82
106
|
forbid (
|
|
83
107
|
principal,
|
|
84
108
|
action,
|
|
@@ -88,13 +112,12 @@ when {
|
|
|
88
112
|
context has highest_severity && context.highest_severity == "critical"
|
|
89
113
|
};
|
|
90
114
|
|
|
91
|
-
|
|
92
|
-
@id("semantic-block-multi-threat-tools")
|
|
115
|
+
@id("semantic.block-multi-threat")
|
|
93
116
|
@name("Block multi-threat tool calls")
|
|
94
|
-
@description("
|
|
117
|
+
@description("Blocks call_tool when threat_count >= 3.")
|
|
95
118
|
@severity("high")
|
|
96
|
-
@tags("
|
|
97
|
-
@reject_message("Tool execution
|
|
119
|
+
@tags("category:semantic,detection:aggregate,surface:call-tool,posture:catch-all")
|
|
120
|
+
@reject_message("Tool execution blocked: multiple concurrent security threats were detected.")
|
|
98
121
|
forbid (
|
|
99
122
|
principal,
|
|
100
123
|
action == AIGateway::Action::"call_tool",
|