@highflame/policy 2.1.36 → 2.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/_schemas/ai_gateway/context.json +431 -11
  2. package/_schemas/ai_gateway/schema.cedarschema +91 -11
  3. package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
  4. package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
  5. package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
  6. package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
  7. package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
  8. package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
  9. package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
  10. package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
  11. package/_schemas/ai_gateway/templates/templates.json +42 -47
  12. package/_schemas/guardrails/context.json +12 -12
  13. package/_schemas/guardrails/schema.cedarschema +12 -12
  14. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
  15. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
  16. package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
  17. package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
  18. package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
  19. package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
  20. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
  21. package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
  22. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
  23. package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
  24. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
  25. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
  26. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
  27. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
  28. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
  29. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
  30. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
  31. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
  32. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
  33. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
  34. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
  35. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
  36. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
  37. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
  38. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
  39. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
  40. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
  41. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
  42. package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
  43. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
  44. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
  45. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
  46. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
  47. package/_schemas/guardrails/templates/templates.json +188 -210
  48. package/_schemas/overwatch/context.json +14 -14
  49. package/_schemas/overwatch/schema.cedarschema +12 -12
  50. package/_schemas/sentry/context.json +11 -11
  51. package/_schemas/sentry/schema.cedarschema +11 -11
  52. package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
  53. package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
  54. package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
  55. package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
  56. package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
  57. package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
  58. package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
  59. package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
  60. package/_schemas/sentry/templates/templates.json +46 -46
  61. package/dist/ai_gateway-context.gen.d.ts +18 -4
  62. package/dist/ai_gateway-context.gen.js +18 -4
  63. package/dist/ai_gateway-defaults.gen.d.ts +1 -1
  64. package/dist/ai_gateway-defaults.gen.js +377 -313
  65. package/dist/guardrails-context.gen.d.ts +5 -5
  66. package/dist/guardrails-context.gen.js +5 -5
  67. package/dist/guardrails-defaults.gen.d.ts +1 -1
  68. package/dist/guardrails-defaults.gen.js +2070 -1849
  69. package/dist/overwatch-context.gen.d.ts +5 -5
  70. package/dist/overwatch-context.gen.js +5 -5
  71. package/dist/overwatch-defaults.gen.d.ts +1 -1
  72. package/dist/overwatch-defaults.gen.js +547 -573
  73. package/dist/sentry-context.gen.d.ts +3 -3
  74. package/dist/sentry-context.gen.js +3 -3
  75. package/dist/sentry-defaults.gen.d.ts +1 -1
  76. package/dist/sentry-defaults.gen.js +379 -460
  77. package/dist/service-schemas.gen.d.ts +4 -4
  78. package/dist/service-schemas.gen.js +249 -99
  79. package/package.json +1 -1
@@ -87,7 +87,7 @@ action call_tool appliesTo {
87
87
  threat_categories?: Set<String>, // Threat category names
88
88
  detected_threats?: Set<String>, // Detection rule names that matched
89
89
  max_threat_severity?: Long, // Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)
90
- contains_secrets?: Bool, // Whether secrets/credentials detected
90
+ secrets_detected?: Bool, // Whether secrets/credentials detected
91
91
 
92
92
  // --- Secrets (granular) ---
93
93
  secret_types?: Set<String>,
@@ -97,10 +97,11 @@ action call_tool appliesTo {
97
97
  pii_detected?: Bool,
98
98
  pii_types?: Set<String>,
99
99
  pii_count?: Long,
100
+ pii_score?: Long,
100
101
 
101
102
  // --- ML Detector Confidence Scores (0-100) ---
102
- injection_confidence?: Long, // Prompt injection classifier confidence
103
- jailbreak_confidence?: Long, // Jailbreak detection classifier confidence
103
+ injection_score?: Long, // Prompt injection classifier confidence
104
+ jailbreak_score?: Long, // Jailbreak detection classifier confidence
104
105
 
105
106
  // --- Agent Security (0-100) ---
106
107
  tool_poisoning_score?: Long, // Hidden instructions in tool description/args
@@ -127,7 +128,7 @@ action call_tool appliesTo {
127
128
  profanity_score?: Long,
128
129
 
129
130
  // --- Encoding & Unicode Attacks ---
130
- contains_invisible_chars?: Bool,
131
+ invisible_chars_detected?: Bool,
131
132
  invisible_chars_score?: Long,
132
133
 
133
134
  // --- Behavioral Analysis ---
@@ -138,6 +139,21 @@ action call_tool appliesTo {
138
139
  pattern_type?: String,
139
140
  sequence_risk?: Long,
140
141
 
142
+ // --- Session-Aware Threat Aggregation ---
143
+ session_threat_turns?: Long, // Count of turns with detected threats
144
+ session_cumulative_risk_score?: Long, // Accumulated risk across turns
145
+ session_injection_detected?: Bool, // Injection seen in any prior turn
146
+ session_command_injection?: Bool, // Command injection seen in any prior turn
147
+ session_pii_detected?: Bool, // PII seen in any prior turn
148
+ session_pii_types?: Set<String>, // PII types accumulated across session
149
+ session_secrets_detected?: Bool, // Secrets seen in any prior turn
150
+ session_secret_types?: Set<String>, // Secret types accumulated across session
151
+ session_max_injection_score?: Long, // Max injection score across session turns
152
+ session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
153
+ session_max_command_injection_score?: Long, // Max command-injection score across session turns
154
+ session_max_pii_score?: Long, // Max pii score across session turns
155
+ session_max_secret_score?: Long, // Max secret score across session turns
156
+
141
157
  // --- Usage Budget (multi-window, multi-dimension) ---
142
158
  budget_remaining_pct?: Long,
143
159
  budget_exceeded?: Bool,
@@ -177,7 +193,7 @@ action connect_server appliesTo {
177
193
  indirect_injection_score?: Long,
178
194
 
179
195
  // --- Secrets ---
180
- contains_secrets?: Bool,
196
+ secrets_detected?: Bool,
181
197
  secret_types?: Set<String>,
182
198
  secret_count?: Long,
183
199
 
@@ -185,12 +201,28 @@ action connect_server appliesTo {
185
201
  pii_detected?: Bool,
186
202
  pii_types?: Set<String>,
187
203
  pii_count?: Long,
204
+ pii_score?: Long,
188
205
 
189
206
  // --- MCP Trust & Config Risk ---
190
207
  mcp_server_verified?: Bool,
191
208
  mcp_config_risk?: Bool,
192
209
  mcp_risk_score?: Long,
193
210
 
211
+ // --- Session-Aware Threat Aggregation ---
212
+ session_threat_turns?: Long, // Count of turns with detected threats
213
+ session_cumulative_risk_score?: Long, // Accumulated risk across turns
214
+ session_injection_detected?: Bool, // Injection seen in any prior turn
215
+ session_command_injection?: Bool, // Command injection seen in any prior turn
216
+ session_pii_detected?: Bool, // PII seen in any prior turn
217
+ session_pii_types?: Set<String>, // PII types accumulated across session
218
+ session_secrets_detected?: Bool, // Secrets seen in any prior turn
219
+ session_secret_types?: Set<String>, // Secret types accumulated across session
220
+ session_max_injection_score?: Long, // Max injection score across session turns
221
+ session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
222
+ session_max_command_injection_score?: Long, // Max command-injection score across session turns
223
+ session_max_pii_score?: Long, // Max pii score across session turns
224
+ session_max_secret_score?: Long, // Max secret score across session turns
225
+
194
226
  // --- Usage Budget (multi-window, multi-dimension) ---
195
227
  budget_remaining_pct?: Long,
196
228
  budget_exceeded?: Bool,
@@ -222,7 +254,7 @@ action process_prompt appliesTo {
222
254
  threat_categories?: Set<String>,
223
255
  detected_threats?: Set<String>,
224
256
  max_threat_severity?: Long,
225
- contains_secrets?: Bool,
257
+ secrets_detected?: Bool,
226
258
 
227
259
  // --- Secrets ---
228
260
  secret_types?: Set<String>,
@@ -232,10 +264,11 @@ action process_prompt appliesTo {
232
264
  pii_detected?: Bool,
233
265
  pii_types?: Set<String>,
234
266
  pii_count?: Long,
267
+ pii_score?: Long,
235
268
 
236
269
  // --- ML Detector Confidence Scores (0-100) ---
237
- injection_confidence?: Long,
238
- jailbreak_confidence?: Long,
270
+ injection_score?: Long,
271
+ jailbreak_score?: Long,
239
272
 
240
273
  // --- Content Safety Scores (0-100) ---
241
274
  violence_score?: Long,
@@ -246,13 +279,28 @@ action process_prompt appliesTo {
246
279
  profanity_score?: Long,
247
280
 
248
281
  // --- Encoding ---
249
- contains_invisible_chars?: Bool,
282
+ invisible_chars_detected?: Bool,
250
283
  invisible_chars_score?: Long,
251
284
 
252
285
  // --- LLM-specific ---
253
286
  model_name?: String, // Target model name (e.g., "gpt-4", "claude-3-opus")
254
287
  model_provider?: String, // Provider name (e.g., "openai", "anthropic", "bedrock")
255
288
 
289
+ // --- Session-Aware Threat Aggregation ---
290
+ session_threat_turns?: Long, // Count of turns with detected threats
291
+ session_cumulative_risk_score?: Long, // Accumulated risk across turns
292
+ session_injection_detected?: Bool, // Injection seen in any prior turn
293
+ session_command_injection?: Bool, // Command injection seen in any prior turn
294
+ session_pii_detected?: Bool, // PII seen in any prior turn
295
+ session_pii_types?: Set<String>, // PII types accumulated across session
296
+ session_secrets_detected?: Bool, // Secrets seen in any prior turn
297
+ session_secret_types?: Set<String>, // Secret types accumulated across session
298
+ session_max_injection_score?: Long, // Max injection score across session turns
299
+ session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
300
+ session_max_command_injection_score?: Long, // Max command-injection score across session turns
301
+ session_max_pii_score?: Long, // Max pii score across session turns
302
+ session_max_secret_score?: Long, // Max secret score across session turns
303
+
256
304
  // --- Usage Budget (multi-window, multi-dimension) ---
257
305
  budget_remaining_pct?: Long,
258
306
  budget_exceeded?: Bool,
@@ -284,7 +332,7 @@ action read_file appliesTo {
284
332
  threat_categories?: Set<String>,
285
333
  detected_threats?: Set<String>,
286
334
  max_threat_severity?: Long,
287
- contains_secrets?: Bool,
335
+ secrets_detected?: Bool,
288
336
 
289
337
  // --- Secrets ---
290
338
  secret_types?: Set<String>,
@@ -294,6 +342,22 @@ action read_file appliesTo {
294
342
  pii_detected?: Bool,
295
343
  pii_types?: Set<String>,
296
344
  pii_count?: Long,
345
+ pii_score?: Long,
346
+
347
+ // --- Session-Aware Threat Aggregation ---
348
+ session_threat_turns?: Long, // Count of turns with detected threats
349
+ session_cumulative_risk_score?: Long, // Accumulated risk across turns
350
+ session_injection_detected?: Bool, // Injection seen in any prior turn
351
+ session_command_injection?: Bool, // Command injection seen in any prior turn
352
+ session_pii_detected?: Bool, // PII seen in any prior turn
353
+ session_pii_types?: Set<String>, // PII types accumulated across session
354
+ session_secrets_detected?: Bool, // Secrets seen in any prior turn
355
+ session_secret_types?: Set<String>, // Secret types accumulated across session
356
+ session_max_injection_score?: Long, // Max injection score across session turns
357
+ session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
358
+ session_max_command_injection_score?: Long, // Max command-injection score across session turns
359
+ session_max_pii_score?: Long, // Max pii score across session turns
360
+ session_max_secret_score?: Long, // Max secret score across session turns
297
361
 
298
362
  // --- Usage Budget (multi-window, multi-dimension) ---
299
363
  budget_remaining_pct?: Long,
@@ -326,7 +390,7 @@ action write_file appliesTo {
326
390
  threat_categories?: Set<String>,
327
391
  detected_threats?: Set<String>,
328
392
  max_threat_severity?: Long,
329
- contains_secrets?: Bool,
393
+ secrets_detected?: Bool,
330
394
 
331
395
  // --- Secrets ---
332
396
  secret_types?: Set<String>,
@@ -336,6 +400,22 @@ action write_file appliesTo {
336
400
  pii_detected?: Bool,
337
401
  pii_types?: Set<String>,
338
402
  pii_count?: Long,
403
+ pii_score?: Long,
404
+
405
+ // --- Session-Aware Threat Aggregation ---
406
+ session_threat_turns?: Long, // Count of turns with detected threats
407
+ session_cumulative_risk_score?: Long, // Accumulated risk across turns
408
+ session_injection_detected?: Bool, // Injection seen in any prior turn
409
+ session_command_injection?: Bool, // Command injection seen in any prior turn
410
+ session_pii_detected?: Bool, // PII seen in any prior turn
411
+ session_pii_types?: Set<String>, // PII types accumulated across session
412
+ session_secrets_detected?: Bool, // Secrets seen in any prior turn
413
+ session_secret_types?: Set<String>, // Secret types accumulated across session
414
+ session_max_injection_score?: Long, // Max injection score across session turns
415
+ session_max_jailbreak_score?: Long, // Max jailbreak score across session turns
416
+ session_max_command_injection_score?: Long, // Max command-injection score across session turns
417
+ session_max_pii_score?: Long, // Max pii score across session turns
418
+ session_max_secret_score?: Long, // Max secret score across session turns
339
419
 
340
420
  // --- Usage Budget (multi-window, multi-dimension) ---
341
421
  budget_remaining_pct?: Long,
@@ -1,20 +1,38 @@
1
1
  // =============================================================================
2
- // Agent Security Policy (Default)
2
+ // Agent Security (Default)
3
3
  // =============================================================================
4
- // Detects and blocks tool poisoning, rug pull attacks, indirect prompt injection,
5
- // and MCP supply chain threats.
4
+ // Blocks tool poisoning, rug pull behavioral drift, indirect prompt injection
5
+ // from tool outputs, and MCP supply-chain risks (unverified servers, risky
6
+ // configurations).
6
7
  //
7
- // Category: agent_security
8
+ // Context keys consumed:
9
+ // - tool_poisoning_score: Long (0-100)
10
+ // - rug_pull_score: Long (0-100)
11
+ // - indirect_injection_score: Long (0-100)
12
+ // - tool_is_sensitive: Bool
13
+ // - mcp_server_verified: Bool
14
+ // - mcp_config_risk: Bool
15
+ // - mcp_risk_score: Long (0-100)
16
+ // - threat_count: Long
17
+ //
18
+ // Compliance:
19
+ // - OWASP LLM01, OWASP ASI01, OWASP ASI02, OWASP ASI04
20
+ // - OWASP MCP02, MCP03, MCP05
21
+ //
22
+ // Category: agent-security
8
23
  // Namespace: AIGateway
9
24
  // =============================================================================
10
25
 
11
- // Block tool calls with tool poisoning risk
12
- @id("as-block-tool-poisoning")
26
+ // ---------------------------------------------------------------------------
27
+ // Section 1: Tool poisoning
28
+ // ---------------------------------------------------------------------------
29
+
30
+ @id("agent-security.block-tool-poisoning")
13
31
  @name("Block tool poisoning")
14
- @description("Block tool execution when hidden instructions are detected in tool descriptions or arguments (score >= 70)")
32
+ @description("Blocks call_tool when tool_poisoning_score >= 70.")
15
33
  @severity("critical")
16
- @tags("tool-poisoning,agent-security,owasp-asi01")
17
- @reject_message("Tool execution blocked: hidden manipulation instructions detected in tool description or arguments (OWASP ASI01).")
34
+ @tags("category:agent-security,threat:tool-poisoning,detection:ml,surface:call-tool,owasp:asi01")
35
+ @reject_message("Tool execution blocked: hidden manipulation instructions detected (OWASP ASI01).")
18
36
  forbid (
19
37
  principal,
20
38
  action == AIGateway::Action::"call_tool",
@@ -24,12 +42,11 @@ when {
24
42
  context has tool_poisoning_score && context.tool_poisoning_score >= 70
25
43
  };
26
44
 
27
- // Block MCP server connections with poisoning risk
28
- @id("as-block-server-poisoning")
45
+ @id("agent-security.block-server-poisoning")
29
46
  @name("Block poisoned MCP servers")
30
- @description("Block connections to MCP servers when tool poisoning patterns are detected (score >= 60)")
47
+ @description("Blocks connect_server when tool_poisoning_score >= 60.")
31
48
  @severity("critical")
32
- @tags("tool-poisoning,mcp-security,owasp-asi04,owasp-mcp02")
49
+ @tags("category:agent-security,threat:tool-poisoning,detection:ml,surface:connect-server,owasp:asi04,owasp:mcp02")
33
50
  @reject_message("MCP server connection blocked: tool poisoning patterns detected in server tool descriptions.")
34
51
  forbid (
35
52
  principal,
@@ -40,13 +57,16 @@ when {
40
57
  context has tool_poisoning_score && context.tool_poisoning_score >= 60
41
58
  };
42
59
 
43
- // Block tool calls with behavioral drift (rug pull)
44
- @id("as-block-rug-pull")
60
+ // ---------------------------------------------------------------------------
61
+ // Section 2: Rug pull (behavioral drift)
62
+ // ---------------------------------------------------------------------------
63
+
64
+ @id("agent-security.block-rug-pull")
45
65
  @name("Block rug pull attacks")
46
- @description("Block tool execution when behavioral drift is detected (score >= 70)")
66
+ @description("Blocks call_tool and connect_server when rug_pull_score >= 70.")
47
67
  @severity("critical")
48
- @tags("rug-pull,agent-security,owasp-asi04")
49
- @reject_message("Tool execution blocked: tool behavior has changed significantly from its established pattern.")
68
+ @tags("category:agent-security,threat:rug-pull,detection:ml,owasp:asi04")
69
+ @reject_message("Tool execution blocked: tool behavior has drifted significantly from its established pattern.")
50
70
  forbid (
51
71
  principal,
52
72
  action in [AIGateway::Action::"call_tool", AIGateway::Action::"connect_server"],
@@ -56,12 +76,15 @@ when {
56
76
  context has rug_pull_score && context.rug_pull_score >= 70
57
77
  };
58
78
 
59
- // Block with indirect injection from tool outputs
60
- @id("as-block-indirect-injection")
61
- @name("Block indirect prompt injection")
62
- @description("Block when indirect prompt injection is detected in tool outputs (score >= 70)")
79
+ // ---------------------------------------------------------------------------
80
+ // Section 3: Indirect prompt injection
81
+ // ---------------------------------------------------------------------------
82
+
83
+ @id("agent-security.block-indirect-injection")
84
+ @name("Block indirect injection")
85
+ @description("Blocks call_tool and connect_server when indirect_injection_score >= 70.")
63
86
  @severity("critical")
64
- @tags("indirect-injection,owasp-llm01,owasp-asi01")
87
+ @tags("category:agent-security,threat:indirect-injection,detection:ml,owasp:llm01,owasp:asi01")
65
88
  @reject_message("Content blocked: indirect prompt injection detected in tool output or retrieved content.")
66
89
  forbid (
67
90
  principal,
@@ -72,13 +95,12 @@ when {
72
95
  context has indirect_injection_score && context.indirect_injection_score >= 70
73
96
  };
74
97
 
75
- // Strict indirect injection for sensitive tool calls
76
- @id("as-block-indirect-injection-sensitive-tools")
98
+ @id("agent-security.block-indirect-injection-sensitive")
77
99
  @name("Block indirect injection on sensitive tools")
78
- @description("Lower threshold (>= 50) for indirect injection when the tool is classified as sensitive")
100
+ @description("Blocks call_tool when tool_is_sensitive is true and indirect_injection_score >= 50.")
79
101
  @severity("critical")
80
- @tags("indirect-injection,sensitive-tools,owasp-asi02")
81
- @reject_message("Sensitive tool execution blocked: moderate indirect injection risk detected.")
102
+ @tags("category:agent-security,threat:indirect-injection,detection:ml,surface:call-tool,owasp:asi02")
103
+ @reject_message("Tool execution blocked: a sensitive tool was called with moderate indirect-injection risk.")
82
104
  forbid (
83
105
  principal,
84
106
  action == AIGateway::Action::"call_tool",
@@ -86,15 +108,18 @@ forbid (
86
108
  )
87
109
  when {
88
110
  context has indirect_injection_score && context.indirect_injection_score >= 50 &&
89
- context has tool_is_sensitive && context.tool_is_sensitive
111
+ context has tool_is_sensitive && context.tool_is_sensitive == true
90
112
  };
91
113
 
92
- // Block unverified MCP server tool calls with detected threats
93
- @id("as-block-unverified-threats")
114
+ // ---------------------------------------------------------------------------
115
+ // Section 4: MCP supply chain & trust
116
+ // ---------------------------------------------------------------------------
117
+
118
+ @id("agent-security.block-unverified-threats")
94
119
  @name("Block unverified server threats")
95
- @description("Block tool calls from unverified MCP servers when any threat is detected")
120
+ @description("Blocks call_tool when mcp_server_verified is false and threat_count >= 1.")
96
121
  @severity("high")
97
- @tags("mcp-trust,owasp-asi04,supply-chain")
122
+ @tags("category:agent-security,threat:supply-chain,detection:aggregate,surface:call-tool,owasp:asi04")
98
123
  @reject_message("Tool execution blocked: the MCP server is unverified and security threats were detected.")
99
124
  forbid (
100
125
  principal,
@@ -103,15 +128,14 @@ forbid (
103
128
  )
104
129
  when {
105
130
  context has mcp_server_verified && context.mcp_server_verified == false &&
106
- context has threat_count && context.threat_count > 0
131
+ context has threat_count && context.threat_count >= 1
107
132
  };
108
133
 
109
- // Block connections to MCP servers with risky configurations
110
- @id("as-block-mcp-config-risk")
134
+ @id("agent-security.block-mcp-config-risk")
111
135
  @name("Block risky MCP server configs")
112
- @description("Block MCP server connections when risky configuration patterns are detected (score >= 70)")
136
+ @description("Blocks connect_server when mcp_config_risk is true and mcp_risk_score >= 70.")
113
137
  @severity("high")
114
- @tags("mcp-config,owasp-mcp03,supply-chain")
138
+ @tags("category:agent-security,threat:mcp-config-risk,detection:ml,surface:connect-server,owasp:mcp03")
115
139
  @reject_message("MCP server connection blocked: risky server configuration detected.")
116
140
  forbid (
117
141
  principal,
@@ -119,16 +143,15 @@ forbid (
119
143
  resource
120
144
  )
121
145
  when {
122
- context has mcp_config_risk && context.mcp_config_risk &&
146
+ context has mcp_config_risk && context.mcp_config_risk == true &&
123
147
  context has mcp_risk_score && context.mcp_risk_score >= 70
124
148
  };
125
149
 
126
- // Block connections to unverified MCP servers
127
- @id("as-block-unverified-server-connect")
150
+ @id("agent-security.block-unverified-server")
128
151
  @name("Block unverified MCP server connections")
129
- @description("Block connections to MCP servers that are not from a verified registry")
152
+ @description("Blocks connect_server when mcp_server_verified is false.")
130
153
  @severity("high")
131
- @tags("mcp-trust,owasp-asi04,owasp-mcp05,supply-chain")
154
+ @tags("category:agent-security,threat:supply-chain,detection:rule,surface:connect-server,owasp:asi04,owasp:mcp05")
132
155
  @reject_message("MCP server connection blocked: server is not from a verified registry.")
133
156
  forbid (
134
157
  principal,
@@ -1,21 +1,19 @@
1
1
  // =============================================================================
2
- // Baseline Permit Policy (Default)
2
+ // Baseline Permit (Default)
3
3
  // =============================================================================
4
- // Permits all actions by default. Threat-specific forbid policies override
5
- // this to block when detection engines identify issues.
4
+ // Permits all AI Gateway actions by default. Threat-specific forbid policies
5
+ // override this when detectors fire. Cedar is default-deny: without at least
6
+ // one permit rule, every request is denied regardless of forbid rules.
6
7
  //
7
- // Cedar is default-deny: without at least one permit rule, every request
8
- // is denied regardless of forbid rules.
9
- //
10
- // Category: organization
8
+ // Category: organization
11
9
  // Namespace: AIGateway
12
10
  // =============================================================================
13
11
 
14
- @id("baseline-permit-all")
15
- @name("Permit all actions by default")
16
- @description("Baseline permit for all actions -- threat-specific forbid policies override this when threats are detected")
12
+ @id("organization.permit-baseline")
13
+ @name("Permit baseline")
14
+ @description("Permits all AI Gateway actions.")
17
15
  @severity("low")
18
- @tags("baseline,permit-default,organization")
16
+ @tags("category:organization,posture:permit-default")
19
17
  permit (
20
18
  principal,
21
19
  action,
@@ -1,20 +1,40 @@
1
1
  // =============================================================================
2
- // Semantic Threat Detection Policy (Default)
2
+ // Semantic Threat Detection (Default)
3
3
  // =============================================================================
4
- // Detects and blocks prompt injection, jailbreak attempts, and high-severity
5
- // threats in MCP tool calls and server connections.
4
+ // Blocks prompt injection, jailbreak attempts, and high-severity threats in
5
+ // MCP tool calls and LLM prompts. Uses both detection-engine rule triggers and
6
+ // ML classifier confidence scores.
6
7
  //
7
- // Category: semantic
8
+ // Detection layers:
9
+ // - Rule triggers (detected_threats), always available
10
+ // - ML classifier scores (injection_score, jailbreak_score), require API token
11
+ // - Severity aggregation (highest_severity, threat_count), catch-all rules
12
+ //
13
+ // Context keys consumed:
14
+ // - detected_threats: Set<String>
15
+ // - injection_score: Long (0-100)
16
+ // - jailbreak_score: Long (0-100)
17
+ // - highest_severity: String
18
+ // - threat_count: Long
19
+ //
20
+ // Compliance:
21
+ // - OWASP LLM01, OWASP LLM02
22
+ // - MITRE ATLAS AML.T0051, AML.T0054
23
+ //
24
+ // Category: semantic
8
25
  // Namespace: AIGateway
9
26
  // =============================================================================
10
27
 
11
- // Block content with prompt injection patterns detected by rules
12
- @id("semantic-block-injection")
13
- @name("Block prompt injection")
14
- @description("Block tool calls when detection engine rules identify prompt injection patterns in tool arguments or content")
28
+ // ---------------------------------------------------------------------------
29
+ // Section 1: Prompt injection
30
+ // ---------------------------------------------------------------------------
31
+
32
+ @id("semantic.block-injection-rule")
33
+ @name("Block injection (rule)")
34
+ @description("Blocks call_tool when detected_threats contains \"prompt_injection\".")
15
35
  @severity("critical")
16
- @tags("injection,security,owasp-llm01,baseline")
17
- @reject_message("Tool call was blocked because prompt injection patterns were detected in the content (OWASP LLM01).")
36
+ @tags("category:semantic,threat:injection,detection:rule,surface:call-tool,owasp:llm01,mitre:atlas-t0051,posture:baseline")
37
+ @reject_message("Tool execution blocked: prompt injection patterns detected (OWASP LLM01).")
18
38
  forbid (
19
39
  principal,
20
40
  action == AIGateway::Action::"call_tool",
@@ -24,29 +44,31 @@ when {
24
44
  context has detected_threats && context.detected_threats.contains("prompt_injection")
25
45
  };
26
46
 
27
- // Block content with high ML injection confidence
28
- @id("semantic-block-injection-score")
29
- @name("Block high-confidence injection")
30
- @description("Block tool calls when the ML injection classifier confidence exceeds 75/100")
47
+ @id("semantic.block-injection-ml")
48
+ @name("Block injection (ML)")
49
+ @description("Blocks call_tool when injection_score >= 75.")
31
50
  @severity("critical")
32
- @tags("injection,ml-classifier,security,owasp-llm01")
33
- @reject_message("Tool call was blocked because the ML classifier detected prompt injection with high confidence.")
51
+ @tags("category:semantic,threat:injection,detection:ml,surface:call-tool,owasp:llm01,mitre:atlas-t0051")
52
+ @reject_message("Tool execution blocked: the ML classifier detected prompt injection with high confidence.")
34
53
  forbid (
35
54
  principal,
36
55
  action == AIGateway::Action::"call_tool",
37
56
  resource
38
57
  )
39
58
  when {
40
- context has injection_confidence && context.injection_confidence >= 75
59
+ context has injection_score && context.injection_score >= 75
41
60
  };
42
61
 
43
- // Block content with jailbreak patterns
44
- @id("semantic-block-jailbreak")
45
- @name("Block jailbreak attempts")
46
- @description("Block tool calls when jailbreak patterns are detected in content")
62
+ // ---------------------------------------------------------------------------
63
+ // Section 2: Jailbreak
64
+ // ---------------------------------------------------------------------------
65
+
66
+ @id("semantic.block-jailbreak-rule")
67
+ @name("Block jailbreak (rule)")
68
+ @description("Blocks call_tool when detected_threats contains \"jailbreak\".")
47
69
  @severity("critical")
48
- @tags("jailbreak,security,owasp-llm02,baseline")
49
- @reject_message("Tool call was blocked because jailbreak patterns were detected.")
70
+ @tags("category:semantic,threat:jailbreak,detection:rule,surface:call-tool,owasp:llm02,mitre:atlas-t0054,posture:baseline")
71
+ @reject_message("Tool execution blocked: jailbreak patterns detected (OWASP LLM02).")
50
72
  forbid (
51
73
  principal,
52
74
  action == AIGateway::Action::"call_tool",
@@ -56,29 +78,31 @@ when {
56
78
  context has detected_threats && context.detected_threats.contains("jailbreak")
57
79
  };
58
80
 
59
- // Block content with high ML jailbreak confidence
60
- @id("semantic-block-jailbreak-score")
61
- @name("Block high-confidence jailbreak")
62
- @description("Block tool calls when the ML jailbreak classifier confidence exceeds 75/100")
81
+ @id("semantic.block-jailbreak-ml")
82
+ @name("Block jailbreak (ML)")
83
+ @description("Blocks call_tool when jailbreak_score >= 75.")
63
84
  @severity("critical")
64
- @tags("jailbreak,ml-classifier,security,owasp-llm02")
65
- @reject_message("Tool call was blocked because the ML classifier detected a jailbreak attempt with high confidence.")
85
+ @tags("category:semantic,threat:jailbreak,detection:ml,surface:call-tool,owasp:llm02,mitre:atlas-t0054")
86
+ @reject_message("Tool execution blocked: the ML classifier detected a jailbreak attempt with high confidence.")
66
87
  forbid (
67
88
  principal,
68
89
  action == AIGateway::Action::"call_tool",
69
90
  resource
70
91
  )
71
92
  when {
72
- context has jailbreak_confidence && context.jailbreak_confidence >= 75
93
+ context has jailbreak_score && context.jailbreak_score >= 75
73
94
  };
74
95
 
75
- // Block any content with critical severity threats
76
- @id("semantic-block-critical")
96
+ // ---------------------------------------------------------------------------
97
+ // Section 3: Severity aggregation
98
+ // ---------------------------------------------------------------------------
99
+
100
+ @id("semantic.block-critical")
77
101
  @name("Block critical threats")
78
- @description("Block all MCP operations when any detection engine reports critical severity")
102
+ @description("Blocks all MCP operations when highest_severity equals \"critical\".")
79
103
  @severity("critical")
80
- @tags("critical,baseline,security,catch-all")
81
- @reject_message("MCP operation was blocked because security scanners detected a critical-severity threat.")
104
+ @tags("category:semantic,detection:aggregate,posture:catch-all")
105
+ @reject_message("MCP operation blocked: a critical-severity threat was detected.")
82
106
  forbid (
83
107
  principal,
84
108
  action,
@@ -88,13 +112,12 @@ when {
88
112
  context has highest_severity && context.highest_severity == "critical"
89
113
  };
90
114
 
91
- // Block tool calls with multiple concurrent threats
92
- @id("semantic-block-multi-threat-tools")
115
+ @id("semantic.block-multi-threat")
93
116
  @name("Block multi-threat tool calls")
94
- @description("Block tool execution when 3+ distinct threats are detected simultaneously")
117
+ @description("Blocks call_tool when threat_count >= 3.")
95
118
  @severity("high")
96
- @tags("multi-threat,tools,security,defense-in-depth")
97
- @reject_message("Tool execution was blocked because multiple security threats were detected simultaneously.")
119
+ @tags("category:semantic,detection:aggregate,surface:call-tool,posture:catch-all")
120
+ @reject_message("Tool execution blocked: multiple concurrent security threats were detected.")
98
121
  forbid (
99
122
  principal,
100
123
  action == AIGateway::Action::"call_tool",