@highflame/policy 2.1.36 → 2.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/_schemas/ai_gateway/context.json +431 -11
  2. package/_schemas/ai_gateway/schema.cedarschema +91 -11
  3. package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
  4. package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
  5. package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
  6. package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
  7. package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
  8. package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
  9. package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
  10. package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
  11. package/_schemas/ai_gateway/templates/templates.json +42 -47
  12. package/_schemas/guardrails/context.json +12 -12
  13. package/_schemas/guardrails/schema.cedarschema +12 -12
  14. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
  15. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
  16. package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
  17. package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
  18. package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
  19. package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
  20. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
  21. package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
  22. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
  23. package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
  24. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
  25. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
  26. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
  27. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
  28. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
  29. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
  30. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
  31. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
  32. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
  33. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
  34. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
  35. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
  36. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
  37. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
  38. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
  39. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
  40. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
  41. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
  42. package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
  43. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
  44. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
  45. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
  46. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
  47. package/_schemas/guardrails/templates/templates.json +188 -210
  48. package/_schemas/overwatch/context.json +14 -14
  49. package/_schemas/overwatch/schema.cedarschema +12 -12
  50. package/_schemas/sentry/context.json +11 -11
  51. package/_schemas/sentry/schema.cedarschema +11 -11
  52. package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
  53. package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
  54. package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
  55. package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
  56. package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
  57. package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
  58. package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
  59. package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
  60. package/_schemas/sentry/templates/templates.json +46 -46
  61. package/dist/ai_gateway-context.gen.d.ts +18 -4
  62. package/dist/ai_gateway-context.gen.js +18 -4
  63. package/dist/ai_gateway-defaults.gen.d.ts +1 -1
  64. package/dist/ai_gateway-defaults.gen.js +377 -313
  65. package/dist/guardrails-context.gen.d.ts +5 -5
  66. package/dist/guardrails-context.gen.js +5 -5
  67. package/dist/guardrails-defaults.gen.d.ts +1 -1
  68. package/dist/guardrails-defaults.gen.js +2070 -1849
  69. package/dist/overwatch-context.gen.d.ts +5 -5
  70. package/dist/overwatch-context.gen.js +5 -5
  71. package/dist/overwatch-defaults.gen.d.ts +1 -1
  72. package/dist/overwatch-defaults.gen.js +635 -595
  73. package/dist/sentry-context.gen.d.ts +3 -3
  74. package/dist/sentry-context.gen.js +3 -3
  75. package/dist/sentry-defaults.gen.d.ts +1 -1
  76. package/dist/sentry-defaults.gen.js +379 -460
  77. package/dist/service-schemas.gen.d.ts +4 -4
  78. package/dist/service-schemas.gen.js +249 -99
  79. package/package.json +1 -1
@@ -38,13 +38,13 @@
38
38
  "description": "Number of detectors that were executed for this request"
39
39
  },
40
40
  {
41
- "key": "injection_confidence",
41
+ "key": "injection_score",
42
42
  "type": "number",
43
43
  "required": false,
44
44
  "description": "Combined prompt injection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
45
45
  },
46
46
  {
47
- "key": "jailbreak_confidence",
47
+ "key": "jailbreak_score",
48
48
  "type": "number",
49
49
  "required": false,
50
50
  "description": "Combined jailbreak detection confidence (0-100). MAX of all detector scores (Pulse + DeepContext). Use jailbreak_pulse_score / jailbreak_deep_context_score for individual detector control"
@@ -80,7 +80,7 @@
80
80
  "description": "Type of injection detected: 'prompt', 'sql', 'command', or 'none'. Use this to apply different policies per injection type"
81
81
  },
82
82
  {
83
- "key": "contains_secrets",
83
+ "key": "secrets_detected",
84
84
  "type": "boolean",
85
85
  "required": false,
86
86
  "description": "Whether any API keys, tokens, passwords, or credentials were detected in the content. True indicates presence of secrets"
@@ -116,7 +116,7 @@
116
116
  "description": "Array of PII types detected (e.g., ['email', 'phone', 'ssn', 'credit_card']). Use .contains() to block specific sensitive types"
117
117
  },
118
118
  {
119
- "key": "pii_confidence",
119
+ "key": "pii_score",
120
120
  "type": "number",
121
121
  "required": false,
122
122
  "description": "PII detection ML classifier confidence (0-100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss. Typical threshold: >=80 for high-confidence blocking"
@@ -176,7 +176,7 @@
176
176
  "description": "Confidence score from topic classifier (0-100). Use with content_topics to tune sensitivity — higher thresholds reduce false positives"
177
177
  },
178
178
  {
179
- "key": "contains_invisible_chars",
179
+ "key": "invisible_chars_detected",
180
180
  "type": "boolean",
181
181
  "required": false,
182
182
  "description": "Whether invisible Unicode characters (zero-width joiners, RTL marks, etc.) were detected in the content. Commonly used for prompt injection evasion"
@@ -644,7 +644,7 @@
644
644
  "description": "Confidence score from topic classifier for tool content (0-100)"
645
645
  },
646
646
  {
647
- "key": "contains_secrets",
647
+ "key": "secrets_detected",
648
648
  "type": "boolean",
649
649
  "required": false,
650
650
  "description": "Whether secrets were detected in tool arguments or content"
@@ -680,13 +680,13 @@
680
680
  "description": "Number of PII pattern matches in tool arguments or content. Use >=3 to detect bulk PII exposure indicating data dumps or CSV pastes"
681
681
  },
682
682
  {
683
- "key": "pii_confidence",
683
+ "key": "pii_score",
684
684
  "type": "number",
685
685
  "required": false,
686
686
  "description": "PII detection ML classifier confidence for tool content (0-100). Catches novel PII patterns that escape regex detection"
687
687
  },
688
688
  {
689
- "key": "injection_confidence",
689
+ "key": "injection_score",
690
690
  "type": "number",
691
691
  "required": false,
692
692
  "description": "Combined prompt injection confidence in tool arguments (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control"
@@ -854,7 +854,7 @@
854
854
  "description": "File path targeted by the tool call (if file operation). Use for path-based blocking of .env files, credential files, system directories, and credential directories"
855
855
  },
856
856
  {
857
- "key": "contains_invisible_chars",
857
+ "key": "invisible_chars_detected",
858
858
  "type": "boolean",
859
859
  "required": false,
860
860
  "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in tool arguments. Tool args should be plain text — invisible chars indicate payload injection"
@@ -1028,7 +1028,7 @@
1028
1028
  "description": "File path being read. Use for path-based access control policies (e.g., block .env files, system directories, credential directories)"
1029
1029
  },
1030
1030
  {
1031
- "key": "contains_secrets",
1031
+ "key": "secrets_detected",
1032
1032
  "type": "boolean",
1033
1033
  "required": false,
1034
1034
  "description": "Whether secrets were detected in the file content being read"
@@ -1208,7 +1208,7 @@
1208
1208
  "description": "File path being written. Use for path-based blocking policies (e.g., block writes to .env files, credential directories)"
1209
1209
  },
1210
1210
  {
1211
- "key": "contains_invisible_chars",
1211
+ "key": "invisible_chars_detected",
1212
1212
  "type": "boolean",
1213
1213
  "required": false,
1214
1214
  "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in the content being written. Prevents persistence of invisible payloads in source code"
@@ -1220,7 +1220,7 @@
1220
1220
  "description": "Invisible character attack severity score (0-100) in write content"
1221
1221
  },
1222
1222
  {
1223
- "key": "contains_secrets",
1223
+ "key": "secrets_detected",
1224
1224
  "type": "boolean",
1225
1225
  "required": false,
1226
1226
  "description": "Whether secrets were detected in the content being written. Block writes containing credentials"
@@ -104,8 +104,8 @@ namespace Guardrails {
104
104
  "detector_count": Long,
105
105
 
106
106
  // Security - Injection & Jailbreak (optional)
107
- "injection_confidence"?: Long, // Combined injection confidence: MAX(pulse, deep_context)
108
- "jailbreak_confidence"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
107
+ "injection_score"?: Long, // Combined injection confidence: MAX(pulse, deep_context)
108
+ "jailbreak_score"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)
109
109
  "injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
110
110
  "injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
111
111
  "jailbreak_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
@@ -113,7 +113,7 @@ namespace Guardrails {
113
113
  "injection_type"?: String, // "prompt" | "sql" | "command" | "none"
114
114
 
115
115
  // Privacy - Secrets (optional)
116
- "contains_secrets"?: Bool,
116
+ "secrets_detected"?: Bool,
117
117
  "secret_count"?: Long,
118
118
  "secret_types"?: Set<String>, // ["aws_access_key", "github_token", ...]
119
119
 
@@ -121,7 +121,7 @@ namespace Guardrails {
121
121
  "pii_detected"?: Bool,
122
122
  "pii_count"?: Long,
123
123
  "pii_types"?: Set<String>, // ["email", "phone", "ssn", "credit_card", ...]
124
- "pii_confidence"?: Long, // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
124
+ "pii_score"?: Long, // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
125
125
 
126
126
  // Threat Severity Aggregation (optional)
127
127
  "highest_severity"?: String, // Highest severity across all detectors: "critical" | "high" | "medium" | "low" | "none"
@@ -139,7 +139,7 @@ namespace Guardrails {
139
139
  "topic_confidence"?: Long, // 0-100
140
140
 
141
141
  // Security - Invisible Character Detection (optional)
142
- "contains_invisible_chars"?: Bool,
142
+ "invisible_chars_detected"?: Bool,
143
143
  "invisible_chars_score"?: Long, // 0-100
144
144
 
145
145
  // Security - Pattern Detection (optional)
@@ -281,14 +281,14 @@ namespace Guardrails {
281
281
  "topic_confidence"?: Long, // 0-100
282
282
 
283
283
  // Security checks on tool arguments (optional)
284
- "contains_secrets"?: Bool,
284
+ "secrets_detected"?: Bool,
285
285
  "secret_count"?: Long,
286
286
  "secret_types"?: Set<String>,
287
287
  "pii_detected"?: Bool,
288
288
  "pii_types"?: Set<String>,
289
289
  "pii_count"?: Long, // Number of PII pattern matches in tool content
290
- "pii_confidence"?: Long, // PII ML classifier confidence (0-100)
291
- "injection_confidence"?: Long,
290
+ "pii_score"?: Long, // PII ML classifier confidence (0-100)
291
+ "injection_score"?: Long,
292
292
  "injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
293
293
  "injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
294
294
 
@@ -312,7 +312,7 @@ namespace Guardrails {
312
312
  "path"?: String, // File path when tool operates on files
313
313
 
314
314
  // Security - Invisible Character Detection in tool args (optional)
315
- "contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in tool args
315
+ "invisible_chars_detected"?: Bool, // Whether invisible Unicode chars detected in tool args
316
316
  "invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
317
317
 
318
318
  // Security - Encoded Injection (optional)
@@ -376,7 +376,7 @@ namespace Guardrails {
376
376
  "path"?: String, // File path being read
377
377
 
378
378
  // Security checks on file content (optional)
379
- "contains_secrets"?: Bool,
379
+ "secrets_detected"?: Bool,
380
380
  "secret_count"?: Long,
381
381
  "secret_types"?: Set<String>,
382
382
  "pii_detected"?: Bool,
@@ -436,11 +436,11 @@ namespace Guardrails {
436
436
  "path"?: String, // File path being written
437
437
 
438
438
  // Security - Invisible Character Detection in write content (optional)
439
- "contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in write content
439
+ "invisible_chars_detected"?: Bool, // Whether invisible Unicode chars detected in write content
440
440
  "invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
441
441
 
442
442
  // Security checks on content being written (optional)
443
- "contains_secrets"?: Bool,
443
+ "secrets_detected"?: Bool,
444
444
  "secret_count"?: Long,
445
445
  "secret_types"?: Set<String>,
446
446
  "pii_detected"?: Bool,
@@ -1,9 +1,9 @@
1
1
  // =============================================================================
2
- // Agent Identity Policy Agent-to-Agent Security Defaults
2
+ // Agent Identity & Trust (Default)
3
3
  // =============================================================================
4
- // Enforces trust-based access control for AI agents authenticated via API key
5
- // or JWT with agent claims. These policies use agent identity context keys
6
- // populated from Shield's authentication layer.
4
+ // Trust-based access control for AI agents authenticated via API key or JWT
5
+ // with agent claims. Uses identity context keys populated from Shield's
6
+ // authentication layer.
7
7
  //
8
8
  // Agent trust levels:
9
9
  // - first_party: Your own agents (highest trust)
@@ -11,108 +11,112 @@
11
11
  // - unverified: Unknown/untrusted agents (lowest trust)
12
12
  //
13
13
  // Agent types:
14
- // - orchestrator: Coordinates sub-agents
15
- // - autonomous: Self-directed, no human in the loop
16
- // - tool_agent: Single-purpose tool execution
17
- // - human_proxy: Acts on behalf of a human
14
+ // - orchestrator, autonomous, tool_agent, human_proxy
18
15
  //
19
- // Context keys used:
20
- // - agent_id: String - Unique agent identifier
21
- // - agent_type: String - Agent classification
22
- // - agent_trust_level: String - Trust tier
23
- // - agent_framework: String - Agent framework/SDK
24
- // - agent_publisher: String - Publishing organization
25
- // - tool_name: String - Tool being called
26
- // - tool_category: String - "safe" | "sensitive" | "dangerous"
27
- // - tool_risk_score: Long (0-100) - Computed risk score
28
- // - injection_confidence: Long (0-100) - Injection detection score
16
+ // Context keys consumed:
17
+ // - agent_id, agent_type, agent_trust_level, agent_framework, agent_publisher
18
+ // - tool_name, tool_category, tool_is_sensitive, tool_risk_score
19
+ // - injection_score
20
+ // - session_threat_turns
29
21
  //
30
- // Category: agent_identity
22
+ // Category: agent-identity
31
23
  // Namespace: Guardrails
32
24
  // =============================================================================
33
25
 
34
- // -----------------------------------------------------------------------------
35
- // Trust-Based Tool Access
36
- // -----------------------------------------------------------------------------
26
+ // ---------------------------------------------------------------------------
27
+ // Section 1: Trust-based tool access
28
+ // ---------------------------------------------------------------------------
37
29
 
38
- @id("agent-block-unverified-dangerous-tools")
30
+ @id("agent-identity.block-unverified-dangerous-tools")
39
31
  @name("Block unverified agents from dangerous tools")
40
- @description("Unverified agents cannot execute tools classified as dangerous. Require first_party or verified_third_party trust level for high-risk operations")
32
+ @description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_category is \"dangerous\".")
41
33
  @severity("critical")
42
- @tags("agent-identity,trust,tools,a2a")
34
+ @tags("category:agent-identity,scope:per-agent,detection:rule,surface:call-tool")
35
+ @reject_message("Tool execution blocked: unverified agents cannot execute dangerous tools.")
43
36
  forbid (
44
37
  principal is Guardrails::Agent,
45
38
  action == Guardrails::Action::"call_tool",
46
39
  resource
47
- ) when {
40
+ )
41
+ when {
48
42
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
49
43
  context has tool_category && context.tool_category == "dangerous"
50
44
  };
51
45
 
52
- @id("agent-block-unverified-shell")
46
+ @id("agent-identity.block-unverified-shell")
53
47
  @name("Block unverified agents from shell execution")
54
- @description("Shell and command execution are restricted to first_party and verified_third_party agents")
48
+ @description("Blocks call_tool when agent_trust_level is \"unverified\" and tool_name is a shell tool.")
55
49
  @severity("critical")
56
- @tags("agent-identity,trust,shell,a2a")
50
+ @tags("category:agent-identity,threat:command-injection,scope:per-agent,detection:rule,surface:call-tool")
51
+ @reject_message("Tool execution blocked: shell and command execution require a verified agent.")
57
52
  forbid (
58
53
  principal is Guardrails::Agent,
59
54
  action == Guardrails::Action::"call_tool",
60
55
  resource
61
- ) when {
56
+ )
57
+ when {
62
58
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
63
59
  context has tool_name &&
64
- (context.tool_name == "shell" ||
65
- context.tool_name == "execute_command" ||
66
- context.tool_name == "bash")
60
+ (
61
+ context.tool_name == "shell" ||
62
+ context.tool_name == "execute_command" ||
63
+ context.tool_name == "bash"
64
+ )
67
65
  };
68
66
 
69
- @id("agent-block-unverified-sensitive-tools")
70
- @name("Block unverified agents from sensitive tools with elevated risk")
71
- @description("Unverified agents cannot execute sensitive tools with risk score above 60")
67
+ @id("agent-identity.block-unverified-sensitive-tools")
68
+ @name("Block unverified agents from sensitive tools")
69
+ @description("Blocks call_tool when agent_trust_level is \"unverified\", tool_is_sensitive is true, and tool_risk_score >= 61.")
72
70
  @severity("high")
73
- @tags("agent-identity,trust,tools,a2a")
71
+ @tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
72
+ @reject_message("Tool execution blocked: unverified agent attempted a sensitive tool with elevated risk.")
74
73
  forbid (
75
74
  principal is Guardrails::Agent,
76
75
  action == Guardrails::Action::"call_tool",
77
76
  resource
78
- ) when {
77
+ )
78
+ when {
79
79
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
80
80
  context has tool_is_sensitive && context.tool_is_sensitive == true &&
81
- context has tool_risk_score && context.tool_risk_score > 60
81
+ context has tool_risk_score && context.tool_risk_score >= 61
82
82
  };
83
83
 
84
- // -----------------------------------------------------------------------------
85
- // Autonomous Agent Restrictions
86
- // -----------------------------------------------------------------------------
84
+ // ---------------------------------------------------------------------------
85
+ // Section 2: Autonomous-agent restrictions
86
+ // ---------------------------------------------------------------------------
87
87
 
88
- @id("agent-block-autonomous-injection")
89
- @name("Stricter injection threshold for autonomous agents")
90
- @description("Autonomous agents operate without human oversight. Apply lower injection confidence threshold (50 vs standard 80) to compensate for lack of human review")
88
+ @id("agent-identity.block-autonomous-injection")
89
+ @name("Block injection on autonomous agents")
90
+ @description("Blocks process_prompt when agent_type is \"autonomous\" and injection_score >= 51.")
91
91
  @severity("high")
92
- @tags("agent-identity,autonomous,injection,a2a")
92
+ @tags("category:agent-identity,threat:injection,detection:ml,surface:process-prompt,owasp:llm01")
93
+ @reject_message("Prompt blocked: lower injection threshold applies to autonomous agents and was exceeded.")
93
94
  forbid (
94
95
  principal is Guardrails::Agent,
95
96
  action == Guardrails::Action::"process_prompt",
96
97
  resource
97
- ) when {
98
+ )
99
+ when {
98
100
  context has agent_type && context.agent_type == "autonomous" &&
99
- context has injection_confidence && context.injection_confidence > 50
101
+ context has injection_score && context.injection_score >= 51
100
102
  };
101
103
 
102
- // -----------------------------------------------------------------------------
103
- // Cross-Turn Agent Trust Enforcement
104
- // -----------------------------------------------------------------------------
104
+ // ---------------------------------------------------------------------------
105
+ // Section 3: Cross-turn enforcement
106
+ // ---------------------------------------------------------------------------
105
107
 
106
- @id("agent-block-unverified-after-threats")
108
+ @id("agent-identity.block-unverified-after-threats")
107
109
  @name("Block unverified agents after session threats")
108
- @description("If any prior turn in the session detected threats, block unverified agents from further tool calls. Prevents compromised sessions from being exploited by untrusted agents")
110
+ @description("Blocks call_tool when agent_trust_level is \"unverified\" and session_threat_turns >= 1.")
109
111
  @severity("critical")
110
- @tags("agent-identity,trust,session,cross-turn,a2a")
112
+ @tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool")
113
+ @reject_message("Tool execution blocked: prior turns in this session detected threats; unverified agents cannot continue.")
111
114
  forbid (
112
115
  principal is Guardrails::Agent,
113
116
  action == Guardrails::Action::"call_tool",
114
117
  resource
115
- ) when {
118
+ )
119
+ when {
116
120
  context has agent_trust_level && context.agent_trust_level == "unverified" &&
117
- context has session_threat_turns && context.session_threat_turns > 0
121
+ context has session_threat_turns && context.session_threat_turns >= 1
118
122
  };
@@ -1,48 +1,63 @@
1
1
  // =============================================================================
2
- // Agentic Safety Policy
2
+ // Agentic Safety (Default)
3
3
  // =============================================================================
4
- // Blocks suspicious agentic behavior patterns:
5
- // - Infinite loops (repeated tool calls)
6
- // - Suspicious action sequences (data exfiltration patterns)
7
- // - Budget violations (token/cost limits)
4
+ // Blocks suspicious agentic behaviour patterns: tool-call loops, data
5
+ // exfiltration sequences, budget overruns, plus MCP supply-chain risks
6
+ // (tool poisoning, rug pull, risky MCP configs).
8
7
  //
9
- // Context keys used (normalized by projection layer):
10
- // - loop_detected: bool - Whether tool call loop detected
11
- // - loop_count: Long - Number of consecutive repeated calls
12
- // - suspicious_pattern: bool - Whether suspicious sequence detected
13
- // - pattern_type: String - Type of pattern (e.g., "data_exfiltration")
14
- // - sequence_risk: Long (0-100) - Risk score from sequence analysis
15
- // - budget_exceeded: bool - Whether token budget exceeded
16
- // - budget_remaining_pct: Long (0-100) - Remaining budget percentage
8
+ // Context keys consumed:
9
+ // - loop_detected: Bool
10
+ // - loop_count: Long
11
+ // - suspicious_pattern: Bool
12
+ // - pattern_type: String
13
+ // - sequence_risk: Long (0-100)
14
+ // - budget_exceeded: Bool
15
+ // - budget_remaining_pct: Long (0-100)
16
+ // - tool_poisoning_score: Long (0-100)
17
+ // - rug_pull_detected: Bool
18
+ // - rug_pull_score: Long (0-100)
19
+ // - mcp_config_risk: Bool
20
+ // - mcp_risk_score: Long (0-100)
17
21
  //
18
- // Category: agentic-security
22
+ // Compliance:
23
+ // - OWASP LLM06, OWASP ASI01, OWASP ASI04
24
+ //
25
+ // Category: agent-security
19
26
  // Namespace: Guardrails
20
27
  // =============================================================================
21
28
 
22
- @id("agentic-block-loops")
29
+ // ---------------------------------------------------------------------------
30
+ // Section 1: Loops, exfiltration, budget
31
+ // ---------------------------------------------------------------------------
32
+
33
+ @id("agent-security.block-loops")
23
34
  @name("Block tool call loops")
24
- @description("Forbids repeated tool calls indicating infinite loop")
35
+ @description("Blocks call_tool when loop_detected is true and loop_count >= 4.")
25
36
  @severity("high")
26
- @tags("agentic,loops,safety")
37
+ @tags("category:agent-security,threat:loop,detection:aggregate,surface:call-tool")
38
+ @reject_message("Tool execution blocked: an infinite tool-call loop was detected.")
27
39
  forbid (
28
40
  principal,
29
41
  action == Guardrails::Action::"call_tool",
30
42
  resource
31
- ) when {
43
+ )
44
+ when {
32
45
  context has loop_detected && context.loop_detected == true &&
33
- context has loop_count && context.loop_count > 3
46
+ context has loop_count && context.loop_count >= 4
34
47
  };
35
48
 
36
- @id("agentic-block-exfiltration")
49
+ @id("agent-security.block-exfiltration")
37
50
  @name("Block data exfiltration patterns")
38
- @description("Forbids suspicious action sequences like read http_post")
51
+ @description("Blocks call_tool when suspicious_pattern is true and pattern_type matches an exfiltration class.")
39
52
  @severity("critical")
40
- @tags("agentic,exfiltration,security")
53
+ @tags("category:agent-security,threat:exfiltration,detection:rule,surface:call-tool")
54
+ @reject_message("Tool execution blocked: suspicious action sequence consistent with data exfiltration.")
41
55
  forbid (
42
56
  principal,
43
57
  action == Guardrails::Action::"call_tool",
44
58
  resource
45
- ) when {
59
+ )
60
+ when {
46
61
  context has suspicious_pattern && context.suspicious_pattern == true &&
47
62
  context has pattern_type &&
48
63
  (
@@ -52,88 +67,98 @@ forbid (
52
67
  )
53
68
  };
54
69
 
55
- @id("agentic-block-high-sequence-risk")
70
+ @id("agent-security.block-sequence-risk")
56
71
  @name("Block high-risk action sequences")
57
- @description("Forbids action sequences with risk score above 80")
72
+ @description("Blocks call_tool when sequence_risk >= 81.")
58
73
  @severity("high")
59
- @tags("agentic,patterns,security")
74
+ @tags("category:agent-security,detection:aggregate,surface:call-tool")
75
+ @reject_message("Tool execution blocked: action sequence scored a high cumulative risk.")
60
76
  forbid (
61
77
  principal,
62
78
  action == Guardrails::Action::"call_tool",
63
79
  resource
64
- ) when {
65
- context has sequence_risk && context.sequence_risk > 80
80
+ )
81
+ when {
82
+ context has sequence_risk && context.sequence_risk >= 81
66
83
  };
67
84
 
68
- @id("agentic-block-budget-exceeded")
69
- @name("Block budget violations")
70
- @description("Forbids requests when token budget is exceeded")
85
+ @id("agent-security.block-budget-exceeded")
86
+ @name("Block on budget exceeded")
87
+ @description("Blocks call_tool when budget_exceeded is true.")
71
88
  @severity("medium")
72
- @tags("agentic,budget,cost-control")
89
+ @tags("category:agent-security,threat:budget-violation,detection:rule,surface:call-tool")
90
+ @reject_message("Tool execution blocked: the agent has exhausted its token or cost budget.")
73
91
  forbid (
74
92
  principal,
75
93
  action == Guardrails::Action::"call_tool",
76
94
  resource
77
- ) when {
95
+ )
96
+ when {
78
97
  context has budget_exceeded && context.budget_exceeded == true
79
98
  };
80
99
 
81
- @id("agentic-block-low-budget")
82
- @name("Block requests when budget critically low")
83
- @description("Forbids requests when remaining budget falls below 5%")
100
+ @id("agent-security.block-budget-low")
101
+ @name("Block when budget critically low")
102
+ @description("Blocks call_tool when budget_remaining_pct <= 4.")
84
103
  @severity("medium")
85
- @tags("agentic,budget,cost-control")
104
+ @tags("category:agent-security,threat:budget-violation,detection:rule,surface:call-tool")
105
+ @reject_message("Tool execution blocked: remaining budget is below the critical threshold.")
86
106
  forbid (
87
107
  principal,
88
108
  action == Guardrails::Action::"call_tool",
89
109
  resource
90
- ) when {
91
- context has budget_remaining_pct &&
92
- context.budget_remaining_pct < 5 &&
93
- context.budget_remaining_pct > 0
110
+ )
111
+ when {
112
+ context has budget_remaining_pct && context.budget_remaining_pct <= 4
94
113
  };
95
114
 
96
- // =============================================================================
97
- // Agent Security Supply Chain & Behavioral Drift
98
- // =============================================================================
115
+ // ---------------------------------------------------------------------------
116
+ // Section 2: MCP supply chain
117
+ // ---------------------------------------------------------------------------
99
118
 
100
- @id("agentic-block-tool-poisoning")
101
- @name("Block tool poisoning attacks")
102
- @description("Forbids tool calls or server connections when hidden instructions or authority hijack patterns are detected in tool descriptions or arguments")
119
+ @id("agent-security.block-tool-poisoning")
120
+ @name("Block tool poisoning")
121
+ @description("Blocks call_tool and connect_server when tool_poisoning_score >= 70.")
103
122
  @severity("critical")
104
- @tags("agentic,tool-poisoning,supply-chain")
123
+ @tags("category:agent-security,threat:tool-poisoning,detection:ml,owasp:asi01")
124
+ @reject_message("Tool execution blocked: hidden manipulation instructions detected in tool description or arguments (OWASP ASI01).")
105
125
  forbid (
106
126
  principal,
107
127
  action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
108
128
  resource
109
- ) when {
129
+ )
130
+ when {
110
131
  context has tool_poisoning_score && context.tool_poisoning_score >= 70
111
132
  };
112
133
 
113
- @id("agentic-block-rug-pull")
134
+ @id("agent-security.block-rug-pull")
114
135
  @name("Block rug pull attacks")
115
- @description("Forbids tool calls when significant behavioral drift is detected (tool output deviates from established patterns)")
136
+ @description("Blocks call_tool when rug_pull_detected is true and rug_pull_score >= 70.")
116
137
  @severity("high")
117
- @tags("agentic,rug-pull,behavioral-drift")
138
+ @tags("category:agent-security,threat:rug-pull,detection:ml,owasp:asi04")
139
+ @reject_message("Tool execution blocked: tool behavior has drifted significantly from its established pattern.")
118
140
  forbid (
119
141
  principal,
120
142
  action == Guardrails::Action::"call_tool",
121
143
  resource
122
- ) when {
144
+ )
145
+ when {
123
146
  context has rug_pull_detected && context.rug_pull_detected == true &&
124
147
  context has rug_pull_score && context.rug_pull_score >= 70
125
148
  };
126
149
 
127
- @id("agentic-block-mcp-config-risk")
128
- @name("Block risky MCP configurations")
129
- @description("Forbids tool calls or server connections when MCP configuration risks are detected (inline execution, suspicious URLs, cross-origin issues)")
150
+ @id("agent-security.block-mcp-config-risk")
151
+ @name("Block risky MCP server configs")
152
+ @description("Blocks call_tool and connect_server when mcp_config_risk is true and mcp_risk_score >= 70.")
130
153
  @severity("high")
131
- @tags("agentic,mcp-risk,supply-chain")
154
+ @tags("category:agent-security,threat:mcp-config-risk,detection:ml,owasp:asi04")
155
+ @reject_message("MCP operation blocked: risky server configuration detected.")
132
156
  forbid (
133
157
  principal,
134
158
  action in [Guardrails::Action::"call_tool", Guardrails::Action::"connect_server"],
135
159
  resource
136
- ) when {
160
+ )
161
+ when {
137
162
  context has mcp_config_risk && context.mcp_config_risk == true &&
138
163
  context has mcp_risk_score && context.mcp_risk_score >= 70
139
164
  };
@@ -1,22 +1,19 @@
1
1
  // =============================================================================
2
- // Baseline Permit Policy (Default)
2
+ // Baseline Permit (Default)
3
3
  // =============================================================================
4
- // Permits all actions by default. Threat-specific forbid policies override
5
- // this to block when detectors find security issues.
4
+ // Permits all Guardrails actions by default. Threat-specific forbid policies
5
+ // override this when detectors fire. Cedar is default-deny: without at least
6
+ // one permit rule, every request is denied regardless of forbid rules.
6
7
  //
7
- // Cedar is default-deny: without at least one permit rule, every request
8
- // is denied regardless of forbid rules. This baseline ensures the system
9
- // is "allow unless blocked" rather than "block everything".
10
- //
11
- // Category: organization
8
+ // Category: organization
12
9
  // Namespace: Guardrails
13
10
  // =============================================================================
14
11
 
15
- @id("baseline-permit-all")
16
- @name("Permit all actions by default")
17
- @description("Baseline permit for all actions — threat-specific forbid policies override this when threats are detected")
12
+ @id("organization.permit-baseline")
13
+ @name("Permit baseline")
14
+ @description("Permits all Guardrails actions.")
18
15
  @severity("low")
19
- @tags("baseline,permit-default,organization")
16
+ @tags("category:organization,posture:permit-default")
20
17
  permit (
21
18
  principal,
22
19
  action,