@highflame/policy 2.1.5 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/_schemas/guardrails/context.json +168 -0
  2. package/_schemas/guardrails/schema.cedarschema +40 -0
  3. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +118 -0
  4. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +4 -4
  5. package/_schemas/guardrails/templates/defaults/injection.cedar +4 -4
  6. package/_schemas/guardrails/templates/defaults/secrets.cedar +2 -2
  7. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +1 -1
  8. package/_schemas/guardrails/templates/defaults/semantic.cedar +10 -7
  9. package/_schemas/guardrails/templates/defaults/toxicity.cedar +3 -3
  10. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +7 -5
  11. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +1 -1
  12. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +2 -2
  13. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +2 -2
  14. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +3 -3
  15. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +1 -1
  16. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +2 -2
  17. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +2 -2
  18. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +157 -0
  19. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +140 -0
  20. package/_schemas/guardrails/templates/templates.json +40 -0
  21. package/dist/engine.d.ts +1 -1
  22. package/dist/engine.js +1 -1
  23. package/dist/guardrails-context.gen.d.ts +5 -0
  24. package/dist/guardrails-context.gen.js +5 -0
  25. package/dist/guardrails-defaults.gen.d.ts +1 -1
  26. package/dist/guardrails-defaults.gen.js +520 -32
  27. package/dist/service-schemas.gen.d.ts +1 -1
  28. package/dist/service-schemas.gen.js +73 -5
  29. package/package.json +1 -1
@@ -474,6 +474,36 @@
474
474
  "type": "number",
475
475
  "required": false,
476
476
  "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
477
+ },
478
+ {
479
+ "key": "agent_id",
480
+ "type": "string",
481
+ "required": false,
482
+ "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
483
+ },
484
+ {
485
+ "key": "agent_type",
486
+ "type": "string",
487
+ "required": false,
488
+ "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
489
+ },
490
+ {
491
+ "key": "agent_trust_level",
492
+ "type": "string",
493
+ "required": false,
494
+ "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
495
+ },
496
+ {
497
+ "key": "agent_framework",
498
+ "type": "string",
499
+ "required": false,
500
+ "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
501
+ },
502
+ {
503
+ "key": "agent_publisher",
504
+ "type": "string",
505
+ "required": false,
506
+ "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
477
507
  }
478
508
  ]
479
509
  },
@@ -607,6 +637,12 @@
607
637
  "required": false,
608
638
  "description": "Whether secrets were detected in tool arguments or content"
609
639
  },
640
+ {
641
+ "key": "secret_count",
642
+ "type": "number",
643
+ "required": false,
644
+ "description": "Total number of secret matches found in tool arguments or content. Multiple matches may indicate data dumps or accidental credential exposure"
645
+ },
610
646
  {
611
647
  "key": "secret_types",
612
648
  "type": "array",
@@ -882,6 +918,36 @@
882
918
  "type": "number",
883
919
  "required": false,
884
920
  "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
921
+ },
922
+ {
923
+ "key": "agent_id",
924
+ "type": "string",
925
+ "required": false,
926
+ "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
927
+ },
928
+ {
929
+ "key": "agent_type",
930
+ "type": "string",
931
+ "required": false,
932
+ "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
933
+ },
934
+ {
935
+ "key": "agent_trust_level",
936
+ "type": "string",
937
+ "required": false,
938
+ "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
939
+ },
940
+ {
941
+ "key": "agent_framework",
942
+ "type": "string",
943
+ "required": false,
944
+ "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
945
+ },
946
+ {
947
+ "key": "agent_publisher",
948
+ "type": "string",
949
+ "required": false,
950
+ "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
885
951
  }
886
952
  ]
887
953
  },
@@ -907,6 +973,12 @@
907
973
  "required": false,
908
974
  "description": "Whether secrets were detected in the file content being read"
909
975
  },
976
+ {
977
+ "key": "secret_count",
978
+ "type": "number",
979
+ "required": false,
980
+ "description": "Total number of secret matches found in file content. Multiple matches may indicate data dumps or accidental credential exposure"
981
+ },
910
982
  {
911
983
  "key": "secret_types",
912
984
  "type": "array",
@@ -1020,6 +1092,36 @@
1020
1092
  "type": "number",
1021
1093
  "required": false,
1022
1094
  "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
1095
+ },
1096
+ {
1097
+ "key": "agent_id",
1098
+ "type": "string",
1099
+ "required": false,
1100
+ "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
1101
+ },
1102
+ {
1103
+ "key": "agent_type",
1104
+ "type": "string",
1105
+ "required": false,
1106
+ "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
1107
+ },
1108
+ {
1109
+ "key": "agent_trust_level",
1110
+ "type": "string",
1111
+ "required": false,
1112
+ "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
1113
+ },
1114
+ {
1115
+ "key": "agent_framework",
1116
+ "type": "string",
1117
+ "required": false,
1118
+ "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
1119
+ },
1120
+ {
1121
+ "key": "agent_publisher",
1122
+ "type": "string",
1123
+ "required": false,
1124
+ "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
1023
1125
  }
1024
1126
  ]
1025
1127
  },
@@ -1045,6 +1147,12 @@
1045
1147
  "required": false,
1046
1148
  "description": "Whether secrets were detected in the content being written. Block writes containing credentials"
1047
1149
  },
1150
+ {
1151
+ "key": "secret_count",
1152
+ "type": "number",
1153
+ "required": false,
1154
+ "description": "Total number of secret matches found in write content. Multiple matches may indicate data dumps or accidental credential exposure"
1155
+ },
1048
1156
  {
1049
1157
  "key": "secret_types",
1050
1158
  "type": "array",
@@ -1158,6 +1266,36 @@
1158
1266
  "type": "number",
1159
1267
  "required": false,
1160
1268
  "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
1269
+ },
1270
+ {
1271
+ "key": "agent_id",
1272
+ "type": "string",
1273
+ "required": false,
1274
+ "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
1275
+ },
1276
+ {
1277
+ "key": "agent_type",
1278
+ "type": "string",
1279
+ "required": false,
1280
+ "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
1281
+ },
1282
+ {
1283
+ "key": "agent_trust_level",
1284
+ "type": "string",
1285
+ "required": false,
1286
+ "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
1287
+ },
1288
+ {
1289
+ "key": "agent_framework",
1290
+ "type": "string",
1291
+ "required": false,
1292
+ "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
1293
+ },
1294
+ {
1295
+ "key": "agent_publisher",
1296
+ "type": "string",
1297
+ "required": false,
1298
+ "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
1161
1299
  }
1162
1300
  ]
1163
1301
  },
@@ -1320,6 +1458,36 @@
1320
1458
  "type": "number",
1321
1459
  "required": false,
1322
1460
  "description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
1461
+ },
1462
+ {
1463
+ "key": "agent_id",
1464
+ "type": "string",
1465
+ "required": false,
1466
+ "description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
1467
+ },
1468
+ {
1469
+ "key": "agent_type",
1470
+ "type": "string",
1471
+ "required": false,
1472
+ "description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
1473
+ },
1474
+ {
1475
+ "key": "agent_trust_level",
1476
+ "type": "string",
1477
+ "required": false,
1478
+ "description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
1479
+ },
1480
+ {
1481
+ "key": "agent_framework",
1482
+ "type": "string",
1483
+ "required": false,
1484
+ "description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
1485
+ },
1486
+ {
1487
+ "key": "agent_publisher",
1488
+ "type": "string",
1489
+ "required": false,
1490
+ "description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
1323
1491
  }
1324
1492
  ]
1325
1493
  }
@@ -194,6 +194,15 @@ namespace Guardrails {
194
194
  "session_max_secret_score"?: Long,
195
195
  "session_cumulative_risk_score"?: Long,
196
196
 
197
+ // Agent Identity — authenticated agent principal metadata (optional)
198
+ // Present when the request is made by an AI agent (API key or JWT with agent claims).
199
+ // Empty strings for human user requests. Use these to write agent-specific policies.
200
+ "agent_id"?: String, // Unique agent identifier (e.g., "agent_research_v3")
201
+ "agent_type"?: String, // "orchestrator" | "autonomous" | "tool_agent" | "human_proxy"
202
+ "agent_trust_level"?: String, // "first_party" | "verified_third_party" | "unverified"
203
+ "agent_framework"?: String, // Agent framework (e.g., "claude-code", "langchain", "crewai")
204
+ "agent_publisher"?: String, // Organization that published the agent
205
+
197
206
  };
198
207
 
199
208
  /// Context for call_tool action (agentic tool execution)
@@ -234,6 +243,7 @@ namespace Guardrails {
234
243
 
235
244
  // Security checks on tool arguments (optional)
236
245
  "contains_secrets"?: Bool,
246
+ "secret_count"?: Long,
237
247
  "secret_types"?: Set<String>,
238
248
  "pii_detected"?: Bool,
239
249
  "pii_types"?: Set<String>,
@@ -295,6 +305,13 @@ namespace Guardrails {
295
305
  "session_max_secret_score"?: Long,
296
306
  "session_cumulative_risk_score"?: Long,
297
307
 
308
+ // Agent Identity — authenticated agent principal metadata (optional)
309
+ "agent_id"?: String,
310
+ "agent_type"?: String,
311
+ "agent_trust_level"?: String,
312
+ "agent_framework"?: String,
313
+ "agent_publisher"?: String,
314
+
298
315
  };
299
316
 
300
317
  /// Context for read_file action
@@ -305,6 +322,7 @@ namespace Guardrails {
305
322
 
306
323
  // Security checks on file content (optional)
307
324
  "contains_secrets"?: Bool,
325
+ "secret_count"?: Long,
308
326
  "secret_types"?: Set<String>,
309
327
  "pii_detected"?: Bool,
310
328
  "pii_types"?: Set<String>,
@@ -329,6 +347,13 @@ namespace Guardrails {
329
347
  "session_max_secret_score"?: Long,
330
348
  "session_cumulative_risk_score"?: Long,
331
349
 
350
+ // Agent Identity — authenticated agent principal metadata (optional)
351
+ "agent_id"?: String,
352
+ "agent_type"?: String,
353
+ "agent_trust_level"?: String,
354
+ "agent_framework"?: String,
355
+ "agent_publisher"?: String,
356
+
332
357
  };
333
358
 
334
359
  /// Context for write_file action
@@ -339,6 +364,7 @@ namespace Guardrails {
339
364
 
340
365
  // Security checks on content being written (optional)
341
366
  "contains_secrets"?: Bool,
367
+ "secret_count"?: Long,
342
368
  "secret_types"?: Set<String>,
343
369
  "pii_detected"?: Bool,
344
370
  "pii_types"?: Set<String>,
@@ -363,6 +389,13 @@ namespace Guardrails {
363
389
  "session_max_secret_score"?: Long,
364
390
  "session_cumulative_risk_score"?: Long,
365
391
 
392
+ // Agent Identity — authenticated agent principal metadata (optional)
393
+ "agent_id"?: String,
394
+ "agent_type"?: String,
395
+ "agent_trust_level"?: String,
396
+ "agent_framework"?: String,
397
+ "agent_publisher"?: String,
398
+
366
399
  };
367
400
 
368
401
  /// Context for connect_server action (MCP server connections)
@@ -405,5 +438,12 @@ namespace Guardrails {
405
438
  "session_max_secret_score"?: Long,
406
439
  "session_cumulative_risk_score"?: Long,
407
440
 
441
+ // Agent Identity — authenticated agent principal metadata (optional)
442
+ "agent_id"?: String,
443
+ "agent_type"?: String,
444
+ "agent_trust_level"?: String,
445
+ "agent_framework"?: String,
446
+ "agent_publisher"?: String,
447
+
408
448
  };
409
449
  }
@@ -0,0 +1,118 @@
1
+ // =============================================================================
2
+ // Agent Identity Policy — Agent-to-Agent Security Defaults
3
+ // =============================================================================
4
+ // Enforces trust-based access control for AI agents authenticated via API key
5
+ // or JWT with agent claims. These policies use agent identity context keys
6
+ // populated from Shield's authentication layer.
7
+ //
8
+ // Agent trust levels:
9
+ // - first_party: Your own agents (highest trust)
10
+ // - verified_third_party: Audited external agents (medium trust)
11
+ // - unverified: Unknown/untrusted agents (lowest trust)
12
+ //
13
+ // Agent types:
14
+ // - orchestrator: Coordinates sub-agents
15
+ // - autonomous: Self-directed, no human in the loop
16
+ // - tool_agent: Single-purpose tool execution
17
+ // - human_proxy: Acts on behalf of a human
18
+ //
19
+ // Context keys used:
20
+ // - agent_id: String - Unique agent identifier
21
+ // - agent_type: String - Agent classification
22
+ // - agent_trust_level: String - Trust tier
23
+ // - agent_framework: String - Agent framework/SDK
24
+ // - agent_publisher: String - Publishing organization
25
+ // - tool_name: String - Tool being called
26
+ // - tool_category: String - "safe" | "sensitive" | "dangerous"
27
+ // - tool_risk_score: Long (0-100) - Computed risk score
28
+ // - injection_confidence: Long (0-100) - Injection detection score
29
+ // Also read by the policies below: tool_is_sensitive (Bool), session_threat_turns (Long)
30
+ // Category: agent_identity
31
+ // Namespace: Guardrails
32
+ // =============================================================================
33
+
34
+ // -----------------------------------------------------------------------------
35
+ // Trust-Based Tool Access
36
+ // -----------------------------------------------------------------------------
37
+
38
+ @id("agent-block-unverified-dangerous-tools")
39
+ @name("Block unverified agents from dangerous tools")
40
+ @description("Unverified agents cannot execute tools classified as dangerous. Require first_party or verified_third_party trust level for high-risk operations")
41
+ @severity("critical")
42
+ @tags("agent-identity,trust,tools,a2a")
43
+ forbid (
44
+ principal is Guardrails::Agent,
45
+ action == Guardrails::Action::"call_tool",
46
+ resource
47
+ ) when {
48
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
49
+ context has tool_category && context.tool_category == "dangerous"
50
+ };
51
+
52
+ @id("agent-block-unverified-shell")
53
+ @name("Block unverified agents from shell execution")
54
+ @description("Shell and command execution are restricted to first_party and verified_third_party agents")
55
+ @severity("critical")
56
+ @tags("agent-identity,trust,shell,a2a")
57
+ forbid (
58
+ principal is Guardrails::Agent,
59
+ action == Guardrails::Action::"call_tool",
60
+ resource
61
+ ) when {
62
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
63
+ context has tool_name &&
64
+ (context.tool_name == "shell" ||
65
+ context.tool_name == "execute_command" ||
66
+ context.tool_name == "bash")
67
+ };
68
+
69
+ @id("agent-block-unverified-sensitive-tools")
70
+ @name("Block unverified agents from sensitive tools with elevated risk")
71
+ @description("Unverified agents cannot execute sensitive tools with risk score above 60")
72
+ @severity("high")
73
+ @tags("agent-identity,trust,tools,a2a")
74
+ forbid (
75
+ principal is Guardrails::Agent,
76
+ action == Guardrails::Action::"call_tool",
77
+ resource
78
+ ) when {
79
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
80
+ context has tool_is_sensitive && context.tool_is_sensitive == true &&
81
+ context has tool_risk_score && context.tool_risk_score > 60
82
+ };
83
+
84
+ // -----------------------------------------------------------------------------
85
+ // Autonomous Agent Restrictions
86
+ // -----------------------------------------------------------------------------
87
+
88
+ @id("agent-block-autonomous-injection")
89
+ @name("Stricter injection threshold for autonomous agents")
90
+ @description("Autonomous agents operate without human oversight. Apply lower injection confidence threshold (50 vs standard 80) to compensate for lack of human review")
91
+ @severity("high")
92
+ @tags("agent-identity,autonomous,injection,a2a")
93
+ forbid (
94
+ principal is Guardrails::Agent,
95
+ action == Guardrails::Action::"process_prompt",
96
+ resource
97
+ ) when {
98
+ context has agent_type && context.agent_type == "autonomous" &&
99
+ context has injection_confidence && context.injection_confidence > 50
100
+ };
101
+
102
+ // -----------------------------------------------------------------------------
103
+ // Cross-Turn Agent Trust Enforcement
104
+ // -----------------------------------------------------------------------------
105
+
106
+ @id("agent-block-unverified-after-threats")
107
+ @name("Block unverified agents after session threats")
108
+ @description("If any prior turn in the session detected threats, block unverified agents from further tool calls. Prevents compromised sessions from being exploited by untrusted agents")
109
+ @severity("critical")
110
+ @tags("agent-identity,trust,session,cross-turn,a2a")
111
+ forbid (
112
+ principal is Guardrails::Agent,
113
+ action == Guardrails::Action::"call_tool",
114
+ resource
115
+ ) when {
116
+ context has agent_trust_level && context.agent_trust_level == "unverified" &&
117
+ context has session_threat_turns && context.session_threat_turns > 0
118
+ };
@@ -40,7 +40,7 @@ forbid (
40
40
  @tags("agentic,exfiltration,security")
41
41
  forbid (
42
42
  principal,
43
- action,
43
+ action == Guardrails::Action::"call_tool",
44
44
  resource
45
45
  ) when {
46
46
  context has suspicious_pattern && context.suspicious_pattern == true &&
@@ -59,7 +59,7 @@ forbid (
59
59
  @tags("agentic,patterns,security")
60
60
  forbid (
61
61
  principal,
62
- action,
62
+ action == Guardrails::Action::"call_tool",
63
63
  resource
64
64
  ) when {
65
65
  context has sequence_risk && context.sequence_risk > 80
@@ -72,7 +72,7 @@ forbid (
72
72
  @tags("agentic,budget,cost-control")
73
73
  forbid (
74
74
  principal,
75
- action,
75
+ action == Guardrails::Action::"call_tool",
76
76
  resource
77
77
  ) when {
78
78
  context has budget_exceeded && context.budget_exceeded == true
@@ -85,7 +85,7 @@ forbid (
85
85
  @tags("agentic,budget,cost-control")
86
86
  forbid (
87
87
  principal,
88
- action,
88
+ action == Guardrails::Action::"call_tool",
89
89
  resource
90
90
  ) when {
91
91
  context has budget_remaining_pct &&
@@ -22,7 +22,7 @@
22
22
  @tags("injection,jailbreak,security")
23
23
  forbid (
24
24
  principal,
25
- action,
25
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
26
26
  resource
27
27
  ) when {
28
28
  context has injection_confidence && context.injection_confidence > 85
@@ -35,7 +35,7 @@ forbid (
35
35
  @tags("jailbreak,security")
36
36
  forbid (
37
37
  principal,
38
- action,
38
+ action == Guardrails::Action::"process_prompt",
39
39
  resource
40
40
  ) when {
41
41
  context has jailbreak_confidence && context.jailbreak_confidence > 80
@@ -48,7 +48,7 @@ forbid (
48
48
  @tags("injection,jailbreak,security")
49
49
  forbid (
50
50
  principal,
51
- action,
51
+ action == Guardrails::Action::"process_prompt",
52
52
  resource
53
53
  ) when {
54
54
  context has injection_confidence && context has jailbreak_confidence &&
@@ -62,7 +62,7 @@ forbid (
62
62
  @tags("injection,unicode,security")
63
63
  forbid (
64
64
  principal,
65
- action,
65
+ action == Guardrails::Action::"process_prompt",
66
66
  resource
67
67
  ) when {
68
68
  context has contains_invisible_chars && context.contains_invisible_chars == true &&
@@ -20,7 +20,7 @@
20
20
  @tags("secrets,security,data-leak")
21
21
  forbid (
22
22
  principal,
23
- action,
23
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
24
24
  resource
25
25
  ) when {
26
26
  context has contains_secrets && context.contains_secrets == true
@@ -33,7 +33,7 @@ forbid (
33
33
  @tags("secrets,security,data-leak")
34
34
  forbid (
35
35
  principal,
36
- action,
36
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
37
37
  resource
38
38
  ) when {
39
39
  context has secret_count && context.secret_count > 2
@@ -36,7 +36,7 @@ forbid (
36
36
  @tags("path-traversal,security")
37
37
  forbid (
38
38
  principal,
39
- action,
39
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
40
40
  resource
41
41
  ) when {
42
42
  context has path_traversal_detected && context.path_traversal_detected == true &&
@@ -22,9 +22,10 @@ forbid (
22
22
  action == Guardrails::Action::"process_prompt",
23
23
  resource
24
24
  ) when {
25
- context.content_topics.contains("weapons_manufacturing") ||
26
- context.content_topics.contains("explosive_materials") ||
27
- context.content_topics.contains("illegal_synthesis")
25
+ context has content_topics &&
26
+ (context.content_topics.contains("weapons_manufacturing") ||
27
+ context.content_topics.contains("explosive_materials") ||
28
+ context.content_topics.contains("illegal_synthesis"))
28
29
  };
29
30
 
30
31
  @id("semantic-block-controlled-substances")
@@ -37,9 +38,10 @@ forbid (
37
38
  action == Guardrails::Action::"process_prompt",
38
39
  resource
39
40
  ) when {
40
- context.content_topics.contains("controlled_substances") ||
41
- context.content_topics.contains("drug_manufacturing") ||
42
- context.content_topics.contains("precursor_chemicals")
41
+ context has content_topics &&
42
+ (context.content_topics.contains("controlled_substances") ||
43
+ context.content_topics.contains("drug_manufacturing") ||
44
+ context.content_topics.contains("precursor_chemicals"))
43
45
  };
44
46
 
45
47
  @id("semantic-block-high-confidence-restricted")
@@ -52,7 +54,8 @@ forbid (
52
54
  action == Guardrails::Action::"process_prompt",
53
55
  resource
54
56
  ) when {
55
- context.topic_confidence > 80 &&
57
+ context has topic_confidence && context.topic_confidence > 80 &&
58
+ context has content_topics &&
56
59
  (context.content_topics.contains("illegal_activity") ||
57
60
  context.content_topics.contains("financial_fraud") ||
58
61
  context.content_topics.contains("social_engineering"))
@@ -23,7 +23,7 @@
23
23
  @tags("toxicity,trust-safety")
24
24
  forbid (
25
25
  principal,
26
- action,
26
+ action == Guardrails::Action::"process_prompt",
27
27
  resource
28
28
  ) when {
29
29
  (context has violence_score && context.violence_score > 90) ||
@@ -41,7 +41,7 @@ forbid (
41
41
  @tags("hate-speech,trust-safety")
42
42
  forbid (
43
43
  principal,
44
- action,
44
+ action == Guardrails::Action::"process_prompt",
45
45
  resource
46
46
  ) when {
47
47
  context has hate_speech_score && context.hate_speech_score > 75
@@ -54,7 +54,7 @@ forbid (
54
54
  @tags("toxicity,trust-safety")
55
55
  forbid (
56
56
  principal,
57
- action,
57
+ action == Guardrails::Action::"process_prompt",
58
58
  resource
59
59
  ) when {
60
60
  // Any 2 of 6 toxicity scores above 60 triggers a block
@@ -28,7 +28,8 @@ permit (
28
28
  action == Guardrails::Action::"call_tool",
29
29
  resource
30
30
  ) when {
31
- context.mcp_server == "github" &&
31
+ context has mcp_server && context.mcp_server == "github" &&
32
+ context has tool_name &&
32
33
  (context.tool_name == "read_issues" ||
33
34
  context.tool_name == "get_issue" ||
34
35
  context.tool_name == "list_repos" ||
@@ -47,7 +48,7 @@ forbid (
47
48
  action == Guardrails::Action::"call_tool",
48
49
  resource
49
50
  ) when {
50
- context.mcp_server == "github"
51
+ context has mcp_server && context.mcp_server == "github"
51
52
  };
52
53
 
53
54
  // -- Organization-wide MCP server exclusions --------------------------------
@@ -64,8 +65,9 @@ forbid (
64
65
  ) when {
65
66
  // Add server names to block across the organization.
66
67
  // Modify this list to match your exclusion requirements.
67
- context.mcp_server == "untrusted-server" ||
68
- context.mcp_server == "deprecated-server"
68
+ context has mcp_server &&
69
+ (context.mcp_server == "untrusted-server" ||
70
+ context.mcp_server == "deprecated-server")
69
71
  };
70
72
 
71
73
  // -- Block unverified MCP servers -------------------------------------------
@@ -80,5 +82,5 @@ forbid (
80
82
  action == Guardrails::Action::"call_tool",
81
83
  resource
82
84
  ) when {
83
- context.mcp_server_verified == false
85
+ context has mcp_server_verified && context.mcp_server_verified == false
84
86
  };
@@ -15,7 +15,7 @@
15
15
  @tags("profile,chat-assistant,pii,privacy")
16
16
  forbid (
17
17
  principal,
18
- action,
18
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
19
19
  resource
20
20
  ) when {
21
21
  context has pii_detected && context.pii_detected == true
@@ -15,7 +15,7 @@
15
15
  @tags("profile,chat-assistant,injection,security")
16
16
  forbid (
17
17
  principal,
18
- action,
18
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
19
19
  resource
20
20
  ) when {
21
21
  context has injection_confidence && context.injection_confidence > 70
@@ -28,7 +28,7 @@ forbid (
28
28
  @tags("profile,chat-assistant,jailbreak,security")
29
29
  forbid (
30
30
  principal,
31
- action,
31
+ action == Guardrails::Action::"process_prompt",
32
32
  resource
33
33
  ) when {
34
34
  context has jailbreak_confidence && context.jailbreak_confidence > 65