@highflame/policy 2.1.5 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/guardrails/context.json +168 -0
- package/_schemas/guardrails/schema.cedarschema +40 -0
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +118 -0
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +4 -4
- package/_schemas/guardrails/templates/defaults/injection.cedar +4 -4
- package/_schemas/guardrails/templates/defaults/secrets.cedar +2 -2
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +1 -1
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +1 -1
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +3 -3
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +1 -1
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +2 -2
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +157 -0
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +140 -0
- package/_schemas/guardrails/templates/templates.json +41 -0
- package/dist/guardrails-context.gen.d.ts +5 -0
- package/dist/guardrails-context.gen.js +5 -0
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +515 -27
- package/dist/service-schemas.gen.d.ts +1 -1
- package/dist/service-schemas.gen.js +73 -5
- package/package.json +1 -1
|
@@ -474,6 +474,36 @@
|
|
|
474
474
|
"type": "number",
|
|
475
475
|
"required": false,
|
|
476
476
|
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
477
|
+
},
|
|
478
|
+
{
|
|
479
|
+
"key": "agent_id",
|
|
480
|
+
"type": "string",
|
|
481
|
+
"required": false,
|
|
482
|
+
"description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
|
|
483
|
+
},
|
|
484
|
+
{
|
|
485
|
+
"key": "agent_type",
|
|
486
|
+
"type": "string",
|
|
487
|
+
"required": false,
|
|
488
|
+
"description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
|
|
489
|
+
},
|
|
490
|
+
{
|
|
491
|
+
"key": "agent_trust_level",
|
|
492
|
+
"type": "string",
|
|
493
|
+
"required": false,
|
|
494
|
+
"description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
|
|
495
|
+
},
|
|
496
|
+
{
|
|
497
|
+
"key": "agent_framework",
|
|
498
|
+
"type": "string",
|
|
499
|
+
"required": false,
|
|
500
|
+
"description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
|
|
501
|
+
},
|
|
502
|
+
{
|
|
503
|
+
"key": "agent_publisher",
|
|
504
|
+
"type": "string",
|
|
505
|
+
"required": false,
|
|
506
|
+
"description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
|
|
477
507
|
}
|
|
478
508
|
]
|
|
479
509
|
},
|
|
@@ -607,6 +637,12 @@
|
|
|
607
637
|
"required": false,
|
|
608
638
|
"description": "Whether secrets were detected in tool arguments or content"
|
|
609
639
|
},
|
|
640
|
+
{
|
|
641
|
+
"key": "secret_count",
|
|
642
|
+
"type": "number",
|
|
643
|
+
"required": false,
|
|
644
|
+
"description": "Total number of secret matches found in tool arguments or content. Multiple matches may indicate data dumps or accidental credential exposure"
|
|
645
|
+
},
|
|
610
646
|
{
|
|
611
647
|
"key": "secret_types",
|
|
612
648
|
"type": "array",
|
|
@@ -882,6 +918,36 @@
|
|
|
882
918
|
"type": "number",
|
|
883
919
|
"required": false,
|
|
884
920
|
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
921
|
+
},
|
|
922
|
+
{
|
|
923
|
+
"key": "agent_id",
|
|
924
|
+
"type": "string",
|
|
925
|
+
"required": false,
|
|
926
|
+
"description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
|
|
927
|
+
},
|
|
928
|
+
{
|
|
929
|
+
"key": "agent_type",
|
|
930
|
+
"type": "string",
|
|
931
|
+
"required": false,
|
|
932
|
+
"description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
|
|
933
|
+
},
|
|
934
|
+
{
|
|
935
|
+
"key": "agent_trust_level",
|
|
936
|
+
"type": "string",
|
|
937
|
+
"required": false,
|
|
938
|
+
"description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
|
|
939
|
+
},
|
|
940
|
+
{
|
|
941
|
+
"key": "agent_framework",
|
|
942
|
+
"type": "string",
|
|
943
|
+
"required": false,
|
|
944
|
+
"description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
|
|
945
|
+
},
|
|
946
|
+
{
|
|
947
|
+
"key": "agent_publisher",
|
|
948
|
+
"type": "string",
|
|
949
|
+
"required": false,
|
|
950
|
+
"description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
|
|
885
951
|
}
|
|
886
952
|
]
|
|
887
953
|
},
|
|
@@ -907,6 +973,12 @@
|
|
|
907
973
|
"required": false,
|
|
908
974
|
"description": "Whether secrets were detected in the file content being read"
|
|
909
975
|
},
|
|
976
|
+
{
|
|
977
|
+
"key": "secret_count",
|
|
978
|
+
"type": "number",
|
|
979
|
+
"required": false,
|
|
980
|
+
"description": "Total number of secret matches found in file content. Multiple matches may indicate data dumps or accidental credential exposure"
|
|
981
|
+
},
|
|
910
982
|
{
|
|
911
983
|
"key": "secret_types",
|
|
912
984
|
"type": "array",
|
|
@@ -1020,6 +1092,36 @@
|
|
|
1020
1092
|
"type": "number",
|
|
1021
1093
|
"required": false,
|
|
1022
1094
|
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
1095
|
+
},
|
|
1096
|
+
{
|
|
1097
|
+
"key": "agent_id",
|
|
1098
|
+
"type": "string",
|
|
1099
|
+
"required": false,
|
|
1100
|
+
"description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
|
|
1101
|
+
},
|
|
1102
|
+
{
|
|
1103
|
+
"key": "agent_type",
|
|
1104
|
+
"type": "string",
|
|
1105
|
+
"required": false,
|
|
1106
|
+
"description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
|
|
1107
|
+
},
|
|
1108
|
+
{
|
|
1109
|
+
"key": "agent_trust_level",
|
|
1110
|
+
"type": "string",
|
|
1111
|
+
"required": false,
|
|
1112
|
+
"description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
|
|
1113
|
+
},
|
|
1114
|
+
{
|
|
1115
|
+
"key": "agent_framework",
|
|
1116
|
+
"type": "string",
|
|
1117
|
+
"required": false,
|
|
1118
|
+
"description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
|
|
1119
|
+
},
|
|
1120
|
+
{
|
|
1121
|
+
"key": "agent_publisher",
|
|
1122
|
+
"type": "string",
|
|
1123
|
+
"required": false,
|
|
1124
|
+
"description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
|
|
1023
1125
|
}
|
|
1024
1126
|
]
|
|
1025
1127
|
},
|
|
@@ -1045,6 +1147,12 @@
|
|
|
1045
1147
|
"required": false,
|
|
1046
1148
|
"description": "Whether secrets were detected in the content being written. Block writes containing credentials"
|
|
1047
1149
|
},
|
|
1150
|
+
{
|
|
1151
|
+
"key": "secret_count",
|
|
1152
|
+
"type": "number",
|
|
1153
|
+
"required": false,
|
|
1154
|
+
"description": "Total number of secret matches found in write content. Multiple matches may indicate data dumps or accidental credential exposure"
|
|
1155
|
+
},
|
|
1048
1156
|
{
|
|
1049
1157
|
"key": "secret_types",
|
|
1050
1158
|
"type": "array",
|
|
@@ -1158,6 +1266,36 @@
|
|
|
1158
1266
|
"type": "number",
|
|
1159
1267
|
"required": false,
|
|
1160
1268
|
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
1269
|
+
},
|
|
1270
|
+
{
|
|
1271
|
+
"key": "agent_id",
|
|
1272
|
+
"type": "string",
|
|
1273
|
+
"required": false,
|
|
1274
|
+
"description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
|
|
1275
|
+
},
|
|
1276
|
+
{
|
|
1277
|
+
"key": "agent_type",
|
|
1278
|
+
"type": "string",
|
|
1279
|
+
"required": false,
|
|
1280
|
+
"description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
|
|
1281
|
+
},
|
|
1282
|
+
{
|
|
1283
|
+
"key": "agent_trust_level",
|
|
1284
|
+
"type": "string",
|
|
1285
|
+
"required": false,
|
|
1286
|
+
"description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
|
|
1287
|
+
},
|
|
1288
|
+
{
|
|
1289
|
+
"key": "agent_framework",
|
|
1290
|
+
"type": "string",
|
|
1291
|
+
"required": false,
|
|
1292
|
+
"description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
|
|
1293
|
+
},
|
|
1294
|
+
{
|
|
1295
|
+
"key": "agent_publisher",
|
|
1296
|
+
"type": "string",
|
|
1297
|
+
"required": false,
|
|
1298
|
+
"description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
|
|
1161
1299
|
}
|
|
1162
1300
|
]
|
|
1163
1301
|
},
|
|
@@ -1320,6 +1458,36 @@
|
|
|
1320
1458
|
"type": "number",
|
|
1321
1459
|
"required": false,
|
|
1322
1460
|
"description": "Sum of per-turn risk scores across the session. Catches death-by-a-thousand-cuts where no single turn is high but cumulative risk is significant"
|
|
1461
|
+
},
|
|
1462
|
+
{
|
|
1463
|
+
"key": "agent_id",
|
|
1464
|
+
"type": "string",
|
|
1465
|
+
"required": false,
|
|
1466
|
+
"description": "Unique identifier of the authenticated AI agent making this request (e.g., 'agent_research_v3'). Empty string for human user requests. Use this to write agent-specific policies"
|
|
1467
|
+
},
|
|
1468
|
+
{
|
|
1469
|
+
"key": "agent_type",
|
|
1470
|
+
"type": "string",
|
|
1471
|
+
"required": false,
|
|
1472
|
+
"description": "Type of the authenticated agent: 'orchestrator' (coordinates sub-agents), 'autonomous' (self-directed), 'tool_agent' (single-purpose tool), or 'human_proxy' (acts on behalf of a human). Empty string for human users"
|
|
1473
|
+
},
|
|
1474
|
+
{
|
|
1475
|
+
"key": "agent_trust_level",
|
|
1476
|
+
"type": "string",
|
|
1477
|
+
"required": false,
|
|
1478
|
+
"description": "Trust level of the authenticated agent: 'first_party' (your own agents), 'verified_third_party' (audited external agents), or 'unverified' (unknown agents). Use this for tiered access control — e.g., only first_party agents can call dangerous tools"
|
|
1479
|
+
},
|
|
1480
|
+
{
|
|
1481
|
+
"key": "agent_framework",
|
|
1482
|
+
"type": "string",
|
|
1483
|
+
"required": false,
|
|
1484
|
+
"description": "Framework or SDK the agent is built with (e.g., 'claude-code', 'langchain', 'crewai', 'autogen'). Use this to apply framework-specific policies"
|
|
1485
|
+
},
|
|
1486
|
+
{
|
|
1487
|
+
"key": "agent_publisher",
|
|
1488
|
+
"type": "string",
|
|
1489
|
+
"required": false,
|
|
1490
|
+
"description": "Organization that published the agent (e.g., 'anthropic', 'internal', 'acme-corp'). Use this for organization-level trust policies"
|
|
1323
1491
|
}
|
|
1324
1492
|
]
|
|
1325
1493
|
}
|
|
@@ -194,6 +194,15 @@ namespace Guardrails {
|
|
|
194
194
|
"session_max_secret_score"?: Long,
|
|
195
195
|
"session_cumulative_risk_score"?: Long,
|
|
196
196
|
|
|
197
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
198
|
+
// Present when the request is made by an AI agent (API key or JWT with agent claims).
|
|
199
|
+
// Empty strings for human user requests. Use these to write agent-specific policies.
|
|
200
|
+
"agent_id"?: String, // Unique agent identifier (e.g., "agent_research_v3")
|
|
201
|
+
"agent_type"?: String, // "orchestrator" | "autonomous" | "tool_agent" | "human_proxy"
|
|
202
|
+
"agent_trust_level"?: String, // "first_party" | "verified_third_party" | "unverified"
|
|
203
|
+
"agent_framework"?: String, // Agent framework (e.g., "claude-code", "langchain", "crewai")
|
|
204
|
+
"agent_publisher"?: String, // Organization that published the agent
|
|
205
|
+
|
|
197
206
|
};
|
|
198
207
|
|
|
199
208
|
/// Context for call_tool action (agentic tool execution)
|
|
@@ -234,6 +243,7 @@ namespace Guardrails {
|
|
|
234
243
|
|
|
235
244
|
// Security checks on tool arguments (optional)
|
|
236
245
|
"contains_secrets"?: Bool,
|
|
246
|
+
"secret_count"?: Long,
|
|
237
247
|
"secret_types"?: Set<String>,
|
|
238
248
|
"pii_detected"?: Bool,
|
|
239
249
|
"pii_types"?: Set<String>,
|
|
@@ -295,6 +305,13 @@ namespace Guardrails {
|
|
|
295
305
|
"session_max_secret_score"?: Long,
|
|
296
306
|
"session_cumulative_risk_score"?: Long,
|
|
297
307
|
|
|
308
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
309
|
+
"agent_id"?: String,
|
|
310
|
+
"agent_type"?: String,
|
|
311
|
+
"agent_trust_level"?: String,
|
|
312
|
+
"agent_framework"?: String,
|
|
313
|
+
"agent_publisher"?: String,
|
|
314
|
+
|
|
298
315
|
};
|
|
299
316
|
|
|
300
317
|
/// Context for read_file action
|
|
@@ -305,6 +322,7 @@ namespace Guardrails {
|
|
|
305
322
|
|
|
306
323
|
// Security checks on file content (optional)
|
|
307
324
|
"contains_secrets"?: Bool,
|
|
325
|
+
"secret_count"?: Long,
|
|
308
326
|
"secret_types"?: Set<String>,
|
|
309
327
|
"pii_detected"?: Bool,
|
|
310
328
|
"pii_types"?: Set<String>,
|
|
@@ -329,6 +347,13 @@ namespace Guardrails {
|
|
|
329
347
|
"session_max_secret_score"?: Long,
|
|
330
348
|
"session_cumulative_risk_score"?: Long,
|
|
331
349
|
|
|
350
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
351
|
+
"agent_id"?: String,
|
|
352
|
+
"agent_type"?: String,
|
|
353
|
+
"agent_trust_level"?: String,
|
|
354
|
+
"agent_framework"?: String,
|
|
355
|
+
"agent_publisher"?: String,
|
|
356
|
+
|
|
332
357
|
};
|
|
333
358
|
|
|
334
359
|
/// Context for write_file action
|
|
@@ -339,6 +364,7 @@ namespace Guardrails {
|
|
|
339
364
|
|
|
340
365
|
// Security checks on content being written (optional)
|
|
341
366
|
"contains_secrets"?: Bool,
|
|
367
|
+
"secret_count"?: Long,
|
|
342
368
|
"secret_types"?: Set<String>,
|
|
343
369
|
"pii_detected"?: Bool,
|
|
344
370
|
"pii_types"?: Set<String>,
|
|
@@ -363,6 +389,13 @@ namespace Guardrails {
|
|
|
363
389
|
"session_max_secret_score"?: Long,
|
|
364
390
|
"session_cumulative_risk_score"?: Long,
|
|
365
391
|
|
|
392
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
393
|
+
"agent_id"?: String,
|
|
394
|
+
"agent_type"?: String,
|
|
395
|
+
"agent_trust_level"?: String,
|
|
396
|
+
"agent_framework"?: String,
|
|
397
|
+
"agent_publisher"?: String,
|
|
398
|
+
|
|
366
399
|
};
|
|
367
400
|
|
|
368
401
|
/// Context for connect_server action (MCP server connections)
|
|
@@ -405,5 +438,12 @@ namespace Guardrails {
|
|
|
405
438
|
"session_max_secret_score"?: Long,
|
|
406
439
|
"session_cumulative_risk_score"?: Long,
|
|
407
440
|
|
|
441
|
+
// Agent Identity — authenticated agent principal metadata (optional)
|
|
442
|
+
"agent_id"?: String,
|
|
443
|
+
"agent_type"?: String,
|
|
444
|
+
"agent_trust_level"?: String,
|
|
445
|
+
"agent_framework"?: String,
|
|
446
|
+
"agent_publisher"?: String,
|
|
447
|
+
|
|
408
448
|
};
|
|
409
449
|
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// =============================================================================
|
|
2
|
+
// Agent Identity Policy — Agent-to-Agent Security Defaults
|
|
3
|
+
// =============================================================================
|
|
4
|
+
// Enforces trust-based access control for AI agents authenticated via API key
|
|
5
|
+
// or JWT with agent claims. These policies use agent identity context keys
|
|
6
|
+
// populated from Shield's authentication layer.
|
|
7
|
+
//
|
|
8
|
+
// Agent trust levels:
|
|
9
|
+
// - first_party: Your own agents (highest trust)
|
|
10
|
+
// - verified_third_party: Audited external agents (medium trust)
|
|
11
|
+
// - unverified: Unknown/untrusted agents (lowest trust)
|
|
12
|
+
//
|
|
13
|
+
// Agent types:
|
|
14
|
+
// - orchestrator: Coordinates sub-agents
|
|
15
|
+
// - autonomous: Self-directed, no human in the loop
|
|
16
|
+
// - tool_agent: Single-purpose tool execution
|
|
17
|
+
// - human_proxy: Acts on behalf of a human
|
|
18
|
+
//
|
|
19
|
+
// Context keys used:
|
|
20
|
+
// - agent_id: String - Unique agent identifier
|
|
21
|
+
// - agent_type: String - Agent classification
|
|
22
|
+
// - agent_trust_level: String - Trust tier
|
|
23
|
+
// - agent_framework: String - Agent framework/SDK
|
|
24
|
+
// - agent_publisher: String - Publishing organization
|
|
25
|
+
// - tool_name: String - Tool being called
|
|
26
|
+
// - tool_category: String - "safe" | "sensitive" | "dangerous"
|
|
27
|
+
// - tool_risk_score: Long (0-100) - Computed risk score
|
|
28
|
+
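//   - injection_confidence: Long (0-100) - Injection detection score
//   - tool_is_sensitive: Bool - Whether the tool is flagged as sensitive
//   - session_threat_turns: Long - Number of prior session turns with detected threats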
|
|
29
|
+
//
|
|
30
|
+
// Category: agent_identity
|
|
31
|
+
// Namespace: Guardrails
|
|
32
|
+
// =============================================================================
|
|
33
|
+
|
|
34
|
+
// -----------------------------------------------------------------------------
|
|
35
|
+
// Trust-Based Tool Access
|
|
36
|
+
// -----------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
// Forbids `call_tool` for Agent principals whose trust level is "unverified"
// when the tool being called is categorised as "dangerous".
//
// Both context attributes are tested with `has` before they are read.
// `agent_trust_level` is declared optional (`?`) in the Guardrails schema, and
// reading a missing attribute is an evaluation error in Cedar, so the guard
// keeps this policy well-defined (and validator-clean) when the attribute is
// absent. When either attribute is missing the condition is simply false.
@id("agent-block-unverified-dangerous-tools")
@name("Block unverified agents from dangerous tools")
@description("Unverified agents cannot execute tools classified as dangerous. Require first_party or verified_third_party trust level for high-risk operations")
@severity("critical")
@tags("agent-identity,trust,tools,a2a")
forbid (
    principal is Guardrails::Agent,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    context has agent_trust_level && context.agent_trust_level == "unverified" &&
    context has tool_category && context.tool_category == "dangerous"
};
|
|
51
|
+
|
|
52
|
+
// Forbids `call_tool` for Agent principals whose trust level is "unverified"
// when the tool name is one of "shell", "execute_command" or "bash".
//
// `agent_trust_level` is declared optional (`?`) in the Guardrails schema, so
// it is tested with `has` before being read, in the same way `tool_name` is.
// Reading a missing attribute is an evaluation error in Cedar; with the guard
// the condition is simply false when the attribute is absent.
@id("agent-block-unverified-shell")
@name("Block unverified agents from shell execution")
@description("Shell and command execution are restricted to first_party and verified_third_party agents")
@severity("critical")
@tags("agent-identity,trust,shell,a2a")
forbid (
    principal is Guardrails::Agent,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    context has agent_trust_level && context.agent_trust_level == "unverified" &&
    context has tool_name &&
    (context.tool_name == "shell" ||
     context.tool_name == "execute_command" ||
     context.tool_name == "bash")
};
|
|
68
|
+
|
|
69
|
+
// Forbids `call_tool` for Agent principals whose trust level is "unverified"
// when the tool is flagged sensitive (`tool_is_sensitive == true`) and its
// `tool_risk_score` is strictly greater than 60.
//
// `agent_trust_level` is declared optional (`?`) in the Guardrails schema, so
// it is tested with `has` before being read, matching the guards already used
// for `tool_is_sensitive` and `tool_risk_score`. If any of the three
// attributes is absent the condition is false and the policy does not apply.
@id("agent-block-unverified-sensitive-tools")
@name("Block unverified agents from sensitive tools with elevated risk")
@description("Unverified agents cannot execute sensitive tools with risk score above 60")
@severity("high")
@tags("agent-identity,trust,tools,a2a")
forbid (
    principal is Guardrails::Agent,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    context has agent_trust_level && context.agent_trust_level == "unverified" &&
    context has tool_is_sensitive && context.tool_is_sensitive == true &&
    context has tool_risk_score && context.tool_risk_score > 60
};
|
|
83
|
+
|
|
84
|
+
// -----------------------------------------------------------------------------
|
|
85
|
+
// Autonomous Agent Restrictions
|
|
86
|
+
// -----------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
// Forbids `process_prompt` for Agent principals of type "autonomous" when
// `injection_confidence` is strictly greater than 50.
//
// `agent_type` is declared optional (`?`) in the Guardrails schema, so it is
// tested with `has` before being read, matching the guard already used for
// `injection_confidence`. Reading a missing attribute is an evaluation error
// in Cedar; with the guard the condition is simply false when it is absent.
//
// NOTE(review): the description quotes a "standard" threshold of 80; confirm
// that figure against the default injection policy's actual threshold.
@id("agent-block-autonomous-injection")
@name("Stricter injection threshold for autonomous agents")
@description("Autonomous agents operate without human oversight. Apply lower injection confidence threshold (50 vs standard 80) to compensate for lack of human review")
@severity("high")
@tags("agent-identity,autonomous,injection,a2a")
forbid (
    principal is Guardrails::Agent,
    action == Guardrails::Action::"process_prompt",
    resource
) when {
    context has agent_type && context.agent_type == "autonomous" &&
    context has injection_confidence && context.injection_confidence > 50
};
|
|
101
|
+
|
|
102
|
+
// -----------------------------------------------------------------------------
|
|
103
|
+
// Cross-Turn Agent Trust Enforcement
|
|
104
|
+
// -----------------------------------------------------------------------------
|
|
105
|
+
|
|
106
|
+
// Forbids `call_tool` for Agent principals whose trust level is "unverified"
// once `session_threat_turns` is greater than 0.
//
// `agent_trust_level` is declared optional (`?`) in the Guardrails schema, so
// it is tested with `has` before being read, matching the guard already used
// for `session_threat_turns`. Reading a missing attribute is an evaluation
// error in Cedar; with the guard the condition is simply false when absent.
@id("agent-block-unverified-after-threats")
@name("Block unverified agents after session threats")
@description("If any prior turn in the session detected threats, block unverified agents from further tool calls. Prevents compromised sessions from being exploited by untrusted agents")
@severity("critical")
@tags("agent-identity,trust,session,cross-turn,a2a")
forbid (
    principal is Guardrails::Agent,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    context has agent_trust_level && context.agent_trust_level == "unverified" &&
    context has session_threat_turns && context.session_threat_turns > 0
};
|
|
@@ -40,7 +40,7 @@ forbid (
|
|
|
40
40
|
@tags("agentic,exfiltration,security")
|
|
41
41
|
forbid (
|
|
42
42
|
principal,
|
|
43
|
-
action,
|
|
43
|
+
action == Guardrails::Action::"call_tool",
|
|
44
44
|
resource
|
|
45
45
|
) when {
|
|
46
46
|
context has suspicious_pattern && context.suspicious_pattern == true &&
|
|
@@ -59,7 +59,7 @@ forbid (
|
|
|
59
59
|
@tags("agentic,patterns,security")
|
|
60
60
|
forbid (
|
|
61
61
|
principal,
|
|
62
|
-
action,
|
|
62
|
+
action == Guardrails::Action::"call_tool",
|
|
63
63
|
resource
|
|
64
64
|
) when {
|
|
65
65
|
context has sequence_risk && context.sequence_risk > 80
|
|
@@ -72,7 +72,7 @@ forbid (
|
|
|
72
72
|
@tags("agentic,budget,cost-control")
|
|
73
73
|
forbid (
|
|
74
74
|
principal,
|
|
75
|
-
action,
|
|
75
|
+
action == Guardrails::Action::"call_tool",
|
|
76
76
|
resource
|
|
77
77
|
) when {
|
|
78
78
|
context has budget_exceeded && context.budget_exceeded == true
|
|
@@ -85,7 +85,7 @@ forbid (
|
|
|
85
85
|
@tags("agentic,budget,cost-control")
|
|
86
86
|
forbid (
|
|
87
87
|
principal,
|
|
88
|
-
action,
|
|
88
|
+
action == Guardrails::Action::"call_tool",
|
|
89
89
|
resource
|
|
90
90
|
) when {
|
|
91
91
|
context has budget_remaining_pct &&
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
@tags("injection,jailbreak,security")
|
|
23
23
|
forbid (
|
|
24
24
|
principal,
|
|
25
|
-
action,
|
|
25
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
26
26
|
resource
|
|
27
27
|
) when {
|
|
28
28
|
context has injection_confidence && context.injection_confidence > 85
|
|
@@ -35,7 +35,7 @@ forbid (
|
|
|
35
35
|
@tags("jailbreak,security")
|
|
36
36
|
forbid (
|
|
37
37
|
principal,
|
|
38
|
-
action,
|
|
38
|
+
action == Guardrails::Action::"process_prompt",
|
|
39
39
|
resource
|
|
40
40
|
) when {
|
|
41
41
|
context has jailbreak_confidence && context.jailbreak_confidence > 80
|
|
@@ -48,7 +48,7 @@ forbid (
|
|
|
48
48
|
@tags("injection,jailbreak,security")
|
|
49
49
|
forbid (
|
|
50
50
|
principal,
|
|
51
|
-
action,
|
|
51
|
+
action == Guardrails::Action::"process_prompt",
|
|
52
52
|
resource
|
|
53
53
|
) when {
|
|
54
54
|
context has injection_confidence && context has jailbreak_confidence &&
|
|
@@ -62,7 +62,7 @@ forbid (
|
|
|
62
62
|
@tags("injection,unicode,security")
|
|
63
63
|
forbid (
|
|
64
64
|
principal,
|
|
65
|
-
action,
|
|
65
|
+
action == Guardrails::Action::"process_prompt",
|
|
66
66
|
resource
|
|
67
67
|
) when {
|
|
68
68
|
context has contains_invisible_chars && context.contains_invisible_chars == true &&
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
@tags("secrets,security,data-leak")
|
|
21
21
|
forbid (
|
|
22
22
|
principal,
|
|
23
|
-
action,
|
|
23
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
24
24
|
resource
|
|
25
25
|
) when {
|
|
26
26
|
context has contains_secrets && context.contains_secrets == true
|
|
@@ -33,7 +33,7 @@ forbid (
|
|
|
33
33
|
@tags("secrets,security,data-leak")
|
|
34
34
|
forbid (
|
|
35
35
|
principal,
|
|
36
|
-
action,
|
|
36
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
37
37
|
resource
|
|
38
38
|
) when {
|
|
39
39
|
context has secret_count && context.secret_count > 2
|
|
@@ -36,7 +36,7 @@ forbid (
|
|
|
36
36
|
@tags("path-traversal,security")
|
|
37
37
|
forbid (
|
|
38
38
|
principal,
|
|
39
|
-
action,
|
|
39
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
40
40
|
resource
|
|
41
41
|
) when {
|
|
42
42
|
context has path_traversal_detected && context.path_traversal_detected == true &&
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
@tags("toxicity,trust-safety")
|
|
24
24
|
forbid (
|
|
25
25
|
principal,
|
|
26
|
-
action,
|
|
26
|
+
action == Guardrails::Action::"process_prompt",
|
|
27
27
|
resource
|
|
28
28
|
) when {
|
|
29
29
|
(context has violence_score && context.violence_score > 90) ||
|
|
@@ -41,7 +41,7 @@ forbid (
|
|
|
41
41
|
@tags("hate-speech,trust-safety")
|
|
42
42
|
forbid (
|
|
43
43
|
principal,
|
|
44
|
-
action,
|
|
44
|
+
action == Guardrails::Action::"process_prompt",
|
|
45
45
|
resource
|
|
46
46
|
) when {
|
|
47
47
|
context has hate_speech_score && context.hate_speech_score > 75
|
|
@@ -54,7 +54,7 @@ forbid (
|
|
|
54
54
|
@tags("toxicity,trust-safety")
|
|
55
55
|
forbid (
|
|
56
56
|
principal,
|
|
57
|
-
action,
|
|
57
|
+
action == Guardrails::Action::"process_prompt",
|
|
58
58
|
resource
|
|
59
59
|
) when {
|
|
60
60
|
// Any 2 of 6 toxicity scores above 60 triggers a block
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,chat-assistant,pii,privacy")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
context has pii_detected && context.pii_detected == true
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,chat-assistant,injection,security")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
context has injection_confidence && context.injection_confidence > 70
|
|
@@ -28,7 +28,7 @@ forbid (
|
|
|
28
28
|
@tags("profile,chat-assistant,jailbreak,security")
|
|
29
29
|
forbid (
|
|
30
30
|
principal,
|
|
31
|
-
action,
|
|
31
|
+
action == Guardrails::Action::"process_prompt",
|
|
32
32
|
resource
|
|
33
33
|
) when {
|
|
34
34
|
context has jailbreak_confidence && context.jailbreak_confidence > 65
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,chat-assistant,toxicity,trust-safety")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action == Guardrails::Action::"process_prompt",
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
(context has violence_score && context.violence_score > 70) ||
|
|
@@ -31,7 +31,7 @@ forbid (
|
|
|
31
31
|
@tags("profile,chat-assistant,semantic,compliance")
|
|
32
32
|
forbid (
|
|
33
33
|
principal,
|
|
34
|
-
action,
|
|
34
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
35
35
|
resource
|
|
36
36
|
) when {
|
|
37
37
|
context has topic_confidence && context.topic_confidence > 70 &&
|
|
@@ -73,7 +73,7 @@ forbid (
|
|
|
73
73
|
@tags("profile,code-agent,agentic,exfiltration")
|
|
74
74
|
forbid (
|
|
75
75
|
principal,
|
|
76
|
-
action,
|
|
76
|
+
action == Guardrails::Action::"call_tool",
|
|
77
77
|
resource
|
|
78
78
|
) when {
|
|
79
79
|
context has suspicious_pattern && context.suspicious_pattern == true &&
|
|
@@ -89,7 +89,7 @@ forbid (
|
|
|
89
89
|
@tags("profile,code-agent,agentic,patterns")
|
|
90
90
|
forbid (
|
|
91
91
|
principal,
|
|
92
|
-
action,
|
|
92
|
+
action == Guardrails::Action::"call_tool",
|
|
93
93
|
resource
|
|
94
94
|
) when {
|
|
95
95
|
context has sequence_risk && context.sequence_risk > 75
|
|
@@ -102,7 +102,7 @@ forbid (
|
|
|
102
102
|
@tags("profile,code-agent,budget,cost-control")
|
|
103
103
|
forbid (
|
|
104
104
|
principal,
|
|
105
|
-
action,
|
|
105
|
+
action == Guardrails::Action::"call_tool",
|
|
106
106
|
resource
|
|
107
107
|
) when {
|
|
108
108
|
context has budget_exceeded && context.budget_exceeded == true
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
@tags("profile,data-pipeline,pii,privacy")
|
|
16
16
|
forbid (
|
|
17
17
|
principal,
|
|
18
|
-
action,
|
|
18
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
19
19
|
resource
|
|
20
20
|
) when {
|
|
21
21
|
context has pii_detected && context.pii_detected == true
|
|
@@ -28,7 +28,7 @@ forbid (
|
|
|
28
28
|
@tags("profile,data-pipeline,pii,compliance")
|
|
29
29
|
forbid (
|
|
30
30
|
principal,
|
|
31
|
-
action,
|
|
31
|
+
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
|
|
32
32
|
resource
|
|
33
33
|
) when {
|
|
34
34
|
context has pii_types &&
|