@highflame/policy 2.1.4 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/README.md +41 -0
  2. package/_schemas/guardrails/context.json +600 -6
  3. package/_schemas/guardrails/schema.cedarschema +79 -3
  4. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +118 -0
  5. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +4 -4
  6. package/_schemas/guardrails/templates/defaults/injection.cedar +10 -10
  7. package/_schemas/guardrails/templates/defaults/secrets.cedar +2 -2
  8. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +1 -1
  9. package/_schemas/guardrails/templates/defaults/toxicity.cedar +3 -3
  10. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +1 -1
  11. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +4 -4
  12. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +2 -2
  13. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +3 -3
  14. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +1 -1
  15. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +2 -2
  16. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +3 -3
  17. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +157 -0
  18. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +140 -0
  19. package/_schemas/guardrails/templates/templates.json +41 -0
  20. package/_schemas/overwatch/context.json +232 -4
  21. package/_schemas/overwatch/schema.cedarschema +42 -4
  22. package/dist/guardrails-context.gen.d.ts +24 -2
  23. package/dist/guardrails-context.gen.js +24 -2
  24. package/dist/guardrails-defaults.gen.d.ts +1 -1
  25. package/dist/guardrails-defaults.gen.js +524 -36
  26. package/dist/overwatch-context.gen.d.ts +10 -0
  27. package/dist/overwatch-context.gen.js +10 -0
  28. package/dist/service-schemas.gen.d.ts +2 -2
  29. package/dist/service-schemas.gen.js +275 -24
  30. package/package.json +1 -1
  31. package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
  32. package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
  33. package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
@@ -54,7 +54,7 @@ const GUARDRAILS_SECRETS_DEFAULT_CEDAR = `// ===================================
54
54
  @tags("secrets,security,data-leak")
55
55
  forbid (
56
56
  principal,
57
- action,
57
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
58
58
  resource
59
59
  ) when {
60
60
  context has contains_secrets && context.contains_secrets == true
@@ -67,7 +67,7 @@ forbid (
67
67
  @tags("secrets,security,data-leak")
68
68
  forbid (
69
69
  principal,
70
- action,
70
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
71
71
  resource
72
72
  ) when {
73
73
  context has secret_count && context.secret_count > 2
@@ -80,8 +80,8 @@ const GUARDRAILS_INJECTION_DEFAULT_CEDAR = `// =================================
80
80
  // Uses ML-based confidence scores from normalized context.
81
81
  //
82
82
  // Context keys used (normalized by projection layer):
83
- // - injection_score: Long (0-100) - Overall injection confidence
84
- // - jailbreak_score: Long (0-100) - Jailbreak attempt confidence
83
+ // - injection_confidence: Long (0-100) - Overall injection confidence
84
+ // - jailbreak_confidence: Long (0-100) - Jailbreak attempt confidence
85
85
  // - injection_type: String - Type of injection detected
86
86
  // - contains_invisible_chars: Bool - Invisible Unicode characters detected
87
87
  // - invisible_chars_score: Long (0-100) - Invisible character density
@@ -97,10 +97,10 @@ const GUARDRAILS_INJECTION_DEFAULT_CEDAR = `// =================================
97
97
  @tags("injection,jailbreak,security")
98
98
  forbid (
99
99
  principal,
100
- action,
100
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
101
101
  resource
102
102
  ) when {
103
- context has injection_score && context.injection_score > 85
103
+ context has injection_confidence && context.injection_confidence > 85
104
104
  };
105
105
 
106
106
  @id("jailbreak-block-high-confidence")
@@ -110,10 +110,10 @@ forbid (
110
110
  @tags("jailbreak,security")
111
111
  forbid (
112
112
  principal,
113
- action,
113
+ action == Guardrails::Action::"process_prompt",
114
114
  resource
115
115
  ) when {
116
- context has jailbreak_score && context.jailbreak_score > 80
116
+ context has jailbreak_confidence && context.jailbreak_confidence > 80
117
117
  };
118
118
 
119
119
  @id("injection-combined-threshold")
@@ -123,11 +123,11 @@ forbid (
123
123
  @tags("injection,jailbreak,security")
124
124
  forbid (
125
125
  principal,
126
- action,
126
+ action == Guardrails::Action::"process_prompt",
127
127
  resource
128
128
  ) when {
129
- context has injection_score && context has jailbreak_score &&
130
- context.injection_score > 60 && context.jailbreak_score > 60
129
+ context has injection_confidence && context has jailbreak_confidence &&
130
+ context.injection_confidence > 60 && context.jailbreak_confidence > 60
131
131
  };
132
132
 
133
133
  @id("injection-invisible-chars")
@@ -137,7 +137,7 @@ forbid (
137
137
  @tags("injection,unicode,security")
138
138
  forbid (
139
139
  principal,
140
- action,
140
+ action == Guardrails::Action::"process_prompt",
141
141
  resource
142
142
  ) when {
143
143
  context has contains_invisible_chars && context.contains_invisible_chars == true &&
@@ -218,7 +218,7 @@ const GUARDRAILS_TOXICITY_DEFAULT_CEDAR = `// ==================================
218
218
  @tags("toxicity,trust-safety")
219
219
  forbid (
220
220
  principal,
221
- action,
221
+ action == Guardrails::Action::"process_prompt",
222
222
  resource
223
223
  ) when {
224
224
  (context has violence_score && context.violence_score > 90) ||
@@ -236,7 +236,7 @@ forbid (
236
236
  @tags("hate-speech,trust-safety")
237
237
  forbid (
238
238
  principal,
239
- action,
239
+ action == Guardrails::Action::"process_prompt",
240
240
  resource
241
241
  ) when {
242
242
  context has hate_speech_score && context.hate_speech_score > 75
@@ -249,7 +249,7 @@ forbid (
249
249
  @tags("toxicity,trust-safety")
250
250
  forbid (
251
251
  principal,
252
- action,
252
+ action == Guardrails::Action::"process_prompt",
253
253
  resource
254
254
  ) when {
255
255
  // Any 2 of 6 toxicity scores above 60 triggers a block
@@ -371,7 +371,7 @@ forbid (
371
371
  @tags("agentic,exfiltration,security")
372
372
  forbid (
373
373
  principal,
374
- action,
374
+ action == Guardrails::Action::"call_tool",
375
375
  resource
376
376
  ) when {
377
377
  context has suspicious_pattern && context.suspicious_pattern == true &&
@@ -390,7 +390,7 @@ forbid (
390
390
  @tags("agentic,patterns,security")
391
391
  forbid (
392
392
  principal,
393
- action,
393
+ action == Guardrails::Action::"call_tool",
394
394
  resource
395
395
  ) when {
396
396
  context has sequence_risk && context.sequence_risk > 80
@@ -403,7 +403,7 @@ forbid (
403
403
  @tags("agentic,budget,cost-control")
404
404
  forbid (
405
405
  principal,
406
- action,
406
+ action == Guardrails::Action::"call_tool",
407
407
  resource
408
408
  ) when {
409
409
  context has budget_exceeded && context.budget_exceeded == true
@@ -416,7 +416,7 @@ forbid (
416
416
  @tags("agentic,budget,cost-control")
417
417
  forbid (
418
418
  principal,
419
- action,
419
+ action == Guardrails::Action::"call_tool",
420
420
  resource
421
421
  ) when {
422
422
  context has budget_remaining_pct &&
@@ -507,7 +507,7 @@ forbid (
507
507
  @tags("path-traversal,security")
508
508
  forbid (
509
509
  principal,
510
- action,
510
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
511
511
  resource
512
512
  ) when {
513
513
  context has path_traversal_detected && context.path_traversal_detected == true &&
@@ -529,6 +529,125 @@ forbid (
529
529
  context has sql_injection_score && context.sql_injection_score >= 75
530
530
  };
531
531
  `;
532
+ const GUARDRAILS_AGENT_IDENTITY_DEFAULT_CEDAR = `// =============================================================================
533
+ // Agent Identity Policy — Agent-to-Agent Security Defaults
534
+ // =============================================================================
535
+ // Enforces trust-based access control for AI agents authenticated via API key
536
+ // or JWT with agent claims. These policies use agent identity context keys
537
+ // populated from Shield's authentication layer.
538
+ //
539
+ // Agent trust levels:
540
+ // - first_party: Your own agents (highest trust)
541
+ // - verified_third_party: Audited external agents (medium trust)
542
+ // - unverified: Unknown/untrusted agents (lowest trust)
543
+ //
544
+ // Agent types:
545
+ // - orchestrator: Coordinates sub-agents
546
+ // - autonomous: Self-directed, no human in the loop
547
+ // - tool_agent: Single-purpose tool execution
548
+ // - human_proxy: Acts on behalf of a human
549
+ //
550
+ // Context keys used:
551
+ // - agent_id: String - Unique agent identifier
552
+ // - agent_type: String - Agent classification
553
+ // - agent_trust_level: String - Trust tier
554
+ // - agent_framework: String - Agent framework/SDK
555
+ // - agent_publisher: String - Publishing organization
556
+ // - tool_name: String - Tool being called
557
+ // - tool_category: String - "safe" | "sensitive" | "dangerous"
558
+ // - tool_risk_score: Long (0-100) - Computed risk score
559
+ // - injection_confidence: Long (0-100) - Injection detection score
560
+ //
561
+ // Category: agent_identity
562
+ // Namespace: Guardrails
563
+ // =============================================================================
564
+
565
+ // -----------------------------------------------------------------------------
566
+ // Trust-Based Tool Access
567
+ // -----------------------------------------------------------------------------
568
+
569
+ @id("agent-block-unverified-dangerous-tools")
570
+ @name("Block unverified agents from dangerous tools")
571
+ @description("Unverified agents cannot execute tools classified as dangerous. Require first_party or verified_third_party trust level for high-risk operations")
572
+ @severity("critical")
573
+ @tags("agent-identity,trust,tools,a2a")
574
+ forbid (
575
+ principal is Guardrails::Agent,
576
+ action == Guardrails::Action::"call_tool",
577
+ resource
578
+ ) when {
579
+ context.agent_trust_level == "unverified" &&
580
+ context has tool_category && context.tool_category == "dangerous"
581
+ };
582
+
583
+ @id("agent-block-unverified-shell")
584
+ @name("Block unverified agents from shell execution")
585
+ @description("Shell and command execution are restricted to first_party and verified_third_party agents")
586
+ @severity("critical")
587
+ @tags("agent-identity,trust,shell,a2a")
588
+ forbid (
589
+ principal is Guardrails::Agent,
590
+ action == Guardrails::Action::"call_tool",
591
+ resource
592
+ ) when {
593
+ context.agent_trust_level == "unverified" &&
594
+ context has tool_name &&
595
+ (context.tool_name == "shell" ||
596
+ context.tool_name == "execute_command" ||
597
+ context.tool_name == "bash")
598
+ };
599
+
600
+ @id("agent-block-unverified-sensitive-tools")
601
+ @name("Block unverified agents from sensitive tools with elevated risk")
602
+ @description("Unverified agents cannot execute sensitive tools with risk score above 60")
603
+ @severity("high")
604
+ @tags("agent-identity,trust,tools,a2a")
605
+ forbid (
606
+ principal is Guardrails::Agent,
607
+ action == Guardrails::Action::"call_tool",
608
+ resource
609
+ ) when {
610
+ context.agent_trust_level == "unverified" &&
611
+ context has tool_is_sensitive && context.tool_is_sensitive == true &&
612
+ context has tool_risk_score && context.tool_risk_score > 60
613
+ };
614
+
615
+ // -----------------------------------------------------------------------------
616
+ // Autonomous Agent Restrictions
617
+ // -----------------------------------------------------------------------------
618
+
619
+ @id("agent-block-autonomous-injection")
620
+ @name("Stricter injection threshold for autonomous agents")
621
+ @description("Autonomous agents operate without human oversight. Apply lower injection confidence threshold (50 vs standard 80) to compensate for lack of human review")
622
+ @severity("high")
623
+ @tags("agent-identity,autonomous,injection,a2a")
624
+ forbid (
625
+ principal is Guardrails::Agent,
626
+ action == Guardrails::Action::"process_prompt",
627
+ resource
628
+ ) when {
629
+ context.agent_type == "autonomous" &&
630
+ context has injection_confidence && context.injection_confidence > 50
631
+ };
632
+
633
+ // -----------------------------------------------------------------------------
634
+ // Cross-Turn Agent Trust Enforcement
635
+ // -----------------------------------------------------------------------------
636
+
637
+ @id("agent-block-unverified-after-threats")
638
+ @name("Block unverified agents after session threats")
639
+ @description("If any prior turn in the session detected threats, block unverified agents from further tool calls. Prevents compromised sessions from being exploited by untrusted agents")
640
+ @severity("critical")
641
+ @tags("agent-identity,trust,session,cross-turn,a2a")
642
+ forbid (
643
+ principal is Guardrails::Agent,
644
+ action == Guardrails::Action::"call_tool",
645
+ resource
646
+ ) when {
647
+ context.agent_trust_level == "unverified" &&
648
+ context has session_threat_turns && context.session_threat_turns > 0
649
+ };
650
+ `;
532
651
  const GUARDRAILS_MCP_TOOL_PERMISSIONS_CEDAR = `// =============================================================================
533
652
  // MCP Tool Permissions Template
534
653
  // =============================================================================
@@ -631,10 +750,10 @@ const GUARDRAILS_CHAT_ASSISTANT_SECURITY_CEDAR = `// ===========================
631
750
  @tags("profile,chat-assistant,injection,security")
632
751
  forbid (
633
752
  principal,
634
- action,
753
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
635
754
  resource
636
755
  ) when {
637
- context has injection_score && context.injection_score > 70
756
+ context has injection_confidence && context.injection_confidence > 70
638
757
  };
639
758
 
640
759
  @id("chat-jailbreak-lower-threshold")
@@ -644,10 +763,10 @@ forbid (
644
763
  @tags("profile,chat-assistant,jailbreak,security")
645
764
  forbid (
646
765
  principal,
647
- action,
766
+ action == Guardrails::Action::"process_prompt",
648
767
  resource
649
768
  ) when {
650
- context has jailbreak_score && context.jailbreak_score > 65
769
+ context has jailbreak_confidence && context.jailbreak_confidence > 65
651
770
  };
652
771
  `;
653
772
  const GUARDRAILS_CHAT_ASSISTANT_PRIVACY_CEDAR = `// =============================================================================
@@ -667,7 +786,7 @@ const GUARDRAILS_CHAT_ASSISTANT_PRIVACY_CEDAR = `// ============================
667
786
  @tags("profile,chat-assistant,pii,privacy")
668
787
  forbid (
669
788
  principal,
670
- action,
789
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
671
790
  resource
672
791
  ) when {
673
792
  context has pii_detected && context.pii_detected == true
@@ -690,7 +809,7 @@ const GUARDRAILS_CHAT_ASSISTANT_TRUST_SAFETY_CEDAR = `// =======================
690
809
  @tags("profile,chat-assistant,toxicity,trust-safety")
691
810
  forbid (
692
811
  principal,
693
- action,
812
+ action == Guardrails::Action::"process_prompt",
694
813
  resource
695
814
  ) when {
696
815
  (context has violence_score && context.violence_score > 70) ||
@@ -706,7 +825,7 @@ forbid (
706
825
  @tags("profile,chat-assistant,semantic,compliance")
707
826
  forbid (
708
827
  principal,
709
- action,
828
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
710
829
  resource
711
830
  ) when {
712
831
  context has topic_confidence && context.topic_confidence > 70 &&
@@ -792,7 +911,7 @@ forbid (
792
911
  @tags("profile,code-agent,agentic,exfiltration")
793
912
  forbid (
794
913
  principal,
795
- action,
914
+ action == Guardrails::Action::"call_tool",
796
915
  resource
797
916
  ) when {
798
917
  context has suspicious_pattern && context.suspicious_pattern == true &&
@@ -808,7 +927,7 @@ forbid (
808
927
  @tags("profile,code-agent,agentic,patterns")
809
928
  forbid (
810
929
  principal,
811
- action,
930
+ action == Guardrails::Action::"call_tool",
812
931
  resource
813
932
  ) when {
814
933
  context has sequence_risk && context.sequence_risk > 75
@@ -821,7 +940,7 @@ forbid (
821
940
  @tags("profile,code-agent,budget,cost-control")
822
941
  forbid (
823
942
  principal,
824
- action,
943
+ action == Guardrails::Action::"call_tool",
825
944
  resource
826
945
  ) when {
827
946
  context has budget_exceeded && context.budget_exceeded == true
@@ -867,7 +986,7 @@ const GUARDRAILS_DATA_PIPELINE_PRIVACY_CEDAR = `// =============================
867
986
  @tags("profile,data-pipeline,pii,privacy")
868
987
  forbid (
869
988
  principal,
870
- action,
989
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
871
990
  resource
872
991
  ) when {
873
992
  context has pii_detected && context.pii_detected == true
@@ -880,7 +999,7 @@ forbid (
880
999
  @tags("profile,data-pipeline,pii,compliance")
881
1000
  forbid (
882
1001
  principal,
883
- action,
1002
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
884
1003
  resource
885
1004
  ) when {
886
1005
  context has pii_types &&
@@ -908,7 +1027,7 @@ const GUARDRAILS_DATA_PIPELINE_SECURITY_CEDAR = `// ============================
908
1027
  @tags("profile,data-pipeline,secrets,security")
909
1028
  forbid (
910
1029
  principal,
911
- action,
1030
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
912
1031
  resource
913
1032
  ) when {
914
1033
  context has contains_secrets && context.contains_secrets == true
@@ -935,10 +1054,10 @@ forbid (
935
1054
  @tags("profile,data-pipeline,injection,security")
936
1055
  forbid (
937
1056
  principal,
938
- action,
1057
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
939
1058
  resource
940
1059
  ) when {
941
- context has injection_score && context.injection_score > 65
1060
+ context has injection_confidence && context.injection_confidence > 65
942
1061
  };
943
1062
  `;
944
1063
  const GUARDRAILS_DATA_PIPELINE_AGENTIC_SECURITY_CEDAR = `// =============================================================================
@@ -958,7 +1077,7 @@ const GUARDRAILS_DATA_PIPELINE_AGENTIC_SECURITY_CEDAR = `// ====================
958
1077
  @tags("profile,data-pipeline,exfiltration,security")
959
1078
  forbid (
960
1079
  principal,
961
- action,
1080
+ action == Guardrails::Action::"call_tool",
962
1081
  resource
963
1082
  ) when {
964
1083
  context has suspicious_pattern && context.suspicious_pattern == true &&
@@ -980,6 +1099,305 @@ forbid (
980
1099
  context has tool_risk_score && context.tool_risk_score > 60
981
1100
  };
982
1101
  `;
1102
+ const GUARDRAILS_MULTI_AGENT_TRUST_CEDAR = `// =============================================================================
1103
+ // Multi-Agent Orchestration — Agent Trust Policies
1104
+ // =============================================================================
1105
+ // Production-grade trust policies for multi-agent systems where an orchestrator
1106
+ // coordinates sub-agents with varying trust levels. These policies enforce
1107
+ // least-privilege access: each agent gets only the permissions its trust level
1108
+ // and type warrant.
1109
+ //
1110
+ // Architecture supported:
1111
+ // Orchestrator (first_party)
1112
+ // ├── Research Agent (verified_third_party, autonomous)
1113
+ // ├── Code Agent (first_party, tool_agent)
1114
+ // └── External Plugin (unverified, tool_agent)
1115
+ //
1116
+ // Context keys used:
1117
+ // - agent_id: String - Unique agent identifier
1118
+ // - agent_type: String - orchestrator | autonomous | tool_agent | human_proxy
1119
+ // - agent_trust_level: String - first_party | verified_third_party | unverified
1120
+ // - agent_framework: String - Agent framework/SDK
1121
+ // - agent_publisher: String - Publishing organization
1122
+ // - tool_name: String - Tool being called
1123
+ // - tool_category: String - safe | sensitive | dangerous
1124
+ // - tool_risk_score: Long (0-100) - Computed risk score
1125
+ // - tool_is_sensitive: Bool - Whether tool is sensitive
1126
+ // - mcp_server_verified: Bool - Whether MCP server is verified
1127
+ // - injection_confidence: Long (0-100) - Injection detection score
1128
+ // - jailbreak_confidence: Long (0-100) - Jailbreak detection score
1129
+ //
1130
+ // Category: agent_identity
1131
+ // Namespace: Guardrails
1132
+ // =============================================================================
1133
+
1134
+ // -----------------------------------------------------------------------------
1135
+ // Tiered Tool Access — The Core A2A Trust Model
1136
+ // -----------------------------------------------------------------------------
1137
+
1138
+ @id("multi-agent-only-first-party-dangerous")
1139
+ @name("Only first-party agents can use dangerous tools")
1140
+ @description("Dangerous tools (shell, delete_file, run_sql) are restricted to first-party agents only. Third-party and unverified agents are blocked regardless of other signals")
1141
+ @severity("critical")
1142
+ @tags("profile,multi-agent,trust,tools,a2a")
1143
+ forbid (
1144
+ principal is Guardrails::Agent,
1145
+ action == Guardrails::Action::"call_tool",
1146
+ resource
1147
+ ) when {
1148
+ context.agent_trust_level != "first_party" &&
1149
+ context has tool_category && context.tool_category == "dangerous"
1150
+ };
1151
+
1152
+ @id("multi-agent-block-unverified-sensitive")
1153
+ @name("Block unverified agents from all sensitive tools")
1154
+ @description("Unverified agents can only use tools classified as safe. Sensitive tools (write_file, http_post, send_email) require at least verified_third_party trust")
1155
+ @severity("high")
1156
+ @tags("profile,multi-agent,trust,tools,a2a")
1157
+ forbid (
1158
+ principal is Guardrails::Agent,
1159
+ action == Guardrails::Action::"call_tool",
1160
+ resource
1161
+ ) when {
1162
+ context.agent_trust_level == "unverified" &&
1163
+ context has tool_is_sensitive && context.tool_is_sensitive == true
1164
+ };
1165
+
1166
+ @id("multi-agent-block-unverified-mcp")
1167
+ @name("Block unverified agents from unverified MCP servers")
1168
+ @description("Unverified agents cannot call tools from unverified MCP servers. Double-unverified (agent + server) presents unacceptable supply chain risk")
1169
+ @severity("critical")
1170
+ @tags("profile,multi-agent,trust,mcp,a2a")
1171
+ forbid (
1172
+ principal is Guardrails::Agent,
1173
+ action == Guardrails::Action::"call_tool",
1174
+ resource
1175
+ ) when {
1176
+ context.agent_trust_level == "unverified" &&
1177
+ context has mcp_server_verified && context.mcp_server_verified == false
1178
+ };
1179
+
1180
+ // -----------------------------------------------------------------------------
1181
+ // Autonomous Agent Safeguards
1182
+ // -----------------------------------------------------------------------------
1183
+
1184
+ @id("multi-agent-autonomous-tool-risk-cap")
1185
+ @name("Cap tool risk for autonomous agents")
1186
+ @description("Autonomous agents (no human in the loop) have a lower tool risk ceiling. Tools with risk > 70 require human oversight that autonomous agents lack")
1187
+ @severity("high")
1188
+ @tags("profile,multi-agent,autonomous,tools,a2a")
1189
+ forbid (
1190
+ principal is Guardrails::Agent,
1191
+ action == Guardrails::Action::"call_tool",
1192
+ resource
1193
+ ) when {
1194
+ context.agent_type == "autonomous" &&
1195
+ context has tool_risk_score && context.tool_risk_score > 70
1196
+ };
1197
+
1198
+ @id("multi-agent-autonomous-injection-defense")
1199
+ @name("Enhanced injection defense for autonomous agents")
1200
+ @description("Autonomous agents are high-value targets for injection. Lower the threshold to 50 (vs 80 standard) since there is no human to catch false negatives")
1201
+ @severity("high")
1202
+ @tags("profile,multi-agent,autonomous,injection,a2a")
1203
+ forbid (
1204
+ principal is Guardrails::Agent,
1205
+ action == Guardrails::Action::"process_prompt",
1206
+ resource
1207
+ ) when {
1208
+ context.agent_type == "autonomous" &&
1209
+ context has injection_confidence && context.injection_confidence > 50
1210
+ };
1211
+
1212
+ @id("multi-agent-autonomous-jailbreak-defense")
1213
+ @name("Enhanced jailbreak defense for autonomous agents")
1214
+ @description("Lower jailbreak threshold for autonomous agents. Without human review, we must be more conservative")
1215
+ @severity("high")
1216
+ @tags("profile,multi-agent,autonomous,jailbreak,a2a")
1217
+ forbid (
1218
+ principal is Guardrails::Agent,
1219
+ action == Guardrails::Action::"process_prompt",
1220
+ resource
1221
+ ) when {
1222
+ context.agent_type == "autonomous" &&
1223
+ context has jailbreak_confidence && context.jailbreak_confidence > 50
1224
+ };
1225
+
1226
+ // -----------------------------------------------------------------------------
1227
+ // MCP Server Connection Trust
1228
+ // -----------------------------------------------------------------------------
1229
+
1230
+ @id("multi-agent-block-unverified-server-connect")
1231
+ @name("Block unverified agents from connecting to MCP servers")
1232
+ @description("Unverified agents cannot establish new MCP server connections. Limits blast radius of compromised or rogue agents")
1233
+ @severity("high")
1234
+ @tags("profile,multi-agent,trust,mcp,connect,a2a")
1235
+ forbid (
1236
+ principal is Guardrails::Agent,
1237
+ action == Guardrails::Action::"connect_server",
1238
+ resource
1239
+ ) when {
1240
+ context.agent_trust_level == "unverified"
1241
+ };
1242
+ `;
1243
+ const GUARDRAILS_MULTI_AGENT_SAFETY_CEDAR = `// =============================================================================
1244
+ // Multi-Agent Orchestration — Cross-Turn Agent Safety
1245
+ // =============================================================================
1246
+ // Session-aware policies that use cross-turn detection history combined with
1247
+ // agent identity for defense-in-depth. These policies handle the scenario
1248
+ // where one agent in a multi-agent session detects a threat, and subsequent
1249
+ // agents in the same session must be restricted accordingly.
1250
+ //
1251
+ // Key insight: In multi-agent orchestration, Agent A may detect PII in turn 3,
1252
+ // and Agent B (a different agent) arrives in turn 5 wanting to call http_post.
1253
+ // Cross-turn session flags + agent trust level enable this policy:
1254
+ // "If PII was seen AND this agent is unverified → block network tools."
1255
+ //
1256
+ // Context keys used:
1257
+ // - agent_trust_level: String - Trust tier of the current agent
1258
+ // - agent_type: String - Agent classification
1259
+ // - session_pii_detected: Bool - PII seen in any prior turn
1260
+ // - session_pii_types: Set<String> - PII types accumulated
1261
+ // - session_secrets_detected: Bool - Secrets seen in any prior turn
1262
+ // - session_injection_detected: Bool - Injection seen in any prior turn
1263
+ // - session_command_injection: Bool - Command injection in any prior turn
1264
+ // - session_threat_turns: Long - Count of turns with threats
1265
+ // - session_cumulative_risk_score: Long - Total accumulated risk
1266
+ // - tool_name: String - Tool being called
1267
+ // - tool_is_sensitive: Bool - Whether tool is sensitive
1268
+ // - suspicious_pattern: Bool - Whether exfiltration pattern detected
1269
+ //
1270
+ // Category: agent_identity
1271
+ // Namespace: Guardrails
1272
+ // =============================================================================
1273
+
1274
+ // -----------------------------------------------------------------------------
1275
+ // PII Containment — Prevent Agent Data Leakage
1276
+ // -----------------------------------------------------------------------------
1277
+
1278
+ @id("multi-agent-pii-block-network-tools")
1279
+ @name("Block network tools after PII detection for non-first-party agents")
1280
+ @description("If PII was detected in any prior turn, block non-first-party agents from calling network-facing tools. Prevents data exfiltration by untrusted agents in sessions containing sensitive data")
1281
+ @severity("critical")
1282
+ @tags("profile,multi-agent,pii,exfiltration,cross-turn,a2a")
1283
+ forbid (
1284
+ principal is Guardrails::Agent,
1285
+ action == Guardrails::Action::"call_tool",
1286
+ resource
1287
+ ) when {
1288
+ context.agent_trust_level != "first_party" &&
1289
+ context has session_pii_detected && context.session_pii_detected == true &&
1290
+ context has tool_name &&
1291
+ (context.tool_name == "http_post" ||
1292
+ context.tool_name == "send_email" ||
1293
+ context.tool_name == "http_request" ||
1294
+ context.tool_name == "webhook")
1295
+ };
1296
+
1297
+ @id("multi-agent-pii-block-unverified-file-write")
1298
+ @name("Block unverified agents from writing files after PII detection")
1299
+ @description("If PII was detected in the session, unverified agents cannot write files. Prevents PII persistence by untrusted agents")
1300
+ @severity("high")
1301
+ @tags("profile,multi-agent,pii,file-write,cross-turn,a2a")
1302
+ forbid (
1303
+ principal is Guardrails::Agent,
1304
+ action == Guardrails::Action::"write_file",
1305
+ resource
1306
+ ) when {
1307
+ context.agent_trust_level == "unverified" &&
1308
+ context has session_pii_detected && context.session_pii_detected == true
1309
+ };
1310
+
1311
+ // -----------------------------------------------------------------------------
1312
+ // Secrets Containment — Lock Down After Credential Exposure
1313
+ // -----------------------------------------------------------------------------
1314
+
1315
+ @id("multi-agent-secrets-lockdown")
1316
+ @name("Lock down sensitive tools after secrets detection")
1317
+ @description("If secrets (API keys, tokens) were detected in any prior turn, block all non-first-party agents from sensitive tool calls. Prevents credential exfiltration in compromised sessions")
1318
+ @severity("critical")
1319
+ @tags("profile,multi-agent,secrets,lockdown,cross-turn,a2a")
1320
+ forbid (
1321
+ principal is Guardrails::Agent,
1322
+ action == Guardrails::Action::"call_tool",
1323
+ resource
1324
+ ) when {
1325
+ context.agent_trust_level != "first_party" &&
1326
+ context has session_secrets_detected && context.session_secrets_detected == true &&
1327
+ context has tool_is_sensitive && context.tool_is_sensitive == true
1328
+ };
1329
+
1330
+ // -----------------------------------------------------------------------------
1331
+ // Injection Escalation — Tighten After Prior Attacks
1332
+ // -----------------------------------------------------------------------------
1333
+
1334
+ @id("multi-agent-post-injection-lockdown")
1335
+ @name("Restrict unverified agents after injection detection")
1336
+ @description("If injection was detected in any prior turn, block unverified agents from all tool calls. An injection in a prior turn may have poisoned the context, making subsequent unverified agent actions high risk")
1337
+ @severity("critical")
1338
+ @tags("profile,multi-agent,injection,lockdown,cross-turn,a2a")
1339
+ forbid (
1340
+ principal is Guardrails::Agent,
1341
+ action == Guardrails::Action::"call_tool",
1342
+ resource
1343
+ ) when {
1344
+ context.agent_trust_level == "unverified" &&
1345
+ context has session_injection_detected && context.session_injection_detected == true
1346
+ };
1347
+
1348
+ @id("multi-agent-post-command-injection-block-shell")
1349
+ @name("Block all agent shell access after command injection")
1350
+ @description("If command injection was detected in any prior turn, no agent (regardless of trust) can execute shell commands. Defense against persistent shell compromise")
1351
+ @severity("critical")
1352
+ @tags("profile,multi-agent,command-injection,shell,cross-turn,a2a")
1353
+ forbid (
1354
+ principal is Guardrails::Agent,
1355
+ action == Guardrails::Action::"call_tool",
1356
+ resource
1357
+ ) when {
1358
+ context has session_command_injection && context.session_command_injection == true &&
1359
+ context has tool_name &&
1360
+ (context.tool_name == "shell" ||
1361
+ context.tool_name == "execute_command" ||
1362
+ context.tool_name == "bash")
1363
+ };
1364
+
1365
+ // -----------------------------------------------------------------------------
1366
+ // Cumulative Risk — Session-Level Circuit Breaker
1367
+ // -----------------------------------------------------------------------------
1368
+
1369
+ @id("multi-agent-high-cumulative-risk-restrict")
1370
+ @name("Restrict non-first-party agents in high-risk sessions")
1371
+ @description("When cumulative session risk exceeds 200, restrict non-first-party agents to safe tools only. Acts as a circuit breaker for sessions that have accumulated multiple risk signals across turns")
1372
+ @severity("high")
1373
+ @tags("profile,multi-agent,cumulative-risk,circuit-breaker,a2a")
1374
+ forbid (
1375
+ principal is Guardrails::Agent,
1376
+ action == Guardrails::Action::"call_tool",
1377
+ resource
1378
+ ) when {
1379
+ context.agent_trust_level != "first_party" &&
1380
+ context has session_cumulative_risk_score && context.session_cumulative_risk_score > 200 &&
1381
+ context has tool_is_sensitive && context.tool_is_sensitive == true
1382
+ };
1383
+
1384
+ @id("multi-agent-extreme-risk-full-lockdown")
1385
+ @name("Full lockdown for unverified agents in extreme-risk sessions")
1386
+ @description("When cumulative session risk exceeds 500 or more than 5 threat turns are detected, block ALL tool calls from unverified agents. Emergency circuit breaker for compromised sessions")
1387
+ @severity("critical")
1388
+ @tags("profile,multi-agent,extreme-risk,lockdown,a2a")
1389
+ forbid (
1390
+ principal is Guardrails::Agent,
1391
+ action == Guardrails::Action::"call_tool",
1392
+ resource
1393
+ ) when {
1394
+ context.agent_trust_level == "unverified" &&
1395
+ (
1396
+ (context has session_cumulative_risk_score && context.session_cumulative_risk_score > 500) ||
1397
+ (context has session_threat_turns && context.session_threat_turns > 5)
1398
+ )
1399
+ };
1400
+ `;
983
1401
  // =============================================================================
984
1402
  // CATEGORIES
985
1403
  // =============================================================================
@@ -988,6 +1406,7 @@ export const GUARDRAILS_CATEGORIES = [
988
1406
  { id: 'privacy', name: 'Privacy', description: 'Detect and block personally identifiable information (PII) in prompts and responses' },
989
1407
  { id: 'trust_safety', name: 'Trust & Safety', description: 'Detect and block toxic, violent, hateful, sexual, or profane content' },
990
1408
  { id: 'agentic_security', name: 'Agentic Security', description: 'Detect tool abuse, data exfiltration patterns, infinite loops, and budget violations' },
1409
+ { id: 'agent_identity', name: 'Agent-to-Agent Security', description: 'Trust-based access control for AI agents — tiered permissions by trust level, agent type restrictions, cross-turn session lockdowns for multi-agent orchestration' },
991
1410
  { id: 'organization', name: 'Organization', description: 'Organization-wide baselines and default permit/deny policies' },
992
1411
  ];
993
1412
  // =============================================================================
@@ -1074,6 +1493,16 @@ export const GUARDRAILS_DEFAULTS = [
1074
1493
  tags: ['command-injection', 'path-traversal', 'sql-injection', 'security'],
1075
1494
  isActive: true,
1076
1495
  },
1496
+ {
1497
+ id: 'agent-identity-default',
1498
+ name: 'Agent Identity & Trust',
1499
+ description: 'Trust-based access control for AI agents: block unverified agents from dangerous/sensitive tools, apply stricter thresholds for autonomous agents, restrict unverified agents after session threats',
1500
+ category: 'agent_identity',
1501
+ cedarText: GUARDRAILS_AGENT_IDENTITY_DEFAULT_CEDAR,
1502
+ severity: 'critical',
1503
+ tags: ['agent-identity', 'trust', 'a2a', 'autonomous', 'cross-turn'],
1504
+ isActive: true,
1505
+ },
1077
1506
  ];
1078
1507
  // =============================================================================
1079
1508
  // ALL TEMPLATES
@@ -1160,6 +1589,24 @@ export const GUARDRAILS_TEMPLATES = [
1160
1589
  severity: 'critical',
1161
1590
  tags: ['profile', 'data-pipeline', 'exfiltration', 'tools'],
1162
1591
  },
1592
+ {
1593
+ id: 'multi-agent-trust',
1594
+ name: 'Multi-Agent Orchestration — Agent Trust',
1595
+ description: 'Tiered trust policies for multi-agent systems: only first-party agents can use dangerous tools, unverified agents restricted to safe tools, autonomous agents have lower risk ceilings, MCP server connection trust enforcement',
1596
+ category: 'agent_identity',
1597
+ cedarText: GUARDRAILS_MULTI_AGENT_TRUST_CEDAR,
1598
+ severity: 'critical',
1599
+ tags: ['profile', 'multi-agent', 'trust', 'a2a', 'autonomous', 'mcp'],
1600
+ },
1601
+ {
1602
+ id: 'multi-agent-safety',
1603
+ name: 'Multi-Agent Orchestration — Cross-Turn Safety',
1604
+ description: 'Session-aware agent safety policies: PII containment across agents, secrets lockdown, injection escalation response, cumulative risk circuit breakers for multi-agent sessions',
1605
+ category: 'agent_identity',
1606
+ cedarText: GUARDRAILS_MULTI_AGENT_SAFETY_CEDAR,
1607
+ severity: 'critical',
1608
+ tags: ['profile', 'multi-agent', 'cross-turn', 'a2a', 'pii', 'secrets', 'injection', 'circuit-breaker'],
1609
+ },
1163
1610
  ];
1164
1611
  // =============================================================================
1165
1612
  // TEMPLATES METADATA
@@ -1190,6 +1637,11 @@ export const GUARDRAILS_TEMPLATES_JSON = `{
1190
1637
  "name": "Agentic Security",
1191
1638
  "description": "Detect tool abuse, data exfiltration patterns, infinite loops, and budget violations"
1192
1639
  },
1640
+ {
1641
+ "id": "agent_identity",
1642
+ "name": "Agent-to-Agent Security",
1643
+ "description": "Trust-based access control for AI agents — tiered permissions by trust level, agent type restrictions, cross-turn session lockdowns for multi-agent orchestration"
1644
+ },
1193
1645
  {
1194
1646
  "id": "organization",
1195
1647
  "name": "Organization",
@@ -1276,6 +1728,16 @@ export const GUARDRAILS_TEMPLATES_JSON = `{
1276
1728
  "severity": "critical",
1277
1729
  "tags": ["command-injection", "path-traversal", "sql-injection", "security"],
1278
1730
  "is_active": true
1731
+ },
1732
+ {
1733
+ "id": "agent-identity-default",
1734
+ "name": "Agent Identity & Trust",
1735
+ "description": "Trust-based access control for AI agents: block unverified agents from dangerous/sensitive tools, apply stricter thresholds for autonomous agents, restrict unverified agents after session threats",
1736
+ "category": "agent_identity",
1737
+ "file": "defaults/agent_identity.cedar",
1738
+ "severity": "critical",
1739
+ "tags": ["agent-identity", "trust", "a2a", "autonomous", "cross-turn"],
1740
+ "is_active": true
1279
1741
  }
1280
1742
  ],
1281
1743
  "templates": [
@@ -1359,6 +1821,24 @@ export const GUARDRAILS_TEMPLATES_JSON = `{
1359
1821
  "file": "profiles/data_pipeline/agentic_security.cedar",
1360
1822
  "severity": "critical",
1361
1823
  "tags": ["profile", "data-pipeline", "exfiltration", "tools"]
1824
+ },
1825
+ {
1826
+ "id": "multi-agent-trust",
1827
+ "name": "Multi-Agent Orchestration — Agent Trust",
1828
+ "description": "Tiered trust policies for multi-agent systems: only first-party agents can use dangerous tools, unverified agents restricted to safe tools, autonomous agents have lower risk ceilings, MCP server connection trust enforcement",
1829
+ "category": "agent_identity",
1830
+ "file": "profiles/multi_agent/agent_trust.cedar",
1831
+ "severity": "critical",
1832
+ "tags": ["profile", "multi-agent", "trust", "a2a", "autonomous", "mcp"]
1833
+ },
1834
+ {
1835
+ "id": "multi-agent-safety",
1836
+ "name": "Multi-Agent Orchestration — Cross-Turn Safety",
1837
+ "description": "Session-aware agent safety policies: PII containment across agents, secrets lockdown, injection escalation response, cumulative risk circuit breakers for multi-agent sessions",
1838
+ "category": "agent_identity",
1839
+ "file": "profiles/multi_agent/agent_safety.cedar",
1840
+ "severity": "critical",
1841
+ "tags": ["profile", "multi-agent", "cross-turn", "a2a", "pii", "secrets", "injection", "circuit-breaker"]
1362
1842
  }
1363
1843
  ],
1364
1844
  "profiles": [
@@ -1385,6 +1865,14 @@ export const GUARDRAILS_TEMPLATES_JSON = `{
1385
1865
  "severity": "critical",
1386
1866
  "tags": ["data-pipeline", "pii", "secrets", "exfiltration"],
1387
1867
  "template_ids": ["data-pipeline-privacy", "data-pipeline-security", "data-pipeline-agentic-security"]
1868
+ },
1869
+ {
1870
+ "id": "multi-agent",
1871
+ "name": "Multi-Agent Orchestration",
1872
+ "description": "Production-grade A2A guardrails for multi-agent systems — tiered trust access control, autonomous agent safeguards, cross-turn PII/secrets containment, injection escalation response, cumulative risk circuit breakers",
1873
+ "severity": "critical",
1874
+ "tags": ["multi-agent", "a2a", "trust", "cross-turn", "circuit-breaker"],
1875
+ "template_ids": ["multi-agent-trust", "multi-agent-safety"]
1388
1876
  }
1389
1877
  ]
1390
1878
  }