@highflame/policy 2.1.36 → 2.1.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/ai_gateway/context.json +431 -11
- package/_schemas/ai_gateway/schema.cedarschema +91 -11
- package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
- package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
- package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
- package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
- package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
- package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
- package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
- package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
- package/_schemas/ai_gateway/templates/templates.json +42 -47
- package/_schemas/guardrails/context.json +12 -12
- package/_schemas/guardrails/schema.cedarschema +12 -12
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
- package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
- package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
- package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
- package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
- package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
- package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
- package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
- package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
- package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
- package/_schemas/guardrails/templates/templates.json +188 -210
- package/_schemas/overwatch/context.json +14 -14
- package/_schemas/overwatch/schema.cedarschema +12 -12
- package/_schemas/sentry/context.json +11 -11
- package/_schemas/sentry/schema.cedarschema +11 -11
- package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
- package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
- package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
- package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
- package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
- package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
- package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
- package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
- package/_schemas/sentry/templates/templates.json +46 -46
- package/dist/ai_gateway-context.gen.d.ts +18 -4
- package/dist/ai_gateway-context.gen.js +18 -4
- package/dist/ai_gateway-defaults.gen.d.ts +1 -1
- package/dist/ai_gateway-defaults.gen.js +377 -313
- package/dist/guardrails-context.gen.d.ts +5 -5
- package/dist/guardrails-context.gen.js +5 -5
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +2070 -1849
- package/dist/overwatch-context.gen.d.ts +5 -5
- package/dist/overwatch-context.gen.js +5 -5
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +547 -573
- package/dist/sentry-context.gen.d.ts +3 -3
- package/dist/sentry-context.gen.js +3 -3
- package/dist/sentry-defaults.gen.d.ts +1 -1
- package/dist/sentry-defaults.gen.js +379 -460
- package/dist/service-schemas.gen.d.ts +4 -4
- package/dist/service-schemas.gen.js +249 -99
- package/package.json +1 -1
|
@@ -1,76 +1,86 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Toxicity & Content Moderation
|
|
2
|
+
// Toxicity & Content Moderation (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks toxic, harmful, or inappropriate content
|
|
5
|
-
//
|
|
4
|
+
// Blocks toxic, harmful, or inappropriate content across the six standard
|
|
5
|
+
// toxicity dimensions. Three rules: a strict critical-threshold catch-all,
|
|
6
|
+
// a hate-speech-specific lower threshold, and an "any two moderate signals"
|
|
7
|
+
// combined-toxicity rule.
|
|
6
8
|
//
|
|
7
|
-
// Context keys
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
9
|
+
// Context keys consumed:
|
|
10
|
+
// - violence_score: Long (0-100)
|
|
11
|
+
// - hate_speech_score: Long (0-100)
|
|
12
|
+
// - sexual_score: Long (0-100)
|
|
13
|
+
// - weapons_score: Long (0-100)
|
|
14
|
+
// - crime_score: Long (0-100)
|
|
15
|
+
// - profanity_score: Long (0-100)
|
|
14
16
|
//
|
|
15
|
-
//
|
|
17
|
+
// Compliance:
|
|
18
|
+
// - EU AI Act, ISO 42001, NIST SI-4
|
|
19
|
+
//
|
|
20
|
+
// Category: trust-safety
|
|
16
21
|
// Namespace: Guardrails
|
|
17
22
|
// =============================================================================
|
|
18
23
|
|
|
19
|
-
@id("
|
|
24
|
+
@id("trust-safety.block-toxicity-critical")
|
|
20
25
|
@name("Block critical toxicity")
|
|
21
|
-
@description("
|
|
26
|
+
@description("Blocks process_prompt when any toxicity score is >= 91.")
|
|
22
27
|
@severity("critical")
|
|
23
|
-
@tags("
|
|
28
|
+
@tags("category:trust-safety,threat:harmful,detection:ml,compliance:eu-ai-act,compliance:iso-42001")
|
|
29
|
+
@reject_message("Prompt blocked: critical-severity toxic content detected.")
|
|
24
30
|
forbid (
|
|
25
31
|
principal,
|
|
26
32
|
action == Guardrails::Action::"process_prompt",
|
|
27
33
|
resource
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
(context has
|
|
31
|
-
(context has
|
|
32
|
-
(context has
|
|
33
|
-
(context has
|
|
34
|
-
(context has
|
|
34
|
+
)
|
|
35
|
+
when {
|
|
36
|
+
(context has violence_score && context.violence_score >= 91) ||
|
|
37
|
+
(context has hate_speech_score && context.hate_speech_score >= 91) ||
|
|
38
|
+
(context has sexual_score && context.sexual_score >= 91) ||
|
|
39
|
+
(context has weapons_score && context.weapons_score >= 91) ||
|
|
40
|
+
(context has crime_score && context.crime_score >= 91) ||
|
|
41
|
+
(context has profanity_score && context.profanity_score >= 91)
|
|
35
42
|
};
|
|
36
43
|
|
|
37
|
-
@id("
|
|
44
|
+
@id("trust-safety.block-hate-speech")
|
|
38
45
|
@name("Block hate speech")
|
|
39
|
-
@description("
|
|
46
|
+
@description("Blocks process_prompt when hate_speech_score >= 76.")
|
|
40
47
|
@severity("high")
|
|
41
|
-
@tags("hate-speech,
|
|
48
|
+
@tags("category:trust-safety,threat:hate-speech,detection:ml,compliance:eu-ai-act")
|
|
49
|
+
@reject_message("Prompt blocked: hate speech detected.")
|
|
42
50
|
forbid (
|
|
43
51
|
principal,
|
|
44
52
|
action == Guardrails::Action::"process_prompt",
|
|
45
53
|
resource
|
|
46
|
-
)
|
|
47
|
-
|
|
54
|
+
)
|
|
55
|
+
when {
|
|
56
|
+
context has hate_speech_score && context.hate_speech_score >= 76
|
|
48
57
|
};
|
|
49
58
|
|
|
50
|
-
@id("
|
|
51
|
-
@name("Block combined
|
|
52
|
-
@description("
|
|
59
|
+
@id("trust-safety.block-toxicity-combined")
|
|
60
|
+
@name("Block combined toxicity")
|
|
61
|
+
@description("Blocks process_prompt when any two toxicity scores are >= 61.")
|
|
53
62
|
@severity("high")
|
|
54
|
-
@tags("
|
|
63
|
+
@tags("category:trust-safety,threat:harmful,detection:aggregate")
|
|
64
|
+
@reject_message("Prompt blocked: multiple moderate toxicity signals were detected together.")
|
|
55
65
|
forbid (
|
|
56
66
|
principal,
|
|
57
67
|
action == Guardrails::Action::"process_prompt",
|
|
58
68
|
resource
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
(context has violence_score
|
|
62
|
-
(context has violence_score
|
|
63
|
-
(context has violence_score
|
|
64
|
-
(context has violence_score
|
|
65
|
-
(context has violence_score
|
|
66
|
-
(context has hate_speech_score && context has sexual_score
|
|
67
|
-
(context has hate_speech_score && context has weapons_score
|
|
68
|
-
(context has hate_speech_score && context has crime_score
|
|
69
|
-
(context has hate_speech_score && context has profanity_score
|
|
70
|
-
(context has sexual_score
|
|
71
|
-
(context has sexual_score
|
|
72
|
-
(context has sexual_score
|
|
73
|
-
(context has weapons_score
|
|
74
|
-
(context has weapons_score
|
|
75
|
-
(context has crime_score
|
|
69
|
+
)
|
|
70
|
+
when {
|
|
71
|
+
(context has violence_score && context has hate_speech_score && context.violence_score >= 61 && context.hate_speech_score >= 61) ||
|
|
72
|
+
(context has violence_score && context has sexual_score && context.violence_score >= 61 && context.sexual_score >= 61) ||
|
|
73
|
+
(context has violence_score && context has weapons_score && context.violence_score >= 61 && context.weapons_score >= 61) ||
|
|
74
|
+
(context has violence_score && context has crime_score && context.violence_score >= 61 && context.crime_score >= 61) ||
|
|
75
|
+
(context has violence_score && context has profanity_score && context.violence_score >= 61 && context.profanity_score >= 61) ||
|
|
76
|
+
(context has hate_speech_score && context has sexual_score && context.hate_speech_score >= 61 && context.sexual_score >= 61) ||
|
|
77
|
+
(context has hate_speech_score && context has weapons_score && context.hate_speech_score >= 61 && context.weapons_score >= 61) ||
|
|
78
|
+
(context has hate_speech_score && context has crime_score && context.hate_speech_score >= 61 && context.crime_score >= 61) ||
|
|
79
|
+
(context has hate_speech_score && context has profanity_score && context.hate_speech_score >= 61 && context.profanity_score >= 61) ||
|
|
80
|
+
(context has sexual_score && context has weapons_score && context.sexual_score >= 61 && context.weapons_score >= 61) ||
|
|
81
|
+
(context has sexual_score && context has crime_score && context.sexual_score >= 61 && context.crime_score >= 61) ||
|
|
82
|
+
(context has sexual_score && context has profanity_score && context.sexual_score >= 61 && context.profanity_score >= 61) ||
|
|
83
|
+
(context has weapons_score && context has crime_score && context.weapons_score >= 61 && context.crime_score >= 61) ||
|
|
84
|
+
(context has weapons_score && context has profanity_score && context.weapons_score >= 61 && context.profanity_score >= 61) ||
|
|
85
|
+
(context has crime_score && context has profanity_score && context.crime_score >= 61 && context.profanity_score >= 61)
|
|
76
86
|
};
|
|
@@ -1,86 +1,103 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// MCP Tool Permissions
|
|
2
|
+
// MCP Tool Permissions
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Per-tool access control for MCP servers
|
|
5
|
-
//
|
|
4
|
+
// Per-tool access control for MCP servers — fine-grained permissioning that
|
|
5
|
+
// allows specific tools from a server while denying others.
|
|
6
6
|
//
|
|
7
|
-
//
|
|
7
|
+
// Bundled rules cover:
|
|
8
|
+
// - GitHub MCP example: permit read tools, block write tools
|
|
9
|
+
// - Org-wide exclusion list (untrusted / deprecated servers)
|
|
10
|
+
// - Unverified MCP server block
|
|
8
11
|
//
|
|
9
|
-
// Context keys
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
// - mcp_server_verified: Bool - Whether server is verified
|
|
12
|
+
// Context keys consumed:
|
|
13
|
+
// - mcp_server: String
|
|
14
|
+
// - tool_name: String
|
|
15
|
+
// - mcp_server_verified: Bool
|
|
14
16
|
//
|
|
15
|
-
// Category:
|
|
17
|
+
// Category: tools
|
|
16
18
|
// Namespace: Guardrails
|
|
17
19
|
// =============================================================================
|
|
18
20
|
|
|
19
|
-
//
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Section 1: Example — GitHub MCP read/write split
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
20
24
|
|
|
21
|
-
@id("
|
|
22
|
-
@name("Allow read
|
|
23
|
-
@description("
|
|
25
|
+
@id("tools.allow-github-mcp-read")
|
|
26
|
+
@name("Allow GitHub MCP read tools")
|
|
27
|
+
@description("Permits call_tool when mcp_server is \"github\" and tool_name is a read-only operation.")
|
|
24
28
|
@severity("medium")
|
|
25
|
-
@tags("
|
|
29
|
+
@tags("category:tools,scope:per-tool,surface:call-tool,posture:deny-default")
|
|
26
30
|
permit (
|
|
27
31
|
principal,
|
|
28
32
|
action == Guardrails::Action::"call_tool",
|
|
29
33
|
resource
|
|
30
|
-
)
|
|
34
|
+
)
|
|
35
|
+
when {
|
|
31
36
|
context has mcp_server && context.mcp_server == "github" &&
|
|
32
37
|
context has tool_name &&
|
|
33
|
-
(
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
(
|
|
39
|
+
context.tool_name == "read_issues" ||
|
|
40
|
+
context.tool_name == "get_issue" ||
|
|
41
|
+
context.tool_name == "list_repos" ||
|
|
42
|
+
context.tool_name == "get_pull_request" ||
|
|
43
|
+
context.tool_name == "search_code" ||
|
|
44
|
+
context.tool_name == "get_file_contents"
|
|
45
|
+
)
|
|
39
46
|
};
|
|
40
47
|
|
|
41
|
-
@id("
|
|
42
|
-
@name("
|
|
43
|
-
@description("
|
|
48
|
+
@id("tools.block-github-mcp-write")
|
|
49
|
+
@name("Block GitHub MCP write tools")
|
|
50
|
+
@description("Blocks call_tool when mcp_server is \"github\"; combine with the read-allow permit above.")
|
|
44
51
|
@severity("high")
|
|
45
|
-
@tags("
|
|
52
|
+
@tags("category:tools,scope:per-tool,surface:call-tool,posture:deny-default")
|
|
53
|
+
@reject_message("Tool execution blocked: write operations against the GitHub MCP server are not permitted.")
|
|
46
54
|
forbid (
|
|
47
55
|
principal,
|
|
48
56
|
action == Guardrails::Action::"call_tool",
|
|
49
57
|
resource
|
|
50
|
-
)
|
|
58
|
+
)
|
|
59
|
+
when {
|
|
51
60
|
context has mcp_server && context.mcp_server == "github"
|
|
52
61
|
};
|
|
53
62
|
|
|
54
|
-
//
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Section 2: Org-wide exclusion list
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
55
66
|
|
|
56
|
-
@id("mcp-
|
|
57
|
-
@name("
|
|
58
|
-
@description("
|
|
67
|
+
@id("tools.exclude-mcp-servers")
|
|
68
|
+
@name("Block excluded MCP servers")
|
|
69
|
+
@description("Blocks call_tool when mcp_server is in the org-wide exclusion list.")
|
|
59
70
|
@severity("critical")
|
|
60
|
-
@tags("
|
|
71
|
+
@tags("category:tools,scope:org-wide,surface:call-tool,posture:deny-default")
|
|
72
|
+
@reject_message("Tool execution blocked: MCP server is on the org-wide exclusion list.")
|
|
61
73
|
forbid (
|
|
62
74
|
principal,
|
|
63
75
|
action == Guardrails::Action::"call_tool",
|
|
64
76
|
resource
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
// Modify this list to match your exclusion requirements.
|
|
77
|
+
)
|
|
78
|
+
when {
|
|
68
79
|
context has mcp_server &&
|
|
69
|
-
(
|
|
70
|
-
|
|
80
|
+
(
|
|
81
|
+
context.mcp_server == "untrusted-server" ||
|
|
82
|
+
context.mcp_server == "deprecated-server"
|
|
83
|
+
)
|
|
71
84
|
};
|
|
72
85
|
|
|
73
|
-
//
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
|
+
// Section 3: Unverified server block
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
74
89
|
|
|
75
|
-
@id("
|
|
76
|
-
@name("Block unverified MCP
|
|
77
|
-
@description("
|
|
90
|
+
@id("tools.block-unverified-mcp-tools")
|
|
91
|
+
@name("Block unverified MCP server tools")
|
|
92
|
+
@description("Blocks call_tool when mcp_server_verified is false.")
|
|
78
93
|
@severity("high")
|
|
79
|
-
@tags("
|
|
94
|
+
@tags("category:tools,threat:supply-chain,detection:rule,surface:call-tool")
|
|
95
|
+
@reject_message("Tool execution blocked: MCP server is not from a verified registry.")
|
|
80
96
|
forbid (
|
|
81
97
|
principal,
|
|
82
98
|
action == Guardrails::Action::"call_tool",
|
|
83
99
|
resource
|
|
84
|
-
)
|
|
100
|
+
)
|
|
101
|
+
when {
|
|
85
102
|
context has mcp_server_verified && context.mcp_server_verified == false
|
|
86
103
|
};
|
|
@@ -1,43 +1,33 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
2
|
// A2A Security — Cross-Origin Trust Boundary Enforcement
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Detects and blocks confused
|
|
5
|
-
// domain attempts to operate in another.
|
|
6
|
-
//
|
|
7
|
-
// - Mixed-security tool chains span trust boundaries
|
|
8
|
-
// - URL injection redirects agent communication to untrusted origins
|
|
4
|
+
// Detects and blocks confused-deputy attacks where an agent from one trust
|
|
5
|
+
// domain attempts to operate in another. Primary signal in A2A
|
|
6
|
+
// communication where independent agents cross separate trust domains.
|
|
9
7
|
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
8
|
+
// Shield cross-origin detector scores (discrete):
|
|
9
|
+
// 90 mixed localhost + external | 85 URL injection | 80 proxy/redirect
|
|
10
|
+
// 75 multi-origin tool configs | 70 mixed HTTP/HTTPS | 65 JSON multi-URL
|
|
11
|
+
// 60 generic multi-domain patterns
|
|
14
12
|
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
//
|
|
18
|
-
//
|
|
19
|
-
// 75 — multi-origin tool configs / JSON origin fields
|
|
20
|
-
// 70 — mixed HTTP/HTTPS or ws/wss schemes
|
|
21
|
-
// 65 — JSON arrays with multiple URLs
|
|
22
|
-
// 60 — generic multi-domain patterns
|
|
13
|
+
// Context keys consumed:
|
|
14
|
+
// - cross_origin_detected, cross_origin_score
|
|
15
|
+
// - agent_id, agent_trust_level
|
|
16
|
+
// - tool_is_sensitive
|
|
23
17
|
//
|
|
24
18
|
// Compliance:
|
|
25
|
-
// OWASP LLM08
|
|
26
|
-
// OWASP ASI03 (Excessive Permissions)
|
|
27
|
-
// MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection via delegation)
|
|
28
|
-
// NIST 800-53 AC-4 (Information Flow Enforcement)
|
|
19
|
+
// - OWASP LLM08, OWASP ASI03; NIST 800-53 AC-4
|
|
29
20
|
//
|
|
30
|
-
// Category:
|
|
21
|
+
// Category: agent-identity
|
|
31
22
|
// Namespace: Guardrails
|
|
32
23
|
// =============================================================================
|
|
33
24
|
|
|
34
|
-
|
|
35
|
-
@id("a2a-cross-origin-block-critical")
|
|
25
|
+
@id("agent-identity.a2a-block-cross-origin-critical")
|
|
36
26
|
@name("Block critical cross-origin from any agent")
|
|
37
|
-
@description("
|
|
27
|
+
@description("Blocks process_prompt and call_tool when cross_origin_detected is true and cross_origin_score >= 80.")
|
|
38
28
|
@severity("critical")
|
|
39
|
-
@tags("
|
|
40
|
-
@reject_message("Request blocked: high-confidence cross-origin trust
|
|
29
|
+
@tags("category:agent-identity,threat:supply-chain,detection:ml,owasp:llm08")
|
|
30
|
+
@reject_message("Request blocked: high-confidence cross-origin trust-boundary violation detected — confused-deputy attack.")
|
|
41
31
|
forbid (
|
|
42
32
|
principal is Guardrails::Agent,
|
|
43
33
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
@@ -49,13 +39,12 @@ when {
|
|
|
49
39
|
context has cross_origin_score && context.cross_origin_score >= 80
|
|
50
40
|
};
|
|
51
41
|
|
|
52
|
-
|
|
53
|
-
@id("a2a-cross-origin-block-unverified")
|
|
42
|
+
@id("agent-identity.a2a-block-cross-origin-unverified")
|
|
54
43
|
@name("Block cross-origin from unverified agents")
|
|
55
|
-
@description("
|
|
44
|
+
@description("Blocks process_prompt and call_tool when agent_trust_level is \"unverified\", cross_origin_detected is true, and cross_origin_score >= 60.")
|
|
56
45
|
@severity("high")
|
|
57
|
-
@tags("
|
|
58
|
-
@reject_message("Request blocked: cross-origin activity
|
|
46
|
+
@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:ml,owasp:asi03")
|
|
47
|
+
@reject_message("Request blocked: unverified agent attempted cross-origin activity — no trust attestation across trust domains.")
|
|
59
48
|
forbid (
|
|
60
49
|
principal is Guardrails::Agent,
|
|
61
50
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
@@ -67,13 +56,12 @@ when {
|
|
|
67
56
|
context has cross_origin_score && context.cross_origin_score >= 60
|
|
68
57
|
};
|
|
69
58
|
|
|
70
|
-
|
|
71
|
-
@id("a2a-cross-origin-block-server-connect")
|
|
59
|
+
@id("agent-identity.a2a-block-cross-origin-server-connect")
|
|
72
60
|
@name("Block cross-origin MCP server connections")
|
|
73
|
-
@description("
|
|
61
|
+
@description("Blocks connect_server when agent_trust_level is not \"first_party\", cross_origin_detected is true, and cross_origin_score >= 65.")
|
|
74
62
|
@severity("critical")
|
|
75
|
-
@tags("
|
|
76
|
-
@reject_message("MCP server connection blocked:
|
|
63
|
+
@tags("category:agent-identity,threat:supply-chain,scope:per-agent,detection:ml,surface:connect-server")
|
|
64
|
+
@reject_message("MCP server connection blocked: non-first-party agent crossed trust boundaries during connection — supply-chain risk.")
|
|
77
65
|
forbid (
|
|
78
66
|
principal is Guardrails::Agent,
|
|
79
67
|
action == Guardrails::Action::"connect_server",
|
|
@@ -85,13 +73,12 @@ when {
|
|
|
85
73
|
context has cross_origin_score && context.cross_origin_score >= 65
|
|
86
74
|
};
|
|
87
75
|
|
|
88
|
-
|
|
89
|
-
@id("a2a-cross-origin-block-sensitive-tools")
|
|
76
|
+
@id("agent-identity.a2a-block-cross-origin-sensitive-tools")
|
|
90
77
|
@name("Block cross-origin on sensitive tools")
|
|
91
|
-
@description("
|
|
78
|
+
@description("Blocks call_tool when cross_origin_detected is true, cross_origin_score >= 60, and tool_is_sensitive is true.")
|
|
92
79
|
@severity("high")
|
|
93
|
-
@tags("
|
|
94
|
-
@reject_message("Sensitive tool execution blocked: cross-origin
|
|
80
|
+
@tags("category:agent-identity,threat:supply-chain,detection:ml,surface:call-tool,owasp:llm08")
|
|
81
|
+
@reject_message("Sensitive tool execution blocked: cross-origin signal present — sensitive tools amplify confused-deputy impact.")
|
|
95
82
|
forbid (
|
|
96
83
|
principal is Guardrails::Agent,
|
|
97
84
|
action == Guardrails::Action::"call_tool",
|
|
@@ -1,47 +1,36 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
2
|
// A2A Security — Escalation Detection & Circuit Breakers
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Detects progressive capability escalation across agent turns
|
|
5
|
-
//
|
|
4
|
+
// Detects progressive capability escalation across agent turns. Lower
|
|
5
|
+
// circuit-breaker thresholds than the multi-agent profile because A2A
|
|
6
|
+
// communication spans separate trust domains with no shared orchestrator
|
|
7
|
+
// to reset session state.
|
|
6
8
|
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
// Lower threshold than MAS (200) because cross-system communication has
|
|
14
|
-
// higher adversarial surface area.
|
|
15
|
-
//
|
|
16
|
-
// 3. THREAT TURN ESCALATION: After 3+ turns with detected threats, unverified
|
|
17
|
-
// agents are fully locked out. This catches turn-by-turn probing attacks
|
|
18
|
-
// where an attacker incrementally tests boundaries.
|
|
19
|
-
//
|
|
20
|
-
// Key A2A distinction: In MAS, the orchestrator can reset or contain sessions.
|
|
21
|
-
// In A2A, independent agents have no shared circuit breaker, so policy must
|
|
22
|
-
// enforce escalation detection at the evaluation layer.
|
|
9
|
+
// Context keys consumed:
|
|
10
|
+
// - agent_trust_level
|
|
11
|
+
// - session_max_injection_score, session_max_jailbreak_score: Long (0-100)
|
|
12
|
+
// - session_cumulative_risk_score: Long
|
|
13
|
+
// - session_threat_turns: Long
|
|
14
|
+
// - tool_is_sensitive: Bool
|
|
23
15
|
//
|
|
24
16
|
// Compliance:
|
|
25
|
-
// OWASP LLM01 (
|
|
26
|
-
//
|
|
27
|
-
// NIST 800-53 SI-4 (System Monitoring)
|
|
28
|
-
// NIST 800-53 IR-4 (Incident Handling)
|
|
17
|
+
// - OWASP LLM01 (multi-turn); MITRE ATLAS AML.T0051
|
|
18
|
+
// - NIST 800-53 SI-4, IR-4
|
|
29
19
|
//
|
|
30
|
-
// Category:
|
|
20
|
+
// Category: agent-identity
|
|
31
21
|
// Namespace: Guardrails
|
|
32
22
|
// =============================================================================
|
|
33
23
|
|
|
34
|
-
//
|
|
35
|
-
// Session
|
|
36
|
-
//
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Section 1: Session peak score monitoring
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
37
27
|
|
|
38
|
-
|
|
39
|
-
@
|
|
40
|
-
@
|
|
41
|
-
@description("Block non-first-party agents from tool calls and prompt processing when the maximum injection score across all prior turns exceeds 70. Unlike MAS policies that use boolean session_injection_detected, A2A uses the numeric peak score for graduated response — a session with a prior score of 75 is more dangerous than one with 45, even though both set the boolean flag.")
|
|
28
|
+
@id("agent-identity.a2a-block-session-injection-peak")
|
|
29
|
+
@name("Block agents at high session injection peak")
|
|
30
|
+
@description("Blocks call_tool and process_prompt when agent_trust_level is not \"first_party\" and session_max_injection_score >= 70.")
|
|
42
31
|
@severity("critical")
|
|
43
|
-
@tags("
|
|
44
|
-
@reject_message("Request blocked: this session
|
|
32
|
+
@tags("category:agent-identity,threat:injection,scope:per-agent,detection:aggregate,owasp:llm01")
|
|
33
|
+
@reject_message("Request blocked: this session previously crossed the injection peak threshold; non-first-party agents are restricted.")
|
|
45
34
|
forbid (
|
|
46
35
|
principal is Guardrails::Agent,
|
|
47
36
|
action in [Guardrails::Action::"call_tool", Guardrails::Action::"process_prompt"],
|
|
@@ -52,13 +41,12 @@ when {
|
|
|
52
41
|
context has session_max_injection_score && context.session_max_injection_score >= 70
|
|
53
42
|
};
|
|
54
43
|
|
|
55
|
-
|
|
56
|
-
@
|
|
57
|
-
@
|
|
58
|
-
@description("Block non-first-party agents from prompt processing when the maximum jailbreak score across all prior turns exceeds 70. A high jailbreak peak indicates the session's guardrails may have been partially eroded — subsequent agent requests in this session should be treated as potentially manipulated.")
|
|
44
|
+
@id("agent-identity.a2a-block-session-jailbreak-peak")
|
|
45
|
+
@name("Block agents at high session jailbreak peak")
|
|
46
|
+
@description("Blocks process_prompt when agent_trust_level is not \"first_party\" and session_max_jailbreak_score >= 70.")
|
|
59
47
|
@severity("critical")
|
|
60
|
-
@tags("
|
|
61
|
-
@reject_message("Request blocked: this session
|
|
48
|
+
@tags("category:agent-identity,threat:jailbreak,scope:per-agent,detection:aggregate,owasp:llm02")
|
|
49
|
+
@reject_message("Request blocked: this session previously crossed the jailbreak peak threshold; non-first-party agents are restricted.")
|
|
62
50
|
forbid (
|
|
63
51
|
principal is Guardrails::Agent,
|
|
64
52
|
action == Guardrails::Action::"process_prompt",
|
|
@@ -69,17 +57,16 @@ when {
|
|
|
69
57
|
context has session_max_jailbreak_score && context.session_max_jailbreak_score >= 70
|
|
70
58
|
};
|
|
71
59
|
|
|
72
|
-
//
|
|
73
|
-
// Cumulative
|
|
74
|
-
//
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// Section 2: Cumulative risk circuit breaker
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
75
63
|
|
|
76
|
-
|
|
77
|
-
@
|
|
78
|
-
@
|
|
79
|
-
@description("Block non-first-party agents from sensitive tools when cumulative session risk exceeds 150. Lower threshold than MAS (200) because A2A sessions with cross-system agents should trip the circuit breaker earlier — independent agents cannot coordinate session recovery the way an orchestrator can.")
|
|
64
|
+
@id("agent-identity.a2a-cumulative-risk-lockdown")
|
|
65
|
+
@name("Block non-first-party agents at cumulative risk")
|
|
66
|
+
@description("Blocks call_tool when agent_trust_level is not \"first_party\", session_cumulative_risk_score >= 151, and tool_is_sensitive is true.")
|
|
80
67
|
@severity("high")
|
|
81
|
-
@tags("
|
|
82
|
-
@reject_message("
|
|
68
|
+
@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
|
|
69
|
+
@reject_message("Tool execution blocked: cumulative session risk exceeded the A2A circuit-breaker threshold for sensitive tools.")
|
|
83
70
|
forbid (
|
|
84
71
|
principal is Guardrails::Agent,
|
|
85
72
|
action == Guardrails::Action::"call_tool",
|
|
@@ -87,21 +74,20 @@ forbid (
|
|
|
87
74
|
)
|
|
88
75
|
when {
|
|
89
76
|
context has agent_trust_level && context.agent_trust_level != "first_party" &&
|
|
90
|
-
context has session_cumulative_risk_score && context.session_cumulative_risk_score
|
|
77
|
+
context has session_cumulative_risk_score && context.session_cumulative_risk_score >= 151 &&
|
|
91
78
|
context has tool_is_sensitive && context.tool_is_sensitive == true
|
|
92
79
|
};
|
|
93
80
|
|
|
94
|
-
//
|
|
95
|
-
//
|
|
96
|
-
//
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
// Section 3: Threat turn escalation
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
97
84
|
|
|
98
|
-
|
|
99
|
-
@id("a2a-threat-turn-escalation-block")
|
|
85
|
+
@id("agent-identity.a2a-threat-turn-escalation")
|
|
100
86
|
@name("Block unverified agents after repeated threat turns")
|
|
101
|
-
@description("
|
|
87
|
+
@description("Blocks call_tool when agent_trust_level is \"unverified\" and session_threat_turns >= 3.")
|
|
102
88
|
@severity("critical")
|
|
103
|
-
@tags("
|
|
104
|
-
@reject_message("Tool execution blocked: 3+ threat turns
|
|
89
|
+
@tags("category:agent-identity,scope:per-agent,detection:aggregate,surface:call-tool,posture:catch-all")
|
|
90
|
+
@reject_message("Tool execution blocked: 3+ threat turns in this session — unverified agents are locked out to stop adversarial probing.")
|
|
105
91
|
forbid (
|
|
106
92
|
principal is Guardrails::Agent,
|
|
107
93
|
action == Guardrails::Action::"call_tool",
|
|
@@ -109,5 +95,5 @@ forbid (
|
|
|
109
95
|
)
|
|
110
96
|
when {
|
|
111
97
|
context has agent_trust_level && context.agent_trust_level == "unverified" &&
|
|
112
|
-
context has session_threat_turns && context.session_threat_turns
|
|
98
|
+
context has session_threat_turns && context.session_threat_turns >= 3
|
|
113
99
|
};
|