@highflame/policy 2.1.36 → 2.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/ai_gateway/context.json +431 -11
- package/_schemas/ai_gateway/schema.cedarschema +91 -11
- package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
- package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
- package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
- package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
- package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
- package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
- package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
- package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
- package/_schemas/ai_gateway/templates/templates.json +42 -47
- package/_schemas/guardrails/context.json +12 -12
- package/_schemas/guardrails/schema.cedarschema +12 -12
- package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
- package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
- package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
- package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
- package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
- package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
- package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
- package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
- package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
- package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
- package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
- package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
- package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
- package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
- package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
- package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
- package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
- package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
- package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
- package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
- package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
- package/_schemas/guardrails/templates/templates.json +188 -210
- package/_schemas/overwatch/context.json +14 -14
- package/_schemas/overwatch/schema.cedarschema +12 -12
- package/_schemas/sentry/context.json +11 -11
- package/_schemas/sentry/schema.cedarschema +11 -11
- package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
- package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
- package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
- package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
- package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
- package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
- package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
- package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
- package/_schemas/sentry/templates/templates.json +46 -46
- package/dist/ai_gateway-context.gen.d.ts +18 -4
- package/dist/ai_gateway-context.gen.js +18 -4
- package/dist/ai_gateway-defaults.gen.d.ts +1 -1
- package/dist/ai_gateway-defaults.gen.js +377 -313
- package/dist/guardrails-context.gen.d.ts +5 -5
- package/dist/guardrails-context.gen.js +5 -5
- package/dist/guardrails-defaults.gen.d.ts +1 -1
- package/dist/guardrails-defaults.gen.js +2070 -1849
- package/dist/overwatch-context.gen.d.ts +5 -5
- package/dist/overwatch-context.gen.js +5 -5
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +635 -595
- package/dist/sentry-context.gen.d.ts +3 -3
- package/dist/sentry-context.gen.js +3 -3
- package/dist/sentry-defaults.gen.d.ts +1 -1
- package/dist/sentry-defaults.gen.js +379 -460
- package/dist/service-schemas.gen.d.ts +4 -4
- package/dist/service-schemas.gen.js +249 -99
- package/package.json +1 -1
|
@@ -1,70 +1,82 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Injection & Jailbreak Detection
|
|
2
|
+
// Injection & Jailbreak Detection (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks prompt injection
|
|
5
|
-
//
|
|
4
|
+
// Blocks prompt injection and jailbreak attempts using ML classifier
|
|
5
|
+
// confidence scores plus an invisible-character defence for encoded
|
|
6
|
+
// injection payloads.
|
|
6
7
|
//
|
|
7
|
-
// Context keys
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
// - invisible_chars_score: Long (0-100) - Invisible character density
|
|
8
|
+
// Context keys consumed:
|
|
9
|
+
// - injection_score: Long (0-100)
|
|
10
|
+
// - jailbreak_score: Long (0-100)
|
|
11
|
+
// - invisible_chars_detected: Bool
|
|
12
|
+
// - invisible_chars_score: Long (0-100)
|
|
13
13
|
//
|
|
14
|
-
//
|
|
14
|
+
// Compliance:
|
|
15
|
+
// - OWASP LLM01, OWASP LLM02
|
|
16
|
+
// - MITRE ATLAS AML.T0051, AML.T0054
|
|
17
|
+
//
|
|
18
|
+
// Category: security
|
|
15
19
|
// Namespace: Guardrails
|
|
16
20
|
// =============================================================================
|
|
17
21
|
|
|
18
|
-
@id("
|
|
19
|
-
@name("Block
|
|
20
|
-
@description("
|
|
22
|
+
@id("security.block-injection-ml")
|
|
23
|
+
@name("Block injection (ML)")
|
|
24
|
+
@description("Blocks process_prompt and call_tool when injection_score >= 86.")
|
|
21
25
|
@severity("high")
|
|
22
|
-
@tags("injection,
|
|
26
|
+
@tags("category:security,threat:injection,detection:ml,owasp:llm01,mitre:atlas-t0051")
|
|
27
|
+
@reject_message("Prompt blocked: the ML classifier detected prompt injection with high confidence.")
|
|
23
28
|
forbid (
|
|
24
29
|
principal,
|
|
25
30
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
26
31
|
resource
|
|
27
|
-
)
|
|
28
|
-
|
|
32
|
+
)
|
|
33
|
+
when {
|
|
34
|
+
context has injection_score && context.injection_score >= 86
|
|
29
35
|
};
|
|
30
36
|
|
|
31
|
-
@id("
|
|
32
|
-
@name("Block
|
|
33
|
-
@description("
|
|
37
|
+
@id("security.block-jailbreak-ml")
|
|
38
|
+
@name("Block jailbreak (ML)")
|
|
39
|
+
@description("Blocks process_prompt when jailbreak_score >= 81.")
|
|
34
40
|
@severity("high")
|
|
35
|
-
@tags("jailbreak,
|
|
41
|
+
@tags("category:security,threat:jailbreak,detection:ml,owasp:llm02,mitre:atlas-t0054")
|
|
42
|
+
@reject_message("Prompt blocked: the ML classifier detected a jailbreak attempt with high confidence.")
|
|
36
43
|
forbid (
|
|
37
44
|
principal,
|
|
38
45
|
action == Guardrails::Action::"process_prompt",
|
|
39
46
|
resource
|
|
40
|
-
)
|
|
41
|
-
|
|
47
|
+
)
|
|
48
|
+
when {
|
|
49
|
+
context has jailbreak_score && context.jailbreak_score >= 81
|
|
42
50
|
};
|
|
43
51
|
|
|
44
|
-
@id("injection-combined
|
|
45
|
-
@name("Block
|
|
46
|
-
@description("
|
|
52
|
+
@id("security.block-injection-combined")
|
|
53
|
+
@name("Block injection + jailbreak (combined)")
|
|
54
|
+
@description("Blocks process_prompt when injection_score >= 61 and jailbreak_score >= 61.")
|
|
47
55
|
@severity("high")
|
|
48
|
-
@tags("injection,jailbreak,
|
|
56
|
+
@tags("category:security,threat:injection,threat:jailbreak,detection:ml")
|
|
57
|
+
@reject_message("Prompt blocked: moderate injection and jailbreak signals were detected together.")
|
|
49
58
|
forbid (
|
|
50
59
|
principal,
|
|
51
60
|
action == Guardrails::Action::"process_prompt",
|
|
52
61
|
resource
|
|
53
|
-
)
|
|
54
|
-
|
|
55
|
-
context
|
|
62
|
+
)
|
|
63
|
+
when {
|
|
64
|
+
context has injection_score && context has jailbreak_score &&
|
|
65
|
+
context.injection_score >= 61 && context.jailbreak_score >= 61
|
|
56
66
|
};
|
|
57
67
|
|
|
58
|
-
@id("
|
|
59
|
-
@name("Block invisible
|
|
60
|
-
@description("
|
|
68
|
+
@id("security.block-invisible-chars")
|
|
69
|
+
@name("Block invisible-character injection")
|
|
70
|
+
@description("Blocks process_prompt when invisible_chars_detected is true and invisible_chars_score >= 51.")
|
|
61
71
|
@severity("high")
|
|
62
|
-
@tags("injection,
|
|
72
|
+
@tags("category:security,threat:invisible-chars,threat:injection,detection:pattern")
|
|
73
|
+
@reject_message("Prompt blocked: invisible Unicode characters often used for injection were detected.")
|
|
63
74
|
forbid (
|
|
64
75
|
principal,
|
|
65
76
|
action == Guardrails::Action::"process_prompt",
|
|
66
77
|
resource
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
context has
|
|
78
|
+
)
|
|
79
|
+
when {
|
|
80
|
+
context has invisible_chars_detected && context.invisible_chars_detected == true &&
|
|
81
|
+
context has invisible_chars_score && context.invisible_chars_score >= 51
|
|
70
82
|
};
|
|
@@ -1,44 +1,51 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// PII (
|
|
2
|
+
// PII Detection (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks
|
|
5
|
-
//
|
|
4
|
+
// Blocks LLM responses that contain personally identifiable information,
|
|
5
|
+
// with a stricter rule for the most sensitive PII types (SSN, credit card,
|
|
6
|
+
// passport).
|
|
6
7
|
//
|
|
7
|
-
// Context keys
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
// - direction: String - "input" or "output"
|
|
8
|
+
// Context keys consumed:
|
|
9
|
+
// - pii_detected: Bool
|
|
10
|
+
// - pii_types: Set<String>
|
|
11
|
+
// - direction: String ("input" | "output")
|
|
12
12
|
//
|
|
13
|
-
//
|
|
13
|
+
// Compliance:
|
|
14
|
+
// - GDPR, HIPAA, PCI-DSS (depending on data classification)
|
|
15
|
+
//
|
|
16
|
+
// Category: privacy
|
|
14
17
|
// Namespace: Guardrails
|
|
15
18
|
// =============================================================================
|
|
16
19
|
|
|
17
|
-
@id("pii-
|
|
20
|
+
@id("privacy.block-pii-output")
|
|
18
21
|
@name("Block PII in outputs")
|
|
19
|
-
@description("
|
|
22
|
+
@description("Blocks process_prompt outputs when pii_detected is true.")
|
|
20
23
|
@severity("high")
|
|
21
|
-
@tags("pii,
|
|
24
|
+
@tags("category:privacy,threat:pii,detection:rule,surface:process-prompt,compliance:gdpr")
|
|
25
|
+
@reject_message("Response blocked: personally identifiable information was detected in the output.")
|
|
22
26
|
forbid (
|
|
23
27
|
principal,
|
|
24
28
|
action == Guardrails::Action::"process_prompt",
|
|
25
29
|
resource
|
|
26
|
-
)
|
|
30
|
+
)
|
|
31
|
+
when {
|
|
27
32
|
context has pii_detected && context.pii_detected == true &&
|
|
28
|
-
context.direction == "output"
|
|
33
|
+
context has direction && context.direction == "output"
|
|
29
34
|
};
|
|
30
35
|
|
|
31
|
-
@id("pii-
|
|
32
|
-
@name("Block sensitive PII types")
|
|
33
|
-
@description("
|
|
36
|
+
@id("privacy.block-pii-sensitive")
|
|
37
|
+
@name("Block sensitive PII types in outputs")
|
|
38
|
+
@description("Blocks process_prompt outputs when pii_types contains SSN, credit_card, or passport.")
|
|
34
39
|
@severity("critical")
|
|
35
|
-
@tags("pii,
|
|
40
|
+
@tags("category:privacy,threat:pii,detection:rule,surface:process-prompt,compliance:gdpr,compliance:hipaa,compliance:pci-dss")
|
|
41
|
+
@reject_message("Response blocked: highly sensitive PII (SSN, credit card, or passport) was detected.")
|
|
36
42
|
forbid (
|
|
37
43
|
principal,
|
|
38
44
|
action == Guardrails::Action::"process_prompt",
|
|
39
45
|
resource
|
|
40
|
-
)
|
|
41
|
-
|
|
46
|
+
)
|
|
47
|
+
when {
|
|
48
|
+
context has direction && context.direction == "output" &&
|
|
42
49
|
context has pii_types &&
|
|
43
50
|
(
|
|
44
51
|
context.pii_types.contains("ssn") ||
|
|
@@ -1,40 +1,57 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Secrets Detection
|
|
2
|
+
// Secrets Detection (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks
|
|
5
|
-
//
|
|
4
|
+
// Blocks content containing API keys, tokens, credentials, or other secrets
|
|
5
|
+
// across LLM prompts, tool calls, and file operations.
|
|
6
6
|
//
|
|
7
|
-
// Context keys
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
7
|
+
// Context keys consumed:
|
|
8
|
+
// - secrets_detected: Bool
|
|
9
|
+
// - secret_count: Long
|
|
10
|
+
// - secret_types: Set<String>
|
|
11
11
|
//
|
|
12
|
-
//
|
|
12
|
+
// Compliance:
|
|
13
|
+
// - OWASP LLM06
|
|
14
|
+
//
|
|
15
|
+
// Category: data-protection
|
|
13
16
|
// Namespace: Guardrails
|
|
14
17
|
// =============================================================================
|
|
15
18
|
|
|
16
|
-
@id("
|
|
17
|
-
@name("Block
|
|
18
|
-
@description("
|
|
19
|
+
@id("data-protection.block-secrets")
|
|
20
|
+
@name("Block secrets")
|
|
21
|
+
@description("Blocks process_prompt, call_tool, read_file, and write_file when secrets_detected is true.")
|
|
19
22
|
@severity("critical")
|
|
20
|
-
@tags("secrets,
|
|
23
|
+
@tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
|
|
24
|
+
@reject_message("Request blocked: secrets or credentials were detected in the content.")
|
|
21
25
|
forbid (
|
|
22
26
|
principal,
|
|
23
|
-
action in [
|
|
27
|
+
action in [
|
|
28
|
+
Guardrails::Action::"process_prompt",
|
|
29
|
+
Guardrails::Action::"call_tool",
|
|
30
|
+
Guardrails::Action::"read_file",
|
|
31
|
+
Guardrails::Action::"write_file"
|
|
32
|
+
],
|
|
24
33
|
resource
|
|
25
|
-
)
|
|
26
|
-
|
|
34
|
+
)
|
|
35
|
+
when {
|
|
36
|
+
context has secrets_detected && context.secrets_detected == true
|
|
27
37
|
};
|
|
28
38
|
|
|
29
|
-
@id("
|
|
30
|
-
@name("Block
|
|
31
|
-
@description("
|
|
39
|
+
@id("data-protection.block-secrets-bulk")
|
|
40
|
+
@name("Block secrets (bulk)")
|
|
41
|
+
@description("Blocks process_prompt, call_tool, read_file, and write_file when secret_count >= 3.")
|
|
32
42
|
@severity("critical")
|
|
33
|
-
@tags("secrets,
|
|
43
|
+
@tags("category:data-protection,threat:secrets,threat:exfiltration,detection:aggregate,owasp:llm06")
|
|
44
|
+
@reject_message("Request blocked: multiple distinct secrets were detected — possible credential dump.")
|
|
34
45
|
forbid (
|
|
35
46
|
principal,
|
|
36
|
-
action in [
|
|
47
|
+
action in [
|
|
48
|
+
Guardrails::Action::"process_prompt",
|
|
49
|
+
Guardrails::Action::"call_tool",
|
|
50
|
+
Guardrails::Action::"read_file",
|
|
51
|
+
Guardrails::Action::"write_file"
|
|
52
|
+
],
|
|
37
53
|
resource
|
|
38
|
-
)
|
|
39
|
-
|
|
54
|
+
)
|
|
55
|
+
when {
|
|
56
|
+
context has secret_count && context.secret_count >= 3
|
|
40
57
|
};
|
|
@@ -1,59 +1,72 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Security Pattern Detection
|
|
2
|
+
// Security Pattern Detection (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks command injection, path traversal, and SQL injection
|
|
4
|
+
// Blocks command injection, path traversal, and SQL injection using
|
|
5
5
|
// regex-based pattern detection from Shield's security detectors.
|
|
6
6
|
//
|
|
7
|
-
// Context keys
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
// - sql_injection_score: Long (0-100) - Detection confidence
|
|
7
|
+
// Context keys consumed:
|
|
8
|
+
// - command_injection_detected: Bool
|
|
9
|
+
// - path_traversal_detected: Bool
|
|
10
|
+
// - path_traversal_severity: String
|
|
11
|
+
// - sql_injection_detected: Bool
|
|
12
|
+
// - sql_injection_score: Long (0-100)
|
|
14
13
|
//
|
|
15
|
-
//
|
|
14
|
+
// Compliance:
|
|
15
|
+
// - MITRE ATT&CK T1059 (Command and Scripting Interpreter), T1005 (Data from Local System)
|
|
16
|
+
//
|
|
17
|
+
// Category: security
|
|
16
18
|
// Namespace: Guardrails
|
|
17
19
|
// =============================================================================
|
|
18
20
|
|
|
19
|
-
@id("security
|
|
21
|
+
@id("security.block-command-injection")
|
|
20
22
|
@name("Block command injection")
|
|
21
|
-
@description("
|
|
23
|
+
@description("Blocks process_prompt and call_tool when command_injection_detected is true.")
|
|
22
24
|
@severity("critical")
|
|
23
|
-
@tags("command-injection,
|
|
25
|
+
@tags("category:security,threat:command-injection,detection:pattern,mitre:t1059")
|
|
26
|
+
@reject_message("Request blocked: command injection pattern detected — reverse shell, destructive command, or privilege escalation.")
|
|
24
27
|
forbid (
|
|
25
28
|
principal,
|
|
26
29
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
27
30
|
resource
|
|
28
|
-
)
|
|
31
|
+
)
|
|
32
|
+
when {
|
|
29
33
|
context has command_injection_detected && context.command_injection_detected == true
|
|
30
34
|
};
|
|
31
35
|
|
|
32
|
-
@id("security
|
|
33
|
-
@name("Block
|
|
34
|
-
@description("
|
|
36
|
+
@id("security.block-path-traversal")
|
|
37
|
+
@name("Block path traversal")
|
|
38
|
+
@description("Blocks process_prompt, call_tool, read_file, and write_file when path_traversal_detected is true and severity is high or critical.")
|
|
35
39
|
@severity("high")
|
|
36
|
-
@tags("path-traversal,
|
|
40
|
+
@tags("category:security,threat:path-traversal,detection:pattern,mitre:t1005")
|
|
41
|
+
@reject_message("Request blocked: path traversal pattern detected — sensitive system files or deep directory traversal.")
|
|
37
42
|
forbid (
|
|
38
43
|
principal,
|
|
39
|
-
action in [
|
|
44
|
+
action in [
|
|
45
|
+
Guardrails::Action::"process_prompt",
|
|
46
|
+
Guardrails::Action::"call_tool",
|
|
47
|
+
Guardrails::Action::"read_file",
|
|
48
|
+
Guardrails::Action::"write_file"
|
|
49
|
+
],
|
|
40
50
|
resource
|
|
41
|
-
)
|
|
51
|
+
)
|
|
52
|
+
when {
|
|
42
53
|
context has path_traversal_detected && context.path_traversal_detected == true &&
|
|
43
54
|
context has path_traversal_severity &&
|
|
44
55
|
(context.path_traversal_severity == "critical" || context.path_traversal_severity == "high")
|
|
45
56
|
};
|
|
46
57
|
|
|
47
|
-
@id("security
|
|
48
|
-
@name("Block
|
|
49
|
-
@description("
|
|
58
|
+
@id("security.block-sql-injection")
|
|
59
|
+
@name("Block SQL injection")
|
|
60
|
+
@description("Blocks process_prompt and call_tool when sql_injection_detected is true and sql_injection_score >= 75.")
|
|
50
61
|
@severity("high")
|
|
51
|
-
@tags("sql-injection,
|
|
62
|
+
@tags("category:security,threat:sql-injection,detection:pattern")
|
|
63
|
+
@reject_message("Request blocked: SQL injection pattern detected — tautology, UNION attack, or destructive query.")
|
|
52
64
|
forbid (
|
|
53
65
|
principal,
|
|
54
66
|
action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
|
|
55
67
|
resource
|
|
56
|
-
)
|
|
68
|
+
)
|
|
69
|
+
when {
|
|
57
70
|
context has sql_injection_detected && context.sql_injection_detected == true &&
|
|
58
71
|
context has sql_injection_score && context.sql_injection_score >= 75
|
|
59
72
|
};
|
|
@@ -1,62 +1,78 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Semantic Topic Enforcement
|
|
2
|
+
// Semantic Topic Enforcement (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks content based on semantic topic classification
|
|
5
|
-
//
|
|
4
|
+
// Blocks content based on semantic topic classification — used for
|
|
5
|
+
// application-specific topic restrictions (medical, weapons, illegal
|
|
6
|
+
// activity, etc.).
|
|
6
7
|
//
|
|
7
|
-
// Context keys
|
|
8
|
-
//
|
|
9
|
-
//
|
|
8
|
+
// Context keys consumed:
|
|
9
|
+
// - content_topics: Set<String>
|
|
10
|
+
// - topic_confidence: Long (0-100)
|
|
10
11
|
//
|
|
11
|
-
//
|
|
12
|
+
// Compliance:
|
|
13
|
+
// - EU AI Act, ISO 42001
|
|
14
|
+
//
|
|
15
|
+
// Category: trust-safety
|
|
12
16
|
// Namespace: Guardrails
|
|
13
17
|
// =============================================================================
|
|
14
18
|
|
|
15
|
-
@id("
|
|
16
|
-
@name("Block dangerous
|
|
17
|
-
@description("
|
|
19
|
+
@id("trust-safety.block-dangerous-topics")
|
|
20
|
+
@name("Block dangerous topics")
|
|
21
|
+
@description("Blocks process_prompt when content_topics contains weapons or explosives categories.")
|
|
18
22
|
@severity("critical")
|
|
19
|
-
@tags("
|
|
23
|
+
@tags("category:trust-safety,threat:harmful,detection:ml,compliance:eu-ai-act")
|
|
24
|
+
@reject_message("Prompt blocked: content classified under a dangerous topic (weapons, explosives, illegal synthesis).")
|
|
20
25
|
forbid (
|
|
21
26
|
principal,
|
|
22
27
|
action == Guardrails::Action::"process_prompt",
|
|
23
28
|
resource
|
|
24
|
-
)
|
|
29
|
+
)
|
|
30
|
+
when {
|
|
25
31
|
context has content_topics &&
|
|
26
|
-
(
|
|
27
|
-
|
|
28
|
-
|
|
32
|
+
(
|
|
33
|
+
context.content_topics.contains("weapons_manufacturing") ||
|
|
34
|
+
context.content_topics.contains("explosive_materials") ||
|
|
35
|
+
context.content_topics.contains("illegal_synthesis")
|
|
36
|
+
)
|
|
29
37
|
};
|
|
30
38
|
|
|
31
|
-
@id("
|
|
32
|
-
@name("Block controlled
|
|
33
|
-
@description("
|
|
39
|
+
@id("trust-safety.block-controlled-substances")
|
|
40
|
+
@name("Block controlled substances")
|
|
41
|
+
@description("Blocks process_prompt when content_topics contains controlled substances or precursor chemicals.")
|
|
34
42
|
@severity("high")
|
|
35
|
-
@tags("
|
|
43
|
+
@tags("category:trust-safety,threat:harmful,detection:ml,compliance:eu-ai-act")
|
|
44
|
+
@reject_message("Prompt blocked: content discusses controlled substances or precursor chemicals.")
|
|
36
45
|
forbid (
|
|
37
46
|
principal,
|
|
38
47
|
action == Guardrails::Action::"process_prompt",
|
|
39
48
|
resource
|
|
40
|
-
)
|
|
49
|
+
)
|
|
50
|
+
when {
|
|
41
51
|
context has content_topics &&
|
|
42
|
-
(
|
|
43
|
-
|
|
44
|
-
|
|
52
|
+
(
|
|
53
|
+
context.content_topics.contains("controlled_substances") ||
|
|
54
|
+
context.content_topics.contains("drug_manufacturing") ||
|
|
55
|
+
context.content_topics.contains("precursor_chemicals")
|
|
56
|
+
)
|
|
45
57
|
};
|
|
46
58
|
|
|
47
|
-
@id("
|
|
59
|
+
@id("trust-safety.block-restricted-topics")
|
|
48
60
|
@name("Block high-confidence restricted topics")
|
|
49
|
-
@description("
|
|
61
|
+
@description("Blocks process_prompt when topic_confidence >= 81 and content_topics contains a restricted category.")
|
|
50
62
|
@severity("high")
|
|
51
|
-
@tags("
|
|
63
|
+
@tags("category:trust-safety,threat:harmful,detection:ml")
|
|
64
|
+
@reject_message("Prompt blocked: content high-confidence classified into a restricted topic (illegal activity, fraud, social engineering).")
|
|
52
65
|
forbid (
|
|
53
66
|
principal,
|
|
54
67
|
action == Guardrails::Action::"process_prompt",
|
|
55
68
|
resource
|
|
56
|
-
)
|
|
57
|
-
|
|
69
|
+
)
|
|
70
|
+
when {
|
|
71
|
+
context has topic_confidence && context.topic_confidence >= 81 &&
|
|
58
72
|
context has content_topics &&
|
|
59
|
-
(
|
|
60
|
-
|
|
61
|
-
|
|
73
|
+
(
|
|
74
|
+
context.content_topics.contains("illegal_activity") ||
|
|
75
|
+
context.content_topics.contains("financial_fraud") ||
|
|
76
|
+
context.content_topics.contains("social_engineering")
|
|
77
|
+
)
|
|
62
78
|
};
|
|
@@ -1,58 +1,66 @@
|
|
|
1
1
|
// =============================================================================
|
|
2
|
-
// Tool Risk
|
|
2
|
+
// Tool Risk (Default)
|
|
3
3
|
// =============================================================================
|
|
4
|
-
// Blocks dangerous tool calls based on risk scoring
|
|
5
|
-
//
|
|
4
|
+
// Blocks dangerous tool calls based on risk scoring, tool classification,
|
|
5
|
+
// and well-known dangerous tool names (shell, execute_command).
|
|
6
6
|
//
|
|
7
|
-
// Context keys
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
7
|
+
// Context keys consumed:
|
|
8
|
+
// - tool_name: String
|
|
9
|
+
// - tool_risk_score: Long (0-100)
|
|
10
|
+
// - tool_is_sensitive: Bool
|
|
11
|
+
// - tool_category: String ("safe" | "sensitive" | "dangerous")
|
|
12
12
|
//
|
|
13
|
-
//
|
|
13
|
+
// Compliance:
|
|
14
|
+
// - OWASP LLM06, OWASP ASI02
|
|
15
|
+
//
|
|
16
|
+
// Category: agent-security
|
|
14
17
|
// Namespace: Guardrails
|
|
15
18
|
// =============================================================================
|
|
16
19
|
|
|
17
|
-
@id("
|
|
20
|
+
@id("agent-security.block-dangerous-tool")
|
|
18
21
|
@name("Block dangerous tools")
|
|
19
|
-
@description("
|
|
22
|
+
@description("Blocks call_tool when tool_risk_score >= 86 or tool_category equals \"dangerous\".")
|
|
20
23
|
@severity("critical")
|
|
21
|
-
@tags("
|
|
24
|
+
@tags("category:agent-security,detection:aggregate,surface:call-tool,owasp:llm06,owasp:asi02")
|
|
25
|
+
@reject_message("Tool execution blocked: tool is classified as dangerous or scored a high risk.")
|
|
22
26
|
forbid (
|
|
23
27
|
principal,
|
|
24
28
|
action == Guardrails::Action::"call_tool",
|
|
25
29
|
resource
|
|
26
|
-
)
|
|
27
|
-
|
|
30
|
+
)
|
|
31
|
+
when {
|
|
32
|
+
(context has tool_risk_score && context.tool_risk_score >= 86) ||
|
|
28
33
|
(context has tool_category && context.tool_category == "dangerous")
|
|
29
34
|
};
|
|
30
35
|
|
|
31
|
-
@id("
|
|
36
|
+
@id("agent-security.block-shell-commands")
|
|
32
37
|
@name("Block shell command execution")
|
|
33
|
-
@description("
|
|
38
|
+
@description("Blocks call_tool when tool_name equals \"shell\" or \"execute_command\".")
|
|
34
39
|
@severity("high")
|
|
35
|
-
@tags("
|
|
40
|
+
@tags("category:agent-security,threat:command-injection,detection:rule,surface:call-tool,mitre:t1059")
|
|
41
|
+
@reject_message("Tool execution blocked: direct shell or command execution is not permitted.")
|
|
36
42
|
forbid (
|
|
37
43
|
principal,
|
|
38
44
|
action == Guardrails::Action::"call_tool",
|
|
39
45
|
resource
|
|
40
|
-
)
|
|
46
|
+
)
|
|
47
|
+
when {
|
|
41
48
|
context has tool_name &&
|
|
42
|
-
(context.tool_name == "shell" ||
|
|
43
|
-
context.tool_name == "execute_command")
|
|
49
|
+
(context.tool_name == "shell" || context.tool_name == "execute_command")
|
|
44
50
|
};
|
|
45
51
|
|
|
46
|
-
@id("
|
|
47
|
-
@name("Block high-risk
|
|
48
|
-
@description("
|
|
52
|
+
@id("agent-security.block-sensitive-high-risk")
|
|
53
|
+
@name("Block sensitive high-risk tools")
|
|
54
|
+
@description("Blocks call_tool when tool_is_sensitive is true and tool_risk_score >= 71.")
|
|
49
55
|
@severity("high")
|
|
50
|
-
@tags("
|
|
56
|
+
@tags("category:agent-security,detection:aggregate,surface:call-tool,owasp:llm06")
|
|
57
|
+
@reject_message("Tool execution blocked: a sensitive tool was called with elevated risk score.")
|
|
51
58
|
forbid (
|
|
52
59
|
principal,
|
|
53
60
|
action == Guardrails::Action::"call_tool",
|
|
54
61
|
resource
|
|
55
|
-
)
|
|
62
|
+
)
|
|
63
|
+
when {
|
|
56
64
|
context has tool_is_sensitive && context.tool_is_sensitive == true &&
|
|
57
|
-
context has tool_risk_score && context.tool_risk_score
|
|
65
|
+
context has tool_risk_score && context.tool_risk_score >= 71
|
|
58
66
|
};
|