@highflame/policy 2.1.36 → 2.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/_schemas/ai_gateway/context.json +431 -11
  2. package/_schemas/ai_gateway/schema.cedarschema +91 -11
  3. package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
  4. package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
  5. package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
  6. package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
  7. package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
  8. package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
  9. package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
  10. package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
  11. package/_schemas/ai_gateway/templates/templates.json +42 -47
  12. package/_schemas/guardrails/context.json +12 -12
  13. package/_schemas/guardrails/schema.cedarschema +12 -12
  14. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
  15. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
  16. package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
  17. package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
  18. package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
  19. package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
  20. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
  21. package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
  22. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
  23. package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
  24. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
  25. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
  26. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
  27. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
  28. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
  29. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
  30. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
  31. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
  32. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
  33. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
  34. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
  35. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
  36. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
  37. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
  38. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
  39. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
  40. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
  41. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
  42. package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
  43. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
  44. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
  45. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
  46. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
  47. package/_schemas/guardrails/templates/templates.json +188 -210
  48. package/_schemas/overwatch/context.json +14 -14
  49. package/_schemas/overwatch/schema.cedarschema +12 -12
  50. package/_schemas/sentry/context.json +11 -11
  51. package/_schemas/sentry/schema.cedarschema +11 -11
  52. package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
  53. package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
  54. package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
  55. package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
  56. package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
  57. package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
  58. package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
  59. package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
  60. package/_schemas/sentry/templates/templates.json +46 -46
  61. package/dist/ai_gateway-context.gen.d.ts +18 -4
  62. package/dist/ai_gateway-context.gen.js +18 -4
  63. package/dist/ai_gateway-defaults.gen.d.ts +1 -1
  64. package/dist/ai_gateway-defaults.gen.js +377 -313
  65. package/dist/guardrails-context.gen.d.ts +5 -5
  66. package/dist/guardrails-context.gen.js +5 -5
  67. package/dist/guardrails-defaults.gen.d.ts +1 -1
  68. package/dist/guardrails-defaults.gen.js +2070 -1849
  69. package/dist/overwatch-context.gen.d.ts +5 -5
  70. package/dist/overwatch-context.gen.js +5 -5
  71. package/dist/overwatch-defaults.gen.d.ts +1 -1
  72. package/dist/overwatch-defaults.gen.js +635 -595
  73. package/dist/sentry-context.gen.d.ts +3 -3
  74. package/dist/sentry-context.gen.js +3 -3
  75. package/dist/sentry-defaults.gen.d.ts +1 -1
  76. package/dist/sentry-defaults.gen.js +379 -460
  77. package/dist/service-schemas.gen.d.ts +4 -4
  78. package/dist/service-schemas.gen.js +249 -99
  79. package/package.json +1 -1
@@ -1,70 +1,82 @@
1
1
  // =============================================================================
2
- // Injection & Jailbreak Detection Policy
2
+ // Injection & Jailbreak Detection (Default)
3
3
  // =============================================================================
4
- // Blocks prompt injection, jailbreak attempts, and command injection.
5
- // Uses ML-based confidence scores from normalized context.
4
+ // Blocks prompt injection and jailbreak attempts using ML classifier
5
+ // confidence scores plus an invisible-character defence for encoded
6
+ // injection payloads.
6
7
  //
7
- // Context keys used (normalized by projection layer):
8
- // - injection_confidence: Long (0-100) - Overall injection confidence
9
- // - jailbreak_confidence: Long (0-100) - Jailbreak attempt confidence
10
- // - injection_type: String - Type of injection detected
11
- // - contains_invisible_chars: Bool - Invisible Unicode characters detected
12
- // - invisible_chars_score: Long (0-100) - Invisible character density
8
+ // Context keys consumed:
9
+ // - injection_score: Long (0-100)
10
+ // - jailbreak_score: Long (0-100)
11
+ // - invisible_chars_detected: Bool
12
+ // - invisible_chars_score: Long (0-100)
13
13
  //
14
- // Category: security
14
+ // Compliance:
15
+ // - OWASP LLM01, OWASP LLM02
16
+ // - MITRE ATLAS AML.T0051, AML.T0054
17
+ //
18
+ // Category: security
15
19
  // Namespace: Guardrails
16
20
  // =============================================================================
17
21
 
18
- @id("injection-block-high-confidence")
19
- @name("Block high-confidence injection attempts")
20
- @description("Forbids requests with injection confidence above 85%")
22
+ @id("security.block-injection-ml")
23
+ @name("Block injection (ML)")
24
+ @description("Blocks process_prompt and call_tool when injection_score >= 86.")
21
25
  @severity("high")
22
- @tags("injection,jailbreak,security")
26
+ @tags("category:security,threat:injection,detection:ml,owasp:llm01,mitre:atlas-t0051")
27
+ @reject_message("Prompt blocked: the ML classifier detected prompt injection with high confidence.")
23
28
  forbid (
24
29
  principal,
25
30
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
26
31
  resource
27
- ) when {
28
- context has injection_confidence && context.injection_confidence > 85
32
+ )
33
+ when {
34
+ context has injection_score && context.injection_score >= 86
29
35
  };
30
36
 
31
- @id("jailbreak-block-high-confidence")
32
- @name("Block high-confidence jailbreak attempts")
33
- @description("Forbids requests with jailbreak confidence above 80%")
37
+ @id("security.block-jailbreak-ml")
38
+ @name("Block jailbreak (ML)")
39
+ @description("Blocks process_prompt when jailbreak_score >= 81.")
34
40
  @severity("high")
35
- @tags("jailbreak,security")
41
+ @tags("category:security,threat:jailbreak,detection:ml,owasp:llm02,mitre:atlas-t0054")
42
+ @reject_message("Prompt blocked: the ML classifier detected a jailbreak attempt with high confidence.")
36
43
  forbid (
37
44
  principal,
38
45
  action == Guardrails::Action::"process_prompt",
39
46
  resource
40
- ) when {
41
- context has jailbreak_confidence && context.jailbreak_confidence > 80
47
+ )
48
+ when {
49
+ context has jailbreak_score && context.jailbreak_score >= 81
42
50
  };
43
51
 
44
- @id("injection-combined-threshold")
45
- @name("Block combined injection and jailbreak")
46
- @description("Forbids requests with moderate scores in both injection and jailbreak")
52
+ @id("security.block-injection-combined")
53
+ @name("Block injection + jailbreak (combined)")
54
+ @description("Blocks process_prompt when injection_score >= 61 and jailbreak_score >= 61.")
47
55
  @severity("high")
48
- @tags("injection,jailbreak,security")
56
+ @tags("category:security,threat:injection,threat:jailbreak,detection:ml")
57
+ @reject_message("Prompt blocked: moderate injection and jailbreak signals were detected together.")
49
58
  forbid (
50
59
  principal,
51
60
  action == Guardrails::Action::"process_prompt",
52
61
  resource
53
- ) when {
54
- context has injection_confidence && context has jailbreak_confidence &&
55
- context.injection_confidence > 60 && context.jailbreak_confidence > 60
62
+ )
63
+ when {
64
+ context has injection_score && context has jailbreak_score &&
65
+ context.injection_score >= 61 && context.jailbreak_score >= 61
56
66
  };
57
67
 
58
- @id("injection-invisible-chars")
59
- @name("Block invisible character injection")
60
- @description("Forbids requests containing invisible Unicode characters (zero-width joiners, etc.) commonly used for prompt injection")
68
+ @id("security.block-invisible-chars")
69
+ @name("Block invisible-character injection")
70
+ @description("Blocks process_prompt when invisible_chars_detected is true and invisible_chars_score >= 51.")
61
71
  @severity("high")
62
- @tags("injection,unicode,security")
72
+ @tags("category:security,threat:invisible-chars,threat:injection,detection:pattern")
73
+ @reject_message("Prompt blocked: invisible Unicode characters often used for injection were detected.")
63
74
  forbid (
64
75
  principal,
65
76
  action == Guardrails::Action::"process_prompt",
66
77
  resource
67
- ) when {
68
- context has contains_invisible_chars && context.contains_invisible_chars == true &&
69
- context has invisible_chars_score && context.invisible_chars_score > 50
78
+ )
79
+ when {
80
+ context has invisible_chars_detected && context.invisible_chars_detected == true &&
81
+ context has invisible_chars_score && context.invisible_chars_score >= 51
70
82
  };
@@ -1,44 +1,51 @@
1
1
  // =============================================================================
2
- // PII (Personally Identifiable Information) Policy
2
+ // PII Detection (Default)
3
3
  // =============================================================================
4
- // Blocks content containing PII in output responses.
5
- // Typically applied to LLM outputs to prevent data leakage.
4
+ // Blocks LLM responses that contain personally identifiable information,
5
+ // with a stricter rule for the most sensitive PII types (SSN, credit card,
6
+ // passport).
6
7
  //
7
- // Context keys used (normalized by projection layer):
8
- // - pii_detected: bool - Whether PII was found
9
- // - pii_count: Long - Number of PII matches
10
- // - pii_types: Set<String> - Types of PII detected
11
- // - direction: String - "input" or "output"
8
+ // Context keys consumed:
9
+ // - pii_detected: Bool
10
+ // - pii_types: Set<String>
11
+ // - direction: String ("input" | "output")
12
12
  //
13
- // Category: privacy
13
+ // Compliance:
14
+ // - GDPR, HIPAA, PCI-DSS (depending on data classification)
15
+ //
16
+ // Category: privacy
14
17
  // Namespace: Guardrails
15
18
  // =============================================================================
16
19
 
17
- @id("pii-block-output")
20
+ @id("privacy.block-pii-output")
18
21
  @name("Block PII in outputs")
19
- @description("Forbids LLM responses that contain PII (prevents data leakage)")
22
+ @description("Blocks process_prompt outputs when pii_detected is true.")
20
23
  @severity("high")
21
- @tags("pii,privacy,data-protection")
24
+ @tags("category:privacy,threat:pii,detection:rule,surface:process-prompt,compliance:gdpr")
25
+ @reject_message("Response blocked: personally identifiable information was detected in the output.")
22
26
  forbid (
23
27
  principal,
24
28
  action == Guardrails::Action::"process_prompt",
25
29
  resource
26
- ) when {
30
+ )
31
+ when {
27
32
  context has pii_detected && context.pii_detected == true &&
28
- context.direction == "output"
33
+ context has direction && context.direction == "output"
29
34
  };
30
35
 
31
- @id("pii-block-sensitive-types")
32
- @name("Block sensitive PII types")
33
- @description("Forbids outputs containing SSN, credit cards, or passport numbers")
36
+ @id("privacy.block-pii-sensitive")
37
+ @name("Block sensitive PII types in outputs")
38
+ @description("Blocks process_prompt outputs when pii_types contains SSN, credit_card, or passport.")
34
39
  @severity("critical")
35
- @tags("pii,privacy,sensitive-data")
40
+ @tags("category:privacy,threat:pii,detection:rule,surface:process-prompt,compliance:gdpr,compliance:hipaa,compliance:pci-dss")
41
+ @reject_message("Response blocked: highly sensitive PII (SSN, credit card, or passport) was detected.")
36
42
  forbid (
37
43
  principal,
38
44
  action == Guardrails::Action::"process_prompt",
39
45
  resource
40
- ) when {
41
- context.direction == "output" &&
46
+ )
47
+ when {
48
+ context has direction && context.direction == "output" &&
42
49
  context has pii_types &&
43
50
  (
44
51
  context.pii_types.contains("ssn") ||
@@ -1,40 +1,57 @@
1
1
  // =============================================================================
2
- // Secrets Detection Policy
2
+ // Secrets Detection (Default)
3
3
  // =============================================================================
4
- // Blocks requests containing API keys, tokens, credentials, or other secrets.
5
- // Applies to both input prompts and output responses.
4
+ // Blocks content containing API keys, tokens, credentials, or other secrets
5
+ // across LLM prompts, tool calls, and file operations.
6
6
  //
7
- // Context keys used (normalized by projection layer):
8
- // - contains_secrets: bool - Whether secrets were detected
9
- // - secret_count: Long - Number of secret matches
10
- // - secret_types: Set<String> - Types of secrets found
7
+ // Context keys consumed:
8
+ // - secrets_detected: Bool
9
+ // - secret_count: Long
10
+ // - secret_types: Set<String>
11
11
  //
12
- // Category: security
12
+ // Compliance:
13
+ // - OWASP LLM06
14
+ //
15
+ // Category: data-protection
13
16
  // Namespace: Guardrails
14
17
  // =============================================================================
15
18
 
16
- @id("secrets-block-all")
17
- @name("Block content containing secrets")
18
- @description("Forbids requests that contain API keys, tokens, or credentials")
19
+ @id("data-protection.block-secrets")
20
+ @name("Block secrets")
21
+ @description("Blocks process_prompt, call_tool, read_file, and write_file when secrets_detected is true.")
19
22
  @severity("critical")
20
- @tags("secrets,security,data-leak")
23
+ @tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
24
+ @reject_message("Request blocked: secrets or credentials were detected in the content.")
21
25
  forbid (
22
26
  principal,
23
- action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
27
+ action in [
28
+ Guardrails::Action::"process_prompt",
29
+ Guardrails::Action::"call_tool",
30
+ Guardrails::Action::"read_file",
31
+ Guardrails::Action::"write_file"
32
+ ],
24
33
  resource
25
- ) when {
26
- context has contains_secrets && context.contains_secrets == true
34
+ )
35
+ when {
36
+ context has secrets_detected && context.secrets_detected == true
27
37
  };
28
38
 
29
- @id("secrets-block-high-count")
30
- @name("Block multiple secrets")
31
- @description("Forbids requests with multiple secret matches (potential data dump)")
39
+ @id("data-protection.block-secrets-bulk")
40
+ @name("Block secrets (bulk)")
41
+ @description("Blocks process_prompt, call_tool, read_file, and write_file when secret_count >= 3.")
32
42
  @severity("critical")
33
- @tags("secrets,security,data-leak")
43
+ @tags("category:data-protection,threat:secrets,threat:exfiltration,detection:aggregate,owasp:llm06")
44
+ @reject_message("Request blocked: multiple distinct secrets were detected — possible credential dump.")
34
45
  forbid (
35
46
  principal,
36
- action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
47
+ action in [
48
+ Guardrails::Action::"process_prompt",
49
+ Guardrails::Action::"call_tool",
50
+ Guardrails::Action::"read_file",
51
+ Guardrails::Action::"write_file"
52
+ ],
37
53
  resource
38
- ) when {
39
- context has secret_count && context.secret_count > 2
54
+ )
55
+ when {
56
+ context has secret_count && context.secret_count >= 3
40
57
  };
@@ -1,59 +1,72 @@
1
1
  // =============================================================================
2
- // Security Pattern Detection Policy
2
+ // Security Pattern Detection (Default)
3
3
  // =============================================================================
4
- // Blocks command injection, path traversal, and SQL injection attacks using
4
+ // Blocks command injection, path traversal, and SQL injection using
5
5
  // regex-based pattern detection from Shield's security detectors.
6
6
  //
7
- // Context keys used (normalized by projection layer):
8
- // - command_injection_detected: Bool - Command injection pattern found
9
- // - command_injection_score: Long (0-100) - Detection confidence
10
- // - path_traversal_detected: Bool - Path traversal pattern found
11
- // - path_traversal_severity: String - Severity level (critical/high/medium/low)
12
- // - sql_injection_detected: Bool - SQL injection pattern found
13
- // - sql_injection_score: Long (0-100) - Detection confidence
7
+ // Context keys consumed:
8
+ // - command_injection_detected: Bool
9
+ // - path_traversal_detected: Bool
10
+ // - path_traversal_severity: String
11
+ // - sql_injection_detected: Bool
12
+ // - sql_injection_score: Long (0-100)
14
13
  //
15
- // Category: security
14
+ // Compliance:
15
+ // - MITRE T1059 (Command and Scripting Interpreter), T1005 (Data from Local System)
16
+ //
17
+ // Category: security
16
18
  // Namespace: Guardrails
17
19
  // =============================================================================
18
20
 
19
- @id("security-block-command-injection")
21
+ @id("security.block-command-injection")
20
22
  @name("Block command injection")
21
- @description("Forbids requests containing command injection patterns such as reverse shells, privilege escalation, or destructive commands")
23
+ @description("Blocks process_prompt and call_tool when command_injection_detected is true.")
22
24
  @severity("critical")
23
- @tags("command-injection,security")
25
+ @tags("category:security,threat:command-injection,detection:pattern,mitre:t1059")
26
+ @reject_message("Request blocked: command injection pattern detected — reverse shell, destructive command, or privilege escalation.")
24
27
  forbid (
25
28
  principal,
26
29
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
27
30
  resource
28
- ) when {
31
+ )
32
+ when {
29
33
  context has command_injection_detected && context.command_injection_detected == true
30
34
  };
31
35
 
32
- @id("security-block-path-traversal")
33
- @name("Block high-severity path traversal")
34
- @description("Forbids requests containing path traversal patterns targeting sensitive system files or using deep directory traversal")
36
+ @id("security.block-path-traversal")
37
+ @name("Block path traversal")
38
+ @description("Blocks process_prompt, call_tool, read_file, and write_file when path_traversal_detected is true and severity is high or critical.")
35
39
  @severity("high")
36
- @tags("path-traversal,security")
40
+ @tags("category:security,threat:path-traversal,detection:pattern,mitre:t1005")
41
+ @reject_message("Request blocked: path traversal pattern detected — sensitive system files or deep directory traversal.")
37
42
  forbid (
38
43
  principal,
39
- action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
44
+ action in [
45
+ Guardrails::Action::"process_prompt",
46
+ Guardrails::Action::"call_tool",
47
+ Guardrails::Action::"read_file",
48
+ Guardrails::Action::"write_file"
49
+ ],
40
50
  resource
41
- ) when {
51
+ )
52
+ when {
42
53
  context has path_traversal_detected && context.path_traversal_detected == true &&
43
54
  context has path_traversal_severity &&
44
55
  (context.path_traversal_severity == "critical" || context.path_traversal_severity == "high")
45
56
  };
46
57
 
47
- @id("security-block-sql-injection")
48
- @name("Block high-confidence SQL injection")
49
- @description("Forbids requests with SQL injection confidence above 75% (tautologies, UNION-based, destructive queries)")
58
+ @id("security.block-sql-injection")
59
+ @name("Block SQL injection")
60
+ @description("Blocks process_prompt and call_tool when sql_injection_detected is true and sql_injection_score >= 75.")
50
61
  @severity("high")
51
- @tags("sql-injection,security")
62
+ @tags("category:security,threat:sql-injection,detection:pattern")
63
+ @reject_message("Request blocked: SQL injection pattern detected — tautology, UNION attack, or destructive query.")
52
64
  forbid (
53
65
  principal,
54
66
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
55
67
  resource
56
- ) when {
68
+ )
69
+ when {
57
70
  context has sql_injection_detected && context.sql_injection_detected == true &&
58
71
  context has sql_injection_score && context.sql_injection_score >= 75
59
72
  };
@@ -1,62 +1,78 @@
1
1
  // =============================================================================
2
- // Semantic Topic Enforcement Policy
2
+ // Semantic Topic Enforcement (Default)
3
3
  // =============================================================================
4
- // Blocks content based on semantic topic classification. Enables per-application
5
- // content restrictions (e.g., medical agent cannot discuss controlled substances).
4
+ // Blocks content based on semantic topic classification used for
5
+ // application-specific topic restrictions (medical, weapons, illegal
6
+ // activity, etc.).
6
7
  //
7
- // Context keys used (populated by topic classifier detector):
8
- // - content_topics: Set<String> - Semantic topics detected in content
9
- // - topic_confidence: Long (0-100) - Classifier confidence score
8
+ // Context keys consumed:
9
+ // - content_topics: Set<String>
10
+ // - topic_confidence: Long (0-100)
10
11
  //
11
- // Category: semantic
12
+ // Compliance:
13
+ // - EU AI Act, ISO 42001
14
+ //
15
+ // Category: trust-safety
12
16
  // Namespace: Guardrails
13
17
  // =============================================================================
14
18
 
15
- @id("semantic-block-dangerous-topics")
16
- @name("Block dangerous content topics")
17
- @description("Forbids content classified under dangerous topics such as weapons manufacturing, explosives, or illegal synthesis")
19
+ @id("trust-safety.block-dangerous-topics")
20
+ @name("Block dangerous topics")
21
+ @description("Blocks process_prompt when content_topics contains weapons or explosives categories.")
18
22
  @severity("critical")
19
- @tags("semantic,compliance,safety")
23
+ @tags("category:trust-safety,threat:harmful,detection:ml,compliance:eu-ai-act")
24
+ @reject_message("Prompt blocked: content classified under a dangerous topic (weapons, explosives, illegal synthesis).")
20
25
  forbid (
21
26
  principal,
22
27
  action == Guardrails::Action::"process_prompt",
23
28
  resource
24
- ) when {
29
+ )
30
+ when {
25
31
  context has content_topics &&
26
- (context.content_topics.contains("weapons_manufacturing") ||
27
- context.content_topics.contains("explosive_materials") ||
28
- context.content_topics.contains("illegal_synthesis"))
32
+ (
33
+ context.content_topics.contains("weapons_manufacturing") ||
34
+ context.content_topics.contains("explosive_materials") ||
35
+ context.content_topics.contains("illegal_synthesis")
36
+ )
29
37
  };
30
38
 
31
- @id("semantic-block-controlled-substances")
32
- @name("Block controlled substance content")
33
- @description("Forbids content discussing controlled substances, drug manufacturing, or precursor chemicals")
39
+ @id("trust-safety.block-controlled-substances")
40
+ @name("Block controlled substances")
41
+ @description("Blocks process_prompt when content_topics contains controlled substances or precursor chemicals.")
34
42
  @severity("high")
35
- @tags("semantic,compliance,medical")
43
+ @tags("category:trust-safety,threat:harmful,detection:ml,compliance:eu-ai-act")
44
+ @reject_message("Prompt blocked: content discusses controlled substances or precursor chemicals.")
36
45
  forbid (
37
46
  principal,
38
47
  action == Guardrails::Action::"process_prompt",
39
48
  resource
40
- ) when {
49
+ )
50
+ when {
41
51
  context has content_topics &&
42
- (context.content_topics.contains("controlled_substances") ||
43
- context.content_topics.contains("drug_manufacturing") ||
44
- context.content_topics.contains("precursor_chemicals"))
52
+ (
53
+ context.content_topics.contains("controlled_substances") ||
54
+ context.content_topics.contains("drug_manufacturing") ||
55
+ context.content_topics.contains("precursor_chemicals")
56
+ )
45
57
  };
46
58
 
47
- @id("semantic-block-high-confidence-restricted")
59
+ @id("trust-safety.block-restricted-topics")
48
60
  @name("Block high-confidence restricted topics")
49
- @description("Forbids content with high-confidence classification in any restricted topic category")
61
+ @description("Blocks process_prompt when topic_confidence >= 81 and content_topics contains a restricted category.")
50
62
  @severity("high")
51
- @tags("semantic,compliance")
63
+ @tags("category:trust-safety,threat:harmful,detection:ml")
64
+ @reject_message("Prompt blocked: content high-confidence classified into a restricted topic (illegal activity, fraud, social engineering).")
52
65
  forbid (
53
66
  principal,
54
67
  action == Guardrails::Action::"process_prompt",
55
68
  resource
56
- ) when {
57
- context has topic_confidence && context.topic_confidence > 80 &&
69
+ )
70
+ when {
71
+ context has topic_confidence && context.topic_confidence >= 81 &&
58
72
  context has content_topics &&
59
- (context.content_topics.contains("illegal_activity") ||
60
- context.content_topics.contains("financial_fraud") ||
61
- context.content_topics.contains("social_engineering"))
73
+ (
74
+ context.content_topics.contains("illegal_activity") ||
75
+ context.content_topics.contains("financial_fraud") ||
76
+ context.content_topics.contains("social_engineering")
77
+ )
62
78
  };
@@ -1,58 +1,66 @@
1
1
  // =============================================================================
2
- // Tool Risk Policy
2
+ // Tool Risk (Default)
3
3
  // =============================================================================
4
- // Blocks dangerous tool calls based on risk scoring.
5
- // Considers tool sensitivity, argument patterns, and MCP verification.
4
+ // Blocks dangerous tool calls based on risk scoring, tool classification,
5
+ // and well-known dangerous tool names (shell, execute_command).
6
6
  //
7
- // Context keys used (normalized by projection layer):
8
- // - tool_name: String - Name of the tool
9
- // - tool_risk_score: Long (0-100) - Computed risk score
10
- // - tool_is_sensitive: bool - Whether tool is classified as sensitive
11
- // - tool_category: String - "safe" | "sensitive" | "dangerous"
7
+ // Context keys consumed:
8
+ // - tool_name: String
9
+ // - tool_risk_score: Long (0-100)
10
+ // - tool_is_sensitive: Bool
11
+ // - tool_category: String ("safe" | "sensitive" | "dangerous")
12
12
  //
13
- // Category: agentic-security
13
+ // Compliance:
14
+ // - OWASP LLM06, OWASP ASI02
15
+ //
16
+ // Category: agent-security
14
17
  // Namespace: Guardrails
15
18
  // =============================================================================
16
19
 
17
- @id("tool-block-dangerous")
20
+ @id("agent-security.block-dangerous-tool")
18
21
  @name("Block dangerous tools")
19
- @description("Forbids tools classified as dangerous (risk > 85)")
22
+ @description("Blocks call_tool when tool_risk_score >= 86 or tool_category equals \"dangerous\".")
20
23
  @severity("critical")
21
- @tags("tools,agentic,security")
24
+ @tags("category:agent-security,detection:aggregate,surface:call-tool,owasp:llm06,owasp:asi02")
25
+ @reject_message("Tool execution blocked: tool is classified as dangerous or scored a high risk.")
22
26
  forbid (
23
27
  principal,
24
28
  action == Guardrails::Action::"call_tool",
25
29
  resource
26
- ) when {
27
- (context has tool_risk_score && context.tool_risk_score > 85) ||
30
+ )
31
+ when {
32
+ (context has tool_risk_score && context.tool_risk_score >= 86) ||
28
33
  (context has tool_category && context.tool_category == "dangerous")
29
34
  };
30
35
 
31
- @id("tool-block-shell-commands")
36
+ @id("agent-security.block-shell-commands")
32
37
  @name("Block shell command execution")
33
- @description("Forbids direct shell/execute_command tool calls")
38
+ @description("Blocks call_tool when tool_name equals \"shell\" or \"execute_command\".")
34
39
  @severity("high")
35
- @tags("tools,shell,security")
40
+ @tags("category:agent-security,threat:command-injection,detection:rule,surface:call-tool,mitre:t1059")
41
+ @reject_message("Tool execution blocked: direct shell or command execution is not permitted.")
36
42
  forbid (
37
43
  principal,
38
44
  action == Guardrails::Action::"call_tool",
39
45
  resource
40
- ) when {
46
+ )
47
+ when {
41
48
  context has tool_name &&
42
- (context.tool_name == "shell" ||
43
- context.tool_name == "execute_command")
49
+ (context.tool_name == "shell" || context.tool_name == "execute_command")
44
50
  };
45
51
 
46
- @id("tool-block-sensitive-high-risk")
47
- @name("Block high-risk sensitive tools")
48
- @description("Forbids sensitive tool calls with elevated risk scores")
52
+ @id("agent-security.block-sensitive-high-risk")
53
+ @name("Block sensitive high-risk tools")
54
+ @description("Blocks call_tool when tool_is_sensitive is true and tool_risk_score >= 71.")
49
55
  @severity("high")
50
- @tags("tools,agentic,security")
56
+ @tags("category:agent-security,detection:aggregate,surface:call-tool,owasp:llm06")
57
+ @reject_message("Tool execution blocked: a sensitive tool was called with elevated risk score.")
51
58
  forbid (
52
59
  principal,
53
60
  action == Guardrails::Action::"call_tool",
54
61
  resource
55
- ) when {
62
+ )
63
+ when {
56
64
  context has tool_is_sensitive && context.tool_is_sensitive == true &&
57
- context has tool_risk_score && context.tool_risk_score > 70
65
+ context has tool_risk_score && context.tool_risk_score >= 71
58
66
  };