@highflame/policy 2.1.35 → 2.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/_schemas/ai_gateway/context.json +431 -11
  2. package/_schemas/ai_gateway/schema.cedarschema +91 -11
  3. package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
  4. package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
  5. package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
  6. package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
  7. package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
  8. package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
  9. package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
  10. package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
  11. package/_schemas/ai_gateway/templates/templates.json +52 -50
  12. package/_schemas/guardrails/context.json +12 -12
  13. package/_schemas/guardrails/schema.cedarschema +12 -12
  14. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
  15. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
  16. package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
  17. package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
  18. package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
  19. package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
  20. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
  21. package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
  22. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
  23. package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
  24. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
  25. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
  26. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
  27. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
  28. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
  29. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
  30. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
  31. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
  32. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
  33. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
  34. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
  35. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
  36. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
  37. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
  38. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
  39. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
  40. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
  41. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
  42. package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
  43. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
  44. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
  45. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
  46. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
  47. package/_schemas/guardrails/templates/templates.json +198 -217
  48. package/_schemas/overwatch/context.json +14 -14
  49. package/_schemas/overwatch/schema.cedarschema +12 -12
  50. package/_schemas/sentry/context.json +11 -11
  51. package/_schemas/sentry/schema.cedarschema +11 -11
  52. package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
  53. package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
  54. package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
  55. package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
  56. package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
  57. package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
  58. package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
  59. package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
  60. package/_schemas/sentry/templates/templates.json +53 -43
  61. package/dist/ai_gateway-context.gen.d.ts +18 -4
  62. package/dist/ai_gateway-context.gen.js +18 -4
  63. package/dist/ai_gateway-defaults.gen.d.ts +4 -1
  64. package/dist/ai_gateway-defaults.gen.js +398 -320
  65. package/dist/builder.d.ts +2 -1
  66. package/dist/builder.js +5 -2
  67. package/dist/guardrails-context.gen.d.ts +5 -5
  68. package/dist/guardrails-context.gen.js +5 -5
  69. package/dist/guardrails-defaults.gen.d.ts +4 -1
  70. package/dist/guardrails-defaults.gen.js +2130 -1903
  71. package/dist/overwatch-context.gen.d.ts +5 -5
  72. package/dist/overwatch-context.gen.js +5 -5
  73. package/dist/overwatch-defaults.gen.d.ts +4 -1
  74. package/dist/overwatch-defaults.gen.js +560 -566
  75. package/dist/sentry-context.gen.d.ts +3 -3
  76. package/dist/sentry-context.gen.js +3 -3
  77. package/dist/sentry-defaults.gen.d.ts +4 -1
  78. package/dist/sentry-defaults.gen.js +392 -453
  79. package/dist/service-schemas.gen.d.ts +4 -4
  80. package/dist/service-schemas.gen.js +249 -99
  81. package/package.json +1 -1
@@ -1,66 +1,74 @@
1
1
  // =============================================================================
2
2
  // Advanced Detection — Granular Secrets
3
3
  // =============================================================================
4
- // Blocks specific high-risk credential types and API tokens using granular
5
- // secret_types matching. Goes beyond the boolean contains_secrets detection
6
- // to identify and block cloud provider keys, GitHub tokens, SSH keys,
7
- // database credentials, and API tokens.
4
+ // Blocks specific high-risk credential types using granular secret_types
5
+ // matching. Goes beyond the boolean secrets_detected flag to identify
6
+ // cloud provider keys, GitHub tokens, SSH keys, database credentials,
7
+ // and general API tokens.
8
8
  //
9
- // These policies benefit any Guardrails deployment — not just coding agents.
10
- //
11
- // Adapted from Overwatch granular secret type policies for Guardrails namespace.
9
+ // Context keys consumed:
10
+ // - secret_types: Set<String>
12
11
  //
13
12
  // Compliance:
14
- // NIST 800-53 IA-5 (Authenticator Management)
15
- // NIST 800-53 SC-28 (Protection of Information at Rest)
16
- // MITRE ATT&CK T1552 (Unsecured Credentials)
17
- // CIS Benchmark 1.4 (Secrets Management)
13
+ // - NIST 800-53 IA-5, SC-28; MITRE ATT&CK T1552; CIS 1.4
18
14
  //
19
- // Category: security
15
+ // Category: data-protection
20
16
  // Namespace: Guardrails
21
17
  // =============================================================================
22
18
 
23
- // Block high-risk credential types across all actions
24
- @id("detection-block-high-risk-secret-types")
19
+ @id("data-protection.advanced-block-high-risk-secrets")
25
20
  @name("Block high-risk credential types")
26
- @description("Block content containing cloud provider keys (AWS, GCP, Azure), GitHub tokens, SSH private keys, or database connection strings. These credential types pose the highest exfiltration risk and must never pass through AI agents.")
21
+ @description("Blocks process_prompt, call_tool, read_file, and write_file when secret_types contains a cloud, GitHub, SSH, or database credential.")
27
22
  @severity("critical")
28
- @tags("profile,advanced-detection,secrets,aws,gcp,azure,github,ssh,database,nist-ia-5,mitre-t1552")
29
- @reject_message("Content blocked: high-risk credentials detected (cloud provider keys, GitHub tokens, SSH keys, or database credentials). Use a secrets manager — never pass credentials through AI agents.")
23
+ @tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
24
+ @reject_message("Content blocked: high-risk credentials detected (cloud keys, GitHub tokens, SSH keys, or database URLs).")
30
25
  forbid (
31
26
  principal,
32
- action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
27
+ action in [
28
+ Guardrails::Action::"process_prompt",
29
+ Guardrails::Action::"call_tool",
30
+ Guardrails::Action::"read_file",
31
+ Guardrails::Action::"write_file"
32
+ ],
33
33
  resource
34
34
  )
35
35
  when {
36
36
  context has secret_types &&
37
- (context.secret_types.contains("aws_access_key") ||
38
- context.secret_types.contains("aws_secret_key") ||
39
- context.secret_types.contains("gcp_service_account") ||
40
- context.secret_types.contains("azure_client_secret") ||
41
- context.secret_types.contains("github_token") ||
42
- context.secret_types.contains("github_pat") ||
43
- context.secret_types.contains("ssh_private_key") ||
44
- context.secret_types.contains("database_url"))
37
+ (
38
+ context.secret_types.contains("aws_access_key") ||
39
+ context.secret_types.contains("aws_secret_key") ||
40
+ context.secret_types.contains("gcp_service_account") ||
41
+ context.secret_types.contains("azure_client_secret") ||
42
+ context.secret_types.contains("github_token") ||
43
+ context.secret_types.contains("github_pat") ||
44
+ context.secret_types.contains("ssh_private_key") ||
45
+ context.secret_types.contains("database_url")
46
+ )
45
47
  };
46
48
 
47
- // Block API keys and bearer tokens across all actions
48
- @id("detection-block-api-keys")
49
+ @id("data-protection.advanced-block-api-tokens")
49
50
  @name("Block API keys and bearer tokens")
50
- @description("Block content containing generic API keys, bearer tokens, JWT tokens, and OAuth credentials. These are the most commonly leaked credential types in AI agent interactions.")
51
+ @description("Blocks process_prompt, call_tool, read_file, and write_file when secret_types contains api_key, bearer_token, jwt_token, or OAuth credentials.")
51
52
  @severity("high")
52
- @tags("profile,advanced-detection,secrets,api-key,bearer,jwt,oauth,nist-ia-5")
53
- @reject_message("Content blocked: API keys, bearer tokens, or OAuth credentials detected. These must never be passed through AI agent prompts or tool calls.")
53
+ @tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
54
+ @reject_message("Content blocked: API keys, bearer tokens, or OAuth credentials detected.")
54
55
  forbid (
55
56
  principal,
56
- action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
57
+ action in [
58
+ Guardrails::Action::"process_prompt",
59
+ Guardrails::Action::"call_tool",
60
+ Guardrails::Action::"read_file",
61
+ Guardrails::Action::"write_file"
62
+ ],
57
63
  resource
58
64
  )
59
65
  when {
60
66
  context has secret_types &&
61
- (context.secret_types.contains("api_key") ||
62
- context.secret_types.contains("bearer_token") ||
63
- context.secret_types.contains("jwt_token") ||
64
- context.secret_types.contains("oauth_token") ||
65
- context.secret_types.contains("oauth_secret"))
67
+ (
68
+ context.secret_types.contains("api_key") ||
69
+ context.secret_types.contains("bearer_token") ||
70
+ context.secret_types.contains("jwt_token") ||
71
+ context.secret_types.contains("oauth_token") ||
72
+ context.secret_types.contains("oauth_secret")
73
+ )
66
74
  };
@@ -1,30 +1,25 @@
1
1
  // =============================================================================
2
2
  // Advanced Detection — Threat Severity
3
3
  // =============================================================================
4
- // Severity-based catch-all policy that blocks any content flagged as critical
5
- // severity by detection engines. Acts as a safety net behind all other policies
6
- // — if any detector reports critical severity, the content is blocked regardless
7
- // of whether a specific category policy caught it.
4
+ // Catch-all that blocks any content flagged as critical severity by any
5
+ // detector. Acts as a safety net behind all other policies.
8
6
  //
9
- // This policy benefits any Guardrails deployment — not just coding agents.
10
- //
11
- // Adapted from Overwatch threat severity aggregation for Guardrails namespace.
7
+ // Context keys consumed:
8
+ // - highest_severity: String
12
9
  //
13
10
  // Compliance:
14
- // NIST 800-53 SI-3 (Malicious Code Protection)
15
- // NIST 800-53 SI-4 (Information System Monitoring)
11
+ // - NIST 800-53 SI-3, SI-4
16
12
  //
17
- // Category: security
13
+ // Category: security
18
14
  // Namespace: Guardrails
19
15
  // =============================================================================
20
16
 
21
- // Block any content with critical severity threats
22
- @id("detection-block-critical-severity")
23
- @name("Block critical severity threats")
24
- @description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source. Acts as a safety net behind all other policies.")
17
+ @id("security.advanced-block-critical-severity")
18
+ @name("Block critical-severity threats")
19
+ @description("Blocks process_prompt when highest_severity equals \"critical\".")
25
20
  @severity("critical")
26
- @tags("profile,advanced-detection,severity,critical,catch-all,nist-si-3")
27
- @reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
21
+ @tags("category:security,detection:aggregate,surface:process-prompt,posture:catch-all,compliance:nist-si-3")
22
+ @reject_message("Prompt blocked: a critical-severity threat was reported by at least one detector.")
28
23
  forbid (
29
24
  principal,
30
25
  action == Guardrails::Action::"process_prompt",
@@ -1,22 +1,35 @@
1
1
  // =============================================================================
2
2
  // Chat Assistant — Privacy
3
3
  // =============================================================================
4
- // PII protection for customer-facing chatbots.
5
- // Blocks PII in both user inputs and assistant outputs.
4
+ // PII protection for customer-facing chatbots. Blocks PII in both directions
5
+ // (input + output) across prompts, tool calls, and file operations.
6
6
  //
7
- // Category: privacy
7
+ // Context keys consumed:
8
+ // - pii_detected: Bool
9
+ //
10
+ // Compliance:
11
+ // - GDPR Art. 32, HIPAA, PCI-DSS
12
+ //
13
+ // Category: privacy
8
14
  // Namespace: Guardrails
9
15
  // =============================================================================
10
16
 
11
- @id("chat-pii-block-input-output")
12
- @name("Block PII in both directions")
13
- @description("Blocks PII in both user inputs and assistant outputs for chat applications")
17
+ @id("privacy.chat-block-pii")
18
+ @name("Block PII in chat (both directions)")
19
+ @description("Blocks process_prompt, call_tool, read_file, and write_file when pii_detected is true.")
14
20
  @severity("high")
15
- @tags("profile,chat-assistant,pii,privacy")
21
+ @tags("category:privacy,threat:pii,detection:rule,compliance:gdpr,compliance:hipaa")
22
+ @reject_message("Request blocked: personally identifiable information was detected.")
16
23
  forbid (
17
24
  principal,
18
- action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
25
+ action in [
26
+ Guardrails::Action::"process_prompt",
27
+ Guardrails::Action::"call_tool",
28
+ Guardrails::Action::"read_file",
29
+ Guardrails::Action::"write_file"
30
+ ],
19
31
  resource
20
- ) when {
32
+ )
33
+ when {
21
34
  context has pii_detected && context.pii_detected == true
22
35
  };
@@ -1,35 +1,47 @@
1
1
  // =============================================================================
2
2
  // Chat Assistant — Security
3
3
  // =============================================================================
4
- // Aggressive injection and jailbreak defense for customer-facing chatbots.
5
- // Lower thresholds than defaults — public-facing chat is high-risk for attacks.
4
+ // Aggressive injection and jailbreak defence for customer-facing chatbots.
5
+ // Lower thresholds than the org-wide defaults — public-facing chat has the
6
+ // highest adversarial surface.
6
7
  //
7
- // Category: security
8
+ // Context keys consumed:
9
+ // - injection_score: Long (0-100)
10
+ // - jailbreak_score: Long (0-100)
11
+ //
12
+ // Compliance:
13
+ // - OWASP LLM01, OWASP LLM02
14
+ //
15
+ // Category: security
8
16
  // Namespace: Guardrails
9
17
  // =============================================================================
10
18
 
11
- @id("chat-injection-lower-threshold")
12
- @name("Aggressive injection defense for chat")
13
- @description("Lower injection threshold for public-facing chat — blocks above 70")
19
+ @id("security.chat-block-injection")
20
+ @name("Block injection (chat profile)")
21
+ @description("Blocks process_prompt and call_tool when injection_score >= 71 (lower threshold for public chat).")
14
22
  @severity("high")
15
- @tags("profile,chat-assistant,injection,security")
23
+ @tags("category:security,threat:injection,detection:ml,owasp:llm01")
24
+ @reject_message("Prompt blocked: prompt injection detected with high confidence (chat profile).")
16
25
  forbid (
17
26
  principal,
18
27
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
19
28
  resource
20
- ) when {
21
- context has injection_confidence && context.injection_confidence > 70
29
+ )
30
+ when {
31
+ context has injection_score && context.injection_score >= 71
22
32
  };
23
33
 
24
- @id("chat-jailbreak-lower-threshold")
25
- @name("Aggressive jailbreak defense for chat")
26
- @description("Lower jailbreak threshold for public-facing chat — blocks above 65")
34
+ @id("security.chat-block-jailbreak")
35
+ @name("Block jailbreak (chat profile)")
36
+ @description("Blocks process_prompt when jailbreak_score >= 66 (lower threshold for public chat).")
27
37
  @severity("high")
28
- @tags("profile,chat-assistant,jailbreak,security")
38
+ @tags("category:security,threat:jailbreak,detection:ml,owasp:llm02")
39
+ @reject_message("Prompt blocked: jailbreak attempt detected with high confidence (chat profile).")
29
40
  forbid (
30
41
  principal,
31
42
  action == Guardrails::Action::"process_prompt",
32
43
  resource
33
- ) when {
34
- context has jailbreak_confidence && context.jailbreak_confidence > 65
44
+ )
45
+ when {
46
+ context has jailbreak_score && context.jailbreak_score >= 66
35
47
  };
@@ -1,43 +1,58 @@
1
1
  // =============================================================================
2
2
  // Chat Assistant — Trust & Safety
3
3
  // =============================================================================
4
- // Strict content moderation for customer-facing chatbots.
5
- // Lower toxicity thresholds and topic restrictions for public-facing content.
4
+ // Strict content moderation for customer-facing chatbots. Lower toxicity
5
+ // thresholds than defaults plus topic restrictions for public-facing
6
+ // content.
6
7
  //
7
- // Category: trust_safety
8
+ // Context keys consumed:
9
+ // - violence_score, hate_speech_score, sexual_score, profanity_score: Long
10
+ // - content_topics: Set<String>
11
+ // - topic_confidence: Long (0-100)
12
+ //
13
+ // Compliance:
14
+ // - EU AI Act, ISO 42001
15
+ //
16
+ // Category: trust-safety
8
17
  // Namespace: Guardrails
9
18
  // =============================================================================
10
19
 
11
- @id("chat-toxicity-strict")
12
- @name("Strict toxicity for chat")
13
- @description("Lower thresholds for customer-facing content — blocks toxicity above 70")
20
+ @id("trust-safety.chat-block-toxicity")
21
+ @name("Block toxicity (chat profile)")
22
+ @description("Blocks process_prompt when any toxicity score is >= 71 (lower threshold for public chat).")
14
23
  @severity("critical")
15
- @tags("profile,chat-assistant,toxicity,trust-safety")
24
+ @tags("category:trust-safety,threat:harmful,threat:hate-speech,detection:ml,compliance:eu-ai-act")
25
+ @reject_message("Prompt blocked: toxic content detected at the chat-profile threshold.")
16
26
  forbid (
17
27
  principal,
18
28
  action == Guardrails::Action::"process_prompt",
19
29
  resource
20
- ) when {
21
- (context has violence_score && context.violence_score > 70) ||
22
- (context has hate_speech_score && context.hate_speech_score > 70) ||
23
- (context has sexual_score && context.sexual_score > 70) ||
24
- (context has profanity_score && context.profanity_score > 70)
30
+ )
31
+ when {
32
+ (context has violence_score && context.violence_score >= 71) ||
33
+ (context has hate_speech_score && context.hate_speech_score >= 71) ||
34
+ (context has sexual_score && context.sexual_score >= 71) ||
35
+ (context has profanity_score && context.profanity_score >= 71)
25
36
  };
26
37
 
27
- @id("chat-topic-restriction")
28
- @name("Block restricted topics in chat")
29
- @description("Prevents chat assistants from discussing dangerous or regulated topics")
38
+ @id("trust-safety.chat-block-restricted-topics")
39
+ @name("Block restricted topics (chat profile)")
40
+ @description("Blocks process_prompt and call_tool when topic_confidence >= 71 and content_topics contains a restricted category.")
30
41
  @severity("high")
31
- @tags("profile,chat-assistant,semantic,compliance")
42
+ @tags("category:trust-safety,threat:harmful,detection:ml,compliance:eu-ai-act")
43
+ @reject_message("Prompt blocked: content discusses a topic restricted for chat assistants.")
32
44
  forbid (
33
45
  principal,
34
46
  action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
35
47
  resource
36
- ) when {
37
- context has topic_confidence && context.topic_confidence > 70 &&
48
+ )
49
+ when {
50
+ context has topic_confidence && context.topic_confidence >= 71 &&
38
51
  context has content_topics &&
39
- (context.content_topics.contains("weapons_manufacturing") ||
40
- context.content_topics.contains("illegal_activity") ||
41
- context.content_topics.contains("controlled_substances") ||
42
- context.content_topics.contains("financial_fraud"))
52
+ (
53
+ context.content_topics.contains("weapons_manufacturing") ||
54
+ context.content_topics.contains("illegal_activity") ||
55
+ context.content_topics.contains("controlled_substances") ||
56
+ context.content_topics.contains("financial_fraud")
57
+ )
43
58
  };
@@ -1,109 +1,130 @@
1
1
  // =============================================================================
2
2
  // Code Agent — Agentic Security
3
3
  // =============================================================================
4
- // Tool risk controls, shell execution blocking, loop detection,
5
- // exfiltration prevention, and budget enforcement for coding assistants.
4
+ // Tool risk controls, shell execution blocking, loop detection, exfiltration
5
+ // prevention, and budget enforcement for coding assistants.
6
6
  //
7
- // Category: agentic_security
7
+ // Context keys consumed:
8
+ // - tool_risk_score, tool_category, tool_is_sensitive, tool_name
9
+ // - loop_detected, loop_count
10
+ // - suspicious_pattern, pattern_type
11
+ // - sequence_risk: Long (0-100)
12
+ // - budget_exceeded: Bool
13
+ //
14
+ // Compliance:
15
+ // - OWASP LLM06, OWASP ASI02, MITRE T1059
16
+ //
17
+ // Category: agent-security
8
18
  // Namespace: Guardrails
9
19
  // =============================================================================
10
20
 
11
- @id("code-block-dangerous-tools")
12
- @name("Block dangerous tool execution")
13
- @description("Forbids tools classified as dangerous or with very high risk scores")
21
+ @id("agent-security.code-block-dangerous-tools")
22
+ @name("Block dangerous tools (code profile)")
23
+ @description("Blocks call_tool when tool_risk_score >= 86 or tool_category equals \"dangerous\".")
14
24
  @severity("critical")
15
- @tags("profile,code-agent,tools,agentic")
25
+ @tags("category:agent-security,detection:aggregate,surface:call-tool,owasp:llm06")
26
+ @reject_message("Tool execution blocked: tool is classified as dangerous or scored high risk (code profile).")
16
27
  forbid (
17
28
  principal,
18
29
  action == Guardrails::Action::"call_tool",
19
30
  resource
20
- ) when {
21
- (context has tool_risk_score && context.tool_risk_score > 85) ||
31
+ )
32
+ when {
33
+ (context has tool_risk_score && context.tool_risk_score >= 86) ||
22
34
  (context has tool_category && context.tool_category == "dangerous")
23
35
  };
24
36
 
25
- @id("code-block-shell-execution")
26
- @name("Block direct shell commands")
27
- @description("Forbids direct shell and command execution tools")
37
+ @id("agent-security.code-block-shell-execution")
38
+ @name("Block shell commands (code profile)")
39
+ @description("Blocks call_tool when tool_name is a shell tool.")
28
40
  @severity("high")
29
- @tags("profile,code-agent,tools,shell")
41
+ @tags("category:agent-security,threat:command-injection,detection:rule,surface:call-tool,mitre:t1059")
42
+ @reject_message("Tool execution blocked: direct shell or command execution is not permitted for the code profile.")
30
43
  forbid (
31
44
  principal,
32
45
  action == Guardrails::Action::"call_tool",
33
46
  resource
34
- ) when {
47
+ )
48
+ when {
35
49
  context has tool_name &&
36
- (context.tool_name == "shell" ||
37
- context.tool_name == "execute_command" ||
38
- context.tool_name == "bash")
50
+ (context.tool_name == "shell" || context.tool_name == "execute_command" || context.tool_name == "bash")
39
51
  };
40
52
 
41
- @id("code-block-sensitive-tools")
42
- @name("Block sensitive tools with elevated risk")
43
- @description("Forbids sensitive tool calls with risk above 70")
53
+ @id("agent-security.code-block-sensitive-tools")
54
+ @name("Block sensitive tools with elevated risk (code profile)")
55
+ @description("Blocks call_tool when tool_is_sensitive is true and tool_risk_score >= 71.")
44
56
  @severity("high")
45
- @tags("profile,code-agent,tools,security")
57
+ @tags("category:agent-security,detection:aggregate,surface:call-tool,owasp:llm06")
58
+ @reject_message("Tool execution blocked: sensitive tool with elevated risk (code profile).")
46
59
  forbid (
47
60
  principal,
48
61
  action == Guardrails::Action::"call_tool",
49
62
  resource
50
- ) when {
63
+ )
64
+ when {
51
65
  context has tool_is_sensitive && context.tool_is_sensitive == true &&
52
- context has tool_risk_score && context.tool_risk_score > 70
66
+ context has tool_risk_score && context.tool_risk_score >= 71
53
67
  };
54
68
 
55
- @id("code-block-loops")
56
- @name("Block tool call loops")
57
- @description("Stops infinite tool call loops in agentic workflows")
69
+ @id("agent-security.code-block-loops")
70
+ @name("Block tool call loops (code profile)")
71
+ @description("Blocks call_tool when loop_detected is true and loop_count >= 6.")
58
72
  @severity("high")
59
- @tags("profile,code-agent,agentic,loops")
73
+ @tags("category:agent-security,threat:loop,detection:aggregate,surface:call-tool")
74
+ @reject_message("Tool execution blocked: infinite tool-call loop detected (code profile).")
60
75
  forbid (
61
76
  principal,
62
77
  action == Guardrails::Action::"call_tool",
63
78
  resource
64
- ) when {
79
+ )
80
+ when {
65
81
  context has loop_detected && context.loop_detected == true &&
66
- context has loop_count && context.loop_count > 5
82
+ context has loop_count && context.loop_count >= 6
67
83
  };
68
84
 
69
- @id("code-block-exfiltration")
70
- @name("Block data exfiltration patterns")
71
- @description("Detects and blocks read → send patterns indicating data theft")
85
+ @id("agent-security.code-block-exfiltration")
86
+ @name("Block data exfiltration patterns (code profile)")
87
+ @description("Blocks call_tool when suspicious_pattern is true and pattern_type matches an exfiltration class.")
72
88
  @severity("critical")
73
- @tags("profile,code-agent,agentic,exfiltration")
89
+ @tags("category:agent-security,threat:exfiltration,detection:rule,surface:call-tool")
90
+ @reject_message("Tool execution blocked: data exfiltration pattern detected (code profile).")
74
91
  forbid (
75
92
  principal,
76
93
  action == Guardrails::Action::"call_tool",
77
94
  resource
78
- ) when {
95
+ )
96
+ when {
79
97
  context has suspicious_pattern && context.suspicious_pattern == true &&
80
98
  context has pattern_type &&
81
- (context.pattern_type == "data_exfiltration" ||
82
- context.pattern_type == "secret_exfiltration")
99
+ (context.pattern_type == "data_exfiltration" || context.pattern_type == "secret_exfiltration")
83
100
  };
84
101
 
85
- @id("code-block-high-sequence-risk")
86
- @name("Block high-risk action sequences")
87
- @description("Forbids suspicious multi-step tool sequences with risk above 75")
102
+ @id("agent-security.code-block-sequence-risk")
103
+ @name("Block high-risk action sequences (code profile)")
104
+ @description("Blocks call_tool when sequence_risk >= 76.")
88
105
  @severity("high")
89
- @tags("profile,code-agent,agentic,patterns")
106
+ @tags("category:agent-security,detection:aggregate,surface:call-tool")
107
+ @reject_message("Tool execution blocked: action sequence scored a high cumulative risk (code profile).")
90
108
  forbid (
91
109
  principal,
92
110
  action == Guardrails::Action::"call_tool",
93
111
  resource
94
- ) when {
95
- context has sequence_risk && context.sequence_risk > 75
112
+ )
113
+ when {
114
+ context has sequence_risk && context.sequence_risk >= 76
96
115
  };
97
116
 
98
- @id("code-block-budget-exceeded")
99
- @name("Block on budget exceeded")
100
- @description("Stops agent execution when token budget is exhausted")
117
+ @id("agent-security.code-block-budget-exceeded")
118
+ @name("Block when budget exceeded (code profile)")
119
+ @description("Blocks call_tool when budget_exceeded is true.")
101
120
  @severity("medium")
102
- @tags("profile,code-agent,budget,cost-control")
121
+ @tags("category:agent-security,threat:budget-violation,detection:rule,surface:call-tool")
122
+ @reject_message("Tool execution blocked: agent has exhausted its token or cost budget (code profile).")
103
123
  forbid (
104
124
  principal,
105
125
  action == Guardrails::Action::"call_tool",
106
126
  resource
107
- ) when {
127
+ )
128
+ when {
108
129
  context has budget_exceeded && context.budget_exceeded == true
109
130
  };
@@ -8,48 +8,44 @@
8
8
  // (zero-width joiners, bidirectional overrides, tag characters) indicate
9
9
  // payload injection or encoding evasion attempts.
10
10
  //
11
- // Adapted from Overwatch encoding attack policies for Guardrails namespace.
11
+ // Ref: EchoLeak CVE-2025-32711, Rules File Backdoor (Pillar Security)
12
12
  //
13
- // Ref: EchoLeak CVE-2025-32711 (invisible prompt injection via Unicode)
14
- // Rules File Backdoor (Pillar Security, March 2025)
13
+ // Context keys consumed:
14
+ // - invisible_chars_detected: Bool
15
15
  //
16
16
  // Compliance:
17
- // OWASP LLM01 (Prompt Injection) — encoding evasion
18
- // OWASP ASI01 (Agent Goal Hijack) — hidden instructions
19
- // NIST 800-53 SI-10 (Information Input Validation)
17
+ // - OWASP LLM01 (encoding evasion), OWASP ASI01, NIST 800-53 SI-10
20
18
  //
21
- // Category: security
19
+ // Category: security
22
20
  // Namespace: Guardrails
23
21
  // =============================================================================
24
22
 
25
- // Block tool calls with invisible characters in arguments
26
- @id("code-block-invisible-tool-args")
27
- @name("Block invisible characters in tool calls")
28
- @description("Block tool execution when invisible Unicode characters are detected in tool arguments or content. Tool arguments should be plain text/JSON — invisible characters in tool calls are almost certainly malicious payload injection.")
23
+ @id("security.code-block-invisible-tool-args")
24
+ @name("Block invisible characters in tool calls (code profile)")
25
+ @description("Blocks call_tool when invisible_chars_detected is true.")
29
26
  @severity("critical")
30
- @tags("profile,code-agent,encoding,unicode,invisible-chars,tools,owasp-asi01")
31
- @reject_message("Tool execution blocked: invisible Unicode characters detected in tool arguments. Tool calls should contain only plain text — invisible characters indicate payload injection or encoding evasion.")
27
+ @tags("category:security,threat:invisible-chars,threat:injection,detection:pattern,surface:call-tool,owasp:asi01")
28
+ @reject_message("Tool execution blocked: invisible Unicode characters detected in tool arguments — encoding evasion or payload injection.")
32
29
  forbid (
33
30
  principal,
34
31
  action == Guardrails::Action::"call_tool",
35
32
  resource
36
33
  )
37
34
  when {
38
- context has contains_invisible_chars && context.contains_invisible_chars == true
35
+ context has invisible_chars_detected && context.invisible_chars_detected == true
39
36
  };
40
37
 
41
- // Block file writes with invisible characters
42
- @id("code-block-invisible-file-write")
43
- @name("Block invisible characters in file writes")
44
- @description("Block file writes when invisible Unicode characters are detected. Prevents persistence of invisible payloads in source code, config files, or documentation where they could later be processed by AI agents.")
38
+ @id("security.code-block-invisible-file-write")
39
+ @name("Block invisible characters in file writes (code profile)")
40
+ @description("Blocks write_file when invisible_chars_detected is true.")
45
41
  @severity("high")
46
- @tags("profile,code-agent,encoding,unicode,invisible-chars,file-write,owasp-asi01")
47
- @reject_message("File write blocked: invisible Unicode characters detected in content. Writing invisible characters to files can create persistent backdoors that affect AI agents processing those files later.")
42
+ @tags("category:security,threat:invisible-chars,detection:pattern,surface:write-file,owasp:asi01")
43
+ @reject_message("File write blocked: invisible Unicode characters detected — persistent invisible payloads can backdoor downstream agents.")
48
44
  forbid (
49
45
  principal,
50
46
  action == Guardrails::Action::"write_file",
51
47
  resource
52
48
  )
53
49
  when {
54
- context has contains_invisible_chars && context.contains_invisible_chars == true
50
+ context has invisible_chars_detected && context.invisible_chars_detected == true
55
51
  };