@highflame/policy 2.1.35 → 2.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/_schemas/ai_gateway/context.json +431 -11
  2. package/_schemas/ai_gateway/schema.cedarschema +91 -11
  3. package/_schemas/ai_gateway/templates/defaults/agent_security.cedar +66 -43
  4. package/_schemas/ai_gateway/templates/defaults/baseline.cedar +9 -11
  5. package/_schemas/ai_gateway/templates/defaults/semantic.cedar +63 -40
  6. package/_schemas/ai_gateway/templates/defaults/tools.cedar +48 -36
  7. package/_schemas/ai_gateway/templates/llm_default_allow.cedar +9 -10
  8. package/_schemas/ai_gateway/templates/mcp_server_allowlist.cedar +22 -14
  9. package/_schemas/ai_gateway/templates/mcp_tool_permissions.cedar +29 -27
  10. package/_schemas/ai_gateway/templates/pii_redaction.cedar +38 -33
  11. package/_schemas/ai_gateway/templates/templates.json +52 -50
  12. package/_schemas/guardrails/context.json +12 -12
  13. package/_schemas/guardrails/schema.cedarschema +12 -12
  14. package/_schemas/guardrails/templates/defaults/agent_identity.cedar +60 -56
  15. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +83 -58
  16. package/_schemas/guardrails/templates/defaults/baseline.cedar +9 -12
  17. package/_schemas/guardrails/templates/defaults/injection.cedar +48 -36
  18. package/_schemas/guardrails/templates/defaults/pii.cedar +27 -20
  19. package/_schemas/guardrails/templates/defaults/secrets.cedar +39 -22
  20. package/_schemas/guardrails/templates/defaults/security_patterns.cedar +38 -25
  21. package/_schemas/guardrails/templates/defaults/semantic.cedar +47 -31
  22. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +34 -26
  23. package/_schemas/guardrails/templates/defaults/toxicity.cedar +57 -47
  24. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +60 -43
  25. package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +29 -42
  26. package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +43 -57
  27. package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +40 -57
  28. package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +48 -62
  29. package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +40 -56
  30. package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +24 -34
  31. package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +45 -37
  32. package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +11 -16
  33. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -9
  34. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +27 -15
  35. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +37 -22
  36. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +68 -47
  37. package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +17 -21
  38. package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +74 -73
  39. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +13 -9
  40. package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +36 -58
  41. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +22 -15
  42. package/_schemas/guardrails/templates/profiles/data_pipeline/data_protection.cedar +52 -0
  43. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +41 -18
  44. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +18 -36
  45. package/_schemas/guardrails/templates/profiles/multi_agent/agent_safety.cedar +86 -79
  46. package/_schemas/guardrails/templates/profiles/multi_agent/agent_trust.cedar +73 -70
  47. package/_schemas/guardrails/templates/templates.json +198 -217
  48. package/_schemas/overwatch/context.json +14 -14
  49. package/_schemas/overwatch/schema.cedarschema +12 -12
  50. package/_schemas/sentry/context.json +11 -11
  51. package/_schemas/sentry/schema.cedarschema +11 -11
  52. package/_schemas/sentry/templates/defaults/baseline.cedar +8 -12
  53. package/_schemas/sentry/templates/defaults/clipboard.cedar +43 -42
  54. package/_schemas/sentry/templates/defaults/content_safety.cedar +38 -68
  55. package/_schemas/sentry/templates/defaults/file_safety.cedar +18 -26
  56. package/_schemas/sentry/templates/defaults/organization.cedar +10 -17
  57. package/_schemas/sentry/templates/defaults/pii.cedar +52 -73
  58. package/_schemas/sentry/templates/defaults/secrets.cedar +65 -58
  59. package/_schemas/sentry/templates/defaults/semantic.cedar +40 -59
  60. package/_schemas/sentry/templates/templates.json +53 -43
  61. package/dist/ai_gateway-context.gen.d.ts +18 -4
  62. package/dist/ai_gateway-context.gen.js +18 -4
  63. package/dist/ai_gateway-defaults.gen.d.ts +4 -1
  64. package/dist/ai_gateway-defaults.gen.js +398 -320
  65. package/dist/builder.d.ts +2 -1
  66. package/dist/builder.js +5 -2
  67. package/dist/guardrails-context.gen.d.ts +5 -5
  68. package/dist/guardrails-context.gen.js +5 -5
  69. package/dist/guardrails-defaults.gen.d.ts +4 -1
  70. package/dist/guardrails-defaults.gen.js +2130 -1903
  71. package/dist/overwatch-context.gen.d.ts +5 -5
  72. package/dist/overwatch-context.gen.js +5 -5
  73. package/dist/overwatch-defaults.gen.d.ts +4 -1
  74. package/dist/overwatch-defaults.gen.js +560 -566
  75. package/dist/sentry-context.gen.d.ts +3 -3
  76. package/dist/sentry-context.gen.js +3 -3
  77. package/dist/sentry-defaults.gen.d.ts +4 -1
  78. package/dist/sentry-defaults.gen.js +392 -453
  79. package/dist/service-schemas.gen.d.ts +4 -4
  80. package/dist/service-schemas.gen.js +249 -99
  81. package/package.json +1 -1
@@ -1,38 +1,44 @@
1
1
  // =============================================================================
2
- // Secrets Detection Policy (Default)
2
+ // Secrets Detection (Default)
3
3
  // =============================================================================
4
- // Block credential and secret leakage across messages and file uploads.
5
- // Shield SecretsDetector identifies 18+ secret types via regex.
4
+ // Blocks credentials and secrets across messages and file uploads. Covers
5
+ // general detection, high-risk credential types, common API keys, SSH and
6
+ // PEM key material, bulk exposure, and detector rule triggers.
6
7
  //
7
- // Paste-targeted secret rules live in clipboard.cedar.
8
+ // Context keys consumed:
9
+ // - secrets_detected: Bool
10
+ // - secret_types: Set<String>
11
+ // - secret_count: Long
12
+ // - detected_threats: Set<String>
8
13
  //
9
- // Category: secrets
14
+ // Compliance:
15
+ // - NIST 800-53 SC-28, IA-5; MITRE ATT&CK T1552
16
+ //
17
+ // Category: data-protection
10
18
  // Namespace: Sentry
11
19
  // =============================================================================
12
20
 
13
- // Block messages and uploads containing secrets
14
- @id("sentry-org-block-secrets-messages")
15
- @name("Block messages and uploads with secrets")
16
- @description("Block messages and file uploads when detection engines identify API keys, tokens, or credential patterns. First line of defense against accidental credential exposure in AI chat interactions.")
21
+ @id("data-protection.block-secrets")
22
+ @name("Block secrets in messages and uploads")
23
+ @description("Blocks process_prompt and upload_file when secrets_detected is true.")
17
24
  @severity("critical")
18
- @tags("secrets,credentials,messages,nist-sc-28,nist-ia-5")
19
- @reject_message("Content blocked: detected secrets such as API keys, tokens, or credentials. Remove all secrets before sending to AI services.")
25
+ @tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
26
+ @reject_message("Content blocked: secrets such as API keys, tokens, or credentials detected remove before sending to AI services.")
20
27
  forbid (
21
28
  principal,
22
29
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
23
30
  resource
24
31
  )
25
32
  when {
26
- context has contains_secrets && context.contains_secrets
33
+ context has secrets_detected && context.secrets_detected == true
27
34
  };
28
35
 
29
- // Block high-risk secret types across messages and file uploads
30
- @id("sentry-org-block-high-risk-secrets")
36
+ @id("data-protection.block-high-risk-secrets")
31
37
  @name("Block high-risk credential types")
32
- @description("Block messages and file uploads containing cloud provider keys (AWS, GCP, Azure), GitHub tokens, SSH private keys, or database connection strings. These credential types pose the highest exfiltration risk.")
38
+ @description("Blocks process_prompt and upload_file when secret_types contains a cloud, GitHub, or private-key credential.")
33
39
  @severity("critical")
34
- @tags("secrets,aws,github,ssh,cloud,nist-ia-5,mitre-t1552")
35
- @reject_message("Content blocked: high-risk credentials detected (cloud keys, GitHub tokens, SSH keys). Use a secrets manager — never share credentials with AI services.")
40
+ @tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
41
+ @reject_message("Content blocked: high-risk credentials detected (cloud keys, GitHub tokens, SSH keys) — never share credentials with AI services.")
36
42
  forbid (
37
43
  principal,
38
44
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
@@ -40,22 +46,23 @@ forbid (
40
46
  )
41
47
  when {
42
48
  context has secret_types &&
43
- (context.secret_types.contains("aws_access_key") ||
44
- context.secret_types.contains("aws_secret_key") ||
45
- context.secret_types.contains("gcp_service_account") ||
46
- context.secret_types.contains("azure_connection_string") ||
47
- context.secret_types.contains("github_token") ||
48
- context.secret_types.contains("github_fine_grained") ||
49
- context.secret_types.contains("private_key"))
49
+ (
50
+ context.secret_types.contains("aws_access_key") ||
51
+ context.secret_types.contains("aws_secret_key") ||
52
+ context.secret_types.contains("gcp_service_account") ||
53
+ context.secret_types.contains("azure_connection_string") ||
54
+ context.secret_types.contains("github_token") ||
55
+ context.secret_types.contains("github_fine_grained") ||
56
+ context.secret_types.contains("private_key")
57
+ )
50
58
  };
51
59
 
52
- // Block API keys and tokens across messages and file uploads
53
- @id("sentry-org-block-api-keys")
60
+ @id("data-protection.block-api-keys")
54
61
  @name("Block API keys and tokens")
55
- @description("Block messages and file uploads containing generic API keys, JWT tokens, and OAuth credentials. These are the most commonly leaked credential types when users interact with AI services.")
62
+ @description("Blocks process_prompt and upload_file when secret_types contains a generic API key, JWT, OpenAI, Anthropic, or Stripe key.")
56
63
  @severity("high")
57
- @tags("secrets,api-key,jwt,oauth,nist-ia-5")
58
- @reject_message("Content blocked: API keys, JWT tokens, or OAuth credentials detected. These must never be shared with AI services.")
64
+ @tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
65
+ @reject_message("Content blocked: API keys, JWT tokens, or OAuth credentials detected.")
59
66
  forbid (
60
67
  principal,
61
68
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
@@ -63,20 +70,21 @@ forbid (
63
70
  )
64
71
  when {
65
72
  context has secret_types &&
66
- (context.secret_types.contains("generic_api_key") ||
67
- context.secret_types.contains("jwt_token") ||
68
- context.secret_types.contains("openai_key") ||
69
- context.secret_types.contains("anthropic_key") ||
70
- context.secret_types.contains("stripe_key"))
73
+ (
74
+ context.secret_types.contains("generic_api_key") ||
75
+ context.secret_types.contains("jwt_token") ||
76
+ context.secret_types.contains("openai_key") ||
77
+ context.secret_types.contains("anthropic_key") ||
78
+ context.secret_types.contains("stripe_key")
79
+ )
71
80
  };
72
81
 
73
- // Block SSH key exposure across messages and file uploads
74
- @id("sentry-secrets-block-ssh-keys")
82
+ @id("data-protection.block-ssh-keys")
75
83
  @name("Block SSH key exposure")
76
- @description("Block when SSH private key content or SSH key file paths are detected. Covers messages and file uploads. AI chat services must not receive SSH credentials.")
84
+ @description("Blocks process_prompt and upload_file when secret_types contains \"ssh_key\".")
77
85
  @severity("critical")
78
- @tags("secrets,ssh,credentials,nist-ia-5,mitre-t1552")
79
- @reject_message("Blocked: SSH private key content or key file path detected. AI chat services must not receive SSH credentials.")
86
+ @tags("category:data-protection,threat:secrets,detection:rule,compliance:nist-si-3")
87
+ @reject_message("Content blocked: SSH private key content or key file path detected.")
80
88
  forbid (
81
89
  principal,
82
90
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
@@ -86,13 +94,12 @@ when {
86
94
  context has secret_types && context.secret_types.contains("ssh_key")
87
95
  };
88
96
 
89
- // Block PEM/certificate key exposure across messages and file uploads
90
- @id("sentry-secrets-block-pem-keys")
91
- @name("Block PEM/certificate key exposure")
92
- @description("Block when PEM private key content or certificate key file paths (.pem, .key, .p12, .pfx) are detected. AI chat services must not receive certificate credentials.")
97
+ @id("data-protection.block-pem-keys")
98
+ @name("Block PEM and certificate keys")
99
+ @description("Blocks process_prompt and upload_file when secret_types contains \"pem_certificate\".")
93
100
  @severity("critical")
94
- @tags("secrets,certificates,pem,nist-ia-5,mitre-t1552")
95
- @reject_message("Blocked: PEM private key or certificate key file detected. AI chat services must not receive certificate credentials.")
101
+ @tags("category:data-protection,threat:secrets,detection:rule,compliance:nist-si-3")
102
+ @reject_message("Content blocked: PEM private key or certificate key file detected.")
96
103
  forbid (
97
104
  principal,
98
105
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
@@ -102,13 +109,12 @@ when {
102
109
  context has secret_types && context.secret_types.contains("pem_certificate")
103
110
  };
104
111
 
105
- // Block bulk secret exposure
106
- @id("sentry-org-block-bulk-secrets")
112
+ @id("data-protection.block-secrets-bulk")
107
113
  @name("Block bulk secret exposure")
108
- @description("Block messages and file uploads when 3+ distinct secrets are found. Multiple secrets indicate a configuration dump, .env file paste, or credential harvesting being sent to AI services.")
114
+ @description("Blocks process_prompt and upload_file when secret_count >= 3.")
109
115
  @severity("critical")
110
- @tags("secrets,bulk,data-exfiltration,nist-sc-28")
111
- @reject_message("Content blocked: multiple credentials detected (3+). Configuration dumps and credential lists must never be shared with AI services.")
116
+ @tags("category:data-protection,threat:secrets,threat:exfiltration,detection:aggregate,owasp:llm06")
117
+ @reject_message("Content blocked: multiple credentials detected (3+) possible configuration dump or credential harvesting.")
112
118
  forbid (
113
119
  principal,
114
120
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
@@ -118,13 +124,12 @@ when {
118
124
  context has secret_count && context.secret_count >= 3
119
125
  };
120
126
 
121
- // Block detected credential patterns
122
- @id("sentry-org-block-detected-credentials")
127
+ @id("data-protection.block-credential-patterns")
123
128
  @name("Block detected credential patterns")
124
- @description("Block messages and file uploads flagged by detection engine rules for credential exposure, API key leaks, and token exposure. Defense-in-depth behind contains_secrets.")
129
+ @description("Blocks process_prompt and upload_file when detected_threats contains a credential-pattern rule trigger.")
125
130
  @severity("critical")
126
- @tags("secrets,credentials,detection-rules,nist-ia-5")
127
- @reject_message("Content blocked: detection engines identified credential patterns including secret exposure, API keys, or token leaks.")
131
+ @tags("category:data-protection,threat:secrets,detection:rule,owasp:llm06")
132
+ @reject_message("Content blocked: detection engines identified credential patterns (secret exposure, API key leaks, or token exposure).")
128
133
  forbid (
129
134
  principal,
130
135
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
@@ -132,7 +137,9 @@ forbid (
132
137
  )
133
138
  when {
134
139
  context has detected_threats &&
135
- (context.detected_threats.contains("secret_exposure") ||
136
- context.detected_threats.contains("credential_leak") ||
137
- context.detected_threats.contains("api_key_exposure"))
140
+ (
141
+ context.detected_threats.contains("secret_exposure") ||
142
+ context.detected_threats.contains("credential_leak") ||
143
+ context.detected_threats.contains("api_key_exposure")
144
+ )
138
145
  };
@@ -1,39 +1,35 @@
1
1
  // =============================================================================
2
- // Semantic Threat Detection Policy (Default)
2
+ // Semantic Threat Detection (Default)
3
3
  // =============================================================================
4
- // Detects and blocks prompt injection, jailbreak attempts, and high-severity
5
- // threats across browser AI interactions: messages and file uploads.
6
- // Paste-targeted semantic rules live in clipboard.cedar.
4
+ // Blocks prompt injection, jailbreak attempts, and high-severity threats in
5
+ // messages and file uploads. Paste-targeted semantic rules live in
6
+ // clipboard.cedar.
7
7
  //
8
- // Uses multi-layered detection from Shield:
9
- // 1. ML classifier scores (injection_score, jailbreak_score)
10
- // 2. Detection engine rule triggers (detected_threats)
11
- // 3. Threat severity aggregation (max_threat_severity, highest_severity)
8
+ // Detection layers:
9
+ // - Rule triggers (detected_threats) — always available
10
+ // - ML classifier scores (injection_score, jailbreak_score) — require API token
11
+ // - Threat severity aggregation (highest_severity) — catch-all
12
+ //
13
+ // Context keys consumed:
14
+ // - detected_threats: Set<String>
15
+ // - injection_score: Long (0-100)
16
+ // - jailbreak_score: Long (0-100)
17
+ // - highest_severity: String
12
18
  //
13
19
  // Compliance:
14
- // OWASP LLM01 (Prompt Injection) direct + indirect
15
- // OWASP LLM02 (Insecure Output Handling)
16
- // MITRE ATLAS AML.T0051 (LLM Prompt Injection)
17
- // MITRE ATLAS AML.T0054 (LLM Jailbreak)
18
- // NIST 800-53 SI-3, SI-4
20
+ // - OWASP LLM01, LLM02; MITRE ATLAS AML.T0051, AML.T0054
21
+ // - NIST 800-53 SI-3, SI-4
19
22
  //
20
- // Category: semantic
23
+ // Category: semantic
21
24
  // Namespace: Sentry
22
25
  // =============================================================================
23
26
 
24
- // ---------------------------------------------------------------------------
25
- // Section 1: Prompt Injection Detection
26
- // Blocks injection attempts in messages and uploaded files.
27
- // Paste-targeted injection rules live in clipboard.cedar.
28
- // ---------------------------------------------------------------------------
29
-
30
- // Block messages with prompt injection patterns
31
- @id("sentry-semantic-block-injection")
32
- @name("Block prompt injection")
33
- @description("Block messages when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, and manipulation techniques in user input (OWASP LLM01).")
27
+ @id("semantic.block-injection-rule")
28
+ @name("Block injection (rule)")
29
+ @description("Blocks process_prompt when detected_threats contains \"prompt_injection\".")
34
30
  @severity("critical")
35
- @tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
36
- @reject_message("Content blocked: prompt injection patterns were detected. This prevents manipulation of AI agent behavior. Remove adversarial instructions and try again.")
31
+ @tags("category:semantic,threat:injection,detection:rule,surface:process-prompt,owasp:llm01,mitre:atlas-t0051")
32
+ @reject_message("Prompt blocked: prompt injection patterns detected (OWASP LLM01).")
37
33
  forbid (
38
34
  principal,
39
35
  action == Sentry::Action::"process_prompt",
@@ -43,12 +39,11 @@ when {
43
39
  context has detected_threats && context.detected_threats.contains("prompt_injection")
44
40
  };
45
41
 
46
- // Block content with high ML injection confidence
47
- @id("sentry-semantic-block-injection-score")
48
- @name("Block high-confidence injection")
49
- @description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques including polymorphic payloads, encoding tricks, and obfuscated instructions.")
42
+ @id("semantic.block-injection-ml")
43
+ @name("Block injection (ML)")
44
+ @description("Blocks process_prompt and upload_file when injection_score >= 75.")
50
45
  @severity("critical")
51
- @tags("injection,ml-classifier,security,owasp-llm01")
46
+ @tags("category:semantic,threat:injection,detection:ml,owasp:llm01,mitre:atlas-t0051")
52
47
  @reject_message("Content blocked: the ML classifier detected prompt injection with high confidence.")
53
48
  forbid (
54
49
  principal,
@@ -59,18 +54,12 @@ when {
59
54
  context has injection_score && context.injection_score >= 75
60
55
  };
61
56
 
62
- // ---------------------------------------------------------------------------
63
- // Section 2: Jailbreak Detection
64
- // Blocks jailbreak attempts in messages sent to AI services.
65
- // ---------------------------------------------------------------------------
66
-
67
- // Block messages with jailbreak attempts
68
- @id("sentry-semantic-block-jailbreak")
69
- @name("Block jailbreak attempts")
70
- @description("Block messages when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
57
+ @id("semantic.block-jailbreak-rule")
58
+ @name("Block jailbreak (rule)")
59
+ @description("Blocks process_prompt when detected_threats contains \"jailbreak\".")
71
60
  @severity("critical")
72
- @tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
73
- @reject_message("Content blocked: jailbreak patterns were detected. This prevents circumvention of AI safety controls.")
61
+ @tags("category:semantic,threat:jailbreak,detection:rule,surface:process-prompt,owasp:llm02,mitre:atlas-t0054")
62
+ @reject_message("Prompt blocked: jailbreak patterns detected (OWASP LLM02).")
74
63
  forbid (
75
64
  principal,
76
65
  action == Sentry::Action::"process_prompt",
@@ -80,13 +69,12 @@ when {
80
69
  context has detected_threats && context.detected_threats.contains("jailbreak")
81
70
  };
82
71
 
83
- // Block content with high ML jailbreak confidence
84
- @id("sentry-semantic-block-jailbreak-score")
85
- @name("Block high-confidence jailbreak")
86
- @description("Block content when the ML jailbreak classifier exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation and encoded payloads.")
72
+ @id("semantic.block-jailbreak-ml")
73
+ @name("Block jailbreak (ML)")
74
+ @description("Blocks process_prompt when jailbreak_score >= 75.")
87
75
  @severity("critical")
88
- @tags("jailbreak,ml-classifier,security,owasp-llm02")
89
- @reject_message("Content blocked: the ML classifier detected a jailbreak attempt with high confidence.")
76
+ @tags("category:semantic,threat:jailbreak,detection:ml,surface:process-prompt,owasp:llm02,mitre:atlas-t0054")
77
+ @reject_message("Prompt blocked: the ML classifier detected a jailbreak attempt with high confidence.")
90
78
  forbid (
91
79
  principal,
92
80
  action == Sentry::Action::"process_prompt",
@@ -96,18 +84,12 @@ when {
96
84
  context has jailbreak_score && context.jailbreak_score >= 75
97
85
  };
98
86
 
99
- // ---------------------------------------------------------------------------
100
- // Section 3: Threat Severity Aggregation
101
- // Catch-all rules based on aggregated threat severity across all detectors.
102
- // ---------------------------------------------------------------------------
103
-
104
- // Block any content with critical severity threats
105
- @id("sentry-semantic-block-critical")
87
+ @id("semantic.block-critical")
106
88
  @name("Block critical threats")
107
- @description("Block messages and file uploads when any detection engine reports critical severity. This is the ultimate catch-all for critical-severity threats regardless of type or source.")
89
+ @description("Blocks process_prompt and upload_file when highest_severity equals \"critical\".")
108
90
  @severity("critical")
109
- @tags("critical,baseline,security,catch-all")
110
- @reject_message("Content blocked: security scanners detected a critical-severity threat. This content cannot be processed by AI services.")
91
+ @tags("category:semantic,detection:aggregate,posture:catch-all")
92
+ @reject_message("Content blocked: a critical-severity threat was reported by at least one detector.")
111
93
  forbid (
112
94
  principal,
113
95
  action in [Sentry::Action::"process_prompt", Sentry::Action::"upload_file"],
@@ -116,4 +98,3 @@ forbid (
116
98
  when {
117
99
  context has highest_severity && context.highest_severity == "critical"
118
100
  };
119
-
@@ -1,119 +1,129 @@
1
1
  {
2
2
  "service": "sentry",
3
- "version": "1.1.0",
3
+ "version": "2.0.0",
4
4
  "description": "Sentry policy templates for browser AI security",
5
5
  "categories": [
6
6
  {
7
- "id": "secrets",
8
- "name": "Secrets Detection",
9
- "description": "Detect and block secrets, API keys, tokens, and other credentials in messages and AI responses"
7
+ "id": "data-protection",
8
+ "name": "Secrets & Data Protection",
9
+ "description": "Block secrets, API keys, tokens, and credentials in messages and uploads."
10
10
  },
11
11
  {
12
- "id": "pii",
12
+ "id": "privacy",
13
13
  "name": "PII Detection",
14
- "description": "Detect and block personally identifiable information (PII) such as credit card numbers, SSNs, health data, and other sensitive personal data from being shared with AI chat services"
14
+ "description": "Block personally identifiable information across messages and uploads."
15
15
  },
16
16
  {
17
17
  "id": "semantic",
18
18
  "name": "Semantic Threat Detection",
19
- "description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats in messages, pasted content, and uploaded files"
19
+ "description": "Block prompt injection, jailbreak attempts, and high-severity threats."
20
20
  },
21
21
  {
22
- "id": "content_safety",
22
+ "id": "trust-safety",
23
23
  "name": "Content Safety",
24
- "description": "Detect and block violent, harmful, hateful, sexual, and profane content in AI interactions across messages, paste, and file uploads"
24
+ "description": "Block violent, harmful, hateful, sexual, or profane content."
25
25
  },
26
26
  {
27
- "id": "file_safety",
27
+ "id": "file-safety",
28
28
  "name": "File & Attachment Safety",
29
- "description": "Block file uploads containing secrets or PII in document content"
29
+ "description": "Block file uploads containing secrets or PII."
30
30
  },
31
31
  {
32
32
  "id": "clipboard",
33
33
  "name": "Clipboard Policy",
34
- "description": "Control paste operations into AI chat services — block paste outright, block when secrets, PII, source code, large threat-laden pastes, encoded payloads, or invisible characters are detected"
34
+ "description": "Control paste operations into AI chat services."
35
35
  },
36
36
  {
37
37
  "id": "organization",
38
38
  "name": "Organization Rules",
39
- "description": "Cross-cutting organization-wide rules: session-aware threat escalation"
39
+ "description": "Organization-wide baselines and session-aware threat escalation."
40
40
  }
41
41
  ],
42
42
  "defaults": [
43
43
  {
44
- "id": "sentry-baseline-default",
44
+ "id": "organization.permit-baseline",
45
45
  "name": "Baseline Permit",
46
- "description": "Permits all actions by default threat-specific forbid policies override this when threats are detected",
46
+ "description": "Permits all actions by default; threat-specific forbid policies override this when detectors fire.",
47
47
  "category": "organization",
48
48
  "file": "defaults/baseline.cedar",
49
49
  "severity": "low",
50
- "tags": ["baseline", "permit-default", "organization"],
50
+ "tags": ["category:organization", "posture:permit-default"],
51
51
  "is_active": true
52
52
  }
53
53
  ],
54
54
  "templates": [
55
55
  {
56
- "id": "sentry-semantic-default",
56
+ "id": "organization.permit-baseline",
57
+ "name": "Baseline Permit",
58
+ "description": "Permits all actions by default; threat-specific forbid policies override this when detectors fire.",
59
+ "category": "organization",
60
+ "file": "defaults/baseline.cedar",
61
+ "severity": "low",
62
+ "tags": ["category:organization", "posture:permit-default"],
63
+ "auto_deploy": true
64
+ },
65
+ {
66
+ "id": "semantic.defaults",
57
67
  "name": "Semantic Threat Detection",
58
- "description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats across messages, paste, and file uploads",
68
+ "description": "Block prompt injection, jailbreak attempts, and critical-severity threats in messages and uploads.",
59
69
  "category": "semantic",
60
70
  "file": "defaults/semantic.cedar",
61
71
  "severity": "critical",
62
- "tags": ["injection", "jailbreak", "owasp-llm01", "owasp-llm02", "baseline"]
72
+ "tags": ["category:semantic", "threat:injection", "threat:jailbreak", "owasp:llm01", "owasp:llm02"]
63
73
  },
64
74
  {
65
- "id": "sentry-content-safety-default",
75
+ "id": "trust-safety.defaults",
66
76
  "name": "Content Safety",
67
- "description": "Detect and block violent, harmful, hateful, sexual, and profane content across messages, paste, and file uploads",
68
- "category": "content_safety",
77
+ "description": "Block violent, weapons, hateful, criminal, sexual, or excessively profane content.",
78
+ "category": "trust-safety",
69
79
  "file": "defaults/content_safety.cedar",
70
80
  "severity": "critical",
71
- "tags": ["violence", "hate-speech", "sexual", "profanity", "content-safety", "baseline"]
81
+ "tags": ["category:trust-safety", "threat:harmful", "threat:hate-speech", "compliance:eu-ai-act", "compliance:iso-42001"]
72
82
  },
73
83
  {
74
- "id": "sentry-secrets-default",
84
+ "id": "data-protection.defaults",
75
85
  "name": "Secrets Detection",
76
- "description": "Block secrets, API keys, tokens, and credential leakage in messages and AI responses across all interactions",
77
- "category": "secrets",
86
+ "description": "Block secrets, API keys, tokens, and credential leakage in messages and AI responses.",
87
+ "category": "data-protection",
78
88
  "file": "defaults/secrets.cedar",
79
89
  "severity": "critical",
80
- "tags": ["secrets", "credentials", "api-keys", "data-protection"]
90
+ "tags": ["category:data-protection", "threat:secrets", "owasp:llm06"]
81
91
  },
82
92
  {
83
- "id": "sentry-pii-default",
93
+ "id": "privacy.defaults",
84
94
  "name": "PII Detection",
85
- "description": "Detect and block credit card numbers, SSNs, health data, and other PII in messages, pasted content, file uploads, and AI responses",
86
- "category": "pii",
95
+ "description": "Block credit card numbers, SSNs, passport/IBAN, and other PII across messages and file uploads.",
96
+ "category": "privacy",
87
97
  "file": "defaults/pii.cedar",
88
98
  "severity": "critical",
89
- "tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"]
99
+ "tags": ["category:privacy", "threat:pii", "compliance:pci-dss", "compliance:gdpr", "compliance:hipaa"]
90
100
  },
91
101
  {
92
- "id": "sentry-file-safety-default",
102
+ "id": "file-safety.defaults",
93
103
  "name": "File & Attachment Safety",
94
- "description": "Block file uploads containing secrets or PII in document content",
95
- "category": "file_safety",
104
+ "description": "Block file uploads containing secrets or PII in document content.",
105
+ "category": "file-safety",
96
106
  "file": "defaults/file_safety.cedar",
97
107
  "severity": "critical",
98
- "tags": ["file-upload", "secrets", "pii", "dlp"]
108
+ "tags": ["category:file-safety", "threat:secrets", "threat:pii"]
99
109
  },
100
110
  {
101
- "id": "sentry-clipboard-default",
111
+ "id": "clipboard.defaults",
102
112
  "name": "Clipboard Policy",
103
- "description": "Control paste into AI chat services: blanket paste blocking, paste-with-secrets, paste-with-PII, paste-with-source-code, large pastes carrying threats, encoded injection payloads, and invisible-character payloads",
113
+ "description": "Block all-paste, paste-with-secrets, paste-with-PII, encoded payload pastes, and pastes with invisible characters.",
104
114
  "category": "clipboard",
105
115
  "file": "defaults/clipboard.cedar",
106
- "severity": "high",
107
- "tags": ["paste", "clipboard", "data-protection", "source-code", "secrets", "pii", "encoding", "invisible-chars"]
116
+ "severity": "critical",
117
+ "tags": ["category:clipboard", "threat:secrets", "threat:pii", "threat:encoded-payload", "threat:invisible-chars"]
108
118
  },
109
119
  {
110
- "id": "sentry-organization-default",
111
- "name": "Organization Rules",
112
- "description": "Cross-cutting organization-wide policies: session-aware threat escalation",
120
+ "id": "organization.block-session-threat-escalation",
121
+ "name": "Session Threat Escalation",
122
+ "description": "Block all actions when threats have been detected in 3+ turns of the session.",
113
123
  "category": "organization",
114
124
  "file": "defaults/organization.cedar",
115
125
  "severity": "high",
116
- "tags": ["session", "escalation", "organization"]
126
+ "tags": ["category:organization", "detection:aggregate", "posture:catch-all"]
117
127
  }
118
128
  ]
119
129
  }
@@ -5,17 +5,16 @@
5
5
  * AiGateway Cedar schema and are used at policy evaluation time.
6
6
  */
7
7
  export declare const AiGatewayContextKey: {
8
- readonly ContainsInvisibleChars: "contains_invisible_chars";
9
- readonly ContainsSecrets: "contains_secrets";
10
8
  readonly Content: "content";
11
9
  readonly CrimeScore: "crime_score";
12
10
  readonly DetectedThreats: "detected_threats";
13
11
  readonly HateSpeechScore: "hate_speech_score";
14
12
  readonly HighestSeverity: "highest_severity";
15
13
  readonly IndirectInjectionScore: "indirect_injection_score";
16
- readonly InjectionConfidence: "injection_confidence";
14
+ readonly InjectionScore: "injection_score";
15
+ readonly InvisibleCharsDetected: "invisible_chars_detected";
17
16
  readonly InvisibleCharsScore: "invisible_chars_score";
18
- readonly JailbreakConfidence: "jailbreak_confidence";
17
+ readonly JailbreakScore: "jailbreak_score";
19
18
  readonly LoopCount: "loop_count";
20
19
  readonly LoopDetected: "loop_detected";
21
20
  readonly MaxThreatSeverity: "max_threat_severity";
@@ -29,13 +28,28 @@ export declare const AiGatewayContextKey: {
29
28
  readonly PatternType: "pattern_type";
30
29
  readonly PiiCount: "pii_count";
31
30
  readonly PiiDetected: "pii_detected";
31
+ readonly PiiScore: "pii_score";
32
32
  readonly PiiTypes: "pii_types";
33
33
  readonly ProfanityScore: "profanity_score";
34
34
  readonly RugPullDetected: "rug_pull_detected";
35
35
  readonly RugPullScore: "rug_pull_score";
36
36
  readonly SecretCount: "secret_count";
37
37
  readonly SecretTypes: "secret_types";
38
+ readonly SecretsDetected: "secrets_detected";
38
39
  readonly SequenceRisk: "sequence_risk";
40
+ readonly SessionCommandInjection: "session_command_injection";
41
+ readonly SessionCumulativeRiskScore: "session_cumulative_risk_score";
42
+ readonly SessionInjectionDetected: "session_injection_detected";
43
+ readonly SessionMaxCommandInjectionScore: "session_max_command_injection_score";
44
+ readonly SessionMaxInjectionScore: "session_max_injection_score";
45
+ readonly SessionMaxJailbreakScore: "session_max_jailbreak_score";
46
+ readonly SessionMaxPiiScore: "session_max_pii_score";
47
+ readonly SessionMaxSecretScore: "session_max_secret_score";
48
+ readonly SessionPiiDetected: "session_pii_detected";
49
+ readonly SessionPiiTypes: "session_pii_types";
50
+ readonly SessionSecretTypes: "session_secret_types";
51
+ readonly SessionSecretsDetected: "session_secrets_detected";
52
+ readonly SessionThreatTurns: "session_threat_turns";
39
53
  readonly SexualScore: "sexual_score";
40
54
  readonly SuspiciousPattern: "suspicious_pattern";
41
55
  readonly ThreatCategories: "threat_categories";
@@ -7,17 +7,16 @@
7
7
  * AiGateway Cedar schema and are used at policy evaluation time.
8
8
  */
9
9
  export const AiGatewayContextKey = {
10
- ContainsInvisibleChars: 'contains_invisible_chars',
11
- ContainsSecrets: 'contains_secrets',
12
10
  Content: 'content',
13
11
  CrimeScore: 'crime_score',
14
12
  DetectedThreats: 'detected_threats',
15
13
  HateSpeechScore: 'hate_speech_score',
16
14
  HighestSeverity: 'highest_severity',
17
15
  IndirectInjectionScore: 'indirect_injection_score',
18
- InjectionConfidence: 'injection_confidence',
16
+ InjectionScore: 'injection_score',
17
+ InvisibleCharsDetected: 'invisible_chars_detected',
19
18
  InvisibleCharsScore: 'invisible_chars_score',
20
- JailbreakConfidence: 'jailbreak_confidence',
19
+ JailbreakScore: 'jailbreak_score',
21
20
  LoopCount: 'loop_count',
22
21
  LoopDetected: 'loop_detected',
23
22
  MaxThreatSeverity: 'max_threat_severity',
@@ -31,13 +30,28 @@ export const AiGatewayContextKey = {
31
30
  PatternType: 'pattern_type',
32
31
  PiiCount: 'pii_count',
33
32
  PiiDetected: 'pii_detected',
33
+ PiiScore: 'pii_score',
34
34
  PiiTypes: 'pii_types',
35
35
  ProfanityScore: 'profanity_score',
36
36
  RugPullDetected: 'rug_pull_detected',
37
37
  RugPullScore: 'rug_pull_score',
38
38
  SecretCount: 'secret_count',
39
39
  SecretTypes: 'secret_types',
40
+ SecretsDetected: 'secrets_detected',
40
41
  SequenceRisk: 'sequence_risk',
42
+ SessionCommandInjection: 'session_command_injection',
43
+ SessionCumulativeRiskScore: 'session_cumulative_risk_score',
44
+ SessionInjectionDetected: 'session_injection_detected',
45
+ SessionMaxCommandInjectionScore: 'session_max_command_injection_score',
46
+ SessionMaxInjectionScore: 'session_max_injection_score',
47
+ SessionMaxJailbreakScore: 'session_max_jailbreak_score',
48
+ SessionMaxPiiScore: 'session_max_pii_score',
49
+ SessionMaxSecretScore: 'session_max_secret_score',
50
+ SessionPiiDetected: 'session_pii_detected',
51
+ SessionPiiTypes: 'session_pii_types',
52
+ SessionSecretTypes: 'session_secret_types',
53
+ SessionSecretsDetected: 'session_secrets_detected',
54
+ SessionThreatTurns: 'session_threat_turns',
41
55
  SexualScore: 'sexual_score',
42
56
  SuspiciousPattern: 'suspicious_pattern',
43
57
  ThreatCategories: 'threat_categories',