@highflame/policy 2.1.3 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,388 @@
1
+ // =============================================================================
2
+ // Sentry Cedar Schema
3
+ // =============================================================================
4
+ // Browser Security — monitors AI chat interactions in the browser and enforces
5
+ // data-protection, content-safety, and compliance policies at point of use.
6
+ //
7
+ // Sentry is a lightweight browser extension (JSA) that intercepts:
8
+ // - Messages sent to AI chat services (ChatGPT, Gemini, Claude, Copilot, etc.)
9
+ // - AI responses returned to the user
10
+ // - Cut/paste operations transferring content into AI chats
11
+ // - File/document uploads into AI chat services
12
+ //
13
+ // Architecture:
14
+ // User → Browser Extension → Shield Detection Engine → Cedar Policy → Allow/Block
15
+ //
16
+ // Threat Coverage:
17
+ // - Data Leakage: PII, PHI, credentials, source code, confidential documents
18
+ // - Content Safety: Violence, hate speech, sexual content, restricted topics
19
+ // - Prompt Injection: Direct and indirect injection via pasted/uploaded content
20
+ // - Document Sensitivity: MIP label enforcement, classification-aware blocking
21
+ // - Compliance: GDPR, HIPAA, PCI DSS, CCPA, EU AI Act
22
+ //
23
+ // Supported AI Services:
24
+ // - ChatGPT (chat.openai.com)
25
+ // - Google Gemini (gemini.google.com)
26
+ // - Claude (claude.ai)
27
+ // - GitHub Copilot Chat
28
+ // - Microsoft Copilot
29
+ // - Custom/enterprise AI chat endpoints
30
+
31
+ namespace Sentry {
32
+
33
+ // =============================================================================
34
+ // ENTITIES - Tenant Hierarchy (ReBAC)
35
+ // =============================================================================
36
+ // Aligned with Guardrails/Overwatch entity hierarchy (Account -> Project).
37
+ //
38
+ // Entity hierarchy enables Cedar's `in` operator for policy scoping:
39
+ // Account (org root)
40
+ // └── Project in [Account]
41
+ // └── ChatSession, Document in [Project]
42
+ //
43
+ // Policy scoping examples:
44
+ // resource in Sentry::Account::"<uuid>" → org-wide
45
+ // resource in Sentry::Project::"<uuid>" → project-wide
46
+ // resource == Sentry::ChatSession::"<id>" → specific session
47
+
48
+ /// Account represents an organization (top-level tenant)
49
+ entity Account;
50
+
51
+ /// Project represents a project within an account
52
+ entity Project in [Account];
53
+
54
+ // =============================================================================
55
+ // ENTITIES - Principals
56
+ // =============================================================================
57
+
58
+ /// Human user interacting with AI chat in the browser
59
+ entity User;
60
+
61
+ // =============================================================================
62
+ // ENTITIES - Resources (scoped under Project)
63
+ // =============================================================================
64
+
65
+ /// AI chat session — resource for send_message, receive_response, paste_content, and upload_file actions
66
+ entity ChatSession in [Project];
67
+
68
+ /// Document or file being uploaded — resource for upload_file action
69
+ entity Document in [Project];
70
+
71
+ // =============================================================================
72
+ // ACTIONS
73
+ // =============================================================================
74
+
75
+ // User sends a message (prompt) to an AI chat service
76
+ // Threat focus: data leakage (PII, secrets, confidential data), injection, content safety
77
+ action send_message appliesTo {
78
+ principal: [User],
79
+ resource: [ChatSession],
80
+ context: {
81
+ // --- Core Metadata ---
82
+ content: String, // Raw message content being sent
83
+ source: String, // Browser extension identifier: "sentry"
84
+ event: String, // Event type: "send_message"
85
+ user_email: String, // User identifier (SSO/OAuth verified)
86
+ target_app: String, // AI service: "chatgpt", "gemini", "claude", "copilot", "custom"
87
+ target_url?: String, // Full URL of the AI chat service
88
+
89
+ // --- Aggregated Threat Summary (from Shield NormalizeAggregation) ---
90
+ threat_count: Long, // Total threats detected
91
+ highest_severity: String, // "critical", "high", "medium", "low", "none"
92
+ threat_categories: Set<String>, // Threat category names
93
+ detected_threats: Set<String>, // Detection rule names that matched
94
+ max_threat_severity: Long, // Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)
95
+
96
+ // --- Secrets Detection (from SecretsDetector) ---
97
+ contains_secrets: Bool, // Whether secrets/credentials detected
98
+ secret_types?: Set<String>, // Types: "aws_access_key", "github_token", "ssh_private_key", etc.
99
+ secret_count?: Long, // Number of distinct secrets found
100
+
101
+ // --- PII Detection (from PIIRegexDetector, normalized) ---
102
+ pii_detected?: Bool, // Whether any PII patterns matched
103
+ pii_types?: Set<String>, // Types: "ssn", "credit_card", "email", "phone", etc.
104
+ pii_count?: Long, // Number of PII matches
105
+ pii_confidence?: Long, // PII detection confidence (0-100)
106
+
107
+ // --- Content Safety Scores (from ToxicityDetector, 0-100) ---
108
+ violence_score: Long,
109
+ weapons_score: Long,
110
+ hate_speech_score: Long,
111
+ crime_score: Long,
112
+ sexual_score: Long,
113
+ profanity_score: Long,
114
+
115
+ // --- ML Detector Confidence Scores (0-100) ---
116
+ injection_score: Long, // Prompt injection score (max of InjectionDetector + DeepContextDetector)
117
+ jailbreak_score: Long, // Jailbreak detection score (max of JailbreakDetector + DeepContextDetector)
118
+
119
+ // --- Topic Classification (from TopicDetector) ---
120
+ content_topics?: Set<String>, // Detected topics: "controlled_substances", "weapons_manufacturing", etc.
121
+ topic_confidence?: Long, // Topic classifier confidence (0-100)
122
+
123
+ // --- Encoding & Unicode Attacks (from SecurityFiltersDetector, EncodedInjectionDetector) ---
124
+ contains_invisible_chars?: Bool, // Zero-width chars, bidi overrides, tag chars
125
+ invisible_chars_score?: Long, // Unicode attack severity (0-100)
126
+ encoded_content_detected?: Bool, // Base64, hex, unicode, URL encoded content
127
+ encoded_types?: Set<String>, // Encoding types detected
128
+ encoded_count?: Long, // Number of encoded segments
129
+ encoded_score?: Long, // Encoded injection severity (0-100)
130
+
131
+ // --- Code Detection (from CodeDetector) ---
132
+ contains_code?: Bool, // Whether content contains source code
133
+ code_languages?: Set<String>, // Detected languages: "python", "javascript", etc.
134
+ code_ratio?: Long, // Percentage of content that is code (0-100)
135
+
136
+ // --- Language Detection (from LanguageDetector, ScriptDetector) ---
137
+ detected_language?: String, // ISO language code
138
+ is_english?: Bool,
139
+ language_confidence?: Long, // 0-100
140
+ detected_script?: String, // "latin", "cyrillic", "arabic", "unknown"
141
+ is_latin_script?: Bool,
142
+ script_confidence?: Long, // 0-100
143
+
144
+ // --- Keyword Detection (from KeywordDetector) ---
145
+ keyword_matched?: Bool, // Whether any keywords matched
146
+ keyword_categories?: Set<String>, // Matched keyword categories
147
+ keyword_count?: Long, // Number of keyword matches
148
+
149
+ // --- Phishing Detection (from CheckPhishDetector) ---
150
+ phishing_detected?: Bool, // Whether phishing URLs detected in content
151
+
152
+ // --- Session Detection History (cross-turn sticky flags) ---
153
+ session_pii_detected?: Bool,
154
+ session_pii_types?: Set<String>,
155
+ session_secrets_detected?: Bool,
156
+ session_secret_types?: Set<String>,
157
+ session_injection_detected?: Bool,
158
+ session_threat_turns?: Long,
159
+ },
160
+ };
161
+
162
+ // AI service responds to the user
163
+ // Threat focus: harmful content in responses, hallucination, data leakage in output
164
+ action receive_response appliesTo {
165
+ principal: [User],
166
+ resource: [ChatSession],
167
+ context: {
168
+ // --- Core Metadata ---
169
+ content: String, // AI response content
170
+ source: String,
171
+ event: String, // "receive_response"
172
+ user_email: String,
173
+ target_app: String,
174
+ target_url?: String,
175
+
176
+ // --- Aggregated Threat Summary ---
177
+ threat_count: Long,
178
+ highest_severity: String,
179
+ threat_categories: Set<String>,
180
+ detected_threats: Set<String>,
181
+ max_threat_severity: Long,
182
+
183
+ // --- Secrets Detection ---
184
+ contains_secrets: Bool,
185
+ secret_types?: Set<String>,
186
+ secret_count?: Long,
187
+
188
+ // --- PII Detection ---
189
+ pii_detected?: Bool,
190
+ pii_types?: Set<String>,
191
+ pii_count?: Long,
192
+ pii_confidence?: Long,
193
+
194
+ // --- Content Safety Scores (0-100) ---
195
+ violence_score: Long,
196
+ weapons_score: Long,
197
+ hate_speech_score: Long,
198
+ crime_score: Long,
199
+ sexual_score: Long,
200
+ profanity_score: Long,
201
+
202
+ // --- ML Detector Scores (0-100) ---
203
+ injection_score: Long, // Indirect injection in response content
204
+ jailbreak_score: Long,
205
+
206
+ // --- Hallucination Detection (from HallucinationDetector) ---
207
+ hallucination_score?: Long, // Hallucination confidence (0-100)
208
+ factuality_score?: Long, // Factuality score (0-100)
209
+
210
+ // --- Code in Response ---
211
+ contains_code?: Bool,
212
+ code_languages?: Set<String>,
213
+ code_ratio?: Long,
214
+
215
+ // --- Phishing ---
216
+ phishing_detected?: Bool,
217
+
218
+ // --- Session History ---
219
+ session_pii_detected?: Bool,
220
+ session_pii_types?: Set<String>,
221
+ session_secrets_detected?: Bool,
222
+ session_secret_types?: Set<String>,
223
+ session_injection_detected?: Bool,
224
+ session_threat_turns?: Long,
225
+ },
226
+ };
227
+
228
+ // User pastes content into an AI chat (clipboard, cross-tab, cross-app)
229
+ // Threat focus: data leakage via cut/paste, injection payloads in pasted content
230
+ action paste_content appliesTo {
231
+ principal: [User],
232
+ resource: [ChatSession],
233
+ context: {
234
+ // --- Core Metadata ---
235
+ content: String, // Pasted content
236
+ source: String,
237
+ event: String, // "paste_content"
238
+ user_email: String,
239
+ target_app: String,
240
+ target_url?: String,
241
+
242
+ // --- Paste Context ---
243
+ paste_source_app?: String, // Source application (e.g., "outlook", "excel", "vscode", "terminal")
244
+ paste_source_url?: String, // Source URL if from another browser tab
245
+ paste_length?: Long, // Character length of pasted content
246
+
247
+ // --- Aggregated Threat Summary ---
248
+ threat_count: Long,
249
+ highest_severity: String,
250
+ threat_categories: Set<String>,
251
+ detected_threats: Set<String>,
252
+ max_threat_severity: Long,
253
+
254
+ // --- Secrets Detection ---
255
+ contains_secrets: Bool,
256
+ secret_types?: Set<String>,
257
+ secret_count?: Long,
258
+
259
+ // --- PII Detection ---
260
+ pii_detected?: Bool,
261
+ pii_types?: Set<String>,
262
+ pii_count?: Long,
263
+ pii_confidence?: Long,
264
+
265
+ // --- Content Safety Scores (0-100) ---
266
+ violence_score: Long,
267
+ weapons_score: Long,
268
+ hate_speech_score: Long,
269
+ crime_score: Long,
270
+ sexual_score: Long,
271
+ profanity_score: Long,
272
+
273
+ // --- ML Detector Scores (0-100) ---
274
+ injection_score: Long,
275
+ jailbreak_score: Long,
276
+
277
+ // --- Code Detection ---
278
+ contains_code?: Bool,
279
+ code_languages?: Set<String>,
280
+ code_ratio?: Long,
281
+
282
+ // --- Encoding Attacks ---
283
+ contains_invisible_chars?: Bool,
284
+ invisible_chars_score?: Long,
285
+ encoded_content_detected?: Bool,
286
+ encoded_types?: Set<String>,
287
+ encoded_count?: Long,
288
+ encoded_score?: Long,
289
+
290
+ // --- Keyword Detection ---
291
+ keyword_matched?: Bool,
292
+ keyword_categories?: Set<String>,
293
+ keyword_count?: Long,
294
+
295
+ // --- Session History ---
296
+ session_pii_detected?: Bool,
297
+ session_pii_types?: Set<String>,
298
+ session_secrets_detected?: Bool,
299
+ session_secret_types?: Set<String>,
300
+ session_injection_detected?: Bool,
301
+ session_threat_turns?: Long,
302
+ },
303
+ };
304
+
305
+ // User uploads a file or document into an AI chat
306
+ // Threat focus: document sensitivity (MIP labels), PII/secrets in files, malware
307
+ action upload_file appliesTo {
308
+ principal: [User],
309
+ resource: [Document, ChatSession],
310
+ context: {
311
+ // --- Core Metadata ---
312
+ content: String, // Extracted file text content (for scanning)
313
+ source: String,
314
+ event: String, // "upload_file"
315
+ user_email: String,
316
+ target_app: String,
317
+ target_url?: String,
318
+
319
+ // --- File Metadata ---
320
+ file_name?: String, // Original file name
321
+ file_type?: String, // MIME type: "application/pdf", "text/csv", etc.
322
+ file_size_bytes?: Long, // File size in bytes
323
+ file_extension?: String, // Extension: "pdf", "docx", "xlsx", "csv", "txt"
324
+
325
+ // --- Document Sensitivity (MIP Labels) ---
326
+ mip_label_id?: String, // Microsoft Information Protection label ID
327
+ mip_label_name?: String, // Label display name: "Public", "Internal", "Confidential", "Highly Confidential"
328
+ sensitivity_level?: String, // Normalized: "public", "internal", "confidential", "restricted"
329
+ is_encrypted?: Bool, // Whether file is encrypted (MIP protection)
330
+ is_rights_managed?: Bool, // Whether file has rights management restrictions
331
+
332
+ // --- Aggregated Threat Summary ---
333
+ threat_count: Long,
334
+ highest_severity: String,
335
+ threat_categories: Set<String>,
336
+ detected_threats: Set<String>,
337
+ max_threat_severity: Long,
338
+
339
+ // --- Secrets Detection ---
340
+ contains_secrets: Bool,
341
+ secret_types?: Set<String>,
342
+ secret_count?: Long,
343
+
344
+ // --- PII Detection ---
345
+ pii_detected?: Bool,
346
+ pii_types?: Set<String>,
347
+ pii_count?: Long,
348
+ pii_confidence?: Long,
349
+
350
+ // --- Content Safety Scores (0-100) ---
351
+ violence_score: Long,
352
+ weapons_score: Long,
353
+ hate_speech_score: Long,
354
+ crime_score: Long,
355
+ sexual_score: Long,
356
+ profanity_score: Long,
357
+
358
+ // --- ML Detector Scores (0-100) ---
359
+ injection_score: Long, // Prompt injection payloads hidden in documents
360
+ jailbreak_score: Long,
361
+
362
+ // --- Code Detection ---
363
+ contains_code?: Bool,
364
+ code_languages?: Set<String>,
365
+ code_ratio?: Long,
366
+
367
+ // --- Phishing ---
368
+ phishing_detected?: Bool,
369
+
370
+ // --- Encoding Attacks ---
371
+ contains_invisible_chars?: Bool,
372
+ invisible_chars_score?: Long,
373
+ encoded_content_detected?: Bool,
374
+ encoded_types?: Set<String>,
375
+ encoded_count?: Long,
376
+ encoded_score?: Long,
377
+
378
+ // --- Session History ---
379
+ session_pii_detected?: Bool,
380
+ session_pii_types?: Set<String>,
381
+ session_secrets_detected?: Bool,
382
+ session_secret_types?: Set<String>,
383
+ session_injection_detected?: Bool,
384
+ session_threat_turns?: Long,
385
+ },
386
+ };
387
+
388
+ }
@@ -0,0 +1,24 @@
1
+ // =============================================================================
2
+ // Baseline Permit Policy (Default)
3
+ // =============================================================================
4
+ // Permits all actions by default. Threat-specific forbid policies override
5
+ // this to block when detection engines identify issues.
6
+ //
7
+ // Cedar is default-deny: without at least one permit rule, every request
8
+ // is denied regardless of forbid rules. This baseline ensures the system
9
+ // is "allow unless blocked" rather than "block everything".
10
+ //
11
+ // Category: organization
12
+ // Namespace: Sentry
13
+ // =============================================================================
14
+
15
+ @id("sentry-baseline-permit-all")
16
+ @name("Permit all actions by default")
17
+ @description("Baseline permit for all actions — threat-specific forbid policies override this when threats are detected")
18
+ @severity("low")
19
+ @tags("baseline,permit-default,organization")
20
+ permit ( // unconditional: no scope constraints and no when/unless clause
21
+ principal, // unconstrained — matches any principal
22
+ action, // unconstrained — matches any action
23
+ resource // unconstrained — matches any resource
24
+ ); // a matching forbid policy always takes precedence over this permit
@@ -0,0 +1,232 @@
1
+ // =============================================================================
2
+ // Content Safety Policy (Default)
3
+ // =============================================================================
4
+ // Detects and blocks violent, harmful, hateful, sexual, and profane content
5
+ // in AI chat interactions. Includes cut-and-paste safety rules to prevent
6
+ // unsafe content from being transferred into AI services.
7
+ //
8
+ // The detection engine runs ML classifiers (toxicity, content safety) and
9
+ // produces normalized scores (0-100) for each category.
10
+ //
11
+ // Compliance:
12
+ // NIST 800-53 SI-4 (Information System Monitoring)
13
+ // EU AI Act Art. 52 (Transparency for AI Systems)
14
+ // OWASP LLM02 (Insecure Output Handling)
15
+ // ISO 42001 (AI Management System)
16
+ //
17
+ // Category: content_safety
18
+ // Namespace: Sentry
19
+ // =============================================================================
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Section 1: Violence & Weapons
23
+ // Blocks content promoting, describing, or instructing violence and weapons.
24
+ // Applies to messages, paste, and file uploads.
25
+ // ---------------------------------------------------------------------------
26
+
27
+ // Block violent content across all input channels
28
+ @id("sentry-cs-block-violence")
29
+ @name("Block violent content")
30
+ @description("Block content when the ML violence detection score exceeds threshold (80/100). Catches graphic violence descriptions, instructions for causing harm, and violent threat language in messages, pastes, and uploads.")
31
+ @severity("critical")
32
+ @tags("violence,content-safety,trust-safety,nist-si-4,iso-42001")
33
+ @reject_message("Content blocked: violent content detected. AI services must not process violent content in enterprise environments. Please rephrase without violence-related language.")
34
+ forbid (
35
+ principal,
36
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content", Sentry::Action::"upload_file"],
37
+ resource
38
+ )
39
+ when {
40
+ context has violence_score && context.violence_score >= 80
41
+ };
42
+
43
+ // Block weapons content
44
+ @id("sentry-cs-block-weapons")
45
+ @name("Block weapons content")
46
+ @description("Block content when the ML weapons detection score exceeds threshold (80/100). Catches weapon manufacturing instructions, procurement guidance, and detailed specifications.")
47
+ @severity("critical")
48
+ @tags("weapons,content-safety,trust-safety,nist-si-4")
49
+ @reject_message("Content blocked: weapons-related content detected. AI services must not process weapons manufacturing, procurement, or specification content.")
50
+ forbid (
51
+ principal,
52
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content", Sentry::Action::"upload_file"],
53
+ resource
54
+ )
55
+ when {
56
+ context has weapons_score && context.weapons_score >= 80
57
+ };
58
+
59
+ // ---------------------------------------------------------------------------
60
+ // Section 2: Hate Speech & Discrimination
61
+ // Lower threshold (75) — enterprises have zero tolerance for hate speech.
62
+ // ---------------------------------------------------------------------------
63
+
64
+ // Block hate speech content
65
+ @id("sentry-cs-block-hate-speech")
66
+ @name("Block hate speech")
67
+ @description("Block content when the ML hate speech score exceeds threshold (75/100). Lower threshold than other categories because enterprises have zero tolerance for discriminatory content. Catches slurs, dehumanizing language, and targeted harassment.")
68
+ @severity("critical")
69
+ @tags("hate-speech,content-safety,trust-safety,nist-si-4,eu-ai-act")
70
+ @reject_message("Content blocked: hate speech or discriminatory content detected. AI services must not process hateful, discriminatory, or dehumanizing content.")
71
+ forbid (
72
+ principal,
73
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content", Sentry::Action::"upload_file"],
74
+ resource
75
+ )
76
+ when {
77
+ context has hate_speech_score && context.hate_speech_score >= 75
78
+ };
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // Section 3: Criminal Content
82
+ // ---------------------------------------------------------------------------
83
+
84
+ // Block criminal content
85
+ @id("sentry-cs-block-crime")
86
+ @name("Block criminal content")
87
+ @description("Block content when the ML criminal activity detection score exceeds threshold (80/100). Catches illegal activity instructions, fraud techniques, and criminal behavior content.")
88
+ @severity("high")
89
+ @tags("crime,content-safety,trust-safety,nist-si-4")
90
+ @reject_message("Content blocked: criminal activity content detected. AI services must not process content related to illegal activities or fraud.")
91
+ forbid (
92
+ principal,
93
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content", Sentry::Action::"upload_file"],
94
+ resource
95
+ )
96
+ when {
97
+ context has crime_score && context.crime_score >= 80
98
+ };
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Section 4: Sexual Content
102
+ // ---------------------------------------------------------------------------
103
+
104
+ // Block sexual content
105
+ @id("sentry-cs-block-sexual")
106
+ @name("Block sexual content")
107
+ @description("Block content when the ML sexual content score exceeds threshold (80/100). Ensures AI services do not process sexually explicit material in enterprise environments.")
108
+ @severity("high")
109
+ @tags("sexual,content-safety,trust-safety,eu-ai-act,iso-42001")
110
+ @reject_message("Content blocked: sexual content detected. AI services must not process sexually explicit material in enterprise environments.")
111
+ forbid (
112
+ principal,
113
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content", Sentry::Action::"upload_file"],
114
+ resource
115
+ )
116
+ when {
117
+ context has sexual_score && context.sexual_score >= 80
118
+ };
119
+
120
+ // ---------------------------------------------------------------------------
121
+ // Section 5: Profanity
122
+ // Higher threshold (90) — allows normal expression while blocking abuse.
123
+ // ---------------------------------------------------------------------------
124
+
125
+ // Block excessive profanity (profanity_score >= 90) in sent messages and pasted content
126
+ @id("sentry-cs-block-profanity")
127
+ @name("Block profanity")
128
+ @description("Block content when the ML profanity detection score exceeds threshold (90/100). Higher threshold allows normal expression while blocking abusive or harassing language patterns.")
129
+ @severity("medium")
130
+ @tags("profanity,content-safety,trust-safety")
131
+ @reject_message("Content blocked: excessive profanity detected. Please rephrase in a professional manner.")
132
+ forbid (
133
+ principal,
134
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content"], // NOTE(review): upload_file is not listed here, unlike the Section 1-4 rules — confirm this is intentional
135
+ resource
136
+ )
137
+ when {
138
+ context has profanity_score && context.profanity_score >= 90 // threshold is inclusive: a score of exactly 90 is blocked
139
+ };
140
+
141
+ // ---------------------------------------------------------------------------
142
+ // Section 6: Cut & Paste Safety
143
+ // Specific rules for content pasted from external sources into AI chats.
144
+ // Paste operations are a primary vector for data leakage.
145
+ // ---------------------------------------------------------------------------
146
+
147
+ // Block large pastes (paste_length > 5000) that carry at least one detected threat
148
+ @id("sentry-cs-block-large-paste-threats")
149
+ @name("Block large pastes with threats")
150
+ @description("Block large paste operations (>5000 chars) when any threats are detected. Large pastes with threats likely indicate bulk data dumps from emails, documents, or databases being leaked to AI services.")
151
+ @severity("high")
152
+ @tags("paste-safety,data-leakage,content-safety")
153
+ @reject_message("Large paste operation blocked: security threats were detected in the pasted content. Large data transfers to AI services require threat-free content.")
154
+ forbid (
155
+ principal,
156
+ action == Sentry::Action::"paste_content",
157
+ resource
158
+ )
159
+ when {
160
+ context has paste_length && context has threat_count && // paste_length is optional in the Sentry schema; when it is absent this rule does not fire
161
+ context.paste_length > 5000 && context.threat_count >= 1 // strict > on length (exactly 5000 is allowed); any single threat suffices
162
+ };
163
+
164
+ // Block pastes containing encoded injection payloads
165
+ @id("sentry-cs-block-paste-encoded")
166
+ @name("Block encoded paste content")
167
+ @description("Block paste operations when encoded injection payloads (base64, hex, unicode) are detected. Attackers use encoding to smuggle injection payloads via clipboard transfer.")
168
+ @severity("high")
169
+ @tags("paste-safety,encoding,injection,content-safety")
170
+ @reject_message("Paste blocked: encoded injection payloads detected in pasted content. Content with hidden encoded instructions cannot be shared with AI services.")
171
+ forbid (
172
+ principal,
173
+ action == Sentry::Action::"paste_content",
174
+ resource
175
+ )
176
+ when {
177
+ context has encoded_content_detected && context.encoded_content_detected &&
178
+ context has encoded_score && context.encoded_score >= 60
179
+ };
180
+
181
+ // Block pastes with invisible characters
182
+ @id("sentry-cs-block-paste-invisible")
183
+ @name("Block paste with invisible characters")
184
+ @description("Block paste operations containing invisible Unicode characters (zero-width, bidi overrides). These can hide malicious instructions that appear invisible to users but are processed by AI models.")
185
+ @severity("high")
186
+ @tags("paste-safety,unicode,invisible-chars,content-safety")
187
+ @reject_message("Paste blocked: invisible Unicode characters detected. Hidden characters can disguise malicious instructions that AI models process but users cannot see.")
188
+ forbid (
189
+ principal,
190
+ action == Sentry::Action::"paste_content",
191
+ resource
192
+ )
193
+ when {
194
+ context has contains_invisible_chars && context.contains_invisible_chars &&
195
+ context has invisible_chars_score && context.invisible_chars_score >= 50
196
+ };
197
+
198
+ // ---------------------------------------------------------------------------
199
+ // Section 7: AI Response Safety
200
+ // Block harmful content in AI responses before the user sees it.
201
+ // ---------------------------------------------------------------------------
202
+
203
+ // Block violent or criminal AI responses (score >= 80); hateful responses are handled by sentry-cs-block-response-hate
204
+ @id("sentry-cs-block-response-safety")
205
+ @name("Block harmful AI responses")
206
+ @description("Block AI responses containing high-severity violent, hateful, or criminal content. Prevents harmful AI-generated content from reaching users in enterprise environments.")
207
+ @severity("critical")
208
+ @tags("response-safety,content-safety,owasp-llm02")
209
+ @reject_message("AI response blocked: harmful content detected in the response. The AI service generated content that violates enterprise content safety policies.")
210
+ forbid (
211
+ principal,
212
+ action == Sentry::Action::"receive_response", // applies only to responses coming back from the AI service
213
+ resource
214
+ )
215
+ when {
216
+ (context has violence_score && context.violence_score >= 80) || (context has crime_score && context.crime_score >= 80) // crime_score check added so the rule matches its description; 80 mirrors the input-side crime threshold
217
+ };
218
+
219
+ @id("sentry-cs-block-response-hate")
220
+ @name("Block hateful AI responses")
221
+ @description("Block AI responses with hate speech or discriminatory content.")
222
+ @severity("critical")
223
+ @tags("response-safety,hate-speech,content-safety,owasp-llm02")
224
+ @reject_message("AI response blocked: hate speech or discriminatory content detected in the response.")
225
+ forbid (
226
+ principal,
227
+ action == Sentry::Action::"receive_response",
228
+ resource
229
+ )
230
+ when {
231
+ context has hate_speech_score && context.hate_speech_score >= 75
232
+ };