@highflame/policy 2.1.3 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +41 -0
  2. package/_schemas/guardrails/context.json +466 -76
  3. package/_schemas/guardrails/schema.cedarschema +39 -3
  4. package/_schemas/guardrails/templates/defaults/injection.cedar +6 -6
  5. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +2 -2
  6. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +1 -1
  7. package/_schemas/overwatch/context.json +443 -5
  8. package/_schemas/overwatch/schema.cedarschema +42 -4
  9. package/_schemas/palisade/context.json +1 -1
  10. package/_schemas/sentry/context.json +1165 -0
  11. package/_schemas/sentry/schema.cedarschema +388 -0
  12. package/_schemas/sentry/templates/defaults/baseline.cedar +24 -0
  13. package/_schemas/sentry/templates/defaults/content_safety.cedar +232 -0
  14. package/_schemas/sentry/templates/defaults/file_safety.cedar +174 -0
  15. package/_schemas/sentry/templates/defaults/organization.cedar +207 -0
  16. package/_schemas/sentry/templates/defaults/pii.cedar +229 -0
  17. package/_schemas/sentry/templates/defaults/semantic.cedar +167 -0
  18. package/_schemas/sentry/templates/templates.json +93 -0
  19. package/dist/builder.d.ts +32 -0
  20. package/dist/builder.js +6 -6
  21. package/dist/condition-groups.d.ts +69 -0
  22. package/dist/condition-groups.js +305 -0
  23. package/dist/guardrails-context.gen.d.ts +19 -2
  24. package/dist/guardrails-context.gen.js +19 -2
  25. package/dist/guardrails-defaults.gen.js +9 -9
  26. package/dist/index.d.ts +6 -1
  27. package/dist/index.js +6 -1
  28. package/dist/overwatch-context.gen.d.ts +17 -0
  29. package/dist/overwatch-context.gen.js +17 -0
  30. package/dist/sentry-context.gen.d.ts +76 -0
  31. package/dist/sentry-context.gen.js +77 -0
  32. package/dist/sentry-defaults.gen.d.ts +61 -0
  33. package/dist/sentry-defaults.gen.js +1235 -0
  34. package/dist/sentry-entities.gen.d.ts +11 -0
  35. package/dist/sentry-entities.gen.js +33 -0
  36. package/dist/service-schemas.gen.d.ts +12 -2
  37. package/dist/service-schemas.gen.js +861 -25
  38. package/dist/types.d.ts +6 -1
  39. package/dist/types.js +6 -1
  40. package/package.json +1 -1
  41. package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +0 -85
  42. package/_schemas/guardrails/templates/profiles/code_agent.cedar +0 -125
  43. package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +0 -111
@@ -0,0 +1,167 @@
1
+ // =============================================================================
2
+ // Semantic Threat Detection Policy (Default)
3
+ // =============================================================================
4
+ // Detects and blocks prompt injection, jailbreak attempts, and high-severity
5
+ // threats across all browser AI interactions: messages, paste, file uploads.
6
+ //
7
+ // Uses multi-layered detection from Shield:
8
+ // 1. ML classifier scores (injection_score, jailbreak_score)
9
+ // 2. Detection engine rule triggers (detected_threats)
10
+ // 3. Threat severity aggregation (max_threat_severity, highest_severity)
11
+ //
12
+ // Compliance:
13
+ // OWASP LLM01 (Prompt Injection) — direct + indirect
14
+ // OWASP LLM02 (Insecure Output Handling)
15
+ // MITRE ATLAS AML.T0051 (LLM Prompt Injection)
16
+ // MITRE ATLAS AML.T0054 (LLM Jailbreak)
17
+ // NIST 800-53 SI-3, SI-4
18
+ //
19
+ // Category: semantic
20
+ // Namespace: Sentry
21
+ // =============================================================================
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Section 1: Prompt Injection Detection
25
+ // Blocks injection attempts in messages, pasted content, and uploaded files.
26
+ // Users may inadvertently paste injection payloads from compromised sources.
27
+ // ---------------------------------------------------------------------------
28
+
29
+ // Block messages and pastes with prompt injection patterns
30
+ @id("sentry-semantic-block-injection")
31
+ @name("Block prompt injection")
32
+ @description("Block messages and pasted content when detection engine rules identify prompt injection patterns. Catches instruction override, role assumption, and manipulation techniques in user input and pasted content (OWASP LLM01).")
33
+ @severity("critical")
34
+ @tags("injection,security,owasp-llm01,mitre-aml-t0051,baseline")
35
+ @reject_message("Content was blocked because prompt injection patterns were detected. This prevents manipulation of AI agent behavior. Remove adversarial instructions and try again.")
36
+ forbid (
37
+ principal,
38
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content"],
39
+ resource
40
+ )
41
+ when {
42
+ context has detected_threats && context.detected_threats.contains("prompt_injection")
43
+ };
44
+
45
+ // Block content with high ML injection confidence
46
+ @id("sentry-semantic-block-injection-score")
47
+ @name("Block high-confidence injection")
48
+ @description("Block content when the ML injection classifier confidence exceeds threshold (75/100). Catches novel injection techniques including polymorphic payloads, encoding tricks, and obfuscated instructions.")
49
+ @severity("critical")
50
+ @tags("injection,ml-classifier,security,owasp-llm01")
51
+ @reject_message("Your content was blocked because the ML classifier detected prompt injection with high confidence.")
52
+ forbid (
53
+ principal,
54
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content", Sentry::Action::"upload_file"],
55
+ resource
56
+ )
57
+ when {
58
+ context has injection_score && context.injection_score >= 75
59
+ };
60
+
61
+ // Block injection payloads hidden in uploaded documents
62
+ @id("sentry-semantic-block-file-injection")
63
+ @name("Block injection in uploaded files")
64
+ @description("Block file uploads when prompt injection patterns are detected in the document content. Attackers embed injection payloads in PDFs, documents, and spreadsheets to hijack AI behavior via RAG or file analysis.")
65
+ @severity("critical")
66
+ @tags("injection,file-upload,security,owasp-llm01")
67
+ @reject_message("File upload was blocked because prompt injection patterns were detected in the document. Files containing adversarial instructions cannot be shared with AI services.")
68
+ forbid (
69
+ principal,
70
+ action == Sentry::Action::"upload_file",
71
+ resource
72
+ )
73
+ when {
74
+ context has detected_threats && context.detected_threats.contains("prompt_injection")
75
+ };
76
+
77
+ // ---------------------------------------------------------------------------
78
+ // Section 2: Jailbreak Detection
79
+ // Blocks jailbreak attempts in messages and pasted content sent to AI services.
80
+ // ---------------------------------------------------------------------------
81
+
82
+ // Block messages with jailbreak attempts
83
+ @id("sentry-semantic-block-jailbreak")
84
+ @name("Block jailbreak attempts")
85
+ @description("Block messages when detection engine rules identify jailbreak patterns: DAN-style prompts, role-play exploits, safety bypass instructions, and constraint removal attempts (OWASP LLM02).")
86
+ @severity("critical")
87
+ @tags("jailbreak,bypass,security,owasp-llm02,mitre-aml-t0054,baseline")
88
+ @reject_message("Your message was blocked because jailbreak patterns were detected. This prevents circumvention of AI safety controls.")
89
+ forbid (
90
+ principal,
91
+ action == Sentry::Action::"send_message",
92
+ resource
93
+ )
94
+ when {
95
+ context has detected_threats && context.detected_threats.contains("jailbreak")
96
+ };
97
+
98
+ // Block content with high ML jailbreak confidence
99
+ @id("sentry-semantic-block-jailbreak-score")
100
+ @name("Block high-confidence jailbreak")
101
+ @description("Block content when the ML jailbreak classifier exceeds threshold (75/100). Catches sophisticated jailbreak techniques including multi-turn manipulation and encoded payloads.")
102
+ @severity("critical")
103
+ @tags("jailbreak,ml-classifier,security,owasp-llm02")
104
+ @reject_message("Your content was blocked because the ML classifier detected a jailbreak attempt with high confidence.")
105
+ forbid (
106
+ principal,
107
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content"],
108
+ resource
109
+ )
110
+ when {
111
+ context has jailbreak_score && context.jailbreak_score >= 75
112
+ };
113
+
114
+ // ---------------------------------------------------------------------------
115
+ // Section 3: Threat Severity Aggregation
116
+ // Catch-all rules based on aggregated threat severity across all detectors.
117
+ // ---------------------------------------------------------------------------
118
+
119
+ // Block any content with critical severity threats
120
+ @id("sentry-semantic-block-critical")
121
+ @name("Block critical threats")
122
+ @description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all for critical-severity threats regardless of type or source.")
123
+ @severity("critical")
124
+ @tags("critical,baseline,security,catch-all")
125
+ @reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed by AI services.")
126
+ forbid (
127
+ principal,
128
+ action,
129
+ resource
130
+ )
131
+ when {
132
+ context has highest_severity && context.highest_severity == "critical"
133
+ };
134
+
135
+ // Block messages with high severity (>= 3) threats in the injection category
136
+ @id("sentry-semantic-block-high-severity")
137
+ @name("Block high severity threats")
138
+ @description("Block messages when threat detection reports high severity (>= 3) in semantic categories. Catches threats that individually are below critical but collectively indicate adversarial intent.")
139
+ @severity("high")
140
+ @tags("semantic,severity,security,defense-in-depth")
141
+ @reject_message("Your message was blocked because security scanners detected high severity issues. Review your content for manipulative or adversarial patterns.")
142
+ forbid (
143
+ principal,
144
+ action == Sentry::Action::"send_message",
145
+ resource
146
+ )
147
+ when {
148
+ context has threat_categories && context has max_threat_severity &&
149
+ context.threat_categories.contains("injection") &&
150
+ context.max_threat_severity >= 3
151
+ };
152
+
153
+ // Block content with multiple concurrent threats
154
+ @id("sentry-semantic-block-multi-threat")
155
+ @name("Block multi-threat content")
156
+ @description("Block content when multiple distinct threats are detected simultaneously (3+). Multiple concurrent threats strongly indicate an adversarial attack chain or compromised content.")
157
+ @severity("high")
158
+ @tags("multi-threat,security,defense-in-depth")
159
+ @reject_message("Content was blocked because multiple security threats were detected simultaneously. This pattern indicates potentially adversarial content.")
160
+ forbid (
161
+ principal,
162
+ action in [Sentry::Action::"send_message", Sentry::Action::"paste_content", Sentry::Action::"upload_file"],
163
+ resource
164
+ )
165
+ when {
166
+ context has threat_count && context.threat_count >= 3
167
+ };
@@ -0,0 +1,93 @@
1
+ {
2
+ "service": "sentry",
3
+ "version": "1.0.0",
4
+ "description": "Sentry policy templates for browser AI security",
5
+ "categories": [
6
+ {
7
+ "id": "pii",
8
+ "name": "PII Detection",
9
+ "description": "Detect and block personally identifiable information (PII) such as credit card numbers, SSNs, health data, and other sensitive personal data from being shared with AI chat services"
10
+ },
11
+ {
12
+ "id": "semantic",
13
+ "name": "Semantic Threat Detection",
14
+ "description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats in messages, pasted content, and uploaded files"
15
+ },
16
+ {
17
+ "id": "content_safety",
18
+ "name": "Content Safety",
19
+ "description": "Detect and block violent, harmful, hateful, sexual, and profane content in AI interactions, including cut-and-paste safety rules"
20
+ },
21
+ {
22
+ "id": "file_safety",
23
+ "name": "File & Attachment Safety",
24
+ "description": "Enforce document sensitivity controls (MIP labels), block sensitive file uploads, detect secrets and PII in uploaded documents"
25
+ },
26
+ {
27
+ "id": "organization",
28
+ "name": "Organization Rules",
29
+ "description": "Organization-wide baselines, AI service allowlists, credential leakage prevention, and source code protection"
30
+ }
31
+ ],
32
+ "defaults": [
33
+ {
34
+ "id": "sentry-baseline-default",
35
+ "name": "Baseline Permit",
36
+ "description": "Permits all actions by default — threat-specific forbid policies override this when threats are detected",
37
+ "category": "organization",
38
+ "file": "defaults/baseline.cedar",
39
+ "severity": "low",
40
+ "tags": ["baseline", "permit-default", "organization"],
41
+ "is_active": true
42
+ },
43
+ {
44
+ "id": "sentry-semantic-default",
45
+ "name": "Semantic Threat Detection",
46
+ "description": "Detect and block prompt injection, jailbreak attempts, and high-severity threats across messages, paste, and file uploads",
47
+ "category": "semantic",
48
+ "file": "defaults/semantic.cedar",
49
+ "severity": "critical",
50
+ "tags": ["injection", "jailbreak", "owasp-llm01", "owasp-llm02", "baseline"],
51
+ "is_active": true
52
+ },
53
+ {
54
+ "id": "sentry-content-safety-default",
55
+ "name": "Content Safety",
56
+ "description": "Detect and block violent, harmful, hateful, sexual, and profane content including cut-and-paste safety enforcement",
57
+ "category": "content_safety",
58
+ "file": "defaults/content_safety.cedar",
59
+ "severity": "critical",
60
+ "tags": ["violence", "hate-speech", "sexual", "profanity", "content-safety", "paste-safety", "baseline"],
61
+ "is_active": true
62
+ }
63
+ ],
64
+ "templates": [
65
+ {
66
+ "id": "sentry-pii-default",
67
+ "name": "PII Detection",
68
+ "description": "Detect and block credit card numbers, SSNs, health data, and other PII in messages, pasted content, file uploads, and AI responses",
69
+ "category": "pii",
70
+ "file": "defaults/pii.cedar",
71
+ "severity": "critical",
72
+ "tags": ["pii", "privacy", "compliance", "pci-dss", "gdpr", "hipaa", "baseline"]
73
+ },
74
+ {
75
+ "id": "sentry-file-safety-default",
76
+ "name": "File & Attachment Safety",
77
+ "description": "Enforce MIP sensitivity labels, block confidential document uploads, detect secrets and PII in files, and restrict file types",
78
+ "category": "file_safety",
79
+ "file": "defaults/file_safety.cedar",
80
+ "severity": "critical",
81
+ "tags": ["mip", "document-sensitivity", "file-upload", "dlp", "compliance"]
82
+ },
83
+ {
84
+ "id": "sentry-organization-default",
85
+ "name": "Organization Rules",
86
+ "description": "Organization-wide policies: credential leakage prevention, source code protection, and secrets blocking across all interactions",
87
+ "category": "organization",
88
+ "file": "defaults/organization.cedar",
89
+ "severity": "critical",
90
+ "tags": ["secrets", "credentials", "source-code", "data-protection", "organization"]
91
+ }
92
+ ]
93
+ }
package/dist/builder.d.ts CHANGED
@@ -34,6 +34,24 @@ import { EntityType, EntityUID } from './entities.gen.js';
34
34
  import { ActionType } from './actions.gen.js';
35
35
  import { type PolicyAnnotations, type CustomAnnotations, type PolicySeverity } from './annotations.js';
36
36
  import type { ServiceContext } from './service-schemas.gen.js';
37
+ /**
38
+ * Escape a string value for use in Cedar string literals.
39
+ * This prevents injection attacks by escaping backslashes and double quotes.
40
+ */
41
+ export declare function escapeCedarString(value: string): string;
42
+ /**
43
+ * Check if a string is a valid Cedar identifier.
44
+ */
45
+ export declare function isValidIdentifier(s: string): boolean;
46
+ /**
47
+ * Sanitize an identifier, replacing invalid characters with underscores.
48
+ */
49
+ export declare function sanitizeIdentifier(s: string, context: string): string;
50
+ /**
51
+ * Validate a raw condition string for potentially dangerous patterns.
52
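+ * Returns true if the condition contains none of the known dangerous patterns (`;`, comment markers, or an embedded `permit(` / `forbid(`); this is a denylist check, not full validation.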
53
+ */
54
+ export declare function isValidRawCondition(condition: string): boolean;
37
55
  /**
38
56
  * Policy effect - permit or forbid
39
57
  */
@@ -266,6 +284,20 @@ export declare class Policy {
266
284
  * ```
267
285
  */
268
286
  export declare function getOptionalFields(serviceContext: ServiceContext, actions: string | string[]): Set<string>;
287
+ /**
288
+ * Convert a condition to Cedar syntax.
289
+ * Field names are sanitized to prevent injection attacks.
290
+ *
291
+ * When `optionalFields` is provided and the condition's field is in the set,
292
+ * the output is wrapped with a `context has` guard:
293
+ * `context has field && context.field > value`
294
+ */
295
+ export declare function conditionToCedar(condition: PolicyCondition, optionalFields?: Set<string>): string;
296
+ /**
297
+ * Convert a value to Cedar string representation.
298
+ * String values are escaped to prevent injection attacks.
299
+ */
300
+ export declare function valueToString(value: string | number | boolean | string[]): string;
269
301
  /**
270
302
  * Convert a PolicyRule to Cedar policy text with proper annotations.
271
303
  *
package/dist/builder.js CHANGED
@@ -48,19 +48,19 @@ const DANGEROUS_PATTERN_REGEX = /;|\/\/|\/\*|\*\/|permit\s*\(|forbid\s*\(/;
48
48
  * Escape a string value for use in Cedar string literals.
49
49
  * This prevents injection attacks by escaping backslashes and double quotes.
50
50
  */
51
- function escapeCedarString(value) {
51
+ export function escapeCedarString(value) {
52
52
  return value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
53
53
  }
54
54
  /**
55
55
  * Check if a string is a valid Cedar identifier.
56
56
  */
57
- function isValidIdentifier(s) {
57
+ export function isValidIdentifier(s) {
58
58
  return VALID_IDENTIFIER_REGEX.test(s);
59
59
  }
60
60
  /**
61
61
  * Sanitize an identifier, replacing invalid characters with underscores.
62
62
  */
63
- function sanitizeIdentifier(s, context) {
63
+ export function sanitizeIdentifier(s, context) {
64
64
  if (isValidIdentifier(s)) {
65
65
  return s;
66
66
  }
@@ -75,7 +75,7 @@ function sanitizeIdentifier(s, context) {
75
75
  * Validate a raw condition string for potentially dangerous patterns.
76
76
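  * Returns true if the condition contains none of the known dangerous patterns (`;`, comment markers, or an embedded `permit(` / `forbid(`); this is a denylist check, not full validation.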
77
77
  */
78
- function isValidRawCondition(condition) {
78
+ export function isValidRawCondition(condition) {
79
79
  return !DANGEROUS_PATTERN_REGEX.test(condition);
80
80
  }
81
81
  /**
@@ -218,7 +218,7 @@ export function getOptionalFields(serviceContext, actions) {
218
218
  * the output is wrapped with a `context has` guard:
219
219
  * `context has field && context.field > value`
220
220
  */
221
- function conditionToCedar(condition, optionalFields) {
221
+ export function conditionToCedar(condition, optionalFields) {
222
222
  const field = sanitizeIdentifier(condition.field, 'field');
223
223
  const { operator, value } = condition;
224
224
  const valueStr = valueToString(value);
@@ -271,7 +271,7 @@ function conditionToCedar(condition, optionalFields) {
271
271
  * Convert a value to Cedar string representation.
272
272
  * String values are escaped to prevent injection attacks.
273
273
  */
274
- function valueToString(value) {
274
+ export function valueToString(value) {
275
275
  if (typeof value === 'string') {
276
276
  return `"${escapeCedarString(value)}"`;
277
277
  }
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Condition Groups — flat UI-friendly representation of ConditionExpression trees.
3
+ *
4
+ * Provides bidirectional conversion between recursive ConditionExpression ASTs
5
+ * and flat ConditionGroup arrays suitable for visual condition builder UIs.
6
+ *
7
+ * Also provides:
8
+ * - expressionToCedar(): render any AST node to valid Cedar condition text
9
+ * - extractContextFields(): collect all context field names from an AST
10
+ */
11
+ import type { ConditionExpression, PolicyCondition } from './builder.js';
12
+ /** Logical operator for combining conditions within a group. */
13
+ export type GroupLogic = 'and' | 'or';
14
+ /**
15
+ * A flat, UI-friendly condition group.
16
+ *
17
+ * Each group maps 1:1 to a visual block in the condition builder.
18
+ * Groups are implicitly combined with AND at the top level.
19
+ */
20
+ export interface ConditionGroup {
21
+ /** Unique group ID (for React keys / reordering). */
22
+ id: string;
23
+ /** How conditions within this group combine. */
24
+ logic: GroupLogic;
25
+ /** Leaf conditions in the group. */
26
+ conditions: PolicyCondition[];
27
+ /** If true, the entire group is wrapped in NOT. */
28
+ negated: boolean;
29
+ }
30
+ /** Sentinel field name used for raw (unparseable) conditions. */
31
+ export declare const RAW_CONDITION_FIELD = "__raw";
32
+ /** Reset the group counter (for testing). */
33
+ export declare function resetGroupCounter(): void;
34
+ /**
35
+ * Convert a ConditionExpression AST into a flat array of ConditionGroups.
36
+ *
37
+ * The top-level AND is split into separate groups. Each OR subtree becomes
38
+ * a single group with `logic: 'or'`. NOT wrappers set `negated: true`.
39
+ * Raw nodes produce a sentinel condition with `field: "__raw"`.
40
+ */
41
+ export declare function expressionToGroups(expr: ConditionExpression): ConditionGroup[];
42
+ /**
43
+ * Convert a flat array of ConditionGroups back into a ConditionExpression AST.
44
+ *
45
+ * Each group becomes an AND/OR node (or single leaf if only one condition).
46
+ * If `negated`, the group is wrapped in NOT. Multiple groups are combined
47
+ * with a top-level AND.
48
+ */
49
+ export declare function groupsToExpression(groups: ConditionGroup[]): ConditionExpression;
50
+ /**
51
+ * Render any ConditionExpression node to valid Cedar condition text.
52
+ *
53
+ * This handles the full AST including AND, OR, NOT, and raw nodes —
54
+ * unlike `conditionToCedar()` which only handles leaf PolicyConditions.
55
+ *
56
+ * @param expr - The expression tree to render
57
+ * @param optionalFields - Optional set of field names that need `context has` guards
58
+ * @returns Cedar condition text (without the `when { ... }` wrapper)
59
+ */
60
+ export declare function expressionToCedar(expr: ConditionExpression, optionalFields?: Set<string>): string;
61
+ /**
62
+ * Extract all unique context field names referenced in a ConditionExpression tree.
63
+ *
64
+ * Used by Shield to determine which detectors to run — only detectors that
65
+ * produce fields referenced in active policies need to execute.
66
+ *
67
+ * @returns Sorted array of unique field names
68
+ */
69
+ export declare function extractContextFields(expr: ConditionExpression): string[];