@highflame/policy 2.0.10 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/_schemas/guardrails/context.json +435 -0
  2. package/_schemas/guardrails/schema.cedarschema +225 -0
  3. package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +94 -0
  4. package/_schemas/guardrails/templates/defaults/baseline.cedar +24 -0
  5. package/_schemas/guardrails/templates/defaults/injection.cedar +70 -0
  6. package/_schemas/guardrails/templates/defaults/pii.cedar +48 -0
  7. package/_schemas/guardrails/templates/defaults/secrets.cedar +40 -0
  8. package/_schemas/guardrails/templates/defaults/semantic.cedar +59 -0
  9. package/_schemas/guardrails/templates/defaults/tool_risk.cedar +58 -0
  10. package/_schemas/guardrails/templates/defaults/toxicity.cedar +76 -0
  11. package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +84 -0
  12. package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -0
  13. package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +35 -0
  14. package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +43 -0
  15. package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +85 -0
  16. package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +109 -0
  17. package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +22 -0
  18. package/_schemas/guardrails/templates/profiles/code_agent.cedar +125 -0
  19. package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +38 -0
  20. package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +40 -0
  21. package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +49 -0
  22. package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +111 -0
  23. package/_schemas/guardrails/templates/templates.json +213 -0
  24. package/dist/builder.d.ts +61 -0
  25. package/dist/builder.js +4 -2
  26. package/dist/entities.gen.d.ts +4 -0
  27. package/dist/entities.gen.js +4 -0
  28. package/dist/explain.d.ts +178 -0
  29. package/dist/explain.js +390 -0
  30. package/dist/guardrails-context.gen.d.ts +49 -0
  31. package/dist/guardrails-context.gen.js +50 -0
  32. package/dist/guardrails-defaults.gen.d.ts +61 -0
  33. package/dist/guardrails-defaults.gen.js +1278 -0
  34. package/dist/guardrails-entities.gen.d.ts +11 -0
  35. package/dist/guardrails-entities.gen.js +37 -0
  36. package/dist/index.d.ts +6 -1
  37. package/dist/index.js +6 -1
  38. package/dist/overwatch-defaults.gen.js +98 -0
  39. package/dist/parser.js +136 -4
  40. package/dist/schema.gen.d.ts +1 -1
  41. package/dist/schema.gen.js +6 -0
  42. package/dist/service-schemas.gen.d.ts +10 -0
  43. package/dist/service-schemas.gen.js +337 -1
  44. package/dist/types.d.ts +6 -1
  45. package/dist/types.js +6 -1
  46. package/package.json +5 -1
@@ -0,0 +1,1278 @@
1
+ // Code generated by highflame-policy-codegen. DO NOT EDIT.
2
+ // Source: schemas/guardrails/templates/templates.json
3
+ //
4
+ // Guardrails default policies and templates.
5
+ // Cedar text is embedded at build time. PolicyRule[] can be parsed at runtime
6
+ // using parseCedarToRules().
7
+ // =============================================================================
8
+ // EMBEDDED CEDAR POLICY TEXT
9
+ // =============================================================================
10
// GUARDRAILS_BASELINE_DEFAULT_CEDAR: embedded Cedar source text holding a single
// policy, "baseline-permit-all" — an unconditional `permit` over every principal,
// action and resource (no `when` clause). Per the embedded header, Cedar is
// default-deny, so this permit is what makes the forbid policies in the other
// constants act as overrides ("allow unless blocked").
+ const GUARDRAILS_BASELINE_DEFAULT_CEDAR = `// =============================================================================
11
+ // Baseline Permit Policy (Default)
12
+ // =============================================================================
13
+ // Permits all actions by default. Threat-specific forbid policies override
14
+ // this to block when detectors find security issues.
15
+ //
16
+ // Cedar is default-deny: without at least one permit rule, every request
17
+ // is denied regardless of forbid rules. This baseline ensures the system
18
+ // is "allow unless blocked" rather than "block everything".
19
+ //
20
+ // Category: organization
21
+ // Namespace: Guardrails
22
+ // =============================================================================
23
+
24
+ @id("baseline-permit-all")
25
+ @name("Permit all actions by default")
26
+ @description("Baseline permit for all actions — threat-specific forbid policies override this when threats are detected")
27
+ @severity("low")
28
+ @tags("baseline,permit-default,organization")
29
+ permit (
30
+ principal,
31
+ action,
32
+ resource
33
+ );
34
+ `;
35
// GUARDRAILS_SECRETS_DEFAULT_CEDAR: embedded Cedar source text with two forbid
// policies that apply to any action:
//   - "secrets-block-all":        context.contains_secrets == true
//   - "secrets-block-high-count": context.secret_count > 2
// Both guard the attribute read with `context has ...`, so a missing key simply
// does not match.
// NOTE(review): the header lists `secret_types`, but no policy in this text
// reads it — confirm whether that is intentional.
+ const GUARDRAILS_SECRETS_DEFAULT_CEDAR = `// =============================================================================
36
+ // Secrets Detection Policy
37
+ // =============================================================================
38
+ // Blocks requests containing API keys, tokens, credentials, or other secrets.
39
+ // Applies to both input prompts and output responses.
40
+ //
41
+ // Context keys used (normalized by projection layer):
42
+ // - contains_secrets: bool - Whether secrets were detected
43
+ // - secret_count: Long - Number of secret matches
44
+ // - secret_types: Set<String> - Types of secrets found
45
+ //
46
+ // Category: security
47
+ // Namespace: Guardrails
48
+ // =============================================================================
49
+
50
+ @id("secrets-block-all")
51
+ @name("Block content containing secrets")
52
+ @description("Forbids requests that contain API keys, tokens, or credentials")
53
+ @severity("critical")
54
+ @tags("secrets,security,data-leak")
55
+ forbid (
56
+ principal,
57
+ action,
58
+ resource
59
+ ) when {
60
+ context has contains_secrets && context.contains_secrets == true
61
+ };
62
+
63
+ @id("secrets-block-high-count")
64
+ @name("Block multiple secrets")
65
+ @description("Forbids requests with multiple secret matches (potential data dump)")
66
+ @severity("critical")
67
+ @tags("secrets,security,data-leak")
68
+ forbid (
69
+ principal,
70
+ action,
71
+ resource
72
+ ) when {
73
+ context has secret_count && context.secret_count > 2
74
+ };
75
+ `;
76
// GUARDRAILS_INJECTION_DEFAULT_CEDAR: embedded Cedar source text with four
// forbid policies that apply to any action:
//   - "injection-block-high-confidence": injection_score > 85
//   - "jailbreak-block-high-confidence": jailbreak_score > 80
//   - "injection-combined-threshold":    injection_score > 60 AND jailbreak_score > 60
//   - "injection-invisible-chars":       contains_invisible_chars == true AND
//                                        invisible_chars_score > 50
// Every attribute read is preceded by a `context has ...` guard.
// NOTE(review): the header lists `injection_type`, but no policy in this text
// reads it — confirm whether that is intentional.
+ const GUARDRAILS_INJECTION_DEFAULT_CEDAR = `// =============================================================================
77
+ // Injection & Jailbreak Detection Policy
78
+ // =============================================================================
79
+ // Blocks prompt injection, jailbreak attempts, and command injection.
80
+ // Uses ML-based confidence scores from normalized context.
81
+ //
82
+ // Context keys used (normalized by projection layer):
83
+ // - injection_score: Long (0-100) - Overall injection confidence
84
+ // - jailbreak_score: Long (0-100) - Jailbreak attempt confidence
85
+ // - injection_type: String - Type of injection detected
86
+ // - contains_invisible_chars: Bool - Invisible Unicode characters detected
87
+ // - invisible_chars_score: Long (0-100) - Invisible character density
88
+ //
89
+ // Category: security
90
+ // Namespace: Guardrails
91
+ // =============================================================================
92
+
93
+ @id("injection-block-high-confidence")
94
+ @name("Block high-confidence injection attempts")
95
+ @description("Forbids requests with injection confidence above 85%")
96
+ @severity("high")
97
+ @tags("injection,jailbreak,security")
98
+ forbid (
99
+ principal,
100
+ action,
101
+ resource
102
+ ) when {
103
+ context has injection_score && context.injection_score > 85
104
+ };
105
+
106
+ @id("jailbreak-block-high-confidence")
107
+ @name("Block high-confidence jailbreak attempts")
108
+ @description("Forbids requests with jailbreak confidence above 80%")
109
+ @severity("high")
110
+ @tags("jailbreak,security")
111
+ forbid (
112
+ principal,
113
+ action,
114
+ resource
115
+ ) when {
116
+ context has jailbreak_score && context.jailbreak_score > 80
117
+ };
118
+
119
+ @id("injection-combined-threshold")
120
+ @name("Block combined injection and jailbreak")
121
+ @description("Forbids requests with moderate scores in both injection and jailbreak")
122
+ @severity("high")
123
+ @tags("injection,jailbreak,security")
124
+ forbid (
125
+ principal,
126
+ action,
127
+ resource
128
+ ) when {
129
+ context has injection_score && context has jailbreak_score &&
130
+ context.injection_score > 60 && context.jailbreak_score > 60
131
+ };
132
+
133
+ @id("injection-invisible-chars")
134
+ @name("Block invisible character injection")
135
+ @description("Forbids requests containing invisible Unicode characters (zero-width joiners, etc.) commonly used for prompt injection")
136
+ @severity("high")
137
+ @tags("injection,unicode,security")
138
+ forbid (
139
+ principal,
140
+ action,
141
+ resource
142
+ ) when {
143
+ context has contains_invisible_chars && context.contains_invisible_chars == true &&
144
+ context has invisible_chars_score && context.invisible_chars_score > 50
145
+ };
146
+ `;
147
/**
 * Default Guardrails PII policy, embedded as Cedar source text.
 *
 * Contains two forbid policies, both scoped to the `process_prompt` action and
 * to `context.direction == "output"`:
 *   - "pii-block-output":          `pii_detected == true`
 *   - "pii-block-sensitive-types": `pii_types` contains "ssn", "credit_card"
 *                                  or "passport"
 *
 * Every context attribute, including `direction`, is read behind a
 * `context has ...` guard. In Cedar, reading an attribute that is absent is an
 * evaluation error, and a policy that errors is skipped by the authorizer; for
 * a forbid policy that means the block silently does not apply. Guarding
 * `direction` keeps these policies consistent with the other Guardrails
 * defaults and makes a missing key a plain non-match instead of an error.
 *
 * @type {string}
 */
const GUARDRAILS_PII_DEFAULT_CEDAR = `// =============================================================================
// PII (Personally Identifiable Information) Policy
// =============================================================================
// Blocks content containing PII in output responses.
// Typically applied to LLM outputs to prevent data leakage.
//
// Context keys used (normalized by projection layer):
// - pii_detected: bool - Whether PII was found
// - pii_count: Long - Number of PII matches
// - pii_types: Set<String> - Types of PII detected
// - direction: String - "input" or "output"
//
// Category: privacy
// Namespace: Guardrails
// =============================================================================

@id("pii-block-output")
@name("Block PII in outputs")
@description("Forbids LLM responses that contain PII (prevents data leakage)")
@severity("high")
@tags("pii,privacy,data-protection")
forbid (
    principal,
    action == Guardrails::Action::"process_prompt",
    resource
) when {
    context has pii_detected && context.pii_detected == true &&
    context has direction && context.direction == "output"
};

@id("pii-block-sensitive-types")
@name("Block sensitive PII types")
@description("Forbids outputs containing SSN, credit cards, or passport numbers")
@severity("critical")
@tags("pii,privacy,sensitive-data")
forbid (
    principal,
    action == Guardrails::Action::"process_prompt",
    resource
) when {
    context has direction && context.direction == "output" &&
    context has pii_types &&
    (
        context.pii_types.contains("ssn") ||
        context.pii_types.contains("credit_card") ||
        context.pii_types.contains("passport")
    )
};
`;
196
// GUARDRAILS_TOXICITY_DEFAULT_CEDAR: embedded Cedar source text with three
// forbid policies that apply to any action, over six scores (violence,
// hate_speech, sexual, weapons, crime, profanity):
//   - "toxicity-block-critical":    any single score > 90
//   - "toxicity-block-hate-speech": hate_speech_score > 75
//   - "toxicity-block-combined":    any two scores both > 60; all 15 unordered
//                                   pairs of the six scores are spelled out
// Every attribute read is preceded by a `context has ...` guard.
+ const GUARDRAILS_TOXICITY_DEFAULT_CEDAR = `// =============================================================================
197
+ // Toxicity & Content Moderation Policy
198
+ // =============================================================================
199
+ // Blocks toxic, harmful, or inappropriate content.
200
+ // Covers violence, hate speech, sexual content, weapons, crime, and profanity.
201
+ //
202
+ // Context keys used (normalized by projection layer):
203
+ // - violence_score: Long (0-100) - Violence reference score
204
+ // - hate_speech_score: Long (0-100) - Hate speech score
205
+ // - sexual_score: Long (0-100) - Sexual content score
206
+ // - weapons_score: Long (0-100) - Weapons reference score
207
+ // - crime_score: Long (0-100) - Criminal activity score
208
+ // - profanity_score: Long (0-100) - Profanity score
209
+ //
210
+ // Category: trust-safety
211
+ // Namespace: Guardrails
212
+ // =============================================================================
213
+
214
+ @id("toxicity-block-critical")
215
+ @name("Block critical toxicity")
216
+ @description("Forbids content with any toxicity score above 90")
217
+ @severity("critical")
218
+ @tags("toxicity,trust-safety")
219
+ forbid (
220
+ principal,
221
+ action,
222
+ resource
223
+ ) when {
224
+ (context has violence_score && context.violence_score > 90) ||
225
+ (context has hate_speech_score && context.hate_speech_score > 90) ||
226
+ (context has sexual_score && context.sexual_score > 90) ||
227
+ (context has weapons_score && context.weapons_score > 90) ||
228
+ (context has crime_score && context.crime_score > 90) ||
229
+ (context has profanity_score && context.profanity_score > 90)
230
+ };
231
+
232
+ @id("toxicity-block-hate-speech")
233
+ @name("Block hate speech")
234
+ @description("Forbids content with hate speech above 75")
235
+ @severity("high")
236
+ @tags("hate-speech,trust-safety")
237
+ forbid (
238
+ principal,
239
+ action,
240
+ resource
241
+ ) when {
242
+ context has hate_speech_score && context.hate_speech_score > 75
243
+ };
244
+
245
+ @id("toxicity-block-combined")
246
+ @name("Block combined moderate toxicity")
247
+ @description("Forbids content with two or more moderate toxicity scores above 60")
248
+ @severity("high")
249
+ @tags("toxicity,trust-safety")
250
+ forbid (
251
+ principal,
252
+ action,
253
+ resource
254
+ ) when {
255
+ // Any 2 of 6 toxicity scores above 60 triggers a block
256
+ (context has violence_score && context has hate_speech_score && context.violence_score > 60 && context.hate_speech_score > 60) ||
257
+ (context has violence_score && context has sexual_score && context.violence_score > 60 && context.sexual_score > 60) ||
258
+ (context has violence_score && context has weapons_score && context.violence_score > 60 && context.weapons_score > 60) ||
259
+ (context has violence_score && context has crime_score && context.violence_score > 60 && context.crime_score > 60) ||
260
+ (context has violence_score && context has profanity_score && context.violence_score > 60 && context.profanity_score > 60) ||
261
+ (context has hate_speech_score && context has sexual_score && context.hate_speech_score > 60 && context.sexual_score > 60) ||
262
+ (context has hate_speech_score && context has weapons_score && context.hate_speech_score > 60 && context.weapons_score > 60) ||
263
+ (context has hate_speech_score && context has crime_score && context.hate_speech_score > 60 && context.crime_score > 60) ||
264
+ (context has hate_speech_score && context has profanity_score && context.hate_speech_score > 60 && context.profanity_score > 60) ||
265
+ (context has sexual_score && context has weapons_score && context.sexual_score > 60 && context.weapons_score > 60) ||
266
+ (context has sexual_score && context has crime_score && context.sexual_score > 60 && context.crime_score > 60) ||
267
+ (context has sexual_score && context has profanity_score && context.sexual_score > 60 && context.profanity_score > 60) ||
268
+ (context has weapons_score && context has crime_score && context.weapons_score > 60 && context.crime_score > 60) ||
269
+ (context has weapons_score && context has profanity_score && context.weapons_score > 60 && context.profanity_score > 60) ||
270
+ (context has crime_score && context has profanity_score && context.crime_score > 60 && context.profanity_score > 60)
271
+ };
272
+ `;
273
// GUARDRAILS_TOOL_RISK_DEFAULT_CEDAR: embedded Cedar source text with three
// forbid policies, all scoped to the Guardrails `call_tool` action:
//   - "tool-block-dangerous":           tool_risk_score > 85 OR
//                                       tool_category == "dangerous"
//   - "tool-block-shell-commands":      tool_name is "shell" or "execute_command"
//   - "tool-block-sensitive-high-risk": tool_is_sensitive == true AND
//                                       tool_risk_score > 70
// Every attribute read is preceded by a `context has ...` guard.
+ const GUARDRAILS_TOOL_RISK_DEFAULT_CEDAR = `// =============================================================================
274
+ // Tool Risk Policy
275
+ // =============================================================================
276
+ // Blocks dangerous tool calls based on risk scoring.
277
+ // Considers tool sensitivity, argument patterns, and MCP verification.
278
+ //
279
+ // Context keys used (normalized by projection layer):
280
+ // - tool_name: String - Name of the tool
281
+ // - tool_risk_score: Long (0-100) - Computed risk score
282
+ // - tool_is_sensitive: bool - Whether tool is classified as sensitive
283
+ // - tool_category: String - "safe" | "sensitive" | "dangerous"
284
+ //
285
+ // Category: agentic-security
286
+ // Namespace: Guardrails
287
+ // =============================================================================
288
+
289
+ @id("tool-block-dangerous")
290
+ @name("Block dangerous tools")
291
+ @description("Forbids tools classified as dangerous (risk > 85)")
292
+ @severity("critical")
293
+ @tags("tools,agentic,security")
294
+ forbid (
295
+ principal,
296
+ action == Guardrails::Action::"call_tool",
297
+ resource
298
+ ) when {
299
+ (context has tool_risk_score && context.tool_risk_score > 85) ||
300
+ (context has tool_category && context.tool_category == "dangerous")
301
+ };
302
+
303
+ @id("tool-block-shell-commands")
304
+ @name("Block shell command execution")
305
+ @description("Forbids direct shell/execute_command tool calls")
306
+ @severity("high")
307
+ @tags("tools,shell,security")
308
+ forbid (
309
+ principal,
310
+ action == Guardrails::Action::"call_tool",
311
+ resource
312
+ ) when {
313
+ context has tool_name &&
314
+ (context.tool_name == "shell" ||
315
+ context.tool_name == "execute_command")
316
+ };
317
+
318
+ @id("tool-block-sensitive-high-risk")
319
+ @name("Block high-risk sensitive tools")
320
+ @description("Forbids sensitive tool calls with elevated risk scores")
321
+ @severity("high")
322
+ @tags("tools,agentic,security")
323
+ forbid (
324
+ principal,
325
+ action == Guardrails::Action::"call_tool",
326
+ resource
327
+ ) when {
328
+ context has tool_is_sensitive && context.tool_is_sensitive == true &&
329
+ context has tool_risk_score && context.tool_risk_score > 70
330
+ };
331
+ `;
332
// GUARDRAILS_AGENTIC_SAFETY_DEFAULT_CEDAR: embedded Cedar source text with five
// forbid policies:
//   - "agentic-block-loops" (call_tool only): loop_detected == true AND loop_count > 3
//   - "agentic-block-exfiltration":      suspicious_pattern == true AND pattern_type is
//                                        "data_exfiltration", "secret_exfiltration"
//                                        or "db_exfiltration"
//   - "agentic-block-high-sequence-risk": sequence_risk > 80
//   - "agentic-block-budget-exceeded":   budget_exceeded == true
//   - "agentic-block-low-budget":        0 < budget_remaining_pct < 5
// Every attribute read is preceded by a `context has ...` guard.
// NOTE(review): "agentic-block-low-budget" deliberately excludes
// budget_remaining_pct == 0, while its description says "below 5%". Presumably
// the 0 case is expected to arrive with budget_exceeded == true (or 0 is an
// "unset" default) — confirm against the projection layer.
+ const GUARDRAILS_AGENTIC_SAFETY_DEFAULT_CEDAR = `// =============================================================================
333
+ // Agentic Safety Policy
334
+ // =============================================================================
335
+ // Blocks suspicious agentic behavior patterns:
336
+ // - Infinite loops (repeated tool calls)
337
+ // - Suspicious action sequences (data exfiltration patterns)
338
+ // - Budget violations (token/cost limits)
339
+ //
340
+ // Context keys used (normalized by projection layer):
341
+ // - loop_detected: bool - Whether tool call loop detected
342
+ // - loop_count: Long - Number of consecutive repeated calls
343
+ // - suspicious_pattern: bool - Whether suspicious sequence detected
344
+ // - pattern_type: String - Type of pattern (e.g., "data_exfiltration")
345
+ // - sequence_risk: Long (0-100) - Risk score from sequence analysis
346
+ // - budget_exceeded: bool - Whether token budget exceeded
347
+ // - budget_remaining_pct: Long (0-100) - Remaining budget percentage
348
+ //
349
+ // Category: agentic-security
350
+ // Namespace: Guardrails
351
+ // =============================================================================
352
+
353
+ @id("agentic-block-loops")
354
+ @name("Block tool call loops")
355
+ @description("Forbids repeated tool calls indicating infinite loop")
356
+ @severity("high")
357
+ @tags("agentic,loops,safety")
358
+ forbid (
359
+ principal,
360
+ action == Guardrails::Action::"call_tool",
361
+ resource
362
+ ) when {
363
+ context has loop_detected && context.loop_detected == true &&
364
+ context has loop_count && context.loop_count > 3
365
+ };
366
+
367
+ @id("agentic-block-exfiltration")
368
+ @name("Block data exfiltration patterns")
369
+ @description("Forbids suspicious action sequences like read → http_post")
370
+ @severity("critical")
371
+ @tags("agentic,exfiltration,security")
372
+ forbid (
373
+ principal,
374
+ action,
375
+ resource
376
+ ) when {
377
+ context has suspicious_pattern && context.suspicious_pattern == true &&
378
+ context has pattern_type &&
379
+ (
380
+ context.pattern_type == "data_exfiltration" ||
381
+ context.pattern_type == "secret_exfiltration" ||
382
+ context.pattern_type == "db_exfiltration"
383
+ )
384
+ };
385
+
386
+ @id("agentic-block-high-sequence-risk")
387
+ @name("Block high-risk action sequences")
388
+ @description("Forbids action sequences with risk score above 80")
389
+ @severity("high")
390
+ @tags("agentic,patterns,security")
391
+ forbid (
392
+ principal,
393
+ action,
394
+ resource
395
+ ) when {
396
+ context has sequence_risk && context.sequence_risk > 80
397
+ };
398
+
399
+ @id("agentic-block-budget-exceeded")
400
+ @name("Block budget violations")
401
+ @description("Forbids requests when token budget is exceeded")
402
+ @severity("medium")
403
+ @tags("agentic,budget,cost-control")
404
+ forbid (
405
+ principal,
406
+ action,
407
+ resource
408
+ ) when {
409
+ context has budget_exceeded && context.budget_exceeded == true
410
+ };
411
+
412
+ @id("agentic-block-low-budget")
413
+ @name("Block requests when budget critically low")
414
+ @description("Forbids requests when remaining budget falls below 5%")
415
+ @severity("medium")
416
+ @tags("agentic,budget,cost-control")
417
+ forbid (
418
+ principal,
419
+ action,
420
+ resource
421
+ ) when {
422
+ context has budget_remaining_pct &&
423
+ context.budget_remaining_pct < 5 &&
424
+ context.budget_remaining_pct > 0
425
+ };
426
+ `;
427
/**
 * MCP tool-permission template, embedded as Cedar source text.
 *
 * Contains four policies, all scoped to the Guardrails `call_tool` action:
 *   - "mcp-github-allow-read" (permit): GitHub server + one of six read tools
 *   - "mcp-github-deny-write" (forbid): GitHub server + any tool that is NOT
 *     one of those six read tools (including a missing `tool_name`)
 *   - "mcp-exclude-server"    (forbid): server is on the exclusion list
 *   - "mcp-block-unverified"  (forbid): `mcp_server_verified == false`
 *
 * Why the deny rule excludes the read tools: in Cedar a matching `forbid`
 * always overrides a matching `permit`. A forbid on the whole GitHub server
 * would therefore also block the read tools that "mcp-github-allow-read"
 * permits, contradicting the template's stated purpose ("Allow read_issues
 * from GitHub but deny create_issues"). The read allowlist in the forbid must
 * be kept in sync with the one in the permit.
 *
 * Every context attribute is read behind a `context has ...` guard so that a
 * missing key is a plain non-match rather than a Cedar evaluation error.
 *
 * @type {string}
 */
const GUARDRAILS_MCP_TOOL_PERMISSIONS_CEDAR = `// =============================================================================
// MCP Tool Permissions Template
// =============================================================================
// Per-tool access control for MCP servers. Allows fine-grained permissioning:
// allow specific tools from a server while denying others.
//
// Example: Allow read_issues from GitHub but deny create_issues.
//
// Context keys used:
// - mcp_server: String - MCP server name
// - mcp_tool: String - Tool name within the server
// - tool_name: String - Normalized tool name
// - mcp_server_verified: Bool - Whether server is verified
//
// Category: agentic_security
// Namespace: Guardrails
// =============================================================================

// -- GitHub MCP: Read-only access -------------------------------------------

@id("mcp-github-allow-read")
@name("Allow read-only GitHub tools")
@description("Permit read operations from GitHub MCP server (issues, PRs, code search)")
@severity("medium")
@tags("mcp,github,read-only,least-privilege")
permit (
    principal,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    context has mcp_server && context.mcp_server == "github" &&
    context has tool_name &&
    (context.tool_name == "read_issues" ||
     context.tool_name == "get_issue" ||
     context.tool_name == "list_repos" ||
     context.tool_name == "get_pull_request" ||
     context.tool_name == "search_code" ||
     context.tool_name == "get_file_contents")
};

@id("mcp-github-deny-write")
@name("Deny write GitHub tools")
@description("Block create/update/delete operations on GitHub MCP server")
@severity("high")
@tags("mcp,github,write-block,least-privilege")
forbid (
    principal,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    // forbid overrides permit, so the read-only tools allowed above must be
    // excluded here. Keep this list in sync with mcp-github-allow-read.
    context has mcp_server && context.mcp_server == "github" &&
    !(context has tool_name &&
      (context.tool_name == "read_issues" ||
       context.tool_name == "get_issue" ||
       context.tool_name == "list_repos" ||
       context.tool_name == "get_pull_request" ||
       context.tool_name == "search_code" ||
       context.tool_name == "get_file_contents"))
};

// -- Organization-wide MCP server exclusions --------------------------------

@id("mcp-exclude-server")
@name("Exclude specific MCP servers")
@description("Block all tool calls from excluded MCP servers (org-wide exclusion list)")
@severity("critical")
@tags("mcp,exclusion,org-wide,block")
forbid (
    principal,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    // Add server names to block across the organization.
    // Modify this list to match your exclusion requirements.
    context has mcp_server &&
    (context.mcp_server == "untrusted-server" ||
     context.mcp_server == "deprecated-server")
};

// -- Block unverified MCP servers -------------------------------------------

@id("mcp-block-unverified")
@name("Block unverified MCP servers")
@description("Deny all tool calls from MCP servers not in the verified registry")
@severity("high")
@tags("mcp,trust,verification")
forbid (
    principal,
    action == Guardrails::Action::"call_tool",
    resource
) when {
    context has mcp_server_verified && context.mcp_server_verified == false
};
`;
512
// GUARDRAILS_CHAT_ASSISTANT_SECURITY_CEDAR: embedded Cedar source text for the
// chat-assistant profile, with two forbid policies that apply to any action:
//   - "chat-injection-lower-threshold": injection_score > 70
//   - "chat-jailbreak-lower-threshold": jailbreak_score > 65
// Both attribute reads are preceded by a `context has ...` guard.
+ const GUARDRAILS_CHAT_ASSISTANT_SECURITY_CEDAR = `// =============================================================================
513
+ // Chat Assistant — Security
514
+ // =============================================================================
515
+ // Aggressive injection and jailbreak defense for customer-facing chatbots.
516
+ // Lower thresholds than defaults — public-facing chat is high-risk for attacks.
517
+ //
518
+ // Category: security
519
+ // Namespace: Guardrails
520
+ // =============================================================================
521
+
522
+ @id("chat-injection-lower-threshold")
523
+ @name("Aggressive injection defense for chat")
524
+ @description("Lower injection threshold for public-facing chat — blocks above 70")
525
+ @severity("high")
526
+ @tags("profile,chat-assistant,injection,security")
527
+ forbid (
528
+ principal,
529
+ action,
530
+ resource
531
+ ) when {
532
+ context has injection_score && context.injection_score > 70
533
+ };
534
+
535
+ @id("chat-jailbreak-lower-threshold")
536
+ @name("Aggressive jailbreak defense for chat")
537
+ @description("Lower jailbreak threshold for public-facing chat — blocks above 65")
538
+ @severity("high")
539
+ @tags("profile,chat-assistant,jailbreak,security")
540
+ forbid (
541
+ principal,
542
+ action,
543
+ resource
544
+ ) when {
545
+ context has jailbreak_score && context.jailbreak_score > 65
546
+ };
547
+ `;
548
// GUARDRAILS_CHAT_ASSISTANT_PRIVACY_CEDAR: embedded Cedar source text for the
// chat-assistant profile, with one forbid policy, "chat-pii-block-input-output".
// It matches any action whenever context.pii_detected == true; there is no
// check on `direction`, which is how it covers both inputs and outputs.
+ const GUARDRAILS_CHAT_ASSISTANT_PRIVACY_CEDAR = `// =============================================================================
549
+ // Chat Assistant — Privacy
550
+ // =============================================================================
551
+ // PII protection for customer-facing chatbots.
552
+ // Blocks PII in both user inputs and assistant outputs.
553
+ //
554
+ // Category: privacy
555
+ // Namespace: Guardrails
556
+ // =============================================================================
557
+
558
+ @id("chat-pii-block-input-output")
559
+ @name("Block PII in both directions")
560
+ @description("Blocks PII in both user inputs and assistant outputs for chat applications")
561
+ @severity("high")
562
+ @tags("profile,chat-assistant,pii,privacy")
563
+ forbid (
564
+ principal,
565
+ action,
566
+ resource
567
+ ) when {
568
+ context has pii_detected && context.pii_detected == true
569
+ };
570
+ `;
571
// GUARDRAILS_CHAT_ASSISTANT_TRUST_SAFETY_CEDAR: embedded Cedar source text for
// the chat-assistant profile, with two forbid policies that apply to any action:
//   - "chat-toxicity-strict":   violence, hate_speech, sexual or profanity
//                               score > 70
//   - "chat-topic-restriction": topic_confidence > 70 AND content_topics contains
//                               "weapons_manufacturing", "illegal_activity",
//                               "controlled_substances" or "financial_fraud"
// Every attribute read is preceded by a `context has ...` guard.
// NOTE(review): "chat-toxicity-strict" does not check weapons_score or
// crime_score, which the default toxicity policy does — confirm the omission is
// intentional.
+ const GUARDRAILS_CHAT_ASSISTANT_TRUST_SAFETY_CEDAR = `// =============================================================================
572
+ // Chat Assistant — Trust & Safety
573
+ // =============================================================================
574
+ // Strict content moderation for customer-facing chatbots.
575
+ // Lower toxicity thresholds and topic restrictions for public-facing content.
576
+ //
577
+ // Category: trust_safety
578
+ // Namespace: Guardrails
579
+ // =============================================================================
580
+
581
+ @id("chat-toxicity-strict")
582
+ @name("Strict toxicity for chat")
583
+ @description("Lower thresholds for customer-facing content — blocks toxicity above 70")
584
+ @severity("critical")
585
+ @tags("profile,chat-assistant,toxicity,trust-safety")
586
+ forbid (
587
+ principal,
588
+ action,
589
+ resource
590
+ ) when {
591
+ (context has violence_score && context.violence_score > 70) ||
592
+ (context has hate_speech_score && context.hate_speech_score > 70) ||
593
+ (context has sexual_score && context.sexual_score > 70) ||
594
+ (context has profanity_score && context.profanity_score > 70)
595
+ };
596
+
597
+ @id("chat-topic-restriction")
598
+ @name("Block restricted topics in chat")
599
+ @description("Prevents chat assistants from discussing dangerous or regulated topics")
600
+ @severity("high")
601
+ @tags("profile,chat-assistant,semantic,compliance")
602
+ forbid (
603
+ principal,
604
+ action,
605
+ resource
606
+ ) when {
607
+ context has topic_confidence && context.topic_confidence > 70 &&
608
+ context has content_topics &&
609
+ (context.content_topics.contains("weapons_manufacturing") ||
610
+ context.content_topics.contains("illegal_activity") ||
611
+ context.content_topics.contains("controlled_substances") ||
612
+ context.content_topics.contains("financial_fraud"))
613
+ };
614
+ `;
615
// GUARDRAILS_CODE_AGENT_AGENTIC_SECURITY_CEDAR: embedded Cedar source text for
// the code-agent profile, with seven forbid policies.
// Scoped to the Guardrails `call_tool` action:
//   - "code-block-dangerous-tools": tool_risk_score > 85 OR tool_category == "dangerous"
//   - "code-block-shell-execution": tool_name is "shell", "execute_command" or "bash"
//   - "code-block-sensitive-tools": tool_is_sensitive == true AND tool_risk_score > 70
//   - "code-block-loops":           loop_detected == true AND loop_count > 5
// Applying to any action:
//   - "code-block-exfiltration":       suspicious_pattern == true AND pattern_type is
//                                      "data_exfiltration" or "secret_exfiltration"
//   - "code-block-high-sequence-risk": sequence_risk > 75
//   - "code-block-budget-exceeded":    budget_exceeded == true
// Every attribute read is preceded by a `context has ...` guard.
// NOTE(review): "code-block-exfiltration" does not list "db_exfiltration", which
// the default agentic-safety policy does — confirm the omission is intentional.
+ const GUARDRAILS_CODE_AGENT_AGENTIC_SECURITY_CEDAR = `// =============================================================================
616
+ // Code Agent — Agentic Security
617
+ // =============================================================================
618
+ // Tool risk controls, shell execution blocking, loop detection,
619
+ // exfiltration prevention, and budget enforcement for coding assistants.
620
+ //
621
+ // Category: agentic_security
622
+ // Namespace: Guardrails
623
+ // =============================================================================
624
+
625
+ @id("code-block-dangerous-tools")
626
+ @name("Block dangerous tool execution")
627
+ @description("Forbids tools classified as dangerous or with very high risk scores")
628
+ @severity("critical")
629
+ @tags("profile,code-agent,tools,agentic")
630
+ forbid (
631
+ principal,
632
+ action == Guardrails::Action::"call_tool",
633
+ resource
634
+ ) when {
635
+ (context has tool_risk_score && context.tool_risk_score > 85) ||
636
+ (context has tool_category && context.tool_category == "dangerous")
637
+ };
638
+
639
+ @id("code-block-shell-execution")
640
+ @name("Block direct shell commands")
641
+ @description("Forbids direct shell and command execution tools")
642
+ @severity("high")
643
+ @tags("profile,code-agent,tools,shell")
644
+ forbid (
645
+ principal,
646
+ action == Guardrails::Action::"call_tool",
647
+ resource
648
+ ) when {
649
+ context has tool_name &&
650
+ (context.tool_name == "shell" ||
651
+ context.tool_name == "execute_command" ||
652
+ context.tool_name == "bash")
653
+ };
654
+
655
+ @id("code-block-sensitive-tools")
656
+ @name("Block sensitive tools with elevated risk")
657
+ @description("Forbids sensitive tool calls with risk above 70")
658
+ @severity("high")
659
+ @tags("profile,code-agent,tools,security")
660
+ forbid (
661
+ principal,
662
+ action == Guardrails::Action::"call_tool",
663
+ resource
664
+ ) when {
665
+ context has tool_is_sensitive && context.tool_is_sensitive == true &&
666
+ context has tool_risk_score && context.tool_risk_score > 70
667
+ };
668
+
669
+ @id("code-block-loops")
670
+ @name("Block tool call loops")
671
+ @description("Stops infinite tool call loops in agentic workflows")
672
+ @severity("high")
673
+ @tags("profile,code-agent,agentic,loops")
674
+ forbid (
675
+ principal,
676
+ action == Guardrails::Action::"call_tool",
677
+ resource
678
+ ) when {
679
+ context has loop_detected && context.loop_detected == true &&
680
+ context has loop_count && context.loop_count > 5
681
+ };
682
+
683
+ @id("code-block-exfiltration")
684
+ @name("Block data exfiltration patterns")
685
+ @description("Detects and blocks read → send patterns indicating data theft")
686
+ @severity("critical")
687
+ @tags("profile,code-agent,agentic,exfiltration")
688
+ forbid (
689
+ principal,
690
+ action,
691
+ resource
692
+ ) when {
693
+ context has suspicious_pattern && context.suspicious_pattern == true &&
694
+ context has pattern_type &&
695
+ (context.pattern_type == "data_exfiltration" ||
696
+ context.pattern_type == "secret_exfiltration")
697
+ };
698
+
699
+ @id("code-block-high-sequence-risk")
700
+ @name("Block high-risk action sequences")
701
+ @description("Forbids suspicious multi-step tool sequences with risk above 75")
702
+ @severity("high")
703
+ @tags("profile,code-agent,agentic,patterns")
704
+ forbid (
705
+ principal,
706
+ action,
707
+ resource
708
+ ) when {
709
+ context has sequence_risk && context.sequence_risk > 75
710
+ };
711
+
712
+ @id("code-block-budget-exceeded")
713
+ @name("Block on budget exceeded")
714
+ @description("Stops agent execution when token budget is exhausted")
715
+ @severity("medium")
716
+ @tags("profile,code-agent,budget,cost-control")
717
+ forbid (
718
+ principal,
719
+ action,
720
+ resource
721
+ ) when {
722
+ context has budget_exceeded && context.budget_exceeded == true
723
+ };
724
+ `;
725
/**
 * Cedar policy source for the Code Agent security template, kept as a single
 * template-literal string.
 *
 * Contains one forbid policy, "code-block-write-secrets": it matches the
 * Guardrails write_file action when the context has contains_secrets set to
 * true. The string is exposed as the cedarText of the "code-agent-security"
 * entry in GUARDRAILS_TEMPLATES.
 *
 * NOTE(review): presumably generated from profiles/code_agent/security.cedar;
 * confirm before hand-editing the text here.
 */
+ const GUARDRAILS_CODE_AGENT_SECURITY_CEDAR = `// =============================================================================
726
+ // Code Agent — Security
727
+ // =============================================================================
728
+ // Secrets protection for coding assistants.
729
+ // Prevents code agents from writing detected secrets to output files.
730
+ //
731
+ // Category: security
732
+ // Namespace: Guardrails
733
+ // =============================================================================
734
+
735
+ @id("code-block-write-secrets")
736
+ @name("Block writing secrets to files")
737
+ @description("Prevents code agents from writing detected secrets to output files")
738
+ @severity("critical")
739
+ @tags("profile,code-agent,secrets,security")
740
+ forbid (
741
+ principal,
742
+ action == Guardrails::Action::"write_file",
743
+ resource
744
+ ) when {
745
+ context has contains_secrets && context.contains_secrets == true
746
+ };
747
+ `;
748
/**
 * Cedar policy source for the Data Pipeline privacy template, kept as a single
 * template-literal string. It is exposed as the cedarText of the
 * "data-pipeline-privacy" entry in GUARDRAILS_TEMPLATES.
 *
 * Two forbid policies, both unscoped (any principal, action, resource):
 *   - "data-pii-block-all": fires when context.pii_detected is true.
 *   - "data-pii-block-sensitive-types": fires when context.pii_types contains
 *     any of ssn, credit_card, passport, medical_id, tax_id.
 *
 * NOTE(review): the description annotation of "data-pii-block-sensitive-types"
 * lists SSN, credit cards, passport numbers and medical IDs, but the condition
 * also matches tax_id. Confirm which one is intended and align the text in the
 * source .cedar file rather than here.
 */
+ const GUARDRAILS_DATA_PIPELINE_PRIVACY_CEDAR = `// =============================================================================
749
+ // Data Pipeline — Privacy
750
+ // =============================================================================
751
+ // Strict PII protection for RAG pipelines and data processing agents.
752
+ // Zero-tolerance for sensitive PII types — data pipelines must not leak PII.
753
+ //
754
+ // Category: privacy
755
+ // Namespace: Guardrails
756
+ // =============================================================================
757
+
758
+ @id("data-pii-block-all")
759
+ @name("Block all PII in data pipeline")
760
+ @description("Forbids any PII in both inputs and outputs — data pipelines must not process or leak PII")
761
+ @severity("critical")
762
+ @tags("profile,data-pipeline,pii,privacy")
763
+ forbid (
764
+ principal,
765
+ action,
766
+ resource
767
+ ) when {
768
+ context has pii_detected && context.pii_detected == true
769
+ };
770
+
771
+ @id("data-pii-block-sensitive-types")
772
+ @name("Block sensitive PII types strictly")
773
+ @description("Zero-tolerance for SSN, credit cards, passport numbers, and medical IDs in data pipelines")
774
+ @severity("critical")
775
+ @tags("profile,data-pipeline,pii,compliance")
776
+ forbid (
777
+ principal,
778
+ action,
779
+ resource
780
+ ) when {
781
+ context has pii_types &&
782
+ (context.pii_types.contains("ssn") ||
783
+ context.pii_types.contains("credit_card") ||
784
+ context.pii_types.contains("passport") ||
785
+ context.pii_types.contains("medical_id") ||
786
+ context.pii_types.contains("tax_id"))
787
+ };
788
+ `;
789
/**
 * Cedar policy source for the Data Pipeline security template, kept as a
 * single template-literal string. It is exposed as the cedarText of the
 * "data-pipeline-security" entry in GUARDRAILS_TEMPLATES.
 *
 * Three forbid policies:
 *   - "data-secrets-strict": any action, when context.contains_secrets is true.
 *   - "data-block-output-secrets": write_file only, when contains_secrets is
 *     true or secret_count is greater than 0.
 *   - "data-injection-defense": any action, when injection_score exceeds 65.
 *
 * NOTE(review): the contains_secrets branch of "data-block-output-secrets" is
 * already covered by "data-secrets-strict", which applies to every action; only
 * the secret_count branch adds coverage, and only for write_file. Confirm that
 * this asymmetry is intended.
 */
+ const GUARDRAILS_DATA_PIPELINE_SECURITY_CEDAR = `// =============================================================================
790
+ // Data Pipeline — Security
791
+ // =============================================================================
792
+ // Strict secrets detection and injection defense for data pipelines.
793
+ // RAG inputs are high-risk for injection — lower thresholds than defaults.
794
+ //
795
+ // Category: security
796
+ // Namespace: Guardrails
797
+ // =============================================================================
798
+
799
+ @id("data-secrets-strict")
800
+ @name("Strict secrets detection for data pipeline")
801
+ @description("Blocks any content containing secrets — even a single match")
802
+ @severity("critical")
803
+ @tags("profile,data-pipeline,secrets,security")
804
+ forbid (
805
+ principal,
806
+ action,
807
+ resource
808
+ ) when {
809
+ context has contains_secrets && context.contains_secrets == true
810
+ };
811
+
812
+ @id("data-block-output-secrets")
813
+ @name("Block secrets in pipeline outputs")
814
+ @description("Prevents data pipeline from writing secrets to any output")
815
+ @severity("critical")
816
+ @tags("profile,data-pipeline,secrets,output")
817
+ forbid (
818
+ principal,
819
+ action == Guardrails::Action::"write_file",
820
+ resource
821
+ ) when {
822
+ (context has contains_secrets && context.contains_secrets == true) ||
823
+ (context has secret_count && context.secret_count > 0)
824
+ };
825
+
826
+ @id("data-injection-defense")
827
+ @name("Pipeline injection defense")
828
+ @description("Lower injection threshold for data pipelines — RAG inputs are high-risk for injection")
829
+ @severity("high")
830
+ @tags("profile,data-pipeline,injection,security")
831
+ forbid (
832
+ principal,
833
+ action,
834
+ resource
835
+ ) when {
836
+ context has injection_score && context.injection_score > 65
837
+ };
838
+ `;
839
/**
 * Cedar policy source for the Data Pipeline agentic-security template, kept as
 * a single template-literal string. It is exposed as the cedarText of the
 * "data-pipeline-agentic-security" entry in GUARDRAILS_TEMPLATES.
 *
 * Two forbid policies:
 *   - "data-block-exfiltration": any action, when context.suspicious_pattern is
 *     true and context.pattern_type is data_exfiltration or db_exfiltration.
 *   - "data-block-high-risk-tools": call_tool only, when tool_risk_score
 *     exceeds 60.
 *
 * Every attribute is guarded with a "context has ..." check, so a request that
 * omits the attribute does not match the policy.
 */
+ const GUARDRAILS_DATA_PIPELINE_AGENTIC_SECURITY_CEDAR = `// =============================================================================
839
+ // Data Pipeline — Agentic Security
840
+ // =============================================================================
841
+ // Exfiltration prevention and tool risk controls for data pipelines.
842
+ // Prevents retrieval data from being sent to external endpoints.
843
+ //
844
+ // Category: agentic_security
845
+ // Namespace: Guardrails
846
+ // =============================================================================
847
+
848
+ @id("data-block-exfiltration")
849
+ @name("Block data exfiltration from pipeline")
850
+ @description("Prevents retrieval data from being sent to external endpoints")
851
+ @severity("critical")
852
+ @tags("profile,data-pipeline,exfiltration,security")
853
+ forbid (
854
+ principal,
855
+ action,
856
+ resource
857
+ ) when {
858
+ context has suspicious_pattern && context.suspicious_pattern == true &&
859
+ context has pattern_type &&
860
+ (context.pattern_type == "data_exfiltration" ||
861
+ context.pattern_type == "db_exfiltration")
862
+ };
863
+
864
+ @id("data-block-high-risk-tools")
865
+ @name("Block high-risk tools in pipeline")
866
+ @description("Forbids tools with elevated risk in data processing context")
867
+ @severity("high")
868
+ @tags("profile,data-pipeline,tools,security")
869
+ forbid (
870
+ principal,
871
+ action == Guardrails::Action::"call_tool",
872
+ resource
873
+ ) when {
874
+ context has tool_risk_score && context.tool_risk_score > 60
875
+ };
876
+ `;
878
+ // =============================================================================
879
+ // CATEGORIES
880
+ // =============================================================================
881
/**
 * Policy categories offered by the Guardrails service.
 *
 * Each row below is an [id, name, description] triple; the rows are expanded
 * into plain { id, name, description } objects, in declaration order.
 */
export const GUARDRAILS_CATEGORIES = [
    ['security', 'Security', 'Detect and block prompt injection, jailbreak attempts, and credential leakage'],
    ['privacy', 'Privacy', 'Detect and block personally identifiable information (PII) in prompts and responses'],
    ['trust_safety', 'Trust & Safety', 'Detect and block toxic, violent, hateful, sexual, or profane content'],
    ['agentic_security', 'Agentic Security', 'Detect tool abuse, data exfiltration patterns, infinite loops, and budget violations'],
    ['organization', 'Organization', 'Organization-wide baselines and default permit/deny policies'],
].map(([id, name, description]) => ({ id, name, description }));
888
+ // =============================================================================
889
+ // DEFAULT POLICIES
890
+ // =============================================================================
891
/**
 * Default Guardrails policies; every entry is flagged active.
 *
 * Declared as an id-keyed table and expanded into an array of records shaped
 * { id, name, description, category, cedarText, severity, tags, isActive }.
 * Array order follows the declaration order of the keys below, and isActive is
 * appended as true for each record.
 */
export const GUARDRAILS_DEFAULTS = Object.entries({
    'baseline-default': {
        name: 'Baseline Permit',
        description: 'Permits all actions by default — threat-specific forbid policies override this when threats are detected',
        category: 'organization',
        cedarText: GUARDRAILS_BASELINE_DEFAULT_CEDAR,
        severity: 'low',
        tags: ['baseline', 'permit-default', 'organization'],
    },
    'secrets-default': {
        name: 'Secrets Detection',
        description: 'Block content containing API keys, tokens, credentials, or other secrets',
        category: 'security',
        cedarText: GUARDRAILS_SECRETS_DEFAULT_CEDAR,
        severity: 'critical',
        tags: ['secrets', 'api-keys', 'credentials', 'data-leak'],
    },
    'injection-default': {
        name: 'Injection & Jailbreak Detection',
        description: 'Block prompt injection, jailbreak attempts, and command injection using ML confidence scores',
        category: 'security',
        cedarText: GUARDRAILS_INJECTION_DEFAULT_CEDAR,
        severity: 'high',
        tags: ['injection', 'jailbreak', 'security'],
    },
    'pii-default': {
        name: 'PII Detection',
        description: 'Block content containing PII such as SSN, credit cards, or passport numbers in outputs',
        category: 'privacy',
        cedarText: GUARDRAILS_PII_DEFAULT_CEDAR,
        severity: 'high',
        tags: ['pii', 'privacy', 'data-protection'],
    },
    'toxicity-default': {
        name: 'Toxicity & Content Moderation',
        description: 'Block toxic, violent, hateful, sexual, and profane content based on classifier scores',
        category: 'trust_safety',
        cedarText: GUARDRAILS_TOXICITY_DEFAULT_CEDAR,
        severity: 'critical',
        tags: ['toxicity', 'trust-safety', 'content-moderation'],
    },
    'tool-risk-default': {
        name: 'Tool Risk',
        description: 'Block dangerous tool calls, shell execution, and sensitive tool usage based on risk scoring',
        category: 'agentic_security',
        cedarText: GUARDRAILS_TOOL_RISK_DEFAULT_CEDAR,
        severity: 'critical',
        tags: ['tools', 'agentic', 'security'],
    },
    'agentic-safety-default': {
        name: 'Agentic Safety',
        description: 'Block tool call loops, data exfiltration patterns, high-risk sequences, and budget violations',
        category: 'agentic_security',
        cedarText: GUARDRAILS_AGENTIC_SAFETY_DEFAULT_CEDAR,
        severity: 'high',
        tags: ['agentic', 'safety', 'loops', 'exfiltration', 'budget'],
    },
}).map(([id, policy]) => ({ id, ...policy, isActive: true }));
963
+ // =============================================================================
964
+ // ALL TEMPLATES
965
+ // =============================================================================
966
/**
 * Optional Guardrails policy templates (MCP tool permissions plus the
 * chat-assistant, code-agent and data-pipeline profile templates).
 *
 * Declared as an id-keyed table and expanded into an array of records shaped
 * { id, name, description, category, cedarText, severity, tags }. Array order
 * follows the declaration order of the keys below.
 */
export const GUARDRAILS_TEMPLATES = Object.entries({
    'mcp-tool-permissions': {
        name: 'MCP Tool Permissions',
        description: 'Per-tool access control for MCP servers — allow specific tools while denying others, exclude servers org-wide, block unverified sources',
        category: 'agentic_security',
        cedarText: GUARDRAILS_MCP_TOOL_PERMISSIONS_CEDAR,
        severity: 'high',
        tags: ['mcp', 'tools', 'least-privilege', 'per-server', 'exclusion'],
    },
    'chat-assistant-security': {
        name: 'Chat Assistant — Security',
        description: 'Aggressive injection and jailbreak defense for customer-facing chatbots with lower thresholds',
        category: 'security',
        cedarText: GUARDRAILS_CHAT_ASSISTANT_SECURITY_CEDAR,
        severity: 'high',
        tags: ['profile', 'chat-assistant', 'injection', 'jailbreak', 'security'],
    },
    'chat-assistant-privacy': {
        name: 'Chat Assistant — Privacy',
        description: 'Block PII in both user inputs and assistant outputs for chat applications',
        category: 'privacy',
        cedarText: GUARDRAILS_CHAT_ASSISTANT_PRIVACY_CEDAR,
        severity: 'high',
        tags: ['profile', 'chat-assistant', 'pii', 'privacy'],
    },
    'chat-assistant-trust-safety': {
        name: 'Chat Assistant — Trust & Safety',
        description: 'Strict content moderation with lower toxicity thresholds and topic restrictions for public-facing chat',
        category: 'trust_safety',
        cedarText: GUARDRAILS_CHAT_ASSISTANT_TRUST_SAFETY_CEDAR,
        severity: 'critical',
        tags: ['profile', 'chat-assistant', 'toxicity', 'trust-safety', 'topics'],
    },
    'code-agent-agentic-security': {
        name: 'Code Agent — Agentic Security',
        description: 'Tool risk controls, shell blocking, loop detection, exfiltration prevention, and budget enforcement for coding assistants',
        category: 'agentic_security',
        cedarText: GUARDRAILS_CODE_AGENT_AGENTIC_SECURITY_CEDAR,
        severity: 'high',
        tags: ['profile', 'code-agent', 'tools', 'agentic', 'exfiltration', 'budget'],
    },
    'code-agent-security': {
        name: 'Code Agent — Security',
        description: 'Prevent code agents from writing detected secrets to output files',
        category: 'security',
        cedarText: GUARDRAILS_CODE_AGENT_SECURITY_CEDAR,
        severity: 'critical',
        tags: ['profile', 'code-agent', 'secrets', 'security'],
    },
    'data-pipeline-privacy': {
        name: 'Data Pipeline — Privacy',
        description: 'Strict PII protection with zero-tolerance for sensitive PII types in data pipelines',
        category: 'privacy',
        cedarText: GUARDRAILS_DATA_PIPELINE_PRIVACY_CEDAR,
        severity: 'critical',
        tags: ['profile', 'data-pipeline', 'pii', 'privacy', 'compliance'],
    },
    'data-pipeline-security': {
        name: 'Data Pipeline — Security',
        description: 'Strict secrets detection and lower injection thresholds for RAG and data processing pipelines',
        category: 'security',
        cedarText: GUARDRAILS_DATA_PIPELINE_SECURITY_CEDAR,
        severity: 'critical',
        tags: ['profile', 'data-pipeline', 'secrets', 'injection', 'security'],
    },
    'data-pipeline-agentic-security': {
        name: 'Data Pipeline — Agentic Security',
        description: 'Exfiltration prevention and tool risk controls for data processing pipelines',
        category: 'agentic_security',
        cedarText: GUARDRAILS_DATA_PIPELINE_AGENTIC_SECURITY_CEDAR,
        severity: 'critical',
        tags: ['profile', 'data-pipeline', 'exfiltration', 'tools'],
    },
}).map(([id, template]) => ({ id, ...template }));
1049
+ // =============================================================================
1050
+ // TEMPLATES METADATA
1051
+ // =============================================================================
1052
+ /**
 * Raw templates.json metadata for the Guardrails service, embedded as a string.
 *
 * The value is JSON text, not a parsed object; nothing in this block parses it.
 * Top-level keys in the literal: service, version, description, categories,
 * defaults, templates and profiles.
 *
 * The defaults and templates entries carry the same ids, names, descriptions,
 * categories, severities and tags as GUARDRAILS_DEFAULTS and
 * GUARDRAILS_TEMPLATES, but reference a relative .cedar path in "file" where
 * the arrays carry cedarText, and use the snake_case key "is_active" where the
 * arrays use isActive.
 *
 * The profiles section groups template ids into three bundles: chat-assistant,
 * code-agent and data-pipeline.
 */
1052
+ export const GUARDRAILS_TEMPLATES_JSON = `{
1053
+ "service": "guardrails",
1054
+ "version": "1.0.0",
1055
+ "description": "Guardrails policy templates for LLM application security",
1056
+ "categories": [
1057
+ {
1058
+ "id": "security",
1059
+ "name": "Security",
1060
+ "description": "Detect and block prompt injection, jailbreak attempts, and credential leakage"
1061
+ },
1062
+ {
1063
+ "id": "privacy",
1064
+ "name": "Privacy",
1065
+ "description": "Detect and block personally identifiable information (PII) in prompts and responses"
1066
+ },
1067
+ {
1068
+ "id": "trust_safety",
1069
+ "name": "Trust & Safety",
1070
+ "description": "Detect and block toxic, violent, hateful, sexual, or profane content"
1071
+ },
1072
+ {
1073
+ "id": "agentic_security",
1074
+ "name": "Agentic Security",
1075
+ "description": "Detect tool abuse, data exfiltration patterns, infinite loops, and budget violations"
1076
+ },
1077
+ {
1078
+ "id": "organization",
1079
+ "name": "Organization",
1080
+ "description": "Organization-wide baselines and default permit/deny policies"
1081
+ }
1082
+ ],
1083
+ "defaults": [
1084
+ {
1085
+ "id": "baseline-default",
1086
+ "name": "Baseline Permit",
1087
+ "description": "Permits all actions by default — threat-specific forbid policies override this when threats are detected",
1088
+ "category": "organization",
1089
+ "file": "defaults/baseline.cedar",
1090
+ "severity": "low",
1091
+ "tags": ["baseline", "permit-default", "organization"],
1092
+ "is_active": true
1093
+ },
1094
+ {
1095
+ "id": "secrets-default",
1096
+ "name": "Secrets Detection",
1097
+ "description": "Block content containing API keys, tokens, credentials, or other secrets",
1098
+ "category": "security",
1099
+ "file": "defaults/secrets.cedar",
1100
+ "severity": "critical",
1101
+ "tags": ["secrets", "api-keys", "credentials", "data-leak"],
1102
+ "is_active": true
1103
+ },
1104
+ {
1105
+ "id": "injection-default",
1106
+ "name": "Injection & Jailbreak Detection",
1107
+ "description": "Block prompt injection, jailbreak attempts, and command injection using ML confidence scores",
1108
+ "category": "security",
1109
+ "file": "defaults/injection.cedar",
1110
+ "severity": "high",
1111
+ "tags": ["injection", "jailbreak", "security"],
1112
+ "is_active": true
1113
+ },
1114
+ {
1115
+ "id": "pii-default",
1116
+ "name": "PII Detection",
1117
+ "description": "Block content containing PII such as SSN, credit cards, or passport numbers in outputs",
1118
+ "category": "privacy",
1119
+ "file": "defaults/pii.cedar",
1120
+ "severity": "high",
1121
+ "tags": ["pii", "privacy", "data-protection"],
1122
+ "is_active": true
1123
+ },
1124
+ {
1125
+ "id": "toxicity-default",
1126
+ "name": "Toxicity & Content Moderation",
1127
+ "description": "Block toxic, violent, hateful, sexual, and profane content based on classifier scores",
1128
+ "category": "trust_safety",
1129
+ "file": "defaults/toxicity.cedar",
1130
+ "severity": "critical",
1131
+ "tags": ["toxicity", "trust-safety", "content-moderation"],
1132
+ "is_active": true
1133
+ },
1134
+ {
1135
+ "id": "tool-risk-default",
1136
+ "name": "Tool Risk",
1137
+ "description": "Block dangerous tool calls, shell execution, and sensitive tool usage based on risk scoring",
1138
+ "category": "agentic_security",
1139
+ "file": "defaults/tool_risk.cedar",
1140
+ "severity": "critical",
1141
+ "tags": ["tools", "agentic", "security"],
1142
+ "is_active": true
1143
+ },
1144
+ {
1145
+ "id": "agentic-safety-default",
1146
+ "name": "Agentic Safety",
1147
+ "description": "Block tool call loops, data exfiltration patterns, high-risk sequences, and budget violations",
1148
+ "category": "agentic_security",
1149
+ "file": "defaults/agentic_safety.cedar",
1150
+ "severity": "high",
1151
+ "tags": ["agentic", "safety", "loops", "exfiltration", "budget"],
1152
+ "is_active": true
1153
+ }
1154
+ ],
1155
+ "templates": [
1156
+ {
1157
+ "id": "mcp-tool-permissions",
1158
+ "name": "MCP Tool Permissions",
1159
+ "description": "Per-tool access control for MCP servers — allow specific tools while denying others, exclude servers org-wide, block unverified sources",
1160
+ "category": "agentic_security",
1161
+ "file": "mcp_tool_permissions.cedar",
1162
+ "severity": "high",
1163
+ "tags": ["mcp", "tools", "least-privilege", "per-server", "exclusion"]
1164
+ },
1165
+ {
1166
+ "id": "chat-assistant-security",
1167
+ "name": "Chat Assistant — Security",
1168
+ "description": "Aggressive injection and jailbreak defense for customer-facing chatbots with lower thresholds",
1169
+ "category": "security",
1170
+ "file": "profiles/chat_assistant/security.cedar",
1171
+ "severity": "high",
1172
+ "tags": ["profile", "chat-assistant", "injection", "jailbreak", "security"]
1173
+ },
1174
+ {
1175
+ "id": "chat-assistant-privacy",
1176
+ "name": "Chat Assistant — Privacy",
1177
+ "description": "Block PII in both user inputs and assistant outputs for chat applications",
1178
+ "category": "privacy",
1179
+ "file": "profiles/chat_assistant/privacy.cedar",
1180
+ "severity": "high",
1181
+ "tags": ["profile", "chat-assistant", "pii", "privacy"]
1182
+ },
1183
+ {
1184
+ "id": "chat-assistant-trust-safety",
1185
+ "name": "Chat Assistant — Trust & Safety",
1186
+ "description": "Strict content moderation with lower toxicity thresholds and topic restrictions for public-facing chat",
1187
+ "category": "trust_safety",
1188
+ "file": "profiles/chat_assistant/trust_safety.cedar",
1189
+ "severity": "critical",
1190
+ "tags": ["profile", "chat-assistant", "toxicity", "trust-safety", "topics"]
1191
+ },
1192
+ {
1193
+ "id": "code-agent-agentic-security",
1194
+ "name": "Code Agent — Agentic Security",
1195
+ "description": "Tool risk controls, shell blocking, loop detection, exfiltration prevention, and budget enforcement for coding assistants",
1196
+ "category": "agentic_security",
1197
+ "file": "profiles/code_agent/agentic_security.cedar",
1198
+ "severity": "high",
1199
+ "tags": ["profile", "code-agent", "tools", "agentic", "exfiltration", "budget"]
1200
+ },
1201
+ {
1202
+ "id": "code-agent-security",
1203
+ "name": "Code Agent — Security",
1204
+ "description": "Prevent code agents from writing detected secrets to output files",
1205
+ "category": "security",
1206
+ "file": "profiles/code_agent/security.cedar",
1207
+ "severity": "critical",
1208
+ "tags": ["profile", "code-agent", "secrets", "security"]
1209
+ },
1210
+ {
1211
+ "id": "data-pipeline-privacy",
1212
+ "name": "Data Pipeline — Privacy",
1213
+ "description": "Strict PII protection with zero-tolerance for sensitive PII types in data pipelines",
1214
+ "category": "privacy",
1215
+ "file": "profiles/data_pipeline/privacy.cedar",
1216
+ "severity": "critical",
1217
+ "tags": ["profile", "data-pipeline", "pii", "privacy", "compliance"]
1218
+ },
1219
+ {
1220
+ "id": "data-pipeline-security",
1221
+ "name": "Data Pipeline — Security",
1222
+ "description": "Strict secrets detection and lower injection thresholds for RAG and data processing pipelines",
1223
+ "category": "security",
1224
+ "file": "profiles/data_pipeline/security.cedar",
1225
+ "severity": "critical",
1226
+ "tags": ["profile", "data-pipeline", "secrets", "injection", "security"]
1227
+ },
1228
+ {
1229
+ "id": "data-pipeline-agentic-security",
1230
+ "name": "Data Pipeline — Agentic Security",
1231
+ "description": "Exfiltration prevention and tool risk controls for data processing pipelines",
1232
+ "category": "agentic_security",
1233
+ "file": "profiles/data_pipeline/agentic_security.cedar",
1234
+ "severity": "critical",
1235
+ "tags": ["profile", "data-pipeline", "exfiltration", "tools"]
1236
+ }
1237
+ ],
1238
+ "profiles": [
1239
+ {
1240
+ "id": "chat-assistant",
1241
+ "name": "Chat Assistant",
1242
+ "description": "Optimized for customer-facing chatbots — strict toxicity, PII blocking, aggressive injection defense, topic restrictions",
1243
+ "severity": "high",
1244
+ "tags": ["chat-assistant", "toxicity", "pii", "injection"],
1245
+ "template_ids": ["chat-assistant-security", "chat-assistant-privacy", "chat-assistant-trust-safety"]
1246
+ },
1247
+ {
1248
+ "id": "code-agent",
1249
+ "name": "Code Agent",
1250
+ "description": "Optimized for coding assistants — tool risk controls, shell blocking, loop detection, exfiltration prevention, budget enforcement",
1251
+ "severity": "high",
1252
+ "tags": ["code-agent", "tools", "agentic", "exfiltration"],
1253
+ "template_ids": ["code-agent-agentic-security", "code-agent-security"]
1254
+ },
1255
+ {
1256
+ "id": "data-pipeline",
1257
+ "name": "Data Pipeline",
1258
+ "description": "Optimized for RAG and data processing — strict PII/secrets protection, exfiltration detection, pipeline injection defense",
1259
+ "severity": "critical",
1260
+ "tags": ["data-pipeline", "pii", "secrets", "exfiltration"],
1261
+ "template_ids": ["data-pipeline-privacy", "data-pipeline-security", "data-pipeline-agentic-security"]
1262
+ }
1263
+ ]
1264
+ }
1265
+ `;
1267
+ // =============================================================================
1268
+ // HELPER FUNCTIONS
1269
+ // =============================================================================
1270
/**
 * Select the default policies that belong to one category.
 *
 * @param {string} category - Category id, compared with strict equality
 *   against each entry's category field.
 * @returns {Array<object>} New array holding the matching entries of
 *   GUARDRAILS_DEFAULTS in their original order; empty when nothing matches.
 */
export function getGuardrailsDefaultsByCategory(category) {
    const isInCategory = (policy) => policy.category === category;
    return GUARDRAILS_DEFAULTS.filter(isInCategory);
}
1273
/**
 * Select the policy templates that belong to one category.
 *
 * @param {string} category - Category id, compared with strict equality
 *   against each entry's category field.
 * @returns {Array<object>} New array holding the matching entries of
 *   GUARDRAILS_TEMPLATES in their original order; empty when nothing matches.
 */
export function getGuardrailsTemplatesByCategory(category) {
    const isInCategory = (template) => template.category === category;
    return GUARDRAILS_TEMPLATES.filter(isInCategory);
}
1276
/**
 * Look up a single policy template by its id.
 *
 * Only GUARDRAILS_TEMPLATES is searched; entries of GUARDRAILS_DEFAULTS are
 * not considered.
 *
 * @param {string} id - Template id, compared with strict equality.
 * @returns {object|undefined} The first template whose id matches, or
 *   undefined when there is none.
 */
export function getGuardrailsTemplateById(id) {
    const hasRequestedId = (template) => template.id === id;
    return GUARDRAILS_TEMPLATES.find(hasRequestedId);
}