proxilion 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. proxilion/__init__.py +136 -0
  2. proxilion/audit/__init__.py +133 -0
  3. proxilion/audit/base_exporters.py +527 -0
  4. proxilion/audit/compliance/__init__.py +130 -0
  5. proxilion/audit/compliance/base.py +457 -0
  6. proxilion/audit/compliance/eu_ai_act.py +603 -0
  7. proxilion/audit/compliance/iso27001.py +544 -0
  8. proxilion/audit/compliance/soc2.py +491 -0
  9. proxilion/audit/events.py +493 -0
  10. proxilion/audit/explainability.py +1173 -0
  11. proxilion/audit/exporters/__init__.py +58 -0
  12. proxilion/audit/exporters/aws_s3.py +636 -0
  13. proxilion/audit/exporters/azure_storage.py +608 -0
  14. proxilion/audit/exporters/cloud_base.py +468 -0
  15. proxilion/audit/exporters/gcp_storage.py +570 -0
  16. proxilion/audit/exporters/multi_exporter.py +498 -0
  17. proxilion/audit/hash_chain.py +652 -0
  18. proxilion/audit/logger.py +543 -0
  19. proxilion/caching/__init__.py +49 -0
  20. proxilion/caching/tool_cache.py +633 -0
  21. proxilion/context/__init__.py +73 -0
  22. proxilion/context/context_window.py +556 -0
  23. proxilion/context/message_history.py +505 -0
  24. proxilion/context/session.py +735 -0
  25. proxilion/contrib/__init__.py +51 -0
  26. proxilion/contrib/anthropic.py +609 -0
  27. proxilion/contrib/google.py +1012 -0
  28. proxilion/contrib/langchain.py +641 -0
  29. proxilion/contrib/mcp.py +893 -0
  30. proxilion/contrib/openai.py +646 -0
  31. proxilion/core.py +3058 -0
  32. proxilion/decorators.py +966 -0
  33. proxilion/engines/__init__.py +287 -0
  34. proxilion/engines/base.py +266 -0
  35. proxilion/engines/casbin_engine.py +412 -0
  36. proxilion/engines/opa_engine.py +493 -0
  37. proxilion/engines/simple.py +437 -0
  38. proxilion/exceptions.py +887 -0
  39. proxilion/guards/__init__.py +54 -0
  40. proxilion/guards/input_guard.py +522 -0
  41. proxilion/guards/output_guard.py +634 -0
  42. proxilion/observability/__init__.py +198 -0
  43. proxilion/observability/cost_tracker.py +866 -0
  44. proxilion/observability/hooks.py +683 -0
  45. proxilion/observability/metrics.py +798 -0
  46. proxilion/observability/session_cost_tracker.py +1063 -0
  47. proxilion/policies/__init__.py +67 -0
  48. proxilion/policies/base.py +304 -0
  49. proxilion/policies/builtin.py +486 -0
  50. proxilion/policies/registry.py +376 -0
  51. proxilion/providers/__init__.py +201 -0
  52. proxilion/providers/adapter.py +468 -0
  53. proxilion/providers/anthropic_adapter.py +330 -0
  54. proxilion/providers/gemini_adapter.py +391 -0
  55. proxilion/providers/openai_adapter.py +294 -0
  56. proxilion/py.typed +0 -0
  57. proxilion/resilience/__init__.py +81 -0
  58. proxilion/resilience/degradation.py +615 -0
  59. proxilion/resilience/fallback.py +555 -0
  60. proxilion/resilience/retry.py +554 -0
  61. proxilion/scheduling/__init__.py +57 -0
  62. proxilion/scheduling/priority_queue.py +419 -0
  63. proxilion/scheduling/scheduler.py +459 -0
  64. proxilion/security/__init__.py +244 -0
  65. proxilion/security/agent_trust.py +968 -0
  66. proxilion/security/behavioral_drift.py +794 -0
  67. proxilion/security/cascade_protection.py +869 -0
  68. proxilion/security/circuit_breaker.py +428 -0
  69. proxilion/security/cost_limiter.py +690 -0
  70. proxilion/security/idor_protection.py +460 -0
  71. proxilion/security/intent_capsule.py +849 -0
  72. proxilion/security/intent_validator.py +495 -0
  73. proxilion/security/memory_integrity.py +767 -0
  74. proxilion/security/rate_limiter.py +509 -0
  75. proxilion/security/scope_enforcer.py +680 -0
  76. proxilion/security/sequence_validator.py +636 -0
  77. proxilion/security/trust_boundaries.py +784 -0
  78. proxilion/streaming/__init__.py +70 -0
  79. proxilion/streaming/detector.py +761 -0
  80. proxilion/streaming/transformer.py +674 -0
  81. proxilion/timeouts/__init__.py +55 -0
  82. proxilion/timeouts/decorators.py +477 -0
  83. proxilion/timeouts/manager.py +545 -0
  84. proxilion/tools/__init__.py +69 -0
  85. proxilion/tools/decorators.py +493 -0
  86. proxilion/tools/registry.py +732 -0
  87. proxilion/types.py +339 -0
  88. proxilion/validation/__init__.py +93 -0
  89. proxilion/validation/pydantic_schema.py +351 -0
  90. proxilion/validation/schema.py +651 -0
  91. proxilion-0.0.1.dist-info/METADATA +872 -0
  92. proxilion-0.0.1.dist-info/RECORD +94 -0
  93. proxilion-0.0.1.dist-info/WHEEL +4 -0
  94. proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,634 @@
1
+ """
2
+ Output guard for detecting sensitive data leakage.
3
+
4
+ Provides pattern-based detection of credentials, API keys, private keys,
5
+ internal paths, and other sensitive information that may leak through
6
+ LLM outputs.
7
+
8
+ Example:
9
+ >>> from proxilion.guards import OutputGuard
10
+ >>>
11
+ >>> guard = OutputGuard()
12
+ >>>
13
+ >>> # Check for leakage
14
+ >>> result = guard.check("The API key is sk-abc123...")
15
+ >>> if not result.passed:
16
+ ... print(f"Leakage detected: {result.matched_patterns}")
17
+ >>>
18
+ >>> # Redact sensitive data
19
+ >>> safe_output = guard.redact("Bearer token: eyJhbGc...")
20
+ >>> print(safe_output) # "Bearer token: [REDACTED]"
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import logging
26
+ import re
27
+ from collections.abc import Callable
28
+ from dataclasses import dataclass, field
29
+ from enum import Enum
30
+ from typing import Any
31
+
32
+ from proxilion.guards.input_guard import GuardAction, GuardResult
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
class LeakageCategory(Enum):
    """Kinds of sensitive data a leakage pattern can be tagged with."""

    # API keys, passwords, tokens.
    CREDENTIAL = "credential"

    # Internal paths, URLs, infrastructure details.
    INTERNAL = "internal"

    # Leakage of the system prompt or instructions.
    SYSTEM_PROMPT = "system_prompt"

    # Personally identifiable information.
    PII = "pii"

    # Credit card numbers, bank accounts.
    FINANCIAL = "financial"

    # Internal hostnames, IP addresses, database names.
    INFRASTRUCTURE = "infrastructure"
57
+
58
+
59
@dataclass
class LeakagePattern:
    """
    A named regular expression that flags one kind of sensitive data.

    The regex is always compiled with ``re.IGNORECASE | re.MULTILINE``.

    Attributes:
        name: Unique identifier for the pattern.
        pattern: Regex source string.
        category: Category of data this pattern detects.
        severity: Severity score from 0.0 to 1.0.
        description: Human-readable description.
        redaction: Text substituted for each match when redacting.
    """

    name: str
    pattern: str
    category: LeakageCategory
    severity: float = 0.8
    description: str = ""
    redaction: str = "[REDACTED]"
    _compiled: re.Pattern[str] | None = field(default=None, repr=False, compare=False)

    def __post_init__(self) -> None:
        """Compile ``pattern`` eagerly unless a compiled regex was supplied.

        Raises:
            ValueError: If ``pattern`` is not a valid regular expression.
        """
        if self._compiled is not None:
            return

        try:
            regex = re.compile(self.pattern, re.IGNORECASE | re.MULTILINE)
        except re.error as exc:
            logger.error(f"Invalid regex pattern for {self.name}: {exc}")
            raise ValueError(f"Invalid regex pattern for {self.name}: {exc}") from exc

        self._compiled = regex

    @property
    def compiled(self) -> re.Pattern[str]:
        """Return the compiled regex, compiling it first if the cache is empty."""
        regex = self._compiled
        if regex is None:
            regex = re.compile(self.pattern, re.IGNORECASE | re.MULTILINE)
            self._compiled = regex
        return regex

    def match(self, text: str) -> list[re.Match[str]]:
        """Return every non-overlapping match of this pattern in ``text``."""
        return [*self.compiled.finditer(text)]
100
+
101
+
102
@dataclass
class OutputFilter:
    """
    A caller-supplied predicate applied to output text.

    Use this for validation logic that a regex pattern cannot express.

    Attributes:
        name: Unique identifier for the filter.
        check_func: Callable taking the output text and an optional context
            dict; it returns True when the output is considered safe.
        action: Action associated with a failure of this filter.
        description: Human-readable description.
    """

    name: str
    check_func: Callable[[str, dict[str, Any] | None], bool]
    action: GuardAction = GuardAction.WARN
    description: str = ""
120
+
121
+
122
# Built-in leakage patterns.
#
# Every pattern is compiled by LeakagePattern with re.IGNORECASE | re.MULTILINE,
# so the character classes below match both cases regardless of how they are
# written. Each pattern should cover the *whole* secret, because the redaction
# step replaces exactly the matched span.
DEFAULT_LEAKAGE_PATTERNS: list[LeakagePattern] = [
    # API Keys and Tokens
    LeakagePattern(
        name="api_key_generic",
        pattern=r"(?i)(api[_-]?key|apikey|api_secret|api_token)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{20,})['\"]?",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="Generic API key patterns",
        redaction="[API_KEY_REDACTED]",
    ),
    LeakagePattern(
        name="bearer_token",
        pattern=r"(?i)bearer\s+([a-zA-Z0-9_\-\.]+\.[a-zA-Z0-9_\-\.]+\.[a-zA-Z0-9_\-\.]+)",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="Bearer authentication tokens (JWT)",
        redaction="Bearer [TOKEN_REDACTED]",
    ),
    LeakagePattern(
        name="openai_key",
        # The lookbehind stops matches inside ordinary hyphenated words
        # ("disk-usage-..."); the lookahead leaves "sk-ant-" keys to the
        # dedicated Anthropic pattern so they are not counted or labelled twice.
        pattern=r"(?<![a-zA-Z0-9])sk-(?!ant-)(?:proj-)?[a-zA-Z0-9\-_]{20,}",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="OpenAI API keys (including project keys)",
        redaction="[OPENAI_KEY_REDACTED]",
    ),
    LeakagePattern(
        name="anthropic_key",
        # "_" is part of the key alphabet; without it the match (and therefore
        # the redaction) would stop at the first underscore.
        pattern=r"(?<![a-zA-Z0-9])sk-ant-[a-zA-Z0-9\-_]{20,}",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="Anthropic API keys",
        redaction="[ANTHROPIC_KEY_REDACTED]",
    ),
    LeakagePattern(
        name="aws_key",
        pattern=r"(?i)(AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="AWS access key IDs",
        redaction="[AWS_KEY_REDACTED]",
    ),
    LeakagePattern(
        name="aws_secret",
        pattern=r"(?i)(aws_secret_access_key|aws_secret)\s*[:=]\s*['\"]?([a-zA-Z0-9/+=]{40})['\"]?",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="AWS secret access keys",
        redaction="[AWS_SECRET_REDACTED]",
    ),
    LeakagePattern(
        name="gcp_key",
        pattern=r"(?i)(gcp|google)[_-]?(api[_-]?key|key)\s*[:=]\s*['\"]?AIza[a-zA-Z0-9_\-]{35}['\"]?",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="Google Cloud API keys",
        redaction="[GCP_KEY_REDACTED]",
    ),
    LeakagePattern(
        name="azure_key",
        pattern=r"(?i)(azure|az)[_-]?(storage|account)[_-]?key\s*[:=]\s*['\"]?[a-zA-Z0-9/+=]{88}['\"]?",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="Azure storage keys",
        redaction="[AZURE_KEY_REDACTED]",
    ),
    LeakagePattern(
        name="github_token",
        pattern=r"(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="GitHub personal access tokens",
        redaction="[GITHUB_TOKEN_REDACTED]",
    ),
    LeakagePattern(
        name="slack_token",
        pattern=r"xox[baprs]-[0-9a-zA-Z\-]{10,}",
        category=LeakageCategory.CREDENTIAL,
        severity=0.9,
        description="Slack API tokens",
        redaction="[SLACK_TOKEN_REDACTED]",
    ),

    # Private Keys
    LeakagePattern(
        name="private_key",
        # Match the BEGIN header and, when present, everything through the
        # matching END footer so that redaction removes the key material too.
        # If no footer follows, only the header is matched.
        pattern=(
            r"-----BEGIN\s+(RSA\s+|EC\s+|DSA\s+|OPENSSH\s+|ENCRYPTED\s+)?PRIVATE\s+KEY-----"
            r"(?:[\s\S]*?-----END\s+(?:RSA\s+|EC\s+|DSA\s+|OPENSSH\s+|ENCRYPTED\s+)?PRIVATE\s+KEY-----)?"
        ),
        category=LeakageCategory.CREDENTIAL,
        severity=0.99,
        description="Private keys (PEM header and body)",
        redaction="[PRIVATE_KEY_REDACTED]",
    ),

    # Connection Strings
    # The user and password parts exclude whitespace so a match cannot run
    # across unrelated text up to some later ":" or "@".
    LeakagePattern(
        name="connection_string_mongodb",
        pattern=r"mongodb(\+srv)?:\/\/[^:\s]+:[^@\s]+@[^\s]+",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="MongoDB connection strings with credentials",
        redaction="[MONGODB_CONN_REDACTED]",
    ),
    LeakagePattern(
        name="connection_string_postgres",
        pattern=r"postgres(ql)?:\/\/[^:\s]+:[^@\s]+@[^\s]+",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="PostgreSQL connection strings with credentials",
        redaction="[POSTGRES_CONN_REDACTED]",
    ),
    LeakagePattern(
        name="connection_string_mysql",
        pattern=r"mysql:\/\/[^:\s]+:[^@\s]+@[^\s]+",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="MySQL connection strings with credentials",
        redaction="[MYSQL_CONN_REDACTED]",
    ),
    LeakagePattern(
        name="connection_string_redis",
        pattern=r"redis(s)?:\/\/[^:\s]*:[^@\s]+@[^\s]+",
        category=LeakageCategory.CREDENTIAL,
        severity=0.95,
        description="Redis connection strings with credentials",
        redaction="[REDIS_CONN_REDACTED]",
    ),

    # Internal Paths
    LeakagePattern(
        name="internal_path_unix",
        pattern=r"(?i)(\/home\/[a-zA-Z0-9_\-]+|\/Users\/[a-zA-Z0-9_\-]+|\/var\/[a-zA-Z0-9_\-\/]+|\/etc\/[a-zA-Z0-9_\-\/]+|\/opt\/[a-zA-Z0-9_\-\/]+)\/[^\s]*",
        category=LeakageCategory.INTERNAL,
        severity=0.6,
        description="Unix internal paths",
        redaction="[PATH_REDACTED]",
    ),
    LeakagePattern(
        name="internal_path_windows",
        pattern=r"(?i)C:\\Users\\[a-zA-Z0-9_\-]+\\[^\s]*",
        category=LeakageCategory.INTERNAL,
        severity=0.6,
        description="Windows user paths",
        redaction="[PATH_REDACTED]",
    ),

    # System Prompt Leakage
    LeakagePattern(
        name="system_prompt_leak",
        pattern=r"(?i)(my\s+instructions\s+are|i\s+was\s+told\s+to|my\s+system\s+prompt|my\s+initial\s+instructions|i\s+am\s+programmed\s+to|my\s+guidelines\s+state)",
        category=LeakageCategory.SYSTEM_PROMPT,
        severity=0.85,
        description="Indicators of system prompt disclosure",
        redaction="[SYSTEM_PROMPT_CONTENT_REDACTED]",
    ),
    LeakagePattern(
        name="system_prompt_markers",
        pattern=r"(?i)(<<SYS>>|<\|system\|>|\[SYSTEM\]|###\s*System)",
        category=LeakageCategory.SYSTEM_PROMPT,
        severity=0.9,
        description="System prompt formatting markers",
        redaction="[SYSTEM_MARKER_REDACTED]",
    ),

    # PII Patterns
    LeakagePattern(
        name="email_address",
        pattern=r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        category=LeakageCategory.PII,
        severity=0.5,
        description="Email addresses",
        redaction="[EMAIL_REDACTED]",
    ),
    LeakagePattern(
        name="phone_number",
        pattern=r"(?i)(\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}",
        category=LeakageCategory.PII,
        severity=0.5,
        description="Phone numbers (US format)",
        redaction="[PHONE_REDACTED]",
    ),
    LeakagePattern(
        name="ssn",
        pattern=r"\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b",
        category=LeakageCategory.PII,
        severity=0.9,
        description="Social Security Numbers",
        redaction="[SSN_REDACTED]",
    ),

    # Financial
    LeakagePattern(
        name="credit_card",
        pattern=r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9][0-9])[0-9]{12})\b",
        category=LeakageCategory.FINANCIAL,
        severity=0.95,
        description="Credit card numbers (Visa, MC, Amex, Discover)",
        redaction="[CARD_REDACTED]",
    ),

    # Infrastructure
    LeakagePattern(
        name="internal_ip",
        pattern=r"\b(10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})\b",
        category=LeakageCategory.INFRASTRUCTURE,
        severity=0.6,
        description="Internal/private IP addresses",
        redaction="[INTERNAL_IP_REDACTED]",
    ),

    # Plaintext passwords
    LeakagePattern(
        name="password_in_text",
        pattern=r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]?[^\s'\"]{4,}['\"]?",
        category=LeakageCategory.CREDENTIAL,
        severity=0.9,
        description="Passwords in plaintext",
        redaction="[PASSWORD_REDACTED]",
    ),
]
340
+
341
+
342
class OutputGuard:
    """
    Guard for detecting sensitive data leakage in LLM outputs.

    Uses pattern matching to detect credentials, API keys, internal paths,
    and other sensitive information that may leak through model outputs.

    Example:
        >>> guard = OutputGuard()
        >>>
        >>> # Check output
        >>> result = guard.check("The API key is sk-abc123def456...")
        >>> if not result.passed:
        ...     print(f"Leakage: {result.matched_patterns}")
        >>>
        >>> # Redact sensitive data
        >>> safe = guard.redact("Connection: mongodb://user:pass@host")
        >>> print(safe)  # Connection: [MONGODB_CONN_REDACTED]

    Attributes:
        patterns: List of leakage patterns to check.
        filters: List of custom output filters.
        action: Default action on violations.
        threshold: Risk score threshold.
    """

    def __init__(
        self,
        patterns: list[LeakagePattern] | None = None,
        filters: list[OutputFilter] | None = None,
        action: GuardAction = GuardAction.WARN,
        threshold: float = 0.5,
        enable_pii: bool = False,
    ) -> None:
        """
        Initialize the output guard.

        Args:
            patterns: Custom patterns (uses defaults if None).
            filters: Custom output filters.
            action: Action to take when threshold is exceeded.
            threshold: Risk score threshold (0.0 to 1.0).
            enable_pii: Whether to enable PII detection patterns. Only
                consulted when ``patterns`` is None.
        """
        if patterns is not None:
            # Copy so add_pattern()/remove_pattern() never mutate the
            # caller's list behind its back.
            self.patterns = list(patterns)
        else:
            # Filter out PII patterns if not enabled
            self.patterns = [
                p for p in DEFAULT_LEAKAGE_PATTERNS
                if enable_pii or p.category != LeakageCategory.PII
            ]

        # Same reasoning as above: add_filter() appends to this list.
        self.filters = list(filters) if filters else []
        self.action = action
        self.threshold = threshold
        # Name -> pattern lookup used by add_pattern()/remove_pattern().
        self._pattern_index: dict[str, LeakagePattern] = {p.name: p for p in self.patterns}

    def add_pattern(self, pattern: LeakagePattern) -> None:
        """
        Add a custom leakage pattern.

        If a pattern with the same name is already registered it is replaced,
        so the pattern list and the name index always agree.

        Args:
            pattern: The pattern to add.
        """
        if pattern.name in self._pattern_index:
            # Drop the superseded pattern; otherwise it would keep running in
            # check()/redact() while being unreachable through its name.
            self.patterns[:] = [p for p in self.patterns if p.name != pattern.name]
        self.patterns.append(pattern)
        self._pattern_index[pattern.name] = pattern

    def remove_pattern(self, name: str) -> bool:
        """
        Remove a pattern by name.

        Args:
            name: The pattern name to remove.

        Returns:
            True if pattern was removed, False if not found.
        """
        if self._pattern_index.pop(name, None) is None:
            return False
        # Filter by name rather than list.remove(): removal must not depend on
        # dataclass equality, nor raise if the list and index have diverged.
        self.patterns[:] = [p for p in self.patterns if p.name != name]
        return True

    def add_filter(self, filter_: OutputFilter) -> None:
        """
        Add a custom output filter.

        Args:
            filter_: The filter to add.
        """
        self.filters.append(filter_)

    def get_patterns(self) -> list[LeakagePattern]:
        """Get all registered patterns (as a new list)."""
        return list(self.patterns)

    def check(
        self,
        output_text: str,
        context: dict[str, Any] | None = None,
    ) -> GuardResult:
        """
        Check output text for sensitive data leakage.

        Every registered pattern and custom filter is evaluated. Each pattern
        with at least one match, and each failing filter, contributes one
        severity to the risk score. The check passes when the risk score is
        strictly below ``self.threshold``.

        Args:
            output_text: The output to check.
            context: Optional context for evaluation; passed to filters and
                merged into the result's context.

        Returns:
            GuardResult with check outcome.
        """
        if not output_text:
            return GuardResult.allow()

        context = context or {}
        matched_patterns: list[str] = []
        all_matches: list[dict[str, Any]] = []
        severities: list[float] = []

        # Check each pattern
        for pattern in self.patterns:
            matches = pattern.match(output_text)
            if not matches:
                continue

            # A pattern counts once towards the score however often it matched.
            matched_patterns.append(pattern.name)
            severities.append(pattern.severity)

            for match in matches:
                all_matches.append({
                    "pattern": pattern.name,
                    "category": pattern.category.value,
                    "severity": pattern.severity,
                    # Only a truncated excerpt is kept in the result.
                    "matched_text": self._truncate_match(match.group()),
                    "start": match.start(),
                    "end": match.end(),
                    "redaction": pattern.redaction,
                })

        # Run custom filters
        filter_failures: list[str] = []
        for filter_ in self.filters:
            try:
                if not filter_.check_func(output_text, context):
                    filter_failures.append(filter_.name)
                    if filter_.action == GuardAction.BLOCK:
                        severities.append(1.0)
                    else:
                        severities.append(0.7)
            except Exception as e:
                # A filter that raises is logged and otherwise ignored: it
                # contributes nothing to the risk score.
                logger.error(f"Output filter {filter_.name} raised exception: {e}")

        # Calculate risk score
        risk_score = self._calculate_risk_score(severities)

        # Determine if check passed
        passed = risk_score < self.threshold

        # Determine action
        action = GuardAction.ALLOW if passed else self.action

        # Log based on action
        if not passed:
            if action == GuardAction.WARN:
                logger.warning(
                    f"Output guard warning: risk_score={risk_score:.2f}, "
                    f"patterns={matched_patterns}, filters={filter_failures}"
                )
            elif action == GuardAction.BLOCK:
                logger.info(
                    f"Output guard blocked: risk_score={risk_score:.2f}, "
                    f"patterns={matched_patterns}, filters={filter_failures}"
                )

        return GuardResult(
            passed=passed,
            action=action,
            matched_patterns=matched_patterns + filter_failures,
            risk_score=risk_score,
            matches=all_matches,
            # Caller-supplied keys win over "output_length" on collision.
            context={"output_length": len(output_text), **context},
        )

    def redact(
        self,
        output_text: str,
        categories: list[LeakageCategory] | None = None,
    ) -> str:
        """
        Redact sensitive data from output text.

        Patterns are applied one after another in registration order, each on
        the result of the previous substitution. ``pattern.redaction`` is
        handed to ``re.sub`` as the replacement, so backslash escapes and
        group references inside it are interpreted by ``re``.

        Args:
            output_text: Text to redact.
            categories: Categories to redact (all if None).

        Returns:
            Text with sensitive data redacted.
        """
        if not output_text:
            return output_text

        result = output_text

        for pattern in self.patterns:
            # Filter by category if specified
            if categories is not None and pattern.category not in categories:
                continue

            # Replace all matches with redaction text
            result = pattern.compiled.sub(pattern.redaction, result)

        return result

    def _calculate_risk_score(self, severities: list[float]) -> float:
        """
        Calculate overall risk score from matched pattern severities.

        The score is the highest severity plus 0.1 for every additional
        contributing severity, capped at 1.0.

        Args:
            severities: List of severity scores from matches.

        Returns:
            Risk score between 0.0 and 1.0 (0.0 when nothing matched).
        """
        if not severities:
            return 0.0

        base_score = max(severities)
        bonus = 0.1 * (len(severities) - 1)
        return min(1.0, base_score + bonus)

    def _truncate_match(self, text: str, max_length: int = 20) -> str:
        """
        Truncate matched text for logging (avoid leaking in logs).

        Text of at most 8 characters is replaced entirely by ``"[...]"``;
        text up to ``max_length`` keeps its first and last 4 characters;
        longer text keeps its first 8 and last 4 characters.
        """
        if len(text) <= max_length:
            return text[:4] + "..." + text[-4:] if len(text) > 8 else "[...]"
        return text[:8] + "..." + text[-4:]

    def configure(
        self,
        action: GuardAction | None = None,
        threshold: float | None = None,
    ) -> None:
        """
        Update guard configuration.

        Args:
            action: New default action.
            threshold: New risk threshold.

        Raises:
            ValueError: If ``threshold`` is outside 0.0 to 1.0.
        """
        if action is not None:
            self.action = action
        if threshold is not None:
            if not 0.0 <= threshold <= 1.0:
                raise ValueError("Threshold must be between 0.0 and 1.0")
            self.threshold = threshold
595
+
596
+
597
def create_output_guard(
    include_defaults: bool = True,
    custom_patterns: list[LeakagePattern] | None = None,
    enable_pii: bool = False,
    action: GuardAction = GuardAction.WARN,
    threshold: float = 0.5,
) -> OutputGuard:
    """
    Build an OutputGuard from the built-in and/or caller-supplied patterns.

    Args:
        include_defaults: Whether to include default patterns.
        custom_patterns: Additional custom patterns, appended after the
            defaults.
        enable_pii: Whether the default PII patterns are kept.
        action: Action to take on violations.
        threshold: Risk score threshold.

    Returns:
        Configured OutputGuard instance.
    """
    selected: list[LeakagePattern] = []

    if include_defaults:
        for candidate in DEFAULT_LEAKAGE_PATTERNS:
            # PII patterns are opt-in.
            if candidate.category == LeakageCategory.PII and not enable_pii:
                continue
            selected.append(candidate)

    if custom_patterns:
        selected += custom_patterns

    return OutputGuard(
        patterns=selected,
        action=action,
        threshold=threshold,
        enable_pii=enable_pii,
    )