proxilion 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. proxilion/__init__.py +136 -0
  2. proxilion/audit/__init__.py +133 -0
  3. proxilion/audit/base_exporters.py +527 -0
  4. proxilion/audit/compliance/__init__.py +130 -0
  5. proxilion/audit/compliance/base.py +457 -0
  6. proxilion/audit/compliance/eu_ai_act.py +603 -0
  7. proxilion/audit/compliance/iso27001.py +544 -0
  8. proxilion/audit/compliance/soc2.py +491 -0
  9. proxilion/audit/events.py +493 -0
  10. proxilion/audit/explainability.py +1173 -0
  11. proxilion/audit/exporters/__init__.py +58 -0
  12. proxilion/audit/exporters/aws_s3.py +636 -0
  13. proxilion/audit/exporters/azure_storage.py +608 -0
  14. proxilion/audit/exporters/cloud_base.py +468 -0
  15. proxilion/audit/exporters/gcp_storage.py +570 -0
  16. proxilion/audit/exporters/multi_exporter.py +498 -0
  17. proxilion/audit/hash_chain.py +652 -0
  18. proxilion/audit/logger.py +543 -0
  19. proxilion/caching/__init__.py +49 -0
  20. proxilion/caching/tool_cache.py +633 -0
  21. proxilion/context/__init__.py +73 -0
  22. proxilion/context/context_window.py +556 -0
  23. proxilion/context/message_history.py +505 -0
  24. proxilion/context/session.py +735 -0
  25. proxilion/contrib/__init__.py +51 -0
  26. proxilion/contrib/anthropic.py +609 -0
  27. proxilion/contrib/google.py +1012 -0
  28. proxilion/contrib/langchain.py +641 -0
  29. proxilion/contrib/mcp.py +893 -0
  30. proxilion/contrib/openai.py +646 -0
  31. proxilion/core.py +3058 -0
  32. proxilion/decorators.py +966 -0
  33. proxilion/engines/__init__.py +287 -0
  34. proxilion/engines/base.py +266 -0
  35. proxilion/engines/casbin_engine.py +412 -0
  36. proxilion/engines/opa_engine.py +493 -0
  37. proxilion/engines/simple.py +437 -0
  38. proxilion/exceptions.py +887 -0
  39. proxilion/guards/__init__.py +54 -0
  40. proxilion/guards/input_guard.py +522 -0
  41. proxilion/guards/output_guard.py +634 -0
  42. proxilion/observability/__init__.py +198 -0
  43. proxilion/observability/cost_tracker.py +866 -0
  44. proxilion/observability/hooks.py +683 -0
  45. proxilion/observability/metrics.py +798 -0
  46. proxilion/observability/session_cost_tracker.py +1063 -0
  47. proxilion/policies/__init__.py +67 -0
  48. proxilion/policies/base.py +304 -0
  49. proxilion/policies/builtin.py +486 -0
  50. proxilion/policies/registry.py +376 -0
  51. proxilion/providers/__init__.py +201 -0
  52. proxilion/providers/adapter.py +468 -0
  53. proxilion/providers/anthropic_adapter.py +330 -0
  54. proxilion/providers/gemini_adapter.py +391 -0
  55. proxilion/providers/openai_adapter.py +294 -0
  56. proxilion/py.typed +0 -0
  57. proxilion/resilience/__init__.py +81 -0
  58. proxilion/resilience/degradation.py +615 -0
  59. proxilion/resilience/fallback.py +555 -0
  60. proxilion/resilience/retry.py +554 -0
  61. proxilion/scheduling/__init__.py +57 -0
  62. proxilion/scheduling/priority_queue.py +419 -0
  63. proxilion/scheduling/scheduler.py +459 -0
  64. proxilion/security/__init__.py +244 -0
  65. proxilion/security/agent_trust.py +968 -0
  66. proxilion/security/behavioral_drift.py +794 -0
  67. proxilion/security/cascade_protection.py +869 -0
  68. proxilion/security/circuit_breaker.py +428 -0
  69. proxilion/security/cost_limiter.py +690 -0
  70. proxilion/security/idor_protection.py +460 -0
  71. proxilion/security/intent_capsule.py +849 -0
  72. proxilion/security/intent_validator.py +495 -0
  73. proxilion/security/memory_integrity.py +767 -0
  74. proxilion/security/rate_limiter.py +509 -0
  75. proxilion/security/scope_enforcer.py +680 -0
  76. proxilion/security/sequence_validator.py +636 -0
  77. proxilion/security/trust_boundaries.py +784 -0
  78. proxilion/streaming/__init__.py +70 -0
  79. proxilion/streaming/detector.py +761 -0
  80. proxilion/streaming/transformer.py +674 -0
  81. proxilion/timeouts/__init__.py +55 -0
  82. proxilion/timeouts/decorators.py +477 -0
  83. proxilion/timeouts/manager.py +545 -0
  84. proxilion/tools/__init__.py +69 -0
  85. proxilion/tools/decorators.py +493 -0
  86. proxilion/tools/registry.py +732 -0
  87. proxilion/types.py +339 -0
  88. proxilion/validation/__init__.py +93 -0
  89. proxilion/validation/pydantic_schema.py +351 -0
  90. proxilion/validation/schema.py +651 -0
  91. proxilion-0.0.1.dist-info/METADATA +872 -0
  92. proxilion-0.0.1.dist-info/RECORD +94 -0
  93. proxilion-0.0.1.dist-info/WHEEL +4 -0
  94. proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1173 @@
1
+ """
2
+ Explainable Decisions for Proxilion.
3
+
4
+ Provides human-readable audit trails explaining WHY each security
5
+ decision was made. Designed for CA SB 53 compliance and regulatory
6
+ transparency requirements.
7
+
8
+ Features:
9
+ - Human-readable explanation generation for all decision types
10
+ - Decision tree visualization
11
+ - Factor contribution analysis
12
+ - Multi-language explanation templates
13
+ - Confidence scoring with uncertainty breakdown
14
+ - Counterfactual explanations ("what would change the decision")
15
+
16
+ Example:
17
+ >>> from proxilion.audit.explainability import (
18
+ ... ExplainableDecision,
19
+ ... DecisionExplainer,
20
+ ... ExplanationFormat,
21
+ ... )
22
+ >>>
23
+ >>> # Create an explainer
24
+ >>> explainer = DecisionExplainer()
25
+ >>>
26
+ >>> # Explain an authorization decision
27
+ >>> decision = ExplainableDecision(
28
+ ... decision_type="authorization",
29
+ ... outcome="DENIED",
30
+ ... factors=[
31
+ ... DecisionFactor("role_check", False, 0.4, "User lacks 'admin' role"),
32
+ ... DecisionFactor("rate_limit", True, 0.3, "Within rate limits"),
33
+ ... DecisionFactor("time_window", True, 0.3, "Within allowed hours"),
34
+ ... ],
35
+ ... context={"user_id": "user_123", "tool": "delete_user"},
36
+ ... )
37
+ >>>
38
+ >>> # Generate human-readable explanation
39
+ >>> explanation = explainer.explain(decision)
40
+ >>> print(explanation.summary)
41
+ "Access DENIED: User lacks required 'admin' role for delete_user operation"
42
+ >>>
43
+ >>> # Get counterfactual
44
+ >>> print(explanation.counterfactual)
45
+ "Access would be ALLOWED if: User had 'admin' role"
46
+ """
47
+
48
+ from __future__ import annotations
49
+
50
+ import hashlib
51
+ import json
52
+ import logging
53
+ import re
54
+ import threading
55
+ from dataclasses import asdict, dataclass, field
56
+ from datetime import datetime, timezone
57
+ from enum import Enum
58
+ from typing import Any, Callable
59
+
60
+ logger = logging.getLogger(__name__)
61
+
62
+
63
class DecisionType(str, Enum):
    """Categories of security decision that the explainer can describe.

    Mixes in ``str`` so members compare equal to their raw values and
    serialize transparently (e.g. in JSON audit payloads).
    """

    AUTHORIZATION = "authorization"          # role/policy access checks
    RATE_LIMIT = "rate_limit"                # request throttling
    INPUT_GUARD = "input_guard"              # inbound content screening
    OUTPUT_GUARD = "output_guard"            # outbound content screening
    CIRCUIT_BREAKER = "circuit_breaker"      # service availability gating
    TRUST_BOUNDARY = "trust_boundary"        # inter-agent trust checks
    INTENT_VALIDATION = "intent_validation"  # tool-call vs. intent match
    BEHAVIORAL_DRIFT = "behavioral_drift"    # baseline-deviation detection
    BUDGET = "budget"                        # spend-limit enforcement
    CASCADE = "cascade"                      # cascade-protection decisions
76
+
77
+
78
class ExplanationFormat(str, Enum):
    """Supported rendering formats for generated explanations.

    Mixes in ``str`` so format names round-trip cleanly through
    configuration and serialized output.
    """

    TEXT = "text"          # plain text, human-readable
    MARKDOWN = "markdown"  # Markdown formatted
    HTML = "html"          # HTML formatted
    JSON = "json"          # structured JSON
    LEGAL = "legal"        # legal/compliance disclosure format
86
+
87
+
88
class Outcome(str, Enum):
    """Possible outcomes of a security decision.

    Mixes in ``str`` so members compare equal to their upper-case
    string values (callers may pass either form).
    """

    ALLOWED = "ALLOWED"
    DENIED = "DENIED"
    WARNED = "WARNED"
    MODIFIED = "MODIFIED"  # e.g. output was redacted
    DEFERRED = "DEFERRED"  # decision pending more info
96
+
97
+
98
@dataclass
class DecisionFactor:
    """
    One weighted input that contributed to a security decision.

    Attributes:
        name: Factor identifier (e.g., "role_check", "rate_limit").
        passed: Whether this factor passed (True) or failed (False).
        weight: Importance weight of this factor (0.0 to 1.0).
        reason: Human-readable explanation of the factor result.
        details: Additional structured details.
        evidence: Evidence/data that led to this factor result.
    """

    name: str
    passed: bool
    weight: float
    reason: str
    details: dict[str, Any] = field(default_factory=dict)
    evidence: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this factor to a plain dictionary (keys in field order)."""
        return dict(
            name=self.name,
            passed=self.passed,
            weight=self.weight,
            reason=self.reason,
            details=self.details,
            evidence=self.evidence,
        )
129
+
130
+
131
@dataclass
class ExplainableDecision:
    """
    A security decision with full explainability metadata.

    Attributes:
        decision_id: Unique identifier for this decision. When empty, a
            deterministic ID is derived from the decision type, outcome,
            and timestamp in ``__post_init__``.
        decision_type: Type of security decision (enum or free-form string).
        outcome: The decision outcome (enum or free-form string).
        factors: List of factors that contributed to the decision.
        context: Contextual information (user, tool, etc.).
        timestamp: When the decision was made (UTC-aware by default).
        confidence: Confidence score (0.0 to 1.0).
        latency_ms: Time taken to make the decision.
        policy_version: Version of the policy used.
        metadata: Additional metadata.
    """

    decision_type: DecisionType | str
    outcome: Outcome | str
    factors: list[DecisionFactor]
    context: dict[str, Any] = field(default_factory=dict)
    decision_id: str = ""
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    confidence: float = 1.0
    latency_ms: float = 0.0
    policy_version: str = "1.0"
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Coerce strings to enums BEFORE generating the ID so the
        # auto-generated decision_id is identical whether the caller passed
        # DecisionType.AUTHORIZATION or the raw string "authorization".
        # (Formatting a str-mixin enum member directly is also
        # Python-version-dependent — 3.11 changed Enum.__format__ — so we
        # hash canonical .value strings, never the member itself.)
        if isinstance(self.decision_type, str):
            try:
                self.decision_type = DecisionType(self.decision_type)
            except ValueError:
                pass  # Keep as string if not a known type

        if isinstance(self.outcome, str):
            try:
                self.outcome = Outcome(self.outcome)
            except ValueError:
                pass

        if not self.decision_id:
            # Generate deterministic ID from canonical decision content.
            dt = (
                self.decision_type.value
                if isinstance(self.decision_type, DecisionType)
                else str(self.decision_type)
            )
            oc = self.outcome.value if isinstance(self.outcome, Outcome) else str(self.outcome)
            content = f"{dt}:{oc}:{self.timestamp.isoformat()}"
            self.decision_id = hashlib.sha256(content.encode()).hexdigest()[:16]

    @property
    def passed(self) -> bool:
        """Whether the decision resulted in an allowed outcome."""
        # The "ALLOWED" string fallback covers outcomes that stayed plain
        # strings because they did not match a known Outcome value.
        return self.outcome in (Outcome.ALLOWED, "ALLOWED")

    @property
    def primary_factor(self) -> DecisionFactor | None:
        """Get the most important factor (by weight or first failing)."""
        if not self.factors:
            return None

        # If denied, return the failing factor with the highest weight —
        # that is the factor most responsible for the denial.
        if not self.passed:
            failing = [f for f in self.factors if not f.passed]
            if failing:
                return max(failing, key=lambda f: f.weight)

        # Otherwise return the highest-weight factor overall.
        return max(self.factors, key=lambda f: f.weight)

    def to_dict(self) -> dict[str, Any]:
        """Convert to a JSON-serializable dictionary."""
        return {
            "decision_id": self.decision_id,
            "decision_type": str(
                self.decision_type.value
                if isinstance(self.decision_type, DecisionType)
                else self.decision_type
            ),
            "outcome": str(
                self.outcome.value if isinstance(self.outcome, Outcome) else self.outcome
            ),
            "factors": [f.to_dict() for f in self.factors],
            "context": self.context,
            "timestamp": self.timestamp.isoformat(),
            "confidence": self.confidence,
            "latency_ms": self.latency_ms,
            "policy_version": self.policy_version,
            "metadata": self.metadata,
        }

    def to_json(self) -> str:
        """Convert to a pretty-printed JSON string."""
        return json.dumps(self.to_dict(), indent=2)
217
+
218
+
219
@dataclass
class Explanation:
    """
    Human-readable explanation of a decision.

    Attributes:
        decision_id: ID of the explained decision.
        summary: One-line summary of the decision.
        detailed: Multi-paragraph detailed explanation.
        factors_explained: Per-factor explanations.
        counterfactual: What would change the decision.
        confidence_breakdown: Explanation of the confidence score.
        recommendations: Suggestions for the user.
        format: Output format used.
        language: Language code (e.g., "en", "es").
    """

    decision_id: str
    summary: str
    detailed: str
    factors_explained: list[str]
    counterfactual: str | None = None
    confidence_breakdown: str | None = None
    recommendations: list[str] = field(default_factory=list)
    format: ExplanationFormat = ExplanationFormat.TEXT
    language: str = "en"

    def to_dict(self) -> dict[str, Any]:
        """Serialize the explanation to a plain dictionary (keys in field order)."""
        payload: dict[str, Any] = {
            "decision_id": self.decision_id,
            "summary": self.summary,
            "detailed": self.detailed,
            "factors_explained": self.factors_explained,
            "counterfactual": self.counterfactual,
            "confidence_breakdown": self.confidence_breakdown,
            "recommendations": self.recommendations,
            # Emit the raw enum value so the dict is JSON-serializable.
            "format": self.format.value,
            "language": self.language,
        }
        return payload
259
+
260
+
261
# Default explanation templates.
#
# Keyed first by language code ("en" only by default), then by template
# name. Values are str.format() templates: placeholders such as {reason}
# or {current}/{limit} are filled from the decision's primary factor and
# context by DecisionExplainer — a missing key raises KeyError at format
# time (standard str.format behavior).
DEFAULT_TEMPLATES: dict[str, dict[str, str]] = {
    "en": {
        # Authorization
        "auth_allowed": "Access ALLOWED: {reason}",
        "auth_denied": "Access DENIED: {reason}",
        "auth_factor_pass": "✓ {name}: {reason}",
        "auth_factor_fail": "✗ {name}: {reason}",

        # Rate limiting
        "rate_allowed": "Request ALLOWED: Within rate limits ({current}/{limit} requests)",
        "rate_denied": "Request DENIED: Rate limit exceeded ({current}/{limit} requests)",
        "rate_counterfactual": "Request would be allowed after {wait_seconds} seconds",

        # Guards
        "guard_pass": "Content ALLOWED: No policy violations detected",
        "guard_block": "Content BLOCKED: {violation_type} detected - {reason}",
        "guard_redact": "Content MODIFIED: Sensitive information redacted",

        # Circuit breaker
        "circuit_closed": "Service AVAILABLE: Circuit breaker closed",
        "circuit_open": "Service UNAVAILABLE: Circuit breaker open after {failures} failures",
        "circuit_half_open": "Service TESTING: Circuit breaker allowing test request",

        # Trust boundary
        "trust_allowed": "Inter-agent communication ALLOWED: {from_agent} → {to_agent}",
        "trust_denied": "Inter-agent communication DENIED: Trust level insufficient",

        # Intent validation
        "intent_valid": "Tool call ALLOWED: Consistent with original intent",
        "intent_hijack": "Tool call BLOCKED: Potential intent hijack detected",

        # Budget
        "budget_ok": "Budget OK: {spent:.2f}/{limit:.2f} USD ({percentage:.0%})",
        "budget_exceeded": "Budget EXCEEDED: {spent:.2f}/{limit:.2f} USD",

        # Behavioral drift
        "drift_normal": "Behavior NORMAL: Within baseline parameters",
        "drift_detected": "Behavior ANOMALOUS: {metric} deviated by {deviation:.1f} std devs",

        # Generic
        "counterfactual_prefix": "Decision would change if: ",
        "no_counterfactual": "No simple change would alter this decision",
        "confidence_high": "High confidence ({confidence:.0%}): All factors clearly determined",
        "confidence_medium": "Medium confidence ({confidence:.0%}): Some uncertainty in factors",
        "confidence_low": "Low confidence ({confidence:.0%}): Significant uncertainty",
    }
}
309
+
310
+
311
class DecisionExplainer:
    """
    Generates human-readable explanations for security decisions.

    The explainer uses templates and decision factors to create
    clear, actionable explanations suitable for end users,
    compliance audits, and debugging.

    Example:
        >>> explainer = DecisionExplainer()
        >>>
        >>> decision = ExplainableDecision(
        ...     decision_type=DecisionType.AUTHORIZATION,
        ...     outcome=Outcome.DENIED,
        ...     factors=[
        ...         DecisionFactor("role", False, 0.5, "Missing admin role"),
        ...     ],
        ...     context={"user_id": "alice", "tool": "delete_user"},
        ... )
        >>>
        >>> explanation = explainer.explain(decision)
        >>> print(explanation.summary)
    """

    def __init__(
        self,
        templates: dict[str, dict[str, str]] | None = None,
        default_language: str = "en",
        include_evidence: bool = True,
        include_recommendations: bool = True,
    ) -> None:
        """
        Initialize the explainer.

        Args:
            templates: Custom explanation templates by language. Entries
                are merged over the defaults for that language.
            default_language: Default language for explanations.
            include_evidence: Whether to include evidence in explanations.
            include_recommendations: Whether to include recommendations.
        """
        # Copy each inner per-language dict as well: a shallow
        # dict(DEFAULT_TEMPLATES) aliases the inner dicts, so the
        # .update() below would mutate the module-level DEFAULT_TEMPLATES
        # and leak custom templates across every explainer instance.
        self._templates = {lang: dict(tmpl) for lang, tmpl in DEFAULT_TEMPLATES.items()}
        if templates:
            for lang, tmpl in templates.items():
                if lang in self._templates:
                    self._templates[lang].update(tmpl)
                else:
                    # Copy so later caller-side mutation cannot affect us.
                    self._templates[lang] = dict(tmpl)

        self._default_language = default_language
        self._include_evidence = include_evidence
        self._include_recommendations = include_recommendations

        # Custom explainers for specific decision types, keyed by the
        # decision type's string value.
        self._custom_explainers: dict[str, Callable[[ExplainableDecision], Explanation]] = {}

    def register_explainer(
        self,
        decision_type: DecisionType | str,
        explainer: Callable[[ExplainableDecision], Explanation],
    ) -> None:
        """
        Register a custom explainer for a decision type.

        A registered explainer fully replaces the built-in explanation
        pipeline for that decision type.

        Args:
            decision_type: Decision type to handle.
            explainer: Function that generates explanations.
        """
        key = decision_type.value if isinstance(decision_type, DecisionType) else decision_type
        self._custom_explainers[key] = explainer

    def explain(
        self,
        decision: ExplainableDecision,
        format: ExplanationFormat = ExplanationFormat.TEXT,
        language: str | None = None,
    ) -> Explanation:
        """
        Generate a human-readable explanation for a decision.

        Args:
            decision: The decision to explain.
            format: Desired output format.
            language: Language for the explanation (falls back to the
                explainer's default, then to English templates).

        Returns:
            Explanation with summary, details, and counterfactual.
        """
        lang = language or self._default_language
        templates = self._templates.get(lang, self._templates["en"])

        # A custom explainer, if registered, takes over completely.
        decision_type_key = (
            decision.decision_type.value
            if isinstance(decision.decision_type, DecisionType)
            else str(decision.decision_type)
        )

        if decision_type_key in self._custom_explainers:
            return self._custom_explainers[decision_type_key](decision)

        # Generate explanation based on decision type
        summary = self._generate_summary(decision, templates)
        detailed = self._generate_detailed(decision, templates)
        factors_explained = self._explain_factors(decision, templates)
        counterfactual = self._generate_counterfactual(decision, templates)
        confidence_breakdown = self._explain_confidence(decision, templates)
        recommendations = self._generate_recommendations(decision) if self._include_recommendations else []

        # Apply format-specific post-processing (TEXT and JSON need none).
        if format == ExplanationFormat.MARKDOWN:
            summary = f"**{summary}**"
            detailed = self._to_markdown(detailed, factors_explained)
        elif format == ExplanationFormat.HTML:
            summary = f"<strong>{summary}</strong>"
            detailed = self._to_html(detailed, factors_explained)
        elif format == ExplanationFormat.LEGAL:
            detailed = self._to_legal_format(decision, detailed, factors_explained)

        return Explanation(
            decision_id=decision.decision_id,
            summary=summary,
            detailed=detailed,
            factors_explained=factors_explained,
            counterfactual=counterfactual,
            confidence_breakdown=confidence_breakdown,
            recommendations=recommendations,
            format=format,
            language=lang,
        )

    def _generate_summary(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str:
        """Generate a one-line summary tailored to the decision type.

        NOTE: per-type templates are filled from ``decision.context`` via
        str.format, so a context missing a template's placeholder keys
        (e.g. {current}/{limit} for rate limits) raises KeyError.
        """
        dt = decision.decision_type
        outcome = decision.outcome
        context = decision.context
        primary = decision.primary_factor

        # Each branch handles both the enum member and its raw string form,
        # since unknown types/outcomes are kept as plain strings.
        if dt == DecisionType.AUTHORIZATION or dt == "authorization":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                template = templates.get("auth_allowed", "Access ALLOWED: {reason}")
            else:
                template = templates.get("auth_denied", "Access DENIED: {reason}")
            reason = primary.reason if primary else "Policy evaluation"
            return template.format(reason=reason, **context)

        elif dt == DecisionType.RATE_LIMIT or dt == "rate_limit":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                template = templates.get("rate_allowed", "Request allowed")
            else:
                template = templates.get("rate_denied", "Rate limit exceeded")
            return template.format(**context)

        elif dt in (DecisionType.INPUT_GUARD, DecisionType.OUTPUT_GUARD) or dt in ("input_guard", "output_guard"):
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("guard_pass", "Content allowed")
            elif outcome in (Outcome.MODIFIED, "MODIFIED"):
                return templates.get("guard_redact", "Content modified")
            else:
                violation = context.get("violation_type", "Policy violation")
                reason = primary.reason if primary else "Security policy"
                return templates.get("guard_block", "Content blocked: {reason}").format(
                    violation_type=violation, reason=reason
                )

        elif dt == DecisionType.CIRCUIT_BREAKER or dt == "circuit_breaker":
            state = context.get("state", "unknown")
            if state == "closed":
                return templates.get("circuit_closed", "Service available")
            elif state == "open":
                failures = context.get("failures", 0)
                return templates.get("circuit_open", "Service unavailable").format(failures=failures)
            else:
                return templates.get("circuit_half_open", "Service testing")

        elif dt == DecisionType.INTENT_VALIDATION or dt == "intent_validation":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("intent_valid", "Tool call consistent with intent")
            else:
                return templates.get("intent_hijack", "Potential intent hijack detected")

        elif dt == DecisionType.BUDGET or dt == "budget":
            spent = context.get("spent", 0)
            limit = context.get("limit", 0)
            # Guard against division by zero when no limit is configured.
            percentage = spent / limit if limit > 0 else 0
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("budget_ok", "Within budget").format(
                    spent=spent, limit=limit, percentage=percentage
                )
            else:
                return templates.get("budget_exceeded", "Budget exceeded").format(
                    spent=spent, limit=limit
                )

        elif dt == DecisionType.BEHAVIORAL_DRIFT or dt == "behavioral_drift":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("drift_normal", "Behavior within normal range")
            else:
                metric = context.get("metric", "unknown")
                deviation = context.get("deviation", 0)
                return templates.get("drift_detected", "Behavioral anomaly").format(
                    metric=metric, deviation=deviation
                )

        # Default summary for any other decision type.
        outcome_str = outcome.value if isinstance(outcome, Outcome) else str(outcome)
        dt_str = dt.value if isinstance(dt, DecisionType) else str(dt)
        reason = primary.reason if primary else "Policy decision"
        return f"{dt_str.title()} {outcome_str}: {reason}"

    def _generate_detailed(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str:
        """Generate a detailed multi-paragraph plain-text explanation."""
        lines = []

        # Opening paragraph: what was decided and when.
        dt_str = (
            decision.decision_type.value
            if isinstance(decision.decision_type, DecisionType)
            else str(decision.decision_type)
        )
        outcome_str = (
            decision.outcome.value
            if isinstance(decision.outcome, Outcome)
            else str(decision.outcome)
        )

        lines.append(
            f"A {dt_str.replace('_', ' ')} decision was made at "
            f"{decision.timestamp.strftime('%Y-%m-%d %H:%M:%S UTC')}."
        )
        lines.append(f"The final outcome was: {outcome_str}.")
        lines.append("")

        # Context paragraph (internal bookkeeping keys are suppressed).
        if decision.context:
            context_items = []
            for key, value in decision.context.items():
                if key not in ("_internal", "raw"):
                    context_items.append(f"- {key.replace('_', ' ').title()}: {value}")
            if context_items:
                lines.append("Context:")
                lines.extend(context_items)
                lines.append("")

        # Factors paragraph, with optional per-factor evidence.
        if decision.factors:
            lines.append("The following factors were evaluated:")
            for factor in decision.factors:
                status = "✓ PASSED" if factor.passed else "✗ FAILED"
                lines.append(f"- {factor.name}: {status} (weight: {factor.weight:.0%})")
                lines.append(f"  Reason: {factor.reason}")
                if self._include_evidence and factor.evidence:
                    for ev in factor.evidence:
                        lines.append(f"  Evidence: {ev}")
            lines.append("")

        # Closing metadata.
        lines.append(f"Decision confidence: {decision.confidence:.0%}")
        lines.append(f"Processing time: {decision.latency_ms:.2f}ms")
        lines.append(f"Policy version: {decision.policy_version}")

        return "\n".join(lines)

    def _explain_factors(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> list[str]:
        """Generate a one-line explanation per factor (pass/fail prefixed)."""
        explanations = []

        for factor in decision.factors:
            if factor.passed:
                template = templates.get("auth_factor_pass", "✓ {name}: {reason}")
            else:
                template = templates.get("auth_factor_fail", "✗ {name}: {reason}")

            explanations.append(template.format(name=factor.name, reason=factor.reason))

        return explanations

    def _generate_counterfactual(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str | None:
        """Describe the minimal change that would flip the decision.

        Returns None when there are no factors to reason about.
        """
        if not decision.factors:
            return None

        prefix = templates.get("counterfactual_prefix", "Decision would change if: ")

        if decision.passed:
            # Allowed decision: the highest-weight passing factor failing
            # is the most plausible way to flip it to a denial.
            passing_factors = [f for f in decision.factors if f.passed]
            if passing_factors:
                critical = max(passing_factors, key=lambda f: f.weight)
                return f"{prefix}{critical.name} check failed"
        else:
            # Denied decision: every failing factor would need to pass.
            failing_factors = [f for f in decision.factors if not f.passed]
            if failing_factors:
                changes = []
                for f in failing_factors:
                    # Map well-known factor names to specific remediation text.
                    name = f.name.lower()
                    if "role" in name:
                        changes.append("User had the required role")
                    elif "rate" in name:
                        changes.append("Request was within rate limits")
                    elif "budget" in name:
                        changes.append("Budget was not exceeded")
                    elif "trust" in name:
                        changes.append("Trust level was sufficient")
                    else:
                        changes.append(f"{f.name} check passed")

                if changes:
                    return prefix + "; ".join(changes)

        return templates.get("no_counterfactual", "No simple change would alter this decision")

    def _explain_confidence(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str:
        """Explain the confidence score in one of three qualitative bands."""
        conf = decision.confidence

        if conf >= 0.9:
            template = templates.get("confidence_high", "High confidence ({confidence:.0%})")
        elif conf >= 0.7:
            template = templates.get("confidence_medium", "Medium confidence ({confidence:.0%})")
        else:
            template = templates.get("confidence_low", "Low confidence ({confidence:.0%})")

        return template.format(confidence=conf)

    def _generate_recommendations(self, decision: ExplainableDecision) -> list[str]:
        """Generate up to three actionable recommendations for denials.

        Allowed decisions produce no recommendations.
        """
        recommendations = []

        if not decision.passed:
            failing = [f for f in decision.factors if not f.passed]
            for factor in failing:
                name = factor.name.lower()

                if "role" in name or "permission" in name:
                    recommendations.append(
                        "Contact your administrator to request the necessary permissions"
                    )
                elif "rate" in name:
                    recommendations.append(
                        "Wait before retrying, or contact support for rate limit increase"
                    )
                elif "budget" in name:
                    recommendations.append(
                        "Review your usage or request a budget increase from your admin"
                    )
                elif "trust" in name:
                    recommendations.append(
                        "Ensure proper agent authentication and delegation chains"
                    )
                elif "intent" in name:
                    recommendations.append(
                        "Verify the tool call matches the original user request"
                    )
                elif "circuit" in name:
                    recommendations.append(
                        "The service may be experiencing issues; retry later"
                    )

        # Deduplicate while preserving order (dicts preserve insertion order),
        # then cap at the top 3 recommendations.
        return list(dict.fromkeys(recommendations))[:3]

    def _to_markdown(self, detailed: str, factors: list[str]) -> str:
        """Convert the plain-text explanation to Markdown."""
        lines = detailed.split("\n")
        md_lines = []

        for line in lines:
            if line.endswith(":"):
                # Section labels become level-3 headings.
                md_lines.append(f"\n### {line}\n")
            elif line.startswith("- "):
                md_lines.append(line)
            elif line.startswith("  "):
                md_lines.append(f"  {line.strip()}")
            else:
                md_lines.append(line)

        return "\n".join(md_lines)

    def _to_html(self, detailed: str, factors: list[str]) -> str:
        """Convert the plain-text explanation to a simple HTML fragment.

        NOTE(review): the text is not HTML-escaped; if factor reasons or
        context values can contain markup, escaping should be confirmed
        by the caller.
        """
        html = detailed.replace("\n\n", "</p><p>")
        html = html.replace("\n", "<br>")
        html = f"<div class='explanation'><p>{html}</p></div>"
        return html

    def _to_legal_format(
        self,
        decision: ExplainableDecision,
        detailed: str,
        factors: list[str],
    ) -> str:
        """Convert to legal/compliance disclosure format (CA SB 53 style)."""
        lines = [
            "=" * 60,
            "AUTOMATED DECISION DISCLOSURE",
            "(Per California SB 53 - AI Transparency Requirements)",
            "=" * 60,
            "",
            f"Decision ID: {decision.decision_id}",
            f"Timestamp: {decision.timestamp.isoformat()}",
            f"Decision Type: {decision.decision_type}",
            f"Outcome: {decision.outcome}",
            "",
            "FACTORS CONSIDERED:",
            "-" * 40,
        ]

        for i, factor in enumerate(decision.factors, 1):
            lines.append(f"{i}. {factor.name}")
            lines.append(f"   Result: {'PASSED' if factor.passed else 'FAILED'}")
            lines.append(f"   Weight: {factor.weight:.0%}")
            lines.append(f"   Explanation: {factor.reason}")
            if factor.evidence:
                lines.append(f"   Evidence: {'; '.join(factor.evidence)}")
            lines.append("")

        lines.extend([
            "-" * 40,
            f"Confidence Level: {decision.confidence:.0%}",
            f"Policy Version: {decision.policy_version}",
            "",
            "This decision was made by an automated system. For questions",
            "or to request human review, contact your administrator.",
            "=" * 60,
        ])

        return "\n".join(lines)
768
+
769
+
770
class ExplainabilityLogger:
    """
    Records explainable decisions for audit and compliance.

    Works alongside the main AuditLogger so every security decision is
    retained in memory together with a full, human-readable explanation.

    Example:
        >>> from proxilion.audit import AuditLogger
        >>> from proxilion.audit.explainability import ExplainabilityLogger
        >>>
        >>> audit_logger = AuditLogger(config)
        >>> explainability_logger = ExplainabilityLogger(audit_logger)
        >>>
        >>> # Log an explained decision
        >>> decision = ExplainableDecision(...)
        >>> explainability_logger.log_decision(decision)
    """

    def __init__(
        self,
        audit_logger: Any | None = None,
        explainer: DecisionExplainer | None = None,
        auto_explain: bool = True,
        store_explanations: bool = True,
        max_stored: int = 10000,
    ) -> None:
        """
        Initialize the explainability logger.

        Args:
            audit_logger: Optional AuditLogger instance for integration.
            explainer: DecisionExplainer to use (a default is created
                when None is given).
            auto_explain: Whether explanations are generated automatically
                at log time.
            store_explanations: Whether generated explanations are kept
                in memory.
            max_stored: Upper bound on in-memory decisions/explanations.
        """
        self._audit_logger = audit_logger
        self._explainer = explainer or DecisionExplainer()
        self._auto_explain = auto_explain
        self._store_explanations = store_explanations
        self._max_stored = max_stored

        # RLock because public methods call each other (explain() calls
        # get_decision()) while holding no particular entry point.
        self._lock = threading.RLock()
        self._decisions: list[ExplainableDecision] = []
        self._explanations: dict[str, Explanation] = {}

    def log_decision(
        self,
        decision: ExplainableDecision,
        format: ExplanationFormat = ExplanationFormat.TEXT,
    ) -> Explanation | None:
        """
        Record a decision and, when enabled, generate its explanation.

        Args:
            decision: The decision to record.
            format: Output format for the generated explanation.

        Returns:
            The generated Explanation, or None when auto_explain is off.
        """
        explanation: Explanation | None = None
        if self._auto_explain:
            explanation = self._explainer.explain(decision, format=format)

        with self._lock:
            # Keep only the newest max_stored decisions.
            self._decisions.append(decision)
            overflow = len(self._decisions) - self._max_stored
            if overflow > 0:
                self._decisions = self._decisions[overflow:]

            if explanation and self._store_explanations:
                self._explanations[decision.decision_id] = explanation
                if len(self._explanations) > self._max_stored:
                    # Evict the 100 oldest entries; dicts preserve
                    # insertion order, so the front keys are oldest.
                    for stale in list(self._explanations.keys())[:100]:
                        del self._explanations[stale]

        # Forward to the audit logger outside the lock; a failure here
        # is logged but never propagated to the caller.
        if self._audit_logger is not None:
            try:
                self._audit_logger.log_custom(
                    event_type="explainable_decision",
                    data={
                        "decision": decision.to_dict(),
                        "explanation": explanation.to_dict() if explanation else None,
                    },
                )
            except Exception as e:
                logger.warning(f"Failed to log to audit logger: {e}")

        logger.debug(
            f"Logged explainable decision: {decision.decision_id} "
            f"({decision.decision_type} -> {decision.outcome})"
        )

        return explanation

    def get_decision(self, decision_id: str) -> ExplainableDecision | None:
        """Return the most recently logged decision with this ID, or None."""
        with self._lock:
            return next(
                (d for d in reversed(self._decisions) if d.decision_id == decision_id),
                None,
            )

    def get_explanation(self, decision_id: str) -> Explanation | None:
        """Return the stored explanation for a decision ID, or None."""
        with self._lock:
            return self._explanations.get(decision_id)

    def explain(
        self,
        decision_id: str,
        format: ExplanationFormat = ExplanationFormat.TEXT,
    ) -> Explanation | None:
        """
        Return a cached explanation or generate one on demand.

        Note: a previously cached explanation is returned as-is,
        regardless of the requested format.

        Args:
            decision_id: ID of the decision to explain.
            format: Desired output format for a newly generated explanation.

        Returns:
            The Explanation, or None when the decision is unknown.
        """
        with self._lock:
            try:
                return self._explanations[decision_id]
            except KeyError:
                pass

        decision = self.get_decision(decision_id)
        if decision is None:
            return None

        explanation = self._explainer.explain(decision, format=format)

        with self._lock:
            self._explanations[decision_id] = explanation

        return explanation

    def get_decisions(
        self,
        decision_type: DecisionType | str | None = None,
        outcome: Outcome | str | None = None,
        user_id: str | None = None,
        limit: int = 100,
    ) -> list[ExplainableDecision]:
        """
        Return the newest decisions matching the given filters.

        Args:
            decision_type: Keep only this decision type (enum or string).
            outcome: Keep only this outcome (enum or string).
            user_id: Keep only decisions whose context has this user_id.
            limit: Maximum number of decisions to return.

        Returns:
            Matching decisions, newest first.
        """
        def normalize(value: Any, enum_cls: type) -> str:
            # Compare enum members and raw strings via a common string form.
            return value.value if isinstance(value, enum_cls) else str(value)

        with self._lock:
            matches: list[ExplainableDecision] = []
            for candidate in reversed(self._decisions):
                if decision_type is not None and (
                    normalize(candidate.decision_type, DecisionType)
                    != normalize(decision_type, DecisionType)
                ):
                    continue
                if outcome is not None and (
                    normalize(candidate.outcome, Outcome)
                    != normalize(outcome, Outcome)
                ):
                    continue
                if user_id is not None and candidate.context.get("user_id") != user_id:
                    continue
                matches.append(candidate)
                if len(matches) >= limit:
                    break
            return matches

    def export_decisions(
        self,
        format: str = "json",
        include_explanations: bool = True,
    ) -> str:
        """
        Serialize all stored decisions (and explanations) to a string.

        Args:
            format: "jsonl" emits one JSON object per line; anything
                else yields a pretty-printed JSON array.
            include_explanations: Attach each decision's explanation
                when one is stored.

        Returns:
            The serialized records.
        """
        with self._lock:
            records = []
            for decision in self._decisions:
                entry = decision.to_dict()
                stored = self._explanations.get(decision.decision_id)
                if include_explanations and stored is not None:
                    entry["explanation"] = stored.to_dict()
                records.append(entry)

        if format == "jsonl":
            return "\n".join(json.dumps(r) for r in records)
        return json.dumps(records, indent=2)

    def clear(self) -> int:
        """Drop all stored decisions and explanations.

        Returns:
            The number of decisions that were removed.
        """
        with self._lock:
            removed = len(self._decisions)
            self._decisions.clear()
            self._explanations.clear()
            return removed
1012
+
1013
+
1014
+ # Convenience functions
1015
+
1016
def create_authorization_decision(
    user_id: str,
    tool_name: str,
    allowed: bool,
    factors: list["DecisionFactor"],
    context: dict[str, Any] | None = None,
) -> "ExplainableDecision":
    """
    Create an explainable authorization decision.

    Args:
        user_id: User making the request.
        tool_name: Tool being accessed.
        allowed: Whether access was granted.
        factors: Factors that contributed to the decision.
        context: Additional context. The dict is copied, not mutated.

    Returns:
        ExplainableDecision ready for logging.
    """
    # Copy the caller's context before augmenting it: the previous
    # implementation aliased the passed-in dict and silently injected
    # user_id/tool_name into the caller's own object.
    ctx: dict[str, Any] = dict(context) if context else {}
    ctx["user_id"] = user_id
    ctx["tool_name"] = tool_name

    return ExplainableDecision(
        decision_type=DecisionType.AUTHORIZATION,
        outcome=Outcome.ALLOWED if allowed else Outcome.DENIED,
        factors=factors,
        context=ctx,
    )
1046
+
1047
+
1048
def create_guard_decision(
    guard_type: str,
    passed: bool,
    factors: list["DecisionFactor"],
    content_sample: str | None = None,
    modified: bool = False,
) -> "ExplainableDecision":
    """
    Create an explainable guard decision.

    Args:
        guard_type: Type of guard ("input" or "output").
        passed: Whether content passed the guard.
        factors: Factors that contributed to the decision.
        content_sample: Sample of the content (truncated to 100 chars).
        modified: Whether content was modified (redacted).

    Returns:
        ExplainableDecision ready for logging.
    """
    # MODIFIED takes precedence over the plain pass/fail outcome.
    if modified:
        verdict = Outcome.MODIFIED
    else:
        verdict = Outcome.ALLOWED if passed else Outcome.DENIED

    kind = (
        DecisionType.INPUT_GUARD
        if guard_type == "input"
        else DecisionType.OUTPUT_GUARD
    )

    ctx: dict[str, Any] = {"guard_type": guard_type}
    if content_sample:
        # Keep only a short, ellipsized preview of the content.
        if len(content_sample) > 100:
            ctx["content_preview"] = content_sample[:100] + "..."
        else:
            ctx["content_preview"] = content_sample

    return ExplainableDecision(
        decision_type=kind,
        outcome=verdict,
        factors=factors,
        context=ctx,
    )
1091
+
1092
+
1093
def create_rate_limit_decision(
    user_id: str,
    allowed: bool,
    current_count: int,
    limit: int,
    window_seconds: int,
) -> "ExplainableDecision":
    """
    Create an explainable rate limit decision.

    Args:
        user_id: User being rate limited.
        allowed: Whether the request was allowed.
        current_count: Current request count in the window.
        limit: Maximum requests allowed.
        window_seconds: Window size in seconds.

    Returns:
        ExplainableDecision ready for logging.
    """
    # Single factor: the request count relative to the configured cap.
    usage_factor = DecisionFactor(
        name="request_count",
        passed=current_count <= limit,
        weight=1.0,
        reason=f"{current_count}/{limit} requests in {window_seconds}s window",
    )

    return ExplainableDecision(
        decision_type=DecisionType.RATE_LIMIT,
        outcome=Outcome.ALLOWED if allowed else Outcome.DENIED,
        factors=[usage_factor],
        context={
            "user_id": user_id,
            "current": current_count,
            "limit": limit,
            "window_seconds": window_seconds,
        },
    )
1131
+
1132
+
1133
def create_budget_decision(
    user_id: str,
    allowed: bool,
    spent: float,
    limit: float,
    period: str = "daily",
) -> "ExplainableDecision":
    """
    Create an explainable budget decision.

    Args:
        user_id: User being budget-checked.
        allowed: Whether spending is within budget.
        spent: Amount spent.
        limit: Budget limit.
        period: Budget period ("hourly", "daily", "monthly").

    Returns:
        ExplainableDecision ready for logging.
    """
    # Fraction of the budget consumed; guard the zero/negative-limit case.
    used_fraction = spent / limit if limit > 0 else 0

    budget_factor = DecisionFactor(
        name=f"{period}_budget",
        passed=spent <= limit,
        weight=1.0,
        reason=f"${spent:.2f}/${limit:.2f} ({used_fraction:.0%}) {period} budget used",
    )

    return ExplainableDecision(
        decision_type=DecisionType.BUDGET,
        outcome=Outcome.ALLOWED if allowed else Outcome.DENIED,
        factors=[budget_factor],
        context={
            "user_id": user_id,
            "spent": spent,
            "limit": limit,
            "period": period,
            "percentage": used_fraction,
        },
    )