proxilion-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
proxilion/audit/explainability.py
@@ -0,0 +1,1173 @@
"""
Explainable Decisions for Proxilion.

Provides human-readable audit trails explaining WHY each security
decision was made. Designed for CA SB 53 compliance and regulatory
transparency requirements.

Features:
- Human-readable explanation generation for all decision types
- Decision tree visualization
- Factor contribution analysis
- Multi-language explanation templates
- Confidence scoring with uncertainty breakdown
- Counterfactual explanations ("what would change the decision")

Example:
    >>> from proxilion.audit.explainability import (
    ...     ExplainableDecision,
    ...     DecisionExplainer,
    ...     ExplanationFormat,
    ... )
    >>>
    >>> # Create an explainer
    >>> explainer = DecisionExplainer()
    >>>
    >>> # Explain an authorization decision
    >>> decision = ExplainableDecision(
    ...     decision_type="authorization",
    ...     outcome="DENIED",
    ...     factors=[
    ...         DecisionFactor("role_check", False, 0.4, "User lacks 'admin' role"),
    ...         DecisionFactor("rate_limit", True, 0.3, "Within rate limits"),
    ...         DecisionFactor("time_window", True, 0.3, "Within allowed hours"),
    ...     ],
    ...     context={"user_id": "user_123", "tool": "delete_user"},
    ... )
    >>>
    >>> # Generate human-readable explanation
    >>> explanation = explainer.explain(decision)
    >>> print(explanation.summary)
    "Access DENIED: User lacks required 'admin' role for delete_user operation"
    >>>
    >>> # Get counterfactual
    >>> print(explanation.counterfactual)
    "Access would be ALLOWED if: User had 'admin' role"
"""

from __future__ import annotations

import hashlib
import json
import logging
import re
import threading
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Callable

logger = logging.getLogger(__name__)


class DecisionType(str, Enum):
    """Types of security decisions that can be explained."""

    AUTHORIZATION = "authorization"
    RATE_LIMIT = "rate_limit"
    INPUT_GUARD = "input_guard"
    OUTPUT_GUARD = "output_guard"
    CIRCUIT_BREAKER = "circuit_breaker"
    TRUST_BOUNDARY = "trust_boundary"
    INTENT_VALIDATION = "intent_validation"
    BEHAVIORAL_DRIFT = "behavioral_drift"
    BUDGET = "budget"
    CASCADE = "cascade"


class ExplanationFormat(str, Enum):
    """Output formats for explanations."""

    TEXT = "text"          # Plain text, human-readable
    MARKDOWN = "markdown"  # Markdown formatted
    HTML = "html"          # HTML formatted
    JSON = "json"          # Structured JSON
    LEGAL = "legal"        # Legal/compliance format


class Outcome(str, Enum):
    """Decision outcomes."""

    ALLOWED = "ALLOWED"
    DENIED = "DENIED"
    WARNED = "WARNED"
    MODIFIED = "MODIFIED"  # e.g., output was redacted
    DEFERRED = "DEFERRED"  # Decision pending more info


@dataclass
class DecisionFactor:
    """
    A single factor contributing to a decision.

    Attributes:
        name: Factor identifier (e.g., "role_check", "rate_limit").
        passed: Whether this factor passed (True) or failed (False).
        weight: Importance weight of this factor (0.0 to 1.0).
        reason: Human-readable explanation of the factor result.
        details: Additional structured details.
        evidence: Evidence/data that led to this factor result.
    """

    name: str
    passed: bool
    weight: float
    reason: str
    details: dict[str, Any] = field(default_factory=dict)
    evidence: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "passed": self.passed,
            "weight": self.weight,
            "reason": self.reason,
            "details": self.details,
            "evidence": self.evidence,
        }


@dataclass
class ExplainableDecision:
    """
    A security decision with full explainability metadata.

    Attributes:
        decision_id: Unique identifier for this decision.
        decision_type: Type of security decision.
        outcome: The decision outcome.
        factors: List of factors that contributed to the decision.
        context: Contextual information (user, tool, etc.).
        timestamp: When the decision was made.
        confidence: Confidence score (0.0 to 1.0).
        latency_ms: Time taken to make the decision.
        policy_version: Version of the policy used.
        metadata: Additional metadata.
    """

    decision_type: DecisionType | str
    outcome: Outcome | str
    factors: list[DecisionFactor]
    context: dict[str, Any] = field(default_factory=dict)
    decision_id: str = ""
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    confidence: float = 1.0
    latency_ms: float = 0.0
    policy_version: str = "1.0"
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        if not self.decision_id:
            # Generate deterministic ID from decision content
            content = f"{self.decision_type}:{self.outcome}:{self.timestamp.isoformat()}"
            self.decision_id = hashlib.sha256(content.encode()).hexdigest()[:16]

        # Convert string enums
        if isinstance(self.decision_type, str):
            try:
                self.decision_type = DecisionType(self.decision_type)
            except ValueError:
                pass  # Keep as string if not a known type

        if isinstance(self.outcome, str):
            try:
                self.outcome = Outcome(self.outcome)
            except ValueError:
                pass

    @property
    def passed(self) -> bool:
        """Whether the decision resulted in an allowed outcome."""
        return self.outcome in (Outcome.ALLOWED, "ALLOWED")

    @property
    def primary_factor(self) -> DecisionFactor | None:
        """Get the most important factor (by weight or first failing)."""
        if not self.factors:
            return None

        # If denied, return first failing factor with highest weight
        if not self.passed:
            failing = [f for f in self.factors if not f.passed]
            if failing:
                return max(failing, key=lambda f: f.weight)

        # Otherwise return highest weight factor
        return max(self.factors, key=lambda f: f.weight)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "decision_id": self.decision_id,
            "decision_type": str(self.decision_type.value if isinstance(self.decision_type, DecisionType) else self.decision_type),
            "outcome": str(self.outcome.value if isinstance(self.outcome, Outcome) else self.outcome),
            "factors": [f.to_dict() for f in self.factors],
            "context": self.context,
            "timestamp": self.timestamp.isoformat(),
            "confidence": self.confidence,
            "latency_ms": self.latency_ms,
            "policy_version": self.policy_version,
            "metadata": self.metadata,
        }

    def to_json(self) -> str:
        """Convert to JSON string."""
        return json.dumps(self.to_dict(), indent=2)


@dataclass
class Explanation:
    """
    Human-readable explanation of a decision.

    Attributes:
        decision_id: ID of the explained decision.
        summary: One-line summary of the decision.
        detailed: Multi-paragraph detailed explanation.
        factors_explained: Per-factor explanations.
        counterfactual: What would change the decision.
        confidence_breakdown: Explanation of confidence score.
        recommendations: Suggestions for the user.
        format: Output format used.
        language: Language code (e.g., "en", "es").
    """

    decision_id: str
    summary: str
    detailed: str
    factors_explained: list[str]
    counterfactual: str | None = None
    confidence_breakdown: str | None = None
    recommendations: list[str] = field(default_factory=list)
    format: ExplanationFormat = ExplanationFormat.TEXT
    language: str = "en"

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "decision_id": self.decision_id,
            "summary": self.summary,
            "detailed": self.detailed,
            "factors_explained": self.factors_explained,
            "counterfactual": self.counterfactual,
            "confidence_breakdown": self.confidence_breakdown,
            "recommendations": self.recommendations,
            "format": self.format.value,
            "language": self.language,
        }


# Default explanation templates
DEFAULT_TEMPLATES: dict[str, dict[str, str]] = {
    "en": {
        # Authorization
        "auth_allowed": "Access ALLOWED: {reason}",
        "auth_denied": "Access DENIED: {reason}",
        "auth_factor_pass": "✓ {name}: {reason}",
        "auth_factor_fail": "✗ {name}: {reason}",

        # Rate limiting
        "rate_allowed": "Request ALLOWED: Within rate limits ({current}/{limit} requests)",
        "rate_denied": "Request DENIED: Rate limit exceeded ({current}/{limit} requests)",
        "rate_counterfactual": "Request would be allowed after {wait_seconds} seconds",

        # Guards
        "guard_pass": "Content ALLOWED: No policy violations detected",
        "guard_block": "Content BLOCKED: {violation_type} detected - {reason}",
        "guard_redact": "Content MODIFIED: Sensitive information redacted",

        # Circuit breaker
        "circuit_closed": "Service AVAILABLE: Circuit breaker closed",
        "circuit_open": "Service UNAVAILABLE: Circuit breaker open after {failures} failures",
        "circuit_half_open": "Service TESTING: Circuit breaker allowing test request",

        # Trust boundary
        "trust_allowed": "Inter-agent communication ALLOWED: {from_agent} → {to_agent}",
        "trust_denied": "Inter-agent communication DENIED: Trust level insufficient",

        # Intent validation
        "intent_valid": "Tool call ALLOWED: Consistent with original intent",
        "intent_hijack": "Tool call BLOCKED: Potential intent hijack detected",

        # Budget
        "budget_ok": "Budget OK: {spent:.2f}/{limit:.2f} USD ({percentage:.0%})",
        "budget_exceeded": "Budget EXCEEDED: {spent:.2f}/{limit:.2f} USD",

        # Behavioral drift
        "drift_normal": "Behavior NORMAL: Within baseline parameters",
        "drift_detected": "Behavior ANOMALOUS: {metric} deviated by {deviation:.1f} std devs",

        # Generic
        "counterfactual_prefix": "Decision would change if: ",
        "no_counterfactual": "No simple change would alter this decision",
        "confidence_high": "High confidence ({confidence:.0%}): All factors clearly determined",
        "confidence_medium": "Medium confidence ({confidence:.0%}): Some uncertainty in factors",
        "confidence_low": "Low confidence ({confidence:.0%}): Significant uncertainty",
    }
}


class DecisionExplainer:
    """
    Generates human-readable explanations for security decisions.

    The explainer uses templates and decision factors to create
    clear, actionable explanations suitable for end users,
    compliance audits, and debugging.

    Example:
        >>> explainer = DecisionExplainer()
        >>>
        >>> decision = ExplainableDecision(
        ...     decision_type=DecisionType.AUTHORIZATION,
        ...     outcome=Outcome.DENIED,
        ...     factors=[
        ...         DecisionFactor("role", False, 0.5, "Missing admin role"),
        ...     ],
        ...     context={"user_id": "alice", "tool": "delete_user"},
        ... )
        >>>
        >>> explanation = explainer.explain(decision)
        >>> print(explanation.summary)
    """

    def __init__(
        self,
        templates: dict[str, dict[str, str]] | None = None,
        default_language: str = "en",
        include_evidence: bool = True,
        include_recommendations: bool = True,
    ) -> None:
        """
        Initialize the explainer.

        Args:
            templates: Custom explanation templates by language.
            default_language: Default language for explanations.
            include_evidence: Whether to include evidence in explanations.
            include_recommendations: Whether to include recommendations.
        """
        self._templates = dict(DEFAULT_TEMPLATES)
        if templates:
            for lang, tmpl in templates.items():
                if lang in self._templates:
                    self._templates[lang].update(tmpl)
                else:
                    self._templates[lang] = tmpl

        self._default_language = default_language
        self._include_evidence = include_evidence
        self._include_recommendations = include_recommendations

        # Custom explainers for specific decision types
        self._custom_explainers: dict[str, Callable[[ExplainableDecision], Explanation]] = {}

    def register_explainer(
        self,
        decision_type: DecisionType | str,
        explainer: Callable[[ExplainableDecision], Explanation],
    ) -> None:
        """
        Register a custom explainer for a decision type.

        Args:
            decision_type: Decision type to handle.
            explainer: Function that generates explanations.
        """
        key = decision_type.value if isinstance(decision_type, DecisionType) else decision_type
        self._custom_explainers[key] = explainer

    def explain(
        self,
        decision: ExplainableDecision,
        format: ExplanationFormat = ExplanationFormat.TEXT,
        language: str | None = None,
    ) -> Explanation:
        """
        Generate a human-readable explanation for a decision.

        Args:
            decision: The decision to explain.
            format: Desired output format.
            language: Language for the explanation.

        Returns:
            Explanation with summary, details, and counterfactual.
        """
        lang = language or self._default_language
        templates = self._templates.get(lang, self._templates["en"])

        # Check for custom explainer
        decision_type_key = (
            decision.decision_type.value
            if isinstance(decision.decision_type, DecisionType)
            else str(decision.decision_type)
        )

        if decision_type_key in self._custom_explainers:
            return self._custom_explainers[decision_type_key](decision)

        # Generate explanation based on decision type
        summary = self._generate_summary(decision, templates)
        detailed = self._generate_detailed(decision, templates)
        factors_explained = self._explain_factors(decision, templates)
        counterfactual = self._generate_counterfactual(decision, templates)
        confidence_breakdown = self._explain_confidence(decision, templates)
        recommendations = self._generate_recommendations(decision) if self._include_recommendations else []

        # Format the output
        if format == ExplanationFormat.MARKDOWN:
            summary = f"**{summary}**"
            detailed = self._to_markdown(detailed, factors_explained)
        elif format == ExplanationFormat.HTML:
            summary = f"<strong>{summary}</strong>"
            detailed = self._to_html(detailed, factors_explained)
        elif format == ExplanationFormat.LEGAL:
            detailed = self._to_legal_format(decision, detailed, factors_explained)

        return Explanation(
            decision_id=decision.decision_id,
            summary=summary,
            detailed=detailed,
            factors_explained=factors_explained,
            counterfactual=counterfactual,
            confidence_breakdown=confidence_breakdown,
            recommendations=recommendations,
            format=format,
            language=lang,
        )

    def _generate_summary(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str:
        """Generate one-line summary."""
        dt = decision.decision_type
        outcome = decision.outcome
        context = decision.context
        primary = decision.primary_factor

        # Determine template key based on decision type and outcome
        if dt == DecisionType.AUTHORIZATION or dt == "authorization":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                template = templates.get("auth_allowed", "Access ALLOWED: {reason}")
            else:
                template = templates.get("auth_denied", "Access DENIED: {reason}")
            reason = primary.reason if primary else "Policy evaluation"
            return template.format(reason=reason, **context)

        elif dt == DecisionType.RATE_LIMIT or dt == "rate_limit":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                template = templates.get("rate_allowed", "Request allowed")
            else:
                template = templates.get("rate_denied", "Rate limit exceeded")
            return template.format(**context)

        elif dt in (DecisionType.INPUT_GUARD, DecisionType.OUTPUT_GUARD) or dt in ("input_guard", "output_guard"):
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("guard_pass", "Content allowed")
            elif outcome in (Outcome.MODIFIED, "MODIFIED"):
                return templates.get("guard_redact", "Content modified")
            else:
                violation = context.get("violation_type", "Policy violation")
                reason = primary.reason if primary else "Security policy"
                return templates.get("guard_block", "Content blocked: {reason}").format(
                    violation_type=violation, reason=reason
                )

        elif dt == DecisionType.CIRCUIT_BREAKER or dt == "circuit_breaker":
            state = context.get("state", "unknown")
            if state == "closed":
                return templates.get("circuit_closed", "Service available")
            elif state == "open":
                failures = context.get("failures", 0)
                return templates.get("circuit_open", "Service unavailable").format(failures=failures)
            else:
                return templates.get("circuit_half_open", "Service testing")

        elif dt == DecisionType.INTENT_VALIDATION or dt == "intent_validation":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("intent_valid", "Tool call consistent with intent")
            else:
                return templates.get("intent_hijack", "Potential intent hijack detected")

        elif dt == DecisionType.BUDGET or dt == "budget":
            spent = context.get("spent", 0)
            limit = context.get("limit", 0)
            percentage = spent / limit if limit > 0 else 0
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("budget_ok", "Within budget").format(
                    spent=spent, limit=limit, percentage=percentage
                )
            else:
                return templates.get("budget_exceeded", "Budget exceeded").format(
                    spent=spent, limit=limit
                )

        elif dt == DecisionType.BEHAVIORAL_DRIFT or dt == "behavioral_drift":
            if outcome in (Outcome.ALLOWED, "ALLOWED"):
                return templates.get("drift_normal", "Behavior within normal range")
            else:
                metric = context.get("metric", "unknown")
                deviation = context.get("deviation", 0)
                return templates.get("drift_detected", "Behavioral anomaly").format(
                    metric=metric, deviation=deviation
                )

        # Default summary
        outcome_str = outcome.value if isinstance(outcome, Outcome) else str(outcome)
        dt_str = dt.value if isinstance(dt, DecisionType) else str(dt)
        reason = primary.reason if primary else "Policy decision"
        return f"{dt_str.title()} {outcome_str}: {reason}"

    def _generate_detailed(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str:
        """Generate detailed multi-paragraph explanation."""
        lines = []

        # Opening paragraph
        dt_str = (
            decision.decision_type.value
            if isinstance(decision.decision_type, DecisionType)
            else str(decision.decision_type)
        )
        outcome_str = (
            decision.outcome.value
            if isinstance(decision.outcome, Outcome)
            else str(decision.outcome)
        )

        lines.append(
            f"A {dt_str.replace('_', ' ')} decision was made at "
            f"{decision.timestamp.strftime('%Y-%m-%d %H:%M:%S UTC')}."
        )
        lines.append(f"The final outcome was: {outcome_str}.")
        lines.append("")

        # Context paragraph
        if decision.context:
            context_items = []
            for key, value in decision.context.items():
                if key not in ("_internal", "raw"):
                    context_items.append(f"- {key.replace('_', ' ').title()}: {value}")
            if context_items:
                lines.append("Context:")
                lines.extend(context_items)
                lines.append("")

        # Factors paragraph
        if decision.factors:
            lines.append("The following factors were evaluated:")
            for factor in decision.factors:
                status = "✓ PASSED" if factor.passed else "✗ FAILED"
                lines.append(f"- {factor.name}: {status} (weight: {factor.weight:.0%})")
                lines.append(f"  Reason: {factor.reason}")
                if self._include_evidence and factor.evidence:
                    for ev in factor.evidence:
                        lines.append(f"  Evidence: {ev}")
            lines.append("")

        # Confidence paragraph
        lines.append(f"Decision confidence: {decision.confidence:.0%}")
        lines.append(f"Processing time: {decision.latency_ms:.2f}ms")
        lines.append(f"Policy version: {decision.policy_version}")

        return "\n".join(lines)

    def _explain_factors(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> list[str]:
        """Generate per-factor explanations."""
        explanations = []

        for factor in decision.factors:
            if factor.passed:
                template = templates.get("auth_factor_pass", "✓ {name}: {reason}")
            else:
                template = templates.get("auth_factor_fail", "✗ {name}: {reason}")

            explanations.append(template.format(name=factor.name, reason=factor.reason))

        return explanations

    def _generate_counterfactual(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str | None:
        """Generate counterfactual explanation."""
        if not decision.factors:
            return None

        prefix = templates.get("counterfactual_prefix", "Decision would change if: ")

        if decision.passed:
            # What would cause denial?
            passing_factors = [f for f in decision.factors if f.passed]
            if passing_factors:
                critical = max(passing_factors, key=lambda f: f.weight)
                return f"{prefix}{critical.name} check failed"
        else:
            # What would cause approval?
            failing_factors = [f for f in decision.factors if not f.passed]
            if failing_factors:
                changes = []
                for f in failing_factors:
                    # Generate specific counterfactual based on factor name
                    if "role" in f.name.lower():
                        changes.append(f"User had the required role")
                    elif "rate" in f.name.lower():
                        changes.append(f"Request was within rate limits")
                    elif "budget" in f.name.lower():
                        changes.append(f"Budget was not exceeded")
                    elif "trust" in f.name.lower():
                        changes.append(f"Trust level was sufficient")
                    else:
                        changes.append(f"{f.name} check passed")

                if changes:
                    return prefix + "; ".join(changes)

        return templates.get("no_counterfactual", "No simple change would alter this decision")

    def _explain_confidence(
        self,
        decision: ExplainableDecision,
        templates: dict[str, str],
    ) -> str:
        """Explain the confidence score."""
        conf = decision.confidence

        if conf >= 0.9:
            template = templates.get("confidence_high", "High confidence ({confidence:.0%})")
        elif conf >= 0.7:
            template = templates.get("confidence_medium", "Medium confidence ({confidence:.0%})")
        else:
            template = templates.get("confidence_low", "Low confidence ({confidence:.0%})")

        return template.format(confidence=conf)

    def _generate_recommendations(self, decision: ExplainableDecision) -> list[str]:
        """Generate actionable recommendations."""
        recommendations = []

        if not decision.passed:
            failing = [f for f in decision.factors if not f.passed]
            for factor in failing:
                name = factor.name.lower()

                if "role" in name or "permission" in name:
                    recommendations.append(
                        "Contact your administrator to request the necessary permissions"
                    )
                elif "rate" in name:
                    recommendations.append(
                        "Wait before retrying, or contact support for rate limit increase"
                    )
                elif "budget" in name:
                    recommendations.append(
                        "Review your usage or request a budget increase from your admin"
                    )
                elif "trust" in name:
                    recommendations.append(
                        "Ensure proper agent authentication and delegation chains"
                    )
                elif "intent" in name:
                    recommendations.append(
                        "Verify the tool call matches the original user request"
                    )
                elif "circuit" in name:
                    recommendations.append(
                        "The service may be experiencing issues; retry later"
                    )

        # Remove duplicates while preserving order
        seen = set()
        unique = []
        for rec in recommendations:
            if rec not in seen:
                seen.add(rec)
                unique.append(rec)

        return unique[:3]  # Limit to top 3 recommendations

    def _to_markdown(self, detailed: str, factors: list[str]) -> str:
        """Convert explanation to Markdown format."""
        lines = detailed.split("\n")
        md_lines = []

        for line in lines:
            if line.endswith(":"):
                md_lines.append(f"\n### {line}\n")
            elif line.startswith("- "):
                md_lines.append(line)
            elif line.startswith("  "):
                md_lines.append(f"  {line.strip()}")
            else:
                md_lines.append(line)

        return "\n".join(md_lines)

    def _to_html(self, detailed: str, factors: list[str]) -> str:
        """Convert explanation to HTML format."""
        # Simple HTML conversion
        html = detailed.replace("\n\n", "</p><p>")
        html = html.replace("\n", "<br>")
        html = f"<div class='explanation'><p>{html}</p></div>"
        return html

    def _to_legal_format(
        self,
        decision: ExplainableDecision,
        detailed: str,
        factors: list[str],
    ) -> str:
        """Convert to legal/compliance format (CA SB 53 style)."""
        lines = [
            "=" * 60,
            "AUTOMATED DECISION DISCLOSURE",
            "(Per California SB 53 - AI Transparency Requirements)",
            "=" * 60,
            "",
            f"Decision ID: {decision.decision_id}",
            f"Timestamp: {decision.timestamp.isoformat()}",
            f"Decision Type: {decision.decision_type}",
            f"Outcome: {decision.outcome}",
            "",
            "FACTORS CONSIDERED:",
            "-" * 40,
        ]

        for i, factor in enumerate(decision.factors, 1):
            lines.append(f"{i}. {factor.name}")
            lines.append(f"   Result: {'PASSED' if factor.passed else 'FAILED'}")
            lines.append(f"   Weight: {factor.weight:.0%}")
            lines.append(f"   Explanation: {factor.reason}")
            if factor.evidence:
                lines.append(f"   Evidence: {'; '.join(factor.evidence)}")
            lines.append("")

        lines.extend([
            "-" * 40,
            f"Confidence Level: {decision.confidence:.0%}",
            f"Policy Version: {decision.policy_version}",
            "",
            "This decision was made by an automated system. For questions",
            "or to request human review, contact your administrator.",
            "=" * 60,
        ])

        return "\n".join(lines)


class ExplainabilityLogger:
    """
    Logs explainable decisions for audit and compliance.

    Integrates with the main AuditLogger to provide a complete
    record of all security decisions with full explanations.

    Example:
        >>> from proxilion.audit import AuditLogger
        >>> from proxilion.audit.explainability import ExplainabilityLogger
        >>>
        >>> audit_logger = AuditLogger(config)
        >>> explainability_logger = ExplainabilityLogger(audit_logger)
        >>>
        >>> # Log an explained decision
        >>> decision = ExplainableDecision(...)
        >>> explainability_logger.log_decision(decision)
    """

    def __init__(
        self,
        audit_logger: Any | None = None,
        explainer: DecisionExplainer | None = None,
        auto_explain: bool = True,
        store_explanations: bool = True,
        max_stored: int = 10000,
    ) -> None:
        """
        Initialize the explainability logger.

        Args:
            audit_logger: Optional AuditLogger instance for integration.
            explainer: DecisionExplainer to use (creates default if None).
            auto_explain: Whether to auto-generate explanations.
            store_explanations: Whether to store explanations in memory.
            max_stored: Maximum explanations to store.
        """
        self._audit_logger = audit_logger
        self._explainer = explainer or DecisionExplainer()
        self._auto_explain = auto_explain
        self._store_explanations = store_explanations
        self._max_stored = max_stored

        self._lock = threading.RLock()
        self._decisions: list[ExplainableDecision] = []
        self._explanations: dict[str, Explanation] = {}

    def log_decision(
        self,
        decision: ExplainableDecision,
        format: ExplanationFormat = ExplanationFormat.TEXT,
    ) -> Explanation | None:
        """
        Log a decision and optionally generate explanation.

        Args:
            decision: The decision to log.
            format: Format for the explanation.

        Returns:
            Explanation if auto_explain is enabled.
        """
        explanation = None

        if self._auto_explain:
            explanation = self._explainer.explain(decision, format=format)

        with self._lock:
            # Store decision
            self._decisions.append(decision)
            if len(self._decisions) > self._max_stored:
                self._decisions = self._decisions[-self._max_stored:]

            # Store explanation
            if explanation and self._store_explanations:
                self._explanations[decision.decision_id] = explanation
                if len(self._explanations) > self._max_stored:
                    # Remove oldest
                    oldest = list(self._explanations.keys())[:100]
                    for key in oldest:
                        del self._explanations[key]

        # Log to audit logger if available
        if self._audit_logger is not None:
            try:
                self._audit_logger.log_custom(
                    event_type="explainable_decision",
                    data={
                        "decision": decision.to_dict(),
                        "explanation": explanation.to_dict() if explanation else None,
                    },
                )
            except Exception as e:
                logger.warning(f"Failed to log to audit logger: {e}")

        logger.debug(
            f"Logged explainable decision: {decision.decision_id} "
            f"({decision.decision_type} -> {decision.outcome})"
        )

        return explanation

    def get_decision(self, decision_id: str) -> ExplainableDecision | None:
        """Get a decision by ID."""
        with self._lock:
            for decision in reversed(self._decisions):
                if decision.decision_id == decision_id:
                    return decision
        return None

    def get_explanation(self, decision_id: str) -> Explanation | None:
        """Get an explanation by decision ID."""
        with self._lock:
            return self._explanations.get(decision_id)

    def explain(
        self,
        decision_id: str,
        format: ExplanationFormat = ExplanationFormat.TEXT,
    ) -> Explanation | None:
        """
        Get or generate explanation for a decision.

        Args:
            decision_id: ID of the decision to explain.
            format: Desired output format.

        Returns:
            Explanation or None if decision not found.
        """
        # Check if already explained
        with self._lock:
            if decision_id in self._explanations:
                return self._explanations[decision_id]

        # Find and explain decision
        decision = self.get_decision(decision_id)
        if decision is None:
            return None

        explanation = self._explainer.explain(decision, format=format)

        with self._lock:
            self._explanations[decision_id] = explanation

        return explanation

    def get_decisions(
        self,
        decision_type: DecisionType | str | None = None,
        outcome: Outcome | str | None = None,
        user_id: str | None = None,
        limit: int = 100,
    ) -> list[ExplainableDecision]:
        """
        Get decisions with optional filters.

        Args:
            decision_type: Filter by decision type.
            outcome: Filter by outcome.
            user_id: Filter by user ID in context.
            limit: Maximum decisions to return.

        Returns:
            List of matching decisions.
        """
        with self._lock:
            results = []

            for decision in reversed(self._decisions):
                # Apply filters
                if decision_type is not None:
                    dt = decision.decision_type
                    dt_str = dt.value if isinstance(dt, DecisionType) else str(dt)
                    filter_str = (
                        decision_type.value
                        if isinstance(decision_type, DecisionType)
                        else str(decision_type)
                    )
                    if dt_str != filter_str:
                        continue

                if outcome is not None:
                    oc = decision.outcome
                    oc_str = oc.value if isinstance(oc, Outcome) else str(oc)
                    filter_str = (
                        outcome.value
                        if isinstance(outcome, Outcome)
                        else str(outcome)
                    )
                    if oc_str != filter_str:
                        continue

                if user_id is not None:
                    if decision.context.get("user_id") != user_id:
                        continue

                results.append(decision)

                if len(results) >= limit:
                    break

            return results

    def export_decisions(
        self,
        format: str = "json",
        include_explanations: bool = True,
    ) -> str:
        """
        Export all decisions and explanations.

        Args:
            format: Output format ("json" or "jsonl").
            include_explanations: Whether to include explanations.

        Returns:
            Exported data as string.
        """
        with self._lock:
            records = []

            for decision in self._decisions:
                record = decision.to_dict()

                if include_explanations and decision.decision_id in self._explanations:
                    record["explanation"] = self._explanations[decision.decision_id].to_dict()

                records.append(record)

            if format == "jsonl":
                return "\n".join(json.dumps(r) for r in records)
            else:
                return json.dumps(records, indent=2)

    def clear(self) -> int:
        """Clear all stored decisions and explanations."""
        with self._lock:
            count = len(self._decisions)
            self._decisions.clear()
            self._explanations.clear()
            return count


# Convenience functions

def create_authorization_decision(
    user_id: str,
    tool_name: str,
    allowed: bool,
    factors: list[DecisionFactor],
    context: dict[str, Any] | None = None,
) -> ExplainableDecision:
    """
    Create an explainable authorization decision.

    Args:
        user_id: User making the request.
        tool_name: Tool being accessed.
        allowed: Whether access was granted.
        factors: Factors that contributed to the decision.
        context: Additional context.

    Returns:
        ExplainableDecision ready for logging.
    """
    ctx = context or {}
    ctx["user_id"] = user_id
    ctx["tool_name"] = tool_name

    return ExplainableDecision(
        decision_type=DecisionType.AUTHORIZATION,
        outcome=Outcome.ALLOWED if allowed else Outcome.DENIED,
        factors=factors,
        context=ctx,
    )


def create_guard_decision(
    guard_type: str,
    passed: bool,
    factors: list[DecisionFactor],
    content_sample: str | None = None,
    modified: bool = False,
) -> ExplainableDecision:
    """
    Create an explainable guard decision.

    Args:
        guard_type: Type of guard ("input" or "output").
        passed: Whether content passed the guard.
        factors: Factors that contributed to the decision.
        content_sample: Sample of the content (truncated).
        modified: Whether content was modified (redacted).

    Returns:
        ExplainableDecision ready for logging.
    """
    if modified:
        outcome = Outcome.MODIFIED
    elif passed:
        outcome = Outcome.ALLOWED
    else:
        outcome = Outcome.DENIED

    decision_type = (
        DecisionType.INPUT_GUARD if guard_type == "input"
        else DecisionType.OUTPUT_GUARD
    )

    context = {"guard_type": guard_type}
    if content_sample:
        # Truncate and sanitize
        context["content_preview"] = content_sample[:100] + "..." if len(content_sample) > 100 else content_sample

    return ExplainableDecision(
        decision_type=decision_type,
        outcome=outcome,
        factors=factors,
        context=context,
    )


def create_rate_limit_decision(
    user_id: str,
    allowed: bool,
    current_count: int,
    limit: int,
    window_seconds: int,
) -> ExplainableDecision:
    """
    Create an explainable rate limit decision.

    Args:
        user_id: User being rate limited.
        allowed: Whether request was allowed.
        current_count: Current request count in window.
        limit: Maximum requests allowed.
        window_seconds: Window size in seconds.

    Returns:
        ExplainableDecision ready for logging.
    """
    return ExplainableDecision(
        decision_type=DecisionType.RATE_LIMIT,
        outcome=Outcome.ALLOWED if allowed else Outcome.DENIED,
        factors=[
            DecisionFactor(
                name="request_count",
                passed=current_count <= limit,
                weight=1.0,
                reason=f"{current_count}/{limit} requests in {window_seconds}s window",
            )
        ],
        context={
            "user_id": user_id,
            "current": current_count,
            "limit": limit,
            "window_seconds": window_seconds,
        },
    )


def create_budget_decision(
    user_id: str,
    allowed: bool,
    spent: float,
    limit: float,
    period: str = "daily",
) -> ExplainableDecision:
    """
    Create an explainable budget decision.

    Args:
        user_id: User being budget-checked.
        allowed: Whether within budget.
        spent: Amount spent.
        limit: Budget limit.
        period: Budget period ("hourly", "daily", "monthly").

    Returns:
        ExplainableDecision ready for logging.
    """
    percentage = spent / limit if limit > 0 else 0

    return ExplainableDecision(
        decision_type=DecisionType.BUDGET,
        outcome=Outcome.ALLOWED if allowed else Outcome.DENIED,
        factors=[
            DecisionFactor(
                name=f"{period}_budget",
                passed=spent <= limit,
                weight=1.0,
                reason=f"${spent:.2f}/${limit:.2f} ({percentage:.0%}) {period} budget used",
            )
        ],
        context={
            "user_id": user_id,
            "spent": spent,
            "limit": limit,
            "period": period,
            "percentage": percentage,
        },
    )