drift-detection 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- drift/__init__.py +28 -0
- drift/callbacks/__init__.py +1 -0
- drift/callbacks/langchain.py +243 -0
- drift/core.py +190 -0
- drift/detectors/__init__.py +34 -0
- drift/detectors/latency.py +148 -0
- drift/detectors/output_drift.py +213 -0
- drift/detectors/sequence.py +153 -0
- drift/models.py +101 -0
- drift_detection-0.1.0.dist-info/METADATA +181 -0
- drift_detection-0.1.0.dist-info/RECORD +13 -0
- drift_detection-0.1.0.dist-info/WHEEL +4 -0
- drift_detection-0.1.0.dist-info/licenses/LICENSE +21 -0
drift/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Drift — Statistical anomaly detection for AI agent workflows.
|
|
2
|
+
|
|
3
|
+
Catch silent failures, hallucination drift, and off-script behavior
|
|
4
|
+
in your LangChain, CrewAI, and custom AI agents.
|
|
5
|
+
|
|
6
|
+
Quickstart:
|
|
7
|
+
from drift import DriftGuard
|
|
8
|
+
from drift.callbacks.langchain import DriftCallbackHandler
|
|
9
|
+
|
|
10
|
+
guard = DriftGuard()
|
|
11
|
+
handler = DriftCallbackHandler(guard)
|
|
12
|
+
agent.run("your query", callbacks=[handler])
|
|
13
|
+
guard.report()
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from drift.core import DriftGuard
|
|
17
|
+
from drift.models import AgentEvent, Anomaly, AnomalyType, EventType, Severity
|
|
18
|
+
|
|
19
|
+
__version__ = "0.1.0"
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"DriftGuard",
|
|
23
|
+
"AgentEvent",
|
|
24
|
+
"Anomaly",
|
|
25
|
+
"AnomalyType",
|
|
26
|
+
"EventType",
|
|
27
|
+
"Severity",
|
|
28
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Framework callback integrations for Drift."""
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""LangChain callback handler for Drift.
|
|
2
|
+
|
|
3
|
+
Integrates with LangChain's callback system to automatically capture
|
|
4
|
+
agent events and feed them into the DriftGuard detection engine.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from drift import DriftGuard
|
|
8
|
+
from drift.callbacks.langchain import DriftCallbackHandler
|
|
9
|
+
|
|
10
|
+
guard = DriftGuard()
|
|
11
|
+
handler = DriftCallbackHandler(guard)
|
|
12
|
+
|
|
13
|
+
# Use with any LangChain agent/chain:
|
|
14
|
+
agent.run("your query", callbacks=[handler])
|
|
15
|
+
|
|
16
|
+
# Check results:
|
|
17
|
+
guard.report()
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import time
|
|
23
|
+
from typing import Any, Optional
|
|
24
|
+
from uuid import UUID
|
|
25
|
+
|
|
26
|
+
from drift.core import DriftGuard
|
|
27
|
+
from drift.models import AgentEvent, EventType
|
|
28
|
+
|
|
29
|
+
# langchain-core is an optional dependency: fail at import time with an
# actionable message instead of a bare ModuleNotFoundError.
try:
    from langchain_core.callbacks import BaseCallbackHandler
except ImportError as exc:
    # The previous hint told users to install "driftguard[langchain]", but this
    # distribution's dist-info is named "drift_detection", so that command
    # would fetch an unrelated project. Point at the missing dependency itself.
    raise ImportError(
        "LangChain integration requires langchain-core. "
        "Install it with: pip install langchain-core"
    ) from exc  # keep the original failure visible in the traceback
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DriftCallbackHandler(BaseCallbackHandler):
    """LangChain callback handler that feeds events into DriftGuard.

    LLM and tool callbacks are converted into ``AgentEvent`` objects and passed
    to ``guard.ingest``. Start callbacks remember a start timestamp and a
    display name per run id; the matching end/error callback pops both, so the
    elapsed time can be attached as ``latency_ms``. Chain callbacks only
    maintain that bookkeeping and do not ingest events.
    """

    def __init__(self, guard: DriftGuard):
        """Bind the handler to the guard that will receive every event."""
        self.guard = guard
        # Both maps are keyed by str(run_id); entries are added by *_start and
        # removed by the matching *_end / *_error callback.
        self._start_times: dict[str, float] = {}
        self._event_names: dict[str, str] = {}
        # Not read or written by any method of this class; kept so existing
        # subclasses that touch the attribute keep working.
        self._event_inputs: dict[str, dict[str, Any]] = {}

    # ---- Internal helpers ----

    def _run_key(self, run_id: UUID) -> str:
        """Return the dictionary key used for a run id."""
        return str(run_id)

    def _parent_key(self, parent_run_id: Optional[UUID]) -> Optional[str]:
        """Return the key of the parent run, or None when there is no parent."""
        return self._run_key(parent_run_id) if parent_run_id else None

    @staticmethod
    def _serialized_class_name(serialized: Optional[dict[str, Any]]) -> str:
        """Derive a display name from the last element of ``serialized["id"]``.

        Falls back to ``"unknown"`` when ``serialized`` is None, has no ``id``
        entry, or the ``id`` list is empty.
        """
        name = (serialized or {}).get("id", ["unknown"])
        if isinstance(name, list):
            name = name[-1] if name else "unknown"
        return str(name)

    def _begin(self, run_id: UUID, name: str) -> str:
        """Record start time and name for a run and return its key."""
        key = self._run_key(run_id)
        # perf_counter is monotonic, so a wall-clock adjustment between start
        # and end cannot produce a negative or inflated latency.
        self._start_times[key] = time.perf_counter()
        self._event_names[key] = name
        return key

    def _finish(
        self, run_id: UUID, default_name: str
    ) -> tuple[str, str, Optional[float]]:
        """Pop the bookkeeping for a run.

        Returns:
            ``(key, name, latency_ms)``. ``latency_ms`` is None when no start
            was recorded for this run; ``name`` is ``default_name`` in that case.
        """
        key = self._run_key(run_id)
        start = self._start_times.pop(key, None)
        # Compare against None explicitly: a start value of 0.0 is valid.
        latency_ms = None if start is None else (time.perf_counter() - start) * 1000
        name = self._event_names.pop(key, default_name)
        return key, name, latency_ms

    # ---- LLM events ----

    def on_llm_start(
        self,
        serialized: dict[str, Any],
        prompts: list[str],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Start timing an LLM call and ingest an ``LLM_START`` event."""
        name = self._serialized_class_name(serialized)
        key = self._begin(run_id, name)

        self.guard.ingest(AgentEvent(
            event_type=EventType.LLM_START,
            name=name,
            run_id=key,
            parent_id=self._parent_key(parent_run_id),
            inputs={"prompts": prompts[:1]},  # Only first prompt to save memory
        ))

    def on_llm_end(
        self,
        response: Any,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Ingest an ``LLM_END`` event with latency, output text and token usage.

        The text is taken from ``response.generations[0][0]`` and token counts
        from ``response.llm_output["token_usage"]`` when those are present;
        otherwise the text is empty and the counts are None.
        """
        key, name, latency_ms = self._finish(run_id, "unknown")

        output_text = ""
        generations = getattr(response, "generations", None)
        if generations:
            first_batch = generations[0]
            if first_batch:
                first = first_batch[0]
                output_text = first.text if hasattr(first, "text") else str(first)

        input_tokens = output_tokens = token_count = None
        llm_output = getattr(response, "llm_output", None)
        if llm_output:
            # "token_usage" may be present but None; treat that like missing.
            usage = llm_output.get("token_usage") or {}
            input_tokens = usage.get("prompt_tokens")
            output_tokens = usage.get("completion_tokens")
            token_count = usage.get("total_tokens")

        self.guard.ingest(AgentEvent(
            event_type=EventType.LLM_END,
            name=name,
            run_id=key,
            parent_id=self._parent_key(parent_run_id),
            latency_ms=latency_ms,
            output_text=output_text,
            token_count=token_count,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
        ))

    def on_llm_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Ingest an ``ERROR`` event for a failed LLM call."""
        key, name, latency_ms = self._finish(run_id, "unknown")

        self.guard.ingest(AgentEvent(
            event_type=EventType.ERROR,
            name=name,
            run_id=key,
            # Passed for parity with the start/end events of the same run.
            parent_id=self._parent_key(parent_run_id),
            latency_ms=latency_ms,
            error=str(error),
        ))

    # ---- Tool events ----

    def on_tool_start(
        self,
        serialized: dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Start timing a tool call and ingest a ``TOOL_START`` event."""
        name = (serialized or {}).get("name", "unknown_tool")
        key = self._begin(run_id, name)

        self.guard.ingest(AgentEvent(
            event_type=EventType.TOOL_START,
            name=name,
            run_id=key,
            parent_id=self._parent_key(parent_run_id),
            inputs={"input": input_str[:500]},  # Truncate large inputs
        ))

    def on_tool_end(
        self,
        output: Any,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Ingest a ``TOOL_END`` event with latency and (truncated) output."""
        key, name, latency_ms = self._finish(run_id, "unknown_tool")

        # Only a missing result maps to "". Falsy results such as 0, False or
        # [] are real tool outputs and are recorded as their string form.
        output_text = "" if output is None else str(output)[:2000]

        self.guard.ingest(AgentEvent(
            event_type=EventType.TOOL_END,
            name=name,
            run_id=key,
            parent_id=self._parent_key(parent_run_id),
            latency_ms=latency_ms,
            output_text=output_text,
        ))

    def on_tool_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Ingest an ``ERROR`` event for a failed tool call."""
        key, name, latency_ms = self._finish(run_id, "unknown_tool")

        self.guard.ingest(AgentEvent(
            event_type=EventType.ERROR,
            name=name,
            run_id=key,
            parent_id=self._parent_key(parent_run_id),
            latency_ms=latency_ms,
            error=str(error),
        ))

    # ---- Chain events ----

    def on_chain_start(
        self,
        serialized: dict[str, Any],
        inputs: dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Record start time and name for a chain run (no event is ingested)."""
        self._begin(run_id, self._serialized_class_name(serialized))

    def on_chain_end(
        self,
        outputs: dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Drop the bookkeeping recorded by ``on_chain_start``."""
        self._finish(run_id, "unknown")

    def on_chain_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Drop the bookkeeping for a chain that failed.

        Without this, entries added by ``on_chain_start`` were never removed
        when a chain raised, so both maps grew for the handler's lifetime.
        """
        self._finish(run_id, "unknown")
|
drift/core.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Core Drift engine — orchestrates detectors and manages the event stream."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Callable, Optional
|
|
7
|
+
|
|
8
|
+
from drift.detectors import BaseDetector
|
|
9
|
+
from drift.detectors.latency import LatencyDetector
|
|
10
|
+
from drift.detectors.output_drift import OutputDriftDetector
|
|
11
|
+
from drift.detectors.sequence import SequenceDetector
|
|
12
|
+
from drift.models import AgentEvent, Anomaly, Severity
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Type alias for anomaly callbacks
|
|
16
|
+
AnomalyCallback = Callable[[Anomaly], None]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DriftGuard:
    """Main entry point for Drift anomaly detection.

    Manages a set of detectors, ingests agent events, collects anomalies,
    and optionally fires callbacks when anomalies are detected.

    Usage:
        guard = DriftGuard()

        # Use with LangChain:
        from drift.callbacks.langchain import DriftCallbackHandler
        handler = DriftCallbackHandler(guard)
        agent.run("query", callbacks=[handler])

        # Or ingest events directly:
        guard.ingest(event)

        # Check results:
        guard.report()
    """

    def __init__(
        self,
        detectors: list[BaseDetector] | None = None,
        on_anomaly: AnomalyCallback | None = None,
        min_severity: Severity = Severity.LOW,
    ):
        """Initialize DriftGuard.

        Args:
            detectors: Custom list of detectors. If None, uses all defaults
                (latency, sequence and output-drift detectors).
            on_anomaly: Callback fired for each recorded anomaly. Exceptions it
                raises are reported on stderr and are not propagated.
            min_severity: Minimum severity to record/report.
        """
        if detectors is None:
            self.detectors: list[BaseDetector] = [
                LatencyDetector(),
                SequenceDetector(),
                OutputDriftDetector(),
            ]
        else:
            self.detectors = detectors

        self.on_anomaly = on_anomaly
        self.min_severity = min_severity
        self._anomalies: list[Anomaly] = []
        self._events: list[AgentEvent] = []
        # Guards _events, _anomalies and detector state. It is a plain
        # (non-reentrant) Lock, so code running under it must not call back
        # into the public accessors below.
        self._lock = threading.Lock()

    def ingest(self, event: AgentEvent) -> list[Anomaly]:
        """Process an event through all detectors.

        Args:
            event: The agent event to analyze.

        Returns:
            List of anomalies detected from this event whose severity is at
            least ``min_severity``.
        """
        import sys  # function-scope import: only needed on the error paths

        new_anomalies: list[Anomaly] = []

        with self._lock:
            self._events.append(event)

            for detector in self.detectors:
                try:
                    for anomaly in detector.ingest(event):
                        if self._severity_value(anomaly.severity) >= self._severity_value(
                            self.min_severity
                        ):
                            new_anomalies.append(anomaly)
                            self._anomalies.append(anomaly)
                except Exception as e:
                    # Detectors should never crash the agent
                    print(
                        f"[drift] Detector {detector.name!r} error: {e}",
                        file=sys.stderr,
                    )

        # Fire callbacks outside the lock so a callback may read guard state.
        callback = self.on_anomaly
        if callback:
            for anomaly in new_anomalies:
                try:
                    callback(anomaly)
                except Exception as e:
                    # Still best-effort, but no longer silent: a broken
                    # callback is reported the same way a broken detector is.
                    print(
                        f"[drift] on_anomaly callback error: {e}",
                        file=sys.stderr,
                    )

        return new_anomalies

    @property
    def anomalies(self) -> list[Anomaly]:
        """All anomalies detected so far (a copy)."""
        with self._lock:
            return list(self._anomalies)

    @property
    def events(self) -> list[AgentEvent]:
        """All events ingested so far (a copy)."""
        with self._lock:
            return list(self._events)

    @property
    def anomaly_count(self) -> int:
        """Number of anomalies recorded so far."""
        with self._lock:
            return len(self._anomalies)

    @property
    def event_count(self) -> int:
        """Number of events ingested so far."""
        with self._lock:
            return len(self._events)

    def report(self, verbose: bool = False) -> str:
        """Generate a human-readable anomaly report.

        Args:
            verbose: If True, include statistical context for each anomaly.

        Returns:
            Formatted report string (also printed to stdout).
        """
        # Take one snapshot so the header counts and the listing agree even if
        # another thread ingests events while the report is being built.
        with self._lock:
            anomalies = list(self._anomalies)
            events_processed = len(self._events)

        lines: list[str] = []
        lines.append("")
        lines.append("=" * 60)
        lines.append(" DRIFT ANOMALY REPORT")
        lines.append("=" * 60)
        lines.append(f" Events processed: {events_processed}")
        lines.append(f" Anomalies found: {len(anomalies)}")

        if not anomalies:
            lines.append("")
            lines.append(" ✓ No anomalies detected.")
        else:
            # Group by severity
            by_severity: dict[Severity, list[Anomaly]] = {}
            for a in anomalies:
                by_severity.setdefault(a.severity, []).append(a)

            # Most severe group first.
            for sev in [Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW]:
                group = by_severity.get(sev, [])
                if not group:
                    continue
                lines.append("")
                lines.append(f" [{sev.value.upper()}] ({len(group)})")
                for a in group:
                    lines.append(f" • {a.anomaly_type.value}: {a.message}")
                    if verbose and a.z_score is not None:
                        lines.append(f" z-score: {a.z_score:.2f}")
                    if verbose and a.expected_range is not None:
                        lo, hi = a.expected_range
                        lines.append(f" expected range: [{lo:.1f}, {hi:.1f}]")

        lines.append("")
        lines.append("=" * 60)
        lines.append("")

        report_text = "\n".join(lines)
        print(report_text)
        return report_text

    def reset(self) -> None:
        """Reset all state — detectors, events, anomalies."""
        with self._lock:
            for detector in self.detectors:
                detector.reset()
            self._anomalies.clear()
            self._events.clear()

    @staticmethod
    def _severity_value(severity: Severity) -> int:
        """Map a severity to an integer rank (LOW=0 ... CRITICAL=3)."""
        return {
            Severity.LOW: 0,
            Severity.MEDIUM: 1,
            Severity.HIGH: 2,
            Severity.CRITICAL: 3,
        }[severity]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Base detector interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
from drift.models import AgentEvent, Anomaly
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BaseDetector(ABC):
    """Contract every anomaly detector must fulfil.

    Concrete detectors provide a ``name`` property plus ``ingest`` and
    ``reset`` methods; the class cannot be instantiated until all three exist.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the detector's human-readable name."""

    @abstractmethod
    def ingest(self, event: AgentEvent) -> list[Anomaly]:
        """Analyze one event.

        Args:
            event: The agent event to analyze.

        Returns:
            The anomalies found for this event; an empty list when there are
            none.
        """

    @abstractmethod
    def reset(self) -> None:
        """Discard accumulated detector state (baselines, history, etc.)."""
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Latency and token count anomaly detection via statistical process control.
|
|
2
|
+
|
|
3
|
+
Uses a rolling window to compute mean and standard deviation, then flags
|
|
4
|
+
events whose latency or token count falls outside a configurable z-score
|
|
5
|
+
threshold. This is the simplest and most immediately useful detector —
|
|
6
|
+
it catches hung API calls, runaway generation, and upstream provider issues.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
|
|
16
|
+
from drift.detectors import BaseDetector
|
|
17
|
+
from drift.models import AgentEvent, Anomaly, AnomalyType, EventType, Severity
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class LatencyDetectorConfig:
    """Tunable settings for the latency/token detector."""

    # Number of recent samples kept per key as the rolling baseline.
    window_size: int = 50
    # How many standard deviations from the mean a value must be to be flagged.
    z_threshold: float = 3.0
    # Baseline samples required before any check is performed.
    min_samples: int = 5
    # Z-score at or above which an anomaly is rated CRITICAL.
    critical_z: float = 5.0
    # When True, token counts are monitored in addition to latency.
    track_tokens: bool = True
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LatencyDetector(BaseDetector):
    """Flags unusual latency and token counts with rolling z-scores.

    A separate rolling window is kept per event name, so the baseline of one
    tool or model never mixes with another's.
    """

    def __init__(self, config: LatencyDetectorConfig | None = None):
        """Create the detector, falling back to the default configuration."""
        self.config = config or LatencyDetectorConfig()
        # Both maps: event name -> most recent observed values.
        self._latency_windows: dict[str, list[float]] = defaultdict(list)
        self._token_windows: dict[str, list[float]] = defaultdict(list)

    @property
    def name(self) -> str:
        """Identifier reported on anomalies produced by this detector."""
        return "latency_detector"

    def ingest(self, event: AgentEvent) -> list[Anomaly]:
        """Check an LLM_END/TOOL_END event; other event types yield nothing."""
        # Only completion events are analyzed.
        if event.event_type not in (EventType.LLM_END, EventType.TOOL_END):
            return []

        baseline_key = event.name or "unknown"
        found: list[Anomaly] = []

        if event.latency_ms is not None:
            self._score_then_record(
                found,
                self._latency_windows[baseline_key],
                event.latency_ms,
                "latency",
                "ms",
                AnomalyType.LATENCY_SPIKE,
                event,
            )

        if self.config.track_tokens and event.token_count is not None:
            self._score_then_record(
                found,
                self._token_windows[baseline_key],
                float(event.token_count),
                "token_count",
                "tokens",
                AnomalyType.TOKEN_ANOMALY,
                event,
            )

        return found

    def _score_then_record(
        self,
        found: list[Anomaly],
        window: list[float],
        value: float,
        metric_name: str,
        unit: str,
        anomaly_type: AnomalyType,
        event: AgentEvent,
    ) -> None:
        """Score ``value`` against ``window``, then add it to the window.

        The check runs before the append, so a value is never part of its own
        baseline. Any resulting anomaly is appended to ``found``.
        """
        hit = self._check_value(
            value=value,
            window=window,
            metric_name=metric_name,
            unit=unit,
            anomaly_type=anomaly_type,
            event=event,
        )
        if hit:
            found.append(hit)

        window.append(value)
        if len(window) > self.config.window_size:
            window.pop(0)  # drop the oldest sample

    def _check_value(
        self,
        value: float,
        window: list[float],
        metric_name: str,
        unit: str,
        anomaly_type: AnomalyType,
        event: AgentEvent,
    ) -> Anomaly | None:
        """Compare one metric value with its rolling baseline.

        Returns:
            An ``Anomaly`` when ``|z|`` reaches ``config.z_threshold``; None
            when the window holds fewer than ``config.min_samples`` values,
            the baseline is (near-)constant, or the value is within range.
        """
        cfg = self.config
        if len(window) < cfg.min_samples:
            return None

        baseline = np.array(window)
        mean = float(baseline.mean())
        std = float(baseline.std())

        # A (near-)constant baseline has no usable spread for a z-score.
        if std < 1e-9:
            return None

        z = (value - mean) / std
        magnitude = abs(z)
        if magnitude < cfg.z_threshold:
            return None

        severity = Severity.MEDIUM
        if magnitude >= cfg.critical_z:
            severity = Severity.CRITICAL
        elif magnitude >= cfg.z_threshold + 1:
            severity = Severity.HIGH

        direction = "above" if z > 0 else "below"
        spread = cfg.z_threshold * std

        return Anomaly(
            anomaly_type=anomaly_type,
            severity=severity,
            message=(
                f"{event.name!r} {metric_name} is {value:.1f}{unit}, "
                f"{magnitude:.1f}σ {direction} mean ({mean:.1f}{unit} ± {std:.1f})"
            ),
            event=event,
            detector_name=self.name,
            observed_value=value,
            expected_range=(mean - spread, mean + spread),
            z_score=z,
            context={"window_size": len(window), "mean": mean, "std": std},
        )

    def reset(self) -> None:
        """Forget every latency and token baseline."""
        self._latency_windows.clear()
        self._token_windows.clear()
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Output drift detection.
|
|
2
|
+
|
|
3
|
+
Tracks statistical properties of agent outputs over time and flags when
|
|
4
|
+
the distribution shifts significantly. This catches hallucination drift,
|
|
5
|
+
prompt injection effects, and gradual quality degradation.
|
|
6
|
+
|
|
7
|
+
Uses lightweight heuristics (output length, vocabulary diversity,
|
|
8
|
+
structural patterns) rather than embeddings for zero-dependency operation.
|
|
9
|
+
Embedding-based drift detection can be added as an optional enhancement.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import math
|
|
15
|
+
import re
|
|
16
|
+
from collections import defaultdict
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
from drift.detectors import BaseDetector
|
|
22
|
+
from drift.models import AgentEvent, Anomaly, AnomalyType, EventType, Severity
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class OutputDriftConfig:
    """Tunable settings for the output drift detector."""

    # Number of recent samples kept per key as the rolling baseline.
    window_size: int = 30
    # Samples required before detection starts.
    min_samples: int = 8
    # Z-score at which an output length is flagged.
    length_z_threshold: float = 3.0
    # Z-score at which a vocabulary-diversity ratio is flagged.
    vocab_z_threshold: float = 3.0
    # When True, changes in structural pattern are tracked as well.
    structure_change: bool = True
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class OutputDriftDetector(BaseDetector):
    """Detects drift in agent output characteristics.

    Tracks multiple lightweight signals per tool/model:
    - Output length distribution
    - Vocabulary diversity (unique words / total words)
    - Structural patterns (presence of code blocks, JSON, lists, etc.)

    No embedding model is used; the signals rely on string operations,
    regular expressions and numpy statistics only.
    """

    # Compiled once at class creation instead of on every ingested event.
    _BULLET_RE = re.compile(r"^\s*[-*]\s", re.MULTILINE)
    _NUMBERED_RE = re.compile(r"^\s*\d+\.\s", re.MULTILINE)
    _HEADER_RE = re.compile(r"^#+\s", re.MULTILINE)

    def __init__(self, config: OutputDriftConfig | None = None):
        """Create the detector, falling back to the default configuration."""
        self.config = config or OutputDriftConfig()
        # Per event name: rolling windows of output length / diversity ratio.
        self._length_windows: dict[str, list[float]] = defaultdict(list)
        self._vocab_windows: dict[str, list[float]] = defaultdict(list)
        # Per event name: how often each structure label has been observed.
        self._structure_counts: dict[str, dict[str, int]] = defaultdict(
            lambda: defaultdict(int)
        )
        # Per event name: number of outputs seen by the structure check.
        self._total_by_key: dict[str, int] = defaultdict(int)

    @property
    def name(self) -> str:
        """Identifier reported on anomalies produced by this detector."""
        return "output_drift_detector"

    def ingest(self, event: AgentEvent) -> list[Anomaly]:
        """Check the output text of an LLM_END/TOOL_END event.

        Returns an empty list for other event types and for events without
        output text.
        """
        anomalies: list[Anomaly] = []

        if event.event_type not in (EventType.LLM_END, EventType.TOOL_END):
            return anomalies

        if not event.output_text:
            return anomalies

        key = event.name or "unknown"
        text = event.output_text

        # --- Length anomaly ---
        length = float(len(text))
        length_anomaly = self._check_zscore(
            value=length,
            window=self._length_windows[key],
            metric="output_length",
            unit="chars",
            event=event,
            z_threshold=self.config.length_z_threshold,
        )
        if length_anomaly:
            anomalies.append(length_anomaly)
        self._update_window(self._length_windows[key], length)

        # --- Vocabulary diversity ---
        words = text.lower().split()
        if len(words) > 5:  # shorter outputs are not scored for diversity
            diversity = len(set(words)) / len(words)
            vocab_anomaly = self._check_zscore(
                value=diversity,
                window=self._vocab_windows[key],
                metric="vocab_diversity",
                unit="ratio",
                event=event,
                z_threshold=self.config.vocab_z_threshold,
            )
            if vocab_anomaly:
                anomalies.append(vocab_anomaly)
            self._update_window(self._vocab_windows[key], diversity)

        # --- Structural pattern change ---
        if self.config.structure_change:
            structure = self._extract_structure(text)
            struct_anomaly = self._check_structure(key, structure, event)
            if struct_anomaly:
                anomalies.append(struct_anomaly)

        return anomalies

    def _check_zscore(
        self,
        value: float,
        window: list[float],
        metric: str,
        unit: str,
        event: AgentEvent,
        z_threshold: float,
    ) -> Anomaly | None:
        """Generic z-score check against a rolling window.

        Returns:
            An ``Anomaly`` when ``|z|`` reaches ``z_threshold``; None when the
            window holds fewer than ``config.min_samples`` values, the
            baseline is (near-)constant, or the value is within range.
        """
        if len(window) < self.config.min_samples:
            return None

        arr = np.array(window)
        mean = float(np.mean(arr))
        std = float(np.std(arr))

        # A (near-)constant baseline has no usable spread for a z-score.
        if std < 1e-9:
            return None

        z = (value - mean) / std
        if abs(z) < z_threshold:
            return None

        severity = Severity.HIGH if abs(z) > z_threshold + 2 else Severity.MEDIUM
        direction = "above" if z > 0 else "below"

        return Anomaly(
            anomaly_type=AnomalyType.OUTPUT_DRIFT,
            severity=severity,
            message=(
                f"{event.name!r} {metric} is {value:.1f} {unit}, "
                f"{abs(z):.1f}σ {direction} baseline ({mean:.1f} ± {std:.1f})"
            ),
            event=event,
            detector_name=self.name,
            observed_value=value,
            expected_range=(mean - z_threshold * std, mean + z_threshold * std),
            z_score=z,
        )

    def _extract_structure(self, text: str) -> str:
        """Classify the structural pattern of an output.

        Returns:
            The matching labels sorted and joined with ``"|"``, or
            ``"plain_text"`` when nothing matches.
        """
        patterns: list[str] = []
        if "```" in text:
            patterns.append("code_block")

        # Same condition as the regex [{\[].*[}\]] with DOTALL (an opening
        # brace/bracket followed anywhere later by a closing one), evaluated
        # in linear time. The regex backtracked quadratically on text with
        # many openers and no closer.
        open_positions = [pos for pos in (text.find("{"), text.find("[")) if pos >= 0]
        if open_positions and max(text.rfind("}"), text.rfind("]")) > min(open_positions):
            patterns.append("json_like")

        if self._BULLET_RE.search(text):
            patterns.append("bullet_list")
        if self._NUMBERED_RE.search(text):
            patterns.append("numbered_list")
        if self._HEADER_RE.search(text):
            patterns.append("markdown_headers")
        if not text.strip():
            patterns.append("empty")
        return "|".join(sorted(patterns)) if patterns else "plain_text"

    def _check_structure(
        self, key: str, structure: str, event: AgentEvent
    ) -> Anomaly | None:
        """Flag if the output structure is novel for this tool/model.

        The first ``config.min_samples`` observations for a key only build the
        baseline. After that, a structure label never seen before for the key
        yields a MEDIUM anomaly. Every observation is counted either way.
        """
        self._total_by_key[key] += 1
        total = self._total_by_key[key]
        counts = self._structure_counts[key]

        anomaly: Anomaly | None = None
        # ``total`` already includes the current output, so ``>`` (not ``>=``)
        # is what guarantees min_samples *prior* observations. This matches
        # how the z-score checks interpret min_samples. Previously detection
        # started one observation too early.
        if total > self.config.min_samples and structure not in counts:
            anomaly = Anomaly(
                anomaly_type=AnomalyType.OUTPUT_DRIFT,
                severity=Severity.MEDIUM,
                message=(
                    f"{event.name!r} produced a novel output structure: {structure!r} "
                    f"(previously seen: {list(counts.keys())})"
                ),
                event=event,
                detector_name=self.name,
                context={
                    "novel_structure": structure,
                    # Snapshot taken before the new structure is counted.
                    "known_structures": dict(counts),
                    "total_observations": total,
                },
            )

        counts[structure] += 1
        return anomaly

    def _update_window(self, window: list[float], value: float) -> None:
        """Append value and trim to window size."""
        window.append(value)
        if len(window) > self.config.window_size:
            window.pop(0)

    def reset(self) -> None:
        """Forget all baselines and structure counts."""
        self._length_windows.clear()
        self._vocab_windows.clear()
        self._structure_counts.clear()
        self._total_by_key.clear()
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Action sequence anomaly detection.
|
|
2
|
+
|
|
3
|
+
Builds a transition probability matrix from observed tool/LLM call sequences
|
|
4
|
+
and flags when the agent takes a path that has never or rarely been seen.
|
|
5
|
+
This catches agents going "off-script" — calling tools in unexpected orders,
|
|
6
|
+
skipping required steps, or entering novel execution paths.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
from drift.detectors import BaseDetector
|
|
15
|
+
from drift.models import AgentEvent, Anomaly, AnomalyType, EventType, Severity
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class SequenceDetectorConfig:
    """Thresholds and severities used by the sequence anomaly detector."""

    # Number of recorded transitions required before detection activates.
    min_observations: int = 10
    # Severity reported for a transition that has never been seen.
    novel_severity: Severity = Severity.HIGH
    # Transitions with an observed probability below this value are flagged.
    rare_threshold: float = 0.02
    # Severity reported for rare (but previously seen) transitions.
    rare_severity: Severity = Severity.MEDIUM
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SequenceDetector(BaseDetector):
    """Flag unusual action-to-action transitions in an agent's event stream.

    The detector keeps first-order transition counts keyed by event name:
    for every tracked event it records "previous name -> current name".
    Once enough transitions have been recorded, a transition that was never
    observed from a known source, or whose observed probability is below
    ``config.rare_threshold``, produces an anomaly.

    Example: an agent that always runs search → parse → respond and then
    runs search → delete → respond gets the search→delete step flagged.
    """

    def __init__(self, config: SequenceDetectorConfig | None = None):
        """Create a detector with empty counts; defaults apply when no config is given."""
        if not config:
            config = SequenceDetectorConfig()
        self.config = config
        # Nested mapping: counts[source][target] -> times source was followed by target.
        self._transition_counts: dict[str, dict[str, int]] = defaultdict(
            lambda: defaultdict(int)
        )
        # Number of outgoing transitions recorded per source action.
        self._total_from: dict[str, int] = defaultdict(int)
        # Transitions recorded overall, compared against config.min_observations.
        self._total_transitions: int = 0
        # Name of the most recent tracked event, None until the first one.
        self._last_action: str | None = None

    @property
    def name(self) -> str:
        """Identifier stamped on anomalies produced by this detector."""
        return "sequence_detector"

    def ingest(self, event: AgentEvent) -> list[Anomaly]:
        """Evaluate and then record the transition into ``event``.

        Only ``TOOL_START``, ``LLM_START`` and ``CHAIN_START`` events are
        considered; any other event returns an empty list and leaves the
        detector state untouched. The transition is checked against the
        counts as they were before this event is added to them.

        Returns:
            A list holding at most one anomaly.
        """
        tracked = (EventType.TOOL_START, EventType.LLM_START, EventType.CHAIN_START)
        if event.event_type not in tracked:
            return []

        found: list[Anomaly] = []
        current = event.name or "unknown"
        previous = self._last_action

        if previous is not None:
            # Stay silent until the global warm-up threshold has been reached.
            if self._total_transitions >= self.config.min_observations:
                finding = self._check_transition(previous, current, event)
                if finding:
                    found.append(finding)

            # Fold the observed transition into the counts.
            self._transition_counts[previous][current] += 1
            self._total_from[previous] += 1
            self._total_transitions += 1

        self._last_action = current
        return found

    def _check_transition(
        self, from_action: str, to_action: str, event: AgentEvent
    ) -> Anomaly | None:
        """Classify one transition as novel, rare, or unremarkable.

        Returns ``None`` when ``from_action`` has no recorded outgoing
        transitions, or when the transition's observed probability is not
        below ``config.rare_threshold``.
        """
        outgoing = self._transition_counts.get(from_action)
        source_total = self._total_from.get(from_action, 0)

        # No history for this source: nothing to compare against.
        if outgoing is None or source_total == 0:
            return None

        hits = outgoing.get(to_action, 0)

        # Known source, target never seen after it.
        if hits == 0:
            seen_targets = list(outgoing.keys())
            return Anomaly(
                anomaly_type=AnomalyType.SEQUENCE_ANOMALY,
                severity=self.config.novel_severity,
                message=(
                    f"Novel transition: {from_action!r} → {to_action!r} "
                    f"(never observed; known transitions from {from_action!r}: "
                    f"{seen_targets})"
                ),
                event=event,
                detector_name=self.name,
                observed_value=0.0,
                context={
                    "from_action": from_action,
                    "to_action": to_action,
                    "known_transitions": dict(outgoing),
                    "total_from_count": source_total,
                },
            )

        # Seen before: flag only when it is rare relative to the source's total.
        probability = hits / source_total
        threshold = self.config.rare_threshold
        if not (probability < threshold):
            return None

        return Anomaly(
            anomaly_type=AnomalyType.SEQUENCE_ANOMALY,
            severity=self.config.rare_severity,
            message=(
                f"Rare transition: {from_action!r} → {to_action!r} "
                f"(p={probability:.3f}, seen {hits}/{source_total} times)"
            ),
            event=event,
            detector_name=self.name,
            observed_value=probability,
            expected_range=(threshold, 1.0),
            context={
                "from_action": from_action,
                "to_action": to_action,
                "probability": probability,
                "count": hits,
                "total_from_count": source_total,
            },
        )

    def get_transition_matrix(self) -> dict[str, dict[str, float]]:
        """Return observed transition probabilities as ``{source: {target: p}}``.

        Sources with no recorded outgoing transitions are left out.
        """
        matrix: dict[str, dict[str, float]] = {}
        for source, targets in self._transition_counts.items():
            denominator = self._total_from[source]
            if denominator <= 0:
                continue
            matrix[source] = {
                target: hits / denominator for target, hits in targets.items()
            }
        return matrix

    def reset(self) -> None:
        """Forget all recorded transitions and the last seen action."""
        self._last_action = None
        self._total_transitions = 0
        self._total_from.clear()
        self._transition_counts.clear()
|
drift/models.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Data models for Drift agent events and anomalies."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import Any, Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class EventType(Enum):
    """Kinds of events that can appear in an agent's execution trace."""

    # LLM call boundaries.
    LLM_START = "llm_start"
    LLM_END = "llm_end"

    # Tool call boundaries.
    TOOL_START = "tool_start"
    TOOL_END = "tool_end"

    # Chain boundaries.
    CHAIN_START = "chain_start"
    CHAIN_END = "chain_end"

    # Failure event.
    ERROR = "error"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class AnomalyType(Enum):
    """Categories an anomaly report can be filed under."""

    LATENCY_SPIKE = "latency_spike"
    TOKEN_ANOMALY = "token_anomaly"
    SEQUENCE_ANOMALY = "sequence_anomaly"
    OUTPUT_DRIFT = "output_drift"
    ERROR_RATE = "error_rate"
    COST_SPIKE = "cost_spike"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Severity(Enum):
    """Severity levels attached to anomalies, listed from lowest to highest."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class AgentEvent:
    """One entry in an agent's execution trace.

    Only ``event_type`` is required. ``timestamp`` defaults to the current
    time and ``event_id`` to the first eight characters of a random UUID;
    every other field defaults to ``None`` (or ``""`` for ``name``).
    """

    event_type: EventType
    timestamp: float = field(default_factory=time.time)
    event_id: str = field(default_factory=lambda: uuid.uuid4().hex[:8])
    run_id: Optional[str] = None

    # --- what happened ---
    # Tool name, LLM model, or chain name.
    name: str = ""
    inputs: Optional[dict[str, Any]] = None
    outputs: Optional[dict[str, Any]] = None

    # --- measurements ---
    latency_ms: Optional[float] = None
    token_count: Optional[int] = None
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    cost_usd: Optional[float] = None

    # --- raw content used for drift detection ---
    output_text: Optional[str] = None
    error: Optional[str] = None

    # --- nesting: id of the parent event, if any ---
    parent_id: Optional[str] = None

    def __repr__(self) -> str:
        """Return ``type('name'[, latency=…ms][, tokens=…][, error=…])``.

        Latency and token count appear whenever they are not ``None``;
        the error appears only when it is truthy.
        """
        text = f"{self.event_type.value}({self.name!r}"
        if self.latency_ms is not None:
            text += f", latency={self.latency_ms:.0f}ms"
        if self.token_count is not None:
            text += f", tokens={self.token_count}"
        if self.error:
            text += f", error={self.error!r}"
        return text + ")"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class Anomaly:
    """A single anomaly report produced by a detector.

    ``anomaly_type``, ``severity`` and ``message`` are required;
    ``timestamp`` defaults to the current time and the remaining fields
    are optional supporting detail.
    """

    anomaly_type: AnomalyType
    severity: Severity
    message: str
    timestamp: float = field(default_factory=time.time)

    # --- what triggered it ---
    event: Optional[AgentEvent] = None
    detector_name: str = ""

    # --- statistical context ---
    observed_value: Optional[float] = None
    expected_range: Optional[tuple[float, float]] = None
    z_score: Optional[float] = None

    # --- raw data that informed the detection ---
    context: Optional[dict[str, Any]] = None

    def __repr__(self) -> str:
        """Return ``[SEVERITY] anomaly_type: message``."""
        return "[{}] {}: {}".format(
            self.severity.value.upper(),
            self.anomaly_type.value,
            self.message,
        )
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: drift-detection
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Statistical anomaly detection for AI agent workflows
|
|
5
|
+
Project-URL: Homepage, https://github.com/dombinic/Drift
|
|
6
|
+
Project-URL: Repository, https://github.com/dombinic/Drift
|
|
7
|
+
Project-URL: Issues, https://github.com/dombinic/Drift/issues
|
|
8
|
+
Author: Dominic
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agents,ai,anomaly-detection,langchain,llm,monitoring,observability
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Requires-Dist: numpy>=1.24.0
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: langchain-core>=0.1.0; extra == 'all'
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
30
|
+
Provides-Extra: langchain
|
|
31
|
+
Requires-Dist: langchain-core>=0.1.0; extra == 'langchain'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# Drift
|
|
35
|
+
|
|
36
|
+
**Statistical anomaly detection for AI agent workflows.**
|
|
37
|
+
|
|
38
|
+
Catch silent failures, hallucination drift, and off-script behavior before they corrupt your data.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
Your AI agents are failing silently. A tool call takes 10x longer than usual. The agent calls `delete_file` when it's never done that before. Output quality gradually degrades over hundreds of runs. Traditional monitoring tools weren't built for non-deterministic systems — Drift is.
|
|
43
|
+
|
|
44
|
+
## What it does
|
|
45
|
+
|
|
46
|
+
Drift hooks into your agent's execution and applies statistical anomaly detection to the event stream:
|
|
47
|
+
|
|
48
|
+
- **Latency & token SPC (statistical process control)** — Flags when a tool call or LLM response takes significantly longer or uses significantly more tokens than its rolling baseline. Catches hung API calls, runaway generation, and upstream provider issues.
|
|
49
|
+
|
|
50
|
+
- **Sequence anomaly detection** — Builds a transition matrix of tool-call sequences and flags when the agent takes a path that's never or rarely been seen. Catches agents going off-script, skipping required steps, or entering novel execution paths.
|
|
51
|
+
|
|
52
|
+
- **Output drift detection** — Tracks output length, vocabulary diversity, and structural patterns over time. Flags when outputs shift significantly from baseline. Catches hallucination drift, prompt injection effects, and gradual quality degradation.
|
|
53
|
+
|
|
54
|
+
## Install
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install drift-detection
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
With LangChain support:
|
|
61
|
+
```bash
|
|
62
|
+
pip install drift-detection[langchain]
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Quickstart
|
|
66
|
+
|
|
67
|
+
### With LangChain
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from drift import DriftGuard
|
|
71
|
+
from drift.callbacks.langchain import DriftCallbackHandler
|
|
72
|
+
|
|
73
|
+
guard = DriftGuard(on_anomaly=lambda a: print(f"🚨 {a}"))
|
|
74
|
+
handler = DriftCallbackHandler(guard)
|
|
75
|
+
|
|
76
|
+
# Use with any LangChain agent, chain, or LLM
|
|
77
|
+
agent.run("your query", callbacks=[handler])
|
|
78
|
+
|
|
79
|
+
# See what happened
|
|
80
|
+
guard.report()
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Standalone (no framework required)
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from drift import DriftGuard, AgentEvent, EventType
|
|
87
|
+
|
|
88
|
+
guard = DriftGuard(on_anomaly=lambda a: print(f"🚨 {a}"))
|
|
89
|
+
|
|
90
|
+
# Feed events from any source
|
|
91
|
+
guard.ingest(AgentEvent(
|
|
92
|
+
event_type=EventType.TOOL_END,
|
|
93
|
+
name="search_web",
|
|
94
|
+
latency_ms=150.0,
|
|
95
|
+
token_count=85,
|
|
96
|
+
output_text="Found 3 results for query...",
|
|
97
|
+
))
|
|
98
|
+
|
|
99
|
+
guard.report()
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Run the demo
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
python examples/demo.py
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
This simulates normal agent operation, builds baselines, then injects latency spikes, sequence anomalies, and output drift — showing each detector catching real failure modes.
|
|
109
|
+
|
|
110
|
+
## Architecture
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
drift/
|
|
114
|
+
├── core.py # DriftGuard engine — orchestrates detectors
|
|
115
|
+
├── models.py # AgentEvent, Anomaly, Severity data models
|
|
116
|
+
├── detectors/
|
|
117
|
+
│ ├── latency.py # Statistical process control on latency/tokens
|
|
118
|
+
│ ├── sequence.py # Action transition probability anomalies
|
|
119
|
+
│ └── output_drift.py # Output distribution shift detection
|
|
120
|
+
└── callbacks/
|
|
121
|
+
└── langchain.py # LangChain callback integration
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Design principles:**
|
|
125
|
+
|
|
126
|
+
1. **Zero-overhead default** — Detectors use numpy for fast rolling statistics. No embedding models, no external services, no network calls.
|
|
127
|
+
2. **Per-tool baselines** — Each tool and model gets its own statistical baseline, so a slow tool won't pollute the baseline for a fast one.
|
|
128
|
+
3. **Framework-agnostic core** — The detection engine works with raw `AgentEvent` objects. Framework integrations (LangChain, CrewAI, etc.) are thin adapters that translate framework callbacks into events.
|
|
129
|
+
4. **Non-blocking** — Drift never throws exceptions that would crash your agent. Detector errors are caught and logged to stderr.
|
|
130
|
+
|
|
131
|
+
## Detectors
|
|
132
|
+
|
|
133
|
+
| Detector | What it catches | Method |
|
|
134
|
+
|----------|----------------|--------|
|
|
135
|
+
| `LatencyDetector` | Hung calls, slow APIs, runaway generation | Rolling z-score on latency and token counts |
|
|
136
|
+
| `SequenceDetector` | Off-script behavior, unexpected tool calls | First-order Markov transition probabilities |
|
|
137
|
+
| `OutputDriftDetector` | Hallucination drift, prompt injection, quality degradation | Output length, vocab diversity, structural pattern tracking |
|
|
138
|
+
|
|
139
|
+
## Configuration
|
|
140
|
+
|
|
141
|
+
Each detector is independently configurable:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from drift import DriftGuard
|
|
145
|
+
from drift.detectors.latency import LatencyDetector, LatencyDetectorConfig
|
|
146
|
+
from drift.detectors.sequence import SequenceDetector, SequenceDetectorConfig
|
|
147
|
+
|
|
148
|
+
guard = DriftGuard(detectors=[
|
|
149
|
+
LatencyDetector(LatencyDetectorConfig(
|
|
150
|
+
window_size=100, # Longer baseline window
|
|
151
|
+
z_threshold=2.5, # More sensitive
|
|
152
|
+
min_samples=10, # Require more data before alerting
|
|
153
|
+
)),
|
|
154
|
+
SequenceDetector(SequenceDetectorConfig(
|
|
155
|
+
min_observations=20, # Require more transitions before flagging
|
|
156
|
+
)),
|
|
157
|
+
])
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## Roadmap
|
|
161
|
+
|
|
162
|
+
- [ ] CrewAI callback handler
|
|
163
|
+
- [ ] OpenAI Agents SDK integration
|
|
164
|
+
- [ ] Slack / PagerDuty alerting
|
|
165
|
+
- [ ] Persistent baselines (save/load detector state)
|
|
166
|
+
- [ ] Embedding-based output drift (optional dependency)
|
|
167
|
+
- [ ] Web dashboard
|
|
168
|
+
- [ ] Cost anomaly detection (track spend per run)
|
|
169
|
+
|
|
170
|
+
## Contributing
|
|
171
|
+
|
|
172
|
+
Issues and PRs welcome. Run tests with:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
pip install -e ".[dev]"
|
|
176
|
+
pytest
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## License
|
|
180
|
+
|
|
181
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
drift/__init__.py,sha256=Yyh-NYOmurrocCTKgQe0kcTptJ6J_At38yQvKr7L7ws,689
|
|
2
|
+
drift/core.py,sha256=F9H6LXOiU5luUhGVnK7LPhcwDWe3FLxlB-HTfocV1ms,6250
|
|
3
|
+
drift/models.py,sha256=J8xpHWDxW8Qw6TrrP9WFsOt4y_2RiLhXTcIetOxVIYo,2820
|
|
4
|
+
drift/callbacks/__init__.py,sha256=5qGB2nKy3iZXKMf5yDhit0TOwaq_HgLGjPJrDaL3Mts,49
|
|
5
|
+
drift/callbacks/langchain.py,sha256=e-VnXf9_3LaJFDtDb-iF5SbME9hi1U52P4pggAWpYXU,7212
|
|
6
|
+
drift/detectors/__init__.py,sha256=Dpmtfg911Z2o-CJ2O8KTBgZPukI6dgf9IKA-iYOJQds,805
|
|
7
|
+
drift/detectors/latency.py,sha256=JgZx-oDZ4KIMUggPTVeOn2nwK5MxbdEZapWQ2BUry6k,5047
|
|
8
|
+
drift/detectors/output_drift.py,sha256=uzaHoRNYubQOHr5N5Rb7ETvIgUOEm3GUiYzZ8NY26iw,7461
|
|
9
|
+
drift/detectors/sequence.py,sha256=0CuBQEX4DPR4sCet4RFGRw3p_y9Me_38r_fPRSALhk4,6112
|
|
10
|
+
drift_detection-0.1.0.dist-info/METADATA,sha256=3Fc8FXTWEClYk1lTiUOw_UXC7gPqgqMeBju5L0BZsrA,6488
|
|
11
|
+
drift_detection-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
12
|
+
drift_detection-0.1.0.dist-info/licenses/LICENSE,sha256=ZbH4VA0-TPt-OXg5a8JPLpKNj6zzckRf4sZx-rxomjE,1065
|
|
13
|
+
drift_detection-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dombinic
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|