driftvane 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- driftvane/__init__.py +25 -0
- driftvane/detector.py +42 -0
- driftvane/detectors/__init__.py +6 -0
- driftvane/detectors/embedding.py +112 -0
- driftvane/detectors/latency.py +90 -0
- driftvane/detectors/response.py +128 -0
- driftvane/detectors/retrieval.py +113 -0
- driftvane/report.py +99 -0
- driftvane-0.1.0.dist-info/METADATA +135 -0
- driftvane-0.1.0.dist-info/RECORD +12 -0
- driftvane-0.1.0.dist-info/WHEEL +4 -0
- driftvane-0.1.0.dist-info/licenses/LICENSE +21 -0
driftvane/__init__.py
ADDED
"""driftvane — compose drift detectors for RAG and agent systems.

A small library that lets you wire up multiple drift signals (embedding,
retrieval, response, latency) into one DriftReport. No server, no UI.
"""

from driftvane.detector import DriftAlert, DriftSignal
from driftvane.detectors.embedding import EmbeddingDrift
from driftvane.detectors.latency import LatencyDrift
from driftvane.detectors.response import ResponseDrift
from driftvane.detectors.retrieval import RetrievalDrift
from driftvane.report import DriftReport

__version__ = "0.1.0"

__all__ = [
    "DriftAlert",
    "DriftReport",
    "DriftSignal",
    "EmbeddingDrift",
    "LatencyDrift",
    "ResponseDrift",
    "RetrievalDrift",
    "__version__",
]
driftvane/detector.py
ADDED
"""Core types: DriftSignal, DriftAlert."""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any


@dataclass(frozen=True)
class DriftSignal:
    """One detector's verdict.

    name: stable identifier, e.g. "embedding_mmd", "retrieval_jaccard_at_10"
    value: the raw statistic
    threshold: the configured threshold; None means "report only, don't flag"
    drifted: True when value exceeds threshold
    metadata: detector-specific extras (sample sizes, kernel sigma, etc.)
    """

    name: str
    value: float
    threshold: float | None = None
    drifted: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        return {
            "name": self.name,
            "value": self.value,
            "threshold": self.threshold,
            "drifted": self.drifted,
            "metadata": self.metadata,
        }


class DriftAlert(Exception):
    """Raised by DriftReport.alert_if when a threshold is breached."""

    def __init__(self, breaches: list[DriftSignal]):
        self.breaches = breaches
        names = ", ".join(f"{s.name}={s.value:.4f}>{s.threshold}" for s in breaches)
        super().__init__(f"drift detected: {names}")
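These two types are the whole contract between detectors and the report. A minimal illustration, not shipped in the wheel (the numbers are made up):

```python
from driftvane.detector import DriftAlert, DriftSignal

# detectors return a frozen DriftSignal; nothing downstream mutates it
s = DriftSignal(
    name="embedding_mmd",
    value=0.42,
    threshold=0.1,
    drifted=True,
    metadata={"n_ref": 512, "n_cur": 512},
)
print(s.to_dict())

# DriftAlert formats its message from the breaching signals
try:
    raise DriftAlert([s])
except DriftAlert as e:
    print(e)                 # drift detected: embedding_mmd=0.4200>0.1
    print(e.breaches[0].name)
```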
driftvane/detectors/__init__.py
ADDED

from driftvane.detectors.embedding import EmbeddingDrift
from driftvane.detectors.latency import LatencyDrift
from driftvane.detectors.response import ResponseDrift
from driftvane.detectors.retrieval import RetrievalDrift

__all__ = ["EmbeddingDrift", "LatencyDrift", "ResponseDrift", "RetrievalDrift"]
driftvane/detectors/embedding.py
ADDED

"""EmbeddingDrift — Maximum Mean Discrepancy with RBF kernel.

MMD is a kernel two-sample test. It tests whether two batches of embeddings
were drawn from the same distribution. MMD^2 is zero when the distributions
match and grows with the distance between them.

We compute the squared MMD with the RBF (Gaussian) kernel:

    k(x, y) = exp(-||x - y||^2 / (2 * sigma^2))
    MMD^2 = E[k(X, X')] + E[k(Y, Y')] - 2 E[k(X, Y)]

When sigma is None we use the median heuristic on the merged sample, which
is the standard default and removes the main hyperparameter footgun.

Cost is O(n^2) memory and time, so call this with batches up to a few
thousand vectors. For larger sets, subsample first.
"""

from __future__ import annotations

import numpy as np

from driftvane.detector import DriftSignal


def _pairwise_sq_dists(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Squared Euclidean distance matrix, shape (len(a), len(b))."""
    a2 = np.sum(a * a, axis=1)[:, None]
    b2 = np.sum(b * b, axis=1)[None, :]
    return np.maximum(a2 + b2 - 2.0 * a @ b.T, 0.0)


def _median_heuristic_sigma(x: np.ndarray, y: np.ndarray) -> float:
    """Median pairwise distance on the merged sample. Robust default for sigma."""
    z = np.concatenate([x, y], axis=0)
    # subsample to keep this cheap on big inputs
    if len(z) > 1000:
        rng = np.random.default_rng(0)
        idx = rng.choice(len(z), size=1000, replace=False)
        z = z[idx]
    d2 = _pairwise_sq_dists(z, z)
    iu = np.triu_indices_from(d2, k=1)
    median_sq = float(np.median(d2[iu]))
    # sigma is the bandwidth, not sigma^2; floor to avoid div-by-zero
    return max(np.sqrt(median_sq / 2.0), 1e-8)


def mmd_rbf(x: np.ndarray, y: np.ndarray, sigma: float | None = None) -> tuple[float, float]:
    """Compute MMD^2 between two batches with RBF kernel.

    Returns (mmd_squared, sigma_used).
    """
    if sigma is None:
        sigma = _median_heuristic_sigma(x, y)
    gamma = 1.0 / (2.0 * sigma * sigma)

    kxx = np.exp(-gamma * _pairwise_sq_dists(x, x))
    kyy = np.exp(-gamma * _pairwise_sq_dists(y, y))
    kxy = np.exp(-gamma * _pairwise_sq_dists(x, y))

    mmd2 = float(kxx.mean() + kyy.mean() - 2.0 * kxy.mean())
    # numerical noise can push the value slightly negative; clamp at 0
    return max(mmd2, 0.0), sigma


class EmbeddingDrift:
    """Detect distribution shift between two batches of embedding vectors.

        ed = EmbeddingDrift(threshold=0.1)
        signal = ed.compute(reference=ref_emb, current=cur_emb)
    """

    def __init__(
        self,
        method: str = "mmd",
        sigma: float | None = None,
        threshold: float | None = None,
        name: str = "embedding_mmd",
    ) -> None:
        if method != "mmd":
            raise ValueError(f"unknown method: {method!r}; only 'mmd' is supported")
        self.method = method
        self.sigma = sigma
        self.threshold = threshold
        self.name = name

    def compute(self, reference: np.ndarray, current: np.ndarray) -> DriftSignal:
        ref = np.asarray(reference, dtype=np.float64)
        cur = np.asarray(current, dtype=np.float64)
        if ref.ndim != 2 or cur.ndim != 2:
            raise ValueError("reference and current must be 2-D (n_samples, n_dims)")
        if ref.shape[1] != cur.shape[1]:
            raise ValueError(
                f"dim mismatch: reference has {ref.shape[1]}, current has {cur.shape[1]}"
            )
        if len(ref) < 2 or len(cur) < 2:
            raise ValueError("need at least 2 samples in each set")

        value, sigma_used = mmd_rbf(ref, cur, sigma=self.sigma)
        drifted = self.threshold is not None and value > self.threshold
        return DriftSignal(
            name=self.name,
            value=value,
            threshold=self.threshold,
            drifted=drifted,
            metadata={
                "n_ref": int(ref.shape[0]),
                "n_cur": int(cur.shape[0]),
                "dim": int(ref.shape[1]),
                "sigma": float(sigma_used),
                "method": self.method,
            },
        )
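A quick sanity check of the behaviour the docstring promises, not part of the wheel: MMD² stays near zero for two batches from the same Gaussian and grows after a mean shift. The synthetic data and variable names are ours:

```python
import numpy as np

from driftvane.detectors.embedding import EmbeddingDrift, mmd_rbf

rng = np.random.default_rng(42)
ref = rng.normal(0.0, 1.0, size=(500, 64))      # reference batch
same = rng.normal(0.0, 1.0, size=(500, 64))     # same distribution
shifted = rng.normal(0.5, 1.0, size=(500, 64))  # mean moved by 0.5 per dim

mmd_same, sigma = mmd_rbf(ref, same)   # ~0 (small positive bias from the diagonal)
mmd_shift, _ = mmd_rbf(ref, shifted)   # clearly larger
assert mmd_shift > mmd_same

signal = EmbeddingDrift(threshold=0.05).compute(reference=ref, current=shifted)
print(signal.drifted, round(signal.value, 4), signal.metadata["sigma"])
```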
driftvane/detectors/latency.py
ADDED

"""LatencyDrift — Kolmogorov-Smirnov two-sample test on latency arrays.

KS compares the empirical CDFs of two samples. The statistic is the maximum
absolute difference between the CDFs and is bounded in [0, 1]. It is robust
to scale and doesn't assume any particular distribution, which matches how
real LLM latency tails behave.

We compute the KS statistic from sorted arrays without scipy so the install
stays light. For an approximate two-sided p-value we use the asymptotic
Kolmogorov series

    p ~= 2 * sum_{k>=1} (-1)^(k-1) * exp(-2 * lam^2 * k^2)

with lam = (en + 0.12 + 0.11/en) * D and en = sqrt(n1*n2 / (n1+n2)).
"""

from __future__ import annotations

import math
from collections.abc import Sequence

import numpy as np

from driftvane.detector import DriftSignal


def ks_2samp(x: Sequence[float], y: Sequence[float]) -> tuple[float, float]:
    """Return (D, approx_p_value). Numpy-only two-sample KS."""
    a = np.sort(np.asarray(x, dtype=np.float64))
    b = np.sort(np.asarray(y, dtype=np.float64))
    n1, n2 = len(a), len(b)
    if n1 == 0 or n2 == 0:
        raise ValueError("both arrays must be non-empty")
    all_v = np.concatenate([a, b])
    cdf_a = np.searchsorted(a, all_v, side="right") / n1
    cdf_b = np.searchsorted(b, all_v, side="right") / n2
    d = float(np.max(np.abs(cdf_a - cdf_b)))

    if d == 0.0:
        # asymptotic series degenerates at d=0; the null is trivially consistent
        return 0.0, 1.0

    en = math.sqrt(n1 * n2 / (n1 + n2))
    # asymptotic two-sided p-value (Smirnov)
    lam = (en + 0.12 + 0.11 / en) * d
    p = 2.0 * sum(((-1) ** (k - 1)) * math.exp(-2.0 * lam * lam * k * k) for k in range(1, 101))
    p = max(0.0, min(1.0, p))
    return d, p


class LatencyDrift:
    """Detect distribution shift in latency (or any 1-D numeric array).

        ld = LatencyDrift(threshold=0.2)  # threshold on KS statistic
        signal = ld.compute(reference=ref_lat, current=cur_lat)

    Or threshold on p-value:

        ld = LatencyDrift(p_threshold=0.01)
    """

    def __init__(
        self,
        threshold: float | None = None,
        p_threshold: float | None = None,
        name: str = "latency_ks",
    ) -> None:
        if threshold is not None and p_threshold is not None:
            raise ValueError("set either threshold or p_threshold, not both")
        self.threshold = threshold
        self.p_threshold = p_threshold
        self.name = name

    def compute(self, reference: Sequence[float], current: Sequence[float]) -> DriftSignal:
        d, p = ks_2samp(reference, current)
        if self.p_threshold is not None:
            drifted = p < self.p_threshold
        else:
            drifted = self.threshold is not None and d > self.threshold

        return DriftSignal(
            name=self.name,
            value=d,
            threshold=self.threshold,
            drifted=drifted,
            metadata={
                "n_ref": len(reference),
                "n_cur": len(current),
                "ks_p_value": p,
                "p_threshold": self.p_threshold,
                "median_ref": float(np.median(reference)),
                "median_cur": float(np.median(current)),
            },
        )
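To see D and the p-value move, a sketch with synthetic lognormal latencies (a baseline and a slower, fatter-tailed deployment; every number below is invented):

```python
import numpy as np

from driftvane.detectors.latency import LatencyDrift, ks_2samp

rng = np.random.default_rng(7)
ref_lat = rng.lognormal(mean=-1.0, sigma=0.4, size=2000)  # baseline latencies (s)
cur_lat = rng.lognormal(mean=-0.7, sigma=0.6, size=2000)  # slower, heavier tail

d, p = ks_2samp(ref_lat, cur_lat)
print(f"D={d:.3f}  p={p:.2e}")  # a shift this size gives a large D and a tiny p

signal = LatencyDrift(p_threshold=0.01).compute(ref_lat, cur_lat)
assert signal.drifted
print(signal.metadata["median_ref"], signal.metadata["median_cur"])
```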
driftvane/detectors/response.py
ADDED

"""ResponseDrift — answer-vs-context grounding drift across batches.

For each (intent, context, answer) triple, compute a token-overlap grounding
score: the fraction of answer tokens that appear in the context. Then compare
the *distribution* of those scores between reference and current batches.

The drift value is the absolute difference of the mean grounding scores. A
shrinking mean answer-to-context overlap is the signal you want to catch:
the model is wandering off the retrieved context.

If `context-drift-detector-py` is installed we delegate per-triple scoring
to it for compatibility with that library's signal definitions; otherwise
we use the inline tokenizer below. Either way, the aggregation is ours.
"""

from __future__ import annotations

import re
from collections.abc import Iterable
from dataclasses import dataclass

from driftvane.detector import DriftSignal


@dataclass(frozen=True)
class Triple:
    intent: str
    context: str | list[str]
    answer: str


_WORD_RE = re.compile(r"[a-z0-9]+")


def _tokens(text: str) -> set[str]:
    return set(_WORD_RE.findall(text.lower()))


def _flatten_context(ctx: str | Iterable[str]) -> str:
    if isinstance(ctx, str):
        return ctx
    return " ".join(ctx)


def _grounding_score(triple: Triple) -> float:
    """|answer ∩ context| / |answer| (recall-style; 1.0 = fully grounded)."""
    ans = _tokens(triple.answer)
    if not ans:
        return 1.0
    ctx = _tokens(_flatten_context(triple.context))
    return len(ans & ctx) / len(ans)


def _try_load_external_scorer():
    try:
        from context_drift_detector import detect  # type: ignore
    except ImportError:
        return None

    def _score(triple: Triple) -> float:
        ctx = triple.context if isinstance(triple.context, list) else [triple.context]
        result = detect(triple.intent, ctx, triple.answer)
        # context-drift-detector-py exposes a signals dict with answer_to_context
        return float(result.signals.get("answer_to_context", _grounding_score(triple)))

    return _score


class ResponseDrift:
    """Detect drift in how well answers stay grounded in retrieved context.

        rsp = ResponseDrift(threshold=0.15)
        signal = rsp.compute(
            reference=[Triple("...", "...", "..."), ...],
            current=[Triple("...", "...", "..."), ...],
        )

    Pass `use_external=False` to force the inline tokenizer even when
    context-drift-detector-py is installed.
    """

    def __init__(
        self,
        threshold: float | None = None,
        name: str = "response_grounding_shift",
        use_external: bool = True,
    ) -> None:
        self.threshold = threshold
        self.name = name
        self.use_external = use_external
        self._scorer = _try_load_external_scorer() if use_external else None

    def compute(
        self,
        reference: Iterable[Triple | dict],
        current: Iterable[Triple | dict],
    ) -> DriftSignal:
        ref = [t if isinstance(t, Triple) else Triple(**t) for t in reference]
        cur = [t if isinstance(t, Triple) else Triple(**t) for t in current]
        if not ref or not cur:
            raise ValueError("need at least 1 triple in each batch")

        score = self._scorer or _grounding_score
        ref_scores = [score(t) for t in ref]
        cur_scores = [score(t) for t in cur]

        mean_ref = sum(ref_scores) / len(ref_scores)
        mean_cur = sum(cur_scores) / len(cur_scores)
        # we care about *worsening* grounding, so track the signed shift but
        # report the absolute value as the drift statistic
        signed_shift = mean_cur - mean_ref
        drift_value = abs(signed_shift)
        drifted = self.threshold is not None and drift_value > self.threshold

        return DriftSignal(
            name=self.name,
            value=drift_value,
            threshold=self.threshold,
            drifted=drifted,
            metadata={
                "n_ref": len(ref),
                "n_cur": len(cur),
                "mean_ref_grounding": mean_ref,
                "mean_cur_grounding": mean_cur,
                "signed_shift": signed_shift,
                "scorer": "external" if self._scorer else "inline_jaccard",
            },
        )
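A tiny illustration of the aggregation (the strings are fabricated): reference answers quote their context verbatim, current answers wander, so the mean grounding drops and the signal fires:

```python
from driftvane.detectors.response import ResponseDrift, Triple

reference = [
    Triple("refund policy", "Refunds are issued within 14 days.",
           "Refunds are issued within 14 days."),
    Triple("shipping", "We ship worldwide via DHL.",
           "We ship worldwide via DHL."),
]
current = [
    Triple("refund policy", "Refunds are issued within 14 days.",
           "Contact your bank to dispute the charge."),
    Triple("shipping", "We ship worldwide via DHL.",
           "Delivery times vary by carrier and season."),
]

# use_external=False forces the inline tokenizer regardless of what's installed
signal = ResponseDrift(threshold=0.15, use_external=False).compute(reference, current)
assert signal.drifted
print(signal.value, signal.metadata["signed_shift"])  # negative shift = worse grounding
```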
driftvane/detectors/retrieval.py
ADDED

"""RetrievalDrift — measure shift in retriever output for the same queries.

Inputs are paired top-k document-id lists: for each query, the reference
retriever produced one ranked list and the current retriever produced another.
Drift = how much the top-k sets and rank order have moved.

Two metrics:
  * mean_jaccard_at_k: average Jaccard overlap of the top-k sets (1.0 = identical)
  * mean_rbo: rank-biased overlap, weights early positions more (1.0 = identical)

The reported drift value is 1 - mean_jaccard_at_k so that "more drift = larger
value" matches the convention in the other detectors.
"""

from __future__ import annotations

from collections.abc import Sequence
from typing import Any

from driftvane.detector import DriftSignal


def _jaccard(a: set[Any], b: set[Any]) -> float:
    if not a and not b:
        return 1.0
    return len(a & b) / len(a | b)


def _rbo(ref: Sequence[Any], cur: Sequence[Any], p: float = 0.9) -> float:
    """Rank-biased overlap: weighted overlap of the two prefix sets at each depth.

    p controls how top-heavy the weighting is; p=0.9 puts ~86% of weight on the
    top 10. See Webber, Moffat, Zobel 2010.
    """
    depth = max(len(ref), len(cur))
    if depth == 0:
        return 1.0
    seen_ref: set[Any] = set()
    seen_cur: set[Any] = set()
    weighted_sum = 0.0
    weight_total = 0.0
    for i in range(depth):
        if i < len(ref):
            seen_ref.add(ref[i])
        if i < len(cur):
            seen_cur.add(cur[i])
        agreement = len(seen_ref & seen_cur) / (i + 1)
        w = p**i
        weighted_sum += agreement * w
        weight_total += w
    return weighted_sum / weight_total if weight_total > 0 else 1.0


class RetrievalDrift:
    """Detect retrieval drift across paired query→top-k results.

        rd = RetrievalDrift(k=10, threshold=0.3)
        signal = rd.compute(
            reference=[["doc_1", "doc_2", ...], ...],
            current=[["doc_1", "doc_3", ...], ...],
        )
    """

    def __init__(
        self,
        k: int = 10,
        threshold: float | None = None,
        name: str | None = None,
    ) -> None:
        if k < 1:
            raise ValueError("k must be >= 1")
        self.k = k
        self.threshold = threshold
        self.name = name or f"retrieval_jaccard_at_{k}"

    def compute(
        self,
        reference: Sequence[Sequence[Any]],
        current: Sequence[Sequence[Any]],
    ) -> DriftSignal:
        if len(reference) != len(current):
            raise ValueError(
                f"reference and current must have the same number of queries; "
                f"got {len(reference)} vs {len(current)}"
            )
        if not reference:
            raise ValueError("need at least 1 query")

        jaccards: list[float] = []
        rbos: list[float] = []
        for ref_list, cur_list in zip(reference, current, strict=True):
            ref_top = list(ref_list[: self.k])
            cur_top = list(cur_list[: self.k])
            jaccards.append(_jaccard(set(ref_top), set(cur_top)))
            rbos.append(_rbo(ref_top, cur_top))

        mean_jaccard = sum(jaccards) / len(jaccards)
        mean_rbo = sum(rbos) / len(rbos)
        drift_value = 1.0 - mean_jaccard
        drifted = self.threshold is not None and drift_value > self.threshold

        return DriftSignal(
            name=self.name,
            value=drift_value,
            threshold=self.threshold,
            drifted=drifted,
            metadata={
                "n_queries": len(reference),
                "k": self.k,
                "mean_jaccard_at_k": mean_jaccard,
                "mean_rbo": mean_rbo,
            },
        )
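The two metrics diverge exactly where you'd expect. A toy run (the doc ids are placeholders): swapping one document in the top-k moves Jaccard, while reversing an identical top-k leaves Jaccard at 1.0 but lowers RBO:

```python
from driftvane.detectors.retrieval import RetrievalDrift, _rbo

reference = [["d1", "d2", "d3", "d4"], ["d9", "d8", "d7", "d6"]]
current = [["d1", "d2", "d5", "d4"], ["d6", "d7", "d8", "d9"]]  # q2: same set, reversed

signal = RetrievalDrift(k=4, threshold=0.3).compute(reference, current)
print(signal.value)                 # 1 - mean Jaccard@4 = 1 - (0.6 + 1.0)/2 = 0.2
print(signal.metadata["mean_rbo"])  # lower than Jaccard suggests, because q2 is reordered

# _rbo is a private helper, used here only to show its order sensitivity
print(_rbo(["d9", "d8"], ["d8", "d9"]))  # ~0.47, not 1.0
```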
driftvane/report.py
ADDED
"""DriftReport — collect signals from multiple detectors."""

from __future__ import annotations

from typing import Any

from driftvane.detector import DriftAlert, DriftSignal


class DriftReport:
    """A bag of DriftSignals with output helpers.

    Build it incrementally:

        report = DriftReport()
        report.add(EmbeddingDrift().compute(ref_emb, cur_emb))
        report.add(LatencyDrift().compute(ref_lat, cur_lat))

    Or in one shot:

        report = DriftReport.from_signals([
            EmbeddingDrift().compute(ref_emb, cur_emb),
            LatencyDrift().compute(ref_lat, cur_lat),
        ])
    """

    def __init__(self) -> None:
        self._signals: list[DriftSignal] = []

    @classmethod
    def from_signals(cls, signals: list[DriftSignal]) -> DriftReport:
        r = cls()
        for s in signals:
            r.add(s)
        return r

    def add(self, signal: DriftSignal) -> DriftReport:
        self._signals.append(signal)
        return self

    @property
    def signals(self) -> list[DriftSignal]:
        return list(self._signals)

    def get(self, name: str) -> DriftSignal | None:
        for s in self._signals:
            if s.name == name:
                return s
        return None

    def any_drifted(self) -> bool:
        return any(s.drifted for s in self._signals)

    def to_dict(self) -> dict[str, Any]:
        return {
            "signals": [s.to_dict() for s in self._signals],
            "any_drifted": self.any_drifted(),
        }

    def to_pandas(self):
        # imported lazily so pandas isn't required for non-DataFrame users
        import pandas as pd

        if not self._signals:
            return pd.DataFrame(columns=["name", "value", "threshold", "drifted"])
        return pd.DataFrame(
            [
                {
                    "name": s.name,
                    "value": s.value,
                    "threshold": s.threshold,
                    "drifted": s.drifted,
                    **{f"meta_{k}": v for k, v in s.metadata.items()},
                }
                for s in self._signals
            ]
        )

    def alert_if(self, thresholds: dict[str, float]) -> None:
        """Raise DriftAlert if any of the given signals exceeds its threshold.

        Overrides the threshold each signal was computed with. Use this when the
        report is being evaluated against a different policy than the detector
        was constructed with (e.g. CI vs. prod).
        """
        breaches = []
        for s in self._signals:
            if s.name in thresholds and s.value > thresholds[s.name]:
                breaches.append(
                    DriftSignal(
                        name=s.name,
                        value=s.value,
                        threshold=thresholds[s.name],
                        drifted=True,
                        metadata=s.metadata,
                    )
                )
        if breaches:
            raise DriftAlert(breaches)
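End to end, a sketch on synthetic arrays (the thresholds are illustrative, not recommendations):

```python
import numpy as np

from driftvane import DriftAlert, DriftReport, EmbeddingDrift, LatencyDrift

rng = np.random.default_rng(0)
ref_emb = rng.normal(size=(200, 32))
cur_emb = rng.normal(0.3, 1.0, size=(200, 32))  # shifted embeddings
ref_lat = rng.exponential(0.2, size=500)
cur_lat = rng.exponential(0.4, size=500)        # slower latencies

report = DriftReport.from_signals([
    EmbeddingDrift(threshold=0.1).compute(ref_emb, cur_emb),
    LatencyDrift(threshold=0.2).compute(ref_lat, cur_lat),
])
print(report.any_drifted())
print(report.get("latency_ks").metadata["ks_p_value"])

try:
    # evaluate the same report against a stricter CI policy
    report.alert_if({"embedding_mmd": 0.01, "latency_ks": 0.1})
except DriftAlert as e:
    print("gate failed:", e)
```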
driftvane-0.1.0.dist-info/METADATA
ADDED

Metadata-Version: 2.4
Name: driftvane
Version: 0.1.0
Summary: Compose drift detectors (embedding, retrieval, response, latency) into one report. Library-only, no server, no UI.
Project-URL: Homepage, https://github.com/MukundaKatta/driftvane
Project-URL: Issues, https://github.com/MukundaKatta/driftvane/issues
Project-URL: Source, https://github.com/MukundaKatta/driftvane
Author-email: Mukunda Rao Katta <mukunda.vjcs6@gmail.com>
License: MIT
License-File: LICENSE
Keywords: agents,ai,drift,embedding-drift,evals,llm,mlops,monitoring,rag,retrieval-drift
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.10
Requires-Dist: numpy>=1.24
Provides-Extra: dev
Requires-Dist: pandas>=2.0; extra == 'dev'
Requires-Dist: pytest>=8.0; extra == 'dev'
Requires-Dist: ruff>=0.4; extra == 'dev'
Provides-Extra: external-response
Requires-Dist: context-drift-detector-py>=0.1; extra == 'external-response'
Provides-Extra: pandas
Requires-Dist: pandas>=2.0; extra == 'pandas'
Description-Content-Type: text/markdown

# driftvane

[](https://github.com/MukundaKatta/driftvane/actions/workflows/ci.yml)
[](https://pypi.org/project/driftvane/)
[](LICENSE)

**Compose drift detectors for RAG and agent systems.**

Most drift libraries are either tabular-only (Evidently, DataDrift) or are
platforms that want you to ship telemetry to their backend (Phoenix, Arize).
`driftvane` is a small Python library that lets you wire up multiple drift
signals — embedding, retrieval, response, latency — into one report. No
server, no UI, no telemetry. Plug it into a Lambda or Glue job, get a
`pandas.DataFrame` or a JSON dict back.

## Install

```bash
pip install driftvane
# optional
pip install "driftvane[pandas]"             # to_pandas()
pip install "driftvane[external-response]"  # delegate response scoring to context-drift-detector-py
```

## Quickstart

```python
import numpy as np
from driftvane import (
    DriftReport,
    EmbeddingDrift,
    RetrievalDrift,
    ResponseDrift,
    LatencyDrift,
)
from driftvane.detectors.response import Triple

ref_emb = np.load("reference_query_embeddings.npy")  # (n, 768)
cur_emb = np.load("current_query_embeddings.npy")

report = DriftReport.from_signals([
    EmbeddingDrift(threshold=0.1).compute(ref_emb, cur_emb),
    RetrievalDrift(k=10, threshold=0.3).compute(ref_top_k, cur_top_k),
    ResponseDrift(threshold=0.15).compute(ref_triples, cur_triples),
    LatencyDrift(p_threshold=0.01).compute(ref_latencies, cur_latencies),
])

if report.any_drifted():
    print(report.to_pandas())
```

Or fail a CI job when retrieval moves too much:

```python
import sys

from driftvane import DriftAlert

try:
    report.alert_if({"retrieval_jaccard_at_10": 0.2})
except DriftAlert as e:
    sys.exit(f"drift gate failed: {e}")
```

## Detectors

| Detector | Input | Statistic | Notes |
|---|---|---|---|
| `EmbeddingDrift` | two `(n, d)` arrays | MMD with RBF kernel, median-heuristic sigma | numpy-only, O(n²) — subsample for n > a few thousand |
| `RetrievalDrift` | paired top-k id lists | 1 − mean Jaccard@k; reports RBO too | aligned queries required |
| `ResponseDrift` | `(intent, context, answer)` triples | shift in mean answer-to-context grounding | uses `context-drift-detector-py` if installed |
| `LatencyDrift` | two 1-D arrays of floats | Kolmogorov–Smirnov D + asymptotic p-value | scipy-free |

Each detector returns a `DriftSignal(name, value, threshold, drifted, metadata)`.
`DriftReport` collects them.

## What it does NOT do

- No server. No UI. No telemetry shipping.
- No tabular feature drift — use [DataDrift](https://github.com/MukundaKatta/DataDrift)
  for KS/PSI on classical features.
- No live trace ingestion or OTel collection — point this at parquet/numpy
  arrays you already have.
- No causal root-cause analysis. It tells you *that* drift is there, not why.
- No model retraining triggers — emit your own when `report.any_drifted()` is true.

## Why not Phoenix / Arize / Evidently / Ragas?

| | driftvane | Phoenix | Arize | Evidently | Ragas |
|---|---|---|---|---|---|
| Library-only (no server) | ✓ | ✗ | ✗ | partial | ✓ |
| RAG-shaped detectors | ✓ | ✓ | ✓ | ✗ | ✓ |
| Embedding MMD out of the box | ✓ | partial | ✓ | ✗ | ✗ |
| Retrieval rank-shift | ✓ | ✗ | partial | ✗ | ✗ |
| Run inside a 5s Lambda | ✓ | ✗ | ✗ | ✓ | partial |
| numpy-only core deps | ✓ | ✗ | ✗ | ✗ | ✗ |

## Status

v0.1 — alpha. The four detectors above work and have tests. Public API may
change before v1.0. Issues and PRs welcome.
driftvane-0.1.0.dist-info/RECORD
ADDED

driftvane/__init__.py,sha256=ltF5Y_7F2HPqhDiwXxTk4D55_JmLfyLi-xIBbMgcJro,733
driftvane/detector.py,sha256=LMhpI1WzcSkP6qTC6N7kT6NsBmhM90ZP9IXfvEcEcN4,1263
driftvane/report.py,sha256=JqneaQM6AnoMDnfCmamX9su7SgcyJmd-5Q4S0VYxJ2A,2998
driftvane/detectors/__init__.py,sha256=4L-SW3VE_YAlV5rGCk8rD2W97jENM7ycFNNhuYKpLuo,303
driftvane/detectors/embedding.py,sha256=nsRYbXi974F9PLWPF_1PaXBgud2kZAdXl6T-HtE-HVo,4095
driftvane/detectors/latency.py,sha256=rQwLEMX0psy0QrWDqeqU_dKnh7LnDbrQSWHZv2E7aRU,3121
driftvane/detectors/response.py,sha256=0Lb81RqfdlYKLMJB7zzWDBbnLDsNdM5R2N8vGykemm8,4210
driftvane/detectors/retrieval.py,sha256=phd4thgF1DRJ3peI9xwgG4FtEeEIX-Kl1mQLs6xrg80,3682
driftvane-0.1.0.dist-info/METADATA,sha256=GP5Ho99nY-MP08CPlVFJqzxWLofE0xLhd2OUzROwiaQ,5399
driftvane-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
driftvane-0.1.0.dist-info/licenses/LICENSE,sha256=p1GujHnprYaKo-fuZc9Tpy9i711QOy8PeYBhNM0VOdw,1074
driftvane-0.1.0.dist-info/RECORD,,
driftvane-0.1.0.dist-info/licenses/LICENSE
ADDED

MIT License

Copyright (c) 2026 Mukunda Rao Katta

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.