driftvane 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
driftvane/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ """driftvane — compose drift detectors for RAG and agent systems.
2
+
3
+ A small library that lets you wire up multiple drift signals (embedding,
4
+ retrieval, response, latency) into one DriftReport. No server, no UI.
5
+ """
6
+
7
+ from driftvane.detector import DriftAlert, DriftSignal
8
+ from driftvane.detectors.embedding import EmbeddingDrift
9
+ from driftvane.detectors.latency import LatencyDrift
10
+ from driftvane.detectors.response import ResponseDrift
11
+ from driftvane.detectors.retrieval import RetrievalDrift
12
+ from driftvane.report import DriftReport
13
+
14
+ __version__ = "0.1.0"
15
+
16
+ __all__ = [
17
+ "DriftAlert",
18
+ "DriftReport",
19
+ "DriftSignal",
20
+ "EmbeddingDrift",
21
+ "LatencyDrift",
22
+ "ResponseDrift",
23
+ "RetrievalDrift",
24
+ "__version__",
25
+ ]
driftvane/detector.py ADDED
@@ -0,0 +1,42 @@
1
+ """Core types: DriftSignal, DriftAlert."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+
9
@dataclass(frozen=True)
class DriftSignal:
    """One detector's verdict.

    name: stable identifier, e.g. "embedding_mmd", "retrieval_jaccard_at_10"
    value: the raw statistic
    threshold: the configured threshold; None means "report only, don't flag"
    drifted: True when value exceeds threshold
    metadata: detector-specific extras (sample sizes, kernel sigma, etc.)
    """

    name: str
    value: float
    threshold: float | None = None
    drifted: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the signal as a plain, JSON-friendly dict."""
        payload: dict[str, Any] = {}
        for key in ("name", "value", "threshold", "drifted", "metadata"):
            payload[key] = getattr(self, key)
        return payload
34
+
35
+
36
class DriftAlert(Exception):
    """Raised by DriftReport.alert_if when a threshold is breached."""

    def __init__(self, breaches: list[DriftSignal]):
        # Keep the breaching signals attached for programmatic handling.
        self.breaches = breaches
        parts = [f"{sig.name}={sig.value:.4f}>{sig.threshold}" for sig in breaches]
        super().__init__("drift detected: " + ", ".join(parts))
@@ -0,0 +1,6 @@
1
+ from driftvane.detectors.embedding import EmbeddingDrift
2
+ from driftvane.detectors.latency import LatencyDrift
3
+ from driftvane.detectors.response import ResponseDrift
4
+ from driftvane.detectors.retrieval import RetrievalDrift
5
+
6
+ __all__ = ["EmbeddingDrift", "LatencyDrift", "ResponseDrift", "RetrievalDrift"]
@@ -0,0 +1,112 @@
1
+ """EmbeddingDrift — Maximum Mean Discrepancy with RBF kernel.
2
+
3
+ MMD is a kernel two-sample test. It tests whether two batches of embeddings
4
+ were drawn from the same distribution. MMD^2 is zero when the distributions
5
+ match and grows with the distance between them.
6
+
7
+ We compute the squared MMD with the RBF (Gaussian) kernel:
8
+ k(x, y) = exp(-||x - y||^2 / (2 * sigma^2))
9
+ MMD^2 = E[k(X, X')] + E[k(Y, Y')] - 2 E[k(X, Y)]
10
+
11
+ When sigma is None we use the median heuristic on the merged sample, which
12
+ is the standard default and removes the main hyperparameter footgun.
13
+
14
+ Cost is O(n^2) memory and time, so call this with batches up to a few
15
+ thousand vectors. For larger sets, subsample first.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import numpy as np
21
+
22
+ from driftvane.detector import DriftSignal
23
+
24
+
25
+ def _pairwise_sq_dists(a: np.ndarray, b: np.ndarray) -> np.ndarray:
26
+ """Squared Euclidean distance matrix, shape (len(a), len(b))."""
27
+ a2 = np.sum(a * a, axis=1)[:, None]
28
+ b2 = np.sum(b * b, axis=1)[None, :]
29
+ return np.maximum(a2 + b2 - 2.0 * a @ b.T, 0.0)
30
+
31
+
32
+ def _median_heuristic_sigma(x: np.ndarray, y: np.ndarray) -> float:
33
+ """Median pairwise distance on the merged sample. Robust default for sigma."""
34
+ z = np.concatenate([x, y], axis=0)
35
+ # subsample to keep this cheap on big inputs
36
+ if len(z) > 1000:
37
+ rng = np.random.default_rng(0)
38
+ idx = rng.choice(len(z), size=1000, replace=False)
39
+ z = z[idx]
40
+ d2 = _pairwise_sq_dists(z, z)
41
+ iu = np.triu_indices_from(d2, k=1)
42
+ median_sq = float(np.median(d2[iu]))
43
+ # sigma is the bandwidth, not sigma^2; floor to avoid div-by-zero
44
+ return max(np.sqrt(median_sq / 2.0), 1e-8)
45
+
46
+
47
def mmd_rbf(x: np.ndarray, y: np.ndarray, sigma: float | None = None) -> tuple[float, float]:
    """Compute MMD^2 between two batches with the RBF kernel.

    Parameters:
        x, y: (n, d) sample batches from the two distributions.
        sigma: RBF bandwidth. None selects the median heuristic; an explicit
            value must be strictly positive.

    Returns:
        (mmd_squared, sigma_used) — mmd_squared is clamped at 0.0.

    Raises:
        ValueError: if an explicit sigma is not strictly positive.
    """
    if sigma is None:
        sigma = _median_heuristic_sigma(x, y)
    elif sigma <= 0.0:
        # gamma = 1/(2*sigma^2) would divide by zero (or silently produce a
        # degenerate kernel) for sigma <= 0; fail loudly instead.
        raise ValueError(f"sigma must be > 0, got {sigma}")
    gamma = 1.0 / (2.0 * sigma * sigma)

    def _sq_dists(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        # squared Euclidean distance matrix (same math as _pairwise_sq_dists)
        a2 = np.sum(a * a, axis=1)[:, None]
        b2 = np.sum(b * b, axis=1)[None, :]
        return np.maximum(a2 + b2 - 2.0 * a @ b.T, 0.0)

    kxx = np.exp(-gamma * _sq_dists(x, x))
    kyy = np.exp(-gamma * _sq_dists(y, y))
    kxy = np.exp(-gamma * _sq_dists(x, y))

    # Biased (V-statistic) MMD^2 estimate; numerical noise can push the value
    # slightly negative, so clamp at 0.
    mmd2 = float(kxx.mean() + kyy.mean() - 2.0 * kxy.mean())
    return max(mmd2, 0.0), sigma
63
+
64
+
65
class EmbeddingDrift:
    """Detect distribution shift between two batches of embedding vectors.

        ed = EmbeddingDrift(threshold=0.1)
        signal = ed.compute(reference=ref_emb, current=cur_emb)
    """

    def __init__(
        self,
        method: str = "mmd",
        sigma: float | None = None,
        threshold: float | None = None,
        name: str = "embedding_mmd",
    ) -> None:
        # MMD is the only implemented statistic; fail fast on anything else.
        if method != "mmd":
            raise ValueError(f"unknown method: {method!r}; only 'mmd' is supported")
        self.method = method
        self.sigma = sigma
        self.threshold = threshold
        self.name = name

    def compute(self, reference: np.ndarray, current: np.ndarray) -> DriftSignal:
        """Score drift between the reference and current embedding batches."""
        ref_arr = np.asarray(reference, dtype=np.float64)
        cur_arr = np.asarray(current, dtype=np.float64)
        if not (ref_arr.ndim == 2 and cur_arr.ndim == 2):
            raise ValueError("reference and current must be 2-D (n_samples, n_dims)")
        if ref_arr.shape[1] != cur_arr.shape[1]:
            raise ValueError(
                f"dim mismatch: reference has {ref_arr.shape[1]}, current has {cur_arr.shape[1]}"
            )
        if min(len(ref_arr), len(cur_arr)) < 2:
            raise ValueError("need at least 2 samples in each set")

        value, sigma_used = mmd_rbf(ref_arr, cur_arr, sigma=self.sigma)
        flagged = self.threshold is not None and value > self.threshold
        meta = {
            "n_ref": int(ref_arr.shape[0]),
            "n_cur": int(cur_arr.shape[0]),
            "dim": int(ref_arr.shape[1]),
            "sigma": float(sigma_used),
            "method": self.method,
        }
        return DriftSignal(
            name=self.name,
            value=value,
            threshold=self.threshold,
            drifted=flagged,
            metadata=meta,
        )
@@ -0,0 +1,90 @@
1
+ """LatencyDrift — Kolmogorov-Smirnov two-sample test on latency arrays.
2
+
3
+ KS compares the empirical CDFs of two samples. The statistic is the maximum
4
+ absolute difference between the CDFs and is bounded in [0, 1]. It is robust
5
+ to scale and doesn't assume any particular distribution, which matches how
6
+ real LLM latency tails behave.
7
+
8
+ We compute the KS statistic from sorted arrays without scipy so the install
9
+ stays light. For an approximate p-value we use the standard asymptotic form
10
+ Q(lam) = 2 * sum_{k>=1} (-1)^(k-1) * exp(-2 * k^2 * lam^2), evaluated at a
+ size-corrected lam = (en + 0.12 + 0.11/en) * D with en = sqrt(n1*n2/(n1+n2)).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import math
16
+ from collections.abc import Sequence
17
+
18
+ import numpy as np
19
+
20
+ from driftvane.detector import DriftSignal
21
+
22
+
23
def ks_2samp(x: Sequence[float], y: Sequence[float]) -> tuple[float, float]:
    """Numpy-only two-sample Kolmogorov-Smirnov test.

    Returns (D, approx_p_value): D is the maximum gap between the two
    empirical CDFs; the p-value uses the asymptotic Smirnov series.
    """
    sample_a = np.sort(np.asarray(x, dtype=np.float64))
    sample_b = np.sort(np.asarray(y, dtype=np.float64))
    n1 = len(sample_a)
    n2 = len(sample_b)
    if min(n1, n2) == 0:
        raise ValueError("both arrays must be non-empty")
    pooled = np.concatenate([sample_a, sample_b])
    gaps = np.abs(
        np.searchsorted(sample_a, pooled, side="right") / n1
        - np.searchsorted(sample_b, pooled, side="right") / n2
    )
    d = float(np.max(gaps))

    if d == 0.0:
        # the asymptotic series degenerates at d=0; the null trivially holds
        return 0.0, 1.0

    en = math.sqrt(n1 * n2 / (n1 + n2))
    # effective-size correction, then the two-sided Smirnov series (100 terms)
    lam = (en + 0.12 + 0.11 / en) * d
    series = 0.0
    for k in range(1, 101):
        series += ((-1) ** (k - 1)) * math.exp(-2.0 * lam * lam * k * k)
    p = min(1.0, max(0.0, 2.0 * series))
    return d, p
45
+
46
+
47
class LatencyDrift:
    """Detect distribution shift in latency (or any 1-D numeric array).

        ld = LatencyDrift(threshold=0.2)   # threshold on KS statistic
        signal = ld.compute(reference=ref_lat, current=cur_lat)

    Or threshold on p-value:

        ld = LatencyDrift(p_threshold=0.01)
    """

    def __init__(
        self,
        threshold: float | None = None,
        p_threshold: float | None = None,
        name: str = "latency_ks",
    ) -> None:
        # The two policies are mutually exclusive; pick one.
        if not (threshold is None or p_threshold is None):
            raise ValueError("set either threshold or p_threshold, not both")
        self.threshold = threshold
        self.p_threshold = p_threshold
        self.name = name

    def compute(self, reference: Sequence[float], current: Sequence[float]) -> DriftSignal:
        """Run the KS test on the two samples and wrap the verdict."""
        stat, p_value = ks_2samp(reference, current)
        if self.p_threshold is None:
            flagged = self.threshold is not None and stat > self.threshold
        else:
            flagged = p_value < self.p_threshold

        meta = {
            "n_ref": len(reference),
            "n_cur": len(current),
            "ks_p_value": p_value,
            "p_threshold": self.p_threshold,
            "median_ref": float(np.median(reference)),
            "median_cur": float(np.median(current)),
        }
        return DriftSignal(
            name=self.name,
            value=stat,
            threshold=self.threshold,
            drifted=flagged,
            metadata=meta,
        )
@@ -0,0 +1,128 @@
1
+ """ResponseDrift — answer-vs-context grounding drift across batches.
2
+
3
+ For each (intent, context, answer) triple, compute Jaccard overlap of token
4
+ sets between the answer and the context. Then compare the *distribution* of
5
+ those scores between reference and current batches.
6
+
7
+ The drift value is the absolute difference of the mean grounding scores. A
8
+ shrinking mean answer-to-context overlap is the signal you want to catch:
9
+ the model is wandering off the retrieved context.
10
+
11
+ If `context-drift-detector-py` is installed we delegate per-triple scoring
12
+ to it for compatibility with that library's signal definitions; otherwise
13
+ we use the inline tokenizer below. Either way, the aggregation is ours.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import re
19
+ from collections.abc import Iterable
20
+ from dataclasses import dataclass
21
+
22
+ from driftvane.detector import DriftSignal
23
+
24
+
25
@dataclass(frozen=True)
class Triple:
    """One RAG interaction: the user's intent, the retrieved context, and
    the model's answer — the unit ResponseDrift scores for grounding."""

    # The user's question / query text.
    intent: str
    # Retrieved context: one concatenated string or a list of chunks.
    context: str | list[str]
    # The model's generated answer text.
    answer: str
30
+
31
+
32
+ _WORD_RE = re.compile(r"[a-z0-9]+")
33
+
34
+
35
+ def _tokens(text: str) -> set[str]:
36
+ return set(_WORD_RE.findall(text.lower()))
37
+
38
+
39
+ def _flatten_context(ctx: str | Iterable[str]) -> str:
40
+ if isinstance(ctx, str):
41
+ return ctx
42
+ return " ".join(ctx)
43
+
44
+
45
+ def _grounding_score(triple: Triple) -> float:
46
+ """answer ∩ context / answer (recall-style; 1.0 = fully grounded)."""
47
+ ans = _tokens(triple.answer)
48
+ if not ans:
49
+ return 1.0
50
+ ctx = _tokens(_flatten_context(triple.context))
51
+ return len(ans & ctx) / len(ans)
52
+
53
+
54
+ def _try_load_external_scorer():
55
+ try:
56
+ from context_drift_detector import detect # type: ignore
57
+ except ImportError:
58
+ return None
59
+
60
+ def _score(triple: Triple) -> float:
61
+ ctx = triple.context if isinstance(triple.context, list) else [triple.context]
62
+ result = detect(triple.intent, ctx, triple.answer)
63
+ # context-drift-detector-py exposes signals dict with answer_to_context
64
+ return float(result.signals.get("answer_to_context", _grounding_score(triple)))
65
+
66
+ return _score
67
+
68
+
69
class ResponseDrift:
    """Detect drift in how well answers stay grounded in retrieved context.

        rsp = ResponseDrift(threshold=0.15)
        signal = rsp.compute(
            reference=[Triple("...", "...", "..."), ...],
            current=[Triple("...", "...", "..."), ...],
        )

    Pass `use_external=False` to force the inline tokenizer even when
    context-drift-detector-py is installed.
    """

    def __init__(
        self,
        threshold: float | None = None,
        name: str = "response_grounding_shift",
        use_external: bool = True,
    ) -> None:
        self.threshold = threshold
        self.name = name
        self.use_external = use_external
        # resolved once at construction; None means "use the inline scorer"
        self._scorer = _try_load_external_scorer() if use_external else None

    def compute(
        self,
        reference: Iterable[Triple | dict],
        current: Iterable[Triple | dict],
    ) -> DriftSignal:
        """Compare mean grounding between the two batches of triples."""

        def coerce(batch: Iterable[Triple | dict]) -> list[Triple]:
            # accept either Triple instances or dicts with matching keys
            return [item if isinstance(item, Triple) else Triple(**item) for item in batch]

        ref_triples = coerce(reference)
        cur_triples = coerce(current)
        if not (ref_triples and cur_triples):
            raise ValueError("need at least 1 triple in each batch")

        scorer = self._scorer or _grounding_score
        mean_ref = sum(scorer(t) for t in ref_triples) / len(ref_triples)
        mean_cur = sum(scorer(t) for t in cur_triples) / len(cur_triples)
        # Report |shift| as the drift value; the sign lives in metadata so
        # callers can still tell worsening grounding from improving.
        signed_shift = mean_cur - mean_ref
        drift_value = abs(signed_shift)
        flagged = self.threshold is not None and drift_value > self.threshold

        return DriftSignal(
            name=self.name,
            value=drift_value,
            threshold=self.threshold,
            drifted=flagged,
            metadata={
                "n_ref": len(ref_triples),
                "n_cur": len(cur_triples),
                "mean_ref_grounding": mean_ref,
                "mean_cur_grounding": mean_cur,
                "signed_shift": signed_shift,
                "scorer": "external" if self._scorer else "inline_jaccard",
            },
        )
@@ -0,0 +1,113 @@
1
+ """RetrievalDrift — measure shift in retriever output for the same queries.
2
+
3
+ Inputs are paired top-k document-id lists: for each query, the reference
4
+ retriever produced one ranked list and the current retriever produced another.
5
+ Drift = how much the top-k sets and rank order have moved.
6
+
7
+ Two metrics:
8
+ * mean_jaccard_at_k: average Jaccard overlap of the top-k sets (1.0 = identical)
9
+ * mean_rbo: rank-biased overlap, weights early positions more (1.0 = identical)
10
+
11
+ The reported drift value is 1 - mean_jaccard_at_k so that "more drift = larger
12
+ value" matches the convention in the other detectors.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Sequence
18
+ from typing import Any
19
+
20
+ from driftvane.detector import DriftSignal
21
+
22
+
23
+ def _jaccard(a: set[Any], b: set[Any]) -> float:
24
+ if not a and not b:
25
+ return 1.0
26
+ return len(a & b) / len(a | b)
27
+
28
+
29
+ def _rbo(ref: Sequence[Any], cur: Sequence[Any], p: float = 0.9) -> float:
30
+ """Rank-biased overlap. Weighted overlap of the two prefix sets at each depth.
31
+
32
+ p controls how top-heavy the weighting is; p=0.9 puts ~86% of weight on the
33
+ top 10. See Webber, Moffat, Zobel 2010.
34
+ """
35
+ depth = max(len(ref), len(cur))
36
+ if depth == 0:
37
+ return 1.0
38
+ seen_ref: set[Any] = set()
39
+ seen_cur: set[Any] = set()
40
+ weighted_sum = 0.0
41
+ weight_total = 0.0
42
+ for i in range(depth):
43
+ if i < len(ref):
44
+ seen_ref.add(ref[i])
45
+ if i < len(cur):
46
+ seen_cur.add(cur[i])
47
+ agreement = len(seen_ref & seen_cur) / (i + 1)
48
+ w = p**i
49
+ weighted_sum += agreement * w
50
+ weight_total += w
51
+ return weighted_sum / weight_total if weight_total > 0 else 1.0
52
+
53
+
54
class RetrievalDrift:
    """Detect retrieval drift across paired query→top-k results.

        rd = RetrievalDrift(k=10, threshold=0.3)
        signal = rd.compute(
            reference=[["doc_1", "doc_2", ...], ...],
            current=[["doc_1", "doc_3", ...], ...],
        )
    """

    def __init__(
        self,
        k: int = 10,
        threshold: float | None = None,
        name: str | None = None,
    ) -> None:
        if k < 1:
            raise ValueError("k must be >= 1")
        self.k = k
        self.threshold = threshold
        self.name = name if name else f"retrieval_jaccard_at_{k}"

    def compute(
        self,
        reference: Sequence[Sequence[Any]],
        current: Sequence[Sequence[Any]],
    ) -> DriftSignal:
        """Score how far the current top-k lists moved from the reference."""
        if len(reference) != len(current):
            raise ValueError(
                f"reference and current must have the same number of queries; "
                f"got {len(reference)} vs {len(current)}"
            )
        if not reference:
            raise ValueError("need at least 1 query")

        per_query_jaccard = []
        per_query_rbo = []
        for ref_ranked, cur_ranked in zip(reference, current, strict=True):
            top_ref = list(ref_ranked[: self.k])
            top_cur = list(cur_ranked[: self.k])
            per_query_jaccard.append(_jaccard(set(top_ref), set(top_cur)))
            per_query_rbo.append(_rbo(top_ref, top_cur))

        n_queries = len(per_query_jaccard)
        mean_jaccard = sum(per_query_jaccard) / n_queries
        mean_rbo = sum(per_query_rbo) / n_queries
        # drift value follows the "larger = more drift" convention
        drift_value = 1.0 - mean_jaccard
        flagged = self.threshold is not None and drift_value > self.threshold

        return DriftSignal(
            name=self.name,
            value=drift_value,
            threshold=self.threshold,
            drifted=flagged,
            metadata={
                "n_queries": len(reference),
                "k": self.k,
                "mean_jaccard_at_k": mean_jaccard,
                "mean_rbo": mean_rbo,
            },
        )
driftvane/report.py ADDED
@@ -0,0 +1,99 @@
1
+ """DriftReport — collect signals from multiple detectors."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from driftvane.detector import DriftAlert, DriftSignal
8
+
9
+
10
class DriftReport:
    """A bag of DriftSignals with output helpers.

    Build it incrementally:

        report = DriftReport()
        report.add(EmbeddingDrift().compute(ref_emb, cur_emb))
        report.add(LatencyDrift().compute(ref_lat, cur_lat))

    Or in one shot:

        report = DriftReport.from_signals([
            EmbeddingDrift().compute(ref_emb, cur_emb),
            LatencyDrift().compute(ref_lat, cur_lat),
        ])
    """

    def __init__(self) -> None:
        self._signals: list[DriftSignal] = []

    @classmethod
    def from_signals(cls, signals: list[DriftSignal]) -> DriftReport:
        """Build a report from an existing list of signals."""
        report = cls()
        for signal in signals:
            report.add(signal)
        return report

    def add(self, signal: DriftSignal) -> DriftReport:
        """Append a signal; returns self so calls can be chained."""
        self._signals.append(signal)
        return self

    @property
    def signals(self) -> list[DriftSignal]:
        # hand out a copy so callers can't mutate the internal list
        return list(self._signals)

    def get(self, name: str) -> DriftSignal | None:
        """First signal with the given name, or None if absent."""
        return next((s for s in self._signals if s.name == name), None)

    def any_drifted(self) -> bool:
        """True when at least one collected signal flagged drift."""
        for signal in self._signals:
            if signal.drifted:
                return True
        return False

    def to_dict(self) -> dict[str, Any]:
        """JSON-friendly dump of every signal plus the overall verdict."""
        return {
            "signals": [signal.to_dict() for signal in self._signals],
            "any_drifted": self.any_drifted(),
        }

    def to_pandas(self):
        """One DataFrame row per signal; metadata keys become meta_* columns."""
        # imported lazily so pandas isn't required for non-DataFrame users
        import pandas as pd

        if not self._signals:
            return pd.DataFrame(columns=["name", "value", "threshold", "drifted"])
        rows = []
        for signal in self._signals:
            row = {
                "name": signal.name,
                "value": signal.value,
                "threshold": signal.threshold,
                "drifted": signal.drifted,
            }
            row.update({f"meta_{key}": val for key, val in signal.metadata.items()})
            rows.append(row)
        return pd.DataFrame(rows)

    def alert_if(self, thresholds: dict[str, float]) -> None:
        """Raise DriftAlert if any of the given signals exceeds its threshold.

        Overrides the threshold each signal was computed with. Use this when the
        report is being evaluated against a different policy than the detector
        was constructed with (e.g. CI vs. prod).
        """
        breaches = [
            DriftSignal(
                name=signal.name,
                value=signal.value,
                threshold=thresholds[signal.name],
                drifted=True,
                metadata=signal.metadata,
            )
            for signal in self._signals
            if signal.name in thresholds and signal.value > thresholds[signal.name]
        ]
        if breaches:
            raise DriftAlert(breaches)
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.4
2
+ Name: driftvane
3
+ Version: 0.1.0
4
+ Summary: Compose drift detectors (embedding, retrieval, response, latency) into one report. Library-only, no server, no UI.
5
+ Project-URL: Homepage, https://github.com/MukundaKatta/driftvane
6
+ Project-URL: Issues, https://github.com/MukundaKatta/driftvane/issues
7
+ Project-URL: Source, https://github.com/MukundaKatta/driftvane
8
+ Author-email: Mukunda Rao Katta <mukunda.vjcs6@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,ai,drift,embedding-drift,evals,llm,mlops,monitoring,rag,retrieval-drift
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: numpy>=1.24
27
+ Provides-Extra: dev
28
+ Requires-Dist: pandas>=2.0; extra == 'dev'
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.4; extra == 'dev'
31
+ Provides-Extra: external-response
32
+ Requires-Dist: context-drift-detector-py>=0.1; extra == 'external-response'
33
+ Provides-Extra: pandas
34
+ Requires-Dist: pandas>=2.0; extra == 'pandas'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # driftvane
38
+
39
+ [![CI](https://github.com/MukundaKatta/driftvane/actions/workflows/ci.yml/badge.svg)](https://github.com/MukundaKatta/driftvane/actions/workflows/ci.yml)
40
+ [![Python](https://img.shields.io/pypi/pyversions/driftvane.svg)](https://pypi.org/project/driftvane/)
41
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
42
+
43
+ **Compose drift detectors for RAG and agent systems.**
44
+
45
+ Most drift libraries are either tabular-only (Evidently, DataDrift) or are
46
+ platforms that want you to ship telemetry to their backend (Phoenix, Arize).
47
+ `driftvane` is a small Python library that lets you wire up multiple drift
48
+ signals — embedding, retrieval, response, latency — into one report. No
49
+ server, no UI, no telemetry. Plug it into a Lambda or Glue job, get a
50
+ `pandas.DataFrame` or a JSON dict back.
51
+
52
+ ## Install
53
+
54
+ ```bash
55
+ pip install driftvane
56
+ # optional
57
+ pip install "driftvane[pandas]" # to_pandas()
58
+ pip install "driftvane[external-response]" # delegate response scoring to context-drift-detector-py
59
+ ```
60
+
61
+ ## Quickstart
62
+
63
+ ```python
64
+ import numpy as np
65
+ from driftvane import (
66
+ DriftReport,
67
+ EmbeddingDrift,
68
+ RetrievalDrift,
69
+ ResponseDrift,
70
+ LatencyDrift,
71
+ )
72
+ from driftvane.detectors.response import Triple
73
+
74
+ ref_emb = np.load("reference_query_embeddings.npy") # (n, 768)
75
+ cur_emb = np.load("current_query_embeddings.npy")
76
+
77
+ report = DriftReport.from_signals([
78
+ EmbeddingDrift(threshold=0.1).compute(ref_emb, cur_emb),
79
+ RetrievalDrift(k=10, threshold=0.3).compute(ref_top_k, cur_top_k),
80
+ ResponseDrift(threshold=0.15).compute(ref_triples, cur_triples),
81
+ LatencyDrift(p_threshold=0.01).compute(ref_latencies, cur_latencies),
82
+ ])
83
+
84
+ if report.any_drifted():
85
+ print(report.to_pandas())
86
+ ```
87
+
88
+ Or fail a CI job when retrieval moves too much:
89
+
90
+ ```python
91
+ from driftvane import DriftAlert
92
+
93
+ try:
94
+ report.alert_if({"retrieval_jaccard_at_10": 0.2})
95
+ except DriftAlert as e:
96
+ sys.exit(f"drift gate failed: {e}")
97
+ ```
98
+
99
+ ## Detectors
100
+
101
+ | Detector | Input | Statistic | Notes |
102
+ |---|---|---|---|
103
+ | `EmbeddingDrift` | two `(n, d)` arrays | MMD with RBF kernel, median-heuristic sigma | numpy-only, O(n²) — subsample for n > a few thousand |
104
+ | `RetrievalDrift` | paired top-k id lists | 1 − mean Jaccard@k; reports RBO too | aligned queries required |
105
+ | `ResponseDrift` | `(intent, context, answer)` triples | shift in mean answer-to-context grounding | uses `context-drift-detector-py` if installed |
106
+ | `LatencyDrift` | two 1-D arrays of floats | Kolmogorov–Smirnov D + asymptotic p-value | scipy-free |
107
+
108
+ Each detector returns a `DriftSignal(name, value, threshold, drifted, metadata)`.
109
+ `DriftReport` collects them.
110
+
111
+ ## What it does NOT do
112
+
113
+ - No server. No UI. No telemetry shipping.
114
+ - No tabular feature drift — use [DataDrift](https://github.com/MukundaKatta/DataDrift)
115
+ for KS/PSI on classical features.
116
+ - No live trace ingestion or OTel collection — point this at parquet/numpy
117
+ arrays you already have.
118
+ - No causal root-cause analysis. It tells you *that* drift is there, not why.
119
+ - No model retraining triggers — emit your own when `report.any_drifted()`.
120
+
121
+ ## Why not Phoenix / Arize / Evidently / Ragas?
122
+
123
+ | | driftvane | Phoenix | Arize | Evidently | Ragas |
124
+ |---|---|---|---|---|---|
125
+ | Library-only (no server) | ✓ | ✗ | ✗ | partial | ✓ |
126
+ | RAG-shaped detectors | ✓ | ✓ | ✓ | ✗ | ✓ |
127
+ | Embedding MMD out of the box | ✓ | partial | ✓ | ✗ | ✗ |
128
+ | Retrieval rank-shift | ✓ | ✗ | partial | ✗ | ✗ |
129
+ | Run inside a 5s Lambda | ✓ | ✗ | ✗ | ✓ | partial |
130
+ | numpy-only core deps | ✓ | ✗ | ✗ | ✗ | ✗ |
131
+
132
+ ## Status
133
+
134
+ v0.1 — alpha. The four detectors above work and have tests. Public API may
135
+ change before v1.0. Issues and PRs welcome.
@@ -0,0 +1,12 @@
1
+ driftvane/__init__.py,sha256=ltF5Y_7F2HPqhDiwXxTk4D55_JmLfyLi-xIBbMgcJro,733
2
+ driftvane/detector.py,sha256=LMhpI1WzcSkP6qTC6N7kT6NsBmhM90ZP9IXfvEcEcN4,1263
3
+ driftvane/report.py,sha256=JqneaQM6AnoMDnfCmamX9su7SgcyJmd-5Q4S0VYxJ2A,2998
4
+ driftvane/detectors/__init__.py,sha256=4L-SW3VE_YAlV5rGCk8rD2W97jENM7ycFNNhuYKpLuo,303
5
+ driftvane/detectors/embedding.py,sha256=nsRYbXi974F9PLWPF_1PaXBgud2kZAdXl6T-HtE-HVo,4095
6
+ driftvane/detectors/latency.py,sha256=rQwLEMX0psy0QrWDqeqU_dKnh7LnDbrQSWHZv2E7aRU,3121
7
+ driftvane/detectors/response.py,sha256=0Lb81RqfdlYKLMJB7zzWDBbnLDsNdM5R2N8vGykemm8,4210
8
+ driftvane/detectors/retrieval.py,sha256=phd4thgF1DRJ3peI9xwgG4FtEeEIX-Kl1mQLs6xrg80,3682
9
+ driftvane-0.1.0.dist-info/METADATA,sha256=GP5Ho99nY-MP08CPlVFJqzxWLofE0xLhd2OUzROwiaQ,5399
10
+ driftvane-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
11
+ driftvane-0.1.0.dist-info/licenses/LICENSE,sha256=p1GujHnprYaKo-fuZc9Tpy9i711QOy8PeYBhNM0VOdw,1074
12
+ driftvane-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mukunda Rao Katta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.