driftvane 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ dist/
8
+ *.egg-info/
9
+ *.egg
10
+ .pytest_cache/
11
+ .ruff_cache/
12
+ .coverage
13
+ htmlcov/
14
+ .tox/
15
+ .mypy_cache/
16
+ .venv/
17
+ venv/
18
+ env/
19
+ .idea/
20
+ .vscode/
21
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mukunda Rao Katta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.4
2
+ Name: driftvane
3
+ Version: 0.1.0
4
+ Summary: Compose drift detectors (embedding, retrieval, response, latency) into one report. Library-only, no server, no UI.
5
+ Project-URL: Homepage, https://github.com/MukundaKatta/driftvane
6
+ Project-URL: Issues, https://github.com/MukundaKatta/driftvane/issues
7
+ Project-URL: Source, https://github.com/MukundaKatta/driftvane
8
+ Author-email: Mukunda Rao Katta <mukunda.vjcs6@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,ai,drift,embedding-drift,evals,llm,mlops,monitoring,rag,retrieval-drift
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: numpy>=1.24
27
+ Provides-Extra: dev
28
+ Requires-Dist: pandas>=2.0; extra == 'dev'
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.4; extra == 'dev'
31
+ Provides-Extra: external-response
32
+ Requires-Dist: context-drift-detector-py>=0.1; extra == 'external-response'
33
+ Provides-Extra: pandas
34
+ Requires-Dist: pandas>=2.0; extra == 'pandas'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # driftvane
38
+
39
+ [![CI](https://github.com/MukundaKatta/driftvane/actions/workflows/ci.yml/badge.svg)](https://github.com/MukundaKatta/driftvane/actions/workflows/ci.yml)
40
+ [![Python](https://img.shields.io/pypi/pyversions/driftvane.svg)](https://pypi.org/project/driftvane/)
41
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
42
+
43
+ **Compose drift detectors for RAG and agent systems.**
44
+
45
+ Most drift libraries are either tabular-only (Evidently, DataDrift) or are
46
+ platforms that want you to ship telemetry to their backend (Phoenix, Arize).
47
+ `driftvane` is a small Python library that lets you wire up multiple drift
48
+ signals — embedding, retrieval, response, latency — into one report. No
49
+ server, no UI, no telemetry. Plug it into a Lambda or Glue job, get a
50
+ `pandas.DataFrame` or a JSON dict back.
51
+
52
+ ## Install
53
+
54
+ ```bash
55
+ pip install driftvane
56
+ # optional
57
+ pip install "driftvane[pandas]" # to_pandas()
58
+ pip install "driftvane[external-response]" # delegate response scoring to context-drift-detector-py
59
+ ```
60
+
61
+ ## Quickstart
62
+
63
+ ```python
64
+ import numpy as np
65
+ from driftvane import (
66
+ DriftReport,
67
+ EmbeddingDrift,
68
+ RetrievalDrift,
69
+ ResponseDrift,
70
+ LatencyDrift,
71
+ )
72
+ from driftvane.detectors.response import Triple
73
+
74
+ ref_emb = np.load("reference_query_embeddings.npy") # (n, 768)
75
+ cur_emb = np.load("current_query_embeddings.npy")
76
+
77
+ report = DriftReport.from_signals([
78
+ EmbeddingDrift(threshold=0.1).compute(ref_emb, cur_emb),
79
+ RetrievalDrift(k=10, threshold=0.3).compute(ref_top_k, cur_top_k),
80
+ ResponseDrift(threshold=0.15).compute(ref_triples, cur_triples),
81
+ LatencyDrift(p_threshold=0.01).compute(ref_latencies, cur_latencies),
82
+ ])
83
+
84
+ if report.any_drifted():
85
+ print(report.to_pandas())
86
+ ```
87
+
88
+ Or fail a CI job when retrieval moves too much:
89
+
90
+ ```python
91
+ from driftvane import DriftAlert
92
+
93
+ try:
94
+ report.alert_if({"retrieval_jaccard_at_10": 0.2})
95
+ except DriftAlert as e:
96
+ sys.exit(f"drift gate failed: {e}")
97
+ ```
98
+
99
+ ## Detectors
100
+
101
+ | Detector | Input | Statistic | Notes |
102
+ |---|---|---|---|
103
+ | `EmbeddingDrift` | two `(n, d)` arrays | MMD with RBF kernel, median-heuristic sigma | numpy-only, O(n²) — subsample for n > a few thousand |
104
+ | `RetrievalDrift` | paired top-k id lists | 1 − mean Jaccard@k; reports RBO too | aligned queries required |
105
+ | `ResponseDrift` | `(intent, context, answer)` triples | shift in mean answer-to-context grounding | uses `context-drift-detector-py` if installed |
106
+ | `LatencyDrift` | two 1-D arrays of floats | Kolmogorov–Smirnov D + asymptotic p-value | scipy-free |
107
+
108
+ Each detector returns a `DriftSignal(name, value, threshold, drifted, metadata)`.
109
+ `DriftReport` collects them.
110
+
111
+ ## What it does NOT do
112
+
113
+ - No server. No UI. No telemetry shipping.
114
+ - No tabular feature drift — use [DataDrift](https://github.com/MukundaKatta/DataDrift)
115
+ for KS/PSI on classical features.
116
+ - No live trace ingestion or OTel collection — point this at parquet/numpy
117
+ arrays you already have.
118
+ - No causal root-cause analysis. It tells you *that* drift is there, not why.
119
+ - No model retraining triggers — emit your own when `report.any_drifted()`.
120
+
121
+ ## Why not Phoenix / Arize / Evidently / Ragas?
122
+
123
+ | | driftvane | Phoenix | Arize | Evidently | Ragas |
124
+ |---|---|---|---|---|---|
125
+ | Library-only (no server) | ✓ | ✗ | ✗ | partial | ✓ |
126
+ | RAG-shaped detectors | ✓ | ✓ | ✓ | ✗ | ✓ |
127
+ | Embedding MMD out of the box | ✓ | partial | ✓ | ✗ | ✗ |
128
+ | Retrieval rank-shift | ✓ | ✗ | partial | ✗ | ✗ |
129
+ | Run inside a 5s Lambda | ✓ | ✗ | ✗ | ✓ | partial |
130
+ | numpy-only core deps | ✓ | ✗ | ✗ | ✗ | ✗ |
131
+
132
+ ## Status
133
+
134
+ v0.1 — alpha. The four detectors above work and have tests. Public API may
135
+ change before v1.0. Issues and PRs welcome.
@@ -0,0 +1,99 @@
1
+ # driftvane
2
+
3
+ [![CI](https://github.com/MukundaKatta/driftvane/actions/workflows/ci.yml/badge.svg)](https://github.com/MukundaKatta/driftvane/actions/workflows/ci.yml)
4
+ [![Python](https://img.shields.io/pypi/pyversions/driftvane.svg)](https://pypi.org/project/driftvane/)
5
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
6
+
7
+ **Compose drift detectors for RAG and agent systems.**
8
+
9
+ Most drift libraries are either tabular-only (Evidently, DataDrift) or are
10
+ platforms that want you to ship telemetry to their backend (Phoenix, Arize).
11
+ `driftvane` is a small Python library that lets you wire up multiple drift
12
+ signals — embedding, retrieval, response, latency — into one report. No
13
+ server, no UI, no telemetry. Plug it into a Lambda or Glue job, get a
14
+ `pandas.DataFrame` or a JSON dict back.
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install driftvane
20
+ # optional
21
+ pip install "driftvane[pandas]" # to_pandas()
22
+ pip install "driftvane[external-response]" # delegate response scoring to context-drift-detector-py
23
+ ```
24
+
25
+ ## Quickstart
26
+
27
+ ```python
28
+ import numpy as np
29
+ from driftvane import (
30
+ DriftReport,
31
+ EmbeddingDrift,
32
+ RetrievalDrift,
33
+ ResponseDrift,
34
+ LatencyDrift,
35
+ )
36
+ from driftvane.detectors.response import Triple
37
+
38
+ ref_emb = np.load("reference_query_embeddings.npy") # (n, 768)
39
+ cur_emb = np.load("current_query_embeddings.npy")
40
+
41
+ report = DriftReport.from_signals([
42
+ EmbeddingDrift(threshold=0.1).compute(ref_emb, cur_emb),
43
+ RetrievalDrift(k=10, threshold=0.3).compute(ref_top_k, cur_top_k),
44
+ ResponseDrift(threshold=0.15).compute(ref_triples, cur_triples),
45
+ LatencyDrift(p_threshold=0.01).compute(ref_latencies, cur_latencies),
46
+ ])
47
+
48
+ if report.any_drifted():
49
+ print(report.to_pandas())
50
+ ```
51
+
52
+ Or fail a CI job when retrieval moves too much:
53
+
54
+ ```python
55
+ from driftvane import DriftAlert
56
+
57
+ try:
58
+ report.alert_if({"retrieval_jaccard_at_10": 0.2})
59
+ except DriftAlert as e:
60
+ sys.exit(f"drift gate failed: {e}")
61
+ ```
62
+
63
+ ## Detectors
64
+
65
+ | Detector | Input | Statistic | Notes |
66
+ |---|---|---|---|
67
+ | `EmbeddingDrift` | two `(n, d)` arrays | MMD with RBF kernel, median-heuristic sigma | numpy-only, O(n²) — subsample for n > a few thousand |
68
+ | `RetrievalDrift` | paired top-k id lists | 1 − mean Jaccard@k; reports RBO too | aligned queries required |
69
+ | `ResponseDrift` | `(intent, context, answer)` triples | shift in mean answer-to-context grounding | uses `context-drift-detector-py` if installed |
70
+ | `LatencyDrift` | two 1-D arrays of floats | Kolmogorov–Smirnov D + asymptotic p-value | scipy-free |
71
+
72
+ Each detector returns a `DriftSignal(name, value, threshold, drifted, metadata)`.
73
+ `DriftReport` collects them.
74
+
75
+ ## What it does NOT do
76
+
77
+ - No server. No UI. No telemetry shipping.
78
+ - No tabular feature drift — use [DataDrift](https://github.com/MukundaKatta/DataDrift)
79
+ for KS/PSI on classical features.
80
+ - No live trace ingestion or OTel collection — point this at parquet/numpy
81
+ arrays you already have.
82
+ - No causal root-cause analysis. It tells you *that* drift is there, not why.
83
+ - No model retraining triggers — emit your own when `report.any_drifted()`.
84
+
85
+ ## Why not Phoenix / Arize / Evidently / Ragas?
86
+
87
+ | | driftvane | Phoenix | Arize | Evidently | Ragas |
88
+ |---|---|---|---|---|---|
89
+ | Library-only (no server) | ✓ | ✗ | ✗ | partial | ✓ |
90
+ | RAG-shaped detectors | ✓ | ✓ | ✓ | ✗ | ✓ |
91
+ | Embedding MMD out of the box | ✓ | partial | ✓ | ✗ | ✗ |
92
+ | Retrieval rank-shift | ✓ | ✗ | partial | ✗ | ✗ |
93
+ | Run inside a 5s Lambda | ✓ | ✗ | ✗ | ✓ | partial |
94
+ | numpy-only core deps | ✓ | ✗ | ✗ | ✗ | ✗ |
95
+
96
+ ## Status
97
+
98
+ v0.1 — alpha. The four detectors above work and have tests. Public API may
99
+ change before v1.0. Issues and PRs welcome.
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.24"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "driftvane"
7
+ version = "0.1.0"
8
+ description = "Compose drift detectors (embedding, retrieval, response, latency) into one report. Library-only, no server, no UI."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Mukunda Rao Katta", email = "mukunda.vjcs6@gmail.com" }]
13
+ keywords = [
14
+ "ai",
15
+ "llm",
16
+ "rag",
17
+ "agents",
18
+ "drift",
19
+ "embedding-drift",
20
+ "retrieval-drift",
21
+ "monitoring",
22
+ "evals",
23
+ "mlops",
24
+ ]
25
+ classifiers = [
26
+ "Development Status :: 3 - Alpha",
27
+ "Intended Audience :: Developers",
28
+ "License :: OSI Approved :: MIT License",
29
+ "Operating System :: OS Independent",
30
+ "Programming Language :: Python :: 3",
31
+ "Programming Language :: Python :: 3 :: Only",
32
+ "Programming Language :: Python :: 3.10",
33
+ "Programming Language :: Python :: 3.11",
34
+ "Programming Language :: Python :: 3.12",
35
+ "Programming Language :: Python :: 3.13",
36
+ "Programming Language :: Python :: 3.14",
37
+ "Topic :: Software Development :: Libraries :: Python Modules",
38
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
39
+ ]
40
+ dependencies = [
41
+ "numpy>=1.24",
42
+ ]
43
+
44
+ [project.optional-dependencies]
45
+ pandas = ["pandas>=2.0"]
46
+ external-response = ["context-drift-detector-py>=0.1"]
47
+ dev = ["pytest>=8.0", "pandas>=2.0", "ruff>=0.4"]
48
+
49
+ [project.urls]
50
+ Homepage = "https://github.com/MukundaKatta/driftvane"
51
+ Issues = "https://github.com/MukundaKatta/driftvane/issues"
52
+ Source = "https://github.com/MukundaKatta/driftvane"
53
+
54
+ [tool.hatch.build.targets.wheel]
55
+ packages = ["src/driftvane"]
56
+
57
+ [tool.hatch.build.targets.sdist]
58
+ include = ["/src", "/tests", "/README.md", "/LICENSE"]
59
+
60
+ [tool.pytest.ini_options]
61
+ testpaths = ["tests"]
62
+ pythonpath = ["src"]
63
+
64
+ [tool.ruff]
65
+ target-version = "py310"
66
+ line-length = 100
67
+
68
+ [tool.ruff.lint]
69
+ select = ["E", "F", "I", "UP", "B", "SIM"]
@@ -0,0 +1,25 @@
1
+ """driftvane — compose drift detectors for RAG and agent systems.
2
+
3
+ A small library that lets you wire up multiple drift signals (embedding,
4
+ retrieval, response, latency) into one DriftReport. No server, no UI.
5
+ """
6
+
7
+ from driftvane.detector import DriftAlert, DriftSignal
8
+ from driftvane.detectors.embedding import EmbeddingDrift
9
+ from driftvane.detectors.latency import LatencyDrift
10
+ from driftvane.detectors.response import ResponseDrift
11
+ from driftvane.detectors.retrieval import RetrievalDrift
12
+ from driftvane.report import DriftReport
13
+
14
+ __version__ = "0.1.0"
15
+
16
+ __all__ = [
17
+ "DriftAlert",
18
+ "DriftReport",
19
+ "DriftSignal",
20
+ "EmbeddingDrift",
21
+ "LatencyDrift",
22
+ "ResponseDrift",
23
+ "RetrievalDrift",
24
+ "__version__",
25
+ ]
@@ -0,0 +1,42 @@
1
+ """Core types: DriftSignal, DriftAlert."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+
9
+ @dataclass(frozen=True)
10
+ class DriftSignal:
11
+ """One detector's verdict.
12
+
13
+ name: stable identifier, e.g. "embedding_mmd", "retrieval_jaccard_at_10"
14
+ value: the raw statistic
15
+ threshold: the configured threshold; None means "report only, don't flag"
16
+ drifted: True when value exceeds threshold
17
+ metadata: detector-specific extras (sample sizes, kernel sigma, etc.)
18
+ """
19
+
20
+ name: str
21
+ value: float
22
+ threshold: float | None = None
23
+ drifted: bool = False
24
+ metadata: dict[str, Any] = field(default_factory=dict)
25
+
26
+ def to_dict(self) -> dict[str, Any]:
27
+ return {
28
+ "name": self.name,
29
+ "value": self.value,
30
+ "threshold": self.threshold,
31
+ "drifted": self.drifted,
32
+ "metadata": self.metadata,
33
+ }
34
+
35
+
36
+ class DriftAlert(Exception):
37
+ """Raised by DriftReport.alert_if when a threshold is breached."""
38
+
39
+ def __init__(self, breaches: list[DriftSignal]):
40
+ self.breaches = breaches
41
+ names = ", ".join(f"{s.name}={s.value:.4f}>{s.threshold}" for s in breaches)
42
+ super().__init__(f"drift detected: {names}")
@@ -0,0 +1,6 @@
1
+ from driftvane.detectors.embedding import EmbeddingDrift
2
+ from driftvane.detectors.latency import LatencyDrift
3
+ from driftvane.detectors.response import ResponseDrift
4
+ from driftvane.detectors.retrieval import RetrievalDrift
5
+
6
+ __all__ = ["EmbeddingDrift", "LatencyDrift", "ResponseDrift", "RetrievalDrift"]
@@ -0,0 +1,112 @@
1
+ """EmbeddingDrift — Maximum Mean Discrepancy with RBF kernel.
2
+
3
+ MMD is a kernel two-sample test. It tests whether two batches of embeddings
4
+ were drawn from the same distribution. MMD^2 is zero when the distributions
5
+ match and grows with the distance between them.
6
+
7
+ We compute the squared MMD with the RBF (Gaussian) kernel:
8
+ k(x, y) = exp(-||x - y||^2 / (2 * sigma^2))
9
+ MMD^2 = E[k(X, X')] + E[k(Y, Y')] - 2 E[k(X, Y)]
10
+
11
+ When sigma is None we use the median heuristic on the merged sample, which
12
+ is the standard default and removes the main hyperparameter footgun.
13
+
14
+ Cost is O(n^2) memory and time, so call this with batches up to a few
15
+ thousand vectors. For larger sets, subsample first.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import numpy as np
21
+
22
+ from driftvane.detector import DriftSignal
23
+
24
+
25
+ def _pairwise_sq_dists(a: np.ndarray, b: np.ndarray) -> np.ndarray:
26
+ """Squared Euclidean distance matrix, shape (len(a), len(b))."""
27
+ a2 = np.sum(a * a, axis=1)[:, None]
28
+ b2 = np.sum(b * b, axis=1)[None, :]
29
+ return np.maximum(a2 + b2 - 2.0 * a @ b.T, 0.0)
30
+
31
+
32
+ def _median_heuristic_sigma(x: np.ndarray, y: np.ndarray) -> float:
33
+ """Median pairwise distance on the merged sample. Robust default for sigma."""
34
+ z = np.concatenate([x, y], axis=0)
35
+ # subsample to keep this cheap on big inputs
36
+ if len(z) > 1000:
37
+ rng = np.random.default_rng(0)
38
+ idx = rng.choice(len(z), size=1000, replace=False)
39
+ z = z[idx]
40
+ d2 = _pairwise_sq_dists(z, z)
41
+ iu = np.triu_indices_from(d2, k=1)
42
+ median_sq = float(np.median(d2[iu]))
43
+ # sigma is the bandwidth, not sigma^2; floor to avoid div-by-zero
44
+ return max(np.sqrt(median_sq / 2.0), 1e-8)
45
+
46
+
47
+ def mmd_rbf(x: np.ndarray, y: np.ndarray, sigma: float | None = None) -> tuple[float, float]:
48
+ """Compute MMD^2 between two batches with RBF kernel.
49
+
50
+ Returns (mmd_squared, sigma_used).
51
+ """
52
+ if sigma is None:
53
+ sigma = _median_heuristic_sigma(x, y)
54
+ gamma = 1.0 / (2.0 * sigma * sigma)
55
+
56
+ kxx = np.exp(-gamma * _pairwise_sq_dists(x, x))
57
+ kyy = np.exp(-gamma * _pairwise_sq_dists(y, y))
58
+ kxy = np.exp(-gamma * _pairwise_sq_dists(x, y))
59
+
60
+ mmd2 = float(kxx.mean() + kyy.mean() - 2.0 * kxy.mean())
61
+ # numerical noise can push the value slightly negative; clamp at 0
62
+ return max(mmd2, 0.0), sigma
63
+
64
+
65
+ class EmbeddingDrift:
66
+ """Detect distribution shift between two batches of embedding vectors.
67
+
68
+ ed = EmbeddingDrift(threshold=0.1)
69
+ signal = ed.compute(reference=ref_emb, current=cur_emb)
70
+ """
71
+
72
+ def __init__(
73
+ self,
74
+ method: str = "mmd",
75
+ sigma: float | None = None,
76
+ threshold: float | None = None,
77
+ name: str = "embedding_mmd",
78
+ ) -> None:
79
+ if method != "mmd":
80
+ raise ValueError(f"unknown method: {method!r}; only 'mmd' is supported")
81
+ self.method = method
82
+ self.sigma = sigma
83
+ self.threshold = threshold
84
+ self.name = name
85
+
86
+ def compute(self, reference: np.ndarray, current: np.ndarray) -> DriftSignal:
87
+ ref = np.asarray(reference, dtype=np.float64)
88
+ cur = np.asarray(current, dtype=np.float64)
89
+ if ref.ndim != 2 or cur.ndim != 2:
90
+ raise ValueError("reference and current must be 2-D (n_samples, n_dims)")
91
+ if ref.shape[1] != cur.shape[1]:
92
+ raise ValueError(
93
+ f"dim mismatch: reference has {ref.shape[1]}, current has {cur.shape[1]}"
94
+ )
95
+ if len(ref) < 2 or len(cur) < 2:
96
+ raise ValueError("need at least 2 samples in each set")
97
+
98
+ value, sigma_used = mmd_rbf(ref, cur, sigma=self.sigma)
99
+ drifted = self.threshold is not None and value > self.threshold
100
+ return DriftSignal(
101
+ name=self.name,
102
+ value=value,
103
+ threshold=self.threshold,
104
+ drifted=drifted,
105
+ metadata={
106
+ "n_ref": int(ref.shape[0]),
107
+ "n_cur": int(cur.shape[0]),
108
+ "dim": int(ref.shape[1]),
109
+ "sigma": float(sigma_used),
110
+ "method": self.method,
111
+ },
112
+ )
@@ -0,0 +1,90 @@
1
+ """LatencyDrift — Kolmogorov-Smirnov two-sample test on latency arrays.
2
+
3
+ KS compares the empirical CDFs of two samples. The statistic is the maximum
4
+ absolute difference between the CDFs and is bounded in [0, 1]. It is robust
5
+ to scale and doesn't assume any particular distribution, which matches how
6
+ real LLM latency tails behave.
7
+
8
+ We compute the KS statistic from sorted arrays without scipy so the install
9
+ stays light. For an approximate p-value we use the standard asymptotic form
10
+ sqrt(-0.5 * ln(alpha/2) * (n1+n2)/(n1*n2)).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import math
16
+ from collections.abc import Sequence
17
+
18
+ import numpy as np
19
+
20
+ from driftvane.detector import DriftSignal
21
+
22
+
23
+ def ks_2samp(x: Sequence[float], y: Sequence[float]) -> tuple[float, float]:
24
+ """Return (D, approx_p_value). Numpy-only two-sample KS."""
25
+ a = np.sort(np.asarray(x, dtype=np.float64))
26
+ b = np.sort(np.asarray(y, dtype=np.float64))
27
+ n1, n2 = len(a), len(b)
28
+ if n1 == 0 or n2 == 0:
29
+ raise ValueError("both arrays must be non-empty")
30
+ all_v = np.concatenate([a, b])
31
+ cdf_a = np.searchsorted(a, all_v, side="right") / n1
32
+ cdf_b = np.searchsorted(b, all_v, side="right") / n2
33
+ d = float(np.max(np.abs(cdf_a - cdf_b)))
34
+
35
+ if d == 0.0:
36
+ # asymptotic series degenerates at d=0; the null is trivially consistent
37
+ return 0.0, 1.0
38
+
39
+ en = math.sqrt(n1 * n2 / (n1 + n2))
40
+ # asymptotic two-sided p-value (Smirnov)
41
+ lam = (en + 0.12 + 0.11 / en) * d
42
+ p = 2.0 * sum(((-1) ** (k - 1)) * math.exp(-2.0 * lam * lam * k * k) for k in range(1, 101))
43
+ p = max(0.0, min(1.0, p))
44
+ return d, p
45
+
46
+
47
+ class LatencyDrift:
48
+ """Detect distribution shift in latency (or any 1-D numeric array).
49
+
50
+ ld = LatencyDrift(threshold=0.2) # threshold on KS statistic
51
+ signal = ld.compute(reference=ref_lat, current=cur_lat)
52
+
53
+ Or threshold on p-value:
54
+
55
+ ld = LatencyDrift(p_threshold=0.01)
56
+ """
57
+
58
+ def __init__(
59
+ self,
60
+ threshold: float | None = None,
61
+ p_threshold: float | None = None,
62
+ name: str = "latency_ks",
63
+ ) -> None:
64
+ if threshold is not None and p_threshold is not None:
65
+ raise ValueError("set either threshold or p_threshold, not both")
66
+ self.threshold = threshold
67
+ self.p_threshold = p_threshold
68
+ self.name = name
69
+
70
+ def compute(self, reference: Sequence[float], current: Sequence[float]) -> DriftSignal:
71
+ d, p = ks_2samp(reference, current)
72
+ if self.p_threshold is not None:
73
+ drifted = p < self.p_threshold
74
+ else:
75
+ drifted = self.threshold is not None and d > self.threshold
76
+
77
+ return DriftSignal(
78
+ name=self.name,
79
+ value=d,
80
+ threshold=self.threshold,
81
+ drifted=drifted,
82
+ metadata={
83
+ "n_ref": len(reference),
84
+ "n_cur": len(current),
85
+ "ks_p_value": p,
86
+ "p_threshold": self.p_threshold,
87
+ "median_ref": float(np.median(reference)),
88
+ "median_cur": float(np.median(current)),
89
+ },
90
+ )
@@ -0,0 +1,128 @@
1
+ """ResponseDrift — answer-vs-context grounding drift across batches.
2
+
3
+ For each (intent, context, answer) triple, compute Jaccard overlap of token
4
+ sets between the answer and the context. Then compare the *distribution* of
5
+ those scores between reference and current batches.
6
+
7
+ The drift value is the absolute difference of the mean grounding scores. A
8
+ shrinking mean answer-to-context overlap is the signal you want to catch:
9
+ the model is wandering off the retrieved context.
10
+
11
+ If `context-drift-detector-py` is installed we delegate per-triple scoring
12
+ to it for compatibility with that library's signal definitions; otherwise
13
+ we use the inline tokenizer below. Either way, the aggregation is ours.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import re
19
+ from collections.abc import Iterable
20
+ from dataclasses import dataclass
21
+
22
+ from driftvane.detector import DriftSignal
23
+
24
+
25
+ @dataclass(frozen=True)
26
+ class Triple:
27
+ intent: str
28
+ context: str | list[str]
29
+ answer: str
30
+
31
+
32
+ _WORD_RE = re.compile(r"[a-z0-9]+")
33
+
34
+
35
+ def _tokens(text: str) -> set[str]:
36
+ return set(_WORD_RE.findall(text.lower()))
37
+
38
+
39
+ def _flatten_context(ctx: str | Iterable[str]) -> str:
40
+ if isinstance(ctx, str):
41
+ return ctx
42
+ return " ".join(ctx)
43
+
44
+
45
+ def _grounding_score(triple: Triple) -> float:
46
+ """answer ∩ context / answer (recall-style; 1.0 = fully grounded)."""
47
+ ans = _tokens(triple.answer)
48
+ if not ans:
49
+ return 1.0
50
+ ctx = _tokens(_flatten_context(triple.context))
51
+ return len(ans & ctx) / len(ans)
52
+
53
+
54
+ def _try_load_external_scorer():
55
+ try:
56
+ from context_drift_detector import detect # type: ignore
57
+ except ImportError:
58
+ return None
59
+
60
+ def _score(triple: Triple) -> float:
61
+ ctx = triple.context if isinstance(triple.context, list) else [triple.context]
62
+ result = detect(triple.intent, ctx, triple.answer)
63
+ # context-drift-detector-py exposes signals dict with answer_to_context
64
+ return float(result.signals.get("answer_to_context", _grounding_score(triple)))
65
+
66
+ return _score
67
+
68
+
69
+ class ResponseDrift:
70
+ """Detect drift in how well answers stay grounded in retrieved context.
71
+
72
+ rsp = ResponseDrift(threshold=0.15)
73
+ signal = rsp.compute(
74
+ reference=[Triple("...", "...", "..."), ...],
75
+ current=[Triple("...", "...", "..."), ...],
76
+ )
77
+
78
+ Pass `use_external=False` to force the inline tokenizer even when
79
+ context-drift-detector-py is installed.
80
+ """
81
+
82
+ def __init__(
83
+ self,
84
+ threshold: float | None = None,
85
+ name: str = "response_grounding_shift",
86
+ use_external: bool = True,
87
+ ) -> None:
88
+ self.threshold = threshold
89
+ self.name = name
90
+ self.use_external = use_external
91
+ self._scorer = _try_load_external_scorer() if use_external else None
92
+
93
+ def compute(
94
+ self,
95
+ reference: Iterable[Triple | dict],
96
+ current: Iterable[Triple | dict],
97
+ ) -> DriftSignal:
98
+ ref = [t if isinstance(t, Triple) else Triple(**t) for t in reference]
99
+ cur = [t if isinstance(t, Triple) else Triple(**t) for t in current]
100
+ if not ref or not cur:
101
+ raise ValueError("need at least 1 triple in each batch")
102
+
103
+ score = self._scorer or _grounding_score
104
+ ref_scores = [score(t) for t in ref]
105
+ cur_scores = [score(t) for t in cur]
106
+
107
+ mean_ref = sum(ref_scores) / len(ref_scores)
108
+ mean_cur = sum(cur_scores) / len(cur_scores)
109
+ # we care about *worsening* grounding, so use signed shift but report
110
+ # absolute as the drift value
111
+ signed_shift = mean_cur - mean_ref
112
+ drift_value = abs(signed_shift)
113
+ drifted = self.threshold is not None and drift_value > self.threshold
114
+
115
+ return DriftSignal(
116
+ name=self.name,
117
+ value=drift_value,
118
+ threshold=self.threshold,
119
+ drifted=drifted,
120
+ metadata={
121
+ "n_ref": len(ref),
122
+ "n_cur": len(cur),
123
+ "mean_ref_grounding": mean_ref,
124
+ "mean_cur_grounding": mean_cur,
125
+ "signed_shift": signed_shift,
126
+ "scorer": "external" if self._scorer else "inline_jaccard",
127
+ },
128
+ )
@@ -0,0 +1,113 @@
1
+ """RetrievalDrift — measure shift in retriever output for the same queries.
2
+
3
+ Inputs are paired top-k document-id lists: for each query, the reference
4
+ retriever produced one ranked list and the current retriever produced another.
5
+ Drift = how much the top-k sets and rank order have moved.
6
+
7
+ Two metrics:
8
+ * mean_jaccard_at_k: average Jaccard overlap of the top-k sets (1.0 = identical)
9
+ * mean_rbo: rank-biased overlap, weights early positions more (1.0 = identical)
10
+
11
+ The reported drift value is 1 - mean_jaccard_at_k so that "more drift = larger
12
+ value" matches the convention in the other detectors.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Sequence
18
+ from typing import Any
19
+
20
+ from driftvane.detector import DriftSignal
21
+
22
+
23
+ def _jaccard(a: set[Any], b: set[Any]) -> float:
24
+ if not a and not b:
25
+ return 1.0
26
+ return len(a & b) / len(a | b)
27
+
28
+
29
+ def _rbo(ref: Sequence[Any], cur: Sequence[Any], p: float = 0.9) -> float:
30
+ """Rank-biased overlap. Weighted overlap of the two prefix sets at each depth.
31
+
32
+ p controls how top-heavy the weighting is; p=0.9 puts ~86% of weight on the
33
+ top 10. See Webber, Moffat, Zobel 2010.
34
+ """
35
+ depth = max(len(ref), len(cur))
36
+ if depth == 0:
37
+ return 1.0
38
+ seen_ref: set[Any] = set()
39
+ seen_cur: set[Any] = set()
40
+ weighted_sum = 0.0
41
+ weight_total = 0.0
42
+ for i in range(depth):
43
+ if i < len(ref):
44
+ seen_ref.add(ref[i])
45
+ if i < len(cur):
46
+ seen_cur.add(cur[i])
47
+ agreement = len(seen_ref & seen_cur) / (i + 1)
48
+ w = p**i
49
+ weighted_sum += agreement * w
50
+ weight_total += w
51
+ return weighted_sum / weight_total if weight_total > 0 else 1.0
52
+
53
+
54
+ class RetrievalDrift:
55
+ """Detect retrieval drift across paired query→top-k results.
56
+
57
+ rd = RetrievalDrift(k=10, threshold=0.3)
58
+ signal = rd.compute(
59
+ reference=[["doc_1", "doc_2", ...], ...],
60
+ current=[["doc_1", "doc_3", ...], ...],
61
+ )
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ k: int = 10,
67
+ threshold: float | None = None,
68
+ name: str | None = None,
69
+ ) -> None:
70
+ if k < 1:
71
+ raise ValueError("k must be >= 1")
72
+ self.k = k
73
+ self.threshold = threshold
74
+ self.name = name or f"retrieval_jaccard_at_{k}"
75
+
76
+ def compute(
77
+ self,
78
+ reference: Sequence[Sequence[Any]],
79
+ current: Sequence[Sequence[Any]],
80
+ ) -> DriftSignal:
81
+ if len(reference) != len(current):
82
+ raise ValueError(
83
+ f"reference and current must have the same number of queries; "
84
+ f"got {len(reference)} vs {len(current)}"
85
+ )
86
+ if not reference:
87
+ raise ValueError("need at least 1 query")
88
+
89
+ jaccards: list[float] = []
90
+ rbos: list[float] = []
91
+ for ref_list, cur_list in zip(reference, current, strict=True):
92
+ ref_top = list(ref_list[: self.k])
93
+ cur_top = list(cur_list[: self.k])
94
+ jaccards.append(_jaccard(set(ref_top), set(cur_top)))
95
+ rbos.append(_rbo(ref_top, cur_top))
96
+
97
+ mean_jaccard = sum(jaccards) / len(jaccards)
98
+ mean_rbo = sum(rbos) / len(rbos)
99
+ drift_value = 1.0 - mean_jaccard
100
+ drifted = self.threshold is not None and drift_value > self.threshold
101
+
102
+ return DriftSignal(
103
+ name=self.name,
104
+ value=drift_value,
105
+ threshold=self.threshold,
106
+ drifted=drifted,
107
+ metadata={
108
+ "n_queries": len(reference),
109
+ "k": self.k,
110
+ "mean_jaccard_at_k": mean_jaccard,
111
+ "mean_rbo": mean_rbo,
112
+ },
113
+ )
@@ -0,0 +1,99 @@
1
+ """DriftReport — collect signals from multiple detectors."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from driftvane.detector import DriftAlert, DriftSignal
8
+
9
+
10
class DriftReport:
    """A bag of DriftSignals with output helpers.

    Build it incrementally:

        report = DriftReport()
        report.add(EmbeddingDrift().compute(ref_emb, cur_emb))
        report.add(LatencyDrift().compute(ref_lat, cur_lat))

    Or in one shot:

        report = DriftReport.from_signals([
            EmbeddingDrift().compute(ref_emb, cur_emb),
            LatencyDrift().compute(ref_lat, cur_lat),
        ])
    """

    def __init__(self) -> None:
        # Signals are stored in insertion order.
        self._signals: list[DriftSignal] = []

    @classmethod
    def from_signals(cls, signals: list[DriftSignal]) -> DriftReport:
        """Build a report pre-populated with *signals*."""
        report = cls()
        for sig in signals:
            report.add(sig)
        return report

    def add(self, signal: DriftSignal) -> DriftReport:
        """Append a signal and return self so calls chain fluently."""
        self._signals.append(signal)
        return self

    @property
    def signals(self) -> list[DriftSignal]:
        """A defensive copy of the collected signals."""
        return list(self._signals)

    def get(self, name: str) -> DriftSignal | None:
        """Return the first signal named *name*, or None when absent."""
        return next((sig for sig in self._signals if sig.name == name), None)

    def any_drifted(self) -> bool:
        """True if at least one collected signal is flagged as drifted."""
        return any(sig.drifted for sig in self._signals)

    def to_dict(self) -> dict[str, Any]:
        """JSON-friendly dict: per-signal dicts plus the overall flag."""
        return {
            "signals": [sig.to_dict() for sig in self._signals],
            "any_drifted": self.any_drifted(),
        }

    def to_pandas(self):
        """One DataFrame row per signal; metadata keys become ``meta_*`` columns."""
        # imported lazily so pandas isn't required for non-DataFrame users
        import pandas as pd

        if not self._signals:
            return pd.DataFrame(columns=["name", "value", "threshold", "drifted"])
        rows = []
        for sig in self._signals:
            row = {
                "name": sig.name,
                "value": sig.value,
                "threshold": sig.threshold,
                "drifted": sig.drifted,
            }
            row.update({f"meta_{key}": val for key, val in sig.metadata.items()})
            rows.append(row)
        return pd.DataFrame(rows)

    def alert_if(self, thresholds: dict[str, float]) -> None:
        """Raise DriftAlert if any of the given signals exceeds its threshold.

        Overrides the threshold each signal was computed with. Use this when the
        report is being evaluated against a different policy than the detector
        was constructed with (e.g. CI vs. prod).
        """
        # Re-evaluate each listed signal against the caller-supplied policy.
        breached = [
            DriftSignal(
                name=sig.name,
                value=sig.value,
                threshold=thresholds[sig.name],
                drifted=True,
                metadata=sig.metadata,
            )
            for sig in self._signals
            if sig.name in thresholds and sig.value > thresholds[sig.name]
        ]
        if breached:
            raise DriftAlert(breached)
File without changes
@@ -0,0 +1,73 @@
1
"""Tests for EmbeddingDrift: MMD-based embedding-distribution drift detection."""

import numpy as np
import pytest

from driftvane import EmbeddingDrift


def _gen(n: int, dim: int, mean: float = 0.0, scale: float = 1.0, seed: int = 0):
    # Deterministic Gaussian sample matrix of shape (n, dim).
    rng = np.random.default_rng(seed)
    return rng.normal(loc=mean, scale=scale, size=(n, dim))


def test_no_drift_for_same_distribution():
    # Two independent draws from the same distribution (different seeds).
    a = _gen(200, 16, seed=1)
    b = _gen(200, 16, seed=2)
    sig = EmbeddingDrift().compute(a, b)
    # MMD between two N(0,1) samples of the same shape should be small
    assert sig.value < 0.05
    assert sig.metadata["n_ref"] == 200
    assert sig.metadata["dim"] == 16


def test_drift_for_shifted_mean():
    # A 2-unit mean shift should produce a clearly non-trivial MMD.
    a = _gen(200, 16, mean=0.0, seed=1)
    b = _gen(200, 16, mean=2.0, seed=2)
    sig = EmbeddingDrift().compute(a, b)
    assert sig.value > 0.1


def test_threshold_flags_drift():
    a = _gen(200, 16, mean=0.0, seed=1)
    b = _gen(200, 16, mean=2.0, seed=2)
    sig = EmbeddingDrift(threshold=0.1).compute(a, b)
    assert sig.drifted is True


def test_threshold_passes_when_under():
    # Same distribution, generous threshold: must not flag.
    a = _gen(200, 16, seed=1)
    b = _gen(200, 16, seed=2)
    sig = EmbeddingDrift(threshold=0.5).compute(a, b)
    assert sig.drifted is False


def test_dim_mismatch_raises():
    # Embedding dimensionality must agree between reference and current.
    a = _gen(50, 8)
    b = _gen(50, 16)
    with pytest.raises(ValueError, match="dim mismatch"):
        EmbeddingDrift().compute(a, b)


def test_one_sample_raises():
    a = _gen(1, 8)
    b = _gen(50, 8)
    with pytest.raises(ValueError, match="at least 2 samples"):
        EmbeddingDrift().compute(a, b)


def test_unknown_method_raises():
    # Constructor validates the method name eagerly.
    with pytest.raises(ValueError, match="unknown method"):
        EmbeddingDrift(method="kld")


def test_explicit_sigma_used():
    # A user-supplied kernel bandwidth is echoed back in metadata.
    a = _gen(50, 8)
    b = _gen(50, 8)
    sig = EmbeddingDrift(sigma=2.5).compute(a, b)
    assert sig.metadata["sigma"] == 2.5


def test_value_is_non_negative():
    # Identical samples: the (biased) MMD estimate must not go below zero.
    a = _gen(200, 16, seed=1)
    b = _gen(200, 16, seed=1)
    sig = EmbeddingDrift().compute(a, b)
    assert sig.value >= 0.0
@@ -0,0 +1,63 @@
1
"""Tests for LatencyDrift and the ks_2samp two-sample KS helper."""

import numpy as np
import pytest

from driftvane import LatencyDrift
from driftvane.detectors.latency import ks_2samp


def test_identical_samples_have_zero_ks():
    # Same array on both sides: D statistic is exactly 0, p-value near 1.
    rng = np.random.default_rng(0)
    a = rng.normal(size=500)
    sig = LatencyDrift().compute(a, a)
    assert sig.value == 0.0
    assert sig.metadata["ks_p_value"] == pytest.approx(1.0, abs=0.05)


def test_shifted_distribution_has_high_ks():
    # A 2-sigma mean shift is unmistakable at n=500.
    rng = np.random.default_rng(0)
    a = rng.normal(loc=0.0, size=500)
    b = rng.normal(loc=2.0, size=500)
    sig = LatencyDrift().compute(a, b)
    assert sig.value > 0.5
    assert sig.metadata["ks_p_value"] < 0.001


def test_threshold_flags_on_d_statistic():
    rng = np.random.default_rng(0)
    a = rng.normal(loc=0.0, size=200)
    b = rng.normal(loc=2.0, size=200)
    sig = LatencyDrift(threshold=0.3).compute(a, b)
    assert sig.drifted is True


def test_p_threshold_flags():
    # Flagging can alternatively be driven by the p-value.
    rng = np.random.default_rng(0)
    a = rng.normal(loc=0.0, size=200)
    b = rng.normal(loc=2.0, size=200)
    sig = LatencyDrift(p_threshold=0.01).compute(a, b)
    assert sig.drifted is True


def test_cant_set_both_thresholds():
    # threshold (on D) and p_threshold (on p) are mutually exclusive.
    with pytest.raises(ValueError, match="not both"):
        LatencyDrift(threshold=0.1, p_threshold=0.01)


def test_empty_raises():
    with pytest.raises(ValueError, match="non-empty"):
        ks_2samp([], [1.0, 2.0])


def test_ks_returns_in_unit_interval():
    # Both the D statistic and the p-value are probabilities-like in [0, 1].
    rng = np.random.default_rng(0)
    a = rng.normal(size=100)
    b = rng.normal(size=100)
    d, p = ks_2samp(a, b)
    assert 0.0 <= d <= 1.0
    assert 0.0 <= p <= 1.0


def test_median_metadata_present():
    # Medians of both samples are surfaced for human inspection.
    sig = LatencyDrift().compute([1.0, 2.0, 3.0, 4.0], [10.0, 20.0, 30.0, 40.0])
    assert sig.metadata["median_ref"] == pytest.approx(2.5)
    assert sig.metadata["median_cur"] == pytest.approx(25.0)
@@ -0,0 +1,61 @@
1
"""Tests for DriftReport: accumulation, lookup, export, and alerting."""

import pytest

from driftvane import DriftAlert, DriftReport, DriftSignal


def _sig(
    name: str,
    value: float,
    threshold: float | None = None,
    drifted: bool = False,
) -> DriftSignal:
    # Shorthand constructor for a bare DriftSignal with no metadata.
    return DriftSignal(name=name, value=value, threshold=threshold, drifted=drifted)


def test_empty_report_is_not_drifted():
    r = DriftReport()
    assert r.signals == []
    assert r.any_drifted() is False
    assert r.to_dict() == {"signals": [], "any_drifted": False}


def test_add_chains():
    # add() returns the report itself so calls can be chained.
    r = DriftReport()
    out = r.add(_sig("a", 0.1)).add(_sig("b", 0.2))
    assert out is r
    assert [s.name for s in r.signals] == ["a", "b"]


def test_from_signals():
    r = DriftReport.from_signals([_sig("a", 0.1), _sig("b", 0.2, threshold=0.1, drifted=True)])
    assert r.any_drifted() is True
    assert r.get("a").value == 0.1
    assert r.get("missing") is None


def test_to_pandas_includes_metadata_columns():
    # Metadata keys are prefixed with meta_ and appended after the core columns.
    r = DriftReport().add(
        DriftSignal(name="x", value=0.5, metadata={"sigma": 1.5, "n": 100})
    )
    df = r.to_pandas()
    assert list(df.columns) == ["name", "value", "threshold", "drifted", "meta_sigma", "meta_n"]
    assert df.iloc[0]["meta_sigma"] == 1.5


def test_alert_if_raises_on_breach():
    # Only the signal exceeding its per-name threshold appears in breaches.
    r = DriftReport().add(_sig("emb", 0.3)).add(_sig("lat", 0.05))
    with pytest.raises(DriftAlert) as exc:
        r.alert_if({"emb": 0.2, "lat": 0.1})
    assert len(exc.value.breaches) == 1
    assert exc.value.breaches[0].name == "emb"


def test_alert_if_no_breach():
    r = DriftReport().add(_sig("emb", 0.05))
    # should not raise
    r.alert_if({"emb": 0.2})


def test_alert_if_ignores_unlisted_signals():
    r = DriftReport().add(_sig("emb", 999.0))
    r.alert_if({"latency": 0.1})  # emb has no threshold here, should not raise
@@ -0,0 +1,67 @@
1
"""Tests for ResponseDrift: groundedness shift between reference and current answers."""

import pytest

from driftvane import ResponseDrift
from driftvane.detectors.response import Triple


def test_grounded_answers_have_no_drift():
    # Answers that literally restate their context are maximally grounded;
    # comparing a set against itself must yield zero drift.
    triples = [
        Triple(
            intent="What is the capital of France?",
            context="Paris is the capital of France.",
            answer="Paris is the capital of France.",
        ),
        Triple(
            intent="What is 2+2?",
            context="Two plus two equals four.",
            answer="Two plus two equals four.",
        ),
    ]
    sig = ResponseDrift(use_external=False).compute(triples, triples)
    assert sig.value == pytest.approx(0.0)


def test_ungrounded_current_drifts_negative():
    # Current answers unrelated to their context: groundedness drops,
    # so the signed shift is negative and absolute drift is positive.
    grounded = [
        Triple(
            intent="capital of France",
            context="Paris is the capital of France.",
            answer="Paris is the capital of France.",
        )
    ]
    ungrounded = [
        Triple(
            intent="capital of France",
            context="Paris is the capital of France.",
            answer="Wombats live in Australia.",
        )
    ]
    sig = ResponseDrift(use_external=False).compute(grounded, ungrounded)
    assert sig.metadata["signed_shift"] < 0
    assert sig.value > 0


def test_threshold_flags():
    a = [Triple("q", "the answer is forty two", "the answer is forty two")]
    b = [Triple("q", "the answer is forty two", "completely unrelated text")]
    sig = ResponseDrift(threshold=0.3, use_external=False).compute(a, b)
    assert sig.drifted is True


def test_accepts_dicts_and_list_context():
    # Plain dicts are accepted in place of Triple, and context may be a
    # list of passages rather than a single string.
    a = [{"intent": "q", "context": ["fact a", "fact b"], "answer": "fact a"}]
    b = [{"intent": "q", "context": ["fact a"], "answer": "fact b fact a"}]
    sig = ResponseDrift(use_external=False).compute(a, b)
    assert sig.metadata["n_ref"] == 1
    assert sig.metadata["n_cur"] == 1


def test_empty_raises():
    with pytest.raises(ValueError, match="at least 1 triple"):
        ResponseDrift(use_external=False).compute([], [])


def test_inline_scorer_used_when_external_disabled():
    # With use_external=False the built-in Jaccard scorer must be reported.
    triples = [Triple("q", "ctx", "ctx")]
    sig = ResponseDrift(use_external=False).compute(triples, triples)
    assert sig.metadata["scorer"] == "inline_jaccard"
@@ -0,0 +1,60 @@
1
"""Tests for RetrievalDrift: Jaccard@k drift plus rank-aware RBO metadata."""

import pytest

from driftvane import RetrievalDrift


def test_identical_rankings_have_zero_drift():
    ref = [["a", "b", "c", "d"], ["x", "y", "z"]]
    cur = [["a", "b", "c", "d"], ["x", "y", "z"]]
    sig = RetrievalDrift(k=4).compute(ref, cur)
    assert sig.value == 0.0
    assert sig.metadata["mean_jaccard_at_k"] == 1.0
    assert sig.metadata["mean_rbo"] == 1.0


def test_disjoint_rankings_have_max_drift():
    # No overlap at all: drift saturates at 1.
    ref = [["a", "b", "c"]]
    cur = [["x", "y", "z"]]
    sig = RetrievalDrift(k=3).compute(ref, cur)
    assert sig.value == 1.0
    assert sig.metadata["mean_jaccard_at_k"] == 0.0


def test_partial_overlap():
    ref = [["a", "b", "c", "d"]]
    cur = [["a", "b", "x", "y"]]
    sig = RetrievalDrift(k=4).compute(ref, cur)
    # Jaccard of {a,b,c,d} vs {a,b,x,y} = 2/6 = 1/3
    assert sig.metadata["mean_jaccard_at_k"] == pytest.approx(1 / 3)


def test_threshold_flags():
    ref = [["a", "b", "c"]]
    cur = [["a", "x", "y"]]
    sig = RetrievalDrift(k=3, threshold=0.5).compute(ref, cur)
    # 1 - (1/5) = 0.8 > 0.5
    assert sig.drifted is True


def test_mismatched_lengths_raises():
    with pytest.raises(ValueError, match="same number of queries"):
        RetrievalDrift().compute([["a"]], [["a"], ["b"]])


def test_empty_input_raises():
    with pytest.raises(ValueError, match="at least 1 query"):
        RetrievalDrift().compute([], [])


def test_k_must_be_positive():
    # k is validated eagerly in the constructor.
    with pytest.raises(ValueError, match="k must be"):
        RetrievalDrift(k=0)


def test_rbo_weights_top_positions_more():
    # Same set, different order: Jaccard sees no drift, RBO does
    ref = [["a", "b", "c", "d", "e"]]
    cur = [["e", "d", "c", "b", "a"]]
    sig = RetrievalDrift(k=5).compute(ref, cur)
    assert sig.metadata["mean_jaccard_at_k"] == 1.0
    assert sig.metadata["mean_rbo"] < 1.0