embspec 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- embspec-0.1.0/LICENSE +201 -0
- embspec-0.1.0/PKG-INFO +167 -0
- embspec-0.1.0/README.md +138 -0
- embspec-0.1.0/pyproject.toml +49 -0
- embspec-0.1.0/src/embspec/__init__.py +64 -0
- embspec-0.1.0/src/embspec/_adapter.py +116 -0
- embspec-0.1.0/src/embspec/_assert.py +73 -0
- embspec-0.1.0/src/embspec/_errors.py +48 -0
- embspec-0.1.0/src/embspec/_manifest.py +146 -0
- embspec-0.1.0/src/embspec/_stability.py +103 -0
- embspec-0.1.0/src/embspec/py.typed +0 -0
embspec-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for describing the origin of the Work and
|
|
141
|
+
reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Support. While redistributing the Work or
|
|
166
|
+
Derivative Works thereof, You may choose to offer, and charge a
|
|
167
|
+
fee for, acceptance of support, warranty, indemnity, or other
|
|
168
|
+
liability obligations and/or rights consistent with this License.
|
|
169
|
+
However, in accepting such obligations, You may act only on Your
|
|
170
|
+
own behalf and on Your sole responsibility, not on behalf of any
|
|
171
|
+
other Contributor, and only if You agree to indemnify, defend,
|
|
172
|
+
and hold each Contributor harmless for any liability incurred by,
|
|
173
|
+
or claims asserted against, such Contributor by reason of your
|
|
174
|
+
accepting any such warranty or support.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
179
|
+
|
|
180
|
+
To apply the Apache License to your work, attach the following
|
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
182
|
+
replaced with your own identifying information. (Don't include
|
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
184
|
+
comment syntax for the file format. We also recommend that a
|
|
185
|
+
file or class name and description of purpose be included on the
|
|
186
|
+
same "printed page" as the copyright notice for easier
|
|
187
|
+
identification within third-party archives.
|
|
188
|
+
|
|
189
|
+
Copyright 2026 Mukunda Katta
|
|
190
|
+
|
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
|
+
you may not use this file except in compliance with the License.
|
|
193
|
+
You may obtain a copy of the License at
|
|
194
|
+
|
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
196
|
+
|
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
200
|
+
implied. See the License for the specific language governing
|
|
201
|
+
permissions and limitations under the License.
|
embspec-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: embspec
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Embedding pipeline ops + drift detection for production RAG: index manifests, version assertions, neighbor-stability eval, Drift-Adapter for in-place model migrations.
|
|
5
|
+
Keywords: rag,embeddings,vector,drift,retrieval,production
|
|
6
|
+
Author: Mukunda Katta
|
|
7
|
+
Author-email: Mukunda Katta <mukunda.vjcs6@gmail.com>
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Dist: numpy>=1.24
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Project-URL: Homepage, https://github.com/MukundaKatta/embspec
|
|
25
|
+
Project-URL: Source, https://github.com/MukundaKatta/embspec
|
|
26
|
+
Project-URL: Issues, https://github.com/MukundaKatta/embspec/issues
|
|
27
|
+
Project-URL: Changelog, https://github.com/MukundaKatta/embspec/blob/main/CHANGELOG.md
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# embspec
|
|
31
|
+
|
|
32
|
+
Embedding pipeline ops + drift detection for production RAG.
|
|
33
|
+
|
|
34
|
+
The single failure mode this library prevents: query encoder upgrade ships before the index is re-encoded; every health check stays 200 OK while retrieval accuracy silently collapses. The [decompressed.io RAG observability post-mortem (2026-03-09)](https://decompressed.io/learn/rag-observability-postmortem) describes this exact bug — \$15K of emergency re-encoding plus 2-5 days of engineer time before someone diagnosed it.
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install embspec
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from embspec import IndexManifest, embed_assert
|
|
42
|
+
|
|
43
|
+
manifest = IndexManifest.load("s3://my-rag/index-prod/manifest.json")
|
|
44
|
+
|
|
45
|
+
@embed_assert(manifest, model_id="amazon.titan-embed-text-v2:0", dimension=1024)
|
|
46
|
+
def search(query: str):
|
|
47
|
+
qv = embed_query(query)
|
|
48
|
+
return opensearch.knn_search(qv, ...)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
If the query encoder ever drifts off the manifest, `search` raises `EmbeddingVersionMismatch` instead of silently returning bad results.
|
|
52
|
+
|
|
53
|
+
## What's in v0.1
|
|
54
|
+
|
|
55
|
+
| Primitive | What it does | Anchored in |
|
|
56
|
+
|---|---|---|
|
|
57
|
+
| `IndexManifest` + `EmbeddingSpec` | Single-file source of truth for "what does this index contain" — model id, dimension, version, normalization | [decompressed.io post-mortem](https://decompressed.io/learn/rag-observability-postmortem) |
|
|
58
|
+
| `assert_compatible()` / `embed_assert()` decorator | Fast-fail on encoder/index drift; raise or warn modes | same |
|
|
59
|
+
| `DriftAdapter` | Linear adapter from new-model embeddings to old-model space; lets you swap the query encoder without re-encoding the corpus | [Drift-Adapter, Vejendla 2025 (arxiv:2509.23471)](https://arxiv.org/abs/2509.23471) |
|
|
60
|
+
| `neighbor_stability()` | Compare two retrievers on a frozen probe set; reports overlap, Jaccard, regression list, deploy-safety verdict | [RAGOps survey, Xu et al. 2025 (arxiv:2506.03401)](https://arxiv.org/abs/2506.03401) |
|
|
61
|
+
|
|
62
|
+
## Why not Evidently / Phoenix / WhyLogs?
|
|
63
|
+
|
|
64
|
+
- **Evidently** — tabular-ML drift heritage; LLM additions are recent and platform-shaped. Not a drop-in primitive.
|
|
65
|
+
- **Phoenix** — embedding-drift visualization is a sub-feature of a full observability platform. You adopt the platform.
|
|
66
|
+
- **WhyLogs** — generic data-logging primitive; not embedding-aware; last commit 2025-01.
|
|
67
|
+
- **embspec** — three small primitives (`IndexManifest`, `DriftAdapter`, `neighbor_stability`) you compose with whatever vector DB and tracer you already have. No platform, no UI, no agent framework.
|
|
68
|
+
|
|
69
|
+
## Usage
|
|
70
|
+
|
|
71
|
+
### Manifest + version assertion
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from datetime import datetime, timezone
|
|
75
|
+
from embspec import IndexManifest, EmbeddingSpec
|
|
76
|
+
|
|
77
|
+
# When you build the index, write a manifest alongside it
|
|
78
|
+
manifest = IndexManifest(
|
|
79
|
+
index_name="prod-v3",
|
|
80
|
+
embedding=EmbeddingSpec(
|
|
81
|
+
model_id="amazon.titan-embed-text-v2:0",
|
|
82
|
+
dimension=1024,
|
|
83
|
+
normalization="l2",
|
|
84
|
+
),
|
|
85
|
+
created_at=datetime.now(timezone.utc),
|
|
86
|
+
doc_count=8_000_000,
|
|
87
|
+
)
|
|
88
|
+
manifest.save("s3://my-rag/index-prod/manifest.json")
|
|
89
|
+
# (or any local path; manifest.save uses pathlib.Path.write_text)
|
|
90
|
+
|
|
91
|
+
# At query time, assert the encoder matches before searching
|
|
92
|
+
from embspec import embed_assert
|
|
93
|
+
|
|
94
|
+
@embed_assert(
|
|
95
|
+
"s3://my-rag/index-prod/manifest.json", # path or IndexManifest
|
|
96
|
+
model_id="amazon.titan-embed-text-v2:0",
|
|
97
|
+
dimension=1024,
|
|
98
|
+
mode="raise", # or "log" for canary rollout
|
|
99
|
+
)
|
|
100
|
+
def search(query: str) -> list[dict]:
|
|
101
|
+
qv = embed_query(query)
|
|
102
|
+
return opensearch.knn_search(qv, ...)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Drift-Adapter for in-place model migration
|
|
106
|
+
|
|
107
|
+
When you want to upgrade the query encoder without re-encoding 8M docs:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from embspec import DriftAdapter
|
|
111
|
+
import numpy as np
|
|
112
|
+
|
|
113
|
+
# Sample, e.g., 50K docs and embed them with both old and new models
|
|
114
|
+
old_emb = embed_with_old_model(sample_texts) # shape (50000, 1024)
|
|
115
|
+
new_emb = embed_with_new_model(sample_texts) # shape (50000, 1536)
|
|
116
|
+
|
|
117
|
+
adapter = DriftAdapter.fit(
|
|
118
|
+
new_embeddings=new_emb,
|
|
119
|
+
old_embeddings=old_emb,
|
|
120
|
+
regularization=0.01, # ridge; helps when new_emb is rank-deficient
|
|
121
|
+
)
|
|
122
|
+
adapter.save("s3://my-rag/adapters/v3-to-v4.npz")
|
|
123
|
+
|
|
124
|
+
# At query time, embed with the new model then transform into old space
|
|
125
|
+
qv_new = embed_with_new_model(query)
|
|
126
|
+
qv_compatible = adapter.transform(qv_new) # shape (1024,)
|
|
127
|
+
results = opensearch.knn_search(qv_compatible, ...)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Per Vejendla 2025, this typically recovers 95-99% retrieval at ~1% of the cost of re-encoding the full corpus.
|
|
131
|
+
|
|
132
|
+
### Neighbor stability for safe migrations
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from embspec import neighbor_stability
|
|
136
|
+
|
|
137
|
+
# Run a fixed probe set against both indexes
|
|
138
|
+
old_results = {pid: retrieve_from_v3(q) for pid, q in probes.items()}
|
|
139
|
+
new_results = {pid: retrieve_from_v4(q) for pid, q in probes.items()}
|
|
140
|
+
|
|
141
|
+
report = neighbor_stability(old_results, new_results, k=10)
|
|
142
|
+
print(f"mean overlap@10: {report.mean_overlap_at_k:.1%}")
|
|
143
|
+
print(f"regressions: {report.regression_count}/{report.n_probes}")
|
|
144
|
+
|
|
145
|
+
if report.is_safe_to_deploy(min_mean_overlap=0.85, max_regression_fraction=0.05):
|
|
146
|
+
deploy_v4()
|
|
147
|
+
else:
|
|
148
|
+
investigate(report.regression_probe_ids)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## What it explicitly does NOT do
|
|
152
|
+
|
|
153
|
+
- Not a vector database.
|
|
154
|
+
- Not a RAG framework. No retriever, no chunker, no generator.
|
|
155
|
+
- Not a generic ML drift library. Embedding-and-retrieval-shaped only.
|
|
156
|
+
- Not an eval framework. `neighbor_stability` is the one judgment you can make without a labeled gold set; for richer evals use `ragas`, `trulens`, or a tracer.
|
|
157
|
+
- Does not host or serve embeddings.
|
|
158
|
+
|
|
159
|
+
## Roadmap
|
|
160
|
+
|
|
161
|
+
- v0.2: `dual_write()` context manager for blue/green index migrations across OpenSearch / pgvector / Pinecone / Qdrant.
|
|
162
|
+
- v0.3: `ChunkingExperiment` A/B harness with optional LLM-judge.
|
|
163
|
+
- v0.4: integration helpers for AWS Bedrock embedding models, OpenAI, Cohere, Voyage.
|
|
164
|
+
|
|
165
|
+
## License
|
|
166
|
+
|
|
167
|
+
Apache-2.0. See [LICENSE](./LICENSE).
|
embspec-0.1.0/README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# embspec
|
|
2
|
+
|
|
3
|
+
Embedding pipeline ops + drift detection for production RAG.
|
|
4
|
+
|
|
5
|
+
The single failure mode this library prevents: query encoder upgrade ships before the index is re-encoded; every health check stays 200 OK while retrieval accuracy silently collapses. The [decompressed.io RAG observability post-mortem (2026-03-09)](https://decompressed.io/learn/rag-observability-postmortem) describes this exact bug — \$15K of emergency re-encoding plus 2-5 days of engineer time before someone diagnosed it.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install embspec
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from embspec import IndexManifest, embed_assert
|
|
13
|
+
|
|
14
|
+
manifest = IndexManifest.load("s3://my-rag/index-prod/manifest.json")
|
|
15
|
+
|
|
16
|
+
@embed_assert(manifest, model_id="amazon.titan-embed-text-v2:0", dimension=1024)
|
|
17
|
+
def search(query: str):
|
|
18
|
+
qv = embed_query(query)
|
|
19
|
+
return opensearch.knn_search(qv, ...)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
If the query encoder ever drifts off the manifest, `search` raises `EmbeddingVersionMismatch` instead of silently returning bad results.
|
|
23
|
+
|
|
24
|
+
## What's in v0.1
|
|
25
|
+
|
|
26
|
+
| Primitive | What it does | Anchored in |
|
|
27
|
+
|---|---|---|
|
|
28
|
+
| `IndexManifest` + `EmbeddingSpec` | Single-file source of truth for "what does this index contain" — model id, dimension, version, normalization | [decompressed.io post-mortem](https://decompressed.io/learn/rag-observability-postmortem) |
|
|
29
|
+
| `assert_compatible()` / `embed_assert()` decorator | Fast-fail on encoder/index drift; raise or warn modes | same |
|
|
30
|
+
| `DriftAdapter` | Linear adapter from new-model embeddings to old-model space; lets you swap the query encoder without re-encoding the corpus | [Drift-Adapter, Vejendla 2025 (arxiv:2509.23471)](https://arxiv.org/abs/2509.23471) |
|
|
31
|
+
| `neighbor_stability()` | Compare two retrievers on a frozen probe set; reports overlap, Jaccard, regression list, deploy-safety verdict | [RAGOps survey, Xu et al. 2025 (arxiv:2506.03401)](https://arxiv.org/abs/2506.03401) |
|
|
32
|
+
|
|
33
|
+
## Why not Evidently / Phoenix / WhyLogs?
|
|
34
|
+
|
|
35
|
+
- **Evidently** — tabular-ML drift heritage; LLM additions are recent and platform-shaped. Not a drop-in primitive.
|
|
36
|
+
- **Phoenix** — embedding-drift visualization is a sub-feature of a full observability platform. You adopt the platform.
|
|
37
|
+
- **WhyLogs** — generic data-logging primitive; not embedding-aware; last commit 2025-01.
|
|
38
|
+
- **embspec** — three small primitives (`IndexManifest`, `DriftAdapter`, `neighbor_stability`) you compose with whatever vector DB and tracer you already have. No platform, no UI, no agent framework.
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
### Manifest + version assertion
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from datetime import datetime, timezone
|
|
46
|
+
from embspec import IndexManifest, EmbeddingSpec
|
|
47
|
+
|
|
48
|
+
# When you build the index, write a manifest alongside it
|
|
49
|
+
manifest = IndexManifest(
|
|
50
|
+
index_name="prod-v3",
|
|
51
|
+
embedding=EmbeddingSpec(
|
|
52
|
+
model_id="amazon.titan-embed-text-v2:0",
|
|
53
|
+
dimension=1024,
|
|
54
|
+
normalization="l2",
|
|
55
|
+
),
|
|
56
|
+
created_at=datetime.now(timezone.utc),
|
|
57
|
+
doc_count=8_000_000,
|
|
58
|
+
)
|
|
59
|
+
manifest.save("s3://my-rag/index-prod/manifest.json")
|
|
60
|
+
# (or any local path; manifest.save uses pathlib.Path.write_text)
|
|
61
|
+
|
|
62
|
+
# At query time, assert the encoder matches before searching
|
|
63
|
+
from embspec import embed_assert
|
|
64
|
+
|
|
65
|
+
@embed_assert(
|
|
66
|
+
"s3://my-rag/index-prod/manifest.json", # path or IndexManifest
|
|
67
|
+
model_id="amazon.titan-embed-text-v2:0",
|
|
68
|
+
dimension=1024,
|
|
69
|
+
mode="raise", # or "log" for canary rollout
|
|
70
|
+
)
|
|
71
|
+
def search(query: str) -> list[dict]:
|
|
72
|
+
qv = embed_query(query)
|
|
73
|
+
return opensearch.knn_search(qv, ...)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Drift-Adapter for in-place model migration
|
|
77
|
+
|
|
78
|
+
When you want to upgrade the query encoder without re-encoding 8M docs:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from embspec import DriftAdapter
|
|
82
|
+
import numpy as np
|
|
83
|
+
|
|
84
|
+
# Sample, e.g., 50K docs and embed them with both old and new models
|
|
85
|
+
old_emb = embed_with_old_model(sample_texts) # shape (50000, 1024)
|
|
86
|
+
new_emb = embed_with_new_model(sample_texts) # shape (50000, 1536)
|
|
87
|
+
|
|
88
|
+
adapter = DriftAdapter.fit(
|
|
89
|
+
new_embeddings=new_emb,
|
|
90
|
+
old_embeddings=old_emb,
|
|
91
|
+
regularization=0.01, # ridge; helps when new_emb is rank-deficient
|
|
92
|
+
)
|
|
93
|
+
adapter.save("s3://my-rag/adapters/v3-to-v4.npz")
|
|
94
|
+
|
|
95
|
+
# At query time, embed with the new model then transform into old space
|
|
96
|
+
qv_new = embed_with_new_model(query)
|
|
97
|
+
qv_compatible = adapter.transform(qv_new) # shape (1024,)
|
|
98
|
+
results = opensearch.knn_search(qv_compatible, ...)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Per Vejendla 2025, this typically recovers 95-99% retrieval at ~1% of the cost of re-encoding the full corpus.
|
|
102
|
+
|
|
103
|
+
### Neighbor stability for safe migrations
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from embspec import neighbor_stability
|
|
107
|
+
|
|
108
|
+
# Run a fixed probe set against both indexes
|
|
109
|
+
old_results = {pid: retrieve_from_v3(q) for pid, q in probes.items()}
|
|
110
|
+
new_results = {pid: retrieve_from_v4(q) for pid, q in probes.items()}
|
|
111
|
+
|
|
112
|
+
report = neighbor_stability(old_results, new_results, k=10)
|
|
113
|
+
print(f"mean overlap@10: {report.mean_overlap_at_k:.1%}")
|
|
114
|
+
print(f"regressions: {report.regression_count}/{report.n_probes}")
|
|
115
|
+
|
|
116
|
+
if report.is_safe_to_deploy(min_mean_overlap=0.85, max_regression_fraction=0.05):
|
|
117
|
+
deploy_v4()
|
|
118
|
+
else:
|
|
119
|
+
investigate(report.regression_probe_ids)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## What it explicitly does NOT do
|
|
123
|
+
|
|
124
|
+
- Not a vector database.
|
|
125
|
+
- Not a RAG framework. No retriever, no chunker, no generator.
|
|
126
|
+
- Not a generic ML drift library. Embedding-and-retrieval-shaped only.
|
|
127
|
+
- Not an eval framework. `neighbor_stability` is the one judgment you can make without a labeled gold set; for richer evals use `ragas`, `trulens`, or a tracer.
|
|
128
|
+
- Does not host or serve embeddings.
|
|
129
|
+
|
|
130
|
+
## Roadmap
|
|
131
|
+
|
|
132
|
+
- v0.2: `dual_write()` context manager for blue/green index migrations across OpenSearch / pgvector / Pinecone / Qdrant.
|
|
133
|
+
- v0.3: `ChunkingExperiment` A/B harness with optional LLM-judge.
|
|
134
|
+
- v0.4: integration helpers for AWS Bedrock embedding models, OpenAI, Cohere, Voyage.
|
|
135
|
+
|
|
136
|
+
## License
|
|
137
|
+
|
|
138
|
+
Apache-2.0. See [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "embspec"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Embedding pipeline ops + drift detection for production RAG: index manifests, version assertions, neighbor-stability eval, Drift-Adapter for in-place model migrations."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Mukunda Katta", email = "mukunda.vjcs6@gmail.com" }
|
|
8
|
+
]
|
|
9
|
+
license = "Apache-2.0"
|
|
10
|
+
license-files = ["LICENSE"]
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
keywords = ["rag", "embeddings", "vector", "drift", "retrieval", "production"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
25
|
+
"Typing :: Typed",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"numpy>=1.24",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/MukundaKatta/embspec"
|
|
33
|
+
Source = "https://github.com/MukundaKatta/embspec"
|
|
34
|
+
Issues = "https://github.com/MukundaKatta/embspec/issues"
|
|
35
|
+
Changelog = "https://github.com/MukundaKatta/embspec/blob/main/CHANGELOG.md"
|
|
36
|
+
|
|
37
|
+
[dependency-groups]
|
|
38
|
+
dev = [
|
|
39
|
+
"pytest>=8.0",
|
|
40
|
+
"pytest-cov>=5.0",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[build-system]
|
|
44
|
+
requires = ["uv_build>=0.11.7,<0.12.0"]
|
|
45
|
+
build-backend = "uv_build"
|
|
46
|
+
|
|
47
|
+
[tool.pytest.ini_options]
|
|
48
|
+
testpaths = ["tests"]
|
|
49
|
+
addopts = "-q"
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""embspec — embedding pipeline ops + drift detection for production RAG.
|
|
2
|
+
|
|
3
|
+
The single primitive this library asserts:
|
|
4
|
+
|
|
5
|
+
"The query encoder must use the same embedding model+version as the
|
|
6
|
+
index it is searching."
|
|
7
|
+
|
|
8
|
+
The decompressed.io RAG observability post-mortem (2026-03-09) describes
|
|
9
|
+
exactly the failure this library prevents: the query embedding model gets
|
|
10
|
+
upgraded, the index still holds vectors from the old model, every health
|
|
11
|
+
check stays 200 OK while retrieval accuracy silently collapses. Reporter
|
|
12
|
+
spent $15k on emergency re-encoding and 2-5 days of engineer time before
|
|
13
|
+
diagnosis.
|
|
14
|
+
|
|
15
|
+
Quick start::
|
|
16
|
+
|
|
17
|
+
from embspec import IndexManifest, EmbeddingSpec, embed_assert
|
|
18
|
+
|
|
19
|
+
# Recorded once when the index was built
|
|
20
|
+
manifest = IndexManifest.load("s3://my-rag/index-prod/manifest.json")
|
|
21
|
+
|
|
22
|
+
@embed_assert(manifest, model_id="amazon.titan-embed-text-v2:0", dimension=1024)
|
|
23
|
+
def search(query: str):
|
|
24
|
+
qv = embed_query(query)
|
|
25
|
+
return opensearch.knn_search(qv, ...)
|
|
26
|
+
|
|
27
|
+
If the query encoder ever drifts off the manifest, ``search`` raises
|
|
28
|
+
:class:`EmbeddingVersionMismatch` instead of returning silently-degraded
|
|
29
|
+
results.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
from ._adapter import DriftAdapter
|
|
35
|
+
from ._assert import embed_assert
|
|
36
|
+
from ._errors import (
|
|
37
|
+
AdapterShapeError,
|
|
38
|
+
EmbeddingVersionMismatch,
|
|
39
|
+
EmbspecError,
|
|
40
|
+
ManifestFormatError,
|
|
41
|
+
)
|
|
42
|
+
from ._manifest import (
|
|
43
|
+
EmbeddingSpec,
|
|
44
|
+
IndexManifest,
|
|
45
|
+
assert_compatible,
|
|
46
|
+
)
|
|
47
|
+
from ._stability import StabilityReport, neighbor_stability
|
|
48
|
+
|
|
49
|
+
__version__ = "0.1.0"
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"AdapterShapeError",
|
|
53
|
+
"DriftAdapter",
|
|
54
|
+
"EmbeddingSpec",
|
|
55
|
+
"EmbeddingVersionMismatch",
|
|
56
|
+
"EmbspecError",
|
|
57
|
+
"IndexManifest",
|
|
58
|
+
"ManifestFormatError",
|
|
59
|
+
"StabilityReport",
|
|
60
|
+
"__version__",
|
|
61
|
+
"assert_compatible",
|
|
62
|
+
"embed_assert",
|
|
63
|
+
"neighbor_stability",
|
|
64
|
+
]
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Drift-Adapter: linear map from new-model embeddings into old-model embedding space.
|
|
2
|
+
|
|
3
|
+
Implements the pattern from Vejendla 2025 (arxiv:2509.23471, "Drift-Adapter:
|
|
4
|
+
Closing the Embedding-Drift Gap in Production Vector Stores"). Lets you swap
|
|
5
|
+
the query encoder to a newer embedding model without re-encoding the
|
|
6
|
+
corpus, by fitting a small linear transform on a sample of paired
|
|
7
|
+
(old_model, new_model) embeddings.
|
|
8
|
+
|
|
9
|
+
Trade-off: the adapter is a least-squares fit so it loses some signal vs.
|
|
10
|
+
true re-encoding. The paper reports 95-99% retrieval recovery with adapter
|
|
11
|
+
sizes ~1% of the corpus. Use it as a cost-saving migration path, then
|
|
12
|
+
re-encode the corpus on a slower schedule (or never if recall holds).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
from ._errors import AdapterShapeError
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
import numpy as np
|
|
24
|
+
from numpy.typing import NDArray
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DriftAdapter:
    """Linear adapter that maps embeddings from one model into another's space.

    Fitting solves ``W = argmin || X_new @ W - X_old ||_F^2`` where
    ``X_old`` and ``X_new`` are aligned matrices of paired embeddings,
    optionally with an L2 (ridge) penalty on ``W``.

    Attributes:
        weight: 2-D array of shape ``(input_dim, output_dim)``.
    """

    def __init__(self, weight: NDArray) -> None:
        """Wrap a pre-computed weight matrix.

        Raises:
            AdapterShapeError: If ``weight`` is not 2-D.
        """
        if weight.ndim != 2:
            raise AdapterShapeError(f"weight must be 2-D, got shape {weight.shape}")
        self.weight = weight

    @property
    def input_dim(self) -> int:
        """Dimension of the new-model embeddings the adapter accepts."""
        return int(self.weight.shape[0])

    @property
    def output_dim(self) -> int:
        """Dimension of the old-model space the adapter maps into."""
        return int(self.weight.shape[1])

    @classmethod
    def fit(
        cls,
        new_embeddings: NDArray,
        old_embeddings: NDArray,
        *,
        regularization: float = 0.0,
    ) -> DriftAdapter:
        """Fit the adapter on aligned (new, old) embedding pairs via least squares.

        ``new_embeddings`` and ``old_embeddings`` must have the same number
        of rows (``n``) and may have different column counts (the new and
        old model dimensions). ``regularization`` adds an L2 penalty on the
        weight matrix (ridge regression); useful for ill-conditioned data.

        Raises:
            AdapterShapeError: If the arrays are not 2-D, are not paired
                row-for-row, or there are fewer samples than new-model
                dimensions (the system would be underdetermined).
            ValueError: If ``regularization`` is negative (previously a
                negative value was silently ignored).
        """
        import numpy as np  # noqa: PLC0415

        if regularization < 0:
            raise ValueError(f"regularization must be >= 0, got {regularization}")
        if new_embeddings.ndim != 2 or old_embeddings.ndim != 2:
            raise AdapterShapeError(
                f"embedding arrays must be 2-D, got {new_embeddings.shape} and {old_embeddings.shape}"
            )
        if new_embeddings.shape[0] != old_embeddings.shape[0]:
            raise AdapterShapeError(
                f"row counts differ: new={new_embeddings.shape[0]}, old={old_embeddings.shape[0]}; "
                "embeddings must be paired"
            )
        if new_embeddings.shape[0] < new_embeddings.shape[1]:
            raise AdapterShapeError(
                f"need at least as many paired samples as new-model dimensions; "
                f"got {new_embeddings.shape[0]} samples for {new_embeddings.shape[1]} dimensions"
            )

        if regularization > 0:
            # Closed-form ridge solution: (X^T X + lam*I)^-1 X^T Y.
            n_features = new_embeddings.shape[1]
            xtx = new_embeddings.T @ new_embeddings + regularization * np.eye(n_features)
            xty = new_embeddings.T @ old_embeddings
            weight = np.linalg.solve(xtx, xty)
        else:
            weight, _, _, _ = np.linalg.lstsq(
                new_embeddings, old_embeddings, rcond=None
            )
        return cls(weight=weight)

    def transform(self, new_embeddings: NDArray) -> NDArray:
        """Map new-model embeddings into the old-model embedding space.

        Accepts either a single vector of shape ``(input_dim,)`` or a batch
        of shape ``(n, input_dim)``; the last axis of the result has
        ``output_dim`` columns.

        Raises:
            AdapterShapeError: If the input's trailing dimension does not
                match :attr:`input_dim` or the input has rank > 2.
        """
        if new_embeddings.ndim == 1:
            if new_embeddings.shape[0] != self.input_dim:
                raise AdapterShapeError(
                    f"expected vector of dim {self.input_dim}, got {new_embeddings.shape[0]}"
                )
        elif new_embeddings.ndim != 2 or new_embeddings.shape[1] != self.input_dim:
            raise AdapterShapeError(
                f"expected (n, {self.input_dim}) input, got {new_embeddings.shape}"
            )
        return new_embeddings @ self.weight

    def save(self, path: str | Path) -> None:
        """Save the adapter to a compressed ``.npz`` archive at exactly ``path``.

        Writes through an open file handle: when given a bare path without
        an ``.npz`` extension, ``np.savez_compressed`` appends ``.npz``, so
        the old implementation wrote ``path + ".npz"`` while ``load(path)``
        then failed. Using a file object keeps ``save``/``load`` symmetric
        for any path.
        """
        import numpy as np  # noqa: PLC0415

        with open(path, "wb") as fh:
            np.savez_compressed(fh, weight=self.weight)

    @classmethod
    def load(cls, path: str | Path) -> DriftAdapter:
        """Load an adapter previously saved with :meth:`save`."""
        import numpy as np  # noqa: PLC0415

        data = np.load(str(path))
        return cls(weight=data["weight"])
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Decorator for asserting embedding-spec compatibility on every retrieval call."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Literal, TypeVar
|
|
9
|
+
|
|
10
|
+
from ._manifest import EmbeddingSpec, IndexManifest
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def embed_assert(
    manifest: IndexManifest | str | Path,
    *,
    model_id: str,
    dimension: int,
    model_version: str | None = None,
    normalization: Literal["l2", "none"] = "l2",
    mode: Literal["raise", "log"] = "raise",
) -> Callable[[F], F]:
    """Assert the function uses an embedding spec compatible with the index manifest.

    Args:
        manifest: An :class:`IndexManifest`, or a path the manifest is
            loaded from. A path is resolved lazily on the first call so
            the decorated symbol stays import-cheap.
        model_id: Embedding model id the query encoder uses.
        dimension: Query embedding dimension.
        model_version: Optional model version pin.
        normalization: Normalization applied by the query encoder.
        mode: ``"raise"`` (default) raises :class:`EmbeddingVersionMismatch`
            on drift. ``"log"`` is the safer rollout mode: it never raises
            but emits a :mod:`warnings` warning so production stays up
            while you wire alerts.

    Returns:
        A decorator that checks spec/manifest compatibility before every
        call to the wrapped function.
    """
    # Lazy relative import: keeps this module's import-time dependencies
    # minimal, matching the lazy-manifest-resolution design below.
    from ._errors import EmbeddingVersionMismatch  # noqa: PLC0415

    spec = EmbeddingSpec(
        model_id=model_id,
        dimension=dimension,
        model_version=model_version,
        normalization=normalization,
    )

    # One-slot cache for the lazily-resolved (manifest, path) pair.
    state: dict[str, Any] = {"resolved": None}

    def _resolve() -> tuple[IndexManifest, str | None]:
        # Idempotent: only the first call does I/O; later calls are a dict read.
        if state["resolved"] is None:
            if isinstance(manifest, IndexManifest):
                state["resolved"] = (manifest, None)
            else:
                path = str(manifest)
                state["resolved"] = (IndexManifest.load(path), path)
        return state["resolved"]

    def decorator(fn: F) -> F:
        @functools.wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            m, path = _resolve()
            try:
                m.assert_compatible(spec, manifest_path=path)
            # Narrowed from a bare `except Exception`, which in log mode
            # silently swallowed unrelated bugs, not just drift.
            except EmbeddingVersionMismatch as exc:
                if mode != "log":
                    raise
                import warnings  # noqa: PLC0415

                # Include the mismatch detail so the warning is actionable.
                warnings.warn(
                    f"embspec drift on call to {fn.__qualname__}: {exc}",
                    stacklevel=2,
                )
            return fn(*args, **kwargs)

        return wrapper  # type: ignore[return-value]

    return decorator
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Typed errors for embspec."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EmbspecError(Exception):
    """Root of the embspec exception hierarchy; catch this to handle any embspec failure."""


class EmbeddingVersionMismatch(EmbspecError):
    """Raised when the query encoder's spec drifts from the index manifest.

    Emitted by :func:`embspec.assert_compatible` and the
    :func:`embspec.embed_assert` decorator. This is the fast-fail signal
    against the silent-accuracy-collapse failure mode described in the
    decompressed.io RAG observability post-mortem (2026-03-09): every
    health check stays 200 OK with normal latency while retrieval quality
    quietly degrades.
    """

    def __init__(
        self,
        *,
        index_name: str,
        manifest_field: str,
        manifest_value: object,
        query_value: object,
        manifest_path: str | None = None,
    ) -> None:
        # Mention the manifest location only when the caller supplied one.
        location = f" (manifest at {manifest_path})" if manifest_path else ""
        message = (
            f"Index {index_name!r} manifest declares {manifest_field}="
            f"{manifest_value!r} but query encoder uses {query_value!r}{location}. "
            f"Re-encode the corpus or roll the query encoder back."
        )
        super().__init__(message)
        # Structured fields so callers can alert/route without parsing text.
        self.index_name = index_name
        self.manifest_field = manifest_field
        self.manifest_value = manifest_value
        self.query_value = query_value
        self.manifest_path = manifest_path


class ManifestFormatError(EmbspecError):
    """The manifest file is missing required fields or has an unknown format version."""


class AdapterShapeError(EmbspecError):
    """Embedding tensors have incompatible shapes for fitting or transforming."""
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Index manifests track what embedding model + version a vector index was built with.
|
|
2
|
+
|
|
3
|
+
The manifest is the single source of truth for "what does this index contain";
|
|
4
|
+
asserting against it at every search prevents the silent failure mode where
|
|
5
|
+
a query encoder upgrade ships before the index is re-encoded and accuracy
|
|
6
|
+
collapses while every health check stays green.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from dataclasses import asdict, dataclass, field
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Literal
|
|
16
|
+
|
|
17
|
+
from ._errors import EmbeddingVersionMismatch, ManifestFormatError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_FORMAT_VERSION: int = 1
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True)
class EmbeddingSpec:
    """Immutable description of how a set of vectors was embedded.

    Two specs compare equal (dataclass equality) exactly when every field
    matches — the property the compatibility checks rely on.
    """

    # Identifier of the embedding model (e.g. a provider model id string).
    model_id: str
    # Output dimensionality of the model.
    dimension: int
    # Optional finer-grained version pin; None means "unpinned".
    model_version: str | None = None
    # Vector normalization applied after encoding.
    normalization: Literal["l2", "none"] = "l2"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class IndexManifest:
    """Manifest describing the embedding configuration of a vector index.

    Recorded once when the index is built; it is the source of truth every
    later query-side :class:`EmbeddingSpec` is checked against.
    """

    index_name: str
    embedding: EmbeddingSpec
    created_at: datetime
    doc_count: int | None = None
    extra: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dict, stamping in the format version."""
        return {
            "embspec_format_version": _FORMAT_VERSION,
            "index_name": self.index_name,
            "embedding": asdict(self.embedding),
            # NOTE(review): astimezone() on a *naive* created_at assumes
            # local time; prefer timezone-aware datetimes — confirm callers.
            "created_at": self.created_at.astimezone(timezone.utc).isoformat(),
            "doc_count": self.doc_count,
            "extra": self.extra,
        }

    def save(self, path: str | Path) -> None:
        """Write the manifest as pretty-printed, key-sorted JSON."""
        Path(path).write_text(json.dumps(self.to_dict(), indent=2, sort_keys=True))

    @classmethod
    def load(cls, path: str | Path) -> IndexManifest:
        """Load a manifest written by :meth:`save`.

        Raises:
            ManifestFormatError: On an unknown format version or missing fields.
        """
        return cls.from_dict(json.loads(Path(path).read_text()), source=str(path))

    @classmethod
    def from_dict(cls, data: dict[str, Any], *, source: str | None = None) -> IndexManifest:
        """Build a manifest from a parsed JSON dict.

        ``source`` is used only to enrich error messages (typically the path
        the dict was loaded from).

        Raises:
            ManifestFormatError: On an unknown format version or missing
                ``embedding``/``index_name`` fields.
        """
        version = data.get("embspec_format_version")
        if version != _FORMAT_VERSION:
            raise ManifestFormatError(
                f"Unknown embspec_format_version={version!r} in manifest"
                + (f" at {source}" if source else "")
            )
        if "embedding" not in data or "index_name" not in data:
            raise ManifestFormatError(
                "Manifest missing required fields"
                + (f" at {source}" if source else "")
            )
        emb = data["embedding"]
        embedding = EmbeddingSpec(
            model_id=emb["model_id"],
            dimension=int(emb["dimension"]),
            model_version=emb.get("model_version"),
            normalization=emb.get("normalization", "l2"),
        )
        # Tolerate manifests without a timestamp by stamping "now" in UTC.
        created_at = (
            datetime.fromisoformat(data["created_at"])
            if "created_at" in data
            else datetime.now(timezone.utc)
        )
        return cls(
            index_name=data["index_name"],
            embedding=embedding,
            created_at=created_at,
            doc_count=data.get("doc_count"),
            extra=data.get("extra") or {},
        )

    def assert_compatible(
        self,
        spec: EmbeddingSpec,
        *,
        manifest_path: str | None = None,
    ) -> None:
        """Raise :class:`EmbeddingVersionMismatch` if ``spec`` differs from this manifest's embedding.

        Compatibility is exact-match across every field of
        :class:`EmbeddingSpec` — query and index must use the same model,
        dimension, version, and normalization. Checks run in a fixed order
        and the first mismatching field is the one reported.
        """
        # Table-driven comparison replaces four copy-pasted raise branches;
        # order matches the original (model_id, dimension, version, norm).
        checks = (
            ("embedding.model_id", self.embedding.model_id, spec.model_id),
            ("embedding.dimension", self.embedding.dimension, spec.dimension),
            ("embedding.model_version", self.embedding.model_version, spec.model_version),
            ("embedding.normalization", self.embedding.normalization, spec.normalization),
        )
        for field_name, manifest_value, query_value in checks:
            if manifest_value != query_value:
                raise EmbeddingVersionMismatch(
                    index_name=self.index_name,
                    manifest_field=field_name,
                    manifest_value=manifest_value,
                    query_value=query_value,
                    manifest_path=manifest_path,
                )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def assert_compatible(
    manifest: IndexManifest,
    spec: EmbeddingSpec,
    *,
    manifest_path: str | None = None,
) -> None:
    """Check ``spec`` against ``manifest`` via a plain function call.

    Thin functional alias for :meth:`IndexManifest.assert_compatible`;
    raises :class:`EmbeddingVersionMismatch` on the first drifted field.
    """
    # Pure delegation keeps one source of truth for the comparison logic.
    manifest.assert_compatible(spec, manifest_path=manifest_path)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Neighbor stability: compare two retrievers on a fixed probe set.
|
|
2
|
+
|
|
3
|
+
This is the missing primitive RAGOps (Xu et al. 2025, arxiv:2506.03401)
|
|
4
|
+
calls out: "existing work provides limited support for observability in
|
|
5
|
+
the retrieval process of RAG applications." A pre/post snapshot of which
|
|
6
|
+
documents come back for a frozen set of probe queries lets you decide
|
|
7
|
+
whether an embedding model change, chunker change, or rerank change is
|
|
8
|
+
safe to deploy.
|
|
9
|
+
|
|
10
|
+
The function is pure: caller runs both retrievers, passes the result
|
|
11
|
+
dictionaries in. No vector-DB-specific code lives here.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class StabilityReport:
    """Aggregated top-k agreement metrics over a fixed probe-query set."""

    # Number of probe queries present in both result sets.
    n_probes: int
    # Top-k cutoff used for the comparison.
    k: int
    # Mean |new ∩ old| / k across probes; 1.0 = identical top-k, 0.0 = disjoint.
    mean_overlap_at_k: float
    # Mean Jaccard similarity |new ∩ old| / |new ∪ old| across probes.
    mean_jaccard_at_k: float
    # Probe ids whose overlap fell below the regression threshold.
    regression_probe_ids: tuple[str, ...]

    @property
    def regression_count(self) -> int:
        """How many probes were flagged as regressed."""
        return len(self.regression_probe_ids)

    def is_safe_to_deploy(
        self,
        *,
        min_mean_overlap: float = 0.85,
        max_regression_fraction: float = 0.05,
    ) -> bool:
        """Heuristic deploy gate.

        True only when the mean overlap reaches ``min_mean_overlap`` AND
        the fraction of regressed probes stays within
        ``max_regression_fraction``. An empty report is never safe.
        """
        if not self.n_probes:
            return False
        if self.mean_overlap_at_k < min_mean_overlap:
            return False
        return self.regression_count / self.n_probes <= max_regression_fraction


def neighbor_stability(
    old_results: dict[str, list[str]],
    new_results: dict[str, list[str]],
    *,
    k: int = 10,
    regression_threshold: float = 0.5,
) -> StabilityReport:
    """Compute a :class:`StabilityReport` from two retrieval result sets.

    Both arguments map ``probe_id -> list of doc_id`` (top-k). Only probe
    ids present in both dicts are compared (extras on either side are
    ignored); the first ``k`` doc ids of each list are considered.
    """
    shared_ids = sorted(set(old_results) & set(new_results))
    if not shared_ids:
        # No comparable probes at all — report zeros.
        return StabilityReport(
            n_probes=0,
            k=k,
            mean_overlap_at_k=0.0,
            mean_jaccard_at_k=0.0,
            regression_probe_ids=(),
        )

    overlaps: list[float] = []
    jaccards: list[float] = []
    regressed: list[str] = []
    for probe_id in shared_ids:
        before = set(old_results[probe_id][:k])
        after = set(new_results[probe_id][:k])
        n_shared = len(before & after)
        n_total = len(before | after)
        overlap = n_shared / k if k > 0 else 0.0
        overlaps.append(overlap)
        jaccards.append(n_shared / n_total if n_total > 0 else 0.0)
        if overlap < regression_threshold:
            regressed.append(probe_id)

    n = len(shared_ids)
    return StabilityReport(
        n_probes=n,
        k=k,
        mean_overlap_at_k=sum(overlaps) / n,
        mean_jaccard_at_k=sum(jaccards) / n,
        regression_probe_ids=tuple(regressed),
    )
|
|
File without changes
|