quiver-client 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quiver/__init__.py +69 -0
- quiver/async_client.py +457 -0
- quiver/client.py +679 -0
- quiver/dcpe.py +338 -0
- quiver/encryption.py +173 -0
- quiver/haystack.py +258 -0
- quiver/langchain.py +149 -0
- quiver/llamaindex.py +234 -0
- quiver/rerank.py +97 -0
- quiver/vector.py +187 -0
- quiver_client-0.22.0.dist-info/METADATA +140 -0
- quiver_client-0.22.0.dist-info/RECORD +13 -0
- quiver_client-0.22.0.dist-info/WHEEL +4 -0
quiver/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# SPDX-License-Identifier: AGPL-3.0-only
|
|
2
|
+
"""Quiver — Python client for the security-first vector database.
|
|
3
|
+
|
|
4
|
+
Example::
|
|
5
|
+
|
|
6
|
+
from quiver import Client, Point
|
|
7
|
+
|
|
8
|
+
with Client(api_key="…") as q:
|
|
9
|
+
q.create_collection("items", dim=3, metric="cosine")
|
|
10
|
+
q.upsert("items", [Point("a", [0.1, 0.2, 0.3], {"tag": "x"})])
|
|
11
|
+
hits = q.search("items", [0.1, 0.2, 0.3], k=5)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .client import (
|
|
15
|
+
TEXT_KEY,
|
|
16
|
+
Client,
|
|
17
|
+
CollectionInfo,
|
|
18
|
+
Document,
|
|
19
|
+
DocumentMatch,
|
|
20
|
+
FilterableField,
|
|
21
|
+
Match,
|
|
22
|
+
Point,
|
|
23
|
+
QuiverError,
|
|
24
|
+
SparseVector,
|
|
25
|
+
)
|
|
26
|
+
from .async_client import AsyncClient
|
|
27
|
+
from .rerank import RerankResult, rerank
|
|
28
|
+
from .dcpe import DcpeCipher, DcpeError, EncryptedVector
|
|
29
|
+
from .encryption import ENVELOPE_KEY, PayloadCipher, PayloadError, is_sealed
|
|
30
|
+
from .vector import (
|
|
31
|
+
VECTOR_ENVELOPE_KEY,
|
|
32
|
+
MalformedVectorEnvelopeError,
|
|
33
|
+
NotEncryptedVectorError,
|
|
34
|
+
VectorCipher,
|
|
35
|
+
VectorDecryptError,
|
|
36
|
+
VectorError,
|
|
37
|
+
is_sealed_vector,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Names re-exported as the package's public API; each one is imported above
# from the submodule noted in its group comment.
__all__ = [
    # clients and core data types (.client / .async_client)
    "Client",
    "AsyncClient",
    "Point",
    "Match",
    "SparseVector",
    "TEXT_KEY",
    # reranking helpers (.rerank)
    "rerank",
    "RerankResult",
    # document / collection types and the base error (.client)
    "Document",
    "DocumentMatch",
    "CollectionInfo",
    "FilterableField",
    "QuiverError",
    # payload encryption (.encryption)
    "PayloadCipher",
    "PayloadError",
    "is_sealed",
    "ENVELOPE_KEY",
    # DCPE vector encryption (.dcpe)
    "DcpeCipher",
    "DcpeError",
    "EncryptedVector",
    # client-side vector encryption (.vector)
    "VectorCipher",
    "VectorError",
    "VectorDecryptError",
    "NotEncryptedVectorError",
    "MalformedVectorEnvelopeError",
    "is_sealed_vector",
    "VECTOR_ENVELOPE_KEY",
]
|
|
69
|
+
# Must match the released distribution version (quiver_client-0.22.0.dist-info);
# the previous value "0.19.0" was stale.
__version__ = "0.22.0"
|
quiver/async_client.py
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
# SPDX-License-Identifier: AGPL-3.0-only
|
|
2
|
+
"""An asynchronous REST client for Quiver.
|
|
3
|
+
|
|
4
|
+
`AsyncClient` mirrors the synchronous :class:`quiver.client.Client` over the same
|
|
5
|
+
HTTP contract (``docs/api/rest-grpc.md``), for RAG services and agents that issue
|
|
6
|
+
many concurrent retrievals. It reuses the sync module's pure request/response
|
|
7
|
+
helpers so the two clients cannot drift, and adds a few ergonomic helpers
|
|
8
|
+
(``delete_by_filter``, ``scroll``, ``upsert_iter``) that are also available on the
|
|
9
|
+
sync client.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import TYPE_CHECKING, Any, AsyncIterator, Awaitable, Callable, Iterable, Mapping, Optional, Sequence
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
|
|
18
|
+
from .client import (
|
|
19
|
+
DEFAULT_BASE_URL,
|
|
20
|
+
DEFAULT_TIMEOUT,
|
|
21
|
+
CollectionInfo,
|
|
22
|
+
Document,
|
|
23
|
+
DocumentMatch,
|
|
24
|
+
FilterableField,
|
|
25
|
+
Match,
|
|
26
|
+
PointInput,
|
|
27
|
+
QuiverError,
|
|
28
|
+
SparseVector,
|
|
29
|
+
_client_side_score,
|
|
30
|
+
_collection,
|
|
31
|
+
_document_dict,
|
|
32
|
+
_point_dict,
|
|
33
|
+
_raise_for_status,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from .vector import VectorCipher
|
|
38
|
+
|
|
39
|
+
# Only the client class is public; the helpers imported from ``.client`` above
# are implementation details shared with the synchronous client.
__all__ = ["AsyncClient"]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AsyncClient:
    """An asynchronous Quiver REST client.

    Usable as an async context manager so the connection pool is closed::

        async with AsyncClient(api_key="…") as q:
            await q.create_collection("items", dim=384, metric="cosine")
            hits = await q.search("items", embedding, k=5)

    Every request goes through :meth:`_request`, so a transport-level
    ``httpx.HTTPError`` always surfaces as :class:`QuiverError` (with the
    original exception chained as ``__cause__``). Responses are checked with
    the ``_raise_for_status`` helper shared with the synchronous client.
    """

    def __init__(
        self,
        base_url: str = DEFAULT_BASE_URL,
        *,
        api_key: Optional[str] = None,
        timeout: float = DEFAULT_TIMEOUT,
        verify: bool = True,
    ) -> None:
        """Create the client and its underlying ``httpx.AsyncClient``.

        Args:
            base_url: Server root URL; trailing slashes are stripped.
            api_key: If truthy, sent as ``authorization: Bearer <api_key>``.
            timeout: Passed to ``httpx.AsyncClient`` unchanged.
            verify: Passed to ``httpx.AsyncClient`` unchanged.
        """
        headers: dict[str, str] = {}
        if api_key:
            headers["authorization"] = f"Bearer {api_key}"
        self._http = httpx.AsyncClient(
            base_url=base_url.rstrip("/"),
            headers=headers,
            timeout=timeout,
            verify=verify,
        )

    async def __aenter__(self) -> "AsyncClient":
        return self

    async def __aexit__(self, *_exc: object) -> None:
        await self.aclose()

    async def aclose(self) -> None:
        """Close the underlying HTTP connection pool."""
        await self._http.aclose()

    # --- collections ---

    async def create_collection(
        self,
        name: str,
        dim: int,
        metric: str = "l2",
        *,
        index: Optional[str] = None,
        pq_subspaces: Optional[int] = None,
        filterable: Optional[Sequence[FilterableField]] = None,
        multivector: bool = False,
        vector_encryption: str = "none",
    ) -> CollectionInfo:
        """Create a collection (see :meth:`quiver.client.Client.create_collection`).

        Optional settings are only included in the request body when they
        differ from their "unset" value (``None``, empty, ``False``, ``"none"``).
        """
        body: dict[str, Any] = {"name": name, "dim": dim, "metric": metric}
        if index is not None:
            body["index"] = index
        if pq_subspaces is not None:
            body["pq_subspaces"] = pq_subspaces
        if filterable:
            body["filterable"] = [
                {"path": f.path, "field_type": f.field_type} for f in filterable
            ]
        if multivector:
            body["multivector"] = True
        if vector_encryption != "none":
            body["vector_encryption"] = vector_encryption
        return _collection((await self._send("POST", "/v1/collections", body)).json())

    async def list_collections(self) -> list[CollectionInfo]:
        """List all collections."""
        return [_collection(c) for c in (await self._send("GET", "/v1/collections")).json()]

    async def get_collection(self, name: str) -> CollectionInfo:
        """Fetch one collection's metadata."""
        return _collection((await self._send("GET", f"/v1/collections/{name}")).json())

    async def delete_collection(self, name: str) -> bool:
        """Delete a collection; returns whether it existed."""
        body = (await self._send("DELETE", f"/v1/collections/{name}")).json()
        return bool(body["existed"])

    # --- points ---

    async def upsert(self, collection: str, points: Iterable[PointInput]) -> int:
        """Insert or replace points; returns the number upserted."""
        body = {"points": [_point_dict(p) for p in points]}
        resp = await self._send("POST", f"/v1/collections/{collection}/points", body)
        return int(resp.json()["upserted"])

    async def delete_points(self, collection: str, ids: Sequence[str]) -> int:
        """Delete points by id; returns the number deleted."""
        body = {"ids": list(ids)}
        resp = await self._send("DELETE", f"/v1/collections/{collection}/points", body)
        return int(resp.json()["deleted"])

    async def get_point(self, collection: str, id: str) -> Optional[Match]:
        """Fetch a point by id, or ``None`` if it does not exist.

        A 404 response maps to ``None``; any other non-success status is
        handed to ``_raise_for_status``. The returned :class:`Match` carries a
        placeholder ``score`` of ``0.0`` because no ranking took place.

        Raises:
            QuiverError: If the HTTP transport fails, consistent with every
                other request method on this client.
        """
        # Use _request (not _send) so the 404 can be inspected before the
        # status check, while transport errors are still wrapped.
        resp = await self._request("GET", f"/v1/collections/{collection}/points/{id}")
        if resp.status_code == 404:
            return None
        _raise_for_status(resp)
        body = resp.json()
        return Match(id=body["id"], score=0.0, payload=body.get("payload"), vector=body.get("vector"))

    async def search(
        self,
        collection: str,
        vector: Sequence[float],
        *,
        k: int = 10,
        filter: Optional[Mapping[str, Any]] = None,
        ef_search: int = 64,
        with_payload: bool = True,
        with_vector: bool = False,
    ) -> list[Match]:
        """Search for the ``k`` nearest points to ``vector`` (optionally filtered)."""
        body: dict[str, Any] = {
            "vector": list(vector),
            "k": k,
            "ef_search": ef_search,
            "with_payload": with_payload,
            "with_vector": with_vector,
        }
        if filter is not None:
            body["filter"] = filter
        resp = await self._send("POST", f"/v1/collections/{collection}/query", body)
        return self._parse_matches(resp)

    async def hybrid_search(
        self,
        collection: str,
        *,
        vector: Optional[Sequence[float]] = None,
        sparse: Optional[SparseVector] = None,
        query_text: Optional[str] = None,
        k: int = 10,
        filter: Optional[Mapping[str, Any]] = None,
        ef_search: int = 64,
        rrf_k0: float = 60.0,
        with_payload: bool = True,
        with_vector: bool = False,
    ) -> list[Match]:
        """Hybrid search fused by Reciprocal Rank Fusion (ADR-0043/0046).

        Provide a dense ``vector``, a ``sparse`` vector, and/or a full-text
        ``query_text`` (BM25); at least one is required.

        Raises:
            ValueError: If none of ``vector``, ``sparse``, ``query_text`` is given.
        """
        if vector is None and sparse is None and query_text is None:
            raise ValueError(
                "hybrid_search requires a dense vector, a sparse vector, or a text query"
            )
        body: dict[str, Any] = {
            "k": k,
            "ef_search": ef_search,
            "rrf_k0": rrf_k0,
            "with_payload": with_payload,
            "with_vector": with_vector,
        }
        if vector is not None:
            body["vector"] = list(vector)
        if query_text is not None:
            body["query_text"] = query_text
        if sparse is not None:
            # The sparse vector is sent as two parallel arrays.
            body["sparse_indices"] = [int(i) for i in sparse.indices]
            body["sparse_values"] = [float(v) for v in sparse.values]
        if filter is not None:
            body["filter"] = filter
        resp = await self._send("POST", f"/v1/collections/{collection}/query/hybrid", body)
        return self._parse_matches(resp)

    async def upsert_text(self, collection: str, points: Iterable[Mapping[str, Any]]) -> int:
        """Embed each point's text server-side and upsert it (ADR-0047). See
        :meth:`Client.upsert_text`.

        Each mapping must have ``"id"`` and ``"text"`` keys; ``"payload"`` is
        forwarded only when present and not ``None``.
        """
        body = {
            "points": [
                {
                    "id": p["id"],
                    "text": p["text"],
                    **({"payload": p["payload"]} if p.get("payload") is not None else {}),
                }
                for p in points
            ]
        }
        resp = await self._send("POST", f"/v1/collections/{collection}/points:text", body)
        return int(resp.json()["upserted"])

    async def search_text(
        self,
        collection: str,
        text: str,
        *,
        k: int = 10,
        filter: Optional[Mapping[str, Any]] = None,
        ef_search: int = 64,
        rrf_k0: float = 60.0,
        with_payload: bool = True,
        with_vector: bool = False,
        rerank: bool = False,
    ) -> list[Match]:
        """Embed ``text`` server-side and search dense ⊕ BM25, optionally reranking
        (ADR-0047). See :meth:`Client.search_text`."""
        body: dict[str, Any] = {
            "text": text,
            "k": k,
            "ef_search": ef_search,
            "rrf_k0": rrf_k0,
            "with_payload": with_payload,
            "with_vector": with_vector,
            "rerank": rerank,
        }
        if filter is not None:
            body["filter"] = filter
        resp = await self._send("POST", f"/v1/collections/{collection}/query/text", body)
        return self._parse_matches(resp)

    async def fetch(
        self,
        collection: str,
        *,
        filter: Optional[Mapping[str, Any]] = None,
        limit: int = 100,
        with_payload: bool = True,
        with_vector: bool = False,
    ) -> list[Match]:
        """Fetch points without ranking; an optional payload ``filter`` narrows the set.

        Returned matches carry a placeholder ``score`` of ``0.0``.
        """
        body: dict[str, Any] = {
            "limit": limit,
            "with_payload": with_payload,
            "with_vector": with_vector,
        }
        if filter is not None:
            body["filter"] = filter
        resp = await self._send("POST", f"/v1/collections/{collection}/fetch", body)
        return [
            Match(id=p["id"], score=0.0, payload=p.get("payload"), vector=p.get("vector"))
            for p in resp.json()["points"]
        ]

    async def snapshot(self, destination: str) -> dict[str, Any]:
        """Take a consistent online snapshot of the whole database into a
        server-local ``destination`` directory (ADR-0050); admin-only. See
        :meth:`Client.snapshot`."""
        resp = await self._send("POST", "/v1/snapshot", {"destination": destination})
        return dict(resp.json())

    async def search_client_side(
        self,
        collection: str,
        query: Sequence[float],
        cipher: "VectorCipher",
        *,
        k: int = 10,
        filter: Optional[Mapping[str, Any]] = None,
        metric: str = "l2",
        candidate_limit: int = 10_000,
    ) -> list[Match]:
        """Client-side NN search over a ``client_side``-encrypted collection (ADR-0032).

        Fetches the (optionally filtered) candidate set, decrypts each vector with
        ``cipher``, ranks by ``metric``, and returns the top ``k``. The server never
        ranks and never sees the key.

        At most ``candidate_limit`` candidates are fetched, so points beyond
        that limit are never considered.
        """
        q = [float(x) for x in query]
        ranked: list[tuple[float, Match]] = []
        for m in await self.fetch(
            collection, filter=filter, limit=candidate_limit, with_payload=True
        ):
            # The vector is recovered from the point's payload via the cipher.
            vector = cipher.open(m.payload)
            # _client_side_score returns a sort key plus the score to report.
            ordering, score = _client_side_score(metric, q, vector)
            ranked.append(
                (ordering, Match(id=m.id, score=score, payload=m.payload, vector=vector))
            )
        # Ascending by the ordering key; list.sort is stable, so ties keep
        # the server's fetch order.
        ranked.sort(key=lambda pair: pair[0])
        return [m for _, m in ranked[:k]]

    # --- documents (multi-vector / late interaction) ---

    async def upsert_documents(self, collection: str, documents: Iterable[Document]) -> int:
        """Insert or replace multi-vector documents; returns the number upserted."""
        body = {"documents": [_document_dict(d) for d in documents]}
        resp = await self._send("POST", f"/v1/collections/{collection}/documents", body)
        return int(resp.json()["upserted"])

    async def delete_documents(self, collection: str, ids: Sequence[str]) -> int:
        """Delete multi-vector documents by id; returns the number deleted."""
        body = {"ids": list(ids)}
        resp = await self._send("DELETE", f"/v1/collections/{collection}/documents", body)
        return int(resp.json()["deleted"])

    async def search_multi_vector(
        self,
        collection: str,
        query: Sequence[Sequence[float]],
        *,
        k: int = 10,
        filter: Optional[Mapping[str, Any]] = None,
        ef_search: int = 64,
        with_payload: bool = True,
        with_vector: bool = False,
    ) -> list[DocumentMatch]:
        """Rank documents by MaxSim late interaction against the ``query`` token set."""
        body: dict[str, Any] = {
            "query": [list(v) for v in query],
            "k": k,
            "ef_search": ef_search,
            "with_payload": with_payload,
            "with_vector": with_vector,
        }
        if filter is not None:
            body["filter"] = filter
        resp = await self._send("POST", f"/v1/collections/{collection}/documents/query", body)
        return [
            DocumentMatch(
                id=m["id"], score=m["score"], payload=m.get("payload"), vectors=m.get("vectors")
            )
            for m in resp.json()["matches"]
        ]

    # --- ergonomic helpers (RAG/agentic) ---

    async def upsert_iter(
        self,
        collection: str,
        points: Iterable[PointInput],
        *,
        batch: int = 500,
        on_progress: Optional[Callable[[int], Awaitable[None] | None]] = None,
    ) -> int:
        """Upsert a large iterable in server-friendly batches; returns the total.

        ``batch`` must stay within the server's ``max_batch_size`` (ADR-0040,
        default 1000). ``on_progress`` is called with the running total after each
        batch (may be sync or async). An empty ``points`` iterable issues no
        request and never calls ``on_progress``.
        """
        total = 0
        chunk: list[PointInput] = []
        for p in points:
            chunk.append(p)
            if len(chunk) >= batch:
                total += await self.upsert(collection, chunk)
                chunk = []
                await self._notify_progress(on_progress, total)
        if chunk:
            # Flush the final, partially filled batch.
            total += await self.upsert(collection, chunk)
            await self._notify_progress(on_progress, total)
        return total

    async def scroll(
        self,
        collection: str,
        *,
        filter: Optional[Mapping[str, Any]] = None,
        batch: int = 500,
        with_payload: bool = True,
        with_vector: bool = False,
    ) -> AsyncIterator[Match]:
        """Yield up to ``batch`` points from a single fetch (for export / re-embedding).

        Note: the REST ``fetch`` is limit-bounded without a server cursor, so this
        fetches up to ``batch`` points in one page and does NOT continue past
        it. Provide a narrowing ``filter`` for large collections; a server-side
        scroll cursor is a follow-up.
        """
        for m in await self.fetch(
            collection,
            filter=filter,
            limit=batch,
            with_payload=with_payload,
            with_vector=with_vector,
        ):
            yield m

    async def delete_by_filter(
        self, collection: str, filter: Mapping[str, Any], *, batch: int = 500
    ) -> int:
        """Delete every point matching ``filter``; returns the number deleted.

        Fetches matching ids (paged by ``batch``) and deletes them until none
        remain. Useful for GDPR erasure and re-indexing.

        The loop stops early when a round deletes nothing: the next fetch
        would return the same ids, so continuing could never terminate.
        """
        total = 0
        while True:
            # Only ids are needed, so skip downloading payloads.
            page = await self.fetch(
                collection, filter=filter, limit=batch, with_payload=False
            )
            ids = [m.id for m in page]
            if not ids:
                return total
            deleted = await self.delete_points(collection, ids)
            total += deleted
            # A short page means the filter is exhausted; zero deletions
            # means no progress is being made.
            if len(ids) < batch or deleted == 0:
                return total

    # --- health ---

    async def healthz(self) -> bool:
        """Whether the server's liveness probe succeeds.

        Returns ``False`` (rather than raising) when the request itself fails.
        """
        try:
            return (await self._http.get("/healthz")).is_success
        except httpx.HTTPError:
            return False

    # --- internals ---

    @staticmethod
    def _parse_matches(resp: httpx.Response) -> list[Match]:
        """Build :class:`Match` objects from a response's ``"matches"`` array."""
        return [
            Match(id=m["id"], score=m["score"], payload=m.get("payload"), vector=m.get("vector"))
            for m in resp.json()["matches"]
        ]

    @staticmethod
    async def _notify_progress(
        on_progress: Optional[Callable[[int], Awaitable[None] | None]], total: int
    ) -> None:
        """Invoke ``on_progress(total)`` if set, awaiting the result when awaitable."""
        if on_progress is None:
            return
        result = on_progress(total)
        if hasattr(result, "__await__"):
            await result  # type: ignore[misc]

    async def _request(self, method: str, path: str, json: Optional[Any] = None) -> httpx.Response:
        """Issue a request and return the raw response without a status check.

        Raises:
            QuiverError: If the HTTP transport raises ``httpx.HTTPError``.
        """
        try:
            return await self._http.request(method, path, json=json)
        except httpx.HTTPError as exc:
            raise QuiverError(f"request to {path} failed: {exc}") from exc

    async def _send(self, method: str, path: str, json: Optional[Any] = None) -> httpx.Response:
        """Issue a request, apply ``_raise_for_status``, and return the response."""
        resp = await self._request(method, path, json)
        _raise_for_status(resp)
        return resp
|