poma 0.2.2__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- poma/client.py +90 -47
- poma/integrations/__init__.py +0 -20
- poma/integrations/langchain/__init__.py +12 -0
- poma/integrations/llamaindex/__init__.py +11 -0
- poma/integrations/qdrant/__init__.py +23 -0
- poma/integrations/qdrant/qdrant_poma.py +1326 -0
- {poma-0.2.2.dist-info → poma-0.3.2.dist-info}/METADATA +28 -20
- poma-0.3.2.dist-info/RECORD +16 -0
- {poma-0.2.2.dist-info → poma-0.3.2.dist-info}/WHEEL +1 -1
- poma-0.2.2.dist-info/RECORD +0 -12
- /poma/integrations/{langchain_poma.py → langchain/langchain_poma.py} +0 -0
- /poma/integrations/{llamaindex_poma.py → llamaindex/llamaindex_poma.py} +0 -0
- {poma-0.2.2.dist-info → poma-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {poma-0.2.2.dist-info → poma-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1326 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------
|
|
2
|
+
# POMA integration for Qdrant
|
|
3
|
+
# ---------------------------------------------------------------------
|
|
4
|
+
"""
|
|
5
|
+
Sync POMA–Qdrant integration with dense, sparse, and hybrid search.
|
|
6
|
+
|
|
7
|
+
- BasePomaQdrantCore: shared logic (prepare points, RRF fusion, cheatsheet inputs).
|
|
8
|
+
- SyncClientOps: thin Qdrant I/O adapter.
|
|
9
|
+
- PomaQdrant: supports fastembed, external_*, and inference_* modes.
|
|
10
|
+
- fastembed mode is environment-dependent: it requires a Qdrant client with FastEmbed integration
|
|
11
|
+
(upsert/query_points with models.Document). For production, pass a pre-configured client
|
|
12
|
+
or use embedding_mode='external_dense' (or external_*).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import inspect
|
|
16
|
+
import os
|
|
17
|
+
import uuid
|
|
18
|
+
from collections.abc import Callable, Sequence
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
|
|
21
|
+
from qdrant_client import QdrantClient, models as qcm
|
|
22
|
+
from qdrant_client.http.exceptions import UnexpectedResponse
|
|
23
|
+
from qdrant_client.http import models as qmodels
|
|
24
|
+
|
|
25
|
+
from poma.client import extract_chunks_and_chunksets_from_poma_archive
|
|
26
|
+
from poma.retrieval import generate_cheatsheets
|
|
27
|
+
|
|
28
|
+
# Defaults applied when cloud inference is enabled without explicit models.
DEFAULT_CLOUD_DENSE_MODEL = "openai/text-embedding-3-large"
DEFAULT_CLOUD_DENSE_DIMENSIONS = 1536
DEFAULT_CLOUD_SPARSE_MODEL = "Qdrant/bm25"


# Public API of this module.
__all__ = [
    "PomaQdrant",
    "QdrantConfig",
    "VectorConfig",
    "InferenceConfig",
    "QdrantResponseError",
    "SearchResult",
    "chunk_uuid_string",
    "DenseEmbedSync",
    "SparseEmbedSync",
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _chunk_data_from_input(
|
|
47
|
+
chunk_data: dict | str | os.PathLike[str],
|
|
48
|
+
) -> dict:
|
|
49
|
+
"""Accept chunk_data dict or path to a .poma file; return chunk_data dict."""
|
|
50
|
+
if isinstance(chunk_data, (str, os.PathLike)):
|
|
51
|
+
path = os.fspath(chunk_data)
|
|
52
|
+
if not path.lower().endswith(".poma"):
|
|
53
|
+
raise ValueError(
|
|
54
|
+
"Path must point to a .poma file; got {!r}".format(path)
|
|
55
|
+
)
|
|
56
|
+
return extract_chunks_and_chunksets_from_poma_archive(
|
|
57
|
+
poma_archive_path=chunk_data
|
|
58
|
+
)
|
|
59
|
+
return chunk_data
|
|
60
|
+
|
|
61
|
+
@dataclass
class SearchResult:
    """Typed view of a single search hit; mirrors the dict shape produced by search()."""

    id: str         # point id (deterministic UUID string)
    score: float    # similarity or fusion score
    text: str       # chunkset contents stored with the point
    metadata: dict  # full payload stored with the point
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class QdrantResponseError(RuntimeError):
|
|
71
|
+
"""SDK-level wrapper for qdrant-client UnexpectedResponse errors."""
|
|
72
|
+
|
|
73
|
+
def __init__(
|
|
74
|
+
self,
|
|
75
|
+
message: str,
|
|
76
|
+
*,
|
|
77
|
+
status_code: int | None = None,
|
|
78
|
+
reason_phrase: str | None = None,
|
|
79
|
+
raw_content: str | None = None,
|
|
80
|
+
) -> None:
|
|
81
|
+
super().__init__(message)
|
|
82
|
+
self.status_code = status_code
|
|
83
|
+
self.reason_phrase = reason_phrase
|
|
84
|
+
self.raw_content = raw_content
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def chunk_uuid_string(file_id: str, chunkset_index: int) -> str:
    """
    Deterministic RFC4122 UUID (v5) derived from (file_id, chunkset_index).

    Identical inputs always yield the same UUID; a different index yields a
    different UUID. Raises ValueError for a negative index.
    """
    if chunkset_index < 0:
        raise ValueError("chunkset_index must be non-negative")
    return str(uuid.uuid5(uuid.NAMESPACE_URL, f"{file_id}#{chunkset_index}"))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Dense embedder: maps a batch of texts to one float-vector per text.
# Callables accept Sequence[str] for flexibility.
DenseEmbedSync = Callable[[Sequence[str]], list[list[float]]]

# Sparse embedder: maps a batch of texts to one sparse vector per text.
# Each returned item may be any of:
#   - qmodels.SparseVector(indices=[...], values=[...])
#   - tuple[list[int], list[float]] -> (indices, values)
#   - dict with keys {"indices": [...], "values": [...]}
SparseEmbedSync = Callable[[Sequence[str]], list[object]]
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass(frozen=True)
class QdrantConfig:
    """
    Where and how to reach Qdrant:
    - mode="memory": in-process, RAM-only (sync only)
    - mode="path": in-process, persistent folder on disk (sync only)
    - mode="url": HTTP endpoint (local server or cloud) (sync)
    - api_key: API key for cloud inference (required if cloud_inference is True)
    - cloud_inference: if True, use cloud inference (default is False)
    - client_kwargs: optional pass-through kwargs for QdrantClient(...)
      (e.g. {"timeout": 120, "pool_size": 20})
    """

    mode: str = "memory"           # "memory" | "path" | "url"
    path: str | None = None        # required when mode="path"
    url: str | None = None         # required when mode="url"
    api_key: str | None = None     # optional (cloud)
    cloud_inference: bool = False  # optional (cloud)
    client_kwargs: dict[str, object] | None = None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass(frozen=True)
class VectorConfig:
    """
    Vector/index configuration for collections we create explicitly
    (embedding_mode external_* or inference_*).
    """

    distance: qmodels.Distance = qmodels.Distance.COSINE
    # Required for external_dense/external_hybrid and inference_dense/inference_hybrid.
    dense_size: int | None = None
    sparse_name: str = "sparse"  # name of the sparse vector space
    dense_name: str = "dense"    # name of the dense vector space
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass(frozen=True)
class InferenceConfig:
    """
    Settings for embedding_mode inference_* (Qdrant Inference API).

    dense_model examples:
      - "sentence-transformers/all-minilm-l6-v2"
      - "openai/text-embedding-3-large"
    sparse_model examples:
      - "Qdrant/bm25"

    dense_options / sparse_options carry provider-specific options.
    Dense API key option names used by Qdrant Cloud Inference include:
      - OpenAI: {"openai-api-key": "..."}
      - Cohere: {"cohere-api-key": "..."}
      - Jina AI: {"jina-api-key": "..."}
      - OpenRouter: {"openrouter-api-key": "..."}
    Provider-specific parameters such as {"dimensions": 512} are also accepted.
    """

    dense_model: str | None = None
    sparse_model: str | None = None
    dense_options: dict | None = None
    sparse_options: dict | None = None

    def dense_document(self, text: str) -> qcm.Document:
        """Build a dense inference Document; raises ValueError when dense_model is unset."""
        if not self.dense_model:
            raise ValueError(
                "InferenceConfig.dense_model is required for inference_dense/inference_hybrid"
            )
        return qcm.Document(
            text=text,
            model=self.dense_model,
            options=self.dense_options,
        )

    def sparse_document(self, text: str) -> qcm.Document:
        """Build a sparse inference Document; raises ValueError when sparse_model is unset."""
        if not self.sparse_model:
            raise ValueError(
                "InferenceConfig.sparse_model is required for inference_sparse/inference_hybrid"
            )
        return qcm.Document(
            text=text,
            model=self.sparse_model,
            options=self.sparse_options,
        )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class BasePomaQdrantCore:
|
|
189
|
+
"""
|
|
190
|
+
Shared, client-agnostic core: builds payloads/points, fuses rankings, builds cheatsheet inputs.
|
|
191
|
+
"""
|
|
192
|
+
|
|
193
|
+
@staticmethod
|
|
194
|
+
def _to_sparse_vector(item: object) -> qmodels.SparseVector:
|
|
195
|
+
if isinstance(item, qmodels.SparseVector):
|
|
196
|
+
return item
|
|
197
|
+
if isinstance(item, tuple) and len(item) == 2:
|
|
198
|
+
indices, values = item
|
|
199
|
+
return qmodels.SparseVector(indices=list(indices), values=list(values))
|
|
200
|
+
if isinstance(item, dict):
|
|
201
|
+
indices = item.get("indices")
|
|
202
|
+
values = item.get("values")
|
|
203
|
+
if indices is None or values is None:
|
|
204
|
+
raise ValueError("Sparse embedding dict must contain 'indices' and 'values'")
|
|
205
|
+
return qmodels.SparseVector(indices=list(indices), values=list(values))
|
|
206
|
+
raise ValueError(
|
|
207
|
+
"Sparse embedder must return SparseVector OR (indices, values) tuple OR dict{'indices','values'}"
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
@staticmethod
|
|
211
|
+
def _get_file_id_fallback(chunksets: list[dict], chunks: list[dict]) -> str:
|
|
212
|
+
file_id = None
|
|
213
|
+
if chunksets:
|
|
214
|
+
file_id = chunksets[0].get("file_id")
|
|
215
|
+
if not file_id and chunks:
|
|
216
|
+
file_id = chunks[0].get("file_id") or chunks[0].get("tag")
|
|
217
|
+
return file_id or "unknown"
|
|
218
|
+
|
|
219
|
+
@classmethod
|
|
220
|
+
def prepare_points_from_chunk_data(
|
|
221
|
+
cls,
|
|
222
|
+
chunk_data: dict,
|
|
223
|
+
*,
|
|
224
|
+
store_chunk_details: bool = True,
|
|
225
|
+
) -> tuple[list[str], list[str], list[dict]]:
|
|
226
|
+
"""
|
|
227
|
+
Returns: (ids, documents, payloads)
|
|
228
|
+
- ids: deterministic UUID strings (file_id + chunkset_index)
|
|
229
|
+
- documents: chunkset.contents
|
|
230
|
+
- payloads: metadata including "text"; "chunk_details" only if store_chunk_details=True
|
|
231
|
+
"""
|
|
232
|
+
chunksets: list[dict] = chunk_data.get("chunksets", [])
|
|
233
|
+
chunks: list[dict] = chunk_data.get("chunks", [])
|
|
234
|
+
if not chunksets:
|
|
235
|
+
return [], [], []
|
|
236
|
+
|
|
237
|
+
file_id = cls._get_file_id_fallback(chunksets, chunks)
|
|
238
|
+
chunks_by_index = {c.get("chunk_index"): c for c in chunks}
|
|
239
|
+
|
|
240
|
+
ids: list[str] = []
|
|
241
|
+
documents: list[str] = []
|
|
242
|
+
payloads: list[dict] = []
|
|
243
|
+
|
|
244
|
+
for idx, chunkset in enumerate(chunksets):
|
|
245
|
+
content = chunkset.get("contents", "")
|
|
246
|
+
if not content:
|
|
247
|
+
continue
|
|
248
|
+
|
|
249
|
+
chunkset_index = chunkset.get("chunkset_index")
|
|
250
|
+
if chunkset_index is None:
|
|
251
|
+
chunkset_index = idx
|
|
252
|
+
|
|
253
|
+
chunkset_file_id = chunkset.get("file_id") or file_id
|
|
254
|
+
point_id = chunk_uuid_string(chunkset_file_id, int(chunkset_index))
|
|
255
|
+
|
|
256
|
+
raw_chunks = chunkset.get("chunks", [])
|
|
257
|
+
chunk_indices = list(raw_chunks) if isinstance(raw_chunks, (list, tuple)) else []
|
|
258
|
+
|
|
259
|
+
payload: dict = {
|
|
260
|
+
"chunkset_index": int(chunkset_index),
|
|
261
|
+
"chunks": chunk_indices,
|
|
262
|
+
"file_id": chunkset_file_id,
|
|
263
|
+
"text": content,
|
|
264
|
+
}
|
|
265
|
+
if store_chunk_details:
|
|
266
|
+
relevant_chunks = [chunks_by_index.get(ci) for ci in chunk_indices if ci in chunks_by_index]
|
|
267
|
+
payload["chunk_details"] = relevant_chunks
|
|
268
|
+
|
|
269
|
+
ids.append(point_id)
|
|
270
|
+
documents.append(content)
|
|
271
|
+
payloads.append(payload)
|
|
272
|
+
|
|
273
|
+
return ids, documents, payloads
|
|
274
|
+
|
|
275
|
+
@staticmethod
|
|
276
|
+
def rrf_fuse(
|
|
277
|
+
ranked_lists: list[list[str]],
|
|
278
|
+
*,
|
|
279
|
+
k: int,
|
|
280
|
+
weights: list[float] | None = None,
|
|
281
|
+
) -> dict[str, float]:
|
|
282
|
+
"""
|
|
283
|
+
Reciprocal Rank Fusion (optionally weighted):
|
|
284
|
+
score(id) = sum_i weight_i / (k + rank_i)
|
|
285
|
+
If weights is None, all weights are 1.0. Order: [dense, sparse] for hybrid.
|
|
286
|
+
If weights is provided, its length must equal len(ranked_lists).
|
|
287
|
+
"""
|
|
288
|
+
n = len(ranked_lists)
|
|
289
|
+
if weights is None:
|
|
290
|
+
w = [1.0] * n
|
|
291
|
+
else:
|
|
292
|
+
if len(weights) != n:
|
|
293
|
+
raise ValueError(
|
|
294
|
+
f"rrf_fuse: len(weights)={len(weights)} must equal len(ranked_lists)={n}"
|
|
295
|
+
)
|
|
296
|
+
w = list(weights)
|
|
297
|
+
scores: dict[str, float] = {}
|
|
298
|
+
for lst, weight in zip(ranked_lists, w, strict=False):
|
|
299
|
+
for rank, pid in enumerate(lst, start=1):
|
|
300
|
+
scores[pid] = scores.get(pid, 0.0) + (weight / (k + rank))
|
|
301
|
+
return scores
|
|
302
|
+
|
|
303
|
+
@staticmethod
|
|
304
|
+
def results_to_cheatsheet_inputs(results: list[dict]) -> tuple[list[dict], list[dict]]:
|
|
305
|
+
"""
|
|
306
|
+
Extract relevant_chunksets and all_chunks from search results for generate_cheatsheets.
|
|
307
|
+
|
|
308
|
+
When store_chunk_details=False, chunk_details in payload is empty, so all_chunks
|
|
309
|
+
will only be populated from results. Pass chunk_data to search_cheatsheets() to
|
|
310
|
+
supply full chunks for cheatsheet generation.
|
|
311
|
+
"""
|
|
312
|
+
relevant_chunksets: list[dict] = []
|
|
313
|
+
all_chunks: list[dict] = []
|
|
314
|
+
seen_chunks: set[int] = set()
|
|
315
|
+
|
|
316
|
+
for r in results:
|
|
317
|
+
meta = r.get("metadata", {}) or {}
|
|
318
|
+
relevant_chunksets.append(
|
|
319
|
+
{
|
|
320
|
+
"chunkset_index": meta.get("chunkset_index"),
|
|
321
|
+
"chunks": meta.get("chunks", []),
|
|
322
|
+
"file_id": meta.get("file_id", "unknown"),
|
|
323
|
+
}
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
chunk_details = meta.get("chunk_details", []) or []
|
|
327
|
+
for ch in chunk_details:
|
|
328
|
+
if not isinstance(ch, dict):
|
|
329
|
+
continue
|
|
330
|
+
cidx = ch.get("chunk_index")
|
|
331
|
+
if isinstance(cidx, int) and cidx not in seen_chunks:
|
|
332
|
+
seen_chunks.add(cidx)
|
|
333
|
+
all_chunks.append(ch)
|
|
334
|
+
|
|
335
|
+
return relevant_chunksets, all_chunks
|
|
336
|
+
|
|
337
|
+
@staticmethod
|
|
338
|
+
def _validate_dense_size(dense_size: int | None, mode: str) -> None:
|
|
339
|
+
if dense_size is None:
|
|
340
|
+
raise ValueError(f'embedding_mode="{mode}" requires vectors.dense_size')
|
|
341
|
+
try:
|
|
342
|
+
size = int(dense_size)
|
|
343
|
+
except (TypeError, ValueError) as e:
|
|
344
|
+
raise ValueError(f"vectors.dense_size must be an integer, got {type(dense_size).__name__}") from e
|
|
345
|
+
if size <= 0:
|
|
346
|
+
raise ValueError(f'embedding_mode="{mode}" requires vectors.dense_size > 0')
|
|
347
|
+
|
|
348
|
+
@staticmethod
|
|
349
|
+
def _validate_dense_vectors(vectors: list[list[float]], expected_size: int, label: str) -> None:
|
|
350
|
+
for i, v in enumerate(vectors):
|
|
351
|
+
if len(v) != expected_size:
|
|
352
|
+
raise ValueError(
|
|
353
|
+
f"{label} vector at index {i} has length {len(v)}, expected {expected_size} (vectors.dense_size)"
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
@staticmethod
|
|
357
|
+
def _fastembed_dense_spec(client: object, text: str) -> tuple[str, qcm.Document]:
|
|
358
|
+
dense_model = getattr(client, "embedding_model_name", None)
|
|
359
|
+
if not isinstance(dense_model, str) or not dense_model:
|
|
360
|
+
raise RuntimeError(
|
|
361
|
+
"FastEmbed client is missing a valid embedding_model_name."
|
|
362
|
+
)
|
|
363
|
+
get_dense_name = getattr(client, "get_vector_field_name", None)
|
|
364
|
+
if not callable(get_dense_name):
|
|
365
|
+
raise RuntimeError(
|
|
366
|
+
"FastEmbed client is missing get_vector_field_name()."
|
|
367
|
+
)
|
|
368
|
+
dense_name = get_dense_name()
|
|
369
|
+
if not isinstance(dense_name, str) or not dense_name:
|
|
370
|
+
raise RuntimeError(
|
|
371
|
+
"FastEmbed client returned an invalid dense vector field name."
|
|
372
|
+
)
|
|
373
|
+
return dense_name, qcm.Document(text=text, model=dense_model)
|
|
374
|
+
|
|
375
|
+
@staticmethod
|
|
376
|
+
def _fastembed_sparse_spec(client: object, text: str) -> tuple[str, qcm.Document] | None:
|
|
377
|
+
sparse_model = getattr(client, "sparse_embedding_model_name", None)
|
|
378
|
+
if not sparse_model:
|
|
379
|
+
return None
|
|
380
|
+
get_sparse_name = getattr(client, "get_sparse_vector_field_name", None)
|
|
381
|
+
if not callable(get_sparse_name):
|
|
382
|
+
raise RuntimeError(
|
|
383
|
+
"FastEmbed sparse model is configured but client is missing get_sparse_vector_field_name()."
|
|
384
|
+
)
|
|
385
|
+
sparse_name = get_sparse_name()
|
|
386
|
+
if not isinstance(sparse_name, str) or not sparse_name:
|
|
387
|
+
raise RuntimeError(
|
|
388
|
+
"FastEmbed sparse model is configured but sparse vector field name is unavailable."
|
|
389
|
+
)
|
|
390
|
+
return sparse_name, qcm.Document(text=text, model=str(sparse_model))
|
|
391
|
+
|
|
392
|
+
@classmethod
|
|
393
|
+
def _fastembed_vector_payload(cls, client: object, text: str) -> dict[str, qcm.Document]:
|
|
394
|
+
dense_name, dense_doc = cls._fastembed_dense_spec(client, text)
|
|
395
|
+
vector: dict[str, qcm.Document] = {dense_name: dense_doc}
|
|
396
|
+
sparse_spec = cls._fastembed_sparse_spec(client, text)
|
|
397
|
+
if sparse_spec is not None:
|
|
398
|
+
sparse_name, sparse_doc = sparse_spec
|
|
399
|
+
vector[sparse_name] = sparse_doc
|
|
400
|
+
return vector
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
class SyncClientOps:
    """
    Thin synchronous adapter over the QdrantClient I/O surface this module uses.
    """

    def __init__(self, client: QdrantClient):
        self.client = client

    def get_collections_names(self) -> set[str]:
        """Names of all collections currently present."""
        listing = self.client.get_collections()
        return {collection.name for collection in listing.collections}

    def create_collection_dense(self, collection_name: str, *, vectors: VectorConfig) -> None:
        """Create a collection with a single named dense vector space."""
        dense_params = qmodels.VectorParams(size=int(vectors.dense_size), distance=vectors.distance)
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config={vectors.dense_name: dense_params},
        )

    def create_collection_sparse(self, collection_name: str, *, vectors: VectorConfig) -> None:
        """Create a sparse-only collection; some Qdrant setups reject this shape."""
        try:
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config={},
                sparse_vectors_config={vectors.sparse_name: qmodels.SparseVectorParams()},
            )
        except Exception as e:
            raise RuntimeError(
                "Creating a sparse-only collection failed (some Qdrant setups require at least one dense vector). "
                "Use embedding_mode='external_hybrid' with dense + sparse if needed."
            ) from e

    def create_collection_hybrid(self, collection_name: str, *, vectors: VectorConfig) -> None:
        """Create a collection with both a named dense and a named sparse vector space."""
        dense_params = qmodels.VectorParams(size=int(vectors.dense_size), distance=vectors.distance)
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config={vectors.dense_name: dense_params},
            sparse_vectors_config={vectors.sparse_name: qmodels.SparseVectorParams()},
        )

    def ensure_fastembed_collection(self, collection_name: str) -> None:
        """Create the FastEmbed-backed collection if it does not exist yet."""
        if collection_name in self.get_collections_names():
            return

        get_dense_params = getattr(self.client, "get_fastembed_vector_params", None)
        if not callable(get_dense_params):
            raise RuntimeError(
                "FastEmbed client is missing get_fastembed_vector_params()."
            )
        vectors_config = get_dense_params()
        if not vectors_config:
            raise RuntimeError(
                "FastEmbed client returned empty dense vector params."
            )

        get_sparse_params = getattr(self.client, "get_fastembed_sparse_vector_params", None)
        sparse_vectors_config = get_sparse_params() if callable(get_sparse_params) else None

        kwargs: dict = {
            "collection_name": collection_name,
            "vectors_config": vectors_config,
        }
        if sparse_vectors_config:
            kwargs["sparse_vectors_config"] = sparse_vectors_config
        self.client.create_collection(**kwargs)

    def add_documents(self, collection_name: str, *, documents: list[str], payloads: list[dict], ids: list[str]) -> None:
        """Upsert FastEmbed documents, building per-point named Document vectors."""
        self.ensure_fastembed_collection(collection_name)
        points = [
            qmodels.PointStruct(
                id=pid,
                vector=BasePomaQdrantCore._fastembed_vector_payload(self.client, doc),
                payload=payload,
            )
            for pid, doc, payload in zip(ids, documents, payloads, strict=True)
        ]
        self.upsert_points(collection_name, points=points)

    def upsert_points(self, collection_name: str, *, points: list[qmodels.PointStruct]) -> None:
        """Raw upsert of pre-built points."""
        self.client.upsert(collection_name=collection_name, points=points)

    def retrieve_ids(self, collection_name: str, *, ids: list[str]) -> set[str]:
        """Return the set of point ids that exist in the collection (for insert_only write_mode)."""
        if not ids:
            return set()
        try:
            # Best-effort: any lookup failure is treated as "nothing exists yet".
            found = self.client.retrieve(collection_name=collection_name, ids=ids)
            return {str(point.id) for point in found}
        except Exception:
            return set()

    def query_text(
        self,
        collection_name: str,
        *,
        query: str,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """Dense FastEmbed text query against the client's configured model."""
        dense_name, dense_doc = BasePomaQdrantCore._fastembed_dense_spec(
            self.client, query
        )
        return self.query_points(
            collection_name,
            query=dense_doc,
            using=dense_name,
            limit=limit,
            query_filter=query_filter,
        )

    def search_named_vector(
        self,
        collection_name: str,
        *,
        name: str,
        vector: object,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """Search a specific named vector space with a pre-computed vector."""
        return self.query_points(
            collection_name,
            query=vector,
            using=name,
            limit=limit,
            query_filter=query_filter,
        )

    def query_points(
        self,
        collection_name: str,
        *,
        query: object,
        using: str | None,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """Run query_points and normalize hits to {id, score, text, metadata} dicts."""
        kwargs: dict = {
            "collection_name": collection_name,
            "query": query,
            "limit": limit,
            "with_payload": True,
        }
        if using is not None:
            kwargs["using"] = using
        # Older client versions expect `filter` instead of `query_filter`.
        try:
            res = self.client.query_points(**kwargs, query_filter=query_filter)
        except TypeError:
            res = self.client.query_points(**kwargs, filter=query_filter)
        hits = res if isinstance(res, list) else getattr(res, "points", res)

        normalized: list[dict] = []
        for hit in hits:
            payload = getattr(hit, "payload", None) or {}
            payload_is_dict = isinstance(payload, dict)
            normalized.append(
                {
                    "id": str(getattr(hit, "id", "") or ""),
                    "score": float(getattr(hit, "score", 0.0) or 0.0),
                    "text": payload.get("text", "") if payload_is_dict else "",
                    "metadata": payload if payload_is_dict else {},
                }
            )
        return normalized

    def query_points_inference(
        self,
        collection_name: str,
        *,
        query: object,
        using: str | None,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """Query using an inference object (e.g. qcm.Document)."""
        return self.query_points(
            collection_name,
            query=query,
            using=using,
            limit=limit,
            query_filter=query_filter,
        )
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
class PomaQdrant(BasePomaQdrantCore):
|
|
585
|
+
"""
|
|
586
|
+
Sync implementation.
|
|
587
|
+
|
|
588
|
+
embedding_mode:
|
|
589
|
+
- "fastembed" : upsert/query_points with models.Document
|
|
590
|
+
- "external_dense" : dense_embed_fn + upsert/search on named dense vector
|
|
591
|
+
- "external_sparse" : sparse_embed_fn + upsert/search on named sparse vector
|
|
592
|
+
- "external_hybrid" : both embedders + two searches + RRF fusion
|
|
593
|
+
- "inference_dense" : Qdrant Inference API (inference_config.dense_model)
|
|
594
|
+
- "inference_sparse" : Qdrant Inference API (inference_config.sparse_model)
|
|
595
|
+
- "inference_hybrid" : Qdrant Inference API (dense + sparse + RRF)
|
|
596
|
+
|
|
597
|
+
fastembed: Requires a client with FastEmbed integration (upsert/query_points with models.Document).
|
|
598
|
+
Not guaranteed in all environments; for production, pass a
|
|
599
|
+
pre-configured qdrant_client or use external_* mode. See _ensure_fastembed_client().
|
|
600
|
+
|
|
601
|
+
write_mode: "upsert" (default) or "insert_only" (skip points that already exist).
|
|
602
|
+
batch_size: chunk ingest into batches of this size (default 256).
|
|
603
|
+
store_chunk_details: if True (default), payload includes chunk_details so search_cheatsheets can build cheatsheets without chunk_data.
|
|
604
|
+
"""
|
|
605
|
+
|
|
606
|
+
_INFERENCE_MODES = {"inference_dense", "inference_sparse", "inference_hybrid"}
|
|
607
|
+
_DENSE_PROVIDER_KEY_RULES: tuple[
|
|
608
|
+
tuple[tuple[str, ...], str, tuple[str, ...], str],
|
|
609
|
+
...,
|
|
610
|
+
] = (
|
|
611
|
+
(("openai/",), "openai-api-key", ("OPENAI_API_KEY",), "OpenAI"),
|
|
612
|
+
(("cohere/",), "cohere-api-key", ("COHERE_API_KEY",), "Cohere"),
|
|
613
|
+
(("jinaai/", "jina/"), "jina-api-key", ("JINA_API_KEY", "JINAAI_API_KEY"), "Jina AI"),
|
|
614
|
+
(("openrouter/",), "openrouter-api-key", ("OPENROUTER_API_KEY",), "OpenRouter"),
|
|
615
|
+
)
|
|
616
|
+
|
|
617
|
+
@staticmethod
|
|
618
|
+
def _resolve_cloud_inference_enabled(
|
|
619
|
+
qdrant_client: QdrantClient | None,
|
|
620
|
+
cfg: QdrantConfig,
|
|
621
|
+
) -> bool:
|
|
622
|
+
if qdrant_client is None:
|
|
623
|
+
return bool(cfg.cloud_inference)
|
|
624
|
+
client_flag = getattr(qdrant_client, "cloud_inference", None)
|
|
625
|
+
if client_flag is None:
|
|
626
|
+
return bool(cfg.cloud_inference)
|
|
627
|
+
return bool(client_flag)
|
|
628
|
+
|
|
629
|
+
@staticmethod
|
|
630
|
+
def _copy_options_dict(options: dict | None, field_name: str) -> dict:
|
|
631
|
+
if options is None:
|
|
632
|
+
return {}
|
|
633
|
+
if not isinstance(options, dict):
|
|
634
|
+
raise ValueError(
|
|
635
|
+
f"InferenceConfig.{field_name} must be a dict, got {type(options).__name__}"
|
|
636
|
+
)
|
|
637
|
+
return dict(options)
|
|
638
|
+
|
|
639
|
+
@staticmethod
|
|
640
|
+
def _with_dense_size(vectors: VectorConfig, dense_size: int) -> VectorConfig:
|
|
641
|
+
return VectorConfig(
|
|
642
|
+
distance=vectors.distance,
|
|
643
|
+
dense_size=dense_size,
|
|
644
|
+
sparse_name=vectors.sparse_name,
|
|
645
|
+
dense_name=vectors.dense_name,
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
@staticmethod
|
|
649
|
+
def _copy_client_kwargs(client_kwargs: dict[str, object] | None) -> dict[str, object]:
|
|
650
|
+
if client_kwargs is None:
|
|
651
|
+
return {}
|
|
652
|
+
if not isinstance(client_kwargs, dict):
|
|
653
|
+
raise ValueError(
|
|
654
|
+
f"qdrant.client_kwargs must be a dict, got {type(client_kwargs).__name__}"
|
|
655
|
+
)
|
|
656
|
+
return dict(client_kwargs)
|
|
657
|
+
|
|
658
|
+
@staticmethod
|
|
659
|
+
def _validate_client_kwargs_conflicts(mode: str, client_kwargs: dict[str, object]) -> None:
|
|
660
|
+
blocked_by_mode = {
|
|
661
|
+
"url": {"url", "api_key", "cloud_inference"},
|
|
662
|
+
"path": {"path"},
|
|
663
|
+
"memory": {"location"},
|
|
664
|
+
}
|
|
665
|
+
blocked = blocked_by_mode.get(mode, set())
|
|
666
|
+
conflicts = sorted(k for k in client_kwargs if k in blocked)
|
|
667
|
+
if not conflicts:
|
|
668
|
+
return
|
|
669
|
+
joined = ", ".join(conflicts)
|
|
670
|
+
raise ValueError(
|
|
671
|
+
f'qdrant.client_kwargs for mode="{mode}" cannot include: {joined}. '
|
|
672
|
+
"Set these values via QdrantConfig fields instead."
|
|
673
|
+
)
|
|
674
|
+
|
|
675
|
+
def _apply_dense_provider_api_key_defaults(
|
|
676
|
+
self,
|
|
677
|
+
*,
|
|
678
|
+
dense_model: str,
|
|
679
|
+
dense_options: dict,
|
|
680
|
+
) -> None:
|
|
681
|
+
model_name = dense_model.strip().lower()
|
|
682
|
+
for prefixes, option_key, env_names, provider_name in self._DENSE_PROVIDER_KEY_RULES:
|
|
683
|
+
if not any(model_name.startswith(prefix) for prefix in prefixes):
|
|
684
|
+
continue
|
|
685
|
+
# Explicit user config always wins.
|
|
686
|
+
if option_key in dense_options:
|
|
687
|
+
return
|
|
688
|
+
for env_name in env_names:
|
|
689
|
+
env_api_key = os.getenv(env_name)
|
|
690
|
+
if env_api_key:
|
|
691
|
+
dense_options[option_key] = env_api_key
|
|
692
|
+
return
|
|
693
|
+
env_hint = " or ".join(env_names)
|
|
694
|
+
raise ValueError(
|
|
695
|
+
f"cloud_inference=True with {provider_name} dense inference requires an API key. "
|
|
696
|
+
f"Set InferenceConfig.dense_options['{option_key}'] or {env_hint}."
|
|
697
|
+
)
|
|
698
|
+
# Unknown provider prefix: leave unchanged.
|
|
699
|
+
|
|
700
|
+
def _apply_cloud_inference_defaults(self) -> None:
    """Normalize InferenceConfig and vector sizing for cloud inference modes.

    Fills default cloud models when unset, validates/normalizes the dense
    dimension (which may come from ``vectors.dense_size`` OR
    ``dense_options['dimensions']`` — exactly one place), applies provider
    API-key defaults, and rebuilds ``self.inference_config`` from the
    normalized values.

    Raises:
        ValueError: on non-integer/non-positive dimensions, or when the two
            dimension sources disagree.
    """
    # Which vector families this embedding mode needs.
    dense_mode = self.embedding_mode in {"inference_dense", "inference_hybrid"}
    sparse_mode = self.embedding_mode in {"inference_sparse", "inference_hybrid"}

    # Work on copies so the user's InferenceConfig is never mutated in place.
    cfg = self.inference_config or InferenceConfig()
    dense_model = cfg.dense_model
    sparse_model = cfg.sparse_model
    dense_options = self._copy_options_dict(cfg.dense_options, "dense_options")
    sparse_options = self._copy_options_dict(cfg.sparse_options, "sparse_options")

    if dense_mode and not dense_model:
        dense_model = DEFAULT_CLOUD_DENSE_MODEL

    if sparse_mode and not sparse_model:
        sparse_model = DEFAULT_CLOUD_SPARSE_MODEL

    # Normalize vectors.dense_size to a positive int (it is user-supplied).
    normalized_vector_dense_size: int | None = None
    if dense_mode and self.vectors.dense_size is not None:
        try:
            normalized_vector_dense_size = int(self.vectors.dense_size)
        except (TypeError, ValueError) as e:
            raise ValueError("vectors.dense_size must be an integer") from e
        if normalized_vector_dense_size <= 0:
            raise ValueError("vectors.dense_size must be > 0")
        if normalized_vector_dense_size != self.vectors.dense_size:
            self.vectors = self._with_dense_size(
                self.vectors, normalized_vector_dense_size
            )

    # Normalize dense_options['dimensions'] the same way.
    normalized_dense_dim: int | None = None
    if dense_mode and dense_model:
        dense_dim_raw = dense_options.get("dimensions")
        if dense_dim_raw is not None:
            try:
                normalized_dense_dim = int(dense_dim_raw)
            except (TypeError, ValueError) as e:
                raise ValueError(
                    "InferenceConfig.dense_options['dimensions'] must be an integer"
                ) from e
            if normalized_dense_dim <= 0:
                raise ValueError(
                    "InferenceConfig.dense_options['dimensions'] must be > 0"
                )
            dense_options["dimensions"] = normalized_dense_dim

    # Allow setting dimension in exactly one place (vectors OR dense_options).
    # Default-model special case: when neither source set a dimension, pin the
    # library default so the collection schema is deterministic.
    if (
        dense_model == DEFAULT_CLOUD_DENSE_MODEL
        and normalized_dense_dim is None
        and normalized_vector_dense_size is None
    ):
        normalized_dense_dim = DEFAULT_CLOUD_DENSE_DIMENSIONS
        dense_options["dimensions"] = normalized_dense_dim

    if normalized_dense_dim is None and normalized_vector_dense_size is not None:
        # Only vectors.dense_size was given: propagate it. OpenAI models also
        # need the 'dimensions' request option to actually honor the size.
        normalized_dense_dim = normalized_vector_dense_size
        if dense_model.startswith("openai/"):
            dense_options["dimensions"] = normalized_dense_dim
    elif normalized_dense_dim is not None and normalized_vector_dense_size is None:
        # Only dense_options['dimensions'] was given: mirror it into vectors.
        self.vectors = self._with_dense_size(
            self.vectors, normalized_dense_dim
        )
    elif (
        normalized_dense_dim is not None
        and normalized_vector_dense_size is not None
        and normalized_dense_dim != normalized_vector_dense_size
    ):
        # Both were given and disagree — refuse rather than guess.
        raise ValueError(
            "vectors.dense_size must match InferenceConfig.dense_options['dimensions']: "
            f"{normalized_vector_dense_size} != {normalized_dense_dim}"
        )

    # Pull a provider API key from env vars when the user did not set one.
    self._apply_dense_provider_api_key_defaults(
        dense_model=dense_model,
        dense_options=dense_options,
    )

    # Rebuild the config from the normalized values; empty dicts become None.
    self.inference_config = InferenceConfig(
        dense_model=dense_model,
        sparse_model=sparse_model,
        dense_options=dense_options or None,
        sparse_options=sparse_options or None,
    )
|
|
783
|
+
|
|
784
|
+
def _raise_qdrant_response(
    self,
    exc: UnexpectedResponse,
    *,
    operation: str,
) -> None:
    """Re-raise a Qdrant HTTP error as a QdrantResponseError with context.

    Always raises (never returns normally); the original exception is kept
    as the cause via ``from exc``.

    Args:
        exc: the low-level Qdrant client error.
        operation: human-readable name of the failed operation (e.g. "ingest").
    """
    content = exc.content.decode("utf-8", errors="replace")
    message = f"Qdrant response error during `{operation}`: {exc}"
    # Match against both the exception text and the raw body, case-insensitively.
    lowered = f"{exc}\n{content}".lower()
    if self.cloud_inference and (
        "endpoint configuration not found for model" in lowered
        or "inference can't process request" in lowered
    ):
        # Common cloud-inference misconfiguration: give an actionable hint.
        message = (
            f"{message}. The model/provider is not configured for this Qdrant Cloud cluster "
            "(check the cluster Inference tab)."
        )
    raise QdrantResponseError(
        message,
        status_code=exc.status_code,
        reason_phrase=exc.reason_phrase,
        raw_content=content,
    ) from exc
|
|
807
|
+
|
|
808
|
+
def __init__(
    self,
    collection_name: str = "poma_documents",
    *,
    qdrant: QdrantConfig | None = None,
    qdrant_client: QdrantClient | None = None,
    embedding_mode: str | None = None,
    dense_embed_fn: DenseEmbedSync | None = None,
    sparse_embed_fn: SparseEmbedSync | None = None,
    vectors: VectorConfig | None = None,
    inference_config: InferenceConfig | None = None,
    fusion_k: int = 60,
    fusion_candidates: int = 50,
    fusion_weights: tuple[float, float] | None = None,
    write_mode: str = "upsert",
    batch_size: int = 256,
    store_chunk_details: bool = True,
):
    """Configure the embedding mode, fusion parameters, and Qdrant client.

    Args:
        collection_name: target Qdrant collection (created here if missing,
            except in fastembed mode where creation is lazy).
        qdrant: connection config; used only when ``qdrant_client`` is None.
        qdrant_client: pre-configured client; takes precedence over ``qdrant``.
        embedding_mode: one of fastembed / external_* / inference_*; default
            depends on whether cloud inference is enabled.
        dense_embed_fn / sparse_embed_fn: required for the external_* modes.
        vectors: named-vector layout (names and dense size).
        inference_config: required for the inference_* modes.
        fusion_k / fusion_candidates / fusion_weights: RRF fusion tuning for
            hybrid search.
        write_mode: "upsert" (overwrite) or "insert_only" (skip existing ids).
        batch_size: points per upsert call.
        store_chunk_details: whether full chunk details go into payloads.

    Raises:
        ValueError: on invalid mode strings or missing mode prerequisites.
    """
    self.collection_name = collection_name
    cfg = qdrant or QdrantConfig()
    # Cloud inference may be implied by the given client OR the config.
    self.cloud_inference = self._resolve_cloud_inference_enabled(qdrant_client, cfg)
    mode_text = (embedding_mode or "").strip().lower()
    if mode_text:
        self.embedding_mode = mode_text
    else:
        # Strong default: cloud inference uses inference_dense; otherwise keep fastembed.
        self.embedding_mode = "inference_dense" if self.cloud_inference else "fastembed"
    self.dense_embed_fn = dense_embed_fn
    self.sparse_embed_fn = sparse_embed_fn
    self.vectors = vectors or VectorConfig()
    self.inference_config = inference_config
    self.fusion_k = max(1, int(fusion_k))
    self.fusion_candidates = max(1, int(fusion_candidates))
    self.fusion_weights = fusion_weights  # (dense_weight, sparse_weight) for hybrid; None = (1, 1)
    self.write_mode = (write_mode or "upsert").strip().lower()
    self.batch_size = max(1, int(batch_size))
    self.store_chunk_details = bool(store_chunk_details)
    if self.write_mode not in ("upsert", "insert_only"):
        raise ValueError('write_mode must be one of: "upsert", "insert_only"')

    allowed = {
        "fastembed", "external_dense", "external_sparse", "external_hybrid",
        "inference_dense", "inference_sparse", "inference_hybrid",
    }
    if self.embedding_mode not in allowed:
        raise ValueError(f"embedding_mode must be one of: {sorted(allowed)}")

    if self.cloud_inference and self.embedding_mode == "fastembed":
        raise ValueError(
            "cloud_inference=True is not supported with embedding_mode='fastembed'. "
            "Use cloud_inference=False for local FastEmbed or switch to an inference_* mode "
            "with InferenceConfig."
        )
    if self.cloud_inference and self.embedding_mode in self._INFERENCE_MODES:
        # Normalizes inference_config / vectors; may raise ValueError.
        self._apply_cloud_inference_defaults()

    # Fail-fast: validate external_* requirements at init
    if self.embedding_mode == "external_dense":
        if self.dense_embed_fn is None:
            raise ValueError('embedding_mode="external_dense" requires dense_embed_fn')
        self._validate_dense_size(self.vectors.dense_size, "external_dense")
    elif self.embedding_mode == "external_sparse":
        if self.sparse_embed_fn is None:
            raise ValueError('embedding_mode="external_sparse" requires sparse_embed_fn')
    elif self.embedding_mode == "external_hybrid":
        if self.dense_embed_fn is None or self.sparse_embed_fn is None:
            raise ValueError('embedding_mode="external_hybrid" requires dense_embed_fn AND sparse_embed_fn')
        self._validate_dense_size(self.vectors.dense_size, "external_hybrid")
    elif self.embedding_mode == "inference_dense":
        if self.inference_config is None:
            raise ValueError('embedding_mode="inference_dense" requires inference_config')
        self.inference_config.dense_document("")  # validate dense_model set
        self._validate_dense_size(self.vectors.dense_size, "inference_dense")
    elif self.embedding_mode == "inference_sparse":
        if self.inference_config is None:
            raise ValueError('embedding_mode="inference_sparse" requires inference_config')
        self.inference_config.sparse_document("")  # validate sparse_model set
    elif self.embedding_mode == "inference_hybrid":
        if self.inference_config is None:
            raise ValueError('embedding_mode="inference_hybrid" requires inference_config')
        self.inference_config.dense_document("")
        self.inference_config.sparse_document("")
        self._validate_dense_size(self.vectors.dense_size, "inference_hybrid")

    # Client resolution: explicit client wins; otherwise build one from config.
    if qdrant_client is not None:
        self.client = qdrant_client
    else:
        mode = (cfg.mode or "").strip().lower()
        client_kwargs = self._copy_client_kwargs(cfg.client_kwargs)
        if mode == "url":
            self._validate_client_kwargs_conflicts(mode, client_kwargs)
            if not cfg.url:
                raise ValueError('qdrant.mode="url" requires qdrant.url="http(s)://..."')
            self.client = QdrantClient(
                url=cfg.url,
                api_key=cfg.api_key,
                cloud_inference=cfg.cloud_inference,
                **client_kwargs,
            )
        elif mode == "path":
            self._validate_client_kwargs_conflicts(mode, client_kwargs)
            if not cfg.path:
                raise ValueError('qdrant.mode="path" requires qdrant.path="..."')
            self.client = QdrantClient(path=cfg.path, **client_kwargs)
        elif mode == "memory":
            self._validate_client_kwargs_conflicts(mode, client_kwargs)
            self.client = QdrantClient(location=":memory:", **client_kwargs)
        else:
            raise ValueError('qdrant.mode must be one of: "memory", "path", "url"')

    self.ops = SyncClientOps(self.client)
    self._ensure_collection_if_needed_sync()
|
|
920
|
+
|
|
921
|
+
def _ensure_fastembed_client(self) -> None:
|
|
922
|
+
"""Raise with a clear message if fastembed mode is used but client lacks FastEmbed Document support."""
|
|
923
|
+
query_points_fn = getattr(self.client, "query_points", None)
|
|
924
|
+
upsert_fn = getattr(self.client, "upsert", None)
|
|
925
|
+
get_dense_name_fn = getattr(self.client, "get_vector_field_name", None)
|
|
926
|
+
get_dense_params_fn = getattr(self.client, "get_fastembed_vector_params", None)
|
|
927
|
+
if (
|
|
928
|
+
not callable(query_points_fn)
|
|
929
|
+
or not callable(upsert_fn)
|
|
930
|
+
or not callable(get_dense_name_fn)
|
|
931
|
+
or not callable(get_dense_params_fn)
|
|
932
|
+
):
|
|
933
|
+
raise RuntimeError(
|
|
934
|
+
"embedding_mode='fastembed' requires a Qdrant client with FastEmbed integration "
|
|
935
|
+
"(client.query_points(..., query=models.Document(...)) and client.upsert(...)). "
|
|
936
|
+
"Pass a pre-configured client or use embedding_mode='external_dense' (or external_sparse/external_hybrid) "
|
|
937
|
+
"with your own embedder."
|
|
938
|
+
)
|
|
939
|
+
dense_model = getattr(self.client, "embedding_model_name", None)
|
|
940
|
+
if not isinstance(dense_model, str) or not dense_model:
|
|
941
|
+
raise RuntimeError(
|
|
942
|
+
"embedding_mode='fastembed' requires a valid embedding_model_name on the client."
|
|
943
|
+
)
|
|
944
|
+
try:
|
|
945
|
+
sig_query_points = inspect.signature(query_points_fn)
|
|
946
|
+
sig_upsert = inspect.signature(upsert_fn)
|
|
947
|
+
except (ValueError, TypeError):
|
|
948
|
+
raise RuntimeError(
|
|
949
|
+
"embedding_mode='fastembed': client query_points/upsert could not be inspected. "
|
|
950
|
+
"Use a Qdrant client with FastEmbed integration or embedding_mode='external_dense'."
|
|
951
|
+
)
|
|
952
|
+
if "query" not in sig_query_points.parameters:
|
|
953
|
+
raise RuntimeError(
|
|
954
|
+
"embedding_mode='fastembed' requires client.query_points(..., query=..., ...). "
|
|
955
|
+
"This client's query_points() does not accept 'query'. Use a FastEmbed-enabled client or external_* mode."
|
|
956
|
+
)
|
|
957
|
+
if "points" not in sig_upsert.parameters:
|
|
958
|
+
raise RuntimeError(
|
|
959
|
+
"embedding_mode='fastembed' requires client.upsert(..., points=...). "
|
|
960
|
+
"This client's upsert() does not accept 'points'. Use a FastEmbed-enabled client or external_* mode."
|
|
961
|
+
)
|
|
962
|
+
|
|
963
|
+
def _ensure_collection_if_needed_sync(self) -> None:
|
|
964
|
+
existing = self.ops.get_collections_names()
|
|
965
|
+
if self.collection_name in existing:
|
|
966
|
+
return
|
|
967
|
+
|
|
968
|
+
if self.embedding_mode in ("external_dense", "inference_dense"):
|
|
969
|
+
self.ops.create_collection_dense(self.collection_name, vectors=self.vectors)
|
|
970
|
+
elif self.embedding_mode in ("external_sparse", "inference_sparse"):
|
|
971
|
+
self.ops.create_collection_sparse(self.collection_name, vectors=self.vectors)
|
|
972
|
+
elif self.embedding_mode in ("external_hybrid", "inference_hybrid"):
|
|
973
|
+
self.ops.create_collection_hybrid(self.collection_name, vectors=self.vectors)
|
|
974
|
+
# fastembed: created lazily in SyncClientOps.add_documents().
|
|
975
|
+
|
|
976
|
+
def _filter_new_ids_sync(self, ids: list[str]) -> tuple[list[str], list[int]]:
|
|
977
|
+
"""For insert_only: return (ids that are new, indices into original list)."""
|
|
978
|
+
existing = self.ops.retrieve_ids(self.collection_name, ids=ids)
|
|
979
|
+
new_ids = [pid for pid in ids if pid not in existing]
|
|
980
|
+
new_set = set(new_ids)
|
|
981
|
+
indices = [i for i, pid in enumerate(ids) if pid in new_set]
|
|
982
|
+
return new_ids, indices
|
|
983
|
+
|
|
984
|
+
def ingest(
    self,
    chunk_data: dict | str | os.PathLike[str],
) -> None:
    """
    Ingest chunk data into the collection.
    chunk_data: either a dict with ``chunks`` and ``chunksets`` keys,
    or a path to a ``.poma`` file (will be extracted first).

    Behavior depends on ``self.embedding_mode``:
      - fastembed: delegates embedding to the client via ops.add_documents;
      - external_*: embeds with the user-supplied embed functions;
      - inference_*: wraps documents so Qdrant Cloud embeds server-side.
    With write_mode="insert_only", already-stored ids are skipped.
    Qdrant HTTP errors are re-raised as QdrantResponseError.
    """
    try:
        # Accept a .poma path or an in-memory dict transparently.
        chunk_data = _chunk_data_from_input(chunk_data)
        ids, documents, payloads = self.prepare_points_from_chunk_data(
            chunk_data, store_chunk_details=self.store_chunk_details
        )
        if not documents:
            return

        # fastembed creates its collection lazily in ops.add_documents.
        if self.embedding_mode != "fastembed":
            self._ensure_collection_if_needed_sync()

        # insert_only: drop ids that already exist, keeping lists aligned.
        if self.write_mode == "insert_only" and ids:
            new_ids, indices = self._filter_new_ids_sync(ids)
            if not new_ids:
                return
            ids = new_ids
            documents = [documents[i] for i in indices]
            payloads = [payloads[i] for i in indices]

        if self.embedding_mode == "fastembed":
            self._ensure_fastembed_client()
            for start in range(0, len(ids), self.batch_size):
                end = start + self.batch_size
                self.ops.add_documents(
                    self.collection_name,
                    documents=documents[start:end],
                    payloads=payloads[start:end],
                    ids=ids[start:end],
                )
            return

        if self.embedding_mode == "external_dense":
            dense_vecs = self.dense_embed_fn(documents)
            if len(dense_vecs) != len(documents):
                raise ValueError("dense_embed_fn returned a different number of vectors than documents")
            self._validate_dense_vectors(dense_vecs, int(self.vectors.dense_size), "dense_embed_fn")
            points = [
                qmodels.PointStruct(id=pid, vector={self.vectors.dense_name: vec}, payload=payload)
                for pid, vec, payload in zip(ids, dense_vecs, payloads, strict=True)
            ]
            for start in range(0, len(points), self.batch_size):
                self.ops.upsert_points(self.collection_name, points=points[start : start + self.batch_size])
            return

        if self.embedding_mode == "external_sparse":
            sparse_items = self.sparse_embed_fn(documents)
            if len(sparse_items) != len(documents):
                raise ValueError("sparse_embed_fn returned a different number of vectors than documents")
            points: list[qmodels.PointStruct] = []
            for pid, item, payload in zip(ids, sparse_items, payloads, strict=True):
                svec = self._to_sparse_vector(item)
                points.append(
                    qmodels.PointStruct(id=pid, vector={self.vectors.sparse_name: svec}, payload=payload)
                )
            for start in range(0, len(points), self.batch_size):
                self.ops.upsert_points(self.collection_name, points=points[start : start + self.batch_size])
            return

        # inference_*: store Document wrappers; Qdrant Cloud embeds them.
        if self.embedding_mode == "inference_dense":
            points_inf = [
                qmodels.PointStruct(
                    id=pid,
                    vector={self.vectors.dense_name: self.inference_config.dense_document(doc)},
                    payload=payload,
                )
                for pid, doc, payload in zip(ids, documents, payloads, strict=True)
            ]
            for start in range(0, len(points_inf), self.batch_size):
                self.ops.upsert_points(self.collection_name, points=points_inf[start : start + self.batch_size])
            return

        if self.embedding_mode == "inference_sparse":
            points_inf = [
                qmodels.PointStruct(
                    id=pid,
                    vector={self.vectors.sparse_name: self.inference_config.sparse_document(doc)},
                    payload=payload,
                )
                for pid, doc, payload in zip(ids, documents, payloads, strict=True)
            ]
            for start in range(0, len(points_inf), self.batch_size):
                self.ops.upsert_points(self.collection_name, points=points_inf[start : start + self.batch_size])
            return

        if self.embedding_mode == "inference_hybrid":
            points_inf = [
                qmodels.PointStruct(
                    id=pid,
                    vector={
                        self.vectors.dense_name: self.inference_config.dense_document(doc),
                        self.vectors.sparse_name: self.inference_config.sparse_document(doc),
                    },
                    payload=payload,
                )
                for pid, doc, payload in zip(ids, documents, payloads, strict=True)
            ]
            for start in range(0, len(points_inf), self.batch_size):
                self.ops.upsert_points(self.collection_name, points=points_inf[start : start + self.batch_size])
            return

        # external_hybrid
        dense_vecs = self.dense_embed_fn(documents)
        sparse_items = self.sparse_embed_fn(documents)
        if len(dense_vecs) != len(documents) or len(sparse_items) != len(documents):
            raise ValueError("Hybrid embedders returned a different number of vectors than documents")
        self._validate_dense_vectors(dense_vecs, int(self.vectors.dense_size), "dense_embed_fn")
        points_list: list[qmodels.PointStruct] = []
        for pid, dvec, sitem, payload in zip(ids, dense_vecs, sparse_items, payloads, strict=True):
            svec = self._to_sparse_vector(sitem)
            points_list.append(
                qmodels.PointStruct(
                    id=pid,
                    vector={self.vectors.dense_name: dvec, self.vectors.sparse_name: svec},
                    payload=payload,
                )
            )
        for start in range(0, len(points_list), self.batch_size):
            self.ops.upsert_points(self.collection_name, points=points_list[start : start + self.batch_size])
    except UnexpectedResponse as exc:
        # Always raises QdrantResponseError with operation context.
        self._raise_qdrant_response(exc, operation="ingest")
|
|
1113
|
+
|
|
1114
|
+
def search(
    self,
    query: str,
    *,
    limit: int = 5,
    query_filter: qmodels.Filter | None = None,
) -> list[dict]:
    """Search; returns list[dict] with keys id, score, text, metadata (same shape as SearchResult).

    Dispatches on ``self.embedding_mode``; hybrid modes run dense + sparse
    candidate queries (fusion_candidates each) and merge them with RRF.
    Qdrant HTTP errors are re-raised as QdrantResponseError.
    """
    limit = max(1, int(limit))
    try:
        if self.embedding_mode == "fastembed":
            self._ensure_fastembed_client()
            dense_name, dense_doc = self._fastembed_dense_spec(self.client, query)
            sparse_spec = self._fastembed_sparse_spec(self.client, query)
            if sparse_spec is None:
                # Dense-only fastembed: single query, no fusion.
                return self.ops.query_points(
                    self.collection_name,
                    query=dense_doc,
                    using=dense_name,
                    limit=limit,
                    query_filter=query_filter,
                )

            # Hybrid fastembed: fetch candidates from both vectors, then fuse.
            cand = max(limit, self.fusion_candidates)
            sparse_name, sparse_doc = sparse_spec
            dense_hits = self.ops.query_points(
                self.collection_name,
                query=dense_doc,
                using=dense_name,
                limit=cand,
                query_filter=query_filter,
            )
            sparse_hits = self.ops.query_points(
                self.collection_name,
                query=sparse_doc,
                using=sparse_name,
                limit=cand,
                query_filter=query_filter,
            )
            dense_ids = [str(h["id"]) for h in dense_hits]
            sparse_ids = [str(h["id"]) for h in sparse_hits]
            # NOTE(review): weights=None here ignores self.fusion_weights,
            # unlike the inference_hybrid / external_hybrid paths below —
            # confirm whether this is intentional.
            fused = self.rrf_fuse([dense_ids, sparse_ids], k=self.fusion_k, weights=None)
            # First occurrence wins: dense payloads take precedence over sparse.
            payload_by_id: dict[str, dict] = {}
            for h in dense_hits + sparse_hits:
                payload_by_id.setdefault(str(h["id"]), h.get("metadata", {}) or {})
            ranked = sorted(fused.items(), key=lambda kv: kv[1], reverse=True)[:limit]
            return [
                {
                    "id": pid,
                    "score": score,
                    "text": payload_by_id.get(pid, {}).get("text", ""),
                    "metadata": payload_by_id.get(pid, {}) or {},
                }
                for pid, score in ranked
            ]

        self._ensure_collection_if_needed_sync()

        if self.embedding_mode == "external_dense":
            qvec = self.dense_embed_fn([query])[0]
            return self.ops.search_named_vector(
                self.collection_name,
                name=self.vectors.dense_name,
                vector=qvec,
                limit=limit,
                query_filter=query_filter,
            )

        if self.embedding_mode == "external_sparse":
            sitem = self.sparse_embed_fn([query])[0]
            svec = self._to_sparse_vector(sitem)
            return self.ops.search_named_vector(
                self.collection_name,
                name=self.vectors.sparse_name,
                vector=svec,
                limit=limit,
                query_filter=query_filter,
            )

        if self.embedding_mode == "inference_dense":
            # Server-side embedding: wrap the query as an inference Document.
            query_doc = self.inference_config.dense_document(query)
            return self.ops.query_points_inference(
                self.collection_name,
                query=query_doc,
                using=self.vectors.dense_name,
                limit=limit,
                query_filter=query_filter,
            )

        if self.embedding_mode == "inference_sparse":
            query_doc = self.inference_config.sparse_document(query)
            return self.ops.query_points_inference(
                self.collection_name,
                query=query_doc,
                using=self.vectors.sparse_name,
                limit=limit,
                query_filter=query_filter,
            )

        if self.embedding_mode == "inference_hybrid":
            # Two server-side queries + weighted RRF fusion.
            cand = max(limit, self.fusion_candidates)
            weights = list(self.fusion_weights) if self.fusion_weights else None
            dense_doc = self.inference_config.dense_document(query)
            sparse_doc = self.inference_config.sparse_document(query)
            dense_hits = self.ops.query_points_inference(
                self.collection_name,
                query=dense_doc,
                using=self.vectors.dense_name,
                limit=cand,
                query_filter=query_filter,
            )
            sparse_hits = self.ops.query_points_inference(
                self.collection_name,
                query=sparse_doc,
                using=self.vectors.sparse_name,
                limit=cand,
                query_filter=query_filter,
            )
            dense_ids = [str(h["id"]) for h in dense_hits]
            sparse_ids = [str(h["id"]) for h in sparse_hits]
            fused = self.rrf_fuse([dense_ids, sparse_ids], k=self.fusion_k, weights=weights)
            payload_by_id: dict[str, dict] = {}
            for h in dense_hits + sparse_hits:
                payload_by_id.setdefault(str(h["id"]), h.get("metadata", {}) or {})
            ranked = sorted(fused.items(), key=lambda kv: kv[1], reverse=True)[:limit]
            return [
                {
                    "id": pid,
                    "score": score,
                    "text": payload_by_id.get(pid, {}).get("text", ""),
                    "metadata": payload_by_id.get(pid, {}) or {},
                }
                for pid, score in ranked
            ]

        # external_hybrid: two searches + weighted RRF fusion
        cand = max(limit, self.fusion_candidates)
        weights = list(self.fusion_weights) if self.fusion_weights else None

        dense_q = self.dense_embed_fn([query])[0]
        sparse_item = self.sparse_embed_fn([query])[0]
        sparse_q = self._to_sparse_vector(sparse_item)

        dense_hits = self.ops.search_named_vector(
            self.collection_name,
            name=self.vectors.dense_name,
            vector=dense_q,
            limit=cand,
            query_filter=query_filter,
        )
        sparse_hits = self.ops.search_named_vector(
            self.collection_name,
            name=self.vectors.sparse_name,
            vector=sparse_q,
            limit=cand,
            query_filter=query_filter,
        )

        dense_ids = [str(h["id"]) for h in dense_hits]
        sparse_ids = [str(h["id"]) for h in sparse_hits]
        fused = self.rrf_fuse([dense_ids, sparse_ids], k=self.fusion_k, weights=weights)

        payload_by_id: dict[str, dict] = {}
        for h in dense_hits + sparse_hits:
            payload_by_id.setdefault(str(h["id"]), h.get("metadata", {}) or {})

        ranked = sorted(fused.items(), key=lambda kv: kv[1], reverse=True)[:limit]
        return [
            {
                "id": pid,
                "score": score,
                "text": payload_by_id.get(pid, {}).get("text", ""),
                "metadata": payload_by_id.get(pid, {}) or {},
            }
            for pid, score in ranked
        ]
    except UnexpectedResponse as exc:
        # Always raises QdrantResponseError with operation context.
        self._raise_qdrant_response(exc, operation="search")
|
|
1292
|
+
|
|
1293
|
+
def close(self) -> None:
    """Release the underlying Qdrant client's resources (e.g. its HTTP session).

    Does nothing when the client object exposes no callable ``close``.
    """
    close_fn = getattr(self.client, "close", None)
    if callable(close_fn):
        close_fn()
|
|
1297
|
+
|
|
1298
|
+
def search_cheatsheets(
    self,
    query: str,
    *,
    limit: int = 5,
    chunk_data: dict | None = None,
    query_filter: qmodels.Filter | None = None,
) -> list[dict]:
    """Run a search and condense the hits into cheatsheets.

    When *chunk_data* is truthy, its ``chunks`` list replaces the chunks
    recovered from the search payloads. Returns an empty list when the
    search itself finds nothing.
    """
    hits = self.search(query, limit=limit, query_filter=query_filter)
    if not hits:
        return []
    chunksets, chunks = self.results_to_cheatsheet_inputs(hits)
    if chunk_data:
        # Caller-provided chunk data overrides the payload-derived chunks.
        chunks = chunk_data.get("chunks", []) or []
    sheets = generate_cheatsheets(chunksets, chunks)
    return sheets or []
|
|
1313
|
+
|
|
1314
|
+
# Legacy compatibility
|
|
1315
|
+
def store_chunksets(self, chunksets: list[dict], chunks: list[dict], *, file_id: str | None = None) -> None:
|
|
1316
|
+
self.ingest({"chunksets": chunksets, "chunks": chunks})
|
|
1317
|
+
|
|
1318
|
+
def write(self, chunk_data: dict, *, file_id: str | None = None) -> None:
|
|
1319
|
+
self.ingest(chunk_data)
|
|
1320
|
+
|
|
1321
|
+
def query_chunksets(self, query: str, *, top_k: int = 5) -> list[dict]:
    """Legacy shim: run :meth:`search` and keep only each hit's metadata payload."""
    metadata_list: list[dict] = []
    for hit in self.search(query, limit=top_k):
        metadata_list.append(hit["metadata"])
    return metadata_list
|
|
1323
|
+
|
|
1324
|
+
def query_and_generate_cheatsheet(self, query: str, *, top_k: int = 5, all_chunks: list[dict] | None = None):
|
|
1325
|
+
chunk_data = {"chunks": all_chunks} if all_chunks else None
|
|
1326
|
+
return self.search_cheatsheets(query, limit=top_k, chunk_data=chunk_data)
|