poma 0.2.3__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1326 @@
1
+ # ---------------------------------------------------------------------
2
+ # POMA integration for Qdrant
3
+ # ---------------------------------------------------------------------
4
+ """
5
+ Sync POMA–Qdrant integration with dense, sparse, and hybrid search.
6
+
7
+ - BasePomaQdrantCore: shared logic (prepare points, RRF fusion, cheatsheet inputs).
8
+ - SyncClientOps: thin Qdrant I/O adapter.
9
+ - PomaQdrant: supports fastembed, external_*, and inference_* modes.
10
+ - fastembed mode is environment-dependent: it requires a Qdrant client with FastEmbed integration
11
+ (upsert/query_points with models.Document). For production, pass a pre-configured client
12
+ or use embedding_mode='external_dense' (or external_*).
13
+ """
14
+
15
+ import inspect
16
+ import os
17
+ import uuid
18
+ from collections.abc import Callable, Sequence
19
+ from dataclasses import dataclass
20
+
21
+ from qdrant_client import QdrantClient, models as qcm
22
+ from qdrant_client.http.exceptions import UnexpectedResponse
23
+ from qdrant_client.http import models as qmodels
24
+
25
+ from poma.client import extract_chunks_and_chunksets_from_poma_archive
26
+ from poma.retrieval import generate_cheatsheets
27
+
28
# Defaults applied when cloud inference is enabled and no explicit models are
# configured (see PomaQdrant._apply_cloud_inference_defaults).
DEFAULT_CLOUD_DENSE_MODEL = "openai/text-embedding-3-large"
DEFAULT_CLOUD_DENSE_DIMENSIONS = 1536  # default "dimensions" option for the default dense model
DEFAULT_CLOUD_SPARSE_MODEL = "Qdrant/bm25"


# Public API surface of this module.
__all__ = [
    "PomaQdrant",
    "QdrantConfig",
    "VectorConfig",
    "InferenceConfig",
    "QdrantResponseError",
    "SearchResult",
    "chunk_uuid_string",
    "DenseEmbedSync",
    "SparseEmbedSync",
]
44
+
45
+
46
+ def _chunk_data_from_input(
47
+ chunk_data: dict | str | os.PathLike[str],
48
+ ) -> dict:
49
+ """Accept chunk_data dict or path to a .poma file; return chunk_data dict."""
50
+ if isinstance(chunk_data, (str, os.PathLike)):
51
+ path = os.fspath(chunk_data)
52
+ if not path.lower().endswith(".poma"):
53
+ raise ValueError(
54
+ "Path must point to a .poma file; got {!r}".format(path)
55
+ )
56
+ return extract_chunks_and_chunksets_from_poma_archive(
57
+ poma_archive_path=chunk_data
58
+ )
59
+ return chunk_data
60
+
61
@dataclass
class SearchResult:
    """Type-safe search result; matches the dict shape returned by search()."""
    id: str  # point id (deterministic UUIDv5 string; see chunk_uuid_string)
    score: float  # relevance score as reported in the query result
    text: str  # chunkset contents stored under payload["text"]
    metadata: dict  # full payload (chunkset_index, chunks, file_id, text, optional chunk_details)
68
+
69
+
70
+ class QdrantResponseError(RuntimeError):
71
+ """SDK-level wrapper for qdrant-client UnexpectedResponse errors."""
72
+
73
+ def __init__(
74
+ self,
75
+ message: str,
76
+ *,
77
+ status_code: int | None = None,
78
+ reason_phrase: str | None = None,
79
+ raw_content: str | None = None,
80
+ ) -> None:
81
+ super().__init__(message)
82
+ self.status_code = status_code
83
+ self.reason_phrase = reason_phrase
84
+ self.raw_content = raw_content
85
+
86
+
87
def chunk_uuid_string(file_id: str, chunkset_index: int) -> str:
    """
    Deterministic RFC4122 UUID (v5) from (file_id, chunkset_index).
    Same inputs -> same UUID. Different index -> different UUID.
    """
    if chunkset_index >= 0:
        return str(uuid.uuid5(uuid.NAMESPACE_URL, f"{file_id}#{chunkset_index}"))
    raise ValueError("chunkset_index must be non-negative")
96
+
97
+
98
# Dense: one float-vector per text.
# Callables may accept Sequence[str] for flexibility.
DenseEmbedSync = Callable[[Sequence[str]], list[list[float]]]

# Sparse: one sparse vector per text.
# Accepted per-item return types (normalized by BasePomaQdrantCore._to_sparse_vector):
# - qmodels.SparseVector(indices=[...], values=[...])
# - tuple[list[int], list[float]] -> (indices, values)
# - dict with keys {"indices": [...], "values": [...]}
SparseEmbedSync = Callable[[Sequence[str]], list[object]]
108
+
109
+
110
@dataclass(frozen=True)
class QdrantConfig:
    """
    Qdrant deployment options:
    - mode="memory": in-process, RAM-only (sync only)
    - mode="path": in-process, persistent folder on disk (sync only)
    - mode="url": HTTP endpoint (local server or cloud) (sync)
    - api_key: API key for cloud inference (required if cloud_inference is True)
    - cloud_inference: if True, use cloud inference (default is False)
    - client_kwargs: optional pass-through kwargs for QdrantClient(...)
      (e.g. {"timeout": 120, "pool_size": 20})
    """
    mode: str = "memory"  # "memory" | "path" | "url"
    path: str | None = None  # required if mode="path"
    url: str | None = None  # required if mode="url"
    api_key: str | None = None  # optional (cloud)
    cloud_inference: bool = False  # optional (cloud)
    client_kwargs: dict[str, object] | None = None  # extra QdrantClient(...) kwargs
128
+
129
+
130
@dataclass(frozen=True)
class VectorConfig:
    """
    Vector/index configuration used when embedding_mode is external_* or inference_* (we create collections explicitly).
    """
    distance: qmodels.Distance = qmodels.Distance.COSINE  # similarity metric for the dense space
    dense_size: int | None = None  # required for external_dense/external_hybrid and inference_dense/inference_hybrid
    sparse_name: str = "sparse"  # name for sparse vector space
    dense_name: str = "dense"  # name for dense vector space
139
+
140
+
141
@dataclass(frozen=True)
class InferenceConfig:
    """
    Inference configuration for embedding_mode inference_* (Qdrant Inference API).

    dense_model examples:
      - "sentence-transformers/all-minilm-l6-v2"
      - "openai/text-embedding-3-large"
    sparse_model examples:
      - "Qdrant/bm25"

    dense_options / sparse_options: provider-specific options.
    Dense API key option names used by Qdrant Cloud Inference include:
      - OpenAI: {"openai-api-key": "..."}
      - Cohere: {"cohere-api-key": "..."}
      - Jina AI: {"jina-api-key": "..."}
      - OpenRouter: {"openrouter-api-key": "..."}
    You can also pass provider-specific parameters like {"dimensions": 512}.
    """
    dense_model: str | None = None
    sparse_model: str | None = None
    dense_options: dict | None = None
    sparse_options: dict | None = None

    def dense_document(self, text: str) -> qcm.Document:
        """Wrap *text* in a qcm.Document bound to the configured dense model."""
        if not self.dense_model:
            raise ValueError(
                "InferenceConfig.dense_model is required for inference_dense/inference_hybrid"
            )
        return qcm.Document(text=text, model=self.dense_model, options=self.dense_options)

    def sparse_document(self, text: str) -> qcm.Document:
        """Wrap *text* in a qcm.Document bound to the configured sparse model."""
        if not self.sparse_model:
            raise ValueError(
                "InferenceConfig.sparse_model is required for inference_sparse/inference_hybrid"
            )
        return qcm.Document(text=text, model=self.sparse_model, options=self.sparse_options)
186
+
187
+
188
class BasePomaQdrantCore:
    """
    Shared, client-agnostic core: builds payloads/points, fuses rankings, builds cheatsheet inputs.
    """

    @staticmethod
    def _to_sparse_vector(item: object) -> qmodels.SparseVector:
        """Normalize one sparse-embedder result (see SparseEmbedSync) into qmodels.SparseVector."""
        if isinstance(item, qmodels.SparseVector):
            return item
        if isinstance(item, tuple) and len(item) == 2:
            indices, values = item
            return qmodels.SparseVector(indices=list(indices), values=list(values))
        if isinstance(item, dict):
            indices = item.get("indices")
            values = item.get("values")
            if indices is None or values is None:
                raise ValueError("Sparse embedding dict must contain 'indices' and 'values'")
            return qmodels.SparseVector(indices=list(indices), values=list(values))
        raise ValueError(
            "Sparse embedder must return SparseVector OR (indices, values) tuple OR dict{'indices','values'}"
        )

    @staticmethod
    def _get_file_id_fallback(chunksets: list[dict], chunks: list[dict]) -> str:
        """Best-effort file id: first chunkset's file_id, then first chunk's file_id/tag, else "unknown"."""
        file_id = None
        if chunksets:
            file_id = chunksets[0].get("file_id")
        if not file_id and chunks:
            file_id = chunks[0].get("file_id") or chunks[0].get("tag")
        return file_id or "unknown"

    @classmethod
    def prepare_points_from_chunk_data(
        cls,
        chunk_data: dict,
        *,
        store_chunk_details: bool = True,
    ) -> tuple[list[str], list[str], list[dict]]:
        """
        Returns: (ids, documents, payloads)
        - ids: deterministic UUID strings (file_id + chunkset_index)
        - documents: chunkset.contents
        - payloads: metadata including "text"; "chunk_details" only if store_chunk_details=True
        """
        chunksets: list[dict] = chunk_data.get("chunksets", [])
        chunks: list[dict] = chunk_data.get("chunks", [])
        if not chunksets:
            # Nothing to ingest.
            return [], [], []

        file_id = cls._get_file_id_fallback(chunksets, chunks)
        # Index chunks by chunk_index for O(1) lookups when attaching chunk_details.
        chunks_by_index = {c.get("chunk_index"): c for c in chunks}

        ids: list[str] = []
        documents: list[str] = []
        payloads: list[dict] = []

        for idx, chunkset in enumerate(chunksets):
            content = chunkset.get("contents", "")
            if not content:
                # Empty chunksets produce no point.
                continue

            chunkset_index = chunkset.get("chunkset_index")
            if chunkset_index is None:
                # Fall back to the positional index when the chunkset carries none.
                chunkset_index = idx

            # Per-chunkset file_id wins over the file-level fallback.
            chunkset_file_id = chunkset.get("file_id") or file_id
            point_id = chunk_uuid_string(chunkset_file_id, int(chunkset_index))

            raw_chunks = chunkset.get("chunks", [])
            chunk_indices = list(raw_chunks) if isinstance(raw_chunks, (list, tuple)) else []

            payload: dict = {
                "chunkset_index": int(chunkset_index),
                "chunks": chunk_indices,
                "file_id": chunkset_file_id,
                "text": content,
            }
            if store_chunk_details:
                relevant_chunks = [chunks_by_index.get(ci) for ci in chunk_indices if ci in chunks_by_index]
                payload["chunk_details"] = relevant_chunks

            ids.append(point_id)
            documents.append(content)
            payloads.append(payload)

        return ids, documents, payloads

    @staticmethod
    def rrf_fuse(
        ranked_lists: list[list[str]],
        *,
        k: int,
        weights: list[float] | None = None,
    ) -> dict[str, float]:
        """
        Reciprocal Rank Fusion (optionally weighted):
            score(id) = sum_i weight_i / (k + rank_i)
        If weights is None, all weights are 1.0. Order: [dense, sparse] for hybrid.
        If weights is provided, its length must equal len(ranked_lists).
        """
        n = len(ranked_lists)
        if weights is None:
            w = [1.0] * n
        else:
            if len(weights) != n:
                raise ValueError(
                    f"rrf_fuse: len(weights)={len(weights)} must equal len(ranked_lists)={n}"
                )
            w = list(weights)
        scores: dict[str, float] = {}
        for lst, weight in zip(ranked_lists, w, strict=False):
            # Ranks are 1-based, so the top hit contributes weight / (k + 1).
            for rank, pid in enumerate(lst, start=1):
                scores[pid] = scores.get(pid, 0.0) + (weight / (k + rank))
        return scores

    @staticmethod
    def results_to_cheatsheet_inputs(results: list[dict]) -> tuple[list[dict], list[dict]]:
        """
        Extract relevant_chunksets and all_chunks from search results for generate_cheatsheets.

        When store_chunk_details=False, chunk_details in payload is empty, so all_chunks
        will only be populated from results. Pass chunk_data to search_cheatsheets() to
        supply full chunks for cheatsheet generation.
        """
        relevant_chunksets: list[dict] = []
        all_chunks: list[dict] = []
        seen_chunks: set[int] = set()  # de-duplicates chunks shared by multiple results

        for r in results:
            meta = r.get("metadata", {}) or {}
            relevant_chunksets.append(
                {
                    "chunkset_index": meta.get("chunkset_index"),
                    "chunks": meta.get("chunks", []),
                    "file_id": meta.get("file_id", "unknown"),
                }
            )

            chunk_details = meta.get("chunk_details", []) or []
            for ch in chunk_details:
                if not isinstance(ch, dict):
                    # Skip malformed entries defensively.
                    continue
                cidx = ch.get("chunk_index")
                if isinstance(cidx, int) and cidx not in seen_chunks:
                    seen_chunks.add(cidx)
                    all_chunks.append(ch)

        return relevant_chunksets, all_chunks

    @staticmethod
    def _validate_dense_size(dense_size: int | None, mode: str) -> None:
        """Raise ValueError unless dense_size is a positive integer (required by *mode*)."""
        if dense_size is None:
            raise ValueError(f'embedding_mode="{mode}" requires vectors.dense_size')
        try:
            size = int(dense_size)
        except (TypeError, ValueError) as e:
            raise ValueError(f"vectors.dense_size must be an integer, got {type(dense_size).__name__}") from e
        if size <= 0:
            raise ValueError(f'embedding_mode="{mode}" requires vectors.dense_size > 0')

    @staticmethod
    def _validate_dense_vectors(vectors: list[list[float]], expected_size: int, label: str) -> None:
        """Raise ValueError if any vector's length differs from expected_size; *label* names the source."""
        for i, v in enumerate(vectors):
            if len(v) != expected_size:
                raise ValueError(
                    f"{label} vector at index {i} has length {len(v)}, expected {expected_size} (vectors.dense_size)"
                )

    @staticmethod
    def _fastembed_dense_spec(client: object, text: str) -> tuple[str, qcm.Document]:
        """Return (dense vector field name, inference Document) for a FastEmbed-enabled client."""
        dense_model = getattr(client, "embedding_model_name", None)
        if not isinstance(dense_model, str) or not dense_model:
            raise RuntimeError(
                "FastEmbed client is missing a valid embedding_model_name."
            )
        get_dense_name = getattr(client, "get_vector_field_name", None)
        if not callable(get_dense_name):
            raise RuntimeError(
                "FastEmbed client is missing get_vector_field_name()."
            )
        dense_name = get_dense_name()
        if not isinstance(dense_name, str) or not dense_name:
            raise RuntimeError(
                "FastEmbed client returned an invalid dense vector field name."
            )
        return dense_name, qcm.Document(text=text, model=dense_model)

    @staticmethod
    def _fastembed_sparse_spec(client: object, text: str) -> tuple[str, qcm.Document] | None:
        """Return (sparse vector field name, Document), or None when no sparse model is configured."""
        sparse_model = getattr(client, "sparse_embedding_model_name", None)
        if not sparse_model:
            # Sparse embedding is optional in fastembed mode.
            return None
        get_sparse_name = getattr(client, "get_sparse_vector_field_name", None)
        if not callable(get_sparse_name):
            raise RuntimeError(
                "FastEmbed sparse model is configured but client is missing get_sparse_vector_field_name()."
            )
        sparse_name = get_sparse_name()
        if not isinstance(sparse_name, str) or not sparse_name:
            raise RuntimeError(
                "FastEmbed sparse model is configured but sparse vector field name is unavailable."
            )
        return sparse_name, qcm.Document(text=text, model=str(sparse_model))

    @classmethod
    def _fastembed_vector_payload(cls, client: object, text: str) -> dict[str, qcm.Document]:
        """Build the named-vector mapping for upsert: always dense, plus sparse when configured."""
        dense_name, dense_doc = cls._fastembed_dense_spec(client, text)
        vector: dict[str, qcm.Document] = {dense_name: dense_doc}
        sparse_spec = cls._fastembed_sparse_spec(client, text)
        if sparse_spec is not None:
            sparse_name, sparse_doc = sparse_spec
            vector[sparse_name] = sparse_doc
        return vector
401
+
402
+
403
class SyncClientOps:
    """
    Minimal sync wrappers for the QdrantClient I/O surface used by this module.
    """

    def __init__(self, client: QdrantClient):
        # The underlying qdrant-client instance; every method delegates to it.
        self.client = client

    def get_collections_names(self) -> set[str]:
        """Return the names of all collections in the deployment."""
        return {c.name for c in self.client.get_collections().collections}

    def create_collection_dense(self, collection_name: str, *, vectors: VectorConfig) -> None:
        """Create a collection with a single named dense vector space."""
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config={
                vectors.dense_name: qmodels.VectorParams(size=int(vectors.dense_size), distance=vectors.distance)
            },
        )

    def create_collection_sparse(self, collection_name: str, *, vectors: VectorConfig) -> None:
        """Create a sparse-only collection; re-raise with guidance when the setup rejects it."""
        try:
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config={},
                sparse_vectors_config={vectors.sparse_name: qmodels.SparseVectorParams()},
            )
        except Exception as e:
            raise RuntimeError(
                "Creating a sparse-only collection failed (some Qdrant setups require at least one dense vector). "
                "Use embedding_mode='external_hybrid' with dense + sparse if needed."
            ) from e

    def create_collection_hybrid(self, collection_name: str, *, vectors: VectorConfig) -> None:
        """Create a collection with one named dense and one named sparse vector space."""
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config={
                vectors.dense_name: qmodels.VectorParams(size=int(vectors.dense_size), distance=vectors.distance)
            },
            sparse_vectors_config={vectors.sparse_name: qmodels.SparseVectorParams()},
        )

    def ensure_fastembed_collection(self, collection_name: str) -> None:
        """Create the collection from the client's FastEmbed vector params if it does not exist yet."""
        existing = self.get_collections_names()
        if collection_name in existing:
            return

        get_dense_params = getattr(self.client, "get_fastembed_vector_params", None)
        if not callable(get_dense_params):
            raise RuntimeError(
                "FastEmbed client is missing get_fastembed_vector_params()."
            )
        vectors_config = get_dense_params()
        if not vectors_config:
            raise RuntimeError(
                "FastEmbed client returned empty dense vector params."
            )

        # Sparse params are optional; only set when the client exposes them.
        get_sparse_params = getattr(self.client, "get_fastembed_sparse_vector_params", None)
        sparse_vectors_config = (
            get_sparse_params() if callable(get_sparse_params) else None
        )

        kwargs: dict = {
            "collection_name": collection_name,
            "vectors_config": vectors_config,
        }
        if sparse_vectors_config:
            kwargs["sparse_vectors_config"] = sparse_vectors_config
        self.client.create_collection(**kwargs)

    def add_documents(self, collection_name: str, *, documents: list[str], payloads: list[dict], ids: list[str]) -> None:
        """Upsert documents in fastembed mode: vectors are models.Document specs resolved by the client."""
        self.ensure_fastembed_collection(collection_name)
        points: list[qmodels.PointStruct] = []
        # strict=True: ids/documents/payloads must be parallel lists of equal length.
        for pid, doc, payload in zip(ids, documents, payloads, strict=True):
            vector = BasePomaQdrantCore._fastembed_vector_payload(self.client, doc)
            points.append(qmodels.PointStruct(id=pid, vector=vector, payload=payload))
        self.upsert_points(collection_name, points=points)

    def upsert_points(self, collection_name: str, *, points: list[qmodels.PointStruct]) -> None:
        """Upsert pre-built points into the collection."""
        self.client.upsert(collection_name=collection_name, points=points)

    def retrieve_ids(self, collection_name: str, *, ids: list[str]) -> set[str]:
        """Return the set of point ids that exist in the collection (for insert_only write_mode)."""
        if not ids:
            return set()
        try:
            points = self.client.retrieve(collection_name=collection_name, ids=ids)
            return {str(p.id) for p in points}
        except Exception:
            # Best-effort: lookup failures (e.g. missing collection) are treated as "nothing exists".
            return set()

    def query_text(
        self,
        collection_name: str,
        *,
        query: str,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """Fastembed-mode text query: embed *query* via the client's dense model and search it."""
        dense_name, dense_doc = BasePomaQdrantCore._fastembed_dense_spec(
            self.client, query
        )
        return self.query_points(
            collection_name,
            query=dense_doc,
            using=dense_name,
            limit=limit,
            query_filter=query_filter,
        )

    def search_named_vector(
        self,
        collection_name: str,
        *,
        name: str,
        vector: object,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """Search against a named (dense or sparse) vector space with a pre-computed vector."""
        return self.query_points(
            collection_name,
            query=vector,
            using=name,
            limit=limit,
            query_filter=query_filter,
        )

    def query_points(
        self,
        collection_name: str,
        *,
        query: object,
        using: str | None,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """
        Run query_points and normalize hits into {"id", "score", "text", "metadata"} dicts.
        """
        kwargs: dict = {
            "collection_name": collection_name,
            "query": query,
            "limit": limit,
            "with_payload": True,
        }
        if using is not None:
            kwargs["using"] = using
        try:
            res = self.client.query_points(**kwargs, query_filter=query_filter)
        except TypeError:
            # Fallback for client signatures that name this parameter `filter`.
            res = self.client.query_points(**kwargs, filter=query_filter)
        # Responses may be a QueryResponse (with .points) or a plain list.
        hits = getattr(res, "points", res) if not isinstance(res, list) else res
        out: list[dict] = []
        for h in hits:
            payload = getattr(h, "payload", None) or {}
            out.append(
                {
                    "id": str(getattr(h, "id", "") or ""),
                    "score": float(getattr(h, "score", 0.0) or 0.0),
                    "text": payload.get("text", "") if isinstance(payload, dict) else "",
                    "metadata": payload if isinstance(payload, dict) else {},
                }
            )
        return out

    def query_points_inference(
        self,
        collection_name: str,
        *,
        query: object,
        using: str | None,
        limit: int,
        query_filter: qmodels.Filter | None = None,
    ) -> list[dict]:
        """Query using an inference object (e.g. qcm.Document)."""
        return self.query_points(
            collection_name,
            query=query,
            using=using,
            limit=limit,
            query_filter=query_filter,
        )
582
+
583
+
584
+ class PomaQdrant(BasePomaQdrantCore):
585
+ """
586
+ Sync implementation.
587
+
588
+ embedding_mode:
589
+ - "fastembed" : upsert/query_points with models.Document
590
+ - "external_dense" : dense_embed_fn + upsert/search on named dense vector
591
+ - "external_sparse" : sparse_embed_fn + upsert/search on named sparse vector
592
+ - "external_hybrid" : both embedders + two searches + RRF fusion
593
+ - "inference_dense" : Qdrant Inference API (inference_config.dense_model)
594
+ - "inference_sparse" : Qdrant Inference API (inference_config.sparse_model)
595
+ - "inference_hybrid" : Qdrant Inference API (dense + sparse + RRF)
596
+
597
+ fastembed: Requires a client with FastEmbed integration (upsert/query_points with models.Document).
598
+ Not guaranteed in all environments; for production, pass a
599
+ pre-configured qdrant_client or use external_* mode. See _ensure_fastembed_client().
600
+
601
+ write_mode: "upsert" (default) or "insert_only" (skip points that already exist).
602
+ batch_size: chunk ingest into batches of this size (default 256).
603
+ store_chunk_details: if True (default), payload includes chunk_details so search_cheatsheets can build cheatsheets without chunk_data.
604
+ """
605
+
606
+ _INFERENCE_MODES = {"inference_dense", "inference_sparse", "inference_hybrid"}
607
+ _DENSE_PROVIDER_KEY_RULES: tuple[
608
+ tuple[tuple[str, ...], str, tuple[str, ...], str],
609
+ ...,
610
+ ] = (
611
+ (("openai/",), "openai-api-key", ("OPENAI_API_KEY",), "OpenAI"),
612
+ (("cohere/",), "cohere-api-key", ("COHERE_API_KEY",), "Cohere"),
613
+ (("jinaai/", "jina/"), "jina-api-key", ("JINA_API_KEY", "JINAAI_API_KEY"), "Jina AI"),
614
+ (("openrouter/",), "openrouter-api-key", ("OPENROUTER_API_KEY",), "OpenRouter"),
615
+ )
616
+
617
+ @staticmethod
618
+ def _resolve_cloud_inference_enabled(
619
+ qdrant_client: QdrantClient | None,
620
+ cfg: QdrantConfig,
621
+ ) -> bool:
622
+ if qdrant_client is None:
623
+ return bool(cfg.cloud_inference)
624
+ client_flag = getattr(qdrant_client, "cloud_inference", None)
625
+ if client_flag is None:
626
+ return bool(cfg.cloud_inference)
627
+ return bool(client_flag)
628
+
629
+ @staticmethod
630
+ def _copy_options_dict(options: dict | None, field_name: str) -> dict:
631
+ if options is None:
632
+ return {}
633
+ if not isinstance(options, dict):
634
+ raise ValueError(
635
+ f"InferenceConfig.{field_name} must be a dict, got {type(options).__name__}"
636
+ )
637
+ return dict(options)
638
+
639
+ @staticmethod
640
+ def _with_dense_size(vectors: VectorConfig, dense_size: int) -> VectorConfig:
641
+ return VectorConfig(
642
+ distance=vectors.distance,
643
+ dense_size=dense_size,
644
+ sparse_name=vectors.sparse_name,
645
+ dense_name=vectors.dense_name,
646
+ )
647
+
648
+ @staticmethod
649
+ def _copy_client_kwargs(client_kwargs: dict[str, object] | None) -> dict[str, object]:
650
+ if client_kwargs is None:
651
+ return {}
652
+ if not isinstance(client_kwargs, dict):
653
+ raise ValueError(
654
+ f"qdrant.client_kwargs must be a dict, got {type(client_kwargs).__name__}"
655
+ )
656
+ return dict(client_kwargs)
657
+
658
+ @staticmethod
659
+ def _validate_client_kwargs_conflicts(mode: str, client_kwargs: dict[str, object]) -> None:
660
+ blocked_by_mode = {
661
+ "url": {"url", "api_key", "cloud_inference"},
662
+ "path": {"path"},
663
+ "memory": {"location"},
664
+ }
665
+ blocked = blocked_by_mode.get(mode, set())
666
+ conflicts = sorted(k for k in client_kwargs if k in blocked)
667
+ if not conflicts:
668
+ return
669
+ joined = ", ".join(conflicts)
670
+ raise ValueError(
671
+ f'qdrant.client_kwargs for mode="{mode}" cannot include: {joined}. '
672
+ "Set these values via QdrantConfig fields instead."
673
+ )
674
+
675
    def _apply_dense_provider_api_key_defaults(
        self,
        *,
        dense_model: str,
        dense_options: dict,
    ) -> None:
        """
        Default the provider API key into *dense_options* (mutated in place) for known
        dense-model prefixes, reading fallbacks from environment variables; raise when
        a known provider is used but no key can be found.
        """
        model_name = dense_model.strip().lower()
        for prefixes, option_key, env_names, provider_name in self._DENSE_PROVIDER_KEY_RULES:
            if not any(model_name.startswith(prefix) for prefix in prefixes):
                continue
            # Explicit user config always wins.
            if option_key in dense_options:
                return
            # First matching environment variable supplies the key.
            for env_name in env_names:
                env_api_key = os.getenv(env_name)
                if env_api_key:
                    dense_options[option_key] = env_api_key
                    return
            env_hint = " or ".join(env_names)
            raise ValueError(
                f"cloud_inference=True with {provider_name} dense inference requires an API key. "
                f"Set InferenceConfig.dense_options['{option_key}'] or {env_hint}."
            )
        # Unknown provider prefix: leave unchanged.
699
+
700
+ def _apply_cloud_inference_defaults(self) -> None:
701
+ dense_mode = self.embedding_mode in {"inference_dense", "inference_hybrid"}
702
+ sparse_mode = self.embedding_mode in {"inference_sparse", "inference_hybrid"}
703
+
704
+ cfg = self.inference_config or InferenceConfig()
705
+ dense_model = cfg.dense_model
706
+ sparse_model = cfg.sparse_model
707
+ dense_options = self._copy_options_dict(cfg.dense_options, "dense_options")
708
+ sparse_options = self._copy_options_dict(cfg.sparse_options, "sparse_options")
709
+
710
+ if dense_mode and not dense_model:
711
+ dense_model = DEFAULT_CLOUD_DENSE_MODEL
712
+
713
+ if sparse_mode and not sparse_model:
714
+ sparse_model = DEFAULT_CLOUD_SPARSE_MODEL
715
+
716
+ normalized_vector_dense_size: int | None = None
717
+ if dense_mode and self.vectors.dense_size is not None:
718
+ try:
719
+ normalized_vector_dense_size = int(self.vectors.dense_size)
720
+ except (TypeError, ValueError) as e:
721
+ raise ValueError("vectors.dense_size must be an integer") from e
722
+ if normalized_vector_dense_size <= 0:
723
+ raise ValueError("vectors.dense_size must be > 0")
724
+ if normalized_vector_dense_size != self.vectors.dense_size:
725
+ self.vectors = self._with_dense_size(
726
+ self.vectors, normalized_vector_dense_size
727
+ )
728
+
729
+ normalized_dense_dim: int | None = None
730
+ if dense_mode and dense_model:
731
+ dense_dim_raw = dense_options.get("dimensions")
732
+ if dense_dim_raw is not None:
733
+ try:
734
+ normalized_dense_dim = int(dense_dim_raw)
735
+ except (TypeError, ValueError) as e:
736
+ raise ValueError(
737
+ "InferenceConfig.dense_options['dimensions'] must be an integer"
738
+ ) from e
739
+ if normalized_dense_dim <= 0:
740
+ raise ValueError(
741
+ "InferenceConfig.dense_options['dimensions'] must be > 0"
742
+ )
743
+ dense_options["dimensions"] = normalized_dense_dim
744
+
745
+ # Allow setting dimension in exactly one place (vectors OR dense_options).
746
+ if (
747
+ dense_model == DEFAULT_CLOUD_DENSE_MODEL
748
+ and normalized_dense_dim is None
749
+ and normalized_vector_dense_size is None
750
+ ):
751
+ normalized_dense_dim = DEFAULT_CLOUD_DENSE_DIMENSIONS
752
+ dense_options["dimensions"] = normalized_dense_dim
753
+
754
+ if normalized_dense_dim is None and normalized_vector_dense_size is not None:
755
+ normalized_dense_dim = normalized_vector_dense_size
756
+ if dense_model.startswith("openai/"):
757
+ dense_options["dimensions"] = normalized_dense_dim
758
+ elif normalized_dense_dim is not None and normalized_vector_dense_size is None:
759
+ self.vectors = self._with_dense_size(
760
+ self.vectors, normalized_dense_dim
761
+ )
762
+ elif (
763
+ normalized_dense_dim is not None
764
+ and normalized_vector_dense_size is not None
765
+ and normalized_dense_dim != normalized_vector_dense_size
766
+ ):
767
+ raise ValueError(
768
+ "vectors.dense_size must match InferenceConfig.dense_options['dimensions']: "
769
+ f"{normalized_vector_dense_size} != {normalized_dense_dim}"
770
+ )
771
+
772
+ self._apply_dense_provider_api_key_defaults(
773
+ dense_model=dense_model,
774
+ dense_options=dense_options,
775
+ )
776
+
777
+ self.inference_config = InferenceConfig(
778
+ dense_model=dense_model,
779
+ sparse_model=sparse_model,
780
+ dense_options=dense_options or None,
781
+ sparse_options=sparse_options or None,
782
+ )
783
+
784
+ def _raise_qdrant_response(
785
+ self,
786
+ exc: UnexpectedResponse,
787
+ *,
788
+ operation: str,
789
+ ) -> None:
790
+ content = exc.content.decode("utf-8", errors="replace")
791
+ message = f"Qdrant response error during `{operation}`: {exc}"
792
+ lowered = f"{exc}\n{content}".lower()
793
+ if self.cloud_inference and (
794
+ "endpoint configuration not found for model" in lowered
795
+ or "inference can't process request" in lowered
796
+ ):
797
+ message = (
798
+ f"{message}. The model/provider is not configured for this Qdrant Cloud cluster "
799
+ "(check the cluster Inference tab)."
800
+ )
801
+ raise QdrantResponseError(
802
+ message,
803
+ status_code=exc.status_code,
804
+ reason_phrase=exc.reason_phrase,
805
+ raw_content=content,
806
+ ) from exc
807
+
808
    def __init__(
        self,
        collection_name: str = "poma_documents",
        *,
        qdrant: QdrantConfig | None = None,
        qdrant_client: QdrantClient | None = None,
        embedding_mode: str | None = None,
        dense_embed_fn: DenseEmbedSync | None = None,
        sparse_embed_fn: SparseEmbedSync | None = None,
        vectors: VectorConfig | None = None,
        inference_config: InferenceConfig | None = None,
        fusion_k: int = 60,
        fusion_candidates: int = 50,
        fusion_weights: tuple[float, float] | None = None,
        write_mode: str = "upsert",
        batch_size: int = 256,
        store_chunk_details: bool = True,
    ):
        """Configure the store, validate the embedding mode fail-fast, and connect to Qdrant.

        Args:
            collection_name: Qdrant collection to write to and search in.
            qdrant: connection settings, used only when ``qdrant_client`` is not given.
            qdrant_client: pre-configured client; takes precedence over ``qdrant``.
            embedding_mode: one of fastembed / external_dense / external_sparse /
                external_hybrid / inference_dense / inference_sparse / inference_hybrid.
                Empty/None selects "inference_dense" under cloud inference, else "fastembed".
            dense_embed_fn: required by external_dense and external_hybrid.
            sparse_embed_fn: required by external_sparse and external_hybrid.
            vectors: named-vector configuration; defaults to ``VectorConfig()``.
            inference_config: required by all inference_* modes.
            fusion_k: RRF constant for hybrid fusion (clamped to >= 1).
            fusion_candidates: per-branch candidate count for hybrid search (clamped to >= 1).
            fusion_weights: (dense_weight, sparse_weight) for hybrid; None means (1, 1).
            write_mode: "upsert" (overwrite existing ids) or "insert_only" (skip existing ids).
            batch_size: points per upsert batch (clamped to >= 1).
            store_chunk_details: whether chunk details are stored in point payloads.

        Raises:
            ValueError: on any invalid mode/config combination (checked here, fail-fast).
        """
        self.collection_name = collection_name
        cfg = qdrant or QdrantConfig()
        # Whether cloud inference is in effect; resolved from the supplied client
        # and/or config by a class helper (defined outside this view).
        self.cloud_inference = self._resolve_cloud_inference_enabled(qdrant_client, cfg)
        mode_text = (embedding_mode or "").strip().lower()
        if mode_text:
            self.embedding_mode = mode_text
        else:
            # Strong default: cloud inference uses inference_dense; otherwise keep fastembed.
            self.embedding_mode = "inference_dense" if self.cloud_inference else "fastembed"
        self.dense_embed_fn = dense_embed_fn
        self.sparse_embed_fn = sparse_embed_fn
        self.vectors = vectors or VectorConfig()
        self.inference_config = inference_config
        # Numeric knobs are clamped rather than rejected.
        self.fusion_k = max(1, int(fusion_k))
        self.fusion_candidates = max(1, int(fusion_candidates))
        self.fusion_weights = fusion_weights  # (dense_weight, sparse_weight) for hybrid; None = (1, 1)
        self.write_mode = (write_mode or "upsert").strip().lower()
        self.batch_size = max(1, int(batch_size))
        self.store_chunk_details = bool(store_chunk_details)
        if self.write_mode not in ("upsert", "insert_only"):
            raise ValueError('write_mode must be one of: "upsert", "insert_only"')

        allowed = {
            "fastembed", "external_dense", "external_sparse", "external_hybrid",
            "inference_dense", "inference_sparse", "inference_hybrid",
        }
        if self.embedding_mode not in allowed:
            raise ValueError(f"embedding_mode must be one of: {sorted(allowed)}")

        # fastembed embeds locally, which contradicts cloud-side inference.
        if self.cloud_inference and self.embedding_mode == "fastembed":
            raise ValueError(
                "cloud_inference=True is not supported with embedding_mode='fastembed'. "
                "Use cloud_inference=False for local FastEmbed or switch to an inference_* mode "
                "with InferenceConfig."
            )
        if self.cloud_inference and self.embedding_mode in self._INFERENCE_MODES:
            self._apply_cloud_inference_defaults()

        # Fail-fast: validate external_* requirements at init
        if self.embedding_mode == "external_dense":
            if self.dense_embed_fn is None:
                raise ValueError('embedding_mode="external_dense" requires dense_embed_fn')
            self._validate_dense_size(self.vectors.dense_size, "external_dense")
        elif self.embedding_mode == "external_sparse":
            if self.sparse_embed_fn is None:
                raise ValueError('embedding_mode="external_sparse" requires sparse_embed_fn')
        elif self.embedding_mode == "external_hybrid":
            if self.dense_embed_fn is None or self.sparse_embed_fn is None:
                raise ValueError('embedding_mode="external_hybrid" requires dense_embed_fn AND sparse_embed_fn')
            self._validate_dense_size(self.vectors.dense_size, "external_hybrid")
        elif self.embedding_mode == "inference_dense":
            if self.inference_config is None:
                raise ValueError('embedding_mode="inference_dense" requires inference_config')
            self.inference_config.dense_document("")  # validate dense_model set
            self._validate_dense_size(self.vectors.dense_size, "inference_dense")
        elif self.embedding_mode == "inference_sparse":
            if self.inference_config is None:
                raise ValueError('embedding_mode="inference_sparse" requires inference_config')
            self.inference_config.sparse_document("")  # validate sparse_model set
        elif self.embedding_mode == "inference_hybrid":
            if self.inference_config is None:
                raise ValueError('embedding_mode="inference_hybrid" requires inference_config')
            # Probe both builders so a missing model fails here, not at first ingest.
            self.inference_config.dense_document("")
            self.inference_config.sparse_document("")
            self._validate_dense_size(self.vectors.dense_size, "inference_hybrid")

        if qdrant_client is not None:
            # A caller-supplied client is used as-is; cfg connection fields are ignored.
            self.client = qdrant_client
        else:
            mode = (cfg.mode or "").strip().lower()
            client_kwargs = self._copy_client_kwargs(cfg.client_kwargs)
            if mode == "url":
                self._validate_client_kwargs_conflicts(mode, client_kwargs)
                if not cfg.url:
                    raise ValueError('qdrant.mode="url" requires qdrant.url="http(s)://..."')
                self.client = QdrantClient(
                    url=cfg.url,
                    api_key=cfg.api_key,
                    cloud_inference=cfg.cloud_inference,
                    **client_kwargs,
                )
            elif mode == "path":
                self._validate_client_kwargs_conflicts(mode, client_kwargs)
                if not cfg.path:
                    raise ValueError('qdrant.mode="path" requires qdrant.path="..."')
                self.client = QdrantClient(path=cfg.path, **client_kwargs)
            elif mode == "memory":
                self._validate_client_kwargs_conflicts(mode, client_kwargs)
                self.client = QdrantClient(location=":memory:", **client_kwargs)
            else:
                raise ValueError('qdrant.mode must be one of: "memory", "path", "url"')

        self.ops = SyncClientOps(self.client)
        # Non-fastembed modes create their collection eagerly here.
        self._ensure_collection_if_needed_sync()
920
+
921
+ def _ensure_fastembed_client(self) -> None:
922
+ """Raise with a clear message if fastembed mode is used but client lacks FastEmbed Document support."""
923
+ query_points_fn = getattr(self.client, "query_points", None)
924
+ upsert_fn = getattr(self.client, "upsert", None)
925
+ get_dense_name_fn = getattr(self.client, "get_vector_field_name", None)
926
+ get_dense_params_fn = getattr(self.client, "get_fastembed_vector_params", None)
927
+ if (
928
+ not callable(query_points_fn)
929
+ or not callable(upsert_fn)
930
+ or not callable(get_dense_name_fn)
931
+ or not callable(get_dense_params_fn)
932
+ ):
933
+ raise RuntimeError(
934
+ "embedding_mode='fastembed' requires a Qdrant client with FastEmbed integration "
935
+ "(client.query_points(..., query=models.Document(...)) and client.upsert(...)). "
936
+ "Pass a pre-configured client or use embedding_mode='external_dense' (or external_sparse/external_hybrid) "
937
+ "with your own embedder."
938
+ )
939
+ dense_model = getattr(self.client, "embedding_model_name", None)
940
+ if not isinstance(dense_model, str) or not dense_model:
941
+ raise RuntimeError(
942
+ "embedding_mode='fastembed' requires a valid embedding_model_name on the client."
943
+ )
944
+ try:
945
+ sig_query_points = inspect.signature(query_points_fn)
946
+ sig_upsert = inspect.signature(upsert_fn)
947
+ except (ValueError, TypeError):
948
+ raise RuntimeError(
949
+ "embedding_mode='fastembed': client query_points/upsert could not be inspected. "
950
+ "Use a Qdrant client with FastEmbed integration or embedding_mode='external_dense'."
951
+ )
952
+ if "query" not in sig_query_points.parameters:
953
+ raise RuntimeError(
954
+ "embedding_mode='fastembed' requires client.query_points(..., query=..., ...). "
955
+ "This client's query_points() does not accept 'query'. Use a FastEmbed-enabled client or external_* mode."
956
+ )
957
+ if "points" not in sig_upsert.parameters:
958
+ raise RuntimeError(
959
+ "embedding_mode='fastembed' requires client.upsert(..., points=...). "
960
+ "This client's upsert() does not accept 'points'. Use a FastEmbed-enabled client or external_* mode."
961
+ )
962
+
963
+ def _ensure_collection_if_needed_sync(self) -> None:
964
+ existing = self.ops.get_collections_names()
965
+ if self.collection_name in existing:
966
+ return
967
+
968
+ if self.embedding_mode in ("external_dense", "inference_dense"):
969
+ self.ops.create_collection_dense(self.collection_name, vectors=self.vectors)
970
+ elif self.embedding_mode in ("external_sparse", "inference_sparse"):
971
+ self.ops.create_collection_sparse(self.collection_name, vectors=self.vectors)
972
+ elif self.embedding_mode in ("external_hybrid", "inference_hybrid"):
973
+ self.ops.create_collection_hybrid(self.collection_name, vectors=self.vectors)
974
+ # fastembed: created lazily in SyncClientOps.add_documents().
975
+
976
+ def _filter_new_ids_sync(self, ids: list[str]) -> tuple[list[str], list[int]]:
977
+ """For insert_only: return (ids that are new, indices into original list)."""
978
+ existing = self.ops.retrieve_ids(self.collection_name, ids=ids)
979
+ new_ids = [pid for pid in ids if pid not in existing]
980
+ new_set = set(new_ids)
981
+ indices = [i for i, pid in enumerate(ids) if pid in new_set]
982
+ return new_ids, indices
983
+
984
+ def ingest(
985
+ self,
986
+ chunk_data: dict | str | os.PathLike[str],
987
+ ) -> None:
988
+ """
989
+ Ingest chunk data into the collection.
990
+ chunk_data: either a dict with ``chunks`` and ``chunksets`` keys,
991
+ or a path to a ``.poma`` file (will be extracted first).
992
+ """
993
+ try:
994
+ chunk_data = _chunk_data_from_input(chunk_data)
995
+ ids, documents, payloads = self.prepare_points_from_chunk_data(
996
+ chunk_data, store_chunk_details=self.store_chunk_details
997
+ )
998
+ if not documents:
999
+ return
1000
+
1001
+ if self.embedding_mode != "fastembed":
1002
+ self._ensure_collection_if_needed_sync()
1003
+
1004
+ if self.write_mode == "insert_only" and ids:
1005
+ new_ids, indices = self._filter_new_ids_sync(ids)
1006
+ if not new_ids:
1007
+ return
1008
+ ids = new_ids
1009
+ documents = [documents[i] for i in indices]
1010
+ payloads = [payloads[i] for i in indices]
1011
+
1012
+ if self.embedding_mode == "fastembed":
1013
+ self._ensure_fastembed_client()
1014
+ for start in range(0, len(ids), self.batch_size):
1015
+ end = start + self.batch_size
1016
+ self.ops.add_documents(
1017
+ self.collection_name,
1018
+ documents=documents[start:end],
1019
+ payloads=payloads[start:end],
1020
+ ids=ids[start:end],
1021
+ )
1022
+ return
1023
+
1024
+ if self.embedding_mode == "external_dense":
1025
+ dense_vecs = self.dense_embed_fn(documents)
1026
+ if len(dense_vecs) != len(documents):
1027
+ raise ValueError("dense_embed_fn returned a different number of vectors than documents")
1028
+ self._validate_dense_vectors(dense_vecs, int(self.vectors.dense_size), "dense_embed_fn")
1029
+ points = [
1030
+ qmodels.PointStruct(id=pid, vector={self.vectors.dense_name: vec}, payload=payload)
1031
+ for pid, vec, payload in zip(ids, dense_vecs, payloads, strict=True)
1032
+ ]
1033
+ for start in range(0, len(points), self.batch_size):
1034
+ self.ops.upsert_points(self.collection_name, points=points[start : start + self.batch_size])
1035
+ return
1036
+
1037
+ if self.embedding_mode == "external_sparse":
1038
+ sparse_items = self.sparse_embed_fn(documents)
1039
+ if len(sparse_items) != len(documents):
1040
+ raise ValueError("sparse_embed_fn returned a different number of vectors than documents")
1041
+ points: list[qmodels.PointStruct] = []
1042
+ for pid, item, payload in zip(ids, sparse_items, payloads, strict=True):
1043
+ svec = self._to_sparse_vector(item)
1044
+ points.append(
1045
+ qmodels.PointStruct(id=pid, vector={self.vectors.sparse_name: svec}, payload=payload)
1046
+ )
1047
+ for start in range(0, len(points), self.batch_size):
1048
+ self.ops.upsert_points(self.collection_name, points=points[start : start + self.batch_size])
1049
+ return
1050
+
1051
+ if self.embedding_mode == "inference_dense":
1052
+ points_inf = [
1053
+ qmodels.PointStruct(
1054
+ id=pid,
1055
+ vector={self.vectors.dense_name: self.inference_config.dense_document(doc)},
1056
+ payload=payload,
1057
+ )
1058
+ for pid, doc, payload in zip(ids, documents, payloads, strict=True)
1059
+ ]
1060
+ for start in range(0, len(points_inf), self.batch_size):
1061
+ self.ops.upsert_points(self.collection_name, points=points_inf[start : start + self.batch_size])
1062
+ return
1063
+
1064
+ if self.embedding_mode == "inference_sparse":
1065
+ points_inf = [
1066
+ qmodels.PointStruct(
1067
+ id=pid,
1068
+ vector={self.vectors.sparse_name: self.inference_config.sparse_document(doc)},
1069
+ payload=payload,
1070
+ )
1071
+ for pid, doc, payload in zip(ids, documents, payloads, strict=True)
1072
+ ]
1073
+ for start in range(0, len(points_inf), self.batch_size):
1074
+ self.ops.upsert_points(self.collection_name, points=points_inf[start : start + self.batch_size])
1075
+ return
1076
+
1077
+ if self.embedding_mode == "inference_hybrid":
1078
+ points_inf = [
1079
+ qmodels.PointStruct(
1080
+ id=pid,
1081
+ vector={
1082
+ self.vectors.dense_name: self.inference_config.dense_document(doc),
1083
+ self.vectors.sparse_name: self.inference_config.sparse_document(doc),
1084
+ },
1085
+ payload=payload,
1086
+ )
1087
+ for pid, doc, payload in zip(ids, documents, payloads, strict=True)
1088
+ ]
1089
+ for start in range(0, len(points_inf), self.batch_size):
1090
+ self.ops.upsert_points(self.collection_name, points=points_inf[start : start + self.batch_size])
1091
+ return
1092
+
1093
+ # external_hybrid
1094
+ dense_vecs = self.dense_embed_fn(documents)
1095
+ sparse_items = self.sparse_embed_fn(documents)
1096
+ if len(dense_vecs) != len(documents) or len(sparse_items) != len(documents):
1097
+ raise ValueError("Hybrid embedders returned a different number of vectors than documents")
1098
+ self._validate_dense_vectors(dense_vecs, int(self.vectors.dense_size), "dense_embed_fn")
1099
+ points_list: list[qmodels.PointStruct] = []
1100
+ for pid, dvec, sitem, payload in zip(ids, dense_vecs, sparse_items, payloads, strict=True):
1101
+ svec = self._to_sparse_vector(sitem)
1102
+ points_list.append(
1103
+ qmodels.PointStruct(
1104
+ id=pid,
1105
+ vector={self.vectors.dense_name: dvec, self.vectors.sparse_name: svec},
1106
+ payload=payload,
1107
+ )
1108
+ )
1109
+ for start in range(0, len(points_list), self.batch_size):
1110
+ self.ops.upsert_points(self.collection_name, points=points_list[start : start + self.batch_size])
1111
+ except UnexpectedResponse as exc:
1112
+ self._raise_qdrant_response(exc, operation="ingest")
1113
+
1114
+ def search(
1115
+ self,
1116
+ query: str,
1117
+ *,
1118
+ limit: int = 5,
1119
+ query_filter: qmodels.Filter | None = None,
1120
+ ) -> list[dict]:
1121
+ """Search; returns list[dict] with keys id, score, text, metadata (same shape as SearchResult)."""
1122
+ limit = max(1, int(limit))
1123
+ try:
1124
+ if self.embedding_mode == "fastembed":
1125
+ self._ensure_fastembed_client()
1126
+ dense_name, dense_doc = self._fastembed_dense_spec(self.client, query)
1127
+ sparse_spec = self._fastembed_sparse_spec(self.client, query)
1128
+ if sparse_spec is None:
1129
+ return self.ops.query_points(
1130
+ self.collection_name,
1131
+ query=dense_doc,
1132
+ using=dense_name,
1133
+ limit=limit,
1134
+ query_filter=query_filter,
1135
+ )
1136
+
1137
+ cand = max(limit, self.fusion_candidates)
1138
+ sparse_name, sparse_doc = sparse_spec
1139
+ dense_hits = self.ops.query_points(
1140
+ self.collection_name,
1141
+ query=dense_doc,
1142
+ using=dense_name,
1143
+ limit=cand,
1144
+ query_filter=query_filter,
1145
+ )
1146
+ sparse_hits = self.ops.query_points(
1147
+ self.collection_name,
1148
+ query=sparse_doc,
1149
+ using=sparse_name,
1150
+ limit=cand,
1151
+ query_filter=query_filter,
1152
+ )
1153
+ dense_ids = [str(h["id"]) for h in dense_hits]
1154
+ sparse_ids = [str(h["id"]) for h in sparse_hits]
1155
+ fused = self.rrf_fuse([dense_ids, sparse_ids], k=self.fusion_k, weights=None)
1156
+ payload_by_id: dict[str, dict] = {}
1157
+ for h in dense_hits + sparse_hits:
1158
+ payload_by_id.setdefault(str(h["id"]), h.get("metadata", {}) or {})
1159
+ ranked = sorted(fused.items(), key=lambda kv: kv[1], reverse=True)[:limit]
1160
+ return [
1161
+ {
1162
+ "id": pid,
1163
+ "score": score,
1164
+ "text": payload_by_id.get(pid, {}).get("text", ""),
1165
+ "metadata": payload_by_id.get(pid, {}) or {},
1166
+ }
1167
+ for pid, score in ranked
1168
+ ]
1169
+
1170
+ self._ensure_collection_if_needed_sync()
1171
+
1172
+ if self.embedding_mode == "external_dense":
1173
+ qvec = self.dense_embed_fn([query])[0]
1174
+ return self.ops.search_named_vector(
1175
+ self.collection_name,
1176
+ name=self.vectors.dense_name,
1177
+ vector=qvec,
1178
+ limit=limit,
1179
+ query_filter=query_filter,
1180
+ )
1181
+
1182
+ if self.embedding_mode == "external_sparse":
1183
+ sitem = self.sparse_embed_fn([query])[0]
1184
+ svec = self._to_sparse_vector(sitem)
1185
+ return self.ops.search_named_vector(
1186
+ self.collection_name,
1187
+ name=self.vectors.sparse_name,
1188
+ vector=svec,
1189
+ limit=limit,
1190
+ query_filter=query_filter,
1191
+ )
1192
+
1193
+ if self.embedding_mode == "inference_dense":
1194
+ query_doc = self.inference_config.dense_document(query)
1195
+ return self.ops.query_points_inference(
1196
+ self.collection_name,
1197
+ query=query_doc,
1198
+ using=self.vectors.dense_name,
1199
+ limit=limit,
1200
+ query_filter=query_filter,
1201
+ )
1202
+
1203
+ if self.embedding_mode == "inference_sparse":
1204
+ query_doc = self.inference_config.sparse_document(query)
1205
+ return self.ops.query_points_inference(
1206
+ self.collection_name,
1207
+ query=query_doc,
1208
+ using=self.vectors.sparse_name,
1209
+ limit=limit,
1210
+ query_filter=query_filter,
1211
+ )
1212
+
1213
+ if self.embedding_mode == "inference_hybrid":
1214
+ cand = max(limit, self.fusion_candidates)
1215
+ weights = list(self.fusion_weights) if self.fusion_weights else None
1216
+ dense_doc = self.inference_config.dense_document(query)
1217
+ sparse_doc = self.inference_config.sparse_document(query)
1218
+ dense_hits = self.ops.query_points_inference(
1219
+ self.collection_name,
1220
+ query=dense_doc,
1221
+ using=self.vectors.dense_name,
1222
+ limit=cand,
1223
+ query_filter=query_filter,
1224
+ )
1225
+ sparse_hits = self.ops.query_points_inference(
1226
+ self.collection_name,
1227
+ query=sparse_doc,
1228
+ using=self.vectors.sparse_name,
1229
+ limit=cand,
1230
+ query_filter=query_filter,
1231
+ )
1232
+ dense_ids = [str(h["id"]) for h in dense_hits]
1233
+ sparse_ids = [str(h["id"]) for h in sparse_hits]
1234
+ fused = self.rrf_fuse([dense_ids, sparse_ids], k=self.fusion_k, weights=weights)
1235
+ payload_by_id: dict[str, dict] = {}
1236
+ for h in dense_hits + sparse_hits:
1237
+ payload_by_id.setdefault(str(h["id"]), h.get("metadata", {}) or {})
1238
+ ranked = sorted(fused.items(), key=lambda kv: kv[1], reverse=True)[:limit]
1239
+ return [
1240
+ {
1241
+ "id": pid,
1242
+ "score": score,
1243
+ "text": payload_by_id.get(pid, {}).get("text", ""),
1244
+ "metadata": payload_by_id.get(pid, {}) or {},
1245
+ }
1246
+ for pid, score in ranked
1247
+ ]
1248
+
1249
+ # external_hybrid: two searches + weighted RRF fusion
1250
+ cand = max(limit, self.fusion_candidates)
1251
+ weights = list(self.fusion_weights) if self.fusion_weights else None
1252
+
1253
+ dense_q = self.dense_embed_fn([query])[0]
1254
+ sparse_item = self.sparse_embed_fn([query])[0]
1255
+ sparse_q = self._to_sparse_vector(sparse_item)
1256
+
1257
+ dense_hits = self.ops.search_named_vector(
1258
+ self.collection_name,
1259
+ name=self.vectors.dense_name,
1260
+ vector=dense_q,
1261
+ limit=cand,
1262
+ query_filter=query_filter,
1263
+ )
1264
+ sparse_hits = self.ops.search_named_vector(
1265
+ self.collection_name,
1266
+ name=self.vectors.sparse_name,
1267
+ vector=sparse_q,
1268
+ limit=cand,
1269
+ query_filter=query_filter,
1270
+ )
1271
+
1272
+ dense_ids = [str(h["id"]) for h in dense_hits]
1273
+ sparse_ids = [str(h["id"]) for h in sparse_hits]
1274
+ fused = self.rrf_fuse([dense_ids, sparse_ids], k=self.fusion_k, weights=weights)
1275
+
1276
+ payload_by_id: dict[str, dict] = {}
1277
+ for h in dense_hits + sparse_hits:
1278
+ payload_by_id.setdefault(str(h["id"]), h.get("metadata", {}) or {})
1279
+
1280
+ ranked = sorted(fused.items(), key=lambda kv: kv[1], reverse=True)[:limit]
1281
+ return [
1282
+ {
1283
+ "id": pid,
1284
+ "score": score,
1285
+ "text": payload_by_id.get(pid, {}).get("text", ""),
1286
+ "metadata": payload_by_id.get(pid, {}) or {},
1287
+ }
1288
+ for pid, score in ranked
1289
+ ]
1290
+ except UnexpectedResponse as exc:
1291
+ self._raise_qdrant_response(exc, operation="search")
1292
+
1293
+ def close(self) -> None:
1294
+ """Close the Qdrant client connection (e.g. HTTP session). No-op if client has no close."""
1295
+ if hasattr(self.client, "close") and callable(self.client.close):
1296
+ self.client.close()
1297
+
1298
+ def search_cheatsheets(
1299
+ self,
1300
+ query: str,
1301
+ *,
1302
+ limit: int = 5,
1303
+ chunk_data: dict | None = None,
1304
+ query_filter: qmodels.Filter | None = None,
1305
+ ) -> list[dict]:
1306
+ results = self.search(query, limit=limit, query_filter=query_filter)
1307
+ if not results:
1308
+ return []
1309
+ relevant_chunksets, all_chunks = self.results_to_cheatsheet_inputs(results)
1310
+ if chunk_data:
1311
+ all_chunks = chunk_data.get("chunks", []) or []
1312
+ return generate_cheatsheets(relevant_chunksets, all_chunks) or []
1313
+
1314
+ # Legacy compatibility
1315
+ def store_chunksets(self, chunksets: list[dict], chunks: list[dict], *, file_id: str | None = None) -> None:
1316
+ self.ingest({"chunksets": chunksets, "chunks": chunks})
1317
+
1318
+ def write(self, chunk_data: dict, *, file_id: str | None = None) -> None:
1319
+ self.ingest(chunk_data)
1320
+
1321
+ def query_chunksets(self, query: str, *, top_k: int = 5) -> list[dict]:
1322
+ return [r["metadata"] for r in self.search(query, limit=top_k)]
1323
+
1324
+ def query_and_generate_cheatsheet(self, query: str, *, top_k: int = 5, all_chunks: list[dict] | None = None):
1325
+ chunk_data = {"chunks": all_chunks} if all_chunks else None
1326
+ return self.search_cheatsheets(query, limit=top_k, chunk_data=chunk_data)