qdrant-haystack 9.1.1__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haystack_integrations/components/retrievers/py.typed +0 -0
- haystack_integrations/components/retrievers/qdrant/retriever.py +158 -87
- haystack_integrations/document_stores/py.typed +0 -0
- haystack_integrations/document_stores/qdrant/converters.py +13 -12
- haystack_integrations/document_stores/qdrant/document_store.py +945 -171
- haystack_integrations/document_stores/qdrant/filters.py +87 -168
- haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +11 -7
- {qdrant_haystack-9.1.1.dist-info → qdrant_haystack-10.2.0.dist-info}/METADATA +9 -25
- qdrant_haystack-10.2.0.dist-info/RECORD +13 -0
- {qdrant_haystack-9.1.1.dist-info → qdrant_haystack-10.2.0.dist-info}/WHEEL +1 -1
- qdrant_haystack-9.1.1.dist-info/RECORD +0 -11
- {qdrant_haystack-9.1.1.dist-info → qdrant_haystack-10.2.0.dist-info}/licenses/LICENSE.txt +0 -0
|
File without changes
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
from haystack import Document, component, default_from_dict, default_to_dict
|
|
4
4
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
@@ -8,6 +8,11 @@ from qdrant_client.http import models
|
|
|
8
8
|
|
|
9
9
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
10
10
|
|
|
11
|
+
FILTER_POLICY_MERGE_ERROR_MESSAGE = (
|
|
12
|
+
"Native Qdrant filters cannot be used with filter_policy set to MERGE. "
|
|
13
|
+
"Set filter_policy to REPLACE or use Haystack filters instead."
|
|
14
|
+
)
|
|
15
|
+
|
|
11
16
|
|
|
12
17
|
@component
|
|
13
18
|
class QdrantEmbeddingRetriever:
|
|
@@ -38,15 +43,15 @@ class QdrantEmbeddingRetriever:
|
|
|
38
43
|
def __init__(
|
|
39
44
|
self,
|
|
40
45
|
document_store: QdrantDocumentStore,
|
|
41
|
-
filters:
|
|
46
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
42
47
|
top_k: int = 10,
|
|
43
48
|
scale_score: bool = False,
|
|
44
49
|
return_embedding: bool = False,
|
|
45
|
-
filter_policy:
|
|
46
|
-
score_threshold:
|
|
47
|
-
group_by:
|
|
48
|
-
group_size:
|
|
49
|
-
):
|
|
50
|
+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
|
|
51
|
+
score_threshold: float | None = None,
|
|
52
|
+
group_by: str | None = None,
|
|
53
|
+
group_size: int | None = None,
|
|
54
|
+
) -> None:
|
|
50
55
|
"""
|
|
51
56
|
Create a QdrantEmbeddingRetriever component.
|
|
52
57
|
|
|
@@ -84,7 +89,7 @@ class QdrantEmbeddingRetriever:
|
|
|
84
89
|
self._group_by = group_by
|
|
85
90
|
self._group_size = group_size
|
|
86
91
|
|
|
87
|
-
def to_dict(self) ->
|
|
92
|
+
def to_dict(self) -> dict[str, Any]:
|
|
88
93
|
"""
|
|
89
94
|
Serializes the component to a dictionary.
|
|
90
95
|
|
|
@@ -108,7 +113,7 @@ class QdrantEmbeddingRetriever:
|
|
|
108
113
|
return d
|
|
109
114
|
|
|
110
115
|
@classmethod
|
|
111
|
-
def from_dict(cls, data:
|
|
116
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantEmbeddingRetriever":
|
|
112
117
|
"""
|
|
113
118
|
Deserializes the component from a dictionary.
|
|
114
119
|
|
|
@@ -125,18 +130,18 @@ class QdrantEmbeddingRetriever:
|
|
|
125
130
|
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
126
131
|
return default_from_dict(cls, data)
|
|
127
132
|
|
|
128
|
-
@component.output_types(documents=
|
|
133
|
+
@component.output_types(documents=list[Document])
|
|
129
134
|
def run(
|
|
130
135
|
self,
|
|
131
|
-
query_embedding:
|
|
132
|
-
filters:
|
|
133
|
-
top_k:
|
|
134
|
-
scale_score:
|
|
135
|
-
return_embedding:
|
|
136
|
-
score_threshold:
|
|
137
|
-
group_by:
|
|
138
|
-
group_size:
|
|
139
|
-
):
|
|
136
|
+
query_embedding: list[float],
|
|
137
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
138
|
+
top_k: int | None = None,
|
|
139
|
+
scale_score: bool | None = None,
|
|
140
|
+
return_embedding: bool | None = None,
|
|
141
|
+
score_threshold: float | None = None,
|
|
142
|
+
group_by: str | None = None,
|
|
143
|
+
group_size: int | None = None,
|
|
144
|
+
) -> dict[str, list[Document]]:
|
|
140
145
|
"""
|
|
141
146
|
Run the Embedding Retriever on the given input data.
|
|
142
147
|
|
|
@@ -153,8 +158,19 @@ class QdrantEmbeddingRetriever:
|
|
|
153
158
|
:returns:
|
|
154
159
|
The retrieved documents.
|
|
155
160
|
|
|
161
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
156
162
|
"""
|
|
157
|
-
|
|
163
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
164
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
165
|
+
):
|
|
166
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
167
|
+
|
|
168
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
169
|
+
filters = apply_filter_policy(
|
|
170
|
+
filter_policy=self._filter_policy,
|
|
171
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
172
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
173
|
+
)
|
|
158
174
|
|
|
159
175
|
docs = self._document_store._query_by_embedding(
|
|
160
176
|
query_embedding=query_embedding,
|
|
@@ -169,18 +185,18 @@ class QdrantEmbeddingRetriever:
|
|
|
169
185
|
|
|
170
186
|
return {"documents": docs}
|
|
171
187
|
|
|
172
|
-
@component.output_types(documents=
|
|
188
|
+
@component.output_types(documents=list[Document])
|
|
173
189
|
async def run_async(
|
|
174
190
|
self,
|
|
175
|
-
query_embedding:
|
|
176
|
-
filters:
|
|
177
|
-
top_k:
|
|
178
|
-
scale_score:
|
|
179
|
-
return_embedding:
|
|
180
|
-
score_threshold:
|
|
181
|
-
group_by:
|
|
182
|
-
group_size:
|
|
183
|
-
):
|
|
191
|
+
query_embedding: list[float],
|
|
192
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
193
|
+
top_k: int | None = None,
|
|
194
|
+
scale_score: bool | None = None,
|
|
195
|
+
return_embedding: bool | None = None,
|
|
196
|
+
score_threshold: float | None = None,
|
|
197
|
+
group_by: str | None = None,
|
|
198
|
+
group_size: int | None = None,
|
|
199
|
+
) -> dict[str, list[Document]]:
|
|
184
200
|
"""
|
|
185
201
|
Asynchronously run the Embedding Retriever on the given input data.
|
|
186
202
|
|
|
@@ -197,8 +213,19 @@ class QdrantEmbeddingRetriever:
|
|
|
197
213
|
:returns:
|
|
198
214
|
The retrieved documents.
|
|
199
215
|
|
|
216
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
200
217
|
"""
|
|
201
|
-
|
|
218
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
219
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
220
|
+
):
|
|
221
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
222
|
+
|
|
223
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
224
|
+
filters = apply_filter_policy(
|
|
225
|
+
filter_policy=self._filter_policy,
|
|
226
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
227
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
228
|
+
)
|
|
202
229
|
|
|
203
230
|
docs = await self._document_store._query_by_embedding_async(
|
|
204
231
|
query_embedding=query_embedding,
|
|
@@ -244,15 +271,15 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
244
271
|
def __init__(
|
|
245
272
|
self,
|
|
246
273
|
document_store: QdrantDocumentStore,
|
|
247
|
-
filters:
|
|
274
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
248
275
|
top_k: int = 10,
|
|
249
276
|
scale_score: bool = False,
|
|
250
277
|
return_embedding: bool = False,
|
|
251
|
-
filter_policy:
|
|
252
|
-
score_threshold:
|
|
253
|
-
group_by:
|
|
254
|
-
group_size:
|
|
255
|
-
):
|
|
278
|
+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
|
|
279
|
+
score_threshold: float | None = None,
|
|
280
|
+
group_by: str | None = None,
|
|
281
|
+
group_size: int | None = None,
|
|
282
|
+
) -> None:
|
|
256
283
|
"""
|
|
257
284
|
Create a QdrantSparseEmbeddingRetriever component.
|
|
258
285
|
|
|
@@ -290,7 +317,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
290
317
|
self._group_by = group_by
|
|
291
318
|
self._group_size = group_size
|
|
292
319
|
|
|
293
|
-
def to_dict(self) ->
|
|
320
|
+
def to_dict(self) -> dict[str, Any]:
|
|
294
321
|
"""
|
|
295
322
|
Serializes the component to a dictionary.
|
|
296
323
|
|
|
@@ -314,7 +341,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
314
341
|
return d
|
|
315
342
|
|
|
316
343
|
@classmethod
|
|
317
|
-
def from_dict(cls, data:
|
|
344
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
|
|
318
345
|
"""
|
|
319
346
|
Deserializes the component from a dictionary.
|
|
320
347
|
|
|
@@ -331,18 +358,18 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
331
358
|
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
332
359
|
return default_from_dict(cls, data)
|
|
333
360
|
|
|
334
|
-
@component.output_types(documents=
|
|
361
|
+
@component.output_types(documents=list[Document])
|
|
335
362
|
def run(
|
|
336
363
|
self,
|
|
337
364
|
query_sparse_embedding: SparseEmbedding,
|
|
338
|
-
filters:
|
|
339
|
-
top_k:
|
|
340
|
-
scale_score:
|
|
341
|
-
return_embedding:
|
|
342
|
-
score_threshold:
|
|
343
|
-
group_by:
|
|
344
|
-
group_size:
|
|
345
|
-
):
|
|
365
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
366
|
+
top_k: int | None = None,
|
|
367
|
+
scale_score: bool | None = None,
|
|
368
|
+
return_embedding: bool | None = None,
|
|
369
|
+
score_threshold: float | None = None,
|
|
370
|
+
group_by: str | None = None,
|
|
371
|
+
group_size: int | None = None,
|
|
372
|
+
) -> dict[str, list[Document]]:
|
|
346
373
|
"""
|
|
347
374
|
Run the Sparse Embedding Retriever on the given input data.
|
|
348
375
|
|
|
@@ -364,8 +391,19 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
364
391
|
:returns:
|
|
365
392
|
The retrieved documents.
|
|
366
393
|
|
|
394
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
367
395
|
"""
|
|
368
|
-
|
|
396
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
397
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
398
|
+
):
|
|
399
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
400
|
+
|
|
401
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
402
|
+
filters = apply_filter_policy(
|
|
403
|
+
filter_policy=self._filter_policy,
|
|
404
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
405
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
406
|
+
)
|
|
369
407
|
|
|
370
408
|
docs = self._document_store._query_by_sparse(
|
|
371
409
|
query_sparse_embedding=query_sparse_embedding,
|
|
@@ -380,18 +418,18 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
380
418
|
|
|
381
419
|
return {"documents": docs}
|
|
382
420
|
|
|
383
|
-
@component.output_types(documents=
|
|
421
|
+
@component.output_types(documents=list[Document])
|
|
384
422
|
async def run_async(
|
|
385
423
|
self,
|
|
386
424
|
query_sparse_embedding: SparseEmbedding,
|
|
387
|
-
filters:
|
|
388
|
-
top_k:
|
|
389
|
-
scale_score:
|
|
390
|
-
return_embedding:
|
|
391
|
-
score_threshold:
|
|
392
|
-
group_by:
|
|
393
|
-
group_size:
|
|
394
|
-
):
|
|
425
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
426
|
+
top_k: int | None = None,
|
|
427
|
+
scale_score: bool | None = None,
|
|
428
|
+
return_embedding: bool | None = None,
|
|
429
|
+
score_threshold: float | None = None,
|
|
430
|
+
group_by: str | None = None,
|
|
431
|
+
group_size: int | None = None,
|
|
432
|
+
) -> dict[str, list[Document]]:
|
|
395
433
|
"""
|
|
396
434
|
Asynchronously run the Sparse Embedding Retriever on the given input data.
|
|
397
435
|
|
|
@@ -413,8 +451,19 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
413
451
|
:returns:
|
|
414
452
|
The retrieved documents.
|
|
415
453
|
|
|
454
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
416
455
|
"""
|
|
417
|
-
|
|
456
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
457
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
458
|
+
):
|
|
459
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
460
|
+
|
|
461
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
462
|
+
filters = apply_filter_policy(
|
|
463
|
+
filter_policy=self._filter_policy,
|
|
464
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
465
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
466
|
+
)
|
|
418
467
|
|
|
419
468
|
docs = await self._document_store._query_by_sparse_async(
|
|
420
469
|
query_sparse_embedding=query_sparse_embedding,
|
|
@@ -466,14 +515,14 @@ class QdrantHybridRetriever:
|
|
|
466
515
|
def __init__(
|
|
467
516
|
self,
|
|
468
517
|
document_store: QdrantDocumentStore,
|
|
469
|
-
filters:
|
|
518
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
470
519
|
top_k: int = 10,
|
|
471
520
|
return_embedding: bool = False,
|
|
472
|
-
filter_policy:
|
|
473
|
-
score_threshold:
|
|
474
|
-
group_by:
|
|
475
|
-
group_size:
|
|
476
|
-
):
|
|
521
|
+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
|
|
522
|
+
score_threshold: float | None = None,
|
|
523
|
+
group_by: str | None = None,
|
|
524
|
+
group_size: int | None = None,
|
|
525
|
+
) -> None:
|
|
477
526
|
"""
|
|
478
527
|
Create a QdrantHybridRetriever component.
|
|
479
528
|
|
|
@@ -509,7 +558,7 @@ class QdrantHybridRetriever:
|
|
|
509
558
|
self._group_by = group_by
|
|
510
559
|
self._group_size = group_size
|
|
511
560
|
|
|
512
|
-
def to_dict(self) ->
|
|
561
|
+
def to_dict(self) -> dict[str, Any]:
|
|
513
562
|
"""
|
|
514
563
|
Serializes the component to a dictionary.
|
|
515
564
|
|
|
@@ -529,7 +578,7 @@ class QdrantHybridRetriever:
|
|
|
529
578
|
)
|
|
530
579
|
|
|
531
580
|
@classmethod
|
|
532
|
-
def from_dict(cls, data:
|
|
581
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantHybridRetriever":
|
|
533
582
|
"""
|
|
534
583
|
Deserializes the component from a dictionary.
|
|
535
584
|
|
|
@@ -546,18 +595,18 @@ class QdrantHybridRetriever:
|
|
|
546
595
|
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
547
596
|
return default_from_dict(cls, data)
|
|
548
597
|
|
|
549
|
-
@component.output_types(documents=
|
|
598
|
+
@component.output_types(documents=list[Document])
|
|
550
599
|
def run(
|
|
551
600
|
self,
|
|
552
|
-
query_embedding:
|
|
601
|
+
query_embedding: list[float],
|
|
553
602
|
query_sparse_embedding: SparseEmbedding,
|
|
554
|
-
filters:
|
|
555
|
-
top_k:
|
|
556
|
-
return_embedding:
|
|
557
|
-
score_threshold:
|
|
558
|
-
group_by:
|
|
559
|
-
group_size:
|
|
560
|
-
):
|
|
603
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
604
|
+
top_k: int | None = None,
|
|
605
|
+
return_embedding: bool | None = None,
|
|
606
|
+
score_threshold: float | None = None,
|
|
607
|
+
group_by: str | None = None,
|
|
608
|
+
group_size: int | None = None,
|
|
609
|
+
) -> dict[str, list[Document]]:
|
|
561
610
|
"""
|
|
562
611
|
Run the Sparse Embedding Retriever on the given input data.
|
|
563
612
|
|
|
@@ -579,8 +628,19 @@ class QdrantHybridRetriever:
|
|
|
579
628
|
:returns:
|
|
580
629
|
The retrieved documents.
|
|
581
630
|
|
|
631
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
582
632
|
"""
|
|
583
|
-
|
|
633
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
634
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
635
|
+
):
|
|
636
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
637
|
+
|
|
638
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
639
|
+
filters = apply_filter_policy(
|
|
640
|
+
filter_policy=self._filter_policy,
|
|
641
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
642
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
643
|
+
)
|
|
584
644
|
|
|
585
645
|
docs = self._document_store._query_hybrid(
|
|
586
646
|
query_embedding=query_embedding,
|
|
@@ -595,18 +655,18 @@ class QdrantHybridRetriever:
|
|
|
595
655
|
|
|
596
656
|
return {"documents": docs}
|
|
597
657
|
|
|
598
|
-
@component.output_types(documents=
|
|
658
|
+
@component.output_types(documents=list[Document])
|
|
599
659
|
async def run_async(
|
|
600
660
|
self,
|
|
601
|
-
query_embedding:
|
|
661
|
+
query_embedding: list[float],
|
|
602
662
|
query_sparse_embedding: SparseEmbedding,
|
|
603
|
-
filters:
|
|
604
|
-
top_k:
|
|
605
|
-
return_embedding:
|
|
606
|
-
score_threshold:
|
|
607
|
-
group_by:
|
|
608
|
-
group_size:
|
|
609
|
-
):
|
|
663
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
664
|
+
top_k: int | None = None,
|
|
665
|
+
return_embedding: bool | None = None,
|
|
666
|
+
score_threshold: float | None = None,
|
|
667
|
+
group_by: str | None = None,
|
|
668
|
+
group_size: int | None = None,
|
|
669
|
+
) -> dict[str, list[Document]]:
|
|
610
670
|
"""
|
|
611
671
|
Asynchronously run the Sparse Embedding Retriever on the given input data.
|
|
612
672
|
|
|
@@ -628,8 +688,19 @@ class QdrantHybridRetriever:
|
|
|
628
688
|
:returns:
|
|
629
689
|
The retrieved documents.
|
|
630
690
|
|
|
691
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
631
692
|
"""
|
|
632
|
-
|
|
693
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
694
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
695
|
+
):
|
|
696
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
697
|
+
|
|
698
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
699
|
+
filters = apply_filter_policy(
|
|
700
|
+
filter_policy=self._filter_policy,
|
|
701
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
702
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
703
|
+
)
|
|
633
704
|
|
|
634
705
|
docs = await self._document_store._query_hybrid_async(
|
|
635
706
|
query_embedding=query_embedding,
|
|
File without changes
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import uuid
|
|
2
|
-
from typing import List, Union
|
|
3
2
|
|
|
4
3
|
from haystack import logging
|
|
5
4
|
from haystack.dataclasses import Document
|
|
@@ -15,10 +14,10 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
|
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
def convert_haystack_documents_to_qdrant_points(
|
|
18
|
-
documents:
|
|
17
|
+
documents: list[Document],
|
|
19
18
|
*,
|
|
20
19
|
use_sparse_embeddings: bool,
|
|
21
|
-
) ->
|
|
20
|
+
) -> list[rest.PointStruct]:
|
|
22
21
|
points = []
|
|
23
22
|
for document in documents:
|
|
24
23
|
payload = document.to_dict(flatten=False)
|
|
@@ -37,7 +36,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
37
36
|
|
|
38
37
|
else:
|
|
39
38
|
vector = payload.pop("embedding") or {}
|
|
40
|
-
_id = convert_id(
|
|
39
|
+
_id = convert_id(document.id)
|
|
41
40
|
|
|
42
41
|
point = rest.PointStruct(
|
|
43
42
|
payload=payload,
|
|
@@ -58,23 +57,25 @@ def convert_id(_id: str) -> str:
|
|
|
58
57
|
return uuid.uuid5(UUID_NAMESPACE, _id).hex
|
|
59
58
|
|
|
60
59
|
|
|
61
|
-
QdrantPoint =
|
|
60
|
+
QdrantPoint = rest.ScoredPoint | rest.Record
|
|
62
61
|
|
|
63
62
|
|
|
64
63
|
def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document:
|
|
65
|
-
payload =
|
|
64
|
+
payload = point.payload or {}
|
|
66
65
|
payload["score"] = point.score if hasattr(point, "score") else None
|
|
67
66
|
|
|
68
67
|
if not use_sparse_embeddings:
|
|
69
68
|
payload["embedding"] = point.vector if hasattr(point, "vector") else None
|
|
70
|
-
elif hasattr(point, "vector") and point.vector is not None:
|
|
69
|
+
elif hasattr(point, "vector") and point.vector is not None and isinstance(point.vector, dict):
|
|
71
70
|
payload["embedding"] = point.vector.get(DENSE_VECTORS_NAME)
|
|
72
71
|
|
|
73
72
|
if SPARSE_VECTORS_NAME in point.vector:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
73
|
+
sparse_vector = point.vector[SPARSE_VECTORS_NAME]
|
|
74
|
+
if isinstance(sparse_vector, rest.SparseVector):
|
|
75
|
+
sparse_vector_dict = {
|
|
76
|
+
"indices": sparse_vector.indices,
|
|
77
|
+
"values": sparse_vector.values,
|
|
78
|
+
}
|
|
79
|
+
payload["sparse_embedding"] = sparse_vector_dict
|
|
79
80
|
|
|
80
81
|
return Document.from_dict(payload)
|