llama-index-vector-stores-opensearch 0.5.4__tar.gz → 0.5.6__tar.gz
This diff shows the published contents of the two package versions as they appear in their public registry and is provided for informational purposes only.
Potentially problematic release: this version of llama-index-vector-stores-opensearch might be problematic.
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/PKG-INFO +1 -1
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/llama_index/vector_stores/opensearch/base.py +56 -8
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/pyproject.toml +2 -2
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/.gitignore +0 -0
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/LICENSE +0 -0
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/README.md +0 -0
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/llama_index/py.typed +0 -0
- {llama_index_vector_stores_opensearch-0.5.4 → llama_index_vector_stores_opensearch-0.5.6}/llama_index/vector_stores/opensearch/__init__.py +0 -0
llama_index/vector_stores/opensearch/base.py

@@ -58,6 +58,7 @@ class OpensearchVectorClient:
         space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
         os_client (Optional[OSClient]): Custom synchronous client (see OpenSearch from opensearch-py)
         os_async_client (Optional[OSClient]): Custom asynchronous client (see AsyncOpenSearch from opensearch-py)
+        excluded_source_fields (Optional[List[str]]): Optional list of document "source" fields to exclude from OpenSearch responses.
         **kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.

     """
@@ -77,6 +78,7 @@ class OpensearchVectorClient:
         search_pipeline: Optional[str] = None,
         os_client: Optional[OSClient] = None,
         os_async_client: Optional[OSClient] = None,
+        excluded_source_fields: Optional[List[str]] = None,
         **kwargs: Any,
     ):
         """Init params."""
@@ -99,6 +101,7 @@ class OpensearchVectorClient:
         self._index = index
         self._text_field = text_field
         self._max_chunk_bytes = max_chunk_bytes
+        self._excluded_source_fields = excluded_source_fields

         self._search_pipeline = search_pipeline
         http_auth = kwargs.get("http_auth")
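Taken together, the three hunks above add an excluded_source_fields option to the client. A minimal usage sketch follows; the endpoint, index name, dimensionality, and field names are illustrative placeholders, not values taken from this release.

# Hypothetical usage of the new constructor argument (placeholder endpoint/index/fields).
from llama_index.vector_stores.opensearch import OpensearchVectorClient

client = OpensearchVectorClient(
    endpoint="http://localhost:9200",
    index="llama-index-demo",
    dim=1536,
    embedding_field="embedding",
    text_field="content",
    # New in 0.5.6: strip these fields from every hit's _source in search responses.
    excluded_source_fields=["embedding", "metadata._node_content"],
)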
@@ -328,6 +331,7 @@ class OpensearchVectorClient:
         k: int = 4,
         filters: Optional[Union[Dict, List]] = None,
         vector_field: str = "embedding",
+        excluded_source_fields: Optional[List[str]] = None,
     ) -> Dict:
         """For Approximate k-NN Search, this is the default query."""
         query = {
@@ -345,6 +349,8 @@ class OpensearchVectorClient:
         if filters:
             # filter key must be added only when filtering to avoid "filter doesn't support values of type: START_ARRAY" exception
             query["query"]["knn"][vector_field]["filter"] = filters
+        if excluded_source_fields:
+            query["_source"] = {"exclude": excluded_source_fields}
         return query

     def _is_text_field(self, value: Any) -> bool:
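For orientation, here is a rough, self-contained reconstruction of the body this method now produces. The surrounding "knn" clause is paraphrased from the OpenSearch k-NN query DSL rather than copied from the release, and the vector and field names are placeholders.

# Illustrative shape of the approximate k-NN search body with source filtering applied.
query_embedding = [0.1, 0.2, 0.3]        # placeholder vector
excluded_source_fields = ["embedding"]   # fields to hide from returned hits

query = {
    "size": 4,
    "query": {"knn": {"embedding": {"vector": query_embedding, "k": 4}}},
}
if excluded_source_fields:
    # Mirrors the two lines added in the hunk above.
    query["_source"] = {"exclude": excluded_source_fields}
print(query)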
@@ -410,6 +416,8 @@ class OpensearchVectorClient:
             return {"match": {key: {"query": filter.value, "fuzziness": "AUTO"}}}
         elif op == FilterOperator.CONTAINS:
             return {"wildcard": {key: f"*{filter.value}*"}}
+        elif op == FilterOperator.IS_EMPTY:
+            return {"bool": {"must_not": {"exists": {"field": key}}}}
         else:
             raise ValueError(f"Unsupported filter operator: {filter.operator}")

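The new branch maps FilterOperator.IS_EMPTY to a bool/must_not/exists clause. The sketch below shows how such a filter might be expressed and what the translation should look like; the MetadataFilter construction (including passing value=None) is an assumption about llama-index-core's filter types, not something shown in this diff.

# Sketch: an IS_EMPTY metadata filter and the OpenSearch clause it should translate to.
from llama_index.core.vector_stores.types import (
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)

filters = MetadataFilters(
    filters=[
        # Matches documents where the "author" metadata field is absent.
        MetadataFilter(key="author", value=None, operator=FilterOperator.IS_EMPTY),
    ]
)

# Expected clause for that filter, per the hunk above:
expected = {"bool": {"must_not": {"exists": {"field": "author"}}}}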
@@ -445,6 +453,7 @@ class OpensearchVectorClient:
         k: int,
         filters: Optional[MetadataFilters] = None,
         search_method="approximate",
+        excluded_source_fields: Optional[List[str]] = None,
     ) -> Dict:
         """
         Perform a k-Nearest Neighbors (kNN) search.
@@ -463,6 +472,7 @@ class OpensearchVectorClient:
             filters (Optional[MetadataFilters]): Optional filters to apply for the search.
                 Supports filter-context queries documented at
                 https://opensearch.org/docs/latest/query-dsl/query-filter-context/
+            excluded_source_fields: Optional list of document "source" fields to exclude from the response.

         Returns:
             Dict: Up to k documents closest to query_embedding.
@@ -475,6 +485,7 @@ class OpensearchVectorClient:
                 query_embedding,
                 k,
                 vector_field=embedding_field,
+                excluded_source_fields=excluded_source_fields,
             )
         elif (
             search_method == "approximate"
@@ -491,6 +502,7 @@ class OpensearchVectorClient:
                 k,
                 filters={"bool": {"filter": filters}},
                 vector_field=embedding_field,
+                excluded_source_fields=excluded_source_fields,
             )
         else:
             if self.is_aoss:
@@ -502,6 +514,7 @@ class OpensearchVectorClient:
                     space_type=self.space_type,
                     pre_filter={"bool": {"filter": filters}},
                     vector_field=embedding_field,
+                    excluded_source_fields=excluded_source_fields,
                 )
             else:
                 # https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
@@ -511,6 +524,7 @@ class OpensearchVectorClient:
                     space_type="l2Squared",
                     pre_filter={"bool": {"filter": filters}},
                     vector_field=embedding_field,
+                    excluded_source_fields=excluded_source_fields,
                 )
         return search_query

@@ -522,16 +536,20 @@ class OpensearchVectorClient:
         query_embedding: List[float],
         k: int,
         filters: Optional[MetadataFilters] = None,
+        excluded_source_fields: Optional[List[str]] = None,
     ) -> Dict:
         knn_query = self._knn_search_query(embedding_field, query_embedding, k, filters)
         lexical_query = self._lexical_search_query(text_field, query_str, k, filters)

-        return {
+        query = {
             "size": k,
             "query": {
                 "hybrid": {"queries": [lexical_query["query"], knn_query["query"]]}
             },
         }
+        if excluded_source_fields:
+            query["_source"] = {"exclude": excluded_source_fields}
+        return query

     def _lexical_search_query(
         self,
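As in the approximate k-NN path, the hybrid query is now built into a local variable so the _source exclusion can be attached before returning. A small, self-contained sketch of the resulting body follows; the sub-query contents and field names are placeholders, not values from the release.

# Illustrative hybrid search body combining lexical and k-NN sub-queries (placeholders).
lexical_query = {"query": {"bool": {"must": {"match": {"content": {"query": "llamas"}}}}}}
knn_query = {"query": {"knn": {"embedding": {"vector": [0.1, 0.2, 0.3], "k": 4}}}}
excluded_source_fields = ["embedding"]

query = {
    "size": 4,
    "query": {"hybrid": {"queries": [lexical_query["query"], knn_query["query"]]}},
}
if excluded_source_fields:
    query["_source"] = {"exclude": excluded_source_fields}
print(query)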
@@ -539,6 +557,7 @@ class OpensearchVectorClient:
         query_str: str,
         k: int,
         filters: Optional[MetadataFilters] = None,
+        excluded_source_fields: Optional[List[str]] = None,
     ) -> Dict:
         lexical_query = {
             "bool": {"must": {"match": {text_field: {"query": query_str}}}}
@@ -548,10 +567,13 @@ class OpensearchVectorClient:
         if len(parsed_filters) > 0:
             lexical_query["bool"]["filter"] = parsed_filters

-        return {
+        query = {
             "size": k,
             "query": lexical_query,
         }
+        if excluded_source_fields:
+            query["_source"] = {"exclude": excluded_source_fields}
+        return query

     def __get_painless_scripting_source(
         self, space_type: str, vector_field: str = "embedding"
@@ -597,6 +619,7 @@ class OpensearchVectorClient:
         space_type: str = "l2Squared",
         pre_filter: Optional[Union[Dict, List]] = None,
         vector_field: str = "embedding",
+        excluded_source_fields: Optional[List[str]] = None,
     ) -> Dict:
         """
         For Scoring Script Search, this is the default query. Has to account for Opensearch Service
@@ -618,7 +641,7 @@ class OpensearchVectorClient:
         script = self._get_painless_scoring_script(
             space_type, vector_field, query_vector
         )
-        return {
+        query = {
             "size": k,
             "query": {
                 "script_score": {
@@ -627,10 +650,17 @@ class OpensearchVectorClient:
                 }
             },
         }
+        if excluded_source_fields:
+            query["_source"] = {"exclude": excluded_source_fields}
+        return query

     def _is_aoss_enabled(self, http_auth: Any) -> bool:
         """Check if the service is http_auth is set as `aoss`."""
-        return
+        return (
+            http_auth is not None
+            and hasattr(http_auth, "service")
+            and http_auth.service == "aoss"
+        )

     def _is_efficient_filtering_enabled(self) -> bool:
         """Check if kNN with efficient filtering is enabled."""
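The reformatted return expression spells out the AOSS check: a non-None auth object that exposes a service attribute equal to "aoss". Below is a standalone sketch of the same logic, using SimpleNamespace as a stand-in for an opensearch-py signer object rather than the real auth type.

# Minimal sketch of the check performed by the reformatted return expression.
from types import SimpleNamespace

def is_aoss_enabled(http_auth) -> bool:
    # True only for auth objects that target Amazon OpenSearch Serverless ("aoss").
    return (
        http_auth is not None
        and hasattr(http_auth, "service")
        and http_auth.service == "aoss"
    )

print(is_aoss_enabled(None))                             # False
print(is_aoss_enabled(SimpleNamespace(service="es")))    # False
print(is_aoss_enabled(SimpleNamespace(service="aoss")))  # True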
@@ -811,18 +841,27 @@ class OpensearchVectorClient:
                 query_embedding,
                 k,
                 filters=filters,
+                excluded_source_fields=self._excluded_source_fields,
             )
             params = {
                 "search_pipeline": self._search_pipeline,
             }
         elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
             search_query = self._lexical_search_query(
-                self._text_field, query_str, k, filters=filters
+                self._text_field,
+                query_str,
+                k,
+                filters=filters,
+                excluded_source_fields=self._excluded_source_fields,
             )
             params = None
         else:
             search_query = self._knn_search_query(
-                self._embedding_field, query_embedding, k, filters=filters
+                self._embedding_field,
+                query_embedding,
+                k,
+                filters=filters,
+                excluded_source_fields=self._excluded_source_fields,
             )
             params = None

@@ -850,18 +889,27 @@ class OpensearchVectorClient:
                 query_embedding,
                 k,
                 filters=filters,
+                excluded_source_fields=self._excluded_source_fields,
             )
             params = {
                 "search_pipeline": self._search_pipeline,
             }
         elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
             search_query = self._lexical_search_query(
-                self._text_field, query_str, k, filters=filters
+                self._text_field,
+                query_str,
+                k,
+                filters=filters,
+                excluded_source_fields=self._excluded_source_fields,
            )
             params = None
         else:
             search_query = self._knn_search_query(
-                self._embedding_field, query_embedding, k, filters=filters
+                self._embedding_field,
+                query_embedding,
+                k,
+                filters=filters,
+                excluded_source_fields=self._excluded_source_fields,
             )
             params = None

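These last two hunks thread the stored _excluded_source_fields through both the synchronous and asynchronous query paths. A hypothetical end-to-end sketch follows; the endpoint, index, and dimensionality are placeholders, and the OpensearchVectorStore/VectorStoreQuery wiring is assumed from llama-index's public API rather than shown in this diff.

# Hypothetical end-to-end usage: fields excluded on the client are omitted from query hits.
from llama_index.core.vector_stores.types import VectorStoreQuery
from llama_index.vector_stores.opensearch import (
    OpensearchVectorClient,
    OpensearchVectorStore,
)

client = OpensearchVectorClient(
    endpoint="http://localhost:9200",   # placeholder cluster
    index="llama-index-demo",           # placeholder index
    dim=3,
    excluded_source_fields=["embedding"],
)
vector_store = OpensearchVectorStore(client)

result = vector_store.query(
    VectorStoreQuery(query_embedding=[0.1, 0.2, 0.3], similarity_top_k=4)
)
# Each returned hit's _source should no longer carry the excluded "embedding" field.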
pyproject.toml

@@ -12,7 +12,7 @@ dev = [
     "pytest==7.2.1",
     "pytest-asyncio==0.21.0",
     "pytest-mock==3.11.1",
-    "ruff==0.
+    "ruff==0.11.11",
     "types-Deprecated>=0.1.0",
     "types-PyYAML>=6.0.12.12,<7",
     "types-protobuf>=4.24.0.4,<5",
@@ -27,7 +27,7 @@ dev = [

 [project]
 name = "llama-index-vector-stores-opensearch"
-version = "0.5.4"
+version = "0.5.6"
 description = "llama-index vector_stores opensearch integration"
 authors = [{name = "Your Name", email = "you@example.com"}]
 requires-python = ">=3.9,<4.0"