llama-index-vector-stores-opensearch 0.5.5__tar.gz → 0.5.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/PKG-INFO +1 -1
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/llama_index/vector_stores/opensearch/base.py +54 -8
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/pyproject.toml +2 -2
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/.gitignore +0 -0
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/LICENSE +0 -0
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/README.md +0 -0
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/llama_index/py.typed +0 -0
- {llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/llama_index/vector_stores/opensearch/__init__.py +0 -0
|
@@ -58,6 +58,7 @@ class OpensearchVectorClient:
|
|
|
58
58
|
space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
|
|
59
59
|
os_client (Optional[OSClient]): Custom synchronous client (see OpenSearch from opensearch-py)
|
|
60
60
|
os_async_client (Optional[OSClient]): Custom asynchronous client (see AsyncOpenSearch from opensearch-py)
|
|
61
|
+
excluded_source_fields (Optional[List[str]]): Optional list of document "source" fields to exclude from OpenSearch responses.
|
|
61
62
|
**kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.
|
|
62
63
|
|
|
63
64
|
"""
|
|
@@ -77,6 +78,7 @@ class OpensearchVectorClient:
|
|
|
77
78
|
search_pipeline: Optional[str] = None,
|
|
78
79
|
os_client: Optional[OSClient] = None,
|
|
79
80
|
os_async_client: Optional[OSClient] = None,
|
|
81
|
+
excluded_source_fields: Optional[List[str]] = None,
|
|
80
82
|
**kwargs: Any,
|
|
81
83
|
):
|
|
82
84
|
"""Init params."""
|
|
@@ -99,6 +101,7 @@ class OpensearchVectorClient:
|
|
|
99
101
|
self._index = index
|
|
100
102
|
self._text_field = text_field
|
|
101
103
|
self._max_chunk_bytes = max_chunk_bytes
|
|
104
|
+
self._excluded_source_fields = excluded_source_fields
|
|
102
105
|
|
|
103
106
|
self._search_pipeline = search_pipeline
|
|
104
107
|
http_auth = kwargs.get("http_auth")
|
|
@@ -328,6 +331,7 @@ class OpensearchVectorClient:
|
|
|
328
331
|
k: int = 4,
|
|
329
332
|
filters: Optional[Union[Dict, List]] = None,
|
|
330
333
|
vector_field: str = "embedding",
|
|
334
|
+
excluded_source_fields: Optional[List[str]] = None,
|
|
331
335
|
) -> Dict:
|
|
332
336
|
"""For Approximate k-NN Search, this is the default query."""
|
|
333
337
|
query = {
|
|
@@ -345,6 +349,8 @@ class OpensearchVectorClient:
|
|
|
345
349
|
if filters:
|
|
346
350
|
# filter key must be added only when filtering to avoid "filter doesn't support values of type: START_ARRAY" exception
|
|
347
351
|
query["query"]["knn"][vector_field]["filter"] = filters
|
|
352
|
+
if excluded_source_fields:
|
|
353
|
+
query["_source"] = {"exclude": excluded_source_fields}
|
|
348
354
|
return query
|
|
349
355
|
|
|
350
356
|
def _is_text_field(self, value: Any) -> bool:
|
|
@@ -447,6 +453,7 @@ class OpensearchVectorClient:
|
|
|
447
453
|
k: int,
|
|
448
454
|
filters: Optional[MetadataFilters] = None,
|
|
449
455
|
search_method="approximate",
|
|
456
|
+
excluded_source_fields: Optional[List[str]] = None,
|
|
450
457
|
) -> Dict:
|
|
451
458
|
"""
|
|
452
459
|
Perform a k-Nearest Neighbors (kNN) search.
|
|
@@ -465,6 +472,7 @@ class OpensearchVectorClient:
|
|
|
465
472
|
filters (Optional[MetadataFilters]): Optional filters to apply for the search.
|
|
466
473
|
Supports filter-context queries documented at
|
|
467
474
|
https://opensearch.org/docs/latest/query-dsl/query-filter-context/
|
|
475
|
+
excluded_source_fields: Optional list of document "source" fields to exclude from the response.
|
|
468
476
|
|
|
469
477
|
Returns:
|
|
470
478
|
Dict: Up to k documents closest to query_embedding.
|
|
@@ -477,6 +485,7 @@ class OpensearchVectorClient:
|
|
|
477
485
|
query_embedding,
|
|
478
486
|
k,
|
|
479
487
|
vector_field=embedding_field,
|
|
488
|
+
excluded_source_fields=excluded_source_fields,
|
|
480
489
|
)
|
|
481
490
|
elif (
|
|
482
491
|
search_method == "approximate"
|
|
@@ -493,6 +502,7 @@ class OpensearchVectorClient:
|
|
|
493
502
|
k,
|
|
494
503
|
filters={"bool": {"filter": filters}},
|
|
495
504
|
vector_field=embedding_field,
|
|
505
|
+
excluded_source_fields=excluded_source_fields,
|
|
496
506
|
)
|
|
497
507
|
else:
|
|
498
508
|
if self.is_aoss:
|
|
@@ -504,6 +514,7 @@ class OpensearchVectorClient:
|
|
|
504
514
|
space_type=self.space_type,
|
|
505
515
|
pre_filter={"bool": {"filter": filters}},
|
|
506
516
|
vector_field=embedding_field,
|
|
517
|
+
excluded_source_fields=excluded_source_fields,
|
|
507
518
|
)
|
|
508
519
|
else:
|
|
509
520
|
# https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
|
|
@@ -513,6 +524,7 @@ class OpensearchVectorClient:
|
|
|
513
524
|
space_type="l2Squared",
|
|
514
525
|
pre_filter={"bool": {"filter": filters}},
|
|
515
526
|
vector_field=embedding_field,
|
|
527
|
+
excluded_source_fields=excluded_source_fields,
|
|
516
528
|
)
|
|
517
529
|
return search_query
|
|
518
530
|
|
|
@@ -524,16 +536,20 @@ class OpensearchVectorClient:
|
|
|
524
536
|
query_embedding: List[float],
|
|
525
537
|
k: int,
|
|
526
538
|
filters: Optional[MetadataFilters] = None,
|
|
539
|
+
excluded_source_fields: Optional[List[str]] = None,
|
|
527
540
|
) -> Dict:
|
|
528
541
|
knn_query = self._knn_search_query(embedding_field, query_embedding, k, filters)
|
|
529
542
|
lexical_query = self._lexical_search_query(text_field, query_str, k, filters)
|
|
530
543
|
|
|
531
|
-
|
|
544
|
+
query = {
|
|
532
545
|
"size": k,
|
|
533
546
|
"query": {
|
|
534
547
|
"hybrid": {"queries": [lexical_query["query"], knn_query["query"]]}
|
|
535
548
|
},
|
|
536
549
|
}
|
|
550
|
+
if excluded_source_fields:
|
|
551
|
+
query["_source"] = {"exclude": excluded_source_fields}
|
|
552
|
+
return query
|
|
537
553
|
|
|
538
554
|
def _lexical_search_query(
|
|
539
555
|
self,
|
|
@@ -541,6 +557,7 @@ class OpensearchVectorClient:
|
|
|
541
557
|
query_str: str,
|
|
542
558
|
k: int,
|
|
543
559
|
filters: Optional[MetadataFilters] = None,
|
|
560
|
+
excluded_source_fields: Optional[List[str]] = None,
|
|
544
561
|
) -> Dict:
|
|
545
562
|
lexical_query = {
|
|
546
563
|
"bool": {"must": {"match": {text_field: {"query": query_str}}}}
|
|
@@ -550,10 +567,13 @@ class OpensearchVectorClient:
|
|
|
550
567
|
if len(parsed_filters) > 0:
|
|
551
568
|
lexical_query["bool"]["filter"] = parsed_filters
|
|
552
569
|
|
|
553
|
-
|
|
570
|
+
query = {
|
|
554
571
|
"size": k,
|
|
555
572
|
"query": lexical_query,
|
|
556
573
|
}
|
|
574
|
+
if excluded_source_fields:
|
|
575
|
+
query["_source"] = {"exclude": excluded_source_fields}
|
|
576
|
+
return query
|
|
557
577
|
|
|
558
578
|
def __get_painless_scripting_source(
|
|
559
579
|
self, space_type: str, vector_field: str = "embedding"
|
|
@@ -599,6 +619,7 @@ class OpensearchVectorClient:
|
|
|
599
619
|
space_type: str = "l2Squared",
|
|
600
620
|
pre_filter: Optional[Union[Dict, List]] = None,
|
|
601
621
|
vector_field: str = "embedding",
|
|
622
|
+
excluded_source_fields: Optional[List[str]] = None,
|
|
602
623
|
) -> Dict:
|
|
603
624
|
"""
|
|
604
625
|
For Scoring Script Search, this is the default query. Has to account for Opensearch Service
|
|
@@ -620,7 +641,7 @@ class OpensearchVectorClient:
|
|
|
620
641
|
script = self._get_painless_scoring_script(
|
|
621
642
|
space_type, vector_field, query_vector
|
|
622
643
|
)
|
|
623
|
-
|
|
644
|
+
query = {
|
|
624
645
|
"size": k,
|
|
625
646
|
"query": {
|
|
626
647
|
"script_score": {
|
|
@@ -629,10 +650,17 @@ class OpensearchVectorClient:
|
|
|
629
650
|
}
|
|
630
651
|
},
|
|
631
652
|
}
|
|
653
|
+
if excluded_source_fields:
|
|
654
|
+
query["_source"] = {"exclude": excluded_source_fields}
|
|
655
|
+
return query
|
|
632
656
|
|
|
633
657
|
def _is_aoss_enabled(self, http_auth: Any) -> bool:
|
|
634
658
|
"""Check if the service is http_auth is set as `aoss`."""
|
|
635
|
-
return
|
|
659
|
+
return (
|
|
660
|
+
http_auth is not None
|
|
661
|
+
and hasattr(http_auth, "service")
|
|
662
|
+
and http_auth.service == "aoss"
|
|
663
|
+
)
|
|
636
664
|
|
|
637
665
|
def _is_efficient_filtering_enabled(self) -> bool:
|
|
638
666
|
"""Check if kNN with efficient filtering is enabled."""
|
|
@@ -813,18 +841,27 @@ class OpensearchVectorClient:
|
|
|
813
841
|
query_embedding,
|
|
814
842
|
k,
|
|
815
843
|
filters=filters,
|
|
844
|
+
excluded_source_fields=self._excluded_source_fields,
|
|
816
845
|
)
|
|
817
846
|
params = {
|
|
818
847
|
"search_pipeline": self._search_pipeline,
|
|
819
848
|
}
|
|
820
849
|
elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
|
|
821
850
|
search_query = self._lexical_search_query(
|
|
822
|
-
self._text_field,
|
|
851
|
+
self._text_field,
|
|
852
|
+
query_str,
|
|
853
|
+
k,
|
|
854
|
+
filters=filters,
|
|
855
|
+
excluded_source_fields=self._excluded_source_fields,
|
|
823
856
|
)
|
|
824
857
|
params = None
|
|
825
858
|
else:
|
|
826
859
|
search_query = self._knn_search_query(
|
|
827
|
-
self._embedding_field,
|
|
860
|
+
self._embedding_field,
|
|
861
|
+
query_embedding,
|
|
862
|
+
k,
|
|
863
|
+
filters=filters,
|
|
864
|
+
excluded_source_fields=self._excluded_source_fields,
|
|
828
865
|
)
|
|
829
866
|
params = None
|
|
830
867
|
|
|
@@ -852,18 +889,27 @@ class OpensearchVectorClient:
|
|
|
852
889
|
query_embedding,
|
|
853
890
|
k,
|
|
854
891
|
filters=filters,
|
|
892
|
+
excluded_source_fields=self._excluded_source_fields,
|
|
855
893
|
)
|
|
856
894
|
params = {
|
|
857
895
|
"search_pipeline": self._search_pipeline,
|
|
858
896
|
}
|
|
859
897
|
elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
|
|
860
898
|
search_query = self._lexical_search_query(
|
|
861
|
-
self._text_field,
|
|
899
|
+
self._text_field,
|
|
900
|
+
query_str,
|
|
901
|
+
k,
|
|
902
|
+
filters=filters,
|
|
903
|
+
excluded_source_fields=self._excluded_source_fields,
|
|
862
904
|
)
|
|
863
905
|
params = None
|
|
864
906
|
else:
|
|
865
907
|
search_query = self._knn_search_query(
|
|
866
|
-
self._embedding_field,
|
|
908
|
+
self._embedding_field,
|
|
909
|
+
query_embedding,
|
|
910
|
+
k,
|
|
911
|
+
filters=filters,
|
|
912
|
+
excluded_source_fields=self._excluded_source_fields,
|
|
867
913
|
)
|
|
868
914
|
params = None
|
|
869
915
|
|
|
@@ -12,7 +12,7 @@ dev = [
|
|
|
12
12
|
"pytest==7.2.1",
|
|
13
13
|
"pytest-asyncio==0.21.0",
|
|
14
14
|
"pytest-mock==3.11.1",
|
|
15
|
-
"ruff==0.
|
|
15
|
+
"ruff==0.11.11",
|
|
16
16
|
"types-Deprecated>=0.1.0",
|
|
17
17
|
"types-PyYAML>=6.0.12.12,<7",
|
|
18
18
|
"types-protobuf>=4.24.0.4,<5",
|
|
@@ -27,7 +27,7 @@ dev = [
|
|
|
27
27
|
|
|
28
28
|
[project]
|
|
29
29
|
name = "llama-index-vector-stores-opensearch"
|
|
30
|
-
version = "0.5.5"
|
|
30
|
+
version = "0.5.6"
|
|
31
31
|
description = "llama-index vector_stores opensearch integration"
|
|
32
32
|
authors = [{name = "Your Name", email = "you@example.com"}]
|
|
33
33
|
requires-python = ">=3.9,<4.0"
|
{llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/.gitignore
RENAMED
|
File without changes
|
{llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/LICENSE
RENAMED
|
File without changes
|
{llama_index_vector_stores_opensearch-0.5.5 → llama_index_vector_stores_opensearch-0.5.6}/README.md
RENAMED
|
File without changes
|
|
File without changes
|