llama-index-vector-stores-opensearch 0.5.5__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-index-vector-stores-opensearch might be problematic. See the package registry's advisory page for more details.

@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.5.5
3
+ Version: 0.6.0
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  Author-email: Your Name <you@example.com>
6
6
  License-Expression: MIT
7
7
  License-File: LICENSE
8
8
  Requires-Python: <4.0,>=3.9
9
- Requires-Dist: llama-index-core<0.13,>=0.12.0
9
+ Requires-Dist: llama-index-core<0.14,>=0.13.0
10
10
  Requires-Dist: opensearch-py[async]<3,>=2.4.2
11
11
  Description-Content-Type: text/markdown
12
12
 
@@ -58,6 +58,7 @@ class OpensearchVectorClient:
58
58
  space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
59
59
  os_client (Optional[OSClient]): Custom synchronous client (see OpenSearch from opensearch-py)
60
60
  os_async_client (Optional[OSClient]): Custom asynchronous client (see AsyncOpenSearch from opensearch-py)
61
+ excluded_source_fields (Optional[List[str]]): Optional list of document "source" fields to exclude from OpenSearch responses.
61
62
  **kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.
62
63
 
63
64
  """
@@ -77,6 +78,7 @@ class OpensearchVectorClient:
77
78
  search_pipeline: Optional[str] = None,
78
79
  os_client: Optional[OSClient] = None,
79
80
  os_async_client: Optional[OSClient] = None,
81
+ excluded_source_fields: Optional[List[str]] = None,
80
82
  **kwargs: Any,
81
83
  ):
82
84
  """Init params."""
@@ -99,6 +101,7 @@ class OpensearchVectorClient:
99
101
  self._index = index
100
102
  self._text_field = text_field
101
103
  self._max_chunk_bytes = max_chunk_bytes
104
+ self._excluded_source_fields = excluded_source_fields
102
105
 
103
106
  self._search_pipeline = search_pipeline
104
107
  http_auth = kwargs.get("http_auth")
@@ -328,6 +331,7 @@ class OpensearchVectorClient:
328
331
  k: int = 4,
329
332
  filters: Optional[Union[Dict, List]] = None,
330
333
  vector_field: str = "embedding",
334
+ excluded_source_fields: Optional[List[str]] = None,
331
335
  ) -> Dict:
332
336
  """For Approximate k-NN Search, this is the default query."""
333
337
  query = {
@@ -345,6 +349,8 @@ class OpensearchVectorClient:
345
349
  if filters:
346
350
  # filter key must be added only when filtering to avoid "filter doesn't support values of type: START_ARRAY" exception
347
351
  query["query"]["knn"][vector_field]["filter"] = filters
352
+ if excluded_source_fields:
353
+ query["_source"] = {"exclude": excluded_source_fields}
348
354
  return query
349
355
 
350
356
  def _is_text_field(self, value: Any) -> bool:
@@ -447,6 +453,7 @@ class OpensearchVectorClient:
447
453
  k: int,
448
454
  filters: Optional[MetadataFilters] = None,
449
455
  search_method="approximate",
456
+ excluded_source_fields: Optional[List[str]] = None,
450
457
  ) -> Dict:
451
458
  """
452
459
  Perform a k-Nearest Neighbors (kNN) search.
@@ -465,6 +472,7 @@ class OpensearchVectorClient:
465
472
  filters (Optional[MetadataFilters]): Optional filters to apply for the search.
466
473
  Supports filter-context queries documented at
467
474
  https://opensearch.org/docs/latest/query-dsl/query-filter-context/
475
+ excluded_source_fields: Optional list of document "source" fields to exclude from the response.
468
476
 
469
477
  Returns:
470
478
  Dict: Up to k documents closest to query_embedding.
@@ -477,6 +485,7 @@ class OpensearchVectorClient:
477
485
  query_embedding,
478
486
  k,
479
487
  vector_field=embedding_field,
488
+ excluded_source_fields=excluded_source_fields,
480
489
  )
481
490
  elif (
482
491
  search_method == "approximate"
@@ -493,6 +502,7 @@ class OpensearchVectorClient:
493
502
  k,
494
503
  filters={"bool": {"filter": filters}},
495
504
  vector_field=embedding_field,
505
+ excluded_source_fields=excluded_source_fields,
496
506
  )
497
507
  else:
498
508
  if self.is_aoss:
@@ -504,6 +514,7 @@ class OpensearchVectorClient:
504
514
  space_type=self.space_type,
505
515
  pre_filter={"bool": {"filter": filters}},
506
516
  vector_field=embedding_field,
517
+ excluded_source_fields=excluded_source_fields,
507
518
  )
508
519
  else:
509
520
  # https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
@@ -513,6 +524,7 @@ class OpensearchVectorClient:
513
524
  space_type="l2Squared",
514
525
  pre_filter={"bool": {"filter": filters}},
515
526
  vector_field=embedding_field,
527
+ excluded_source_fields=excluded_source_fields,
516
528
  )
517
529
  return search_query
518
530
 
@@ -524,16 +536,20 @@ class OpensearchVectorClient:
524
536
  query_embedding: List[float],
525
537
  k: int,
526
538
  filters: Optional[MetadataFilters] = None,
539
+ excluded_source_fields: Optional[List[str]] = None,
527
540
  ) -> Dict:
528
541
  knn_query = self._knn_search_query(embedding_field, query_embedding, k, filters)
529
542
  lexical_query = self._lexical_search_query(text_field, query_str, k, filters)
530
543
 
531
- return {
544
+ query = {
532
545
  "size": k,
533
546
  "query": {
534
547
  "hybrid": {"queries": [lexical_query["query"], knn_query["query"]]}
535
548
  },
536
549
  }
550
+ if excluded_source_fields:
551
+ query["_source"] = {"exclude": excluded_source_fields}
552
+ return query
537
553
 
538
554
  def _lexical_search_query(
539
555
  self,
@@ -541,6 +557,7 @@ class OpensearchVectorClient:
541
557
  query_str: str,
542
558
  k: int,
543
559
  filters: Optional[MetadataFilters] = None,
560
+ excluded_source_fields: Optional[List[str]] = None,
544
561
  ) -> Dict:
545
562
  lexical_query = {
546
563
  "bool": {"must": {"match": {text_field: {"query": query_str}}}}
@@ -550,10 +567,13 @@ class OpensearchVectorClient:
550
567
  if len(parsed_filters) > 0:
551
568
  lexical_query["bool"]["filter"] = parsed_filters
552
569
 
553
- return {
570
+ query = {
554
571
  "size": k,
555
572
  "query": lexical_query,
556
573
  }
574
+ if excluded_source_fields:
575
+ query["_source"] = {"exclude": excluded_source_fields}
576
+ return query
557
577
 
558
578
  def __get_painless_scripting_source(
559
579
  self, space_type: str, vector_field: str = "embedding"
@@ -599,6 +619,7 @@ class OpensearchVectorClient:
599
619
  space_type: str = "l2Squared",
600
620
  pre_filter: Optional[Union[Dict, List]] = None,
601
621
  vector_field: str = "embedding",
622
+ excluded_source_fields: Optional[List[str]] = None,
602
623
  ) -> Dict:
603
624
  """
604
625
  For Scoring Script Search, this is the default query. Has to account for Opensearch Service
@@ -620,7 +641,7 @@ class OpensearchVectorClient:
620
641
  script = self._get_painless_scoring_script(
621
642
  space_type, vector_field, query_vector
622
643
  )
623
- return {
644
+ query = {
624
645
  "size": k,
625
646
  "query": {
626
647
  "script_score": {
@@ -629,10 +650,17 @@ class OpensearchVectorClient:
629
650
  }
630
651
  },
631
652
  }
653
+ if excluded_source_fields:
654
+ query["_source"] = {"exclude": excluded_source_fields}
655
+ return query
632
656
 
633
657
  def _is_aoss_enabled(self, http_auth: Any) -> bool:
634
658
  """Check if the service is http_auth is set as `aoss`."""
635
- return http_auth is not None and hasattr(http_auth, "service") and http_auth.service == "aoss"
659
+ return (
660
+ http_auth is not None
661
+ and hasattr(http_auth, "service")
662
+ and http_auth.service == "aoss"
663
+ )
636
664
 
637
665
  def _is_efficient_filtering_enabled(self) -> bool:
638
666
  """Check if kNN with efficient filtering is enabled."""
@@ -813,18 +841,27 @@ class OpensearchVectorClient:
813
841
  query_embedding,
814
842
  k,
815
843
  filters=filters,
844
+ excluded_source_fields=self._excluded_source_fields,
816
845
  )
817
846
  params = {
818
847
  "search_pipeline": self._search_pipeline,
819
848
  }
820
849
  elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
821
850
  search_query = self._lexical_search_query(
822
- self._text_field, query_str, k, filters=filters
851
+ self._text_field,
852
+ query_str,
853
+ k,
854
+ filters=filters,
855
+ excluded_source_fields=self._excluded_source_fields,
823
856
  )
824
857
  params = None
825
858
  else:
826
859
  search_query = self._knn_search_query(
827
- self._embedding_field, query_embedding, k, filters=filters
860
+ self._embedding_field,
861
+ query_embedding,
862
+ k,
863
+ filters=filters,
864
+ excluded_source_fields=self._excluded_source_fields,
828
865
  )
829
866
  params = None
830
867
 
@@ -852,18 +889,27 @@ class OpensearchVectorClient:
852
889
  query_embedding,
853
890
  k,
854
891
  filters=filters,
892
+ excluded_source_fields=self._excluded_source_fields,
855
893
  )
856
894
  params = {
857
895
  "search_pipeline": self._search_pipeline,
858
896
  }
859
897
  elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
860
898
  search_query = self._lexical_search_query(
861
- self._text_field, query_str, k, filters=filters
899
+ self._text_field,
900
+ query_str,
901
+ k,
902
+ filters=filters,
903
+ excluded_source_fields=self._excluded_source_fields,
862
904
  )
863
905
  params = None
864
906
  else:
865
907
  search_query = self._knn_search_query(
866
- self._embedding_field, query_embedding, k, filters=filters
908
+ self._embedding_field,
909
+ query_embedding,
910
+ k,
911
+ filters=filters,
912
+ excluded_source_fields=self._excluded_source_fields,
867
913
  )
868
914
  params = None
869
915
 
@@ -12,7 +12,7 @@ dev = [
12
12
  "pytest==7.2.1",
13
13
  "pytest-asyncio==0.21.0",
14
14
  "pytest-mock==3.11.1",
15
- "ruff==0.0.292",
15
+ "ruff==0.11.11",
16
16
  "types-Deprecated>=0.1.0",
17
17
  "types-PyYAML>=6.0.12.12,<7",
18
18
  "types-protobuf>=4.24.0.4,<5",
@@ -27,14 +27,14 @@ dev = [
27
27
 
28
28
  [project]
29
29
  name = "llama-index-vector-stores-opensearch"
30
- version = "0.5.5"
30
+ version = "0.6.0"
31
31
  description = "llama-index vector_stores opensearch integration"
32
32
  authors = [{name = "Your Name", email = "you@example.com"}]
33
33
  requires-python = ">=3.9,<4.0"
34
34
  readme = "README.md"
35
35
  license = "MIT"
36
36
  dependencies = [
37
- "llama-index-core>=0.12.0,<0.13",
37
+ "llama-index-core>=0.13.0,<0.14",
38
38
  "opensearch-py[async]>=2.4.2,<3",
39
39
  ]
40
40