llama-index-vector-stores-opensearch 0.5.4__tar.gz → 0.5.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of llama-index-vector-stores-opensearch might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: llama-index-vector-stores-opensearch
- Version: 0.5.4
+ Version: 0.5.6
  Summary: llama-index vector_stores opensearch integration
  Author-email: Your Name <you@example.com>
  License-Expression: MIT
@@ -58,6 +58,7 @@ class OpensearchVectorClient:
  space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
  os_client (Optional[OSClient]): Custom synchronous client (see OpenSearch from opensearch-py)
  os_async_client (Optional[OSClient]): Custom asynchronous client (see AsyncOpenSearch from opensearch-py)
+ excluded_source_fields (Optional[List[str]]): Optional list of document "source" fields to exclude from OpenSearch responses.
  **kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.

  """
@@ -77,6 +78,7 @@ class OpensearchVectorClient:
  search_pipeline: Optional[str] = None,
  os_client: Optional[OSClient] = None,
  os_async_client: Optional[OSClient] = None,
+ excluded_source_fields: Optional[List[str]] = None,
  **kwargs: Any,
  ):
  """Init params."""
@@ -99,6 +101,7 @@ class OpensearchVectorClient:
  self._index = index
  self._text_field = text_field
  self._max_chunk_bytes = max_chunk_bytes
+ self._excluded_source_fields = excluded_source_fields

  self._search_pipeline = search_pipeline
  http_auth = kwargs.get("http_auth")
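
For context, the sketch below shows how the new excluded_source_fields constructor argument could be used. It is a minimal, hedged example: the endpoint, index name, dimension, and field values are illustrative placeholders and are not part of this diff.

    # Hedged sketch: passing the new excluded_source_fields argument added in
    # the 0.5.4 -> 0.5.6 range. Endpoint, index, dim, and the excluded field
    # names are placeholder values for illustration only.
    from llama_index.vector_stores.opensearch import OpensearchVectorClient

    client = OpensearchVectorClient(
        endpoint="http://localhost:9200",  # placeholder OpenSearch endpoint
        index="llama-demo-index",          # placeholder index name
        dim=1536,                          # placeholder embedding dimension
        embedding_field="embedding",
        text_field="content",
        # New in this release: these _source fields are stripped from every
        # search response returned by the client.
        excluded_source_fields=["embedding", "metadata.raw_html"],
    )
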
@@ -328,6 +331,7 @@ class OpensearchVectorClient:
  k: int = 4,
  filters: Optional[Union[Dict, List]] = None,
  vector_field: str = "embedding",
+ excluded_source_fields: Optional[List[str]] = None,
  ) -> Dict:
  """For Approximate k-NN Search, this is the default query."""
  query = {
@@ -345,6 +349,8 @@ class OpensearchVectorClient:
  if filters:
  # filter key must be added only when filtering to avoid "filter doesn't support values of type: START_ARRAY" exception
  query["query"]["knn"][vector_field]["filter"] = filters
+ if excluded_source_fields:
+ query["_source"] = {"exclude": excluded_source_fields}
  return query

  def _is_text_field(self, value: Any) -> bool:
@@ -410,6 +416,8 @@ class OpensearchVectorClient:
  return {"match": {key: {"query": filter.value, "fuzziness": "AUTO"}}}
  elif op == FilterOperator.CONTAINS:
  return {"wildcard": {key: f"*{filter.value}*"}}
+ elif op == FilterOperator.IS_EMPTY:
+ return {"bool": {"must_not": {"exists": {"field": key}}}}
  else:
  raise ValueError(f"Unsupported filter operator: {filter.operator}")

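The new IS_EMPTY branch above translates a metadata filter into an OpenSearch bool/must_not/exists clause. A minimal sketch of driving it from llama-index-core follows; the metadata key is illustrative, and whether MetadataFilter accepts value=None may depend on the installed llama-index-core version.

    # Hedged sketch: requesting only nodes whose metadata lacks a given field,
    # using the FilterOperator.IS_EMPTY operator this release starts handling.
    from llama_index.core.vector_stores.types import (
        FilterOperator,
        MetadataFilter,
        MetadataFilters,
    )

    # "reviewed_at" is an illustrative metadata key, not part of this diff.
    filters = MetadataFilters(
        filters=[
            MetadataFilter(
                key="reviewed_at",
                value=None,  # no comparison value is needed for IS_EMPTY
                operator=FilterOperator.IS_EMPTY,
            )
        ]
    )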
 
@@ -445,6 +453,7 @@ class OpensearchVectorClient:
  k: int,
  filters: Optional[MetadataFilters] = None,
  search_method="approximate",
+ excluded_source_fields: Optional[List[str]] = None,
  ) -> Dict:
  """
  Perform a k-Nearest Neighbors (kNN) search.
@@ -463,6 +472,7 @@ class OpensearchVectorClient:
  filters (Optional[MetadataFilters]): Optional filters to apply for the search.
  Supports filter-context queries documented at
  https://opensearch.org/docs/latest/query-dsl/query-filter-context/
+ excluded_source_fields: Optional list of document "source" fields to exclude from the response.

  Returns:
  Dict: Up to k documents closest to query_embedding.
@@ -475,6 +485,7 @@ class OpensearchVectorClient:
  query_embedding,
  k,
  vector_field=embedding_field,
+ excluded_source_fields=excluded_source_fields,
  )
  elif (
  search_method == "approximate"
@@ -491,6 +502,7 @@ class OpensearchVectorClient:
  k,
  filters={"bool": {"filter": filters}},
  vector_field=embedding_field,
+ excluded_source_fields=excluded_source_fields,
  )
  else:
  if self.is_aoss:
@@ -502,6 +514,7 @@ class OpensearchVectorClient:
  space_type=self.space_type,
  pre_filter={"bool": {"filter": filters}},
  vector_field=embedding_field,
+ excluded_source_fields=excluded_source_fields,
  )
  else:
  # https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
@@ -511,6 +524,7 @@ class OpensearchVectorClient:
  space_type="l2Squared",
  pre_filter={"bool": {"filter": filters}},
  vector_field=embedding_field,
+ excluded_source_fields=excluded_source_fields,
  )
  return search_query

@@ -522,16 +536,20 @@ class OpensearchVectorClient:
  query_embedding: List[float],
  k: int,
  filters: Optional[MetadataFilters] = None,
+ excluded_source_fields: Optional[List[str]] = None,
  ) -> Dict:
  knn_query = self._knn_search_query(embedding_field, query_embedding, k, filters)
  lexical_query = self._lexical_search_query(text_field, query_str, k, filters)

- return {
+ query = {
  "size": k,
  "query": {
  "hybrid": {"queries": [lexical_query["query"], knn_query["query"]]}
  },
  }
+ if excluded_source_fields:
+ query["_source"] = {"exclude": excluded_source_fields}
+ return query

  def _lexical_search_query(
  self,
@@ -539,6 +557,7 @@ class OpensearchVectorClient:
  query_str: str,
  k: int,
  filters: Optional[MetadataFilters] = None,
+ excluded_source_fields: Optional[List[str]] = None,
  ) -> Dict:
  lexical_query = {
  "bool": {"must": {"match": {text_field: {"query": query_str}}}}
@@ -548,10 +567,13 @@ class OpensearchVectorClient:
  if len(parsed_filters) > 0:
  lexical_query["bool"]["filter"] = parsed_filters

- return {
+ query = {
  "size": k,
  "query": lexical_query,
  }
+ if excluded_source_fields:
+ query["_source"] = {"exclude": excluded_source_fields}
+ return query

  def __get_painless_scripting_source(
  self, space_type: str, vector_field: str = "embedding"
@@ -597,6 +619,7 @@ class OpensearchVectorClient:
  space_type: str = "l2Squared",
  pre_filter: Optional[Union[Dict, List]] = None,
  vector_field: str = "embedding",
+ excluded_source_fields: Optional[List[str]] = None,
  ) -> Dict:
  """
  For Scoring Script Search, this is the default query. Has to account for Opensearch Service
@@ -618,7 +641,7 @@ class OpensearchVectorClient:
  script = self._get_painless_scoring_script(
  space_type, vector_field, query_vector
  )
- return {
+ query = {
  "size": k,
  "query": {
  "script_score": {
@@ -627,10 +650,17 @@ class OpensearchVectorClient:
  }
  },
  }
+ if excluded_source_fields:
+ query["_source"] = {"exclude": excluded_source_fields}
+ return query

  def _is_aoss_enabled(self, http_auth: Any) -> bool:
  """Check if the service is http_auth is set as `aoss`."""
- return http_auth is not None and hasattr(http_auth, "service") and http_auth.service == "aoss"
+ return (
+ http_auth is not None
+ and hasattr(http_auth, "service")
+ and http_auth.service == "aoss"
+ )

  def _is_efficient_filtering_enabled(self) -> bool:
  """Check if kNN with efficient filtering is enabled."""
@@ -811,18 +841,27 @@ class OpensearchVectorClient:
  query_embedding,
  k,
  filters=filters,
+ excluded_source_fields=self._excluded_source_fields,
  )
  params = {
  "search_pipeline": self._search_pipeline,
  }
  elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
  search_query = self._lexical_search_query(
- self._text_field, query_str, k, filters=filters
+ self._text_field,
+ query_str,
+ k,
+ filters=filters,
+ excluded_source_fields=self._excluded_source_fields,
  )
  params = None
  else:
  search_query = self._knn_search_query(
- self._embedding_field, query_embedding, k, filters=filters
+ self._embedding_field,
+ query_embedding,
+ k,
+ filters=filters,
+ excluded_source_fields=self._excluded_source_fields,
  )
  params = None

@@ -850,18 +889,27 @@ class OpensearchVectorClient:
  query_embedding,
  k,
  filters=filters,
+ excluded_source_fields=self._excluded_source_fields,
  )
  params = {
  "search_pipeline": self._search_pipeline,
  }
  elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
  search_query = self._lexical_search_query(
- self._text_field, query_str, k, filters=filters
+ self._text_field,
+ query_str,
+ k,
+ filters=filters,
+ excluded_source_fields=self._excluded_source_fields,
  )
  params = None
  else:
  search_query = self._knn_search_query(
- self._embedding_field, query_embedding, k, filters=filters
+ self._embedding_field,
+ query_embedding,
+ k,
+ filters=filters,
+ excluded_source_fields=self._excluded_source_fields,
  )
  params = None

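Taken together, the hunks above mean that when excluded_source_fields is set on the client, every generated search body (approximate k-NN, painless scripting, lexical, and hybrid) carries a _source exclusion. A rough sketch of the resulting approximate k-NN request body follows; the vector, k, and field names are illustrative, and the inner knn clause shape is assumed from the standard OpenSearch k-NN query format rather than shown in this diff.

    # Hedged sketch of the request body built by the patched query paths when
    # excluded_source_fields is configured; values are illustrative only.
    search_body = {
        "size": 4,
        "query": {
            "knn": {
                "embedding": {
                    "vector": [0.1, 0.2, 0.3],  # truncated example embedding
                    "k": 4,
                }
            }
        },
        # Added by this release: drop the configured fields from each hit's _source.
        "_source": {"exclude": ["embedding", "metadata.raw_html"]},
    }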
 
@@ -12,7 +12,7 @@ dev = [
  "pytest==7.2.1",
  "pytest-asyncio==0.21.0",
  "pytest-mock==3.11.1",
- "ruff==0.0.292",
+ "ruff==0.11.11",
  "types-Deprecated>=0.1.0",
  "types-PyYAML>=6.0.12.12,<7",
  "types-protobuf>=4.24.0.4,<5",
@@ -27,7 +27,7 @@ dev = [

  [project]
  name = "llama-index-vector-stores-opensearch"
- version = "0.5.4"
+ version = "0.5.6"
  description = "llama-index vector_stores opensearch integration"
  authors = [{name = "Your Name", email = "you@example.com"}]
  requires-python = ">=3.9,<4.0"
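
Downstream, the change is transparent to OpensearchVectorStore users: construct the client with excluded_source_fields (as in the earlier sketch) and query as usual. A hedged end-to-end sketch with illustrative values:

    # Hedged sketch: the client from the earlier example wired into the vector
    # store; excluded _source fields simply never appear in the returned hits.
    from llama_index.core.vector_stores.types import VectorStoreQuery
    from llama_index.vector_stores.opensearch import OpensearchVectorStore

    store = OpensearchVectorStore(client)  # client built with excluded_source_fields
    result = store.query(
        VectorStoreQuery(query_embedding=[0.1] * 1536, similarity_top_k=4)
    )
    # result.nodes is populated as before, minus the excluded source fields.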