llama-index-vector-stores-opensearch 0.1.10__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-index-vector-stores-opensearch might be problematic; see the registry's advisory page for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.1.10
3
+ Version: 0.1.12
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  License: MIT
6
6
  Author: Your Name
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.9
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
14
15
  Requires-Dist: llama-index-core (>=0.10.1,<0.11.0)
15
16
  Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
16
17
  Description-Content-Type: text/markdown
@@ -20,6 +20,7 @@ from llama_index.core.vector_stores.utils import (
20
20
  node_to_metadata_dict,
21
21
  )
22
22
  from opensearchpy import AsyncOpenSearch
23
+ from opensearchpy.client import Client as OSClient
23
24
  from opensearchpy.exceptions import NotFoundError
24
25
  from opensearchpy.helpers import async_bulk
25
26
 
@@ -64,8 +65,10 @@ class OpensearchVectorClient:
64
65
  embedding_field: str = "embedding",
65
66
  text_field: str = "content",
66
67
  method: Optional[dict] = None,
68
+ engine: Optional[str] = "nmslib",
67
69
  max_chunk_bytes: int = 1 * 1024 * 1024,
68
70
  search_pipeline: Optional[str] = None,
71
+ os_client: Optional[OSClient] = None,
69
72
  **kwargs: Any,
70
73
  ):
71
74
  """Init params."""
@@ -73,7 +76,7 @@ class OpensearchVectorClient:
73
76
  method = {
74
77
  "name": "hnsw",
75
78
  "space_type": "l2",
76
- "engine": "nmslib",
79
+ "engine": engine,
77
80
  "parameters": {"ef_construction": 256, "m": 48},
78
81
  }
79
82
  if embedding_field is None:
@@ -102,7 +105,9 @@ class OpensearchVectorClient:
102
105
  }
103
106
  },
104
107
  }
105
- self._os_client = self._get_async_opensearch_client(self._endpoint, **kwargs)
108
+ self._os_client = os_client or self._get_async_opensearch_client(
109
+ self._endpoint, **kwargs
110
+ )
106
111
  not_found_error = self._import_not_found_error()
107
112
 
108
113
  event_loop = asyncio.get_event_loop()
@@ -265,17 +270,34 @@ class OpensearchVectorClient:
265
270
  k: int,
266
271
  filters: Optional[MetadataFilters] = None,
267
272
  ) -> Dict:
268
- knn_query = self._knn_search_query(
269
- embedding_field, query_embedding, k, filters
270
- )["query"]
271
- lexical_query = {"must": {"match": {text_field: {"query": query_str}}}}
273
+ knn_query = self._knn_search_query(embedding_field, query_embedding, k, filters)
274
+ lexical_query = self._lexical_search_query(text_field, query_str, k, filters)
275
+
276
+ return {
277
+ "size": k,
278
+ "query": {
279
+ "hybrid": {"queries": [lexical_query["query"], knn_query["query"]]}
280
+ },
281
+ }
282
+
283
+ def _lexical_search_query(
284
+ self,
285
+ text_field: str,
286
+ query_str: str,
287
+ k: int,
288
+ filters: Optional[MetadataFilters] = None,
289
+ ) -> Dict:
290
+ lexical_query = {
291
+ "bool": {"must": {"match": {text_field: {"query": query_str}}}}
292
+ }
272
293
 
273
294
  parsed_filters = self._parse_filters(filters)
274
295
  if len(parsed_filters) > 0:
275
- lexical_query["filter"] = parsed_filters
296
+ lexical_query["bool"]["filter"] = parsed_filters
297
+
276
298
  return {
277
299
  "size": k,
278
- "query": {"hybrid": {"queries": [{"bool": lexical_query}, knn_query]}},
300
+ "query": lexical_query,
279
301
  }
280
302
 
281
303
  def __get_painless_scripting_source(
@@ -389,6 +411,11 @@ class OpensearchVectorClient:
389
411
  params = {
390
412
  "search_pipeline": self._search_pipeline,
391
413
  }
414
+ elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
415
+ search_query = self._lexical_search_query(
416
+ self._text_field, query_str, k, filters=filters
417
+ )
418
+ params = None
392
419
  else:
393
420
  search_query = self._knn_search_query(
394
421
  self._embedding_field, query_embedding, k, filters=filters
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
27
27
  license = "MIT"
28
28
  name = "llama-index-vector-stores-opensearch"
29
29
  readme = "README.md"
30
- version = "0.1.10"
30
+ version = "0.1.12"
31
31
 
32
32
  [tool.poetry.dependencies]
33
33
  python = ">=3.8.1,<4.0"