llama-index-vector-stores-opensearch 0.1.4__tar.gz → 0.1.5__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of llama-index-vector-stores-opensearch might be problematic. See the package registry's advisory page for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  License: MIT
6
6
  Author: Your Name
@@ -11,9 +11,8 @@ Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.9
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
14
  Requires-Dist: llama-index-core (>=0.10.1,<0.11.0)
16
- Requires-Dist: opensearch-py (>=2.4.2,<3.0.0)
15
+ Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
17
16
  Description-Content-Type: text/markdown
18
17
 
19
18
  # LlamaIndex Vector_Stores Integration: Opensearch
@@ -1,7 +1,12 @@
1
1
  """Elasticsearch/Opensearch vector store."""
2
+
3
+ import asyncio
2
4
  import json
3
5
  import uuid
4
6
  from typing import Any, Dict, Iterable, List, Optional, Union, cast
7
+
8
+ import nest_asyncio
9
+
5
10
  from llama_index.core.bridge.pydantic import PrivateAttr
6
11
 
7
12
  from llama_index.core.schema import BaseNode, MetadataMode, TextNode
@@ -16,9 +21,9 @@ from llama_index.core.vector_stores.utils import (
16
21
  metadata_dict_to_node,
17
22
  node_to_metadata_dict,
18
23
  )
19
- from opensearchpy import OpenSearch
24
+ from opensearchpy import AsyncOpenSearch
20
25
  from opensearchpy.exceptions import NotFoundError
21
- from opensearchpy.helpers import bulk
26
+ from opensearchpy.helpers import async_bulk
22
27
 
23
28
  IMPORT_OPENSEARCH_PY_ERROR = (
24
29
  "Could not import OpenSearch. Please install it with `pip install opensearch-py`."
@@ -29,14 +34,14 @@ INVALID_HYBRID_QUERY_ERROR = (
29
34
  MATCH_ALL_QUERY = {"match_all": {}} # type: Dict
30
35
 
31
36
 
32
- def _import_opensearch() -> Any:
37
+ def _import_async_opensearch() -> Any:
33
38
  """Import OpenSearch if available, otherwise raise error."""
34
- return OpenSearch
39
+ return AsyncOpenSearch
35
40
 
36
41
 
37
- def _import_bulk() -> Any:
42
+ def _import_async_bulk() -> Any:
38
43
  """Import bulk if available, otherwise raise error."""
39
- return bulk
44
+ return async_bulk
40
45
 
41
46
 
42
47
  def _import_not_found_error() -> Any:
@@ -44,21 +49,21 @@ def _import_not_found_error() -> Any:
44
49
  return NotFoundError
45
50
 
46
51
 
47
- def _get_opensearch_client(opensearch_url: str, **kwargs: Any) -> Any:
48
- """Get OpenSearch client from the opensearch_url, otherwise raise error."""
52
+ def _get_async_opensearch_client(opensearch_url: str, **kwargs: Any) -> Any:
53
+ """Get AsyncOpenSearch client from the opensearch_url, otherwise raise error."""
49
54
  try:
50
- opensearch = _import_opensearch()
55
+ opensearch = _import_async_opensearch()
51
56
  client = opensearch(opensearch_url, **kwargs)
52
57
 
53
58
  except ValueError as e:
54
59
  raise ValueError(
55
- f"OpenSearch client string provided is not in proper format. "
60
+ f"AsyncOpenSearch client string provided is not in proper format. "
56
61
  f"Got error: {e} "
57
62
  )
58
63
  return client
59
64
 
60
65
 
61
- def _bulk_ingest_embeddings(
66
+ async def _bulk_ingest_embeddings(
62
67
  client: Any,
63
68
  index_name: str,
64
69
  embeddings: List[List[float]],
@@ -71,20 +76,20 @@ def _bulk_ingest_embeddings(
71
76
  max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,
72
77
  is_aoss: bool = False,
73
78
  ) -> List[str]:
74
- """Bulk Ingest Embeddings into given index."""
79
+ """Async Bulk Ingest Embeddings into given index."""
75
80
  if not mapping:
76
81
  mapping = {}
77
82
 
78
- bulk = _import_bulk()
83
+ async_bulk = _import_async_bulk()
79
84
  not_found_error = _import_not_found_error()
80
85
  requests = []
81
86
  return_ids = []
82
87
  mapping = mapping
83
88
 
84
89
  try:
85
- client.indices.get(index=index_name)
90
+ await client.indices.get(index=index_name)
86
91
  except not_found_error:
87
- client.indices.create(index=index_name, body=mapping)
92
+ await client.indices.create(index=index_name, body=mapping)
88
93
 
89
94
  for i, text in enumerate(texts):
90
95
  metadata = metadatas[i] if metadatas else {}
@@ -102,9 +107,9 @@ def _bulk_ingest_embeddings(
102
107
  request["_id"] = _id
103
108
  requests.append(request)
104
109
  return_ids.append(_id)
105
- bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
110
+ await async_bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
106
111
  if not is_aoss:
107
- client.indices.refresh(index=index_name)
112
+ await client.indices.refresh(index=index_name)
108
113
  return return_ids
109
114
 
110
115
 
@@ -135,7 +140,8 @@ def _knn_search_query(
135
140
  k: int,
136
141
  filters: Optional[MetadataFilters] = None,
137
142
  ) -> Dict:
138
- """Do knn search.
143
+ """
144
+ Do knn search.
139
145
 
140
146
  If there are no filters do approx-knn search.
141
147
  If there are (pre)-filters, do an exhaustive exact knn search using 'painless
@@ -243,7 +249,8 @@ def _is_aoss_enabled(http_auth: Any) -> bool:
243
249
 
244
250
 
245
251
  class OpensearchVectorClient:
246
- """Object encapsulating an Opensearch index that has vector search enabled.
252
+ """
253
+ Object encapsulating an Opensearch index that has vector search enabled.
247
254
 
248
255
  If the index does not yet exist, it is created during init.
249
256
  Therefore, the underlying index is assumed to either:
@@ -311,15 +318,22 @@ class OpensearchVectorClient:
311
318
  }
312
319
  },
313
320
  }
314
- self._os_client = _get_opensearch_client(self._endpoint, **kwargs)
321
+ self._os_client = _get_async_opensearch_client(self._endpoint, **kwargs)
315
322
  not_found_error = _import_not_found_error()
323
+ event_loop = asyncio.get_event_loop()
316
324
  try:
317
- self._os_client.indices.get(index=self._index)
325
+ event_loop.run_until_complete(
326
+ self._os_client.indices.get(index=self._index)
327
+ )
318
328
  except not_found_error:
319
- self._os_client.indices.create(index=self._index, body=idx_conf)
320
- self._os_client.indices.refresh(index=self._index)
329
+ event_loop.run_until_complete(
330
+ self._os_client.indices.create(index=self._index, body=idx_conf)
331
+ )
332
+ event_loop.run_until_complete(
333
+ self._os_client.indices.refresh(index=self._index)
334
+ )
321
335
 
322
- def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
336
+ async def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
323
337
  """Store results in the index."""
324
338
  embeddings: List[List[float]] = []
325
339
  texts: List[str] = []
@@ -331,7 +345,7 @@ class OpensearchVectorClient:
331
345
  texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
332
346
  metadatas.append(node_to_metadata_dict(node, remove_text=True))
333
347
 
334
- return _bulk_ingest_embeddings(
348
+ return await _bulk_ingest_embeddings(
335
349
  self._os_client,
336
350
  self._index,
337
351
  embeddings,
@@ -345,16 +359,16 @@ class OpensearchVectorClient:
345
359
  is_aoss=self.is_aoss,
346
360
  )
347
361
 
348
- def delete_doc_id(self, doc_id: str) -> None:
349
- """Delete a document.
362
+ async def delete_doc_id(self, doc_id: str) -> None:
363
+ """
364
+ Delete a document.
350
365
 
351
366
  Args:
352
367
  doc_id (str): document id
353
368
  """
354
- body = {"query": {"match": {"metadata.ref_doc_id": doc_id}}}
355
- self._os_client.delete_by_query(index=self._index, body=body)
369
+ await self._os_client.delete(index=self._index, id=doc_id)
356
370
 
357
- def query(
371
+ async def aquery(
358
372
  self,
359
373
  query_mode: VectorStoreQueryMode,
360
374
  query_str: Optional[str],
@@ -380,7 +394,7 @@ class OpensearchVectorClient:
380
394
  )
381
395
  params = None
382
396
 
383
- res = self._os_client.search(
397
+ res = await self._os_client.search(
384
398
  index=self._index, body=search_query, params=params
385
399
  )
386
400
  nodes = []
@@ -421,7 +435,8 @@ class OpensearchVectorClient:
421
435
 
422
436
 
423
437
  class OpensearchVectorStore(BasePydanticVectorStore):
424
- """Elasticsearch/Opensearch vector store.
438
+ """
439
+ Elasticsearch/Opensearch vector store.
425
440
 
426
441
  Args:
427
442
  client (OpensearchVectorClient): Vector index client to use
@@ -437,6 +452,7 @@ class OpensearchVectorStore(BasePydanticVectorStore):
437
452
  ) -> None:
438
453
  """Initialize params."""
439
454
  super().__init__()
455
+ nest_asyncio.apply()
440
456
  self._client = client
441
457
 
442
458
  @property
@@ -449,13 +465,30 @@ class OpensearchVectorStore(BasePydanticVectorStore):
449
465
  nodes: List[BaseNode],
450
466
  **add_kwargs: Any,
451
467
  ) -> List[str]:
452
- """Add nodes to index.
468
+ """
469
+ Add nodes to index.
470
+
471
+ Args:
472
+ nodes: List[BaseNode]: list of nodes with embeddings.
473
+
474
+ """
475
+ return asyncio.get_event_loop().run_until_complete(
476
+ self.async_add(nodes, **add_kwargs)
477
+ )
478
+
479
+ async def async_add(
480
+ self,
481
+ nodes: List[BaseNode],
482
+ **add_kwargs: Any,
483
+ ) -> List[str]:
484
+ """
485
+ Async add nodes to index.
453
486
 
454
487
  Args:
455
488
  nodes: List[BaseNode]: list of nodes with embeddings.
456
489
 
457
490
  """
458
- self._client.index_results(nodes)
491
+ await self._client.index_results(nodes)
459
492
  return [result.node_id for result in nodes]
460
493
 
461
494
  def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
@@ -466,10 +499,35 @@ class OpensearchVectorStore(BasePydanticVectorStore):
466
499
  ref_doc_id (str): The doc_id of the document to delete.
467
500
 
468
501
  """
469
- self._client.delete_doc_id(ref_doc_id)
502
+ asyncio.get_event_loop().run_until_complete(
503
+ self.adelete(ref_doc_id, **delete_kwargs)
504
+ )
505
+
506
+ async def adelete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
507
+ """
508
+ Async delete nodes using with ref_doc_id.
509
+
510
+ Args:
511
+ ref_doc_id (str): The doc_id of the document to delete.
512
+
513
+ """
514
+ await self._client.delete_doc_id(ref_doc_id)
470
515
 
471
516
  def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
472
- """Query index for top k most similar nodes.
517
+ """
518
+ Query index for top k most similar nodes.
519
+
520
+ Args:
521
+ query (VectorStoreQuery): Store query object.
522
+
523
+ """
524
+ return asyncio.get_event_loop().run_until_complete(self.aquery(query, **kwargs))
525
+
526
+ async def aquery(
527
+ self, query: VectorStoreQuery, **kwargs: Any
528
+ ) -> VectorStoreQueryResult:
529
+ """
530
+ Async query index for top k most similar nodes.
473
531
 
474
532
  Args:
475
533
  query (VectorStoreQuery): Store query object.
@@ -477,7 +535,7 @@ class OpensearchVectorStore(BasePydanticVectorStore):
477
535
  """
478
536
  query_embedding = cast(List[float], query.query_embedding)
479
537
 
480
- return self._client.query(
538
+ return await self._client.aquery(
481
539
  query.mode,
482
540
  query.query_str,
483
541
  query_embedding,
@@ -27,12 +27,15 @@ exclude = ["**/BUILD"]
27
27
  license = "MIT"
28
28
  name = "llama-index-vector-stores-opensearch"
29
29
  readme = "README.md"
30
- version = "0.1.4"
30
+ version = "0.1.5"
31
31
 
32
32
  [tool.poetry.dependencies]
33
33
  python = ">=3.8.1,<4.0"
34
34
  llama-index-core = "^0.10.1"
35
- opensearch-py = "^2.4.2"
35
+
36
+ [tool.poetry.dependencies.opensearch-py]
37
+ extras = ["async"]
38
+ version = "^2.4.2"
36
39
 
37
40
  [tool.poetry.group.dev.dependencies]
38
41
  ipython = "8.10.0"