llama-index-vector-stores-opensearch 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.

@@ -1,12 +1,18 @@
1
1
  """Elasticsearch/Opensearch vector store."""
2
+
3
+ import asyncio
2
4
  import json
3
5
  import uuid
4
6
  from typing import Any, Dict, Iterable, List, Optional, Union, cast
5
7
 
8
+ import nest_asyncio
9
+
10
+ from llama_index.core.bridge.pydantic import PrivateAttr
11
+
6
12
  from llama_index.core.schema import BaseNode, MetadataMode, TextNode
7
13
  from llama_index.core.vector_stores.types import (
8
14
  MetadataFilters,
9
- VectorStore,
15
+ BasePydanticVectorStore,
10
16
  VectorStoreQuery,
11
17
  VectorStoreQueryMode,
12
18
  VectorStoreQueryResult,
@@ -15,9 +21,9 @@ from llama_index.core.vector_stores.utils import (
15
21
  metadata_dict_to_node,
16
22
  node_to_metadata_dict,
17
23
  )
18
- from opensearchpy import OpenSearch
24
+ from opensearchpy import AsyncOpenSearch
19
25
  from opensearchpy.exceptions import NotFoundError
20
- from opensearchpy.helpers import bulk
26
+ from opensearchpy.helpers import async_bulk
21
27
 
22
28
  IMPORT_OPENSEARCH_PY_ERROR = (
23
29
  "Could not import OpenSearch. Please install it with `pip install opensearch-py`."
@@ -28,14 +34,14 @@ INVALID_HYBRID_QUERY_ERROR = (
28
34
  MATCH_ALL_QUERY = {"match_all": {}} # type: Dict
29
35
 
30
36
 
31
- def _import_opensearch() -> Any:
37
+ def _import_async_opensearch() -> Any:
32
38
  """Import OpenSearch if available, otherwise raise error."""
33
- return OpenSearch
39
+ return AsyncOpenSearch
34
40
 
35
41
 
36
- def _import_bulk() -> Any:
42
+ def _import_async_bulk() -> Any:
37
43
  """Import bulk if available, otherwise raise error."""
38
- return bulk
44
+ return async_bulk
39
45
 
40
46
 
41
47
  def _import_not_found_error() -> Any:
@@ -43,21 +49,21 @@ def _import_not_found_error() -> Any:
43
49
  return NotFoundError
44
50
 
45
51
 
46
- def _get_opensearch_client(opensearch_url: str, **kwargs: Any) -> Any:
47
- """Get OpenSearch client from the opensearch_url, otherwise raise error."""
52
+ def _get_async_opensearch_client(opensearch_url: str, **kwargs: Any) -> Any:
53
+ """Get AsyncOpenSearch client from the opensearch_url, otherwise raise error."""
48
54
  try:
49
- opensearch = _import_opensearch()
55
+ opensearch = _import_async_opensearch()
50
56
  client = opensearch(opensearch_url, **kwargs)
51
57
 
52
58
  except ValueError as e:
53
59
  raise ValueError(
54
- f"OpenSearch client string provided is not in proper format. "
60
+ f"AsyncOpenSearch client string provided is not in proper format. "
55
61
  f"Got error: {e} "
56
62
  )
57
63
  return client
58
64
 
59
65
 
60
- def _bulk_ingest_embeddings(
66
+ async def _bulk_ingest_embeddings(
61
67
  client: Any,
62
68
  index_name: str,
63
69
  embeddings: List[List[float]],
@@ -70,20 +76,20 @@ def _bulk_ingest_embeddings(
70
76
  max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,
71
77
  is_aoss: bool = False,
72
78
  ) -> List[str]:
73
- """Bulk Ingest Embeddings into given index."""
79
+ """Async Bulk Ingest Embeddings into given index."""
74
80
  if not mapping:
75
81
  mapping = {}
76
82
 
77
- bulk = _import_bulk()
83
+ async_bulk = _import_async_bulk()
78
84
  not_found_error = _import_not_found_error()
79
85
  requests = []
80
86
  return_ids = []
81
87
  mapping = mapping
82
88
 
83
89
  try:
84
- client.indices.get(index=index_name)
90
+ await client.indices.get(index=index_name)
85
91
  except not_found_error:
86
- client.indices.create(index=index_name, body=mapping)
92
+ await client.indices.create(index=index_name, body=mapping)
87
93
 
88
94
  for i, text in enumerate(texts):
89
95
  metadata = metadatas[i] if metadatas else {}
@@ -101,9 +107,9 @@ def _bulk_ingest_embeddings(
101
107
  request["_id"] = _id
102
108
  requests.append(request)
103
109
  return_ids.append(_id)
104
- bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
110
+ await async_bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
105
111
  if not is_aoss:
106
- client.indices.refresh(index=index_name)
112
+ await client.indices.refresh(index=index_name)
107
113
  return return_ids
108
114
 
109
115
 
@@ -134,7 +140,8 @@ def _knn_search_query(
134
140
  k: int,
135
141
  filters: Optional[MetadataFilters] = None,
136
142
  ) -> Dict:
137
- """Do knn search.
143
+ """
144
+ Do knn search.
138
145
 
139
146
  If there are no filters do approx-knn search.
140
147
  If there are (pre)-filters, do an exhaustive exact knn search using 'painless
@@ -242,7 +249,8 @@ def _is_aoss_enabled(http_auth: Any) -> bool:
242
249
 
243
250
 
244
251
  class OpensearchVectorClient:
245
- """Object encapsulating an Opensearch index that has vector search enabled.
252
+ """
253
+ Object encapsulating an Opensearch index that has vector search enabled.
246
254
 
247
255
  If the index does not yet exist, it is created during init.
248
256
  Therefore, the underlying index is assumed to either:
@@ -310,15 +318,22 @@ class OpensearchVectorClient:
310
318
  }
311
319
  },
312
320
  }
313
- self._os_client = _get_opensearch_client(self._endpoint, **kwargs)
321
+ self._os_client = _get_async_opensearch_client(self._endpoint, **kwargs)
314
322
  not_found_error = _import_not_found_error()
323
+ event_loop = asyncio.get_event_loop()
315
324
  try:
316
- self._os_client.indices.get(index=self._index)
325
+ event_loop.run_until_complete(
326
+ self._os_client.indices.get(index=self._index)
327
+ )
317
328
  except not_found_error:
318
- self._os_client.indices.create(index=self._index, body=idx_conf)
319
- self._os_client.indices.refresh(index=self._index)
329
+ event_loop.run_until_complete(
330
+ self._os_client.indices.create(index=self._index, body=idx_conf)
331
+ )
332
+ event_loop.run_until_complete(
333
+ self._os_client.indices.refresh(index=self._index)
334
+ )
320
335
 
321
- def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
336
+ async def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
322
337
  """Store results in the index."""
323
338
  embeddings: List[List[float]] = []
324
339
  texts: List[str] = []
@@ -330,7 +345,7 @@ class OpensearchVectorClient:
330
345
  texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
331
346
  metadatas.append(node_to_metadata_dict(node, remove_text=True))
332
347
 
333
- return _bulk_ingest_embeddings(
348
+ return await _bulk_ingest_embeddings(
334
349
  self._os_client,
335
350
  self._index,
336
351
  embeddings,
@@ -344,15 +359,16 @@ class OpensearchVectorClient:
344
359
  is_aoss=self.is_aoss,
345
360
  )
346
361
 
347
- def delete_doc_id(self, doc_id: str) -> None:
348
- """Delete a document.
362
+ async def delete_doc_id(self, doc_id: str) -> None:
363
+ """
364
+ Delete a document.
349
365
 
350
366
  Args:
351
367
  doc_id (str): document id
352
368
  """
353
- self._os_client.delete(index=self._index, id=doc_id)
369
+ await self._os_client.delete(index=self._index, id=doc_id)
354
370
 
355
- def query(
371
+ async def aquery(
356
372
  self,
357
373
  query_mode: VectorStoreQueryMode,
358
374
  query_str: Optional[str],
@@ -378,7 +394,7 @@ class OpensearchVectorClient:
378
394
  )
379
395
  params = None
380
396
 
381
- res = self._os_client.search(
397
+ res = await self._os_client.search(
382
398
  index=self._index, body=search_query, params=params
383
399
  )
384
400
  nodes = []
@@ -418,8 +434,9 @@ class OpensearchVectorClient:
418
434
  return VectorStoreQueryResult(nodes=nodes, ids=ids, similarities=scores)
419
435
 
420
436
 
421
- class OpensearchVectorStore(VectorStore):
422
- """Elasticsearch/Opensearch vector store.
437
+ class OpensearchVectorStore(BasePydanticVectorStore):
438
+ """
439
+ Elasticsearch/Opensearch vector store.
423
440
 
424
441
  Args:
425
442
  client (OpensearchVectorClient): Vector index client to use
@@ -427,12 +444,15 @@ class OpensearchVectorStore(VectorStore):
427
444
  """
428
445
 
429
446
  stores_text: bool = True
447
+ _client: OpensearchVectorClient = PrivateAttr(default=None)
430
448
 
431
449
  def __init__(
432
450
  self,
433
451
  client: OpensearchVectorClient,
434
452
  ) -> None:
435
453
  """Initialize params."""
454
+ super().__init__()
455
+ nest_asyncio.apply()
436
456
  self._client = client
437
457
 
438
458
  @property
@@ -445,13 +465,30 @@ class OpensearchVectorStore(VectorStore):
445
465
  nodes: List[BaseNode],
446
466
  **add_kwargs: Any,
447
467
  ) -> List[str]:
448
- """Add nodes to index.
468
+ """
469
+ Add nodes to index.
470
+
471
+ Args:
472
+ nodes: List[BaseNode]: list of nodes with embeddings.
473
+
474
+ """
475
+ return asyncio.get_event_loop().run_until_complete(
476
+ self.async_add(nodes, **add_kwargs)
477
+ )
478
+
479
+ async def async_add(
480
+ self,
481
+ nodes: List[BaseNode],
482
+ **add_kwargs: Any,
483
+ ) -> List[str]:
484
+ """
485
+ Async add nodes to index.
449
486
 
450
487
  Args:
451
488
  nodes: List[BaseNode]: list of nodes with embeddings.
452
489
 
453
490
  """
454
- self._client.index_results(nodes)
491
+ await self._client.index_results(nodes)
455
492
  return [result.node_id for result in nodes]
456
493
 
457
494
  def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
@@ -462,10 +499,35 @@ class OpensearchVectorStore(VectorStore):
462
499
  ref_doc_id (str): The doc_id of the document to delete.
463
500
 
464
501
  """
465
- self._client.delete_doc_id(ref_doc_id)
502
+ asyncio.get_event_loop().run_until_complete(
503
+ self.adelete(ref_doc_id, **delete_kwargs)
504
+ )
505
+
506
+ async def adelete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
507
+ """
508
+ Async delete nodes using with ref_doc_id.
509
+
510
+ Args:
511
+ ref_doc_id (str): The doc_id of the document to delete.
512
+
513
+ """
514
+ await self._client.delete_doc_id(ref_doc_id)
466
515
 
467
516
  def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
468
- """Query index for top k most similar nodes.
517
+ """
518
+ Query index for top k most similar nodes.
519
+
520
+ Args:
521
+ query (VectorStoreQuery): Store query object.
522
+
523
+ """
524
+ return asyncio.get_event_loop().run_until_complete(self.aquery(query, **kwargs))
525
+
526
+ async def aquery(
527
+ self, query: VectorStoreQuery, **kwargs: Any
528
+ ) -> VectorStoreQueryResult:
529
+ """
530
+ Async query index for top k most similar nodes.
469
531
 
470
532
  Args:
471
533
  query (VectorStoreQuery): Store query object.
@@ -473,7 +535,7 @@ class OpensearchVectorStore(VectorStore):
473
535
  """
474
536
  query_embedding = cast(List[float], query.query_embedding)
475
537
 
476
- return self._client.query(
538
+ return await self._client.aquery(
477
539
  query.mode,
478
540
  query.query_str,
479
541
  query_embedding,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  License: MIT
6
6
  Author: Your Name
@@ -11,9 +11,8 @@ Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.9
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
14
  Requires-Dist: llama-index-core (>=0.10.1,<0.11.0)
16
- Requires-Dist: opensearch-py (>=2.4.2,<3.0.0)
15
+ Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
17
16
  Description-Content-Type: text/markdown
18
17
 
19
18
  # LlamaIndex Vector_Stores Integration: Opensearch
@@ -0,0 +1,5 @@
1
+ llama_index/vector_stores/opensearch/__init__.py,sha256=U1_XAkZb6zcskOk4s10NB8Tjs9AZRGdRQLzOGpbWdBA,176
2
+ llama_index/vector_stores/opensearch/base.py,sha256=IHHfsgsjY_9JKroivl-cFVyS74kQnzIIJCDMqEP7tfk,16877
3
+ llama_index_vector_stores_opensearch-0.1.5.dist-info/METADATA,sha256=fsdPGHC07vUnALJftvDl8ZIFTEnQZxwZo1B1lGStLA0,677
4
+ llama_index_vector_stores_opensearch-0.1.5.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
5
+ llama_index_vector_stores_opensearch-0.1.5.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.8.1
2
+ Generator: poetry-core 1.7.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,5 +0,0 @@
1
- llama_index/vector_stores/opensearch/__init__.py,sha256=U1_XAkZb6zcskOk4s10NB8Tjs9AZRGdRQLzOGpbWdBA,176
2
- llama_index/vector_stores/opensearch/base.py,sha256=ohHZzOn3XAb3rXNFTq6YC6fNuGgQuKMdHIwPY0f2Wmo,15115
3
- llama_index_vector_stores_opensearch-0.1.3.dist-info/METADATA,sha256=yN7RT9C1S24AA0S9AGXoyzSm693GeKZtbExCp7bjc9A,721
4
- llama_index_vector_stores_opensearch-0.1.3.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
5
- llama_index_vector_stores_opensearch-0.1.3.dist-info/RECORD,,