llama-index-vector-stores-opensearch 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.
- llama_index/vector_stores/opensearch/base.py +95 -37
- {llama_index_vector_stores_opensearch-0.1.4.dist-info → llama_index_vector_stores_opensearch-0.1.5.dist-info}/METADATA +2 -3
- llama_index_vector_stores_opensearch-0.1.5.dist-info/RECORD +5 -0
- {llama_index_vector_stores_opensearch-0.1.4.dist-info → llama_index_vector_stores_opensearch-0.1.5.dist-info}/WHEEL +1 -1
- llama_index_vector_stores_opensearch-0.1.4.dist-info/RECORD +0 -5
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
"""Elasticsearch/Opensearch vector store."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
2
4
|
import json
|
|
3
5
|
import uuid
|
|
4
6
|
from typing import Any, Dict, Iterable, List, Optional, Union, cast
|
|
7
|
+
|
|
8
|
+
import nest_asyncio
|
|
9
|
+
|
|
5
10
|
from llama_index.core.bridge.pydantic import PrivateAttr
|
|
6
11
|
|
|
7
12
|
from llama_index.core.schema import BaseNode, MetadataMode, TextNode
|
|
@@ -16,9 +21,9 @@ from llama_index.core.vector_stores.utils import (
|
|
|
16
21
|
metadata_dict_to_node,
|
|
17
22
|
node_to_metadata_dict,
|
|
18
23
|
)
|
|
19
|
-
from opensearchpy import
|
|
24
|
+
from opensearchpy import AsyncOpenSearch
|
|
20
25
|
from opensearchpy.exceptions import NotFoundError
|
|
21
|
-
from opensearchpy.helpers import
|
|
26
|
+
from opensearchpy.helpers import async_bulk
|
|
22
27
|
|
|
23
28
|
IMPORT_OPENSEARCH_PY_ERROR = (
|
|
24
29
|
"Could not import OpenSearch. Please install it with `pip install opensearch-py`."
|
|
@@ -29,14 +34,14 @@ INVALID_HYBRID_QUERY_ERROR = (
|
|
|
29
34
|
MATCH_ALL_QUERY = {"match_all": {}} # type: Dict
|
|
30
35
|
|
|
31
36
|
|
|
32
|
-
def
|
|
37
|
+
def _import_async_opensearch() -> Any:
|
|
33
38
|
"""Import OpenSearch if available, otherwise raise error."""
|
|
34
|
-
return
|
|
39
|
+
return AsyncOpenSearch
|
|
35
40
|
|
|
36
41
|
|
|
37
|
-
def
|
|
42
|
+
def _import_async_bulk() -> Any:
|
|
38
43
|
"""Import bulk if available, otherwise raise error."""
|
|
39
|
-
return
|
|
44
|
+
return async_bulk
|
|
40
45
|
|
|
41
46
|
|
|
42
47
|
def _import_not_found_error() -> Any:
|
|
@@ -44,21 +49,21 @@ def _import_not_found_error() -> Any:
|
|
|
44
49
|
return NotFoundError
|
|
45
50
|
|
|
46
51
|
|
|
47
|
-
def
|
|
48
|
-
"""Get
|
|
52
|
+
def _get_async_opensearch_client(opensearch_url: str, **kwargs: Any) -> Any:
|
|
53
|
+
"""Get AsyncOpenSearch client from the opensearch_url, otherwise raise error."""
|
|
49
54
|
try:
|
|
50
|
-
opensearch =
|
|
55
|
+
opensearch = _import_async_opensearch()
|
|
51
56
|
client = opensearch(opensearch_url, **kwargs)
|
|
52
57
|
|
|
53
58
|
except ValueError as e:
|
|
54
59
|
raise ValueError(
|
|
55
|
-
f"
|
|
60
|
+
f"AsyncOpenSearch client string provided is not in proper format. "
|
|
56
61
|
f"Got error: {e} "
|
|
57
62
|
)
|
|
58
63
|
return client
|
|
59
64
|
|
|
60
65
|
|
|
61
|
-
def _bulk_ingest_embeddings(
|
|
66
|
+
async def _bulk_ingest_embeddings(
|
|
62
67
|
client: Any,
|
|
63
68
|
index_name: str,
|
|
64
69
|
embeddings: List[List[float]],
|
|
@@ -71,20 +76,20 @@ def _bulk_ingest_embeddings(
|
|
|
71
76
|
max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,
|
|
72
77
|
is_aoss: bool = False,
|
|
73
78
|
) -> List[str]:
|
|
74
|
-
"""Bulk Ingest Embeddings into given index."""
|
|
79
|
+
"""Async Bulk Ingest Embeddings into given index."""
|
|
75
80
|
if not mapping:
|
|
76
81
|
mapping = {}
|
|
77
82
|
|
|
78
|
-
|
|
83
|
+
async_bulk = _import_async_bulk()
|
|
79
84
|
not_found_error = _import_not_found_error()
|
|
80
85
|
requests = []
|
|
81
86
|
return_ids = []
|
|
82
87
|
mapping = mapping
|
|
83
88
|
|
|
84
89
|
try:
|
|
85
|
-
client.indices.get(index=index_name)
|
|
90
|
+
await client.indices.get(index=index_name)
|
|
86
91
|
except not_found_error:
|
|
87
|
-
client.indices.create(index=index_name, body=mapping)
|
|
92
|
+
await client.indices.create(index=index_name, body=mapping)
|
|
88
93
|
|
|
89
94
|
for i, text in enumerate(texts):
|
|
90
95
|
metadata = metadatas[i] if metadatas else {}
|
|
@@ -102,9 +107,9 @@ def _bulk_ingest_embeddings(
|
|
|
102
107
|
request["_id"] = _id
|
|
103
108
|
requests.append(request)
|
|
104
109
|
return_ids.append(_id)
|
|
105
|
-
|
|
110
|
+
await async_bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
|
|
106
111
|
if not is_aoss:
|
|
107
|
-
client.indices.refresh(index=index_name)
|
|
112
|
+
await client.indices.refresh(index=index_name)
|
|
108
113
|
return return_ids
|
|
109
114
|
|
|
110
115
|
|
|
@@ -135,7 +140,8 @@ def _knn_search_query(
|
|
|
135
140
|
k: int,
|
|
136
141
|
filters: Optional[MetadataFilters] = None,
|
|
137
142
|
) -> Dict:
|
|
138
|
-
"""
|
|
143
|
+
"""
|
|
144
|
+
Do knn search.
|
|
139
145
|
|
|
140
146
|
If there are no filters do approx-knn search.
|
|
141
147
|
If there are (pre)-filters, do an exhaustive exact knn search using 'painless
|
|
@@ -243,7 +249,8 @@ def _is_aoss_enabled(http_auth: Any) -> bool:
|
|
|
243
249
|
|
|
244
250
|
|
|
245
251
|
class OpensearchVectorClient:
|
|
246
|
-
"""
|
|
252
|
+
"""
|
|
253
|
+
Object encapsulating an Opensearch index that has vector search enabled.
|
|
247
254
|
|
|
248
255
|
If the index does not yet exist, it is created during init.
|
|
249
256
|
Therefore, the underlying index is assumed to either:
|
|
@@ -311,15 +318,22 @@ class OpensearchVectorClient:
|
|
|
311
318
|
}
|
|
312
319
|
},
|
|
313
320
|
}
|
|
314
|
-
self._os_client =
|
|
321
|
+
self._os_client = _get_async_opensearch_client(self._endpoint, **kwargs)
|
|
315
322
|
not_found_error = _import_not_found_error()
|
|
323
|
+
event_loop = asyncio.get_event_loop()
|
|
316
324
|
try:
|
|
317
|
-
|
|
325
|
+
event_loop.run_until_complete(
|
|
326
|
+
self._os_client.indices.get(index=self._index)
|
|
327
|
+
)
|
|
318
328
|
except not_found_error:
|
|
319
|
-
|
|
320
|
-
|
|
329
|
+
event_loop.run_until_complete(
|
|
330
|
+
self._os_client.indices.create(index=self._index, body=idx_conf)
|
|
331
|
+
)
|
|
332
|
+
event_loop.run_until_complete(
|
|
333
|
+
self._os_client.indices.refresh(index=self._index)
|
|
334
|
+
)
|
|
321
335
|
|
|
322
|
-
def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
336
|
+
async def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
323
337
|
"""Store results in the index."""
|
|
324
338
|
embeddings: List[List[float]] = []
|
|
325
339
|
texts: List[str] = []
|
|
@@ -331,7 +345,7 @@ class OpensearchVectorClient:
|
|
|
331
345
|
texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
|
|
332
346
|
metadatas.append(node_to_metadata_dict(node, remove_text=True))
|
|
333
347
|
|
|
334
|
-
return _bulk_ingest_embeddings(
|
|
348
|
+
return await _bulk_ingest_embeddings(
|
|
335
349
|
self._os_client,
|
|
336
350
|
self._index,
|
|
337
351
|
embeddings,
|
|
@@ -345,16 +359,16 @@ class OpensearchVectorClient:
|
|
|
345
359
|
is_aoss=self.is_aoss,
|
|
346
360
|
)
|
|
347
361
|
|
|
348
|
-
def delete_doc_id(self, doc_id: str) -> None:
|
|
349
|
-
"""
|
|
362
|
+
async def delete_doc_id(self, doc_id: str) -> None:
|
|
363
|
+
"""
|
|
364
|
+
Delete a document.
|
|
350
365
|
|
|
351
366
|
Args:
|
|
352
367
|
doc_id (str): document id
|
|
353
368
|
"""
|
|
354
|
-
|
|
355
|
-
self._os_client.delete_by_query(index=self._index, body=body)
|
|
369
|
+
await self._os_client.delete(index=self._index, id=doc_id)
|
|
356
370
|
|
|
357
|
-
def
|
|
371
|
+
async def aquery(
|
|
358
372
|
self,
|
|
359
373
|
query_mode: VectorStoreQueryMode,
|
|
360
374
|
query_str: Optional[str],
|
|
@@ -380,7 +394,7 @@ class OpensearchVectorClient:
|
|
|
380
394
|
)
|
|
381
395
|
params = None
|
|
382
396
|
|
|
383
|
-
res = self._os_client.search(
|
|
397
|
+
res = await self._os_client.search(
|
|
384
398
|
index=self._index, body=search_query, params=params
|
|
385
399
|
)
|
|
386
400
|
nodes = []
|
|
@@ -421,7 +435,8 @@ class OpensearchVectorClient:
|
|
|
421
435
|
|
|
422
436
|
|
|
423
437
|
class OpensearchVectorStore(BasePydanticVectorStore):
|
|
424
|
-
"""
|
|
438
|
+
"""
|
|
439
|
+
Elasticsearch/Opensearch vector store.
|
|
425
440
|
|
|
426
441
|
Args:
|
|
427
442
|
client (OpensearchVectorClient): Vector index client to use
|
|
@@ -437,6 +452,7 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
437
452
|
) -> None:
|
|
438
453
|
"""Initialize params."""
|
|
439
454
|
super().__init__()
|
|
455
|
+
nest_asyncio.apply()
|
|
440
456
|
self._client = client
|
|
441
457
|
|
|
442
458
|
@property
|
|
@@ -449,13 +465,30 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
449
465
|
nodes: List[BaseNode],
|
|
450
466
|
**add_kwargs: Any,
|
|
451
467
|
) -> List[str]:
|
|
452
|
-
"""
|
|
468
|
+
"""
|
|
469
|
+
Add nodes to index.
|
|
470
|
+
|
|
471
|
+
Args:
|
|
472
|
+
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
473
|
+
|
|
474
|
+
"""
|
|
475
|
+
return asyncio.get_event_loop().run_until_complete(
|
|
476
|
+
self.async_add(nodes, **add_kwargs)
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
async def async_add(
|
|
480
|
+
self,
|
|
481
|
+
nodes: List[BaseNode],
|
|
482
|
+
**add_kwargs: Any,
|
|
483
|
+
) -> List[str]:
|
|
484
|
+
"""
|
|
485
|
+
Async add nodes to index.
|
|
453
486
|
|
|
454
487
|
Args:
|
|
455
488
|
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
456
489
|
|
|
457
490
|
"""
|
|
458
|
-
self._client.index_results(nodes)
|
|
491
|
+
await self._client.index_results(nodes)
|
|
459
492
|
return [result.node_id for result in nodes]
|
|
460
493
|
|
|
461
494
|
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
@@ -466,10 +499,35 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
466
499
|
ref_doc_id (str): The doc_id of the document to delete.
|
|
467
500
|
|
|
468
501
|
"""
|
|
469
|
-
|
|
502
|
+
asyncio.get_event_loop().run_until_complete(
|
|
503
|
+
self.adelete(ref_doc_id, **delete_kwargs)
|
|
504
|
+
)
|
|
505
|
+
|
|
506
|
+
async def adelete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
507
|
+
"""
|
|
508
|
+
Async delete nodes using with ref_doc_id.
|
|
509
|
+
|
|
510
|
+
Args:
|
|
511
|
+
ref_doc_id (str): The doc_id of the document to delete.
|
|
512
|
+
|
|
513
|
+
"""
|
|
514
|
+
await self._client.delete_doc_id(ref_doc_id)
|
|
470
515
|
|
|
471
516
|
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
|
|
472
|
-
"""
|
|
517
|
+
"""
|
|
518
|
+
Query index for top k most similar nodes.
|
|
519
|
+
|
|
520
|
+
Args:
|
|
521
|
+
query (VectorStoreQuery): Store query object.
|
|
522
|
+
|
|
523
|
+
"""
|
|
524
|
+
return asyncio.get_event_loop().run_until_complete(self.aquery(query, **kwargs))
|
|
525
|
+
|
|
526
|
+
async def aquery(
|
|
527
|
+
self, query: VectorStoreQuery, **kwargs: Any
|
|
528
|
+
) -> VectorStoreQueryResult:
|
|
529
|
+
"""
|
|
530
|
+
Async query index for top k most similar nodes.
|
|
473
531
|
|
|
474
532
|
Args:
|
|
475
533
|
query (VectorStoreQuery): Store query object.
|
|
@@ -477,7 +535,7 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
477
535
|
"""
|
|
478
536
|
query_embedding = cast(List[float], query.query_embedding)
|
|
479
537
|
|
|
480
|
-
return self._client.
|
|
538
|
+
return await self._client.aquery(
|
|
481
539
|
query.mode,
|
|
482
540
|
query.query_str,
|
|
483
541
|
query_embedding,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama-index-vector-stores-opensearch
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: llama-index vector_stores opensearch integration
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Your Name
|
|
@@ -11,9 +11,8 @@ Classifier: Programming Language :: Python :: 3
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.9
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
14
|
Requires-Dist: llama-index-core (>=0.10.1,<0.11.0)
|
|
16
|
-
Requires-Dist: opensearch-py (>=2.4.2,<3.0.0)
|
|
15
|
+
Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
|
|
17
16
|
Description-Content-Type: text/markdown
|
|
18
17
|
|
|
19
18
|
# LlamaIndex Vector_Stores Integration: Opensearch
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
llama_index/vector_stores/opensearch/__init__.py,sha256=U1_XAkZb6zcskOk4s10NB8Tjs9AZRGdRQLzOGpbWdBA,176
|
|
2
|
+
llama_index/vector_stores/opensearch/base.py,sha256=IHHfsgsjY_9JKroivl-cFVyS74kQnzIIJCDMqEP7tfk,16877
|
|
3
|
+
llama_index_vector_stores_opensearch-0.1.5.dist-info/METADATA,sha256=fsdPGHC07vUnALJftvDl8ZIFTEnQZxwZo1B1lGStLA0,677
|
|
4
|
+
llama_index_vector_stores_opensearch-0.1.5.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
5
|
+
llama_index_vector_stores_opensearch-0.1.5.dist-info/RECORD,,
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
llama_index/vector_stores/opensearch/__init__.py,sha256=U1_XAkZb6zcskOk4s10NB8Tjs9AZRGdRQLzOGpbWdBA,176
|
|
2
|
-
llama_index/vector_stores/opensearch/base.py,sha256=1w7_R3EYOzVRuCksbsun66oYxVpPVd-OTKMqN5u_GHM,15365
|
|
3
|
-
llama_index_vector_stores_opensearch-0.1.4.dist-info/METADATA,sha256=8pKqKNIo0CxoDrQW1aTNEGwd3HF7zL_6ZDmCD2U2gCo,721
|
|
4
|
-
llama_index_vector_stores_opensearch-0.1.4.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
|
5
|
-
llama_index_vector_stores_opensearch-0.1.4.dist-info/RECORD,,
|