llama-index-vector-stores-opensearch 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.
- {llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/PKG-INFO +2 -3
- {llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/llama_index/vector_stores/opensearch/base.py +100 -38
- {llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/pyproject.toml +5 -3
- {llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/README.md +0 -0
- {llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/llama_index/vector_stores/opensearch/__init__.py +0 -0
{llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama-index-vector-stores-opensearch
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: llama-index vector_stores opensearch integration
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Your Name
|
|
@@ -11,9 +11,8 @@ Classifier: Programming Language :: Python :: 3
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.9
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
14
|
Requires-Dist: llama-index-core (>=0.10.1,<0.11.0)
|
|
16
|
-
Requires-Dist: opensearch-py (>=2.4.2,<3.0.0)
|
|
15
|
+
Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
|
|
17
16
|
Description-Content-Type: text/markdown
|
|
18
17
|
|
|
19
18
|
# LlamaIndex Vector_Stores Integration: Opensearch
|
|
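{llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/llama_index/vector_stores/opensearch/base.py
RENAMED
@@ -1,12 +1,18 @@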
|
|
|
1
1
|
"""Elasticsearch/Opensearch vector store."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
2
4
|
import json
|
|
3
5
|
import uuid
|
|
4
6
|
from typing import Any, Dict, Iterable, List, Optional, Union, cast
|
|
5
7
|
|
|
8
|
+
import nest_asyncio
|
|
9
|
+
|
|
10
|
+
from llama_index.core.bridge.pydantic import PrivateAttr
|
|
11
|
+
|
|
6
12
|
from llama_index.core.schema import BaseNode, MetadataMode, TextNode
|
|
7
13
|
from llama_index.core.vector_stores.types import (
|
|
8
14
|
MetadataFilters,
|
|
9
|
-
|
|
15
|
+
BasePydanticVectorStore,
|
|
10
16
|
VectorStoreQuery,
|
|
11
17
|
VectorStoreQueryMode,
|
|
12
18
|
VectorStoreQueryResult,
|
|
@@ -15,9 +21,9 @@ from llama_index.core.vector_stores.utils import (
|
|
|
15
21
|
metadata_dict_to_node,
|
|
16
22
|
node_to_metadata_dict,
|
|
17
23
|
)
|
|
18
|
-
from opensearchpy import
|
|
24
|
+
from opensearchpy import AsyncOpenSearch
|
|
19
25
|
from opensearchpy.exceptions import NotFoundError
|
|
20
|
-
from opensearchpy.helpers import
|
|
26
|
+
from opensearchpy.helpers import async_bulk
|
|
21
27
|
|
|
22
28
|
IMPORT_OPENSEARCH_PY_ERROR = (
|
|
23
29
|
"Could not import OpenSearch. Please install it with `pip install opensearch-py`."
|
|
@@ -28,14 +34,14 @@ INVALID_HYBRID_QUERY_ERROR = (
|
|
|
28
34
|
MATCH_ALL_QUERY = {"match_all": {}} # type: Dict
|
|
29
35
|
|
|
30
36
|
|
|
31
|
-
def
|
|
37
|
+
def _import_async_opensearch() -> Any:
|
|
32
38
|
"""Import OpenSearch if available, otherwise raise error."""
|
|
33
|
-
return
|
|
39
|
+
return AsyncOpenSearch
|
|
34
40
|
|
|
35
41
|
|
|
36
|
-
def
|
|
42
|
+
def _import_async_bulk() -> Any:
|
|
37
43
|
"""Import bulk if available, otherwise raise error."""
|
|
38
|
-
return
|
|
44
|
+
return async_bulk
|
|
39
45
|
|
|
40
46
|
|
|
41
47
|
def _import_not_found_error() -> Any:
|
|
@@ -43,21 +49,21 @@ def _import_not_found_error() -> Any:
|
|
|
43
49
|
return NotFoundError
|
|
44
50
|
|
|
45
51
|
|
|
46
|
-
def
|
|
47
|
-
"""Get
|
|
52
|
+
def _get_async_opensearch_client(opensearch_url: str, **kwargs: Any) -> Any:
|
|
53
|
+
"""Get AsyncOpenSearch client from the opensearch_url, otherwise raise error."""
|
|
48
54
|
try:
|
|
49
|
-
opensearch =
|
|
55
|
+
opensearch = _import_async_opensearch()
|
|
50
56
|
client = opensearch(opensearch_url, **kwargs)
|
|
51
57
|
|
|
52
58
|
except ValueError as e:
|
|
53
59
|
raise ValueError(
|
|
54
|
-
f"
|
|
60
|
+
f"AsyncOpenSearch client string provided is not in proper format. "
|
|
55
61
|
f"Got error: {e} "
|
|
56
62
|
)
|
|
57
63
|
return client
|
|
58
64
|
|
|
59
65
|
|
|
60
|
-
def _bulk_ingest_embeddings(
|
|
66
|
+
async def _bulk_ingest_embeddings(
|
|
61
67
|
client: Any,
|
|
62
68
|
index_name: str,
|
|
63
69
|
embeddings: List[List[float]],
|
|
@@ -70,20 +76,20 @@ def _bulk_ingest_embeddings(
|
|
|
70
76
|
max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,
|
|
71
77
|
is_aoss: bool = False,
|
|
72
78
|
) -> List[str]:
|
|
73
|
-
"""Bulk Ingest Embeddings into given index."""
|
|
79
|
+
"""Async Bulk Ingest Embeddings into given index."""
|
|
74
80
|
if not mapping:
|
|
75
81
|
mapping = {}
|
|
76
82
|
|
|
77
|
-
|
|
83
|
+
async_bulk = _import_async_bulk()
|
|
78
84
|
not_found_error = _import_not_found_error()
|
|
79
85
|
requests = []
|
|
80
86
|
return_ids = []
|
|
81
87
|
mapping = mapping
|
|
82
88
|
|
|
83
89
|
try:
|
|
84
|
-
client.indices.get(index=index_name)
|
|
90
|
+
await client.indices.get(index=index_name)
|
|
85
91
|
except not_found_error:
|
|
86
|
-
client.indices.create(index=index_name, body=mapping)
|
|
92
|
+
await client.indices.create(index=index_name, body=mapping)
|
|
87
93
|
|
|
88
94
|
for i, text in enumerate(texts):
|
|
89
95
|
metadata = metadatas[i] if metadatas else {}
|
|
@@ -101,9 +107,9 @@ def _bulk_ingest_embeddings(
|
|
|
101
107
|
request["_id"] = _id
|
|
102
108
|
requests.append(request)
|
|
103
109
|
return_ids.append(_id)
|
|
104
|
-
|
|
110
|
+
await async_bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
|
|
105
111
|
if not is_aoss:
|
|
106
|
-
client.indices.refresh(index=index_name)
|
|
112
|
+
await client.indices.refresh(index=index_name)
|
|
107
113
|
return return_ids
|
|
108
114
|
|
|
109
115
|
|
|
@@ -134,7 +140,8 @@ def _knn_search_query(
|
|
|
134
140
|
k: int,
|
|
135
141
|
filters: Optional[MetadataFilters] = None,
|
|
136
142
|
) -> Dict:
|
|
137
|
-
"""
|
|
143
|
+
"""
|
|
144
|
+
Do knn search.
|
|
138
145
|
|
|
139
146
|
If there are no filters do approx-knn search.
|
|
140
147
|
If there are (pre)-filters, do an exhaustive exact knn search using 'painless
|
|
@@ -242,7 +249,8 @@ def _is_aoss_enabled(http_auth: Any) -> bool:
|
|
|
242
249
|
|
|
243
250
|
|
|
244
251
|
class OpensearchVectorClient:
|
|
245
|
-
"""
|
|
252
|
+
"""
|
|
253
|
+
Object encapsulating an Opensearch index that has vector search enabled.
|
|
246
254
|
|
|
247
255
|
If the index does not yet exist, it is created during init.
|
|
248
256
|
Therefore, the underlying index is assumed to either:
|
|
@@ -310,15 +318,22 @@ class OpensearchVectorClient:
|
|
|
310
318
|
}
|
|
311
319
|
},
|
|
312
320
|
}
|
|
313
|
-
self._os_client =
|
|
321
|
+
self._os_client = _get_async_opensearch_client(self._endpoint, **kwargs)
|
|
314
322
|
not_found_error = _import_not_found_error()
|
|
323
|
+
event_loop = asyncio.get_event_loop()
|
|
315
324
|
try:
|
|
316
|
-
|
|
325
|
+
event_loop.run_until_complete(
|
|
326
|
+
self._os_client.indices.get(index=self._index)
|
|
327
|
+
)
|
|
317
328
|
except not_found_error:
|
|
318
|
-
|
|
319
|
-
|
|
329
|
+
event_loop.run_until_complete(
|
|
330
|
+
self._os_client.indices.create(index=self._index, body=idx_conf)
|
|
331
|
+
)
|
|
332
|
+
event_loop.run_until_complete(
|
|
333
|
+
self._os_client.indices.refresh(index=self._index)
|
|
334
|
+
)
|
|
320
335
|
|
|
321
|
-
def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
336
|
+
async def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
322
337
|
"""Store results in the index."""
|
|
323
338
|
embeddings: List[List[float]] = []
|
|
324
339
|
texts: List[str] = []
|
|
@@ -330,7 +345,7 @@ class OpensearchVectorClient:
|
|
|
330
345
|
texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
|
|
331
346
|
metadatas.append(node_to_metadata_dict(node, remove_text=True))
|
|
332
347
|
|
|
333
|
-
return _bulk_ingest_embeddings(
|
|
348
|
+
return await _bulk_ingest_embeddings(
|
|
334
349
|
self._os_client,
|
|
335
350
|
self._index,
|
|
336
351
|
embeddings,
|
|
@@ -344,15 +359,16 @@ class OpensearchVectorClient:
|
|
|
344
359
|
is_aoss=self.is_aoss,
|
|
345
360
|
)
|
|
346
361
|
|
|
347
|
-
def delete_doc_id(self, doc_id: str) -> None:
|
|
348
|
-
"""
|
|
362
|
+
async def delete_doc_id(self, doc_id: str) -> None:
|
|
363
|
+
"""
|
|
364
|
+
Delete a document.
|
|
349
365
|
|
|
350
366
|
Args:
|
|
351
367
|
doc_id (str): document id
|
|
352
368
|
"""
|
|
353
|
-
self._os_client.delete(index=self._index, id=doc_id)
|
|
369
|
+
await self._os_client.delete(index=self._index, id=doc_id)
|
|
354
370
|
|
|
355
|
-
def
|
|
371
|
+
async def aquery(
|
|
356
372
|
self,
|
|
357
373
|
query_mode: VectorStoreQueryMode,
|
|
358
374
|
query_str: Optional[str],
|
|
@@ -378,7 +394,7 @@ class OpensearchVectorClient:
|
|
|
378
394
|
)
|
|
379
395
|
params = None
|
|
380
396
|
|
|
381
|
-
res = self._os_client.search(
|
|
397
|
+
res = await self._os_client.search(
|
|
382
398
|
index=self._index, body=search_query, params=params
|
|
383
399
|
)
|
|
384
400
|
nodes = []
|
|
@@ -418,8 +434,9 @@ class OpensearchVectorClient:
|
|
|
418
434
|
return VectorStoreQueryResult(nodes=nodes, ids=ids, similarities=scores)
|
|
419
435
|
|
|
420
436
|
|
|
421
|
-
class OpensearchVectorStore(VectorStore):
|
|
422
|
-
"""
|
|
437
|
+
class OpensearchVectorStore(BasePydanticVectorStore):
|
|
438
|
+
"""
|
|
439
|
+
Elasticsearch/Opensearch vector store.
|
|
423
440
|
|
|
424
441
|
Args:
|
|
425
442
|
client (OpensearchVectorClient): Vector index client to use
|
|
@@ -427,12 +444,15 @@ class OpensearchVectorStore(VectorStore):
|
|
|
427
444
|
"""
|
|
428
445
|
|
|
429
446
|
stores_text: bool = True
|
|
447
|
+
_client: OpensearchVectorClient = PrivateAttr(default=None)
|
|
430
448
|
|
|
431
449
|
def __init__(
|
|
432
450
|
self,
|
|
433
451
|
client: OpensearchVectorClient,
|
|
434
452
|
) -> None:
|
|
435
453
|
"""Initialize params."""
|
|
454
|
+
super().__init__()
|
|
455
|
+
nest_asyncio.apply()
|
|
436
456
|
self._client = client
|
|
437
457
|
|
|
438
458
|
@property
|
|
@@ -445,13 +465,30 @@ class OpensearchVectorStore(VectorStore):
|
|
|
445
465
|
nodes: List[BaseNode],
|
|
446
466
|
**add_kwargs: Any,
|
|
447
467
|
) -> List[str]:
|
|
448
|
-
"""
|
|
468
|
+
"""
|
|
469
|
+
Add nodes to index.
|
|
470
|
+
|
|
471
|
+
Args:
|
|
472
|
+
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
473
|
+
|
|
474
|
+
"""
|
|
475
|
+
return asyncio.get_event_loop().run_until_complete(
|
|
476
|
+
self.async_add(nodes, **add_kwargs)
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
async def async_add(
|
|
480
|
+
self,
|
|
481
|
+
nodes: List[BaseNode],
|
|
482
|
+
**add_kwargs: Any,
|
|
483
|
+
) -> List[str]:
|
|
484
|
+
"""
|
|
485
|
+
Async add nodes to index.
|
|
449
486
|
|
|
450
487
|
Args:
|
|
451
488
|
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
452
489
|
|
|
453
490
|
"""
|
|
454
|
-
self._client.index_results(nodes)
|
|
491
|
+
await self._client.index_results(nodes)
|
|
455
492
|
return [result.node_id for result in nodes]
|
|
456
493
|
|
|
457
494
|
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
@@ -462,10 +499,35 @@ class OpensearchVectorStore(VectorStore):
|
|
|
462
499
|
ref_doc_id (str): The doc_id of the document to delete.
|
|
463
500
|
|
|
464
501
|
"""
|
|
465
|
-
|
|
502
|
+
asyncio.get_event_loop().run_until_complete(
|
|
503
|
+
self.adelete(ref_doc_id, **delete_kwargs)
|
|
504
|
+
)
|
|
505
|
+
|
|
506
|
+
async def adelete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
507
|
+
"""
|
|
508
|
+
Async delete nodes using with ref_doc_id.
|
|
509
|
+
|
|
510
|
+
Args:
|
|
511
|
+
ref_doc_id (str): The doc_id of the document to delete.
|
|
512
|
+
|
|
513
|
+
"""
|
|
514
|
+
await self._client.delete_doc_id(ref_doc_id)
|
|
466
515
|
|
|
467
516
|
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
|
|
468
|
-
"""
|
|
517
|
+
"""
|
|
518
|
+
Query index for top k most similar nodes.
|
|
519
|
+
|
|
520
|
+
Args:
|
|
521
|
+
query (VectorStoreQuery): Store query object.
|
|
522
|
+
|
|
523
|
+
"""
|
|
524
|
+
return asyncio.get_event_loop().run_until_complete(self.aquery(query, **kwargs))
|
|
525
|
+
|
|
526
|
+
async def aquery(
|
|
527
|
+
self, query: VectorStoreQuery, **kwargs: Any
|
|
528
|
+
) -> VectorStoreQueryResult:
|
|
529
|
+
"""
|
|
530
|
+
Async query index for top k most similar nodes.
|
|
469
531
|
|
|
470
532
|
Args:
|
|
471
533
|
query (VectorStoreQuery): Store query object.
|
|
@@ -473,7 +535,7 @@ class OpensearchVectorStore(VectorStore):
|
|
|
473
535
|
"""
|
|
474
536
|
query_embedding = cast(List[float], query.query_embedding)
|
|
475
537
|
|
|
476
|
-
return self._client.
|
|
538
|
+
return await self._client.aquery(
|
|
477
539
|
query.mode,
|
|
478
540
|
query.query_str,
|
|
479
541
|
query_embedding,
|
|
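{llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/pyproject.toml
RENAMED
@@ -12,7 +12,6 @@ contains_example = false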
|
|
|
12
12
|
import_path = "llama_index.vector_stores.opensearch"
|
|
13
13
|
|
|
14
14
|
[tool.llamahub.class_authors]
|
|
15
|
-
OpensearchVectorClient = "llama-index"
|
|
16
15
|
OpensearchVectorStore = "llama-index"
|
|
17
16
|
|
|
18
17
|
[tool.mypy]
|
|
@@ -28,12 +27,15 @@ exclude = ["**/BUILD"]
|
|
|
28
27
|
license = "MIT"
|
|
29
28
|
name = "llama-index-vector-stores-opensearch"
|
|
30
29
|
readme = "README.md"
|
|
31
|
-
version = "0.1.3"
|
|
30
|
+
version = "0.1.5"
|
|
32
31
|
|
|
33
32
|
[tool.poetry.dependencies]
|
|
34
33
|
python = ">=3.8.1,<4.0"
|
|
35
34
|
llama-index-core = "^0.10.1"
|
|
36
|
-
|
|
35
|
+
|
|
36
|
+
[tool.poetry.dependencies.opensearch-py]
|
|
37
|
+
extras = ["async"]
|
|
38
|
+
version = "^2.4.2"
|
|
37
39
|
|
|
38
40
|
[tool.poetry.group.dev.dependencies]
|
|
39
41
|
ipython = "8.10.0"
|
{llama_index_vector_stores_opensearch-0.1.3 → llama_index_vector_stores_opensearch-0.1.5}/README.md
RENAMED
|
File without changes
|