llama-index-vector-stores-opensearch 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.
- {llama_index_vector_stores_opensearch-0.2.1 → llama_index_vector_stores_opensearch-0.3.0}/PKG-INFO +1 -1
- {llama_index_vector_stores_opensearch-0.2.1 → llama_index_vector_stores_opensearch-0.3.0}/llama_index/vector_stores/opensearch/base.py +317 -70
- {llama_index_vector_stores_opensearch-0.2.1 → llama_index_vector_stores_opensearch-0.3.0}/pyproject.toml +1 -1
- {llama_index_vector_stores_opensearch-0.2.1 → llama_index_vector_stores_opensearch-0.3.0}/README.md +0 -0
- {llama_index_vector_stores_opensearch-0.2.1 → llama_index_vector_stores_opensearch-0.3.0}/llama_index/py.typed +0 -0
- {llama_index_vector_stores_opensearch-0.2.1 → llama_index_vector_stores_opensearch-0.3.0}/llama_index/vector_stores/opensearch/__init__.py +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Elasticsearch/Opensearch vector store."""
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
import uuid
|
|
5
4
|
from datetime import datetime
|
|
6
5
|
from typing import Any, Dict, Iterable, List, Optional, Union, cast
|
|
@@ -22,14 +21,12 @@ from llama_index.core.vector_stores.utils import (
|
|
|
22
21
|
metadata_dict_to_node,
|
|
23
22
|
node_to_metadata_dict,
|
|
24
23
|
)
|
|
25
|
-
from opensearchpy import AsyncOpenSearch
|
|
26
24
|
from opensearchpy.client import Client as OSClient
|
|
27
|
-
from opensearchpy.exceptions import NotFoundError
|
|
28
|
-
from opensearchpy.helpers import async_bulk
|
|
29
25
|
|
|
30
26
|
IMPORT_OPENSEARCH_PY_ERROR = (
|
|
31
27
|
"Could not import OpenSearch. Please install it with `pip install opensearch-py`."
|
|
32
28
|
)
|
|
29
|
+
IMPORT_ASYNC_OPENSEARCH_PY_ERROR = "Could not import AsyncOpenSearch. Please install it with `pip install opensearch-py`."
|
|
33
30
|
INVALID_HYBRID_QUERY_ERROR = (
|
|
34
31
|
"Please specify the lexical_query and search_pipeline for hybrid search."
|
|
35
32
|
)
|
|
@@ -54,8 +51,11 @@ class OpensearchVectorClient:
|
|
|
54
51
|
method (Optional[dict]): Opensearch "method" JSON obj for configuring
|
|
55
52
|
the KNN index.
|
|
56
53
|
This includes engine, metric, and other config params. Defaults to:
|
|
57
|
-
{"name": "hnsw", "space_type": "l2", "engine": "
|
|
54
|
+
{"name": "hnsw", "space_type": "l2", "engine": "nmslib",
|
|
58
55
|
"parameters": {"ef_construction": 256, "m": 48}}
|
|
56
|
+
settings: Optional[dict]: Settings for the Opensearch index creation. Defaults to:
|
|
57
|
+
{"index": {"knn": True, "knn.algo_param.ef_search": 100}}
|
|
58
|
+
space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
|
|
59
59
|
**kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.
|
|
60
60
|
|
|
61
61
|
"""
|
|
@@ -68,7 +68,9 @@ class OpensearchVectorClient:
|
|
|
68
68
|
embedding_field: str = "embedding",
|
|
69
69
|
text_field: str = "content",
|
|
70
70
|
method: Optional[dict] = None,
|
|
71
|
+
settings: Optional[dict] = None,
|
|
71
72
|
engine: Optional[str] = "nmslib",
|
|
73
|
+
space_type: Optional[str] = "l2",
|
|
72
74
|
max_chunk_bytes: int = 1 * 1024 * 1024,
|
|
73
75
|
search_pipeline: Optional[str] = None,
|
|
74
76
|
os_client: Optional[OSClient] = None,
|
|
@@ -82,6 +84,8 @@ class OpensearchVectorClient:
|
|
|
82
84
|
"engine": engine,
|
|
83
85
|
"parameters": {"ef_construction": 256, "m": 48},
|
|
84
86
|
}
|
|
87
|
+
if settings is None:
|
|
88
|
+
settings = {"index": {"knn": True, "knn.algo_param.ef_search": 100}}
|
|
85
89
|
if embedding_field is None:
|
|
86
90
|
embedding_field = "embedding"
|
|
87
91
|
self._embedding_field = embedding_field
|
|
@@ -94,10 +98,11 @@ class OpensearchVectorClient:
|
|
|
94
98
|
|
|
95
99
|
self._search_pipeline = search_pipeline
|
|
96
100
|
http_auth = kwargs.get("http_auth")
|
|
101
|
+
self.space_type = space_type
|
|
97
102
|
self.is_aoss = self._is_aoss_enabled(http_auth=http_auth)
|
|
98
103
|
# initialize mapping
|
|
99
104
|
idx_conf = {
|
|
100
|
-
"settings":
|
|
105
|
+
"settings": settings,
|
|
101
106
|
"mappings": {
|
|
102
107
|
"properties": {
|
|
103
108
|
embedding_field: {
|
|
@@ -108,36 +113,72 @@ class OpensearchVectorClient:
|
|
|
108
113
|
}
|
|
109
114
|
},
|
|
110
115
|
}
|
|
111
|
-
self._os_client = os_client or self.
|
|
116
|
+
self._os_client = os_client or self._get_opensearch_client(
|
|
117
|
+
self._endpoint, **kwargs
|
|
118
|
+
)
|
|
119
|
+
self._os_async_client = self._get_async_opensearch_client(
|
|
112
120
|
self._endpoint, **kwargs
|
|
113
121
|
)
|
|
114
122
|
not_found_error = self._import_not_found_error()
|
|
115
123
|
|
|
116
|
-
event_loop = asyncio.get_event_loop()
|
|
117
124
|
try:
|
|
118
|
-
|
|
119
|
-
self._os_client.indices.get(index=self._index)
|
|
120
|
-
)
|
|
125
|
+
self._os_client.indices.get(index=self._index)
|
|
121
126
|
except not_found_error:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
)
|
|
125
|
-
event_loop.run_until_complete(
|
|
126
|
-
self._os_client.indices.refresh(index=self._index)
|
|
127
|
-
)
|
|
127
|
+
self._os_client.indices.create(index=self._index, body=idx_conf)
|
|
128
|
+
self._os_client.indices.refresh(index=self._index)
|
|
128
129
|
|
|
129
|
-
def
|
|
130
|
+
def _import_opensearch(self) -> Any:
|
|
130
131
|
"""Import OpenSearch if available, otherwise raise error."""
|
|
132
|
+
try:
|
|
133
|
+
from opensearchpy import OpenSearch
|
|
134
|
+
except ImportError:
|
|
135
|
+
raise ImportError(IMPORT_OPENSEARCH_PY_ERROR)
|
|
136
|
+
return OpenSearch
|
|
137
|
+
|
|
138
|
+
def _import_async_opensearch(self) -> Any:
|
|
139
|
+
"""Import AsyncOpenSearch if available, otherwise raise error."""
|
|
140
|
+
try:
|
|
141
|
+
from opensearchpy import AsyncOpenSearch
|
|
142
|
+
except ImportError:
|
|
143
|
+
raise ImportError(IMPORT_ASYNC_OPENSEARCH_PY_ERROR)
|
|
131
144
|
return AsyncOpenSearch
|
|
132
145
|
|
|
133
|
-
def
|
|
146
|
+
def _import_bulk(self) -> Any:
|
|
134
147
|
"""Import bulk if available, otherwise raise error."""
|
|
148
|
+
try:
|
|
149
|
+
from opensearchpy.helpers import bulk
|
|
150
|
+
except ImportError:
|
|
151
|
+
raise ImportError(IMPORT_OPENSEARCH_PY_ERROR)
|
|
152
|
+
return bulk
|
|
153
|
+
|
|
154
|
+
def _import_async_bulk(self) -> Any:
|
|
155
|
+
"""Import async_bulk if available, otherwise raise error."""
|
|
156
|
+
try:
|
|
157
|
+
from opensearchpy.helpers import async_bulk
|
|
158
|
+
except ImportError:
|
|
159
|
+
raise ImportError(IMPORT_ASYNC_OPENSEARCH_PY_ERROR)
|
|
135
160
|
return async_bulk
|
|
136
161
|
|
|
137
162
|
def _import_not_found_error(self) -> Any:
|
|
138
163
|
"""Import not found error if available, otherwise raise error."""
|
|
164
|
+
try:
|
|
165
|
+
from opensearchpy.exceptions import NotFoundError
|
|
166
|
+
except ImportError:
|
|
167
|
+
raise ImportError(IMPORT_OPENSEARCH_PY_ERROR)
|
|
139
168
|
return NotFoundError
|
|
140
169
|
|
|
170
|
+
def _get_opensearch_client(self, opensearch_url: str, **kwargs: Any) -> Any:
|
|
171
|
+
"""Get OpenSearch client from the opensearch_url, otherwise raise error."""
|
|
172
|
+
try:
|
|
173
|
+
opensearch = self._import_opensearch()
|
|
174
|
+
client = opensearch(opensearch_url, **kwargs)
|
|
175
|
+
except ValueError as e:
|
|
176
|
+
raise ImportError(
|
|
177
|
+
f"OpenSearch client string provided is not in proper format. "
|
|
178
|
+
f"Got error: {e} "
|
|
179
|
+
)
|
|
180
|
+
return client
|
|
181
|
+
|
|
141
182
|
def _get_async_opensearch_client(self, opensearch_url: str, **kwargs: Any) -> Any:
|
|
142
183
|
"""Get AsyncOpenSearch client from the opensearch_url, otherwise raise error."""
|
|
143
184
|
try:
|
|
@@ -151,7 +192,58 @@ class OpensearchVectorClient:
|
|
|
151
192
|
)
|
|
152
193
|
return client
|
|
153
194
|
|
|
154
|
-
|
|
195
|
+
def _bulk_ingest_embeddings(
|
|
196
|
+
self,
|
|
197
|
+
client: Any,
|
|
198
|
+
index_name: str,
|
|
199
|
+
embeddings: List[List[float]],
|
|
200
|
+
texts: Iterable[str],
|
|
201
|
+
metadatas: Optional[List[dict]] = None,
|
|
202
|
+
ids: Optional[List[str]] = None,
|
|
203
|
+
vector_field: str = "embedding",
|
|
204
|
+
text_field: str = "content",
|
|
205
|
+
mapping: Optional[Dict] = None,
|
|
206
|
+
max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,
|
|
207
|
+
is_aoss: bool = False,
|
|
208
|
+
) -> List[str]:
|
|
209
|
+
"""Bulk Ingest Embeddings into given index."""
|
|
210
|
+
if not mapping:
|
|
211
|
+
mapping = {}
|
|
212
|
+
|
|
213
|
+
bulk = self._import_bulk()
|
|
214
|
+
not_found_error = self._import_not_found_error()
|
|
215
|
+
requests = []
|
|
216
|
+
return_ids = []
|
|
217
|
+
|
|
218
|
+
try:
|
|
219
|
+
client.indices.get(index=index_name)
|
|
220
|
+
except not_found_error:
|
|
221
|
+
client.indices.create(index=index_name, body=mapping)
|
|
222
|
+
|
|
223
|
+
for i, text in enumerate(texts):
|
|
224
|
+
metadata = metadatas[i] if metadatas else {}
|
|
225
|
+
_id = ids[i] if ids else str(uuid.uuid4())
|
|
226
|
+
request = {
|
|
227
|
+
"_op_type": "index",
|
|
228
|
+
"_index": index_name,
|
|
229
|
+
vector_field: embeddings[i],
|
|
230
|
+
text_field: text,
|
|
231
|
+
"metadata": metadata,
|
|
232
|
+
}
|
|
233
|
+
if is_aoss:
|
|
234
|
+
request["id"] = _id
|
|
235
|
+
else:
|
|
236
|
+
request["_id"] = _id
|
|
237
|
+
requests.append(request)
|
|
238
|
+
return_ids.append(_id)
|
|
239
|
+
|
|
240
|
+
bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
|
|
241
|
+
if not is_aoss:
|
|
242
|
+
client.indices.refresh(index=index_name)
|
|
243
|
+
|
|
244
|
+
return return_ids
|
|
245
|
+
|
|
246
|
+
async def _abulk_ingest_embeddings(
|
|
155
247
|
self,
|
|
156
248
|
client: Any,
|
|
157
249
|
index_name: str,
|
|
@@ -173,7 +265,6 @@ class OpensearchVectorClient:
|
|
|
173
265
|
not_found_error = self._import_not_found_error()
|
|
174
266
|
requests = []
|
|
175
267
|
return_ids = []
|
|
176
|
-
mapping = mapping
|
|
177
268
|
|
|
178
269
|
try:
|
|
179
270
|
await client.indices.get(index=index_name)
|
|
@@ -196,9 +287,11 @@ class OpensearchVectorClient:
|
|
|
196
287
|
request["_id"] = _id
|
|
197
288
|
requests.append(request)
|
|
198
289
|
return_ids.append(_id)
|
|
290
|
+
|
|
199
291
|
await async_bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
|
|
200
292
|
if not is_aoss:
|
|
201
293
|
await client.indices.refresh(index=index_name)
|
|
294
|
+
|
|
202
295
|
return return_ids
|
|
203
296
|
|
|
204
297
|
def _default_approximate_search_query(
|
|
@@ -309,9 +402,11 @@ class OpensearchVectorClient:
|
|
|
309
402
|
|
|
310
403
|
If there are no filters do approx-knn search.
|
|
311
404
|
If there are (pre)-filters, do an exhaustive exact knn search using 'painless
|
|
312
|
-
scripting'.
|
|
405
|
+
scripting' if the version of Opensearch supports it, otherwise uses knn_score scripting score.
|
|
313
406
|
|
|
314
|
-
Note
|
|
407
|
+
Note:
|
|
408
|
+
-AWS Opensearch Serverless does not support the painless scripting functionality at this time according to AWS.
|
|
409
|
+
-Also note that approximate knn search does not support pre-filtering.
|
|
315
410
|
|
|
316
411
|
Args:
|
|
317
412
|
query_embedding: Vector embedding to query.
|
|
@@ -328,16 +423,25 @@ class OpensearchVectorClient:
|
|
|
328
423
|
search_query = self._default_approximate_search_query(
|
|
329
424
|
query_embedding, k, vector_field=embedding_field
|
|
330
425
|
)
|
|
426
|
+
elif self.is_aoss:
|
|
427
|
+
# if is_aoss is set we are using Opensearch Serverless AWS offering which cannot use
|
|
428
|
+
# painless scripting so default scoring script returned will be just normal knn_score script
|
|
429
|
+
search_query = self._default_scoring_script_query(
|
|
430
|
+
query_embedding,
|
|
431
|
+
k,
|
|
432
|
+
space_type=self.space_type,
|
|
433
|
+
pre_filter={"bool": {"filter": pre_filter}},
|
|
434
|
+
vector_field=embedding_field,
|
|
435
|
+
)
|
|
331
436
|
else:
|
|
332
437
|
# https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
|
|
333
|
-
search_query = self.
|
|
438
|
+
search_query = self._default_scoring_script_query(
|
|
334
439
|
query_embedding,
|
|
335
440
|
k,
|
|
336
441
|
space_type="l2Squared",
|
|
337
442
|
pre_filter={"bool": {"filter": pre_filter}},
|
|
338
443
|
vector_field=embedding_field,
|
|
339
444
|
)
|
|
340
|
-
|
|
341
445
|
return search_query
|
|
342
446
|
|
|
343
447
|
def _hybrid_search_query(
|
|
@@ -382,7 +486,9 @@ class OpensearchVectorClient:
|
|
|
382
486
|
def __get_painless_scripting_source(
|
|
383
487
|
self, space_type: str, vector_field: str = "embedding"
|
|
384
488
|
) -> str:
|
|
385
|
-
"""For Painless Scripting, it returns the script source based on space type.
|
|
489
|
+
"""For Painless Scripting, it returns the script source based on space type.
|
|
490
|
+
This does not work with Opensearch Serverless currently.
|
|
491
|
+
"""
|
|
386
492
|
source_value = (
|
|
387
493
|
f"(1.0 + {space_type}(params.query_value, doc['{vector_field}']))"
|
|
388
494
|
)
|
|
@@ -391,7 +497,29 @@ class OpensearchVectorClient:
|
|
|
391
497
|
else:
|
|
392
498
|
return f"1/{source_value}"
|
|
393
499
|
|
|
394
|
-
def
|
|
500
|
+
def _get_knn_scoring_script(self, space_type, vector_field, query_vector):
|
|
501
|
+
"""Default scoring script that will work with AWS Opensearch Serverless."""
|
|
502
|
+
return {
|
|
503
|
+
"source": "knn_score",
|
|
504
|
+
"lang": "knn",
|
|
505
|
+
"params": {
|
|
506
|
+
"field": vector_field,
|
|
507
|
+
"query_value": query_vector,
|
|
508
|
+
"space_type": space_type,
|
|
509
|
+
},
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
def _get_painless_scoring_script(self, space_type, vector_field, query_vector):
|
|
513
|
+
source = self.__get_painless_scripting_source(space_type, vector_field)
|
|
514
|
+
return {
|
|
515
|
+
"source": source,
|
|
516
|
+
"params": {
|
|
517
|
+
"field": vector_field,
|
|
518
|
+
"query_value": query_vector,
|
|
519
|
+
},
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
def _default_scoring_script_query(
|
|
395
523
|
self,
|
|
396
524
|
query_vector: List[float],
|
|
397
525
|
k: int = 4,
|
|
@@ -399,23 +527,31 @@ class OpensearchVectorClient:
|
|
|
399
527
|
pre_filter: Optional[Union[Dict, List]] = None,
|
|
400
528
|
vector_field: str = "embedding",
|
|
401
529
|
) -> Dict:
|
|
402
|
-
"""For
|
|
530
|
+
"""For Scoring Script Search, this is the default query. Has to account for Opensearch Service
|
|
531
|
+
Serverless which does not support painless scripting functions so defaults to knn_score.
|
|
532
|
+
"""
|
|
403
533
|
if not pre_filter:
|
|
404
534
|
pre_filter = MATCH_ALL_QUERY
|
|
405
535
|
|
|
406
|
-
|
|
536
|
+
# check if we can use painless scripting or have to use default knn_score script
|
|
537
|
+
if self.is_aoss:
|
|
538
|
+
if space_type == "l2Squared":
|
|
539
|
+
raise ValueError(
|
|
540
|
+
"Unsupported space type for aoss. Can only use l1, l2, cosinesimil."
|
|
541
|
+
)
|
|
542
|
+
script = self._get_knn_scoring_script(
|
|
543
|
+
space_type, vector_field, query_vector
|
|
544
|
+
)
|
|
545
|
+
else:
|
|
546
|
+
script = self._get_painless_scoring_script(
|
|
547
|
+
space_type, vector_field, query_vector
|
|
548
|
+
)
|
|
407
549
|
return {
|
|
408
550
|
"size": k,
|
|
409
551
|
"query": {
|
|
410
552
|
"script_score": {
|
|
411
553
|
"query": pre_filter,
|
|
412
|
-
"script":
|
|
413
|
-
"source": source,
|
|
414
|
-
"params": {
|
|
415
|
-
"field": vector_field,
|
|
416
|
-
"query_value": query_vector,
|
|
417
|
-
},
|
|
418
|
-
},
|
|
554
|
+
"script": script,
|
|
419
555
|
}
|
|
420
556
|
},
|
|
421
557
|
}
|
|
@@ -430,7 +566,7 @@ class OpensearchVectorClient:
|
|
|
430
566
|
return True
|
|
431
567
|
return False
|
|
432
568
|
|
|
433
|
-
|
|
569
|
+
def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
434
570
|
"""Store results in the index."""
|
|
435
571
|
embeddings: List[List[float]] = []
|
|
436
572
|
texts: List[str] = []
|
|
@@ -442,7 +578,7 @@ class OpensearchVectorClient:
|
|
|
442
578
|
texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
|
|
443
579
|
metadatas.append(node_to_metadata_dict(node, remove_text=True))
|
|
444
580
|
|
|
445
|
-
return
|
|
581
|
+
return self._bulk_ingest_embeddings(
|
|
446
582
|
self._os_client,
|
|
447
583
|
self._index,
|
|
448
584
|
embeddings,
|
|
@@ -456,7 +592,33 @@ class OpensearchVectorClient:
|
|
|
456
592
|
is_aoss=self.is_aoss,
|
|
457
593
|
)
|
|
458
594
|
|
|
459
|
-
async def
|
|
595
|
+
async def aindex_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
596
|
+
"""Store results in the index."""
|
|
597
|
+
embeddings: List[List[float]] = []
|
|
598
|
+
texts: List[str] = []
|
|
599
|
+
metadatas: List[dict] = []
|
|
600
|
+
ids: List[str] = []
|
|
601
|
+
for node in nodes:
|
|
602
|
+
ids.append(node.node_id)
|
|
603
|
+
embeddings.append(node.get_embedding())
|
|
604
|
+
texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
|
|
605
|
+
metadatas.append(node_to_metadata_dict(node, remove_text=True))
|
|
606
|
+
|
|
607
|
+
return await self._abulk_ingest_embeddings(
|
|
608
|
+
self._os_async_client,
|
|
609
|
+
self._index,
|
|
610
|
+
embeddings,
|
|
611
|
+
texts,
|
|
612
|
+
metadatas=metadatas,
|
|
613
|
+
ids=ids,
|
|
614
|
+
vector_field=self._embedding_field,
|
|
615
|
+
text_field=self._text_field,
|
|
616
|
+
mapping=None,
|
|
617
|
+
max_chunk_bytes=self._max_chunk_bytes,
|
|
618
|
+
is_aoss=self.is_aoss,
|
|
619
|
+
)
|
|
620
|
+
|
|
621
|
+
def delete_by_doc_id(self, doc_id: str) -> None:
|
|
460
622
|
"""
|
|
461
623
|
Deletes all OpenSearch documents corresponding to the given LlamaIndex `Document` ID.
|
|
462
624
|
|
|
@@ -466,11 +628,49 @@ class OpensearchVectorClient:
|
|
|
466
628
|
search_query = {
|
|
467
629
|
"query": {"term": {"metadata.doc_id.keyword": {"value": doc_id}}}
|
|
468
630
|
}
|
|
469
|
-
|
|
631
|
+
self._os_client.delete_by_query(
|
|
470
632
|
index=self._index, body=search_query, refresh=True
|
|
471
633
|
)
|
|
472
634
|
|
|
473
|
-
async def
|
|
635
|
+
async def adelete_by_doc_id(self, doc_id: str) -> None:
|
|
636
|
+
"""
|
|
637
|
+
Deletes all OpenSearch documents corresponding to the given LlamaIndex `Document` ID.
|
|
638
|
+
|
|
639
|
+
Args:
|
|
640
|
+
doc_id (str): a LlamaIndex `Document` id
|
|
641
|
+
"""
|
|
642
|
+
search_query = {
|
|
643
|
+
"query": {"term": {"metadata.doc_id.keyword": {"value": doc_id}}}
|
|
644
|
+
}
|
|
645
|
+
await self._os_async_client.delete_by_query(
|
|
646
|
+
index=self._index, body=search_query, refresh=True
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
def delete_nodes(
|
|
650
|
+
self,
|
|
651
|
+
node_ids: Optional[List[str]] = None,
|
|
652
|
+
filters: Optional[MetadataFilters] = None,
|
|
653
|
+
**delete_kwargs: Any,
|
|
654
|
+
) -> None:
|
|
655
|
+
"""Deletes nodes.
|
|
656
|
+
|
|
657
|
+
Args:
|
|
658
|
+
node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
|
|
659
|
+
filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
|
|
660
|
+
"""
|
|
661
|
+
if not node_ids and not filters:
|
|
662
|
+
return
|
|
663
|
+
|
|
664
|
+
query = {"query": {"bool": {"filter": []}}}
|
|
665
|
+
if node_ids:
|
|
666
|
+
query["query"]["bool"]["filter"].append({"terms": {"_id": node_ids or []}})
|
|
667
|
+
|
|
668
|
+
if filters:
|
|
669
|
+
query["query"]["bool"]["filter"].extend(self._parse_filters(filters))
|
|
670
|
+
|
|
671
|
+
self._os_client.delete_by_query(index=self._index, body=query, refresh=True)
|
|
672
|
+
|
|
673
|
+
async def adelete_nodes(
|
|
474
674
|
self,
|
|
475
675
|
node_ids: Optional[List[str]] = None,
|
|
476
676
|
filters: Optional[MetadataFilters] = None,
|
|
@@ -492,17 +692,61 @@ class OpensearchVectorClient:
|
|
|
492
692
|
if filters:
|
|
493
693
|
query["query"]["bool"]["filter"].extend(self._parse_filters(filters))
|
|
494
694
|
|
|
495
|
-
await self.
|
|
695
|
+
await self._os_async_client.delete_by_query(
|
|
496
696
|
index=self._index, body=query, refresh=True
|
|
497
697
|
)
|
|
498
698
|
|
|
499
|
-
|
|
699
|
+
def clear(self) -> None:
|
|
700
|
+
"""Clears index."""
|
|
701
|
+
query = {"query": {"bool": {"filter": []}}}
|
|
702
|
+
self._os_client.delete_by_query(index=self._index, body=query, refresh=True)
|
|
703
|
+
|
|
704
|
+
async def aclear(self) -> None:
|
|
500
705
|
"""Clears index."""
|
|
501
706
|
query = {"query": {"bool": {"filter": []}}}
|
|
502
|
-
await self.
|
|
707
|
+
await self._os_async_client.delete_by_query(
|
|
503
708
|
index=self._index, body=query, refresh=True
|
|
504
709
|
)
|
|
505
710
|
|
|
711
|
+
def query(
|
|
712
|
+
self,
|
|
713
|
+
query_mode: VectorStoreQueryMode,
|
|
714
|
+
query_str: Optional[str],
|
|
715
|
+
query_embedding: List[float],
|
|
716
|
+
k: int,
|
|
717
|
+
filters: Optional[MetadataFilters] = None,
|
|
718
|
+
) -> VectorStoreQueryResult:
|
|
719
|
+
if query_mode == VectorStoreQueryMode.HYBRID:
|
|
720
|
+
if query_str is None or self._search_pipeline is None:
|
|
721
|
+
raise ValueError(INVALID_HYBRID_QUERY_ERROR)
|
|
722
|
+
search_query = self._hybrid_search_query(
|
|
723
|
+
self._text_field,
|
|
724
|
+
query_str,
|
|
725
|
+
self._embedding_field,
|
|
726
|
+
query_embedding,
|
|
727
|
+
k,
|
|
728
|
+
filters=filters,
|
|
729
|
+
)
|
|
730
|
+
params = {
|
|
731
|
+
"search_pipeline": self._search_pipeline,
|
|
732
|
+
}
|
|
733
|
+
elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
|
|
734
|
+
search_query = self._lexical_search_query(
|
|
735
|
+
self._text_field, query_str, k, filters=filters
|
|
736
|
+
)
|
|
737
|
+
params = None
|
|
738
|
+
else:
|
|
739
|
+
search_query = self._knn_search_query(
|
|
740
|
+
self._embedding_field, query_embedding, k, filters=filters
|
|
741
|
+
)
|
|
742
|
+
params = None
|
|
743
|
+
|
|
744
|
+
res = self._os_client.search(
|
|
745
|
+
index=self._index, body=search_query, params=params
|
|
746
|
+
)
|
|
747
|
+
|
|
748
|
+
return self._to_query_result(res)
|
|
749
|
+
|
|
506
750
|
async def aquery(
|
|
507
751
|
self,
|
|
508
752
|
query_mode: VectorStoreQueryMode,
|
|
@@ -536,7 +780,7 @@ class OpensearchVectorClient:
|
|
|
536
780
|
)
|
|
537
781
|
params = None
|
|
538
782
|
|
|
539
|
-
res = await self.
|
|
783
|
+
res = await self._os_async_client.search(
|
|
540
784
|
index=self._index, body=search_query, params=params
|
|
541
785
|
)
|
|
542
786
|
|
|
@@ -647,9 +891,8 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
647
891
|
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
648
892
|
|
|
649
893
|
"""
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
)
|
|
894
|
+
self._client.index_results(nodes)
|
|
895
|
+
return [result.node_id for result in nodes]
|
|
653
896
|
|
|
654
897
|
async def async_add(
|
|
655
898
|
self,
|
|
@@ -663,32 +906,30 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
663
906
|
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
664
907
|
|
|
665
908
|
"""
|
|
666
|
-
await self._client.
|
|
909
|
+
await self._client.aindex_results(nodes)
|
|
667
910
|
return [result.node_id for result in nodes]
|
|
668
911
|
|
|
669
912
|
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
670
913
|
"""
|
|
671
|
-
Delete nodes using
|
|
914
|
+
Delete nodes using with ref_doc_id.
|
|
672
915
|
|
|
673
916
|
Args:
|
|
674
|
-
ref_doc_id (str): The doc_id of the document
|
|
917
|
+
ref_doc_id (str): The doc_id of the document to delete.
|
|
675
918
|
|
|
676
919
|
"""
|
|
677
|
-
|
|
678
|
-
self.adelete(ref_doc_id, **delete_kwargs)
|
|
679
|
-
)
|
|
920
|
+
self._client.delete_by_doc_id(ref_doc_id)
|
|
680
921
|
|
|
681
922
|
async def adelete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
682
923
|
"""
|
|
683
|
-
Async delete nodes using
|
|
924
|
+
Async delete nodes using with ref_doc_id.
|
|
684
925
|
|
|
685
926
|
Args:
|
|
686
|
-
ref_doc_id (str): The doc_id of the document
|
|
927
|
+
ref_doc_id (str): The doc_id of the document to delete.
|
|
687
928
|
|
|
688
929
|
"""
|
|
689
|
-
await self._client.
|
|
930
|
+
await self._client.adelete_by_doc_id(ref_doc_id)
|
|
690
931
|
|
|
691
|
-
|
|
932
|
+
def delete_nodes(
|
|
692
933
|
self,
|
|
693
934
|
node_ids: Optional[List[str]] = None,
|
|
694
935
|
filters: Optional[MetadataFilters] = None,
|
|
@@ -700,31 +941,29 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
700
941
|
node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
|
|
701
942
|
filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
|
|
702
943
|
"""
|
|
703
|
-
|
|
944
|
+
self._client.delete_nodes(node_ids, filters, **delete_kwargs)
|
|
704
945
|
|
|
705
|
-
def
|
|
946
|
+
async def adelete_nodes(
|
|
706
947
|
self,
|
|
707
948
|
node_ids: Optional[List[str]] = None,
|
|
708
949
|
filters: Optional[MetadataFilters] = None,
|
|
709
950
|
**delete_kwargs: Any,
|
|
710
951
|
) -> None:
|
|
711
|
-
"""
|
|
952
|
+
"""Async deletes nodes async.
|
|
712
953
|
|
|
713
954
|
Args:
|
|
714
955
|
node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
|
|
715
956
|
filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
|
|
716
957
|
"""
|
|
717
|
-
|
|
718
|
-
self.adelete_nodes(node_ids, filters, **delete_kwargs)
|
|
719
|
-
)
|
|
720
|
-
|
|
721
|
-
async def aclear(self) -> None:
|
|
722
|
-
"""Clears index."""
|
|
723
|
-
await self._client.clear()
|
|
958
|
+
await self._client.adelete_nodes(node_ids, filters, **delete_kwargs)
|
|
724
959
|
|
|
725
960
|
def clear(self) -> None:
|
|
726
961
|
"""Clears index."""
|
|
727
|
-
|
|
962
|
+
self._client.clear()
|
|
963
|
+
|
|
964
|
+
async def aclear(self) -> None:
|
|
965
|
+
"""Async clears index."""
|
|
966
|
+
await self._client.aclear()
|
|
728
967
|
|
|
729
968
|
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
|
|
730
969
|
"""
|
|
@@ -734,7 +973,15 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
734
973
|
query (VectorStoreQuery): Store query object.
|
|
735
974
|
|
|
736
975
|
"""
|
|
737
|
-
|
|
976
|
+
query_embedding = cast(List[float], query.query_embedding)
|
|
977
|
+
|
|
978
|
+
return self._client.query(
|
|
979
|
+
query.mode,
|
|
980
|
+
query.query_str,
|
|
981
|
+
query_embedding,
|
|
982
|
+
query.similarity_top_k,
|
|
983
|
+
filters=query.filters,
|
|
984
|
+
)
|
|
738
985
|
|
|
739
986
|
async def aquery(
|
|
740
987
|
self, query: VectorStoreQuery, **kwargs: Any
|
{llama_index_vector_stores_opensearch-0.2.1 → llama_index_vector_stores_opensearch-0.3.0}/README.md
RENAMED
|
File without changes
|
|
File without changes
|