llama-index-vector-stores-opensearch 0.2.2__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.
- {llama_index_vector_stores_opensearch-0.2.2 → llama_index_vector_stores_opensearch-0.4.0}/PKG-INFO +1 -1
- {llama_index_vector_stores_opensearch-0.2.2 → llama_index_vector_stores_opensearch-0.4.0}/llama_index/vector_stores/opensearch/base.py +338 -84
- {llama_index_vector_stores_opensearch-0.2.2 → llama_index_vector_stores_opensearch-0.4.0}/pyproject.toml +1 -1
- {llama_index_vector_stores_opensearch-0.2.2 → llama_index_vector_stores_opensearch-0.4.0}/README.md +0 -0
- {llama_index_vector_stores_opensearch-0.2.2 → llama_index_vector_stores_opensearch-0.4.0}/llama_index/py.typed +0 -0
- {llama_index_vector_stores_opensearch-0.2.2 → llama_index_vector_stores_opensearch-0.4.0}/llama_index/vector_stores/opensearch/__init__.py +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Elasticsearch/Opensearch vector store."""
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
import uuid
|
|
5
4
|
from datetime import datetime
|
|
6
5
|
from typing import Any, Dict, Iterable, List, Optional, Union, cast
|
|
@@ -22,14 +21,12 @@ from llama_index.core.vector_stores.utils import (
|
|
|
22
21
|
metadata_dict_to_node,
|
|
23
22
|
node_to_metadata_dict,
|
|
24
23
|
)
|
|
25
|
-
from opensearchpy import AsyncOpenSearch
|
|
26
24
|
from opensearchpy.client import Client as OSClient
|
|
27
|
-
from opensearchpy.exceptions import NotFoundError
|
|
28
|
-
from opensearchpy.helpers import async_bulk
|
|
29
25
|
|
|
30
26
|
IMPORT_OPENSEARCH_PY_ERROR = (
|
|
31
27
|
"Could not import OpenSearch. Please install it with `pip install opensearch-py`."
|
|
32
28
|
)
|
|
29
|
+
IMPORT_ASYNC_OPENSEARCH_PY_ERROR = "Could not import AsyncOpenSearch. Please install it with `pip install opensearch-py`."
|
|
33
30
|
INVALID_HYBRID_QUERY_ERROR = (
|
|
34
31
|
"Please specify the lexical_query and search_pipeline for hybrid search."
|
|
35
32
|
)
|
|
@@ -54,8 +51,10 @@ class OpensearchVectorClient:
|
|
|
54
51
|
method (Optional[dict]): Opensearch "method" JSON obj for configuring
|
|
55
52
|
the KNN index.
|
|
56
53
|
This includes engine, metric, and other config params. Defaults to:
|
|
57
|
-
{"name": "hnsw", "space_type": "l2", "engine": "
|
|
54
|
+
{"name": "hnsw", "space_type": "l2", "engine": "nmslib",
|
|
58
55
|
"parameters": {"ef_construction": 256, "m": 48}}
|
|
56
|
+
settings: Optional[dict]: Settings for the Opensearch index creation. Defaults to:
|
|
57
|
+
{"index": {"knn": True, "knn.algo_param.ef_search": 100}}
|
|
59
58
|
space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
|
|
60
59
|
**kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.
|
|
61
60
|
|
|
@@ -69,6 +68,7 @@ class OpensearchVectorClient:
|
|
|
69
68
|
embedding_field: str = "embedding",
|
|
70
69
|
text_field: str = "content",
|
|
71
70
|
method: Optional[dict] = None,
|
|
71
|
+
settings: Optional[dict] = None,
|
|
72
72
|
engine: Optional[str] = "nmslib",
|
|
73
73
|
space_type: Optional[str] = "l2",
|
|
74
74
|
max_chunk_bytes: int = 1 * 1024 * 1024,
|
|
@@ -84,10 +84,13 @@ class OpensearchVectorClient:
|
|
|
84
84
|
"engine": engine,
|
|
85
85
|
"parameters": {"ef_construction": 256, "m": 48},
|
|
86
86
|
}
|
|
87
|
+
if settings is None:
|
|
88
|
+
settings = {"index": {"knn": True, "knn.algo_param.ef_search": 100}}
|
|
87
89
|
if embedding_field is None:
|
|
88
90
|
embedding_field = "embedding"
|
|
89
|
-
self._embedding_field = embedding_field
|
|
90
91
|
|
|
92
|
+
self._method = method
|
|
93
|
+
self._embedding_field = embedding_field
|
|
91
94
|
self._endpoint = endpoint
|
|
92
95
|
self._dim = dim
|
|
93
96
|
self._index = index
|
|
@@ -100,7 +103,7 @@ class OpensearchVectorClient:
|
|
|
100
103
|
self.is_aoss = self._is_aoss_enabled(http_auth=http_auth)
|
|
101
104
|
# initialize mapping
|
|
102
105
|
idx_conf = {
|
|
103
|
-
"settings":
|
|
106
|
+
"settings": settings,
|
|
104
107
|
"mappings": {
|
|
105
108
|
"properties": {
|
|
106
109
|
embedding_field: {
|
|
@@ -111,36 +114,76 @@ class OpensearchVectorClient:
|
|
|
111
114
|
}
|
|
112
115
|
},
|
|
113
116
|
}
|
|
114
|
-
self._os_client = os_client or self.
|
|
117
|
+
self._os_client = os_client or self._get_opensearch_client(
|
|
118
|
+
self._endpoint, **kwargs
|
|
119
|
+
)
|
|
120
|
+
self._os_async_client = self._get_async_opensearch_client(
|
|
115
121
|
self._endpoint, **kwargs
|
|
116
122
|
)
|
|
123
|
+
self._os_version = self._get_opensearch_version()
|
|
124
|
+
self._efficient_filtering_enabled = self._is_efficient_filtering_enabled(
|
|
125
|
+
self._os_version
|
|
126
|
+
)
|
|
117
127
|
not_found_error = self._import_not_found_error()
|
|
118
128
|
|
|
119
|
-
event_loop = asyncio.get_event_loop()
|
|
120
129
|
try:
|
|
121
|
-
|
|
122
|
-
self._os_client.indices.get(index=self._index)
|
|
123
|
-
)
|
|
130
|
+
self._os_client.indices.get(index=self._index)
|
|
124
131
|
except not_found_error:
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
)
|
|
128
|
-
event_loop.run_until_complete(
|
|
129
|
-
self._os_client.indices.refresh(index=self._index)
|
|
130
|
-
)
|
|
132
|
+
self._os_client.indices.create(index=self._index, body=idx_conf)
|
|
133
|
+
self._os_client.indices.refresh(index=self._index)
|
|
131
134
|
|
|
132
|
-
def
|
|
135
|
+
def _import_opensearch(self) -> Any:
|
|
133
136
|
"""Import OpenSearch if available, otherwise raise error."""
|
|
137
|
+
try:
|
|
138
|
+
from opensearchpy import OpenSearch
|
|
139
|
+
except ImportError:
|
|
140
|
+
raise ImportError(IMPORT_OPENSEARCH_PY_ERROR)
|
|
141
|
+
return OpenSearch
|
|
142
|
+
|
|
143
|
+
def _import_async_opensearch(self) -> Any:
|
|
144
|
+
"""Import AsyncOpenSearch if available, otherwise raise error."""
|
|
145
|
+
try:
|
|
146
|
+
from opensearchpy import AsyncOpenSearch
|
|
147
|
+
except ImportError:
|
|
148
|
+
raise ImportError(IMPORT_ASYNC_OPENSEARCH_PY_ERROR)
|
|
134
149
|
return AsyncOpenSearch
|
|
135
150
|
|
|
136
|
-
def
|
|
151
|
+
def _import_bulk(self) -> Any:
|
|
137
152
|
"""Import bulk if available, otherwise raise error."""
|
|
153
|
+
try:
|
|
154
|
+
from opensearchpy.helpers import bulk
|
|
155
|
+
except ImportError:
|
|
156
|
+
raise ImportError(IMPORT_OPENSEARCH_PY_ERROR)
|
|
157
|
+
return bulk
|
|
158
|
+
|
|
159
|
+
def _import_async_bulk(self) -> Any:
|
|
160
|
+
"""Import async_bulk if available, otherwise raise error."""
|
|
161
|
+
try:
|
|
162
|
+
from opensearchpy.helpers import async_bulk
|
|
163
|
+
except ImportError:
|
|
164
|
+
raise ImportError(IMPORT_ASYNC_OPENSEARCH_PY_ERROR)
|
|
138
165
|
return async_bulk
|
|
139
166
|
|
|
140
167
|
def _import_not_found_error(self) -> Any:
|
|
141
168
|
"""Import not found error if available, otherwise raise error."""
|
|
169
|
+
try:
|
|
170
|
+
from opensearchpy.exceptions import NotFoundError
|
|
171
|
+
except ImportError:
|
|
172
|
+
raise ImportError(IMPORT_OPENSEARCH_PY_ERROR)
|
|
142
173
|
return NotFoundError
|
|
143
174
|
|
|
175
|
+
def _get_opensearch_client(self, opensearch_url: str, **kwargs: Any) -> Any:
|
|
176
|
+
"""Get OpenSearch client from the opensearch_url, otherwise raise error."""
|
|
177
|
+
try:
|
|
178
|
+
opensearch = self._import_opensearch()
|
|
179
|
+
client = opensearch(opensearch_url, **kwargs)
|
|
180
|
+
except ValueError as e:
|
|
181
|
+
raise ImportError(
|
|
182
|
+
f"OpenSearch client string provided is not in proper format. "
|
|
183
|
+
f"Got error: {e} "
|
|
184
|
+
)
|
|
185
|
+
return client
|
|
186
|
+
|
|
144
187
|
def _get_async_opensearch_client(self, opensearch_url: str, **kwargs: Any) -> Any:
|
|
145
188
|
"""Get AsyncOpenSearch client from the opensearch_url, otherwise raise error."""
|
|
146
189
|
try:
|
|
@@ -154,7 +197,62 @@ class OpensearchVectorClient:
|
|
|
154
197
|
)
|
|
155
198
|
return client
|
|
156
199
|
|
|
157
|
-
|
|
200
|
+
def _get_opensearch_version(self) -> str:
|
|
201
|
+
info = self._os_client.info()
|
|
202
|
+
return info["version"]["number"]
|
|
203
|
+
|
|
204
|
+
def _bulk_ingest_embeddings(
|
|
205
|
+
self,
|
|
206
|
+
client: Any,
|
|
207
|
+
index_name: str,
|
|
208
|
+
embeddings: List[List[float]],
|
|
209
|
+
texts: Iterable[str],
|
|
210
|
+
metadatas: Optional[List[dict]] = None,
|
|
211
|
+
ids: Optional[List[str]] = None,
|
|
212
|
+
vector_field: str = "embedding",
|
|
213
|
+
text_field: str = "content",
|
|
214
|
+
mapping: Optional[Dict] = None,
|
|
215
|
+
max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,
|
|
216
|
+
is_aoss: bool = False,
|
|
217
|
+
) -> List[str]:
|
|
218
|
+
"""Bulk Ingest Embeddings into given index."""
|
|
219
|
+
if not mapping:
|
|
220
|
+
mapping = {}
|
|
221
|
+
|
|
222
|
+
bulk = self._import_bulk()
|
|
223
|
+
not_found_error = self._import_not_found_error()
|
|
224
|
+
requests = []
|
|
225
|
+
return_ids = []
|
|
226
|
+
|
|
227
|
+
try:
|
|
228
|
+
client.indices.get(index=index_name)
|
|
229
|
+
except not_found_error:
|
|
230
|
+
client.indices.create(index=index_name, body=mapping)
|
|
231
|
+
|
|
232
|
+
for i, text in enumerate(texts):
|
|
233
|
+
metadata = metadatas[i] if metadatas else {}
|
|
234
|
+
_id = ids[i] if ids else str(uuid.uuid4())
|
|
235
|
+
request = {
|
|
236
|
+
"_op_type": "index",
|
|
237
|
+
"_index": index_name,
|
|
238
|
+
vector_field: embeddings[i],
|
|
239
|
+
text_field: text,
|
|
240
|
+
"metadata": metadata,
|
|
241
|
+
}
|
|
242
|
+
if is_aoss:
|
|
243
|
+
request["id"] = _id
|
|
244
|
+
else:
|
|
245
|
+
request["_id"] = _id
|
|
246
|
+
requests.append(request)
|
|
247
|
+
return_ids.append(_id)
|
|
248
|
+
|
|
249
|
+
bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
|
|
250
|
+
if not is_aoss:
|
|
251
|
+
client.indices.refresh(index=index_name)
|
|
252
|
+
|
|
253
|
+
return return_ids
|
|
254
|
+
|
|
255
|
+
async def _abulk_ingest_embeddings(
|
|
158
256
|
self,
|
|
159
257
|
client: Any,
|
|
160
258
|
index_name: str,
|
|
@@ -176,7 +274,6 @@ class OpensearchVectorClient:
|
|
|
176
274
|
not_found_error = self._import_not_found_error()
|
|
177
275
|
requests = []
|
|
178
276
|
return_ids = []
|
|
179
|
-
mapping = mapping
|
|
180
277
|
|
|
181
278
|
try:
|
|
182
279
|
await client.indices.get(index=index_name)
|
|
@@ -199,23 +296,38 @@ class OpensearchVectorClient:
|
|
|
199
296
|
request["_id"] = _id
|
|
200
297
|
requests.append(request)
|
|
201
298
|
return_ids.append(_id)
|
|
299
|
+
|
|
202
300
|
await async_bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
|
|
203
301
|
if not is_aoss:
|
|
204
302
|
await client.indices.refresh(index=index_name)
|
|
303
|
+
|
|
205
304
|
return return_ids
|
|
206
305
|
|
|
207
306
|
def _default_approximate_search_query(
|
|
208
307
|
self,
|
|
209
308
|
query_vector: List[float],
|
|
210
309
|
k: int = 4,
|
|
310
|
+
filters: Optional[Union[Dict, List]] = None,
|
|
211
311
|
vector_field: str = "embedding",
|
|
212
312
|
) -> Dict:
|
|
213
313
|
"""For Approximate k-NN Search, this is the default query."""
|
|
214
|
-
|
|
314
|
+
query = {
|
|
215
315
|
"size": k,
|
|
216
|
-
"query": {
|
|
316
|
+
"query": {
|
|
317
|
+
"knn": {
|
|
318
|
+
vector_field: {
|
|
319
|
+
"vector": query_vector,
|
|
320
|
+
"k": k,
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
},
|
|
217
324
|
}
|
|
218
325
|
|
|
326
|
+
if filters:
|
|
327
|
+
# filter key must be added only when filtering to avoid "filter doesn't support values of type: START_ARRAY" exception
|
|
328
|
+
query["query"]["knn"][vector_field]["filter"] = filters
|
|
329
|
+
return query
|
|
330
|
+
|
|
219
331
|
def _is_text_field(self, value: Any) -> bool:
|
|
220
332
|
"""Check if value is a string and keyword filtering needs to be performed.
|
|
221
333
|
|
|
@@ -256,7 +368,12 @@ class OpensearchVectorClient:
|
|
|
256
368
|
}
|
|
257
369
|
}
|
|
258
370
|
elif op in [FilterOperator.IN, FilterOperator.ANY]:
|
|
259
|
-
|
|
371
|
+
if isinstance(filter.value, list) and all(
|
|
372
|
+
self._is_text_field(val) for val in filter.value
|
|
373
|
+
):
|
|
374
|
+
return {"terms": {f"{key}.keyword": filter.value}}
|
|
375
|
+
else:
|
|
376
|
+
return {"terms": {key: filter.value}}
|
|
260
377
|
elif op == FilterOperator.NIN:
|
|
261
378
|
return {"bool": {"must_not": {"terms": {key: filter.value}}}}
|
|
262
379
|
elif op == FilterOperator.ALL:
|
|
@@ -306,52 +423,73 @@ class OpensearchVectorClient:
|
|
|
306
423
|
query_embedding: List[float],
|
|
307
424
|
k: int,
|
|
308
425
|
filters: Optional[MetadataFilters] = None,
|
|
426
|
+
search_method="approximate",
|
|
309
427
|
) -> Dict:
|
|
310
428
|
"""
|
|
311
|
-
|
|
429
|
+
Perform a k-Nearest Neighbors (kNN) search.
|
|
312
430
|
|
|
313
|
-
If
|
|
314
|
-
|
|
315
|
-
|
|
431
|
+
If the search method is "approximate" and the engine is "lucene" or "faiss", use efficient kNN filtering.
|
|
432
|
+
Otherwise, perform an exhaustive exact kNN search using "painless scripting" if the version of
|
|
433
|
+
OpenSearch supports it. If the OpenSearch version does not support it, use scoring script search.
|
|
316
434
|
|
|
317
435
|
Note:
|
|
318
|
-
-AWS
|
|
319
|
-
-
|
|
436
|
+
- AWS OpenSearch Serverless does not support the painless scripting functionality at this time according to AWS.
|
|
437
|
+
- Approximate kNN search does not support pre-filtering.
|
|
320
438
|
|
|
321
439
|
Args:
|
|
322
|
-
query_embedding: Vector embedding to query.
|
|
323
|
-
k: Maximum number of results.
|
|
324
|
-
filters: Optional filters to apply
|
|
440
|
+
query_embedding (List[float]): Vector embedding to query.
|
|
441
|
+
k (int): Maximum number of results.
|
|
442
|
+
filters (Optional[MetadataFilters]): Optional filters to apply for the search.
|
|
325
443
|
Supports filter-context queries documented at
|
|
326
444
|
https://opensearch.org/docs/latest/query-dsl/query-filter-context/
|
|
327
445
|
|
|
328
446
|
Returns:
|
|
329
|
-
Up to k
|
|
447
|
+
Dict: Up to k documents closest to query_embedding.
|
|
330
448
|
"""
|
|
331
|
-
|
|
332
|
-
|
|
449
|
+
filters = self._parse_filters(filters)
|
|
450
|
+
|
|
451
|
+
if not filters:
|
|
333
452
|
search_query = self._default_approximate_search_query(
|
|
334
|
-
query_embedding, k, vector_field=embedding_field
|
|
335
|
-
)
|
|
336
|
-
elif self.is_aoss:
|
|
337
|
-
# if is_aoss is set we are using Opensearch Serverless AWS offering which cannot use
|
|
338
|
-
# painless scripting so default scoring script returned will be just normal knn_score script
|
|
339
|
-
search_query = self._default_scoring_script_query(
|
|
340
453
|
query_embedding,
|
|
341
454
|
k,
|
|
342
|
-
space_type=self.space_type,
|
|
343
|
-
pre_filter={"bool": {"filter": pre_filter}},
|
|
344
455
|
vector_field=embedding_field,
|
|
345
456
|
)
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
457
|
+
elif (
|
|
458
|
+
search_method == "approximate"
|
|
459
|
+
and self._method["engine"]
|
|
460
|
+
in [
|
|
461
|
+
"lucene",
|
|
462
|
+
"faiss",
|
|
463
|
+
]
|
|
464
|
+
and self._efficient_filtering_enabled
|
|
465
|
+
):
|
|
466
|
+
# if engine is lucene or faiss, opensearch recommends efficient-kNN filtering.
|
|
467
|
+
search_query = self._default_approximate_search_query(
|
|
349
468
|
query_embedding,
|
|
350
469
|
k,
|
|
351
|
-
|
|
352
|
-
pre_filter={"bool": {"filter": pre_filter}},
|
|
470
|
+
filters={"bool": {"filter": filters}},
|
|
353
471
|
vector_field=embedding_field,
|
|
354
472
|
)
|
|
473
|
+
else:
|
|
474
|
+
if self.is_aoss:
|
|
475
|
+
# if is_aoss is set we are using Opensearch Serverless AWS offering which cannot use
|
|
476
|
+
# painless scripting so default scoring script returned will be just normal knn_score script
|
|
477
|
+
search_query = self._default_scoring_script_query(
|
|
478
|
+
query_embedding,
|
|
479
|
+
k,
|
|
480
|
+
space_type=self.space_type,
|
|
481
|
+
pre_filter={"bool": {"filter": filters}},
|
|
482
|
+
vector_field=embedding_field,
|
|
483
|
+
)
|
|
484
|
+
else:
|
|
485
|
+
# https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
|
|
486
|
+
search_query = self._default_scoring_script_query(
|
|
487
|
+
query_embedding,
|
|
488
|
+
k,
|
|
489
|
+
space_type="l2Squared",
|
|
490
|
+
pre_filter={"bool": {"filter": filters}},
|
|
491
|
+
vector_field=embedding_field,
|
|
492
|
+
)
|
|
355
493
|
return search_query
|
|
356
494
|
|
|
357
495
|
def _hybrid_search_query(
|
|
@@ -476,7 +614,12 @@ class OpensearchVectorClient:
|
|
|
476
614
|
return True
|
|
477
615
|
return False
|
|
478
616
|
|
|
479
|
-
|
|
617
|
+
def _is_efficient_filtering_enabled(self, os_version: str) -> bool:
|
|
618
|
+
"""Check if kNN with efficient filtering is enabled."""
|
|
619
|
+
major, minor, patch = os_version.split(".")
|
|
620
|
+
return int(major) >= 2 and int(minor) >= 9
|
|
621
|
+
|
|
622
|
+
def index_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
480
623
|
"""Store results in the index."""
|
|
481
624
|
embeddings: List[List[float]] = []
|
|
482
625
|
texts: List[str] = []
|
|
@@ -488,7 +631,7 @@ class OpensearchVectorClient:
|
|
|
488
631
|
texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
|
|
489
632
|
metadatas.append(node_to_metadata_dict(node, remove_text=True))
|
|
490
633
|
|
|
491
|
-
return
|
|
634
|
+
return self._bulk_ingest_embeddings(
|
|
492
635
|
self._os_client,
|
|
493
636
|
self._index,
|
|
494
637
|
embeddings,
|
|
@@ -502,7 +645,33 @@ class OpensearchVectorClient:
|
|
|
502
645
|
is_aoss=self.is_aoss,
|
|
503
646
|
)
|
|
504
647
|
|
|
505
|
-
async def
|
|
648
|
+
async def aindex_results(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]:
|
|
649
|
+
"""Store results in the index."""
|
|
650
|
+
embeddings: List[List[float]] = []
|
|
651
|
+
texts: List[str] = []
|
|
652
|
+
metadatas: List[dict] = []
|
|
653
|
+
ids: List[str] = []
|
|
654
|
+
for node in nodes:
|
|
655
|
+
ids.append(node.node_id)
|
|
656
|
+
embeddings.append(node.get_embedding())
|
|
657
|
+
texts.append(node.get_content(metadata_mode=MetadataMode.NONE))
|
|
658
|
+
metadatas.append(node_to_metadata_dict(node, remove_text=True))
|
|
659
|
+
|
|
660
|
+
return await self._abulk_ingest_embeddings(
|
|
661
|
+
self._os_async_client,
|
|
662
|
+
self._index,
|
|
663
|
+
embeddings,
|
|
664
|
+
texts,
|
|
665
|
+
metadatas=metadatas,
|
|
666
|
+
ids=ids,
|
|
667
|
+
vector_field=self._embedding_field,
|
|
668
|
+
text_field=self._text_field,
|
|
669
|
+
mapping=None,
|
|
670
|
+
max_chunk_bytes=self._max_chunk_bytes,
|
|
671
|
+
is_aoss=self.is_aoss,
|
|
672
|
+
)
|
|
673
|
+
|
|
674
|
+
def delete_by_doc_id(self, doc_id: str) -> None:
|
|
506
675
|
"""
|
|
507
676
|
Deletes all OpenSearch documents corresponding to the given LlamaIndex `Document` ID.
|
|
508
677
|
|
|
@@ -512,11 +681,49 @@ class OpensearchVectorClient:
|
|
|
512
681
|
search_query = {
|
|
513
682
|
"query": {"term": {"metadata.doc_id.keyword": {"value": doc_id}}}
|
|
514
683
|
}
|
|
515
|
-
|
|
684
|
+
self._os_client.delete_by_query(
|
|
516
685
|
index=self._index, body=search_query, refresh=True
|
|
517
686
|
)
|
|
518
687
|
|
|
519
|
-
async def
|
|
688
|
+
async def adelete_by_doc_id(self, doc_id: str) -> None:
|
|
689
|
+
"""
|
|
690
|
+
Deletes all OpenSearch documents corresponding to the given LlamaIndex `Document` ID.
|
|
691
|
+
|
|
692
|
+
Args:
|
|
693
|
+
doc_id (str): a LlamaIndex `Document` id
|
|
694
|
+
"""
|
|
695
|
+
search_query = {
|
|
696
|
+
"query": {"term": {"metadata.doc_id.keyword": {"value": doc_id}}}
|
|
697
|
+
}
|
|
698
|
+
await self._os_async_client.delete_by_query(
|
|
699
|
+
index=self._index, body=search_query, refresh=True
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
def delete_nodes(
|
|
703
|
+
self,
|
|
704
|
+
node_ids: Optional[List[str]] = None,
|
|
705
|
+
filters: Optional[MetadataFilters] = None,
|
|
706
|
+
**delete_kwargs: Any,
|
|
707
|
+
) -> None:
|
|
708
|
+
"""Deletes nodes.
|
|
709
|
+
|
|
710
|
+
Args:
|
|
711
|
+
node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
|
|
712
|
+
filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
|
|
713
|
+
"""
|
|
714
|
+
if not node_ids and not filters:
|
|
715
|
+
return
|
|
716
|
+
|
|
717
|
+
query = {"query": {"bool": {"filter": []}}}
|
|
718
|
+
if node_ids:
|
|
719
|
+
query["query"]["bool"]["filter"].append({"terms": {"_id": node_ids or []}})
|
|
720
|
+
|
|
721
|
+
if filters:
|
|
722
|
+
query["query"]["bool"]["filter"].extend(self._parse_filters(filters))
|
|
723
|
+
|
|
724
|
+
self._os_client.delete_by_query(index=self._index, body=query, refresh=True)
|
|
725
|
+
|
|
726
|
+
async def adelete_nodes(
|
|
520
727
|
self,
|
|
521
728
|
node_ids: Optional[List[str]] = None,
|
|
522
729
|
filters: Optional[MetadataFilters] = None,
|
|
@@ -538,17 +745,61 @@ class OpensearchVectorClient:
|
|
|
538
745
|
if filters:
|
|
539
746
|
query["query"]["bool"]["filter"].extend(self._parse_filters(filters))
|
|
540
747
|
|
|
541
|
-
await self.
|
|
748
|
+
await self._os_async_client.delete_by_query(
|
|
542
749
|
index=self._index, body=query, refresh=True
|
|
543
750
|
)
|
|
544
751
|
|
|
545
|
-
|
|
752
|
+
def clear(self) -> None:
|
|
753
|
+
"""Clears index."""
|
|
754
|
+
query = {"query": {"bool": {"filter": []}}}
|
|
755
|
+
self._os_client.delete_by_query(index=self._index, body=query, refresh=True)
|
|
756
|
+
|
|
757
|
+
async def aclear(self) -> None:
|
|
546
758
|
"""Clears index."""
|
|
547
759
|
query = {"query": {"bool": {"filter": []}}}
|
|
548
|
-
await self.
|
|
760
|
+
await self._os_async_client.delete_by_query(
|
|
549
761
|
index=self._index, body=query, refresh=True
|
|
550
762
|
)
|
|
551
763
|
|
|
764
|
+
def query(
|
|
765
|
+
self,
|
|
766
|
+
query_mode: VectorStoreQueryMode,
|
|
767
|
+
query_str: Optional[str],
|
|
768
|
+
query_embedding: List[float],
|
|
769
|
+
k: int,
|
|
770
|
+
filters: Optional[MetadataFilters] = None,
|
|
771
|
+
) -> VectorStoreQueryResult:
|
|
772
|
+
if query_mode == VectorStoreQueryMode.HYBRID:
|
|
773
|
+
if query_str is None or self._search_pipeline is None:
|
|
774
|
+
raise ValueError(INVALID_HYBRID_QUERY_ERROR)
|
|
775
|
+
search_query = self._hybrid_search_query(
|
|
776
|
+
self._text_field,
|
|
777
|
+
query_str,
|
|
778
|
+
self._embedding_field,
|
|
779
|
+
query_embedding,
|
|
780
|
+
k,
|
|
781
|
+
filters=filters,
|
|
782
|
+
)
|
|
783
|
+
params = {
|
|
784
|
+
"search_pipeline": self._search_pipeline,
|
|
785
|
+
}
|
|
786
|
+
elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
|
|
787
|
+
search_query = self._lexical_search_query(
|
|
788
|
+
self._text_field, query_str, k, filters=filters
|
|
789
|
+
)
|
|
790
|
+
params = None
|
|
791
|
+
else:
|
|
792
|
+
search_query = self._knn_search_query(
|
|
793
|
+
self._embedding_field, query_embedding, k, filters=filters
|
|
794
|
+
)
|
|
795
|
+
params = None
|
|
796
|
+
|
|
797
|
+
res = self._os_client.search(
|
|
798
|
+
index=self._index, body=search_query, params=params
|
|
799
|
+
)
|
|
800
|
+
|
|
801
|
+
return self._to_query_result(res)
|
|
802
|
+
|
|
552
803
|
async def aquery(
|
|
553
804
|
self,
|
|
554
805
|
query_mode: VectorStoreQueryMode,
|
|
@@ -582,7 +833,7 @@ class OpensearchVectorClient:
|
|
|
582
833
|
)
|
|
583
834
|
params = None
|
|
584
835
|
|
|
585
|
-
res = await self.
|
|
836
|
+
res = await self._os_async_client.search(
|
|
586
837
|
index=self._index, body=search_query, params=params
|
|
587
838
|
)
|
|
588
839
|
|
|
@@ -693,9 +944,8 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
693
944
|
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
694
945
|
|
|
695
946
|
"""
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
)
|
|
947
|
+
self._client.index_results(nodes)
|
|
948
|
+
return [result.node_id for result in nodes]
|
|
699
949
|
|
|
700
950
|
async def async_add(
|
|
701
951
|
self,
|
|
@@ -709,32 +959,30 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
709
959
|
nodes: List[BaseNode]: list of nodes with embeddings.
|
|
710
960
|
|
|
711
961
|
"""
|
|
712
|
-
await self._client.
|
|
962
|
+
await self._client.aindex_results(nodes)
|
|
713
963
|
return [result.node_id for result in nodes]
|
|
714
964
|
|
|
715
965
|
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
716
966
|
"""
|
|
717
|
-
Delete nodes using
|
|
967
|
+
Delete nodes using with ref_doc_id.
|
|
718
968
|
|
|
719
969
|
Args:
|
|
720
|
-
ref_doc_id (str): The doc_id of the document
|
|
970
|
+
ref_doc_id (str): The doc_id of the document to delete.
|
|
721
971
|
|
|
722
972
|
"""
|
|
723
|
-
|
|
724
|
-
self.adelete(ref_doc_id, **delete_kwargs)
|
|
725
|
-
)
|
|
973
|
+
self._client.delete_by_doc_id(ref_doc_id)
|
|
726
974
|
|
|
727
975
|
async def adelete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
|
|
728
976
|
"""
|
|
729
|
-
Async delete nodes using
|
|
977
|
+
Async delete nodes using with ref_doc_id.
|
|
730
978
|
|
|
731
979
|
Args:
|
|
732
|
-
ref_doc_id (str): The doc_id of the document
|
|
980
|
+
ref_doc_id (str): The doc_id of the document to delete.
|
|
733
981
|
|
|
734
982
|
"""
|
|
735
|
-
await self._client.
|
|
983
|
+
await self._client.adelete_by_doc_id(ref_doc_id)
|
|
736
984
|
|
|
737
|
-
|
|
985
|
+
def delete_nodes(
|
|
738
986
|
self,
|
|
739
987
|
node_ids: Optional[List[str]] = None,
|
|
740
988
|
filters: Optional[MetadataFilters] = None,
|
|
@@ -746,31 +994,29 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
746
994
|
node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
|
|
747
995
|
filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
|
|
748
996
|
"""
|
|
749
|
-
|
|
997
|
+
self._client.delete_nodes(node_ids, filters, **delete_kwargs)
|
|
750
998
|
|
|
751
|
-
def
|
|
999
|
+
async def adelete_nodes(
|
|
752
1000
|
self,
|
|
753
1001
|
node_ids: Optional[List[str]] = None,
|
|
754
1002
|
filters: Optional[MetadataFilters] = None,
|
|
755
1003
|
**delete_kwargs: Any,
|
|
756
1004
|
) -> None:
|
|
757
|
-
"""
|
|
1005
|
+
"""Async deletes nodes async.
|
|
758
1006
|
|
|
759
1007
|
Args:
|
|
760
1008
|
node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
|
|
761
1009
|
filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
|
|
762
1010
|
"""
|
|
763
|
-
|
|
764
|
-
self.adelete_nodes(node_ids, filters, **delete_kwargs)
|
|
765
|
-
)
|
|
766
|
-
|
|
767
|
-
async def aclear(self) -> None:
|
|
768
|
-
"""Clears index."""
|
|
769
|
-
await self._client.clear()
|
|
1011
|
+
await self._client.adelete_nodes(node_ids, filters, **delete_kwargs)
|
|
770
1012
|
|
|
771
1013
|
def clear(self) -> None:
|
|
772
1014
|
"""Clears index."""
|
|
773
|
-
|
|
1015
|
+
self._client.clear()
|
|
1016
|
+
|
|
1017
|
+
async def aclear(self) -> None:
|
|
1018
|
+
"""Async clears index."""
|
|
1019
|
+
await self._client.aclear()
|
|
774
1020
|
|
|
775
1021
|
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
|
|
776
1022
|
"""
|
|
@@ -780,7 +1026,15 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
780
1026
|
query (VectorStoreQuery): Store query object.
|
|
781
1027
|
|
|
782
1028
|
"""
|
|
783
|
-
|
|
1029
|
+
query_embedding = cast(List[float], query.query_embedding)
|
|
1030
|
+
|
|
1031
|
+
return self._client.query(
|
|
1032
|
+
query.mode,
|
|
1033
|
+
query.query_str,
|
|
1034
|
+
query_embedding,
|
|
1035
|
+
query.similarity_top_k,
|
|
1036
|
+
filters=query.filters,
|
|
1037
|
+
)
|
|
784
1038
|
|
|
785
1039
|
async def aquery(
|
|
786
1040
|
self, query: VectorStoreQuery, **kwargs: Any
|
{llama_index_vector_stores_opensearch-0.2.2 → llama_index_vector_stores_opensearch-0.4.0}/README.md
RENAMED
|
File without changes
|
|
File without changes
|