llama-index-vector-stores-opensearch 0.1.12__tar.gz → 0.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-index-vector-stores-opensearch might be problematic. See the release advisory for more details.
- {llama_index_vector_stores_opensearch-0.1.12 → llama_index_vector_stores_opensearch-0.1.14}/PKG-INFO +1 -1
- llama_index_vector_stores_opensearch-0.1.14/llama_index/py.typed +0 -0
- {llama_index_vector_stores_opensearch-0.1.12 → llama_index_vector_stores_opensearch-0.1.14}/llama_index/vector_stores/opensearch/base.py +153 -7
- {llama_index_vector_stores_opensearch-0.1.12 → llama_index_vector_stores_opensearch-0.1.14}/pyproject.toml +1 -1
- {llama_index_vector_stores_opensearch-0.1.12 → llama_index_vector_stores_opensearch-0.1.14}/README.md +0 -0
- {llama_index_vector_stores_opensearch-0.1.12 → llama_index_vector_stores_opensearch-0.1.14}/llama_index/vector_stores/opensearch/__init__.py +0 -0
|
File without changes
|
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
"""Elasticsearch/Opensearch vector store."""
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
-
import json
|
|
5
4
|
import uuid
|
|
5
|
+
from datetime import datetime
|
|
6
6
|
from typing import Any, Dict, Iterable, List, Optional, Union, cast
|
|
7
7
|
|
|
8
8
|
from llama_index.core.bridge.pydantic import PrivateAttr
|
|
9
9
|
|
|
10
10
|
from llama_index.core.schema import BaseNode, MetadataMode, TextNode
|
|
11
11
|
from llama_index.core.vector_stores.types import (
|
|
12
|
+
FilterCondition,
|
|
13
|
+
FilterOperator,
|
|
14
|
+
MetadataFilter,
|
|
12
15
|
MetadataFilters,
|
|
13
16
|
BasePydanticVectorStore,
|
|
14
17
|
VectorStoreQuery,
|
|
@@ -210,13 +213,89 @@ class OpensearchVectorClient:
|
|
|
210
213
|
"query": {"knn": {vector_field: {"vector": query_vector, "k": k}}},
|
|
211
214
|
}
|
|
212
215
|
|
|
213
|
-
def
|
|
214
|
-
|
|
215
|
-
if filters is not None:
|
|
216
|
-
for f in filters.legacy_filters():
|
|
217
|
-
pre_filter.append({f.key: json.loads(str(f.value))})
|
|
216
|
+
def _is_text_field(self, value: Any) -> bool:
    """Check if value is a string and keyword filtering needs to be performed.

    OpenSearch maps plain strings as analyzed ``text`` fields with a
    ``.keyword`` sub-field for exact matching, so string filter values need
    the ``.keyword`` postfix. Date-like strings are excluded: any string
    that parses with ``datetime.fromisoformat`` is treated as a date field,
    not a text field.

    Args:
        value: The filter value being inspected.

    Returns:
        True if ``value`` is a non-datetime string, False otherwise.
    """
    if not isinstance(value, str):
        return False
    try:
        datetime.fromisoformat(value)
    except ValueError:
        # Not an ISO-format datetime string -> treat as a text field.
        return True
    return False
|
|
229
|
+
|
|
230
|
+
def _parse_filter(self, filter: MetadataFilter) -> dict:
    """Translate a single MetadataFilter into an OpenSearch query clause.

    As Opensearch does not differentiate between scalar/array keyword
    fields, IN and ANY are equivalent.
    """
    field = f"metadata.{filter.key}"
    operator = filter.operator

    # Exact (term) matches against analyzed text fields must target the
    # .keyword sub-field; datetimes and non-strings use the field directly.
    suffix = ".keyword" if self._is_text_field(value=filter.value) else ""
    exact_field = f"{field}{suffix}"

    if operator == FilterOperator.EQ:
        return {"term": {exact_field: filter.value}}
    if operator == FilterOperator.NE:
        return {"bool": {"must_not": {"term": {exact_field: filter.value}}}}
    if operator in (
        FilterOperator.GT,
        FilterOperator.GTE,
        FilterOperator.LT,
        FilterOperator.LTE,
    ):
        # Enum names lowercase to exactly the range keywords: gt/gte/lt/lte.
        return {"range": {field: {filter.operator.name.lower(): filter.value}}}
    if operator in (FilterOperator.IN, FilterOperator.ANY):
        return {"terms": {field: filter.value}}
    if operator == FilterOperator.NIN:
        return {"bool": {"must_not": {"terms": {field: filter.value}}}}
    if operator == FilterOperator.ALL:
        # terms_set with num_terms requires every listed term to match.
        return {
            "terms_set": {
                field: {
                    "terms": filter.value,
                    "minimum_should_match_script": {"source": "params.num_terms"},
                }
            }
        }
    if operator == FilterOperator.TEXT_MATCH:
        return {"match": {field: {"query": filter.value, "fuzziness": "AUTO"}}}
    if operator == FilterOperator.CONTAINS:
        return {"wildcard": {field: f"*{filter.value}*"}}
    raise ValueError(f"Unsupported filter operator: {filter.operator}")
|
|
274
|
+
|
|
275
|
+
def _parse_filters_recursively(self, filters: MetadataFilters) -> dict:
    """Parse (possibly nested) MetadataFilters to equivalent OpenSearch expression."""
    # AND -> every clause must match; OR -> at least one should match.
    clause_for_condition = {FilterCondition.AND: "must", FilterCondition.OR: "should"}
    clause = clause_for_condition[filters.condition]

    parsed_clauses: list = []
    for item in filters.filters:
        if isinstance(item, MetadataFilter):
            parsed_clauses.append(self._parse_filter(item))
        elif isinstance(item, MetadataFilters):
            # Nested group: recurse to build its own bool sub-query.
            parsed_clauses.append(self._parse_filters_recursively(item))
        else:
            raise ValueError(f"Unsupported filter type: {type(item)}")

    return {"bool": {clause: parsed_clauses}}
|
|
293
|
+
|
|
294
|
+
def _parse_filters(self, filters: Optional[MetadataFilters]) -> List[dict]:
    """Parse MetadataFilters to equivalent OpenSearch expression.

    Returns an empty list when no filters are given, so callers can
    always splice the result into a bool ``filter`` clause.
    """
    if filters is None:
        return []
    return [self._parse_filters_recursively(filters=filters)]
|
|
220
299
|
|
|
221
300
|
def _knn_search_query(
|
|
222
301
|
self,
|
|
@@ -389,6 +468,35 @@ class OpensearchVectorClient:
|
|
|
389
468
|
}
|
|
390
469
|
await self._os_client.delete_by_query(index=self._index, body=search_query)
|
|
391
470
|
|
|
471
|
+
async def delete_nodes(
    self,
    node_ids: Optional[List[str]] = None,
    filters: Optional[MetadataFilters] = None,
    **delete_kwargs: Any,
) -> None:
    """Deletes nodes.

    Args:
        node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
        filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
        **delete_kwargs: Accepted for interface compatibility; currently unused.

    Note:
        Returns without deleting anything when neither ``node_ids`` nor
        ``filters`` is provided, so an empty call can never wipe the index.
    """
    if not node_ids and not filters:
        return

    query: Dict[str, Any] = {"query": {"bool": {"filter": []}}}
    if node_ids:
        # node_ids is non-empty inside this branch, so no fallback is needed.
        query["query"]["bool"]["filter"].append({"terms": {"_id": node_ids}})

    if filters:
        query["query"]["bool"]["filter"].extend(self._parse_filters(filters))

    await self._os_client.delete_by_query(index=self._index, body=query)
|
|
494
|
+
|
|
495
|
+
async def clear(self) -> None:
    """Clears index.

    Issues a delete-by-query with an empty filter, which matches — and
    removes — every document in the index.
    """
    match_all_query = {"query": {"bool": {"filter": []}}}
    await self._os_client.delete_by_query(index=self._index, body=match_all_query)
|
|
499
|
+
|
|
392
500
|
async def aquery(
|
|
393
501
|
self,
|
|
394
502
|
query_mode: VectorStoreQueryMode,
|
|
@@ -574,6 +682,44 @@ class OpensearchVectorStore(BasePydanticVectorStore):
|
|
|
574
682
|
"""
|
|
575
683
|
await self._client.delete_by_doc_id(ref_doc_id)
|
|
576
684
|
|
|
685
|
+
async def adelete_nodes(
    self,
    node_ids: Optional[List[str]] = None,
    filters: Optional[MetadataFilters] = None,
    **delete_kwargs: Any,
) -> None:
    """Deletes nodes async.

    Thin wrapper that forwards the request to the underlying
    OpensearchVectorClient.

    Args:
        node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
        filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
    """
    await self._client.delete_nodes(node_ids, filters, **delete_kwargs)
|
|
698
|
+
|
|
699
|
+
def delete_nodes(
    self,
    node_ids: Optional[List[str]] = None,
    filters: Optional[MetadataFilters] = None,
    **delete_kwargs: Any,
) -> None:
    """Deletes nodes.

    Synchronous wrapper: drives the async ``adelete_nodes`` on the
    current event loop.

    Args:
        node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
        filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
    """
    delete_coro = self.adelete_nodes(node_ids, filters, **delete_kwargs)
    asyncio.get_event_loop().run_until_complete(delete_coro)
|
|
714
|
+
|
|
715
|
+
async def aclear(self) -> None:
    """Clears index.

    Asynchronously removes every document via the underlying client.
    """
    await self._client.clear()
|
|
718
|
+
|
|
719
|
+
def clear(self) -> None:
    """Clears index.

    Synchronous wrapper: drives the async ``aclear`` on the current
    event loop.
    """
    asyncio.get_event_loop().run_until_complete(self.aclear())
|
|
722
|
+
|
|
577
723
|
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
|
|
578
724
|
"""
|
|
579
725
|
Query index for top k most similar nodes.
|
|
File without changes
|