llama-index-vector-stores-opensearch 0.1.12__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-index-vector-stores-opensearch might be problematic; consult the registry's advisory page for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.1.12
3
+ Version: 0.1.14
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  License: MIT
6
6
  Author: Your Name
@@ -1,14 +1,17 @@
1
1
  """Elasticsearch/Opensearch vector store."""
2
2
 
3
3
  import asyncio
4
- import json
5
4
  import uuid
5
+ from datetime import datetime
6
6
  from typing import Any, Dict, Iterable, List, Optional, Union, cast
7
7
 
8
8
  from llama_index.core.bridge.pydantic import PrivateAttr
9
9
 
10
10
  from llama_index.core.schema import BaseNode, MetadataMode, TextNode
11
11
  from llama_index.core.vector_stores.types import (
12
+ FilterCondition,
13
+ FilterOperator,
14
+ MetadataFilter,
12
15
  MetadataFilters,
13
16
  BasePydanticVectorStore,
14
17
  VectorStoreQuery,
@@ -210,13 +213,89 @@ class OpensearchVectorClient:
210
213
  "query": {"knn": {vector_field: {"vector": query_vector, "k": k}}},
211
214
  }
212
215
 
213
- def _parse_filters(self, filters: Optional[MetadataFilters]) -> Any:
214
- pre_filter = []
215
- if filters is not None:
216
- for f in filters.legacy_filters():
217
- pre_filter.append({f.key: json.loads(str(f.value))})
216
+ def _is_text_field(self, value: Any) -> bool:
217
+ """Check if value is a string and keyword filtering needs to be performed.
218
218
 
219
- return pre_filter
219
+ Not applied to datetime strings.
220
+ """
221
+ if isinstance(value, str):
222
+ try:
223
+ datetime.fromisoformat(value)
224
+ return False
225
+ except ValueError as e:
226
+ return True
227
+ else:
228
+ return False
229
+
230
def _parse_filter(self, filter: MetadataFilter) -> dict:
    """Translate one MetadataFilter into the equivalent OpenSearch clause.

    OpenSearch does not distinguish scalar keyword fields from array
    keyword fields, so IN and ANY map to the same ``terms`` query.

    Raises:
        ValueError: If the filter's operator has no OpenSearch mapping.
    """
    field = f"metadata.{filter.key}"
    operator = filter.operator
    # Text values must target the ".keyword" sub-field for exact matches.
    suffix = ".keyword" if self._is_text_field(value=filter.value) else ""
    exact_field = f"{field}{suffix}"

    range_operators = (
        FilterOperator.GT,
        FilterOperator.GTE,
        FilterOperator.LT,
        FilterOperator.LTE,
    )

    if operator == FilterOperator.EQ:
        return {"term": {exact_field: filter.value}}
    if operator in range_operators:
        return {"range": {field: {filter.operator.name.lower(): filter.value}}}
    if operator == FilterOperator.NE:
        return {"bool": {"must_not": {"term": {exact_field: filter.value}}}}
    if operator in (FilterOperator.IN, FilterOperator.ANY):
        return {"terms": {field: filter.value}}
    if operator == FilterOperator.NIN:
        return {"bool": {"must_not": {"terms": {field: filter.value}}}}
    if operator == FilterOperator.ALL:
        # terms_set with num_terms script == document must contain ALL values.
        return {
            "terms_set": {
                field: {
                    "terms": filter.value,
                    "minimum_should_match_script": {"source": "params.num_terms"},
                }
            }
        }
    if operator == FilterOperator.TEXT_MATCH:
        return {"match": {field: {"query": filter.value, "fuzziness": "AUTO"}}}
    if operator == FilterOperator.CONTAINS:
        return {"wildcard": {field: f"*{filter.value}*"}}
    raise ValueError(f"Unsupported filter operator: {filter.operator}")
274
+
275
def _parse_filters_recursively(self, filters: MetadataFilters) -> dict:
    """Convert possibly-nested MetadataFilters into one OpenSearch bool query.

    AND maps to a ``must`` clause, OR to ``should``; nested MetadataFilters
    become nested bool queries.

    Raises:
        ValueError: If a filter entry is neither MetadataFilter nor
            MetadataFilters.
    """
    condition_map = {FilterCondition.AND: "must", FilterCondition.OR: "should"}
    clause = condition_map[filters.condition]

    parsed_clauses: list = []
    for item in filters.filters:
        if isinstance(item, MetadataFilter):
            parsed_clauses.append(self._parse_filter(item))
        elif isinstance(item, MetadataFilters):
            # Recurse so arbitrarily nested filter trees are supported.
            parsed_clauses.append(self._parse_filters_recursively(item))
        else:
            raise ValueError(f"Unsupported filter type: {type(item)}")

    return {"bool": {clause: parsed_clauses}}
293
+
294
def _parse_filters(self, filters: Optional[MetadataFilters]) -> List[dict]:
    """Parse MetadataFilters into a list holding one OpenSearch bool query.

    Returns an empty list when no filters are supplied.
    """
    return [] if filters is None else [self._parse_filters_recursively(filters=filters)]
220
299
 
221
300
  def _knn_search_query(
222
301
  self,
@@ -389,6 +468,35 @@ class OpensearchVectorClient:
389
468
  }
390
469
  await self._os_client.delete_by_query(index=self._index, body=search_query)
391
470
 
471
async def delete_nodes(
    self,
    node_ids: Optional[List[str]] = None,
    filters: Optional[MetadataFilters] = None,
    **delete_kwargs: Any,
) -> None:
    """Deletes nodes matching the given IDs and/or metadata filters.

    No-op when neither node_ids nor filters is provided.

    Args:
        node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
        filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
    """
    if not node_ids and not filters:
        return

    # Collect every filter clause, then issue a single delete-by-query.
    clauses: List[dict] = []
    if node_ids:
        clauses.append({"terms": {"_id": node_ids}})
    if filters:
        clauses.extend(self._parse_filters(filters))

    await self._os_client.delete_by_query(
        index=self._index, body={"query": {"bool": {"filter": clauses}}}
    )
494
+
495
async def clear(self) -> None:
    """Clears the index by deleting every document it contains."""
    # A bool query with an empty filter list matches all documents.
    delete_all = {"query": {"bool": {"filter": []}}}
    await self._os_client.delete_by_query(index=self._index, body=delete_all)
499
+
392
500
  async def aquery(
393
501
  self,
394
502
  query_mode: VectorStoreQueryMode,
@@ -574,6 +682,44 @@ class OpensearchVectorStore(BasePydanticVectorStore):
574
682
  """
575
683
  await self._client.delete_by_doc_id(ref_doc_id)
576
684
 
685
async def adelete_nodes(
    self,
    node_ids: Optional[List[str]] = None,
    filters: Optional[MetadataFilters] = None,
    **delete_kwargs: Any,
) -> None:
    """Deletes nodes async by delegating to the underlying OpenSearch client.

    Args:
        node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
        filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
    """
    client = self._client
    await client.delete_nodes(node_ids, filters, **delete_kwargs)
698
+
699
def delete_nodes(
    self,
    node_ids: Optional[List[str]] = None,
    filters: Optional[MetadataFilters] = None,
    **delete_kwargs: Any,
) -> None:
    """Deletes nodes (synchronous wrapper around ``adelete_nodes``).

    Args:
        node_ids (Optional[List[str]], optional): IDs of nodes to delete. Defaults to None.
        filters (Optional[MetadataFilters], optional): Metadata filters. Defaults to None.
    """
    # NOTE(review): asyncio.get_event_loop() is deprecated for this use
    # since Python 3.10 and fails inside a running loop — confirm callers.
    loop = asyncio.get_event_loop()
    loop.run_until_complete(self.adelete_nodes(node_ids, filters, **delete_kwargs))
714
+
715
async def aclear(self) -> None:
    """Clears index asynchronously via the underlying OpenSearch client."""
    await self._client.clear()
718
+
719
def clear(self) -> None:
    """Clears index (synchronous wrapper around ``aclear``)."""
    loop = asyncio.get_event_loop()
    loop.run_until_complete(self.aclear())
722
+
577
723
  def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
578
724
  """
579
725
  Query index for top k most similar nodes.
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
27
27
  license = "MIT"
28
28
  name = "llama-index-vector-stores-opensearch"
29
29
  readme = "README.md"
30
- version = "0.1.12"
30
+ version = "0.1.14"
31
31
 
32
32
  [tool.poetry.dependencies]
33
33
  python = ">=3.8.1,<4.0"