llama-index-vector-stores-opensearch 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.

llama_index/py.typed ADDED
File without changes
@@ -1,14 +1,17 @@
1
1
  """Elasticsearch/Opensearch vector store."""
2
2
 
3
3
  import asyncio
4
- import json
5
4
  import uuid
5
+ from datetime import datetime
6
6
  from typing import Any, Dict, Iterable, List, Optional, Union, cast
7
7
 
8
8
  from llama_index.core.bridge.pydantic import PrivateAttr
9
9
 
10
10
  from llama_index.core.schema import BaseNode, MetadataMode, TextNode
11
11
  from llama_index.core.vector_stores.types import (
12
+ FilterCondition,
13
+ FilterOperator,
14
+ MetadataFilter,
12
15
  MetadataFilters,
13
16
  BasePydanticVectorStore,
14
17
  VectorStoreQuery,
@@ -210,16 +213,89 @@ class OpensearchVectorClient:
210
213
  "query": {"knn": {vector_field: {"vector": query_vector, "k": k}}},
211
214
  }
212
215
 
213
- def _parse_filters(self, filters: Optional[MetadataFilters]) -> Any:
214
- pre_filter = []
215
- if filters is not None:
216
- for f in filters.legacy_filters():
217
- if isinstance(f.value, str):
218
- pre_filter.append({f.key: f.value})
219
- else:
220
- pre_filter.append({f.key: json.loads(str(f.value))})
216
+ def _is_text_field(self, value: Any) -> bool:
217
+ """Check if value is a string and keyword filtering needs to be performed.
221
218
 
222
- return pre_filter
219
+ Not applied to datetime strings.
220
+ """
221
+ if isinstance(value, str):
222
+ try:
223
+ datetime.fromisoformat(value)
224
+ return False
225
+ except ValueError as e:
226
+ return True
227
+ else:
228
+ return False
229
+
230
+ def _parse_filter(self, filter: MetadataFilter) -> dict:
231
+ """Parse a single MetadataFilter to equivalent OpenSearch expression.
232
+
233
+ As Opensearch does not differentiate between scalar/array keyword fields, IN and ANY are equivalent.
234
+ """
235
+ key = f"metadata.{filter.key}"
236
+ op = filter.operator
237
+
238
+ equality_postfix = ".keyword" if self._is_text_field(value=filter.value) else ""
239
+
240
+ if op == FilterOperator.EQ:
241
+ return {"term": {f"{key}{equality_postfix}": filter.value}}
242
+ elif op in [
243
+ FilterOperator.GT,
244
+ FilterOperator.GTE,
245
+ FilterOperator.LT,
246
+ FilterOperator.LTE,
247
+ ]:
248
+ return {"range": {key: {filter.operator.name.lower(): filter.value}}}
249
+ elif op == FilterOperator.NE:
250
+ return {
251
+ "bool": {
252
+ "must_not": {"term": {f"{key}{equality_postfix}": filter.value}}
253
+ }
254
+ }
255
+ elif op in [FilterOperator.IN, FilterOperator.ANY]:
256
+ return {"terms": {key: filter.value}}
257
+ elif op == FilterOperator.NIN:
258
+ return {"bool": {"must_not": {"terms": {key: filter.value}}}}
259
+ elif op == FilterOperator.ALL:
260
+ return {
261
+ "terms_set": {
262
+ key: {
263
+ "terms": filter.value,
264
+ "minimum_should_match_script": {"source": "params.num_terms"},
265
+ }
266
+ }
267
+ }
268
+ elif op == FilterOperator.TEXT_MATCH:
269
+ return {"match": {key: {"query": filter.value, "fuzziness": "AUTO"}}}
270
+ elif op == FilterOperator.CONTAINS:
271
+ return {"wildcard": {key: f"*{filter.value}*"}}
272
+ else:
273
+ raise ValueError(f"Unsupported filter operator: {filter.operator}")
274
+
275
+ def _parse_filters_recursively(self, filters: MetadataFilters) -> dict:
276
+ """Parse (possibly nested) MetadataFilters to equivalent OpenSearch expression."""
277
+ condition_map = {FilterCondition.AND: "must", FilterCondition.OR: "should"}
278
+
279
+ bool_clause = condition_map[filters.condition]
280
+ bool_query: dict[str, dict[str, list[dict]]] = {"bool": {bool_clause: []}}
281
+
282
+ for filter_item in filters.filters:
283
+ if isinstance(filter_item, MetadataFilter):
284
+ bool_query["bool"][bool_clause].append(self._parse_filter(filter_item))
285
+ elif isinstance(filter_item, MetadataFilters):
286
+ bool_query["bool"][bool_clause].append(
287
+ self._parse_filters_recursively(filter_item)
288
+ )
289
+ else:
290
+ raise ValueError(f"Unsupported filter type: {type(filter_item)}")
291
+
292
+ return bool_query
293
+
294
+ def _parse_filters(self, filters: Optional[MetadataFilters]) -> List[dict]:
295
+ """Parse MetadataFilters to equivalent OpenSearch expression."""
296
+ if filters is None:
297
+ return []
298
+ return [self._parse_filters_recursively(filters=filters)]
223
299
 
224
300
  def _knn_search_query(
225
301
  self,
@@ -412,13 +488,7 @@ class OpensearchVectorClient:
412
488
  query["query"]["bool"]["filter"].append({"terms": {"_id": node_ids or []}})
413
489
 
414
490
  if filters:
415
- for filter in self._parse_filters(filters):
416
- newfilter = {}
417
-
418
- for key in filter:
419
- newfilter[f"metadata.{key}.keyword"] = filter[key]
420
-
421
- query["query"]["bool"]["filter"].append({"term": newfilter})
491
+ query["query"]["bool"]["filter"].extend(self._parse_filters(filters))
422
492
 
423
493
  await self._os_client.delete_by_query(index=self._index, body=query)
424
494
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.1.13
3
+ Version: 0.2.0
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  License: MIT
6
6
  Author: Your Name
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
- Requires-Dist: llama-index-core (>=0.10.1,<0.11.0)
15
+ Requires-Dist: llama-index-core (>=0.11.0,<0.12.0)
16
16
  Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
17
17
  Description-Content-Type: text/markdown
18
18
 
@@ -0,0 +1,6 @@
1
+ llama_index/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ llama_index/vector_stores/opensearch/__init__.py,sha256=U1_XAkZb6zcskOk4s10NB8Tjs9AZRGdRQLzOGpbWdBA,176
3
+ llama_index/vector_stores/opensearch/base.py,sha256=1Imxwk6EWwi9_yFd9ZNFTWLVDua6NFVLBXuOq-MXV1g,25492
4
+ llama_index_vector_stores_opensearch-0.2.0.dist-info/METADATA,sha256=MN9n-d3qGqR7N_Q7SoXfHqRKqiNzTPX_YKJJeXo0YK8,728
5
+ llama_index_vector_stores_opensearch-0.2.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
6
+ llama_index_vector_stores_opensearch-0.2.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 1.8.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,5 +0,0 @@
1
- llama_index/vector_stores/opensearch/__init__.py,sha256=U1_XAkZb6zcskOk4s10NB8Tjs9AZRGdRQLzOGpbWdBA,176
2
- llama_index/vector_stores/opensearch/base.py,sha256=YShOE7dzy3L5G8N9iobA6aenZLF_dJACs1aCBrQIPyg,22584
3
- llama_index_vector_stores_opensearch-0.1.13.dist-info/METADATA,sha256=X2CxNuyUSEoChYA5Rss4QThW0HarjyC3UQpWE24reOU,729
4
- llama_index_vector_stores_opensearch-0.1.13.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
5
- llama_index_vector_stores_opensearch-0.1.13.dist-info/RECORD,,