llama-index-vector-stores-opensearch 0.1.13__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-index-vector-stores-opensearch might be problematic.
- {llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/PKG-INFO +2 -2
- llama_index_vector_stores_opensearch-0.2.0/llama_index/py.typed +0 -0
- {llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/llama_index/vector_stores/opensearch/base.py +87 -17
- {llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/pyproject.toml +2 -2
- {llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/README.md +0 -0
- {llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/llama_index/vector_stores/opensearch/__init__.py +0 -0
{llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama-index-vector-stores-opensearch
-Version: 0.1.13
+Version: 0.2.0
 Summary: llama-index vector_stores opensearch integration
 License: MIT
 Author: Your Name
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: llama-index-core (>=0.
+Requires-Dist: llama-index-core (>=0.11.0,<0.12.0)
 Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
 Description-Content-Type: text/markdown
 
llama_index_vector_stores_opensearch-0.2.0/llama_index/py.typed
ADDED
File without changes
{llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/llama_index/vector_stores/opensearch/base.py
RENAMED
@@ -1,14 +1,17 @@
 """Elasticsearch/Opensearch vector store."""
 
 import asyncio
-import json
 import uuid
+from datetime import datetime
 from typing import Any, Dict, Iterable, List, Optional, Union, cast
 
 from llama_index.core.bridge.pydantic import PrivateAttr
 
 from llama_index.core.schema import BaseNode, MetadataMode, TextNode
 from llama_index.core.vector_stores.types import (
+    FilterCondition,
+    FilterOperator,
+    MetadataFilter,
     MetadataFilters,
     BasePydanticVectorStore,
     VectorStoreQuery,
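For orientation on the newly imported filter types: the sketch below is illustrative and not part of this diff. It shows how a caller might build structured metadata filters with the FilterCondition, FilterOperator, and MetadataFilter classes that 0.2.0 now pulls from llama_index.core.vector_stores.types; the keys "author" and "year" and their values are invented for the example.

from llama_index.core.vector_stores.types import (
    FilterCondition,
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)

# Hypothetical filters: exact match on a text field AND a numeric lower bound.
example_filters = MetadataFilters(
    filters=[
        MetadataFilter(key="author", value="alice", operator=FilterOperator.EQ),
        MetadataFilter(key="year", value=2023, operator=FilterOperator.GTE),
    ],
    condition=FilterCondition.AND,
)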
@@ -210,16 +213,89 @@ class OpensearchVectorClient:
             "query": {"knn": {vector_field: {"vector": query_vector, "k": k}}},
         }
 
-    def
-
-        if filters is not None:
-            for f in filters.legacy_filters():
-                if isinstance(f.value, str):
-                    pre_filter.append({f.key: f.value})
-                else:
-                    pre_filter.append({f.key: json.loads(str(f.value))})
+    def _is_text_field(self, value: Any) -> bool:
+        """Check if value is a string and keyword filtering needs to be performed.
 
-
+        Not applied to datetime strings.
+        """
+        if isinstance(value, str):
+            try:
+                datetime.fromisoformat(value)
+                return False
+            except ValueError as e:
+                return True
+        else:
+            return False
+
+    def _parse_filter(self, filter: MetadataFilter) -> dict:
+        """Parse a single MetadataFilter to equivalent OpenSearch expression.
+
+        As Opensearch does not differentiate between scalar/array keyword fields, IN and ANY are equivalent.
+        """
+        key = f"metadata.{filter.key}"
+        op = filter.operator
+
+        equality_postfix = ".keyword" if self._is_text_field(value=filter.value) else ""
+
+        if op == FilterOperator.EQ:
+            return {"term": {f"{key}{equality_postfix}": filter.value}}
+        elif op in [
+            FilterOperator.GT,
+            FilterOperator.GTE,
+            FilterOperator.LT,
+            FilterOperator.LTE,
+        ]:
+            return {"range": {key: {filter.operator.name.lower(): filter.value}}}
+        elif op == FilterOperator.NE:
+            return {
+                "bool": {
+                    "must_not": {"term": {f"{key}{equality_postfix}": filter.value}}
+                }
+            }
+        elif op in [FilterOperator.IN, FilterOperator.ANY]:
+            return {"terms": {key: filter.value}}
+        elif op == FilterOperator.NIN:
+            return {"bool": {"must_not": {"terms": {key: filter.value}}}}
+        elif op == FilterOperator.ALL:
+            return {
+                "terms_set": {
+                    key: {
+                        "terms": filter.value,
+                        "minimum_should_match_script": {"source": "params.num_terms"},
+                    }
+                }
+            }
+        elif op == FilterOperator.TEXT_MATCH:
+            return {"match": {key: {"query": filter.value, "fuzziness": "AUTO"}}}
+        elif op == FilterOperator.CONTAINS:
+            return {"wildcard": {key: f"*{filter.value}*"}}
+        else:
+            raise ValueError(f"Unsupported filter operator: {filter.operator}")
+
+    def _parse_filters_recursively(self, filters: MetadataFilters) -> dict:
+        """Parse (possibly nested) MetadataFilters to equivalent OpenSearch expression."""
+        condition_map = {FilterCondition.AND: "must", FilterCondition.OR: "should"}
+
+        bool_clause = condition_map[filters.condition]
+        bool_query: dict[str, dict[str, list[dict]]] = {"bool": {bool_clause: []}}
+
+        for filter_item in filters.filters:
+            if isinstance(filter_item, MetadataFilter):
+                bool_query["bool"][bool_clause].append(self._parse_filter(filter_item))
+            elif isinstance(filter_item, MetadataFilters):
+                bool_query["bool"][bool_clause].append(
+                    self._parse_filters_recursively(filter_item)
+                )
+            else:
+                raise ValueError(f"Unsupported filter type: {type(filter_item)}")
+
+        return bool_query
+
+    def _parse_filters(self, filters: Optional[MetadataFilters]) -> List[dict]:
+        """Parse MetadataFilters to equivalent OpenSearch expression."""
+        if filters is None:
+            return []
+        return [self._parse_filters_recursively(filters=filters)]
 
     def _knn_search_query(
         self,
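To make the new mapping concrete, here is a hand-written illustration (derived from reading the diff above, not output captured from the package) of the filter clause that _parse_filters would be expected to return for the example_filters object sketched earlier: string equality is routed to the ".keyword" subfield, GTE becomes a range clause, and the AND condition maps to a bool "must".

# Expected shape of _parse_filters(example_filters) (illustrative only).
expected_filter_clause = [
    {
        "bool": {
            "must": [
                {"term": {"metadata.author.keyword": "alice"}},
                {"range": {"metadata.year": {"gte": 2023}}},
            ]
        }
    }
]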
@@ -412,13 +488,7 @@ class OpensearchVectorClient:
             query["query"]["bool"]["filter"].append({"terms": {"_id": node_ids or []}})
 
         if filters:
-
-            newfilter = {}
-
-            for key in filter:
-                newfilter[f"metadata.{key}.keyword"] = filter[key]
-
-            query["query"]["bool"]["filter"].append({"term": newfilter})
+            query["query"]["bool"]["filter"].extend(self._parse_filters(filters))
 
         await self._os_client.delete_by_query(index=self._index, body=query)
 
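For illustration, assuming both node_ids and metadata filters are supplied to the delete path, the delete_by_query body built by this hunk would take roughly the following shape; the ids, index name, and metadata key are invented for the example.

# Rough shape of the body passed to delete_by_query (illustrative only).
delete_body = {
    "query": {
        "bool": {
            "filter": [
                {"terms": {"_id": ["node-1", "node-2"]}},  # from node_ids
                # appended by _parse_filters(filters)
                {"bool": {"must": [{"term": {"metadata.author.keyword": "alice"}}]}},
            ]
        }
    }
}
# e.g. await os_client.delete_by_query(index="vector-index", body=delete_body)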
{llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/pyproject.toml
RENAMED
@@ -27,11 +27,11 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-vector-stores-opensearch"
 readme = "README.md"
-version = "0.1.13"
+version = "0.2.0"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
-llama-index-core = "^0.
+llama-index-core = "^0.11.0"
 
 [tool.poetry.dependencies.opensearch-py]
 extras = ["async"]
{llama_index_vector_stores_opensearch-0.1.13 → llama_index_vector_stores_opensearch-0.2.0}/README.md
RENAMED
File without changes