llama-index-vector-stores-opensearch 0.1.9__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of llama-index-vector-stores-opensearch might be problematic; see the associated advisory for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  License: MIT
6
6
  Author: Your Name
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.9
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
14
15
  Requires-Dist: llama-index-core (>=0.10.1,<0.11.0)
15
16
  Requires-Dist: opensearch-py[async] (>=2.4.2,<3.0.0)
16
17
  Description-Content-Type: text/markdown
@@ -239,12 +239,12 @@ class OpensearchVectorClient:
239
239
  Returns:
240
240
  Up to k docs closest to query_embedding
241
241
  """
242
- if filters is None:
242
+ pre_filter = self._parse_filters(filters)
243
+ if not pre_filter:
243
244
  search_query = self._default_approximate_search_query(
244
245
  query_embedding, k, vector_field=embedding_field
245
246
  )
246
247
  else:
247
- pre_filter = self._parse_filters(filters)
248
248
  # https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
249
249
  search_query = self._default_painless_scripting_query(
250
250
  query_embedding,
@@ -265,17 +265,34 @@ class OpensearchVectorClient:
265
265
  k: int,
266
266
  filters: Optional[MetadataFilters] = None,
267
267
  ) -> Dict:
268
- knn_query = self._knn_search_query(
269
- embedding_field, query_embedding, k, filters
270
- )["query"]
271
- lexical_query = {"must": {"match": {text_field: {"query": query_str}}}}
268
+ knn_query = self._knn_search_query(embedding_field, query_embedding, k, filters)
269
+ lexical_query = self._lexical_search_query(text_field, query_str, k, filters)
270
+
271
+ return {
272
+ "size": k,
273
+ "query": {
274
+ "hybrid": {"queries": [lexical_query["query"], knn_query["query"]]}
275
+ },
276
+ }
277
+
278
+ def _lexical_search_query(
279
+ self,
280
+ text_field: str,
281
+ query_str: str,
282
+ k: int,
283
+ filters: Optional[MetadataFilters] = None,
284
+ ) -> Dict:
285
+ lexical_query = {
286
+ "bool": {"must": {"match": {text_field: {"query": query_str}}}}
287
+ }
272
288
 
273
289
  parsed_filters = self._parse_filters(filters)
274
290
  if len(parsed_filters) > 0:
275
- lexical_query["filter"] = parsed_filters
291
+ lexical_query["bool"]["filter"] = parsed_filters
292
+
276
293
  return {
277
294
  "size": k,
278
- "query": {"hybrid": {"queries": [{"bool": lexical_query}, knn_query]}},
295
+ "query": lexical_query,
279
296
  }
280
297
 
281
298
  def __get_painless_scripting_source(
@@ -388,17 +405,25 @@ class OpensearchVectorClient:
388
405
  )
389
406
  params = {
390
407
  "search_pipeline": self._search_pipeline,
391
- "_source_excludes": ["embedding"],
392
408
  }
409
+ elif query_mode == VectorStoreQueryMode.TEXT_SEARCH:
410
+ search_query = self._lexical_search_query(
411
+ self._text_field, query_str, k, filters=filters
412
+ )
413
+ params = None
393
414
  else:
394
415
  search_query = self._knn_search_query(
395
416
  self._embedding_field, query_embedding, k, filters=filters
396
417
  )
397
- params = {"_source_excludes": ["embedding"]}
418
+ params = None
398
419
 
399
420
  res = await self._os_client.search(
400
421
  index=self._index, body=search_query, params=params
401
422
  )
423
+
424
+ return self._to_query_result(res)
425
+
426
+ def _to_query_result(self, res) -> VectorStoreQueryResult:
402
427
  nodes = []
403
428
  ids = []
404
429
  scores = []
@@ -433,6 +458,7 @@ class OpensearchVectorClient:
433
458
  ids.append(node_id)
434
459
  nodes.append(node)
435
460
  scores.append(hit["_score"])
461
+
436
462
  return VectorStoreQueryResult(nodes=nodes, ids=ids, similarities=scores)
437
463
 
438
464
 
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
27
27
  license = "MIT"
28
28
  name = "llama-index-vector-stores-opensearch"
29
29
  readme = "README.md"
30
- version = "0.1.9"
30
+ version = "0.1.11"
31
31
 
32
32
  [tool.poetry.dependencies]
33
33
  python = ">=3.8.1,<4.0"