llama-index-vector-stores-opensearch 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-index-vector-stores-opensearch might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-vector-stores-opensearch
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: llama-index vector_stores opensearch integration
5
5
  License: MIT
6
6
  Author: Your Name
@@ -56,6 +56,7 @@ class OpensearchVectorClient:
56
56
  This includes engine, metric, and other config params. Defaults to:
57
57
  {"name": "hnsw", "space_type": "l2", "engine": "faiss",
58
58
  "parameters": {"ef_construction": 256, "m": 48}}
59
+ space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
59
60
  **kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.
60
61
 
61
62
  """
@@ -69,6 +70,7 @@ class OpensearchVectorClient:
69
70
  text_field: str = "content",
70
71
  method: Optional[dict] = None,
71
72
  engine: Optional[str] = "nmslib",
73
+ space_type: Optional[str] = "l2",
72
74
  max_chunk_bytes: int = 1 * 1024 * 1024,
73
75
  search_pipeline: Optional[str] = None,
74
76
  os_client: Optional[OSClient] = None,
@@ -94,6 +96,7 @@ class OpensearchVectorClient:
94
96
 
95
97
  self._search_pipeline = search_pipeline
96
98
  http_auth = kwargs.get("http_auth")
99
+ self.space_type = space_type
97
100
  self.is_aoss = self._is_aoss_enabled(http_auth=http_auth)
98
101
  # initialize mapping
99
102
  idx_conf = {
@@ -309,9 +312,11 @@ class OpensearchVectorClient:
309
312
 
310
313
  If there are no filters do approx-knn search.
311
314
  If there are (pre)-filters, do an exhaustive exact knn search using 'painless
312
- scripting'.
315
+ scripting' if the version of OpenSearch supports it; otherwise use the knn_score scoring script.
313
316
 
314
- Note that approximate knn search does not support pre-filtering.
317
+ Note:
318
+ - AWS OpenSearch Serverless does not currently support painless scripting, according to AWS.
319
+ - Approximate knn search does not support pre-filtering.
315
320
 
316
321
  Args:
317
322
  query_embedding: Vector embedding to query.
@@ -328,16 +333,25 @@ class OpensearchVectorClient:
328
333
  search_query = self._default_approximate_search_query(
329
334
  query_embedding, k, vector_field=embedding_field
330
335
  )
336
+ elif self.is_aoss:
337
+ # is_aoss is set, so we are on AWS OpenSearch Serverless, which cannot use
338
+ # painless scripting; the query built here uses the plain knn_score script instead
339
+ search_query = self._default_scoring_script_query(
340
+ query_embedding,
341
+ k,
342
+ space_type=self.space_type,
343
+ pre_filter={"bool": {"filter": pre_filter}},
344
+ vector_field=embedding_field,
345
+ )
331
346
  else:
332
347
  # https://opensearch.org/docs/latest/search-plugins/knn/painless-functions/
333
- search_query = self._default_painless_scripting_query(
348
+ search_query = self._default_scoring_script_query(
334
349
  query_embedding,
335
350
  k,
336
351
  space_type="l2Squared",
337
352
  pre_filter={"bool": {"filter": pre_filter}},
338
353
  vector_field=embedding_field,
339
354
  )
340
-
341
355
  return search_query
342
356
 
343
357
  def _hybrid_search_query(
@@ -382,7 +396,9 @@ class OpensearchVectorClient:
382
396
  def __get_painless_scripting_source(
383
397
  self, space_type: str, vector_field: str = "embedding"
384
398
  ) -> str:
385
- """For Painless Scripting, it returns the script source based on space type."""
399
+ """For Painless Scripting, it returns the script source based on space type.
400
+ This does not work with Opensearch Serverless currently.
401
+ """
386
402
  source_value = (
387
403
  f"(1.0 + {space_type}(params.query_value, doc['{vector_field}']))"
388
404
  )
@@ -391,7 +407,29 @@ class OpensearchVectorClient:
391
407
  else:
392
408
  return f"1/{source_value}"
393
409
 
394
- def _default_painless_scripting_query(
410
+ def _get_knn_scoring_script(self, space_type, vector_field, query_vector):
411
+ """Default scoring script that will work with AWS Opensearch Serverless."""
412
+ return {
413
+ "source": "knn_score",
414
+ "lang": "knn",
415
+ "params": {
416
+ "field": vector_field,
417
+ "query_value": query_vector,
418
+ "space_type": space_type,
419
+ },
420
+ }
421
+
422
+ def _get_painless_scoring_script(self, space_type, vector_field, query_vector):
423
+ source = self.__get_painless_scripting_source(space_type, vector_field)
424
+ return {
425
+ "source": source,
426
+ "params": {
427
+ "field": vector_field,
428
+ "query_value": query_vector,
429
+ },
430
+ }
431
+
432
+ def _default_scoring_script_query(
395
433
  self,
396
434
  query_vector: List[float],
397
435
  k: int = 4,
@@ -399,23 +437,31 @@ class OpensearchVectorClient:
399
437
  pre_filter: Optional[Union[Dict, List]] = None,
400
438
  vector_field: str = "embedding",
401
439
  ) -> Dict:
402
- """For Painless Scripting Search, this is the default query."""
440
+ """For Scoring Script Search, this is the default query. Has to account for Opensearch Service
441
+ Serverless, which does not support painless scripting functions, so it defaults to knn_score there.
442
+ """
403
443
  if not pre_filter:
404
444
  pre_filter = MATCH_ALL_QUERY
405
445
 
406
- source = self.__get_painless_scripting_source(space_type, vector_field)
446
+ # check if we can use painless scripting or have to use default knn_score script
447
+ if self.is_aoss:
448
+ if space_type == "l2Squared":
449
+ raise ValueError(
450
+ "Unsupported space type for aoss. Can only use l1, l2, cosinesimil."
451
+ )
452
+ script = self._get_knn_scoring_script(
453
+ space_type, vector_field, query_vector
454
+ )
455
+ else:
456
+ script = self._get_painless_scoring_script(
457
+ space_type, vector_field, query_vector
458
+ )
407
459
  return {
408
460
  "size": k,
409
461
  "query": {
410
462
  "script_score": {
411
463
  "query": pre_filter,
412
- "script": {
413
- "source": source,
414
- "params": {
415
- "field": vector_field,
416
- "query_value": query_vector,
417
- },
418
- },
464
+ "script": script,
419
465
  }
420
466
  },
421
467
  }
@@ -466,7 +512,9 @@ class OpensearchVectorClient:
466
512
  search_query = {
467
513
  "query": {"term": {"metadata.doc_id.keyword": {"value": doc_id}}}
468
514
  }
469
- await self._os_client.delete_by_query(index=self._index, body=search_query)
515
+ await self._os_client.delete_by_query(
516
+ index=self._index, body=search_query, refresh=True
517
+ )
470
518
 
471
519
  async def delete_nodes(
472
520
  self,
@@ -490,12 +538,16 @@ class OpensearchVectorClient:
490
538
  if filters:
491
539
  query["query"]["bool"]["filter"].extend(self._parse_filters(filters))
492
540
 
493
- await self._os_client.delete_by_query(index=self._index, body=query)
541
+ await self._os_client.delete_by_query(
542
+ index=self._index, body=query, refresh=True
543
+ )
494
544
 
495
545
  async def clear(self) -> None:
496
546
  """Clears index."""
497
547
  query = {"query": {"bool": {"filter": []}}}
498
- await self._os_client.delete_by_query(index=self._index, body=query)
548
+ await self._os_client.delete_by_query(
549
+ index=self._index, body=query, refresh=True
550
+ )
499
551
 
500
552
  async def aquery(
501
553
  self,
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
27
27
  license = "MIT"
28
28
  name = "llama-index-vector-stores-opensearch"
29
29
  readme = "README.md"
30
- version = "0.2.0"
30
+ version = "0.2.2"
31
31
 
32
32
  [tool.poetry.dependencies]
33
33
  python = ">=3.8.1,<4.0"