agno 2.0.10__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. agno/agent/agent.py +608 -175
  2. agno/db/in_memory/in_memory_db.py +42 -29
  3. agno/db/postgres/postgres.py +6 -4
  4. agno/exceptions.py +62 -1
  5. agno/guardrails/__init__.py +6 -0
  6. agno/guardrails/base.py +19 -0
  7. agno/guardrails/openai.py +144 -0
  8. agno/guardrails/pii.py +94 -0
  9. agno/guardrails/prompt_injection.py +51 -0
  10. agno/knowledge/embedder/aws_bedrock.py +9 -4
  11. agno/knowledge/embedder/azure_openai.py +54 -0
  12. agno/knowledge/embedder/base.py +2 -0
  13. agno/knowledge/embedder/cohere.py +184 -5
  14. agno/knowledge/embedder/google.py +79 -1
  15. agno/knowledge/embedder/huggingface.py +9 -4
  16. agno/knowledge/embedder/jina.py +63 -0
  17. agno/knowledge/embedder/mistral.py +78 -11
  18. agno/knowledge/embedder/ollama.py +5 -0
  19. agno/knowledge/embedder/openai.py +18 -54
  20. agno/knowledge/embedder/voyageai.py +69 -16
  21. agno/knowledge/knowledge.py +5 -4
  22. agno/knowledge/reader/pdf_reader.py +4 -3
  23. agno/knowledge/reader/website_reader.py +3 -2
  24. agno/models/base.py +125 -32
  25. agno/models/cerebras/cerebras.py +1 -0
  26. agno/models/cerebras/cerebras_openai.py +1 -0
  27. agno/models/dashscope/dashscope.py +1 -0
  28. agno/models/google/gemini.py +27 -5
  29. agno/models/litellm/chat.py +17 -0
  30. agno/models/openai/chat.py +13 -4
  31. agno/models/perplexity/perplexity.py +2 -3
  32. agno/models/requesty/__init__.py +5 -0
  33. agno/models/requesty/requesty.py +49 -0
  34. agno/models/vllm/vllm.py +1 -0
  35. agno/models/xai/xai.py +1 -0
  36. agno/os/app.py +167 -148
  37. agno/os/interfaces/whatsapp/router.py +2 -0
  38. agno/os/mcp.py +1 -1
  39. agno/os/middleware/__init__.py +7 -0
  40. agno/os/middleware/jwt.py +233 -0
  41. agno/os/router.py +181 -45
  42. agno/os/routers/home.py +2 -2
  43. agno/os/routers/memory/memory.py +23 -1
  44. agno/os/routers/memory/schemas.py +1 -1
  45. agno/os/routers/session/session.py +20 -3
  46. agno/os/utils.py +172 -8
  47. agno/run/agent.py +120 -77
  48. agno/run/team.py +115 -72
  49. agno/run/workflow.py +5 -15
  50. agno/session/summary.py +9 -10
  51. agno/session/team.py +2 -1
  52. agno/team/team.py +720 -168
  53. agno/tools/firecrawl.py +4 -4
  54. agno/tools/function.py +42 -2
  55. agno/tools/knowledge.py +3 -3
  56. agno/tools/searxng.py +2 -2
  57. agno/tools/serper.py +2 -2
  58. agno/tools/spider.py +2 -2
  59. agno/tools/workflow.py +4 -5
  60. agno/utils/events.py +66 -1
  61. agno/utils/hooks.py +57 -0
  62. agno/utils/media.py +11 -9
  63. agno/utils/print_response/agent.py +43 -5
  64. agno/utils/print_response/team.py +48 -12
  65. agno/vectordb/cassandra/cassandra.py +44 -4
  66. agno/vectordb/chroma/chromadb.py +79 -8
  67. agno/vectordb/clickhouse/clickhousedb.py +43 -6
  68. agno/vectordb/couchbase/couchbase.py +76 -5
  69. agno/vectordb/lancedb/lance_db.py +38 -3
  70. agno/vectordb/llamaindex/__init__.py +3 -0
  71. agno/vectordb/milvus/milvus.py +76 -4
  72. agno/vectordb/mongodb/mongodb.py +76 -4
  73. agno/vectordb/pgvector/pgvector.py +50 -6
  74. agno/vectordb/pineconedb/pineconedb.py +39 -2
  75. agno/vectordb/qdrant/qdrant.py +76 -26
  76. agno/vectordb/singlestore/singlestore.py +77 -4
  77. agno/vectordb/upstashdb/upstashdb.py +42 -2
  78. agno/vectordb/weaviate/weaviate.py +39 -3
  79. agno/workflow/types.py +1 -0
  80. agno/workflow/workflow.py +58 -2
  81. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/METADATA +4 -3
  82. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/RECORD +85 -75
  83. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/WHEEL +0 -0
  84. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/licenses/LICENSE +0 -0
  85. {agno-2.0.10.dist-info → agno-2.1.0.dist-info}/top_level.txt +0 -0
@@ -1018,8 +1018,44 @@ class MongoDb(VectorDb):
1018
1018
  log_debug(f"Inserting {len(documents)} documents asynchronously")
1019
1019
  collection = await self._get_async_collection()
1020
1020
 
1021
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
1022
- await asyncio.gather(*embed_tasks, return_exceptions=True)
1021
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
1022
+ # Use batch embedding when enabled and supported
1023
+ try:
1024
+ # Extract content from all documents
1025
+ doc_contents = [doc.content for doc in documents]
1026
+
1027
+ # Get batch embeddings and usage
1028
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
1029
+
1030
+ # Process documents with pre-computed embeddings
1031
+ for j, doc in enumerate(documents):
1032
+ try:
1033
+ if j < len(embeddings):
1034
+ doc.embedding = embeddings[j]
1035
+ doc.usage = usages[j] if j < len(usages) else None
1036
+ except Exception as e:
1037
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
1038
+
1039
+ except Exception as e:
1040
+ # Check if this is a rate limit error - don't fall back as it would make things worse
1041
+ error_str = str(e).lower()
1042
+ is_rate_limit = any(
1043
+ phrase in error_str
1044
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
1045
+ )
1046
+
1047
+ if is_rate_limit:
1048
+ logger.error(f"Rate limit detected during batch embedding. {e}")
1049
+ raise e
1050
+ else:
1051
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
1052
+ # Fall back to individual embedding
1053
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
1054
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
1055
+ else:
1056
+ # Use individual embedding
1057
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
1058
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
1023
1059
 
1024
1060
  prepared_docs = []
1025
1061
  for document in documents:
@@ -1047,8 +1083,44 @@ class MongoDb(VectorDb):
1047
1083
  log_info(f"Upserting {len(documents)} documents asynchronously")
1048
1084
  collection = await self._get_async_collection()
1049
1085
 
1050
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
1051
- await asyncio.gather(*embed_tasks, return_exceptions=True)
1086
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
1087
+ # Use batch embedding when enabled and supported
1088
+ try:
1089
+ # Extract content from all documents
1090
+ doc_contents = [doc.content for doc in documents]
1091
+
1092
+ # Get batch embeddings and usage
1093
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
1094
+
1095
+ # Process documents with pre-computed embeddings
1096
+ for j, doc in enumerate(documents):
1097
+ try:
1098
+ if j < len(embeddings):
1099
+ doc.embedding = embeddings[j]
1100
+ doc.usage = usages[j] if j < len(usages) else None
1101
+ except Exception as e:
1102
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
1103
+
1104
+ except Exception as e:
1105
+ # Check if this is a rate limit error - don't fall back as it would make things worse
1106
+ error_str = str(e).lower()
1107
+ is_rate_limit = any(
1108
+ phrase in error_str
1109
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
1110
+ )
1111
+
1112
+ if is_rate_limit:
1113
+ logger.error(f"Rate limit detected during batch embedding. {e}")
1114
+ raise e
1115
+ else:
1116
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
1117
+ # Fall back to individual embedding
1118
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
1119
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
1120
+ else:
1121
+ # Use individual embedding
1122
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
1123
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
1052
1124
 
1053
1125
  for document in documents:
1054
1126
  try:
@@ -55,7 +55,6 @@ class PgVector(VectorDb):
55
55
  schema_version: int = 1,
56
56
  auto_upgrade_schema: bool = False,
57
57
  reranker: Optional[Reranker] = None,
58
- use_batch: bool = False,
59
58
  ):
60
59
  """
61
60
  Initialize the PgVector instance.
@@ -96,7 +95,6 @@ class PgVector(VectorDb):
96
95
  self.db_url: Optional[str] = db_url
97
96
  self.db_engine: Engine = db_engine
98
97
  self.metadata: MetaData = MetaData(schema=self.schema)
99
- self.use_batch: bool = use_batch
100
98
 
101
99
  # Embedder for embedding the document contents
102
100
  if embedder is None:
@@ -337,8 +335,8 @@ class PgVector(VectorDb):
337
335
  batch_docs = documents[i : i + batch_size]
338
336
  log_debug(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
339
337
  try:
340
- embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
341
- await asyncio.gather(*embed_tasks, return_exceptions=True)
338
+ # Embed all documents in the batch
339
+ await self._async_embed_documents(batch_docs)
342
340
 
343
341
  # Prepare documents for insertion
344
342
  batch_records = []
@@ -493,6 +491,52 @@ class PgVector(VectorDb):
493
491
  "content_id": doc.content_id,
494
492
  }
495
493
 
494
+ async def _async_embed_documents(self, batch_docs: List[Document]) -> None:
495
+ """
496
+ Embed a batch of documents using either batch embedding or individual embedding.
497
+
498
+ Args:
499
+ batch_docs: List of documents to embed
500
+ """
501
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
502
+ # Use batch embedding when enabled and supported
503
+ try:
504
+ # Extract content from all documents
505
+ doc_contents = [doc.content for doc in batch_docs]
506
+
507
+ # Get batch embeddings and usage
508
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
509
+
510
+ # Process documents with pre-computed embeddings
511
+ for j, doc in enumerate(batch_docs):
512
+ try:
513
+ if j < len(embeddings):
514
+ doc.embedding = embeddings[j]
515
+ doc.usage = usages[j] if j < len(usages) else None
516
+ except Exception as e:
517
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
518
+
519
+ except Exception as e:
520
+ # Check if this is a rate limit error - don't fall back as it would make things worse
521
+ error_str = str(e).lower()
522
+ is_rate_limit = any(
523
+ phrase in error_str
524
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
525
+ )
526
+
527
+ if is_rate_limit:
528
+ logger.error(f"Rate limit detected during batch embedding. {e}")
529
+ raise e
530
+ else:
531
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
532
+ # Fall back to individual embedding
533
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
534
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
535
+ else:
536
+ # Use individual embedding
537
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
538
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
539
+
496
540
  async def async_upsert(
497
541
  self,
498
542
  content_hash: str,
@@ -530,8 +574,8 @@ class PgVector(VectorDb):
530
574
  batch_docs = documents[i : i + batch_size]
531
575
  log_info(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
532
576
  try:
533
- embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
534
- await asyncio.gather(*embed_tasks, return_exceptions=True)
577
+ # Embed all documents in the batch
578
+ await self._async_embed_documents(batch_docs)
535
579
 
536
580
  # Prepare documents for upserting
537
581
  batch_records_dict = {} # Use dict to deduplicate by ID
@@ -338,8 +338,45 @@ class PineconeDb(VectorDb):
338
338
  async def _prepare_vectors(self, documents: List[Document]) -> List[Dict[str, Any]]:
339
339
  """Prepare vectors for upsert."""
340
340
  vectors = []
341
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
342
- await asyncio.gather(*embed_tasks, return_exceptions=True)
341
+
342
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
343
+ # Use batch embedding when enabled and supported
344
+ try:
345
+ # Extract content from all documents
346
+ doc_contents = [doc.content for doc in documents]
347
+
348
+ # Get batch embeddings and usage
349
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
350
+
351
+ # Process documents with pre-computed embeddings
352
+ for j, doc in enumerate(documents):
353
+ try:
354
+ if j < len(embeddings):
355
+ doc.embedding = embeddings[j]
356
+ doc.usage = usages[j] if j < len(usages) else None
357
+ except Exception as e:
358
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
359
+
360
+ except Exception as e:
361
+ # Check if this is a rate limit error - don't fall back as it would make things worse
362
+ error_str = str(e).lower()
363
+ is_rate_limit = any(
364
+ phrase in error_str
365
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
366
+ )
367
+
368
+ if is_rate_limit:
369
+ logger.error(f"Rate limit detected during batch embedding. {e}")
370
+ raise e
371
+ else:
372
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
373
+ # Fall back to individual embedding
374
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
375
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
376
+ else:
377
+ # Use individual embedding
378
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
379
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
343
380
 
344
381
  for doc in documents:
345
382
  doc.meta_data["text"] = doc.content
@@ -131,7 +131,8 @@ class Qdrant(VectorDb):
131
131
  if fastembed_kwargs:
132
132
  default_kwargs.update(fastembed_kwargs)
133
133
 
134
- self.sparse_encoder = SparseTextEmbedding(**default_kwargs)
134
+ # Type ignore for mypy as SparseTextEmbedding constructor accepts flexible kwargs
135
+ self.sparse_encoder = SparseTextEmbedding(**default_kwargs) # type: ignore
135
136
 
136
137
  except ImportError as e:
137
138
  raise ImportError(
@@ -192,10 +193,12 @@ class Qdrant(VectorDb):
192
193
  # Configure vectors based on search type
193
194
  if self.search_type == SearchType.vector:
194
195
  # Maintain backward compatibility with unnamed vectors
195
- vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
196
+ vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
196
197
  else:
197
198
  # Use named vectors for hybrid search
198
- vectors_config = {self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)} # type: ignore
199
+ vectors_config = {
200
+ self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
201
+ } # type: ignore
199
202
 
200
203
  self.client.create_collection(
201
204
  collection_name=self.collection,
@@ -220,10 +223,12 @@ class Qdrant(VectorDb):
220
223
  # Configure vectors based on search type
221
224
  if self.search_type == SearchType.vector:
222
225
  # Maintain backward compatibility with unnamed vectors
223
- vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
226
+ vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
224
227
  else:
225
228
  # Use named vectors for hybrid search
226
- vectors_config = {self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)} # type: ignore
229
+ vectors_config = {
230
+ self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
231
+ } # type: ignore
227
232
 
228
233
  await self.async_client.create_collection(
229
234
  collection_name=self.collection,
@@ -281,7 +286,7 @@ class Qdrant(VectorDb):
281
286
  return len(scroll_result[0]) > 0
282
287
  return False
283
288
 
284
- async def async_name_exists(self, name: str) -> bool:
289
+ async def async_name_exists(self, name: str) -> bool: # type: ignore[override]
285
290
  """
286
291
  Asynchronously validates if a document with the given name exists in the collection.
287
292
 
@@ -341,7 +346,9 @@ class Qdrant(VectorDb):
341
346
  vector[self.dense_vector_name] = document.embedding
342
347
 
343
348
  if self.search_type in [SearchType.keyword, SearchType.hybrid]:
344
- vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
349
+ vector[self.sparse_vector_name] = next(
350
+ iter(self.sparse_encoder.embed([document.content]))
351
+ ).as_object() # type: ignore
345
352
 
346
353
  # Create payload with document properties
347
354
  payload = {
@@ -363,7 +370,7 @@ class Qdrant(VectorDb):
363
370
  points.append(
364
371
  models.PointStruct(
365
372
  id=doc_id,
366
- vector=vector,
373
+ vector=vector, # type: ignore
367
374
  payload=payload,
368
375
  )
369
376
  )
@@ -384,26 +391,69 @@ class Qdrant(VectorDb):
384
391
  """
385
392
  log_debug(f"Inserting {len(documents)} documents asynchronously")
386
393
 
394
+ # Apply batch embedding when needed for vector or hybrid search
395
+ if self.search_type in [SearchType.vector, SearchType.hybrid]:
396
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
397
+ # Use batch embedding when enabled and supported
398
+ try:
399
+ # Extract content from all documents
400
+ doc_contents = [doc.content for doc in documents]
401
+
402
+ # Get batch embeddings and usage
403
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
404
+
405
+ # Process documents with pre-computed embeddings
406
+ for j, doc in enumerate(documents):
407
+ try:
408
+ if j < len(embeddings):
409
+ doc.embedding = embeddings[j]
410
+ doc.usage = usages[j] if j < len(usages) else None
411
+ except Exception as e:
412
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
413
+
414
+ except Exception as e:
415
+ # Check if this is a rate limit error - don't fall back as it would make things worse
416
+ error_str = str(e).lower()
417
+ is_rate_limit = any(
418
+ phrase in error_str
419
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
420
+ )
421
+
422
+ if is_rate_limit:
423
+ log_error(f"Rate limit detected during batch embedding. {e}")
424
+ raise e
425
+ else:
426
+ log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
427
+ # Fall back to individual embedding
428
+ for doc in documents:
429
+ if self.search_type in [SearchType.vector, SearchType.hybrid]:
430
+ doc.embed(embedder=self.embedder)
431
+ else:
432
+ # Use individual embedding
433
+ for doc in documents:
434
+ if self.search_type in [SearchType.vector, SearchType.hybrid]:
435
+ doc.embed(embedder=self.embedder)
436
+
387
437
  async def process_document(document):
388
438
  cleaned_content = document.content.replace("\x00", "\ufffd")
389
439
  doc_id = md5(cleaned_content.encode()).hexdigest()
390
440
 
391
441
  if self.search_type == SearchType.vector:
392
442
  # For vector search, maintain backward compatibility with unnamed vectors
393
- document.embed(embedder=self.embedder)
394
- vector = document.embedding
443
+ vector = document.embedding # Already embedded above
395
444
  else:
396
445
  # For other search types, use named vectors
397
446
  vector = {}
398
447
  if self.search_type in [SearchType.hybrid]:
399
- document.embed(embedder=self.embedder)
400
- vector[self.dense_vector_name] = document.embedding
448
+ vector[self.dense_vector_name] = document.embedding # Already embedded above
401
449
 
402
450
  if self.search_type in [SearchType.keyword, SearchType.hybrid]:
403
- vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
451
+ vector[self.sparse_vector_name] = next(
452
+ iter(self.sparse_encoder.embed([document.content]))
453
+ ).as_object() # type: ignore
404
454
 
405
455
  if self.search_type in [SearchType.keyword, SearchType.hybrid]:
406
- vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
456
+ vector[self.sparse_vector_name] = next(iter(self.sparse_encoder.embed([document.content]))).as_object()
407
457
 
408
458
  # Create payload with document properties
409
459
  payload = {
@@ -423,9 +473,9 @@ class Qdrant(VectorDb):
423
473
  payload["meta_data"].update(filters)
424
474
 
425
475
  log_debug(f"Inserted document asynchronously: {document.name} ({document.meta_data})")
426
- return models.PointStruct(
476
+ return models.PointStruct( # type: ignore
427
477
  id=doc_id,
428
- vector=vector,
478
+ vector=vector, # type: ignore
429
479
  payload=payload,
430
480
  )
431
481
 
@@ -501,12 +551,12 @@ class Qdrant(VectorDb):
501
551
  filters: Optional[Dict[str, Any]],
502
552
  ) -> List[models.ScoredPoint]:
503
553
  dense_embedding = self.embedder.get_embedding(query)
504
- sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
554
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
505
555
  call = self.client.query_points(
506
556
  collection_name=self.collection,
507
557
  prefetch=[
508
558
  models.Prefetch(
509
- query=models.SparseVector(**sparse_embedding),
559
+ query=models.SparseVector(**sparse_embedding), # type: ignore # type: ignore
510
560
  limit=limit,
511
561
  using=self.sparse_vector_name,
512
562
  ),
@@ -557,10 +607,10 @@ class Qdrant(VectorDb):
557
607
  limit: int,
558
608
  filters: Optional[Dict[str, Any]],
559
609
  ) -> List[models.ScoredPoint]:
560
- sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
610
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
561
611
  call = self.client.query_points(
562
612
  collection_name=self.collection,
563
- query=models.SparseVector(**sparse_embedding),
613
+ query=models.SparseVector(**sparse_embedding), # type: ignore
564
614
  with_vectors=True,
565
615
  with_payload=True,
566
616
  limit=limit,
@@ -606,10 +656,10 @@ class Qdrant(VectorDb):
606
656
  limit: int,
607
657
  filters: Optional[Dict[str, Any]],
608
658
  ) -> List[models.ScoredPoint]:
609
- sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
659
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
610
660
  call = await self.async_client.query_points(
611
661
  collection_name=self.collection,
612
- query=models.SparseVector(**sparse_embedding),
662
+ query=models.SparseVector(**sparse_embedding), # type: ignore
613
663
  with_vectors=True,
614
664
  with_payload=True,
615
665
  limit=limit,
@@ -625,12 +675,12 @@ class Qdrant(VectorDb):
625
675
  filters: Optional[Dict[str, Any]],
626
676
  ) -> List[models.ScoredPoint]:
627
677
  dense_embedding = self.embedder.get_embedding(query)
628
- sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
678
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
629
679
  call = await self.async_client.query_points(
630
680
  collection_name=self.collection,
631
681
  prefetch=[
632
682
  models.Prefetch(
633
- query=models.SparseVector(**sparse_embedding),
683
+ query=models.SparseVector(**sparse_embedding), # type: ignore # type: ignore
634
684
  limit=limit,
635
685
  using=self.sparse_vector_name,
636
686
  ),
@@ -689,7 +739,7 @@ class Qdrant(VectorDb):
689
739
  filter_conditions.append(models.FieldCondition(key=key, match=models.MatchValue(value=value)))
690
740
 
691
741
  if filter_conditions:
692
- return models.Filter(must=filter_conditions)
742
+ return models.Filter(must=filter_conditions) # type: ignore
693
743
 
694
744
  return None
695
745
 
@@ -807,7 +857,7 @@ class Qdrant(VectorDb):
807
857
  )
808
858
 
809
859
  # Create a filter that requires ALL metadata conditions to match
810
- filter_condition = models.Filter(must=filter_conditions)
860
+ filter_condition = models.Filter(must=filter_conditions) # type: ignore
811
861
 
812
862
  # First, count how many points will be deleted
813
863
  count_result = self.client.count(collection_name=self.collection, count_filter=filter_condition, exact=True)
@@ -496,8 +496,44 @@ class SingleStore(VectorDb):
496
496
  documents: List[Document],
497
497
  filters: Optional[Dict[str, Any]] = None,
498
498
  ) -> None:
499
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
500
- await asyncio.gather(*embed_tasks, return_exceptions=True)
499
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
500
+ # Use batch embedding when enabled and supported
501
+ try:
502
+ # Extract content from all documents
503
+ doc_contents = [doc.content for doc in documents]
504
+
505
+ # Get batch embeddings and usage
506
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
507
+
508
+ # Process documents with pre-computed embeddings
509
+ for j, doc in enumerate(documents):
510
+ try:
511
+ if j < len(embeddings):
512
+ doc.embedding = embeddings[j]
513
+ doc.usage = usages[j] if j < len(usages) else None
514
+ except Exception as e:
515
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
516
+
517
+ except Exception as e:
518
+ # Check if this is a rate limit error - don't fall back as it would make things worse
519
+ error_str = str(e).lower()
520
+ is_rate_limit = any(
521
+ phrase in error_str
522
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
523
+ )
524
+
525
+ if is_rate_limit:
526
+ log_error(f"Rate limit detected during batch embedding. {e}")
527
+ raise e
528
+ else:
529
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
530
+ # Fall back to individual embedding
531
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
532
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
533
+ else:
534
+ # Use individual embedding
535
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
536
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
501
537
 
502
538
  with self.Session.begin() as sess:
503
539
  counter = 0
@@ -543,8 +579,45 @@ class SingleStore(VectorDb):
543
579
  filters (Optional[Dict[str, Any]]): Optional filters for the upsert.
544
580
  batch_size (int): Number of documents to upsert in each batch.
545
581
  """
546
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
547
- await asyncio.gather(*embed_tasks, return_exceptions=True)
582
+
583
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
584
+ # Use batch embedding when enabled and supported
585
+ try:
586
+ # Extract content from all documents
587
+ doc_contents = [doc.content for doc in documents]
588
+
589
+ # Get batch embeddings and usage
590
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
591
+
592
+ # Process documents with pre-computed embeddings
593
+ for j, doc in enumerate(documents):
594
+ try:
595
+ if j < len(embeddings):
596
+ doc.embedding = embeddings[j]
597
+ doc.usage = usages[j] if j < len(usages) else None
598
+ except Exception as e:
599
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
600
+
601
+ except Exception as e:
602
+ # Check if this is a rate limit error - don't fall back as it would make things worse
603
+ error_str = str(e).lower()
604
+ is_rate_limit = any(
605
+ phrase in error_str
606
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
607
+ )
608
+
609
+ if is_rate_limit:
610
+ log_error(f"Rate limit detected during batch embedding. {e}")
611
+ raise e
612
+ else:
613
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
614
+ # Fall back to individual embedding
615
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
616
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
617
+ else:
618
+ # Use individual embedding
619
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
620
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
548
621
 
549
622
  with self.Session.begin() as sess:
550
623
  counter = 0
@@ -504,8 +504,48 @@ class UpstashVectorDb(VectorDb):
504
504
  _namespace = self.namespace if namespace is None else namespace
505
505
  vectors = []
506
506
 
507
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
508
- await asyncio.gather(*embed_tasks, return_exceptions=True)
507
+ if (
508
+ self.embedder
509
+ and self.embedder.enable_batch
510
+ and hasattr(self.embedder, "async_get_embeddings_batch_and_usage")
511
+ ):
512
+ # Use batch embedding when enabled and supported
513
+ try:
514
+ # Extract content from all documents
515
+ doc_contents = [doc.content for doc in documents]
516
+
517
+ # Get batch embeddings and usage
518
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
519
+
520
+ # Process documents with pre-computed embeddings
521
+ for j, doc in enumerate(documents):
522
+ try:
523
+ if j < len(embeddings):
524
+ doc.embedding = embeddings[j]
525
+ doc.usage = usages[j] if j < len(usages) else None
526
+ except Exception as e:
527
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
528
+
529
+ except Exception as e:
530
+ # Check if this is a rate limit error - don't fall back as it would make things worse
531
+ error_str = str(e).lower()
532
+ is_rate_limit = any(
533
+ phrase in error_str
534
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
535
+ )
536
+
537
+ if is_rate_limit:
538
+ logger.error(f"Rate limit detected during batch embedding. {e}")
539
+ raise e
540
+ else:
541
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
542
+ # Fall back to individual embedding
543
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
544
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
545
+ else:
546
+ # Use individual embedding
547
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
548
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
509
549
 
510
550
  for i, document in enumerate(documents):
511
551
  if document.id is None:
@@ -270,9 +270,45 @@ class Weaviate(VectorDb):
270
270
  if not documents:
271
271
  return
272
272
 
273
- # Embed document
274
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
275
- await asyncio.gather(*embed_tasks, return_exceptions=True)
273
+ # Apply batch embedding logic
274
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
275
+ # Use batch embedding when enabled and supported
276
+ try:
277
+ # Extract content from all documents
278
+ doc_contents = [doc.content for doc in documents]
279
+
280
+ # Get batch embeddings and usage
281
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
282
+
283
+ # Process documents with pre-computed embeddings
284
+ for j, doc in enumerate(documents):
285
+ try:
286
+ if j < len(embeddings):
287
+ doc.embedding = embeddings[j]
288
+ doc.usage = usages[j] if j < len(usages) else None
289
+ except Exception as e:
290
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
291
+
292
+ except Exception as e:
293
+ # Check if this is a rate limit error - don't fall back as it would make things worse
294
+ error_str = str(e).lower()
295
+ is_rate_limit = any(
296
+ phrase in error_str
297
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
298
+ )
299
+
300
+ if is_rate_limit:
301
+ logger.error(f"Rate limit detected during batch embedding. {e}")
302
+ raise e
303
+ else:
304
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
305
+ # Fall back to individual embedding
306
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
307
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
308
+ else:
309
+ # Use individual embedding
310
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
311
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
276
312
 
277
313
  client = await self.get_async_client()
278
314
  try: