agno 2.0.11__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (83)
  1. agno/agent/agent.py +606 -175
  2. agno/db/in_memory/in_memory_db.py +42 -29
  3. agno/db/postgres/postgres.py +6 -4
  4. agno/exceptions.py +62 -1
  5. agno/guardrails/__init__.py +6 -0
  6. agno/guardrails/base.py +19 -0
  7. agno/guardrails/openai.py +144 -0
  8. agno/guardrails/pii.py +94 -0
  9. agno/guardrails/prompt_injection.py +51 -0
  10. agno/knowledge/embedder/aws_bedrock.py +9 -4
  11. agno/knowledge/embedder/azure_openai.py +54 -0
  12. agno/knowledge/embedder/base.py +2 -0
  13. agno/knowledge/embedder/cohere.py +184 -5
  14. agno/knowledge/embedder/google.py +79 -1
  15. agno/knowledge/embedder/huggingface.py +9 -4
  16. agno/knowledge/embedder/jina.py +63 -0
  17. agno/knowledge/embedder/mistral.py +78 -11
  18. agno/knowledge/embedder/ollama.py +5 -0
  19. agno/knowledge/embedder/openai.py +18 -54
  20. agno/knowledge/embedder/voyageai.py +69 -16
  21. agno/knowledge/knowledge.py +5 -4
  22. agno/knowledge/reader/pdf_reader.py +4 -3
  23. agno/knowledge/reader/website_reader.py +3 -2
  24. agno/models/base.py +125 -32
  25. agno/models/cerebras/cerebras.py +1 -0
  26. agno/models/cerebras/cerebras_openai.py +1 -0
  27. agno/models/dashscope/dashscope.py +1 -0
  28. agno/models/google/gemini.py +27 -5
  29. agno/models/openai/chat.py +13 -4
  30. agno/models/perplexity/perplexity.py +2 -3
  31. agno/models/requesty/__init__.py +5 -0
  32. agno/models/requesty/requesty.py +49 -0
  33. agno/models/vllm/vllm.py +1 -0
  34. agno/models/xai/xai.py +1 -0
  35. agno/os/app.py +98 -126
  36. agno/os/interfaces/whatsapp/router.py +2 -0
  37. agno/os/mcp.py +1 -1
  38. agno/os/middleware/__init__.py +7 -0
  39. agno/os/middleware/jwt.py +233 -0
  40. agno/os/router.py +181 -45
  41. agno/os/routers/home.py +2 -2
  42. agno/os/routers/memory/memory.py +23 -1
  43. agno/os/routers/memory/schemas.py +1 -1
  44. agno/os/routers/session/session.py +20 -3
  45. agno/os/utils.py +74 -8
  46. agno/run/agent.py +120 -77
  47. agno/run/team.py +115 -72
  48. agno/run/workflow.py +5 -15
  49. agno/session/summary.py +9 -10
  50. agno/session/team.py +2 -1
  51. agno/team/team.py +720 -168
  52. agno/tools/firecrawl.py +4 -4
  53. agno/tools/function.py +42 -2
  54. agno/tools/knowledge.py +3 -3
  55. agno/tools/searxng.py +2 -2
  56. agno/tools/serper.py +2 -2
  57. agno/tools/spider.py +2 -2
  58. agno/tools/workflow.py +4 -5
  59. agno/utils/events.py +66 -1
  60. agno/utils/hooks.py +57 -0
  61. agno/utils/media.py +11 -9
  62. agno/utils/print_response/agent.py +43 -5
  63. agno/utils/print_response/team.py +48 -12
  64. agno/vectordb/cassandra/cassandra.py +44 -4
  65. agno/vectordb/chroma/chromadb.py +79 -8
  66. agno/vectordb/clickhouse/clickhousedb.py +43 -6
  67. agno/vectordb/couchbase/couchbase.py +76 -5
  68. agno/vectordb/lancedb/lance_db.py +38 -3
  69. agno/vectordb/milvus/milvus.py +76 -4
  70. agno/vectordb/mongodb/mongodb.py +76 -4
  71. agno/vectordb/pgvector/pgvector.py +50 -6
  72. agno/vectordb/pineconedb/pineconedb.py +39 -2
  73. agno/vectordb/qdrant/qdrant.py +76 -26
  74. agno/vectordb/singlestore/singlestore.py +77 -4
  75. agno/vectordb/upstashdb/upstashdb.py +42 -2
  76. agno/vectordb/weaviate/weaviate.py +39 -3
  77. agno/workflow/types.py +1 -0
  78. agno/workflow/workflow.py +58 -2
  79. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/METADATA +4 -3
  80. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/RECORD +83 -73
  81. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/WHEEL +0 -0
  82. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/licenses/LICENSE +0 -0
  83. {agno-2.0.11.dist-info → agno-2.1.0.dist-info}/top_level.txt +0 -0
agno/utils/print_response/team.py
@@ -98,6 +98,20 @@ def print_response(
  )
  response_timer.stop()

+ if run_response.input is not None and run_response.input.input_content != input:
+ # Input was modified during the run
+ panels = [status]
+ if show_message:
+ # Convert message to a panel
+ message_content = get_text_from_message(run_response.input.input_content)
+ message_panel = create_panel(
+ content=Text(message_content, style="green"),
+ title="Message",
+ border_style="cyan",
+ )
+ panels.append(message_panel) # type: ignore
+ live_console.update(Group(*panels))
+
  team_markdown = False
  member_markdown = {}
  if markdown:
@@ -400,6 +414,8 @@ def print_response_stream(
  **kwargs,
  )

+ input_content = get_text_from_message(input)
+
  team_markdown = None
  member_markdown = {}

@@ -435,6 +451,10 @@ def print_response_stream(
  if hasattr(resp, "reasoning_steps") and resp.reasoning_steps is not None: # type: ignore
  reasoning_steps = resp.reasoning_steps # type: ignore

+ if resp.event == TeamRunEvent.pre_hook_completed: # type: ignore
+ if resp.run_input is not None: # type: ignore
+ input_content = get_text_from_message(resp.run_input.input_content) # type: ignore
+
  # Collect team tool calls, avoiding duplicates
  if resp.event == TeamRunEvent.tool_call_completed and resp.tool: # type: ignore
  tool = resp.tool # type: ignore
@@ -479,12 +499,11 @@ def print_response_stream(
  # Create new panels for each chunk
  panels = []

- if input and show_message:
+ if input_content and show_message:
  render = True
  # Convert message to a panel
- message_content = get_text_from_message(input)
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
@@ -663,10 +682,9 @@ def print_response_stream(
  final_panels = []

  # Start with the message
- if input and show_message:
- message_content = get_text_from_message(input)
+ if input_content and show_message:
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
@@ -911,6 +929,20 @@ async def aprint_response(
  )
  response_timer.stop()

+ if run_response.input is not None and run_response.input.input_content != input:
+ # Input was modified during the run
+ panels = [status]
+ if show_message:
+ # Convert message to a panel
+ message_content = get_text_from_message(run_response.input.input_content)
+ message_panel = create_panel(
+ content=Text(message_content, style="green"),
+ title="Message",
+ border_style="cyan",
+ )
+ panels.append(message_panel) # type: ignore
+ live_console.update(Group(*panels))
+
  team_markdown = False
  member_markdown = {}
  if markdown:
@@ -1196,6 +1228,8 @@ async def aprint_response_stream(
  # Dict to track member response panels by member_id
  member_response_panels = {}

+ input_content = get_text_from_message(input)
+
  final_run_response = None
  async for resp in team.arun( # type: ignore
  input=input,
@@ -1245,6 +1279,10 @@ async def aprint_response_stream(
  if hasattr(resp, "reasoning_steps") and resp.reasoning_steps is not None: # type: ignore
  reasoning_steps = resp.reasoning_steps # type: ignore

+ if resp.event == TeamRunEvent.pre_hook_completed: # type: ignore
+ if resp.run_input is not None: # type: ignore
+ input_content = get_text_from_message(resp.run_input.input_content) # type: ignore
+
  # Collect team tool calls, avoiding duplicates
  if resp.event == TeamRunEvent.tool_call_completed and resp.tool: # type: ignore
  tool = resp.tool # type: ignore
@@ -1288,12 +1326,11 @@ async def aprint_response_stream(
  # Create new panels for each chunk
  panels = []

- if input and show_message:
+ if input_content and show_message:
  render = True
  # Convert message to a panel
- message_content = get_text_from_message(input)
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
@@ -1473,10 +1510,9 @@ async def aprint_response_stream(
  final_panels = []

  # Start with the message
- if input and show_message:
- message_content = get_text_from_message(input)
+ if input_content and show_message:
  message_panel = create_panel(
- content=Text(message_content, style="green"),
+ content=Text(input_content, style="green"),
  title="Message",
  border_style="cyan",
  )
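The hunks above make the team print helpers track the effective input: non-streaming runs re-render the Message panel when run_response.input.input_content differs from the original input, and streaming runs refresh it on pre_hook_completed events. A minimal sketch of observing the same event outside the print helpers, assuming TeamRunEvent is importable from agno.run.team, that stream=True yields intermediate events (additional flags may be required), and that the show_effective_input helper is hypothetical:

import asyncio

from agno.run.team import TeamRunEvent  # assumed import path


async def show_effective_input(team, user_input: str) -> None:
    # Hypothetical helper: watch for a pre-hook (e.g. a guardrail) rewriting the input.
    effective_input = user_input
    async for event in team.arun(input=user_input, stream=True):
        if getattr(event, "event", None) == TeamRunEvent.pre_hook_completed and event.run_input is not None:
            # run_input.input_content carries the (possibly modified) input, as in the hunks above
            effective_input = event.run_input.input_content
    print(f"Input actually sent to the team: {effective_input}")


# asyncio.run(show_effective_input(my_team, "original user prompt"))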
agno/vectordb/cassandra/cassandra.py
@@ -119,12 +119,52 @@ class Cassandra(VectorDb):
  """Insert documents asynchronously by running in a thread."""
  log_info(f"Cassandra VectorDB : Inserting Documents to the table {self.table_name}")

- for doc in documents:
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
  try:
- embed_tasks = [doc.async_embed(embedder=self.embedder)]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
  except Exception as e:
- log_error(f"Error processing document '{doc.name}': {e}")
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ log_error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ for doc in documents:
+ try:
+ embed_tasks = [doc.async_embed(embedder=self.embedder)]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ except Exception as e:
+ log_error(f"Error processing document '{doc.name}': {e}")
+ else:
+ # Use individual embedding (original behavior)
+ for doc in documents:
+ try:
+ embed_tasks = [doc.async_embed(embedder=self.embedder)]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ except Exception as e:
+ log_error(f"Error processing document '{doc.name}': {e}")

  futures = []
  for doc in documents:
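The same batch-embedding branch recurs nearly verbatim in the ChromaDb, Clickhouse, CouchbaseSearch, LanceDb, and Milvus hunks below. A condensed, standalone sketch of the pattern, assuming only the attributes the diff itself relies on (embedder.enable_batch, async_get_embeddings_batch_and_usage, doc.content, doc.embedding, doc.usage, doc.async_embed); the embed_documents helper name is hypothetical:

import asyncio
from typing import Any, List


async def embed_documents(documents: List[Any], embedder: Any) -> None:
    # Hypothetical standalone version of the branch added to each vector DB backend.
    if getattr(embedder, "enable_batch", False) and hasattr(embedder, "async_get_embeddings_batch_and_usage"):
        try:
            contents = [doc.content for doc in documents]
            # One call embeds every document's content and returns per-item usage
            embeddings, usages = await embedder.async_get_embeddings_batch_and_usage(contents)
            for i, doc in enumerate(documents):
                if i < len(embeddings):
                    doc.embedding = embeddings[i]
                    doc.usage = usages[i] if i < len(usages) else None
            return
        except Exception as e:
            if any(p in str(e).lower() for p in ("rate limit", "too many requests", "429")):
                raise  # falling back would only send more requests
            # otherwise fall through to per-document embedding
    # Original behavior: embed each document individually
    await asyncio.gather(
        *[doc.async_embed(embedder=embedder) for doc in documents],
        return_exceptions=True,
    )

The per-backend copies in the hunks differ only in their logging calls (log_error vs logger.error / logger.warning).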
agno/vectordb/chroma/chromadb.py
@@ -215,11 +215,47 @@ class ChromaDb(VectorDb):
  if not self._collection:
  self._collection = self.client.get_collection(name=self.collection_name)

- try:
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
- except Exception as e:
- log_error(f"Error processing document: {e}")
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ try:
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ except Exception as e:
+ log_error(f"Error processing document: {e}")

  for document in documents:
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -341,8 +377,44 @@ class ChromaDb(VectorDb):
  if not self._collection:
  self._collection = self.client.get_collection(name=self.collection_name)

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -709,7 +781,6 @@ class ChromaDb(VectorDb):

  try:
  collection: Collection = self.client.get_collection(name=self.collection_name)
- print("COLLECTION_----------", collection)
  # Try to get the document by ID
  result = collection.get(ids=[id])
  found_ids = result.get("ids", [])
agno/vectordb/clickhouse/clickhousedb.py
@@ -81,6 +81,7 @@ class Clickhouse(VectorDb):
  if self.async_client is None:
  self.async_client = await clickhouse_connect.get_async_client(
  host=self.host,
+ username=self.username, # type: ignore
  password=self.password,
  database=self.database_name,
  port=self.port,
@@ -228,7 +229,7 @@ class Clickhouse(VectorDb):
  "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  async def async_name_exists(self, name: str) -> bool:
  """Check if a document with given name exists asynchronously."""
@@ -241,7 +242,7 @@ class Clickhouse(VectorDb):
  "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  def id_exists(self, id: str) -> bool:
  """
@@ -257,7 +258,7 @@ class Clickhouse(VectorDb):
  "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  def insert(
  self,
@@ -308,8 +309,44 @@ class Clickhouse(VectorDb):
  rows: List[List[Any]] = []
  async_client = await self._ensure_async_client()

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -696,7 +733,7 @@ class Clickhouse(VectorDb):
  "SELECT content_hash FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
  parameters=parameters,
  )
- return bool(result)
+ return len(result.result_rows) > 0 if result.result_rows else False

  def _delete_by_content_hash(self, content_hash: str) -> bool:
  """
agno/vectordb/couchbase/couchbase.py
@@ -871,8 +871,44 @@ class CouchbaseSearch(VectorDb):
  async_collection_instance = await self.get_async_collection()
  all_docs_to_insert: Dict[str, Any] = {}

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  try:
@@ -937,8 +973,44 @@ class CouchbaseSearch(VectorDb):
  async_collection_instance = await self.get_async_collection()
  all_docs_to_upsert: Dict[str, Any] = {}

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  try:
@@ -1225,7 +1297,6 @@ class CouchbaseSearch(VectorDb):
  rows = list(result.rows()) # Collect once

  for row in rows:
- print(row)
  self.collection.remove(row.get("doc_id"))
  log_info(f"Deleted {len(rows)} documents with metadata {metadata}")
  return True
agno/vectordb/lancedb/lance_db.py
@@ -354,9 +354,44 @@ class LanceDb(VectorDb):
  log_debug(f"Inserting {len(documents)} documents")
  data = []

- # Prepare documents for insertion.
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ logger.error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  for document in documents:
  if await self.async_doc_exists(document):
agno/vectordb/milvus/milvus.py
@@ -457,8 +457,44 @@ class Milvus(VectorDb):
  """Insert documents asynchronously based on search type."""
  log_info(f"Inserting {len(documents)} documents asynchronously")

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ log_error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  if self.search_type == SearchType.hybrid:
  await asyncio.gather(
@@ -547,8 +583,44 @@ class Milvus(VectorDb):
  ) -> None:
  log_debug(f"Upserting {len(documents)} documents asynchronously")

- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
- await asyncio.gather(*embed_tasks, return_exceptions=True)
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+ # Use batch embedding when enabled and supported
+ try:
+ # Extract content from all documents
+ doc_contents = [doc.content for doc in documents]
+
+ # Get batch embeddings and usage
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+ # Process documents with pre-computed embeddings
+ for j, doc in enumerate(documents):
+ try:
+ if j < len(embeddings):
+ doc.embedding = embeddings[j]
+ doc.usage = usages[j] if j < len(usages) else None
+ except Exception as e:
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+ except Exception as e:
+ # Check if this is a rate limit error - don't fall back as it would make things worse
+ error_str = str(e).lower()
+ is_rate_limit = any(
+ phrase in error_str
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+ )
+
+ if is_rate_limit:
+ log_error(f"Rate limit detected during batch embedding. {e}")
+ raise e
+ else:
+ log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+ # Fall back to individual embedding
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
+ else:
+ # Use individual embedding
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+ await asyncio.gather(*embed_tasks, return_exceptions=True)

  async def process_document(document):
  cleaned_content = document.content.replace("\x00", "\ufffd")