agno 2.2.10__py3-none-any.whl → 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. agno/agent/agent.py +75 -48
  2. agno/db/dynamo/utils.py +1 -1
  3. agno/db/firestore/utils.py +1 -1
  4. agno/db/gcs_json/utils.py +1 -1
  5. agno/db/in_memory/utils.py +1 -1
  6. agno/db/json/utils.py +1 -1
  7. agno/db/mongo/utils.py +3 -3
  8. agno/db/mysql/mysql.py +1 -1
  9. agno/db/mysql/utils.py +1 -1
  10. agno/db/postgres/utils.py +1 -1
  11. agno/db/redis/utils.py +1 -1
  12. agno/db/singlestore/singlestore.py +1 -1
  13. agno/db/singlestore/utils.py +1 -1
  14. agno/db/sqlite/async_sqlite.py +1 -1
  15. agno/db/sqlite/sqlite.py +1 -1
  16. agno/db/sqlite/utils.py +1 -1
  17. agno/filters.py +354 -0
  18. agno/knowledge/chunking/agentic.py +8 -9
  19. agno/knowledge/chunking/strategy.py +59 -15
  20. agno/knowledge/embedder/sentence_transformer.py +6 -2
  21. agno/knowledge/knowledge.py +43 -22
  22. agno/knowledge/reader/base.py +6 -2
  23. agno/knowledge/utils.py +20 -0
  24. agno/models/anthropic/claude.py +45 -9
  25. agno/models/base.py +4 -0
  26. agno/os/app.py +23 -7
  27. agno/os/interfaces/slack/router.py +53 -33
  28. agno/os/interfaces/slack/slack.py +9 -1
  29. agno/os/router.py +25 -1
  30. agno/os/routers/health.py +5 -3
  31. agno/os/routers/knowledge/knowledge.py +43 -17
  32. agno/os/routers/knowledge/schemas.py +4 -3
  33. agno/run/agent.py +11 -1
  34. agno/run/base.py +3 -2
  35. agno/session/agent.py +10 -5
  36. agno/team/team.py +57 -18
  37. agno/tools/file_generation.py +4 -4
  38. agno/tools/gmail.py +179 -0
  39. agno/tools/parallel.py +314 -0
  40. agno/utils/agent.py +22 -17
  41. agno/utils/gemini.py +15 -5
  42. agno/utils/knowledge.py +12 -5
  43. agno/utils/log.py +1 -0
  44. agno/utils/models/claude.py +2 -1
  45. agno/utils/print_response/agent.py +5 -4
  46. agno/utils/print_response/team.py +5 -4
  47. agno/vectordb/base.py +2 -4
  48. agno/vectordb/cassandra/cassandra.py +12 -5
  49. agno/vectordb/chroma/chromadb.py +10 -4
  50. agno/vectordb/clickhouse/clickhousedb.py +12 -4
  51. agno/vectordb/couchbase/couchbase.py +12 -3
  52. agno/vectordb/lancedb/lance_db.py +69 -144
  53. agno/vectordb/langchaindb/langchaindb.py +13 -4
  54. agno/vectordb/lightrag/lightrag.py +8 -3
  55. agno/vectordb/llamaindex/llamaindexdb.py +10 -4
  56. agno/vectordb/milvus/milvus.py +16 -5
  57. agno/vectordb/mongodb/mongodb.py +14 -3
  58. agno/vectordb/pgvector/pgvector.py +73 -15
  59. agno/vectordb/pineconedb/pineconedb.py +6 -2
  60. agno/vectordb/qdrant/qdrant.py +25 -13
  61. agno/vectordb/redis/redisdb.py +37 -30
  62. agno/vectordb/singlestore/singlestore.py +9 -4
  63. agno/vectordb/surrealdb/surrealdb.py +13 -3
  64. agno/vectordb/upstashdb/upstashdb.py +8 -5
  65. agno/vectordb/weaviate/weaviate.py +29 -12
  66. agno/workflow/step.py +3 -2
  67. agno/workflow/types.py +20 -1
  68. agno/workflow/workflow.py +103 -14
  69. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/METADATA +4 -1
  70. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/RECORD +73 -71
  71. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/WHEEL +0 -0
  72. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/licenses/LICENSE +0 -0
  73. {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Set, Unio
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
5
+ from agno.filters import FilterExpr
5
6
  from agno.media import Audio, File, Image, Video
6
7
  from agno.models.message import Message
7
8
  from agno.models.response import ToolExecution
@@ -33,7 +34,7 @@ def print_response(
33
34
  videos: Optional[Sequence[Video]] = None,
34
35
  files: Optional[Sequence[File]] = None,
35
36
  markdown: bool = False,
36
- knowledge_filters: Optional[Dict[str, Any]] = None,
37
+ knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
37
38
  add_history_to_context: Optional[bool] = None,
38
39
  dependencies: Optional[Dict[str, Any]] = None,
39
40
  add_dependencies_to_context: Optional[bool] = None,
@@ -333,7 +334,7 @@ def print_response_stream(
333
334
  markdown: bool = False,
334
335
  stream_events: bool = False,
335
336
  stream_intermediate_steps: bool = False, # type: ignore
336
- knowledge_filters: Optional[Dict[str, Any]] = None,
337
+ knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
337
338
  add_history_to_context: Optional[bool] = None,
338
339
  dependencies: Optional[Dict[str, Any]] = None,
339
340
  add_dependencies_to_context: Optional[bool] = None,
@@ -865,7 +866,7 @@ async def aprint_response(
865
866
  videos: Optional[Sequence[Video]] = None,
866
867
  files: Optional[Sequence[File]] = None,
867
868
  markdown: bool = False,
868
- knowledge_filters: Optional[Dict[str, Any]] = None,
869
+ knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
869
870
  add_history_to_context: Optional[bool] = None,
870
871
  dependencies: Optional[Dict[str, Any]] = None,
871
872
  add_dependencies_to_context: Optional[bool] = None,
@@ -1163,7 +1164,7 @@ async def aprint_response_stream(
1163
1164
  markdown: bool = False,
1164
1165
  stream_events: bool = False,
1165
1166
  stream_intermediate_steps: bool = False, # type: ignore
1166
- knowledge_filters: Optional[Dict[str, Any]] = None,
1167
+ knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
1167
1168
  add_history_to_context: Optional[bool] = None,
1168
1169
  dependencies: Optional[Dict[str, Any]] = None,
1169
1170
  add_dependencies_to_context: Optional[bool] = None,
agno/vectordb/base.py CHANGED
@@ -72,13 +72,11 @@ class VectorDb(ABC):
72
72
  raise NotImplementedError
73
73
 
74
74
  @abstractmethod
75
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
75
+ def search(self, query: str, limit: int = 5, filters: Optional[Any] = None) -> List[Document]:
76
76
  raise NotImplementedError
77
77
 
78
78
  @abstractmethod
79
- async def async_search(
80
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
81
- ) -> List[Document]:
79
+ async def async_search(self, query: str, limit: int = 5, filters: Optional[Any] = None) -> List[Document]:
82
80
  raise NotImplementedError
83
81
 
84
82
  @abstractmethod
@@ -1,9 +1,10 @@
1
1
  import asyncio
2
- from typing import Any, Dict, Iterable, List, Optional
2
+ from typing import Any, Dict, Iterable, List, Optional, Union
3
3
 
4
+ from agno.filters import FilterExpr
4
5
  from agno.knowledge.document import Document
5
6
  from agno.knowledge.embedder import Embedder
6
- from agno.utils.log import log_debug, log_error, log_info
7
+ from agno.utils.log import log_debug, log_error, log_info, log_warning
7
8
  from agno.vectordb.base import VectorDb
8
9
  from agno.vectordb.cassandra.index import AgnoMetadataVectorCassandraTable
9
10
 
@@ -204,13 +205,17 @@ class Cassandra(VectorDb):
204
205
  self.delete_by_content_hash(content_hash)
205
206
  await self.async_insert(content_hash, documents, filters)
206
207
 
207
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
208
+ def search(
209
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
210
+ ) -> List[Document]:
208
211
  """Keyword-based search on document metadata."""
209
212
  log_debug(f"Cassandra VectorDB : Performing Vector Search on {self.table_name} with query {query}")
213
+ if filters is not None:
214
+ log_warning("Filters are not yet supported in Cassandra. No filters will be applied.")
210
215
  return self.vector_search(query=query, limit=limit)
211
216
 
212
217
  async def async_search(
213
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
218
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
214
219
  ) -> List[Document]:
215
220
  """Search asynchronously by running in a thread."""
216
221
  return await asyncio.to_thread(self.search, query, limit, filters)
@@ -221,7 +226,9 @@ class Cassandra(VectorDb):
221
226
  ) -> List[Document]:
222
227
  return [self._row_to_document(row=hit) for hit in hits]
223
228
 
224
- def vector_search(self, query: str, limit: int = 5) -> List[Document]:
229
+ def vector_search(
230
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
231
+ ) -> List[Document]:
225
232
  """Vector similarity search implementation."""
226
233
  query_embedding = self.embedder.get_embedding(query)
227
234
  hits = list(
@@ -13,10 +13,11 @@ try:
13
13
  except ImportError:
14
14
  raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
15
15
 
16
+ from agno.filters import FilterExpr
16
17
  from agno.knowledge.document import Document
17
18
  from agno.knowledge.embedder import Embedder
18
19
  from agno.knowledge.reranker.base import Reranker
19
- from agno.utils.log import log_debug, log_error, log_info, logger
20
+ from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
20
21
  from agno.vectordb.base import VectorDb
21
22
  from agno.vectordb.distance import Distance
22
23
 
@@ -477,13 +478,15 @@ class ChromaDb(VectorDb):
477
478
  logger.error(f"Error upserting documents by content hash: {e}")
478
479
  raise
479
480
 
480
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
481
+ def search(
482
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
483
+ ) -> List[Document]:
481
484
  """Search the collection for a query.
482
485
 
483
486
  Args:
484
487
  query (str): Query to search for.
485
488
  limit (int): Number of results to return.
486
- filters (Optional[Dict[str, Any]]): Filters to apply while searching.
489
+ filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply while searching.
487
490
  Supports ChromaDB's filtering operators:
488
491
  - $eq, $ne: Equality/Inequality
489
492
  - $gt, $gte, $lt, $lte: Numeric comparisons
@@ -492,6 +495,9 @@ class ChromaDb(VectorDb):
492
495
  Returns:
493
496
  List[Document]: List of search results.
494
497
  """
498
+ if isinstance(filters, list):
499
+ log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
500
+ filters = None
495
501
  query_embedding = self.embedder.get_embedding(query)
496
502
  if query_embedding is None:
497
503
  logger.error(f"Error getting embedding for Query: {query}")
@@ -606,7 +612,7 @@ class ChromaDb(VectorDb):
606
612
  return converted
607
613
 
608
614
  async def async_search(
609
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
615
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
610
616
  ) -> List[Document]:
611
617
  """Search asynchronously by running in a thread."""
612
618
  return await asyncio.to_thread(self.search, query, limit, filters)
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  from hashlib import md5
3
- from typing import Any, Dict, List, Optional
3
+ from typing import Any, Dict, List, Optional, Union
4
4
 
5
5
  from agno.vectordb.clickhouse.index import HNSW
6
6
 
@@ -11,9 +11,10 @@ try:
11
11
  except ImportError:
12
12
  raise ImportError("`clickhouse-connect` not installed. Use `pip install clickhouse-connect` to install it")
13
13
 
14
+ from agno.filters import FilterExpr
14
15
  from agno.knowledge.document import Document
15
16
  from agno.knowledge.embedder import Embedder
16
- from agno.utils.log import log_debug, log_info, logger
17
+ from agno.utils.log import log_debug, log_info, log_warning, logger
17
18
  from agno.vectordb.base import VectorDb
18
19
  from agno.vectordb.distance import Distance
19
20
 
@@ -448,7 +449,11 @@ class Clickhouse(VectorDb):
448
449
  parameters=parameters,
449
450
  )
450
451
 
451
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
452
+ def search(
453
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
454
+ ) -> List[Document]:
455
+ if filters is not None:
456
+ log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
452
457
  query_embedding = self.embedder.get_embedding(query)
453
458
  if query_embedding is None:
454
459
  logger.error(f"Error getting embedding for Query: {query}")
@@ -502,11 +507,14 @@ class Clickhouse(VectorDb):
502
507
  return search_results
503
508
 
504
509
  async def async_search(
505
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
510
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
506
511
  ) -> List[Document]:
507
512
  """Search for documents asynchronously."""
508
513
  async_client = await self._ensure_async_client()
509
514
 
515
+ if filters is not None:
516
+ log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
517
+
510
518
  query_embedding = self.embedder.get_embedding(query)
511
519
  if query_embedding is None:
512
520
  logger.error(f"Error getting embedding for Query: {query}")
@@ -3,10 +3,11 @@ import time
3
3
  from datetime import timedelta
4
4
  from typing import Any, Dict, List, Optional, Union
5
5
 
6
+ from agno.filters import FilterExpr
6
7
  from agno.knowledge.document import Document
7
8
  from agno.knowledge.embedder import Embedder
8
9
  from agno.knowledge.embedder.openai import OpenAIEmbedder
9
- from agno.utils.log import log_debug, log_info, logger
10
+ from agno.utils.log import log_debug, log_info, log_warning, logger
10
11
  from agno.vectordb.base import VectorDb
11
12
 
12
13
  try:
@@ -458,7 +459,12 @@ class CouchbaseSearch(VectorDb):
458
459
  if errors_occurred:
459
460
  logger.warning("Some errors occurred during the upsert operation. Please check logs for details.")
460
461
 
461
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
462
+ def search(
463
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
464
+ ) -> List[Document]:
465
+ if isinstance(filters, List):
466
+ log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
467
+ filters = None
462
468
  """Search the Couchbase bucket for documents relevant to the query."""
463
469
  query_embedding = self.embedder.get_embedding(query)
464
470
  if query_embedding is None:
@@ -1068,8 +1074,11 @@ class CouchbaseSearch(VectorDb):
1068
1074
  logger.info(f"[async] Total successfully upserted: {total_upserted_count}, Total failed: {total_failed_count}.")
1069
1075
 
1070
1076
  async def async_search(
1071
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
1077
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
1072
1078
  ) -> List[Document]:
1079
+ if isinstance(filters, List):
1080
+ log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
1081
+ filters = None
1073
1082
  query_embedding = self.embedder.get_embedding(query)
1074
1083
  if query_embedding is None:
1075
1084
  logger.error(f"[async] Failed to generate embedding for query: {query}")
@@ -2,7 +2,7 @@ import asyncio
2
2
  import json
3
3
  from hashlib import md5
4
4
  from os import getenv
5
- from typing import Any, Dict, List, Optional
5
+ from typing import Any, Dict, List, Optional, Union
6
6
 
7
7
  try:
8
8
  import lancedb
@@ -10,10 +10,11 @@ try:
10
10
  except ImportError:
11
11
  raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
12
12
 
13
+ from agno.filters import FilterExpr
13
14
  from agno.knowledge.document import Document
14
15
  from agno.knowledge.embedder import Embedder
15
16
  from agno.knowledge.reranker.base import Reranker
16
- from agno.utils.log import log_debug, log_info, logger
17
+ from agno.utils.log import log_debug, log_info, log_warning, logger
17
18
  from agno.vectordb.base import VectorDb
18
19
  from agno.vectordb.distance import Distance
19
20
  from agno.vectordb.search import SearchType
@@ -158,7 +159,7 @@ class LanceDb(VectorDb):
158
159
 
159
160
  def _prepare_vector(self, embedding) -> List[float]:
160
161
  """Prepare vector embedding for insertion, ensuring correct dimensions and type."""
161
- if embedding is not None:
162
+ if embedding is not None and len(embedding) > 0:
162
163
  # Convert to list of floats
163
164
  vector = [float(x) for x in embedding]
164
165
 
@@ -176,7 +177,7 @@ class LanceDb(VectorDb):
176
177
 
177
178
  return vector
178
179
  else:
179
- # Fallback if embedding is None
180
+ # Fallback if embedding is None or empty
180
181
  return [0.0] * (self.dimensions or 1536)
181
182
 
182
183
  async def _get_async_connection(self) -> lancedb.AsyncConnection:
@@ -200,7 +201,6 @@ class LanceDb(VectorDb):
200
201
  # Re-establish sync connection to see async changes
201
202
  if self.connection and self.table_name in self.connection.table_names():
202
203
  self.table = self.connection.open_table(self.table_name)
203
- log_debug(f"Refreshed sync connection for table: {self.table_name}")
204
204
  except Exception as e:
205
205
  log_debug(f"Could not refresh sync connection: {e}")
206
206
  # If refresh fails, we can still function but sync methods might not see async changes
@@ -359,6 +359,9 @@ class LanceDb(VectorDb):
359
359
  """
360
360
  Asynchronously insert documents into the database.
361
361
 
362
+ Note: Currently wraps sync insert method since LanceDB async insert has sync/async table
363
+ synchronization issues causing empty vectors. We still do async embedding for performance.
364
+
362
365
  Args:
363
366
  documents (List[Document]): List of documents to insert
364
367
  filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
@@ -368,115 +371,36 @@ class LanceDb(VectorDb):
368
371
  return
369
372
 
370
373
  log_debug(f"Inserting {len(documents)} documents")
371
- data = []
372
374
 
375
+ # Still do async embedding for performance
373
376
  if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
374
- # Use batch embedding when enabled and supported
375
377
  try:
376
- # Extract content from all documents
377
378
  doc_contents = [doc.content for doc in documents]
378
-
379
- # Get batch embeddings and usage
380
379
  embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
381
380
 
382
- # Process documents with pre-computed embeddings
383
381
  for j, doc in enumerate(documents):
384
- try:
385
- if j < len(embeddings):
386
- doc.embedding = embeddings[j]
387
- doc.usage = usages[j] if j < len(usages) else None
388
- except Exception as e:
389
- logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
390
-
382
+ if j < len(embeddings):
383
+ doc.embedding = embeddings[j]
384
+ doc.usage = usages[j] if j < len(usages) else None
391
385
  except Exception as e:
392
- # Check if this is a rate limit error - don't fall back as it would make things worse
393
386
  error_str = str(e).lower()
394
387
  is_rate_limit = any(
395
388
  phrase in error_str
396
389
  for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
397
390
  )
398
-
399
391
  if is_rate_limit:
400
392
  logger.error(f"Rate limit detected during batch embedding. {e}")
401
393
  raise e
402
394
  else:
403
395
  logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
404
- # Fall back to individual embedding
405
396
  embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
406
397
  await asyncio.gather(*embed_tasks, return_exceptions=True)
407
398
  else:
408
- # Use individual embedding
409
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
399
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
410
400
  await asyncio.gather(*embed_tasks, return_exceptions=True)
411
401
 
412
- for document in documents:
413
- if await self.async_doc_exists(document):
414
- continue
415
-
416
- # Add filters to document metadata if provided
417
- if filters:
418
- meta_data = document.meta_data.copy() if document.meta_data else {}
419
- meta_data.update(filters)
420
- document.meta_data = meta_data
421
-
422
- cleaned_content = document.content.replace("\x00", "\ufffd")
423
- doc_id = str(md5(cleaned_content.encode()).hexdigest())
424
- payload = {
425
- "name": document.name,
426
- "meta_data": document.meta_data,
427
- "content": cleaned_content,
428
- "usage": document.usage,
429
- "content_id": document.content_id,
430
- "content_hash": content_hash,
431
- }
432
- data.append(
433
- {
434
- "id": doc_id,
435
- "vector": self._prepare_vector(document.embedding),
436
- "payload": json.dumps(payload),
437
- }
438
- )
439
- log_debug(f"Parsed document: {document.name} ({document.meta_data})")
440
-
441
- if not data:
442
- log_debug("No new data to insert")
443
- return
444
-
445
- try:
446
- await self._get_async_connection()
447
-
448
- # Ensure the async table is created before inserting
449
- if self.async_table is None:
450
- try:
451
- await self.async_create()
452
- except Exception as create_e:
453
- logger.error(f"Failed to create async table: {create_e}")
454
- # Continue to fallback logic below
455
-
456
- if self.async_table is None:
457
- # Fall back to sync insertion if async table creation failed
458
- logger.warning("Async table not available, falling back to sync insertion")
459
- return self.insert(content_hash, documents, filters)
460
-
461
- if self.on_bad_vectors is not None:
462
- await self.async_table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value) # type: ignore
463
- else:
464
- await self.async_table.add(data) # type: ignore
465
-
466
- log_debug(f"Asynchronously inserted {len(data)} documents")
467
-
468
- # Refresh sync connection to see async changes
469
- self._refresh_sync_connection()
470
- except Exception as e:
471
- logger.error(f"Error during async document insertion: {e}")
472
- # Try falling back to sync insertion as a last resort
473
- try:
474
- logger.warning("Async insertion failed, attempting sync fallback")
475
- self.insert(content_hash, documents, filters)
476
- logger.info("Sync fallback successful")
477
- except Exception as sync_e:
478
- logger.error(f"Sync fallback also failed: {sync_e}")
479
- raise e from sync_e
402
+ # Use sync insert to avoid sync/async table synchronization issues
403
+ self.insert(content_hash, documents, filters)
480
404
 
481
405
  def upsert_available(self) -> bool:
482
406
  """Check if upsert is available in LanceDB."""
@@ -497,11 +421,42 @@ class LanceDb(VectorDb):
497
421
  async def async_upsert(
498
422
  self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
499
423
  ) -> None:
500
- if self.content_hash_exists(content_hash):
501
- self._delete_by_content_hash(content_hash)
502
- await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
424
+ """
425
+ Asynchronously upsert documents into the database.
503
426
 
504
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
427
+ Note: Uses async embedding for performance, then sync upsert for reliability.
428
+ """
429
+ if len(documents) > 0:
430
+ # Do async embedding for performance
431
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
432
+ try:
433
+ doc_contents = [doc.content for doc in documents]
434
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
435
+ for j, doc in enumerate(documents):
436
+ if j < len(embeddings):
437
+ doc.embedding = embeddings[j]
438
+ doc.usage = usages[j] if j < len(usages) else None
439
+ except Exception as e:
440
+ error_str = str(e).lower()
441
+ is_rate_limit = any(
442
+ phrase in error_str
443
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
444
+ )
445
+ if is_rate_limit:
446
+ raise e
447
+ else:
448
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
449
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
450
+ else:
451
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
452
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
453
+
454
+ # Use sync upsert for reliability
455
+ self.upsert(content_hash=content_hash, documents=documents, filters=filters)
456
+
457
+ def search(
458
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
459
+ ) -> List[Document]:
505
460
  """
506
461
  Search for documents matching the query.
507
462
 
@@ -518,6 +473,10 @@ class LanceDb(VectorDb):
518
473
 
519
474
  results = None
520
475
 
476
+ if isinstance(filters, list):
477
+ log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
478
+ filters = None
479
+
521
480
  if self.search_type == SearchType.vector:
522
481
  results = self.vector_search(query, limit)
523
482
  elif self.search_type == SearchType.keyword:
@@ -559,11 +518,14 @@ class LanceDb(VectorDb):
559
518
  return search_results
560
519
 
561
520
  async def async_search(
562
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
521
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
563
522
  ) -> List[Document]:
564
523
  """
565
524
  Asynchronously search for documents matching the query.
566
525
 
526
+ Note: Currently wraps sync search method since LanceDB async search has sync/async table
527
+ synchronization issues. Performance impact is minimal for search operations.
528
+
567
529
  Args:
568
530
  query (str): Query string to search for
569
531
  limit (int): Maximum number of results to return
@@ -572,53 +534,12 @@ class LanceDb(VectorDb):
572
534
  Returns:
573
535
  List[Document]: List of matching documents
574
536
  """
575
- # TODO: Search is not yet supported in async (https://github.com/lancedb/lancedb/pull/2049)
576
- if self.connection:
577
- self.table = self.connection.open_table(name=self.table_name)
578
-
579
- results = None
580
-
581
- if self.search_type == SearchType.vector:
582
- results = self.vector_search(query, limit)
583
- elif self.search_type == SearchType.keyword:
584
- results = self.keyword_search(query, limit)
585
- elif self.search_type == SearchType.hybrid:
586
- results = self.hybrid_search(query, limit)
587
- else:
588
- logger.error(f"Invalid search type '{self.search_type}'.")
589
- return []
590
-
591
- if results is None:
592
- return []
593
-
594
- search_results = self._build_search_results(results)
537
+ # Wrap sync search method to avoid sync/async table synchronization issues
538
+ return self.search(query=query, limit=limit, filters=filters)
595
539
 
596
- # Filter results based on metadata if filters are provided
597
- if filters and search_results:
598
- filtered_results = []
599
- for doc in search_results:
600
- if doc.meta_data is None:
601
- continue
602
-
603
- # Check if all filter criteria match
604
- match = True
605
- for key, value in filters.items():
606
- if key not in doc.meta_data or doc.meta_data[key] != value:
607
- match = False
608
- break
609
-
610
- if match:
611
- filtered_results.append(doc)
612
-
613
- search_results = filtered_results
614
-
615
- if self.reranker and search_results:
616
- search_results = self.reranker.rerank(query=query, documents=search_results)
617
-
618
- log_info(f"Found {len(search_results)} documents")
619
- return search_results
620
-
621
- def vector_search(self, query: str, limit: int = 5) -> List[Document]:
540
+ def vector_search(
541
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
542
+ ) -> List[Document]:
622
543
  query_embedding = self.embedder.get_embedding(query)
623
544
  if query_embedding is None:
624
545
  logger.error(f"Error getting embedding for Query: {query}")
@@ -638,7 +559,9 @@ class LanceDb(VectorDb):
638
559
 
639
560
  return results.to_pandas()
640
561
 
641
- def hybrid_search(self, query: str, limit: int = 5) -> List[Document]:
562
+ def hybrid_search(
563
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
564
+ ) -> List[Document]:
642
565
  query_embedding = self.embedder.get_embedding(query)
643
566
  if query_embedding is None:
644
567
  logger.error(f"Error getting embedding for Query: {query}")
@@ -667,7 +590,9 @@ class LanceDb(VectorDb):
667
590
 
668
591
  return results.to_pandas()
669
592
 
670
- def keyword_search(self, query: str, limit: int = 5) -> List[Document]:
593
+ def keyword_search(
594
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
595
+ ) -> List[Document]:
671
596
  if self.table is None:
672
597
  logger.error("Table not initialized. Please create the table first")
673
598
  return []
@@ -1,7 +1,8 @@
1
- from typing import Any, Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional, Union
2
2
 
3
+ from agno.filters import FilterExpr
3
4
  from agno.knowledge.document import Document
4
- from agno.utils.log import log_debug, logger
5
+ from agno.utils.log import log_debug, log_warning, logger
5
6
  from agno.vectordb.base import VectorDb
6
7
 
7
8
 
@@ -70,9 +71,17 @@ class LangChainVectorDb(VectorDb):
70
71
  logger.warning("LangChainKnowledgeBase.async_upsert() not supported - please check the vectorstore manually.")
71
72
  raise NotImplementedError
72
73
 
73
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
74
+ def search(
75
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
76
+ ) -> List[Document]:
74
77
  """Returns relevant documents matching the query"""
75
78
 
79
+ if isinstance(filters, List):
80
+ log_warning(
81
+ "Filter Expressions are not supported in LangChainDB. No filters will be applied. Use filters as a dictionary."
82
+ )
83
+ filters = None
84
+
76
85
  try:
77
86
  from langchain_core.documents import Document as LangChainDocument
78
87
  from langchain_core.retrievers import BaseRetriever
@@ -109,7 +118,7 @@ class LangChainVectorDb(VectorDb):
109
118
  return documents
110
119
 
111
120
  async def async_search(
112
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
121
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
113
122
  ) -> List[Document]:
114
123
  return self.search(query, limit, filters)
115
124
 
@@ -1,8 +1,9 @@
1
1
  import asyncio
2
- from typing import Any, Dict, List, Optional
2
+ from typing import Any, Dict, List, Optional, Union
3
3
 
4
4
  import httpx
5
5
 
6
+ from agno.filters import FilterExpr
6
7
  from agno.knowledge.document import Document
7
8
  from agno.utils.log import log_debug, log_error, log_info, log_warning
8
9
  from agno.vectordb.base import VectorDb
@@ -92,14 +93,18 @@ class LightRag(VectorDb):
92
93
  """Async upsert documents into the vector database"""
93
94
  pass
94
95
 
95
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
96
+ def search(
97
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
98
+ ) -> List[Document]:
96
99
  result = asyncio.run(self.async_search(query, limit=limit, filters=filters))
97
100
  return result if result is not None else []
98
101
 
99
102
  async def async_search(
100
- self, query: str, limit: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
103
+ self, query: str, limit: Optional[int] = None, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
101
104
  ) -> Optional[List[Document]]:
102
105
  mode: str = "hybrid" # Default mode, can be "local", "global", or "hybrid"
106
+ if filters is not None:
107
+ log_warning("Filters are not supported in LightRAG. No filters will be applied.")
103
108
  try:
104
109
  async with httpx.AsyncClient(timeout=30.0) as client:
105
110
  response = await client.post(