agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import asyncio
2
2
  import json
3
3
  from hashlib import md5
4
4
  from os import getenv
5
- from typing import Any, Dict, List, Optional
5
+ from typing import Any, Dict, List, Optional, Union
6
6
 
7
7
  try:
8
8
  import lancedb
@@ -10,10 +10,11 @@ try:
10
10
  except ImportError:
11
11
  raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
12
12
 
13
+ from agno.filters import FilterExpr
13
14
  from agno.knowledge.document import Document
14
15
  from agno.knowledge.embedder import Embedder
15
16
  from agno.knowledge.reranker.base import Reranker
16
- from agno.utils.log import log_debug, log_info, logger
17
+ from agno.utils.log import log_debug, log_info, log_warning, logger
17
18
  from agno.vectordb.base import VectorDb
18
19
  from agno.vectordb.distance import Distance
19
20
  from agno.vectordb.search import SearchType
@@ -25,6 +26,8 @@ class LanceDb(VectorDb):
25
26
 
26
27
  Args:
27
28
  uri: The URI of the LanceDB database.
29
+ name: Name of the vector database.
30
+ description: Description of the vector database.
28
31
  connection: The LanceDB connection to use.
29
32
  table: The LanceDB table instance to use.
30
33
  async_connection: The LanceDB async connection to use.
@@ -44,6 +47,9 @@ class LanceDb(VectorDb):
44
47
  def __init__(
45
48
  self,
46
49
  uri: lancedb.URI = "/tmp/lancedb",
50
+ name: Optional[str] = None,
51
+ description: Optional[str] = None,
52
+ id: Optional[str] = None,
47
53
  connection: Optional[lancedb.LanceDBConnection] = None,
48
54
  table: Optional[lancedb.db.LanceTable] = None,
49
55
  async_connection: Optional[lancedb.AsyncConnection] = None,
@@ -59,6 +65,17 @@ class LanceDb(VectorDb):
59
65
  on_bad_vectors: Optional[str] = None, # One of "error", "drop", "fill", "null".
60
66
  fill_value: Optional[float] = None, # Only used if on_bad_vectors is "fill"
61
67
  ):
68
+ # Dynamic ID generation based on unique identifiers
69
+ if id is None:
70
+ from agno.utils.string import generate_id
71
+
72
+ table_identifier = table_name or "default_table"
73
+ seed = f"{uri}#{table_identifier}"
74
+ id = generate_id(seed)
75
+
76
+ # Initialize base class with name, description, and generated ID
77
+ super().__init__(id=id, name=name, description=description)
78
+
62
79
  # Embedder for embedding the document contents
63
80
  if embedder is None:
64
81
  from agno.knowledge.embedder.openai import OpenAIEmbedder
@@ -142,7 +159,7 @@ class LanceDb(VectorDb):
142
159
 
143
160
  def _prepare_vector(self, embedding) -> List[float]:
144
161
  """Prepare vector embedding for insertion, ensuring correct dimensions and type."""
145
- if embedding is not None:
162
+ if embedding is not None and len(embedding) > 0:
146
163
  # Convert to list of floats
147
164
  vector = [float(x) for x in embedding]
148
165
 
@@ -160,7 +177,7 @@ class LanceDb(VectorDb):
160
177
 
161
178
  return vector
162
179
  else:
163
- # Fallback if embedding is None
180
+ # Fallback if embedding is None or empty
164
181
  return [0.0] * (self.dimensions or 1536)
165
182
 
166
183
  async def _get_async_connection(self) -> lancedb.AsyncConnection:
@@ -184,7 +201,6 @@ class LanceDb(VectorDb):
184
201
  # Re-establish sync connection to see async changes
185
202
  if self.connection and self.table_name in self.connection.table_names():
186
203
  self.table = self.connection.open_table(self.table_name)
187
- log_debug(f"Refreshed sync connection for table: {self.table_name}")
188
204
  except Exception as e:
189
205
  log_debug(f"Could not refresh sync connection: {e}")
190
206
  # If refresh fails, we can still function but sync methods might not see async changes
@@ -244,39 +260,6 @@ class LanceDb(VectorDb):
244
260
  tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True) # type: ignore
245
261
  return tbl # type: ignore
246
262
 
247
- def doc_exists(self, document: Document) -> bool:
248
- """
249
- Validating if the document exists or not
250
-
251
- Args:
252
- document (Document): Document to validate
253
- """
254
- try:
255
- if self.table is not None:
256
- cleaned_content = document.content.replace("\x00", "\ufffd")
257
- doc_id = md5(cleaned_content.encode()).hexdigest()
258
- result = self.table.search().where(f"{self._id}='{doc_id}'").to_arrow()
259
- return len(result) > 0
260
- except Exception:
261
- # Search sometimes fails with stale cache data, it means the doc doesn't exist
262
- return False
263
-
264
- return False
265
-
266
- async def async_doc_exists(self, document: Document) -> bool:
267
- """
268
- Asynchronously validate if the document exists
269
-
270
- Args:
271
- document (Document): Document to validate
272
-
273
- Returns:
274
- bool: True if document exists, False otherwise
275
- """
276
- if self.connection:
277
- self.table = self.connection.open_table(name=self.table_name)
278
- return self.doc_exists(document)
279
-
280
263
  def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
281
264
  """
282
265
  Insert documents into the database.
@@ -293,9 +276,6 @@ class LanceDb(VectorDb):
293
276
  data = []
294
277
 
295
278
  for document in documents:
296
- if self.doc_exists(document):
297
- continue
298
-
299
279
  # Add filters to document metadata if provided
300
280
  if filters:
301
281
  meta_data = document.meta_data.copy() if document.meta_data else {}
@@ -304,7 +284,9 @@ class LanceDb(VectorDb):
304
284
 
305
285
  document.embed(embedder=self.embedder)
306
286
  cleaned_content = document.content.replace("\x00", "\ufffd")
307
- doc_id = str(md5(cleaned_content.encode()).hexdigest())
287
+ # Include content_hash in ID to ensure uniqueness across different content hashes
288
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
289
+ doc_id = str(md5(f"{base_id}_{content_hash}".encode()).hexdigest())
308
290
  payload = {
309
291
  "name": document.name,
310
292
  "meta_data": document.meta_data,
@@ -343,6 +325,9 @@ class LanceDb(VectorDb):
343
325
  """
344
326
  Asynchronously insert documents into the database.
345
327
 
328
+ Note: Currently wraps sync insert method since LanceDB async insert has sync/async table
329
+ synchronization issues causing empty vectors. We still do async embedding for performance.
330
+
346
331
  Args:
347
332
  documents (List[Document]): List of documents to insert
348
333
  filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
@@ -352,115 +337,36 @@ class LanceDb(VectorDb):
352
337
  return
353
338
 
354
339
  log_debug(f"Inserting {len(documents)} documents")
355
- data = []
356
340
 
341
+ # Still do async embedding for performance
357
342
  if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
358
- # Use batch embedding when enabled and supported
359
343
  try:
360
- # Extract content from all documents
361
344
  doc_contents = [doc.content for doc in documents]
362
-
363
- # Get batch embeddings and usage
364
345
  embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
365
346
 
366
- # Process documents with pre-computed embeddings
367
347
  for j, doc in enumerate(documents):
368
- try:
369
- if j < len(embeddings):
370
- doc.embedding = embeddings[j]
371
- doc.usage = usages[j] if j < len(usages) else None
372
- except Exception as e:
373
- logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
374
-
348
+ if j < len(embeddings):
349
+ doc.embedding = embeddings[j]
350
+ doc.usage = usages[j] if j < len(usages) else None
375
351
  except Exception as e:
376
- # Check if this is a rate limit error - don't fall back as it would make things worse
377
352
  error_str = str(e).lower()
378
353
  is_rate_limit = any(
379
354
  phrase in error_str
380
355
  for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
381
356
  )
382
-
383
357
  if is_rate_limit:
384
358
  logger.error(f"Rate limit detected during batch embedding. {e}")
385
359
  raise e
386
360
  else:
387
361
  logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
388
- # Fall back to individual embedding
389
362
  embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
390
363
  await asyncio.gather(*embed_tasks, return_exceptions=True)
391
364
  else:
392
- # Use individual embedding
393
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
365
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
394
366
  await asyncio.gather(*embed_tasks, return_exceptions=True)
395
367
 
396
- for document in documents:
397
- if await self.async_doc_exists(document):
398
- continue
399
-
400
- # Add filters to document metadata if provided
401
- if filters:
402
- meta_data = document.meta_data.copy() if document.meta_data else {}
403
- meta_data.update(filters)
404
- document.meta_data = meta_data
405
-
406
- cleaned_content = document.content.replace("\x00", "\ufffd")
407
- doc_id = str(md5(cleaned_content.encode()).hexdigest())
408
- payload = {
409
- "name": document.name,
410
- "meta_data": document.meta_data,
411
- "content": cleaned_content,
412
- "usage": document.usage,
413
- "content_id": document.content_id,
414
- "content_hash": content_hash,
415
- }
416
- data.append(
417
- {
418
- "id": doc_id,
419
- "vector": self._prepare_vector(document.embedding),
420
- "payload": json.dumps(payload),
421
- }
422
- )
423
- log_debug(f"Parsed document: {document.name} ({document.meta_data})")
424
-
425
- if not data:
426
- log_debug("No new data to insert")
427
- return
428
-
429
- try:
430
- await self._get_async_connection()
431
-
432
- # Ensure the async table is created before inserting
433
- if self.async_table is None:
434
- try:
435
- await self.async_create()
436
- except Exception as create_e:
437
- logger.error(f"Failed to create async table: {create_e}")
438
- # Continue to fallback logic below
439
-
440
- if self.async_table is None:
441
- # Fall back to sync insertion if async table creation failed
442
- logger.warning("Async table not available, falling back to sync insertion")
443
- return self.insert(content_hash, documents, filters)
444
-
445
- if self.on_bad_vectors is not None:
446
- await self.async_table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value) # type: ignore
447
- else:
448
- await self.async_table.add(data) # type: ignore
449
-
450
- log_debug(f"Asynchronously inserted {len(data)} documents")
451
-
452
- # Refresh sync connection to see async changes
453
- self._refresh_sync_connection()
454
- except Exception as e:
455
- logger.error(f"Error during async document insertion: {e}")
456
- # Try falling back to sync insertion as a last resort
457
- try:
458
- logger.warning("Async insertion failed, attempting sync fallback")
459
- self.insert(content_hash, documents, filters)
460
- logger.info("Sync fallback successful")
461
- except Exception as sync_e:
462
- logger.error(f"Sync fallback also failed: {sync_e}")
463
- raise e from sync_e
368
+ # Use sync insert to avoid sync/async table synchronization issues
369
+ self.insert(content_hash, documents, filters)
464
370
 
465
371
  def upsert_available(self) -> bool:
466
372
  """Check if upsert is available in LanceDB."""
@@ -481,11 +387,42 @@ class LanceDb(VectorDb):
481
387
  async def async_upsert(
482
388
  self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
483
389
  ) -> None:
484
- if self.content_hash_exists(content_hash):
485
- self._delete_by_content_hash(content_hash)
486
- await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
390
+ """
391
+ Asynchronously upsert documents into the database.
487
392
 
488
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
393
+ Note: Uses async embedding for performance, then sync upsert for reliability.
394
+ """
395
+ if len(documents) > 0:
396
+ # Do async embedding for performance
397
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
398
+ try:
399
+ doc_contents = [doc.content for doc in documents]
400
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
401
+ for j, doc in enumerate(documents):
402
+ if j < len(embeddings):
403
+ doc.embedding = embeddings[j]
404
+ doc.usage = usages[j] if j < len(usages) else None
405
+ except Exception as e:
406
+ error_str = str(e).lower()
407
+ is_rate_limit = any(
408
+ phrase in error_str
409
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
410
+ )
411
+ if is_rate_limit:
412
+ raise e
413
+ else:
414
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
415
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
416
+ else:
417
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
418
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
419
+
420
+ # Use sync upsert for reliability
421
+ self.upsert(content_hash=content_hash, documents=documents, filters=filters)
422
+
423
+ def search(
424
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
425
+ ) -> List[Document]:
489
426
  """
490
427
  Search for documents matching the query.
491
428
 
@@ -502,6 +439,10 @@ class LanceDb(VectorDb):
502
439
 
503
440
  results = None
504
441
 
442
+ if isinstance(filters, list):
443
+ log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
444
+ filters = None
445
+
505
446
  if self.search_type == SearchType.vector:
506
447
  results = self.vector_search(query, limit)
507
448
  elif self.search_type == SearchType.keyword:
@@ -543,11 +484,14 @@ class LanceDb(VectorDb):
543
484
  return search_results
544
485
 
545
486
  async def async_search(
546
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
487
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
547
488
  ) -> List[Document]:
548
489
  """
549
490
  Asynchronously search for documents matching the query.
550
491
 
492
+ Note: Currently wraps sync search method since LanceDB async search has sync/async table
493
+ synchronization issues. Performance impact is minimal for search operations.
494
+
551
495
  Args:
552
496
  query (str): Query string to search for
553
497
  limit (int): Maximum number of results to return
@@ -556,53 +500,12 @@ class LanceDb(VectorDb):
556
500
  Returns:
557
501
  List[Document]: List of matching documents
558
502
  """
559
- # TODO: Search is not yet supported in async (https://github.com/lancedb/lancedb/pull/2049)
560
- if self.connection:
561
- self.table = self.connection.open_table(name=self.table_name)
503
+ # Wrap sync search method to avoid sync/async table synchronization issues
504
+ return self.search(query=query, limit=limit, filters=filters)
562
505
 
563
- results = None
564
-
565
- if self.search_type == SearchType.vector:
566
- results = self.vector_search(query, limit)
567
- elif self.search_type == SearchType.keyword:
568
- results = self.keyword_search(query, limit)
569
- elif self.search_type == SearchType.hybrid:
570
- results = self.hybrid_search(query, limit)
571
- else:
572
- logger.error(f"Invalid search type '{self.search_type}'.")
573
- return []
574
-
575
- if results is None:
576
- return []
577
-
578
- search_results = self._build_search_results(results)
579
-
580
- # Filter results based on metadata if filters are provided
581
- if filters and search_results:
582
- filtered_results = []
583
- for doc in search_results:
584
- if doc.meta_data is None:
585
- continue
586
-
587
- # Check if all filter criteria match
588
- match = True
589
- for key, value in filters.items():
590
- if key not in doc.meta_data or doc.meta_data[key] != value:
591
- match = False
592
- break
593
-
594
- if match:
595
- filtered_results.append(doc)
596
-
597
- search_results = filtered_results
598
-
599
- if self.reranker and search_results:
600
- search_results = self.reranker.rerank(query=query, documents=search_results)
601
-
602
- log_info(f"Found {len(search_results)} documents")
603
- return search_results
604
-
605
- def vector_search(self, query: str, limit: int = 5) -> List[Document]:
506
+ def vector_search(
507
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
508
+ ) -> List[Document]:
606
509
  query_embedding = self.embedder.get_embedding(query)
607
510
  if query_embedding is None:
608
511
  logger.error(f"Error getting embedding for Query: {query}")
@@ -622,7 +525,9 @@ class LanceDb(VectorDb):
622
525
 
623
526
  return results.to_pandas()
624
527
 
625
- def hybrid_search(self, query: str, limit: int = 5) -> List[Document]:
528
+ def hybrid_search(
529
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
530
+ ) -> List[Document]:
626
531
  query_embedding = self.embedder.get_embedding(query)
627
532
  if query_embedding is None:
628
533
  logger.error(f"Error getting embedding for Query: {query}")
@@ -651,7 +556,9 @@ class LanceDb(VectorDb):
651
556
 
652
557
  return results.to_pandas()
653
558
 
654
- def keyword_search(self, query: str, limit: int = 5) -> List[Document]:
559
+ def keyword_search(
560
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
561
+ ) -> List[Document]:
655
562
  if self.table is None:
656
563
  logger.error("Table not initialized. Please create the table first")
657
564
  return []
@@ -1048,3 +955,7 @@ class LanceDb(VectorDb):
1048
955
  except Exception as e:
1049
956
  logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
1050
957
  raise
958
+
959
+ def get_supported_search_types(self) -> List[str]:
960
+ """Get the supported search types for this vector database."""
961
+ return [SearchType.vector, SearchType.keyword, SearchType.hybrid]
@@ -1,7 +1,8 @@
1
- from typing import Any, Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional, Union
2
2
 
3
+ from agno.filters import FilterExpr
3
4
  from agno.knowledge.document import Document
4
- from agno.utils.log import log_debug, logger
5
+ from agno.utils.log import log_debug, log_warning, logger
5
6
  from agno.vectordb.base import VectorDb
6
7
 
7
8
 
@@ -11,16 +12,23 @@ class LangChainVectorDb(VectorDb):
11
12
  vectorstore: Optional[Any] = None,
12
13
  search_kwargs: Optional[dict] = None,
13
14
  knowledge_retriever: Optional[Any] = None,
15
+ name: Optional[str] = None,
16
+ description: Optional[str] = None,
14
17
  ):
15
18
  """
16
19
  Initialize LangChainVectorDb.
17
20
 
18
21
  Args:
19
22
  vectorstore: The LangChain vectorstore instance
23
+ name (Optional[str]): Name of the vector database.
24
+ description (Optional[str]): Description of the vector database.
20
25
  search_kwargs: Additional search parameters for the retriever
21
26
  knowledge_retriever: An optional LangChain retriever instance
22
27
  """
23
28
  self.vectorstore = vectorstore
29
+ # Initialize base class with name and description
30
+ super().__init__(name=name, description=description)
31
+
24
32
  self.search_kwargs = search_kwargs
25
33
  self.knowledge_retriever = knowledge_retriever
26
34
 
@@ -63,9 +71,17 @@ class LangChainVectorDb(VectorDb):
63
71
  logger.warning("LangChainKnowledgeBase.async_upsert() not supported - please check the vectorstore manually.")
64
72
  raise NotImplementedError
65
73
 
66
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
74
+ def search(
75
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
76
+ ) -> List[Document]:
67
77
  """Returns relevant documents matching the query"""
68
78
 
79
+ if isinstance(filters, List):
80
+ log_warning(
81
+ "Filter Expressions are not supported in LangChainDB. No filters will be applied. Use filters as a dictionary."
82
+ )
83
+ filters = None
84
+
69
85
  try:
70
86
  from langchain_core.documents import Document as LangChainDocument
71
87
  from langchain_core.retrievers import BaseRetriever
@@ -102,7 +118,7 @@ class LangChainVectorDb(VectorDb):
102
118
  return documents
103
119
 
104
120
  async def async_search(
105
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
121
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
106
122
  ) -> List[Document]:
107
123
  return self.search(query, limit, filters)
108
124
 
@@ -141,3 +157,7 @@ class LangChainVectorDb(VectorDb):
141
157
  metadata (Dict[str, Any]): The metadata to update
142
158
  """
143
159
  raise NotImplementedError("update_metadata not supported for LangChain vectorstores")
160
+
161
+ def get_supported_search_types(self) -> List[str]:
162
+ """Get the supported search types for this vector database."""
163
+ return [] # LangChainVectorDb doesn't use SearchType enum
@@ -1,8 +1,9 @@
1
1
  import asyncio
2
- from typing import Any, Dict, List, Optional
2
+ from typing import Any, Dict, List, Optional, Union
3
3
 
4
4
  import httpx
5
5
 
6
+ from agno.filters import FilterExpr
6
7
  from agno.knowledge.document import Document
7
8
  from agno.utils.log import log_debug, log_error, log_info, log_warning
8
9
  from agno.vectordb.base import VectorDb
@@ -21,9 +22,14 @@ class LightRag(VectorDb):
21
22
  api_key: Optional[str] = None,
22
23
  auth_header_name: str = "X-API-KEY",
23
24
  auth_header_format: str = "{api_key}",
25
+ name: Optional[str] = None,
26
+ description: Optional[str] = None,
24
27
  ):
25
28
  self.server_url = server_url
26
29
  self.api_key = api_key
30
+ # Initialize base class with name and description
31
+ super().__init__(name=name, description=description)
32
+
27
33
  self.auth_header_name = auth_header_name
28
34
  self.auth_header_format = auth_header_format
29
35
 
@@ -87,14 +93,18 @@ class LightRag(VectorDb):
87
93
  """Async upsert documents into the vector database"""
88
94
  pass
89
95
 
90
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
96
+ def search(
97
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
98
+ ) -> List[Document]:
91
99
  result = asyncio.run(self.async_search(query, limit=limit, filters=filters))
92
100
  return result if result is not None else []
93
101
 
94
102
  async def async_search(
95
- self, query: str, limit: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
103
+ self, query: str, limit: Optional[int] = None, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
96
104
  ) -> Optional[List[Document]]:
97
105
  mode: str = "hybrid" # Default mode, can be "local", "global", or "hybrid"
106
+ if filters is not None:
107
+ log_warning("Filters are not supported in LightRAG. No filters will be applied.")
98
108
  try:
99
109
  async with httpx.AsyncClient(timeout=30.0) as client:
100
110
  response = await client.post(
@@ -372,3 +382,7 @@ class LightRag(VectorDb):
372
382
  metadata (Dict[str, Any]): The metadata to update
373
383
  """
374
384
  raise NotImplementedError("update_metadata not supported for LightRag - use LightRag's native methods")
385
+
386
+ def get_supported_search_types(self) -> List[str]:
387
+ """Get the supported search types for this vector database."""
388
+ return [] # LightRag doesn't use SearchType enum
@@ -1,7 +1,8 @@
1
- from typing import Any, Callable, Dict, List, Optional
1
+ from typing import Any, Callable, Dict, List, Optional, Union
2
2
 
3
+ from agno.filters import FilterExpr
3
4
  from agno.knowledge.document import Document
4
- from agno.utils.log import logger
5
+ from agno.utils.log import log_warning, logger
5
6
  from agno.vectordb.base import VectorDb
6
7
 
7
8
  try:
@@ -17,8 +18,18 @@ class LlamaIndexVectorDb(VectorDb):
17
18
  knowledge_retriever: BaseRetriever
18
19
  loader: Optional[Callable] = None
19
20
 
20
- def __init__(self, knowledge_retriever: BaseRetriever, loader: Optional[Callable] = None, **kwargs):
21
+ def __init__(
22
+ self,
23
+ knowledge_retriever: BaseRetriever,
24
+ loader: Optional[Callable] = None,
25
+ name: Optional[str] = None,
26
+ description: Optional[str] = None,
27
+ **kwargs,
28
+ ):
21
29
  super().__init__(**kwargs)
30
+ # Initialize base class with name and description
31
+ super().__init__(name=name, description=description)
32
+
22
33
  self.knowledge_retriever = knowledge_retriever
23
34
  self.loader = loader
24
35
 
@@ -58,7 +69,9 @@ class LlamaIndexVectorDb(VectorDb):
58
69
  logger.warning("LlamaIndexVectorDb.async_upsert() not supported - please check the vectorstore manually.")
59
70
  raise NotImplementedError
60
71
 
61
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
72
+ def search(
73
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
74
+ ) -> List[Document]:
62
75
  """
63
76
  Returns relevant documents matching the query.
64
77
 
@@ -72,6 +85,9 @@ class LlamaIndexVectorDb(VectorDb):
72
85
  Raises:
73
86
  ValueError: If the knowledge retriever is not of type BaseRetriever.
74
87
  """
88
+ if filters is not None:
89
+ log_warning("Filters are not supported in LlamaIndex. No filters will be applied.")
90
+
75
91
  if not isinstance(self.knowledge_retriever, BaseRetriever):
76
92
  raise ValueError(f"Knowledge retriever is not of type BaseRetriever: {self.knowledge_retriever}")
77
93
 
@@ -89,7 +105,7 @@ class LlamaIndexVectorDb(VectorDb):
89
105
  return documents
90
106
 
91
107
  async def async_search(
92
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
108
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
93
109
  ) -> List[Document]:
94
110
  return self.search(query, limit, filters)
95
111
 
@@ -144,3 +160,7 @@ class LlamaIndexVectorDb(VectorDb):
144
160
  "LlamaIndexVectorDb.delete_by_content_id() not supported - please check the vectorstore manually."
145
161
  )
146
162
  return False
163
+
164
+ def get_supported_search_types(self) -> List[str]:
165
+ """Get the supported search types for this vector database."""
166
+ return [] # LlamaIndexVectorDb doesn't use SearchType enum