agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. agno/agent/agent.py +6009 -2874
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +595 -187
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +3 -0
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +339 -266
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +1011 -566
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +110 -37
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +143 -4
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +60 -6
  142. agno/models/openai/chat.py +102 -43
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +81 -5
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -175
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +266 -112
  205. agno/run/base.py +53 -24
  206. agno/run/team.py +252 -111
  207. agno/run/workflow.py +156 -45
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1692
  213. agno/tools/brightdata.py +3 -3
  214. agno/tools/cartesia.py +3 -5
  215. agno/tools/dalle.py +9 -8
  216. agno/tools/decorator.py +4 -2
  217. agno/tools/desi_vocal.py +2 -2
  218. agno/tools/duckduckgo.py +15 -11
  219. agno/tools/e2b.py +20 -13
  220. agno/tools/eleven_labs.py +26 -28
  221. agno/tools/exa.py +21 -16
  222. agno/tools/fal.py +4 -4
  223. agno/tools/file.py +153 -23
  224. agno/tools/file_generation.py +350 -0
  225. agno/tools/firecrawl.py +4 -4
  226. agno/tools/function.py +257 -37
  227. agno/tools/giphy.py +2 -2
  228. agno/tools/gmail.py +238 -14
  229. agno/tools/google_drive.py +270 -0
  230. agno/tools/googlecalendar.py +36 -8
  231. agno/tools/googlesheets.py +20 -5
  232. agno/tools/jira.py +20 -0
  233. agno/tools/knowledge.py +3 -3
  234. agno/tools/lumalab.py +3 -3
  235. agno/tools/mcp/__init__.py +10 -0
  236. agno/tools/mcp/mcp.py +331 -0
  237. agno/tools/mcp/multi_mcp.py +347 -0
  238. agno/tools/mcp/params.py +24 -0
  239. agno/tools/mcp_toolbox.py +284 -0
  240. agno/tools/mem0.py +11 -17
  241. agno/tools/memori.py +1 -53
  242. agno/tools/memory.py +419 -0
  243. agno/tools/models/azure_openai.py +2 -2
  244. agno/tools/models/gemini.py +3 -3
  245. agno/tools/models/groq.py +3 -5
  246. agno/tools/models/nebius.py +7 -7
  247. agno/tools/models_labs.py +25 -15
  248. agno/tools/notion.py +204 -0
  249. agno/tools/openai.py +4 -9
  250. agno/tools/opencv.py +3 -3
  251. agno/tools/parallel.py +314 -0
  252. agno/tools/replicate.py +7 -7
  253. agno/tools/scrapegraph.py +58 -31
  254. agno/tools/searxng.py +2 -2
  255. agno/tools/serper.py +2 -2
  256. agno/tools/slack.py +18 -3
  257. agno/tools/spider.py +2 -2
  258. agno/tools/tavily.py +146 -0
  259. agno/tools/whatsapp.py +1 -1
  260. agno/tools/workflow.py +278 -0
  261. agno/tools/yfinance.py +12 -11
  262. agno/utils/agent.py +820 -0
  263. agno/utils/audio.py +27 -0
  264. agno/utils/common.py +90 -1
  265. agno/utils/events.py +222 -7
  266. agno/utils/gemini.py +181 -23
  267. agno/utils/hooks.py +57 -0
  268. agno/utils/http.py +111 -0
  269. agno/utils/knowledge.py +12 -5
  270. agno/utils/log.py +1 -0
  271. agno/utils/mcp.py +95 -5
  272. agno/utils/media.py +188 -10
  273. agno/utils/merge_dict.py +22 -1
  274. agno/utils/message.py +60 -0
  275. agno/utils/models/claude.py +40 -11
  276. agno/utils/models/cohere.py +1 -1
  277. agno/utils/models/watsonx.py +1 -1
  278. agno/utils/openai.py +1 -1
  279. agno/utils/print_response/agent.py +105 -21
  280. agno/utils/print_response/team.py +103 -38
  281. agno/utils/print_response/workflow.py +251 -34
  282. agno/utils/reasoning.py +22 -1
  283. agno/utils/serialize.py +32 -0
  284. agno/utils/streamlit.py +16 -10
  285. agno/utils/string.py +41 -0
  286. agno/utils/team.py +98 -9
  287. agno/utils/tools.py +1 -1
  288. agno/vectordb/base.py +23 -4
  289. agno/vectordb/cassandra/cassandra.py +65 -9
  290. agno/vectordb/chroma/chromadb.py +182 -38
  291. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  292. agno/vectordb/couchbase/couchbase.py +105 -10
  293. agno/vectordb/lancedb/lance_db.py +183 -135
  294. agno/vectordb/langchaindb/langchaindb.py +25 -7
  295. agno/vectordb/lightrag/lightrag.py +17 -3
  296. agno/vectordb/llamaindex/__init__.py +3 -0
  297. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  298. agno/vectordb/milvus/milvus.py +126 -9
  299. agno/vectordb/mongodb/__init__.py +7 -1
  300. agno/vectordb/mongodb/mongodb.py +112 -7
  301. agno/vectordb/pgvector/pgvector.py +142 -21
  302. agno/vectordb/pineconedb/pineconedb.py +80 -8
  303. agno/vectordb/qdrant/qdrant.py +125 -39
  304. agno/vectordb/redis/__init__.py +9 -0
  305. agno/vectordb/redis/redisdb.py +694 -0
  306. agno/vectordb/singlestore/singlestore.py +111 -25
  307. agno/vectordb/surrealdb/surrealdb.py +31 -5
  308. agno/vectordb/upstashdb/upstashdb.py +76 -8
  309. agno/vectordb/weaviate/weaviate.py +86 -15
  310. agno/workflow/__init__.py +2 -0
  311. agno/workflow/agent.py +299 -0
  312. agno/workflow/condition.py +112 -18
  313. agno/workflow/loop.py +69 -10
  314. agno/workflow/parallel.py +266 -118
  315. agno/workflow/router.py +110 -17
  316. agno/workflow/step.py +645 -136
  317. agno/workflow/steps.py +65 -6
  318. agno/workflow/types.py +71 -33
  319. agno/workflow/workflow.py +2113 -300
  320. agno-2.3.0.dist-info/METADATA +618 -0
  321. agno-2.3.0.dist-info/RECORD +577 -0
  322. agno-2.3.0.dist-info/licenses/LICENSE +201 -0
  323. agno/knowledge/reader/url_reader.py +0 -128
  324. agno/tools/googlesearch.py +0 -98
  325. agno/tools/mcp.py +0 -610
  326. agno/utils/models/aws_claude.py +0 -170
  327. agno-2.0.0rc2.dist-info/METADATA +0 -355
  328. agno-2.0.0rc2.dist-info/RECORD +0 -515
  329. agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
  330. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  331. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ import json
2
3
  from hashlib import md5
3
4
  from typing import Any, Dict, List, Mapping, Optional, Union, cast
4
5
 
@@ -12,10 +13,11 @@ try:
12
13
  except ImportError:
13
14
  raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
14
15
 
16
+ from agno.filters import FilterExpr
15
17
  from agno.knowledge.document import Document
16
18
  from agno.knowledge.embedder import Embedder
17
19
  from agno.knowledge.reranker.base import Reranker
18
- from agno.utils.log import log_debug, log_error, log_info, logger
20
+ from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
19
21
  from agno.vectordb.base import VectorDb
20
22
  from agno.vectordb.distance import Distance
21
23
 
@@ -24,6 +26,9 @@ class ChromaDb(VectorDb):
24
26
  def __init__(
25
27
  self,
26
28
  collection: str,
29
+ name: Optional[str] = None,
30
+ description: Optional[str] = None,
31
+ id: Optional[str] = None,
27
32
  embedder: Optional[Embedder] = None,
28
33
  distance: Distance = Distance.cosine,
29
34
  path: str = "tmp/chromadb",
@@ -31,9 +36,22 @@ class ChromaDb(VectorDb):
31
36
  reranker: Optional[Reranker] = None,
32
37
  **kwargs,
33
38
  ):
39
+ # Validate required parameters
40
+ if not collection:
41
+ raise ValueError("Collection name must be provided.")
42
+
43
+ # Dynamic ID generation based on unique identifiers
44
+ if id is None:
45
+ from agno.utils.string import generate_id
46
+
47
+ seed = f"{path}#{collection}"
48
+ id = generate_id(seed)
49
+
50
+ # Initialize base class with name, description, and generated ID
51
+ super().__init__(id=id, name=name, description=description)
52
+
34
53
  # Collection attributes
35
54
  self.collection_name: str = collection
36
-
37
55
  # Embedder for embedding the document contents
38
56
  if embedder is None:
39
57
  from agno.knowledge.embedder.openai import OpenAIEmbedder
@@ -60,6 +78,44 @@ class ChromaDb(VectorDb):
60
78
  # Chroma client kwargs
61
79
  self.kwargs = kwargs
62
80
 
81
+ def _flatten_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Union[str, int, float, bool]]:
82
+ """
83
+ Flatten nested metadata to ChromaDB-compatible format.
84
+
85
+ Args:
86
+ metadata: Dictionary that may contain nested structures
87
+
88
+ Returns:
89
+ Flattened dictionary with only primitive values
90
+ """
91
+ flattened: Dict[str, Any] = {}
92
+
93
+ def _flatten_recursive(obj: Any, prefix: str = "") -> None:
94
+ if isinstance(obj, dict):
95
+ if len(obj) == 0:
96
+ # Handle empty dictionaries by converting to JSON string
97
+ flattened[prefix] = json.dumps(obj)
98
+ else:
99
+ for key, value in obj.items():
100
+ new_key = f"{prefix}.{key}" if prefix else key
101
+ _flatten_recursive(value, new_key)
102
+ elif isinstance(obj, (list, tuple)):
103
+ # Convert lists/tuples to JSON strings
104
+ flattened[prefix] = json.dumps(obj)
105
+ elif isinstance(obj, (str, int, float, bool)) or obj is None:
106
+ if obj is not None: # ChromaDB doesn't accept None values
107
+ flattened[prefix] = obj
108
+ else:
109
+ # Convert other complex types to JSON strings
110
+ try:
111
+ flattened[prefix] = json.dumps(obj)
112
+ except (TypeError, ValueError):
113
+ # If it can't be serialized, convert to string
114
+ flattened[prefix] = str(obj)
115
+
116
+ _flatten_recursive(metadata)
117
+ return flattened
118
+
63
119
  @property
64
120
  def client(self) -> ClientAPI:
65
121
  if self._client is None:
@@ -147,11 +203,14 @@ class ChromaDb(VectorDb):
147
203
 
148
204
  metadata["content_hash"] = content_hash
149
205
 
206
+ # Flatten metadata for ChromaDB compatibility
207
+ flattened_metadata = self._flatten_metadata(metadata)
208
+
150
209
  docs_embeddings.append(document.embedding)
151
210
  docs.append(cleaned_content)
152
211
  ids.append(doc_id)
153
- docs_metadata.append(metadata)
154
- log_debug(f"Prepared document: {document.id} | {document.name} | {metadata}")
212
+ docs_metadata.append(flattened_metadata)
213
+ log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")
155
214
 
156
215
  if self._collection is None:
157
216
  logger.warning("Collection does not exist")
@@ -173,11 +232,47 @@ class ChromaDb(VectorDb):
173
232
  if not self._collection:
174
233
  self._collection = self.client.get_collection(name=self.collection_name)
175
234
 
176
- try:
177
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
178
- await asyncio.gather(*embed_tasks, return_exceptions=True)
179
- except Exception as e:
180
- log_error(f"Error processing document: {e}")
235
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
236
+ # Use batch embedding when enabled and supported
237
+ try:
238
+ # Extract content from all documents
239
+ doc_contents = [doc.content for doc in documents]
240
+
241
+ # Get batch embeddings and usage
242
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
243
+
244
+ # Process documents with pre-computed embeddings
245
+ for j, doc in enumerate(documents):
246
+ try:
247
+ if j < len(embeddings):
248
+ doc.embedding = embeddings[j]
249
+ doc.usage = usages[j] if j < len(usages) else None
250
+ except Exception as e:
251
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
252
+
253
+ except Exception as e:
254
+ # Check if this is a rate limit error - don't fall back as it would make things worse
255
+ error_str = str(e).lower()
256
+ is_rate_limit = any(
257
+ phrase in error_str
258
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
259
+ )
260
+
261
+ if is_rate_limit:
262
+ logger.error(f"Rate limit detected during batch embedding. {e}")
263
+ raise e
264
+ else:
265
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
266
+ # Fall back to individual embedding
267
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
268
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
269
+ else:
270
+ # Use individual embedding
271
+ try:
272
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
273
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
274
+ except Exception as e:
275
+ log_error(f"Error processing document: {e}")
181
276
 
182
277
  for document in documents:
183
278
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -196,11 +291,14 @@ class ChromaDb(VectorDb):
196
291
 
197
292
  metadata["content_hash"] = content_hash
198
293
 
294
+ # Flatten metadata for ChromaDB compatibility
295
+ flattened_metadata = self._flatten_metadata(metadata)
296
+
199
297
  docs_embeddings.append(document.embedding)
200
298
  docs.append(cleaned_content)
201
299
  ids.append(doc_id)
202
- docs_metadata.append(metadata)
203
- log_debug(f"Prepared document: {document.id} | {document.name} | {metadata}")
300
+ docs_metadata.append(flattened_metadata)
301
+ log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")
204
302
 
205
303
  if self._collection is None:
206
304
  logger.warning("Collection does not exist")
@@ -262,11 +360,14 @@ class ChromaDb(VectorDb):
262
360
 
263
361
  metadata["content_hash"] = content_hash
264
362
 
363
+ # Flatten metadata for ChromaDB compatibility
364
+ flattened_metadata = self._flatten_metadata(metadata)
365
+
265
366
  docs_embeddings.append(document.embedding)
266
367
  docs.append(cleaned_content)
267
368
  ids.append(doc_id)
268
- docs_metadata.append(metadata)
269
- log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")
369
+ docs_metadata.append(flattened_metadata)
370
+ log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")
270
371
 
271
372
  if self._collection is None:
272
373
  logger.warning("Collection does not exist")
@@ -293,8 +394,44 @@ class ChromaDb(VectorDb):
293
394
  if not self._collection:
294
395
  self._collection = self.client.get_collection(name=self.collection_name)
295
396
 
296
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
297
- await asyncio.gather(*embed_tasks, return_exceptions=True)
397
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
398
+ # Use batch embedding when enabled and supported
399
+ try:
400
+ # Extract content from all documents
401
+ doc_contents = [doc.content for doc in documents]
402
+
403
+ # Get batch embeddings and usage
404
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
405
+
406
+ # Process documents with pre-computed embeddings
407
+ for j, doc in enumerate(documents):
408
+ try:
409
+ if j < len(embeddings):
410
+ doc.embedding = embeddings[j]
411
+ doc.usage = usages[j] if j < len(usages) else None
412
+ except Exception as e:
413
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
414
+
415
+ except Exception as e:
416
+ # Check if this is a rate limit error - don't fall back as it would make things worse
417
+ error_str = str(e).lower()
418
+ is_rate_limit = any(
419
+ phrase in error_str
420
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
421
+ )
422
+
423
+ if is_rate_limit:
424
+ logger.error(f"Rate limit detected during batch embedding. {e}")
425
+ raise e
426
+ else:
427
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
428
+ # Fall back to individual embedding
429
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
430
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
431
+ else:
432
+ # Use individual embedding
433
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
434
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
298
435
 
299
436
  for document in documents:
300
437
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -313,11 +450,14 @@ class ChromaDb(VectorDb):
313
450
 
314
451
  metadata["content_hash"] = content_hash
315
452
 
453
+ # Flatten metadata for ChromaDB compatibility
454
+ flattened_metadata = self._flatten_metadata(metadata)
455
+
316
456
  docs_embeddings.append(document.embedding)
317
457
  docs.append(cleaned_content)
318
458
  ids.append(doc_id)
319
- docs_metadata.append(metadata)
320
- log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")
459
+ docs_metadata.append(flattened_metadata)
460
+ log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")
321
461
 
322
462
  if self._collection is None:
323
463
  logger.warning("Collection does not exist")
@@ -338,13 +478,15 @@ class ChromaDb(VectorDb):
338
478
  logger.error(f"Error upserting documents by content hash: {e}")
339
479
  raise
340
480
 
341
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
481
+ def search(
482
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
483
+ ) -> List[Document]:
342
484
  """Search the collection for a query.
343
485
 
344
486
  Args:
345
487
  query (str): Query to search for.
346
488
  limit (int): Number of results to return.
347
- filters (Optional[Dict[str, Any]]): Filters to apply while searching.
489
+ filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply while searching.
348
490
  Supports ChromaDB's filtering operators:
349
491
  - $eq, $ne: Equality/Inequality
350
492
  - $gt, $gte, $lt, $lte: Numeric comparisons
@@ -353,6 +495,9 @@ class ChromaDb(VectorDb):
353
495
  Returns:
354
496
  List[Document]: List of search results.
355
497
  """
498
+ if isinstance(filters, list):
499
+ log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
500
+ filters = None
356
501
  query_embedding = self.embedder.get_embedding(query)
357
502
  if query_embedding is None:
358
503
  logger.error(f"Error getting embedding for Query: {query}")
@@ -374,11 +519,11 @@ class ChromaDb(VectorDb):
374
519
  # Build search results
375
520
  search_results: List[Document] = []
376
521
 
377
- ids_list = result.get("ids", [[]])
378
- metadata_list = result.get("metadatas", [[{}]])
379
- documents_list = result.get("documents", [[]])
380
- embeddings_list = result.get("embeddings")
381
- distances_list = result.get("distances", [[]])
522
+ ids_list = result.get("ids", [[]]) # type: ignore
523
+ metadata_list = result.get("metadatas", [[{}]]) # type: ignore
524
+ documents_list = result.get("documents", [[]]) # type: ignore
525
+ embeddings_list = result.get("embeddings") # type: ignore
526
+ distances_list = result.get("distances", [[]]) # type: ignore
382
527
 
383
528
  if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
384
529
  return search_results
@@ -467,7 +612,7 @@ class ChromaDb(VectorDb):
467
612
  return converted
468
613
 
469
614
  async def async_search(
470
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
615
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
471
616
  ) -> List[Document]:
472
617
  """Search asynchronously by running in a thread."""
473
618
  return await asyncio.to_thread(self.search, query, limit, filters)
@@ -658,7 +803,6 @@ class ChromaDb(VectorDb):
658
803
 
659
804
  try:
660
805
  collection: Collection = self.client.get_collection(name=self.collection_name)
661
- print("COLLECTION_----------", collection)
662
806
  # Try to get the document by ID
663
807
  result = collection.get(ids=[id])
664
808
  found_ids = result.get("ids", [])
@@ -747,6 +891,9 @@ class ChromaDb(VectorDb):
747
891
  logger.debug(f"No documents found with content_id: {content_id}")
748
892
  return
749
893
 
894
+ # Flatten the new metadata first
895
+ flattened_new_metadata = self._flatten_metadata(metadata)
896
+
750
897
  # Merge metadata for each document
751
898
  updated_metadatas = []
752
899
  for i, current_meta in enumerate(current_metadatas or []):
@@ -754,21 +901,14 @@ class ChromaDb(VectorDb):
754
901
  meta_dict: Dict[str, Any] = {}
755
902
  else:
756
903
  meta_dict = dict(current_meta) # Convert Mapping to dict
757
- updated_meta: Dict[str, Any] = meta_dict.copy()
758
- updated_meta.update(metadata)
759
904
 
760
- if "filters" not in updated_meta:
761
- updated_meta["filters"] = {}
762
- if isinstance(updated_meta["filters"], dict):
763
- updated_meta["filters"].update(metadata)
764
- else:
765
- updated_meta["filters"] = metadata
766
- updated_metadatas.append(updated_meta)
905
+ # Update with flattened metadata
906
+ meta_dict.update(flattened_new_metadata)
907
+ updated_metadatas.append(meta_dict)
767
908
 
768
- # Update the documents
769
909
  # Convert to the expected type for ChromaDB
770
- chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool, None]]], updated_metadatas)
771
- collection.update(ids=ids, metadatas=chroma_metadatas)
910
+ chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
911
+ collection.update(ids=ids, metadatas=chroma_metadatas) # type: ignore
772
912
  logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
773
913
 
774
914
  except TypeError as te:
@@ -783,3 +923,7 @@ class ChromaDb(VectorDb):
783
923
  except Exception as e:
784
924
  logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
785
925
  raise
926
+
927
+ def get_supported_search_types(self) -> List[str]:
928
+ """Get the supported search types for this vector database."""
929
+ return [] # ChromaDb doesn't use SearchType enum
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  from hashlib import md5
3
- from typing import Any, Dict, List, Optional
3
+ from typing import Any, Dict, List, Optional, Union
4
4
 
5
5
  from agno.vectordb.clickhouse.index import HNSW
6
6
 
@@ -11,9 +11,10 @@ try:
11
11
  except ImportError:
12
12
  raise ImportError("`clickhouse-connect` not installed. Use `pip install clickhouse-connect` to install it")
13
13
 
14
+ from agno.filters import FilterExpr
14
15
  from agno.knowledge.document import Document
15
16
  from agno.knowledge.embedder import Embedder
16
- from agno.utils.log import log_debug, log_info, logger
17
+ from agno.utils.log import log_debug, log_info, log_warning, logger
17
18
  from agno.vectordb.base import VectorDb
18
19
  from agno.vectordb.distance import Distance
19
20
 
@@ -23,6 +24,8 @@ class Clickhouse(VectorDb):
23
24
  self,
24
25
  table_name: str,
25
26
  host: str,
27
+ name: Optional[str] = None,
28
+ description: Optional[str] = None,
26
29
  username: Optional[str] = None,
27
30
  password: str = "",
28
31
  port: int = 0,
@@ -41,9 +44,11 @@ class Clickhouse(VectorDb):
41
44
  self.password = password
42
45
  self.port = port
43
46
  self.dsn = dsn
47
+ # Initialize base class with name and description
48
+ super().__init__(name=name, description=description)
49
+
44
50
  self.compress = compress
45
51
  self.database_name = database_name
46
-
47
52
  if not client:
48
53
  client = clickhouse_connect.get_client(
49
54
  host=self.host,
@@ -81,6 +86,7 @@ class Clickhouse(VectorDb):
81
86
  if self.async_client is None:
82
87
  self.async_client = await clickhouse_connect.get_async_client(
83
88
  host=self.host,
89
+ username=self.username, # type: ignore
84
90
  password=self.password,
85
91
  database=self.database_name,
86
92
  port=self.port,
@@ -228,7 +234,7 @@ class Clickhouse(VectorDb):
228
234
  "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
229
235
  parameters=parameters,
230
236
  )
231
- return bool(result)
237
+ return len(result.result_rows) > 0 if result.result_rows else False
232
238
 
233
239
  async def async_name_exists(self, name: str) -> bool:
234
240
  """Check if a document with given name exists asynchronously."""
@@ -241,7 +247,7 @@ class Clickhouse(VectorDb):
241
247
  "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
242
248
  parameters=parameters,
243
249
  )
244
- return bool(result)
250
+ return len(result.result_rows) > 0 if result.result_rows else False
245
251
 
246
252
  def id_exists(self, id: str) -> bool:
247
253
  """
@@ -257,7 +263,7 @@ class Clickhouse(VectorDb):
257
263
  "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
258
264
  parameters=parameters,
259
265
  )
260
- return bool(result)
266
+ return len(result.result_rows) > 0 if result.result_rows else False
261
267
 
262
268
  def insert(
263
269
  self,
@@ -308,8 +314,44 @@ class Clickhouse(VectorDb):
308
314
  rows: List[List[Any]] = []
309
315
  async_client = await self._ensure_async_client()
310
316
 
311
- embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
312
- await asyncio.gather(*embed_tasks, return_exceptions=True)
317
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
318
+ # Use batch embedding when enabled and supported
319
+ try:
320
+ # Extract content from all documents
321
+ doc_contents = [doc.content for doc in documents]
322
+
323
+ # Get batch embeddings and usage
324
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
325
+
326
+ # Process documents with pre-computed embeddings
327
+ for j, doc in enumerate(documents):
328
+ try:
329
+ if j < len(embeddings):
330
+ doc.embedding = embeddings[j]
331
+ doc.usage = usages[j] if j < len(usages) else None
332
+ except Exception as e:
333
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
334
+
335
+ except Exception as e:
336
+ # Check if this is a rate limit error - don't fall back as it would make things worse
337
+ error_str = str(e).lower()
338
+ is_rate_limit = any(
339
+ phrase in error_str
340
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
341
+ )
342
+
343
+ if is_rate_limit:
344
+ logger.error(f"Rate limit detected during batch embedding. {e}")
345
+ raise e
346
+ else:
347
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
348
+ # Fall back to individual embedding
349
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
350
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
351
+ else:
352
+ # Use individual embedding
353
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
354
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
313
355
 
314
356
  for document in documents:
315
357
  cleaned_content = document.content.replace("\x00", "\ufffd")
@@ -407,7 +449,11 @@ class Clickhouse(VectorDb):
407
449
  parameters=parameters,
408
450
  )
409
451
 
410
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
452
+ def search(
453
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
454
+ ) -> List[Document]:
455
+ if filters is not None:
456
+ log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
411
457
  query_embedding = self.embedder.get_embedding(query)
412
458
  if query_embedding is None:
413
459
  logger.error(f"Error getting embedding for Query: {query}")
@@ -461,11 +507,14 @@ class Clickhouse(VectorDb):
461
507
  return search_results
462
508
 
463
509
  async def async_search(
464
- self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
510
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
465
511
  ) -> List[Document]:
466
512
  """Search for documents asynchronously."""
467
513
  async_client = await self._ensure_async_client()
468
514
 
515
+ if filters is not None:
516
+ log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
517
+
469
518
  query_embedding = self.embedder.get_embedding(query)
470
519
  if query_embedding is None:
471
520
  logger.error(f"Error getting embedding for Query: {query}")
@@ -696,7 +745,7 @@ class Clickhouse(VectorDb):
696
745
  "SELECT content_hash FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
697
746
  parameters=parameters,
698
747
  )
699
- return bool(result)
748
+ return len(result.result_rows) > 0 if result.result_rows else False
700
749
 
701
750
  def _delete_by_content_hash(self, content_hash: str) -> bool:
702
751
  """
@@ -780,3 +829,7 @@ class Clickhouse(VectorDb):
780
829
  except Exception as e:
781
830
  logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
782
831
  raise
832
+
833
+ def get_supported_search_types(self) -> List[str]:
834
+ """Get the supported search types for this vector database."""
835
+ return [] # Clickhouse doesn't use SearchType enum