agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,31 @@
1
+ import asyncio
1
2
  from hashlib import md5
2
- from typing import Any, Dict, List, Optional
3
+ from typing import Any, Dict, List, Optional, Union
3
4
 
4
5
  from agno.vectordb.clickhouse.index import HNSW
5
6
 
6
7
  try:
7
8
  import clickhouse_connect
9
+ import clickhouse_connect.driver.asyncclient
8
10
  import clickhouse_connect.driver.client
9
11
  except ImportError:
10
12
  raise ImportError("`clickhouse-connect` not installed. Use `pip install clickhouse-connect` to install it")
11
13
 
12
- from agno.document import Document
13
- from agno.embedder import Embedder
14
- from agno.utils.log import logger
14
+ from agno.filters import FilterExpr
15
+ from agno.knowledge.document import Document
16
+ from agno.knowledge.embedder import Embedder
17
+ from agno.utils.log import log_debug, log_info, log_warning, logger
15
18
  from agno.vectordb.base import VectorDb
16
19
  from agno.vectordb.distance import Distance
17
20
 
18
21
 
19
- class ClickhouseDb(VectorDb):
22
+ class Clickhouse(VectorDb):
20
23
  def __init__(
21
24
  self,
22
25
  table_name: str,
23
26
  host: str,
27
+ name: Optional[str] = None,
28
+ description: Optional[str] = None,
24
29
  username: Optional[str] = None,
25
30
  password: str = "",
26
31
  port: int = 0,
@@ -28,32 +33,45 @@ class ClickhouseDb(VectorDb):
28
33
  dsn: Optional[str] = None,
29
34
  compress: str = "lz4",
30
35
  client: Optional[clickhouse_connect.driver.client.Client] = None,
36
+ asyncclient: Optional[clickhouse_connect.driver.asyncclient.AsyncClient] = None,
31
37
  embedder: Optional[Embedder] = None,
32
38
  distance: Distance = Distance.cosine,
33
39
  index: Optional[HNSW] = HNSW(),
34
40
  ):
41
+ # Store connection parameters as instance attributes
42
+ self.host = host
43
+ self.username = username
44
+ self.password = password
45
+ self.port = port
46
+ self.dsn = dsn
47
+ # Initialize base class with name and description
48
+ super().__init__(name=name, description=description)
49
+
50
+ self.compress = compress
51
+ self.database_name = database_name
35
52
  if not client:
36
53
  client = clickhouse_connect.get_client(
37
- host=host,
38
- username=username, # type: ignore
39
- password=password,
40
- database=database_name,
41
- port=port,
42
- dsn=dsn,
43
- compress=compress,
54
+ host=self.host,
55
+ username=self.username, # type: ignore
56
+ password=self.password,
57
+ database=self.database_name,
58
+ port=self.port,
59
+ dsn=self.dsn,
60
+ compress=self.compress,
44
61
  )
45
62
 
46
63
  # Database attributes
47
64
  self.client = client
48
- self.database_name = database_name
65
+ self.async_client = asyncclient
49
66
  self.table_name = table_name
50
67
 
51
68
  # Embedder for embedding the document contents
52
69
  _embedder = embedder
53
70
  if _embedder is None:
54
- from agno.embedder.openai import OpenAIEmbedder
71
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
55
72
 
56
73
  _embedder = OpenAIEmbedder()
74
+ log_info("Embedder not provided, using OpenAIEmbedder as default.")
57
75
  self.embedder: Embedder = _embedder
58
76
  self.dimensions: Optional[int] = self.embedder.dimensions
59
77
 
@@ -63,6 +81,21 @@ class ClickhouseDb(VectorDb):
63
81
  # Index for the collection
64
82
  self.index: Optional[HNSW] = index
65
83
 
84
+ async def _ensure_async_client(self):
85
+ """Ensure we have an initialized async client."""
86
+ if self.async_client is None:
87
+ self.async_client = await clickhouse_connect.get_async_client(
88
+ host=self.host,
89
+ username=self.username, # type: ignore
90
+ password=self.password,
91
+ database=self.database_name,
92
+ port=self.port,
93
+ dsn=self.dsn,
94
+ compress=self.compress,
95
+ settings={"allow_experimental_vector_similarity_index": 1},
96
+ )
97
+ return self.async_client
98
+
66
99
  def _get_base_parameters(self) -> Dict[str, Any]:
67
100
  return {
68
101
  "table_name": self.table_name,
@@ -70,7 +103,7 @@ class ClickhouseDb(VectorDb):
70
103
  }
71
104
 
72
105
  def table_exists(self) -> bool:
73
- logger.debug(f"Checking if table exists: {self.table_name}")
106
+ log_debug(f"Checking if table exists: {self.table_name}")
74
107
  try:
75
108
  parameters = self._get_base_parameters()
76
109
  return bool(
@@ -83,22 +116,38 @@ class ClickhouseDb(VectorDb):
83
116
  logger.error(e)
84
117
  return False
85
118
 
119
+ async def async_table_exists(self) -> bool:
120
+ """Check if a table exists asynchronously."""
121
+ log_debug(f"Async checking if table exists: {self.table_name}")
122
+ try:
123
+ async_client = await self._ensure_async_client()
124
+
125
+ parameters = self._get_base_parameters()
126
+ result = await async_client.command(
127
+ "EXISTS TABLE {database_name:Identifier}.{table_name:Identifier}",
128
+ parameters=parameters,
129
+ )
130
+ return bool(result)
131
+ except Exception as e:
132
+ logger.error(f"Async error checking if table exists: {e}")
133
+ return False
134
+
86
135
  def create(self) -> None:
87
136
  if not self.table_exists():
88
- logger.debug(f"Creating Database: {self.database_name}")
137
+ log_debug(f"Creating Database: {self.database_name}")
89
138
  parameters = {"database_name": self.database_name}
90
139
  self.client.command(
91
140
  "CREATE DATABASE IF NOT EXISTS {database_name:Identifier}",
92
141
  parameters=parameters,
93
142
  )
94
143
 
95
- logger.debug(f"Creating table: {self.table_name}")
144
+ log_debug(f"Creating table: {self.table_name}")
96
145
 
97
146
  parameters = self._get_base_parameters()
98
147
 
99
148
  if isinstance(self.index, HNSW):
100
149
  index = (
101
- f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.index.quantization}, "
150
+ f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.embedder.dimensions}, {self.index.quantization}, "
102
151
  f"{self.index.hnsw_max_connections_per_layer}, {self.index.hnsw_candidate_list_size_for_construction})"
103
152
  )
104
153
  self.client.command("SET allow_experimental_vector_similarity_index = 1")
@@ -115,6 +164,7 @@ class ClickhouseDb(VectorDb):
115
164
  meta_data JSON DEFAULT '{{}}',
116
165
  filters JSON DEFAULT '{{}}',
117
166
  content String,
167
+ content_id String,
118
168
  embedding Array(Float32),
119
169
  usage JSON,
120
170
  created_at DateTime('UTC') DEFAULT now(),
@@ -125,22 +175,50 @@ class ClickhouseDb(VectorDb):
125
175
  parameters=parameters,
126
176
  )
127
177
 
128
- def doc_exists(self, document: Document) -> bool:
129
- """
130
- Validating if the document exists or not
178
+ async def async_create(self) -> None:
179
+ """Create database and table asynchronously."""
180
+ if not await self.async_table_exists():
181
+ log_debug(f"Async creating Database: {self.database_name}")
182
+ async_client = await self._ensure_async_client()
131
183
 
132
- Args:
133
- document (Document): Document to validate
134
- """
135
- cleaned_content = document.content.replace("\x00", "\ufffd")
136
- parameters = self._get_base_parameters()
137
- parameters["content_hash"] = md5(cleaned_content.encode()).hexdigest()
184
+ parameters = {"database_name": self.database_name}
185
+ await async_client.command(
186
+ "CREATE DATABASE IF NOT EXISTS {database_name:Identifier}",
187
+ parameters=parameters,
188
+ )
138
189
 
139
- result = self.client.query(
140
- "SELECT content_hash FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
141
- parameters=parameters,
142
- )
143
- return bool(result.result_rows)
190
+ log_debug(f"Async creating table: {self.table_name}")
191
+ parameters = self._get_base_parameters()
192
+
193
+ if isinstance(self.index, HNSW):
194
+ index = (
195
+ f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.index.quantization}, "
196
+ f"{self.index.hnsw_max_connections_per_layer}, {self.index.hnsw_candidate_list_size_for_construction})"
197
+ )
198
+ await async_client.command("SET allow_experimental_vector_similarity_index = 1")
199
+ else:
200
+ raise NotImplementedError(f"Not implemented index {type(self.index)!r} is passed")
201
+
202
+ await self.async_client.command("SET enable_json_type = 1") # type: ignore
203
+
204
+ await self.async_client.command( # type: ignore
205
+ f"""CREATE TABLE IF NOT EXISTS {{database_name:Identifier}}.{{table_name:Identifier}}
206
+ (
207
+ id String,
208
+ name String,
209
+ meta_data JSON DEFAULT '{{}}',
210
+ filters JSON DEFAULT '{{}}',
211
+ content String,
212
+ content_id String,
213
+ embedding Array(Float32),
214
+ usage JSON,
215
+ created_at DateTime('UTC') DEFAULT now(),
216
+ content_hash String,
217
+ PRIMARY KEY (id),
218
+ {index}
219
+ ) ENGINE = ReplacingMergeTree ORDER BY id""",
220
+ parameters=parameters,
221
+ )
144
222
 
145
223
  def name_exists(self, name: str) -> bool:
146
224
  """
@@ -156,7 +234,20 @@ class ClickhouseDb(VectorDb):
156
234
  "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
157
235
  parameters=parameters,
158
236
  )
159
- return bool(result)
237
+ return len(result.result_rows) > 0 if result.result_rows else False
238
+
239
+ async def async_name_exists(self, name: str) -> bool:
240
+ """Check if a document with given name exists asynchronously."""
241
+ parameters = self._get_base_parameters()
242
+ async_client = await self._ensure_async_client()
243
+
244
+ parameters["name"] = name
245
+
246
+ result = await async_client.query(
247
+ "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
248
+ parameters=parameters,
249
+ )
250
+ return len(result.result_rows) > 0 if result.result_rows else False
160
251
 
161
252
  def id_exists(self, id: str) -> bool:
162
253
  """
@@ -172,10 +263,11 @@ class ClickhouseDb(VectorDb):
172
263
  "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
173
264
  parameters=parameters,
174
265
  )
175
- return bool(result)
266
+ return len(result.result_rows) > 0 if result.result_rows else False
176
267
 
177
268
  def insert(
178
269
  self,
270
+ content_hash: str,
179
271
  documents: List[Document],
180
272
  filters: Optional[Dict[str, Any]] = None,
181
273
  ) -> None:
@@ -183,8 +275,7 @@ class ClickhouseDb(VectorDb):
183
275
  for document in documents:
184
276
  document.embed(embedder=self.embedder)
185
277
  cleaned_content = document.content.replace("\x00", "\ufffd")
186
- content_hash = md5(cleaned_content.encode()).hexdigest()
187
- _id = document.id or content_hash
278
+ _id = md5(cleaned_content.encode()).hexdigest()
188
279
 
189
280
  row: List[Any] = [
190
281
  _id,
@@ -192,6 +283,7 @@ class ClickhouseDb(VectorDb):
192
283
  document.meta_data,
193
284
  filters,
194
285
  cleaned_content,
286
+ document.content_id,
195
287
  document.embedding,
196
288
  document.usage,
197
289
  content_hash,
@@ -207,18 +299,113 @@ class ClickhouseDb(VectorDb):
207
299
  "meta_data",
208
300
  "filters",
209
301
  "content",
302
+ "content_id",
210
303
  "embedding",
211
304
  "usage",
212
305
  "content_hash",
213
306
  ],
214
307
  )
215
- logger.debug(f"Inserted {len(documents)} documents")
308
+ log_debug(f"Inserted {len(documents)} documents")
309
+
310
+ async def async_insert(
311
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
312
+ ) -> None:
313
+ """Insert documents asynchronously."""
314
+ rows: List[List[Any]] = []
315
+ async_client = await self._ensure_async_client()
316
+
317
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
318
+ # Use batch embedding when enabled and supported
319
+ try:
320
+ # Extract content from all documents
321
+ doc_contents = [doc.content for doc in documents]
322
+
323
+ # Get batch embeddings and usage
324
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
325
+
326
+ # Process documents with pre-computed embeddings
327
+ for j, doc in enumerate(documents):
328
+ try:
329
+ if j < len(embeddings):
330
+ doc.embedding = embeddings[j]
331
+ doc.usage = usages[j] if j < len(usages) else None
332
+ except Exception as e:
333
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
334
+
335
+ except Exception as e:
336
+ # Check if this is a rate limit error - don't fall back as it would make things worse
337
+ error_str = str(e).lower()
338
+ is_rate_limit = any(
339
+ phrase in error_str
340
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
341
+ )
342
+
343
+ if is_rate_limit:
344
+ logger.error(f"Rate limit detected during batch embedding. {e}")
345
+ raise e
346
+ else:
347
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
348
+ # Fall back to individual embedding
349
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
350
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
351
+ else:
352
+ # Use individual embedding
353
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
354
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
355
+
356
+ for document in documents:
357
+ cleaned_content = document.content.replace("\x00", "\ufffd")
358
+ _id = md5(cleaned_content.encode()).hexdigest()
359
+
360
+ row: List[Any] = [
361
+ _id,
362
+ document.name,
363
+ document.meta_data,
364
+ filters,
365
+ cleaned_content,
366
+ document.content_id,
367
+ document.embedding,
368
+ document.usage,
369
+ content_hash,
370
+ ]
371
+ rows.append(row)
372
+
373
+ await async_client.insert(
374
+ f"{self.database_name}.{self.table_name}",
375
+ rows,
376
+ column_names=[
377
+ "id",
378
+ "name",
379
+ "meta_data",
380
+ "filters",
381
+ "content",
382
+ "content_id",
383
+ "embedding",
384
+ "usage",
385
+ "content_hash",
386
+ ],
387
+ )
388
+ log_debug(f"Async inserted {len(documents)} documents")
216
389
 
217
390
  def upsert_available(self) -> bool:
218
391
  return True
219
392
 
220
393
  def upsert(
221
394
  self,
395
+ content_hash: str,
396
+ documents: List[Document],
397
+ filters: Optional[Dict[str, Any]] = None,
398
+ ) -> None:
399
+ """
400
+ Upsert documents into the database.
401
+ """
402
+ if self.content_hash_exists(content_hash):
403
+ self._delete_by_content_hash(content_hash)
404
+ self.insert(content_hash=content_hash, documents=documents, filters=filters)
405
+
406
+ def _upsert(
407
+ self,
408
+ content_hash: str,
222
409
  documents: List[Document],
223
410
  filters: Optional[Dict[str, Any]] = None,
224
411
  ) -> None:
@@ -232,7 +419,7 @@ class ClickhouseDb(VectorDb):
232
419
  """
233
420
  # We are using ReplacingMergeTree engine in our table, so we need to insert the documents,
234
421
  # then call SELECT with FINAL
235
- self.insert(documents=documents, filters=filters)
422
+ self.insert(content_hash=content_hash, documents=documents, filters=filters)
236
423
 
237
424
  parameters = self._get_base_parameters()
238
425
  self.client.query(
@@ -240,7 +427,33 @@ class ClickhouseDb(VectorDb):
240
427
  parameters=parameters,
241
428
  )
242
429
 
243
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
430
+ async def async_upsert(
431
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
432
+ ) -> None:
433
+ """Upsert documents asynchronously."""
434
+ if self.content_hash_exists(content_hash):
435
+ self._delete_by_content_hash(content_hash)
436
+ await self._async_upsert(content_hash=content_hash, documents=documents, filters=filters)
437
+
438
+ async def _async_upsert(
439
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
440
+ ) -> None:
441
+ """Upsert documents asynchronously."""
442
+ # We are using ReplacingMergeTree engine in our table, so we need to insert the documents,
443
+ # then call SELECT with FINAL
444
+ await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
445
+
446
+ parameters = self._get_base_parameters()
447
+ await self.async_client.query( # type: ignore
448
+ "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} FINAL",
449
+ parameters=parameters,
450
+ )
451
+
452
+ def search(
453
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
454
+ ) -> List[Document]:
455
+ if filters is not None:
456
+ log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
244
457
  query_embedding = self.embedder.get_embedding(query)
245
458
  if query_embedding is None:
246
459
  logger.error(f"Error getting embedding for Query: {query}")
@@ -248,13 +461,6 @@ class ClickhouseDb(VectorDb):
248
461
 
249
462
  parameters = self._get_base_parameters()
250
463
  where_query = ""
251
- if filters:
252
- query_filters: List[str] = []
253
- for key, value in filters.values():
254
- query_filters.append(f"{{{key}_key:String}} = {{{key}_value:String}}")
255
- parameters[f"{key}_key"] = key
256
- parameters[f"{key}_value"] = value
257
- where_query = f"WHERE {' AND '.join(query_filters)}"
258
464
 
259
465
  order_by_query = ""
260
466
  if self.distance == Distance.l2 or self.distance == Distance.max_inner_product:
@@ -265,12 +471,12 @@ class ClickhouseDb(VectorDb):
265
471
  parameters["query_embedding"] = query_embedding
266
472
 
267
473
  clickhouse_query = (
268
- "SELECT name, meta_data, content, embedding, usage FROM "
474
+ "SELECT name, meta_data, content, content_id, embedding, usage FROM "
269
475
  "{database_name:Identifier}.{table_name:Identifier} "
270
476
  f"{where_query} {order_by_query} LIMIT {limit}"
271
477
  )
272
- logger.debug(f"Query: {clickhouse_query}")
273
- logger.debug(f"Params: {parameters}")
478
+ log_debug(f"Query: {clickhouse_query}")
479
+ log_debug(f"Params: {parameters}")
274
480
 
275
481
  try:
276
482
  results = self.client.query(
@@ -291,9 +497,71 @@ class ClickhouseDb(VectorDb):
291
497
  name=result[0],
292
498
  meta_data=result[1],
293
499
  content=result[2],
500
+ content_id=result[3],
501
+ embedder=self.embedder,
502
+ embedding=result[4],
503
+ usage=result[5],
504
+ )
505
+ )
506
+
507
+ return search_results
508
+
509
+ async def async_search(
510
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
511
+ ) -> List[Document]:
512
+ """Search for documents asynchronously."""
513
+ async_client = await self._ensure_async_client()
514
+
515
+ if filters is not None:
516
+ log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
517
+
518
+ query_embedding = self.embedder.get_embedding(query)
519
+ if query_embedding is None:
520
+ logger.error(f"Error getting embedding for Query: {query}")
521
+ return []
522
+
523
+ parameters = self._get_base_parameters()
524
+ where_query = ""
525
+
526
+ order_by_query = ""
527
+ if self.distance == Distance.l2 or self.distance == Distance.max_inner_product:
528
+ order_by_query = "ORDER BY L2Distance(embedding, {query_embedding:Array(Float32)})"
529
+ parameters["query_embedding"] = query_embedding
530
+ if self.distance == Distance.cosine:
531
+ order_by_query = "ORDER BY cosineDistance(embedding, {query_embedding:Array(Float32)})"
532
+ parameters["query_embedding"] = query_embedding
533
+
534
+ clickhouse_query = (
535
+ "SELECT name, meta_data, content, content_id, embedding, usage FROM "
536
+ "{database_name:Identifier}.{table_name:Identifier} "
537
+ f"{where_query} {order_by_query} LIMIT {limit}"
538
+ )
539
+ log_debug(f"Async Query: {clickhouse_query}")
540
+ log_debug(f"Async Params: {parameters}")
541
+
542
+ try:
543
+ results = await async_client.query(
544
+ clickhouse_query,
545
+ parameters=parameters,
546
+ )
547
+ except Exception as e:
548
+ logger.error(f"Async error searching for documents: {e}")
549
+ logger.error("Table might not exist, creating for future use")
550
+ await self.async_create()
551
+ return []
552
+
553
+ # Build search results
554
+ search_results: List[Document] = []
555
+ for result in results.result_rows:
556
+ search_results.append(
557
+ Document(
558
+ name=result[0],
559
+ meta_data=result[1],
560
+ content=result[2],
561
+ content_id=result[3],
294
562
  embedder=self.embedder,
295
- embedding=result[3],
296
- usage=result[4],
563
+ embedding=result[4],
564
+ usage=result[5],
297
565
  )
298
566
  )
299
567
 
@@ -301,16 +569,29 @@ class ClickhouseDb(VectorDb):
301
569
 
302
570
  def drop(self) -> None:
303
571
  if self.table_exists():
304
- logger.debug(f"Deleting table: {self.table_name}")
572
+ log_debug(f"Deleting table: {self.table_name}")
305
573
  parameters = self._get_base_parameters()
306
574
  self.client.command(
307
575
  "DROP TABLE {database_name:Identifier}.{table_name:Identifier}",
308
576
  parameters=parameters,
309
577
  )
310
578
 
579
+ async def async_drop(self) -> None:
580
+ """Drop the table asynchronously."""
581
+ if await self.async_exists():
582
+ log_debug(f"Async dropping table: {self.table_name}")
583
+ parameters = self._get_base_parameters()
584
+ await self.async_client.command( # type: ignore
585
+ "DROP TABLE {database_name:Identifier}.{table_name:Identifier}",
586
+ parameters=parameters,
587
+ )
588
+
311
589
  def exists(self) -> bool:
312
590
  return self.table_exists()
313
591
 
592
+ async def async_exists(self) -> bool:
593
+ return await self.async_table_exists()
594
+
314
595
  def get_count(self) -> int:
315
596
  parameters = self._get_base_parameters()
316
597
  result = self.client.query(
@@ -323,7 +604,7 @@ class ClickhouseDb(VectorDb):
323
604
  return 0
324
605
 
325
606
  def optimize(self) -> None:
326
- logger.debug("==== No need to optimize Clickhouse DB. Skipping this step ====")
607
+ log_debug("==== No need to optimize Clickhouse DB. Skipping this step ====")
327
608
 
328
609
  def delete(self) -> bool:
329
610
  parameters = self._get_base_parameters()
@@ -332,3 +613,223 @@ class ClickhouseDb(VectorDb):
332
613
  parameters=parameters,
333
614
  )
334
615
  return True
616
+
617
+ def delete_by_id(self, id: str) -> bool:
618
+ """
619
+
620
+ Delete a document by its ID.
621
+
622
+ Args:
623
+ id (str): The document ID to delete
624
+
625
+ Returns:
626
+ bool: True if document was deleted, False otherwise
627
+ """
628
+ try:
629
+ log_debug(f"ClickHouse VectorDB : Deleting document with ID {id}")
630
+ if not self.id_exists(id):
631
+ return False
632
+
633
+ parameters = self._get_base_parameters()
634
+ parameters["id"] = id
635
+
636
+ self.client.command(
637
+ "DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
638
+ parameters=parameters,
639
+ )
640
+ return True
641
+ except Exception as e:
642
+ log_info(f"Error deleting document with ID {id}: {e}")
643
+ return False
644
+
645
+ def delete_by_name(self, name: str) -> bool:
646
+ """
647
+ Delete documents by name.
648
+
649
+ Args:
650
+ name (str): The document name to delete
651
+
652
+ Returns:
653
+ bool: True if documents were deleted, False otherwise
654
+ """
655
+ try:
656
+ log_debug(f"ClickHouse VectorDB : Deleting documents with name {name}")
657
+ if not self.name_exists(name):
658
+ return False
659
+
660
+ parameters = self._get_base_parameters()
661
+ parameters["name"] = name
662
+
663
+ self.client.command(
664
+ "DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
665
+ parameters=parameters,
666
+ )
667
+ return True
668
+ except Exception as e:
669
+ log_info(f"Error deleting documents with name {name}: {e}")
670
+ return False
671
+
672
+ def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
673
+ """
674
+ Delete documents by metadata.
675
+
676
+ Args:
677
+ metadata (Dict[str, Any]): The metadata to match for deletion
678
+
679
+ Returns:
680
+ bool: True if documents were deleted, False otherwise
681
+ """
682
+ try:
683
+ log_debug(f"ClickHouse VectorDB : Deleting documents with metadata {metadata}")
684
+ parameters = self._get_base_parameters()
685
+
686
+ # Build WHERE clause for metadata matching using proper ClickHouse JSON syntax
687
+ where_conditions = []
688
+ for key, value in metadata.items():
689
+ if isinstance(value, bool):
690
+ where_conditions.append(f"JSONExtractBool(toString(filters), '{key}') = {str(value).lower()}")
691
+ elif isinstance(value, (int, float)):
692
+ where_conditions.append(f"JSONExtractFloat(toString(filters), '{key}') = {value}")
693
+ else:
694
+ where_conditions.append(f"JSONExtractString(toString(filters), '{key}') = '{value}'")
695
+
696
+ if not where_conditions:
697
+ return False
698
+
699
+ where_clause = " AND ".join(where_conditions)
700
+
701
+ self.client.command(
702
+ f"DELETE FROM {{database_name:Identifier}}.{{table_name:Identifier}} WHERE {where_clause}",
703
+ parameters=parameters,
704
+ )
705
+ return True
706
+ except Exception as e:
707
+ log_info(f"Error deleting documents with metadata {metadata}: {e}")
708
+ return False
709
+
710
+ def delete_by_content_id(self, content_id: str) -> bool:
711
+ """
712
+ Delete documents by content ID.
713
+
714
+ Args:
715
+ content_id (str): The content ID to delete
716
+
717
+ Returns:
718
+ bool: True if documents were deleted, False otherwise
719
+ """
720
+ try:
721
+ log_debug(f"ClickHouse VectorDB : Deleting documents with content_id {content_id}")
722
+ parameters = self._get_base_parameters()
723
+ parameters["content_id"] = content_id
724
+
725
+ self.client.command(
726
+ "DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_id = {content_id:String}",
727
+ parameters=parameters,
728
+ )
729
+ return True
730
+ except Exception as e:
731
+ log_info(f"Error deleting documents with content_id {content_id}: {e}")
732
+ return False
733
+
734
+ def content_hash_exists(self, content_hash: str) -> bool:
735
+ """
736
+ Validate if a row with this content_hash exists or not
737
+
738
+ Args:
739
+ content_hash (str): Content hash to check
740
+ """
741
+ parameters = self._get_base_parameters()
742
+ parameters["content_hash"] = content_hash
743
+
744
+ result = self.client.query(
745
+ "SELECT content_hash FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
746
+ parameters=parameters,
747
+ )
748
+ return len(result.result_rows) > 0 if result.result_rows else False
749
+
750
+ def _delete_by_content_hash(self, content_hash: str) -> bool:
751
+ """
752
+ Delete documents by content hash.
753
+ """
754
+ try:
755
+ parameters = self._get_base_parameters()
756
+ parameters["content_hash"] = content_hash
757
+
758
+ self.client.command(
759
+ "DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
760
+ parameters=parameters,
761
+ )
762
+ return True
763
+ except Exception:
764
+ return False
765
+
766
+ def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
767
+ """
768
+ Update the metadata for documents with the given content_id.
769
+
770
+ Args:
771
+ content_id (str): The content ID to update
772
+ metadata (Dict[str, Any]): The metadata to update
773
+ """
774
+ import json
775
+
776
+ try:
777
+ parameters = self._get_base_parameters()
778
+ parameters["content_id"] = content_id
779
+
780
+ # First, get existing documents with their current metadata and filters
781
+ result = self.client.query(
782
+ "SELECT id, meta_data, filters FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_id = {content_id:String}",
783
+ parameters=parameters,
784
+ )
785
+
786
+ if not result.result_rows:
787
+ logger.debug(f"No documents found with content_id: {content_id}")
788
+ return
789
+
790
+ # Update each document
791
+ updated_count = 0
792
+ for row in result.result_rows:
793
+ doc_id, current_meta_json, current_filters_json = row
794
+
795
+ # Parse existing metadata
796
+ try:
797
+ current_metadata = json.loads(current_meta_json) if current_meta_json else {}
798
+ except (json.JSONDecodeError, TypeError):
799
+ current_metadata = {}
800
+
801
+ # Parse existing filters
802
+ try:
803
+ current_filters = json.loads(current_filters_json) if current_filters_json else {}
804
+ except (json.JSONDecodeError, TypeError):
805
+ current_filters = {}
806
+
807
+ # Merge existing metadata with new metadata
808
+ updated_metadata = current_metadata.copy()
809
+ updated_metadata.update(metadata)
810
+
811
+ # Merge existing filters with new metadata
812
+ updated_filters = current_filters.copy()
813
+ updated_filters.update(metadata)
814
+
815
+ # Update the document
816
+ update_params = parameters.copy()
817
+ update_params["doc_id"] = doc_id
818
+ update_params["metadata_json"] = json.dumps(updated_metadata)
819
+ update_params["filters_json"] = json.dumps(updated_filters)
820
+
821
+ self.client.command(
822
+ "ALTER TABLE {database_name:Identifier}.{table_name:Identifier} UPDATE meta_data = {metadata_json:String}, filters = {filters_json:String} WHERE id = {doc_id:String}",
823
+ parameters=update_params,
824
+ )
825
+ updated_count += 1
826
+
827
+ logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")
828
+
829
+ except Exception as e:
830
+ logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
831
+ raise
832
+
833
+ def get_supported_search_types(self) -> List[str]:
834
+ """Get the supported search types for this vector database."""
835
+ return [] # Clickhouse doesn't use SearchType enum