agno 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,929 @@
1
+ import asyncio
2
+ import json
3
+ from hashlib import md5
4
+ from typing import Any, Dict, List, Mapping, Optional, Union, cast
5
+
6
+ try:
7
+ from chromadb import Client as ChromaDbClient
8
+ from chromadb import PersistentClient as PersistentChromaDbClient
9
+ from chromadb.api.client import ClientAPI
10
+ from chromadb.api.models.Collection import Collection
11
+ from chromadb.api.types import QueryResult
12
+
13
+ except ImportError:
14
+ raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
15
+
16
+ from agno.filters import FilterExpr
17
+ from agno.knowledge.document import Document
18
+ from agno.knowledge.embedder import Embedder
19
+ from agno.knowledge.reranker.base import Reranker
20
+ from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
21
+ from agno.vectordb.base import VectorDb
22
+ from agno.vectordb.distance import Distance
23
+
24
+
25
+ class ChromaDb(VectorDb):
26
def __init__(
    self,
    collection: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    id: Optional[str] = None,
    embedder: Optional[Embedder] = None,
    distance: Distance = Distance.cosine,
    path: str = "tmp/chromadb",
    persistent_client: bool = False,
    reranker: Optional[Reranker] = None,
    **kwargs,
):
    """Store the configuration for a Chroma-backed vector database.

    No Chroma client or collection handle is created here; both
    ``self._client`` and ``self._collection`` start out as ``None``.

    Args:
        collection: Name of the Chroma collection. Must be non-empty.
        name: Optional name forwarded to the base class.
        description: Optional description forwarded to the base class.
        id: Optional identifier. When omitted, one is generated from the
            seed ``"{path}#{collection}"``.
        embedder: Embedder for document contents. Defaults to ``OpenAIEmbedder()``.
        distance: Distance metric to use for the collection.
        path: Storage path passed to the persistent client.
        persistent_client: Whether a persistent client should be used.
        reranker: Optional reranker instance.
        **kwargs: Extra keyword arguments forwarded to the Chroma client constructor.

    Raises:
        ValueError: If ``collection`` is empty.
    """
    if not collection:
        raise ValueError("Collection name must be provided.")

    if id is None:
        # Imported lazily, only when an id actually has to be generated.
        from agno.utils.string import generate_id

        id = generate_id(f"{path}#{collection}")

    super().__init__(id=id, name=name, description=description)

    self.collection_name: str = collection

    if embedder is None:
        # Imported lazily so the OpenAI embedder is only loaded when it is the fallback.
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder: Embedder = embedder

    self.distance: Distance = distance
    self.reranker: Optional[Reranker] = reranker

    # Client selection: in-memory vs. persistent (the latter uses `path`).
    self.persistent_client: bool = persistent_client
    self.path: str = path
    self.kwargs = kwargs

    # Handles that are filled in later, not during construction.
    self._client: Optional[ClientAPI] = None
    self._collection: Optional[Collection] = None
80
+
81
+ def _flatten_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Union[str, int, float, bool]]:
82
+ """
83
+ Flatten nested metadata to ChromaDB-compatible format.
84
+
85
+ Args:
86
+ metadata: Dictionary that may contain nested structures
87
+
88
+ Returns:
89
+ Flattened dictionary with only primitive values
90
+ """
91
+ flattened: Dict[str, Any] = {}
92
+
93
+ def _flatten_recursive(obj: Any, prefix: str = "") -> None:
94
+ if isinstance(obj, dict):
95
+ if len(obj) == 0:
96
+ # Handle empty dictionaries by converting to JSON string
97
+ flattened[prefix] = json.dumps(obj)
98
+ else:
99
+ for key, value in obj.items():
100
+ new_key = f"{prefix}.{key}" if prefix else key
101
+ _flatten_recursive(value, new_key)
102
+ elif isinstance(obj, (list, tuple)):
103
+ # Convert lists/tuples to JSON strings
104
+ flattened[prefix] = json.dumps(obj)
105
+ elif isinstance(obj, (str, int, float, bool)) or obj is None:
106
+ if obj is not None: # ChromaDB doesn't accept None values
107
+ flattened[prefix] = obj
108
+ else:
109
+ # Convert other complex types to JSON strings
110
+ try:
111
+ flattened[prefix] = json.dumps(obj)
112
+ except (TypeError, ValueError):
113
+ # If it can't be serialized, convert to string
114
+ flattened[prefix] = str(obj)
115
+
116
+ _flatten_recursive(metadata)
117
+ return flattened
118
+
119
@property
def client(self) -> ClientAPI:
    """Return the Chroma client, building it on first access.

    The client is cached in ``self._client``. When ``self.persistent_client``
    is truthy a persistent client rooted at ``self.path`` is built; otherwise
    a plain client is built. ``self.kwargs`` is forwarded in both cases.
    """
    if self._client is not None:
        return self._client

    if self.persistent_client:
        log_debug("Creating Persistent Chroma Client")
        self._client = PersistentChromaDbClient(path=self.path, **self.kwargs)
    else:
        log_debug("Creating Chroma Client")
        self._client = ChromaDbClient(**self.kwargs)
    return self._client
134
+
135
def create(self) -> None:
    """Bind ``self._collection``, creating the collection in ChromaDb if it is missing.

    A newly created collection gets the configured distance metric through
    the ``hnsw:space`` metadata entry.
    """
    if not self.exists():
        log_debug(f"Creating collection: {self.collection_name}")
        collection_metadata = {"hnsw:space": self.distance.value}
        self._collection = self.client.create_collection(
            name=self.collection_name,
            metadata=collection_metadata,
        )
        return

    log_debug(f"Collection already exists: {self.collection_name}")
    self._collection = self.client.get_collection(name=self.collection_name)
145
+
146
async def async_create(self) -> None:
    """Asynchronous counterpart of ``create``; runs it in a worker thread."""
    blocking_create = self.create
    await asyncio.to_thread(blocking_create)
149
+
150
def name_exists(self, name: str) -> bool:
    """Check if a document with a given name exists in the collection.

    The lookup filters on the ``name`` metadata field and asks for at most
    one match. Any exception raised by the lookup is logged and reported
    as "not found".

    Args:
        name (str): Name of the document to check.
    Returns:
        bool: True if document exists, False otherwise.
    """
    chroma_client = self.client
    if not chroma_client:
        logger.warning("Client not initialized")
        return False

    name_filter = cast(Any, {"name": {"$eq": name}})
    try:
        target: Collection = chroma_client.get_collection(name=self.collection_name)
        matches = target.get(where=name_filter, limit=1)
        matching_ids = matches.get("ids", [])
        return len(matching_ids) > 0
    except Exception as e:
        logger.error(f"Error checking name existence: {e}")
        return False
167
+
168
async def async_name_exists(self, name: str) -> bool:
    """Asynchronous counterpart of ``name_exists``; runs it in a worker thread."""
    found = await asyncio.to_thread(self.name_exists, name)
    return found
171
+
172
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """Insert documents into the collection.

    Each document is embedded with ``self.embedder``. Its id is the MD5 hex
    digest of its content (after NUL characters are replaced), so documents
    with identical content share the same id.

    Args:
        content_hash (str): Value stored under the ``content_hash`` metadata key of every document
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to merge with document metadata
    """
    log_info(f"Inserting {len(documents)} documents")
    ids: List = []
    docs: List = []
    docs_embeddings: List = []
    docs_metadata: List = []

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    for document in documents:
        document.embed(embedder=self.embedder)
        # Replace NUL characters with the Unicode replacement character before hashing/storing
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        # Handle metadata and filters.
        # Work on a copy: the filters and bookkeeping keys added below must not
        # leak into the caller's `document.meta_data`.
        metadata = dict(document.meta_data or {})
        if filters:
            metadata.update(filters)

        # Add name, content_id to metadata
        if document.name is not None:
            metadata["name"] = document.name
        if document.content_id is not None:
            metadata["content_id"] = document.content_id

        metadata["content_hash"] = content_hash

        # Flatten metadata for ChromaDB compatibility
        flattened_metadata = self._flatten_metadata(metadata)

        docs_embeddings.append(document.embedding)
        docs.append(cleaned_content)
        ids.append(doc_id)
        docs_metadata.append(flattened_metadata)
        log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")

    if self._collection is None:
        logger.warning("Collection does not exist")
    else:
        if len(docs) > 0:
            self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
            log_debug(f"Committed {len(docs)} documents")
221
+
222
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Embed the documents asynchronously and add them to the collection.

    Embeddings come from the embedder's async batch API when
    ``self.embedder.enable_batch`` is set and the embedder provides
    ``async_get_embeddings_batch_and_usage``; otherwise each document is
    embedded with its own ``async_embed`` call. The final ``add`` on the
    collection is a direct call.

    Args:
        content_hash (str): Value stored under the ``content_hash`` metadata key
            of every inserted document.
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to merge with document metadata

    Raises:
        Exception: A batch-embedding failure whose message looks like a rate
            limit is re-raised instead of falling back to per-document calls.
    """
    log_info(f"Async Inserting {len(documents)} documents")
    ids: List = []
    docs: List = []
    docs_embeddings: List = []
    docs_metadata: List = []

    async def _embed_individually() -> None:
        """Embed each document separately and log every failure."""
        embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
        # return_exceptions=True hands failures back as results; log them so
        # they are not silently discarded.
        outcomes = await asyncio.gather(*embed_tasks, return_exceptions=True)
        for doc, outcome in zip(documents, outcomes):
            if isinstance(outcome, BaseException):
                logger.error(f"Error embedding document '{doc.name}': {outcome}")

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
        # Use batch embedding when enabled and supported
        try:
            # Extract content from all documents
            doc_contents = [doc.content for doc in documents]

            # Get batch embeddings and usage
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Process documents with pre-computed embeddings
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # Check if this is a rate limit error - don't fall back as it would make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )

            if is_rate_limit:
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise
            else:
                logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                # Fall back to individual embedding
                await _embed_individually()
    else:
        # Use individual embedding
        try:
            await _embed_individually()
        except Exception as e:
            log_error(f"Error processing document: {e}")

    for document in documents:
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        # Work on a copy: merging filters and bookkeeping keys must not leak
        # back into the caller's Document.meta_data.
        metadata: Dict[str, Any] = dict(document.meta_data or {})
        if filters:
            metadata.update(filters)

        # Add name, content_id to metadata
        if document.name is not None:
            metadata["name"] = document.name
        if document.content_id is not None:
            metadata["content_id"] = document.content_id

        metadata["content_hash"] = content_hash

        # Flatten metadata for ChromaDB compatibility
        flattened_metadata = self._flatten_metadata(metadata)

        docs_embeddings.append(document.embedding)
        docs.append(cleaned_content)
        ids.append(doc_id)
        docs_metadata.append(flattened_metadata)
        log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")

    if self._collection is None:
        logger.warning("Collection does not exist")
    else:
        if len(docs) > 0:
            self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
            log_debug(f"Committed {len(docs)} documents")
309
+
310
def upsert_available(self) -> bool:
    """Report whether upsert is supported; this implementation always says yes.

    Returns:
        bool: Always ``True``.
    """
    available = True
    return available
313
+
314
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """Replace any documents stored under ``content_hash`` with ``documents``.

    Documents already recorded with the same content hash are deleted first,
    then the new ones are written through ``_upsert``.

    Args:
        content_hash (str): Content hash identifying the batch being replaced.
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting

    Raises:
        Exception: Any error from the existence check, delete or upsert is
            logged and re-raised.
    """
    try:
        already_stored = self.content_hash_exists(content_hash)
        if already_stored:
            self._delete_by_content_hash(content_hash)
        self._upsert(content_hash, documents, filters)
    except Exception as exc:
        logger.error(f"Error upserting documents by content hash: {exc}")
        raise
328
+
329
def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """Embed the documents and upsert them into the collection.

    Each document is embedded with ``self.embedder``; its ID is the MD5 hex
    digest of its content after NUL characters are replaced with U+FFFD.

    Args:
        content_hash (str): Value stored under the ``content_hash`` metadata key
            of every upserted document.
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to merge with document metadata
    """
    log_info(f"Upserting {len(documents)} documents")
    ids: List = []
    docs: List = []
    docs_embeddings: List = []
    docs_metadata: List = []

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    for document in documents:
        document.embed(embedder=self.embedder)
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        # Work on a copy: merging filters and bookkeeping keys must not leak
        # back into the caller's Document.meta_data.
        metadata: Dict[str, Any] = dict(document.meta_data or {})
        if filters:
            metadata.update(filters)

        # Add name, content_id to metadata
        if document.name is not None:
            metadata["name"] = document.name
        if document.content_id is not None:
            metadata["content_id"] = document.content_id

        metadata["content_hash"] = content_hash

        # Flatten metadata for ChromaDB compatibility
        flattened_metadata = self._flatten_metadata(metadata)

        docs_embeddings.append(document.embedding)
        docs.append(cleaned_content)
        ids.append(doc_id)
        docs_metadata.append(flattened_metadata)
        log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")

    if self._collection is None:
        logger.warning("Collection does not exist")
    else:
        if len(docs) > 0:
            self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
            log_debug(f"Committed {len(docs)} documents")
378
+
379
async def _async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Embed the documents asynchronously and upsert them into the collection.

    Embeddings come from the embedder's async batch API when
    ``self.embedder.enable_batch`` is set and the embedder provides
    ``async_get_embeddings_batch_and_usage``; otherwise each document is
    embedded with its own ``async_embed`` call. The final ``upsert`` on the
    collection is a direct call.

    Args:
        content_hash (str): Value stored under the ``content_hash`` metadata key
            of every upserted document.
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to merge with document metadata

    Raises:
        Exception: A batch-embedding failure whose message looks like a rate
            limit is re-raised instead of falling back to per-document calls.
    """
    log_info(f"Async Upserting {len(documents)} documents")
    ids: List = []
    docs: List = []
    docs_embeddings: List = []
    docs_metadata: List = []

    async def _embed_individually() -> None:
        """Embed each document separately and log every failure."""
        embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
        # return_exceptions=True hands failures back as results; log them so
        # they are not silently discarded.
        outcomes = await asyncio.gather(*embed_tasks, return_exceptions=True)
        for doc, outcome in zip(documents, outcomes):
            if isinstance(outcome, BaseException):
                logger.error(f"Error embedding document '{doc.name}': {outcome}")

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
        # Use batch embedding when enabled and supported
        try:
            # Extract content from all documents
            doc_contents = [doc.content for doc in documents]

            # Get batch embeddings and usage
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Process documents with pre-computed embeddings
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # Check if this is a rate limit error - don't fall back as it would make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )

            if is_rate_limit:
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise
            else:
                logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                # Fall back to individual embedding
                await _embed_individually()
    else:
        # Use individual embedding
        await _embed_individually()

    for document in documents:
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        # Work on a copy: merging filters and bookkeeping keys must not leak
        # back into the caller's Document.meta_data.
        metadata: Dict[str, Any] = dict(document.meta_data or {})
        if filters:
            metadata.update(filters)

        # Add name, content_id to metadata
        if document.name is not None:
            metadata["name"] = document.name
        if document.content_id is not None:
            metadata["content_id"] = document.content_id

        metadata["content_hash"] = content_hash

        # Flatten metadata for ChromaDB compatibility
        flattened_metadata = self._flatten_metadata(metadata)

        docs_embeddings.append(document.embedding)
        docs.append(cleaned_content)
        ids.append(doc_id)
        docs_metadata.append(flattened_metadata)
        log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")

    if self._collection is None:
        logger.warning("Collection does not exist")
    else:
        if len(docs) > 0:
            self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
            log_debug(f"Committed {len(docs)} documents")
468
+
469
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Asynchronously replace documents stored under ``content_hash``.

    The existence check and the delete are synchronous client calls, so they
    are run on worker threads with ``asyncio.to_thread`` to keep the event loop
    free; the write itself is awaited through ``_async_upsert``.

    Args:
        content_hash (str): Content hash identifying the batch being replaced.
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting

    Raises:
        Exception: Any error from the existence check, delete or upsert is
            logged and re-raised.
    """
    try:
        if await asyncio.to_thread(self.content_hash_exists, content_hash):
            await asyncio.to_thread(self._delete_by_content_hash, content_hash)
        await self._async_upsert(content_hash, documents, filters)
    except Exception as e:
        logger.error(f"Error upserting documents by content hash: {e}")
        raise
480
+
481
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Run a similarity search against the collection.

    The query is embedded with ``self.embedder`` and sent to the collection's
    ``query`` call; each hit becomes a ``Document`` whose metadata also carries
    the hit distance under the ``distances`` key. When a reranker is
    configured the hits are passed through it before being returned.

    Args:
        query (str): Query to search for.
        limit (int): Number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply while searching.
            A list of filter expressions is ignored with a warning. Dict filters
            are converted with ``_convert_filters`` and may use ChromaDB operators:
            - $eq, $ne: Equality/Inequality
            - $gt, $gte, $lt, $lte: Numeric comparisons
            - $in, $nin: List inclusion/exclusion
            - $and, $or: Logical operators

    Returns:
        List[Document]: List of search results; empty when the query cannot be
        embedded or the response lacks any of the expected fields.
    """

    def _as_plain_list(raw: Any) -> List[Any]:
        """Turn one raw embedding entry into a plain Python list."""
        if hasattr(raw, "tolist") and callable(getattr(raw, "tolist", None)):
            try:
                return list(cast(Any, raw).tolist())
            except (AttributeError, TypeError):
                return list(raw) if isinstance(raw, (list, tuple)) else []
        if isinstance(raw, (list, tuple)):
            return [float(x) for x in raw if isinstance(x, (int, float))]
        if isinstance(raw, (int, float)):
            return [float(raw)]
        return []

    if isinstance(filters, list):
        log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
        filters = None

    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return []

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    # Convert simple filters to ChromaDB's format if needed
    where_filter = None
    if filters:
        where_filter = self._convert_filters(filters)

    response = self._collection.query(
        query_embeddings=query_embedding,
        n_results=limit,
        where=where_filter,
        include=["metadatas", "documents", "embeddings", "distances", "uris"],
    )

    hits: List[Document] = []

    ids_list = response.get("ids", [[]])  # type: ignore
    metadata_list = response.get("metadatas", [[{}]])  # type: ignore
    documents_list = response.get("documents", [[]])  # type: ignore
    embeddings_list = response.get("embeddings")  # type: ignore
    distances_list = response.get("distances", [[]])  # type: ignore

    # Every field must be present before the first (only) query row is unpacked.
    if not (ids_list and metadata_list and documents_list and embeddings_list is not None and distances_list):
        return hits

    hit_ids = ids_list[0]
    hit_metadata = [dict(m) if m else {} for m in metadata_list[0]]  # mutable copies
    hit_contents = documents_list[0]
    raw_embeddings = embeddings_list[0] if embeddings_list else []
    hit_embeddings = [_as_plain_list(raw) for raw in raw_embeddings]
    hit_distances = distances_list[0]

    # Attach each distance to the metadata of the hit at the same position.
    for meta, distance in zip(hit_metadata, hit_distances):
        meta["distances"] = distance

    try:
        for position, (hit_id, meta, raw_content) in enumerate(zip(hit_ids, hit_metadata, hit_contents)):
            # Pull out the fields that were stored in metadata at write time.
            name_val = meta.pop("name", None)
            content_id_val = meta.pop("content_id", None)

            # Document expects strings (or None) for these fields.
            if name_val is None or isinstance(name_val, str):
                name = name_val
            else:
                name = str(name_val)
            if content_id_val is None or isinstance(content_id_val, str):
                content_id = content_id_val
            else:
                content_id = str(content_id_val)

            content = "" if raw_content is None else str(raw_content)
            embedding = hit_embeddings[position] if position < len(hit_embeddings) else None

            hits.append(
                Document(
                    id=hit_id,
                    name=name,
                    meta_data=meta,
                    content=content,
                    embedding=embedding,
                    content_id=content_id,
                )
            )
    except Exception as e:
        logger.error(f"Error building search results: {e}")

    if self.reranker:
        hits = self.reranker.rerank(query=query, documents=hits)

    log_info(f"Found {len(hits)} documents")
    return hits
588
+
589
+ def _convert_filters(self, filters: Dict[str, Any]) -> Dict[str, Any]:
590
+ """Convert simple filters to ChromaDB's filter format.
591
+
592
+ Handles conversion of simple key-value filters to ChromaDB's operator format
593
+ when needed.
594
+ """
595
+ if not filters:
596
+ return {}
597
+
598
+ # If filters already use ChromaDB operators ($eq, $ne, etc.), return as is
599
+ if any(key.startswith("$") for key in filters.keys()):
600
+ return filters
601
+
602
+ # Convert simple key-value pairs to ChromaDB's format
603
+ converted = {}
604
+ for key, value in filters.items():
605
+ if isinstance(value, (list, tuple)):
606
+ # Convert lists to $in operator
607
+ converted[key] = {"$in": list(value)}
608
+ else:
609
+ # Convert simple equality to $eq
610
+ converted[key] = {"$eq": value}
611
+
612
+ return converted
613
+
614
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Asynchronous wrapper that runs ``search`` on a worker thread.

    Args:
        query (str): Query to search for.
        limit (int): Number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters forwarded to ``search``.

    Returns:
        List[Document]: Whatever ``search`` returns for the same arguments.
    """
    hits = await asyncio.to_thread(self.search, query, limit, filters)
    return hits
619
+
620
def drop(self) -> None:
    """Delete the collection when ``exists()`` reports it; otherwise do nothing."""
    if not self.exists():
        return
    log_debug(f"Deleting collection: {self.collection_name}")
    self.client.delete_collection(name=self.collection_name)
625
+
626
async def async_drop(self) -> None:
    """Asynchronous wrapper that runs ``drop`` on a worker thread."""
    pending_drop = asyncio.to_thread(self.drop)
    await pending_drop
629
+
630
def exists(self) -> bool:
    """Check if the collection exists.

    Returns:
        bool: True when the client can fetch the collection by name; False
        when that lookup raises (the error is logged at debug level).
    """
    try:
        self.client.get_collection(name=self.collection_name)
    except Exception as exc:
        log_debug(f"Collection does not exist: {exc}")
        return False
    return True
638
+
639
async def async_exists(self) -> bool:
    """Asynchronous wrapper that runs ``exists`` on a worker thread.

    Returns:
        bool: Whatever ``exists`` returns.
    """
    present = await asyncio.to_thread(self.exists)
    return present
642
+
643
def get_count(self) -> int:
    """Get the count of documents in the collection.

    Returns:
        int: The collection's ``count()``; 0 when the collection does not
        exist or the count lookup raises (the error is logged).
    """
    if not self.exists():
        return 0
    try:
        return self.client.get_collection(name=self.collection_name).count()
    except Exception as err:
        logger.error(f"Error getting count: {err}")
        return 0
652
+
653
def optimize(self) -> None:
    """Not implemented for this backend.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
655
+
656
def delete(self) -> bool:
    """Delete the whole collection through the client.

    Returns:
        bool: True when the delete call succeeds; False when it raises (the
        error is logged).
    """
    try:
        self.client.delete_collection(name=self.collection_name)
    except Exception as exc:
        logger.error(f"Error clearing collection: {exc}")
        return False
    return True
663
+
664
def delete_by_id(self, id: str) -> bool:
    """Delete the document with the given ID.

    Args:
        id (str): ID of the document to delete.

    Returns:
        bool: True when the document existed and was deleted; False when the
        client is missing, the ID is not found, or an error was logged.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        target = self.client.get_collection(name=self.collection_name)

        # Only issue the delete for an ID that is actually present.
        if self.id_exists(id):
            target.delete(ids=[id])
            log_info(f"Deleted document with ID '{id}'")
            return True

        log_info(f"Document with ID '{id}' not found")
        return False
    except Exception as exc:
        logger.error(f"Error deleting document by ID '{id}': {exc}")
        return False
685
+
686
def delete_by_name(self, name: str) -> bool:
    """Delete every document whose ``name`` metadata equals ``name``.

    Args:
        name (str): Document name to match.

    Returns:
        bool: True when at least one matching document was deleted; False when
        the client is missing, nothing matched, or an error was logged.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        target = self.client.get_collection(name=self.collection_name)

        # Look up the IDs of all documents stored under this name.
        name_filter = cast(Any, {"name": {"$eq": name}})
        matching_ids = target.get(where=name_filter).get("ids", [])

        if matching_ids:
            target.delete(ids=matching_ids)
            log_info(f"Deleted {len(matching_ids)} documents with name '{name}'")
            return True

        log_info(f"No documents found with name '{name}'")
        return False
    except Exception as exc:
        logger.error(f"Error deleting documents by name '{name}': {exc}")
        return False
710
+
711
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete every document whose metadata matches all given key/value pairs.

    Args:
        metadata (Dict[str, Any]): Metadata keys and the exact values they must
            equal. Must not be empty.

    Returns:
        bool: True when at least one matching document was deleted; False when
        the client is missing, ``metadata`` is empty, nothing matched, or an
        error was logged.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    # An empty filter is not a meaningful selector for a delete; depending on
    # the ChromaDB version it may be rejected or treated as "no filter".
    if not metadata:
        log_info(f"No documents found with metadata '{metadata}'")
        return False

    try:
        collection: Collection = self.client.get_collection(name=self.collection_name)

        # Build where clause for metadata filtering. ChromaDB accepts a single
        # top-level field or operator, so several conditions go under $and.
        conditions = [{key: {"$eq": value}} for key, value in metadata.items()]
        where_clause: Dict[str, Any] = conditions[0] if len(conditions) == 1 else {"$and": conditions}

        # Find all documents with the matching metadata
        result = collection.get(where=cast(Any, where_clause))
        ids_to_delete = result.get("ids", [])

        if not ids_to_delete:
            log_info(f"No documents found with metadata '{metadata}'")
            return False

        # Delete all matching documents
        collection.delete(ids=ids_to_delete)
        log_info(f"Deleted {len(ids_to_delete)} documents with metadata '{metadata}'")
        return True
    except Exception as e:
        logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
        return False
740
+
741
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete every document whose ``content_id`` metadata equals ``content_id``.

    Args:
        content_id (str): Content ID to match.

    Returns:
        bool: True when at least one matching document was deleted; False when
        the client is missing, nothing matched, or an error was logged.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        target = self.client.get_collection(name=self.collection_name)

        # Look up the IDs of all documents stored under this content_id.
        content_id_filter = cast(Any, {"content_id": {"$eq": content_id}})
        matching_ids = target.get(where=content_id_filter).get("ids", [])

        if matching_ids:
            target.delete(ids=matching_ids)
            log_info(f"Deleted {len(matching_ids)} documents with content_id '{content_id}'")
            return True

        log_info(f"No documents found with content_id '{content_id}'")
        return False
    except Exception as exc:
        logger.error(f"Error deleting documents by content_id '{content_id}': {exc}")
        return False
765
+
766
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete every document whose ``content_hash`` metadata equals ``content_hash``.

    Args:
        content_hash (str): Content hash to match.

    Returns:
        bool: True when at least one matching document was deleted; False when
        the client is missing, nothing matched, or an error was logged.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        target = self.client.get_collection(name=self.collection_name)

        # Look up the IDs of all documents stored under this content_hash.
        hash_filter = cast(Any, {"content_hash": {"$eq": content_hash}})
        matching_ids = target.get(where=hash_filter).get("ids", [])

        if matching_ids:
            target.delete(ids=matching_ids)
            log_info(f"Deleted {len(matching_ids)} documents with content_hash '{content_hash}'")
            return True

        log_info(f"No documents found with content_hash '{content_hash}'")
        return False
    except Exception as exc:
        logger.error(f"Error deleting documents by content_hash '{content_hash}': {exc}")
        return False
790
+
791
def id_exists(self, id: str) -> bool:
    """Check if a document with the given ID exists in the collection.

    Args:
        id (str): The document ID to check.

    Returns:
        bool: True if a lookup by that ID returns at least one ID; False when
        the client is missing, nothing is found, or an error was logged.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        target = self.client.get_collection(name=self.collection_name)
        # Fetch by ID; a non-empty "ids" list means the document is present.
        lookup = target.get(ids=[id])
        return len(lookup.get("ids", [])) > 0
    except Exception as exc:
        logger.error(f"Error checking if ID '{id}' exists: {exc}")
        return False
815
+
816
def content_hash_exists(self, content_hash: str) -> bool:
    """Check if documents with the given content hash exist.

    Args:
        content_hash (str): Content hash to look for in document metadata.

    Returns:
        bool: True when the lookup yields a non-empty list/tuple of IDs or a
        positive integer; False otherwise, including when the client is
        missing or an error was logged.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        target = self.client.get_collection(name=self.collection_name)

        try:
            hash_filter = cast(Any, {"content_hash": {"$eq": content_hash}})
            lookup = target.get(where=hash_filter)

            # Read "ids" defensively: prefer .get(), fall back to indexing.
            if hasattr(lookup, "get") and callable(lookup.get):
                found_ids = lookup.get("ids", [])
            elif hasattr(lookup, "__getitem__") and "ids" in lookup:
                found_ids = lookup["ids"]
            else:
                found_ids = []

            if isinstance(found_ids, (list, tuple)):
                return len(found_ids) > 0
            if isinstance(found_ids, int):
                # Some ChromaDB versions might return a count instead of a list
                return found_ids > 0
            return False

        except TypeError as te:
            if "object of type 'int' has no len()" not in str(te):
                raise te
            # Known issue with ChromaDB 0.5.0 - internal bug
            # As a workaround, assume content doesn't exist to allow processing to continue
            logger.warning(
                f"ChromaDB internal error (version 0.5.0 bug): {te}. Assuming content_hash '{content_hash}' does not exist."
            )
            return False

    except Exception as exc:
        logger.error(f"Error checking if content_hash '{content_hash}' exists: {exc}")
        return False
859
+
860
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    The new metadata is flattened with ``_flatten_metadata`` and merged over
    each matching document's current metadata; new keys win on conflict.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error other than the known ChromaDB
            "object of type 'int' has no len()" TypeError is logged and re-raised.
    """
    try:
        if not self.client:
            logger.error("Client not initialized")
            return

        collection: Collection = self.client.get_collection(name=self.collection_name)

        # Find documents with the given content_id
        try:
            result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))

            # Extract IDs and current metadata
            if hasattr(result, "get") and callable(result.get):
                ids = result.get("ids", [])
                current_metadatas = result.get("metadatas", [])
            elif hasattr(result, "__getitem__"):
                # No usable .get() on this result, so index it directly.
                ids = result["ids"] if "ids" in result else []
                current_metadatas = result["metadatas"] if "metadatas" in result else []
            else:
                ids = []
                current_metadatas = []

            if not ids:
                logger.debug(f"No documents found with content_id: {content_id}")
                return

            # Flatten the new metadata first
            flattened_new_metadata = self._flatten_metadata(metadata)

            # One metadata entry per ID: pad missing entries with None so the
            # update call always receives lists of equal length.
            existing_metadatas = list(current_metadatas or [])[: len(ids)]
            existing_metadatas += [None] * (len(ids) - len(existing_metadatas))

            # Merge metadata for each document
            updated_metadatas = []
            for current_meta in existing_metadatas:
                meta_dict: Dict[str, Any] = {} if current_meta is None else dict(current_meta)

                # Update with flattened metadata
                meta_dict.update(flattened_new_metadata)
                updated_metadatas.append(meta_dict)

            # Convert to the expected type for ChromaDB
            chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
            collection.update(ids=ids, metadatas=chroma_metadatas)  # type: ignore
            logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")

        except TypeError as te:
            if "object of type 'int' has no len()" in str(te):
                logger.warning(
                    f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
                )
                return
            else:
                raise te

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
926
+
927
def get_supported_search_types(self) -> List[str]:
    """Get the supported search types for this vector database.

    Returns:
        List[str]: Always an empty list.
    """
    supported: List[str] = []  # ChromaDb doesn't use SearchType enum
    return supported