agno 1.8.1__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (580) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +19 -27
  3. agno/agent/agent.py +2778 -4123
  4. agno/api/agent.py +9 -65
  5. agno/api/api.py +5 -46
  6. agno/api/evals.py +6 -17
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -41
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +5 -21
  11. agno/api/schemas/evals.py +7 -16
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +5 -21
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +11 -7
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +9 -64
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/db/__init__.py +24 -0
  25. agno/db/base.py +245 -0
  26. agno/db/dynamo/__init__.py +3 -0
  27. agno/db/dynamo/dynamo.py +1749 -0
  28. agno/db/dynamo/schemas.py +278 -0
  29. agno/db/dynamo/utils.py +684 -0
  30. agno/db/firestore/__init__.py +3 -0
  31. agno/db/firestore/firestore.py +1438 -0
  32. agno/db/firestore/schemas.py +130 -0
  33. agno/db/firestore/utils.py +278 -0
  34. agno/db/gcs_json/__init__.py +3 -0
  35. agno/db/gcs_json/gcs_json_db.py +1001 -0
  36. agno/db/gcs_json/utils.py +194 -0
  37. agno/db/in_memory/__init__.py +3 -0
  38. agno/db/in_memory/in_memory_db.py +888 -0
  39. agno/db/in_memory/utils.py +172 -0
  40. agno/db/json/__init__.py +3 -0
  41. agno/db/json/json_db.py +1051 -0
  42. agno/db/json/utils.py +196 -0
  43. agno/db/migrations/v1_to_v2.py +162 -0
  44. agno/db/mongo/__init__.py +3 -0
  45. agno/db/mongo/mongo.py +1417 -0
  46. agno/db/mongo/schemas.py +77 -0
  47. agno/db/mongo/utils.py +204 -0
  48. agno/db/mysql/__init__.py +3 -0
  49. agno/db/mysql/mysql.py +1719 -0
  50. agno/db/mysql/schemas.py +124 -0
  51. agno/db/mysql/utils.py +298 -0
  52. agno/db/postgres/__init__.py +3 -0
  53. agno/db/postgres/postgres.py +1720 -0
  54. agno/db/postgres/schemas.py +124 -0
  55. agno/db/postgres/utils.py +281 -0
  56. agno/db/redis/__init__.py +3 -0
  57. agno/db/redis/redis.py +1371 -0
  58. agno/db/redis/schemas.py +109 -0
  59. agno/db/redis/utils.py +288 -0
  60. agno/db/schemas/__init__.py +3 -0
  61. agno/db/schemas/evals.py +33 -0
  62. agno/db/schemas/knowledge.py +40 -0
  63. agno/db/schemas/memory.py +46 -0
  64. agno/db/singlestore/__init__.py +3 -0
  65. agno/db/singlestore/schemas.py +116 -0
  66. agno/db/singlestore/singlestore.py +1722 -0
  67. agno/db/singlestore/utils.py +327 -0
  68. agno/db/sqlite/__init__.py +3 -0
  69. agno/db/sqlite/schemas.py +119 -0
  70. agno/db/sqlite/sqlite.py +1680 -0
  71. agno/db/sqlite/utils.py +269 -0
  72. agno/db/utils.py +88 -0
  73. agno/eval/__init__.py +14 -0
  74. agno/eval/accuracy.py +142 -43
  75. agno/eval/performance.py +88 -23
  76. agno/eval/reliability.py +73 -20
  77. agno/eval/utils.py +23 -13
  78. agno/integrations/discord/__init__.py +3 -0
  79. agno/{app → integrations}/discord/client.py +10 -10
  80. agno/knowledge/__init__.py +2 -2
  81. agno/{document → knowledge}/chunking/agentic.py +2 -2
  82. agno/{document → knowledge}/chunking/document.py +2 -2
  83. agno/{document → knowledge}/chunking/fixed.py +3 -3
  84. agno/{document → knowledge}/chunking/markdown.py +2 -2
  85. agno/{document → knowledge}/chunking/recursive.py +2 -2
  86. agno/{document → knowledge}/chunking/row.py +2 -2
  87. agno/knowledge/chunking/semantic.py +59 -0
  88. agno/knowledge/chunking/strategy.py +121 -0
  89. agno/knowledge/content.py +74 -0
  90. agno/knowledge/document/__init__.py +5 -0
  91. agno/{document → knowledge/document}/base.py +12 -2
  92. agno/knowledge/embedder/__init__.py +5 -0
  93. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  94. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  95. agno/{embedder → knowledge/embedder}/base.py +6 -0
  96. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  97. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  98. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  99. agno/{embedder → knowledge/embedder}/google.py +74 -1
  100. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  101. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  102. agno/knowledge/embedder/langdb.py +22 -0
  103. agno/knowledge/embedder/mistral.py +139 -0
  104. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  105. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  106. agno/knowledge/embedder/openai.py +223 -0
  107. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  108. agno/{embedder → knowledge/embedder}/together.py +1 -1
  109. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  110. agno/knowledge/knowledge.py +1515 -0
  111. agno/knowledge/reader/__init__.py +7 -0
  112. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  113. agno/knowledge/reader/base.py +88 -0
  114. agno/{document → knowledge}/reader/csv_reader.py +68 -15
  115. agno/knowledge/reader/docx_reader.py +83 -0
  116. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  117. agno/knowledge/reader/gcs_reader.py +67 -0
  118. agno/{document → knowledge}/reader/json_reader.py +30 -9
  119. agno/{document → knowledge}/reader/markdown_reader.py +36 -9
  120. agno/{document → knowledge}/reader/pdf_reader.py +79 -21
  121. agno/knowledge/reader/reader_factory.py +275 -0
  122. agno/knowledge/reader/s3_reader.py +171 -0
  123. agno/{document → knowledge}/reader/text_reader.py +31 -10
  124. agno/knowledge/reader/url_reader.py +84 -0
  125. agno/knowledge/reader/web_search_reader.py +389 -0
  126. agno/{document → knowledge}/reader/website_reader.py +37 -10
  127. agno/knowledge/reader/wikipedia_reader.py +59 -0
  128. agno/knowledge/reader/youtube_reader.py +78 -0
  129. agno/knowledge/remote_content/remote_content.py +88 -0
  130. agno/{reranker → knowledge/reranker}/base.py +1 -1
  131. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  132. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  133. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  134. agno/knowledge/types.py +30 -0
  135. agno/knowledge/utils.py +169 -0
  136. agno/memory/__init__.py +2 -10
  137. agno/memory/manager.py +1003 -148
  138. agno/models/aimlapi/__init__.py +2 -2
  139. agno/models/aimlapi/aimlapi.py +6 -6
  140. agno/models/anthropic/claude.py +129 -82
  141. agno/models/aws/bedrock.py +107 -175
  142. agno/models/aws/claude.py +64 -18
  143. agno/models/azure/ai_foundry.py +73 -23
  144. agno/models/base.py +347 -287
  145. agno/models/cerebras/cerebras.py +84 -27
  146. agno/models/cohere/chat.py +106 -98
  147. agno/models/google/gemini.py +100 -42
  148. agno/models/groq/groq.py +97 -35
  149. agno/models/huggingface/huggingface.py +92 -27
  150. agno/models/ibm/watsonx.py +72 -13
  151. agno/models/litellm/chat.py +85 -13
  152. agno/models/message.py +38 -144
  153. agno/models/meta/llama.py +85 -49
  154. agno/models/metrics.py +120 -0
  155. agno/models/mistral/mistral.py +90 -21
  156. agno/models/ollama/__init__.py +0 -2
  157. agno/models/ollama/chat.py +84 -46
  158. agno/models/openai/chat.py +121 -23
  159. agno/models/openai/responses.py +178 -105
  160. agno/models/perplexity/perplexity.py +26 -2
  161. agno/models/portkey/portkey.py +0 -7
  162. agno/models/response.py +14 -8
  163. agno/models/utils.py +20 -0
  164. agno/models/vercel/__init__.py +2 -2
  165. agno/models/vercel/v0.py +1 -1
  166. agno/models/vllm/__init__.py +2 -2
  167. agno/models/vllm/vllm.py +3 -3
  168. agno/models/xai/xai.py +10 -10
  169. agno/os/__init__.py +3 -0
  170. agno/os/app.py +393 -0
  171. agno/os/auth.py +47 -0
  172. agno/os/config.py +103 -0
  173. agno/os/interfaces/agui/__init__.py +3 -0
  174. agno/os/interfaces/agui/agui.py +31 -0
  175. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  176. agno/{app → os/interfaces}/agui/utils.py +65 -28
  177. agno/os/interfaces/base.py +21 -0
  178. agno/os/interfaces/slack/__init__.py +3 -0
  179. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  180. agno/os/interfaces/slack/slack.py +33 -0
  181. agno/os/interfaces/whatsapp/__init__.py +3 -0
  182. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  183. agno/os/interfaces/whatsapp/whatsapp.py +30 -0
  184. agno/os/router.py +843 -0
  185. agno/os/routers/__init__.py +3 -0
  186. agno/os/routers/evals/__init__.py +3 -0
  187. agno/os/routers/evals/evals.py +204 -0
  188. agno/os/routers/evals/schemas.py +142 -0
  189. agno/os/routers/evals/utils.py +161 -0
  190. agno/os/routers/knowledge/__init__.py +3 -0
  191. agno/os/routers/knowledge/knowledge.py +413 -0
  192. agno/os/routers/knowledge/schemas.py +118 -0
  193. agno/os/routers/memory/__init__.py +3 -0
  194. agno/os/routers/memory/memory.py +179 -0
  195. agno/os/routers/memory/schemas.py +58 -0
  196. agno/os/routers/metrics/__init__.py +3 -0
  197. agno/os/routers/metrics/metrics.py +58 -0
  198. agno/os/routers/metrics/schemas.py +47 -0
  199. agno/os/routers/session/__init__.py +3 -0
  200. agno/os/routers/session/session.py +163 -0
  201. agno/os/schema.py +892 -0
  202. agno/{app/playground → os}/settings.py +8 -15
  203. agno/os/utils.py +270 -0
  204. agno/reasoning/azure_ai_foundry.py +4 -4
  205. agno/reasoning/deepseek.py +4 -4
  206. agno/reasoning/default.py +6 -11
  207. agno/reasoning/groq.py +4 -4
  208. agno/reasoning/helpers.py +4 -6
  209. agno/reasoning/ollama.py +4 -4
  210. agno/reasoning/openai.py +4 -4
  211. agno/run/{response.py → agent.py} +144 -72
  212. agno/run/base.py +44 -58
  213. agno/run/cancel.py +83 -0
  214. agno/run/team.py +133 -77
  215. agno/run/workflow.py +537 -12
  216. agno/session/__init__.py +10 -0
  217. agno/session/agent.py +244 -0
  218. agno/session/summary.py +225 -0
  219. agno/session/team.py +262 -0
  220. agno/{storage/session/v2 → session}/workflow.py +47 -24
  221. agno/team/__init__.py +15 -16
  222. agno/team/team.py +2961 -4253
  223. agno/tools/agentql.py +14 -5
  224. agno/tools/airflow.py +9 -4
  225. agno/tools/api.py +7 -3
  226. agno/tools/apify.py +2 -46
  227. agno/tools/arxiv.py +8 -3
  228. agno/tools/aws_lambda.py +7 -5
  229. agno/tools/aws_ses.py +7 -1
  230. agno/tools/baidusearch.py +4 -1
  231. agno/tools/bitbucket.py +4 -4
  232. agno/tools/brandfetch.py +14 -11
  233. agno/tools/bravesearch.py +4 -1
  234. agno/tools/brightdata.py +42 -22
  235. agno/tools/browserbase.py +13 -4
  236. agno/tools/calcom.py +12 -10
  237. agno/tools/calculator.py +10 -27
  238. agno/tools/cartesia.py +18 -13
  239. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  240. agno/tools/confluence.py +8 -8
  241. agno/tools/crawl4ai.py +7 -1
  242. agno/tools/csv_toolkit.py +9 -8
  243. agno/tools/dalle.py +18 -11
  244. agno/tools/daytona.py +13 -16
  245. agno/tools/decorator.py +6 -3
  246. agno/tools/desi_vocal.py +16 -7
  247. agno/tools/discord.py +11 -8
  248. agno/tools/docker.py +30 -42
  249. agno/tools/duckdb.py +34 -53
  250. agno/tools/duckduckgo.py +8 -7
  251. agno/tools/e2b.py +61 -61
  252. agno/tools/eleven_labs.py +35 -28
  253. agno/tools/email.py +4 -1
  254. agno/tools/evm.py +7 -1
  255. agno/tools/exa.py +19 -14
  256. agno/tools/fal.py +29 -29
  257. agno/tools/file.py +9 -8
  258. agno/tools/financial_datasets.py +25 -44
  259. agno/tools/firecrawl.py +22 -22
  260. agno/tools/function.py +68 -17
  261. agno/tools/giphy.py +22 -10
  262. agno/tools/github.py +48 -126
  263. agno/tools/gmail.py +45 -61
  264. agno/tools/google_bigquery.py +7 -6
  265. agno/tools/google_maps.py +11 -26
  266. agno/tools/googlesearch.py +7 -2
  267. agno/tools/googlesheets.py +21 -17
  268. agno/tools/hackernews.py +9 -5
  269. agno/tools/jina.py +5 -4
  270. agno/tools/jira.py +18 -9
  271. agno/tools/knowledge.py +31 -32
  272. agno/tools/linear.py +18 -33
  273. agno/tools/linkup.py +5 -1
  274. agno/tools/local_file_system.py +8 -5
  275. agno/tools/lumalab.py +31 -19
  276. agno/tools/mem0.py +18 -12
  277. agno/tools/memori.py +14 -10
  278. agno/tools/mlx_transcribe.py +3 -2
  279. agno/tools/models/azure_openai.py +32 -14
  280. agno/tools/models/gemini.py +58 -31
  281. agno/tools/models/groq.py +29 -20
  282. agno/tools/models/nebius.py +27 -11
  283. agno/tools/models_labs.py +39 -15
  284. agno/tools/moviepy_video.py +7 -6
  285. agno/tools/neo4j.py +10 -8
  286. agno/tools/newspaper.py +7 -2
  287. agno/tools/newspaper4k.py +8 -3
  288. agno/tools/openai.py +57 -26
  289. agno/tools/openbb.py +12 -11
  290. agno/tools/opencv.py +62 -46
  291. agno/tools/openweather.py +14 -12
  292. agno/tools/pandas.py +11 -3
  293. agno/tools/postgres.py +4 -12
  294. agno/tools/pubmed.py +4 -1
  295. agno/tools/python.py +9 -22
  296. agno/tools/reasoning.py +35 -27
  297. agno/tools/reddit.py +11 -26
  298. agno/tools/replicate.py +54 -41
  299. agno/tools/resend.py +4 -1
  300. agno/tools/scrapegraph.py +15 -14
  301. agno/tools/searxng.py +10 -23
  302. agno/tools/serpapi.py +6 -3
  303. agno/tools/serper.py +13 -4
  304. agno/tools/shell.py +9 -2
  305. agno/tools/slack.py +12 -11
  306. agno/tools/sleep.py +3 -2
  307. agno/tools/spider.py +24 -4
  308. agno/tools/sql.py +7 -6
  309. agno/tools/tavily.py +6 -4
  310. agno/tools/telegram.py +12 -4
  311. agno/tools/todoist.py +11 -31
  312. agno/tools/toolkit.py +1 -1
  313. agno/tools/trafilatura.py +22 -6
  314. agno/tools/trello.py +9 -22
  315. agno/tools/twilio.py +10 -3
  316. agno/tools/user_control_flow.py +6 -1
  317. agno/tools/valyu.py +34 -5
  318. agno/tools/visualization.py +19 -28
  319. agno/tools/webbrowser.py +4 -3
  320. agno/tools/webex.py +11 -7
  321. agno/tools/website.py +15 -46
  322. agno/tools/webtools.py +12 -4
  323. agno/tools/whatsapp.py +5 -9
  324. agno/tools/wikipedia.py +20 -13
  325. agno/tools/x.py +14 -13
  326. agno/tools/yfinance.py +13 -40
  327. agno/tools/youtube.py +26 -20
  328. agno/tools/zendesk.py +7 -2
  329. agno/tools/zep.py +10 -7
  330. agno/tools/zoom.py +10 -9
  331. agno/utils/common.py +1 -19
  332. agno/utils/events.py +95 -118
  333. agno/utils/knowledge.py +29 -0
  334. agno/utils/log.py +2 -2
  335. agno/utils/mcp.py +11 -5
  336. agno/utils/media.py +39 -0
  337. agno/utils/message.py +12 -1
  338. agno/utils/models/claude.py +6 -4
  339. agno/utils/models/mistral.py +8 -7
  340. agno/utils/models/schema_utils.py +3 -3
  341. agno/utils/pprint.py +33 -32
  342. agno/utils/print_response/agent.py +779 -0
  343. agno/utils/print_response/team.py +1565 -0
  344. agno/utils/print_response/workflow.py +1451 -0
  345. agno/utils/prompts.py +14 -14
  346. agno/utils/reasoning.py +87 -0
  347. agno/utils/response.py +42 -42
  348. agno/utils/string.py +8 -22
  349. agno/utils/team.py +50 -0
  350. agno/utils/timer.py +2 -2
  351. agno/vectordb/base.py +33 -21
  352. agno/vectordb/cassandra/cassandra.py +287 -23
  353. agno/vectordb/chroma/chromadb.py +482 -59
  354. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  355. agno/vectordb/couchbase/couchbase.py +309 -29
  356. agno/vectordb/lancedb/lance_db.py +360 -21
  357. agno/vectordb/langchaindb/__init__.py +5 -0
  358. agno/vectordb/langchaindb/langchaindb.py +145 -0
  359. agno/vectordb/lightrag/__init__.py +5 -0
  360. agno/vectordb/lightrag/lightrag.py +374 -0
  361. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  362. agno/vectordb/milvus/milvus.py +242 -32
  363. agno/vectordb/mongodb/mongodb.py +200 -24
  364. agno/vectordb/pgvector/pgvector.py +319 -37
  365. agno/vectordb/pineconedb/pineconedb.py +221 -27
  366. agno/vectordb/qdrant/qdrant.py +334 -14
  367. agno/vectordb/singlestore/singlestore.py +286 -29
  368. agno/vectordb/surrealdb/surrealdb.py +187 -7
  369. agno/vectordb/upstashdb/upstashdb.py +342 -26
  370. agno/vectordb/weaviate/weaviate.py +227 -165
  371. agno/workflow/__init__.py +17 -13
  372. agno/workflow/{v2/condition.py → condition.py} +135 -32
  373. agno/workflow/{v2/loop.py → loop.py} +115 -28
  374. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  375. agno/workflow/{v2/router.py → router.py} +133 -32
  376. agno/workflow/{v2/step.py → step.py} +200 -42
  377. agno/workflow/{v2/steps.py → steps.py} +147 -66
  378. agno/workflow/types.py +482 -0
  379. agno/workflow/workflow.py +2394 -696
  380. agno-2.0.0a1.dist-info/METADATA +355 -0
  381. agno-2.0.0a1.dist-info/RECORD +514 -0
  382. agno/agent/metrics.py +0 -107
  383. agno/api/app.py +0 -35
  384. agno/api/playground.py +0 -92
  385. agno/api/schemas/app.py +0 -12
  386. agno/api/schemas/playground.py +0 -22
  387. agno/api/schemas/user.py +0 -35
  388. agno/api/schemas/workspace.py +0 -46
  389. agno/api/user.py +0 -160
  390. agno/api/workflows.py +0 -33
  391. agno/api/workspace.py +0 -175
  392. agno/app/agui/__init__.py +0 -3
  393. agno/app/agui/app.py +0 -17
  394. agno/app/agui/sync_router.py +0 -120
  395. agno/app/base.py +0 -186
  396. agno/app/discord/__init__.py +0 -3
  397. agno/app/fastapi/__init__.py +0 -3
  398. agno/app/fastapi/app.py +0 -107
  399. agno/app/fastapi/async_router.py +0 -457
  400. agno/app/fastapi/sync_router.py +0 -448
  401. agno/app/playground/app.py +0 -228
  402. agno/app/playground/async_router.py +0 -1050
  403. agno/app/playground/deploy.py +0 -249
  404. agno/app/playground/operator.py +0 -183
  405. agno/app/playground/schemas.py +0 -220
  406. agno/app/playground/serve.py +0 -55
  407. agno/app/playground/sync_router.py +0 -1042
  408. agno/app/playground/utils.py +0 -46
  409. agno/app/settings.py +0 -15
  410. agno/app/slack/__init__.py +0 -3
  411. agno/app/slack/app.py +0 -19
  412. agno/app/slack/sync_router.py +0 -92
  413. agno/app/utils.py +0 -54
  414. agno/app/whatsapp/__init__.py +0 -3
  415. agno/app/whatsapp/app.py +0 -15
  416. agno/app/whatsapp/sync_router.py +0 -197
  417. agno/cli/auth_server.py +0 -249
  418. agno/cli/config.py +0 -274
  419. agno/cli/console.py +0 -88
  420. agno/cli/credentials.py +0 -23
  421. agno/cli/entrypoint.py +0 -571
  422. agno/cli/operator.py +0 -357
  423. agno/cli/settings.py +0 -96
  424. agno/cli/ws/ws_cli.py +0 -817
  425. agno/constants.py +0 -13
  426. agno/document/__init__.py +0 -5
  427. agno/document/chunking/semantic.py +0 -45
  428. agno/document/chunking/strategy.py +0 -31
  429. agno/document/reader/__init__.py +0 -5
  430. agno/document/reader/base.py +0 -47
  431. agno/document/reader/docx_reader.py +0 -60
  432. agno/document/reader/gcs/pdf_reader.py +0 -44
  433. agno/document/reader/s3/pdf_reader.py +0 -59
  434. agno/document/reader/s3/text_reader.py +0 -63
  435. agno/document/reader/url_reader.py +0 -59
  436. agno/document/reader/youtube_reader.py +0 -58
  437. agno/embedder/__init__.py +0 -5
  438. agno/embedder/langdb.py +0 -80
  439. agno/embedder/mistral.py +0 -82
  440. agno/embedder/openai.py +0 -78
  441. agno/file/__init__.py +0 -5
  442. agno/file/file.py +0 -16
  443. agno/file/local/csv.py +0 -32
  444. agno/file/local/txt.py +0 -19
  445. agno/infra/app.py +0 -240
  446. agno/infra/base.py +0 -144
  447. agno/infra/context.py +0 -20
  448. agno/infra/db_app.py +0 -52
  449. agno/infra/resource.py +0 -205
  450. agno/infra/resources.py +0 -55
  451. agno/knowledge/agent.py +0 -702
  452. agno/knowledge/arxiv.py +0 -33
  453. agno/knowledge/combined.py +0 -36
  454. agno/knowledge/csv.py +0 -144
  455. agno/knowledge/csv_url.py +0 -124
  456. agno/knowledge/document.py +0 -223
  457. agno/knowledge/docx.py +0 -137
  458. agno/knowledge/firecrawl.py +0 -34
  459. agno/knowledge/gcs/__init__.py +0 -0
  460. agno/knowledge/gcs/base.py +0 -39
  461. agno/knowledge/gcs/pdf.py +0 -125
  462. agno/knowledge/json.py +0 -137
  463. agno/knowledge/langchain.py +0 -71
  464. agno/knowledge/light_rag.py +0 -273
  465. agno/knowledge/llamaindex.py +0 -66
  466. agno/knowledge/markdown.py +0 -154
  467. agno/knowledge/pdf.py +0 -164
  468. agno/knowledge/pdf_bytes.py +0 -42
  469. agno/knowledge/pdf_url.py +0 -148
  470. agno/knowledge/s3/__init__.py +0 -0
  471. agno/knowledge/s3/base.py +0 -64
  472. agno/knowledge/s3/pdf.py +0 -33
  473. agno/knowledge/s3/text.py +0 -34
  474. agno/knowledge/text.py +0 -141
  475. agno/knowledge/url.py +0 -46
  476. agno/knowledge/website.py +0 -179
  477. agno/knowledge/wikipedia.py +0 -32
  478. agno/knowledge/youtube.py +0 -35
  479. agno/memory/agent.py +0 -423
  480. agno/memory/classifier.py +0 -104
  481. agno/memory/db/__init__.py +0 -5
  482. agno/memory/db/base.py +0 -42
  483. agno/memory/db/mongodb.py +0 -189
  484. agno/memory/db/postgres.py +0 -203
  485. agno/memory/db/sqlite.py +0 -193
  486. agno/memory/memory.py +0 -22
  487. agno/memory/row.py +0 -36
  488. agno/memory/summarizer.py +0 -201
  489. agno/memory/summary.py +0 -19
  490. agno/memory/team.py +0 -415
  491. agno/memory/v2/__init__.py +0 -2
  492. agno/memory/v2/db/__init__.py +0 -1
  493. agno/memory/v2/db/base.py +0 -42
  494. agno/memory/v2/db/firestore.py +0 -339
  495. agno/memory/v2/db/mongodb.py +0 -196
  496. agno/memory/v2/db/postgres.py +0 -214
  497. agno/memory/v2/db/redis.py +0 -187
  498. agno/memory/v2/db/schema.py +0 -54
  499. agno/memory/v2/db/sqlite.py +0 -209
  500. agno/memory/v2/manager.py +0 -437
  501. agno/memory/v2/memory.py +0 -1097
  502. agno/memory/v2/schema.py +0 -55
  503. agno/memory/v2/summarizer.py +0 -215
  504. agno/memory/workflow.py +0 -38
  505. agno/models/ollama/tools.py +0 -430
  506. agno/models/qwen/__init__.py +0 -5
  507. agno/playground/__init__.py +0 -10
  508. agno/playground/deploy.py +0 -3
  509. agno/playground/playground.py +0 -3
  510. agno/playground/serve.py +0 -3
  511. agno/playground/settings.py +0 -3
  512. agno/reranker/__init__.py +0 -0
  513. agno/run/v2/__init__.py +0 -0
  514. agno/run/v2/workflow.py +0 -567
  515. agno/storage/__init__.py +0 -0
  516. agno/storage/agent/__init__.py +0 -0
  517. agno/storage/agent/dynamodb.py +0 -1
  518. agno/storage/agent/json.py +0 -1
  519. agno/storage/agent/mongodb.py +0 -1
  520. agno/storage/agent/postgres.py +0 -1
  521. agno/storage/agent/singlestore.py +0 -1
  522. agno/storage/agent/sqlite.py +0 -1
  523. agno/storage/agent/yaml.py +0 -1
  524. agno/storage/base.py +0 -60
  525. agno/storage/dynamodb.py +0 -673
  526. agno/storage/firestore.py +0 -297
  527. agno/storage/gcs_json.py +0 -261
  528. agno/storage/in_memory.py +0 -234
  529. agno/storage/json.py +0 -237
  530. agno/storage/mongodb.py +0 -328
  531. agno/storage/mysql.py +0 -685
  532. agno/storage/postgres.py +0 -682
  533. agno/storage/redis.py +0 -336
  534. agno/storage/session/__init__.py +0 -16
  535. agno/storage/session/agent.py +0 -64
  536. agno/storage/session/team.py +0 -63
  537. agno/storage/session/v2/__init__.py +0 -5
  538. agno/storage/session/workflow.py +0 -61
  539. agno/storage/singlestore.py +0 -606
  540. agno/storage/sqlite.py +0 -646
  541. agno/storage/workflow/__init__.py +0 -0
  542. agno/storage/workflow/mongodb.py +0 -1
  543. agno/storage/workflow/postgres.py +0 -1
  544. agno/storage/workflow/sqlite.py +0 -1
  545. agno/storage/yaml.py +0 -241
  546. agno/tools/thinking.py +0 -73
  547. agno/utils/defaults.py +0 -57
  548. agno/utils/filesystem.py +0 -39
  549. agno/utils/git.py +0 -52
  550. agno/utils/json_io.py +0 -30
  551. agno/utils/load_env.py +0 -19
  552. agno/utils/py_io.py +0 -19
  553. agno/utils/pyproject.py +0 -18
  554. agno/utils/resource_filter.py +0 -31
  555. agno/workflow/v2/__init__.py +0 -21
  556. agno/workflow/v2/types.py +0 -357
  557. agno/workflow/v2/workflow.py +0 -3312
  558. agno/workspace/__init__.py +0 -0
  559. agno/workspace/config.py +0 -325
  560. agno/workspace/enums.py +0 -6
  561. agno/workspace/helpers.py +0 -52
  562. agno/workspace/operator.py +0 -757
  563. agno/workspace/settings.py +0 -158
  564. agno-1.8.1.dist-info/METADATA +0 -982
  565. agno-1.8.1.dist-info/RECORD +0 -566
  566. agno-1.8.1.dist-info/entry_points.txt +0 -3
  567. /agno/{app → db/migrations}/__init__.py +0 -0
  568. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  569. /agno/{cli → integrations}/__init__.py +0 -0
  570. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  571. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  572. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  573. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  574. /agno/{app → os/interfaces}/slack/security.py +0 -0
  575. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  576. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  577. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  578. {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/WHEEL +0 -0
  579. {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/licenses/LICENSE +0 -0
  580. {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/top_level.txt +0 -0
@@ -1,21 +1,21 @@
1
1
  import asyncio
2
2
  from hashlib import md5
3
- from typing import Any, Dict, List, Optional
3
+ from typing import Any, Dict, List, Mapping, Optional, Union, cast
4
4
 
5
5
  try:
6
6
  from chromadb import Client as ChromaDbClient
7
7
  from chromadb import PersistentClient as PersistentChromaDbClient
8
8
  from chromadb.api.client import ClientAPI
9
9
  from chromadb.api.models.Collection import Collection
10
- from chromadb.api.types import GetResult, QueryResult
10
+ from chromadb.api.types import QueryResult
11
11
 
12
12
  except ImportError:
13
13
  raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
14
14
 
15
- from agno.document import Document
16
- from agno.embedder import Embedder
17
- from agno.reranker.base import Reranker
18
- from agno.utils.log import log_debug, log_info, logger
15
+ from agno.knowledge.document import Document
16
+ from agno.knowledge.embedder import Embedder
17
+ from agno.knowledge.reranker.base import Reranker
18
+ from agno.utils.log import log_debug, log_error, log_info, logger
19
19
  from agno.vectordb.base import VectorDb
20
20
  from agno.vectordb.distance import Distance
21
21
 
@@ -36,7 +36,7 @@ class ChromaDb(VectorDb):
36
36
 
37
37
  # Embedder for embedding the document contents
38
38
  if embedder is None:
39
- from agno.embedder.openai import OpenAIEmbedder
39
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
40
40
 
41
41
  embedder = OpenAIEmbedder()
42
42
  log_info("Embedder not provided, using OpenAIEmbedder as default.")
@@ -91,60 +91,36 @@ class ChromaDb(VectorDb):
91
91
  """Create the collection asynchronously by running in a thread."""
92
92
  await asyncio.to_thread(self.create)
93
93
 
94
- def doc_exists(self, document: Document) -> bool:
95
- """Check if a document exists in the collection.
94
+ def name_exists(self, name: str) -> bool:
95
+ """Check if a document with a given name exists in the collection.
96
96
  Args:
97
- document (Document): Document to check.
97
+ name (str): Name of the document to check.
98
98
  Returns:
99
- bool: True if document exists, False otherwise.
100
- """
99
+ bool: True if document exists, False otherwise."""
101
100
  if not self.client:
102
101
  logger.warning("Client not initialized")
103
102
  return False
104
103
 
105
104
  try:
106
105
  collection: Collection = self.client.get_collection(name=self.collection_name)
107
- collection_data: GetResult = collection.get(include=["documents"]) # type: ignore
108
- existing_documents = collection_data.get("documents", [])
109
- cleaned_content = document.content.replace("\x00", "\ufffd")
110
- if cleaned_content in existing_documents: # type: ignore
111
- return True
106
+ result = collection.get(where=cast(Any, {"name": {"$eq": name}}), limit=1)
107
+ return len(result.get("ids", [])) > 0
112
108
  except Exception as e:
113
- logger.error(f"Document does not exist: {e}")
114
- return False
115
-
116
- async def async_doc_exists(self, document: Document) -> bool:
117
- """Check if a document exists asynchronously."""
118
- return await asyncio.to_thread(self.doc_exists, document)
119
-
120
- def name_exists(self, name: str) -> bool:
121
- """Check if a document with a given name exists in the collection.
122
- Args:
123
- name (str): Name of the document to check.
124
- Returns:
125
- bool: True if document exists, False otherwise."""
126
- if self.client:
127
- try:
128
- collections: Collection = self.client.get_collection(name=self.collection_name)
129
- for collection in collections: # type: ignore
130
- if name in collection:
131
- return True
132
- except Exception as e:
133
- logger.error(f"Document with given name does not exist: {e}")
109
+ logger.error(f"Error checking name existence: {e}")
134
110
  return False
135
111
 
136
112
  async def async_name_exists(self, name: str) -> bool:
137
113
  """Check if a document with given name exists asynchronously."""
138
114
  return await asyncio.to_thread(self.name_exists, name)
139
115
 
140
- def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
116
+ def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
141
117
  """Insert documents into the collection.
142
118
 
143
119
  Args:
144
120
  documents (List[Document]): List of documents to insert
145
121
  filters (Optional[Dict[str, Any]]): Filters to merge with document metadata
146
122
  """
147
- log_debug(f"Inserting {len(documents)} documents")
123
+ log_info(f"Inserting {len(documents)} documents")
148
124
  ids: List = []
149
125
  docs: List = []
150
126
  docs_embeddings: List = []
@@ -163,6 +139,14 @@ class ChromaDb(VectorDb):
163
139
  if filters:
164
140
  metadata.update(filters)
165
141
 
142
+ # Add name, content_id to metadata
143
+ if document.name is not None:
144
+ metadata["name"] = document.name
145
+ if document.content_id is not None:
146
+ metadata["content_id"] = document.content_id
147
+
148
+ metadata["content_hash"] = content_hash
149
+
166
150
  docs_embeddings.append(document.embedding)
167
151
  docs.append(cleaned_content)
168
152
  ids.append(doc_id)
@@ -176,22 +160,82 @@ class ChromaDb(VectorDb):
176
160
  self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
177
161
  log_debug(f"Committed {len(docs)} documents")
178
162
 
179
- async def async_insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
163
+ async def async_insert(
164
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
165
+ ) -> None:
180
166
  """Insert documents asynchronously by running in a thread."""
181
- await asyncio.to_thread(self.insert, documents, filters)
167
+ log_info(f"Async Inserting {len(documents)} documents")
168
+ ids: List = []
169
+ docs: List = []
170
+ docs_embeddings: List = []
171
+ docs_metadata: List = []
172
+
173
+ if not self._collection:
174
+ self._collection = self.client.get_collection(name=self.collection_name)
175
+
176
+ try:
177
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
178
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
179
+ except Exception as e:
180
+ log_error(f"Error processing document: {e}")
181
+
182
+ for document in documents:
183
+ cleaned_content = document.content.replace("\x00", "\ufffd")
184
+ doc_id = md5(cleaned_content.encode()).hexdigest()
185
+
186
+ # Handle metadata and filters
187
+ metadata = document.meta_data or {}
188
+ if filters:
189
+ metadata.update(filters)
190
+
191
+ # Add name, content_id to metadata
192
+ if document.name is not None:
193
+ metadata["name"] = document.name
194
+ if document.content_id is not None:
195
+ metadata["content_id"] = document.content_id
196
+
197
+ metadata["content_hash"] = content_hash
198
+
199
+ docs_embeddings.append(document.embedding)
200
+ docs.append(cleaned_content)
201
+ ids.append(doc_id)
202
+ docs_metadata.append(metadata)
203
+ log_debug(f"Prepared document: {document.id} | {document.name} | {metadata}")
204
+
205
+ if self._collection is None:
206
+ logger.warning("Collection does not exist")
207
+ else:
208
+ if len(docs) > 0:
209
+ self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
210
+ log_debug(f"Committed {len(docs)} documents")
182
211
 
183
212
  def upsert_available(self) -> bool:
184
213
  """Check if upsert is available in ChromaDB."""
185
214
  return True
186
215
 
187
- def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
216
+ def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
188
217
  """Upsert documents into the collection.
189
218
 
190
219
  Args:
191
220
  documents (List[Document]): List of documents to upsert
192
221
  filters (Optional[Dict[str, Any]]): Filters to apply while upserting
193
222
  """
194
- log_debug(f"Upserting {len(documents)} documents")
223
+ try:
224
+ if self.content_hash_exists(content_hash):
225
+ self._delete_by_content_hash(content_hash)
226
+ self._upsert(content_hash, documents, filters)
227
+ except Exception as e:
228
+ logger.error(f"Error upserting documents by content hash: {e}")
229
+ raise
230
+
231
+ def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
232
+ """Upsert documents into the collection.
233
+
234
+ Args:
235
+ documents (List[Document]): List of documents to upsert
236
+ filters (Optional[Dict[str, Any]]): Filters to apply while upserting
237
+ """
238
+ log_info(f"Upserting {len(documents)} documents")
195
239
  ids: List = []
196
240
  docs: List = []
197
241
  docs_embeddings: List = []
@@ -204,11 +248,25 @@ class ChromaDb(VectorDb):
204
248
  document.embed(embedder=self.embedder)
205
249
  cleaned_content = document.content.replace("\x00", "\ufffd")
206
250
  doc_id = md5(cleaned_content.encode()).hexdigest()
251
+
252
+ # Handle metadata and filters
253
+ metadata = document.meta_data or {}
254
+ if filters:
255
+ metadata.update(filters)
256
+
257
+ # Add name, content_id to metadata
258
+ if document.name is not None:
259
+ metadata["name"] = document.name
260
+ if document.content_id is not None:
261
+ metadata["content_id"] = document.content_id
262
+
263
+ metadata["content_hash"] = content_hash
264
+
207
265
  docs_embeddings.append(document.embedding)
208
266
  docs.append(cleaned_content)
209
267
  ids.append(doc_id)
210
- docs_metadata.append(document.meta_data)
211
- log_debug(f"Upserted document: {document.id} | {document.name} | {document.meta_data}")
268
+ docs_metadata.append(metadata)
269
+ log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")
212
270
 
213
271
  if self._collection is None:
214
272
  logger.warning("Collection does not exist")
@@ -217,9 +275,68 @@ class ChromaDb(VectorDb):
217
275
  self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
218
276
  log_debug(f"Committed {len(docs)} documents")
219
277
 
220
- async def async_upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
278
+ async def _async_upsert(
279
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
280
+ ) -> None:
281
+ """Upsert documents into the collection.
282
+
283
+ Args:
284
+ documents (List[Document]): List of documents to upsert
285
+ filters (Optional[Dict[str, Any]]): Filters to apply while upserting
286
+ """
287
+ log_info(f"Async Upserting {len(documents)} documents")
288
+ ids: List = []
289
+ docs: List = []
290
+ docs_embeddings: List = []
291
+ docs_metadata: List = []
292
+
293
+ if not self._collection:
294
+ self._collection = self.client.get_collection(name=self.collection_name)
295
+
296
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
297
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
298
+
299
+ for document in documents:
300
+ cleaned_content = document.content.replace("\x00", "\ufffd")
301
+ doc_id = md5(cleaned_content.encode()).hexdigest()
302
+
303
+ # Handle metadata and filters
304
+ metadata = document.meta_data or {}
305
+ if filters:
306
+ metadata.update(filters)
307
+
308
+ # Add name, content_id to metadata
309
+ if document.name is not None:
310
+ metadata["name"] = document.name
311
+ if document.content_id is not None:
312
+ metadata["content_id"] = document.content_id
313
+
314
+ metadata["content_hash"] = content_hash
315
+
316
+ docs_embeddings.append(document.embedding)
317
+ docs.append(cleaned_content)
318
+ ids.append(doc_id)
319
+ docs_metadata.append(metadata)
320
+ log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")
321
+
322
+ if self._collection is None:
323
+ logger.warning("Collection does not exist")
324
+ else:
325
+ if len(docs) > 0:
326
+ self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
327
+ log_debug(f"Committed {len(docs)} documents")
328
+
329
+ async def async_upsert(
330
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
331
+ ) -> None:
221
332
  """Upsert documents asynchronously by running in a thread."""
222
- await asyncio.to_thread(self.upsert, documents, filters)
333
+ try:
334
+ if self.content_hash_exists(content_hash):
335
+ self._delete_by_content_hash(content_hash)
336
+ await self._async_upsert(content_hash, documents, filters)
337
+ except Exception as e:
338
+ logger.error(f"Error upserting documents by content hash: {e}")
339
+ raise
223
340
 
224
341
  def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
225
342
  """Search the collection for a query.
@@ -257,24 +374,62 @@ class ChromaDb(VectorDb):
257
374
  # Build search results
258
375
  search_results: List[Document] = []
259
376
 
260
- ids = result.get("ids", [[]])[0]
261
- metadata = result.get("metadatas", [{}])[0]
262
- documents = result.get("documents", [[]])[0]
263
- embeddings = result.get("embeddings")[0]
264
- embeddings = [e.tolist() if hasattr(e, "tolist") else e for e in embeddings]
265
- distances = result.get("distances", [[]])[0]
377
+ ids_list = result.get("ids", [[]])
378
+ metadata_list = result.get("metadatas", [[{}]])
379
+ documents_list = result.get("documents", [[]])
380
+ embeddings_list = result.get("embeddings")
381
+ distances_list = result.get("distances", [[]])
382
+
383
+ if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
384
+ return search_results
385
+
386
+ ids = ids_list[0]
387
+ metadata = [dict(m) if m else {} for m in metadata_list[0]] # Convert to mutable dicts
388
+ documents = documents_list[0]
389
+ embeddings_raw = embeddings_list[0] if embeddings_list else []
390
+ embeddings = []
391
+ for e in embeddings_raw:
392
+ if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
393
+ try:
394
+ embeddings.append(list(cast(Any, e).tolist()))
395
+ except (AttributeError, TypeError):
396
+ embeddings.append(list(e) if isinstance(e, (list, tuple)) else [])
397
+ elif isinstance(e, (list, tuple)):
398
+ embeddings.append([float(x) for x in e if isinstance(x, (int, float))])
399
+ elif isinstance(e, (int, float)):
400
+ embeddings.append([float(e)])
401
+ else:
402
+ embeddings.append([])
403
+ distances = distances_list[0]
266
404
 
267
405
  for idx, distance in enumerate(distances):
268
- metadata[idx]["distances"] = distance
406
+ if idx < len(metadata):
407
+ metadata[idx]["distances"] = distance
269
408
 
270
409
  try:
271
- for idx, (id_, metadata, document) in enumerate(zip(ids, metadata, documents)):
410
+ for idx, (id_, doc_metadata, document) in enumerate(zip(ids, metadata, documents)):
411
+ # Extract the fields we added to metadata
412
+ name_val = doc_metadata.pop("name", None)
413
+ content_id_val = doc_metadata.pop("content_id", None)
414
+
415
+ # Convert types to match Document constructor expectations
416
+ name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
417
+ content_id = (
418
+ str(content_id_val)
419
+ if content_id_val is not None and not isinstance(content_id_val, str)
420
+ else content_id_val
421
+ )
422
+ content = str(document) if document is not None else ""
423
+ embedding = embeddings[idx] if idx < len(embeddings) else None
424
+
272
425
  search_results.append(
273
426
  Document(
274
427
  id=id_,
275
- meta_data=metadata,
276
- content=document,
277
- embedding=embeddings[idx],
428
+ name=name,
429
+ meta_data=doc_metadata,
430
+ content=content,
431
+ embedding=embedding,
432
+ content_id=content_id,
278
433
  )
279
434
  )
280
435
  except Exception as e:
@@ -360,3 +515,271 @@ class ChromaDb(VectorDb):
360
515
  except Exception as e:
361
516
  logger.error(f"Error clearing collection: {e}")
362
517
  return False
518
+
519
+ def delete_by_id(self, id: str) -> bool:
520
+ """Delete document by ID."""
521
+ if not self.client:
522
+ logger.error("Client not initialized")
523
+ return False
524
+
525
+ try:
526
+ collection: Collection = self.client.get_collection(name=self.collection_name)
527
+
528
+ # Check if document exists
529
+ if not self.id_exists(id):
530
+ log_info(f"Document with ID '{id}' not found")
531
+ return False
532
+
533
+ # Delete the document
534
+ collection.delete(ids=[id])
535
+ log_info(f"Deleted document with ID '{id}'")
536
+ return True
537
+ except Exception as e:
538
+ logger.error(f"Error deleting document by ID '{id}': {e}")
539
+ return False
540
+
541
+ def delete_by_name(self, name: str) -> bool:
542
+ """Delete documents by name."""
543
+ if not self.client:
544
+ logger.error("Client not initialized")
545
+ return False
546
+
547
+ try:
548
+ collection: Collection = self.client.get_collection(name=self.collection_name)
549
+
550
+ # Find all documents with the given name
551
+ result = collection.get(where=cast(Any, {"name": {"$eq": name}}))
552
+ ids_to_delete = result.get("ids", [])
553
+
554
+ if not ids_to_delete:
555
+ log_info(f"No documents found with name '{name}'")
556
+ return False
557
+
558
+ # Delete all matching documents
559
+ collection.delete(ids=ids_to_delete)
560
+ log_info(f"Deleted {len(ids_to_delete)} documents with name '{name}'")
561
+ return True
562
+ except Exception as e:
563
+ logger.error(f"Error deleting documents by name '{name}': {e}")
564
+ return False
565
+
566
+ def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
567
+ """Delete documents by metadata."""
568
+ if not self.client:
569
+ logger.error("Client not initialized")
570
+ return False
571
+
572
+ try:
573
+ collection: Collection = self.client.get_collection(name=self.collection_name)
574
+
575
+ # Build where clause for metadata filtering
576
+ where_clause = {}
577
+ for key, value in metadata.items():
578
+ where_clause[key] = {"$eq": value}
579
+
580
+ # Find all documents with the matching metadata
581
+ result = collection.get(where=cast(Any, where_clause))
582
+ ids_to_delete = result.get("ids", [])
583
+
584
+ if not ids_to_delete:
585
+ log_info(f"No documents found with metadata '{metadata}'")
586
+ return False
587
+
588
+ # Delete all matching documents
589
+ collection.delete(ids=ids_to_delete)
590
+ log_info(f"Deleted {len(ids_to_delete)} documents with metadata '{metadata}'")
591
+ return True
592
+ except Exception as e:
593
+ logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
594
+ return False
595
+
596
+ def delete_by_content_id(self, content_id: str) -> bool:
597
+ """Delete documents by content ID."""
598
+ if not self.client:
599
+ logger.error("Client not initialized")
600
+ return False
601
+
602
+ try:
603
+ collection: Collection = self.client.get_collection(name=self.collection_name)
604
+
605
+ # Find all documents with the given content_id
606
+ result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
607
+ ids_to_delete = result.get("ids", [])
608
+
609
+ if not ids_to_delete:
610
+ log_info(f"No documents found with content_id '{content_id}'")
611
+ return False
612
+
613
+ # Delete all matching documents
614
+ collection.delete(ids=ids_to_delete)
615
+ log_info(f"Deleted {len(ids_to_delete)} documents with content_id '{content_id}'")
616
+ return True
617
+ except Exception as e:
618
+ logger.error(f"Error deleting documents by content_id '{content_id}': {e}")
619
+ return False
620
+
621
+ def _delete_by_content_hash(self, content_hash: str) -> bool:
622
+ """Delete documents by content hash."""
623
+ if not self.client:
624
+ logger.error("Client not initialized")
625
+ return False
626
+
627
+ try:
628
+ collection: Collection = self.client.get_collection(name=self.collection_name)
629
+
630
+ # Find all documents with the given content_hash
631
+ result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
632
+ ids_to_delete = result.get("ids", [])
633
+
634
+ if not ids_to_delete:
635
+ log_info(f"No documents found with content_hash '{content_hash}'")
636
+ return False
637
+
638
+ # Delete all matching documents
639
+ collection.delete(ids=ids_to_delete)
640
+ log_info(f"Deleted {len(ids_to_delete)} documents with content_hash '{content_hash}'")
641
+ return True
642
+ except Exception as e:
643
+ logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
644
+ return False
645
+
646
+ def id_exists(self, id: str) -> bool:
647
+ """Check if a document with the given ID exists in the collection.
648
+
649
+ Args:
650
+ id (str): The document ID to check.
651
+
652
+ Returns:
653
+ bool: True if the document exists, False otherwise.
654
+ """
655
+ if not self.client:
656
+ logger.error("Client not initialized")
657
+ return False
658
+
659
+ try:
660
+ collection: Collection = self.client.get_collection(name=self.collection_name)
661
+ print("COLLECTION_----------", collection)
662
+ # Try to get the document by ID
663
+ result = collection.get(ids=[id])
664
+ found_ids = result.get("ids", [])
665
+
666
+ # Return True if the document was found
667
+ return len(found_ids) > 0
668
+ except Exception as e:
669
+ logger.error(f"Error checking if ID '{id}' exists: {e}")
670
+ return False
671
+
672
+ def content_hash_exists(self, content_hash: str) -> bool:
673
+ """Check if documents with the given content hash exist."""
674
+ if not self.client:
675
+ logger.error("Client not initialized")
676
+ return False
677
+
678
+ try:
679
+ collection: Collection = self.client.get_collection(name=self.collection_name)
680
+
681
+ # Try to query for documents with the given content_hash
682
+ try:
683
+ result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
684
+ # Safely extract ids from result
685
+ if hasattr(result, "get") and callable(result.get):
686
+ found_ids = result.get("ids", [])
687
+ elif hasattr(result, "__getitem__") and "ids" in result:
688
+ found_ids = result["ids"]
689
+ else:
690
+ found_ids = []
691
+
692
+ # Return True if any documents were found
693
+ if isinstance(found_ids, (list, tuple)):
694
+ return len(found_ids) > 0
695
+ elif isinstance(found_ids, int):
696
+ # Some ChromaDB versions might return a count instead of a list
697
+ return found_ids > 0
698
+ else:
699
+ return False
700
+
701
+ except TypeError as te:
702
+ if "object of type 'int' has no len()" in str(te):
703
+ # Known issue with ChromaDB 0.5.0 - internal bug
704
+ # As a workaround, assume content doesn't exist to allow processing to continue
705
+ logger.warning(
706
+ f"ChromaDB internal error (version 0.5.0 bug): {te}. Assuming content_hash '{content_hash}' does not exist."
707
+ )
708
+ return False
709
+ else:
710
+ raise te
711
+
712
+ except Exception as e:
713
+ logger.error(f"Error checking if content_hash '{content_hash}' exists: {e}")
714
+ return False
715
+
716
+ def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
717
+ """
718
+ Update the metadata for documents with the given content_id.
719
+
720
+ Args:
721
+ content_id (str): The content ID to update
722
+ metadata (Dict[str, Any]): The metadata to update
723
+ """
724
+ try:
725
+ if not self.client:
726
+ logger.error("Client not initialized")
727
+ return
728
+
729
+ collection: Collection = self.client.get_collection(name=self.collection_name)
730
+
731
+ # Find documents with the given content_id
732
+ try:
733
+ result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
734
+
735
+ # Extract IDs and current metadata
736
+ if hasattr(result, "get") and callable(result.get):
737
+ ids = result.get("ids", [])
738
+ current_metadatas = result.get("metadatas", [])
739
+ elif hasattr(result, "__getitem__"):
740
+ ids = result.get("ids", []) if "ids" in result else []
741
+ current_metadatas = result.get("metadatas", []) if "metadatas" in result else []
742
+ else:
743
+ ids = []
744
+ current_metadatas = []
745
+
746
+ if not ids:
747
+ logger.debug(f"No documents found with content_id: {content_id}")
748
+ return
749
+
750
+ # Merge metadata for each document
751
+ updated_metadatas = []
752
+ for i, current_meta in enumerate(current_metadatas or []):
753
+ if current_meta is None:
754
+ meta_dict: Dict[str, Any] = {}
755
+ else:
756
+ meta_dict = dict(current_meta) # Convert Mapping to dict
757
+ updated_meta: Dict[str, Any] = meta_dict.copy()
758
+ updated_meta.update(metadata)
759
+
760
+ if "filters" not in updated_meta:
761
+ updated_meta["filters"] = {}
762
+ if isinstance(updated_meta["filters"], dict):
763
+ updated_meta["filters"].update(metadata)
764
+ else:
765
+ updated_meta["filters"] = metadata
766
+ updated_metadatas.append(updated_meta)
767
+
768
+ # Update the documents
769
+ # Convert to the expected type for ChromaDB
770
+ chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool, None]]], updated_metadatas)
771
+ collection.update(ids=ids, metadatas=chroma_metadatas)
772
+ logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
773
+
774
+ except TypeError as te:
775
+ if "object of type 'int' has no len()" in str(te):
776
+ logger.warning(
777
+ f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
778
+ )
779
+ return
780
+ else:
781
+ raise te
782
+
783
+ except Exception as e:
784
+ logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
785
+ raise