agno 1.8.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (589)
  1. agno/agent/__init__.py +19 -27
  2. agno/agent/agent.py +3143 -4170
  3. agno/api/agent.py +11 -67
  4. agno/api/api.py +5 -46
  5. agno/api/evals.py +8 -19
  6. agno/api/os.py +17 -0
  7. agno/api/routes.py +6 -41
  8. agno/api/schemas/__init__.py +9 -0
  9. agno/api/schemas/agent.py +5 -21
  10. agno/api/schemas/evals.py +7 -16
  11. agno/api/schemas/os.py +14 -0
  12. agno/api/schemas/team.py +5 -21
  13. agno/api/schemas/utils.py +21 -0
  14. agno/api/schemas/workflows.py +11 -7
  15. agno/api/settings.py +53 -0
  16. agno/api/team.py +11 -66
  17. agno/api/workflow.py +28 -0
  18. agno/cloud/aws/base.py +214 -0
  19. agno/cloud/aws/s3/__init__.py +2 -0
  20. agno/cloud/aws/s3/api_client.py +43 -0
  21. agno/cloud/aws/s3/bucket.py +195 -0
  22. agno/cloud/aws/s3/object.py +57 -0
  23. agno/db/__init__.py +24 -0
  24. agno/db/base.py +245 -0
  25. agno/db/dynamo/__init__.py +3 -0
  26. agno/db/dynamo/dynamo.py +1743 -0
  27. agno/db/dynamo/schemas.py +278 -0
  28. agno/db/dynamo/utils.py +684 -0
  29. agno/db/firestore/__init__.py +3 -0
  30. agno/db/firestore/firestore.py +1432 -0
  31. agno/db/firestore/schemas.py +130 -0
  32. agno/db/firestore/utils.py +278 -0
  33. agno/db/gcs_json/__init__.py +3 -0
  34. agno/db/gcs_json/gcs_json_db.py +1001 -0
  35. agno/db/gcs_json/utils.py +194 -0
  36. agno/db/in_memory/__init__.py +3 -0
  37. agno/db/in_memory/in_memory_db.py +882 -0
  38. agno/db/in_memory/utils.py +172 -0
  39. agno/db/json/__init__.py +3 -0
  40. agno/db/json/json_db.py +1045 -0
  41. agno/db/json/utils.py +196 -0
  42. agno/db/migrations/v1_to_v2.py +162 -0
  43. agno/db/mongo/__init__.py +3 -0
  44. agno/db/mongo/mongo.py +1416 -0
  45. agno/db/mongo/schemas.py +77 -0
  46. agno/db/mongo/utils.py +204 -0
  47. agno/db/mysql/__init__.py +3 -0
  48. agno/db/mysql/mysql.py +1719 -0
  49. agno/db/mysql/schemas.py +124 -0
  50. agno/db/mysql/utils.py +297 -0
  51. agno/db/postgres/__init__.py +3 -0
  52. agno/db/postgres/postgres.py +1710 -0
  53. agno/db/postgres/schemas.py +124 -0
  54. agno/db/postgres/utils.py +280 -0
  55. agno/db/redis/__init__.py +3 -0
  56. agno/db/redis/redis.py +1367 -0
  57. agno/db/redis/schemas.py +109 -0
  58. agno/db/redis/utils.py +288 -0
  59. agno/db/schemas/__init__.py +3 -0
  60. agno/db/schemas/evals.py +33 -0
  61. agno/db/schemas/knowledge.py +40 -0
  62. agno/db/schemas/memory.py +46 -0
  63. agno/db/singlestore/__init__.py +3 -0
  64. agno/db/singlestore/schemas.py +116 -0
  65. agno/db/singlestore/singlestore.py +1712 -0
  66. agno/db/singlestore/utils.py +326 -0
  67. agno/db/sqlite/__init__.py +3 -0
  68. agno/db/sqlite/schemas.py +119 -0
  69. agno/db/sqlite/sqlite.py +1676 -0
  70. agno/db/sqlite/utils.py +268 -0
  71. agno/db/utils.py +88 -0
  72. agno/eval/__init__.py +14 -0
  73. agno/eval/accuracy.py +154 -48
  74. agno/eval/performance.py +88 -23
  75. agno/eval/reliability.py +73 -20
  76. agno/eval/utils.py +23 -13
  77. agno/integrations/discord/__init__.py +3 -0
  78. agno/{app → integrations}/discord/client.py +10 -10
  79. agno/knowledge/__init__.py +2 -2
  80. agno/{document → knowledge}/chunking/agentic.py +2 -2
  81. agno/{document → knowledge}/chunking/document.py +2 -2
  82. agno/{document → knowledge}/chunking/fixed.py +3 -3
  83. agno/{document → knowledge}/chunking/markdown.py +2 -2
  84. agno/{document → knowledge}/chunking/recursive.py +2 -2
  85. agno/{document → knowledge}/chunking/row.py +2 -2
  86. agno/knowledge/chunking/semantic.py +59 -0
  87. agno/knowledge/chunking/strategy.py +121 -0
  88. agno/knowledge/content.py +74 -0
  89. agno/knowledge/document/__init__.py +5 -0
  90. agno/{document → knowledge/document}/base.py +12 -2
  91. agno/knowledge/embedder/__init__.py +5 -0
  92. agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
  93. agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
  94. agno/{embedder → knowledge/embedder}/base.py +6 -0
  95. agno/{embedder → knowledge/embedder}/cohere.py +72 -1
  96. agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
  97. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  98. agno/{embedder → knowledge/embedder}/google.py +74 -1
  99. agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
  100. agno/{embedder → knowledge/embedder}/jina.py +48 -2
  101. agno/knowledge/embedder/langdb.py +22 -0
  102. agno/knowledge/embedder/mistral.py +139 -0
  103. agno/{embedder → knowledge/embedder}/nebius.py +1 -1
  104. agno/{embedder → knowledge/embedder}/ollama.py +54 -3
  105. agno/knowledge/embedder/openai.py +223 -0
  106. agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
  107. agno/{embedder → knowledge/embedder}/together.py +1 -1
  108. agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
  109. agno/knowledge/knowledge.py +1551 -0
  110. agno/knowledge/reader/__init__.py +7 -0
  111. agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
  112. agno/knowledge/reader/base.py +88 -0
  113. agno/{document → knowledge}/reader/csv_reader.py +47 -65
  114. agno/knowledge/reader/docx_reader.py +83 -0
  115. agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
  116. agno/{document → knowledge}/reader/json_reader.py +30 -9
  117. agno/{document → knowledge}/reader/markdown_reader.py +58 -9
  118. agno/{document → knowledge}/reader/pdf_reader.py +71 -126
  119. agno/knowledge/reader/reader_factory.py +268 -0
  120. agno/knowledge/reader/s3_reader.py +101 -0
  121. agno/{document → knowledge}/reader/text_reader.py +31 -10
  122. agno/knowledge/reader/url_reader.py +128 -0
  123. agno/knowledge/reader/web_search_reader.py +366 -0
  124. agno/{document → knowledge}/reader/website_reader.py +37 -10
  125. agno/knowledge/reader/wikipedia_reader.py +59 -0
  126. agno/knowledge/reader/youtube_reader.py +78 -0
  127. agno/knowledge/remote_content/remote_content.py +88 -0
  128. agno/{reranker → knowledge/reranker}/base.py +1 -1
  129. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  130. agno/{reranker → knowledge/reranker}/infinity.py +2 -2
  131. agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
  132. agno/knowledge/types.py +30 -0
  133. agno/knowledge/utils.py +169 -0
  134. agno/media.py +269 -268
  135. agno/memory/__init__.py +2 -10
  136. agno/memory/manager.py +1003 -148
  137. agno/models/aimlapi/__init__.py +2 -2
  138. agno/models/aimlapi/aimlapi.py +6 -6
  139. agno/models/anthropic/claude.py +128 -72
  140. agno/models/aws/bedrock.py +107 -175
  141. agno/models/aws/claude.py +64 -18
  142. agno/models/azure/ai_foundry.py +73 -23
  143. agno/models/base.py +346 -290
  144. agno/models/cerebras/cerebras.py +84 -27
  145. agno/models/cohere/chat.py +106 -98
  146. agno/models/google/gemini.py +105 -46
  147. agno/models/groq/groq.py +97 -35
  148. agno/models/huggingface/huggingface.py +92 -27
  149. agno/models/ibm/watsonx.py +72 -13
  150. agno/models/litellm/chat.py +85 -13
  151. agno/models/message.py +46 -151
  152. agno/models/meta/llama.py +85 -49
  153. agno/models/metrics.py +120 -0
  154. agno/models/mistral/mistral.py +90 -21
  155. agno/models/ollama/__init__.py +0 -2
  156. agno/models/ollama/chat.py +85 -47
  157. agno/models/openai/chat.py +154 -37
  158. agno/models/openai/responses.py +178 -105
  159. agno/models/perplexity/perplexity.py +26 -2
  160. agno/models/portkey/portkey.py +0 -7
  161. agno/models/response.py +15 -9
  162. agno/models/utils.py +20 -0
  163. agno/models/vercel/__init__.py +2 -2
  164. agno/models/vercel/v0.py +1 -1
  165. agno/models/vllm/__init__.py +2 -2
  166. agno/models/vllm/vllm.py +3 -3
  167. agno/models/xai/xai.py +10 -10
  168. agno/os/__init__.py +3 -0
  169. agno/os/app.py +497 -0
  170. agno/os/auth.py +47 -0
  171. agno/os/config.py +103 -0
  172. agno/os/interfaces/agui/__init__.py +3 -0
  173. agno/os/interfaces/agui/agui.py +31 -0
  174. agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
  175. agno/{app → os/interfaces}/agui/utils.py +65 -28
  176. agno/os/interfaces/base.py +21 -0
  177. agno/os/interfaces/slack/__init__.py +3 -0
  178. agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
  179. agno/os/interfaces/slack/slack.py +32 -0
  180. agno/os/interfaces/whatsapp/__init__.py +3 -0
  181. agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
  182. agno/os/interfaces/whatsapp/whatsapp.py +29 -0
  183. agno/os/mcp.py +235 -0
  184. agno/os/router.py +1400 -0
  185. agno/os/routers/__init__.py +3 -0
  186. agno/os/routers/evals/__init__.py +3 -0
  187. agno/os/routers/evals/evals.py +393 -0
  188. agno/os/routers/evals/schemas.py +142 -0
  189. agno/os/routers/evals/utils.py +161 -0
  190. agno/os/routers/knowledge/__init__.py +3 -0
  191. agno/os/routers/knowledge/knowledge.py +850 -0
  192. agno/os/routers/knowledge/schemas.py +118 -0
  193. agno/os/routers/memory/__init__.py +3 -0
  194. agno/os/routers/memory/memory.py +410 -0
  195. agno/os/routers/memory/schemas.py +58 -0
  196. agno/os/routers/metrics/__init__.py +3 -0
  197. agno/os/routers/metrics/metrics.py +178 -0
  198. agno/os/routers/metrics/schemas.py +47 -0
  199. agno/os/routers/session/__init__.py +3 -0
  200. agno/os/routers/session/session.py +536 -0
  201. agno/os/schema.py +945 -0
  202. agno/{app/playground → os}/settings.py +7 -15
  203. agno/os/utils.py +270 -0
  204. agno/reasoning/azure_ai_foundry.py +4 -4
  205. agno/reasoning/deepseek.py +4 -4
  206. agno/reasoning/default.py +6 -11
  207. agno/reasoning/groq.py +4 -4
  208. agno/reasoning/helpers.py +4 -6
  209. agno/reasoning/ollama.py +4 -4
  210. agno/reasoning/openai.py +4 -4
  211. agno/run/agent.py +633 -0
  212. agno/run/base.py +53 -77
  213. agno/run/cancel.py +81 -0
  214. agno/run/team.py +243 -96
  215. agno/run/workflow.py +550 -12
  216. agno/session/__init__.py +10 -0
  217. agno/session/agent.py +244 -0
  218. agno/session/summary.py +225 -0
  219. agno/session/team.py +262 -0
  220. agno/{storage/session/v2 → session}/workflow.py +47 -24
  221. agno/team/__init__.py +15 -16
  222. agno/team/team.py +3260 -4824
  223. agno/tools/agentql.py +14 -5
  224. agno/tools/airflow.py +9 -4
  225. agno/tools/api.py +7 -3
  226. agno/tools/apify.py +2 -46
  227. agno/tools/arxiv.py +8 -3
  228. agno/tools/aws_lambda.py +7 -5
  229. agno/tools/aws_ses.py +7 -1
  230. agno/tools/baidusearch.py +4 -1
  231. agno/tools/bitbucket.py +4 -4
  232. agno/tools/brandfetch.py +14 -11
  233. agno/tools/bravesearch.py +4 -1
  234. agno/tools/brightdata.py +43 -23
  235. agno/tools/browserbase.py +13 -4
  236. agno/tools/calcom.py +12 -10
  237. agno/tools/calculator.py +10 -27
  238. agno/tools/cartesia.py +20 -17
  239. agno/tools/{clickup_tool.py → clickup.py} +12 -25
  240. agno/tools/confluence.py +8 -8
  241. agno/tools/crawl4ai.py +7 -1
  242. agno/tools/csv_toolkit.py +9 -8
  243. agno/tools/dalle.py +22 -12
  244. agno/tools/daytona.py +13 -16
  245. agno/tools/decorator.py +6 -3
  246. agno/tools/desi_vocal.py +17 -8
  247. agno/tools/discord.py +11 -8
  248. agno/tools/docker.py +30 -42
  249. agno/tools/duckdb.py +34 -53
  250. agno/tools/duckduckgo.py +8 -7
  251. agno/tools/e2b.py +62 -62
  252. agno/tools/eleven_labs.py +36 -29
  253. agno/tools/email.py +4 -1
  254. agno/tools/evm.py +7 -1
  255. agno/tools/exa.py +19 -14
  256. agno/tools/fal.py +30 -30
  257. agno/tools/file.py +9 -8
  258. agno/tools/financial_datasets.py +25 -44
  259. agno/tools/firecrawl.py +17 -18
  260. agno/tools/function.py +127 -18
  261. agno/tools/giphy.py +23 -11
  262. agno/tools/github.py +48 -126
  263. agno/tools/gmail.py +45 -61
  264. agno/tools/google_bigquery.py +7 -6
  265. agno/tools/google_maps.py +11 -26
  266. agno/tools/googlesearch.py +7 -2
  267. agno/tools/googlesheets.py +21 -17
  268. agno/tools/hackernews.py +9 -5
  269. agno/tools/jina.py +5 -4
  270. agno/tools/jira.py +18 -9
  271. agno/tools/knowledge.py +31 -32
  272. agno/tools/linear.py +18 -33
  273. agno/tools/linkup.py +5 -1
  274. agno/tools/local_file_system.py +8 -5
  275. agno/tools/lumalab.py +32 -20
  276. agno/tools/mcp.py +1 -2
  277. agno/tools/mem0.py +18 -12
  278. agno/tools/memori.py +14 -10
  279. agno/tools/mlx_transcribe.py +3 -2
  280. agno/tools/models/azure_openai.py +33 -15
  281. agno/tools/models/gemini.py +59 -32
  282. agno/tools/models/groq.py +30 -23
  283. agno/tools/models/nebius.py +28 -12
  284. agno/tools/models_labs.py +40 -16
  285. agno/tools/moviepy_video.py +7 -6
  286. agno/tools/neo4j.py +10 -8
  287. agno/tools/newspaper.py +7 -2
  288. agno/tools/newspaper4k.py +8 -3
  289. agno/tools/openai.py +58 -32
  290. agno/tools/openbb.py +12 -11
  291. agno/tools/opencv.py +63 -47
  292. agno/tools/openweather.py +14 -12
  293. agno/tools/pandas.py +11 -3
  294. agno/tools/postgres.py +4 -12
  295. agno/tools/pubmed.py +4 -1
  296. agno/tools/python.py +9 -22
  297. agno/tools/reasoning.py +35 -27
  298. agno/tools/reddit.py +11 -26
  299. agno/tools/replicate.py +55 -42
  300. agno/tools/resend.py +4 -1
  301. agno/tools/scrapegraph.py +15 -14
  302. agno/tools/searxng.py +10 -23
  303. agno/tools/serpapi.py +6 -3
  304. agno/tools/serper.py +13 -4
  305. agno/tools/shell.py +9 -2
  306. agno/tools/slack.py +12 -11
  307. agno/tools/sleep.py +3 -2
  308. agno/tools/spider.py +24 -4
  309. agno/tools/sql.py +7 -6
  310. agno/tools/tavily.py +6 -4
  311. agno/tools/telegram.py +12 -4
  312. agno/tools/todoist.py +11 -31
  313. agno/tools/toolkit.py +1 -1
  314. agno/tools/trafilatura.py +22 -6
  315. agno/tools/trello.py +9 -22
  316. agno/tools/twilio.py +10 -3
  317. agno/tools/user_control_flow.py +6 -1
  318. agno/tools/valyu.py +34 -5
  319. agno/tools/visualization.py +19 -28
  320. agno/tools/webbrowser.py +4 -3
  321. agno/tools/webex.py +11 -7
  322. agno/tools/website.py +15 -46
  323. agno/tools/webtools.py +12 -4
  324. agno/tools/whatsapp.py +5 -9
  325. agno/tools/wikipedia.py +20 -13
  326. agno/tools/x.py +14 -13
  327. agno/tools/yfinance.py +13 -40
  328. agno/tools/youtube.py +26 -20
  329. agno/tools/zendesk.py +7 -2
  330. agno/tools/zep.py +10 -7
  331. agno/tools/zoom.py +10 -9
  332. agno/utils/common.py +1 -19
  333. agno/utils/events.py +100 -123
  334. agno/utils/gemini.py +1 -1
  335. agno/utils/knowledge.py +29 -0
  336. agno/utils/log.py +54 -4
  337. agno/utils/mcp.py +68 -10
  338. agno/utils/media.py +39 -0
  339. agno/utils/message.py +12 -1
  340. agno/utils/models/aws_claude.py +1 -1
  341. agno/utils/models/claude.py +6 -12
  342. agno/utils/models/cohere.py +1 -1
  343. agno/utils/models/mistral.py +8 -7
  344. agno/utils/models/schema_utils.py +3 -3
  345. agno/utils/models/watsonx.py +1 -1
  346. agno/utils/openai.py +1 -1
  347. agno/utils/pprint.py +33 -32
  348. agno/utils/print_response/agent.py +779 -0
  349. agno/utils/print_response/team.py +1669 -0
  350. agno/utils/print_response/workflow.py +1451 -0
  351. agno/utils/prompts.py +14 -14
  352. agno/utils/reasoning.py +87 -0
  353. agno/utils/response.py +42 -42
  354. agno/utils/streamlit.py +481 -0
  355. agno/utils/string.py +8 -22
  356. agno/utils/team.py +50 -0
  357. agno/utils/timer.py +2 -2
  358. agno/vectordb/base.py +33 -21
  359. agno/vectordb/cassandra/cassandra.py +287 -23
  360. agno/vectordb/chroma/chromadb.py +482 -59
  361. agno/vectordb/clickhouse/clickhousedb.py +270 -63
  362. agno/vectordb/couchbase/couchbase.py +309 -29
  363. agno/vectordb/lancedb/lance_db.py +360 -21
  364. agno/vectordb/langchaindb/__init__.py +5 -0
  365. agno/vectordb/langchaindb/langchaindb.py +145 -0
  366. agno/vectordb/lightrag/__init__.py +5 -0
  367. agno/vectordb/lightrag/lightrag.py +374 -0
  368. agno/vectordb/llamaindex/llamaindexdb.py +127 -0
  369. agno/vectordb/milvus/milvus.py +242 -32
  370. agno/vectordb/mongodb/mongodb.py +200 -24
  371. agno/vectordb/pgvector/pgvector.py +319 -37
  372. agno/vectordb/pineconedb/pineconedb.py +221 -27
  373. agno/vectordb/qdrant/qdrant.py +334 -14
  374. agno/vectordb/singlestore/singlestore.py +286 -29
  375. agno/vectordb/surrealdb/surrealdb.py +187 -7
  376. agno/vectordb/upstashdb/upstashdb.py +342 -26
  377. agno/vectordb/weaviate/weaviate.py +227 -165
  378. agno/workflow/__init__.py +17 -13
  379. agno/workflow/{v2/condition.py → condition.py} +135 -32
  380. agno/workflow/{v2/loop.py → loop.py} +115 -28
  381. agno/workflow/{v2/parallel.py → parallel.py} +138 -108
  382. agno/workflow/{v2/router.py → router.py} +133 -32
  383. agno/workflow/{v2/step.py → step.py} +207 -49
  384. agno/workflow/{v2/steps.py → steps.py} +147 -66
  385. agno/workflow/types.py +482 -0
  386. agno/workflow/workflow.py +2410 -696
  387. agno-2.0.0.dist-info/METADATA +494 -0
  388. agno-2.0.0.dist-info/RECORD +515 -0
  389. agno-2.0.0.dist-info/licenses/LICENSE +201 -0
  390. agno/agent/metrics.py +0 -110
  391. agno/api/app.py +0 -35
  392. agno/api/playground.py +0 -92
  393. agno/api/schemas/app.py +0 -12
  394. agno/api/schemas/playground.py +0 -22
  395. agno/api/schemas/user.py +0 -35
  396. agno/api/schemas/workspace.py +0 -46
  397. agno/api/user.py +0 -160
  398. agno/api/workflows.py +0 -33
  399. agno/api/workspace.py +0 -175
  400. agno/app/agui/__init__.py +0 -3
  401. agno/app/agui/app.py +0 -17
  402. agno/app/agui/sync_router.py +0 -120
  403. agno/app/base.py +0 -186
  404. agno/app/discord/__init__.py +0 -3
  405. agno/app/fastapi/__init__.py +0 -3
  406. agno/app/fastapi/app.py +0 -107
  407. agno/app/fastapi/async_router.py +0 -457
  408. agno/app/fastapi/sync_router.py +0 -448
  409. agno/app/playground/app.py +0 -228
  410. agno/app/playground/async_router.py +0 -1053
  411. agno/app/playground/deploy.py +0 -249
  412. agno/app/playground/operator.py +0 -183
  413. agno/app/playground/schemas.py +0 -223
  414. agno/app/playground/serve.py +0 -55
  415. agno/app/playground/sync_router.py +0 -1045
  416. agno/app/playground/utils.py +0 -46
  417. agno/app/settings.py +0 -15
  418. agno/app/slack/__init__.py +0 -3
  419. agno/app/slack/app.py +0 -19
  420. agno/app/slack/sync_router.py +0 -92
  421. agno/app/utils.py +0 -54
  422. agno/app/whatsapp/__init__.py +0 -3
  423. agno/app/whatsapp/app.py +0 -15
  424. agno/app/whatsapp/sync_router.py +0 -197
  425. agno/cli/auth_server.py +0 -249
  426. agno/cli/config.py +0 -274
  427. agno/cli/console.py +0 -88
  428. agno/cli/credentials.py +0 -23
  429. agno/cli/entrypoint.py +0 -571
  430. agno/cli/operator.py +0 -357
  431. agno/cli/settings.py +0 -96
  432. agno/cli/ws/ws_cli.py +0 -817
  433. agno/constants.py +0 -13
  434. agno/document/__init__.py +0 -5
  435. agno/document/chunking/semantic.py +0 -45
  436. agno/document/chunking/strategy.py +0 -31
  437. agno/document/reader/__init__.py +0 -5
  438. agno/document/reader/base.py +0 -47
  439. agno/document/reader/docx_reader.py +0 -60
  440. agno/document/reader/gcs/pdf_reader.py +0 -44
  441. agno/document/reader/s3/pdf_reader.py +0 -59
  442. agno/document/reader/s3/text_reader.py +0 -63
  443. agno/document/reader/url_reader.py +0 -59
  444. agno/document/reader/youtube_reader.py +0 -58
  445. agno/embedder/__init__.py +0 -5
  446. agno/embedder/langdb.py +0 -80
  447. agno/embedder/mistral.py +0 -82
  448. agno/embedder/openai.py +0 -78
  449. agno/file/__init__.py +0 -5
  450. agno/file/file.py +0 -16
  451. agno/file/local/csv.py +0 -32
  452. agno/file/local/txt.py +0 -19
  453. agno/infra/app.py +0 -240
  454. agno/infra/base.py +0 -144
  455. agno/infra/context.py +0 -20
  456. agno/infra/db_app.py +0 -52
  457. agno/infra/resource.py +0 -205
  458. agno/infra/resources.py +0 -55
  459. agno/knowledge/agent.py +0 -702
  460. agno/knowledge/arxiv.py +0 -33
  461. agno/knowledge/combined.py +0 -36
  462. agno/knowledge/csv.py +0 -144
  463. agno/knowledge/csv_url.py +0 -124
  464. agno/knowledge/document.py +0 -223
  465. agno/knowledge/docx.py +0 -137
  466. agno/knowledge/firecrawl.py +0 -34
  467. agno/knowledge/gcs/__init__.py +0 -0
  468. agno/knowledge/gcs/base.py +0 -39
  469. agno/knowledge/gcs/pdf.py +0 -125
  470. agno/knowledge/json.py +0 -137
  471. agno/knowledge/langchain.py +0 -71
  472. agno/knowledge/light_rag.py +0 -273
  473. agno/knowledge/llamaindex.py +0 -66
  474. agno/knowledge/markdown.py +0 -154
  475. agno/knowledge/pdf.py +0 -164
  476. agno/knowledge/pdf_bytes.py +0 -42
  477. agno/knowledge/pdf_url.py +0 -148
  478. agno/knowledge/s3/__init__.py +0 -0
  479. agno/knowledge/s3/base.py +0 -64
  480. agno/knowledge/s3/pdf.py +0 -33
  481. agno/knowledge/s3/text.py +0 -34
  482. agno/knowledge/text.py +0 -141
  483. agno/knowledge/url.py +0 -46
  484. agno/knowledge/website.py +0 -179
  485. agno/knowledge/wikipedia.py +0 -32
  486. agno/knowledge/youtube.py +0 -35
  487. agno/memory/agent.py +0 -423
  488. agno/memory/classifier.py +0 -104
  489. agno/memory/db/__init__.py +0 -5
  490. agno/memory/db/base.py +0 -42
  491. agno/memory/db/mongodb.py +0 -189
  492. agno/memory/db/postgres.py +0 -203
  493. agno/memory/db/sqlite.py +0 -193
  494. agno/memory/memory.py +0 -22
  495. agno/memory/row.py +0 -36
  496. agno/memory/summarizer.py +0 -201
  497. agno/memory/summary.py +0 -19
  498. agno/memory/team.py +0 -415
  499. agno/memory/v2/__init__.py +0 -2
  500. agno/memory/v2/db/__init__.py +0 -1
  501. agno/memory/v2/db/base.py +0 -42
  502. agno/memory/v2/db/firestore.py +0 -339
  503. agno/memory/v2/db/mongodb.py +0 -196
  504. agno/memory/v2/db/postgres.py +0 -214
  505. agno/memory/v2/db/redis.py +0 -187
  506. agno/memory/v2/db/schema.py +0 -54
  507. agno/memory/v2/db/sqlite.py +0 -209
  508. agno/memory/v2/manager.py +0 -437
  509. agno/memory/v2/memory.py +0 -1097
  510. agno/memory/v2/schema.py +0 -55
  511. agno/memory/v2/summarizer.py +0 -215
  512. agno/memory/workflow.py +0 -38
  513. agno/models/ollama/tools.py +0 -430
  514. agno/models/qwen/__init__.py +0 -5
  515. agno/playground/__init__.py +0 -10
  516. agno/playground/deploy.py +0 -3
  517. agno/playground/playground.py +0 -3
  518. agno/playground/serve.py +0 -3
  519. agno/playground/settings.py +0 -3
  520. agno/reranker/__init__.py +0 -0
  521. agno/run/response.py +0 -467
  522. agno/run/v2/__init__.py +0 -0
  523. agno/run/v2/workflow.py +0 -567
  524. agno/storage/__init__.py +0 -0
  525. agno/storage/agent/__init__.py +0 -0
  526. agno/storage/agent/dynamodb.py +0 -1
  527. agno/storage/agent/json.py +0 -1
  528. agno/storage/agent/mongodb.py +0 -1
  529. agno/storage/agent/postgres.py +0 -1
  530. agno/storage/agent/singlestore.py +0 -1
  531. agno/storage/agent/sqlite.py +0 -1
  532. agno/storage/agent/yaml.py +0 -1
  533. agno/storage/base.py +0 -60
  534. agno/storage/dynamodb.py +0 -673
  535. agno/storage/firestore.py +0 -297
  536. agno/storage/gcs_json.py +0 -261
  537. agno/storage/in_memory.py +0 -234
  538. agno/storage/json.py +0 -237
  539. agno/storage/mongodb.py +0 -328
  540. agno/storage/mysql.py +0 -685
  541. agno/storage/postgres.py +0 -682
  542. agno/storage/redis.py +0 -336
  543. agno/storage/session/__init__.py +0 -16
  544. agno/storage/session/agent.py +0 -64
  545. agno/storage/session/team.py +0 -63
  546. agno/storage/session/v2/__init__.py +0 -5
  547. agno/storage/session/workflow.py +0 -61
  548. agno/storage/singlestore.py +0 -606
  549. agno/storage/sqlite.py +0 -646
  550. agno/storage/workflow/__init__.py +0 -0
  551. agno/storage/workflow/mongodb.py +0 -1
  552. agno/storage/workflow/postgres.py +0 -1
  553. agno/storage/workflow/sqlite.py +0 -1
  554. agno/storage/yaml.py +0 -241
  555. agno/tools/thinking.py +0 -73
  556. agno/utils/defaults.py +0 -57
  557. agno/utils/filesystem.py +0 -39
  558. agno/utils/git.py +0 -52
  559. agno/utils/json_io.py +0 -30
  560. agno/utils/load_env.py +0 -19
  561. agno/utils/py_io.py +0 -19
  562. agno/utils/pyproject.py +0 -18
  563. agno/utils/resource_filter.py +0 -31
  564. agno/workflow/v2/__init__.py +0 -21
  565. agno/workflow/v2/types.py +0 -357
  566. agno/workflow/v2/workflow.py +0 -3313
  567. agno/workspace/__init__.py +0 -0
  568. agno/workspace/config.py +0 -325
  569. agno/workspace/enums.py +0 -6
  570. agno/workspace/helpers.py +0 -52
  571. agno/workspace/operator.py +0 -757
  572. agno/workspace/settings.py +0 -158
  573. agno-1.8.2.dist-info/METADATA +0 -982
  574. agno-1.8.2.dist-info/RECORD +0 -566
  575. agno-1.8.2.dist-info/entry_points.txt +0 -3
  576. agno-1.8.2.dist-info/licenses/LICENSE +0 -375
  577. /agno/{app → db/migrations}/__init__.py +0 -0
  578. /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
  579. /agno/{cli → integrations}/__init__.py +0 -0
  580. /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
  581. /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
  582. /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
  583. /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
  584. /agno/{app → os/interfaces}/slack/security.py +0 -0
  585. /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
  586. /agno/{file/local → utils/print_response}/__init__.py +0 -0
  587. /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
  588. {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
  589. {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,21 +1,21 @@
1
1
  import asyncio
2
2
  from hashlib import md5
3
- from typing import Any, Dict, List, Optional
3
+ from typing import Any, Dict, List, Mapping, Optional, Union, cast
4
4
 
5
5
  try:
6
6
  from chromadb import Client as ChromaDbClient
7
7
  from chromadb import PersistentClient as PersistentChromaDbClient
8
8
  from chromadb.api.client import ClientAPI
9
9
  from chromadb.api.models.Collection import Collection
10
- from chromadb.api.types import GetResult, QueryResult
10
+ from chromadb.api.types import QueryResult
11
11
 
12
12
  except ImportError:
13
13
  raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
14
14
 
15
- from agno.document import Document
16
- from agno.embedder import Embedder
17
- from agno.reranker.base import Reranker
18
- from agno.utils.log import log_debug, log_info, logger
15
+ from agno.knowledge.document import Document
16
+ from agno.knowledge.embedder import Embedder
17
+ from agno.knowledge.reranker.base import Reranker
18
+ from agno.utils.log import log_debug, log_error, log_info, logger
19
19
  from agno.vectordb.base import VectorDb
20
20
  from agno.vectordb.distance import Distance
21
21
 
@@ -36,7 +36,7 @@ class ChromaDb(VectorDb):
36
36
 
37
37
  # Embedder for embedding the document contents
38
38
  if embedder is None:
39
- from agno.embedder.openai import OpenAIEmbedder
39
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
40
40
 
41
41
  embedder = OpenAIEmbedder()
42
42
  log_info("Embedder not provided, using OpenAIEmbedder as default.")
@@ -91,60 +91,36 @@ class ChromaDb(VectorDb):
91
91
  """Create the collection asynchronously by running in a thread."""
92
92
  await asyncio.to_thread(self.create)
93
93
 
94
- def doc_exists(self, document: Document) -> bool:
95
- """Check if a document exists in the collection.
94
+ def name_exists(self, name: str) -> bool:
95
+ """Check if a document with a given name exists in the collection.
96
96
  Args:
97
- document (Document): Document to check.
97
+ name (str): Name of the document to check.
98
98
  Returns:
99
- bool: True if document exists, False otherwise.
100
- """
99
+ bool: True if document exists, False otherwise."""
101
100
  if not self.client:
102
101
  logger.warning("Client not initialized")
103
102
  return False
104
103
 
105
104
  try:
106
105
  collection: Collection = self.client.get_collection(name=self.collection_name)
107
- collection_data: GetResult = collection.get(include=["documents"]) # type: ignore
108
- existing_documents = collection_data.get("documents", [])
109
- cleaned_content = document.content.replace("\x00", "\ufffd")
110
- if cleaned_content in existing_documents: # type: ignore
111
- return True
106
+ result = collection.get(where=cast(Any, {"name": {"$eq": name}}), limit=1)
107
+ return len(result.get("ids", [])) > 0
112
108
  except Exception as e:
113
- logger.error(f"Document does not exist: {e}")
114
- return False
115
-
116
- async def async_doc_exists(self, document: Document) -> bool:
117
- """Check if a document exists asynchronously."""
118
- return await asyncio.to_thread(self.doc_exists, document)
119
-
120
- def name_exists(self, name: str) -> bool:
121
- """Check if a document with a given name exists in the collection.
122
- Args:
123
- name (str): Name of the document to check.
124
- Returns:
125
- bool: True if document exists, False otherwise."""
126
- if self.client:
127
- try:
128
- collections: Collection = self.client.get_collection(name=self.collection_name)
129
- for collection in collections: # type: ignore
130
- if name in collection:
131
- return True
132
- except Exception as e:
133
- logger.error(f"Document with given name does not exist: {e}")
109
+ logger.error(f"Error checking name existence: {e}")
134
110
  return False
135
111
 
136
112
  async def async_name_exists(self, name: str) -> bool:
137
113
  """Check if a document with given name exists asynchronously."""
138
114
  return await asyncio.to_thread(self.name_exists, name)
139
115
 
140
- def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
116
+ def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
141
117
  """Insert documents into the collection.
142
118
 
143
119
  Args:
144
120
  documents (List[Document]): List of documents to insert
145
121
  filters (Optional[Dict[str, Any]]): Filters to merge with document metadata
146
122
  """
147
- log_debug(f"Inserting {len(documents)} documents")
123
+ log_info(f"Inserting {len(documents)} documents")
148
124
  ids: List = []
149
125
  docs: List = []
150
126
  docs_embeddings: List = []
@@ -163,6 +139,14 @@ class ChromaDb(VectorDb):
163
139
  if filters:
164
140
  metadata.update(filters)
165
141
 
142
+ # Add name, content_id to metadata
143
+ if document.name is not None:
144
+ metadata["name"] = document.name
145
+ if document.content_id is not None:
146
+ metadata["content_id"] = document.content_id
147
+
148
+ metadata["content_hash"] = content_hash
149
+
166
150
  docs_embeddings.append(document.embedding)
167
151
  docs.append(cleaned_content)
168
152
  ids.append(doc_id)
@@ -176,22 +160,82 @@ class ChromaDb(VectorDb):
176
160
  self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
177
161
  log_debug(f"Committed {len(docs)} documents")
178
162
 
179
- async def async_insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
163
+ async def async_insert(
164
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
165
+ ) -> None:
180
166
  """Insert documents asynchronously by running in a thread."""
181
- await asyncio.to_thread(self.insert, documents, filters)
167
+ log_info(f"Async Inserting {len(documents)} documents")
168
+ ids: List = []
169
+ docs: List = []
170
+ docs_embeddings: List = []
171
+ docs_metadata: List = []
172
+
173
+ if not self._collection:
174
+ self._collection = self.client.get_collection(name=self.collection_name)
175
+
176
+ try:
177
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
178
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
179
+ except Exception as e:
180
+ log_error(f"Error processing document: {e}")
181
+
182
+ for document in documents:
183
+ cleaned_content = document.content.replace("\x00", "\ufffd")
184
+ doc_id = md5(cleaned_content.encode()).hexdigest()
185
+
186
+ # Handle metadata and filters
187
+ metadata = document.meta_data or {}
188
+ if filters:
189
+ metadata.update(filters)
190
+
191
+ # Add name, content_id to metadata
192
+ if document.name is not None:
193
+ metadata["name"] = document.name
194
+ if document.content_id is not None:
195
+ metadata["content_id"] = document.content_id
196
+
197
+ metadata["content_hash"] = content_hash
198
+
199
+ docs_embeddings.append(document.embedding)
200
+ docs.append(cleaned_content)
201
+ ids.append(doc_id)
202
+ docs_metadata.append(metadata)
203
+ log_debug(f"Prepared document: {document.id} | {document.name} | {metadata}")
204
+
205
+ if self._collection is None:
206
+ logger.warning("Collection does not exist")
207
+ else:
208
+ if len(docs) > 0:
209
+ self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
210
+ log_debug(f"Committed {len(docs)} documents")
182
211
 
183
212
  def upsert_available(self) -> bool:
184
213
  """Check if upsert is available in ChromaDB."""
185
214
  return True
186
215
 
187
- def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
216
+ def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
188
217
  """Upsert documents into the collection.
189
218
 
190
219
  Args:
191
220
  documents (List[Document]): List of documents to upsert
192
221
  filters (Optional[Dict[str, Any]]): Filters to apply while upserting
193
222
  """
194
- log_debug(f"Upserting {len(documents)} documents")
223
+ try:
224
+ if self.content_hash_exists(content_hash):
225
+ self._delete_by_content_hash(content_hash)
226
+ self._upsert(content_hash, documents, filters)
227
+ except Exception as e:
228
+ logger.error(f"Error upserting documents by content hash: {e}")
229
+ raise
230
+
231
+ def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
232
+ """Upsert documents into the collection.
233
+
234
+ Args:
235
+ documents (List[Document]): List of documents to upsert
236
+ filters (Optional[Dict[str, Any]]): Filters to apply while upserting
237
+ """
238
+ log_info(f"Upserting {len(documents)} documents")
195
239
  ids: List = []
196
240
  docs: List = []
197
241
  docs_embeddings: List = []
@@ -204,11 +248,25 @@ class ChromaDb(VectorDb):
204
248
  document.embed(embedder=self.embedder)
205
249
  cleaned_content = document.content.replace("\x00", "\ufffd")
206
250
  doc_id = md5(cleaned_content.encode()).hexdigest()
251
+
252
+ # Handle metadata and filters
253
+ metadata = document.meta_data or {}
254
+ if filters:
255
+ metadata.update(filters)
256
+
257
+ # Add name, content_id to metadata
258
+ if document.name is not None:
259
+ metadata["name"] = document.name
260
+ if document.content_id is not None:
261
+ metadata["content_id"] = document.content_id
262
+
263
+ metadata["content_hash"] = content_hash
264
+
207
265
  docs_embeddings.append(document.embedding)
208
266
  docs.append(cleaned_content)
209
267
  ids.append(doc_id)
210
- docs_metadata.append(document.meta_data)
211
- log_debug(f"Upserted document: {document.id} | {document.name} | {document.meta_data}")
268
+ docs_metadata.append(metadata)
269
+ log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")
212
270
 
213
271
  if self._collection is None:
214
272
  logger.warning("Collection does not exist")
@@ -217,9 +275,68 @@ class ChromaDb(VectorDb):
217
275
  self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
218
276
  log_debug(f"Committed {len(docs)} documents")
219
277
 
220
- async def async_upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
278
+ async def _async_upsert(
279
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
280
+ ) -> None:
281
+ """Upsert documents into the collection.
282
+
283
+ Args:
284
+ documents (List[Document]): List of documents to upsert
285
+ filters (Optional[Dict[str, Any]]): Filters to apply while upserting
286
+ """
287
+ log_info(f"Async Upserting {len(documents)} documents")
288
+ ids: List = []
289
+ docs: List = []
290
+ docs_embeddings: List = []
291
+ docs_metadata: List = []
292
+
293
+ if not self._collection:
294
+ self._collection = self.client.get_collection(name=self.collection_name)
295
+
296
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
297
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
298
+
299
+ for document in documents:
300
+ cleaned_content = document.content.replace("\x00", "\ufffd")
301
+ doc_id = md5(cleaned_content.encode()).hexdigest()
302
+
303
+ # Handle metadata and filters
304
+ metadata = document.meta_data or {}
305
+ if filters:
306
+ metadata.update(filters)
307
+
308
+ # Add name, content_id to metadata
309
+ if document.name is not None:
310
+ metadata["name"] = document.name
311
+ if document.content_id is not None:
312
+ metadata["content_id"] = document.content_id
313
+
314
+ metadata["content_hash"] = content_hash
315
+
316
+ docs_embeddings.append(document.embedding)
317
+ docs.append(cleaned_content)
318
+ ids.append(doc_id)
319
+ docs_metadata.append(metadata)
320
+ log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")
321
+
322
+ if self._collection is None:
323
+ logger.warning("Collection does not exist")
324
+ else:
325
+ if len(docs) > 0:
326
+ self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
327
+ log_debug(f"Committed {len(docs)} documents")
328
+
329
+ async def async_upsert(
330
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
331
+ ) -> None:
221
332
  """Upsert documents asynchronously by running in a thread."""
222
- await asyncio.to_thread(self.upsert, documents, filters)
333
+ try:
334
+ if self.content_hash_exists(content_hash):
335
+ self._delete_by_content_hash(content_hash)
336
+ await self._async_upsert(content_hash, documents, filters)
337
+ except Exception as e:
338
+ logger.error(f"Error upserting documents by content hash: {e}")
339
+ raise
223
340
 
224
341
  def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
225
342
  """Search the collection for a query.
@@ -257,24 +374,62 @@ class ChromaDb(VectorDb):
257
374
  # Build search results
258
375
  search_results: List[Document] = []
259
376
 
260
- ids = result.get("ids", [[]])[0]
261
- metadata = result.get("metadatas", [{}])[0]
262
- documents = result.get("documents", [[]])[0]
263
- embeddings = result.get("embeddings")[0]
264
- embeddings = [e.tolist() if hasattr(e, "tolist") else e for e in embeddings]
265
- distances = result.get("distances", [[]])[0]
377
+ ids_list = result.get("ids", [[]])
378
+ metadata_list = result.get("metadatas", [[{}]])
379
+ documents_list = result.get("documents", [[]])
380
+ embeddings_list = result.get("embeddings")
381
+ distances_list = result.get("distances", [[]])
382
+
383
+ if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
384
+ return search_results
385
+
386
+ ids = ids_list[0]
387
+ metadata = [dict(m) if m else {} for m in metadata_list[0]] # Convert to mutable dicts
388
+ documents = documents_list[0]
389
+ embeddings_raw = embeddings_list[0] if embeddings_list else []
390
+ embeddings = []
391
+ for e in embeddings_raw:
392
+ if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
393
+ try:
394
+ embeddings.append(list(cast(Any, e).tolist()))
395
+ except (AttributeError, TypeError):
396
+ embeddings.append(list(e) if isinstance(e, (list, tuple)) else [])
397
+ elif isinstance(e, (list, tuple)):
398
+ embeddings.append([float(x) for x in e if isinstance(x, (int, float))])
399
+ elif isinstance(e, (int, float)):
400
+ embeddings.append([float(e)])
401
+ else:
402
+ embeddings.append([])
403
+ distances = distances_list[0]
266
404
 
267
405
  for idx, distance in enumerate(distances):
268
- metadata[idx]["distances"] = distance
406
+ if idx < len(metadata):
407
+ metadata[idx]["distances"] = distance
269
408
 
270
409
  try:
271
- for idx, (id_, metadata, document) in enumerate(zip(ids, metadata, documents)):
410
+ for idx, (id_, doc_metadata, document) in enumerate(zip(ids, metadata, documents)):
411
+ # Extract the fields we added to metadata
412
+ name_val = doc_metadata.pop("name", None)
413
+ content_id_val = doc_metadata.pop("content_id", None)
414
+
415
+ # Convert types to match Document constructor expectations
416
+ name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
417
+ content_id = (
418
+ str(content_id_val)
419
+ if content_id_val is not None and not isinstance(content_id_val, str)
420
+ else content_id_val
421
+ )
422
+ content = str(document) if document is not None else ""
423
+ embedding = embeddings[idx] if idx < len(embeddings) else None
424
+
272
425
  search_results.append(
273
426
  Document(
274
427
  id=id_,
275
- meta_data=metadata,
276
- content=document,
277
- embedding=embeddings[idx],
428
+ name=name,
429
+ meta_data=doc_metadata,
430
+ content=content,
431
+ embedding=embedding,
432
+ content_id=content_id,
278
433
  )
279
434
  )
280
435
  except Exception as e:
@@ -360,3 +515,271 @@ class ChromaDb(VectorDb):
360
515
  except Exception as e:
361
516
  logger.error(f"Error clearing collection: {e}")
362
517
  return False
518
+
519
+ def delete_by_id(self, id: str) -> bool:
520
+ """Delete document by ID."""
521
+ if not self.client:
522
+ logger.error("Client not initialized")
523
+ return False
524
+
525
+ try:
526
+ collection: Collection = self.client.get_collection(name=self.collection_name)
527
+
528
+ # Check if document exists
529
+ if not self.id_exists(id):
530
+ log_info(f"Document with ID '{id}' not found")
531
+ return False
532
+
533
+ # Delete the document
534
+ collection.delete(ids=[id])
535
+ log_info(f"Deleted document with ID '{id}'")
536
+ return True
537
+ except Exception as e:
538
+ logger.error(f"Error deleting document by ID '{id}': {e}")
539
+ return False
540
+
541
+ def delete_by_name(self, name: str) -> bool:
542
+ """Delete documents by name."""
543
+ if not self.client:
544
+ logger.error("Client not initialized")
545
+ return False
546
+
547
+ try:
548
+ collection: Collection = self.client.get_collection(name=self.collection_name)
549
+
550
+ # Find all documents with the given name
551
+ result = collection.get(where=cast(Any, {"name": {"$eq": name}}))
552
+ ids_to_delete = result.get("ids", [])
553
+
554
+ if not ids_to_delete:
555
+ log_info(f"No documents found with name '{name}'")
556
+ return False
557
+
558
+ # Delete all matching documents
559
+ collection.delete(ids=ids_to_delete)
560
+ log_info(f"Deleted {len(ids_to_delete)} documents with name '{name}'")
561
+ return True
562
+ except Exception as e:
563
+ logger.error(f"Error deleting documents by name '{name}': {e}")
564
+ return False
565
+
566
+ def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
567
+ """Delete documents by metadata."""
568
+ if not self.client:
569
+ logger.error("Client not initialized")
570
+ return False
571
+
572
+ try:
573
+ collection: Collection = self.client.get_collection(name=self.collection_name)
574
+
575
+ # Build where clause for metadata filtering
576
+ where_clause = {}
577
+ for key, value in metadata.items():
578
+ where_clause[key] = {"$eq": value}
579
+
580
+ # Find all documents with the matching metadata
581
+ result = collection.get(where=cast(Any, where_clause))
582
+ ids_to_delete = result.get("ids", [])
583
+
584
+ if not ids_to_delete:
585
+ log_info(f"No documents found with metadata '{metadata}'")
586
+ return False
587
+
588
+ # Delete all matching documents
589
+ collection.delete(ids=ids_to_delete)
590
+ log_info(f"Deleted {len(ids_to_delete)} documents with metadata '{metadata}'")
591
+ return True
592
+ except Exception as e:
593
+ logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
594
+ return False
595
+
596
+ def delete_by_content_id(self, content_id: str) -> bool:
597
+ """Delete documents by content ID."""
598
+ if not self.client:
599
+ logger.error("Client not initialized")
600
+ return False
601
+
602
+ try:
603
+ collection: Collection = self.client.get_collection(name=self.collection_name)
604
+
605
+ # Find all documents with the given content_id
606
+ result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
607
+ ids_to_delete = result.get("ids", [])
608
+
609
+ if not ids_to_delete:
610
+ log_info(f"No documents found with content_id '{content_id}'")
611
+ return False
612
+
613
+ # Delete all matching documents
614
+ collection.delete(ids=ids_to_delete)
615
+ log_info(f"Deleted {len(ids_to_delete)} documents with content_id '{content_id}'")
616
+ return True
617
+ except Exception as e:
618
+ logger.error(f"Error deleting documents by content_id '{content_id}': {e}")
619
+ return False
620
+
621
+ def _delete_by_content_hash(self, content_hash: str) -> bool:
622
+ """Delete documents by content hash."""
623
+ if not self.client:
624
+ logger.error("Client not initialized")
625
+ return False
626
+
627
+ try:
628
+ collection: Collection = self.client.get_collection(name=self.collection_name)
629
+
630
+ # Find all documents with the given content_hash
631
+ result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
632
+ ids_to_delete = result.get("ids", [])
633
+
634
+ if not ids_to_delete:
635
+ log_info(f"No documents found with content_hash '{content_hash}'")
636
+ return False
637
+
638
+ # Delete all matching documents
639
+ collection.delete(ids=ids_to_delete)
640
+ log_info(f"Deleted {len(ids_to_delete)} documents with content_hash '{content_hash}'")
641
+ return True
642
+ except Exception as e:
643
+ logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
644
+ return False
645
+
646
+ def id_exists(self, id: str) -> bool:
647
+ """Check if a document with the given ID exists in the collection.
648
+
649
+ Args:
650
+ id (str): The document ID to check.
651
+
652
+ Returns:
653
+ bool: True if the document exists, False otherwise.
654
+ """
655
+ if not self.client:
656
+ logger.error("Client not initialized")
657
+ return False
658
+
659
+ try:
660
+ collection: Collection = self.client.get_collection(name=self.collection_name)
661
+ print("COLLECTION_----------", collection)
662
+ # Try to get the document by ID
663
+ result = collection.get(ids=[id])
664
+ found_ids = result.get("ids", [])
665
+
666
+ # Return True if the document was found
667
+ return len(found_ids) > 0
668
+ except Exception as e:
669
+ logger.error(f"Error checking if ID '{id}' exists: {e}")
670
+ return False
671
+
672
+ def content_hash_exists(self, content_hash: str) -> bool:
673
+ """Check if documents with the given content hash exist."""
674
+ if not self.client:
675
+ logger.error("Client not initialized")
676
+ return False
677
+
678
+ try:
679
+ collection: Collection = self.client.get_collection(name=self.collection_name)
680
+
681
+ # Try to query for documents with the given content_hash
682
+ try:
683
+ result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
684
+ # Safely extract ids from result
685
+ if hasattr(result, "get") and callable(result.get):
686
+ found_ids = result.get("ids", [])
687
+ elif hasattr(result, "__getitem__") and "ids" in result:
688
+ found_ids = result["ids"]
689
+ else:
690
+ found_ids = []
691
+
692
+ # Return True if any documents were found
693
+ if isinstance(found_ids, (list, tuple)):
694
+ return len(found_ids) > 0
695
+ elif isinstance(found_ids, int):
696
+ # Some ChromaDB versions might return a count instead of a list
697
+ return found_ids > 0
698
+ else:
699
+ return False
700
+
701
+ except TypeError as te:
702
+ if "object of type 'int' has no len()" in str(te):
703
+ # Known issue with ChromaDB 0.5.0 - internal bug
704
+ # As a workaround, assume content doesn't exist to allow processing to continue
705
+ logger.warning(
706
+ f"ChromaDB internal error (version 0.5.0 bug): {te}. Assuming content_hash '{content_hash}' does not exist."
707
+ )
708
+ return False
709
+ else:
710
+ raise te
711
+
712
+ except Exception as e:
713
+ logger.error(f"Error checking if content_hash '{content_hash}' exists: {e}")
714
+ return False
715
+
716
+ def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
717
+ """
718
+ Update the metadata for documents with the given content_id.
719
+
720
+ Args:
721
+ content_id (str): The content ID to update
722
+ metadata (Dict[str, Any]): The metadata to update
723
+ """
724
+ try:
725
+ if not self.client:
726
+ logger.error("Client not initialized")
727
+ return
728
+
729
+ collection: Collection = self.client.get_collection(name=self.collection_name)
730
+
731
+ # Find documents with the given content_id
732
+ try:
733
+ result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
734
+
735
+ # Extract IDs and current metadata
736
+ if hasattr(result, "get") and callable(result.get):
737
+ ids = result.get("ids", [])
738
+ current_metadatas = result.get("metadatas", [])
739
+ elif hasattr(result, "__getitem__"):
740
+ ids = result.get("ids", []) if "ids" in result else []
741
+ current_metadatas = result.get("metadatas", []) if "metadatas" in result else []
742
+ else:
743
+ ids = []
744
+ current_metadatas = []
745
+
746
+ if not ids:
747
+ logger.debug(f"No documents found with content_id: {content_id}")
748
+ return
749
+
750
+ # Merge metadata for each document
751
+ updated_metadatas = []
752
+ for i, current_meta in enumerate(current_metadatas or []):
753
+ if current_meta is None:
754
+ meta_dict: Dict[str, Any] = {}
755
+ else:
756
+ meta_dict = dict(current_meta) # Convert Mapping to dict
757
+ updated_meta: Dict[str, Any] = meta_dict.copy()
758
+ updated_meta.update(metadata)
759
+
760
+ if "filters" not in updated_meta:
761
+ updated_meta["filters"] = {}
762
+ if isinstance(updated_meta["filters"], dict):
763
+ updated_meta["filters"].update(metadata)
764
+ else:
765
+ updated_meta["filters"] = metadata
766
+ updated_metadatas.append(updated_meta)
767
+
768
+ # Update the documents
769
+ # Convert to the expected type for ChromaDB
770
+ chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool, None]]], updated_metadatas)
771
+ collection.update(ids=ids, metadatas=chroma_metadatas)
772
+ logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
773
+
774
+ except TypeError as te:
775
+ if "object of type 'int' has no len()" in str(te):
776
+ logger.warning(
777
+ f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
778
+ )
779
+ return
780
+ else:
781
+ raise te
782
+
783
+ except Exception as e:
784
+ logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
785
+ raise