agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,21 +1,23 @@
1
+ import asyncio
2
+ import json
1
3
  from hashlib import md5
2
- from typing import Any, Dict, List, Optional
4
+ from typing import Any, Dict, List, Mapping, Optional, Union, cast
3
5
 
4
6
  try:
5
7
  from chromadb import Client as ChromaDbClient
6
8
  from chromadb import PersistentClient as PersistentChromaDbClient
7
9
  from chromadb.api.client import ClientAPI
8
10
  from chromadb.api.models.Collection import Collection
9
- from chromadb.api.types import GetResult, IncludeEnum, QueryResult
11
+ from chromadb.api.types import QueryResult
10
12
 
11
13
  except ImportError:
12
14
  raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
13
15
 
14
- from agno.document import Document
15
- from agno.embedder import Embedder
16
- from agno.embedder.openai import OpenAIEmbedder
17
- from agno.reranker.base import Reranker
18
- from agno.utils.log import logger
16
+ from agno.filters import FilterExpr
17
+ from agno.knowledge.document import Document
18
+ from agno.knowledge.embedder import Embedder
19
+ from agno.knowledge.reranker.base import Reranker
20
+ from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
19
21
  from agno.vectordb.base import VectorDb
20
22
  from agno.vectordb.distance import Distance
21
23
 
@@ -24,19 +26,39 @@ class ChromaDb(VectorDb):
24
26
  def __init__(
25
27
  self,
26
28
  collection: str,
27
- embedder: Embedder = OpenAIEmbedder(),
29
+ name: Optional[str] = None,
30
+ description: Optional[str] = None,
31
+ id: Optional[str] = None,
32
+ embedder: Optional[Embedder] = None,
28
33
  distance: Distance = Distance.cosine,
29
34
  path: str = "tmp/chromadb",
30
35
  persistent_client: bool = False,
31
36
  reranker: Optional[Reranker] = None,
32
37
  **kwargs,
33
38
  ):
34
- # Collection attributes
35
- self.collection: str = collection
39
+ # Validate required parameters
40
+ if not collection:
41
+ raise ValueError("Collection name must be provided.")
42
+
43
+ # Dynamic ID generation based on unique identifiers
44
+ if id is None:
45
+ from agno.utils.string import generate_id
46
+
47
+ seed = f"{path}#{collection}"
48
+ id = generate_id(seed)
49
+
50
+ # Initialize base class with name, description, and generated ID
51
+ super().__init__(id=id, name=name, description=description)
36
52
 
53
+ # Collection attributes
54
+ self.collection_name: str = collection
37
55
  # Embedder for embedding the document contents
38
- self.embedder: Embedder = embedder
56
+ if embedder is None:
57
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
39
58
 
59
+ embedder = OpenAIEmbedder()
60
+ log_info("Embedder not provided, using OpenAIEmbedder as default.")
61
+ self.embedder: Embedder = embedder
40
62
  # Distance metric
41
63
  self.distance: Distance = distance
42
64
 
@@ -56,16 +78,54 @@ class ChromaDb(VectorDb):
56
78
  # Chroma client kwargs
57
79
  self.kwargs = kwargs
58
80
 
81
+ def _flatten_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Union[str, int, float, bool]]:
82
+ """
83
+ Flatten nested metadata to ChromaDB-compatible format.
84
+
85
+ Args:
86
+ metadata: Dictionary that may contain nested structures
87
+
88
+ Returns:
89
+ Flattened dictionary with only primitive values
90
+ """
91
+ flattened: Dict[str, Any] = {}
92
+
93
+ def _flatten_recursive(obj: Any, prefix: str = "") -> None:
94
+ if isinstance(obj, dict):
95
+ if len(obj) == 0:
96
+ # Handle empty dictionaries by converting to JSON string
97
+ flattened[prefix] = json.dumps(obj)
98
+ else:
99
+ for key, value in obj.items():
100
+ new_key = f"{prefix}.{key}" if prefix else key
101
+ _flatten_recursive(value, new_key)
102
+ elif isinstance(obj, (list, tuple)):
103
+ # Convert lists/tuples to JSON strings
104
+ flattened[prefix] = json.dumps(obj)
105
+ elif isinstance(obj, (str, int, float, bool)) or obj is None:
106
+ if obj is not None: # ChromaDB doesn't accept None values
107
+ flattened[prefix] = obj
108
+ else:
109
+ # Convert other complex types to JSON strings
110
+ try:
111
+ flattened[prefix] = json.dumps(obj)
112
+ except (TypeError, ValueError):
113
+ # If it can't be serialized, convert to string
114
+ flattened[prefix] = str(obj)
115
+
116
+ _flatten_recursive(metadata)
117
+ return flattened
118
+
59
119
  @property
60
120
  def client(self) -> ClientAPI:
61
121
  if self._client is None:
62
122
  if not self.persistent_client:
63
- logger.debug("Creating Chroma Client")
123
+ log_debug("Creating Chroma Client")
64
124
  self._client = ChromaDbClient(
65
125
  **self.kwargs,
66
126
  )
67
127
  elif self.persistent_client:
68
- logger.debug("Creating Persistent Chroma Client")
128
+ log_debug("Creating Persistent Chroma Client")
69
129
  self._client = PersistentChromaDbClient(
70
130
  path=self.path,
71
131
  **self.kwargs,
@@ -74,32 +134,18 @@ class ChromaDb(VectorDb):
74
134
 
75
135
  def create(self) -> None:
76
136
  """Create the collection in ChromaDb."""
77
- if not self.exists():
78
- logger.debug(f"Creating collection: {self.collection}")
137
+ if self.exists():
138
+ log_debug(f"Collection already exists: {self.collection_name}")
139
+ self._collection = self.client.get_collection(name=self.collection_name)
140
+ else:
141
+ log_debug(f"Creating collection: {self.collection_name}")
79
142
  self._collection = self.client.create_collection(
80
- name=self.collection, metadata={"hnsw:space": self.distance.value}
143
+ name=self.collection_name, metadata={"hnsw:space": self.distance.value}
81
144
  )
82
145
 
83
- else:
84
- logger.debug(f"Collection already exists: {self.collection}")
85
- self._collection = self.client.get_collection(name=self.collection)
86
-
87
- def doc_exists(self, document: Document) -> bool:
88
- """Check if a document exists in the collection.
89
- Args:
90
- document (Document): Document to check.
91
- Returns:
92
- bool: True if document exists, False otherwise.
93
- """
94
- if self.client:
95
- try:
96
- collection: Collection = self.client.get_collection(name=self.collection)
97
- collection_data: GetResult = collection.get(include=[IncludeEnum.documents])
98
- if collection_data.get("documents") != []:
99
- return True
100
- except Exception as e:
101
- logger.error(f"Document does not exist: {e}")
102
- return False
146
+ async def async_create(self) -> None:
147
+ """Create the collection asynchronously by running in a thread."""
148
+ await asyncio.to_thread(self.create)
103
149
 
104
150
  def name_exists(self, name: str) -> bool:
105
151
  """Check if a document with a given name exists in the collection.
@@ -107,121 +153,432 @@ class ChromaDb(VectorDb):
107
153
  name (str): Name of the document to check.
108
154
  Returns:
109
155
  bool: True if document exists, False otherwise."""
110
- if self.client:
111
- try:
112
- collections: Collection = self.client.get_collection(name=self.collection)
113
- for collection in collections: # type: ignore
114
- if name in collection:
115
- return True
116
- except Exception as e:
117
- logger.error(f"Document with given name does not exist: {e}")
156
+ if not self.client:
157
+ logger.warning("Client not initialized")
158
+ return False
159
+
160
+ try:
161
+ collection: Collection = self.client.get_collection(name=self.collection_name)
162
+ result = collection.get(where=cast(Any, {"name": {"$eq": name}}), limit=1)
163
+ return len(result.get("ids", [])) > 0
164
+ except Exception as e:
165
+ logger.error(f"Error checking name existence: {e}")
118
166
  return False
119
167
 
120
- def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
168
+ async def async_name_exists(self, name: str) -> bool:
169
+ """Check if a document with given name exists asynchronously."""
170
+ return await asyncio.to_thread(self.name_exists, name)
171
+
172
+ def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
121
173
  """Insert documents into the collection.
122
174
 
123
175
  Args:
124
176
  documents (List[Document]): List of documents to insert
125
- filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
177
+ filters (Optional[Dict[str, Any]]): Filters to merge with document metadata
126
178
  """
127
- logger.debug(f"Inserting {len(documents)} documents")
179
+ log_info(f"Inserting {len(documents)} documents")
128
180
  ids: List = []
129
181
  docs: List = []
130
182
  docs_embeddings: List = []
183
+ docs_metadata: List = []
184
+
185
+ if not self._collection:
186
+ self._collection = self.client.get_collection(name=self.collection_name)
131
187
 
132
188
  for document in documents:
133
189
  document.embed(embedder=self.embedder)
134
190
  cleaned_content = document.content.replace("\x00", "\ufffd")
135
191
  doc_id = md5(cleaned_content.encode()).hexdigest()
192
+
193
+ # Handle metadata and filters
194
+ metadata = document.meta_data or {}
195
+ if filters:
196
+ metadata.update(filters)
197
+
198
+ # Add name, content_id to metadata
199
+ if document.name is not None:
200
+ metadata["name"] = document.name
201
+ if document.content_id is not None:
202
+ metadata["content_id"] = document.content_id
203
+
204
+ metadata["content_hash"] = content_hash
205
+
206
+ # Flatten metadata for ChromaDB compatibility
207
+ flattened_metadata = self._flatten_metadata(metadata)
208
+
209
+ docs_embeddings.append(document.embedding)
210
+ docs.append(cleaned_content)
211
+ ids.append(doc_id)
212
+ docs_metadata.append(flattened_metadata)
213
+ log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")
214
+
215
+ if self._collection is None:
216
+ logger.warning("Collection does not exist")
217
+ else:
218
+ if len(docs) > 0:
219
+ self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
220
+ log_debug(f"Committed {len(docs)} documents")
221
+
222
+ async def async_insert(
223
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
224
+ ) -> None:
225
+ """Insert documents asynchronously by running in a thread."""
226
+ log_info(f"Async Inserting {len(documents)} documents")
227
+ ids: List = []
228
+ docs: List = []
229
+ docs_embeddings: List = []
230
+ docs_metadata: List = []
231
+
232
+ if not self._collection:
233
+ self._collection = self.client.get_collection(name=self.collection_name)
234
+
235
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
236
+ # Use batch embedding when enabled and supported
237
+ try:
238
+ # Extract content from all documents
239
+ doc_contents = [doc.content for doc in documents]
240
+
241
+ # Get batch embeddings and usage
242
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
243
+
244
+ # Process documents with pre-computed embeddings
245
+ for j, doc in enumerate(documents):
246
+ try:
247
+ if j < len(embeddings):
248
+ doc.embedding = embeddings[j]
249
+ doc.usage = usages[j] if j < len(usages) else None
250
+ except Exception as e:
251
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
252
+
253
+ except Exception as e:
254
+ # Check if this is a rate limit error - don't fall back as it would make things worse
255
+ error_str = str(e).lower()
256
+ is_rate_limit = any(
257
+ phrase in error_str
258
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
259
+ )
260
+
261
+ if is_rate_limit:
262
+ logger.error(f"Rate limit detected during batch embedding. {e}")
263
+ raise e
264
+ else:
265
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
266
+ # Fall back to individual embedding
267
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
268
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
269
+ else:
270
+ # Use individual embedding
271
+ try:
272
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
273
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
274
+ except Exception as e:
275
+ log_error(f"Error processing document: {e}")
276
+
277
+ for document in documents:
278
+ cleaned_content = document.content.replace("\x00", "\ufffd")
279
+ # Include content_hash in ID to ensure uniqueness across different content hashes
280
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
281
+ doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
282
+
283
+ # Handle metadata and filters
284
+ metadata = document.meta_data or {}
285
+ if filters:
286
+ metadata.update(filters)
287
+
288
+ # Add name, content_id to metadata
289
+ if document.name is not None:
290
+ metadata["name"] = document.name
291
+ if document.content_id is not None:
292
+ metadata["content_id"] = document.content_id
293
+
294
+ metadata["content_hash"] = content_hash
295
+
296
+ # Flatten metadata for ChromaDB compatibility
297
+ flattened_metadata = self._flatten_metadata(metadata)
298
+
136
299
  docs_embeddings.append(document.embedding)
137
300
  docs.append(cleaned_content)
138
301
  ids.append(doc_id)
139
- logger.debug(f"Inserted document: {document.id} | {document.name} | {document.meta_data}")
302
+ docs_metadata.append(flattened_metadata)
303
+ log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")
140
304
 
141
- if len(docs) > 0 and self._collection is not None:
142
- self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs)
143
- logger.debug(f"Committed {len(docs)} documents")
305
+ if self._collection is None:
306
+ logger.warning("Collection does not exist")
144
307
  else:
145
- logger.error("Collection does not exist")
308
+ if len(docs) > 0:
309
+ self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
310
+ log_debug(f"Committed {len(docs)} documents")
146
311
 
147
312
  def upsert_available(self) -> bool:
148
313
  """Check if upsert is available in ChromaDB."""
149
314
  return True
150
315
 
151
- def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
316
+ def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
152
317
  """Upsert documents into the collection.
153
318
 
154
319
  Args:
155
320
  documents (List[Document]): List of documents to upsert
156
321
  filters (Optional[Dict[str, Any]]): Filters to apply while upserting
157
322
  """
158
- logger.debug(f"Upserting {len(documents)} documents")
323
+ try:
324
+ if self.content_hash_exists(content_hash):
325
+ self._delete_by_content_hash(content_hash)
326
+ self._upsert(content_hash, documents, filters)
327
+ except Exception as e:
328
+ logger.error(f"Error upserting documents by content hash: {e}")
329
+ raise
330
+
331
+ def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
332
+ """Upsert documents into the collection.
333
+
334
+ Args:
335
+ documents (List[Document]): List of documents to upsert
336
+ filters (Optional[Dict[str, Any]]): Filters to apply while upserting
337
+ """
338
+ log_info(f"Upserting {len(documents)} documents")
159
339
  ids: List = []
160
340
  docs: List = []
161
341
  docs_embeddings: List = []
342
+ docs_metadata: List = []
343
+
344
+ if not self._collection:
345
+ self._collection = self.client.get_collection(name=self.collection_name)
162
346
 
163
347
  for document in documents:
164
348
  document.embed(embedder=self.embedder)
165
349
  cleaned_content = document.content.replace("\x00", "\ufffd")
166
350
  doc_id = md5(cleaned_content.encode()).hexdigest()
351
+
352
+ # Handle metadata and filters
353
+ metadata = document.meta_data or {}
354
+ if filters:
355
+ metadata.update(filters)
356
+
357
+ # Add name, content_id to metadata
358
+ if document.name is not None:
359
+ metadata["name"] = document.name
360
+ if document.content_id is not None:
361
+ metadata["content_id"] = document.content_id
362
+
363
+ metadata["content_hash"] = content_hash
364
+
365
+ # Flatten metadata for ChromaDB compatibility
366
+ flattened_metadata = self._flatten_metadata(metadata)
367
+
167
368
  docs_embeddings.append(document.embedding)
168
369
  docs.append(cleaned_content)
169
370
  ids.append(doc_id)
170
- logger.debug(f"Upserted document: {document.id} | {document.name} | {document.meta_data}")
371
+ docs_metadata.append(flattened_metadata)
372
+ log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")
373
+
374
+ if self._collection is None:
375
+ logger.warning("Collection does not exist")
376
+ else:
377
+ if len(docs) > 0:
378
+ self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
379
+ log_debug(f"Committed {len(docs)} documents")
380
+
381
+ async def _async_upsert(
382
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
383
+ ) -> None:
384
+ """Upsert documents into the collection.
385
+
386
+ Args:
387
+ documents (List[Document]): List of documents to upsert
388
+ filters (Optional[Dict[str, Any]]): Filters to apply while upserting
389
+ """
390
+ log_info(f"Async Upserting {len(documents)} documents")
391
+ ids: List = []
392
+ docs: List = []
393
+ docs_embeddings: List = []
394
+ docs_metadata: List = []
395
+
396
+ if not self._collection:
397
+ self._collection = self.client.get_collection(name=self.collection_name)
398
+
399
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
400
+ # Use batch embedding when enabled and supported
401
+ try:
402
+ # Extract content from all documents
403
+ doc_contents = [doc.content for doc in documents]
404
+
405
+ # Get batch embeddings and usage
406
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
171
407
 
172
- if len(docs) > 0 and self._collection is not None:
173
- self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs)
174
- logger.debug(f"Committed {len(docs)} documents")
408
+ # Process documents with pre-computed embeddings
409
+ for j, doc in enumerate(documents):
410
+ try:
411
+ if j < len(embeddings):
412
+ doc.embedding = embeddings[j]
413
+ doc.usage = usages[j] if j < len(usages) else None
414
+ except Exception as e:
415
+ logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
175
416
 
417
+ except Exception as e:
418
+ # Check if this is a rate limit error - don't fall back as it would make things worse
419
+ error_str = str(e).lower()
420
+ is_rate_limit = any(
421
+ phrase in error_str
422
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
423
+ )
424
+
425
+ if is_rate_limit:
426
+ logger.error(f"Rate limit detected during batch embedding. {e}")
427
+ raise e
428
+ else:
429
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
430
+ # Fall back to individual embedding
431
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
432
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
176
433
  else:
177
- logger.error("Collection does not exist")
434
+ # Use individual embedding
435
+ embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
436
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
437
+
438
+ for document in documents:
439
+ cleaned_content = document.content.replace("\x00", "\ufffd")
440
+ # Include content_hash in ID to ensure uniqueness across different content hashes
441
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
442
+ doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
178
443
 
179
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
444
+ # Handle metadata and filters
445
+ metadata = document.meta_data or {}
446
+ if filters:
447
+ metadata.update(filters)
448
+
449
+ # Add name, content_id to metadata
450
+ if document.name is not None:
451
+ metadata["name"] = document.name
452
+ if document.content_id is not None:
453
+ metadata["content_id"] = document.content_id
454
+
455
+ metadata["content_hash"] = content_hash
456
+
457
+ # Flatten metadata for ChromaDB compatibility
458
+ flattened_metadata = self._flatten_metadata(metadata)
459
+
460
+ docs_embeddings.append(document.embedding)
461
+ docs.append(cleaned_content)
462
+ ids.append(doc_id)
463
+ docs_metadata.append(flattened_metadata)
464
+ log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")
465
+
466
+ if self._collection is None:
467
+ logger.warning("Collection does not exist")
468
+ else:
469
+ if len(docs) > 0:
470
+ self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
471
+ log_debug(f"Committed {len(docs)} documents")
472
+
473
+ async def async_upsert(
474
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
475
+ ) -> None:
476
+ """Upsert documents asynchronously by running in a thread."""
477
+ try:
478
+ if self.content_hash_exists(content_hash):
479
+ self._delete_by_content_hash(content_hash)
480
+ await self._async_upsert(content_hash, documents, filters)
481
+ except Exception as e:
482
+ logger.error(f"Error upserting documents by content hash: {e}")
483
+ raise
484
+
485
+ def search(
486
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
487
+ ) -> List[Document]:
180
488
  """Search the collection for a query.
181
489
 
182
490
  Args:
183
491
  query (str): Query to search for.
184
492
  limit (int): Number of results to return.
185
- filters (Optional[Dict[str, Any]]): Filters to apply while searching.
493
+ filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply while searching.
494
+ Supports ChromaDB's filtering operators:
495
+ - $eq, $ne: Equality/Inequality
496
+ - $gt, $gte, $lt, $lte: Numeric comparisons
497
+ - $in, $nin: List inclusion/exclusion
498
+ - $and, $or: Logical operators
186
499
  Returns:
187
500
  List[Document]: List of search results.
188
501
  """
502
+ if isinstance(filters, list):
503
+ log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
504
+ filters = None
189
505
  query_embedding = self.embedder.get_embedding(query)
190
506
  if query_embedding is None:
191
507
  logger.error(f"Error getting embedding for Query: {query}")
192
508
  return []
193
509
 
194
510
  if not self._collection:
195
- self._collection = self.client.get_collection(name=self.collection)
511
+ self._collection = self.client.get_collection(name=self.collection_name)
512
+
513
+ # Convert simple filters to ChromaDB's format if needed
514
+ where_filter = self._convert_filters(filters) if filters else None
196
515
 
197
516
  result: QueryResult = self._collection.query(
198
517
  query_embeddings=query_embedding,
199
518
  n_results=limit,
519
+ where=where_filter, # Add where filter
520
+ include=["metadatas", "documents", "embeddings", "distances", "uris"],
200
521
  )
201
522
 
202
523
  # Build search results
203
524
  search_results: List[Document] = []
204
525
 
205
- ids = result.get("ids", [[]])[0]
206
- metadata = result.get("metadatas", [[]])[0] # type: ignore
207
- documents = result.get("documents", [[]])[0] # type: ignore
208
- embeddings = result.get("embeddings")
209
- distances = result.get("distances", [[]])[0] # type: ignore
210
- uris = result.get("uris")
211
- data = result.get("data")
212
- metadata["distances"] = distances # type: ignore
213
- metadata["uris"] = uris # type: ignore
214
- metadata["data"] = data # type: ignore
526
+ ids_list = result.get("ids", [[]]) # type: ignore
527
+ metadata_list = result.get("metadatas", [[{}]]) # type: ignore
528
+ documents_list = result.get("documents", [[]]) # type: ignore
529
+ embeddings_list = result.get("embeddings") # type: ignore
530
+ distances_list = result.get("distances", [[]]) # type: ignore
531
+
532
+ if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
533
+ return search_results
534
+
535
+ ids = ids_list[0]
536
+ metadata = [dict(m) if m else {} for m in metadata_list[0]] # Convert to mutable dicts
537
+ documents = documents_list[0]
538
+ embeddings_raw = embeddings_list[0] if embeddings_list else []
539
+ embeddings = []
540
+ for e in embeddings_raw:
541
+ if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
542
+ try:
543
+ embeddings.append(list(cast(Any, e).tolist()))
544
+ except (AttributeError, TypeError):
545
+ embeddings.append(list(e) if isinstance(e, (list, tuple)) else [])
546
+ elif isinstance(e, (list, tuple)):
547
+ embeddings.append([float(x) for x in e if isinstance(x, (int, float))])
548
+ elif isinstance(e, (int, float)):
549
+ embeddings.append([float(e)])
550
+ else:
551
+ embeddings.append([])
552
+ distances = distances_list[0]
553
+
554
+ for idx, distance in enumerate(distances):
555
+ if idx < len(metadata):
556
+ metadata[idx]["distances"] = distance
215
557
 
216
558
  try:
217
- # Use zip to iterate over multiple lists simultaneously
218
- for id_, distance, metadata, document in zip(ids, distances, metadata, documents):
559
+ for idx, (id_, doc_metadata, document) in enumerate(zip(ids, metadata, documents)):
560
+ # Extract the fields we added to metadata
561
+ name_val = doc_metadata.pop("name", None)
562
+ content_id_val = doc_metadata.pop("content_id", None)
563
+
564
+ # Convert types to match Document constructor expectations
565
+ name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
566
+ content_id = (
567
+ str(content_id_val)
568
+ if content_id_val is not None and not isinstance(content_id_val, str)
569
+ else content_id_val
570
+ )
571
+ content = str(document) if document is not None else ""
572
+ embedding = embeddings[idx] if idx < len(embeddings) else None
573
+
219
574
  search_results.append(
220
575
  Document(
221
576
  id=id_,
222
- meta_data=metadata,
223
- content=document,
224
- embedding=embeddings, # type: ignore
577
+ name=name,
578
+ meta_data=doc_metadata,
579
+ content=content,
580
+ embedding=embedding,
581
+ content_id=content_id,
225
582
  )
226
583
  )
227
584
  except Exception as e:
@@ -230,28 +587,68 @@ class ChromaDb(VectorDb):
230
587
  if self.reranker:
231
588
  search_results = self.reranker.rerank(query=query, documents=search_results)
232
589
 
590
+ log_info(f"Found {len(search_results)} documents")
233
591
  return search_results
234
592
 
593
+ def _convert_filters(self, filters: Dict[str, Any]) -> Dict[str, Any]:
594
+ """Convert simple filters to ChromaDB's filter format.
595
+
596
+ Handles conversion of simple key-value filters to ChromaDB's operator format
597
+ when needed.
598
+ """
599
+ if not filters:
600
+ return {}
601
+
602
+ # If filters already use ChromaDB operators ($eq, $ne, etc.), return as is
603
+ if any(key.startswith("$") for key in filters.keys()):
604
+ return filters
605
+
606
+ # Convert simple key-value pairs to ChromaDB's format
607
+ converted = {}
608
+ for key, value in filters.items():
609
+ if isinstance(value, (list, tuple)):
610
+ # Convert lists to $in operator
611
+ converted[key] = {"$in": list(value)}
612
+ else:
613
+ # Convert simple equality to $eq
614
+ converted[key] = {"$eq": value}
615
+
616
+ return converted
617
+
618
+ async def async_search(
619
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
620
+ ) -> List[Document]:
621
+ """Search asynchronously by running in a thread."""
622
+ return await asyncio.to_thread(self.search, query, limit, filters)
623
+
235
624
  def drop(self) -> None:
236
625
  """Delete the collection."""
237
626
  if self.exists():
238
- logger.debug(f"Deleting collection: {self.collection}")
239
- self.client.delete_collection(name=self.collection)
627
+ log_debug(f"Deleting collection: {self.collection_name}")
628
+ self.client.delete_collection(name=self.collection_name)
629
+
630
+ async def async_drop(self) -> None:
631
+ """Drop the collection asynchronously by running in a thread."""
632
+ await asyncio.to_thread(self.drop)
240
633
 
241
634
  def exists(self) -> bool:
242
635
  """Check if the collection exists."""
243
636
  try:
244
- self.client.get_collection(name=self.collection)
637
+ self.client.get_collection(name=self.collection_name)
245
638
  return True
246
639
  except Exception as e:
247
- logger.debug(f"Collection does not exist: {e}")
640
+ log_debug(f"Collection does not exist: {e}")
248
641
  return False
249
642
 
643
+ async def async_exists(self) -> bool:
644
+ """Check if collection exists asynchronously by running in a thread."""
645
+ return await asyncio.to_thread(self.exists)
646
+
250
647
  def get_count(self) -> int:
251
648
  """Get the count of documents in the collection."""
252
649
  if self.exists():
253
650
  try:
254
- collection: Collection = self.client.get_collection(name=self.collection)
651
+ collection: Collection = self.client.get_collection(name=self.collection_name)
255
652
  return collection.count()
256
653
  except Exception as e:
257
654
  logger.error(f"Error getting count: {e}")
@@ -262,8 +659,276 @@ class ChromaDb(VectorDb):
262
659
 
263
660
  def delete(self) -> bool:
264
661
  try:
265
- self.client.delete_collection(name=self.collection)
662
+ self.client.delete_collection(name=self.collection_name)
266
663
  return True
267
664
  except Exception as e:
268
665
  logger.error(f"Error clearing collection: {e}")
269
666
  return False
667
+
668
+ def delete_by_id(self, id: str) -> bool:
669
+ """Delete document by ID."""
670
+ if not self.client:
671
+ logger.error("Client not initialized")
672
+ return False
673
+
674
+ try:
675
+ collection: Collection = self.client.get_collection(name=self.collection_name)
676
+
677
+ # Check if document exists
678
+ if not self.id_exists(id):
679
+ log_info(f"Document with ID '{id}' not found")
680
+ return False
681
+
682
+ # Delete the document
683
+ collection.delete(ids=[id])
684
+ log_info(f"Deleted document with ID '{id}'")
685
+ return True
686
+ except Exception as e:
687
+ logger.error(f"Error deleting document by ID '{id}': {e}")
688
+ return False
689
+
690
+ def delete_by_name(self, name: str) -> bool:
691
+ """Delete documents by name."""
692
+ if not self.client:
693
+ logger.error("Client not initialized")
694
+ return False
695
+
696
+ try:
697
+ collection: Collection = self.client.get_collection(name=self.collection_name)
698
+
699
+ # Find all documents with the given name
700
+ result = collection.get(where=cast(Any, {"name": {"$eq": name}}))
701
+ ids_to_delete = result.get("ids", [])
702
+
703
+ if not ids_to_delete:
704
+ log_info(f"No documents found with name '{name}'")
705
+ return False
706
+
707
+ # Delete all matching documents
708
+ collection.delete(ids=ids_to_delete)
709
+ log_info(f"Deleted {len(ids_to_delete)} documents with name '{name}'")
710
+ return True
711
+ except Exception as e:
712
+ logger.error(f"Error deleting documents by name '{name}': {e}")
713
+ return False
714
+
715
+ def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
716
+ """Delete documents by metadata."""
717
+ if not self.client:
718
+ logger.error("Client not initialized")
719
+ return False
720
+
721
+ try:
722
+ collection: Collection = self.client.get_collection(name=self.collection_name)
723
+
724
+ # Build where clause for metadata filtering
725
+ where_clause = {}
726
+ for key, value in metadata.items():
727
+ where_clause[key] = {"$eq": value}
728
+
729
+ # Find all documents with the matching metadata
730
+ result = collection.get(where=cast(Any, where_clause))
731
+ ids_to_delete = result.get("ids", [])
732
+
733
+ if not ids_to_delete:
734
+ log_info(f"No documents found with metadata '{metadata}'")
735
+ return False
736
+
737
+ # Delete all matching documents
738
+ collection.delete(ids=ids_to_delete)
739
+ log_info(f"Deleted {len(ids_to_delete)} documents with metadata '{metadata}'")
740
+ return True
741
+ except Exception as e:
742
+ logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
743
+ return False
744
+
745
+ def delete_by_content_id(self, content_id: str) -> bool:
746
+ """Delete documents by content ID."""
747
+ if not self.client:
748
+ logger.error("Client not initialized")
749
+ return False
750
+
751
+ try:
752
+ collection: Collection = self.client.get_collection(name=self.collection_name)
753
+
754
+ # Find all documents with the given content_id
755
+ result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
756
+ ids_to_delete = result.get("ids", [])
757
+
758
+ if not ids_to_delete:
759
+ log_info(f"No documents found with content_id '{content_id}'")
760
+ return False
761
+
762
+ # Delete all matching documents
763
+ collection.delete(ids=ids_to_delete)
764
+ log_info(f"Deleted {len(ids_to_delete)} documents with content_id '{content_id}'")
765
+ return True
766
+ except Exception as e:
767
+ logger.error(f"Error deleting documents by content_id '{content_id}': {e}")
768
+ return False
769
+
770
+ def _delete_by_content_hash(self, content_hash: str) -> bool:
771
+ """Delete documents by content hash."""
772
+ if not self.client:
773
+ logger.error("Client not initialized")
774
+ return False
775
+
776
+ try:
777
+ collection: Collection = self.client.get_collection(name=self.collection_name)
778
+
779
+ # Find all documents with the given content_hash
780
+ result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
781
+ ids_to_delete = result.get("ids", [])
782
+
783
+ if not ids_to_delete:
784
+ log_info(f"No documents found with content_hash '{content_hash}'")
785
+ return False
786
+
787
+ # Delete all matching documents
788
+ collection.delete(ids=ids_to_delete)
789
+ log_info(f"Deleted {len(ids_to_delete)} documents with content_hash '{content_hash}'")
790
+ return True
791
+ except Exception as e:
792
+ logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
793
+ return False
794
+
795
+ def id_exists(self, id: str) -> bool:
796
+ """Check if a document with the given ID exists in the collection.
797
+
798
+ Args:
799
+ id (str): The document ID to check.
800
+
801
+ Returns:
802
+ bool: True if the document exists, False otherwise.
803
+ """
804
+ if not self.client:
805
+ logger.error("Client not initialized")
806
+ return False
807
+
808
+ try:
809
+ collection: Collection = self.client.get_collection(name=self.collection_name)
810
+ # Try to get the document by ID
811
+ result = collection.get(ids=[id])
812
+ found_ids = result.get("ids", [])
813
+
814
+ # Return True if the document was found
815
+ return len(found_ids) > 0
816
+ except Exception as e:
817
+ logger.error(f"Error checking if ID '{id}' exists: {e}")
818
+ return False
819
+
820
+ def content_hash_exists(self, content_hash: str) -> bool:
821
+ """Check if documents with the given content hash exist."""
822
+ if not self.client:
823
+ logger.error("Client not initialized")
824
+ return False
825
+
826
+ try:
827
+ collection: Collection = self.client.get_collection(name=self.collection_name)
828
+
829
+ # Try to query for documents with the given content_hash
830
+ try:
831
+ result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
832
+ # Safely extract ids from result
833
+ if hasattr(result, "get") and callable(result.get):
834
+ found_ids = result.get("ids", [])
835
+ elif hasattr(result, "__getitem__") and "ids" in result:
836
+ found_ids = result["ids"]
837
+ else:
838
+ found_ids = []
839
+
840
+ # Return True if any documents were found
841
+ if isinstance(found_ids, (list, tuple)):
842
+ return len(found_ids) > 0
843
+ elif isinstance(found_ids, int):
844
+ # Some ChromaDB versions might return a count instead of a list
845
+ return found_ids > 0
846
+ else:
847
+ return False
848
+
849
+ except TypeError as te:
850
+ if "object of type 'int' has no len()" in str(te):
851
+ # Known issue with ChromaDB 0.5.0 - internal bug
852
+ # As a workaround, assume content doesn't exist to allow processing to continue
853
+ logger.warning(
854
+ f"ChromaDB internal error (version 0.5.0 bug): {te}. Assuming content_hash '{content_hash}' does not exist."
855
+ )
856
+ return False
857
+ else:
858
+ raise te
859
+
860
+ except Exception as e:
861
+ logger.error(f"Error checking if content_hash '{content_hash}' exists: {e}")
862
+ return False
863
+
864
+ def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
865
+ """
866
+ Update the metadata for documents with the given content_id.
867
+
868
+ Args:
869
+ content_id (str): The content ID to update
870
+ metadata (Dict[str, Any]): The metadata to update
871
+ """
872
+ try:
873
+ if not self.client:
874
+ logger.error("Client not initialized")
875
+ return
876
+
877
+ collection: Collection = self.client.get_collection(name=self.collection_name)
878
+
879
+ # Find documents with the given content_id
880
+ try:
881
+ result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
882
+
883
+ # Extract IDs and current metadata
884
+ if hasattr(result, "get") and callable(result.get):
885
+ ids = result.get("ids", [])
886
+ current_metadatas = result.get("metadatas", [])
887
+ elif hasattr(result, "__getitem__"):
888
+ ids = result.get("ids", []) if "ids" in result else []
889
+ current_metadatas = result.get("metadatas", []) if "metadatas" in result else []
890
+ else:
891
+ ids = []
892
+ current_metadatas = []
893
+
894
+ if not ids:
895
+ logger.debug(f"No documents found with content_id: {content_id}")
896
+ return
897
+
898
+ # Flatten the new metadata first
899
+ flattened_new_metadata = self._flatten_metadata(metadata)
900
+
901
+ # Merge metadata for each document
902
+ updated_metadatas = []
903
+ for i, current_meta in enumerate(current_metadatas or []):
904
+ if current_meta is None:
905
+ meta_dict: Dict[str, Any] = {}
906
+ else:
907
+ meta_dict = dict(current_meta) # Convert Mapping to dict
908
+
909
+ # Update with flattened metadata
910
+ meta_dict.update(flattened_new_metadata)
911
+ updated_metadatas.append(meta_dict)
912
+
913
+ # Convert to the expected type for ChromaDB
914
+ chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
915
+ chroma_metadatas = [{k: v for k, v in m.items() if k and v} for m in chroma_metadatas]
916
+ collection.update(ids=ids, metadatas=chroma_metadatas) # type: ignore
917
+ logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
918
+
919
+ except TypeError as te:
920
+ if "object of type 'int' has no len()" in str(te):
921
+ logger.warning(
922
+ f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
923
+ )
924
+ return
925
+ else:
926
+ raise te
927
+
928
+ except Exception as e:
929
+ logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
930
+ raise
931
+
932
+ def get_supported_search_types(self) -> List[str]:
933
+ """Get the supported search types for this vector database."""
934
+ return [] # ChromaDb doesn't use SearchType enum