agno-2.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
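The diff below covers agno/vectordb/milvus/milvus.py, the new Milvus vector database integration (+1182 lines). For orientation, a minimal usage sketch based on the constructor and search() signature shown in the diff could look like the following; it assumes agno/vectordb/milvus/__init__.py re-exports Milvus, the collection name, URI, and query string are illustrative, and the default OpenAIEmbedder needs an OpenAI API key to be configured:

    from agno.vectordb.milvus import Milvus
    from agno.vectordb.search import SearchType

    # "./milvus.db" uses Milvus Lite locally; use e.g. "http://localhost:19530" (plus token) for a server
    vector_db = Milvus(
        collection="demo_documents",
        uri="./milvus.db",
        search_type=SearchType.vector,  # or SearchType.hybrid for dense + sparse retrieval
    )

    vector_db.create()                                      # creates the collection if it does not exist
    results = vector_db.search("what is milvus?", limit=5)  # returns List[Document]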
agno/vectordb/milvus/milvus.py
@@ -0,0 +1,1182 @@
+import json
+from hashlib import md5
+from typing import Any, Dict, List, Optional, Union
+
+try:
+    import asyncio
+
+    from pymilvus import AsyncMilvusClient, MilvusClient  # type: ignore
+except ImportError:
+    raise ImportError("The `pymilvus` package is not installed. Please install it via `pip install pymilvus`.")
+
+from agno.filters import FilterExpr
+from agno.knowledge.document import Document
+from agno.knowledge.embedder import Embedder
+from agno.knowledge.reranker.base import Reranker
+from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.vectordb.base import VectorDb
+from agno.vectordb.distance import Distance
+from agno.vectordb.search import SearchType
+
+MILVUS_DISTANCE_MAP = {
+    Distance.cosine: "COSINE",
+    Distance.l2: "L2",
+    Distance.max_inner_product: "IP",
+}
+
+
+class Milvus(VectorDb):
+    def __init__(
+        self,
+        collection: str,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        id: Optional[str] = None,
+        embedder: Optional[Embedder] = None,
+        distance: Distance = Distance.cosine,
+        uri: str = "http://localhost:19530",
+        token: Optional[str] = None,
+        search_type: SearchType = SearchType.vector,
+        reranker: Optional[Reranker] = None,
+        sparse_vector_dimensions: int = 10000,
+        **kwargs,
+    ):
+        """
+        Milvus vector database.
+
+        Args:
+            collection (str): Name of the Milvus collection.
+            name (Optional[str]): Name of the vector database.
+            description (Optional[str]): Description of the vector database.
+            embedder (Embedder): Embedder to use for embedding documents.
+            distance (Distance): Distance metric to use for vector similarity.
+            uri (Optional[str]): URI of the Milvus server.
+                - If you only need a local vector database for small scale data or prototyping,
+                  setting the uri as a local file, e.g.`./milvus.db`, is the most convenient method,
+                  as it automatically utilizes [Milvus Lite](https://milvus.io/docs/milvus_lite.md)
+                  to store all data in this file.
+                - If you have large scale of data, say more than a million vectors, you can set up
+                  a more performant Milvus server on [Docker or Kubernetes](https://milvus.io/docs/quickstart.md).
+                  In this setup, please use the server address and port as your uri, e.g.`http://localhost:19530`.
+                  If you enable the authentication feature on Milvus,
+                  use "<your_username>:<your_password>" as the token, otherwise don't set the token.
+                - If you use [Zilliz Cloud](https://zilliz.com/cloud), the fully managed cloud
+                  service for Milvus, adjust the `uri` and `token`, which correspond to the
+                  [Public Endpoint and API key](https://docs.zilliz.com/docs/on-zilliz-cloud-console#cluster-details)
+                  in Zilliz Cloud.
+            token (Optional[str]): Token for authentication with the Milvus server.
+            search_type (SearchType): Type of search to perform (vector, keyword, or hybrid)
+            reranker (Optional[Reranker]): Reranker to use for hybrid search results
+            **kwargs: Additional keyword arguments to pass to the MilvusClient.
+        """
+        # Validate required parameters
+        if not collection:
+            raise ValueError("Collection name must be provided.")
+
+        # Dynamic ID generation based on unique identifiers
+        if id is None:
+            from agno.utils.string import generate_id
+
+            seed = f"{uri or 'milvus'}#{collection}"
+            id = generate_id(seed)
+
+        # Initialize base class with name, description, and generated ID
+        super().__init__(id=id, name=name, description=description)
+
+        self.collection: str = collection
+
+        if embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+
+            embedder = OpenAIEmbedder()
+            log_info("Embedder not provided, using OpenAIEmbedder as default.")
+        self.embedder: Embedder = embedder
+        self.dimensions: Optional[int] = self.embedder.dimensions
+
+        self.distance: Distance = distance
+        self.uri: str = uri
+        self.token: Optional[str] = token
+        self._client: Optional[MilvusClient] = None
+        self._async_client: Optional[AsyncMilvusClient] = None
+        self.search_type: SearchType = search_type
+        self.reranker: Optional[Reranker] = reranker
+        self.sparse_vector_dimensions = sparse_vector_dimensions
+        self.kwargs = kwargs
+
+    @property
+    def client(self) -> MilvusClient:
+        if self._client is None:
+            log_debug("Creating Milvus Client")
+            self._client = MilvusClient(
+                uri=self.uri,
+                token=self.token,
+                **self.kwargs,
+            )
+        return self._client
+
+    @property
+    def async_client(self) -> AsyncMilvusClient:
+        if not hasattr(self, "_async_client") or self._async_client is None:
+            log_debug("Creating Async Milvus Client")
+            self._async_client = AsyncMilvusClient(
+                uri=self.uri,
+                token=self.token,
+                **self.kwargs,
+            )
+        return self._async_client
+
+    def _get_sparse_vector(self, text: str) -> Dict[int, float]:
+        """
+        Convert text into a sparse vector representation using a simple TF-IDF-like scoring.
+
+        This method creates a sparse vector by:
+        1. Converting text to lowercase and splitting into words
+        2. Computing word frequencies
+        3. Creating a hash-based word ID (modulo 10000)
+        4. Computing a TF-IDF-like score for each word
+
+        Args:
+            text: Input text to convert to sparse vector
+
+        Returns:
+            Dictionary mapping word IDs (int) to their TF-IDF-like scores (float)
+        """
+        from collections import Counter
+
+        import numpy as np
+
+        # Simple word-based sparse vector creation
+        words = text.lower().split()
+        word_counts = Counter(words)
+
+        # Create sparse vector (word_id: tf-idf_score)
+        sparse_vector = {}
+        for word, count in word_counts.items():
+            word_id = hash(word) % self.sparse_vector_dimensions
+            # Simple tf-idf-like score
+            score = count * np.log(1 + len(words))
+            sparse_vector[word_id] = float(score)
+
+        return sparse_vector
+
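# For illustration, a worked example of the scoring above (a sketch, not lines from
# milvus.py): for the text "red red apple", words == ["red", "red", "apple"], so
# len(words) == 3 and "red" scores 2 * log(1 + 3) ≈ 2.77 while "apple" scores
# 1 * log(1 + 3) ≈ 1.39, each keyed by hash(word) % sparse_vector_dimensions.
# Python's built-in hash() for strings is salted per process, so the integer keys are
# only stable within a single run.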
+    def _create_hybrid_schema(self) -> Any:
+        """Create a schema for hybrid collection with all necessary fields."""
+        from pymilvus import DataType
+
+        schema = MilvusClient.create_schema(
+            auto_id=False,
+            enable_dynamic_field=True,
+        )
+
+        # Define field configurations
+        fields = [
+            ("id", DataType.VARCHAR, 128, True),  # (name, type, max_length, is_primary)
+            ("name", DataType.VARCHAR, 1000, False),
+            ("content", DataType.VARCHAR, 65535, False),
+            ("content_id", DataType.VARCHAR, 1000, False),
+            ("content_hash", DataType.VARCHAR, 1000, False),
+            ("text", DataType.VARCHAR, 1000, False),
+            ("meta_data", DataType.VARCHAR, 65535, False),
+            ("usage", DataType.VARCHAR, 65535, False),
+        ]
+
+        # Add VARCHAR fields
+        for field_name, datatype, max_length, is_primary in fields:
+            schema.add_field(field_name=field_name, datatype=datatype, max_length=max_length, is_primary=is_primary)
+
+        # Add vector fields
+        schema.add_field(field_name="dense_vector", datatype=DataType.FLOAT_VECTOR, dim=self.dimensions)
+        schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)
+
+        return schema
+
+    def _prepare_hybrid_index_params(self) -> Any:
+        """Prepare index parameters for both dense and sparse vectors."""
+        index_params = self.client.prepare_index_params()
+
+        # Add indexes for both vector types
+        index_params.add_index(
+            field_name="dense_vector",
+            index_name="dense_index",
+            index_type="IVF_FLAT",
+            metric_type=self._get_metric_type(),
+            params={"nlist": 1024},
+        )
+
+        index_params.add_index(
+            field_name="sparse_vector",
+            index_name="sparse_index",
+            index_type="SPARSE_INVERTED_INDEX",
+            metric_type="IP",
+            params={"drop_ratio_build": 0.2},
+        )
+
+        return index_params
+
+    def _prepare_document_data(
+        self, content_hash: str, document: Document, include_vectors: bool = True
+    ) -> Dict[str, Union[str, List[float], Dict[int, float], None]]:
+        """
+        Prepare document data for insertion.
+
+        Args:
+            document: Document to prepare data for
+            include_vectors: Whether to include vector data
+
+        Returns:
+            Dictionary with document data where values can be strings, vectors (List[float]),
+            sparse vectors (Dict[int, float]), or None
+        """
+
+        cleaned_content = document.content.replace("\x00", "\ufffd")
+        doc_id = md5(cleaned_content.encode()).hexdigest()
+
+        # Convert dictionary fields to JSON strings
+        meta_data_str = json.dumps(document.meta_data) if document.meta_data else "{}"
+        usage_str = json.dumps(document.usage) if document.usage else "{}"
+
+        data: Dict[str, Union[str, List[float], Dict[int, float], None]] = {
+            "id": doc_id,
+            "text": cleaned_content,
+            "name": document.name,
+            "content_id": document.content_id,
+            "meta_data": meta_data_str,
+            "content": cleaned_content,
+            "usage": usage_str,
+            "content_hash": content_hash,
+        }
+
+        if include_vectors:
+            if self.search_type == SearchType.hybrid:
+                data.update(
+                    {
+                        "dense_vector": document.embedding,  # List[float] or None # Dict[int, float]
+                        "sparse_vector": self._get_sparse_vector(cleaned_content),
+                    }
+                )
+            else:
+                vector_data: Optional[List[float]] = document.embedding
+                data["vector"] = vector_data
+
+        return data
+
+    def _create_hybrid_collection(self) -> None:
+        """Create a collection specifically for hybrid search."""
+        log_debug(f"Creating hybrid collection: {self.collection}")
+
+        schema = self._create_hybrid_schema()
+        index_params = self._prepare_hybrid_index_params()
+
+        self.client.create_collection(collection_name=self.collection, schema=schema, index_params=index_params)
+
+    async def _async_create_hybrid_collection(self) -> None:
+        """Create a hybrid collection asynchronously."""
+        log_debug(f"Creating hybrid collection asynchronously: {self.collection}")
+
+        schema = self._create_hybrid_schema()
+        index_params = self._prepare_hybrid_index_params()
+
+        await self.async_client.create_collection(
+            collection_name=self.collection, schema=schema, index_params=index_params
+        )
+
+    def create(self) -> None:
+        """Create a collection based on search type if it doesn't exist."""
+        if self.exists():
+            return
+
+        if self.search_type == SearchType.hybrid:
+            self._create_hybrid_collection()
+            return
+
+        _distance = self._get_metric_type()
+        log_debug(f"Creating collection: {self.collection}")
+        self.client.create_collection(
+            collection_name=self.collection,
+            dimension=self.dimensions,
+            metric_type=_distance,
+            id_type="string",
+            max_length=65_535,
+        )
+
+    async def async_create(self) -> None:
+        """Create collection asynchronously based on search type."""
+        # Use the synchronous client to check if collection exists
+        if not self.client.has_collection(self.collection):
+            if self.search_type == SearchType.hybrid:
+                await self._async_create_hybrid_collection()
+            else:
+                # Original async create logic for regular vector search
+                _distance = self._get_metric_type()
+                log_debug(f"Creating collection asynchronously: {self.collection}")
+                await self.async_client.create_collection(
+                    collection_name=self.collection,
+                    dimension=self.dimensions,
+                    metric_type=_distance,
+                    id_type="string",
+                    max_length=65_535,
+                )
+
+    def doc_exists(self, document: Document) -> bool:
+        """
+        Validating if the document exists or not
+
+        Args:
+            document (Document): Document to validate
+        """
+        if self.client:
+            cleaned_content = document.content.replace("\x00", "\ufffd")
+            doc_id = md5(cleaned_content.encode()).hexdigest()
+            collection_points = self.client.get(
+                collection_name=self.collection,
+                ids=[doc_id],
+            )
+            return len(collection_points) > 0
+        return False
+
+    async def async_doc_exists(self, document: Document) -> bool:
+        """
+        Check if document exists asynchronously.
+        AsyncMilvusClient supports get().
+        """
+        cleaned_content = document.content.replace("\x00", "\ufffd")
+        doc_id = md5(cleaned_content.encode()).hexdigest()
+        collection_points = await self.async_client.get(
+            collection_name=self.collection,
+            ids=[doc_id],
+        )
+        return len(collection_points) > 0
+
+    def name_exists(self, name: str) -> bool:
+        """
+        Validates if a document with the given name exists in the collection.
+
+        Args:
+            name (str): The name of the document to check.
+
+        Returns:
+            bool: True if a document with the given name exists, False otherwise.
+        """
+        if self.client:
+            expr = f"name == '{name}'"
+            scroll_result = self.client.query(
+                collection_name=self.collection,
+                filter=expr,
+                limit=1,
+            )
+            return len(scroll_result) > 0 and len(scroll_result[0]) > 0
+        return False
+
+    def id_exists(self, id: str) -> bool:
+        if self.client:
+            collection_points = self.client.get(
+                collection_name=self.collection,
+                ids=[id],
+            )
+            return len(collection_points) > 0
+        return False
+
+    def content_hash_exists(self, content_hash: str) -> bool:
+        """
+        Check if a document with the given content hash exists.
+
+        Args:
+            content_hash (str): The content hash to check.
+
+        Returns:
+            bool: True if a document with the given content hash exists, False otherwise.
+        """
+        if self.client:
+            expr = f'content_hash == "{content_hash}"'
+            scroll_result = self.client.query(
+                collection_name=self.collection,
+                filter=expr,
+                limit=1,
+            )
+            return len(scroll_result) > 0 and len(scroll_result[0]) > 0
+        return False
+
+    def _delete_by_content_hash(self, content_hash: str) -> bool:
+        """
+        Delete documents by content hash.
+
+        Args:
+            content_hash (str): The content hash to delete.
+
+        Returns:
+            bool: True if documents were deleted, False otherwise.
+        """
+        if self.client:
+            expr = f'content_hash == "{content_hash}"'
+            self.client.delete(collection_name=self.collection, filter=expr)
+            log_info(f"Deleted documents with content_hash '{content_hash}' from collection '{self.collection}'.")
+            return True
+        return False
+
+    def _insert_hybrid_document(self, content_hash: str, document: Document) -> None:
+        """Insert a document with both dense and sparse vectors."""
+        data = self._prepare_document_data(content_hash=content_hash, document=document, include_vectors=True)
+        document.embed(embedder=self.embedder)
+        self.client.insert(
+            collection_name=self.collection,
+            data=data,
+        )
+        log_debug(f"Inserted hybrid document: {document.name} ({document.meta_data})")
+
+    async def _async_insert_hybrid_document(self, content_hash: str, document: Document) -> None:
+        """Insert a document with both dense and sparse vectors asynchronously."""
+        data = self._prepare_document_data(content_hash=content_hash, document=document, include_vectors=True)
+
+        await self.async_client.insert(
+            collection_name=self.collection,
+            data=data,
+        )
+        log_debug(f"Inserted hybrid document asynchronously: {document.name} ({document.meta_data})")
+
+    def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+        """Insert documents based on search type."""
+        log_debug(f"Inserting {len(documents)} documents")
+
+        if self.search_type == SearchType.hybrid:
+            for document in documents:
+                self._insert_hybrid_document(content_hash=content_hash, document=document)
+        else:
+            for document in documents:
+                document.embed(embedder=self.embedder)
+                if not document.embedding:
+                    log_debug(f"Skipping document without embedding: {document.name} ({document.meta_data})")
+                    continue
+                cleaned_content = document.content.replace("\x00", "\ufffd")
+                doc_id = md5(cleaned_content.encode()).hexdigest()
+
+                meta_data = document.meta_data or {}
+                if filters:
+                    meta_data.update(filters)
+
+                data = {
+                    "id": doc_id,
+                    "vector": document.embedding,
+                    "name": document.name,
+                    "content_id": document.content_id,
+                    "meta_data": meta_data,
+                    "content": cleaned_content,
+                    "usage": document.usage,
+                    "content_hash": content_hash,
+                }
+                self.client.insert(
+                    collection_name=self.collection,
+                    data=data,
+                )
+                log_debug(f"Inserted document: {document.name} ({meta_data})")
+
+        log_info(f"Inserted {len(documents)} documents")
+
+    async def async_insert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Insert documents asynchronously based on search type."""
+        log_info(f"Inserting {len(documents)} documents asynchronously")
+
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+
+                if is_rate_limit:
+                    log_error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+        if self.search_type == SearchType.hybrid:
+            await asyncio.gather(
+                *[self._async_insert_hybrid_document(content_hash=content_hash, document=doc) for doc in documents]
+            )
+        else:
+
+            async def process_document(document):
+                document.embed(embedder=self.embedder)
+                if not document.embedding:
+                    log_debug(f"Skipping document without embedding: {document.name} ({document.meta_data})")
+                    return None
+                cleaned_content = document.content.replace("\x00", "\ufffd")
+                doc_id = md5(cleaned_content.encode()).hexdigest()
+
+                meta_data = document.meta_data or {}
+                if filters:
+                    meta_data.update(filters)
+
+                data = {
+                    "id": doc_id,
+                    "vector": document.embedding,
+                    "name": document.name,
+                    "content_id": document.content_id,
+                    "meta_data": meta_data,
+                    "content": cleaned_content,
+                    "usage": document.usage,
+                    "content_hash": content_hash,
+                }
+                await self.async_client.insert(
+                    collection_name=self.collection,
+                    data=data,
+                )
+                log_debug(f"Inserted document asynchronously: {document.name} ({document.meta_data})")
+                return data
+
+            await asyncio.gather(*[process_document(doc) for doc in documents])
+
+        log_info(f"Inserted {len(documents)} documents asynchronously")
+
+    def upsert_available(self) -> bool:
+        """
+        Check if upsert operation is available.
+
+        Returns:
+            bool: Always returns True.
+        """
+        return True
+
+    def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+        """
+        Upsert documents into the database.
+
+        Args:
+            documents (List[Document]): List of documents to upsert
+            filters (Optional[Dict[str, Any]]): Filters to apply while upserting
+        """
+        log_debug(f"Upserting {len(documents)} documents")
+        for document in documents:
+            document.embed(embedder=self.embedder)
+            cleaned_content = document.content.replace("\x00", "\ufffd")
+            doc_id = md5(cleaned_content.encode()).hexdigest()
+
+            meta_data = document.meta_data or {}
+            if filters:
+                meta_data.update(filters)
+
+            data = {
+                "id": doc_id,
+                "vector": document.embedding,
+                "name": document.name,
+                "content_id": document.content_id,
+                "meta_data": document.meta_data,
+                "content": cleaned_content,
+                "usage": document.usage,
+                "content_hash": content_hash,
+            }
+            self.client.upsert(
+                collection_name=self.collection,
+                data=data,
+            )
+            log_debug(f"Upserted document: {document.name} ({document.meta_data})")
+
+    async def async_upsert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        log_debug(f"Upserting {len(documents)} documents asynchronously")
+
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+
+                if is_rate_limit:
+                    log_error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+        async def process_document(document):
+            cleaned_content = document.content.replace("\x00", "\ufffd")
+            doc_id = md5(cleaned_content.encode()).hexdigest()
+            data = {
+                "id": doc_id,
+                "vector": document.embedding,
+                "name": document.name,
+                "content_id": document.content_id,
+                "meta_data": document.meta_data,
+                "content": cleaned_content,
+                "usage": document.usage,
+                "content_hash": content_hash,
+            }
+            await self.async_client.upsert(
+                collection_name=self.collection,
+                data=data,
+            )
+            log_debug(f"Upserted document asynchronously: {document.name} ({document.meta_data})")
+            return data
+
+        # Process all documents in parallel
+        await asyncio.gather(*[process_document(doc) for doc in documents])
+
+        log_debug(f"Upserted {len(documents)} documents asynchronously in parallel")
+
+    def _get_metric_type(self) -> str:
+        """
+        Get the Milvus metric type string for the current distance setting.
+
+        Returns:
+            Milvus metric type string, defaults to "COSINE" if distance not found
+        """
+        return MILVUS_DISTANCE_MAP.get(self.distance, "COSINE")
+
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        """
+        Search for documents matching the query.
+
+        Args:
+            query (str): Query string to search for
+            limit (int): Maximum number of results to return
+            filters (Optional[Dict[str, Any]]): Filters to apply to the search
+
+        Returns:
+            List[Document]: List of matching documents
+        """
+        if isinstance(filters, List):
+            log_warning("Filters Expressions are not supported in Milvus. No filters will be applied.")
+            filters = None
+        if self.search_type == SearchType.hybrid:
+            return self.hybrid_search(query, limit)
+
+        query_embedding = self.embedder.get_embedding(query)
+        if query_embedding is None:
+            log_error(f"Error getting embedding for Query: {query}")
+            return []
+
+        results = self.client.search(
+            collection_name=self.collection,
+            data=[query_embedding],
+            filter=self._build_expr(filters),
+            output_fields=["*"],
+            limit=limit,
+        )
+
+        # Build search results
+        search_results: List[Document] = []
+        for result in results[0]:
+            search_results.append(
+                Document(
+                    id=result["id"],
+                    name=result["entity"].get("name", None),
+                    meta_data=result["entity"].get("meta_data", {}),
+                    content=result["entity"].get("content", ""),
+                    content_id=result["entity"].get("content_id", None),
+                    embedder=self.embedder,
+                    embedding=result["entity"].get("vector", None),
+                    usage=result["entity"].get("usage", None),
+                )
+            )
+
+        # Apply reranker if available
+        if self.reranker and search_results:
+            search_results = self.reranker.rerank(query=query, documents=search_results)
+            search_results = search_results[:limit]
+
+        log_info(f"Found {len(search_results)} documents")
+        return search_results
+
+    async def async_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        if isinstance(filters, List):
+            log_warning("Filters Expressions are not supported in Milvus. No filters will be applied.")
+            filters = None
+        if self.search_type == SearchType.hybrid:
+            return await self.async_hybrid_search(query, limit, filters)
+
+        query_embedding = self.embedder.get_embedding(query)
+        if query_embedding is None:
+            log_error(f"Error getting embedding for Query: {query}")
+            return []
+
+        results = await self.async_client.search(
+            collection_name=self.collection,
+            data=[query_embedding],
+            filter=self._build_expr(filters),
+            output_fields=["*"],
+            limit=limit,
+        )
+
+        # Build search results
+        search_results: List[Document] = []
+        for result in results[0]:
+            search_results.append(
+                Document(
+                    id=result["id"],
+                    name=result["entity"].get("name", None),
+                    meta_data=result["entity"].get("meta_data", {}),
+                    content=result["entity"].get("content", ""),
+                    content_id=result["entity"].get("content_id", None),
+                    embedder=self.embedder,
+                    embedding=result["entity"].get("vector", None),
+                    usage=result["entity"].get("usage", None),
+                )
+            )
+
+        log_info(f"Found {len(search_results)} documents")
+        return search_results
+
+    def hybrid_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        """
+        Perform a hybrid search combining dense and sparse vector similarity.
+
+        Args:
+            query (str): Query string to search for
+            limit (int): Maximum number of results to return
+            filters (Optional[Dict[str, Any]]): Filters to apply to the search
+
+        Returns:
+            List[Document]: List of matching documents
+        """
+        from pymilvus import AnnSearchRequest, RRFRanker
+
+        # Get query embeddings
+        dense_vector = self.embedder.get_embedding(query)
+        sparse_vector = self._get_sparse_vector(query)
+
+        if dense_vector is None:
+            log_error(f"Error getting dense embedding for Query: {query}")
+            return []
+
+        if self._client is None:
+            log_error("Milvus client not initialized")
+            return []
+
+        try:
+            # Refer to docs for details- https://milvus.io/docs/multi-vector-search.md
+
+            # Create search request for dense vectors
+            dense_search_param = {
+                "data": [dense_vector],
+                "anns_field": "dense_vector",
+                "param": {"metric_type": self._get_metric_type(), "params": {"nprobe": 10}},
+                "limit": limit
+                * 2,  # Fetch more candidates for better reranking quality - each vector search returns 2x results which are then merged and reranked
+            }
+
+            # Create search request for sparse vectors
+            sparse_search_param = {
+                "data": [sparse_vector],
+                "anns_field": "sparse_vector",
+                "param": {"metric_type": "IP", "params": {"drop_ratio_build": 0.2}},
+                "limit": limit * 2,  # Match dense search limit to ensure balanced candidate pool for reranking
+            }
+
+            # Create search requests
+            dense_request = AnnSearchRequest(**dense_search_param)
+            sparse_request = AnnSearchRequest(**sparse_search_param)
+            reqs = [dense_request, sparse_request]
+
+            # Use RRFRanker for balanced importance between vectors
+            ranker = RRFRanker(60)  # Default k=60
+
+            log_info("Performing hybrid search")
+            results = self._client.hybrid_search(
+                collection_name=self.collection, reqs=reqs, ranker=ranker, limit=limit, output_fields=["*"]
+            )
+
+            # Build search results
+            search_results: List[Document] = []
+            for hits in results:
+                for hit in hits:
+                    entity = hit.get("entity", {})
+                    meta_data = json.loads(entity.get("meta_data", "{}")) if entity.get("meta_data") else {}
+                    usage = json.loads(entity.get("usage", "{}")) if entity.get("usage") else None
+
+                    search_results.append(
+                        Document(
+                            id=hit.get("id"),
+                            name=entity.get("name", None),
+                            meta_data=meta_data,  # Now a dictionary
+                            content=entity.get("content", ""),
+                            content_id=entity.get("content_id", None),
+                            embedder=self.embedder,
+                            embedding=entity.get("dense_vector", None),
+                            usage=usage,  # Now a dictionary or None
+                        )
+                    )
+
+            # Apply additional reranking if custom reranker is provided
+            if self.reranker and search_results:
+                search_results = self.reranker.rerank(query=query, documents=search_results)
+
+            log_info(f"Found {len(search_results)} documents")
+            return search_results
+
+        except Exception as e:
+            log_error(f"Error during hybrid search: {e}")
+            return []
+
870
+ async def async_hybrid_search(
871
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
872
+ ) -> List[Document]:
873
+ """
874
+ Perform an asynchronous hybrid search combining dense and sparse vector similarity.
875
+
876
+ Args:
877
+ query (str): Query string to search for
878
+ limit (int): Maximum number of results to return
879
+ filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search (accepted for interface compatibility; not currently applied in hybrid search)
880
+
881
+ Returns:
882
+ List[Document]: List of matching documents
883
+ """
884
+ from pymilvus import AnnSearchRequest, RRFRanker
885
+
886
+ # Get query embeddings
887
+ dense_vector = self.embedder.get_embedding(query)
888
+ sparse_vector = self._get_sparse_vector(query)
889
+
890
+ if dense_vector is None:
891
+ log_error(f"Error getting dense embedding for Query: {query}")
892
+ return []
893
+
894
+ try:
895
+ # Refer to the Milvus docs for details: https://milvus.io/docs/multi-vector-search.md
896
+
897
+ # Create search request for dense vectors
898
+ dense_search_param = {
899
+ "data": [dense_vector],
900
+ "anns_field": "dense_vector",
901
+ "param": {"metric_type": self._get_metric_type(), "params": {"nprobe": 10}},
902
+ "limit": limit
903
+ * 2, # Fetch more candidates for better reranking quality - each vector search returns 2x results which are then merged and reranked
904
+ }
905
+
906
+ # Create search request for sparse vectors
907
+ sparse_search_param = {
908
+ "data": [sparse_vector],
909
+ "anns_field": "sparse_vector",
910
+ "param": {"metric_type": "IP", "params": {"drop_ratio_build": 0.2}},
911
+ "limit": limit * 2, # Match dense search limit to ensure balanced candidate pool for reranking
912
+ }
913
+
914
+ # Create search requests
915
+ dense_request = AnnSearchRequest(**dense_search_param)
916
+ sparse_request = AnnSearchRequest(**sparse_search_param)
917
+ reqs = [dense_request, sparse_request]
918
+
919
+ # Use RRFRanker for balanced importance between vectors
920
+ ranker = RRFRanker(60) # Default k=60
921
+
922
+ log_info("Performing async hybrid search")
923
+ results = await self.async_client.hybrid_search(
924
+ collection_name=self.collection, reqs=reqs, ranker=ranker, limit=limit, output_fields=["*"]
925
+ )
926
+
927
+ # Build search results
928
+ search_results: List[Document] = []
929
+ for hits in results:
930
+ for hit in hits:
931
+ entity = hit.get("entity", {})
932
+ meta_data = json.loads(entity.get("meta_data", "{}")) if entity.get("meta_data") else {}
933
+ usage = json.loads(entity.get("usage", "{}")) if entity.get("usage") else None
934
+
935
+ search_results.append(
936
+ Document(
937
+ id=hit.get("id"),
938
+ name=entity.get("name", None),
939
+ meta_data=meta_data, # Now a dictionary
940
+ content=entity.get("content", ""),
+ content_id=entity.get("content_id", None),
941
+ embedder=self.embedder,
942
+ embedding=entity.get("dense_vector", None),
943
+ usage=usage, # Now a dictionary or None
944
+ )
945
+ )
946
+
947
+ # Apply additional reranking if custom reranker is provided
948
+ if self.reranker and search_results:
949
+ search_results = self.reranker.rerank(query=query, documents=search_results)
950
+
951
+ log_info(f"Found {len(search_results)} documents")
952
+ return search_results
953
+
954
+ except Exception as e:
955
+ log_error(f"Error during async hybrid search: {e}")
956
+ return []
957
+
958
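The async variant mirrors the sync path but routes the request through AsyncMilvusClient. A short sketch, reusing the assumed vector_db instance from the previous example:

import asyncio

async def main() -> None:
    # Same semantics as hybrid_search, awaited on the async Milvus client.
    results = await vector_db.async_hybrid_search(query="vector databases", limit=3)
    print([doc.id for doc in results])

asyncio.run(main())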
+ def drop(self) -> None:
959
+ if self.exists():
960
+ log_debug(f"Deleting collection: {self.collection}")
961
+ self.client.drop_collection(self.collection)
962
+
963
+ async def async_drop(self) -> None:
964
+ """
965
+ Drop collection asynchronously.
966
+ AsyncMilvusClient supports drop_collection().
967
+ """
968
+ # Check using synchronous client
969
+ if self.client.has_collection(self.collection):
970
+ log_debug(f"Deleting collection asynchronously: {self.collection}")
971
+ await self.async_client.drop_collection(self.collection)
972
+
973
+ def exists(self) -> bool:
974
+ if self.client:
975
+ if self.client.has_collection(self.collection):
976
+ return True
977
+ return False
978
+
979
+ async def async_exists(self) -> bool:
980
+ """
981
+ Check if collection exists asynchronously.
982
+
983
+ has_collection() is not supported by AsyncMilvusClient,
984
+ so we use the synchronous client.
985
+ """
986
+ return self.client.has_collection(self.collection)
987
+
988
+ def get_count(self) -> int:
989
+ return self.client.get_collection_stats(collection_name=self.collection)["row_count"]
990
+
991
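A small housekeeping sketch using the three methods above (same assumed vector_db instance); note that get_count reads the collection's row-count statistic, which Milvus may report with some lag after recent inserts or deletes.

if vector_db.exists():
    print("rows:", vector_db.get_count())
    vector_db.drop()  # removes the collection entirely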
+ def delete(self) -> bool:
992
+ if self.client:
993
+ self.client.drop_collection(self.collection)
994
+ return True
995
+ return False
996
+
997
+ def delete_by_id(self, id: str) -> bool:
998
+ """
999
+ Delete a document by its ID.
1000
+
1001
+ Args:
1002
+ id (str): The document ID to delete
1003
+
1004
+ Returns:
1005
+ bool: True if document was deleted, False otherwise
1006
+ """
1007
+ try:
1008
+ log_debug(f"Milvus VectorDB : Deleting document with ID {id}")
1009
+ if not self.id_exists(id):
1010
+ return False
1011
+
1012
+ # Delete by ID using Milvus delete operation
1013
+ self.client.delete(collection_name=self.collection, ids=[id])
1014
+ log_info(f"Deleted document with ID '{id}' from collection '{self.collection}'.")
1015
+ return True
1016
+ except Exception as e:
1017
+ log_info(f"Error deleting document with ID {id}: {e}")
1018
+ return False
1019
+
1020
+ def delete_by_name(self, name: str) -> bool:
1021
+ """
1022
+ Delete documents by name.
1023
+
1024
+ Args:
1025
+ name (str): The document name to delete
1026
+
1027
+ Returns:
1028
+ bool: True if documents were deleted, False otherwise
1029
+ """
1030
+ try:
1031
+ log_debug(f"Milvus VectorDB : Deleting documents with name {name}")
1032
+ if not self.name_exists(name):
1033
+ return False
1034
+
1035
+ # Delete by name using Milvus delete operation with filter
1036
+ expr = f'name == "{name}"'
1037
+ self.client.delete(collection_name=self.collection, filter=expr)
1038
+ log_info(f"Deleted documents with name '{name}' from collection '{self.collection}'.")
1039
+ return True
1040
+ except Exception as e:
1041
+ log_info(f"Error deleting documents with name {name}: {e}")
1042
+ return False
1043
+
1044
+ def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
1045
+ """
1046
+ Delete documents by metadata.
1047
+
1048
+ Args:
1049
+ metadata (Dict[str, Any]): The metadata to match for deletion
1050
+
1051
+ Returns:
1052
+ bool: True if documents were deleted, False otherwise
1053
+ """
1054
+ try:
1055
+ log_debug(f"Milvus VectorDB : Deleting documents with metadata {metadata}")
1056
+
1057
+ # Build filter expression for metadata matching
1058
+ expr = self._build_expr(metadata)
1059
+ if not expr:
1060
+ return False
1061
+
1062
+ # Delete by metadata using Milvus delete operation with filter
1063
+ self.client.delete(collection_name=self.collection, filter=expr)
1064
+ log_info(f"Deleted documents with metadata '{metadata}' from collection '{self.collection}'.")
1065
+ return True
1066
+ except Exception as e:
1067
+ log_info(f"Error deleting documents with metadata {metadata}: {e}")
1068
+ return False
1069
+
1070
+ def delete_by_content_id(self, content_id: str) -> bool:
1071
+ """
1072
+ Delete documents by content ID.
1073
+
1074
+ Args:
1075
+ content_id (str): The content ID to delete
1076
+
1077
+ Returns:
1078
+ bool: True if documents were deleted, False otherwise
1079
+ """
1080
+ try:
1081
+ log_debug(f"Milvus VectorDB : Deleting documents with content_id {content_id}")
1082
+
1083
+ # Delete by content_id using Milvus delete operation with filter
1084
+ expr = f'content_id == "{content_id}"'
1085
+ self.client.delete(collection_name=self.collection, filter=expr)
1086
+ log_info(f"Deleted documents with content_id '{content_id}' from collection '{self.collection}'.")
1087
+ return True
1088
+ except Exception as e:
1089
+ log_info(f"Error deleting documents with content_id {content_id}: {e}")
1090
+ return False
1091
+
1092
+ def _build_expr(self, filters: Optional[Dict[str, Any]]) -> Optional[str]:
1093
+ """Build Milvus expression from filters."""
1094
+ if not filters:
1095
+ return None
1096
+
1097
+ expressions = []
1098
+ for k, v in filters.items():
1099
+ if isinstance(v, (list, tuple)):
1100
+ # For array values, use json_contains_any
1101
+ values_str = json.dumps(v)
1102
+ expr = f'json_contains_any(meta_data, {values_str}, "{k}")'
1103
+ elif isinstance(v, str):
1104
+ # For string values
1105
+ expr = f'meta_data["{k}"] == "{v}"'
1106
+ elif isinstance(v, bool):
1107
+ # For boolean values
1108
+ expr = f'meta_data["{k}"] == {str(v).lower()}'
1109
+ elif isinstance(v, (int, float)):
1110
+ # For numeric values
1111
+ expr = f'meta_data["{k}"] == {v}'
1112
+ elif v is None:
1113
+ # For null values
1114
+ expr = f'meta_data["{k}"] is null'
1115
+ else:
1116
+ # For other types, convert to string
1117
+ expr = f'meta_data["{k}"] == "{str(v)}"'
1118
+
1119
+ expressions.append(expr)
1120
+
1121
+ if expressions:
1122
+ return " and ".join(expressions)
1123
+ return None
1124
+
1125
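To make the filter translation concrete, here is a worked example of the expression _build_expr produces (field names are illustrative; the method is internal and is normally called via the public delete and search paths):

filters = {"source": "s3", "year": 2024, "tags": ["ml", "nlp"], "draft": False}
# _build_expr emits one clause per key and joins them with "and":
#   meta_data["source"] == "s3"
#   and meta_data["year"] == 2024
#   and json_contains_any(meta_data, ["ml", "nlp"], "tags")
#   and meta_data["draft"] == false
print(vector_db._build_expr(filters))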
+ def async_name_exists(self, name: str) -> bool:
1126
+ raise NotImplementedError(f"async_name_exists is not supported on {self.__class__.__name__}.")
1127
+
1128
+ def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
1129
+ """
1130
+ Update the metadata for documents with the given content_id.
1131
+
1132
+ Args:
1133
+ content_id (str): The content ID to update
1134
+ metadata (Dict[str, Any]): The metadata to update
1135
+ """
1136
+ try:
1137
+ # Search for documents with the given content_id
1138
+ search_expr = f'content_id == "{content_id}"'
1139
+ results = self.client.query(
1140
+ collection_name=self.collection, filter=search_expr, output_fields=["id", "meta_data", "filters"]
1141
+ )
1142
+
1143
+ if not results:
1144
+ log_debug(f"No documents found with content_id: {content_id}")
1145
+ return
1146
+
1147
+ # Update each document
1148
+ updated_count = 0
1149
+ for result in results:
1150
+ doc_id = result["id"]
1151
+ current_metadata = result.get("meta_data", {})
1152
+ current_filters = result.get("filters", {})
1153
+
1154
+ # Merge existing metadata with new metadata
1155
+ if isinstance(current_metadata, dict):
1156
+ updated_metadata = current_metadata.copy()
1157
+ updated_metadata.update(metadata)
1158
+ else:
1159
+ updated_metadata = metadata
1160
+
1161
+ if isinstance(current_filters, dict):
1162
+ updated_filters = current_filters.copy()
1163
+ updated_filters.update(metadata)
1164
+ else:
1165
+ updated_filters = metadata
1166
+
1167
+ # Update the document
1168
+ self.client.upsert(
1169
+ collection_name=self.collection,
1170
+ data=[{"id": doc_id, "meta_data": updated_metadata, "filters": updated_filters}],
1171
+ )
1172
+ updated_count += 1
1173
+
1174
+ log_debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")
1175
+
1176
+ except Exception as e:
1177
+ log_error(f"Error updating metadata for content_id '{content_id}': {e}")
1178
+ raise
1179
+
1180
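Sketch of a metadata update (the content_id and keys are made up): the new keys are merged into both meta_data and filters for every row with that content_id, overwriting duplicate keys and preserving the rest.

vector_db.update_metadata("content-abc", {"reviewed": True, "reviewer": "alice"})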
+ def get_supported_search_types(self) -> List[str]:
1181
+ """Get the supported search types for this vector database."""
1182
+ return [SearchType.vector, SearchType.hybrid]