agno 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1134 @@
1
+ from hashlib import md5
2
+ from typing import Any, Dict, List, Optional, Union
3
+
4
+ try:
5
+ from qdrant_client import AsyncQdrantClient, QdrantClient # noqa: F401
6
+ from qdrant_client.http import models
7
+ except ImportError:
8
+ raise ImportError(
9
+ "The `qdrant-client` package is not installed. Please install it via `pip install qdrant-client`."
10
+ )
11
+
12
+ from agno.filters import FilterExpr
13
+ from agno.knowledge.document import Document
14
+ from agno.knowledge.embedder import Embedder
15
+ from agno.knowledge.reranker.base import Reranker
16
+ from agno.utils.log import log_debug, log_error, log_info, log_warning
17
+ from agno.vectordb.base import VectorDb
18
+ from agno.vectordb.distance import Distance
19
+ from agno.vectordb.search import SearchType
20
+
21
# Default value of the `dense_vector_name` constructor argument of `Qdrant`.
+ DEFAULT_DENSE_VECTOR_NAME = "dense"
22
# Default value of the `sparse_vector_name` constructor argument of `Qdrant`.
+ DEFAULT_SPARSE_VECTOR_NAME = "sparse"
23
# Default `model_name` passed to `fastembed.SparseTextEmbedding` when
# keyword or hybrid search is enabled.
+ DEFAULT_SPARSE_MODEL = "Qdrant/bm25"
24
+
25
+
26
+ class Qdrant(VectorDb):
27
+ """Vector DB implementation powered by Qdrant - https://qdrant.tech/"""
28
+
29
def __init__(
    self,
    collection: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    id: Optional[str] = None,
    embedder: Optional[Embedder] = None,
    distance: Distance = Distance.cosine,
    location: Optional[str] = None,
    url: Optional[str] = None,
    port: Optional[int] = 6333,
    grpc_port: int = 6334,
    prefer_grpc: bool = False,
    https: Optional[bool] = None,
    api_key: Optional[str] = None,
    prefix: Optional[str] = None,
    timeout: Optional[float] = None,
    host: Optional[str] = None,
    path: Optional[str] = None,
    reranker: Optional[Reranker] = None,
    search_type: SearchType = SearchType.vector,
    dense_vector_name: str = DEFAULT_DENSE_VECTOR_NAME,
    sparse_vector_name: str = DEFAULT_SPARSE_VECTOR_NAME,
    hybrid_fusion_strategy: models.Fusion = models.Fusion.RRF,
    fastembed_kwargs: Optional[dict] = None,
    **kwargs,
):
    """Configure a Qdrant-backed vector database.

    No Qdrant client is created here; the connection settings are stored
    and the clients are built lazily by the `client` / `async_client`
    properties.

    Args:
        collection (str): Name of the Qdrant collection.
        name (Optional[str]): Name of the vector database.
        description (Optional[str]): Description of the vector database.
        id (Optional[str]): Identifier of the vector database. When omitted,
            one is generated from the first of `host`, `location`, `url`
            that is set (falling back to "localhost") and the collection name.
        embedder (Optional[Embedder]): Embedder for automatic vector generation.
            Defaults to `OpenAIEmbedder()` when not provided.
        distance (Distance): Distance metric to use (default: cosine).
        location (Optional[str]): `":memory:"` for in-memory, or str used as `url`.
            If `None`, use default host/port.
        url (Optional[str]): Full URL (scheme, host, port, prefix). Overrides
            host/port if provided.
        port (Optional[int]): REST API port (default: 6333).
        grpc_port (int): gRPC interface port (default: 6334).
        prefer_grpc (bool): Prefer gRPC over REST if True.
        https (Optional[bool]): Use HTTPS if True.
        api_key (Optional[str]): API key for Qdrant Cloud authentication.
        prefix (Optional[str]): URL path prefix (e.g., "service/v1").
        timeout (Optional[float]): Request timeout (REST: default 5s, gRPC: unlimited).
        host (Optional[str]): Qdrant host (default: "localhost" if not specified).
        path (Optional[str]): Path for local persistence (QdrantLocal).
        reranker (Optional[Reranker]): Optional reranker for result refinement.
        search_type (SearchType): Whether to use vector, keyword or hybrid search.
        dense_vector_name (str): Dense vector name.
        sparse_vector_name (str): Sparse vector name.
        hybrid_fusion_strategy (models.Fusion): Strategy for hybrid fusion.
        fastembed_kwargs (Optional[dict]): Keyword args for
            `fastembed.SparseTextEmbedding.__init__()`; they override the
            default `model_name`.
        **kwargs: Keyword args for `qdrant_client.QdrantClient.__init__()`.

    Raises:
        ValueError: If `collection` is empty.
        ImportError: If keyword/hybrid search is requested and `fastembed`
            cannot be imported.
    """
    if not collection:
        raise ValueError("Collection name must be provided.")

    # Derive an ID from the endpoint and collection when none is given.
    if id is None:
        from agno.utils.string import generate_id

        endpoint = host or location or url or "localhost"
        id = generate_id(f"{endpoint}#{collection}")

    super().__init__(id=id, name=name, description=description)

    self.collection: str = collection

    # Fall back to the OpenAI embedder when the caller supplies none.
    if embedder is None:
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder: Embedder = embedder
    self.dimensions: Optional[int] = self.embedder.dimensions

    self.distance: Distance = distance

    # Sync and async clients are created on first property access.
    self._client: Optional[QdrantClient] = None
    self._async_client: Optional[AsyncQdrantClient] = None

    # Connection settings forwarded to the Qdrant client constructors.
    self.location: Optional[str] = location
    self.url: Optional[str] = url
    self.port: Optional[int] = port
    self.grpc_port: int = grpc_port
    self.prefer_grpc: bool = prefer_grpc
    self.https: Optional[bool] = https
    self.api_key: Optional[str] = api_key
    self.prefix: Optional[str] = prefix
    self.timeout: Optional[float] = timeout
    self.host: Optional[str] = host
    self.path: Optional[str] = path

    self.reranker: Optional[Reranker] = reranker

    # Extra keyword arguments for the Qdrant client constructors.
    self.kwargs = kwargs

    self.search_type = search_type
    self.dense_vector_name = dense_vector_name
    self.sparse_vector_name = sparse_vector_name
    self.hybrid_fusion_strategy = hybrid_fusion_strategy

    # TODO(v2.0.0): Remove backward compatibility for unnamed vectors
    # TODO(v2.0.0): Make named vectors mandatory and simplify the codebase
    self.use_named_vectors = search_type == SearchType.hybrid

    # Only keyword and hybrid search need the sparse encoder.
    if self.search_type not in (SearchType.keyword, SearchType.hybrid):
        return

    try:
        from fastembed import SparseTextEmbedding  # type: ignore

        encoder_kwargs = dict(model_name=DEFAULT_SPARSE_MODEL)
        if fastembed_kwargs:
            encoder_kwargs.update(fastembed_kwargs)

        # Type ignore for mypy as SparseTextEmbedding constructor accepts flexible kwargs
        self.sparse_encoder = SparseTextEmbedding(**encoder_kwargs)  # type: ignore
    except ImportError as e:
        raise ImportError(
            "To use keyword/hybrid search, install the `fastembed` extra with `pip install fastembed`."
        ) from e
162
+
163
@property
def client(self) -> QdrantClient:
    """Return the synchronous Qdrant client, creating it on first access.

    The client is built from the connection settings stored on the instance
    and cached in `self._client`, so later accesses return the same object.

    Returns:
        QdrantClient: The cached (or newly created) client.
    """
    if self._client is None:
        from math import ceil

        log_debug("Creating Qdrant Client")
        # The timeout is forwarded as an integer. Round fractional values up
        # instead of truncating, so a sub-second timeout such as 0.5 becomes
        # 1 rather than 0.
        timeout = None if self.timeout is None else ceil(float(self.timeout))
        self._client = QdrantClient(
            location=self.location,
            url=self.url,
            port=self.port,
            grpc_port=self.grpc_port,
            prefer_grpc=self.prefer_grpc,
            https=self.https,
            api_key=self.api_key,
            prefix=self.prefix,
            timeout=timeout,
            host=self.host,
            path=self.path,
            **self.kwargs,
        )
    return self._client
182
+
183
@property
def async_client(self) -> AsyncQdrantClient:
    """Return the asynchronous Qdrant client, creating it on first access.

    The client is built from the connection settings stored on the instance
    and cached in `self._async_client`, so later accesses return the same
    object.

    Returns:
        AsyncQdrantClient: The cached (or newly created) async client.
    """
    if self._async_client is None:
        from math import ceil

        log_debug("Creating Async Qdrant Client")
        # The timeout is forwarded as an integer. Round fractional values up
        # instead of truncating, so a sub-second timeout such as 0.5 becomes
        # 1 rather than 0.
        timeout = None if self.timeout is None else ceil(float(self.timeout))
        self._async_client = AsyncQdrantClient(
            location=self.location,
            url=self.url,
            port=self.port,
            grpc_port=self.grpc_port,
            prefer_grpc=self.prefer_grpc,
            https=self.https,
            api_key=self.api_key,
            prefix=self.prefix,
            timeout=timeout,
            host=self.host,
            path=self.path,
            **self.kwargs,
        )
    return self._async_client
203
+
204
def create(self) -> None:
    """Create the collection if it does not exist yet.

    ``self.distance`` is translated to the Qdrant distance metric (cosine by
    default). Vector search uses a single unnamed dense vector; the other
    search types use a named dense vector, and keyword/hybrid search also
    registers a sparse vector.
    """
    if self.distance == Distance.l2:
        metric = models.Distance.EUCLID
    elif self.distance == Distance.max_inner_product:
        metric = models.Distance.DOT
    else:
        metric = models.Distance.COSINE

    if self.exists():
        return

    log_debug(f"Creating collection: {self.collection}")

    # Falls back to 1536 dimensions when none are configured.
    dense_params = models.VectorParams(size=self.dimensions or 1536, distance=metric)
    if self.search_type == SearchType.vector:
        # Maintain backward compatibility with unnamed vectors
        vectors_config = dense_params
    else:
        # Use named vectors for hybrid search
        vectors_config = {self.dense_vector_name: dense_params}  # type: ignore

    sparse_config = None
    if self.search_type in [SearchType.keyword, SearchType.hybrid]:
        sparse_config = {self.sparse_vector_name: models.SparseVectorParams()}

    self.client.create_collection(
        collection_name=self.collection,
        vectors_config=vectors_config,
        sparse_vectors_config=sparse_config,
    )
231
+
232
async def async_create(self) -> None:
    """Create the collection asynchronously if it does not exist yet.

    ``self.distance`` is translated to the Qdrant distance metric (cosine by
    default). Vector search uses a single unnamed dense vector; the other
    search types use a named dense vector, and keyword/hybrid search also
    registers a sparse vector.
    """
    if self.distance == Distance.l2:
        metric = models.Distance.EUCLID
    elif self.distance == Distance.max_inner_product:
        metric = models.Distance.DOT
    else:
        metric = models.Distance.COSINE

    if await self.async_exists():
        return

    log_debug(f"Creating collection asynchronously: {self.collection}")

    # Falls back to 1536 dimensions when none are configured.
    dense_params = models.VectorParams(size=self.dimensions or 1536, distance=metric)
    if self.search_type == SearchType.vector:
        # Maintain backward compatibility with unnamed vectors
        vectors_config = dense_params
    else:
        # Use named vectors for hybrid search
        vectors_config = {self.dense_vector_name: dense_params}  # type: ignore

    sparse_config = None
    if self.search_type in [SearchType.keyword, SearchType.hybrid]:
        sparse_config = {self.sparse_vector_name: models.SparseVectorParams()}

    await self.async_client.create_collection(
        collection_name=self.collection,
        vectors_config=vectors_config,
        sparse_vectors_config=sparse_config,
    )
261
+
262
def doc_exists(self, document: Document) -> bool:
    """
    Check whether a point for the given document is stored in the collection.

    The point ID is the MD5 hex digest of the document content after NUL
    characters are replaced with U+FFFD.

    Args:
        document (Document): Document to validate

    Returns:
        bool: True if a point with that ID was retrieved, False otherwise.
    """
    if not self.client:
        return False

    sanitized = document.content.replace("\x00", "\ufffd")
    point_id = md5(sanitized.encode()).hexdigest()
    matches = self.client.retrieve(collection_name=self.collection, ids=[point_id])
    return len(matches) > 0
278
+
279
async def async_doc_exists(self, document: Document) -> bool:
    """Check asynchronously whether a point for the given document exists.

    The point ID is the MD5 hex digest of the document content after NUL
    characters are replaced with U+FFFD.
    """
    sanitized = document.content.replace("\x00", "\ufffd")
    point_id = md5(sanitized.encode()).hexdigest()
    matches = await self.async_client.retrieve(collection_name=self.collection, ids=[point_id])
    return len(matches) > 0
288
+
289
def name_exists(self, name: str) -> bool:
    """
    Check whether any point in the collection has the given ``name`` payload value.

    Args:
        name (str): The name of the document to check.

    Returns:
        bool: True if a document with the given name exists, False otherwise.
    """
    if not self.client:
        return False

    name_filter = models.Filter(must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))])
    # One matching point is enough to answer the question.
    scroll_result = self.client.scroll(
        collection_name=self.collection,
        scroll_filter=name_filter,
        limit=1,
    )
    matching_points = scroll_result[0]
    return len(matching_points) > 0
309
+
310
async def async_name_exists(self, name: str) -> bool:  # type: ignore[override]
    """
    Check asynchronously whether any point has the given ``name`` payload value.

    Args:
        name (str): The name of the document to check.

    Returns:
        bool: True if a document with the given name exists, False otherwise.
    """
    if not self.async_client:
        return False

    name_filter = models.Filter(must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))])
    # One matching point is enough to answer the question.
    scroll_result = await self.async_client.scroll(
        collection_name=self.collection,
        scroll_filter=name_filter,
        limit=1,
    )
    matching_points = scroll_result[0]
    return len(matching_points) > 0
330
+
331
def insert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
    batch_size: int = 10,
) -> None:
    """
    Insert documents into the database.

    Each document becomes one point whose ID is the MD5 hex digest of its
    content (with NUL characters replaced by U+FFFD). All points are sent in
    a single non-blocking upsert call.

    Args:
        content_hash (str): Content hash stored in every point's payload
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents;
            they are merged into the ``meta_data`` payload of every point
        batch_size (int): Batch size for inserting documents (currently not used by this method)
    """
    log_debug(f"Inserting {len(documents)} documents")
    points = []
    for document in documents:
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        if self.search_type == SearchType.vector:
            # For vector search, maintain backward compatibility with unnamed vectors
            document.embed(embedder=self.embedder)
            vector = document.embedding  # type: ignore
        else:
            # For other search types, use named vectors
            vector = {}
            if self.search_type in [SearchType.hybrid]:
                document.embed(embedder=self.embedder)
                vector[self.dense_vector_name] = document.embedding

            if self.search_type in [SearchType.keyword, SearchType.hybrid]:
                vector[self.sparse_vector_name] = next(
                    iter(self.sparse_encoder.embed([document.content]))
                ).as_object()  # type: ignore

        # Merge filters into a copy so the caller's document metadata is not
        # mutated, and so a missing (None) meta_data does not raise.
        meta_data = document.meta_data
        if filters:
            meta_data = {**(document.meta_data or {}), **filters}

        # Create payload with document properties
        payload = {
            "name": document.name,
            "meta_data": meta_data,
            "content": cleaned_content,
            "usage": document.usage,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }

        points.append(
            models.PointStruct(
                id=doc_id,
                vector=vector,  # type: ignore
                payload=payload,
            )
        )
        log_debug(f"Inserted document: {document.name} ({meta_data})")

    if len(points) > 0:
        self.client.upsert(collection_name=self.collection, wait=False, points=points)
    log_debug(f"Upsert {len(points)} documents")
402
+
403
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Insert documents asynchronously.

    For vector and hybrid search the documents are embedded first: in one
    batch call when the embedder enables and supports it, otherwise one by
    one. A failed batch call falls back to individual embedding unless the
    error looks like a rate limit, in which case it is re-raised.

    Args:
        content_hash (str): Content hash stored in every point's payload
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents;
            they are merged into the ``meta_data`` payload of every point
    """
    import asyncio

    log_debug(f"Inserting {len(documents)} documents asynchronously")

    # Apply batch embedding when needed for vector or hybrid search
    if self.search_type in [SearchType.vector, SearchType.hybrid]:
        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
            # Use batch embedding when enabled and supported
            try:
                # Extract content from all documents
                doc_contents = [doc.content for doc in documents]

                # Get batch embeddings and usage
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

                # Process documents with pre-computed embeddings
                for j, doc in enumerate(documents):
                    try:
                        if j < len(embeddings):
                            doc.embedding = embeddings[j]
                            doc.usage = usages[j] if j < len(usages) else None
                    except Exception as e:
                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")

            except Exception as e:
                # Check if this is a rate limit error - don't fall back as it would make things worse
                error_str = str(e).lower()
                is_rate_limit = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )

                if is_rate_limit:
                    log_error(f"Rate limit detected during batch embedding. {e}")
                    raise

                log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                # Fall back to individual embedding
                for doc in documents:
                    doc.embed(embedder=self.embedder)
        else:
            # Use individual embedding
            for doc in documents:
                doc.embed(embedder=self.embedder)

    async def process_document(document):
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        if self.search_type == SearchType.vector:
            # For vector search, maintain backward compatibility with unnamed vectors
            vector = document.embedding  # Already embedded above
        else:
            # For other search types, use named vectors
            vector = {}
            if self.search_type in [SearchType.hybrid]:
                vector[self.dense_vector_name] = document.embedding  # Already embedded above

            # The sparse embedding is computed once per document.
            if self.search_type in [SearchType.keyword, SearchType.hybrid]:
                vector[self.sparse_vector_name] = next(
                    iter(self.sparse_encoder.embed([document.content]))
                ).as_object()  # type: ignore

        # Merge filters into a copy so the caller's document metadata is not
        # mutated, and so a missing (None) meta_data does not raise.
        meta_data = document.meta_data
        if filters:
            meta_data = {**(document.meta_data or {}), **filters}

        # Create payload with document properties
        payload = {
            "name": document.name,
            "meta_data": meta_data,
            "content": cleaned_content,
            "usage": document.usage,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }

        log_debug(f"Inserted document asynchronously: {document.name} ({meta_data})")
        return models.PointStruct(  # type: ignore
            id=doc_id,
            vector=vector,  # type: ignore
            payload=payload,
        )

    # Build the points for all documents
    points = await asyncio.gather(*[process_document(doc) for doc in documents])

    if len(points) > 0:
        await self.async_client.upsert(collection_name=self.collection, wait=False, points=points)
    log_debug(f"Upserted {len(points)} documents asynchronously")
511
+
512
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Upsert documents into the database.

    Points already stored under ``content_hash`` are deleted first, then the
    documents are inserted.

    Args:
        content_hash (str): Content hash identifying the points to replace
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting
    """
    log_debug("Redirecting the request to insert")
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)
    self.insert(content_hash=content_hash, documents=documents, filters=filters)
524
+
525
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Upsert documents asynchronously.

    Mirrors the synchronous ``upsert``: points already stored under
    ``content_hash`` are removed before the documents are inserted, so
    outdated chunks of the same content do not linger in the collection.

    Args:
        content_hash (str): Content hash identifying the points to replace
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting
    """
    log_debug("Redirecting the async request to async_insert")

    stale_points = models.Filter(
        must=[models.FieldCondition(key="content_hash", match=models.MatchValue(value=content_hash))]
    )
    try:
        await self.async_client.delete(
            collection_name=self.collection,
            points_selector=stale_points,
            wait=True,  # Wait for the operation to complete
        )
    except Exception as e:
        # Best effort, like the sync path: a failed cleanup must not block the insert.
        log_warning(f"Error deleting points with content_hash {content_hash}: {e}")

    await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
531
+
532
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Search for documents in the collection.

    The query is dispatched to the vector, keyword or hybrid runner
    according to ``self.search_type``.

    Args:
        query (str): Query to search for
        limit (int): Number of search results to return
        filters (Optional[Dict[str, Any]]): Filters to apply while searching;
            list-style filter expressions are ignored with a warning

    Raises:
        ValueError: If ``self.search_type`` is not a supported search type.
    """
    if isinstance(filters, List):
        log_warning("Filters Expressions are not supported in Qdrant. No filters will be applied.")
        filters = None

    qdrant_filter = self._format_filters(filters or {})  # type: ignore
    mode = self.search_type
    if mode == SearchType.vector:
        hits = self._run_vector_search_sync(query, limit, qdrant_filter)  # type: ignore
    elif mode == SearchType.keyword:
        hits = self._run_keyword_search_sync(query, limit, qdrant_filter)  # type: ignore
    elif mode == SearchType.hybrid:
        hits = self._run_hybrid_search_sync(query, limit, qdrant_filter)  # type: ignore
    else:
        raise ValueError(f"Unsupported search type: {self.search_type}")

    return self._build_search_results(hits, query)
559
+
560
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Search for documents in the collection asynchronously.

    The query is dispatched to the async vector, keyword or hybrid runner
    according to ``self.search_type``. List-style filter expressions are
    ignored with a warning.

    Raises:
        ValueError: If ``self.search_type`` is not a supported search type.
    """
    if isinstance(filters, List):
        log_warning("Filters Expressions are not supported in Qdrant. No filters will be applied.")
        filters = None

    qdrant_filter = self._format_filters(filters or {})  # type: ignore
    mode = self.search_type
    if mode == SearchType.vector:
        hits = await self._run_vector_search_async(query, limit, qdrant_filter)  # type: ignore
    elif mode == SearchType.keyword:
        hits = await self._run_keyword_search_async(query, limit, qdrant_filter)  # type: ignore
    elif mode == SearchType.hybrid:
        hits = await self._run_hybrid_search_async(query, limit, qdrant_filter)  # type: ignore
    else:
        raise ValueError(f"Unsupported search type: {self.search_type}")

    return self._build_search_results(hits, query)
578
+
579
def _run_hybrid_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a hybrid query: sparse and dense prefetches fused with ``self.hybrid_fusion_strategy``."""
    dense_embedding = self.embedder.get_embedding(query)
    sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()

    sparse_branch = models.Prefetch(
        query=models.SparseVector(**sparse_embedding),  # type: ignore
        limit=limit,
        using=self.sparse_vector_name,
    )
    dense_branch = models.Prefetch(query=dense_embedding, limit=limit, using=self.dense_vector_name)

    response = self.client.query_points(
        collection_name=self.collection,
        prefetch=[sparse_branch, dense_branch],
        query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
        with_vectors=True,
        with_payload=True,
        limit=limit,
        query_filter=filters,
    )
    return response.points
604
+
605
def _run_vector_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a dense-vector query and return the scored points."""
    query_vector = self.embedder.get_embedding(query)

    request = {
        "collection_name": self.collection,
        "query": query_vector,
        "with_vectors": True,
        "with_payload": True,
        "limit": limit,
        "query_filter": filters,
    }
    # TODO(v2.0.0): Remove this conditional and always use named vectors
    if self.use_named_vectors:
        request["using"] = self.dense_vector_name
    # Otherwise: backward compatibility mode - use unnamed vector

    response = self.client.query_points(**request)
    return response.points
635
+
636
def _run_keyword_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a sparse-vector (keyword) query and return the scored points."""
    encoded_query = next(iter(self.sparse_encoder.embed([query])))
    sparse_embedding = encoded_query.as_object()
    sparse_query = models.SparseVector(**sparse_embedding)  # type: ignore

    response = self.client.query_points(
        collection_name=self.collection,
        query=sparse_query,
        with_vectors=True,
        with_payload=True,
        limit=limit,
        using=self.sparse_vector_name,
        query_filter=filters,
    )
    return response.points
653
+
654
async def _run_vector_search_async(
    self,
    query: str,
    limit: int,
    filters: Optional[Dict[str, Any]],
) -> List[models.ScoredPoint]:
    """Run a dense-vector query with the async client and return the scored points.

    The query embedding itself is obtained through the embedder's
    synchronous ``get_embedding`` call.
    """
    query_vector = self.embedder.get_embedding(query)

    request = {
        "collection_name": self.collection,
        "query": query_vector,
        "with_vectors": True,
        "with_payload": True,
        "limit": limit,
        "query_filter": filters,
    }
    # TODO(v2.0.0): Remove this conditional and always use named vectors
    if self.use_named_vectors:
        request["using"] = self.dense_vector_name
    # Otherwise: backward compatibility mode - use unnamed vector

    response = await self.async_client.query_points(**request)
    return response.points
684
+
685
async def _run_keyword_search_async(
    self,
    query: str,
    limit: int,
    filters: Optional[Dict[str, Any]],
) -> List[models.ScoredPoint]:
    """Run a sparse-vector (keyword) query with the async client and return the scored points."""
    encoded_query = next(iter(self.sparse_encoder.embed([query])))
    sparse_embedding = encoded_query.as_object()
    sparse_query = models.SparseVector(**sparse_embedding)  # type: ignore

    response = await self.async_client.query_points(
        collection_name=self.collection,
        query=sparse_query,
        with_vectors=True,
        with_payload=True,
        limit=limit,
        using=self.sparse_vector_name,
        query_filter=filters,
    )
    return response.points
702
+
703
async def _run_hybrid_search_async(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a hybrid query with the async client: sparse and dense prefetches fused with ``self.hybrid_fusion_strategy``."""
    dense_embedding = self.embedder.get_embedding(query)
    sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()

    sparse_branch = models.Prefetch(
        query=models.SparseVector(**sparse_embedding),  # type: ignore
        limit=limit,
        using=self.sparse_vector_name,
    )
    dense_branch = models.Prefetch(query=dense_embedding, limit=limit, using=self.dense_vector_name)

    response = await self.async_client.query_points(
        collection_name=self.collection,
        prefetch=[sparse_branch, dense_branch],
        query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
        with_vectors=True,
        with_payload=True,
        limit=limit,
        query_filter=filters,
    )
    return response.points
728
+
729
def _build_search_results(self, results, query: str) -> List[Document]:
    """Convert scored points into ``Document`` objects and optionally rerank them.

    Points without a payload are skipped. When ``self.reranker`` is set, the
    documents are passed through it together with the original query.
    """
    search_results: List[Document] = [
        Document(
            name=hit.payload["name"],
            meta_data=hit.payload["meta_data"],
            content=hit.payload["content"],
            embedder=self.embedder,
            embedding=hit.vector,  # type: ignore
            usage=hit.payload.get("usage"),
            content_id=hit.payload.get("content_id"),
        )
        for hit in results
        if hit.payload is not None
    ]

    if self.reranker:
        search_results = self.reranker.rerank(query=query, documents=search_results)

    log_info(f"Found {len(search_results)} documents")
    return search_results
752
+
753
def _format_filters(self, filters: Optional[Dict[str, Any]]) -> Optional[models.Filter]:
    """Translate a plain filter dict into a Qdrant ``Filter`` of ``must`` conditions.

    Keys without a dot are treated as metadata fields and prefixed with
    ``meta_data.``; keys that already contain a dot are used as given. A dict
    value is expanded one level into ``key.sub_key`` conditions. Returns
    ``None`` when there is nothing to filter on.
    """
    if not filters:
        return None

    conditions = []
    for key, value in filters.items():
        # A key with a dot is assumed to be fully qualified already.
        field = key if "." in key else f"meta_data.{key}"

        if isinstance(value, dict):
            # Handle nested dictionaries
            conditions.extend(
                models.FieldCondition(key=f"{field}.{sub_key}", match=models.MatchValue(value=sub_value))
                for sub_key, sub_value in value.items()
            )
        else:
            # Handle direct key-value pairs
            conditions.append(models.FieldCondition(key=field, match=models.MatchValue(value=value)))

    if not conditions:
        return None
    return models.Filter(must=conditions)  # type: ignore
777
+
778
def optimize(self) -> None:
    """Do nothing; this method has no implementation here."""
    return None
780
+
781
def drop(self) -> None:
    """Delete the collection if it exists; do nothing otherwise."""
    if not self.exists():
        return
    log_debug(f"Deleting collection: {self.collection}")
    self.client.delete_collection(self.collection)
785
+
786
async def async_drop(self) -> None:
    """Delete the collection asynchronously if it exists; do nothing otherwise."""
    collection_present = await self.async_exists()
    if not collection_present:
        return
    log_debug(f"Deleting collection asynchronously: {self.collection}")
    await self.async_client.delete_collection(self.collection)
791
+
792
def exists(self) -> bool:
    """Return whether the configured collection exists, as reported by the client."""
    collection_present = self.client.collection_exists(collection_name=self.collection)
    return collection_present
795
+
796
async def async_exists(self) -> bool:
    """Return whether the configured collection exists, using the async client."""
    collection_present = await self.async_client.collection_exists(collection_name=self.collection)
    return collection_present
799
+
800
def get_count(self) -> int:
    """Return the exact number of points in the collection."""
    return self.client.count(collection_name=self.collection, exact=True).count
803
+
804
def point_exists(self, id: str) -> bool:
    """Check if a point with the given ID exists in the collection.

    The lookup is logged at info level. Any error during retrieval is logged
    and reported as ``False``.
    """
    try:
        log_info(f"Checking if point with ID '{id}' (type: {type(id)}) exists in collection '{self.collection}'")
        # Only the IDs matter here, so payloads and vectors are not fetched.
        points = self.client.retrieve(
            collection_name=self.collection,
            ids=[id],
            with_payload=False,
            with_vectors=False,
        )
        log_info(f"Retrieved {len(points)} points for ID '{id}'")
        found = len(points) > 0
        if found:
            log_info(f"Found point with ID: {points[0].id} (type: {type(points[0].id)})")
        return found
    except Exception as e:
        log_info(f"Error checking if point {id} exists: {e}")
        return False
818
+
819
def delete(self) -> bool:
    """Delete the whole collection and return the client's result."""
    outcome = self.client.delete_collection(collection_name=self.collection)
    return outcome
821
+
822
def delete_by_id(self, id: str) -> bool:
    """Delete the point with the given ID.

    Returns True when the point was deleted or did not exist, and False when
    an error occurred.
    """
    try:
        # Check if point exists before deletion
        if self.point_exists(id):
            self.client.delete(
                collection_name=self.collection,
                points_selector=models.PointIdsList(points=[id]),
                wait=True,  # Wait for the operation to complete
            )
        else:
            log_warning(f"Point with ID {id} does not exist")
        return True
    except Exception as e:
        log_info(f"Error deleting point with ID {id}: {e}")
        return False
839
+
840
def delete_by_name(self, name: str) -> bool:
    """Delete all points that have the specified name in their payload (precise match).

    Returns True when the matching points were deleted or none existed, and
    False when the deletion did not complete or an error occurred.
    """
    try:
        log_info(f"Attempting to delete all points with name: {name}")

        # Filter selecting every point whose payload name matches exactly
        name_filter = models.Filter(must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))])

        # Count first so the log can report how many points are affected
        count_result = self.client.count(collection_name=self.collection, count_filter=name_filter, exact=True)
        if count_result.count == 0:
            log_warning(f"No points found with name: {name}")
            return True

        log_info(f"Found {count_result.count} points to delete with name: {name}")

        result = self.client.delete(
            collection_name=self.collection,
            points_selector=name_filter,
            wait=True,  # Wait for the operation to complete
        )

        if result.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for name {name}. Status: {result.status}")
            return False

        log_info(f"Successfully deleted {count_result.count} points with name: {name}")
        return True

    except Exception as e:
        log_warning(f"Error deleting points with name {name}: {e}")
        return False
877
+
878
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete all points where the given metadata is contained in the meta_data payload field.

    Every key/value pair must match (``must`` semantics). An empty
    ``metadata`` dict is rejected: it would produce a filter without any
    condition, which places no constraint on the delete.

    Args:
        metadata (Dict[str, Any]): Key/value pairs matched against ``meta_data.<key>``.

    Returns:
        bool: True when the matching points were deleted or none existed;
        False when no metadata was given, the deletion did not complete,
        or an error occurred.
    """
    if not metadata:
        log_warning("No metadata provided for deletion; refusing to delete without a filter")
        return False

    try:
        log_info(f"Attempting to delete all points with metadata: {metadata}")

        # Use the meta_data prefix since that's how metadata is stored in the payload
        filter_conditions = [
            models.FieldCondition(key=f"meta_data.{key}", match=models.MatchValue(value=value))
            for key, value in metadata.items()
        ]

        # Create a filter that requires ALL metadata conditions to match
        filter_condition = models.Filter(must=filter_conditions)  # type: ignore

        # First, count how many points will be deleted
        count_result = self.client.count(collection_name=self.collection, count_filter=filter_condition, exact=True)

        if count_result.count == 0:
            log_warning(f"No points found with metadata: {metadata}")
            return True

        log_info(f"Found {count_result.count} points to delete with metadata: {metadata}")

        # Delete all points matching the filter
        result = self.client.delete(
            collection_name=self.collection,
            points_selector=filter_condition,
            wait=True,  # Wait for the operation to complete
        )

        # Check if the deletion was successful
        if result.status == models.UpdateStatus.COMPLETED:
            log_info(f"Successfully deleted {count_result.count} points with metadata: {metadata}")
            return True

        log_warning(f"Deletion failed for metadata {metadata}. Status: {result.status}")
        return False

    except Exception as e:
        log_warning(f"Error deleting points with metadata {metadata}: {e}")
        return False
921
+
922
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete all points that have the specified content_id in their payload.

    Returns True when the matching points were deleted or none existed, and
    False when the deletion did not complete or an error occurred.
    """
    try:
        log_info(f"Attempting to delete all points with content_id: {content_id}")

        # Filter selecting every point stored under this content_id
        content_filter = models.Filter(
            must=[models.FieldCondition(key="content_id", match=models.MatchValue(value=content_id))]
        )

        # Count first so the log can report how many points are affected
        count_result = self.client.count(collection_name=self.collection, count_filter=content_filter, exact=True)
        if count_result.count == 0:
            log_warning(f"No points found with content_id: {content_id}")
            return True

        log_info(f"Found {count_result.count} points to delete with content_id: {content_id}")

        result = self.client.delete(
            collection_name=self.collection,
            points_selector=content_filter,
            wait=True,  # Wait for the operation to complete
        )

        if result.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for content_id {content_id}. Status: {result.status}")
            return False

        log_info(f"Successfully deleted {count_result.count} points with content_id: {content_id}")
        return True

    except Exception as e:
        log_warning(f"Error deleting points with content_id {content_id}: {e}")
        return False
959
+
960
def id_exists(self, id: str) -> bool:
    """Check if a point with the given ID exists in the collection.

    Any error during retrieval is logged and reported as ``False``.

    Args:
        id (str): The ID to check.

    Returns:
        bool: True if the point exists, False otherwise.
    """
    try:
        # Only the IDs matter here, so payloads and vectors are not fetched.
        matches = self.client.retrieve(
            collection_name=self.collection,
            ids=[id],
            with_payload=False,
            with_vectors=False,
        )
    except Exception as e:
        log_info(f"Error checking if point {id} exists: {e}")
        return False
    return len(matches) > 0
977
+
978
def content_hash_exists(self, content_hash: str) -> bool:
    """Check if any points with the given content hash exist in the collection.

    Any error during the count is logged and reported as ``False``.

    Args:
        content_hash (str): The content hash to check.

    Returns:
        bool: True if points with the content hash exist, False otherwise.
    """
    try:
        # Filter selecting every point stored under this content_hash
        hash_filter = models.Filter(
            must=[models.FieldCondition(key="content_hash", match=models.MatchValue(value=content_hash))]
        )
        matching = self.client.count(collection_name=self.collection, count_filter=hash_filter, exact=True)
        return matching.count > 0
    except Exception as e:
        log_info(f"Error checking if content_hash {content_hash} exists: {e}")
        return False
999
+
1000
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete all points that have the specified content_hash in their payload.

    Args:
        content_hash (str): The content hash to delete.

    Returns:
        bool: True when the matching points were deleted or none existed,
        False when the deletion did not complete or an error occurred.
    """
    try:
        log_info(f"Attempting to delete all points with content_hash: {content_hash}")

        # Filter selecting every point stored under this content_hash
        hash_filter = models.Filter(
            must=[models.FieldCondition(key="content_hash", match=models.MatchValue(value=content_hash))]
        )

        # Count first so the log can report how many points are affected
        count_result = self.client.count(collection_name=self.collection, count_filter=hash_filter, exact=True)
        if count_result.count == 0:
            log_warning(f"No points found with content_hash: {content_hash}")
            return True

        log_info(f"Found {count_result.count} points to delete with content_hash: {content_hash}")

        result = self.client.delete(
            collection_name=self.collection,
            points_selector=hash_filter,
            wait=True,  # Wait for the operation to complete
        )

        if result.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for content_hash {content_hash}. Status: {result.status}")
            return False

        log_info(f"Successfully deleted {count_result.count} points with content_hash: {content_hash}")
        return True

    except Exception as e:
        log_warning(f"Error deleting points with content_hash {content_hash}: {e}")
        return False
1044
+
1045
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    Every point whose payload "content_id" matches is fetched (following the
    scroll cursor across pages), the new metadata is merged into the top level
    of its payload and into its "filters" entry, and the merged payload is
    written back with set_payload.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error raised while reading or writing points is logged
            and then re-raised.
    """
    try:
        if not self.client:
            log_error("Client not initialized")
            return

        # Create filter for content_id
        filter_condition = models.Filter(
            must=[models.FieldCondition(key="content_id", match=models.MatchValue(value=content_id))]
        )

        # A single scroll call returns at most `limit` points, so keep following
        # the returned offset until the cursor is exhausted.
        points = []
        next_offset = None
        while True:
            page, next_offset = self.client.scroll(
                collection_name=self.collection,
                scroll_filter=filter_condition,
                limit=10000,
                offset=next_offset,
                with_payload=True,
                with_vectors=False,
            )
            points.extend(page)
            # An empty page with a non-None offset would otherwise loop forever.
            if next_offset is None or not page:
                break

        if not points:
            log_error(f"No documents found with content_id: {content_id}")
            return

        # Build every merged payload before writing anything, so a failure while
        # preparing them leaves the collection untouched.
        pending_updates = []
        for point in points:
            updated_payload = dict(point.payload or {})
            updated_payload.update(metadata)

            existing_filters = updated_payload.get("filters", {})
            if isinstance(existing_filters, dict):
                # Merge into a fresh dict so neither the fetched payload nor the
                # caller's metadata is mutated in place.
                updated_payload["filters"] = {**existing_filters, **metadata}
            else:
                updated_payload["filters"] = dict(metadata)

            pending_updates.append((point.id, updated_payload))

        # Execute all updates
        for point_id, payload in pending_updates:
            self.client.set_payload(collection_name=self.collection, payload=payload, points=[point_id])

        log_debug(f"Updated metadata for {len(pending_updates)} documents with content_id: {content_id}")

    except Exception as e:
        log_error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
1109
+
1110
def close(self) -> None:
    """Shut down the synchronous Qdrant client, if one is held.

    Does nothing when no client is set. Otherwise the client's close() is
    called, any error is logged at debug level rather than raised, and the
    stored client reference is cleared in every case.
    """
    if self._client is None:
        return

    try:
        self._client.close()
        log_debug("Qdrant client closed successfully")
    except Exception as exc:
        log_debug(f"Error closing Qdrant client: {exc}")
    finally:
        # Drop the reference whether or not closing succeeded.
        self._client = None
1120
+
1121
async def async_close(self) -> None:
    """Shut down the asynchronous Qdrant client, if one is held.

    Does nothing when no async client is set. Otherwise the client's close()
    is awaited, any error is logged at debug level rather than raised, and the
    stored client reference is cleared in every case.
    """
    if self._async_client is None:
        return

    try:
        await self._async_client.close()
        log_debug("Async Qdrant client closed successfully")
    except Exception as exc:
        log_debug(f"Error closing async Qdrant client: {exc}")
    finally:
        # Drop the reference whether or not closing succeeded.
        self._async_client = None
1131
+
1132
def get_supported_search_types(self) -> List[str]:
    """Return the search types this vector database supports: vector, keyword and hybrid."""
    supported = [
        SearchType.vector,
        SearchType.keyword,
        SearchType.hybrid,
    ]
    return supported