agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,37 @@
1
1
  from hashlib import md5
2
- from typing import Any, Dict, List, Optional
2
+ from typing import Any, Dict, List, Optional, Union
3
3
 
4
4
  try:
5
- from qdrant_client import QdrantClient # noqa: F401
5
+ from qdrant_client import AsyncQdrantClient, QdrantClient # noqa: F401
6
6
  from qdrant_client.http import models
7
7
  except ImportError:
8
8
  raise ImportError(
9
9
  "The `qdrant-client` package is not installed. Please install it via `pip install qdrant-client`."
10
10
  )
11
11
 
12
- from agno.document import Document
13
- from agno.embedder import Embedder
14
- from agno.reranker.base import Reranker
15
- from agno.utils.log import logger
12
+ from agno.filters import FilterExpr
13
+ from agno.knowledge.document import Document
14
+ from agno.knowledge.embedder import Embedder
15
+ from agno.knowledge.reranker.base import Reranker
16
+ from agno.utils.log import log_debug, log_error, log_info, log_warning
16
17
  from agno.vectordb.base import VectorDb
17
18
  from agno.vectordb.distance import Distance
19
+ from agno.vectordb.search import SearchType
20
+
21
+ DEFAULT_DENSE_VECTOR_NAME = "dense"
22
+ DEFAULT_SPARSE_VECTOR_NAME = "sparse"
23
+ DEFAULT_SPARSE_MODEL = "Qdrant/bm25"
18
24
 
19
25
 
20
26
  class Qdrant(VectorDb):
27
+ """Vector DB implementation powered by Qdrant - https://qdrant.tech/"""
28
+
21
29
  def __init__(
22
30
  self,
23
31
  collection: str,
32
+ name: Optional[str] = None,
33
+ description: Optional[str] = None,
34
+ id: Optional[str] = None,
24
35
  embedder: Optional[Embedder] = None,
25
36
  distance: Distance = Distance.cosine,
26
37
  location: Optional[str] = None,
@@ -35,16 +46,64 @@ class Qdrant(VectorDb):
35
46
  host: Optional[str] = None,
36
47
  path: Optional[str] = None,
37
48
  reranker: Optional[Reranker] = None,
49
+ search_type: SearchType = SearchType.vector,
50
+ dense_vector_name: str = DEFAULT_DENSE_VECTOR_NAME,
51
+ sparse_vector_name: str = DEFAULT_SPARSE_VECTOR_NAME,
52
+ hybrid_fusion_strategy: models.Fusion = models.Fusion.RRF,
53
+ fastembed_kwargs: Optional[dict] = None,
38
54
  **kwargs,
39
55
  ):
56
+ """
57
+ Args:
58
+ collection (str): Name of the Qdrant collection.
59
+ name (Optional[str]): Name of the vector database.
60
+ description (Optional[str]): Description of the vector database.
61
+ embedder (Optional[Embedder]): Optional embedder for automatic vector generation.
62
+ distance (Distance): Distance metric to use (default: cosine).
63
+ location (Optional[str]): `":memory:"` for in-memory, or str used as `url`. If `None`, use default host/port.
64
+ url (Optional[str]): Full URL (scheme, host, port, prefix). Overrides host/port if provided.
65
+ port (Optional[int]): REST API port (default: 6333).
66
+ grpc_port (int): gRPC interface port (default: 6334).
67
+ prefer_grpc (bool): Prefer gRPC over REST if True.
68
+ https (Optional[bool]): Use HTTPS if True.
69
+ api_key (Optional[str]): API key for Qdrant Cloud authentication.
70
+ prefix (Optional[str]): URL path prefix (e.g., "service/v1").
71
+ timeout (Optional[float]): Request timeout (REST: default 5s, gRPC: unlimited).
72
+ host (Optional[str]): Qdrant host (default: "localhost" if not specified).
73
+ path (Optional[str]): Path for local persistence (QdrantLocal).
74
+ reranker (Optional[Reranker]): Optional reranker for result refinement.
75
+ search_type (SearchType): Whether to use vector, keyword or hybrid search.
76
+ dense_vector_name (str): Dense vector name.
77
+ sparse_vector_name (str): Sparse vector name.
78
+ hybrid_fusion_strategy (models.Fusion): Strategy for hybrid fusion.
79
+ fastembed_kwargs (Optional[dict]): Keyword args for `fastembed.SparseTextEmbedding.__init__()`.
80
+ **kwargs: Keyword args for `qdrant_client.QdrantClient.__init__()`.
81
+ """
82
+ # Validate required parameters
83
+ if not collection:
84
+ raise ValueError("Collection name must be provided.")
85
+
86
+ # Dynamic ID generation based on unique identifiers
87
+ if id is None:
88
+ from agno.utils.string import generate_id
89
+
90
+ host_identifier = host or location or url or "localhost"
91
+ seed = f"{host_identifier}#{collection}"
92
+ id = generate_id(seed)
93
+
94
+ # Initialize base class with name, description, and generated ID
95
+ super().__init__(id=id, name=name, description=description)
96
+
40
97
  # Collection attributes
41
98
  self.collection: str = collection
42
99
 
43
100
  # Embedder for embedding the document contents
44
101
  if embedder is None:
45
- from agno.embedder.openai import OpenAIEmbedder
102
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
46
103
 
47
104
  embedder = OpenAIEmbedder()
105
+ log_info("Embedder not provided, using OpenAIEmbedder as default.")
106
+
48
107
  self.embedder: Embedder = embedder
49
108
  self.dimensions: Optional[int] = self.embedder.dimensions
50
109
 
@@ -54,6 +113,9 @@ class Qdrant(VectorDb):
54
113
  # Qdrant client instance
55
114
  self._client: Optional[QdrantClient] = None
56
115
 
116
+ # Qdrant async client instance
117
+ self._async_client: Optional[AsyncQdrantClient] = None
118
+
57
119
  # Qdrant client arguments
58
120
  self.location: Optional[str] = location
59
121
  self.url: Optional[str] = url
@@ -73,10 +135,35 @@ class Qdrant(VectorDb):
73
135
  # Qdrant client kwargs
74
136
  self.kwargs = kwargs
75
137
 
138
+ self.search_type = search_type
139
+ self.dense_vector_name = dense_vector_name
140
+ self.sparse_vector_name = sparse_vector_name
141
+ self.hybrid_fusion_strategy = hybrid_fusion_strategy
142
+
143
+ # TODO(v2.0.0): Remove backward compatibility for unnamed vectors
144
+ # TODO(v2.0.0): Make named vectors mandatory and simplify the codebase
145
+ self.use_named_vectors = search_type in [SearchType.hybrid]
146
+
147
+ if self.search_type in [SearchType.keyword, SearchType.hybrid]:
148
+ try:
149
+ from fastembed import SparseTextEmbedding # type: ignore
150
+
151
+ default_kwargs = {"model_name": DEFAULT_SPARSE_MODEL}
152
+ if fastembed_kwargs:
153
+ default_kwargs.update(fastembed_kwargs)
154
+
155
+ # Type ignore for mypy as SparseTextEmbedding constructor accepts flexible kwargs
156
+ self.sparse_encoder = SparseTextEmbedding(**default_kwargs) # type: ignore
157
+
158
+ except ImportError as e:
159
+ raise ImportError(
160
+ "To use keyword/hybrid search, install the `fastembed` extra with `pip install fastembed`."
161
+ ) from e
162
+
76
163
  @property
77
164
  def client(self) -> QdrantClient:
78
165
  if self._client is None:
79
- logger.debug("Creating Qdrant Client")
166
+ log_debug("Creating Qdrant Client")
80
167
  self._client = QdrantClient(
81
168
  location=self.location,
82
169
  url=self.url,
@@ -93,8 +180,28 @@ class Qdrant(VectorDb):
93
180
  )
94
181
  return self._client
95
182
 
183
+ @property
184
+ def async_client(self) -> AsyncQdrantClient:
185
+ """Get or create the async Qdrant client."""
186
+ if self._async_client is None:
187
+ log_debug("Creating Async Qdrant Client")
188
+ self._async_client = AsyncQdrantClient(
189
+ location=self.location,
190
+ url=self.url,
191
+ port=self.port,
192
+ grpc_port=self.grpc_port,
193
+ prefer_grpc=self.prefer_grpc,
194
+ https=self.https,
195
+ api_key=self.api_key,
196
+ prefix=self.prefix,
197
+ timeout=int(self.timeout) if self.timeout is not None else None,
198
+ host=self.host,
199
+ path=self.path,
200
+ **self.kwargs,
201
+ )
202
+ return self._async_client
203
+
96
204
  def create(self) -> None:
97
- # Collection distance
98
205
  _distance = models.Distance.COSINE
99
206
  if self.distance == Distance.l2:
100
207
  _distance = models.Distance.EUCLID
@@ -102,32 +209,80 @@ class Qdrant(VectorDb):
102
209
  _distance = models.Distance.DOT
103
210
 
104
211
  if not self.exists():
105
- logger.debug(f"Creating collection: {self.collection}")
212
+ log_debug(f"Creating collection: {self.collection}")
213
+
214
+ # Configure vectors based on search type
215
+ if self.search_type == SearchType.vector:
216
+ # Maintain backward compatibility with unnamed vectors
217
+ vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
218
+ else:
219
+ # Use named vectors for hybrid search
220
+ vectors_config = {
221
+ self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
222
+ } # type: ignore
223
+
106
224
  self.client.create_collection(
107
225
  collection_name=self.collection,
108
- vectors_config=models.VectorParams(size=self.dimensions, distance=_distance),
226
+ vectors_config=vectors_config,
227
+ sparse_vectors_config={self.sparse_vector_name: models.SparseVectorParams()}
228
+ if self.search_type in [SearchType.keyword, SearchType.hybrid]
229
+ else None,
230
+ )
231
+
232
+ async def async_create(self) -> None:
233
+ """Create the collection asynchronously."""
234
+ # Collection distance
235
+ _distance = models.Distance.COSINE
236
+ if self.distance == Distance.l2:
237
+ _distance = models.Distance.EUCLID
238
+ elif self.distance == Distance.max_inner_product:
239
+ _distance = models.Distance.DOT
240
+
241
+ if not await self.async_exists():
242
+ log_debug(f"Creating collection asynchronously: {self.collection}")
243
+
244
+ # Configure vectors based on search type
245
+ if self.search_type == SearchType.vector:
246
+ # Maintain backward compatibility with unnamed vectors
247
+ vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
248
+ else:
249
+ # Use named vectors for hybrid search
250
+ vectors_config = {
251
+ self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
252
+ } # type: ignore
253
+
254
+ await self.async_client.create_collection(
255
+ collection_name=self.collection,
256
+ vectors_config=vectors_config,
257
+ sparse_vectors_config={self.sparse_vector_name: models.SparseVectorParams()}
258
+ if self.search_type in [SearchType.keyword, SearchType.hybrid]
259
+ else None,
109
260
  )
110
261
 
111
- def doc_exists(self, document: Document) -> bool:
262
+ def name_exists(self, name: str) -> bool:
112
263
  """
113
- Validating if the document exists or not
264
+ Validates if a document with the given name exists in the collection.
114
265
 
115
266
  Args:
116
- document (Document): Document to validate
267
+ name (str): The name of the document to check.
268
+
269
+ Returns:
270
+ bool: True if a document with the given name exists, False otherwise.
117
271
  """
118
272
  if self.client:
119
- cleaned_content = document.content.replace("\x00", "\ufffd")
120
- doc_id = md5(cleaned_content.encode()).hexdigest()
121
- collection_points = self.client.retrieve(
273
+ scroll_result = self.client.scroll(
122
274
  collection_name=self.collection,
123
- ids=[doc_id],
275
+ scroll_filter=models.Filter(
276
+ must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))]
277
+ ),
278
+ limit=1,
124
279
  )
125
- return len(collection_points) > 0
280
+ return len(scroll_result[0]) > 0
126
281
  return False
127
282
 
128
- def name_exists(self, name: str) -> bool:
283
+ async def async_name_exists(self, name: str) -> bool: # type: ignore[override]
129
284
  """
130
- Validates if a document with the given name exists in the collection.
285
+ Asynchronously validates if a document with the given name exists in the collection.
131
286
 
132
287
  Args:
133
288
  name (str): The name of the document to check.
@@ -135,8 +290,8 @@ class Qdrant(VectorDb):
135
290
  Returns:
136
291
  bool: True if a document with the given name exists, False otherwise.
137
292
  """
138
- if self.client:
139
- scroll_result = self.client.scroll(
293
+ if self.async_client:
294
+ scroll_result = await self.async_client.scroll(
140
295
  collection_name=self.collection,
141
296
  scroll_filter=models.Filter(
142
297
  must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))]
@@ -146,7 +301,13 @@ class Qdrant(VectorDb):
146
301
  return len(scroll_result[0]) > 0
147
302
  return False
148
303
 
149
- def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None, batch_size: int = 10) -> None:
304
+ def insert(
305
+ self,
306
+ content_hash: str,
307
+ documents: List[Document],
308
+ filters: Optional[Dict[str, Any]] = None,
309
+ batch_size: int = 10,
310
+ ) -> None:
150
311
  """
151
312
  Insert documents into the database.
152
313
 
@@ -155,30 +316,177 @@ class Qdrant(VectorDb):
155
316
  filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
156
317
  batch_size (int): Batch size for inserting documents
157
318
  """
158
- logger.debug(f"Inserting {len(documents)} documents")
319
+ log_debug(f"Inserting {len(documents)} documents")
159
320
  points = []
160
321
  for document in documents:
161
- document.embed(embedder=self.embedder)
162
322
  cleaned_content = document.content.replace("\x00", "\ufffd")
163
- doc_id = md5(cleaned_content.encode()).hexdigest()
323
+ # Include content_hash in ID to ensure uniqueness across different content hashes
324
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
325
+ doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
326
+
327
+ # TODO(v2.0.0): Remove conditional vector naming logic
328
+ if self.use_named_vectors:
329
+ vector = {self.dense_vector_name: document.embedding}
330
+ else:
331
+ vector = document.embedding # type: ignore
332
+
333
+ if self.search_type == SearchType.vector:
334
+ # For vector search, maintain backward compatibility with unnamed vectors
335
+ document.embed(embedder=self.embedder)
336
+ vector = document.embedding # type: ignore
337
+ else:
338
+ # For other search types, use named vectors
339
+ vector = {}
340
+ if self.search_type in [SearchType.hybrid]:
341
+ document.embed(embedder=self.embedder)
342
+ vector[self.dense_vector_name] = document.embedding
343
+
344
+ if self.search_type in [SearchType.keyword, SearchType.hybrid]:
345
+ vector[self.sparse_vector_name] = next(
346
+ iter(self.sparse_encoder.embed([document.content]))
347
+ ).as_object() # type: ignore
348
+
349
+ # Create payload with document properties
350
+ payload = {
351
+ "name": document.name,
352
+ "meta_data": document.meta_data,
353
+ "content": cleaned_content,
354
+ "usage": document.usage,
355
+ "content_id": document.content_id,
356
+ "content_hash": content_hash,
357
+ }
358
+
359
+ # Add filters as metadata if provided
360
+ if filters:
361
+ # Merge filters with existing metadata
362
+ if "meta_data" not in payload:
363
+ payload["meta_data"] = {}
364
+ payload["meta_data"].update(filters) # type: ignore
365
+
164
366
  points.append(
165
367
  models.PointStruct(
166
368
  id=doc_id,
167
- vector=document.embedding,
168
- payload={
169
- "name": document.name,
170
- "meta_data": document.meta_data,
171
- "content": cleaned_content,
172
- "usage": document.usage,
173
- },
369
+ vector=vector, # type: ignore
370
+ payload=payload,
174
371
  )
175
372
  )
176
- logger.debug(f"Inserted document: {document.name} ({document.meta_data})")
373
+ log_debug(f"Inserted document: {document.name} ({document.meta_data})")
177
374
  if len(points) > 0:
178
375
  self.client.upsert(collection_name=self.collection, wait=False, points=points)
179
- logger.debug(f"Upsert {len(points)} documents")
376
+ log_debug(f"Upsert {len(points)} documents")
377
+
378
+ async def async_insert(
379
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
380
+ ) -> None:
381
+ """
382
+ Insert documents asynchronously.
383
+
384
+ Args:
385
+ documents (List[Document]): List of documents to insert
386
+ filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
387
+ """
388
+ log_debug(f"Inserting {len(documents)} documents asynchronously")
389
+
390
+ # Apply batch embedding when needed for vector or hybrid search
391
+ if self.search_type in [SearchType.vector, SearchType.hybrid]:
392
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
393
+ # Use batch embedding when enabled and supported
394
+ try:
395
+ # Extract content from all documents
396
+ doc_contents = [doc.content for doc in documents]
397
+
398
+ # Get batch embeddings and usage
399
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
400
+
401
+ # Process documents with pre-computed embeddings
402
+ for j, doc in enumerate(documents):
403
+ try:
404
+ if j < len(embeddings):
405
+ doc.embedding = embeddings[j]
406
+ doc.usage = usages[j] if j < len(usages) else None
407
+ except Exception as e:
408
+ log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
409
+
410
+ except Exception as e:
411
+ # Check if this is a rate limit error - don't fall back as it would make things worse
412
+ error_str = str(e).lower()
413
+ is_rate_limit = any(
414
+ phrase in error_str
415
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
416
+ )
417
+
418
+ if is_rate_limit:
419
+ log_error(f"Rate limit detected during batch embedding. {e}")
420
+ raise e
421
+ else:
422
+ log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
423
+ # Fall back to individual embedding
424
+ for doc in documents:
425
+ if self.search_type in [SearchType.vector, SearchType.hybrid]:
426
+ doc.embed(embedder=self.embedder)
427
+ else:
428
+ # Use individual embedding
429
+ for doc in documents:
430
+ if self.search_type in [SearchType.vector, SearchType.hybrid]:
431
+ doc.embed(embedder=self.embedder)
432
+
433
+ async def process_document(document):
434
+ cleaned_content = document.content.replace("\x00", "\ufffd")
435
+ # Include content_hash in ID to ensure uniqueness across different content hashes
436
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
437
+ doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
438
+
439
+ if self.search_type == SearchType.vector:
440
+ # For vector search, maintain backward compatibility with unnamed vectors
441
+ vector = document.embedding # Already embedded above
442
+ else:
443
+ # For other search types, use named vectors
444
+ vector = {}
445
+ if self.search_type in [SearchType.hybrid]:
446
+ vector[self.dense_vector_name] = document.embedding # Already embedded above
447
+
448
+ if self.search_type in [SearchType.keyword, SearchType.hybrid]:
449
+ vector[self.sparse_vector_name] = next(
450
+ iter(self.sparse_encoder.embed([document.content]))
451
+ ).as_object() # type: ignore
452
+
453
+ if self.search_type in [SearchType.keyword, SearchType.hybrid]:
454
+ vector[self.sparse_vector_name] = next(iter(self.sparse_encoder.embed([document.content]))).as_object()
455
+
456
+ # Create payload with document properties
457
+ payload = {
458
+ "name": document.name,
459
+ "meta_data": document.meta_data,
460
+ "content": cleaned_content,
461
+ "usage": document.usage,
462
+ "content_id": document.content_id,
463
+ "content_hash": content_hash,
464
+ }
465
+
466
+ # Add filters as metadata if provided
467
+ if filters:
468
+ # Merge filters with existing metadata
469
+ if "meta_data" not in payload:
470
+ payload["meta_data"] = {}
471
+ payload["meta_data"].update(filters)
472
+
473
+ log_debug(f"Inserted document asynchronously: {document.name} ({document.meta_data})")
474
+ return models.PointStruct( # type: ignore
475
+ id=doc_id,
476
+ vector=vector, # type: ignore
477
+ payload=payload,
478
+ )
479
+
480
+ import asyncio
481
+
482
+ # Process all documents in parallel
483
+ points = await asyncio.gather(*[process_document(doc) for doc in documents])
180
484
 
181
- def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
485
+ if len(points) > 0:
486
+ await self.async_client.upsert(collection_name=self.collection, wait=False, points=points)
487
+ log_debug(f"Upserted {len(points)} documents asynchronously")
488
+
489
+ def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
182
490
  """
183
491
  Upsert documents into the database.
184
492
 
@@ -186,33 +494,218 @@ class Qdrant(VectorDb):
186
494
  documents (List[Document]): List of documents to upsert
187
495
  filters (Optional[Dict[str, Any]]): Filters to apply while upserting
188
496
  """
189
- logger.debug("Redirecting the request to insert")
190
- self.insert(documents)
497
+ log_debug("Redirecting the request to insert")
498
+ if self.content_hash_exists(content_hash):
499
+ self._delete_by_content_hash(content_hash)
500
+ self.insert(content_hash=content_hash, documents=documents, filters=filters)
191
501
 
192
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
502
+ async def async_upsert(
503
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
504
+ ) -> None:
505
+ """Upsert documents asynchronously."""
506
+ log_debug("Redirecting the async request to async_insert")
507
+ await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
508
+
509
+ def search(
510
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
511
+ ) -> List[Document]:
193
512
  """
194
- Search for documents in the database.
513
+ Search for documents in the collection.
195
514
 
196
515
  Args:
197
516
  query (str): Query to search for
198
517
  limit (int): Number of search results to return
199
518
  filters (Optional[Dict[str, Any]]): Filters to apply while searching
200
519
  """
201
- query_embedding = self.embedder.get_embedding(query)
202
- if query_embedding is None:
203
- logger.error(f"Error getting embedding for Query: {query}")
204
- return []
205
520
 
206
- results = self.client.search(
521
+ if isinstance(filters, List):
522
+ log_warning("Filters Expressions are not supported in Qdrant. No filters will be applied.")
523
+ filters = None
524
+
525
+ formatted_filters = self._format_filters(filters or {}) # type: ignore
526
+ if self.search_type == SearchType.vector:
527
+ results = self._run_vector_search_sync(query, limit, formatted_filters=formatted_filters) # type: ignore
528
+ elif self.search_type == SearchType.keyword:
529
+ results = self._run_keyword_search_sync(query, limit, formatted_filters=formatted_filters) # type: ignore
530
+ elif self.search_type == SearchType.hybrid:
531
+ results = self._run_hybrid_search_sync(query, limit, formatted_filters=formatted_filters) # type: ignore
532
+ else:
533
+ raise ValueError(f"Unsupported search type: {self.search_type}")
534
+
535
+ return self._build_search_results(results, query)
536
+
537
+ async def async_search(
538
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
539
+ ) -> List[Document]:
540
+ if isinstance(filters, List):
541
+ log_warning("Filters Expressions are not supported in Qdrant. No filters will be applied.")
542
+ filters = None
543
+
544
+ formatted_filters = self._format_filters(filters or {}) # type: ignore
545
+ if self.search_type == SearchType.vector:
546
+ results = await self._run_vector_search_async(query, limit, formatted_filters=formatted_filters) # type: ignore
547
+ elif self.search_type == SearchType.keyword:
548
+ results = await self._run_keyword_search_async(query, limit, formatted_filters=formatted_filters) # type: ignore
549
+ elif self.search_type == SearchType.hybrid:
550
+ results = await self._run_hybrid_search_async(query, limit, formatted_filters=formatted_filters) # type: ignore
551
+ else:
552
+ raise ValueError(f"Unsupported search type: {self.search_type}")
553
+
554
+ return self._build_search_results(results, query)
555
+
556
+ def _run_hybrid_search_sync(
557
+ self,
558
+ query: str,
559
+ limit: int,
560
+ formatted_filters: Optional[models.Filter],
561
+ ) -> List[models.ScoredPoint]:
562
+ dense_embedding = self.embedder.get_embedding(query)
563
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
564
+ call = self.client.query_points(
565
+ collection_name=self.collection,
566
+ prefetch=[
567
+ models.Prefetch(
568
+ query=models.SparseVector(**sparse_embedding), # type: ignore # type: ignore
569
+ limit=limit,
570
+ using=self.sparse_vector_name,
571
+ ),
572
+ models.Prefetch(query=dense_embedding, limit=limit, using=self.dense_vector_name),
573
+ ],
574
+ query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
575
+ with_vectors=True,
576
+ with_payload=True,
577
+ limit=limit,
578
+ query_filter=formatted_filters,
579
+ )
580
+ return call.points
581
+
582
+ def _run_vector_search_sync(
583
+ self,
584
+ query: str,
585
+ limit: int,
586
+ formatted_filters: Optional[models.Filter],
587
+ ) -> List[models.ScoredPoint]:
588
+ dense_embedding = self.embedder.get_embedding(query)
589
+
590
+ # TODO(v2.0.0): Remove this conditional and always use named vectors
591
+ if self.use_named_vectors:
592
+ call = self.client.query_points(
593
+ collection_name=self.collection,
594
+ query=dense_embedding,
595
+ with_vectors=True,
596
+ with_payload=True,
597
+ limit=limit,
598
+ query_filter=formatted_filters,
599
+ using=self.dense_vector_name,
600
+ )
601
+ else:
602
+ # Backward compatibility mode - use unnamed vector
603
+ call = self.client.query_points(
604
+ collection_name=self.collection,
605
+ query=dense_embedding,
606
+ with_vectors=True,
607
+ with_payload=True,
608
+ limit=limit,
609
+ query_filter=formatted_filters,
610
+ )
611
+ return call.points
612
+
613
+ def _run_keyword_search_sync(
614
+ self,
615
+ query: str,
616
+ limit: int,
617
+ formatted_filters: Optional[models.Filter],
618
+ ) -> List[models.ScoredPoint]:
619
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
620
+ call = self.client.query_points(
621
+ collection_name=self.collection,
622
+ query=models.SparseVector(**sparse_embedding), # type: ignore
623
+ with_vectors=True,
624
+ with_payload=True,
625
+ limit=limit,
626
+ using=self.sparse_vector_name,
627
+ query_filter=formatted_filters,
628
+ )
629
+ return call.points
630
+
631
+ async def _run_vector_search_async(
632
+ self,
633
+ query: str,
634
+ limit: int,
635
+ formatted_filters: Optional[models.Filter],
636
+ ) -> List[models.ScoredPoint]:
637
+ dense_embedding = self.embedder.get_embedding(query)
638
+
639
+ # TODO(v2.0.0): Remove this conditional and always use named vectors
640
+ if self.use_named_vectors:
641
+ call = await self.async_client.query_points(
642
+ collection_name=self.collection,
643
+ query=dense_embedding,
644
+ with_vectors=True,
645
+ with_payload=True,
646
+ limit=limit,
647
+ query_filter=formatted_filters,
648
+ using=self.dense_vector_name,
649
+ )
650
+ else:
651
+ # Backward compatibility mode - use unnamed vector
652
+ call = await self.async_client.query_points(
653
+ collection_name=self.collection,
654
+ query=dense_embedding,
655
+ with_vectors=True,
656
+ with_payload=True,
657
+ limit=limit,
658
+ query_filter=formatted_filters,
659
+ )
660
+ return call.points
661
+
662
+ async def _run_keyword_search_async(
663
+ self,
664
+ query: str,
665
+ limit: int,
666
+ formatted_filters: Optional[models.Filter],
667
+ ) -> List[models.ScoredPoint]:
668
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
669
+ call = await self.async_client.query_points(
207
670
  collection_name=self.collection,
208
- query_vector=query_embedding,
671
+ query=models.SparseVector(**sparse_embedding), # type: ignore
672
+ with_vectors=True,
673
+ with_payload=True,
674
+ limit=limit,
675
+ using=self.sparse_vector_name,
676
+ query_filter=formatted_filters,
677
+ )
678
+ return call.points
679
+
680
+ async def _run_hybrid_search_async(
681
+ self,
682
+ query: str,
683
+ limit: int,
684
+ formatted_filters: Optional[models.Filter],
685
+ ) -> List[models.ScoredPoint]:
686
+ dense_embedding = self.embedder.get_embedding(query)
687
+ sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
688
+ call = await self.async_client.query_points(
689
+ collection_name=self.collection,
690
+ prefetch=[
691
+ models.Prefetch(
692
+ query=models.SparseVector(**sparse_embedding), # type: ignore # type: ignore
693
+ limit=limit,
694
+ using=self.sparse_vector_name,
695
+ ),
696
+ models.Prefetch(query=dense_embedding, limit=limit, using=self.dense_vector_name),
697
+ ],
698
+ query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
209
699
  with_vectors=True,
210
700
  with_payload=True,
211
701
  limit=limit,
702
+ query_filter=formatted_filters,
212
703
  )
704
+ return call.points
213
705
 
214
- # Build search results
706
+ def _build_search_results(self, results, query: str) -> List[Document]:
215
707
  search_results: List[Document] = []
708
+
216
709
  for result in results:
217
710
  if result.payload is None:
218
711
  continue
@@ -223,36 +716,396 @@ class Qdrant(VectorDb):
223
716
  content=result.payload["content"],
224
717
  embedder=self.embedder,
225
718
  embedding=result.vector, # type: ignore
226
- usage=result.payload["usage"],
719
+ usage=result.payload.get("usage"),
720
+ content_id=result.payload.get("content_id"),
227
721
  )
228
722
  )
229
723
 
230
724
  if self.reranker:
231
725
  search_results = self.reranker.rerank(query=query, documents=search_results)
232
726
 
727
+ log_info(f"Found {len(search_results)} documents")
233
728
  return search_results
234
729
 
730
def _format_filters(self, filters: Optional[Dict[str, Any]]) -> Optional[models.Filter]:
    """Translate a plain filter mapping into a Qdrant ``models.Filter``.

    Keys without a dot are treated as metadata fields and prefixed with
    ``meta_data.``; keys that already contain a dot are used unchanged.
    A dict value is expanded into one condition per ``key.sub_key`` entry.

    Args:
        filters (Optional[Dict[str, Any]]): Field name to expected value (or nested dict).

    Returns:
        Optional[models.Filter]: A filter requiring every condition to match,
            or None when no conditions were produced.
    """
    if not filters:
        return None

    conditions = []
    for raw_key, expected in filters.items():
        # A key with no dot is a bare field name, assumed to live under meta_data.
        field = raw_key if "." in raw_key else f"meta_data.{raw_key}"

        if isinstance(expected, dict):
            # Nested dictionaries become one condition per sub-key.
            conditions.extend(
                models.FieldCondition(key=f"{field}.{sub_key}", match=models.MatchValue(value=sub_value))
                for sub_key, sub_value in expected.items()
            )
        else:
            conditions.append(models.FieldCondition(key=field, match=models.MatchValue(value=expected)))

    return models.Filter(must=conditions) if conditions else None  # type: ignore
754
+
755
def optimize(self) -> None:
    """No-op: this implementation performs no optimization step."""
    return None
757
+
235
758
def drop(self) -> None:
    """Delete the collection if it exists; do nothing otherwise."""
    if not self.exists():
        return
    log_debug(f"Deleting collection: {self.collection}")
    self.client.delete_collection(self.collection)
239
762
 
763
async def async_drop(self) -> None:
    """Delete the collection asynchronously if it exists; do nothing otherwise."""
    collection_present = await self.async_exists()
    if not collection_present:
        return
    log_debug(f"Deleting collection asynchronously: {self.collection}")
    await self.async_client.delete_collection(self.collection)
768
+
240
769
def exists(self) -> bool:
    """Return whether the configured collection exists, as reported by the client."""
    collection_found = self.client.collection_exists(collection_name=self.collection)
    return collection_found
772
+
773
async def async_exists(self) -> bool:
    """Return whether the configured collection exists, using the async client."""
    collection_found = await self.async_client.collection_exists(collection_name=self.collection)
    return collection_found
249
776
 
250
777
def get_count(self) -> int:
    """Return the exact number of points in the collection."""
    return self.client.count(collection_name=self.collection, exact=True).count
253
780
 
254
- def optimize(self) -> None:
255
- pass
781
def point_exists(self, id: str) -> bool:
    """Check if a point with the given ID exists in the collection.

    Args:
        id (str): The point ID to look up.

    Returns:
        bool: True if the client returned at least one point for the ID;
            False if none was returned or the lookup raised.
    """
    try:
        log_info(f"Checking if point with ID '{id}' (type: {type(id)}) exists in collection '{self.collection}'")
        # Payload and vectors are not needed just to test for presence.
        found = self.client.retrieve(
            collection_name=self.collection,
            ids=[id],
            with_payload=False,
            with_vectors=False,
        )
        found_count = len(found)
        log_info(f"Retrieved {found_count} points for ID '{id}'")
        if found_count > 0:
            first = found[0]
            log_info(f"Found point with ID: {first.id} (type: {type(first.id)})")
        return found_count > 0
    except Exception as e:
        log_info(f"Error checking if point {id} exists: {e}")
        return False
256
795
 
257
796
def delete(self) -> bool:
    """Delete the whole collection and return the client's result."""
    outcome = self.client.delete_collection(collection_name=self.collection)
    return outcome
798
+
799
def delete_by_id(self, id: str) -> bool:
    """Delete the point with the given ID from the collection.

    Args:
        id (str): The ID of the point to delete.

    Returns:
        bool: True if the point was not found (nothing to delete) or Qdrant
            reported the deletion as completed; False if the deletion did not
            complete or an error occurred.
    """
    try:
        # Check if point exists before deletion
        if not self.point_exists(id):
            log_warning(f"Point with ID {id} does not exist")
            return True

        result = self.client.delete(
            collection_name=self.collection,
            points_selector=models.PointIdsList(points=[id]),
            wait=True,  # Wait for the operation to complete
        )

        # Verify the outcome, as the other delete_by_* methods do, instead of
        # assuming that a call which did not raise has succeeded.
        if result.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for ID {id}. Status: {result.status}")
            return False
        return True

    except Exception as e:
        log_warning(f"Error deleting point with ID {id}: {e}")
        return False
816
+
817
def delete_by_name(self, name: str) -> bool:
    """Delete every point whose ``name`` payload field equals the given name exactly.

    Args:
        name (str): The name to match.

    Returns:
        bool: True if no point matched or the deletion completed; False if the
            deletion did not complete or an error occurred.
    """
    try:
        log_info(f"Attempting to delete all points with name: {name}")

        # Exact-match selector on the "name" payload field.
        name_match = models.FieldCondition(key="name", match=models.MatchValue(value=name))
        selector = models.Filter(must=[name_match])

        # Count first so the logs can report how many points are affected.
        matched = self.client.count(collection_name=self.collection, count_filter=selector, exact=True).count
        if matched == 0:
            log_warning(f"No points found with name: {name}")
            return True
        log_info(f"Found {matched} points to delete with name: {name}")

        outcome = self.client.delete(
            collection_name=self.collection,
            points_selector=selector,
            wait=True,  # Wait for the operation to complete
        )
        if outcome.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for name {name}. Status: {outcome.status}")
            return False

        log_info(f"Successfully deleted {matched} points with name: {name}")
        return True

    except Exception as e:
        log_warning(f"Error deleting points with name {name}: {e}")
        return False
854
+
855
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete all points whose ``meta_data`` payload contains every given key/value pair.

    Args:
        metadata (Dict[str, Any]): Key/value pairs that must all match under
            the ``meta_data`` payload field. Must not be empty.

    Returns:
        bool: True if no point matched or the deletion completed; False if
            ``metadata`` is empty, the deletion did not complete, or an error
            occurred.
    """
    try:
        # An empty mapping would build a filter with no conditions at all. Such a
        # filter does not narrow the selection, so refuse instead of risking the
        # deletion of every point in the collection.
        if not metadata:
            log_warning("No metadata provided; refusing to delete points without a filter")
            return False

        log_info(f"Attempting to delete all points with metadata: {metadata}")

        # Use the meta_data prefix since that's how metadata is stored in the payload
        filter_conditions = [
            models.FieldCondition(key=f"meta_data.{key}", match=models.MatchValue(value=value))
            for key, value in metadata.items()
        ]

        # Create a filter that requires ALL metadata conditions to match
        filter_condition = models.Filter(must=filter_conditions)  # type: ignore

        # First, count how many points will be deleted
        count_result = self.client.count(collection_name=self.collection, count_filter=filter_condition, exact=True)

        if count_result.count == 0:
            log_warning(f"No points found with metadata: {metadata}")
            return True

        log_info(f"Found {count_result.count} points to delete with metadata: {metadata}")

        # Delete all points matching the filter
        result = self.client.delete(
            collection_name=self.collection,
            points_selector=filter_condition,
            wait=True,  # Wait for the operation to complete
        )

        # Check if the deletion was successful
        if result.status == models.UpdateStatus.COMPLETED:
            log_info(f"Successfully deleted {count_result.count} points with metadata: {metadata}")
            return True

        log_warning(f"Deletion failed for metadata {metadata}. Status: {result.status}")
        return False

    except Exception as e:
        log_warning(f"Error deleting points with metadata {metadata}: {e}")
        return False
898
+
899
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete every point whose ``content_id`` payload field equals the given value.

    Args:
        content_id (str): The content ID to match.

    Returns:
        bool: True if no point matched or the deletion completed; False if the
            deletion did not complete or an error occurred.
    """
    try:
        log_info(f"Attempting to delete all points with content_id: {content_id}")

        # Exact-match selector on the "content_id" payload field.
        id_match = models.FieldCondition(key="content_id", match=models.MatchValue(value=content_id))
        selector = models.Filter(must=[id_match])

        # Count first so the logs can report how many points are affected.
        matched = self.client.count(collection_name=self.collection, count_filter=selector, exact=True).count
        if matched == 0:
            log_warning(f"No points found with content_id: {content_id}")
            return True
        log_info(f"Found {matched} points to delete with content_id: {content_id}")

        outcome = self.client.delete(
            collection_name=self.collection,
            points_selector=selector,
            wait=True,  # Wait for the operation to complete
        )
        if outcome.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for content_id {content_id}. Status: {outcome.status}")
            return False

        log_info(f"Successfully deleted {matched} points with content_id: {content_id}")
        return True

    except Exception as e:
        log_warning(f"Error deleting points with content_id {content_id}: {e}")
        return False
936
+
937
def id_exists(self, id: str) -> bool:
    """Report whether the collection holds a point with the given ID.

    Args:
        id (str): The ID to check.

    Returns:
        bool: True if the client returned at least one point for the ID;
            False if none was returned or the lookup raised.
    """
    try:
        # Payload and vectors are not needed just to test for presence.
        matches = self.client.retrieve(
            collection_name=self.collection,
            ids=[id],
            with_payload=False,
            with_vectors=False,
        )
        return len(matches) > 0
    except Exception as e:
        log_info(f"Error checking if point {id} exists: {e}")
        return False
954
+
955
def content_hash_exists(self, content_hash: str) -> bool:
    """Report whether any point in the collection carries the given content hash.

    Args:
        content_hash (str): The content hash to check.

    Returns:
        bool: True if at least one point matches; False if none matches or
            the count request raised.
    """
    try:
        # Exact-match selector on the "content_hash" payload field.
        hash_match = models.FieldCondition(key="content_hash", match=models.MatchValue(value=content_hash))
        selector = models.Filter(must=[hash_match])

        matched = self.client.count(collection_name=self.collection, count_filter=selector, exact=True)
        return matched.count > 0
    except Exception as e:
        log_info(f"Error checking if content_hash {content_hash} exists: {e}")
        return False
976
+
977
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete every point whose ``content_hash`` payload field equals the given value.

    Args:
        content_hash (str): The content hash to delete.

    Returns:
        bool: True if no point matched or the deletion completed; False if the
            deletion did not complete or an error occurred.
    """
    try:
        log_info(f"Attempting to delete all points with content_hash: {content_hash}")

        # Exact-match selector on the "content_hash" payload field.
        hash_match = models.FieldCondition(key="content_hash", match=models.MatchValue(value=content_hash))
        selector = models.Filter(must=[hash_match])

        # Count first so the logs can report how many points are affected.
        matched = self.client.count(collection_name=self.collection, count_filter=selector, exact=True).count
        if matched == 0:
            log_warning(f"No points found with content_hash: {content_hash}")
            return True
        log_info(f"Found {matched} points to delete with content_hash: {content_hash}")

        outcome = self.client.delete(
            collection_name=self.collection,
            points_selector=selector,
            wait=True,  # Wait for the operation to complete
        )
        if outcome.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for content_hash {content_hash}. Status: {outcome.status}")
            return False

        log_info(f"Successfully deleted {matched} points with content_hash: {content_hash}")
        return True

    except Exception as e:
        log_warning(f"Error deleting points with content_hash {content_hash}: {e}")
        return False
1021
+
1022
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    Every point whose ``content_id`` payload field matches is rewritten with
    the new key/value pairs merged into the payload top level, into
    ``filters``, and into ``meta_data``.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error raised while scrolling or writing payloads is
            logged and re-raised.
    """
    try:
        if not self.client:
            log_error("Client not initialized")
            return

        # Create filter for content_id
        filter_condition = models.Filter(
            must=[models.FieldCondition(key="content_id", match=models.MatchValue(value=content_id))]
        )

        # scroll() returns (points, next_page_offset). Follow the offset until it
        # is exhausted so that matches beyond the first page are updated too.
        points = []
        next_offset = None
        while True:
            page, next_offset = self.client.scroll(
                collection_name=self.collection,
                scroll_filter=filter_condition,
                limit=10000,
                offset=next_offset,
                with_payload=True,
                with_vectors=False,
            )
            points.extend(page)
            if next_offset is None or not page:
                break

        if not points:
            log_error(f"No documents found with content_id: {content_id}")
            return

        for point in points:
            # Merge existing payload with new metadata
            updated_payload = dict(point.payload or {})
            updated_payload.update(metadata)

            # Keep the filters entry in sync; replace it if it is not a dict.
            current_filters = updated_payload.get("filters")
            if isinstance(current_filters, dict):
                updated_payload["filters"] = {**current_filters, **metadata}
            else:
                updated_payload["filters"] = dict(metadata)

            # Filter formatting and delete_by_metadata both query keys under
            # "meta_data.", so the update must land there to be findable.
            current_meta = updated_payload.get("meta_data")
            if isinstance(current_meta, dict):
                updated_payload["meta_data"] = {**current_meta, **metadata}
            else:
                updated_payload["meta_data"] = dict(metadata)

            self.client.set_payload(
                collection_name=self.collection, payload=updated_payload, points=[point.id]
            )

        log_debug(f"Updated metadata for {len(points)} documents with content_id: {content_id}")

    except Exception as e:
        log_error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
1086
+
1087
def close(self) -> None:
    """Close the synchronous Qdrant client, if one is held, and forget it."""
    if self._client is None:
        return
    try:
        self._client.close()
        log_debug("Qdrant client closed successfully")
    except Exception as e:
        # Closing is best-effort: a failure is logged, not propagated.
        log_debug(f"Error closing Qdrant client: {e}")
    finally:
        self._client = None
1097
+
1098
async def async_close(self) -> None:
    """Close the asynchronous Qdrant client, if one is held, and forget it."""
    if self._async_client is None:
        return
    try:
        await self._async_client.close()
        log_debug("Async Qdrant client closed successfully")
    except Exception as e:
        # Closing is best-effort: a failure is logged, not propagated.
        log_debug(f"Error closing async Qdrant client: {e}")
    finally:
        self._async_client = None
1108
+
1109
def get_supported_search_types(self) -> List[str]:
    """Return the search types this vector database supports: vector, keyword and hybrid."""
    supported = [SearchType.vector, SearchType.keyword, SearchType.hybrid]
    return supported