agno 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1417 @@
1
+ import asyncio
2
+ import time
3
+ from typing import Any, Dict, List, Optional, Union
4
+
5
+ from bson import ObjectId
6
+
7
+ from agno.filters import FilterExpr
8
+ from agno.knowledge.document import Document
9
+ from agno.knowledge.embedder import Embedder
10
+ from agno.utils.log import log_debug, log_info, log_warning, logger
11
+ from agno.vectordb.base import VectorDb
12
+ from agno.vectordb.distance import Distance
13
+ from agno.vectordb.search import SearchType
14
+
15
+ try:
16
+ from hashlib import md5
17
+
18
+ except ImportError:
19
+ raise ImportError("`hashlib` not installed. Please install using `pip install hashlib`")
20
+ try:
21
+ from pymongo import AsyncMongoClient, MongoClient, errors
22
+ from pymongo.collection import Collection
23
+ from pymongo.operations import SearchIndexModel
24
+
25
+ except ImportError:
26
+ raise ImportError("`pymongo` not installed. Please install using `pip install pymongo`")
27
+
28
+
29
+ class MongoDb(VectorDb):
30
+ """
31
+ MongoDB Vector Database implementation with elegant handling of Atlas Search index creation.
32
+ """
33
+
34
def __init__(
    self,
    collection_name: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    id: Optional[str] = None,
    db_url: Optional[str] = "mongodb://localhost:27017/",
    database: str = "agno",
    embedder: Optional[Embedder] = None,
    distance_metric: str = Distance.cosine,
    overwrite: bool = False,
    wait_until_index_ready_in_seconds: Optional[float] = 3,
    wait_after_insert_in_seconds: Optional[float] = 3,
    max_pool_size: int = 100,
    retry_writes: bool = True,
    client: Optional[MongoClient] = None,
    search_index_name: Optional[str] = "vector_index_1",
    cosmos_compatibility: Optional[bool] = False,
    search_type: SearchType = SearchType.vector,
    hybrid_vector_weight: float = 0.5,
    hybrid_keyword_weight: float = 0.5,
    hybrid_rank_constant: int = 60,
    **kwargs,
):
    """
    Initialize the MongoDb with MongoDB collection details.

    No connection is opened here; the constructor only validates its input and
    stores configuration on the instance.

    Args:
        collection_name (str): Name of the MongoDB collection.
        name (Optional[str]): Name of the vector database.
        description (Optional[str]): Description of the vector database.
        id (Optional[str]): Identifier of the vector database. When omitted, one is
            generated from the connection string, database and collection names.
        db_url (Optional[str]): MongoDB connection string.
        database (str): Database name.
        embedder (Embedder): Embedder instance for generating embeddings.
            Defaults to OpenAIEmbedder when not provided.
        distance_metric (str): Distance metric for similarity.
        overwrite (bool): Overwrite existing collection and index if True.
        wait_until_index_ready_in_seconds (float): Time in seconds to wait until the index is ready.
        wait_after_insert_in_seconds (float): Time in seconds to wait after inserting documents.
        max_pool_size (int): Maximum number of connections in the connection pool
        retry_writes (bool): Whether to retry write operations
        client (Optional[MongoClient]): An existing MongoClient instance.
        search_index_name (str): Name of the search index (default: "vector_index_1")
        cosmos_compatibility (bool): Whether to use Azure Cosmos DB for MongoDB vCore compatibility mode.
        search_type: The search type to use when searching for documents.
        hybrid_vector_weight (float): Default weight for vector search results in hybrid search.
        hybrid_keyword_weight (float): Default weight for keyword search results in hybrid search.
        hybrid_rank_constant (int): Default rank constant (k) for Reciprocal Rank Fusion in hybrid search. This constant is added to the rank before taking the reciprocal, helping to smooth scores. A common value is 60.
        **kwargs: Additional arguments for MongoClient. `maxPoolSize` and `retryWrites`
            are always taken from `max_pool_size` / `retry_writes`; `serverSelectionTimeoutMS`
            defaults to 5000 unless supplied here.

    Raises:
        ValueError: If `collection_name` or `database` is empty.
    """
    # Validate required parameters
    if not collection_name:
        raise ValueError("Collection name must not be empty.")
    if not database:
        raise ValueError("Database name must not be empty.")

    # Dynamic ID generation based on unique identifiers
    if id is None:
        from agno.utils.string import generate_id

        connection_identifier = db_url or "mongodb://localhost:27017/"
        seed = f"{connection_identifier}#{database}#{collection_name}"
        id = generate_id(seed)

    self.collection_name = collection_name
    # Initialize base class with name, description, and generated ID
    super().__init__(id=id, name=name, description=description)

    self.database = database
    self.search_index_name = search_index_name
    self.cosmos_compatibility = cosmos_compatibility
    self.search_type = search_type
    self.hybrid_vector_weight = hybrid_vector_weight
    self.hybrid_keyword_weight = hybrid_keyword_weight
    self.hybrid_rank_constant = hybrid_rank_constant

    if embedder is None:
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder = embedder

    self.distance_metric = distance_metric
    self.connection_string = db_url
    self.overwrite = overwrite
    self.wait_until_index_ready_in_seconds = wait_until_index_ready_in_seconds
    self.wait_after_insert_in_seconds = wait_after_insert_in_seconds

    self.kwargs = kwargs
    # The dedicated constructor parameters always take precedence over **kwargs.
    self.kwargs.update(
        {
            "maxPoolSize": max_pool_size,
            "retryWrites": retry_writes,
        }
    )
    # 5 second timeout by default; a value passed explicitly via **kwargs is
    # honoured instead of being silently overwritten.
    self.kwargs.setdefault("serverSelectionTimeoutMS", 5000)

    # Synchronous handles; _db and _collection are filled in lazily.
    self._client = client
    self._db = None
    self._collection: Optional[Collection] = None

    # Asynchronous handles, also created lazily.
    self._async_client: Optional[AsyncMongoClient] = None
    self._async_db = None
    self._async_collection: Optional[Collection] = None
137
+
138
def _get_client(self) -> MongoClient:
    """Create or retrieve the MongoDB client.

    On first use a client is built (with Cosmos DB specific settings when
    `cosmos_compatibility` is set), verified with a `ping` command and only then
    cached on the instance together with the database handle. A client that fails
    verification is closed and not cached, so a later call retries from scratch.

    When a client was injected through the constructor it is returned as-is, and
    the database handle is derived from it if it has not been set yet.

    Returns:
        MongoClient: The cached client.

    Raises:
        ConnectionError: If pymongo reports a connection failure.
        Exception: Any other error raised while connecting is logged and re-raised.
    """
    if self._client is None:
        new_client: Optional[MongoClient] = None
        if self.cosmos_compatibility:
            try:
                log_debug("Creating MongoDB Client for Azure Cosmos DB")
                # Cosmos DB specific settings
                # NOTE(review): tlsAllowInvalidCertificates=True disables TLS certificate
                # validation for every Cosmos connection - confirm this is intended.
                cosmos_kwargs = {
                    "retryWrites": False,
                    "ssl": True,
                    "tlsAllowInvalidCertificates": True,
                    "maxPoolSize": 100,
                    "maxIdleTimeMS": 30000,
                }

                # Suppress UserWarning about CosmosDB
                import warnings

                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore", category=UserWarning, message=".*connected to a CosmosDB cluster.*"
                    )
                    new_client = MongoClient(self.connection_string, **cosmos_kwargs)  # type: ignore

                    new_client.admin.command("ping")

                log_info("Connected to Azure Cosmos DB successfully.")
                # Cache only after the ping succeeded.
                self._client = new_client
                self._db = new_client.get_database(self.database)  # type: ignore
                log_info(f"Using database: {self.database}")

            except errors.ConnectionFailure as e:
                if new_client is not None and self._client is None:
                    new_client.close()
                raise ConnectionError(f"Failed to connect to Azure Cosmos DB: {e}") from e
            except Exception as e:
                logger.error(f"An error occurred while connecting to Azure Cosmos DB: {e}")
                if new_client is not None and self._client is None:
                    new_client.close()
                raise
        else:
            try:
                log_debug("Creating MongoDB Client")
                new_client = MongoClient(self.connection_string, **self.kwargs)
                # Trigger a connection to verify the client
                new_client.admin.command("ping")
                log_info("Connected to MongoDB successfully.")
                # Cache only after the ping succeeded.
                self._client = new_client
                self._db = new_client[self.database]  # type: ignore
            except errors.ConnectionFailure as e:
                logger.error(f"Failed to connect to MongoDB: {e}")
                if new_client is not None and self._client is None:
                    new_client.close()
                raise ConnectionError(f"Failed to connect to MongoDB: {e}") from e
            except Exception as e:
                logger.error(f"An error occurred while connecting to MongoDB: {e}")
                if new_client is not None and self._client is None:
                    new_client.close()
                raise

    # A client supplied via the constructor skips the branch above, which would
    # otherwise leave the database handle unset.
    if self._db is None:
        self._db = self._client[self.database]  # type: ignore
    return self._client
188
+
189
async def _get_async_client(self) -> AsyncMongoClient:
    """Create or retrieve the async MongoDB client.

    The client is built from ``self.connection_string`` using ``maxPoolSize``
    and ``retryWrites`` from ``self.kwargs`` (defaults 100 and True) and a
    5 second server-selection timeout, then verified with a ``ping``.

    Returns:
        AsyncMongoClient: The cached, verified client.

    Raises:
        Exception: Whatever the ``ping`` raised; it is logged first.
    """
    if self._async_client is None:
        log_debug("Creating Async MongoDB Client")
        client = AsyncMongoClient(
            self.connection_string,
            maxPoolSize=self.kwargs.get("maxPoolSize", 100),
            retryWrites=self.kwargs.get("retryWrites", True),
            serverSelectionTimeoutMS=5000,
        )
        # Verify connection before caching, so a failed attempt is retried on
        # the next call instead of handing back an unverified client.
        try:
            await client.admin.command("ping")
        except Exception as e:
            logger.error(f"Failed to connect to MongoDB asynchronously: {e}")
            await client.close()
            raise
        log_info("Connected to MongoDB asynchronously.")
        self._async_client = client
    return self._async_client
207
+
208
def _get_or_create_collection(self) -> Collection:
    """Get or create the MongoDB collection and make sure its search index exists.

    A missing collection is created together with its search index. For an
    existing collection the index is created only when it is absent, and in
    that case (outside Cosmos DB compatibility mode) the call optionally waits
    for the index when ``wait_until_index_ready_in_seconds`` is set.

    Returns:
        Collection: The collection handle, also cached on ``self._collection``.
    """
    if self._db is None:
        # Nothing has connected yet (or a client was supplied without a
        # database handle); resolve the database before indexing into it.
        client = self._get_client()
        if self._db is None:
            self._db = client[self.database]  # type: ignore
    self._collection = self._db[self.collection_name]  # type: ignore
    index_label = self.search_index_name

    if not self.collection_exists():
        log_info(f"Creating collection '{self.collection_name}'.")
        self._db.create_collection(self.collection_name)  # type: ignore
        self._create_search_index()
    else:
        log_info(f"Using existing collection '{self.collection_name}'.")
        # check if index exists
        log_info(f"Checking if search index '{index_label}' exists.")
        if not self._search_index_exists():
            log_info(f"Search index '{index_label}' does not exist. Creating it.")
            self._create_search_index()
            if self.wait_until_index_ready_in_seconds and not self.cosmos_compatibility:
                self._wait_for_index_ready()
        else:
            log_info("Using existing vector search index.")
    return self._collection  # type: ignore
228
+
229
def _get_collection(self) -> Collection:
    """Return the collection handle, resolving and caching it on first use.

    Returns:
        Collection: ``self._db[self.collection_name]``.
    """
    if self._collection is None:
        if self._db is None:
            # Guard on the database handle rather than the client: a client can
            # already be set while the database is still unresolved.
            client = self._get_client()
            if self._db is None:
                self._db = client[self.database]  # type: ignore
        self._collection = self._db[self.collection_name]  # type: ignore
        log_info(f"Using collection: {self.collection_name}")
    return self._collection
237
+
238
async def _get_async_collection(self):
    """Return the async collection handle, building and caching it on first use.

    On the first call the async client is obtained, and both the database and
    the collection handle are stored on the instance.
    """
    if self._async_collection is not None:
        return self._async_collection

    async_client = await self._get_async_client()
    database = async_client[self.database]  # type: ignore
    self._async_db = database
    self._async_collection = database[self.collection_name]  # type: ignore
    return self._async_collection
245
+
246
def _create_search_index(self, overwrite: bool = True) -> None:
    """Create (or drop and re-create) the vector search index.

    In Cosmos DB compatibility mode a ``cosmosSearch`` IVF index is created on
    the ``embedding`` field with ``create_index``. Otherwise an Atlas
    ``vectorSearch`` index is created with ``create_search_index``; that path
    makes up to three attempts and retries only on a "Duplicate Index" failure.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened rendering of this file; confirm it against the real source.

    Args:
        overwrite: When True, an existing index with the same name is dropped
            before the new one is created.

    Raises:
        Exception: Any error from the driver is logged and re-raised.
    """
    # Fall back to a fixed name when no index name was configured.
    index_name = self.search_index_name or "vector_index_1"
    max_retries = 3
    retry_delay = 5  # seconds; also scaled for the waits below

    if self.cosmos_compatibility:
        try:
            collection = self._get_collection()

            # Handle overwrite if requested
            if overwrite and index_name in collection.index_information():
                log_info(f"Dropping existing index '{index_name}'")
                collection.drop_index(index_name)

            # Use the embedder's dimension, defaulting to 1536 when it has none.
            embedding_dim = getattr(self.embedder, "dimensions", 1536)
            log_info(f"Creating vector search index '{index_name}'")

            # Create vector search index using Cosmos DB IVF format
            collection.create_index(
                [("embedding", "cosmosSearch")],
                name=index_name,
                cosmosSearchOptions={
                    "kind": "vector-ivf",
                    "numLists": 1,
                    "dimensions": embedding_dim,
                    "similarity": self._get_cosmos_similarity_metric(),
                },
            )

            log_info(f"Created vector search index '{index_name}' successfully")

        except Exception as e:
            logger.error(f"Error creating vector search index: {e}")
            raise
    else:
        for attempt in range(max_retries):
            try:
                if overwrite and self._search_index_exists():
                    log_info(f"Dropping existing search index '{index_name}'.")
                    try:
                        collection = self._get_collection()
                        collection.drop_search_index(index_name)
                        # Wait longer after index deletion
                        time.sleep(retry_delay * 2)
                    except errors.OperationFailure as e:
                        # A deletion that is already in flight is not an error.
                        if "Index already requested to be deleted" in str(e):
                            log_info("Index is already being deleted, waiting...")
                            time.sleep(retry_delay * 2)  # Wait longer for deletion to complete
                        else:
                            raise

                # Verify index is gone before creating new one
                # (bounded: at most three polls, then creation proceeds anyway)
                retries = 3
                while retries > 0 and self._search_index_exists():
                    log_info("Waiting for index deletion to complete...")
                    time.sleep(retry_delay)
                    retries -= 1

                log_info(f"Creating search index '{index_name}'.")

                # Get embedding dimension from embedder
                embedding_dim = getattr(self.embedder, "dimensions", 1536)

                search_index_model = SearchIndexModel(
                    definition={
                        "fields": [
                            {
                                "type": "vector",
                                "numDimensions": embedding_dim,
                                "path": "embedding",
                                "similarity": self.distance_metric,
                            },
                        ]
                    },
                    name=index_name,
                    type="vectorSearch",
                )

                collection = self._get_collection()
                collection.create_search_index(model=search_index_model)

                if self.wait_until_index_ready_in_seconds:
                    self._wait_for_index_ready()

                log_info(f"Search index '{index_name}' created successfully.")
                return

            except errors.OperationFailure as e:
                # Only a duplicate-index failure is retried, with a delay that
                # grows with the attempt number; the last attempt re-raises.
                if "Duplicate Index" in str(e) and attempt < max_retries - 1:
                    logger.warning(f"Index already exists, retrying... (attempt {attempt + 1})")
                    time.sleep(retry_delay * (attempt + 1))
                    continue
                logger.error(f"Failed to create search index: {e}")
                raise
            except Exception as e:
                logger.error(f"Unexpected error creating search index: {e}")
                raise
344
+
345
async def _create_search_index_async(self) -> None:
    """Create the Atlas vector search index asynchronously, retrying on failure.

    Up to three attempts are made. Every failed attempt except the last is
    logged as a warning and followed by a delay that grows with the attempt
    number.

    Raises:
        Exception: The error from the final attempt, after it is logged.
    """
    index_name = self.search_index_name
    max_retries = 3
    retry_delay = 5

    # The index definition is identical for every attempt, so build it once.
    # Get embedding dimension from embedder
    embedding_dim = getattr(self.embedder, "dimensions", 1536)
    search_index_model = SearchIndexModel(
        definition={
            "fields": [
                {
                    "type": "vector",
                    "numDimensions": embedding_dim,
                    "path": "embedding",
                    "similarity": self.distance_metric,
                },
            ]
        },
        name=index_name,
        type="vectorSearch",
    )

    for attempt in range(max_retries):
        try:
            collection = await self._get_async_collection()
            await collection.create_search_index(model=search_index_model)
            log_info(f"Search index '{index_name}' created successfully.")
            return

        except Exception as e:
            if attempt < max_retries - 1:
                delay = retry_delay * (attempt + 1)
                # Surface the failure instead of retrying silently.
                logger.warning(
                    f"Failed to create search index (attempt {attempt + 1}/{max_retries}), "
                    f"retrying in {delay}s: {e}"
                )
                await asyncio.sleep(delay)
                continue
            logger.error(f"Failed to create search index: {e}")
            raise
383
+
384
def _search_index_exists(self) -> bool:
    """Report whether the configured search index is present on the collection.

    In Cosmos DB compatibility mode the regular index listing is inspected for
    an index of the expected name (``"vector_index_1"`` when none is
    configured) whose key contains ``("embedding", "cosmosSearch")``. Otherwise
    the Atlas search-index listing is scanned for a matching name.

    Returns:
        bool: True when the index is found; False when it is missing or when
        the lookup itself fails (the error is logged, not raised).
    """
    if not self.cosmos_compatibility:
        wanted = self.search_index_name
        try:
            listed = list(self._get_collection().list_search_indexes())  # type: ignore
            for entry in listed:
                if entry["name"] == wanted:  # type: ignore
                    return True
            return False
        except Exception as e:
            logger.error(f"Error checking search index existence: {e}")
            return False

    wanted = self.search_index_name or "vector_index_1"
    try:
        index_map = self._get_collection().index_information()
        details = index_map.get(wanted)
        if details is not None:
            for pair in details.get("key", []):
                # Only well-formed (field, kind) pairs are considered.
                if not isinstance(pair, (tuple, list)) or len(pair) != 2:
                    continue
                field, kind = pair
                if field == "embedding" and kind == "cosmosSearch":
                    log_debug(f"Found existing vector search index: {wanted}")
                    return True

        log_debug(f"Vector search index '{wanted}' not found")
        return False
    except Exception as e:
        logger.error(f"Error checking search index existence: {e}")
        return False
418
+
419
def _wait_for_index_ready(self) -> None:
    """Poll once per second until the search index is reported as existing.

    The wait is bounded by ``self.wait_until_index_ready_in_seconds``, matching
    the async variant. When that setting is unset or zero the wait is
    unbounded, which is what this method did before for every value.

    Raises:
        TimeoutError: If the index does not appear within the configured time,
            or if checking for the index raises.
    """
    index_name = self.search_index_name
    timeout = self.wait_until_index_ready_in_seconds
    # monotonic() is immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout if timeout else None
    while True:
        try:
            if self._search_index_exists():
                log_info(f"Search index '{index_name}' is ready.")
                return
        except Exception as e:
            logger.error(f"Error checking index status: {e}")
            raise TimeoutError("Timeout waiting for search index to become ready.") from e
        if deadline is not None and time.monotonic() > deadline:
            raise TimeoutError("Timeout waiting for search index to become ready.")
        time.sleep(1)
431
+
432
async def _wait_for_index_ready_async(self) -> None:
    """Poll once per second until the search index shows up in the async listing.

    Errors raised while listing are logged with a traceback and polling
    continues until the deadline. When ``wait_until_index_ready_in_seconds`` is
    unset or zero the wait is unbounded.

    Raises:
        TimeoutError: If the index is not listed within
            ``self.wait_until_index_ready_in_seconds`` seconds.
    """
    start_time = time.monotonic()
    index_name = self.search_index_name
    timeout = self.wait_until_index_ready_in_seconds
    while True:
        try:
            collection = await self._get_async_collection()
            # The awaited call yields an async cursor; it has to be consumed
            # with ``async for`` (a plain ``for`` over it raises).
            cursor = await collection.list_search_indexes()
            async for index in cursor:
                if index["name"] == index_name:
                    log_info(f"Search index '{index_name}' is ready.")
                    return
        except Exception as e:
            logger.error(f"Error checking index status asynchronously: {e}")
            import traceback

            logger.error(f"Traceback: {traceback.format_exc()}")

        if timeout and time.monotonic() - start_time > timeout:
            raise TimeoutError("Timeout waiting for search index to become ready.")
        await asyncio.sleep(1)
452
+
453
def collection_exists(self) -> bool:
    """Check if the collection exists in the database.

    Returns:
        bool: True when ``self.collection_name`` is among the database's
        collection names.
    """
    if self._db is None:
        # _get_client() may return an already-set client without resolving the
        # database, so resolve it here when it is still missing.
        client = self._get_client()
        if self._db is None:
            self._db = client[self.database]  # type: ignore
    return self.collection_name in self._db.list_collection_names()  # type: ignore
458
+
459
def create(self) -> None:
    """Ensure the collection and its search index exist.

    Delegates to ``_get_or_create_collection`` and discards the handle it
    returns.
    """
    _ = self._get_or_create_collection()
    return None
462
+
463
async def async_create(self) -> None:
    """Create the collection and its search index asynchronously when missing.

    Nothing is created when ``async_exists()`` reports that the collection is
    already there. After creating the index, the call optionally waits for it
    when ``wait_until_index_ready_in_seconds`` is set.
    """
    await self._get_async_collection()

    already_there = await self.async_exists()
    if already_there:
        return

    log_info(f"Creating collection '{self.collection_name}' asynchronously.")
    await self._async_db.create_collection(self.collection_name)  # type: ignore
    await self._create_search_index_async()
    if not self.wait_until_index_ready_in_seconds:
        return
    await self._wait_for_index_ready_async()
473
+
474
def doc_exists(self, document: Document) -> bool:
    """Check if a document exists in the MongoDB collection based on its content.

    The lookup id is the MD5 of the content after the same NUL-character
    replacement that ``prepare_doc`` applies before hashing, so the id matches
    what was stored for documents containing ``\\x00``.

    Returns:
        bool: True when a document with that id exists; False when it does not
        or when the lookup fails (the error is logged).
    """
    try:
        collection = self._get_collection()
        # Use content hash as document ID
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode("utf-8")).hexdigest()
        result = collection.find_one({"_id": doc_id})
        exists = result is not None
        log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
        return exists
    except Exception as e:
        logger.error(f"Error checking document existence: {e}")
        return False
487
+
488
def name_exists(self, name: str) -> bool:
    """Tell whether any stored document carries the given ``name``.

    Returns:
        bool: True when a match is found; False when none is found or when the
        query fails (the error is logged).
    """
    try:
        match = self._get_collection().find_one({"name": name})
        found = match is not None
        state = "exists" if found else "does not exist"
        log_debug(f"Document with name '{name}' {state}")
        return found
    except Exception as e:
        logger.error(f"Error checking document name existence: {e}")
        return False
498
+
499
def id_exists(self, id: str) -> bool:
    """Tell whether a document with the given ``_id`` is stored.

    Args:
        id (str): The document ID to look up.

    Returns:
        bool: True when the document is found; False when it is not or when
        the query fails (the error is logged).
    """
    try:
        found = self._get_collection().find_one({"_id": id}) is not None
        state = "exists" if found else "does not exist"
        log_debug(f"Document with ID '{id}' {state}")
        return found
    except Exception as e:
        logger.error(f"Error checking document ID existence: {e}")
        return False
517
+
518
def content_hash_exists(self, content_hash: str) -> bool:
    """Tell whether any stored document has the given ``content_hash``.

    Args:
        content_hash (str): The content hash to look up.

    Returns:
        bool: True when at least one document matches; False when none does or
        when the query fails (the error is logged).
    """
    try:
        hit = self._get_collection().find_one({"content_hash": content_hash})
        found = hit is not None
        state = "exists" if found else "does not exist"
        log_debug(f"Document with content_hash '{content_hash}' {state}")
        return found
    except Exception as e:
        logger.error(f"Error checking content_hash existence: {e}")
        return False
536
+
537
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """Embed the given documents and bulk-insert them into the collection.

    Documents whose preparation raises ``ValueError`` (including a missing
    embedding) are logged and skipped. Insert errors are logged, not raised.

    Args:
        content_hash: Hash stored on every inserted record.
        documents: Documents to embed and insert.
        filters: Optional key/values merged into each document's metadata.
    """
    log_debug(f"Inserting {len(documents)} documents")
    target = self._get_collection()

    batch = []
    for document in documents:
        try:
            document.embed(embedder=self.embedder)
            if document.embedding is None:
                raise ValueError(f"Failed to generate embedding for document: {document.id}")
            batch.append(self.prepare_doc(content_hash, document, filters))
        except ValueError as e:
            logger.error(f"Error preparing document '{document.name}': {e}")

    if not batch:
        return

    try:
        # ordered=False: the server keeps going past individual failures.
        target.insert_many(batch, ordered=False)
        log_info(f"Inserted {len(batch)} documents successfully.")
        pause = self.wait_after_insert_in_seconds
        if pause and pause > 0:
            time.sleep(pause)
    except errors.BulkWriteError as e:
        logger.warning(f"Bulk write error while inserting documents: {e.details}")
    except Exception as e:
        logger.error(f"Error inserting documents: {e}")
563
+
564
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """Embed each document and write it with an ``update_one(..., upsert=True)``.

    Documents are processed one at a time; a failure on one document is logged
    and does not stop the others.

    Args:
        content_hash: Hash stored on every written record.
        documents: Documents to embed and upsert.
        filters: Optional key/values merged into each document's metadata.
    """
    log_info(f"Upserting {len(documents)} documents")
    target = self._get_collection()

    for document in documents:
        try:
            document.embed(embedder=self.embedder)
            if document.embedding is None:
                raise ValueError(f"Failed to generate embedding for document: {document.id}")
            record = self.prepare_doc(content_hash, document, filters)
            record_id = record["_id"]
            target.update_one({"_id": record_id}, {"$set": record}, upsert=True)
            log_info(f"Upserted document: {record_id}")
        except Exception as e:
            logger.error(f"Error upserting document '{document.name}': {e}")
583
+
584
def upsert_available(self) -> bool:
    """Report that this store supports upserts (always True)."""
    supported = True
    return supported
587
+
588
def search(
    self,
    query: str,
    limit: int = 5,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
    min_score: float = 0.0,
) -> List[Document]:
    """Search for documents using vector similarity.

    When ``self.search_type`` is hybrid the call is delegated to
    ``hybrid_search``. In Cosmos DB compatibility mode neither ``filters`` nor
    ``min_score`` is applied and errors yield an empty list; in Atlas mode both
    are applied as a ``$match`` after the vector stage and errors are re-raised.

    Args:
        query: Text to embed and search for.
        limit: Maximum number of documents to return.
        filters: Metadata filters as a dict; keys without a dot are prefixed
            with ``meta_data.``. List-style filter expressions are not
            supported and are dropped with a warning.
        min_score: Minimum vector score (Atlas mode only).

    Returns:
        List[Document]: Matches, each with its score under ``meta_data["score"]``.

    Raises:
        Exception: Any error during the Atlas search, after it is logged.
    """
    if isinstance(filters, list):
        log_warning("Filters Expressions are not supported in MongoDB. No filters will be applied.")
        filters = None
    if self.search_type == SearchType.hybrid:
        return self.hybrid_search(query, limit=limit, filters=filters)

    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Failed to generate embedding for query: {query}")
        return []

    if self.cosmos_compatibility:
        # Azure Cosmos DB Mongo Vcore compatibility mode
        try:
            collection = self._get_collection()

            # Construct the search pipeline
            search_stage = {
                "$search": {
                    "cosmosSearch": {"vector": query_embedding, "path": "embedding", "k": limit, "nProbes": 2},
                    "returnStoredSource": True,
                }
            }

            pipeline = [
                search_stage,
                {
                    "$project": {
                        "similarityScore": {"$meta": "searchScore"},
                        "_id": 1,
                        "name": 1,
                        "content": 1,
                        "meta_data": 1,
                        # Projected so the content_id read below is not always None.
                        "content_id": 1,
                    }
                },
            ]

            results = list(collection.aggregate(pipeline))
            docs = [
                Document(
                    id=str(doc["_id"]),
                    name=doc.get("name"),
                    content=doc["content"],
                    meta_data={**doc.get("meta_data", {}), "score": doc.get("similarityScore", 0.0)},
                    content_id=doc.get("content_id"),
                )
                for doc in results
            ]

            log_info(f"Search completed. Found {len(docs)} documents.")
            return docs

        except Exception as e:
            logger.error(f"Error during vector search: {e}")
            return []
    else:
        # MongoDB Atlas Search
        try:
            collection = self._get_collection()
            # numCandidates must not be lower than limit, so large limits raise
            # it past the usual cap of 100.
            num_candidates = max(limit, min(limit * 4, 100))
            pipeline = [
                {
                    "$vectorSearch": {
                        "index": self.search_index_name,
                        "limit": limit,
                        "numCandidates": num_candidates,
                        "queryVector": query_embedding,
                        "path": "embedding",
                    }
                },
                {"$set": {"score": {"$meta": "vectorSearchScore"}}},
            ]

            match_filters = {}
            if min_score > 0:
                match_filters["score"] = {"$gte": min_score}

            # Handle filters if provided
            if filters:
                # Plain keys address metadata fields; dotted keys are used as given.
                for key, value in filters.items():
                    if "." not in key:
                        match_filters[f"meta_data.{key}"] = value
                    else:
                        match_filters[key] = value

            if match_filters:
                pipeline.append({"$match": match_filters})  # type: ignore

            pipeline.append({"$project": {"embedding": 0}})

            results = list(collection.aggregate(pipeline))  # type: ignore

            docs = []
            for doc in results:
                # Convert ObjectIds to strings before creating Document
                clean_doc = self._convert_objectids_to_strings(doc)
                document = Document(
                    id=str(clean_doc["_id"]),
                    name=clean_doc.get("name"),
                    content=clean_doc["content"],
                    meta_data={**clean_doc.get("meta_data", {}), "score": clean_doc.get("score", 0.0)},
                    content_id=clean_doc.get("content_id"),
                )
                docs.append(document)

            log_info(f"Search completed. Found {len(docs)} documents.")
            return docs

        except Exception as e:
            logger.error(f"Error during search: {e}")
            raise
711
+
712
def vector_search(self, query: str, limit: int = 5) -> List[Document]:
    """Run ``search`` for ``query`` with the given ``limit`` and return its result."""
    log_debug("Performing vector search.")
    matches = self.search(query, limit=limit)
    return matches
716
+
717
def keyword_search(self, query: str, limit: int = 5) -> List[Document]:
    """Perform a case-insensitive keyword search over document content.

    The query is matched literally: regex metacharacters in it are escaped, so
    text such as ``"what is c++?"`` neither changes the meaning of the pattern
    nor makes it invalid.

    Args:
        query: Text to look for inside the ``content`` field.
        limit: Maximum number of documents to return.

    Returns:
        List[Document]: Matching documents, or an empty list when the query
        fails (the error is logged).
    """
    import re

    try:
        collection = self._get_collection()
        cursor = collection.find(
            {"content": {"$regex": re.escape(query), "$options": "i"}},
            {"_id": 1, "name": 1, "content": 1, "meta_data": 1, "content_id": 1},
        ).limit(limit)
        results = [
            Document(
                id=str(doc["_id"]),
                name=doc.get("name"),
                content=doc["content"],
                meta_data=doc.get("meta_data", {}),
                content_id=doc.get("content_id"),
            )
            for doc in cursor
        ]
        log_debug(f"Keyword search completed. Found {len(results)} documents.")
        return results
    except Exception as e:
        logger.error(f"Error during keyword search: {e}")
        return []
740
+
741
def hybrid_search(
    self,
    query: str,
    limit: int = 5,
    filters: Optional[Dict[str, Any]] = None,
) -> List[Document]:
    """
    Perform a hybrid search combining vector and keyword-based searches using Reciprocal Rank Fusion.

    Each branch (``$vectorSearch`` and an Atlas ``$search`` text query on the
    ``"default"`` index) returns up to ``limit * 2`` documents. A document at
    zero-based position ``rank`` in a branch scores ``weight / (rank + k + 1)``,
    where ``weight`` is ``hybrid_vector_weight`` or ``hybrid_keyword_weight``
    and ``k`` is ``hybrid_rank_constant``. Scores are summed per document.

    Metadata filters are applied to the fused results before sorting and
    truncating, so they no longer remove documents from an already-truncated
    list. They still act only on the candidates each branch returned.

    Reference: https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/reciprocal-rank-fusion

    Args:
        query: Text to search for.
        limit: Maximum number of documents to return.
        filters: Metadata filters; keys without a dot are prefixed with
            ``meta_data.``.

    Returns:
        List[Document]: Fused results ordered by score, or an empty list in
        Cosmos DB compatibility mode, when the query cannot be embedded, or
        when the aggregation fails (errors are logged).
    """

    if self.cosmos_compatibility:
        log_warning("Hybrid search is not implemented for Cosmos DB compatibility mode. Returning empty list.")
        return []

    log_debug(f"Performing hybrid search for query: '{query}' with limit: {limit}")

    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Failed to generate embedding for query: {query}")
        return []

    collection = self._get_collection()

    k = self.hybrid_rank_constant

    mongo_filters = {}
    if filters:
        for key, value in filters.items():
            # Plain keys address metadata fields; dotted keys are used as given.
            if "." not in key:
                mongo_filters[f"meta_data.{key}"] = value
            else:
                mongo_filters[key] = value

    branch_limit = limit * 2
    # numCandidates must not be lower than the $vectorSearch limit.
    num_candidates = max(branch_limit, min(limit * 10, 200))

    def ranked_branch(score_field: str, weight: Any, zero_field: str) -> List[Dict[str, Any]]:
        """Stages turning an ordered result list into per-document RRF scores."""
        return [
            {"$group": {"_id": None, "docs": {"$push": "$$ROOT"}}},
            {"$unwind": {"path": "$docs", "includeArrayIndex": "rank"}},
            {
                "$addFields": {
                    "_id": "$docs._id",
                    "name": "$docs.name",
                    "content": "$docs.content",
                    "meta_data": "$docs.meta_data",
                    "content_id": "$docs.content_id",
                    score_field: {"$divide": [weight, {"$add": ["$rank", k, 1]}]},
                    # The other branch's score must exist so the sums below work.
                    zero_field: 0.0,
                }
            },
            {
                "$project": {
                    "_id": 1,
                    "name": 1,
                    "content": 1,
                    "meta_data": 1,
                    "content_id": 1,
                    "vs_score": 1,
                    "fts_score": 1,
                }
            },
        ]

    pipeline: List[Dict[str, Any]] = [
        # Vector Search Branch
        {
            "$vectorSearch": {
                "index": self.search_index_name,
                "path": "embedding",
                "queryVector": query_embedding,
                "numCandidates": num_candidates,
                "limit": branch_limit,
            }
        },
        *ranked_branch("vs_score", self.hybrid_vector_weight, "fts_score"),
        # Union with Keyword Search Branch
        {
            "$unionWith": {
                "coll": self.collection_name,
                "pipeline": [
                    {
                        "$search": {
                            "index": "default",
                            "text": {"query": query, "path": "content"},
                        }
                    },
                    {"$limit": branch_limit},
                    *ranked_branch("fts_score", self.hybrid_keyword_weight, "vs_score"),
                ],
            }
        },
        # Combine and Rank
        {
            "$group": {
                "_id": "$_id",
                "name": {"$first": "$name"},
                "content": {"$first": "$content"},
                "meta_data": {"$first": "$meta_data"},
                "content_id": {"$first": "$content_id"},
                "vs_score": {"$sum": "$vs_score"},
                "fts_score": {"$sum": "$fts_score"},
            }
        },
        {
            "$project": {
                "_id": 1,
                "name": 1,
                "content": 1,
                "meta_data": 1,
                "content_id": 1,
                "score": {"$add": ["$vs_score", "$fts_score"]},
            }
        },
    ]

    # Apply filters before sorting and truncating the fused results.
    if mongo_filters:
        pipeline.append({"$match": mongo_filters})
    pipeline.append({"$sort": {"score": -1}})
    pipeline.append({"$limit": limit})

    try:
        results = list(collection.aggregate(pipeline))  # type: ignore

        docs = []
        for doc in results:
            # Convert ObjectIds to strings before creating Document
            clean_doc = self._convert_objectids_to_strings(doc)
            document = Document(
                id=str(clean_doc["_id"]),
                name=clean_doc.get("name"),
                content=clean_doc["content"],
                meta_data={**clean_doc.get("meta_data", {}), "score": clean_doc.get("score", 0.0)},
                content_id=clean_doc.get("content_id"),
            )
            docs.append(document)

        log_info(f"Hybrid search completed. Found {len(docs)} documents.")
        return docs
    except errors.OperationFailure as e:
        logger.error(
            f"Error during hybrid search, potentially due to missing or misconfigured Atlas Search index for text search: {e}"
        )
        logger.error(f"Details: {e.details}")
        return []
    except Exception as e:
        logger.error(f"Error during hybrid search: {e}")
        import traceback

        logger.error(f"Traceback: {traceback.format_exc()}")
        return []
927
+
928
def drop(self) -> None:
    """Drop the search index (when present) and then the collection itself.

    Nothing is dropped when the collection does not exist. In Cosmos DB
    compatibility mode a failure to drop the index is logged and ignored; in
    Atlas mode it is logged and re-raised.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened rendering of this file; confirm it against the real source.
    """
    collection = self._get_collection()
    # Same fallback name that index creation uses when none is configured.
    index_name = self.search_index_name or "vector_index_1"

    if self.exists():
        if self.cosmos_compatibility:
            # Cosmos DB specific handling
            try:
                # Drop the index if it exists
                if self._search_index_exists():
                    log_info(f"Dropping index '{index_name}'")
                    try:
                        collection.drop_index(index_name)
                    except Exception as e:
                        # Best effort: the collection is still dropped below.
                        logger.error(f"Error dropping index: {e}")

            except Exception as e:
                logger.error(f"Error dropping collection: {e}")
                raise
        else:
            # MongoDB Atlas specific handling
            try:
                if self._search_index_exists():
                    collection.drop_search_index(index_name)
                    # Fixed pause after requesting the index drop.
                    time.sleep(2)

            except Exception as e:
                logger.error(f"Error dropping collection: {e}")
                raise

        # Drop the collection
        collection.drop()
        time.sleep(2)

        log_info(f"Collection '{self.collection_name}' dropped successfully")
964
+
965
def exists(self) -> bool:
    """Return whether the collection exists, logging the outcome at debug level."""
    found = self.collection_exists()
    log_debug(f"Collection '{self.collection_name}' existence: {found}")
    return found
970
+
971
def optimize(self) -> None:
    """Placeholder: optimization is not implemented, so this does nothing."""
    # TODO: not implemented
    return None
974
+
975
def delete(self) -> bool:
    """Remove every document from the collection.

    Returns:
        bool: True when the collection does not exist (nothing to delete) or
        the delete completed, even if it removed zero documents; False when
        the delete raised (the error is logged).
    """
    if not self.exists():
        # Nothing to delete counts as success.
        return True

    try:
        outcome = self._get_collection().delete_many({})
        # Any deletion count, including 0, is treated as success.
        succeeded = outcome.deleted_count >= 0
        log_info(f"Deleted {outcome.deleted_count} documents from collection.")
        return succeeded
    except Exception as e:
        logger.error(f"Error deleting documents: {e}")
        return False
990
+
991
def prepare_doc(
    self, content_hash: str, document: Document, filters: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Build the MongoDB record for a document.

    When ``filters`` is given it is merged into a copy of the document's
    metadata and the merged dict is assigned back to ``document.meta_data``.
    NUL characters in the content are replaced with U+FFFD, and the record's
    ``_id`` is the MD5 of that cleaned content.

    Args:
        content_hash: Hash stored under ``content_hash`` on the record.
        document: Source document; its ``meta_data`` may be replaced.
        filters: Optional key/values merged into the metadata.

    Returns:
        Dict[str, Any]: The record ready for insert or upsert.
    """
    if filters:
        merged_meta = document.meta_data.copy() if document.meta_data else {}
        merged_meta.update(filters)
        document.meta_data = merged_meta

    text = document.content.replace("\x00", "\ufffd")
    record = {
        "_id": md5(text.encode("utf-8")).hexdigest(),
        "name": document.name,
        "content": text,
        "meta_data": document.meta_data,
        "embedding": document.embedding,
        "content_id": document.content_id,
        "content_hash": content_hash,
    }
    log_debug(f"Prepared document: {record['_id']}")
    return record
1015
+
1016
def get_count(self) -> int:
    """Return how many documents the collection holds.

    Returns:
        int: The document count, or 0 when counting fails (the error is logged).
    """
    try:
        total = self._get_collection().count_documents({})
        log_debug(f"Collection '{self.collection_name}' has {total} documents.")
        return total
    except Exception as e:
        logger.error(f"Error getting document count: {e}")
        return 0
1026
+
1027
async def async_doc_exists(self, document: Document) -> bool:
    """Check if a document exists asynchronously, based on its content.

    The lookup id is the MD5 of the content after the same NUL-character
    replacement that ``prepare_doc`` applies before hashing, so the id matches
    what was stored for documents containing ``\\x00``.

    Returns:
        bool: True when a document with that id exists; False when it does not
        or when the lookup fails (the error is logged).
    """
    try:
        collection = await self._get_async_collection()
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode("utf-8")).hexdigest()
        result = await collection.find_one({"_id": doc_id})
        exists = result is not None
        log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
        return exists
    except Exception as e:
        logger.error(f"Error checking document existence asynchronously: {e}")
        return False
1039
+
1040
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Embed the given documents and bulk-insert them asynchronously.

    Batch embedding is used when the embedder enables and supports it; a batch
    failure falls back to per-document embedding unless it looks like a rate
    limit, which is re-raised. As in the synchronous ``insert``, documents that
    end up without an embedding are logged and skipped, not stored.

    Args:
        content_hash: Hash stored on every inserted record.
        documents: Documents to embed and insert.
        filters: Optional key/values merged into each document's metadata.

    Raises:
        Exception: A batch-embedding error that looks like a rate limit.
    """
    log_debug(f"Inserting {len(documents)} documents asynchronously")
    collection = await self._get_async_collection()

    async def embed_individually() -> None:
        """Embed every document concurrently and log the failures."""
        outcomes = await asyncio.gather(
            *(doc.async_embed(embedder=self.embedder) for doc in documents),
            return_exceptions=True,
        )
        for doc, outcome in zip(documents, outcomes):
            if isinstance(outcome, BaseException):
                logger.error(f"Error embedding document '{doc.name}': {outcome}")

    if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
        # Use batch embedding when enabled and supported
        try:
            # Extract content from all documents
            doc_contents = [doc.content for doc in documents]

            # Get batch embeddings and usage
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Process documents with pre-computed embeddings
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # Check if this is a rate limit error - don't fall back as it would make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )

            if is_rate_limit:
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise
            logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            # Fall back to individual embedding
            await embed_individually()
    else:
        # Use individual embedding
        await embed_individually()

    prepared_docs = []
    for document in documents:
        try:
            if document.embedding is None:
                raise ValueError(f"Failed to generate embedding for document: {document.id}")
            prepared_docs.append(self.prepare_doc(content_hash, document, filters))
        except ValueError as e:
            logger.error(f"Error preparing document '{document.name}': {e}")

    if prepared_docs:
        try:
            await collection.insert_many(prepared_docs, ordered=False)
            log_info(f"Inserted {len(prepared_docs)} documents successfully.")
            if self.wait_after_insert_in_seconds and self.wait_after_insert_in_seconds > 0:
                await asyncio.sleep(self.wait_after_insert_in_seconds)
        except errors.BulkWriteError as e:
            logger.warning(f"Bulk write error while inserting documents: {e.details}")
        except Exception as e:
            logger.error(f"Error inserting documents asynchronously: {e}")
1104
+
1105
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Upsert documents asynchronously.

    Embeds every document (in one batch call when the embedder has ``enable_batch`` set and
    exposes ``async_get_embeddings_batch_and_usage``, otherwise one call per document), then
    writes each document with ``update_one(..., upsert=True)`` keyed on the ``_id`` produced
    by ``prepare_doc``.

    Args:
        content_hash: Passed through to ``prepare_doc`` for every document.
        documents: Documents to embed and upsert.
        filters: Optional filters passed through to ``prepare_doc``.

    Raises:
        Exception: Re-raised from the batch embedding call when the error message looks like
            a rate-limit error. Other embedding and write errors are logged, not raised.
    """
    log_info(f"Upserting {len(documents)} documents asynchronously")
    collection = await self._get_async_collection()

    async def _embed_individually() -> None:
        # Best effort: one failing document must not abort the others, so exceptions are
        # collected by gather() and then logged instead of being dropped silently.
        outcomes = await asyncio.gather(
            *[doc.async_embed(embedder=self.embedder) for doc in documents], return_exceptions=True
        )
        for doc, outcome in zip(documents, outcomes):
            if isinstance(outcome, BaseException):
                logger.error(f"Error embedding document '{doc.name}': {outcome}")

    if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
        # Use batch embedding when enabled and supported
        try:
            doc_contents = [doc.content for doc in documents]
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            if len(embeddings) < len(documents):
                # Documents past the end of the result keep whatever embedding they already had.
                logger.warning(
                    f"Batch embedding returned {len(embeddings)} embeddings for {len(documents)} documents"
                )

            # Attach the pre-computed embeddings (and usage, when reported) positionally
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # Check if this is a rate limit error - don't fall back as it would make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )

            if is_rate_limit:
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise

            logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            await _embed_individually()
    else:
        # Use individual embedding
        await _embed_individually()

    # Writes are issued one at a time; a failure on one document is logged and the loop continues.
    for document in documents:
        try:
            doc_data = self.prepare_doc(content_hash, document, filters)
            await collection.update_one(
                {"_id": doc_data["_id"]},
                {"$set": doc_data},
                upsert=True,
            )
            log_info(f"Upserted document: {doc_data['_id']}")
        except Exception as e:
            logger.error(f"Error upserting document '{document.name}' asynchronously: {e}")
1162
+
1163
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Search for documents asynchronously.

    Embeds ``query`` with the configured embedder, runs a ``$vectorSearch`` aggregation on
    the ``embedding`` path using ``self.search_index_name``, optionally narrows the hits
    with a ``$match`` stage, and converts the hits into ``Document`` objects.

    Args:
        query: Text to embed and search for.
        limit: Maximum number of documents to return.
        filters: Optional dict of filters. Keys without a dot are prefixed with
            ``meta_data.``; dotted keys are used verbatim. A list of filter expressions is
            not supported: a warning is logged and no filter is applied.

    Returns:
        The matching documents, each with its vector-search score stored under
        ``meta_data["score"]``. An empty list when the query could not be embedded.

    Raises:
        Exception: Any error raised while fetching the collection or running the
            aggregation is logged (with traceback) and re-raised.
    """
    if isinstance(filters, List):
        log_warning("Filters Expressions are not supported in MongoDB. No filters will be applied.")
        filters = None

    # NOTE(review): this is the synchronous embedder call inside a coroutine; if the embedder
    # offers an async variant, switching to it would avoid blocking the event loop - confirm.
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Failed to generate embedding for query: {query}")
        return []

    try:
        collection = await self._get_async_collection()

        # $vectorSearch rejects numCandidates < limit, so never let the 100 cap undercut limit.
        num_candidates = max(limit, min(limit * 4, 100))
        pipeline: List[Dict[str, Any]] = [
            {
                "$vectorSearch": {
                    "index": self.search_index_name,
                    "limit": limit,
                    "numCandidates": num_candidates,
                    "queryVector": query_embedding,
                    "path": "embedding",
                }
            },
            {"$set": {"score": {"$meta": "vectorSearchScore"}}},
        ]

        # Handle filters if provided. The $match runs after $vectorSearch, so it can only
        # remove hits: fewer than `limit` documents may come back.
        if filters:
            mongo_filters: Dict[str, Any] = {}
            for key, value in filters.items():
                # Bare keys are metadata keys; anything already dotted is used as given.
                if "." not in key:
                    mongo_filters[f"meta_data.{key}"] = value
                else:
                    mongo_filters[key] = value

            pipeline.append({"$match": mongo_filters})

        # The stored vectors are not needed in the result set
        pipeline.append({"$project": {"embedding": 0}})

        # With AsyncMongoClient, aggregate() returns a coroutine that resolves to a cursor
        cursor = await collection.aggregate(pipeline)

        results = []
        async for doc in cursor:
            results.append(doc)
            if len(results) >= limit:
                break

        docs = [
            Document(
                id=str(doc["_id"]),
                name=doc.get("name"),
                content=doc["content"],
                # `or {}` also covers a meta_data field that is present but stored as null
                meta_data={**(doc.get("meta_data") or {}), "score": doc.get("score", 0.0)},
                content_id=doc.get("content_id"),
            )
            for doc in results
        ]

        log_info(f"Async search completed. Found {len(docs)} documents.")
        return docs

    except Exception as e:
        logger.error(f"Error during async search: {e}")
        # Include traceback for better debugging
        import traceback

        logger.error(f"Traceback: {traceback.format_exc()}")
        raise
1237
+
1238
async def async_drop(self) -> None:
    """Drop the collection asynchronously if ``async_exists`` reports it is present.

    Raises:
        Exception: Any error raised while fetching or dropping the collection is logged
            and re-raised.
    """
    collection_present = await self.async_exists()
    if not collection_present:
        return

    try:
        target = await self._get_async_collection()
        await target.drop()
        log_info(f"Collection '{self.collection_name}' dropped asynchronously")
    except Exception as e:
        logger.error(f"Error dropping collection asynchronously: {e}")
        raise
1248
+
1249
async def async_exists(self) -> bool:
    """Report whether ``self.collection_name`` is listed in ``self.database``.

    Returns:
        True when the collection name is among the database's collection names. False when
        it is not, or when the lookup raised (the error is logged).
    """
    try:
        async_client = await self._get_async_client()
        database = async_client[self.database]
        known_collections = await database.list_collection_names()
        exists = self.collection_name in known_collections
        log_debug(f"Collection '{self.collection_name}' existence (async): {exists}")
    except Exception as e:
        logger.error(f"Error checking collection existence asynchronously: {e}")
        return False
    return exists
1260
+
1261
async def async_name_exists(self, name: str) -> bool:
    """Report whether any stored document has its ``name`` field equal to ``name``.

    Args:
        name: Document name to look up.

    Returns:
        True when ``find_one`` returns a document. False when it returns nothing or when
        the lookup raised (the error is logged).
    """
    try:
        collection = await self._get_async_collection()
        match = await collection.find_one({"name": name})
        exists = match is not None
        log_debug(f"Document with name '{name}' {'exists' if exists else 'does not exist'} (async)")
    except Exception as e:
        logger.error(f"Error checking document name existence asynchronously: {e}")
        return False
    return exists
1271
+
1272
def _get_cosmos_similarity_metric(self) -> str:
    """Translate ``self.distance_metric`` into the Cosmos DB similarity code.

    Returns:
        ``"COS"`` for cosine, ``"L2"`` for euclidean, ``"IP"`` for dotProduct, and
        ``"COS"`` for any other value.
    """
    metric = self.distance_metric
    # Cosmos DB supports: COS (cosine), L2 (Euclidean), IP (inner product)
    cosmos_codes = dict(cosine="COS", euclidean="L2", dotProduct="IP")
    try:
        return cosmos_codes[metric]
    except KeyError:
        # Unknown metrics fall back to cosine
        return "COS"
1277
+
1278
def _convert_objectids_to_strings(self, obj: Any) -> Any:
    """
    Return a copy of ``obj`` in which every MongoDB ObjectId is replaced by its string form.

    Dicts, lists and tuples are walked recursively (dict keys are left untouched); any
    other value is returned unchanged.

    Args:
        obj: Any object that might contain ObjectIds

    Returns:
        The converted structure; containers are rebuilt as plain ``dict``/``list``/``tuple``
    """
    if isinstance(obj, ObjectId):
        return str(obj)
    if isinstance(obj, dict):
        return dict((key, self._convert_objectids_to_strings(value)) for key, value in obj.items())
    if isinstance(obj, list):
        return list(map(self._convert_objectids_to_strings, obj))
    if isinstance(obj, tuple):
        return tuple(map(self._convert_objectids_to_strings, obj))
    return obj
1298
+
1299
def delete_by_id(self, id: str) -> bool:
    """Delete the document whose ``_id`` equals ``id``.

    Args:
        id: The ``_id`` value to delete.

    Returns:
        True when the delete call completed, whether or not a document matched. False when
        it raised (the error is logged).
    """
    try:
        result = self._get_collection().delete_one({"_id": id})

        if result.deleted_count > 0:
            message = (
                f"Deleted {result.deleted_count} document(s) with ID '{id}' from collection '{self.collection_name}'."
            )
        else:
            message = f"No documents found with ID '{id}' to delete."
        log_info(message)
        return True
    except Exception as e:
        logger.error(f"Error deleting document with ID '{id}': {e}")
        return False
1316
+
1317
def delete_by_name(self, name: str) -> bool:
    """Delete every document whose ``name`` field equals ``name``.

    Args:
        name: Document name to match.

    Returns:
        True when the delete call completed (even if nothing matched). False when it
        raised (the error is logged).
    """
    try:
        result = self._get_collection().delete_many({"name": name})
        log_info(
            f"Deleted {result.deleted_count} document(s) with name '{name}' from collection '{self.collection_name}'."
        )
    except Exception as e:
        logger.error(f"Error deleting documents with name '{name}': {e}")
        return False
    return True
1330
+
1331
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete every document whose ``meta_data`` matches all given key/value pairs.

    Args:
        metadata: Key/value pairs to match. Each key is looked up as ``meta_data.<key>``.
            Must not be empty.

    Returns:
        True when the delete call completed (even if nothing matched). False when
        ``metadata`` is empty or the delete raised (the error is logged).
    """
    if not metadata:
        # An empty filter matches every document, so delete_many({}) would wipe the whole
        # collection. Refuse instead of deleting everything by accident.
        log_warning("No metadata provided for delete_by_metadata. No documents will be deleted.")
        return False

    try:
        collection = self._get_collection()

        # Use dot notation for nested metadata fields
        mongo_filters = {f"meta_data.{key}": value for key, value in metadata.items()}

        result = collection.delete_many(mongo_filters)

        log_info(
            f"Deleted {result.deleted_count} document(s) with metadata '{metadata}' from collection '{self.collection_name}'."
        )
        return True
    except Exception as e:
        logger.error(f"Error deleting documents with metadata '{metadata}': {e}")
        return False
1351
+
1352
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete every document whose ``content_hash`` field equals ``content_hash``.

    Args:
        content_hash (str): The content hash to delete.

    Returns:
        bool: True when the delete call completed (even if nothing matched), False when it
        raised (the error is logged).
    """
    query = {"content_hash": content_hash}
    try:
        result = self._get_collection().delete_many(query)
        log_info(f"Deleted {result.deleted_count} documents with content_hash '{content_hash}'")
    except Exception as e:
        logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
        return False
    return True
1369
+
1370
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete every document whose ``content_id`` field equals ``content_id``.

    Args:
        content_id: The content ID to match.

    Returns:
        True when the delete call completed (even if nothing matched). False when it
        raised (the error is logged).
    """
    query = {"content_id": content_id}
    try:
        result = self._get_collection().delete_many(query)
        log_info(
            f"Deleted {result.deleted_count} document(s) with content_id '{content_id}' from collection '{self.collection_name}'."
        )
    except Exception as e:
        logger.error(f"Error deleting documents with content_id '{content_id}': {e}")
        return False
    return True
1383
+
1384
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    Every key in ``metadata`` is written with ``$set`` to two places on each matching
    document: ``meta_data.<key>`` and ``filters.<key>``. Keys not mentioned are untouched.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update. An empty mapping is a no-op.

    Raises:
        Exception: Any error raised while resolving the collection or running the update
            is logged and re-raised.
    """
    if not metadata:
        # Nothing to write. Returning here also avoids sending an empty ``$set``, which
        # MongoDB servers older than 5.0 reject.
        return

    try:
        # Resolve the collection through the same accessor the delete_* methods use, rather
        # than indexing self._client directly.
        # NOTE(review): assumes _get_collection handles a not-yet-initialised client - confirm.
        collection = self._get_collection()

        # Create query filter for content_id
        filter_query = {"content_id": content_id}

        update_operations: Dict[str, Any] = {}
        for key, value in metadata.items():
            update_operations[f"meta_data.{key}"] = value
            update_operations[f"filters.{key}"] = value

        # Update documents
        result = collection.update_many(filter_query, {"$set": update_operations})

        if result.matched_count == 0:
            logger.debug(f"No documents found with content_id: {content_id}")
        else:
            logger.debug(f"Updated metadata for {result.matched_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
1414
+
1415
def get_supported_search_types(self) -> List[str]:
    """Return the search types this vector database supports: vector and hybrid."""
    supported = (SearchType.vector, SearchType.hybrid)
    return list(supported)