agno 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
agno/vectordb/couchbase/couchbase.py
@@ -0,0 +1,1442 @@
+ import asyncio
+ import time
+ from datetime import timedelta
+ from hashlib import md5
+ from typing import Any, Dict, List, Optional, Union
+
+ from agno.filters import FilterExpr
+ from agno.knowledge.document import Document
+ from agno.knowledge.embedder import Embedder
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
+ from agno.utils.log import log_debug, log_info, log_warning, logger
+ from agno.vectordb.base import VectorDb
+
+ try:
+     from acouchbase.bucket import AsyncBucket
+     from acouchbase.cluster import AsyncCluster
+     from acouchbase.collection import AsyncCollection
+     from acouchbase.management.search import (
+         ScopeSearchIndexManager as AsyncScopeSearchIndexManager,
+     )
+     from acouchbase.management.search import (
+         SearchIndex as AsyncSearchIndex,
+     )
+     from acouchbase.management.search import (
+         SearchIndexManager as AsyncSearchIndexManager,
+     )
+     from acouchbase.scope import AsyncScope
+     from couchbase.bucket import Bucket
+     from couchbase.cluster import Cluster
+     from couchbase.collection import Collection
+     from couchbase.exceptions import (
+         CollectionAlreadyExistsException,
+         CollectionNotFoundException,
+         ScopeAlreadyExistsException,
+         SearchIndexNotFoundException,
+     )
+     from couchbase.management.search import ScopeSearchIndexManager, SearchIndex, SearchIndexManager
+     from couchbase.n1ql import QueryScanConsistency
+     from couchbase.options import ClusterOptions, QueryOptions, SearchOptions
+     from couchbase.result import SearchResult
+     from couchbase.scope import Scope
+     from couchbase.search import SearchRequest
+     from couchbase.vector_search import VectorQuery, VectorSearch
+ except ImportError:
+     raise ImportError("`couchbase` not installed. Please install using `pip install couchbase`")
+
+
+ class CouchbaseSearch(VectorDb):
+     """
+     Couchbase Vector Database implementation with FTS (Full Text Search) index support.
+     """
+
+     def __init__(
+         self,
+         bucket_name: str,
+         scope_name: str,
+         collection_name: str,
+         couchbase_connection_string: str,
+         cluster_options: ClusterOptions,
+         search_index: Union[str, SearchIndex],
+         embedder: Embedder = OpenAIEmbedder(),
+         overwrite: bool = False,
+         is_global_level_index: bool = False,
+         wait_until_index_ready: float = 0,
+         batch_limit: int = 500,
+         name: Optional[str] = None,
+         description: Optional[str] = None,
+         **kwargs,
+     ):
+         """
+         Initialize the CouchbaseSearch with Couchbase connection details.
+
+         Args:
+             bucket_name (str): Name of the Couchbase bucket.
+             scope_name (str): Name of the scope within the bucket.
+             collection_name (str): Name of the collection within the scope.
+             couchbase_connection_string (str): Couchbase connection string.
+             cluster_options (ClusterOptions): Options for configuring the Couchbase cluster connection.
+             search_index (Union[str, SearchIndex]): Search index configuration, either an index name or a SearchIndex definition.
+             embedder (Embedder): Embedder instance for generating embeddings. Defaults to OpenAIEmbedder.
+             overwrite (bool): Whether to overwrite an existing collection. Defaults to False.
+             is_global_level_index (bool): Whether the search index lives at the cluster (global) level rather than the scope level. Defaults to False.
+             wait_until_index_ready (float): Time in seconds to wait until the index is ready. Defaults to 0 (no wait).
+             batch_limit (int): Maximum number of documents to process in a single batch (applies to both sync and async operations). Defaults to 500.
+             name (Optional[str]): Name of the vector database.
+             description (Optional[str]): Description of the vector database.
+             **kwargs: Additional arguments for the Couchbase connection.
+         """
+         if not bucket_name:
+             raise ValueError("Bucket name must not be empty.")
+
+         self.bucket_name = bucket_name
+         self.scope_name = scope_name
+         self.collection_name = collection_name
+         self.connection_string = couchbase_connection_string
+         self.cluster_options = cluster_options
+         self.embedder = embedder
+         self.overwrite = overwrite
+         self.is_global_level_index = is_global_level_index
+         self.wait_until_index_ready = wait_until_index_ready
+         # Initialize base class with name and description
+         super().__init__(name=name, description=description)
+
+         self.kwargs = kwargs
+         self.batch_limit = batch_limit
+         if isinstance(search_index, str):
+             self.search_index_name = search_index
+             self.search_index_definition = None
+         else:
+             self.search_index_name = search_index.name
+             self.search_index_definition = search_index
+
+         self._cluster: Optional[Cluster] = None
+         self._bucket: Optional[Bucket] = None
+         self._scope: Optional[Scope] = None
+         self._collection: Optional[Collection] = None
+
+         self._async_cluster: Optional[AsyncCluster] = None
+         self._async_bucket: Optional[AsyncBucket] = None
+         self._async_scope: Optional[AsyncScope] = None
+         self._async_collection: Optional[AsyncCollection] = None
+
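As a rough usage sketch (not part of the wheel's contents), constructing the store and ensuring its backing objects exist could look like the following. The cluster address, credentials, bucket/scope/collection names, and index name are all placeholders, and the import path assumes `CouchbaseSearch` is re-exported from `agno.vectordb.couchbase`:

```python
from couchbase.auth import PasswordAuthenticator
from couchbase.options import ClusterOptions

from agno.vectordb.couchbase import CouchbaseSearch

# Placeholder credentials/endpoint; substitute your own cluster details.
cluster_options = ClusterOptions(PasswordAuthenticator("username", "password"))

vector_db = CouchbaseSearch(
    bucket_name="docs",
    scope_name="ai",
    collection_name="chunks",
    couchbase_connection_string="couchbases://cb.example.com",
    cluster_options=cluster_options,
    search_index="vector_index",  # name of an existing vector-capable FTS index
    wait_until_index_ready=60,    # poll up to 60s when an index definition is (re)created
)
vector_db.create()  # ensures the scope, collection, and FTS index exist
```

Note that when `search_index` is only a name and no index by that name exists, `create()` raises a `ValueError`, since there is no definition to upsert.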
+     @property
+     def cluster(self) -> Cluster:
+         """Create or retrieve the Couchbase cluster connection."""
+         if self._cluster is None:
+             try:
+                 logger.debug("Creating Couchbase Cluster connection")
+                 cluster = Cluster(self.connection_string, self.cluster_options)
+                 # Verify connection
+                 cluster.wait_until_ready(timeout=timedelta(seconds=60))
+                 logger.info("Connected to Couchbase successfully.")
+                 self._cluster = cluster
+             except Exception as e:
+                 logger.error(f"Failed to connect to Couchbase: {e}")
+                 raise ConnectionError(f"Failed to connect to Couchbase: {e}")
+         return self._cluster
+
+     @property
+     def bucket(self) -> Bucket:
+         """Get the Couchbase bucket."""
+         if self._bucket is None:
+             self._bucket = self.cluster.bucket(self.bucket_name)
+         return self._bucket
+
+     @property
+     def scope(self) -> Scope:
+         """Get the Couchbase scope."""
+         if self._scope is None:
+             self._scope = self.bucket.scope(self.scope_name)
+         return self._scope
+
+     @property
+     def collection(self) -> Collection:
+         """Get the Couchbase collection."""
+         if self._collection is None:
+             self._collection = self.scope.collection(self.collection_name)
+         return self._collection
+
+     def _create_collection_and_scope(self):
+         """
+         Get or create the scope and collection within the bucket.
+
+         Uses EAFP principle: attempts to create scope/collection and handles
+         specific exceptions if they already exist or (for collections with overwrite=True)
+         if they are not found for dropping.
+
+         Raises:
+             Exception: If scope or collection creation/manipulation fails unexpectedly.
+         """
+         # 1. Ensure Scope Exists
+         try:
+             self.bucket.collections().create_scope(scope_name=self.scope_name)
+             logger.info(f"Created new scope '{self.scope_name}'")
+         except ScopeAlreadyExistsException:
+             logger.info(f"Scope '{self.scope_name}' already exists. Using existing scope.")
+         except Exception as e:
+             logger.error(f"Failed to create or ensure scope '{self.scope_name}' exists: {e}")
+             raise
+
+         collection_manager = self.bucket.collections()
+
+         # 2. Handle Collection
+         if self.overwrite:
+             # Attempt to drop the collection first since overwrite is True
+             try:
+                 logger.info(
+                     f"Overwrite is True. Attempting to drop collection '{self.collection_name}' in scope '{self.scope_name}'."
+                 )
+                 collection_manager.drop_collection(collection_name=self.collection_name, scope_name=self.scope_name)
+                 logger.info(f"Successfully dropped collection '{self.collection_name}'.")
+                 time.sleep(1)  # Brief wait after drop
+             except CollectionNotFoundException:
+                 logger.info(
+                     f"Collection '{self.collection_name}' not found in scope '{self.scope_name}'. No need to drop."
+                 )
+             except Exception as e:
+                 logger.error(f"Error dropping collection '{self.collection_name}' during overwrite: {e}")
+                 raise
+
+             # Proceed to create the collection
+             try:
+                 logger.info(f"Creating collection '{self.collection_name}' in scope '{self.scope_name}'.")
+                 collection_manager.create_collection(scope_name=self.scope_name, collection_name=self.collection_name)
+                 logger.info(
+                     f"Successfully created collection '{self.collection_name}' after drop attempt (overwrite=True)."
+                 )
+             except CollectionAlreadyExistsException:
+                 # This is an unexpected state if overwrite=True and drop was supposed to clear the way.
+                 logger.error(
+                     f"Failed to create collection '{self.collection_name}' as it already exists, "
+                     f"even after drop attempt for overwrite. Overwrite operation may not have completed as intended."
+                 )
+                 raise  # Re-raise as the overwrite intent failed
+             except Exception as e:
+                 logger.error(
+                     f"Error creating collection '{self.collection_name}' after drop attempt (overwrite=True): {e}"
+                 )
+                 raise
+         else:  # self.overwrite is False
+             try:
+                 logger.info(
+                     f"Overwrite is False. Attempting to create collection '{self.collection_name}' in scope '{self.scope_name}'."
+                 )
+                 collection_manager.create_collection(scope_name=self.scope_name, collection_name=self.collection_name)
+                 logger.info(f"Successfully created new collection '{self.collection_name}'.")
+             except CollectionAlreadyExistsException:
+                 logger.info(
+                     f"Collection '{self.collection_name}' already exists in scope '{self.scope_name}'. Using existing collection."
+                 )
+             except Exception as e:
+                 logger.error(f"Error creating collection '{self.collection_name}': {e}")
+                 raise
+
+     def _search_indexes_mng(self) -> Union[SearchIndexManager, ScopeSearchIndexManager]:
+         """Get the search indexes manager."""
+         if self.is_global_level_index:
+             return self.cluster.search_indexes()
+         else:
+             return self.scope.search_indexes()
+
+     def _create_fts_index(self):
+         """Create a FTS index on the collection if it doesn't exist."""
+         try:
+             # Check if index exists and handle string index name
+             self._search_indexes_mng().get_index(self.search_index_name)
+             if not self.overwrite:
+                 return
+         except Exception:
+             if self.search_index_definition is None:
+                 raise ValueError(f"Index '{self.search_index_name}' does not exist")
+
+         # Create or update index
+         try:
+             if self.overwrite:
+                 try:
+                     logger.info(f"Dropping existing FTS index '{self.search_index_name}'")
+                     self._search_indexes_mng().drop_index(self.search_index_name)
+                 except SearchIndexNotFoundException:
+                     logger.warning(f"Index '{self.search_index_name}' does not exist")
+                 except Exception as e:
+                     logger.warning(f"Error dropping index (may not exist): {e}")
+
+             self._search_indexes_mng().upsert_index(self.search_index_definition)
+             logger.info(f"Created FTS index '{self.search_index_name}'")
+
+             if self.wait_until_index_ready:
+                 self._wait_for_index_ready()
+
+         except Exception as e:
+             logger.error(f"Error creating FTS index '{self.search_index_name}': {e}")
+             raise
+
+     def _wait_for_index_ready(self):
+         """Wait until the FTS index is ready."""
+         start_time = time.time()
+         while True:
+             try:
+                 count = self._search_indexes_mng().get_indexed_documents_count(self.search_index_name)
+                 if count > -1:
+                     logger.info(f"FTS index '{self.search_index_name}' is ready")
+                     break
+             except Exception as e:
+                 if time.time() - start_time > self.wait_until_index_ready:
+                     logger.error(f"Error checking index status: {e}")
+                     raise TimeoutError("Timeout waiting for FTS index to become ready")
+             time.sleep(1)
+
+     def create(self) -> None:
+         """Create the collection and FTS index if they don't exist."""
+         self._create_collection_and_scope()
+         self._create_fts_index()
+
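To have `create()` build the index itself, pass a full `SearchIndex` definition instead of a name; `_create_fts_index()` then upserts it through the scope- or cluster-level index manager. A minimal sketch, with the caveat that the `params` payload below is an illustrative (untested) example of Couchbase's FTS index-definition JSON, whose exact schema depends on your server version:

```python
from couchbase.management.search import SearchIndex

# Hypothetical vector-enabled index definition; `dims` must match the embedder's
# output dimension (e.g. 1536 for OpenAI text-embedding-3-small).
index_definition = SearchIndex(
    name="vector_index",
    source_name="docs",  # the bucket this index draws from
    params={
        "mapping": {
            "types": {
                "ai.chunks": {  # "<scope>.<collection>"
                    "enabled": True,
                    "properties": {
                        "embedding": {
                            "fields": [
                                {
                                    "name": "embedding",
                                    "type": "vector",
                                    "dims": 1536,
                                    "similarity": "dot_product",
                                }
                            ]
                        }
                    },
                }
            }
        }
    },
)
```

Passing `index_definition` as `search_index` (optionally with `overwrite=True`) lets `create()` upsert the index rather than requiring it to pre-exist.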
+     def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+         """
+         Insert documents into the Couchbase bucket. Fails if any document already exists.
+
+         Args:
+             content_hash: Hash of the source content these documents were derived from
+             documents: List of documents to insert
+             filters: Optional filters to apply to the documents
+         """
+         log_debug(f"Inserting {len(documents)} documents")
+
+         docs_to_insert: Dict[str, Any] = {}
+         for document in documents:
+             if document.embedding is None:
+                 document.embed(embedder=self.embedder)
+
+             if document.embedding is None:
+                 raise ValueError(f"Failed to generate embedding for document: {document.name}")
+             try:
+                 doc_data = self.prepare_doc(content_hash, document)
+                 if filters:
+                     doc_data["filters"] = filters
+                 # For insert_multi, the key of the dict is the document ID,
+                 # and the value is the document content itself.
+                 doc_id = doc_data.pop("_id")
+                 docs_to_insert[doc_id] = doc_data
+             except Exception as e:
+                 logger.error(f"Error preparing document '{document.name}': {e}")
+
+         if not docs_to_insert:
+             logger.info("No documents prepared for insertion.")
+             return
+
+         doc_ids = list(docs_to_insert.keys())
+         total_inserted_count = 0
+         total_processed_count = len(doc_ids)
+         errors_occurred = False
+
+         for i in range(0, len(doc_ids), self.batch_limit):
+             batch_doc_ids = doc_ids[i : i + self.batch_limit]
+             batch_docs_to_insert = {doc_id: docs_to_insert[doc_id] for doc_id in batch_doc_ids}
+
+             if not batch_docs_to_insert:
+                 continue
+
+             log_debug(f"Inserting batch of {len(batch_docs_to_insert)} documents.")
+             try:
+                 result = self.collection.insert_multi(batch_docs_to_insert)
+                 # For Couchbase SDK 3.x/4.x, result.all_ok indicates the whole batch succeeded.
+                 # If not all_ok, result.exceptions (dict) contains errors for specific keys, so
+                 # successes are counted as the keys NOT present in exceptions: partial success
+                 # is possible, and some items may succeed even if others fail.
+                 if result.all_ok:
+                     batch_inserted_count = len(batch_docs_to_insert)
+                     logger.info(f"Batch of {batch_inserted_count} documents inserted successfully.")
+                 else:
+                     succeeded_ids = set(batch_docs_to_insert.keys()) - set(
+                         result.exceptions.keys() if result.exceptions else []
+                     )
+                     batch_inserted_count = len(succeeded_ids)
+                     if batch_inserted_count > 0:
+                         logger.info(f"Partially inserted {batch_inserted_count} documents in batch.")
+                     logger.warning(f"Bulk write error during batch insert: {result.exceptions}")
+                     errors_occurred = True
+                 total_inserted_count += batch_inserted_count
+
+             except Exception as e:
+                 logger.error(f"Error during batch bulk insert for {len(batch_docs_to_insert)} documents: {e}")
+                 errors_occurred = True  # Mark that an error occurred in this batch
+
+         logger.info(f"Finished processing {total_processed_count} documents for insertion.")
+         logger.info(f"Total successfully inserted: {total_inserted_count}.")
+         if errors_occurred:
+             logger.warning("Some errors occurred during the insert operation. Please check logs for details.")
+
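Feeding documents in might then look like this sketch; `Document` is constructed with just the fields this class reads back out in `__get_doc_from_kv`, and the `content_hash` shown is an arbitrary stable hash of the source content (its exact derivation is up to the caller in this sketch):

```python
from hashlib import md5

from agno.knowledge.document import Document

docs = [
    Document(name="faq", content="Couchbase pairs FTS with vector search.", meta_data={"source": "faq"}),
]

# Rows are keyed by the md5 of their cleaned content (see prepare_doc below), so
# insert() fails on re-ingesting identical content; upsert() replaces it instead.
content_hash = md5(docs[0].content.encode("utf-8")).hexdigest()
vector_db.insert(content_hash=content_hash, documents=docs)
```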
+     def upsert_available(self) -> bool:
+         """Check if upsert is available in Couchbase."""
+         return True
+
+     def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+         """
+         Update existing documents or insert new ones into the Couchbase bucket.
+         """
+         if self.content_hash_exists(content_hash):
+             self._delete_by_content_hash(content_hash)
+         self.insert(content_hash=content_hash, documents=documents, filters=filters)
+
+     def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+         """
+         Update existing documents or insert new ones into the Couchbase bucket.
+
+         Args:
+             content_hash: Hash of the source content these documents were derived from
+             documents: List of documents to upsert
+             filters: Optional filters to apply to the documents
+         """
+         logger.info(f"Upserting {len(documents)} documents")
+
+         docs_to_upsert: Dict[str, Any] = {}
+         for document in documents:
+             try:
+                 if document.embedding is None:
+                     document.embed(embedder=self.embedder)
+
+                 if document.embedding is None:
+                     raise ValueError(f"Failed to generate embedding for document: {document.name}")
+
+                 doc_data = self.prepare_doc(content_hash, document)
+                 if filters:
+                     doc_data["filters"] = filters
+                 # For upsert_multi, the key of the dict is the document ID,
+                 # and the value is the document content itself.
+                 doc_id = doc_data.pop("_id")
+                 docs_to_upsert[doc_id] = doc_data
+             except Exception as e:
+                 logger.error(f"Error preparing document '{document.name}': {e}")
+
+         if not docs_to_upsert:
+             logger.info("No documents prepared for upsert.")
+             return
+
+         doc_ids = list(docs_to_upsert.keys())
+         total_upserted_count = 0
+         total_processed_count = len(doc_ids)
+         errors_occurred = False
+
+         for i in range(0, len(doc_ids), self.batch_limit):
+             batch_doc_ids = doc_ids[i : i + self.batch_limit]
+             batch_docs_to_upsert = {doc_id: docs_to_upsert[doc_id] for doc_id in batch_doc_ids}
+
+             if not batch_docs_to_upsert:
+                 continue
+
+             logger.info(f"Upserting batch of {len(batch_docs_to_upsert)} documents.")
+             try:
+                 result = self.collection.upsert_multi(batch_docs_to_upsert)
+                 # Similar to insert_multi, check for errors in the batch result.
+                 if result.all_ok:
+                     batch_upserted_count = len(batch_docs_to_upsert)
+                     logger.info(f"Batch of {batch_upserted_count} documents upserted successfully.")
+                 else:
+                     succeeded_ids = set(batch_docs_to_upsert.keys()) - set(
+                         result.exceptions.keys() if result.exceptions else []
+                     )
+                     batch_upserted_count = len(succeeded_ids)
+                     if batch_upserted_count > 0:
+                         logger.info(f"Partially upserted {batch_upserted_count} documents in batch.")
+                     logger.warning(f"Bulk write error during batch upsert: {result.exceptions}")
+                     errors_occurred = True
+                 total_upserted_count += batch_upserted_count
+
+             except Exception as e:
+                 logger.error(f"Error during batch bulk upsert for {len(batch_docs_to_upsert)} documents: {e}")
+                 errors_occurred = True
+
+         logger.info(f"Finished processing {total_processed_count} documents for upsert.")
+         logger.info(f"Total successfully upserted: {total_upserted_count}.")
+         if errors_occurred:
+             logger.warning("Some errors occurred during the upsert operation. Please check logs for details.")
+
+     def search(
+         self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+     ) -> List[Document]:
+         """Search the Couchbase bucket for documents relevant to the query."""
+         if isinstance(filters, list):
+             log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
+             filters = None
+         query_embedding = self.embedder.get_embedding(query)
+         if query_embedding is None:
+             logger.error(f"Failed to generate embedding for query: {query}")
+             return []
+
+         try:
+             # Implement vector search using Couchbase FTS
+             vector_search = VectorSearch.from_vector_query(
+                 VectorQuery(field_name="embedding", vector=query_embedding, num_candidates=limit)
+             )
+             request = SearchRequest.create(vector_search)
+
+             # Prepare the options dictionary
+             options_dict = {"limit": limit, "fields": ["*"]}
+             if filters:
+                 options_dict["raw"] = filters
+
+             search_args = {
+                 "index": self.search_index_name,
+                 "request": request,
+                 "options": SearchOptions(**options_dict),  # Construct SearchOptions from the dictionary
+             }
+
+             if self.is_global_level_index:
+                 results = self.cluster.search(**search_args)
+             else:
+                 results = self.scope.search(**search_args)
+
+             return self.__get_doc_from_kv(results)
+         except Exception as e:
+             logger.error(f"Error during search: {e}")
+             raise
+
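Query-side usage is a single call: the embedder vectorizes the query string, FTS returns the nearest candidates, and the hits are rehydrated into `Document` objects from the KV store (continuing the sketch above):

```python
results = vector_db.search("how does couchbase vector search work?", limit=3)
for doc in results:
    print(doc.id, doc.name, doc.content[:80])
```

A plain dict passed as `filters` is forwarded verbatim as raw FTS search options; a list of `FilterExpr` is not yet supported and is ignored with a warning.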
+     def __get_doc_from_kv(self, response: SearchResult) -> List[Document]:
+         """
+         Convert search results to Document objects by fetching full documents from KV store.
+
+         Args:
+             response: SearchResult from Couchbase search query
+
+         Returns:
+             List of Document objects
+         """
+         documents: List[Document] = []
+         search_hits = [(doc.id, doc.score) for doc in response.rows()]
+
+         if not search_hits:
+             return documents
+
+         # Fetch documents from KV store
+         ids = [hit[0] for hit in search_hits]
+         kv_response = self.collection.get_multi(keys=ids)
+
+         if not kv_response.all_ok:
+             raise Exception(f"Failed to get documents from KV store: {kv_response.exceptions}")
+
+         # Convert results to Documents
+         for doc_id, score in search_hits:
+             get_result = kv_response.results.get(doc_id)
+             if get_result is None or not get_result.success:
+                 logger.warning(f"Document {doc_id} not found in KV store")
+                 continue
+
+             value = get_result.value
+             documents.append(
+                 Document(
+                     id=doc_id,
+                     name=value["name"],
+                     content=value["content"],
+                     meta_data=value["meta_data"],
+                     embedding=value["embedding"],
+                     content_id=value.get("content_id"),
+                 )
+             )
+
+         return documents
+
+     def drop(self) -> None:
+         """Delete the collection from the scope."""
+         if self.exists():
+             try:
+                 self.bucket.collections().drop_collection(
+                     collection_name=self.collection_name, scope_name=self.scope_name
+                 )
+                 logger.info(f"Collection '{self.collection_name}' dropped successfully.")
+             except Exception as e:
+                 logger.error(f"Error dropping collection '{self.collection_name}': {e}")
+                 raise
+
+     def delete(self) -> bool:
+         """Delete the collection from the scope."""
+         if self.exists():
+             self.drop()
+             return True
+         return False
+
+     def exists(self) -> bool:
+         """Check if the collection exists."""
+         try:
+             scopes = self.bucket.collections().get_all_scopes()
+             for scope in scopes:
+                 if scope.name == self.scope_name:
+                     for collection in scope.collections:
+                         if collection.name == self.collection_name:
+                             return True
+             return False
+         except Exception:
+             return False
+
+     def prepare_doc(self, content_hash: str, document: Document) -> Dict[str, Any]:
+         """
+         Prepare a document for insertion into Couchbase.
+
+         Args:
+             content_hash: Hash of the source content the document was derived from
+             document: Document to prepare
+
+         Returns:
+             Dictionary containing document data ready for insertion
+
+         Raises:
+             ValueError: If the document has no content
+         """
+         if not document.content:
+             raise ValueError(f"Document {document.name} has no content")
+
+         logger.debug(f"Preparing document: {document.name}")
+
+         # Clean content and generate ID
+         cleaned_content = document.content.replace("\x00", "\ufffd")
+         doc_id = md5(cleaned_content.encode("utf-8")).hexdigest()
+
+         return {
+             "_id": doc_id,
+             "name": document.name,
+             "content": cleaned_content,
+             "meta_data": document.meta_data or {},  # ensure meta_data is never None
+             "embedding": document.embedding,
+             "content_id": document.content_id,
+             "content_hash": content_hash,
+         }
+
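For reference, a sketch of the key/value shape this produces for the example document from earlier (field values illustrative):

```python
from hashlib import md5

content = "Couchbase pairs FTS with vector search."
stored_key = md5(content.encode("utf-8")).hexdigest()  # the popped "_id" becomes the KV key
stored_value = {
    "name": "faq",
    "content": content,
    "meta_data": {"source": "faq"},
    "embedding": [0.012, -0.034],  # truncated; real vectors match the embedder dims
    "content_id": None,
    "content_hash": "<hash of the source content>",
}
```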
+     def get_count(self) -> int:
+         """Get the count of documents in the Couchbase bucket."""
+         try:
+             search_indexes = self.cluster.search_indexes()
+             if not self.is_global_level_index:
+                 search_indexes = self.scope.search_indexes()
+             return search_indexes.get_indexed_documents_count(self.search_index_name)
+         except Exception as e:
+             logger.error(f"Error getting document count: {e}")
+             return 0
+
+     def name_exists(self, name: str) -> bool:
+         """Check if a document exists in the bucket based on its name."""
+         try:
+             # Use N1QL query to check if document with given name exists
+             query = f"SELECT name FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE name = $name LIMIT 1"
+             result = self.scope.query(
+                 query, QueryOptions(named_parameters={"name": name}, scan_consistency=QueryScanConsistency.REQUEST_PLUS)
+             )
+             for row in result.rows():
+                 return True
+             return False
+         except Exception as e:
+             logger.error(f"Error checking document name existence: {e}")
+             return False
+
+     def id_exists(self, id: str) -> bool:
+         """Check if a document exists in the bucket based on its ID."""
+         try:
+             result = self.collection.exists(id)
+             if not result.exists:
+                 logger.debug(f"Document does not exist: {id}")
+             return result.exists
+         except Exception as e:
+             logger.error(f"Error checking document existence: {e}")
+             return False
+
+     def content_hash_exists(self, content_hash: str) -> bool:
+         """Check if a document exists in the bucket based on its content hash."""
+         try:
+             # Use N1QL query to check if document with given content_hash exists
+             query = f"SELECT content_hash FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE content_hash = $content_hash LIMIT 1"
+             result = self.scope.query(
+                 query,
+                 QueryOptions(
+                     named_parameters={"content_hash": content_hash}, scan_consistency=QueryScanConsistency.REQUEST_PLUS
+                 ),
+             )
+             for row in result.rows():
+                 return True
+             return False
+         except Exception as e:
+             logger.error(f"Error checking document content_hash existence: {e}")
+             return False
+
+     # === ASYNC SUPPORT USING acouchbase ===
+
+     async def _create_async_cluster_instance(self) -> AsyncCluster:
+         """Helper method to create and connect an AsyncCluster instance."""
+         logger.debug("Creating and connecting new AsyncCluster instance.")
+         cluster = await AsyncCluster.connect(self.connection_string, self.cluster_options)
+         # AsyncCluster.connect ensures the cluster is ready upon successful await.
+         # No explicit wait_until_ready is needed here for AsyncCluster.
+         logger.info("AsyncCluster connected successfully.")
+         return cluster
+
+     async def get_async_cluster(self) -> AsyncCluster:
+         """Gets or creates the cached AsyncCluster instance."""
+         if self._async_cluster is None:
+             logger.debug("AsyncCluster instance not cached, creating new one.")
+             self._async_cluster = await self._create_async_cluster_instance()
+         return self._async_cluster
+
+     async def get_async_bucket(self) -> AsyncBucket:
+         """Gets or creates the cached AsyncBucket instance."""
+         if self._async_bucket is None:
+             logger.debug("AsyncBucket instance not cached, creating new one.")
+             cluster = await self.get_async_cluster()
+             self._async_bucket = cluster.bucket(self.bucket_name)
+         return self._async_bucket
+
+     async def get_async_scope(self) -> AsyncScope:
+         """Gets or creates the cached AsyncScope instance."""
+         if self._async_scope is None:
+             logger.debug("AsyncScope instance not cached, creating new one.")
+             bucket = await self.get_async_bucket()
+             self._async_scope = bucket.scope(self.scope_name)
+         return self._async_scope
+
+     async def get_async_collection(self) -> AsyncCollection:
+         """Gets or creates the cached AsyncCollection instance."""
+         if self._async_collection is None:
+             logger.debug("AsyncCollection instance not cached, creating new one.")
+             scope = await self.get_async_scope()
+             self._async_collection = scope.collection(self.collection_name)
+         return self._async_collection
+
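The async half mirrors the sync API: the `get_async_*` accessors lazily connect and cache handles, and the `async_*` methods below use them. A sketch of driving it, reusing the placeholder `vector_db` from the earlier example:

```python
import asyncio


async def main() -> None:
    await vector_db.async_create()  # scope, collection, and FTS index
    print(await vector_db.async_id_exists("some-doc-id"))


asyncio.run(main())
```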
+     async def async_create(self) -> None:
+         """Create the collection and FTS index asynchronously if they don't exist."""
+         await self._async_create_collection_and_scope()
+         await self._async_create_fts_index()
+
+     async def _async_create_collection_and_scope(self):
+         """
+         Get or create the scope and collection within the bucket.
+
+         Uses EAFP principle: attempts to create scope/collection and handles
+         specific exceptions if they already exist or (for collections with overwrite=True)
+         if they are not found for dropping.
+
+         Raises:
+             Exception: If scope or collection creation/manipulation fails unexpectedly.
+         """
+         # 1. Ensure Scope Exists
+         async_bucket_instance = await self.get_async_bucket()
+         try:
+             await async_bucket_instance.collections().create_scope(self.scope_name)
+             logger.info(f"Created new scope '{self.scope_name}'")
+         except ScopeAlreadyExistsException:
+             logger.info(f"Scope '{self.scope_name}' already exists. Using existing scope.")
+         except Exception as e:
+             logger.error(f"Failed to create or ensure scope '{self.scope_name}' exists: {e}")
+             raise
+
+         collection_manager = async_bucket_instance.collections()
+
+         # 2. Handle Collection
+         if self.overwrite:
+             # Attempt to drop the collection first since overwrite is True
+             try:
+                 logger.info(
+                     f"Overwrite is True. Attempting to drop collection '{self.collection_name}' in scope '{self.scope_name}'."
+                 )
+                 await collection_manager.drop_collection(
+                     collection_name=self.collection_name, scope_name=self.scope_name
+                 )
+                 logger.info(f"Successfully dropped collection '{self.collection_name}'.")
+                 await asyncio.sleep(1)  # Brief non-blocking wait after drop
+             except CollectionNotFoundException:
+                 logger.info(
+                     f"Collection '{self.collection_name}' not found in scope '{self.scope_name}'. No need to drop."
+                 )
+             except Exception as e:
+                 logger.error(f"Error dropping collection '{self.collection_name}' during overwrite: {e}")
+                 raise
+
+             # Proceed to create the collection
+             try:
+                 logger.info(f"Creating collection '{self.collection_name}' in scope '{self.scope_name}'.")
+                 await collection_manager.create_collection(
+                     scope_name=self.scope_name, collection_name=self.collection_name
+                 )
+                 logger.info(
+                     f"Successfully created collection '{self.collection_name}' after drop attempt (overwrite=True)."
+                 )
+             except CollectionAlreadyExistsException:
+                 # This is an unexpected state if overwrite=True and drop was supposed to clear the way.
+                 logger.error(
+                     f"Failed to create collection '{self.collection_name}' as it already exists, "
+                     f"even after drop attempt for overwrite. Overwrite operation may not have completed as intended."
+                 )
+                 raise  # Re-raise as the overwrite intent failed
+             except Exception as e:
+                 logger.error(
+                     f"Error creating collection '{self.collection_name}' after drop attempt (overwrite=True): {e}"
+                 )
+                 raise
+         else:  # self.overwrite is False
+             try:
+                 logger.info(
+                     f"Overwrite is False. Attempting to create collection '{self.collection_name}' in scope '{self.scope_name}'."
+                 )
+                 await collection_manager.create_collection(
+                     scope_name=self.scope_name, collection_name=self.collection_name
+                 )
+                 logger.info(f"Successfully created new collection '{self.collection_name}'.")
+             except CollectionAlreadyExistsException:
+                 logger.info(
+                     f"Collection '{self.collection_name}' already exists in scope '{self.scope_name}'. Using existing collection."
+                 )
+             except Exception as e:
+                 logger.error(f"Error creating collection '{self.collection_name}': {e}")
+                 raise
+
795
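+    # --- Illustrative sketch (editorial addition) ---
+    # How the overwrite flag changes the setup behavior above; the constructor
+    # keywords are assumptions for illustration, not taken from this diff:
+    #
+    #   # Reuse the scope/collection if they already exist:
+    #   db = CouchbaseSearch(bucket_name="kb", scope_name="ai", collection_name="docs", overwrite=False)
+    #   # Drop and recreate the collection during setup:
+    #   db = CouchbaseSearch(bucket_name="kb", scope_name="ai", collection_name="docs", overwrite=True)
+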
+    async def _get_async_search_indexes_mng(self) -> Union[AsyncSearchIndexManager, AsyncScopeSearchIndexManager]:
+        """Get the async search indexes manager."""
+        if self.is_global_level_index:
+            cluster = await self.get_async_cluster()
+            return cluster.search_indexes()
+        else:
+            scope = await self.get_async_scope()
+            return scope.search_indexes()
+
+    async def _async_create_fts_index(self):
+        """Create an FTS index on the collection if it doesn't exist."""
+        async_search_mng = await self._get_async_search_indexes_mng()
+        try:
+            # Check whether the index already exists; if it does and we are not
+            # overwriting, there is nothing to do
+            await async_search_mng.get_index(self.search_index_name)
+            if not self.overwrite:
+                return
+        except Exception:
+            if self.search_index_definition is None:
+                raise ValueError(f"Index '{self.search_index_name}' does not exist")
+
+        # Create or update the index
+        try:
+            if self.overwrite:
+                try:
+                    logger.info(f"Dropping existing FTS index '{self.search_index_name}'")
+                    await async_search_mng.drop_index(self.search_index_name)
+                except SearchIndexNotFoundException:
+                    logger.warning(f"Index '{self.search_index_name}' does not exist")
+                except Exception as e:
+                    logger.warning(f"Error dropping index (may not exist): {e}")
+
+            await async_search_mng.upsert_index(self.search_index_definition)
+            logger.info(f"Created FTS index '{self.search_index_name}'")
+
+            if self.wait_until_index_ready:
+                await self._async_wait_for_index_ready()
+
+        except Exception as e:
+            logger.error(f"Error creating FTS index '{self.search_index_name}': {e}")
+            raise
+
+    async def _async_wait_for_index_ready(self):
+        """Wait until the FTS index is ready."""
+        start_time = time.time()
+        async_search_mng = await self._get_async_search_indexes_mng()
+        while True:
+            try:
+                count = await async_search_mng.get_indexed_documents_count(self.search_index_name)
+                if count > -1:
+                    logger.info(f"FTS index '{self.search_index_name}' is ready")
+                    break
+            except Exception as e:
+                if time.time() - start_time > self.wait_until_index_ready:
+                    logger.error(f"Error checking index status: {e}")
+                    raise TimeoutError("Timeout waiting for FTS index to become ready")
+            await asyncio.sleep(1)
+
+    async def async_id_exists(self, id: str) -> bool:
+        try:
+            async_collection_instance = await self.get_async_collection()
+            result = await async_collection_instance.exists(id)
+            if not result.exists:
+                logger.debug(f"[async] Document does not exist: {id}")
+            return result.exists
+        except Exception as e:
+            logger.error(f"[async] Error checking document existence: {e}")
+            return False
+
+    async def async_name_exists(self, name: str) -> bool:
+        try:
+            query = f"SELECT name FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE name = $name LIMIT 1"
+            async_scope_instance = await self.get_async_scope()
+            result = async_scope_instance.query(
+                query, QueryOptions(named_parameters={"name": name}, scan_consistency=QueryScanConsistency.REQUEST_PLUS)
+            )
+            async for row in result.rows():
+                return True
+            return False
+        except Exception as e:
+            logger.error(f"[async] Error checking document name existence: {e}")
+            return False
+
+    async def async_insert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        logger.info(f"[async] Inserting {len(documents)} documents")
+
+        async_collection_instance = await self.get_async_collection()
+        all_docs_to_insert: Dict[str, Any] = {}
+
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+            except Exception as e:
+                # Check whether this is a rate-limit error; don't fall back in that case, as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+        for document in documents:
+            try:
+                # prepare_doc is synchronous and is called directly rather than via asyncio.to_thread
+                doc_data = self.prepare_doc(content_hash, document)
+                if filters:
+                    doc_data["filters"] = filters
+                doc_id = doc_data.pop("_id")  # Remove _id as it's used as the key
+                all_docs_to_insert[doc_id] = doc_data
+            except Exception as e:
+                logger.error(f"[async] Error preparing document '{document.name}': {e}")
+
+        if not all_docs_to_insert:
+            logger.info("[async] No documents prepared for insertion.")
+            return
+
+        doc_ids = list(all_docs_to_insert.keys())
+        total_inserted_count = 0
+        total_failed_count = 0
+        processed_doc_count = len(all_docs_to_insert)
+
+        for i in range(0, len(doc_ids), self.batch_limit):
+            batch_doc_ids = doc_ids[i : i + self.batch_limit]
+
+            logger.info(f"[async] Processing batch of {len(batch_doc_ids)} documents for concurrent insertion.")
+
+            insert_tasks = []
+            for doc_id in batch_doc_ids:
+                doc_content = all_docs_to_insert[doc_id]
+                insert_tasks.append(async_collection_instance.insert(doc_id, doc_content))
+
+            if insert_tasks:
+                results = await asyncio.gather(*insert_tasks, return_exceptions=True)
+                for idx, result in enumerate(results):
+                    # Get the original doc_id for logging, corresponding to the task order
+                    current_doc_id = batch_doc_ids[idx]
+                    if isinstance(result, Exception):
+                        total_failed_count += 1
+                        logger.error(f"[async] Error inserting document '{current_doc_id}': {result}")
+                    else:
+                        # A successful insert returns no value we need to inspect;
+                        # the absence of an exception means success.
+                        total_inserted_count += 1
+                        logger.debug(f"[async] Successfully inserted document '{current_doc_id}'.")
+
+        logger.info(f"[async] Finished processing {processed_doc_count} documents.")
+        logger.info(f"[async] Total successfully inserted: {total_inserted_count}, Total failed: {total_failed_count}.")
+
+    async def async_upsert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Upsert documents asynchronously."""
+        if self.content_hash_exists(content_hash):
+            self._delete_by_content_hash(content_hash)
+        await self._async_upsert(content_hash=content_hash, documents=documents, filters=filters)
+
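+    # --- Editorial note ---
+    # async_upsert is effectively "replace by content hash": any documents
+    # previously stored under the same content_hash are deleted (via the
+    # synchronous helper) before the new batch is written, so re-ingesting
+    # unchanged content does not leave duplicates behind:
+    #
+    #   await vector_db.async_upsert(content_hash, docs)  # idempotent per content_hash
+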
+    async def _async_upsert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        logger.info(f"[async] Upserting {len(documents)} documents")
+
+        async_collection_instance = await self.get_async_collection()
+        all_docs_to_upsert: Dict[str, Any] = {}
+
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+            except Exception as e:
+                # Check whether this is a rate-limit error; don't fall back in that case, as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+        for document in documents:
+            try:
+                # As in async_insert, prepare_doc is synchronous and called directly rather than via asyncio.to_thread
+                doc_data = self.prepare_doc(content_hash, document)
+                if filters:
+                    doc_data["filters"] = filters
+                doc_id = doc_data.pop("_id")  # _id is used as the key for upsert
+                all_docs_to_upsert[doc_id] = doc_data
+            except Exception as e:
+                logger.error(f"[async] Error preparing document '{document.name}' for upsert: {e}")
+
+        if not all_docs_to_upsert:
+            logger.info("[async] No documents prepared for upsert.")
+            return
+
+        doc_ids = list(all_docs_to_upsert.keys())
+        total_upserted_count = 0
+        total_failed_count = 0
+        processed_doc_count = len(all_docs_to_upsert)
+
+        logger.info(f"[async] Prepared {processed_doc_count} documents for upsert.")
+
+        for i in range(0, len(doc_ids), self.batch_limit):
+            batch_doc_ids = doc_ids[i : i + self.batch_limit]
+
+            logger.info(f"[async] Processing batch of {len(batch_doc_ids)} documents for concurrent upsert.")
+
+            upsert_tasks = []
+            for doc_id in batch_doc_ids:
+                doc_content = all_docs_to_upsert[doc_id]
+                upsert_tasks.append(async_collection_instance.upsert(doc_id, doc_content))
+
+            if upsert_tasks:
+                results = await asyncio.gather(*upsert_tasks, return_exceptions=True)
+                for idx, result in enumerate(results):
+                    current_doc_id = batch_doc_ids[idx]
+                    if isinstance(result, Exception):
+                        total_failed_count += 1
+                        logger.error(f"[async] Error upserting document '{current_doc_id}': {result}")
+                    else:
+                        # A successful upsert returns no value we need to inspect;
+                        # the absence of an exception means success.
+                        total_upserted_count += 1
+                        logger.debug(f"[async] Successfully upserted document '{current_doc_id}'.")
+
+        logger.info(f"[async] Finished processing {processed_doc_count} documents for upsert.")
+        logger.info(f"[async] Total successfully upserted: {total_upserted_count}, Total failed: {total_failed_count}.")
+
+    async def async_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        if isinstance(filters, List):
+            log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
+            filters = None
+        query_embedding = self.embedder.get_embedding(query)
+        if query_embedding is None:
+            logger.error(f"[async] Failed to generate embedding for query: {query}")
+            return []
+        try:
+            # Implement vector search using Couchbase FTS
+            vector_search = VectorSearch.from_vector_query(
+                VectorQuery(field_name="embedding", vector=query_embedding, num_candidates=limit)
+            )
+            request = SearchRequest.create(vector_search)
+
+            # Prepare the options dictionary
+            options_dict = {"limit": limit, "fields": ["*"]}
+            if filters:
+                options_dict["raw"] = filters
+
+            search_args = {
+                "index": self.search_index_name,
+                "request": request,
+                "options": SearchOptions(**options_dict),  # Construct SearchOptions from the dictionary
+            }
+
+            if self.is_global_level_index:
+                async_cluster_instance = await self.get_async_cluster()
+                results = async_cluster_instance.search(**search_args)
+            else:
+                async_scope_instance = await self.get_async_scope()
+                results = async_scope_instance.search(**search_args)
+
+            return await self.__async_get_doc_from_kv(results)
+        except Exception as e:
+            logger.error(f"[async] Error during search: {e}")
+            raise
+
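+    # --- Illustrative sketch (editorial addition) ---
+    # Running a KNN query against the FTS index; only the embedding of `query`
+    # is used for matching, and `limit` bounds both the candidate count and the
+    # number of results returned:
+    #
+    #   hits = await vector_db.async_search("chicken curry recipe", limit=3)
+    #   for doc in hits:
+    #       print(doc.id, doc.name)
+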
+    async def async_drop(self) -> None:
+        if await self.async_exists():
+            try:
+                async_bucket_instance = await self.get_async_bucket()
+                await async_bucket_instance.collections().drop_collection(
+                    collection_name=self.collection_name, scope_name=self.scope_name
+                )
+                logger.info(f"[async] Collection '{self.collection_name}' dropped successfully.")
+            except Exception as e:
+                logger.error(f"[async] Error dropping collection '{self.collection_name}': {e}")
+                raise
+
+    async def async_exists(self) -> bool:
+        try:
+            async_bucket_instance = await self.get_async_bucket()
+            scopes = await async_bucket_instance.collections().get_all_scopes()
+            for scope in scopes:
+                if scope.name == self.scope_name:
+                    for collection in scope.collections:
+                        if collection.name == self.collection_name:
+                            return True
+            return False
+        except Exception:
+            return False
+
+    async def __async_get_doc_from_kv(self, response: AsyncSearchIndex) -> List[Document]:
+        """
+        Convert search results to Document objects by fetching the full documents
+        from the KV store concurrently.
+
+        Args:
+            response: Search result from the Couchbase search query
+
+        Returns:
+            List of Document objects
+        """
+        documents: List[Document] = []
+        # The search hits map directly to the documents we want to fetch and reconstruct
+        search_hits_map = {doc.id: doc.score async for doc in response.rows()}
+        doc_ids_to_fetch = list(search_hits_map.keys())
+
+        if not doc_ids_to_fetch:
+            return documents
+
+        async_collection_instance = await self.get_async_collection()
+
+        # Process in batches
+        for i in range(0, len(doc_ids_to_fetch), self.batch_limit):
+            batch_doc_ids = doc_ids_to_fetch[i : i + self.batch_limit]
+            if not batch_doc_ids:
+                continue
+
+            logger.debug(f"[async] Fetching batch of {len(batch_doc_ids)} documents from KV.")
+            get_tasks = [async_collection_instance.get(doc_id) for doc_id in batch_doc_ids]
+
+            # Fetch documents from the KV store concurrently for the current batch
+            results_from_kv_batch = await asyncio.gather(*get_tasks, return_exceptions=True)
+
+            for batch_idx, get_result in enumerate(results_from_kv_batch):
+                # Original doc_id corresponding to this result within the batch
+                doc_id = batch_doc_ids[batch_idx]
+                # score = search_hits_map[doc_id]  # The original search score is available here if needed
+
+                # gather(return_exceptions=True) yields BaseException instances on
+                # failure, which also covers Exception subclasses
+                if isinstance(get_result, BaseException) or get_result is None:
+                    logger.warning(f"[async] Document {doc_id} not found or error fetching from KV store: {get_result}")
+                    continue
+
+                try:
+                    value = get_result.content_as[dict]
+                    if not isinstance(value, dict):
+                        logger.warning(
+                            f"[async] Document {doc_id} content from KV is not a dict: {type(value)}. Skipping."
+                        )
+                        continue
+
+                    documents.append(
+                        Document(
+                            id=doc_id,
+                            name=value.get("name"),
+                            content=value.get("content", ""),
+                            meta_data=value.get("meta_data", {}),
+                            embedding=value.get("embedding", []),
+                        )
+                    )
+                except Exception as e:
+                    logger.warning(
+                        f"[async] Error processing document {doc_id} from KV store: {e}. Value: {getattr(get_result, 'content_as', 'N/A')}"
+                    )
+                    continue
+
+        return documents
+
+    def delete_by_id(self, id: str) -> bool:
+        """
+        Delete a document by its ID.
+
+        Args:
+            id (str): The document ID to delete
+
+        Returns:
+            bool: True if the document was deleted, False otherwise
+        """
+        try:
+            log_debug(f"Couchbase VectorDB : Deleting document with ID {id}")
+            if not self.id_exists(id):
+                return False
+
+            # Delete by ID using collection.remove()
+            self.collection.remove(id)
+            log_info(f"Successfully deleted document with ID {id}")
+            return True
+        except Exception as e:
+            log_info(f"Error deleting document with ID {id}: {e}")
+            return False
+
+    def delete_by_name(self, name: str) -> bool:
+        """
+        Delete documents by name.
+
+        Args:
+            name (str): The document name to delete
+
+        Returns:
+            bool: True if documents were deleted, False otherwise
+        """
+        try:
+            log_debug(f"Couchbase VectorDB : Deleting documents with name {name}")
+
+            query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE name = $name"
+            result = self.scope.query(
+                query, QueryOptions(named_parameters={"name": name}, scan_consistency=QueryScanConsistency.REQUEST_PLUS)
+            )
+            rows = list(result.rows())  # Collect once
+
+            for row in rows:
+                self.collection.remove(row.get("doc_id"))
+            log_info(f"Deleted {len(rows)} documents with name {name}")
+            return True
+
+        except Exception as e:
+            log_info(f"Error deleting documents with name {name}: {e}")
+            return False
+
+    def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
+        """
+        Delete documents by metadata.
+
+        Args:
+            metadata (Dict[str, Any]): The metadata to match for deletion
+
+        Returns:
+            bool: True if documents were deleted, False otherwise
+        """
+        try:
+            log_debug(f"Couchbase VectorDB : Deleting documents with metadata {metadata}")
+
+            if not metadata:
+                log_info("No metadata provided for deletion")
+                return False
+
+            # Build the WHERE clause for metadata matching; each condition checks
+            # both the unqualified and the collection-qualified field path
+            where_conditions = []
+            named_parameters: Dict[str, Any] = {}
+            qualified = f"`{self.collection_name}`"
+
+            for key, value in metadata.items():
+                if isinstance(value, (list, tuple)):
+                    # For array values, use ARRAY_CONTAINS
+                    where_conditions.append(
+                        f"(ARRAY_CONTAINS(filters.{key}, $value_{key}) OR ARRAY_CONTAINS({qualified}.filters.{key}, $value_{key}))"
+                    )
+                    named_parameters[f"value_{key}"] = value
+                elif isinstance(value, (str, bool, int, float)):
+                    where_conditions.append(
+                        f"(filters.{key} = $value_{key} OR {qualified}.filters.{key} = $value_{key})"
+                    )
+                    named_parameters[f"value_{key}"] = value
+                elif value is None:
+                    where_conditions.append(f"(filters.{key} IS NULL OR {qualified}.filters.{key} IS NULL)")
+                else:
+                    # For other types, fall back to a string comparison
+                    where_conditions.append(
+                        f"(filters.{key} = $value_{key} OR {qualified}.filters.{key} = $value_{key})"
+                    )
+                    named_parameters[f"value_{key}"] = str(value)
+
+            if not where_conditions:
+                log_info("No valid metadata conditions for deletion")
+                return False
+
+            where_clause = " AND ".join(where_conditions)
+            query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE {where_clause}"
+
+            result = self.scope.query(
+                query,
+                QueryOptions(named_parameters=named_parameters, scan_consistency=QueryScanConsistency.REQUEST_PLUS),
+            )
+            rows = list(result.rows())  # Collect once
+
+            for row in rows:
+                self.collection.remove(row.get("doc_id"))
+            log_info(f"Deleted {len(rows)} documents with metadata {metadata}")
+            return True
+
+        except Exception as e:
+            log_info(f"Error deleting documents with metadata {metadata}: {e}")
+            return False
+
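+    # --- Illustrative sketch (editorial addition) ---
+    # For metadata like {"user_id": "u1", "tags": ["a"]}, the builder above
+    # produces a WHERE clause along these lines (collection name assumed to be
+    # `docs` here):
+    #
+    #   (filters.user_id = $value_user_id OR `docs`.filters.user_id = $value_user_id)
+    #   AND (ARRAY_CONTAINS(filters.tags, $value_tags) OR ARRAY_CONTAINS(`docs`.filters.tags, $value_tags))
+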
+    def delete_by_content_id(self, content_id: str) -> bool:
+        """
+        Delete documents by content ID.
+
+        Args:
+            content_id (str): The content ID to delete
+
+        Returns:
+            bool: True if documents were deleted, False otherwise
+        """
+        try:
+            log_debug(f"Couchbase VectorDB : Deleting documents with content_id {content_id}")
+
+            query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE content_id = $content_id OR `{self.collection_name}`.content_id = $content_id"
+            result = self.scope.query(
+                query,
+                QueryOptions(
+                    named_parameters={"content_id": content_id}, scan_consistency=QueryScanConsistency.REQUEST_PLUS
+                ),
+            )
+            rows = list(result.rows())  # Collect once
+
+            for row in rows:
+                self.collection.remove(row.get("doc_id"))
+            log_info(f"Deleted {len(rows)} documents with content_id {content_id}")
+            return True
+
+        except Exception as e:
+            log_info(f"Error deleting documents with content_id {content_id}: {e}")
+            return False
+
+    def _delete_by_content_hash(self, content_hash: str) -> bool:
+        """
+        Delete documents by content hash.
+
+        Args:
+            content_hash (str): The content hash to delete
+
+        Returns:
+            bool: True if documents were deleted, False otherwise
+        """
+        try:
+            log_debug(f"Couchbase VectorDB : Deleting documents with content_hash {content_hash}")
+
+            query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE content_hash = $content_hash"
+            result = self.scope.query(
+                query,
+                QueryOptions(
+                    named_parameters={"content_hash": content_hash}, scan_consistency=QueryScanConsistency.REQUEST_PLUS
+                ),
+            )
+            rows = list(result.rows())  # Collect once
+
+            for row in rows:
+                self.collection.remove(row.get("doc_id"))
+            log_info(f"Deleted {len(rows)} documents with content_hash {content_hash}")
+            return True
+
+        except Exception as e:
+            log_info(f"Error deleting documents with content_hash {content_hash}: {e}")
+            return False
+
+    def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
+        """
+        Update the metadata for documents with the given content_id.
+
+        Args:
+            content_id (str): The content ID to update
+            metadata (Dict[str, Any]): The metadata to update
+        """
+        try:
+            # Query for documents with the given content_id, targeting the same
+            # scope/collection as the rest of this class
+            query = f"SELECT META().id as doc_id, meta_data, filters FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE content_id = $content_id"
+            result = self.scope.query(
+                query,
+                QueryOptions(
+                    named_parameters={"content_id": content_id}, scan_consistency=QueryScanConsistency.REQUEST_PLUS
+                ),
+            )
+
+            updated_count = 0
+            for row in result:
+                doc_id = row.get("doc_id")
+                current_metadata = row.get("meta_data", {})
+                current_filters = row.get("filters", {})
+
+                # Merge existing metadata with the new metadata
+                if isinstance(current_metadata, dict):
+                    updated_metadata = current_metadata.copy()
+                    updated_metadata.update(metadata)
+                else:
+                    updated_metadata = metadata
+
+                # Merge existing filters with the new metadata
+                if isinstance(current_filters, dict):
+                    updated_filters = current_filters.copy()
+                    updated_filters.update(metadata)
+                else:
+                    updated_filters = metadata
+
+                # Update the document
+                try:
+                    doc = self.collection.get(doc_id)
+                    doc_content = doc.content_as[dict]
+                    doc_content["meta_data"] = updated_metadata
+                    doc_content["filters"] = updated_filters
+
+                    self.collection.upsert(doc_id, doc_content)
+                    updated_count += 1
+                except Exception as doc_error:
+                    logger.warning(f"Failed to update document {doc_id}: {doc_error}")
+
+            if updated_count == 0:
+                logger.debug(f"No documents found with content_id: {content_id}")
+            else:
+                logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")
+
+        except Exception as e:
+            logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
+            raise
+
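+    # --- Illustrative sketch (editorial addition) ---
+    # update_metadata merges the new keys into both `meta_data` and `filters`
+    # on every matching document, so previously stored keys are preserved:
+    #
+    #   vector_db.update_metadata("content-123", {"reviewed": True})
+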
+    def get_supported_search_types(self) -> List[str]:
+        """Get the supported search types for this vector database."""
+        return []  # CouchbaseSearch doesn't use the SearchType enum