agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1446 @@
1
+ import asyncio
2
+ import time
3
+ from datetime import timedelta
4
+ from typing import Any, Dict, List, Optional, Union
5
+
6
+ from agno.filters import FilterExpr
7
+ from agno.knowledge.document import Document
8
+ from agno.knowledge.embedder import Embedder
9
+ from agno.utils.log import log_debug, log_info, log_warning, logger
10
+ from agno.vectordb.base import VectorDb
11
+
12
+ try:
13
+ from hashlib import md5
14
+
15
+ except ImportError:
16
+ raise ImportError("`hashlib` not installed. Please install using `pip install hashlib`")
17
+ try:
18
+ from acouchbase.bucket import AsyncBucket
19
+ from acouchbase.cluster import AsyncCluster
20
+ from acouchbase.collection import AsyncCollection
21
+ from acouchbase.management.search import (
22
+ ScopeSearchIndexManager as AsyncScopeSearchIndexManager,
23
+ )
24
+ from acouchbase.management.search import (
25
+ SearchIndex as AsyncSearchIndex,
26
+ )
27
+ from acouchbase.management.search import (
28
+ SearchIndexManager as AsyncSearchIndexManager,
29
+ )
30
+ from acouchbase.scope import AsyncScope
31
+ from couchbase.bucket import Bucket
32
+ from couchbase.cluster import Cluster
33
+ from couchbase.collection import Collection
34
+ from couchbase.exceptions import (
35
+ CollectionAlreadyExistsException,
36
+ CollectionNotFoundException,
37
+ ScopeAlreadyExistsException,
38
+ SearchIndexNotFoundException,
39
+ )
40
+ from couchbase.management.search import ScopeSearchIndexManager, SearchIndex, SearchIndexManager
41
+ from couchbase.n1ql import QueryScanConsistency
42
+ from couchbase.options import ClusterOptions, QueryOptions, SearchOptions
43
+ from couchbase.result import SearchResult
44
+ from couchbase.scope import Scope
45
+ from couchbase.search import SearchRequest
46
+ from couchbase.vector_search import VectorQuery, VectorSearch
47
+ except ImportError:
48
+ raise ImportError("`couchbase` not installed. Please install using `pip install couchbase`")
49
+
50
+
51
+ class CouchbaseSearch(VectorDb):
52
+ """
53
+ Couchbase Vector Database implementation with FTS (Full Text Search) index support.
54
+ """
55
+
56
    def __init__(
        self,
        bucket_name: str,
        scope_name: str,
        collection_name: str,
        couchbase_connection_string: str,
        cluster_options: ClusterOptions,
        search_index: Union[str, SearchIndex],
        embedder: Optional[Embedder] = None,
        overwrite: bool = False,
        is_global_level_index: bool = False,
        wait_until_index_ready: float = 0,
        batch_limit: int = 500,
        name: Optional[str] = None,
        description: Optional[str] = None,
        **kwargs,
    ):
        """
        Initialize the CouchbaseSearch with Couchbase connection details.

        Args:
            bucket_name (str): Name of the Couchbase bucket.
            scope_name (str): Name of the scope within the bucket.
            collection_name (str): Name of the collection within the scope.
            name (Optional[str]): Name of the vector database.
            description (Optional[str]): Description of the vector database.
            couchbase_connection_string (str): Couchbase connection string.
            cluster_options (ClusterOptions): Options for configuring the Couchbase cluster connection.
            search_index (Union[str, SearchIndex], optional): Search index configuration, either as index name or SearchIndex definition.
            embedder (Embedder): Embedder instance for generating embeddings. Defaults to OpenAIEmbedder.
            overwrite (bool): Whether to overwrite existing collection. Defaults to False.
            is_global_level_index (bool): If True, the FTS index is managed at cluster level; otherwise at scope level.
            wait_until_index_ready (float, optional): Time in seconds to wait until the index is ready. Defaults to 0 (no wait).
            batch_limit (int, optional): Maximum number of documents to process in a single batch (applies to both sync and async operations). Defaults to 500.
            **kwargs: Additional arguments for Couchbase connection.

        Raises:
            ValueError: If bucket_name is empty.
        """
        if not bucket_name:
            raise ValueError("Bucket name must not be empty.")

        self.bucket_name = bucket_name
        self.scope_name = scope_name
        self.collection_name = collection_name
        self.connection_string = couchbase_connection_string
        self.cluster_options = cluster_options
        # Fall back to OpenAI embeddings when no embedder is supplied;
        # imported lazily so the openai dependency is only needed in that case.
        if embedder is None:
            from agno.knowledge.embedder.openai import OpenAIEmbedder

            embedder = OpenAIEmbedder()
            log_info("Embedder not provided, using OpenAIEmbedder as default.")
        self.embedder = embedder
        self.overwrite = overwrite
        self.is_global_level_index = is_global_level_index
        self.wait_until_index_ready = wait_until_index_ready
        # Initialize base class with name and description
        super().__init__(name=name, description=description)

        self.kwargs = kwargs
        self.batch_limit = batch_limit
        # A plain string means "use an existing index by name"; a SearchIndex
        # carries a full definition the class can (re)create the index from.
        if isinstance(search_index, str):
            self.search_index_name = search_index
            self.search_index_definition = None
        else:
            self.search_index_name = search_index.name
            self.search_index_definition = search_index

        # Lazily-created sync connection handles (see the corresponding properties).
        self._cluster: Optional[Cluster] = None
        self._bucket: Optional[Bucket] = None
        self._scope: Optional[Scope] = None
        self._collection: Optional[Collection] = None

        # Lazily-created async (acouchbase) handles (see the get_async_* helpers).
        self._async_cluster: Optional[AsyncCluster] = None
        self._async_bucket: Optional[AsyncBucket] = None
        self._async_scope: Optional[AsyncScope] = None
        self._async_collection: Optional[AsyncCollection] = None
129
+
130
+ @property
131
+ def cluster(self) -> Cluster:
132
+ """Create or retrieve the Couchbase cluster connection."""
133
+ if self._cluster is None:
134
+ try:
135
+ logger.debug("Creating Couchbase Cluster connection")
136
+ cluster = Cluster(self.connection_string, self.cluster_options)
137
+ # Verify connection
138
+ cluster.wait_until_ready(timeout=timedelta(seconds=60))
139
+ logger.info("Connected to Couchbase successfully.")
140
+ self._cluster = cluster
141
+ except Exception as e:
142
+ logger.error(f"Failed to connect to Couchbase: {e}")
143
+ raise ConnectionError(f"Failed to connect to Couchbase: {e}")
144
+ return self._cluster
145
+
146
+ @property
147
+ def bucket(self) -> Bucket:
148
+ """Get the Couchbase bucket."""
149
+ if self._bucket is None:
150
+ self._bucket = self.cluster.bucket(self.bucket_name)
151
+ return self._bucket
152
+
153
+ @property
154
+ def scope(self) -> Scope:
155
+ """Get the Couchbase scope."""
156
+ if self._scope is None:
157
+ self._scope = self.bucket.scope(self.scope_name)
158
+ return self._scope
159
+
160
+ @property
161
+ def collection(self) -> Collection:
162
+ """Get the Couchbase collection."""
163
+ if self._collection is None:
164
+ self._collection = self.scope.collection(self.collection_name)
165
+ return self._collection
166
+
167
    def _create_collection_and_scope(self):
        """
        Get or create the scope and collection within the bucket.

        Uses EAFP principle: attempts to create scope/collection and handles
        specific exceptions if they already exist or (for collections with overwrite=True)
        if they are not found for dropping.

        Raises:
            Exception: If scope or collection creation/manipulation fails unexpectedly.
        """
        # 1. Ensure Scope Exists
        try:
            self.bucket.collections().create_scope(scope_name=self.scope_name)
            logger.info(f"Created new scope '{self.scope_name}'")
        except ScopeAlreadyExistsException:
            logger.info(f"Scope '{self.scope_name}' already exists. Using existing scope.")
        except Exception as e:
            logger.error(f"Failed to create or ensure scope '{self.scope_name}' exists: {e}")
            raise

        collection_manager = self.bucket.collections()

        # 2. Handle Collection
        if self.overwrite:
            # Attempt to drop the collection first since overwrite is True
            try:
                logger.info(
                    f"Overwrite is True. Attempting to drop collection '{self.collection_name}' in scope '{self.scope_name}'."
                )
                collection_manager.drop_collection(collection_name=self.collection_name, scope_name=self.scope_name)
                logger.info(f"Successfully dropped collection '{self.collection_name}'.")
                time.sleep(1)  # Brief wait after drop, as in original code
            except CollectionNotFoundException:
                logger.info(
                    f"Collection '{self.collection_name}' not found in scope '{self.scope_name}'. No need to drop."
                )
            except Exception as e:
                logger.error(f"Error dropping collection '{self.collection_name}' during overwrite: {e}")
                raise

            # Proceed to create the collection
            try:
                logger.info(f"Creating collection '{self.collection_name}' in scope '{self.scope_name}'.")
                collection_manager.create_collection(scope_name=self.scope_name, collection_name=self.collection_name)
                logger.info(
                    f"Successfully created collection '{self.collection_name}' after drop attempt (overwrite=True)."
                )
            except CollectionAlreadyExistsException:
                # This is an unexpected state if overwrite=True and drop was supposed to clear the way.
                logger.error(
                    f"Failed to create collection '{self.collection_name}' as it already exists, "
                    f"even after drop attempt for overwrite. Overwrite operation may not have completed as intended."
                )
                raise  # Re-raise as the overwrite intent failed
            except Exception as e:
                logger.error(
                    f"Error creating collection '{self.collection_name}' after drop attempt (overwrite=True): {e}"
                )
                raise
        else:  # self.overwrite is False
            try:
                logger.info(
                    f"Overwrite is False. Attempting to create collection '{self.collection_name}' in scope '{self.scope_name}'."
                )
                collection_manager.create_collection(scope_name=self.scope_name, collection_name=self.collection_name)
                logger.info(f"Successfully created new collection '{self.collection_name}'.")
            except CollectionAlreadyExistsException:
                # Existing collection is acceptable when not overwriting.
                logger.info(
                    f"Collection '{self.collection_name}' already exists in scope '{self.scope_name}'. Using existing collection."
                )
            except Exception as e:
                logger.error(f"Error creating collection '{self.collection_name}': {e}")
                raise
241
+
242
+ def _search_indexes_mng(self) -> Union[SearchIndexManager, ScopeSearchIndexManager]:
243
+ """Get the search indexes manager."""
244
+ if self.is_global_level_index:
245
+ return self.cluster.search_indexes()
246
+ else:
247
+ return self.scope.search_indexes()
248
+
249
+ def _create_fts_index(self):
250
+ """Create a FTS index on the collection if it doesn't exist."""
251
+ try:
252
+ # Check if index exists and handle string index name
253
+ self._search_indexes_mng().get_index(self.search_index_name)
254
+ if not self.overwrite:
255
+ return
256
+ except Exception:
257
+ if self.search_index_definition is None:
258
+ raise ValueError(f"Index '{self.search_index_name}' does not exist")
259
+
260
+ # Create or update index
261
+ try:
262
+ if self.overwrite:
263
+ try:
264
+ logger.info(f"Dropping existing FTS index '{self.search_index_name}'")
265
+ self._search_indexes_mng().drop_index(self.search_index_name)
266
+ except SearchIndexNotFoundException:
267
+ logger.warning(f"Index '{self.search_index_name}' does not exist")
268
+ except Exception as e:
269
+ logger.warning(f"Error dropping index (may not exist): {e}")
270
+
271
+ self._search_indexes_mng().upsert_index(self.search_index_definition)
272
+ logger.info(f"Created FTS index '{self.search_index_name}'")
273
+
274
+ if self.wait_until_index_ready:
275
+ self._wait_for_index_ready()
276
+
277
+ except Exception as e:
278
+ logger.error(f"Error creating FTS index '{self.search_index_name}': {e}")
279
+ raise
280
+
281
+ def _wait_for_index_ready(self):
282
+ """Wait until the FTS index is ready."""
283
+ start_time = time.time()
284
+ while True:
285
+ try:
286
+ count = self._search_indexes_mng().get_indexed_documents_count(self.search_index_name)
287
+ if count > -1:
288
+ logger.info(f"FTS index '{self.search_index_name}' is ready")
289
+ break
290
+ # logger.info(f"FTS index '{self.search_index_name}' is not ready yet status: {index['status']}")
291
+ except Exception as e:
292
+ if time.time() - start_time > self.wait_until_index_ready:
293
+ logger.error(f"Error checking index status: {e}")
294
+ raise TimeoutError("Timeout waiting for FTS index to become ready")
295
+ time.sleep(1)
296
+
297
+ def create(self) -> None:
298
+ """Create the collection and FTS index if they don't exist."""
299
+ self._create_collection_and_scope()
300
+ self._create_fts_index()
301
+
302
    def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
        """
        Insert documents into the Couchbase bucket. Fails if any document already exists.

        Args:
            content_hash: Hash of the source content; stamped on every stored
                document so rows can later be found/removed by content hash.
            documents: List of documents to insert
            filters: Optional filters to apply to the documents
        """
        log_debug(f"Inserting {len(documents)} documents")

        docs_to_insert: Dict[str, Any] = {}
        for document in documents:
            # Generate the embedding lazily if the caller did not provide one.
            if document.embedding is None:
                document.embed(embedder=self.embedder)

            if document.embedding is None:
                raise ValueError(f"Failed to generate embedding for document: {document.name}")
            try:
                doc_data = self.prepare_doc(content_hash, document)
                if filters:
                    doc_data["filters"] = filters
                # For insert_multi, the key of the dict is the document ID,
                # and the value is the document content itself.
                doc_id = doc_data.pop("_id")
                docs_to_insert[doc_id] = doc_data
            except Exception as e:
                # NOTE(review): a document that fails preparation is logged and
                # skipped, not re-raised — insertion proceeds with the rest.
                logger.error(f"Error preparing document '{document.name}': {e}")

        if not docs_to_insert:
            logger.info("No documents prepared for insertion.")
            return

        doc_ids = list(docs_to_insert.keys())
        total_inserted_count = 0
        total_processed_count = len(doc_ids)
        errors_occurred = False

        # Insert in batches of at most self.batch_limit documents.
        for i in range(0, len(doc_ids), self.batch_limit):
            batch_doc_ids = doc_ids[i : i + self.batch_limit]
            batch_docs_to_insert = {doc_id: docs_to_insert[doc_id] for doc_id in batch_doc_ids}

            if not batch_docs_to_insert:
                continue

            log_debug(f"Inserting batch of {len(batch_docs_to_insert)} documents.")
            try:
                result = self.collection.insert_multi(batch_docs_to_insert)
                # Check for errors in the batch result
                # The actual way to count successes/failures might depend on the SDK version
                # For Couchbase SDK 3.x/4.x, result.all_ok is a good indicator for the whole batch.
                # If not all_ok, result.exceptions (dict) contains errors for specific keys.

                # Simplistic success counting for this example, assuming partial success is possible
                # and we want to count how many actually made it.
                if result.all_ok:
                    batch_inserted_count = len(batch_docs_to_insert)
                    logger.info(f"Batch of {batch_inserted_count} documents inserted successfully.")
                else:
                    # If not all_ok, count successes by checking which keys are NOT in exceptions
                    # This is a more robust way than just len(batch) - len(exceptions)
                    # as some items might succeed even if others fail.
                    succeeded_ids = set(batch_docs_to_insert.keys()) - set(
                        result.exceptions.keys() if result.exceptions else []
                    )
                    batch_inserted_count = len(succeeded_ids)
                    if batch_inserted_count > 0:
                        logger.info(f"Partially inserted {batch_inserted_count} documents in batch.")
                    logger.warning(f"Bulk write error during batch insert: {result.exceptions}")
                    errors_occurred = True
                total_inserted_count += batch_inserted_count

            except Exception as e:
                logger.error(f"Error during batch bulk insert for {len(batch_docs_to_insert)} documents: {e}")
                errors_occurred = True  # Mark that an error occurred in this batch

        logger.info(f"Finished processing {total_processed_count} documents for insertion.")
        logger.info(f"Total successfully inserted: {total_inserted_count}.")
        if errors_occurred:
            logger.warning("Some errors occurred during the insert operation. Please check logs for details.")
381
+
382
+ def upsert_available(self) -> bool:
383
+ """Check if upsert is available in Couchbase."""
384
+ return True
385
+
386
+ def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
387
+ """
388
+ Update existing documents or insert new ones into the Couchbase bucket.
389
+ """
390
+ if self.content_hash_exists(content_hash):
391
+ self._delete_by_content_hash(content_hash)
392
+ self.insert(content_hash=content_hash, documents=documents, filters=filters)
393
+
394
    def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
        """
        Update existing documents or insert new ones into the Couchbase bucket.

        Args:
            content_hash: Hash of the source content; stamped on every stored document.
            documents: List of documents to upsert
            filters: Optional filters to apply to the documents
        """
        logger.info(f"Upserting {len(documents)} documents")

        docs_to_upsert: Dict[str, Any] = {}
        for document in documents:
            try:
                # Generate the embedding lazily if the caller did not provide one.
                if document.embedding is None:
                    document.embed(embedder=self.embedder)

                if document.embedding is None:
                    raise ValueError(f"Failed to generate embedding for document: {document.name}")

                doc_data = self.prepare_doc(content_hash, document)
                if filters:
                    doc_data["filters"] = filters
                # For upsert_multi, the key of the dict is the document ID,
                # and the value is the document content itself.
                doc_id = doc_data.pop("_id")
                docs_to_upsert[doc_id] = doc_data
            except Exception as e:
                # NOTE(review): a document that fails preparation is logged and
                # skipped, not re-raised — the upsert proceeds with the rest.
                logger.error(f"Error preparing document '{document.name}': {e}")

        if not docs_to_upsert:
            logger.info("No documents prepared for upsert.")
            return

        doc_ids = list(docs_to_upsert.keys())
        total_upserted_count = 0
        total_processed_count = len(doc_ids)
        errors_occurred = False

        # Upsert in batches of at most self.batch_limit documents.
        for i in range(0, len(doc_ids), self.batch_limit):
            batch_doc_ids = doc_ids[i : i + self.batch_limit]
            batch_docs_to_upsert = {doc_id: docs_to_upsert[doc_id] for doc_id in batch_doc_ids}

            if not batch_docs_to_upsert:
                continue

            logger.info(f"Upserting batch of {len(batch_docs_to_upsert)} documents.")
            try:
                result = self.collection.upsert_multi(batch_docs_to_upsert)
                # Similar to insert_multi, check for errors in the batch result.
                if result.all_ok:
                    batch_upserted_count = len(batch_docs_to_upsert)
                    logger.info(f"Batch of {batch_upserted_count} documents upserted successfully.")
                else:
                    # Count successes as the keys absent from result.exceptions;
                    # partial success within a batch is possible.
                    succeeded_ids = set(batch_docs_to_upsert.keys()) - set(
                        result.exceptions.keys() if result.exceptions else []
                    )
                    batch_upserted_count = len(succeeded_ids)
                    if batch_upserted_count > 0:
                        logger.info(f"Partially upserted {batch_upserted_count} documents in batch.")
                    logger.warning(f"Bulk write error during batch upsert: {result.exceptions}")
                    errors_occurred = True
                total_upserted_count += batch_upserted_count

            except Exception as e:
                logger.error(f"Error during batch bulk upsert for {len(batch_docs_to_upsert)} documents: {e}")
                errors_occurred = True

        logger.info(f"Finished processing {total_processed_count} documents for upsert.")
        logger.info(f"Total successfully upserted: {total_upserted_count}.")
        if errors_occurred:
            logger.warning("Some errors occurred during the upsert operation. Please check logs for details.")
465
+
466
+ def search(
467
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
468
+ ) -> List[Document]:
469
+ if isinstance(filters, List):
470
+ log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
471
+ filters = None
472
+ """Search the Couchbase bucket for documents relevant to the query."""
473
+ query_embedding = self.embedder.get_embedding(query)
474
+ if query_embedding is None:
475
+ logger.error(f"Failed to generate embedding for query: {query}")
476
+ return []
477
+
478
+ try:
479
+ # Implement vector search using Couchbase FTS
480
+ vector_search = VectorSearch.from_vector_query(
481
+ VectorQuery(field_name="embedding", vector=query_embedding, num_candidates=limit)
482
+ )
483
+ request = SearchRequest.create(vector_search)
484
+
485
+ # Prepare the options dictionary
486
+ options_dict = {"limit": limit, "fields": ["*"]}
487
+ if filters:
488
+ options_dict["raw"] = filters
489
+
490
+ search_args = {
491
+ "index": self.search_index_name,
492
+ "request": request,
493
+ "options": SearchOptions(**options_dict), # Construct SearchOptions with the dictionary
494
+ }
495
+
496
+ if self.is_global_level_index:
497
+ results = self.cluster.search(**search_args)
498
+ else:
499
+ results = self.scope.search(**search_args)
500
+
501
+ return self.__get_doc_from_kv(results)
502
+ except Exception as e:
503
+ logger.error(f"Error during search: {e}")
504
+ raise
505
+
506
+ def __get_doc_from_kv(self, response: SearchResult) -> List[Document]:
507
+ """
508
+ Convert search results to Document objects by fetching full documents from KV store.
509
+
510
+ Args:
511
+ response: SearchResult from Couchbase search query
512
+
513
+ Returns:
514
+ List of Document objects
515
+ """
516
+ documents: List[Document] = []
517
+ search_hits = [(doc.id, doc.score) for doc in response.rows()]
518
+
519
+ if not search_hits:
520
+ return documents
521
+
522
+ # Fetch documents from KV store
523
+ ids = [hit[0] for hit in search_hits]
524
+ kv_response = self.collection.get_multi(keys=ids)
525
+
526
+ if not kv_response.all_ok:
527
+ raise Exception(f"Failed to get documents from KV store: {kv_response.exceptions}")
528
+
529
+ # Convert results to Documents
530
+ for doc_id, score in search_hits:
531
+ get_result = kv_response.results.get(doc_id)
532
+ if get_result is None or not get_result.success:
533
+ logger.warning(f"Document {doc_id} not found in KV store")
534
+ continue
535
+
536
+ value = get_result.value
537
+ documents.append(
538
+ Document(
539
+ id=doc_id,
540
+ name=value["name"],
541
+ content=value["content"],
542
+ meta_data=value["meta_data"],
543
+ embedding=value["embedding"],
544
+ content_id=value.get("content_id"),
545
+ )
546
+ )
547
+
548
+ return documents
549
+
550
+ def drop(self) -> None:
551
+ """Delete the collection from the scope."""
552
+ if self.exists():
553
+ try:
554
+ self.bucket.collections().drop_collection(
555
+ collection_name=self.collection_name, scope_name=self.scope_name
556
+ )
557
+ logger.info(f"Collection '{self.collection_name}' dropped successfully.")
558
+ except Exception as e:
559
+ logger.error(f"Error dropping collection '{self.collection_name}': {e}")
560
+ raise
561
+
562
+ def delete(self) -> bool:
563
+ """Delete the collection from the scope."""
564
+ if self.exists():
565
+ self.drop()
566
+ return True
567
+ return False
568
+
569
+ def exists(self) -> bool:
570
+ """Check if the collection exists."""
571
+ try:
572
+ scopes = self.bucket.collections().get_all_scopes()
573
+ for scope in scopes:
574
+ if scope.name == self.scope_name:
575
+ for collection in scope.collections:
576
+ if collection.name == self.collection_name:
577
+ return True
578
+ return False
579
+ except Exception:
580
+ return False
581
+
582
+ def prepare_doc(self, content_hash: str, document: Document) -> Dict[str, Any]:
583
+ """
584
+ Prepare a document for insertion into Couchbase.
585
+
586
+ Args:
587
+ document: Document to prepare
588
+
589
+ Returns:
590
+ Dictionary containing document data ready for insertion
591
+
592
+ Raises:
593
+ ValueError: If embedding generation fails
594
+ """
595
+ if not document.content:
596
+ raise ValueError(f"Document {document.name} has no content")
597
+
598
+ logger.debug(f"Preparing document: {document.name}")
599
+
600
+ # Clean content and generate ID
601
+ cleaned_content = document.content.replace("\x00", "\ufffd")
602
+ doc_id = md5(cleaned_content.encode("utf-8")).hexdigest()
603
+
604
+ return {
605
+ "_id": doc_id,
606
+ "name": document.name,
607
+ "content": cleaned_content,
608
+ "meta_data": document.meta_data, # Ensure meta_data is never None
609
+ "embedding": document.embedding,
610
+ "content_id": document.content_id,
611
+ "content_hash": content_hash,
612
+ }
613
+
614
+ def get_count(self) -> int:
615
+ """Get the count of documents in the Couchbase bucket."""
616
+ try:
617
+ search_indexes = self.cluster.search_indexes()
618
+ if not self.is_global_level_index:
619
+ search_indexes = self.scope.search_indexes()
620
+ return search_indexes.get_indexed_documents_count(self.search_index_name)
621
+ except Exception as e:
622
+ logger.error(f"Error getting document count: {e}")
623
+ return 0
624
+
625
+ def name_exists(self, name: str) -> bool:
626
+ """Check if a document exists in the bucket based on its name."""
627
+ try:
628
+ # Use N1QL query to check if document with given name exists
629
+ query = f"SELECT name FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE name = $name LIMIT 1"
630
+ result = self.scope.query(
631
+ query, QueryOptions(named_parameters={"name": name}, scan_consistency=QueryScanConsistency.REQUEST_PLUS)
632
+ )
633
+ for row in result.rows():
634
+ return True
635
+ return False
636
+ except Exception as e:
637
+ logger.error(f"Error checking document name existence: {e}")
638
+ return False
639
+
640
+ def id_exists(self, id: str) -> bool:
641
+ """Check if a document exists in the bucket based on its ID."""
642
+ try:
643
+ result = self.collection.exists(id)
644
+ if not result.exists:
645
+ logger.debug(f"Document 'does not exist': {id}")
646
+ return result.exists
647
+ except Exception as e:
648
+ logger.error(f"Error checking document existence: {e}")
649
+ return False
650
+
651
+ def content_hash_exists(self, content_hash: str) -> bool:
652
+ """Check if a document exists in the bucket based on its content hash."""
653
+ try:
654
+ # Use N1QL query to check if document with given content_hash exists
655
+ query = f"SELECT content_hash FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE content_hash = $content_hash LIMIT 1"
656
+ result = self.scope.query(
657
+ query,
658
+ QueryOptions(
659
+ named_parameters={"content_hash": content_hash}, scan_consistency=QueryScanConsistency.REQUEST_PLUS
660
+ ),
661
+ )
662
+ for row in result.rows():
663
+ return True
664
+ return False
665
+ except Exception as e:
666
+ logger.error(f"Error checking document content_hash existence: {e}")
667
+ return False
668
+
669
+ # === ASYNC SUPPORT USING acouchbase ===
670
+
671
+ async def _create_async_cluster_instance(self) -> AsyncCluster:
672
+ """Helper method to create and connect an AsyncCluster instance."""
673
+ logger.debug("Creating and connecting new AsyncCluster instance.")
674
+ cluster = await AsyncCluster.connect(self.connection_string, self.cluster_options)
675
+ # AsyncCluster.connect ensures the cluster is ready upon successful await.
676
+ # No explicit wait_until_ready is needed here for AsyncCluster.
677
+ logger.info("AsyncCluster connected successfully.")
678
+ return cluster
679
+
680
+ async def get_async_cluster(self) -> AsyncCluster:
681
+ """Gets or creates the cached AsyncCluster instance."""
682
+ if self._async_cluster is None:
683
+ logger.debug("AsyncCluster instance not cached, creating new one.")
684
+ self._async_cluster = await self._create_async_cluster_instance()
685
+ return self._async_cluster
686
+
687
+ async def get_async_bucket(self) -> AsyncBucket:
688
+ """Gets or creates the cached AsyncBucket instance."""
689
+ if self._async_bucket is None:
690
+ logger.debug("AsyncBucket instance not cached, creating new one.")
691
+ cluster = await self.get_async_cluster()
692
+ self._async_bucket = cluster.bucket(self.bucket_name)
693
+ return self._async_bucket
694
+
695
+ async def get_async_scope(self) -> AsyncScope:
696
+ """Gets or creates the cached AsyncScope instance."""
697
+ if self._async_scope is None:
698
+ logger.debug("AsyncScope instance not cached, creating new one.")
699
+ bucket = await self.get_async_bucket()
700
+ self._async_scope = bucket.scope(self.scope_name)
701
+ return self._async_scope
702
+
703
+ async def get_async_collection(self) -> AsyncCollection:
704
+ """Gets or creates the cached AsyncCollection instance."""
705
+ if self._async_collection is None:
706
+ logger.debug("AsyncCollection instance not cached, creating new one.")
707
+ scope = await self.get_async_scope()
708
+ self._async_collection = scope.collection(self.collection_name)
709
+ return self._async_collection
710
+
711
    async def async_create(self) -> None:
        """Asynchronously ensure the scope/collection exist, then the FTS index.

        Async counterpart of create(), using the acouchbase management APIs.
        (The previous comment claimed index creation falls back to sync, but the
        code awaits the async index-creation helper.)
        """
        await self._async_create_collection_and_scope()
        await self._async_create_fts_index()
716
+
717
+ async def _async_create_collection_and_scope(self):
718
+ """
719
+ Get or create the scope and collection within the bucket.
720
+
721
+ Uses EAFP principle: attempts to create scope/collection and handles
722
+ specific exceptions if they already exist or (for collections with overwrite=True)
723
+ if they are not found for dropping.
724
+
725
+ Raises:
726
+ Exception: If scope or collection creation/manipulation fails unexpectedly.
727
+ """
728
+ # 1. Ensure Scope Exists
729
+ async_bucket_instance = await self.get_async_bucket()
730
+ try:
731
+ await async_bucket_instance.collections().create_scope(self.scope_name)
732
+ logger.info(f"Created new scope '{self.scope_name}'")
733
+ except ScopeAlreadyExistsException:
734
+ logger.info(f"Scope '{self.scope_name}' already exists. Using existing scope.")
735
+ except Exception as e:
736
+ logger.error(f"Failed to create or ensure scope '{self.scope_name}' exists: {e}")
737
+ raise
738
+
739
+ collection_manager = async_bucket_instance.collections()
740
+
741
+ # 2. Handle Collection
742
+ if self.overwrite:
743
+ # Attempt to drop the collection first since overwrite is True
744
+ try:
745
+ logger.info(
746
+ f"Overwrite is True. Attempting to drop collection '{self.collection_name}' in scope '{self.scope_name}'."
747
+ )
748
+ await collection_manager.drop_collection(
749
+ collection_name=self.collection_name, scope_name=self.scope_name
750
+ )
751
+ logger.info(f"Successfully dropped collection '{self.collection_name}'.")
752
+ time.sleep(1) # Brief wait after drop, as in original code
753
+ except CollectionNotFoundException:
754
+ logger.info(
755
+ f"Collection '{self.collection_name}' not found in scope '{self.scope_name}'. No need to drop."
756
+ )
757
+ except Exception as e:
758
+ logger.error(f"Error dropping collection '{self.collection_name}' during overwrite: {e}")
759
+ raise
760
+
761
+ # Proceed to create the collection
762
+ try:
763
+ logger.info(f"Creating collection '{self.collection_name}' in scope '{self.scope_name}'.")
764
+ await collection_manager.create_collection(
765
+ scope_name=self.scope_name, collection_name=self.collection_name
766
+ )
767
+ logger.info(
768
+ f"Successfully created collection '{self.collection_name}' after drop attempt (overwrite=True)."
769
+ )
770
+ except CollectionAlreadyExistsException:
771
+ # This is an unexpected state if overwrite=True and drop was supposed to clear the way.
772
+ logger.error(
773
+ f"Failed to create collection '{self.collection_name}' as it already exists, "
774
+ f"even after drop attempt for overwrite. Overwrite operation may not have completed as intended."
775
+ )
776
+ raise # Re-raise as the overwrite intent failed
777
+ except Exception as e:
778
+ logger.error(
779
+ f"Error creating collection '{self.collection_name}' after drop attempt (overwrite=True): {e}"
780
+ )
781
+ raise
782
+ else: # self.overwrite is False
783
+ try:
784
+ logger.info(
785
+ f"Overwrite is False. Attempting to create collection '{self.collection_name}' in scope '{self.scope_name}'."
786
+ )
787
+ await collection_manager.create_collection(
788
+ scope_name=self.scope_name, collection_name=self.collection_name
789
+ )
790
+ logger.info(f"Successfully created new collection '{self.collection_name}'.")
791
+ except CollectionAlreadyExistsException:
792
+ logger.info(
793
+ f"Collection '{self.collection_name}' already exists in scope '{self.scope_name}'. Using existing collection."
794
+ )
795
+ except Exception as e:
796
+ logger.error(f"Error creating collection '{self.collection_name}': {e}")
797
+ raise
798
+
799
+ async def _get_async_search_indexes_mng(self) -> Union[AsyncSearchIndexManager, AsyncScopeSearchIndexManager]:
800
+ """Get the async search indexes manager."""
801
+ if self.is_global_level_index:
802
+ cluster = await self.get_async_cluster()
803
+ return cluster.search_indexes()
804
+ else:
805
+ scope = await self.get_async_scope()
806
+ return scope.search_indexes()
807
+
808
    async def _async_create_fts_index(self):
        """Create (or recreate) the FTS index on the collection if needed.

        Behavior:
          - Index exists and ``overwrite`` is False: no-op.
          - Index missing: ``search_index_definition`` must be provided,
            otherwise ValueError is raised.
          - ``overwrite`` is True: best-effort drop of any existing index,
            then (re)create via upsert.

        Raises:
            ValueError: If the index is missing and no definition was given.
            Exception: Any SDK failure during the create/upsert step.
        """
        async_search_mng = await self._get_async_search_indexes_mng()
        try:
            # Check if index exists and handle string index name
            await async_search_mng.get_index(self.search_index_name)
            if not self.overwrite:
                return
        except Exception:
            # get_index failing is treated as "index does not exist".
            if self.search_index_definition is None:
                raise ValueError(f"Index '{self.search_index_name}' does not exist")

        # Create or update index
        try:
            if self.overwrite:
                # Drop failures are non-fatal; upsert below still recreates.
                try:
                    logger.info(f"Dropping existing FTS index '{self.search_index_name}'")
                    await async_search_mng.drop_index(self.search_index_name)
                except SearchIndexNotFoundException:
                    logger.warning(f"Index '{self.search_index_name}' does not exist")
                except Exception as e:
                    logger.warning(f"Error dropping index (may not exist): {e}")

            await async_search_mng.upsert_index(self.search_index_definition)
            logger.info(f"Created FTS index '{self.search_index_name}'")

            if self.wait_until_index_ready:
                await self._async_wait_for_index_ready()

        except Exception as e:
            logger.error(f"Error creating FTS index '{self.search_index_name}': {e}")
            raise
840
+
841
    async def _async_wait_for_index_ready(self):
        """Poll until the FTS index responds to a document-count request.

        NOTE(review): ``self.wait_until_index_ready`` is used both as an
        enable flag (by the caller) and as the timeout in seconds here —
        confirm that callers set it to a numeric duration.

        Raises:
            TimeoutError: If count requests keep failing past the deadline.
        """
        start_time = time.time()
        async_search_mng = await self._get_async_search_indexes_mng()
        while True:
            try:
                # Any successfully returned count (>= 0) means the index is queryable.
                count = await async_search_mng.get_indexed_documents_count(self.search_index_name)
                if count > -1:
                    logger.info(f"FTS index '{self.search_index_name}' is ready")
                    break
                # logger.info(f"FTS index '{self.search_index_name}' is not ready yet status: {index['status']}")
            except Exception as e:
                # Errors are expected while the index is still building;
                # only give up once the deadline has passed.
                if time.time() - start_time > self.wait_until_index_ready:
                    logger.error(f"Error checking index status: {e}")
                    raise TimeoutError("Timeout waiting for FTS index to become ready")
            await asyncio.sleep(1)
857
+
858
+ async def async_id_exists(self, id: str) -> bool:
859
+ try:
860
+ async_collection_instance = await self.get_async_collection()
861
+ result = await async_collection_instance.exists(id)
862
+ if not result.exists:
863
+ logger.debug(f"[async] Document does not exist: {id}")
864
+ return result.exists
865
+ except Exception as e:
866
+ logger.error(f"[async] Error checking document existence: {e}")
867
+ return False
868
+
869
+ async def async_name_exists(self, name: str) -> bool:
870
+ try:
871
+ query = f"SELECT name FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE name = $name LIMIT 1"
872
+ async_scope_instance = await self.get_async_scope()
873
+ result = async_scope_instance.query(
874
+ query, QueryOptions(named_parameters={"name": name}, scan_consistency=QueryScanConsistency.REQUEST_PLUS)
875
+ )
876
+ async for row in result.rows():
877
+ return True
878
+ return False
879
+ except Exception as e:
880
+ logger.error(f"[async] Error checking document name existence: {e}")
881
+ return False
882
+
883
    async def async_insert(
        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
    ) -> None:
        """Embed and insert documents asynchronously.

        Embedding uses the embedder's batch API when enabled/available,
        falling back to per-document embedding (except on rate-limit errors,
        which are re-raised). Prepared payloads are inserted concurrently in
        batches of ``self.batch_limit``; per-document failures are logged,
        not raised.

        Args:
            content_hash: Hash of the source content, stamped on each payload.
            documents: Documents to embed and insert.
            filters: Optional filter payload stored under each doc's ``filters``.
        """
        logger.info(f"[async] Inserting {len(documents)} documents")

        async_collection_instance = await self.get_async_collection()
        all_docs_to_insert: Dict[str, Any] = {}

        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
            # Use batch embedding when enabled and supported
            try:
                # Extract content from all documents
                doc_contents = [doc.content for doc in documents]

                # Get batch embeddings and usage
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

                # Process documents with pre-computed embeddings
                for j, doc in enumerate(documents):
                    try:
                        if j < len(embeddings):
                            doc.embedding = embeddings[j]
                            doc.usage = usages[j] if j < len(usages) else None
                    except Exception as e:
                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

            except Exception as e:
                # Check if this is a rate limit error - don't fall back as it would make things worse
                error_str = str(e).lower()
                is_rate_limit = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )

                if is_rate_limit:
                    logger.error(f"Rate limit detected during batch embedding. {e}")
                    raise e
                else:
                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                    # Fall back to individual embedding
                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
                    await asyncio.gather(*embed_tasks, return_exceptions=True)
        else:
            # Use individual embedding
            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
            await asyncio.gather(*embed_tasks, return_exceptions=True)

        for document in documents:
            try:
                # User edit: self.prepare_doc is no longer awaited with to_thread
                doc_data = self.prepare_doc(content_hash, document)
                if filters:
                    doc_data["filters"] = filters
                doc_id = doc_data.pop("_id")  # Remove _id as it's used as key
                all_docs_to_insert[doc_id] = doc_data
            except Exception as e:
                logger.error(f"[async] Error preparing document '{document.name}': {e}")

        if not all_docs_to_insert:
            logger.info("[async] No documents prepared for insertion.")
            return

        doc_ids = list(all_docs_to_insert.keys())
        total_inserted_count = 0
        total_failed_count = 0
        processed_doc_count = len(all_docs_to_insert)

        # Insert concurrently, bounded to self.batch_limit in-flight ops per batch.
        for i in range(0, len(doc_ids), self.batch_limit):
            batch_doc_ids = doc_ids[i : i + self.batch_limit]

            logger.info(f"[async] Processing batch of {len(batch_doc_ids)} documents for concurrent insertion.")

            insert_tasks = []
            for doc_id in batch_doc_ids:
                doc_content = all_docs_to_insert[doc_id]
                insert_tasks.append(async_collection_instance.insert(doc_id, doc_content))

            if insert_tasks:
                results = await asyncio.gather(*insert_tasks, return_exceptions=True)
                for idx, result in enumerate(results):
                    # Get the original doc_id for logging, corresponding to the task order
                    current_doc_id = batch_doc_ids[idx]
                    if isinstance(result, Exception):
                        total_failed_count += 1
                        logger.error(f"[async] Error inserting document '{current_doc_id}': {result}")
                    else:
                        # Assuming successful insert doesn't return a specific value we need to check further,
                        # or if it does, the absence of an exception means success.
                        total_inserted_count += 1
                        logger.debug(f"[async] Successfully inserted document '{current_doc_id}'.")

        logger.info(f"[async] Finished processing {processed_doc_count} documents.")
        logger.info(f"[async] Total successfully inserted: {total_inserted_count}, Total failed: {total_failed_count}.")
976
+
977
+ async def async_upsert(
978
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
979
+ ) -> None:
980
+ """Upsert documents asynchronously."""
981
+ if self.content_hash_exists(content_hash):
982
+ self._delete_by_content_hash(content_hash)
983
+ await self._async_upsert(content_hash=content_hash, documents=documents, filters=filters)
984
+
985
    async def _async_upsert(
        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
    ) -> None:
        """Embed and upsert documents asynchronously.

        Mirrors ``async_insert``: batch embedding with rate-limit-aware
        fallback, payload preparation keyed by ``_id``, then concurrent
        upserts in batches of ``self.batch_limit``. Per-document failures
        are logged, not raised.

        Args:
            content_hash: Hash of the source content, stamped on each payload.
            documents: Documents to embed and upsert.
            filters: Optional filter payload stored under each doc's ``filters``.
        """
        logger.info(f"[async] Upserting {len(documents)} documents")

        async_collection_instance = await self.get_async_collection()
        all_docs_to_upsert: Dict[str, Any] = {}

        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
            # Use batch embedding when enabled and supported
            try:
                # Extract content from all documents
                doc_contents = [doc.content for doc in documents]

                # Get batch embeddings and usage
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

                # Process documents with pre-computed embeddings
                for j, doc in enumerate(documents):
                    try:
                        if j < len(embeddings):
                            doc.embedding = embeddings[j]
                            doc.usage = usages[j] if j < len(usages) else None
                    except Exception as e:
                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

            except Exception as e:
                # Check if this is a rate limit error - don't fall back as it would make things worse
                error_str = str(e).lower()
                is_rate_limit = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )

                if is_rate_limit:
                    logger.error(f"Rate limit detected during batch embedding. {e}")
                    raise e
                else:
                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                    # Fall back to individual embedding
                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
                    await asyncio.gather(*embed_tasks, return_exceptions=True)
        else:
            # Use individual embedding
            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
            await asyncio.gather(*embed_tasks, return_exceptions=True)

        for document in documents:
            try:
                # Consistent with async_insert, prepare_doc is not awaited with to_thread based on prior user edits
                doc_data = self.prepare_doc(content_hash, document)
                if filters:
                    doc_data["filters"] = filters
                doc_id = doc_data.pop("_id")  # _id is used as key for upsert
                all_docs_to_upsert[doc_id] = doc_data
            except Exception as e:
                logger.error(f"[async] Error preparing document '{document.name}' for upsert: {e}")

        if not all_docs_to_upsert:
            logger.info("[async] No documents prepared for upsert.")
            return

        doc_ids = list(all_docs_to_upsert.keys())
        total_upserted_count = 0
        total_failed_count = 0
        processed_doc_count = len(all_docs_to_upsert)

        logger.info(f"[async] Prepared {processed_doc_count} documents for upsert.")

        # Upsert concurrently, bounded to self.batch_limit in-flight ops per batch.
        for i in range(0, len(doc_ids), self.batch_limit):
            batch_doc_ids = doc_ids[i : i + self.batch_limit]

            logger.info(f"[async] Processing batch of {len(batch_doc_ids)} documents for concurrent upsert.")

            upsert_tasks = []
            for doc_id in batch_doc_ids:
                doc_content = all_docs_to_upsert[doc_id]
                upsert_tasks.append(async_collection_instance.upsert(doc_id, doc_content))

            if upsert_tasks:
                results = await asyncio.gather(*upsert_tasks, return_exceptions=True)
                for idx, result in enumerate(results):
                    current_doc_id = batch_doc_ids[idx]
                    if isinstance(result, Exception):
                        total_failed_count += 1
                        logger.error(f"[async] Error upserting document '{current_doc_id}': {result}")
                    else:
                        # Assuming successful upsert doesn't return a specific value we need to check further,
                        # or if it does, the absence of an exception means success.
                        total_upserted_count += 1
                        logger.debug(f"[async] Successfully upserted document '{current_doc_id}'.")

        logger.info(f"[async] Finished processing {processed_doc_count} documents for upsert.")
        logger.info(f"[async] Total successfully upserted: {total_upserted_count}, Total failed: {total_failed_count}.")
1079
+
1080
    async def async_search(
        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
    ) -> List[Document]:
        """Run an async vector search and return matching documents.

        Embeds the query (NOTE(review): via the synchronous embedder call —
        confirm an async variant isn't preferable here), issues an FTS vector
        search at cluster or scope level depending on ``is_global_level_index``,
        then hydrates full documents from the KV store.

        Args:
            query: Natural-language query to embed and search with.
            limit: Maximum number of candidates/results.
            filters: Dict of raw FTS query params; FilterExpr lists are not
                supported and are dropped with a warning.

        Returns:
            List of hydrated Document objects (empty if embedding fails).
        """
        if isinstance(filters, List):
            log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
            filters = None
        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"[async] Failed to generate embedding for query: {query}")
            return []
        try:
            # Implement vector search using Couchbase FTS
            vector_search = VectorSearch.from_vector_query(
                VectorQuery(field_name="embedding", vector=query_embedding, num_candidates=limit)
            )
            request = SearchRequest.create(vector_search)

            # Prepare the options dictionary
            options_dict = {"limit": limit, "fields": ["*"]}
            if filters:
                # Filters are passed through as raw FTS query parameters.
                options_dict["raw"] = filters

            search_args = {
                "index": self.search_index_name,
                "request": request,
                "options": SearchOptions(**options_dict),  # Construct SearchOptions with the dictionary
            }

            if self.is_global_level_index:
                async_cluster_instance = await self.get_async_cluster()
                results = async_cluster_instance.search(**search_args)
            else:
                async_scope_instance = await self.get_async_scope()
                results = async_scope_instance.search(**search_args)

            # Hydrate full documents from KV using the search hit ids.
            return await self.__async_get_doc_from_kv(results)
        except Exception as e:
            logger.error(f"[async] Error during search: {e}")
            raise
1119
+
1120
+ async def async_drop(self) -> None:
1121
+ if await self.async_exists():
1122
+ try:
1123
+ async_bucket_instance = await self.get_async_bucket()
1124
+ await async_bucket_instance.collections().drop_collection(
1125
+ collection_name=self.collection_name, scope_name=self.scope_name
1126
+ )
1127
+ logger.info(f"[async] Collection '{self.collection_name}' dropped successfully.")
1128
+ except Exception as e:
1129
+ logger.error(f"[async] Error dropping collection '{self.collection_name}': {e}")
1130
+ raise
1131
+
1132
+ async def async_exists(self) -> bool:
1133
+ try:
1134
+ async_bucket_instance = await self.get_async_bucket()
1135
+ scopes = await async_bucket_instance.collections().get_all_scopes()
1136
+ for scope in scopes:
1137
+ if scope.name == self.scope_name:
1138
+ for collection in scope.collections:
1139
+ if collection.name == self.collection_name:
1140
+ return True
1141
+ return False
1142
+ except Exception:
1143
+ return False
1144
+
1145
+ async def __async_get_doc_from_kv(self, response: AsyncSearchIndex) -> List[Document]:
1146
+ """
1147
+ Convert search results to Document objects by fetching full documents from KV store concurrently.
1148
+
1149
+ Args:
1150
+ response: SearchResult from Couchbase search query
1151
+
1152
+ Returns:
1153
+ List of Document objects
1154
+ """
1155
+ documents: List[Document] = []
1156
+ # Assuming search_hits map directly to the order of documents we want to fetch and reconstruct
1157
+ search_hits_map = {doc.id: doc.score async for doc in response.rows()}
1158
+ doc_ids_to_fetch = list(search_hits_map.keys())
1159
+
1160
+ if not doc_ids_to_fetch:
1161
+ return documents
1162
+
1163
+ async_collection_instance = await self.get_async_collection()
1164
+
1165
+ # Process in batches
1166
+ for i in range(0, len(doc_ids_to_fetch), self.batch_limit):
1167
+ batch_doc_ids = doc_ids_to_fetch[i : i + self.batch_limit]
1168
+ if not batch_doc_ids:
1169
+ continue
1170
+
1171
+ logger.debug(f"[async] Fetching batch of {len(batch_doc_ids)} documents from KV.")
1172
+ get_tasks = [async_collection_instance.get(doc_id) for doc_id in batch_doc_ids]
1173
+
1174
+ # Fetch documents from KV store concurrently for the current batch
1175
+ results_from_kv_batch = await asyncio.gather(*get_tasks, return_exceptions=True)
1176
+
1177
+ for batch_idx, get_result in enumerate(results_from_kv_batch):
1178
+ # Original doc_id corresponding to this result within the batch
1179
+ doc_id = batch_doc_ids[batch_idx]
1180
+ # score = search_hits_map[doc_id] # Retrieve the original score
1181
+
1182
+ if isinstance(get_result, BaseException) or isinstance(get_result, Exception) or get_result is None:
1183
+ logger.warning(f"[async] Document {doc_id} not found or error fetching from KV store: {get_result}")
1184
+ continue
1185
+
1186
+ try:
1187
+ value = get_result.content_as[dict]
1188
+ if not isinstance(value, dict):
1189
+ logger.warning(
1190
+ f"[async] Document {doc_id} content from KV is not a dict: {type(value)}. Skipping."
1191
+ )
1192
+ continue
1193
+
1194
+ documents.append(
1195
+ Document(
1196
+ id=doc_id,
1197
+ name=value.get("name"),
1198
+ content=value.get("content", ""),
1199
+ meta_data=value.get("meta_data", {}),
1200
+ embedding=value.get("embedding", []),
1201
+ )
1202
+ )
1203
+ except Exception as e:
1204
+ logger.warning(
1205
+ f"[async] Error processing document {doc_id} from KV store: {e}. Value: {getattr(get_result, 'content_as', 'N/A')}"
1206
+ )
1207
+ continue
1208
+
1209
+ return documents
1210
+
1211
+ def delete_by_id(self, id: str) -> bool:
1212
+ """
1213
+ Delete a document by its ID.
1214
+
1215
+ Args:
1216
+ id (str): The document ID to delete
1217
+
1218
+ Returns:
1219
+ bool: True if document was deleted, False otherwise
1220
+ """
1221
+ try:
1222
+ log_debug(f"Couchbase VectorDB : Deleting document with ID {id}")
1223
+ if not self.id_exists(id):
1224
+ return False
1225
+
1226
+ # Delete by ID using Couchbase collection.delete()
1227
+ self.collection.remove(id)
1228
+ log_info(f"Successfully deleted document with ID {id}")
1229
+ return True
1230
+ except Exception as e:
1231
+ log_info(f"Error deleting document with ID {id}: {e}")
1232
+ return False
1233
+
1234
+ def delete_by_name(self, name: str) -> bool:
1235
+ """
1236
+ Delete documents by name.
1237
+
1238
+ Args:
1239
+ name (str): The document name to delete
1240
+
1241
+ Returns:
1242
+ bool: True if documents were deleted, False otherwise
1243
+ """
1244
+ try:
1245
+ log_debug(f"Couchbase VectorDB : Deleting documents with name {name}")
1246
+
1247
+ query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE name = $name"
1248
+ result = self.scope.query(
1249
+ query, QueryOptions(named_parameters={"name": name}, scan_consistency=QueryScanConsistency.REQUEST_PLUS)
1250
+ )
1251
+ rows = list(result.rows()) # Collect once
1252
+
1253
+ for row in rows:
1254
+ self.collection.remove(row.get("doc_id"))
1255
+ log_info(f"Deleted {len(rows)} documents with name {name}")
1256
+ return True
1257
+
1258
+ except Exception as e:
1259
+ log_info(f"Error deleting documents with name {name}: {e}")
1260
+ return False
1261
+
1262
+ def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
1263
+ """
1264
+ Delete documents by metadata.
1265
+
1266
+ Args:
1267
+ metadata (Dict[str, Any]): The metadata to match for deletion
1268
+
1269
+ Returns:
1270
+ bool: True if documents were deleted, False otherwise
1271
+ """
1272
+ try:
1273
+ log_debug(f"Couchbase VectorDB : Deleting documents with metadata {metadata}")
1274
+
1275
+ if not metadata:
1276
+ log_info("No metadata provided for deletion")
1277
+ return False
1278
+
1279
+ # Build WHERE clause for metadata matching
1280
+ where_conditions = []
1281
+ named_parameters: Dict[str, Any] = {}
1282
+
1283
+ for key, value in metadata.items():
1284
+ if isinstance(value, (list, tuple)):
1285
+ # For array values, use ARRAY_CONTAINS
1286
+ where_conditions.append(
1287
+ f"(ARRAY_CONTAINS(filters.{key}, $value_{key}) OR ARRAY_CONTAINS(recipes.filters.{key}, $value_{key}))"
1288
+ )
1289
+ named_parameters[f"value_{key}"] = value
1290
+ elif isinstance(value, str):
1291
+ where_conditions.append(f"(filters.{key} = $value_{key} OR recipes.filters.{key} = $value_{key})")
1292
+ named_parameters[f"value_{key}"] = value
1293
+ elif isinstance(value, bool):
1294
+ where_conditions.append(f"(filters.{key} = $value_{key} OR recipes.filters.{key} = $value_{key})")
1295
+ named_parameters[f"value_{key}"] = value
1296
+ elif isinstance(value, (int, float)):
1297
+ where_conditions.append(f"(filters.{key} = $value_{key} OR recipes.filters.{key} = $value_{key})")
1298
+ named_parameters[f"value_{key}"] = value
1299
+ elif value is None:
1300
+ where_conditions.append(f"(filters.{key} IS NULL OR recipes.filters.{key} IS NULL)")
1301
+ else:
1302
+ # For other types, convert to string
1303
+ where_conditions.append(f"(filters.{key} = $value_{key} OR recipes.filters.{key} = $value_{key})")
1304
+ named_parameters[f"value_{key}"] = str(value)
1305
+
1306
+ if not where_conditions:
1307
+ log_info("No valid metadata conditions for deletion")
1308
+ return False
1309
+
1310
+ where_clause = " AND ".join(where_conditions)
1311
+ query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE {where_clause}"
1312
+
1313
+ result = self.scope.query(
1314
+ query,
1315
+ QueryOptions(named_parameters=named_parameters, scan_consistency=QueryScanConsistency.REQUEST_PLUS),
1316
+ )
1317
+ rows = list(result.rows()) # Collect once
1318
+
1319
+ for row in rows:
1320
+ self.collection.remove(row.get("doc_id"))
1321
+ log_info(f"Deleted {len(rows)} documents with metadata {metadata}")
1322
+ return True
1323
+
1324
+ except Exception as e:
1325
+ log_info(f"Error deleting documents with metadata {metadata}: {e}")
1326
+ return False
1327
+
1328
+ def delete_by_content_id(self, content_id: str) -> bool:
1329
+ """
1330
+ Delete documents by content ID.
1331
+
1332
+ Args:
1333
+ content_id (str): The content ID to delete
1334
+
1335
+ Returns:
1336
+ bool: True if documents were deleted, False otherwise
1337
+ """
1338
+ try:
1339
+ log_debug(f"Couchbase VectorDB : Deleting documents with content_id {content_id}")
1340
+
1341
+ query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE content_id = $content_id OR recipes.content_id = $content_id"
1342
+ result = self.scope.query(
1343
+ query,
1344
+ QueryOptions(
1345
+ named_parameters={"content_id": content_id}, scan_consistency=QueryScanConsistency.REQUEST_PLUS
1346
+ ),
1347
+ )
1348
+ rows = list(result.rows()) # Collect once
1349
+
1350
+ for row in rows:
1351
+ self.collection.remove(row.get("doc_id"))
1352
+ log_info(f"Deleted {len(rows)} documents with content_id {content_id}")
1353
+ return True
1354
+
1355
+ except Exception as e:
1356
+ log_info(f"Error deleting documents with content_id {content_id}: {e}")
1357
+ return False
1358
+
1359
+ def _delete_by_content_hash(self, content_hash: str) -> bool:
1360
+ """
1361
+ Delete documents by content hash.
1362
+
1363
+ Args:
1364
+ content_hash (str): The content hash to delete
1365
+
1366
+ Returns:
1367
+ bool: True if documents were deleted, False otherwise
1368
+ """
1369
+ try:
1370
+ log_debug(f"Couchbase VectorDB : Deleting documents with content_hash {content_hash}")
1371
+
1372
+ query = f"SELECT META().id as doc_id, * FROM {self.bucket_name}.{self.scope_name}.{self.collection_name} WHERE content_hash = $content_hash"
1373
+ result = self.scope.query(
1374
+ query,
1375
+ QueryOptions(
1376
+ named_parameters={"content_hash": content_hash}, scan_consistency=QueryScanConsistency.REQUEST_PLUS
1377
+ ),
1378
+ )
1379
+ rows = list(result.rows()) # Collect once
1380
+
1381
+ for row in rows:
1382
+ self.collection.remove(row.get("doc_id"))
1383
+ log_info(f"Deleted {len(rows)} documents with content_hash {content_hash}")
1384
+ return True
1385
+
1386
+ except Exception as e:
1387
+ log_info(f"Error deleting documents with content_hash {content_hash}: {e}")
1388
+ return False
1389
+
1390
+ def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
1391
+ """
1392
+ Update the metadata for documents with the given content_id.
1393
+
1394
+ Args:
1395
+ content_id (str): The content ID to update
1396
+ metadata (Dict[str, Any]): The metadata to update
1397
+ """
1398
+ try:
1399
+ # Query for documents with the given content_id
1400
+ query = f"SELECT META().id as doc_id, meta_data, filters FROM `{self.bucket_name}` WHERE content_id = $content_id"
1401
+ result = self.cluster.query(query, content_id=content_id)
1402
+
1403
+ updated_count = 0
1404
+ for row in result:
1405
+ doc_id = row.get("doc_id")
1406
+ current_metadata = row.get("meta_data", {})
1407
+ current_filters = row.get("filters", {})
1408
+
1409
+ # Merge existing metadata with new metadata
1410
+ if isinstance(current_metadata, dict):
1411
+ updated_metadata = current_metadata.copy()
1412
+ updated_metadata.update(metadata)
1413
+ else:
1414
+ updated_metadata = metadata
1415
+
1416
+ # Merge existing filters with new metadata
1417
+ if isinstance(current_filters, dict):
1418
+ updated_filters = current_filters.copy()
1419
+ updated_filters.update(metadata)
1420
+ else:
1421
+ updated_filters = metadata
1422
+
1423
+ # Update the document
1424
+ try:
1425
+ doc = self.collection.get(doc_id)
1426
+ doc_content = doc.content_as[dict]
1427
+ doc_content["meta_data"] = updated_metadata
1428
+ doc_content["filters"] = updated_filters
1429
+
1430
+ self.collection.upsert(doc_id, doc_content)
1431
+ updated_count += 1
1432
+ except Exception as doc_error:
1433
+ logger.warning(f"Failed to update document {doc_id}: {doc_error}")
1434
+
1435
+ if updated_count == 0:
1436
+ logger.debug(f"No documents found with content_id: {content_id}")
1437
+ else:
1438
+ logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")
1439
+
1440
+ except Exception as e:
1441
+ logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
1442
+ raise
1443
+
1444
+ def get_supported_search_types(self) -> List[str]:
1445
+ """Get the supported search types for this vector database."""
1446
+ return [] # CouchbaseSearch doesn't use SearchType enum