agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1009 @@
1
+ import asyncio
2
+ import json
3
+ import uuid
4
+ from hashlib import md5
5
+ from os import getenv
6
+ from typing import Any, Dict, List, Optional, Tuple, Union
7
+
8
+ try:
9
+ from warnings import filterwarnings
10
+
11
+ import weaviate
12
+ from weaviate import WeaviateAsyncClient
13
+ from weaviate.classes.config import Configure, DataType, Property, Tokenization, VectorDistances
14
+ from weaviate.classes.init import Auth
15
+ from weaviate.classes.query import Filter
16
+
17
+ filterwarnings("ignore", category=ResourceWarning)
18
+ except ImportError:
19
+ raise ImportError("Weaviate is not installed. Install using 'pip install weaviate-client'.")
20
+
21
+ from agno.filters import FilterExpr
22
+ from agno.knowledge.document import Document
23
+ from agno.knowledge.embedder import Embedder
24
+ from agno.knowledge.reranker.base import Reranker
25
+ from agno.utils.log import log_debug, log_info, log_warning, logger
26
+ from agno.vectordb.base import VectorDb
27
+ from agno.vectordb.search import SearchType
28
+ from agno.vectordb.weaviate.index import Distance, VectorIndex
29
+
30
+
31
+ class Weaviate(VectorDb):
32
+ """
33
+ Weaviate class for managing vector operations with Weaviate vector database (v4 client).
34
+ """
35
+
36
def __init__(
    self,
    # Connection/Client params
    wcd_url: Optional[str] = None,
    wcd_api_key: Optional[str] = None,
    client: Optional[weaviate.WeaviateClient] = None,
    local: bool = False,
    # Collection params
    collection: str = "default",
    name: Optional[str] = None,
    description: Optional[str] = None,
    id: Optional[str] = None,
    vector_index: VectorIndex = VectorIndex.HNSW,
    distance: Distance = Distance.COSINE,
    # Search/Embedding params
    embedder: Optional[Embedder] = None,
    search_type: SearchType = SearchType.vector,
    reranker: Optional[Reranker] = None,
    hybrid_search_alpha: float = 0.5,
):
    """
    Set up connection, collection and search settings for a Weaviate-backed vector store.

    Args:
        wcd_url (Optional[str]): Weaviate Cloud URL; falls back to the WCD_URL environment variable.
        wcd_api_key (Optional[str]): Weaviate Cloud API key; falls back to the WCD_API_KEY environment variable.
        client (Optional[weaviate.WeaviateClient]): Pre-built sync client to reuse instead of creating one.
        local (bool): When True, connect to a local instance even if cloud credentials are set.
        collection (str): Name of the Weaviate collection to operate on.
        name (Optional[str]): Name forwarded to the base class.
        description (Optional[str]): Description forwarded to the base class.
        id (Optional[str]): Explicit ID; generated from the connection and collection when omitted.
        vector_index (VectorIndex): Vector index type used when the collection is created.
        distance (Distance): Distance metric used when the collection is created.
        embedder (Optional[Embedder]): Embedder for documents and queries; defaults to OpenAIEmbedder.
        search_type (SearchType): Which search method `search` dispatches to.
        reranker (Optional[Reranker]): Optional reranker applied to search results.
        hybrid_search_alpha (float): Value passed as `alpha` to hybrid queries.
    """
    if id is None:
        from agno.utils.string import generate_id

        # NOTE(review): this spells out how the original one-liner
        # `wcd_url or "local" if local else "default"` is parsed, i.e.
        # `(wcd_url or "local") if local else "default"`. wcd_url therefore only
        # influences the seed when local=True - confirm that is intended before
        # changing it, since a different seed changes the generated ID.
        if local:
            connection_identifier = wcd_url or "local"
        else:
            connection_identifier = "default"
        id = generate_id(f"{connection_identifier}#{collection}")

    # Base class receives the name, description and (possibly generated) ID.
    super().__init__(id=id, name=name, description=description)

    # Connection settings; explicit arguments win over environment variables.
    self.wcd_url = wcd_url or getenv("WCD_URL")
    self.wcd_api_key = wcd_api_key or getenv("WCD_API_KEY")
    self.local = local
    self.client = client
    self.async_client = None

    # Collection settings.
    self.collection = collection
    self.vector_index = vector_index
    self.distance = distance

    # Embedder: fall back to the OpenAI embedder when none is supplied.
    if embedder is None:
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder: Embedder = embedder

    # Search settings.
    self.search_type: SearchType = search_type
    self.reranker: Optional[Reranker] = reranker
    self.hybrid_search_alpha = hybrid_search_alpha
91
+
92
@staticmethod
def _get_doc_uuid(document: Document) -> Tuple[uuid.UUID, str]:
    """
    Derive a UUID from a document's content.

    Args:
        document (Document): Document whose `content` is hashed.

    Returns:
        Tuple[uuid.UUID, str]: The UUID built from the MD5 hex digest of the cleaned
        content, and the cleaned content itself (NUL characters replaced with U+FFFD).
    """
    sanitized = document.content.replace("\x00", "\ufffd")
    digest = md5(sanitized.encode()).hexdigest()
    return uuid.UUID(hex=digest[:32]), sanitized
98
+
99
def get_client(self) -> weaviate.WeaviateClient:
    """Initialize and return a Weaviate client instance.

    Creates a Weaviate Cloud client when both a URL and an API key are configured and
    `local` is False; otherwise connects to a local instance. An existing client stored
    on the instance is reused and reconnected if it is no longer connected.

    Returns:
        weaviate.WeaviateClient: A connected, ready Weaviate client instance.

    Raises:
        ConnectionError: If the client reports it is not ready. This matches the
            exception raised by `get_async_client` and is still caught by callers
            that handle `Exception`.
    """
    if self.client is None:
        if self.wcd_url and self.wcd_api_key and not self.local:
            log_info("Initializing Weaviate Cloud client")
            self.client = weaviate.connect_to_weaviate_cloud(
                cluster_url=self.wcd_url, auth_credentials=Auth.api_key(self.wcd_api_key)
            )
        else:
            log_info("Initializing local Weaviate client")
            self.client = weaviate.connect_to_local()

    # The search methods close the client after each query, so reconnect on demand.
    if not self.client.is_connected():  # type: ignore
        self.client.connect()  # type: ignore

    if not self.client.is_ready():  # type: ignore
        raise ConnectionError("Weaviate client is not ready")

    return self.client
126
+
127
async def get_async_client(self) -> WeaviateAsyncClient:
    """
    Return a connected async Weaviate client, creating it on first use.

    Returns:
        WeaviateAsyncClient: The async client stored on this instance.

    Raises:
        ConnectionError: If the async client reports it is not ready.
    """
    if self.async_client is None:
        # Cloud is used only when both credentials are present and local mode is off.
        if self.local or not (self.wcd_url and self.wcd_api_key):
            log_info("Initializing local Weaviate async client")
            self.async_client = weaviate.use_async_with_local()  # type: ignore
        else:
            log_info("Initializing Weaviate Cloud async client")
            self.async_client = weaviate.use_async_with_weaviate_cloud(
                cluster_url=self.wcd_url,
                auth_credentials=Auth.api_key(self.wcd_api_key),  # type: ignore
            )

    # Async methods close the client when they finish, so reconnect when needed.
    if not self.async_client.is_connected():  # type: ignore
        await self.async_client.connect()  # type: ignore

    ready = await self.async_client.is_ready()  # type: ignore
    if not ready:
        raise ConnectionError("Weaviate async client is not ready")

    return self.async_client  # type: ignore
147
+
148
def create(self) -> None:
    """Create the collection in Weaviate unless it is already present."""
    if self.exists():
        return

    log_debug(f"Creating collection '{self.collection}' in Weaviate.")
    collections = self.get_client().collections

    # meta_data is stored as a JSON string; vectors are supplied by the caller
    # (no server-side vectorizer).
    schema = [
        Property(name="name", data_type=DataType.TEXT),
        Property(name="content", data_type=DataType.TEXT, tokenization=Tokenization.LOWERCASE),
        Property(name="meta_data", data_type=DataType.TEXT),
        Property(name="content_id", data_type=DataType.TEXT),
        Property(name="content_hash", data_type=DataType.TEXT),
    ]
    collections.create(
        name=self.collection,
        properties=schema,
        vectorizer_config=Configure.Vectorizer.none(),
        vector_index_config=self.get_vector_index_config(self.vector_index, self.distance),
    )
    log_debug(f"Collection '{self.collection}' created in Weaviate.")
165
+
166
async def async_create(self) -> None:
    """
    Create the collection in Weaviate asynchronously if it doesn't exist.

    Mirrors `create`: the collection is only created when it is not already present.
    The async client is closed when the call finishes, whether or not it succeeds.
    """
    client = await self.get_async_client()
    try:
        # Same guard as the sync `create`: skip creation for an existing collection.
        if await client.collections.exists(self.collection):
            return

        await client.collections.create(
            name=self.collection,
            properties=[
                Property(name="name", data_type=DataType.TEXT),
                Property(name="content", data_type=DataType.TEXT, tokenization=Tokenization.LOWERCASE),
                Property(name="meta_data", data_type=DataType.TEXT),
                Property(name="content_id", data_type=DataType.TEXT),
                Property(name="content_hash", data_type=DataType.TEXT),
            ],
            vectorizer_config=Configure.Vectorizer.none(),
            vector_index_config=self.get_vector_index_config(self.vector_index, self.distance),
        )
        log_debug(f"Collection '{self.collection}' created in Weaviate asynchronously.")
    finally:
        await client.close()
184
+
185
def content_hash_exists(self, content_hash: str) -> bool:
    """
    Report whether any object in the collection carries the given content hash.

    Args:
        content_hash (str): Value to look for in the `content_hash` property.

    Returns:
        bool: True if at least one matching object is found, False otherwise.
    """
    hash_filter = Filter.by_property("content_hash").equal(content_hash)
    target = self.get_client().collections.get(self.collection)
    # One hit is enough to answer the question.
    found = target.query.fetch_objects(limit=1, filters=hash_filter)
    return len(found.objects) > 0
193
+
194
def name_exists(self, name: str) -> bool:
    """
    Report whether any object in the collection has the given name.

    Args:
        name (str): Value to look for in the `name` property.

    Returns:
        bool: True if at least one matching object is found, False otherwise.
    """
    name_filter = Filter.by_property("name").equal(name)
    target = self.get_client().collections.get(self.collection)
    # One hit is enough to answer the question.
    found = target.query.fetch_objects(limit=1, filters=name_filter)
    return len(found.objects) > 0
210
+
211
async def async_name_exists(self, name: str) -> bool:
    """
    Asynchronously report whether any object in the collection has the given name.

    The async client is closed before returning.

    Args:
        name (str): Value to look for in the `name` property.

    Returns:
        bool: True if at least one matching object is found, False otherwise.
    """
    client = await self.get_async_client()
    try:
        target = client.collections.get(self.collection)
        name_filter = Filter.by_property("name").equal(name)
        found = await target.query.fetch_objects(limit=1, filters=name_filter)
        return len(found.objects) > 0
    finally:
        await client.close()
231
+
232
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Embed each document and insert it into the Weaviate collection.

    Documents whose embedding is still None after embedding are logged and skipped.

    Args:
        content_hash (str): Hash stored on every object and mixed into each object's UUID.
        documents (List[Document]): Documents to insert.
        filters (Optional[Dict[str, Any]]): Extra key/values merged into each document's metadata.
    """
    log_debug(f"Inserting {len(documents)} documents into Weaviate.")
    target = self.get_client().collections.get(self.collection)

    for document in documents:
        document.embed(embedder=self.embedder)
        if document.embedding is None:
            logger.error(f"Document embedding is None: {document.name}")
            continue

        cleaned_content = document.content.replace("\x00", "\ufffd")

        # The UUID combines the document identity (its id, or a hash of its content)
        # with content_hash so the same document under different hashes stays distinct.
        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
        record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
        doc_uuid = uuid.UUID(hex=record_id[:32])

        # Filters are merged into the metadata (in place when the document already has some).
        meta_data = document.meta_data or {}
        if filters:
            meta_data.update(filters)

        # Metadata is stored as a JSON string; empty metadata is stored as None.
        meta_data_str = None
        if meta_data:
            meta_data_str = json.dumps(meta_data)

        properties = {
            "name": document.name,
            "content": cleaned_content,
            "meta_data": meta_data_str,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }
        target.data.insert(properties=properties, vector=document.embedding, uuid=doc_uuid)
        log_debug(f"Inserted document: {document.name} ({meta_data})")
275
+
276
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Insert documents into Weaviate asynchronously.

    Embeddings are computed in one batch when the embedder has batching enabled and
    exposes `async_get_embeddings_batch_and_usage`; otherwise (or when the batch call
    fails for a reason other than rate limiting) each document is embedded individually.
    Documents without an embedding are logged and skipped, and a failure to insert one
    document is logged without aborting the remaining ones. The async client is closed
    when the call finishes.

    Args:
        content_hash (str): Hash stored on every object and mixed into each object's UUID.
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Extra key/values merged into each document's
            metadata, as the sync `insert` does.

    Raises:
        Exception: Re-raises the batch embedding error when it looks like a rate limit.
    """
    log_debug(f"Inserting {len(documents)} documents into Weaviate asynchronously.")
    if not documents:
        return

    if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
        # Use batch embedding when enabled and supported
        try:
            doc_contents = [doc.content for doc in documents]
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Attach the pre-computed embeddings (and usage, when reported) by position.
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # A rate limit must not trigger the per-document fallback: that would
            # issue even more requests and make things worse.
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )

            if is_rate_limit:
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise

            logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
            await asyncio.gather(*embed_tasks, return_exceptions=True)
    else:
        # Use individual embedding
        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
        await asyncio.gather(*embed_tasks, return_exceptions=True)

    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)

        for document in documents:
            try:
                if document.embedding is None:
                    logger.error(f"Document embedding is None: {document.name}")
                    continue

                cleaned_content = document.content.replace("\x00", "\ufffd")
                # Include content_hash in ID to ensure uniqueness across different content hashes
                base_id = document.id or md5(cleaned_content.encode()).hexdigest()
                record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
                doc_uuid = uuid.UUID(hex=record_id[:32])

                # Merge filters with metadata, exactly as the sync insert does;
                # previously the filters argument was accepted but never applied here.
                meta_data = document.meta_data or {}
                if filters:
                    meta_data.update(filters)

                # Serialize meta_data to JSON string
                meta_data_str = json.dumps(meta_data) if meta_data else None

                properties = {
                    "name": document.name,
                    "content": cleaned_content,
                    "meta_data": meta_data_str,
                    "content_id": document.content_id,
                    "content_hash": content_hash,
                }

                # properties, vector and uuid are separate parameters of the insert call
                await collection.data.insert(properties=properties, vector=document.embedding, uuid=doc_uuid)

                log_debug(f"Inserted document asynchronously: {document.name}")

            except Exception as e:
                logger.error(f"Error inserting document {document.name}: {str(e)}")
    finally:
        await client.close()
369
+
370
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Replace whatever is stored under a content hash with the given documents.

    Existing objects with the same content hash are deleted first, then the
    documents are inserted.

    Args:
        content_hash (str): Hash identifying the content being replaced.
        documents (List[Document]): Documents to store.
        filters (Optional[Dict[str, Any]]): Forwarded to `insert`.
    """
    log_debug(f"Upserting {len(documents)} documents into Weaviate.")

    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)

    self.insert(content_hash=content_hash, documents=documents, filters=filters)
382
+
383
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Asynchronously replace whatever is stored under a content hash with the given documents.

    Existing objects with the same content hash are deleted first, then the
    documents are inserted via `async_insert`.

    Args:
        content_hash (str): Hash identifying the content being replaced.
        documents (List[Document]): Documents to store.
        filters (Optional[Dict[str, Any]]): Forwarded to `async_insert`.
    """
    # NOTE(review): the existence check and the delete go through the synchronous
    # client, so they block the event loop while they run.
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)

    await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
399
+
400
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Run the search method selected by `self.search_type`.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters as a dict.
            A list of filter expressions is not supported and is dropped with a warning.

    Returns:
        List[Document]: Matching documents, or an empty list for an unknown search type.
    """
    if isinstance(filters, list):
        log_warning("Filters Expressions are not supported in Weaviate. No filters will be applied.")
        filters = None

    mode = self.search_type
    if mode == SearchType.vector:
        return self.vector_search(query, limit, filters)
    if mode == SearchType.keyword:
        return self.keyword_search(query, limit, filters)
    if mode == SearchType.hybrid:
        return self.hybrid_search(query, limit, filters)

    logger.error(f"Invalid search type '{self.search_type}'.")
    return []
426
+
427
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Asynchronously run the search method selected by `self.search_type`.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters as a dict.
            A list of filter expressions is not supported and is dropped with a warning.

    Returns:
        List[Document]: Matching documents, or an empty list for an unknown search type.
    """
    if isinstance(filters, list):
        log_warning("Filters Expressions are not supported in Weaviate. No filters will be applied.")
        filters = None

    mode = self.search_type
    if mode == SearchType.vector:
        return await self.async_vector_search(query, limit, filters)
    if mode == SearchType.keyword:
        return await self.async_keyword_search(query, limit, filters)
    if mode == SearchType.hybrid:
        return await self.async_hybrid_search(query, limit, filters)

    logger.error(f"Invalid search type '{self.search_type}'.")
    return []
453
+
454
def vector_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Perform a vector (near-vector) search in Weaviate.

    The query is embedded with the configured embedder, results are optionally
    reranked, and the sync client is closed afterwards. Any error is logged and
    yields an empty list.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters to apply.

    Returns:
        List[Document]: List of matching documents, or an empty list on failure.
    """
    try:
        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"Error getting embedding for query: {query}")
            return []

        collection = self.get_client().collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = collection.query.near_vector(
            near_vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results: List[Document] = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Close the client we already hold rather than calling get_client() again:
        # that would reconnect just to close, and would raise from this finally
        # block (discarding the `return []` above) when the connection itself failed.
        if self.client is not None:
            self.client.close()
489
+
490
async def async_vector_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Perform a vector search in Weaviate asynchronously.

    Query errors are logged and yield an empty list. The async client is closed
    whether the query succeeds or fails.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters to apply.

    Returns:
        List[Document]: List of matching documents, or an empty list on failure.
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for query: {query}")
        return []

    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = await collection.query.near_vector(
            near_vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Previously the client was closed only on success, leaking the connection
        # whenever the query raised.
        await client.close()
535
+
536
def keyword_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Perform a keyword (BM25) search over the `content` property in Weaviate.

    Results are optionally reranked and the sync client is closed afterwards.
    Any error is logged and yields an empty list.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters to apply.

    Returns:
        List[Document]: List of matching documents, or an empty list on failure.
    """
    try:
        collection = self.get_client().collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = collection.query.bm25(
            query=query,
            query_properties=["content"],
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results: List[Document] = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Close the client we already hold rather than calling get_client() again:
        # that would raise from this finally block (discarding the `return []`
        # above) when the connection itself failed.
        if self.client is not None:
            self.client.close()
567
+
568
async def async_keyword_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Perform a keyword (BM25) search in Weaviate asynchronously.

    Query errors are logged and yield an empty list. The async client is closed
    whether the query succeeds or fails.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters to apply.

    Returns:
        List[Document]: List of matching documents, or an empty list on failure.
    """
    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)

        filter_expr = self._build_filter_expression(filters)
        response = await collection.query.bm25(
            query=query,
            query_properties=["content"],
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Previously the client was closed only on success, leaking the connection
        # whenever the query raised.
        await client.close()
609
+
610
def hybrid_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Perform a hybrid search combining vector and keyword search in Weaviate.

    The query is embedded with the configured embedder and `self.hybrid_search_alpha`
    is passed as the hybrid `alpha`. Results are optionally reranked and the sync
    client is closed afterwards. Any error is logged and yields an empty list.

    Args:
        query (str): The keyword query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters to apply.

    Returns:
        List[Document]: List of matching documents, or an empty list on failure.
    """
    try:
        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"Error getting embedding for query: {query}")
            return []

        collection = self.get_client().collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = collection.query.hybrid(
            query=query,
            vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            query_properties=["content"],
            alpha=self.hybrid_search_alpha,
            filters=filter_expr,
        )

        search_results: List[Document] = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Close the client we already hold rather than calling get_client() again:
        # that would reconnect just to close, and would raise from this finally
        # block (discarding the `return []` above) when the connection itself failed.
        if self.client is not None:
            self.client.close()
648
+
649
async def async_hybrid_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Perform a hybrid search combining vector and keyword search in Weaviate asynchronously.

    Query errors are logged and yield an empty list. The async client is closed
    whether the query succeeds or fails.

    Args:
        query (str): The keyword query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Metadata filters to apply.

    Returns:
        List[Document]: List of matching documents, or an empty list on failure.
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for query: {query}")
        return []

    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)

        filter_expr = self._build_filter_expression(filters)
        response = await collection.query.hybrid(
            query=query,
            vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            query_properties=["content"],
            alpha=self.hybrid_search_alpha,
            filters=filter_expr,
        )

        search_results = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Previously the client was closed only on success, leaking the connection
        # whenever the query raised.
        await client.close()
697
+
698
def exists(self) -> bool:
    """
    Report whether the configured collection is present in Weaviate.

    Returns:
        bool: The result of the client's collection-existence check.
    """
    collections = self.get_client().collections
    return collections.exists(self.collection)
701
+
702
async def async_exists(self) -> bool:
    """
    Asynchronously report whether the configured collection is present in Weaviate.

    The async client is closed before returning.

    Returns:
        bool: The result of the client's collection-existence check.
    """
    client = await self.get_async_client()
    try:
        present = await client.collections.exists(self.collection)
        return present
    finally:
        await client.close()
709
+
710
def drop(self) -> None:
    """Delete the Weaviate collection; does nothing when it does not exist."""
    if not self.exists():
        return

    log_debug(f"Deleting collection '{self.collection}' from Weaviate.")
    self.get_client().collections.delete(self.collection)
715
+
716
async def async_drop(self) -> None:
    """
    Delete the Weaviate collection asynchronously; does nothing when it does not exist.

    The async client used for the deletion is closed afterwards.
    """
    if not await self.async_exists():
        return

    log_debug(f"Deleting collection '{self.collection}' from Weaviate asynchronously.")
    client = await self.get_async_client()
    try:
        await client.collections.delete(self.collection)
    finally:
        await client.close()
725
+
726
def optimize(self) -> None:
    """Optimization hook (e.g., rebuilding indexes); currently a no-op."""
    return None
729
+
730
def delete(self) -> bool:
    """
    Delete all records by dropping the collection.

    Returns:
        bool: Always True once `drop` has returned.
    """
    self.drop()
    dropped = True
    return dropped
734
+
735
def delete_by_id(self, id: str) -> bool:
    """
    Delete a single object by its ID.

    Args:
        id (str): UUID of the object, either as 32 hex characters or in any form
            accepted by `uuid.UUID`.

    Returns:
        bool: True when the object was deleted, did not exist, or the ID is not a
        valid UUID (treated as non-existent); False when an error occurred.
    """
    try:
        try:
            if len(id) == 32:
                doc_uuid = uuid.UUID(hex=id[:32])
            else:
                doc_uuid = uuid.UUID(id)
        except ValueError:
            # An unparsable ID cannot refer to a stored object.
            log_info(f"Invalid UUID format for ID '{id}' - treating as non-existent")
            return True

        target = self.get_client().collections.get(self.collection)

        if target.data.exists(doc_uuid):
            target.data.delete_by_id(doc_uuid)
            log_info(f"Deleted document with ID '{id}' from collection '{self.collection}'.")
        else:
            log_info(f"Document with ID {id} does not exist")
        return True
    except Exception as e:
        logger.error(f"Error deleting document by ID '{id}': {e}")
        return False
756
+
757
def delete_by_name(self, name: str) -> bool:
    """
    Delete every object whose `name` property equals the given name.

    Args:
        name (str): Name to match.

    Returns:
        bool: True if the delete call completed, False if an error occurred.
    """
    try:
        name_filter = Filter.by_property("name").equal(name)
        target = self.get_client().collections.get(self.collection)
        target.data.delete_many(where=name_filter)
    except Exception as e:
        logger.error(f"Error deleting documents by name '{name}': {e}")
        return False

    log_info(f"Deleted documents with name '{name}' from collection '{self.collection}'.")
    return True
770
+
771
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """
    Delete every object whose stored metadata matches the given key/value pairs.

    Args:
        metadata (Dict[str, Any]): Key/value pairs turned into a filter by
            `_build_filter_expression`.

    Returns:
        bool: True if the delete call completed; False if no filter could be built
        or an error occurred.
    """
    try:
        target = self.get_client().collections.get(self.collection)

        where_clause = self._build_filter_expression(metadata)
        if where_clause is not None:
            target.data.delete_many(where=where_clause)
            log_info(f"Deleted documents with metadata '{metadata}' from collection '{self.collection}'.")
            return True

        # Without a filter there is nothing safe to delete.
        log_info(f"No valid filter could be built for metadata: {metadata}")
        return False

    except Exception as e:
        logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
        return False
790
+
791
def delete_by_content_id(self, content_id: str) -> bool:
    """
    Delete every object whose `content_id` property equals the given ID.

    Args:
        content_id (str): Content ID to match.

    Returns:
        bool: True if the delete call completed, False if an error occurred.
    """
    try:
        id_filter = Filter.by_property("content_id").equal(content_id)
        target = self.get_client().collections.get(self.collection)
        target.data.delete_many(where=id_filter)
    except Exception as e:
        logger.error(f"Error deleting documents by content_id '{content_id}': {e}")
        return False

    log_info(f"Deleted documents with content_id '{content_id}' from collection '{self.collection}'.")
    return True
804
+
805
def delete_by_content_hash(self, content_hash: str) -> bool:
    """
    Delete every object whose `content_hash` property equals the given hash.

    Args:
        content_hash (str): Content hash to match.

    Returns:
        bool: True if the delete call completed, False if an error occurred.
    """
    try:
        hash_filter = Filter.by_property("content_hash").equal(content_hash)
        target = self.get_client().collections.get(self.collection)
        target.data.delete_many(where=hash_filter)
    except Exception as e:
        logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
        return False
    return True
814
+
815
def get_vector_index_config(self, index_type: VectorIndex, distance_metric: Distance):
    """
    Build the Weaviate vector index configuration for an index type and distance metric.

    Args:
        index_type (VectorIndex): Type of vector index (HNSW, FLAT, DYNAMIC).
        distance_metric (Distance): Distance metric; its member name is looked up
            on Weaviate's `VectorDistances`.

    Returns:
        The configuration object produced by the matching `Configure.VectorIndex` factory.
    """
    # Map our Distance member onto the Weaviate member with the same name.
    weaviate_distance = getattr(VectorDistances, distance_metric.name)

    # One factory per supported index type; only the requested one is invoked.
    factories = {
        VectorIndex.HNSW: Configure.VectorIndex.hnsw,
        VectorIndex.FLAT: Configure.VectorIndex.flat,
        VectorIndex.DYNAMIC: Configure.VectorIndex.dynamic,
    }
    build = factories[index_type]
    return build(distance_metric=weaviate_distance)
837
+
838
def get_search_results(self, response: Any) -> List[Document]:
    """
    Convert a Weaviate query response into Document objects.

    Args:
        response (Any): Weaviate response exposing an `objects` sequence.

    Returns:
        List[Document]: One Document per returned object, in response order.
    """
    documents: List[Document] = []
    for item in response.objects:
        props = item.properties

        # meta_data is stored as a JSON string; a missing or empty value means no metadata.
        raw_meta = props.get("meta_data")
        meta_data = json.loads(raw_meta) if raw_meta else {}

        # A dict of vectors is unwrapped through its "default" entry.
        vector = item.vector
        if isinstance(vector, dict):
            vector = vector["default"]

        documents.append(
            Document(
                name=props.get("name"),
                meta_data=meta_data,
                content=props.get("content", ""),
                embedder=self.embedder,
                embedding=vector,
                content_id=props.get("content_id"),
            )
        )

    return documents
866
+
867
def upsert_available(self) -> bool:
    """
    Report whether this vector store supports upsert.

    Returns:
        bool: Always True.
    """
    supported = True
    return supported
870
+
871
def _build_filter_expression(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]):
    """
    Build a filter expression for Weaviate queries.

    Metadata is stored as a JSON string in the `meta_data` property, so each
    key/value pair becomes a `like` condition on the JSON fragment that
    `json.dumps` produces for that pair. All conditions are combined with AND.

    Args:
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Dictionary of filters
            to apply. A list of filter expressions is not supported and yields None
            with a warning.

    Returns:
        Optional[Filter]: The constructed filter expression, or None if no filters were
        provided, the filters are unsupported, or building the expression failed.
    """
    if not filters:
        return None
    if isinstance(filters, list):
        log_warning("Filters Expressions are not supported in Weaviate. No filters will be applied.")
        return None

    try:
        filter_expr = None
        for key, value in filters.items():
            # Serialize the value the same way the stored metadata was serialized.
            # Wrapping every scalar in quotes (the previous behavior) could never
            # match numbers, booleans or None, nor strings that need JSON escaping.
            pattern = f"{json.dumps(str(key))}: {json.dumps(value)}"
            condition = Filter.by_property("meta_data").like(f"*{pattern}*")

            # Chain the conditions with AND, starting from the first one.
            filter_expr = condition if filter_expr is None else filter_expr & condition

        return filter_expr

    except Exception as e:
        logger.error(f"Error building filter expression: {e}")
        return None
916
+
917
def id_exists(self, id: str) -> bool:
    """Check if a document with the given ID exists in the collection.

    Args:
        id (str): The document ID to check, either as 32 bare hex digits or in
            any other textual form accepted by ``uuid.UUID``.

    Returns:
        bool: The result of the collection's existence check. False when the
        ID is not a valid UUID or when any other error occurs (both are logged).
    """
    try:
        # uuid.UUID accepts both bare-hex and hyphenated strings, so no
        # length-based special case is needed.
        doc_uuid = uuid.UUID(id)
        collection = self.get_client().collections.get(self.collection)
        return collection.data.exists(doc_uuid)
    except ValueError:
        log_info(f"Invalid UUID format for ID '{id}' - treating as non-existent")
        return False
    except Exception as e:
        logger.error(f"Error checking if ID '{id}' exists: {e}")
        return False
936
+
937
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Merge ``metadata`` into every stored object whose ``content_id`` matches.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to merge into each matching object

    Raises:
        Exception: Any error raised while querying or updating is logged and
            re-raised.
    """
    try:
        weaviate_client = self.get_client()
        collection = weaviate_client.collections.get(self.collection)

        # The query API takes its filter through the `filters` keyword
        query_result = collection.query.fetch_objects(
            filters=Filter.by_property("content_id").equal(content_id),
            limit=1000,  # At most 1000 matching objects are fetched and updated
        )

        if not query_result.objects:
            logger.debug(f"No documents found with content_id: {content_id}")
            return

        updated_count = 0
        for obj in query_result.objects:
            # Work on a shallow copy of the object's current properties
            updated_properties = (obj.properties or {}).copy()

            stored_meta = updated_properties.get("meta_data")
            if isinstance(stored_meta, dict):
                # Already a mapping: merge and keep the same representation
                updated_properties["meta_data"] = {**stored_meta, **metadata}
            else:
                # meta_data is read elsewhere in this class via json.loads, i.e. it
                # is kept as a JSON string. Decode it, merge, and re-encode so the
                # existing keys survive and the stored form stays a string.
                merged_meta: Dict[str, Any] = {}
                if isinstance(stored_meta, str) and stored_meta:
                    try:
                        decoded = json.loads(stored_meta)
                    except ValueError:
                        decoded = None
                    if isinstance(decoded, dict):
                        merged_meta.update(decoded)
                merged_meta.update(metadata)
                updated_properties["meta_data"] = json.dumps(merged_meta)

            # NOTE(review): "filters" is written as a plain dict, unchanged from the
            # previous behaviour - confirm the collection has such a property.
            stored_filters = updated_properties.get("filters")
            if isinstance(stored_filters, dict):
                stored_filters.update(metadata)
            else:
                updated_properties["filters"] = metadata

            collection.data.update(uuid=obj.uuid, properties=updated_properties)
            updated_count += 1

        logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
989
+
990
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Remove every object whose ``content_hash`` property equals the given hash.

    Args:
        content_hash (str): The hash value to match.

    Returns:
        bool: True when the delete call completed, False when any error was
        raised (the error is logged).
    """
    try:
        target = self.get_client().collections.get(self.collection)

        # Delete in one call by filtering on the content_hash property
        target.data.delete_many(
            where=Filter.by_property("content_hash").equal(content_hash),
        )

        log_info(f"Deleted documents with content_hash '{content_hash}' from collection '{self.collection}'.")
    except Exception as e:
        logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
        return False
    else:
        return True
1006
+
1007
def get_supported_search_types(self) -> List[str]:
    """List the search types this vector database handles: vector, keyword and hybrid."""
    supported = (SearchType.vector, SearchType.keyword, SearchType.hybrid)
    return list(supported)