agno 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,995 @@
1
+ import asyncio
2
+ import json
3
+ from hashlib import md5
4
+ from os import getenv
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ try:
8
+ import lancedb
9
+ import pyarrow as pa
10
+ except ImportError:
11
+ raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
12
+
13
+ from agno.filters import FilterExpr
14
+ from agno.knowledge.document import Document
15
+ from agno.knowledge.embedder import Embedder
16
+ from agno.knowledge.reranker.base import Reranker
17
+ from agno.utils.log import log_debug, log_info, log_warning, logger
18
+ from agno.vectordb.base import VectorDb
19
+ from agno.vectordb.distance import Distance
20
+ from agno.vectordb.search import SearchType
21
+
22
+
23
+ class LanceDb(VectorDb):
24
+ """
25
+ LanceDb class for managing vector operations with LanceDb
26
+
27
+ Args:
28
+ uri: The URI of the LanceDB database.
29
+ name: Name of the vector database.
30
+ description: Description of the vector database.
31
+ connection: The LanceDB connection to use.
32
+ table: The LanceDB table instance to use.
33
+ async_connection: The LanceDB async connection to use.
34
+ async_table: The LanceDB async table instance to use.
35
+ table_name: The name of the LanceDB table to use.
36
+ api_key: The API key to use for the LanceDB connection.
37
+ embedder: The embedder to use when embedding the document contents.
38
+ search_type: The search type to use when searching for documents.
39
+ distance: The distance metric to use when searching for documents.
40
+ nprobes: The number of probes to use when searching for documents.
41
+ reranker: The reranker to use when reranking documents.
42
+ use_tantivy: Whether to use Tantivy for full text search.
43
+ on_bad_vectors: What to do if the vector is bad. One of "error", "drop", "fill", "null".
44
+ fill_value: The value to fill the vector with if on_bad_vectors is "fill".
45
+ """
46
+
47
def __init__(
    self,
    uri: lancedb.URI = "/tmp/lancedb",
    name: Optional[str] = None,
    description: Optional[str] = None,
    id: Optional[str] = None,
    connection: Optional[lancedb.LanceDBConnection] = None,
    table: Optional[lancedb.db.LanceTable] = None,
    async_connection: Optional[lancedb.AsyncConnection] = None,
    async_table: Optional[lancedb.db.AsyncTable] = None,
    table_name: Optional[str] = None,
    api_key: Optional[str] = None,
    embedder: Optional[Embedder] = None,
    search_type: SearchType = SearchType.vector,
    distance: Distance = Distance.cosine,
    nprobes: Optional[int] = None,
    reranker: Optional[Reranker] = None,
    use_tantivy: bool = True,
    on_bad_vectors: Optional[str] = None,  # One of "error", "drop", "fill", "null".
    fill_value: Optional[float] = None,  # Only used if on_bad_vectors is "fill"
):
    """
    Set up the embedder, the LanceDB connection and the table to operate on.

    The table is resolved in this order:
      1. `table_name`, when the connection already lists a table with that name;
      2. the caller-supplied `table`;
      3. a new table created through `self._init_table()` under `table_name`.

    Args:
        uri: The URI of the LanceDB database.
        name: Name of the vector database.
        description: Description of the vector database.
        id: Identifier of the vector database. Generated from `uri` and `table_name` when omitted.
        connection: The LanceDB connection to use. A new one is opened from `uri` when omitted.
        table: The LanceDB table instance to use.
        async_connection: The LanceDB async connection to use.
        async_table: The LanceDB async table instance to use.
        table_name: The name of the LanceDB table to use.
        api_key: The API key to use for the LanceDB connection.
        embedder: The embedder to use when embedding the document contents.
        search_type: The search type to use when searching for documents.
        distance: The distance metric to use when searching for documents.
        nprobes: The number of probes to use when searching for documents.
        reranker: The reranker to use when reranking documents.
        use_tantivy: Whether to use Tantivy for full text search.
        on_bad_vectors: What to do if the vector is bad. One of "error", "drop", "fill", "null".
        fill_value: The value to fill the vector with if on_bad_vectors is "fill".

    Raises:
        ValueError: If the embedder has no dimensions, if `table` is not a
            `lancedb.db.LanceTable`, or if neither `table` nor `table_name` is given.
        ImportError: If keyword/hybrid search with Tantivy is requested but
            `tantivy` is not installed.
    """
    # Dynamic ID generation based on unique identifiers
    if id is None:
        from agno.utils.string import generate_id

        table_identifier = table_name or "default_table"
        seed = f"{uri}#{table_identifier}"
        id = generate_id(seed)

    # Initialize base class with name, description, and generated ID
    super().__init__(id=id, name=name, description=description)

    # Embedder for embedding the document contents
    if embedder is None:
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder: Embedder = embedder
    self.dimensions: Optional[int] = self.embedder.dimensions

    if self.dimensions is None:
        raise ValueError("Embedder.dimensions must be set.")

    # Search type
    self.search_type: SearchType = search_type
    # Distance metric
    self.distance: Distance = distance

    # Remote LanceDB connection details
    self.api_key: Optional[str] = api_key

    # LanceDB connection details
    self.uri: lancedb.URI = uri
    self.connection: lancedb.DBConnection = connection or lancedb.connect(uri=self.uri, api_key=api_key)
    self.table: Optional[lancedb.db.LanceTable] = table

    self.async_connection: Optional[lancedb.AsyncConnection] = async_connection
    self.async_table: Optional[lancedb.db.AsyncTable] = async_table

    # Start from the caller-supplied table; an existing table named `table_name` takes precedence.
    resolved_table = table
    if table_name and table_name in self.connection.table_names():
        try:
            resolved_table = self.connection.open_table(name=table_name)
        except ValueError as e:
            # Table might have been dropped by async operations but sync connection hasn't updated
            if "was not found" not in str(e):
                raise
            log_debug(f"Table {table_name} listed but not accessible, will create if needed")
            resolved_table = table

    # LanceDB table details
    # Identity check rather than truthiness: a table object's truth value may
    # depend on its contents, and an empty table is still a valid table.
    if resolved_table is not None:
        # Only a table handed in by the caller is type-checked; one opened via the connection is trusted.
        if resolved_table is table and not isinstance(table, lancedb.db.LanceTable):
            raise ValueError(f"table should be an instance of lancedb.db.LanceTable, got {type(table)}")
        self.table = resolved_table
        self.table_name = self.table.name
        # Column names are taken positionally from the existing schema.
        self._vector_col = self.table.schema.names[0]
        self._id = self.table.schema.names[1]  # type: ignore
    else:
        if not table_name:
            raise ValueError("Either table or table_name should be provided.")
        self.table_name = table_name
        self._id = "id"
        self._vector_col = "vector"
        self.table = self._init_table()

    self.reranker: Optional[Reranker] = reranker
    self.nprobes: Optional[int] = nprobes
    self.on_bad_vectors: Optional[str] = on_bad_vectors
    self.fill_value: Optional[float] = fill_value
    self.fts_index_exists = False
    self.use_tantivy = use_tantivy

    # Fail early if full text search is requested but its backend is missing.
    if self.use_tantivy and (self.search_type in [SearchType.keyword, SearchType.hybrid]):
        try:
            import tantivy  # noqa: F401
        except ImportError:
            raise ImportError(
                "Please install tantivy-py `pip install tantivy` to use the full text search feature."  # noqa: E501
            )

    log_debug(f"Initialized LanceDb with table: '{self.table_name}'")
159
+
160
+ def _prepare_vector(self, embedding) -> List[float]:
161
+ """Prepare vector embedding for insertion, ensuring correct dimensions and type."""
162
+ if embedding is not None and len(embedding) > 0:
163
+ # Convert to list of floats
164
+ vector = [float(x) for x in embedding]
165
+
166
+ # Ensure vector has correct dimensions if specified
167
+ if self.dimensions:
168
+ if len(vector) != self.dimensions:
169
+ if len(vector) > self.dimensions:
170
+ # Truncate if too long
171
+ vector = vector[: self.dimensions]
172
+ log_debug(f"Truncated vector from {len(embedding)} to {self.dimensions} dimensions")
173
+ else:
174
+ # Pad with zeros if too short
175
+ vector.extend([0.0] * (self.dimensions - len(vector)))
176
+ log_debug(f"Padded vector from {len(embedding)} to {self.dimensions} dimensions")
177
+
178
+ return vector
179
+ else:
180
+ # Fallback if embedding is None or empty
181
+ return [0.0] * (self.dimensions or 1536)
182
+
183
+ async def _get_async_connection(self) -> lancedb.AsyncConnection:
184
+ """Get or create an async connection to LanceDB."""
185
+ if self.async_connection is None:
186
+ self.async_connection = await lancedb.connect_async(self.uri)
187
+ # Only try to open table if it exists and we don't have it already
188
+ if self.async_table is None:
189
+ table_names = await self.async_connection.table_names()
190
+ if self.table_name in table_names:
191
+ try:
192
+ self.async_table = await self.async_connection.open_table(self.table_name)
193
+ except ValueError:
194
+ # Table might have been dropped by another operation
195
+ pass
196
+ return self.async_connection
197
+
198
+ def _refresh_sync_connection(self) -> None:
199
+ """Refresh the sync connection to see changes made by async operations."""
200
+ try:
201
+ # Re-establish sync connection to see async changes
202
+ if self.connection and self.table_name in self.connection.table_names():
203
+ self.table = self.connection.open_table(self.table_name)
204
+ except Exception as e:
205
+ log_debug(f"Could not refresh sync connection: {e}")
206
+ # If refresh fails, we can still function but sync methods might not see async changes
207
+
208
def create(self) -> None:
    """Initialise the table via ``_init_table`` unless ``exists()`` reports it is already there."""
    if self.exists():
        return
    self.table = self._init_table()
212
+
213
async def async_create(self) -> None:
    """Create the table through the async API unless it already exists.

    If async creation fails, the error is logged and creation is retried
    through the sync ``_init_table`` path; a failure there is logged and re-raised.
    """
    if await self.async_exists():
        return

    try:
        conn = await self._get_async_connection()
        schema = self._base_schema()

        log_debug(f"Creating table asynchronously: {self.table_name}")
        create_options = {"schema": schema, "mode": "overwrite", "exist_ok": True}
        self.async_table = await conn.create_table(self.table_name, **create_options)
        log_debug(f"Successfully created async table: {self.table_name}")
    except Exception as e:
        logger.error(f"Error creating async table: {e}")
        # Second attempt through the sync connection.
        try:
            log_debug("Falling back to sync table creation")
            self.table = self._init_table()
            log_debug("Sync table created successfully")
        except Exception as sync_e:
            logger.error(f"Sync table creation also failed: {sync_e}")
            raise
235
+
236
def _base_schema(self) -> pa.Schema:
    """Build the Arrow schema: vector column, string id column and string ``payload`` column."""
    element_type = pa.float32()
    if self.dimensions:
        # Fixed-size list for the vector field, as required by LanceDB.
        vector_type = pa.list_(element_type, self.dimensions)
    else:
        # Dimension unknown (should be rare): fall back to a variable-length list.
        vector_type = pa.list_(element_type)

    columns = [
        pa.field(self._vector_col, vector_type),
        pa.field(self._id, pa.string()),
        pa.field("payload", pa.string()),
    ]
    return pa.schema(columns)
251
+
252
def _init_table(self) -> lancedb.db.LanceTable:
    """Create the table ``self.table_name`` with the base schema in ``overwrite`` mode and return it."""
    schema = self._base_schema()

    log_info(f"Creating table: {self.table_name}")
    create_options = {"name": self.table_name, "schema": schema, "mode": "overwrite"}
    if self.api_key or getenv("LANCEDB_API_KEY"):
        log_info("API key found, creating table in remote LanceDB")
    else:
        # exist_ok is only passed when no API key is configured.
        create_options["exist_ok"] = True
    return self.connection.create_table(**create_options)  # type: ignore
262
+
263
def doc_exists(self, document: Document) -> bool:
    """
    Check whether a row for this document's content is already stored.

    The row id is the MD5 hex digest of the content with NUL characters
    replaced by U+FFFD.

    Args:
        document (Document): Document to validate

    Returns:
        bool: True when a matching row is found; False when the table is not
        initialised, nothing matches, or the lookup raises.
    """
    if self.table is None:
        return False

    try:
        sanitized = document.content.replace("\x00", "\ufffd")
        digest = md5(sanitized.encode()).hexdigest()
        matches = self.table.search().where(f"{self._id}='{digest}'").to_arrow()
        return len(matches) > 0
    except Exception:
        # Search sometimes fails with stale cache data; treat that as "not found".
        return False
281
+
282
async def async_doc_exists(self, document: Document) -> bool:
    """
    Async wrapper around ``doc_exists``.

    Reopens the sync table handle first (when a connection is available) and
    then delegates to the synchronous check.

    Args:
        document (Document): Document to validate

    Returns:
        bool: True if document exists, False otherwise
    """
    conn = self.connection
    if conn:
        self.table = conn.open_table(name=self.table_name)
    found = self.doc_exists(document)
    return found
295
+
296
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Insert documents into the database, skipping content that is already stored.

    Each row's id is the MD5 hex digest of the document content (NUL characters
    replaced by U+FFFD); the remaining document fields are stored as a JSON
    string in the ``payload`` column.

    Args:
        content_hash (str): Hash recorded in every row's payload under ``content_hash``
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to add as metadata to documents
    """
    if len(documents) <= 0:
        log_info("No documents to insert")
        return

    # Check the table before embedding so no embedding calls are wasted on an insert that cannot happen.
    if self.table is None:
        logger.error("Table not initialized. Please create the table first")
        return

    log_debug(f"Inserting {len(documents)} documents")
    data = []
    seen_ids = set()

    for document in documents:
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        # Ids are content hashes, so identical content within one batch would create duplicate rows.
        if doc_id in seen_ids or self.doc_exists(document):
            continue
        seen_ids.add(doc_id)

        # Add filters to document metadata if provided
        if filters:
            meta_data = document.meta_data.copy() if document.meta_data else {}
            meta_data.update(filters)
            document.meta_data = meta_data

        # Embed only when no embedding is present: the async entry points embed
        # up front and delegate here, and re-embedding would redo that work.
        if document.embedding is None or len(document.embedding) == 0:
            document.embed(embedder=self.embedder)

        payload = {
            "name": document.name,
            "meta_data": document.meta_data,
            "content": cleaned_content,
            "usage": document.usage,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }
        data.append(
            {
                # Use the tracked column names so tables whose schema names the
                # columns differently from "id"/"vector" are written correctly.
                self._id: doc_id,
                self._vector_col: self._prepare_vector(document.embedding),
                "payload": json.dumps(payload),
            }
        )
        log_debug(f"Parsed document: {document.name} ({document.meta_data})")

    if not data:
        log_debug("No new data to insert")
        return

    if self.on_bad_vectors is not None:
        self.table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value)
    else:
        self.table.add(data)

    log_debug(f"Inserted {len(data)} documents")
355
+
356
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Asynchronously insert documents into the database.

    Note: Embedding is done asynchronously (batched when the embedder supports
    it), then the write goes through the sync ``insert`` method because LanceDB
    async insert has sync/async table synchronization issues causing empty vectors.

    Args:
        content_hash (str): Passed through to ``insert``
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
    """
    if len(documents) <= 0:
        log_debug("No documents to insert")
        return

    log_debug(f"Inserting {len(documents)} documents")

    batch_capable = self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage")
    if not batch_capable:
        per_doc_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
        await asyncio.gather(*per_doc_tasks, return_exceptions=True)
    else:
        try:
            texts = [doc.content for doc in documents]
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(texts)

            # Documents beyond the number of returned embeddings are left untouched.
            for j, (doc, vector) in enumerate(zip(documents, embeddings)):
                doc.embedding = vector
                doc.usage = usages[j] if j < len(usages) else None
        except Exception as e:
            message = str(e).lower()
            rate_limit_markers = ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            if any(marker in message for marker in rate_limit_markers):
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise e

            logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            per_doc_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
            await asyncio.gather(*per_doc_tasks, return_exceptions=True)

    # Use sync insert to avoid sync/async table synchronization issues
    self.insert(content_hash, documents, filters)
404
+
405
def upsert_available(self) -> bool:
    """Report that this store implements ``upsert`` (always True)."""
    return True
408
+
409
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Replace whatever is stored under ``content_hash`` with the given documents.

    Rows already carrying this content hash are deleted first, then the
    documents are inserted through ``insert``.

    Args:
        content_hash (str): Content hash identifying the rows to replace
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting
    """
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)
    self.insert(content_hash=content_hash, documents=documents, filters=filters)
420
+
421
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Asynchronously upsert documents into the database.

    Note: Uses async embedding for performance, then sync upsert for reliability.

    Args:
        content_hash (str): Content hash identifying the rows to replace
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting

    Raises:
        Exception: Re-raises the batch embedding error when it looks like a rate limit.
    """
    if len(documents) > 0:
        batch_capable = self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage")
        if batch_capable:
            try:
                doc_contents = [doc.content for doc in documents]
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
                for j, doc in enumerate(documents):
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
            except Exception as e:
                error_str = str(e).lower()
                is_rate_limit = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )
                # Log both outcomes, matching async_insert, so a failed batch call is never silent.
                if is_rate_limit:
                    logger.error(f"Rate limit detected during batch embedding. {e}")
                    raise
                logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
                await asyncio.gather(*embed_tasks, return_exceptions=True)
        else:
            embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
            await asyncio.gather(*embed_tasks, return_exceptions=True)

    # Use sync upsert for reliability
    self.upsert(content_hash=content_hash, documents=documents, filters=filters)
456
+
457
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Run the configured search type for ``query`` and return matching documents.

    Dictionary filters are applied in Python after the search by exact match
    on each document's ``meta_data``; list-style filter expressions are ignored
    with a warning. A configured reranker is applied to the filtered results.

    Args:
        query (str): Query string to search for
        limit (int): Maximum number of results to return
        filters (Optional[Dict[str, Any]]): Filters to apply to the search

    Returns:
        List[Document]: List of matching documents
    """
    if self.connection:
        self.table = self.connection.open_table(name=self.table_name)

    if isinstance(filters, list):
        log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
        filters = None

    if self.search_type == SearchType.vector:
        run_search = self.vector_search
    elif self.search_type == SearchType.keyword:
        run_search = self.keyword_search
    elif self.search_type == SearchType.hybrid:
        run_search = self.hybrid_search
    else:
        logger.error(f"Invalid search type '{self.search_type}'.")
        return []

    raw_results = run_search(query, limit)
    if raw_results is None:
        return []

    found = self._build_search_results(raw_results)

    # Keep only documents whose metadata carries every filter key with an equal value.
    if filters and found:
        found = [
            doc
            for doc in found
            if doc.meta_data is not None
            and not any(key not in doc.meta_data or doc.meta_data[key] != value for key, value in filters.items())
        ]

    if self.reranker and found:
        found = self.reranker.rerank(query=query, documents=found)

    log_info(f"Found {len(found)} documents")
    return found
519
+
520
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Async entry point for ``search``.

    Note: Delegates to the sync ``search`` method since LanceDB async search has
    sync/async table synchronization issues.

    Args:
        query (str): Query string to search for
        limit (int): Maximum number of results to return
        filters (Optional[Dict[str, Any]]): Filters to apply to the search

    Returns:
        List[Document]: List of matching documents
    """
    matches = self.search(query=query, limit=limit, filters=filters)
    return matches
539
+
540
def vector_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Embed ``query`` and run a vector search on the table.

    Returns the query builder's ``to_pandas()`` result, or ``None`` when the
    query cannot be embedded or the table is not initialised. ``filters`` is
    accepted but not used by this method.
    """
    embedding = self.embedder.get_embedding(query)
    if embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return None

    table = self.table
    if table is None:
        logger.error("Table not initialized. Please create the table first")
        return None  # type: ignore

    builder = table.search(query=embedding, vector_column_name=self._vector_col)
    builder = builder.limit(limit)

    if self.nprobes:
        builder.nprobes(self.nprobes)

    return builder.to_pandas()
561
+
562
def hybrid_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Run a hybrid (vector + text) search for ``query``.

    Creates the full-text index on ``payload`` the first time it is needed.
    Returns the query builder's ``to_pandas()`` result, or an empty list when
    the query cannot be embedded or the table is not initialised. ``filters``
    is accepted but not used by this method.
    """
    embedding = self.embedder.get_embedding(query)
    if embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return []

    table = self.table
    if table is None:
        logger.error("Table not initialized. Please create the table first")
        return []

    if not self.fts_index_exists:
        table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
        self.fts_index_exists = True

    builder = table.search(vector_column_name=self._vector_col, query_type="hybrid")
    builder = builder.vector(embedding).text(query).limit(limit)

    if self.nprobes:
        builder.nprobes(self.nprobes)

    return builder.to_pandas()
592
+
593
def keyword_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Run a full-text (``fts``) search for ``query``.

    Creates the full-text index on ``payload`` the first time it is needed.
    Returns the query builder's ``to_pandas()`` result, or an empty list when
    the table is not initialised. ``filters`` is accepted but not used by this method.
    """
    table = self.table
    if table is None:
        logger.error("Table not initialized. Please create the table first")
        return []

    if not self.fts_index_exists:
        table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
        self.fts_index_exists = True

    builder = table.search(query=query, query_type="fts")
    return builder.limit(limit).to_pandas()
610
+
611
+ def _build_search_results(self, results) -> List[Document]: # TODO: typehint pandas?
612
+ search_results: List[Document] = []
613
+ try:
614
+ for _, item in results.iterrows():
615
+ payload = json.loads(item["payload"])
616
+ search_results.append(
617
+ Document(
618
+ name=payload["name"],
619
+ meta_data=payload["meta_data"],
620
+ content=payload["content"],
621
+ embedder=self.embedder,
622
+ embedding=item["vector"],
623
+ usage=payload["usage"],
624
+ content_id=payload.get("content_id"),
625
+ )
626
+ )
627
+
628
+ except Exception as e:
629
+ logger.error(f"Error building search results: {e}")
630
+
631
+ return search_results
632
+
633
def drop(self) -> None:
    """Drop the table if it exists and forget every cached handle to it."""
    if self.exists():
        log_debug(f"Deleting collection: {self.table_name}")
        self.connection.drop_table(self.table_name)  # type: ignore
        # Clear the table reference after dropping
        self.table = None
        # exists() treats a cached async handle as proof the table exists, so
        # it must be cleared too or exists() keeps returning True after the drop.
        self.async_table = None
        # The full-text index went away with the table; rebuild it on next use.
        self.fts_index_exists = False
639
+
640
async def async_drop(self) -> None:
    """Drop the table asynchronously and forget every cached handle to it."""
    if await self.async_exists():
        log_debug(f"Deleting collection: {self.table_name}")
        conn = await self._get_async_connection()
        await conn.drop_table(self.table_name)
        # Clear the async table reference after dropping
        self.async_table = None
        # The sync handle now points at a dropped table; clear it so sync
        # methods report "not initialized" instead of using a stale handle.
        self.table = None
        # The full-text index went away with the table; rebuild it on next use.
        self.fts_index_exists = False
648
+
649
def exists(self) -> bool:
    """Return True when an async table handle is cached or the sync connection lists the table."""
    # A cached async handle is taken as proof that the table exists.
    if self.async_table is not None:
        return True
    conn = self.connection
    if not conn:
        return False
    return self.table_name in conn.table_names()
656
+
657
async def async_exists(self) -> bool:
    """Return True when an async table handle is cached or the async connection lists the table.

    Connects on first use; the table itself is never opened here.
    """
    if self.async_table is not None:
        return True

    conn = self.async_connection
    if conn is None:
        conn = await lancedb.connect_async(self.uri)
        self.async_connection = conn

    known_tables = await conn.table_names()
    return self.table_name in known_tables
667
+
668
async def async_get_count(self) -> int:
    """Return the row count from the async table handle, or 0 when no handle is available."""
    # Called for its side effect: it caches the async table handle when the table exists.
    await self._get_async_connection()
    handle = self.async_table
    if handle is None:
        return 0
    return await handle.count_rows()
674
+
675
def get_count(self) -> int:
    """Return the number of rows in the table.

    When an async table handle is cached and no event loop is running, the
    count is taken through ``async_get_count``. Otherwise (or if that fails)
    the sync table handle is used; 0 is returned when the table does not exist
    or no sync handle is set.
    """
    if self.async_table is not None:
        try:
            import asyncio

            try:
                asyncio.get_running_loop()
                # Inside a running loop asyncio.run cannot be used; use the sync path below.
                log_debug("Already in async context, falling back to sync table for count")
            except RuntimeError:
                # No loop is running, so it is safe to drive the coroutine here.
                try:
                    return asyncio.run(self.async_get_count())
                except Exception as async_err:
                    log_debug(f"Failed to get async count: {async_err}")
        except Exception as outer_err:
            log_debug(f"Error in async count logic: {outer_err}")

    if not (self.exists() and self.table):
        return 0
    return self.table.count_rows()
698
+
699
def optimize(self) -> None:
    """No-op: this store performs no optimisation step."""
    return None
701
+
702
def delete(self) -> bool:
    """Not implemented for this store: performs no deletion and always returns False."""
    return False
704
+
705
def name_exists(self, name: str) -> bool:
    """Check if a document with the given name exists in the database.

    Payloads are JSON strings, so every row is fetched and the ``name`` field
    is compared in Python.

    Args:
        name (str): Document name to look for

    Returns:
        bool: True when any stored payload has this name; False when the table
        is not initialised, is empty, nothing matches, or the lookup fails.
    """
    if self.table is None:
        return False

    try:
        total_count = self.table.count_rows()
        if total_count == 0:
            return False

        # Request every row explicitly, like the other payload scans in this
        # class; depending on the LanceDB version a query without an explicit
        # limit may be capped at a default number of rows.
        result = self.table.search().select(["payload"]).limit(total_count).to_pandas()

        # Check if the name exists in any of the payloads
        return any(json.loads(raw).get("name") == name for raw in result["payload"])
    except Exception as e:
        logger.error(f"Error checking name existence: {e}")
        return False
720
+
721
async def async_name_exists(self, name: str) -> bool:
    """Not supported by this store; always raises ``NotImplementedError``."""
    owner = self.__class__.__name__
    raise NotImplementedError(f"Async not supported on {owner}.")
723
+
724
def id_exists(self, id: str) -> bool:
    """Check if a document with the given ID exists in the database.

    Args:
        id (str): Row id to look for

    Returns:
        bool: True when at least one row has this id; False when the table is
        not initialised, nothing matches, or the lookup fails.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        # Double any single quote so the id cannot terminate the string literal
        # in the filter expression and change its meaning.
        safe_id = str(id).replace("'", "''")
        result = self.table.search().where(f"{self._id} = '{safe_id}'").to_pandas()
        return len(result) > 0
    except Exception as e:
        logger.error(f"Error checking id existence: {e}")
        return False
737
+
738
def delete_by_id(self, id: str) -> bool:
    """Delete content by ID.

    Args:
        id (str): Row id whose rows should be deleted

    Returns:
        bool: True when the delete call completed, False when the table is not
        initialised or the delete raised.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        # Double any single quote so a crafted id cannot break out of the string
        # literal and widen the delete predicate (e.g. "' OR '1'='1").
        safe_id = str(id).replace("'", "''")
        self.table.delete(f"{self._id} = '{safe_id}'")
        log_info(f"Deleted records with id '{id}' from table '{self.table_name}'.")
        return True
    except Exception as e:
        logger.error(f"Error deleting rows by id '{id}': {e}")
        return False
752
+
753
def delete_by_name(self, name: str) -> bool:
    """Delete content by name.

    Payloads are JSON strings, so every row is fetched and matched in Python
    before the matching ids are deleted.

    Args:
        name (str): Document name whose rows should be deleted

    Returns:
        bool: True when at least one row was deleted; False when nothing
        matched, the table is not initialised, or an error occurred.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        # Use the tracked id column name; the literal "id" breaks on tables
        # whose schema names the id column differently.
        result = self.table.search().select([self._id, "payload"]).limit(total_count).to_pandas()

        # Find matching IDs
        ids_to_delete = [
            row[self._id] for _, row in result.iterrows() if json.loads(row["payload"]).get("name") == name
        ]

        if not ids_to_delete:
            log_info(f"No records found with name '{name}' to delete.")
            return False

        # Delete matching records (quotes doubled so ids cannot break the predicate)
        for doc_id in ids_to_delete:
            safe_id = str(doc_id).replace("'", "''")
            self.table.delete(f"{self._id} = '{safe_id}'")
        log_info(f"Deleted {len(ids_to_delete)} records with name '{name}' from table '{self.table_name}'.")
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by name '{name}': {e}")
        return False
783
+
784
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete content by metadata.

    A row matches when its stored ``meta_data`` contains every given key with
    an equal value. Payloads are JSON strings, so matching happens in Python.

    Args:
        metadata (Dict[str, Any]): Key/value pairs a row's metadata must contain

    Returns:
        bool: True when at least one row was deleted; False when nothing
        matched, no criteria were given, the table is not initialised, or an
        error occurred.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    if not metadata:
        # An empty filter would match, and therefore delete, every row in the table.
        log_info("No metadata criteria provided, nothing to delete.")
        return False

    try:
        total_count = self.table.count_rows()
        # Use the tracked id column name rather than the literal "id".
        result = self.table.search().select([self._id, "payload"]).limit(total_count).to_pandas()

        # Find matching IDs
        ids_to_delete = []
        for _, row in result.iterrows():
            payload = json.loads(row["payload"])
            # meta_data may be stored as null; treat that as "no metadata"
            # instead of failing the whole delete on a membership test.
            doc_metadata = payload.get("meta_data") or {}

            # Check if all metadata key-value pairs match
            if all(key in doc_metadata and doc_metadata[key] == value for key, value in metadata.items()):
                ids_to_delete.append(row[self._id])

        if not ids_to_delete:
            log_info(f"No records found with metadata '{metadata}' to delete.")
            return False

        # Delete matching records (quotes doubled so ids cannot break the predicate)
        for doc_id in ids_to_delete:
            safe_id = str(doc_id).replace("'", "''")
            self.table.delete(f"{self._id} = '{safe_id}'")
        log_info(
            f"Deleted {len(ids_to_delete)} records with metadata '{metadata}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by metadata '{metadata}': {e}")
        return False
825
+
826
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete content by content ID.

    Payloads are JSON strings, so every row is fetched and matched in Python
    before the matching ids are deleted.

    Args:
        content_id (str): Content id whose rows should be deleted

    Returns:
        bool: True when at least one row was deleted; False when nothing
        matched, the table is not initialised, or an error occurred.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        # Use the tracked id column name rather than the literal "id".
        result = self.table.search().select([self._id, "payload"]).limit(total_count).to_pandas()

        # Find matching IDs
        ids_to_delete = [
            row[self._id]
            for _, row in result.iterrows()
            if json.loads(row["payload"]).get("content_id") == content_id
        ]

        if not ids_to_delete:
            log_info(f"No records found with content_id '{content_id}' to delete.")
            return False

        # Delete matching records (quotes doubled so ids cannot break the predicate)
        for doc_id in ids_to_delete:
            safe_id = str(doc_id).replace("'", "''")
            self.table.delete(f"{self._id} = '{safe_id}'")
        log_info(
            f"Deleted {len(ids_to_delete)} records with content_id '{content_id}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_id '{content_id}': {e}")
        return False
858
+
859
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete content by content hash.

    Payloads are JSON strings, so every row is fetched and matched in Python
    before the matching ids are deleted.

    Args:
        content_hash (str): Content hash whose rows should be deleted

    Returns:
        bool: True when at least one row was deleted; False when nothing
        matched, the table is not initialised, or an error occurred.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        # Use the tracked id column name rather than the literal "id".
        result = self.table.search().select([self._id, "payload"]).limit(total_count).to_pandas()

        # Find matching IDs
        ids_to_delete = [
            row[self._id]
            for _, row in result.iterrows()
            if json.loads(row["payload"]).get("content_hash") == content_hash
        ]

        if not ids_to_delete:
            log_info(f"No records found with content_hash '{content_hash}' to delete.")
            return False

        # Delete matching records (quotes doubled so ids cannot break the predicate)
        for doc_id in ids_to_delete:
            safe_id = str(doc_id).replace("'", "''")
            self.table.delete(f"{self._id} = '{safe_id}'")
        log_info(
            f"Deleted {len(ids_to_delete)} records with content_hash '{content_hash}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_hash '{content_hash}': {e}")
        return False
891
+
892
def content_hash_exists(self, content_hash: str) -> bool:
    """Check if documents with the given content hash exist.

    Args:
        content_hash (str): Content hash to look for

    Returns:
        bool: True when any stored payload carries this content hash; False
        when the table is not initialised, is empty, nothing matches, or the
        lookup fails.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        if total_count == 0:
            return False

        # Only the payload is inspected, so only that column is selected; this
        # also avoids depending on the id column being literally named "id".
        result = self.table.search().select(["payload"]).limit(total_count).to_pandas()

        # Check if any records match the content_hash
        return any(json.loads(raw).get("content_hash") == content_hash for raw in result["payload"])

    except Exception as e:
        logger.error(f"Error checking content_hash existence '{content_hash}': {e}")
        return False
913
+
914
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    The given metadata is merged into both the ``meta_data`` and ``filters``
    entries of each matching row's JSON payload. Rows are updated in place, so
    every other column (including the stored vector) is left untouched.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error raised while reading or updating the table is logged and re-raised.
    """
    import json

    try:
        if self.table is None:
            logger.error("Table not initialized")
            return

        # Get all documents and filter in Python (LanceDB doesn't support JSON operators)
        total_count = self.table.count_rows()
        # Use the tracked id column name rather than the literal "id".
        results = self.table.search().select([self._id, "payload"]).limit(total_count).to_pandas()

        if results.empty:
            logger.debug("No documents found")
            return

        updated_count = 0
        for _, row in results.iterrows():
            current_payload = json.loads(row["payload"])
            if current_payload.get("content_id") != content_id:
                continue

            # Merge into the existing dicts; a missing or null entry is replaced by a copy.
            for section in ("meta_data", "filters"):
                existing = current_payload.get(section)
                if isinstance(existing, dict):
                    existing.update(metadata)
                else:
                    current_payload[section] = dict(metadata)

            # Update the payload column in place. The previous delete-and-re-add
            # rebuilt the row from the selected columns only (id and payload),
            # which dropped the row's vector.
            safe_id = str(row[self._id]).replace("'", "''")
            self.table.update(
                where=f"{self._id} = '{safe_id}'",
                values={"payload": json.dumps(current_payload)},
            )
            updated_count += 1

        if updated_count == 0:
            logger.debug(f"No documents found with content_id: {content_id}")
            return

        logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
992
+
993
def get_supported_search_types(self) -> List[str]:
    """Return the search types this store can run: vector, keyword and hybrid."""
    supported = [SearchType.vector, SearchType.keyword, SearchType.hybrid]
    return supported